Package ganeti :: Module locking
[hide private]
[frames | no frames]

Source Code for Module ganeti.locking

   1  # 
   2  # 
   3   
   4  # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc. 
   5  # 
   6  # This program is free software; you can redistribute it and/or modify 
   7  # it under the terms of the GNU General Public License as published by 
   8  # the Free Software Foundation; either version 2 of the License, or 
   9  # (at your option) any later version. 
  10  # 
  11  # This program is distributed in the hope that it will be useful, but 
  12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
  13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
  14  # General Public License for more details. 
  15  # 
  16  # You should have received a copy of the GNU General Public License 
  17  # along with this program; if not, write to the Free Software 
  18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
  19  # 02110-1301, USA. 
  20   
  21  """Module implementing the Ganeti locking code.""" 
  22   
  23  # pylint: disable=W0212 
  24   
  25  # W0212 since e.g. LockSet methods use (a lot) the internals of 
  26  # SharedLock 
  27   
  28  import os 
  29  import select 
  30  import threading 
  31  import errno 
  32  import weakref 
  33  import logging 
  34  import heapq 
  35  import itertools 
  36  import time 
  37   
  38  from ganeti import errors 
  39  from ganeti import utils 
  40  from ganeti import compat 
  41  from ganeti import query 
  42   
  43   
# Textual lock modes reported by SharedLock.GetLockInfo
_EXCLUSIVE_TEXT = "exclusive"
_SHARED_TEXT = "shared"
_DELETED_TEXT = "deleted"

# Priority used by SharedLock.acquire/delete when the caller passes None
_DEFAULT_PRIORITY = 0

#: Minimum timeout required to consider scheduling a pending acquisition
#: (seconds)
_LOCK_ACQUIRE_MIN_TIMEOUT = (1.0 / 1000)

# Internal lock acquisition modes for L{LockSet}; the values are the
# consecutive integers 1, 2 and 3
(_LS_ACQUIRE_EXACT,
 _LS_ACQUIRE_ALL,
 _LS_ACQUIRE_OPPORTUNISTIC) = range(1, 4)

# Collection of all acquisition modes defined above
_LS_ACQUIRE_MODES = compat.UniqueFrozenset([
  _LS_ACQUIRE_EXACT,
  _LS_ACQUIRE_ALL,
  _LS_ACQUIRE_OPPORTUNISTIC,
  ])
def ssynchronized(mylock, shared=0):
  """Decorator running the wrapped callable while holding a shared lock.

  The lock is acquired (exclusively or shared, depending on C{shared}) before
  the call and always released afterwards, even if the callable raises. The
  lock must be a L{SharedLock} or offer compatible C{acquire(shared=...)} and
  C{release()} methods.

  @type mylock: lockable object or string
  @param mylock: lock to acquire, or the name of the attribute holding the
      lock on the first positional argument (normally C{self})
  @param shared: passed through as the C{shared} argument of C{acquire}

  """
  def _resolve_lock(call_args):
    # Anything which isn't a string is taken to be the lock object itself
    if not isinstance(mylock, basestring):
      return mylock

    assert call_args, "cannot ssynchronize on non-class method: self not found"
    # The first positional argument is "self"
    return getattr(call_args[0], mylock)

  def decorate(fn):
    def sync_function(*args, **kwargs):
      held = _resolve_lock(args)
      held.acquire(shared=shared)
      try:
        return fn(*args, **kwargs)
      finally:
        held.release()

    return sync_function

  return decorate
93 94 -class _SingleNotifyPipeConditionWaiter(object):
95 """Helper class for SingleNotifyPipeCondition 96 97 """ 98 __slots__ = [ 99 "_fd", 100 "_poller", 101 ] 102
103 - def __init__(self, poller, fd):
104 """Constructor for _SingleNotifyPipeConditionWaiter 105 106 @type poller: select.poll 107 @param poller: Poller object 108 @type fd: int 109 @param fd: File descriptor to wait for 110 111 """ 112 object.__init__(self) 113 self._poller = poller 114 self._fd = fd
115
116 - def __call__(self, timeout):
117 """Wait for something to happen on the pipe. 118 119 @type timeout: float or None 120 @param timeout: Timeout for waiting (can be None) 121 122 """ 123 running_timeout = utils.RunningTimeout(timeout, True) 124 125 while True: 126 remaining_time = running_timeout.Remaining() 127 128 if remaining_time is not None: 129 if remaining_time < 0.0: 130 break 131 132 # Our calculation uses seconds, poll() wants milliseconds 133 remaining_time *= 1000 134 135 try: 136 result = self._poller.poll(remaining_time) 137 except EnvironmentError, err: 138 if err.errno != errno.EINTR: 139 raise 140 result = None 141 142 # Check whether we were notified 143 if result and result[0][0] == self._fd: 144 break
145
class _BaseCondition(object):
  """Common plumbing shared by the condition classes.

  Parts of this code are taken from python's threading module.

  """
  __slots__ = [
    "_lock",
    "acquire",
    "release",
    "_is_owned",
    "_acquire_restore",
    "_release_save",
    ]

  def __init__(self, lock):
    """Binds the condition to its base lock.

    Where the lock provides C{_release_save}, C{_acquire_restore} or
    C{is_owned}, those are used; otherwise the generic C{_base_*}
    implementations of this class are used.

    @type lock: threading.Lock
    @param lock: condition base lock

    """
    object.__init__(self)

    # (attribute on the condition, attribute on the lock, fallback)
    delegations = [
      ("_release_save", "_release_save", self._base_release_save),
      ("_acquire_restore", "_acquire_restore", self._base_acquire_restore),
      ("_is_owned", "is_owned", self._base_is_owned),
      ]

    for (own_name, lock_name, fallback) in delegations:
      setattr(self, own_name, getattr(lock, lock_name, fallback))

    self._lock = lock

    # The lock's acquire() and release() are exposed directly
    self.acquire = lock.acquire
    self.release = lock.release

  def _base_is_owned(self):
    """Generic ownership check for locks without C{is_owned}.

    Tries a non-blocking acquire; if that works the lock was free, so it is
    released again and C{False} is returned. Otherwise C{True} is returned.

    """
    grabbed = self._lock.acquire(0)
    if not grabbed:
      return True

    self._lock.release()
    return False

  def _base_release_save(self):
    """Generic C{_release_save}: releases the lock, no state is returned.

    """
    self._lock.release()

  def _base_acquire_restore(self, _):
    """Generic C{_acquire_restore}: acquires the lock, ignoring the state.

    """
    self._lock.acquire()

  def _check_owned(self):
    """Raise an exception unless the ownership check succeeds.

    @raise RuntimeError: if C{_is_owned} returns a false value

    """
    if self._is_owned():
      return

    raise RuntimeError("cannot work with un-aquired lock")
211
class SingleNotifyPipeCondition(_BaseCondition):
  """Condition which can only be notified once.

  Waiting is implemented with a pipe and poll, so a wait with a timeout does
  not need to resort to polling. The class is almost compatible with Python's
  threading.Condition, with the following differences:
    - notifyAll can only be called once, and no wait can happen after that
    - notify is not supported, only notifyAll

  """

  __slots__ = [
    "_poller",
    "_read_fd",
    "_write_fd",
    "_nwaiters",
    "_notified",
    ]

  _waiter_class = _SingleNotifyPipeConditionWaiter

  def __init__(self, lock):
    """Constructor for SingleNotifyPipeCondition

    The pipe and the poller are only created by the first call to L{wait}.

    """
    _BaseCondition.__init__(self, lock)
    self._nwaiters = 0
    self._notified = False
    self._read_fd = None
    self._write_fd = None
    self._poller = None

  def _check_unnotified(self):
    """Refuses to work on a condition which was notified before.

    @raise RuntimeError: if L{notifyAll} has been called already

    """
    if not self._notified:
      return

    raise RuntimeError("cannot use already notified condition")

  def _Cleanup(self):
    """Closes whichever pipe ends are still open and drops the poller.

    """
    for slot in ("_read_fd", "_write_fd"):
      fd = getattr(self, slot)
      if fd is not None:
        os.close(fd)
        setattr(self, slot, None)

    self._poller = None

  def wait(self, timeout):
    """Wait for a notification.

    The base lock is released while waiting and re-acquired before returning.

    @type timeout: float or None
    @param timeout: Waiting timeout (can be None)

    """
    self._check_owned()
    self._check_unnotified()

    self._nwaiters += 1
    try:
      if self._poller is None:
        # Nothing set up yet; create the pipe and watch its reading end
        (self._read_fd, self._write_fd) = os.pipe()
        self._poller = select.poll()
        self._poller.register(self._read_fd, select.POLLHUP)

      waiter = self._waiter_class(self._poller, self._read_fd)
      saved_state = self._release_save()
      try:
        # Block until notified or timed out
        waiter(timeout)
      finally:
        # Take the lock back in the state it was in before
        self._acquire_restore(saved_state)
    finally:
      self._nwaiters -= 1
      if not self._nwaiters:
        # Last waiter is gone, release the file descriptors
        self._Cleanup()

  def notifyAll(self): # pylint: disable=C0103
    """Close the writing side of the pipe to notify all waiters.

    """
    self._check_owned()
    self._check_unnotified()
    self._notified = True

    write_fd = self._write_fd
    if write_fd is None:
      # No pipe is open, nothing to close
      return

    os.close(write_fd)
    self._write_fd = None
305
class PipeCondition(_BaseCondition):
  """Group-only non-polling condition with counters.

  Built on top of single-use pipe conditions: every notification uses up the
  current one and installs a fresh one. It is almost compatible with Python's
  threading.Condition, but only supports notifyAll and non-recursive locks.
  As an additional feature it is able to report which threads are waiting.

  """
  __slots__ = [
    "_waiters",
    "_single_condition",
    ]

  _single_condition_class = SingleNotifyPipeCondition

  def __init__(self, lock):
    """Initializes this class.

    """
    _BaseCondition.__init__(self, lock)
    self._waiters = set()
    self._single_condition = self._single_condition_class(self._lock)

  def wait(self, timeout):
    """Wait for a notification.

    @type timeout: float or None
    @param timeout: Waiting timeout (can be None)

    """
    self._check_owned()

    # Hold on to the condition in use right now; a notifying thread may
    # install a new one while this thread is waiting.
    current_cond = self._single_condition
    me = threading.currentThread()

    self._waiters.add(me)
    try:
      current_cond.wait(timeout)
    finally:
      self._check_owned()
      self._waiters.remove(me)

  def notifyAll(self): # pylint: disable=C0103
    """Notify all currently waiting threads.

    The notified single-use condition is replaced with a new one afterwards.

    """
    self._check_owned()

    self._single_condition.notifyAll()
    self._single_condition = self._single_condition_class(self._lock)

  def get_waiting(self):
    """Returns the set of all waiting threads.

    The internal set itself is returned, not a copy.

    """
    self._check_owned()

    return self._waiters

  def has_waiting(self):
    """Returns whether there are active waiters.

    """
    self._check_owned()

    return bool(self._waiters)

  def __repr__(self):
    cls = self.__class__
    return ("<%s.%s waiters=%s at %#x>" %
            (cls.__module__, cls.__name__, self._waiters, id(self)))
380
class _PipeConditionWithMode(PipeCondition):
  """Pipe condition which additionally remembers an acquisition mode.

  """
  __slots__ = ["shared"]

  def __init__(self, lock, shared):
    """Initializes this class.

    @param lock: condition base lock
    @param shared: mode stored in the C{shared} attribute

    """
    self.shared = shared
    PipeCondition.__init__(self, lock)
393
class SharedLock(object):
  """Implements a shared lock.

  Multiple threads can acquire the lock in a shared way by calling
  C{acquire(shared=1)}. In order to acquire the lock in an exclusive way
  threads can call C{acquire(shared=0)}.

  Notes on data structures: C{__pending} contains a priority queue (heapq) of
  all pending acquires: C{[(priority1: prioqueue1), (priority2: prioqueue2),
  ...]}. Each per-priority queue contains a normal in-order list of conditions
  to be notified when the lock can be acquired. Shared locks are grouped
  together by priority and the condition for them is stored in
  C{__pending_shared} if it already exists. C{__pending_by_prio} keeps
  references for the per-priority queues indexed by priority for faster access.

  All state is protected by an internal C{threading.Lock}; methods whose name
  ends in C{_unlocked} or starts with two underscores expect the caller to
  hold it already.

  @type name: string
  @ivar name: the name of the lock

  """
  __slots__ = [
    "__weakref__",
    "__deleted",
    "__exc",
    "__lock",
    "__pending",
    "__pending_by_prio",
    "__pending_shared",
    "__shr",
    "__time_fn",
    "name",
    ]

  # Condition class used for pending acquires; instances carry a "shared"
  # attribute telling the mode they are waiting for
  __condition_class = _PipeConditionWithMode

  def __init__(self, name, monitor=None, _time_fn=time.time):
    """Construct a new SharedLock.

    @param name: the name of the lock
    @type monitor: L{LockMonitor}
    @param monitor: Lock monitor with which to register
    @param _time_fn: Function returning the current time; used to measure how
        long an acquire has been waiting (replaceable for unittesting)

    """
    object.__init__(self)

    self.name = name

    # Used for unittesting
    self.__time_fn = _time_fn

    # Internal lock
    self.__lock = threading.Lock()

    # Queue containing waiting acquires
    self.__pending = []
    self.__pending_by_prio = {}
    self.__pending_shared = {}

    # Current lock holders
    self.__shr = set()
    self.__exc = None

    # is this lock in the deleted state?
    self.__deleted = False

    # Register with lock monitor
    if monitor:
      logging.debug("Adding lock %s to monitor", name)
      monitor.RegisterLock(self)

  def __repr__(self):
    return ("<%s.%s name=%s at %#x>" %
            (self.__class__.__module__, self.__class__.__name__,
             self.name, id(self)))

  def GetLockInfo(self, requested):
    """Retrieves information for querying locks.

    Fields which were not requested are returned as C{None}.

    @type requested: set
    @param requested: Requested information, see C{query.LQ_*}
    @rtype: list
    @return: List with a single tuple of lock name, mode, list of owner names
        and list of pending acquires (each a tuple of mode and thread names)

    """
    self.__lock.acquire()
    try:
      # Note: to avoid unintentional race conditions, no references to
      # modifiable objects should be returned unless they were created in this
      # function.
      mode = None
      owner_names = None

      if query.LQ_MODE in requested:
        if self.__deleted:
          mode = _DELETED_TEXT
          assert not (self.__exc or self.__shr)
        elif self.__exc:
          mode = _EXCLUSIVE_TEXT
        elif self.__shr:
          mode = _SHARED_TEXT

      # Current owner(s) are wanted
      if query.LQ_OWNER in requested:
        if self.__exc:
          owner = [self.__exc]
        else:
          owner = self.__shr

        if owner:
          assert not self.__deleted
          owner_names = [i.getName() for i in owner]

      # Pending acquires are wanted
      if query.LQ_PENDING in requested:
        pending = []

        # Sorting instead of copying and using heapq functions for simplicity
        for (_, prioqueue) in sorted(self.__pending):
          for cond in prioqueue:
            if cond.shared:
              pendmode = _SHARED_TEXT
            else:
              pendmode = _EXCLUSIVE_TEXT

            # List of names will be sorted in L{query._GetLockPending}
            pending.append((pendmode, [i.getName()
                                       for i in cond.get_waiting()]))
      else:
        pending = None

      return [(self.name, mode, owner_names, pending)]
    finally:
      self.__lock.release()

  def __check_deleted(self):
    """Raises an exception if the lock has been deleted.

    @raise errors.LockError: if the lock is in the deleted state

    """
    if self.__deleted:
      raise errors.LockError("Deleted lock %s" % self.name)

  def __is_sharer(self):
    """Is the current thread sharing the lock at this time?

    """
    return threading.currentThread() in self.__shr

  def __is_exclusive(self):
    """Is the current thread holding the lock exclusively at this time?

    """
    return threading.currentThread() == self.__exc

  def __is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    This is a private version of the function, which presumes you're holding
    the internal lock.

    @param shared: see L{is_owned}

    """
    if shared < 0:
      return self.__is_sharer() or self.__is_exclusive()
    elif shared:
      return self.__is_sharer()
    else:
      return self.__is_exclusive()

  def is_owned(self, shared=-1):
    """Is the current thread somehow owning the lock at this time?

    @param shared:
        - < 0: check for any type of ownership (default)
        - 0: check for exclusive ownership
        - > 0: check for shared ownership

    """
    self.__lock.acquire()
    try:
      return self.__is_owned(shared=shared)
    finally:
      self.__lock.release()

  #: Necessary to remain compatible with threading.Condition, which tries to
  #: retrieve a locks' "_is_owned" attribute
  _is_owned = is_owned

  def _count_pending(self):
    """Returns the number of pending acquires.

    The number is the count of queued conditions over all priorities, not the
    count of waiting threads.

    @rtype: int

    """
    self.__lock.acquire()
    try:
      return sum(len(prioqueue) for (_, prioqueue) in self.__pending)
    finally:
      self.__lock.release()

  def _check_empty(self):
    """Checks whether there are any pending acquires.

    @rtype: bool
    @return: C{True} if none of the pending-acquire structures holds anything

    """
    self.__lock.acquire()
    try:
      # Order is important: __find_first_pending_queue modifies __pending
      (_, prioqueue) = self.__find_first_pending_queue()

      return not (prioqueue or
                  self.__pending or
                  self.__pending_by_prio or
                  self.__pending_shared)
    finally:
      self.__lock.release()

  def __do_acquire(self, shared):
    """Actually acquire the lock.

    Only records the current thread as owner; whether the lock may be taken
    is not checked here.

    """
    if shared:
      self.__shr.add(threading.currentThread())
    else:
      self.__exc = threading.currentThread()

  def __can_acquire(self, shared):
    """Determine whether lock can be acquired.

    A shared acquire only requires that nobody holds the lock exclusively; an
    exclusive acquire requires that there is no owner at all.

    """
    if shared:
      return self.__exc is None
    else:
      return len(self.__shr) == 0 and self.__exc is None

  def __find_first_pending_queue(self):
    """Tries to find the topmost queued entry with pending acquires.

    Removes empty entries while going through the list.

    @rtype: tuple
    @return: Priority and per-priority queue of the first non-empty entry, or
        C{(None, None)} if nothing is pending

    """
    while self.__pending:
      (priority, prioqueue) = self.__pending[0]

      if prioqueue:
        return (priority, prioqueue)

      # Remove empty queue
      heapq.heappop(self.__pending)
      del self.__pending_by_prio[priority]
      assert priority not in self.__pending_shared

    return (None, None)

  def __is_on_top(self, cond):
    """Checks whether the passed condition is on top of the queue.

    The caller must make sure the queue isn't empty.

    """
    (_, prioqueue) = self.__find_first_pending_queue()

    return cond == prioqueue[0]

  def __acquire_unlocked(self, shared, timeout, priority):
    """Acquire a shared lock.

    The internal lock must be held by the caller.

    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @param timeout: maximum waiting time before giving up
    @type priority: integer
    @param priority: Priority for acquiring lock
    @rtype: bool
    @return: Whether the lock was acquired
    @raise errors.LockError: if the lock is or becomes deleted

    """
    self.__check_deleted()

    # We cannot acquire the lock if we already have it
    assert not self.__is_owned(), ("double acquire() on a non-recursive lock"
                                   " %s" % self.name)

    # Remove empty entries from queue
    self.__find_first_pending_queue()

    # Check whether someone else holds the lock or there are pending acquires.
    if not self.__pending and self.__can_acquire(shared):
      # Apparently not, can acquire lock directly.
      self.__do_acquire(shared)
      return True

    # The lock couldn't be acquired right away, so if a timeout is given and is
    # considered too short, return right away as scheduling a pending
    # acquisition is quite expensive
    if timeout is not None and timeout < _LOCK_ACQUIRE_MIN_TIMEOUT:
      return False

    prioqueue = self.__pending_by_prio.get(priority, None)

    if shared:
      # Try to re-use condition for shared acquire
      wait_condition = self.__pending_shared.get(priority, None)
      assert (wait_condition is None or
              (wait_condition.shared and wait_condition in prioqueue))
    else:
      wait_condition = None

    if wait_condition is None:
      if prioqueue is None:
        assert priority not in self.__pending_by_prio

        # First pending acquire on this priority, create its queue
        prioqueue = []
        heapq.heappush(self.__pending, (priority, prioqueue))
        self.__pending_by_prio[priority] = prioqueue

      wait_condition = self.__condition_class(self.__lock, shared)
      prioqueue.append(wait_condition)

      if shared:
        # Keep reference for further shared acquires on same priority. This is
        # better than trying to find it in the list of pending acquires.
        assert priority not in self.__pending_shared
        self.__pending_shared[priority] = wait_condition

    wait_start = self.__time_fn()
    acquired = False

    try:
      # Wait until we become the topmost acquire in the queue or the timeout
      # expires.
      while True:
        if self.__is_on_top(wait_condition) and self.__can_acquire(shared):
          self.__do_acquire(shared)
          acquired = True
          break

        # A lot of code assumes blocking acquires always succeed, therefore we
        # can never return False for a blocking acquire
        if (timeout is not None and
            utils.TimeoutExpired(wait_start, timeout, _time_fn=self.__time_fn)):
          break

        # Wait for notification
        # NOTE(review): the full timeout, not the time remaining since
        # wait_start, is passed on every iteration, so the total wait can
        # exceed "timeout" after a wake-up which doesn't lead to an
        # acquisition -- confirm whether this is intended
        wait_condition.wait(timeout)
        self.__check_deleted()
    finally:
      # Remove condition from queue if there are no more waiters
      if not wait_condition.has_waiting():
        prioqueue.remove(wait_condition)
        if wait_condition.shared:
          # Remove from list of shared acquires if it wasn't while releasing
          # (e.g. on lock deletion)
          self.__pending_shared.pop(priority, None)

    return acquired

  def acquire(self, shared=0, timeout=None, priority=None,
              test_notify=None):
    """Acquire a shared lock.

    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default an
        exclusive lock will be acquired
    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @type priority: integer
    @param priority: Priority for acquiring lock
    @type test_notify: callable or None
    @param test_notify: Special callback function for unittesting
    @rtype: bool
    @return: Whether the lock was acquired
    @raise errors.LockError: if the lock is or becomes deleted

    """
    if priority is None:
      priority = _DEFAULT_PRIORITY

    self.__lock.acquire()
    try:
      # We already got the lock, notify now
      if __debug__ and callable(test_notify):
        test_notify()

      return self.__acquire_unlocked(shared, timeout, priority)
    finally:
      self.__lock.release()

  def downgrade(self):
    """Changes the lock mode from exclusive to shared.

    Pending acquires in shared mode on the same priority will go ahead.

    @rtype: bool
    @return: Always C{True}

    """
    self.__lock.acquire()
    try:
      assert self.__is_owned(), "Lock must be owned"

      if self.__is_exclusive():
        # Do nothing if the lock is already acquired in shared mode
        self.__exc = None
        self.__do_acquire(1)

        # Important: pending shared acquires should only jump ahead if there
        # was a transition from exclusive to shared, otherwise an owner of a
        # shared lock can keep calling this function to push incoming shared
        # acquires
        (priority, prioqueue) = self.__find_first_pending_queue()
        if prioqueue:
          # Is there a pending shared acquire on this priority?
          cond = self.__pending_shared.pop(priority, None)
          if cond:
            assert cond.shared
            assert cond in prioqueue

            # Ensure shared acquire is on top of queue
            if len(prioqueue) > 1:
              prioqueue.remove(cond)
              prioqueue.insert(0, cond)

            # Notify
            cond.notifyAll()

      assert not self.__is_exclusive()
      assert self.__is_sharer()

      return True
    finally:
      self.__lock.release()

  def release(self):
    """Release a Shared Lock.

    You must have acquired the lock, either in shared or in exclusive mode,
    before calling this function.

    """
    self.__lock.acquire()
    try:
      assert self.__is_exclusive() or self.__is_sharer(), \
        "Cannot release non-owned lock"

      # Autodetect release type
      if self.__is_exclusive():
        self.__exc = None
        notify = True
      else:
        self.__shr.remove(threading.currentThread())
        notify = not self.__shr

      # Notify topmost condition in queue if there are no owners left (for
      # shared locks)
      if notify:
        self.__notify_topmost()
    finally:
      self.__lock.release()

  def __notify_topmost(self):
    """Notifies topmost condition in queue of pending acquires.

    The internal lock must be held by the caller.

    """
    (priority, prioqueue) = self.__find_first_pending_queue()
    if prioqueue:
      cond = prioqueue[0]
      cond.notifyAll()
      if cond.shared:
        # Prevent further shared acquires from sneaking in while waiters are
        # notified
        self.__pending_shared.pop(priority, None)

  def _notify_topmost(self):
    """Exported version of L{__notify_topmost}.

    Takes the internal lock itself.

    """
    self.__lock.acquire()
    try:
      return self.__notify_topmost()
    finally:
      self.__lock.release()

  def delete(self, timeout=None, priority=None):
    """Delete a Shared Lock.

    This operation will declare the lock for removal. First the lock will be
    acquired in exclusive mode if you don't already own it, then the lock
    will be put in a state where any future and pending acquire() fail.

    @type timeout: float
    @param timeout: maximum waiting time before giving up
    @type priority: integer
    @param priority: Priority for acquiring lock
    @rtype: bool
    @return: Whether the lock was acquired and hence marked as deleted
    @raise errors.LockError: if the lock was deleted already

    """
    if priority is None:
      priority = _DEFAULT_PRIORITY

    self.__lock.acquire()
    try:
      assert not self.__is_sharer(), "Cannot delete() a lock while sharing it"

      self.__check_deleted()

      # The caller is allowed to hold the lock exclusively already.
      acquired = self.__is_exclusive()

      if not acquired:
        acquired = self.__acquire_unlocked(0, timeout, priority)

      if acquired:
        assert self.__is_exclusive() and not self.__is_sharer(), \
          "Lock wasn't acquired in exclusive mode"

        self.__deleted = True
        self.__exc = None

        assert not (self.__exc or self.__shr), "Found owner during deletion"

        # Notify all acquires. They'll throw an error.
        for (_, prioqueue) in self.__pending:
          for cond in prioqueue:
            cond.notifyAll()

        assert self.__deleted

      return acquired
    finally:
      self.__lock.release()

  def _release_save(self):
    """Releases the lock and returns whether it was held in shared mode.

    The returned value is meant to be passed to L{_acquire_restore}.

    """
    shared = self.__is_sharer()
    self.release()
    return shared

  def _acquire_restore(self, shared):
    """Re-acquires the lock in the mode returned by L{_release_save}.

    """
    self.acquire(shared=shared)
# Whenever we want to acquire a full LockSet we pass None as the value
# to acquire. Hide this behind this nicely named constant, so that callers
# can spell out their intent instead of passing a bare None.
ALL_SET = None
def _TimeoutZero():
  """Timeout function which always reports that no time is left.

  @rtype: int
  @return: Always C{0}

  """
  return 0
932
def _GetLsAcquireModeAndTimeouts(want_all, timeout, opportunistic):
  """Determines modes and timeouts for L{LockSet.acquire}.

  @type want_all: boolean
  @param want_all: Whether all locks in set should be acquired
  @param timeout: Timeout in seconds or C{None}
  @param opportunistic: Whether locks should be acquired opportunistically
  @rtype: tuple
  @return: Tuple containing mode to be passed to L{LockSet.__acquire_inner}
    (one of L{_LS_ACQUIRE_MODES}), a function to calculate timeout for
    acquiring the lockset-internal lock (might be C{None}) and a function to
    calculate the timeout for acquiring individual locks

  """
  # Opportunistic acquisition of named locks needs no running timeout at all
  if opportunistic and not want_all:
    assert timeout is None, "Got timeout for an opportunistic acquisition"
    return (_LS_ACQUIRE_OPPORTUNISTIC, None, _TimeoutZero)

  # The timeout given covers all lock acquisitions together, hence the time
  # spent waiting must be tracked across them.
  remaining_fn = utils.RunningTimeout(timeout, False).Remaining

  if not want_all:
    # Only reached for non-opportunistic acquisitions of named locks; the
    # lockset-internal lock gets no timeout function
    return (_LS_ACQUIRE_EXACT, None, remaining_fn)

  if opportunistic:
    # Whole set, opportunistically: only the internal lock may be waited for
    return (_LS_ACQUIRE_OPPORTUNISTIC, remaining_fn, _TimeoutZero)

  return (_LS_ACQUIRE_ALL, remaining_fn, remaining_fn)
971
class _AcquireTimeout(Exception):
  """Internal exception to abort an acquire on a timeout.

  Adds nothing to L{Exception}; the exception type itself is the signal.

  """
977
978 979 -class LockSet:
980 """Implements a set of locks. 981 982 This abstraction implements a set of shared locks for the same resource type, 983 distinguished by name. The user can lock a subset of the resources and the 984 LockSet will take care of acquiring the locks always in the same order, thus 985 preventing deadlock. 986 987 All the locks needed in the same set must be acquired together, though. 988 989 @type name: string 990 @ivar name: the name of the lockset 991 992 """
993 - def __init__(self, members, name, monitor=None):
994 """Constructs a new LockSet. 995 996 @type members: list of strings 997 @param members: initial members of the set 998 @type monitor: L{LockMonitor} 999 @param monitor: Lock monitor with which to register member locks 1000 1001 """ 1002 assert members is not None, "members parameter is not a list" 1003 self.name = name 1004 1005 # Lock monitor 1006 self.__monitor = monitor 1007 1008 # Used internally to guarantee coherency 1009 self.__lock = SharedLock(self._GetLockName("[lockset]"), monitor=monitor) 1010 1011 # The lockdict indexes the relationship name -> lock 1012 # The order-of-locking is implied by the alphabetical order of names 1013 self.__lockdict = {} 1014 1015 for mname in members: 1016 self.__lockdict[mname] = SharedLock(self._GetLockName(mname), 1017 monitor=monitor) 1018 1019 # The owner dict contains the set of locks each thread owns. For 1020 # performance each thread can access its own key without a global lock on 1021 # this structure. It is paramount though that *no* other type of access is 1022 # done to this structure (eg. no looping over its keys). *_owner helper 1023 # function are defined to guarantee access is correct, but in general never 1024 # do anything different than __owners[threading.currentThread()], or there 1025 # will be trouble. 1026 self.__owners = {}
1027
1028 - def _GetLockName(self, mname):
1029 """Returns the name for a member lock. 1030 1031 """ 1032 return "%s/%s" % (self.name, mname)
1033
1034 - def _get_lock(self):
1035 """Returns the lockset-internal lock. 1036 1037 """ 1038 return self.__lock
1039
1040 - def _get_lockdict(self):
1041 """Returns the lockset-internal lock dictionary. 1042 1043 Accessing this structure is only safe in single-thread usage or when the 1044 lockset-internal lock is held. 1045 1046 """ 1047 return self.__lockdict
1048
1049 - def is_owned(self):
1050 """Is the current thread a current level owner? 1051 1052 @note: Use L{check_owned} to check if a specific lock is held 1053 1054 """ 1055 return threading.currentThread() in self.__owners
1056
1057 - def check_owned(self, names, shared=-1):
1058 """Check if locks are owned in a specific mode. 1059 1060 @type names: sequence or string 1061 @param names: Lock names (or a single lock name) 1062 @param shared: See L{SharedLock.is_owned} 1063 @rtype: bool 1064 @note: Use L{is_owned} to check if the current thread holds I{any} lock and 1065 L{list_owned} to get the names of all owned locks 1066 1067 """ 1068 if isinstance(names, basestring): 1069 names = [names] 1070 1071 # Avoid check if no locks are owned anyway 1072 if names and self.is_owned(): 1073 candidates = [] 1074 1075 # Gather references to all locks (in case they're deleted in the meantime) 1076 for lname in names: 1077 try: 1078 lock = self.__lockdict[lname] 1079 except KeyError: 1080 raise errors.LockError("Non-existing lock '%s' in set '%s' (it may" 1081 " have been removed)" % (lname, self.name)) 1082 else: 1083 candidates.append(lock) 1084 1085 return compat.all(lock.is_owned(shared=shared) for lock in candidates) 1086 else: 1087 return False
1088
1089 - def owning_all(self):
1090 """Checks whether current thread owns internal lock. 1091 1092 Holding the internal lock is equivalent with holding all locks in the set 1093 (the opposite does not necessarily hold as it can not be easily 1094 determined). L{add} and L{remove} require the internal lock. 1095 1096 @rtype: boolean 1097 1098 """ 1099 return self.__lock.is_owned()
1100
1101 - def _add_owned(self, name=None):
1102 """Note the current thread owns the given lock""" 1103 if name is None: 1104 if not self.is_owned(): 1105 self.__owners[threading.currentThread()] = set() 1106 else: 1107 if self.is_owned(): 1108 self.__owners[threading.currentThread()].add(name) 1109 else: 1110 self.__owners[threading.currentThread()] = set([name])
1111
1112 - def _del_owned(self, name=None):
1113 """Note the current thread owns the given lock""" 1114 1115 assert not (name is None and self.__lock.is_owned()), \ 1116 "Cannot hold internal lock when deleting owner status" 1117 1118 if name is not None: 1119 self.__owners[threading.currentThread()].remove(name) 1120 1121 # Only remove the key if we don't hold the set-lock as well 1122 if not (self.__lock.is_owned() or 1123 self.__owners[threading.currentThread()]): 1124 del self.__owners[threading.currentThread()]
1125
def list_owned(self):
    """Return a copy of the set of lock names owned by the current thread.

    @rtype: set
    @return: Names recorded for the current thread, or an empty set if
      L{is_owned} reports no ownership

    """
    if not self.is_owned():
        return set()

    return self.__owners[threading.currentThread()].copy()
1132
def _release_and_delete_owned(self):
    """Release every lock owned by the current thread and forget about it."""
    for owned_name in self.list_owned():
        owned_lock = self.__lockdict[owned_name]

        # Only locks actually held are released; the bookkeeping entry is
        # dropped in any case
        if owned_lock.is_owned():
            owned_lock.release()

        self._del_owned(name=owned_name)
1140
def __names(self):
    """Return the names currently present in the lock dictionary.

    Must only be called while holding __lock; the result must not be
    iterated over once that lock has been released.

    """
    lock_names = self.__lockdict.keys()
    return lock_names
1149
def _names(self):
    """Return a copy of the current set of lock names.

    Used only for debugging purposes.

    @rtype: set

    """
    # The set-level lock is taken in shared mode unless the caller already
    # owns it; in that case it must not be released here either.
    must_release = not self.__lock.is_owned()
    if must_release:
        self.__lock.acquire(shared=1)

    try:
        current = self.__names()
    finally:
        if must_release:
            self.__lock.release()

    return set(current)
1168
def acquire(self, names, timeout=None, shared=0, priority=None,
            opportunistic=False, test_notify=None):
    """Acquire a set of resource locks.

    @note: When acquiring locks opportunistically, any number of locks might
      actually be acquired, even zero.

    @type names: list of strings (or string), or None
    @param names: the names of the locks which shall be acquired
      (special lock names, or instance/node names); with C{None} the
      L{LockSet}-internal lock is acquired first and then the locks for all
      names currently in the set are requested
    @type shared: integer (0/1) used as a boolean
    @param shared: whether to acquire in shared mode; by default an
      exclusive lock will be acquired
    @type timeout: float or None
    @param timeout: Maximum time to acquire all locks; for opportunistic
      acquisitions, a timeout can only be given when C{names} is C{None}, in
      which case it is exclusively used for acquiring the L{LockSet}-internal
      lock; opportunistic acquisitions don't use a timeout for acquiring
      individual locks
    @type priority: integer
    @param priority: Priority for acquiring locks; C{None} selects
      L{_DEFAULT_PRIORITY}
    @type opportunistic: boolean
    @param opportunistic: Acquire locks opportunistically; use the return value
      to determine which locks were actually acquired
    @type test_notify: callable or None
    @param test_notify: Special callback function for unittesting

    @return: Set of all locks successfully acquired or None in case of timeout

    @raise errors.LockError: when any lock we try to acquire has
      been deleted before we succeed. In this case none of the
      locks requested will be acquired.

    """
    assert timeout is None or timeout >= 0.0

    # Check we don't already own locks at this level
    assert not self.is_owned(), ("Cannot acquire locks in the same set twice"
                                 " (lockset %s)" % self.name)

    if priority is None:
        priority = _DEFAULT_PRIORITY

    # _AcquireTimeout raised anywhere below is turned into a None return value
    # by the outermost handler.
    try:
        if names is not None:
            assert timeout is None or not opportunistic, \
                ("Opportunistic acquisitions can only use a timeout if no"
                 " names are given; see docstring for details")

            # Support passing in a single resource to acquire rather than many
            if isinstance(names, basestring):
                names = [names]

            # NOTE(review): helper defined elsewhere in this module; it is
            # called with False here and with True in the whole-set branch
            (mode, _, timeout_fn) = \
                _GetLsAcquireModeAndTimeouts(False, timeout, opportunistic)

            return self.__acquire_inner(names, mode, shared, priority,
                                        timeout_fn, test_notify)

        else:
            (mode, ls_timeout_fn, timeout_fn) = \
                _GetLsAcquireModeAndTimeouts(True, timeout, opportunistic)

            # If no names are given acquire the whole set by not letting new
            # names being added before we release, and getting the current
            # list of names. Some of them may then be deleted later, but
            # we'll cope with this.
            #
            # We'd like to acquire this lock in a shared way, as it's nice if
            # everybody else can use the instances at the same time. If we
            # are acquiring them exclusively, though, they won't be able to
            # do this anyway, so we'll get the list lock exclusively as well
            # in order to be able to do add() on the set while owning it.
            if not self.__lock.acquire(shared=shared, priority=priority,
                                       timeout=ls_timeout_fn()):
                raise _AcquireTimeout()

            try:
                # note we own the set-lock
                self._add_owned()

                return self.__acquire_inner(self.__names(), mode, shared,
                                            priority, timeout_fn, test_notify)
            except:
                # Anything raised while recording ownership or by
                # __acquire_inner (including _AcquireTimeout) ends up here:
                # give back the set-lock, drop the ownership entry and
                # re-raise. The set-lock has to be released first as
                # _del_owned() refuses to run without a name while it is
                # still held.
                self.__lock.release()
                self._del_owned()
                raise

    except _AcquireTimeout:
        return None
1261
def __acquire_inner(self, names, mode, shared, priority,
                    timeout_fn, test_notify):
    """Inner logic for acquiring a number of locks.

    Acquisition modes:

      - C{_LS_ACQUIRE_ALL}: C{names} contains names of all locks in set, but
        deleted locks can be ignored as the whole set is being acquired with
        its internal lock held
      - C{_LS_ACQUIRE_EXACT}: The names listed in C{names} must be acquired;
        timeouts and deleted locks are fatal
      - C{_LS_ACQUIRE_OPPORTUNISTIC}: C{names} lists names of locks (potentially
        all within the set) which should be acquired opportunistically, that is
        failures are ignored

    @param names: Names of the locks to be acquired
    @param mode: Lock acquisition mode (one of L{_LS_ACQUIRE_MODES})
    @param shared: Whether to acquire in shared mode
    @param timeout_fn: Function returning remaining timeout (C{None} for
      opportunistic acquisitions)
    @param priority: Priority for acquiring locks
    @param test_notify: Special callback function for unittesting
    @rtype: set
    @return: Names of the locks which were acquired
    @raise errors.LockError: In mode C{_LS_ACQUIRE_EXACT} when a lock does
      not exist or was deleted; also when a blocking acquisition fails
    @raise _AcquireTimeout: When a lock could not be acquired within its
      timeout and the mode is not opportunistic

    """
    assert mode in _LS_ACQUIRE_MODES

    acquire_list = []

    # First we look the locks up on __lockdict. We have no way of being sure
    # they will still be there after, but this makes it a lot faster should
    # just one of them already be wrong. Using a sorted sequence to prevent
    # deadlocks; duplicate names are dropped by the frozenset.
    for lname in sorted(frozenset(names)):
        try:
            lock = self.__lockdict[lname]  # raises KeyError if lock is not there
        except KeyError:
            # We are acquiring the whole set, it doesn't matter if this
            # particular element is not there anymore. If, however, only
            # certain names should be acquired, not finding a lock is an
            # error.
            if mode == _LS_ACQUIRE_EXACT:
                raise errors.LockError("Lock '%s' not found in set '%s' (it may have"
                                       " been removed)" % (lname, self.name))
        else:
            acquire_list.append((lname, lock))

    # This will hold the locknames we effectively acquired.
    acquired = set()

    try:
        # Now acquire_list contains a sorted list of resources and locks we
        # want. In order to get them we loop on this (private) list and
        # acquire() them. We gave no real guarantee they will still exist
        # till this is done but .acquire() itself is safe and will alert us
        # if the lock gets deleted.
        for (lname, lock) in acquire_list:
            # The callback is only wired up when assertions are enabled
            if __debug__ and callable(test_notify):
                test_notify_fn = lambda: test_notify(lname)
            else:
                test_notify_fn = None

            timeout = timeout_fn()

            try:
                # raises LockError if the lock was deleted
                acq_success = lock.acquire(shared=shared, timeout=timeout,
                                           priority=priority,
                                           test_notify=test_notify_fn)
            except errors.LockError:
                if mode in (_LS_ACQUIRE_ALL, _LS_ACQUIRE_OPPORTUNISTIC):
                    # We are acquiring the whole set or acquiring
                    # opportunistically, it doesn't matter if this particular
                    # element is not there anymore.
                    continue

                raise errors.LockError("Lock '%s' not found in set '%s' (it may have"
                                       " been removed)" % (lname, self.name))

            if not acq_success:
                # Couldn't get lock or timeout occurred
                if mode == _LS_ACQUIRE_OPPORTUNISTIC:
                    # Ignore timeouts on opportunistic acquisitions
                    continue

                if timeout is None:
                    # This shouldn't happen as
                    # SharedLock.acquire(timeout=None) is blocking.
                    raise errors.LockError("Failed to get lock %s (set %s)" %
                                           (lname, self.name))

                raise _AcquireTimeout()

            try:
                # now the lock cannot be deleted, we have it!
                self._add_owned(name=lname)
                acquired.add(lname)

            except:
                # We shouldn't have problems adding the lock to the owners
                # list, but if we did we'll try to release this lock and
                # re-raise exception. Of course something is going to be
                # really wrong after this.
                if lock.is_owned():
                    lock.release()
                raise

    except:
        # Any failure (including a timeout) undoes the acquisitions made so
        # far: release all owned locks and pass the exception on
        self._release_and_delete_owned()
        raise

    return acquired
1371
def downgrade(self, names=None):
    """Switch locks of this set from exclusive to shared mode.

    The locks must have been acquired in exclusive mode.

    @type names: list of strings, string or None
    @param names: Names of the locks to downgrade; C{None} selects all
      locks owned by the current thread
    @return: C{True}

    """
    assert self.is_owned(), ("downgrade on lockset %s while not owning any"
                             " lock" % self.name)

    # A single name is accepted as well as a sequence of names
    if isinstance(names, basestring):
        names = [names]

    held = self.list_owned()

    if names is not None:
        names = set(names)
        assert held.issuperset(names), \
            ("downgrade() on unheld resources %s (set %s)" %
             (names.difference(held), self.name))
    else:
        names = held

    for lname in names:
        self.__lockdict[lname].downgrade()

    # If the lockset itself is owned in exclusive mode and no lock in it is
    # owned exclusively anymore, the set-lock is downgraded as well
    if (self.__lock.is_owned(shared=0) and
            not compat.any(lock.is_owned(shared=0)
                           for lock in self.__lockdict.values())):
        self.__lock.downgrade()
        assert self.__lock.is_owned(shared=1)

    return True
1407
def release(self, names=None):
    """Release locks of this set held by the current thread.

    The locks must have been acquired beforehand, either in shared or in
    exclusive mode.

    @type names: list of strings, string or None
    @param names: the names of the locks which shall be released
      (defaults to all the locks acquired at that level)

    """
    assert self.is_owned(), ("release() on lock set %s while not owner" %
                             self.name)

    # A single name is accepted as well as a sequence of names
    if isinstance(names, basestring):
        names = [names]

    if names is not None:
        names = set(names)
        assert self.list_owned().issuperset(names), (
            "release() on unheld resources %s (set %s)" %
            (names.difference(self.list_owned()), self.name))
    else:
        names = self.list_owned()

    # The "all elements" lock goes first, if held; once it is released
    # 'add' can work again
    if self.__lock.is_owned():
        self.__lock.release()
        self._del_owned()

    for lname in names:
        # This is fine as long as a lock doesn't leave __lockdict without
        # being exclusively held
        held_lock = self.__lockdict[lname]
        held_lock.release()
        self._del_owned(name=lname)
1445
def add(self, names, acquired=0, shared=0):
    """Add a new set of elements to the set

    @type names: list of strings (or string)
    @param names: names of the new elements to add
    @type acquired: integer (0/1) used as a boolean
    @param acquired: pre-acquire the new resource?
    @type shared: integer (0/1) used as a boolean
    @param shared: is the pre-acquisition shared?
    @return: C{True}
    @raise errors.LockError: if any of the names already exists in the set

    """
    # Either nothing is owned at this level, or the set-level lock is owned
    # in exclusive mode
    assert not self.is_owned() or self.__lock.is_owned(shared=0), \
        ("Cannot add locks if the set %s is only partially owned, or shared" %
         self.name)

    # Support passing in a single resource to add rather than many
    if isinstance(names, basestring):
        names = [names]

    # If we don't already own the set-level lock acquired in an exclusive way
    # we'll get it and note we need to release it later.
    release_lock = False
    if not self.__lock.is_owned():
        release_lock = True
        self.__lock.acquire()

    try:
        invalid_names = set(self.__names()).intersection(names)
        if invalid_names:
            # This must be an explicit raise, not an assert, because assert
            # is turned off when using optimization, and this can happen
            # because of concurrency even if the user doesn't want it.
            raise errors.LockError("duplicate add(%s) on lockset %s" %
                                   (invalid_names, self.name))

        for lockname in names:
            lock = SharedLock(self._GetLockName(lockname), monitor=self.__monitor)

            if acquired:
                # No need for priority or timeout here as this lock has just
                # been created
                lock.acquire(shared=shared)
                # now the lock cannot be deleted, we have it!
                try:
                    self._add_owned(name=lockname)
                except:
                    # We shouldn't have problems adding the lock to the
                    # owners list, but if we did we'll try to release this
                    # lock and re-raise exception. Of course something is
                    # going to be really wrong, after this. On the other hand
                    # the lock hasn't been added to the __lockdict yet so no
                    # other threads should be pending on it. This release is
                    # just a safety measure.
                    lock.release()
                    raise

            # The lock only becomes visible to other threads here
            self.__lockdict[lockname] = lock

    finally:
        # Only release __lock if we were not holding it previously.
        if release_lock:
            self.__lock.release()

    return True
1510
def remove(self, names):
    """Remove elements from the lock set.

    You can either not hold anything in the lockset or already hold a superset
    of the elements you want to delete, exclusively.

    @type names: list of strings (or string)
    @param names: names of the resource to remove.

    @return: a list of locks which we removed; the list is always
      equal to the names list if we were holding all the locks
      exclusively

    """
    # Support passing in a single resource to remove rather than many
    if isinstance(names, basestring):
        names = [names]

    # If we own any subset of this lock it must be a superset of what we want
    # to delete. The ownership must also be exclusive, but that will be
    # checked by the lock itself.
    assert not self.is_owned() or self.list_owned().issuperset(names), (
        "remove() on acquired lockset %s while not owning all elements" %
        self.name)

    removed = []

    for lname in names:
        # Calling delete() acquires the lock exclusively if we don't already
        # own it, and causes all pending and subsequent lock acquires to
        # fail. It's fine to call it out of order because delete() also
        # implies release(), and the assertion above guarantees that we
        # either already hold everything we want to delete, or we hold none.
        try:
            self.__lockdict[lname].delete()
            removed.append(lname)
        except (KeyError, errors.LockError):
            # The name is unknown or the lock was deleted by someone else.
            # This cannot happen if we were already holding it, verify:
            assert not self.is_owned(), ("remove failed while holding lockset %s" %
                                         self.name)
        else:
            # If no LockError was raised we are the ones who deleted the
            # lock. This means we can safely remove it from lockdict, as any
            # further or pending delete() or acquire() will fail (and nobody
            # can have the lock since before our call to delete()).
            #
            # This is done in an else clause because if the exception was
            # thrown it's the job of the one who actually deleted it.
            del self.__lockdict[lname]
            # And let's remove it from our private list if we owned it.
            if self.is_owned():
                self._del_owned(name=lname)

    return removed
# Locking levels; locks must be acquired in increasing level order. The
# current rules are:
#   - LEVEL_CLUSTER holds the Big Ganeti Lock (BGL), which must be acquired,
#     either in shared or exclusive mode, before performing any operation.
#     Acquiring the BGL in exclusive mode is discouraged and should be
#     avoided.
#   - LEVEL_NODE and LEVEL_INSTANCE hold the node and instance locks. If you
#     need more than one node, or more than one instance, acquire them at
#     the same time.
#   - LEVEL_NODE_RES is for node resources and should be used by operations
#     with possibly high impact on the node's disks.
#   - LEVEL_NODE_ALLOC blocks instance allocations for the whole cluster
#     ("NAL" is the only lock at this level). It should be acquired in shared
#     mode when an opcode blocks all or a significant amount of a cluster's
#     locks. Opcodes doing instance allocations should acquire in exclusive
#     mode. Once the set of acquired locks for an opcode has been reduced to
#     the working set, the NAL should be released as well to allow
#     allocations to proceed.
(LEVEL_CLUSTER,
 LEVEL_INSTANCE,
 LEVEL_NODE_ALLOC,
 LEVEL_NODEGROUP,
 LEVEL_NODE,
 LEVEL_NODE_RES,
 LEVEL_NETWORK) = range(7)

# All levels, in acquisition order
LEVELS = [
    LEVEL_CLUSTER,
    LEVEL_INSTANCE,
    LEVEL_NODE_ALLOC,
    LEVEL_NODEGROUP,
    LEVEL_NODE,
    LEVEL_NODE_RES,
    LEVEL_NETWORK,
]

# Lock levels which are modifiable
LEVELS_MOD = compat.UniqueFrozenset([
    LEVEL_INSTANCE,
    LEVEL_NODEGROUP,
    LEVEL_NODE,
    LEVEL_NODE_RES,
    LEVEL_NETWORK,
])

#: Lock level names (make sure to use singular form)
LEVEL_NAMES = {
    LEVEL_CLUSTER: "cluster",
    LEVEL_INSTANCE: "instance",
    LEVEL_NODE_ALLOC: "node-alloc",
    LEVEL_NODEGROUP: "nodegroup",
    LEVEL_NODE: "node",
    LEVEL_NODE_RES: "node-res",
    LEVEL_NETWORK: "network",
}

# Name of the big ganeti lock
BGL = "BGL"

#: Name of the node allocation lock
NAL = "NAL"
class GanetiLockManager:
    """The Ganeti Locking Library

    Central place for managing the locks of a ganeti cluster, performing
    dynamic checks against possible deadlocks on the way. It is also meant
    to make a transition to a different lock type easier, should python
    threads be abandoned.

    """
    _instance = None

    def __init__(self, nodes, nodegroups, instances, networks):
        """Constructs a new GanetiLockManager object.

        Only one GanetiLockManager object may exist at any time; an assertion
        fails if an instance has been created before.

        @param nodes: list of node names
        @param nodegroups: list of nodegroup uuids
        @param instances: list of instance names
        @param networks: initial lock names for the network level

        """
        cls = self.__class__
        assert cls._instance is None, "double GanetiLockManager instance"
        cls._instance = self

        monitor = LockMonitor()
        self._monitor = monitor

        # The keyring maps each level to the lock set holding its locks.
        self.__keyring = {
            LEVEL_CLUSTER: LockSet([BGL], "cluster", monitor=monitor),
            LEVEL_NODE: LockSet(nodes, "node", monitor=monitor),
            LEVEL_NODE_RES: LockSet(nodes, "node-res", monitor=monitor),
            LEVEL_NODEGROUP: LockSet(nodegroups, "nodegroup", monitor=monitor),
            LEVEL_INSTANCE: LockSet(instances, "instance", monitor=monitor),
            LEVEL_NETWORK: LockSet(networks, "network", monitor=monitor),
            LEVEL_NODE_ALLOC: LockSet([NAL], "node-alloc", monitor=monitor),
        }

        # Every lock set must be named after its level
        assert compat.all(lockset.name == LEVEL_NAMES[lvl]
                          for (lvl, lockset) in self.__keyring.items()), \
            "Keyring name mismatch"

    def AddToLockMonitor(self, provider):
        """Registers a new lock with the monitor.

        See L{LockMonitor.RegisterLock}.

        """
        monitor = self._monitor
        return monitor.RegisterLock(provider)

    def QueryLocks(self, fields):
        """Queries information from all locks.

        See L{LockMonitor.QueryLocks}.

        """
        monitor = self._monitor
        return monitor.QueryLocks(fields)

    def _names(self, level):
        """List the lock names at the given level.

        This can be used for debugging/testing purposes.

        @param level: the level whose list of locks to get

        """
        assert level in LEVELS, "Invalid locking level %s" % level
        lockset = self.__keyring[level]
        return lockset._names()

    def is_owned(self, level):
        """Check whether we are owning locks at the given level

        """
        lockset = self.__keyring[level]
        return lockset.is_owned()

    def list_owned(self, level):
        """Get the set of owned locks at the given level

        """
        lockset = self.__keyring[level]
        return lockset.list_owned()

    def check_owned(self, level, names, shared=-1):
        """Check if locks at a certain level are owned in a specific mode.

        @see: L{LockSet.check_owned}

        """
        lockset = self.__keyring[level]
        return lockset.check_owned(names, shared=shared)

    def owning_all(self, level):
        """Checks whether current thread owns all locks at a certain level.

        @see: L{LockSet.owning_all}

        """
        lockset = self.__keyring[level]
        return lockset.owning_all()

    def _upper_owned(self, level):
        """Check whether any lock is owned at a level greater than the given one.

        @return: true value if a lock is owned at any level above C{level}

        """
        # Slicing LEVELS like this only works if LEVELS[i] = i, which is
        # checked for in the test cases.
        higher_levels = LEVELS[level + 1:]
        return compat.any(self.is_owned(lvl) for lvl in higher_levels)

    def _BGL_owned(self):  # pylint: disable=C0103
        """Check if the current thread owns the BGL.

        Both an exclusive or a shared acquisition work.

        """
        cluster_owned = self.__keyring[LEVEL_CLUSTER].list_owned()
        return BGL in cluster_owned

    @staticmethod
    def _contains_BGL(level, names):  # pylint: disable=C0103
        """Check if the level contains the BGL.

        Tells whether acting on the given level and set of names will change
        the status of the Big Ganeti Lock.

        """
        if level != LEVEL_CLUSTER:
            return False

        return names is None or BGL in names

    def acquire(self, level, names, timeout=None, shared=0, priority=None,
                opportunistic=False):
        """Acquire a set of resource locks, at the same level.

        @type level: member of locking.LEVELS
        @param level: the level at which the locks shall be acquired
        @type names: list of strings (or string)
        @param names: the names of the locks which shall be acquired
          (special lock names, or instance/node names)
        @type shared: integer (0/1) used as a boolean
        @param shared: whether to acquire in shared mode; by default
          an exclusive lock will be acquired
        @type timeout: float
        @param timeout: Maximum time to acquire all locks
        @type priority: integer
        @param priority: Priority for acquiring lock
        @type opportunistic: boolean
        @param opportunistic: Acquire locks opportunistically; use the return
          value to determine which locks were actually acquired
        @return: the result of L{LockSet.acquire} for the level's lock set

        """
        assert level in LEVELS, "Invalid locking level %s" % level

        # Either the Big Ganeti Lock is being acquired or it must be owned
        # already. Some "legacy" opcodes need to be sure they are run
        # non-concurrently so even if we've migrated we need to at least
        # share the BGL to be compatible with them. Of course if we own the
        # BGL exclusively there's no point in acquiring any other lock,
        # unless perhaps we are half way through the migration of the current
        # opcode.
        assert (self._contains_BGL(level, names) or self._BGL_owned()), (
            "You must own the Big Ganeti Lock before acquiring any other")

        # No lock may be owned at a greater level
        assert not self._upper_owned(level), ("Cannot acquire locks at a level"
                                              " while owning some at a greater one")

        # Delegate to the lock set of the level
        lockset = self.__keyring[level]
        return lockset.acquire(names, shared=shared, timeout=timeout,
                               priority=priority,
                               opportunistic=opportunistic)

    def downgrade(self, level, names=None):
        """Downgrade a set of resource locks from exclusive to shared mode.

        You must have acquired the locks in exclusive mode.

        @type level: member of locking.LEVELS
        @param level: the level at which the locks shall be downgraded
        @type names: list of strings, or None
        @param names: the names of the locks which shall be downgraded
          (defaults to all the locks acquired at the level)

        """
        assert level in LEVELS, "Invalid locking level %s" % level

        lockset = self.__keyring[level]
        return lockset.downgrade(names=names)

    def release(self, level, names=None):
        """Release a set of resource locks, at the same level.

        You must have acquired the locks, either in shared or in exclusive
        mode, before releasing them.

        @type level: member of locking.LEVELS
        @param level: the level at which the locks shall be released
        @type names: list of strings, or None
        @param names: the names of the locks which shall be released
          (defaults to all the locks acquired at that level)

        """
        assert level in LEVELS, "Invalid locking level %s" % level
        # The BGL must not be given up while locks above the cluster level
        # are still owned
        assert not (self._contains_BGL(level, names) and
                    self._upper_owned(LEVEL_CLUSTER)), (
            "Cannot release the Big Ganeti Lock while holding something"
            " at upper levels (%r)" %
            (utils.CommaJoin(["%s=%r" % (LEVEL_NAMES[i], self.list_owned(i))
                              for i in self.__keyring.keys()]), ))

        # The lock set complains if the locks are not owned already
        lockset = self.__keyring[level]
        return lockset.release(names)

    def add(self, level, names, acquired=0, shared=0):
        """Add locks at the specified level.

        @type level: member of locking.LEVELS_MOD
        @param level: the level at which the locks shall be added
        @type names: list of strings
        @param names: names of the locks to add
        @type acquired: integer (0/1) used as a boolean
        @param acquired: whether to acquire the newly added locks
        @type shared: integer (0/1) used as a boolean
        @param shared: whether the acquisition will be shared

        """
        assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
        assert self._BGL_owned(), ("You must own the BGL before performing other"
                                   " operations")
        assert not self._upper_owned(level), ("Cannot add locks at a level"
                                              " while owning some at a greater one")

        lockset = self.__keyring[level]
        return lockset.add(names, acquired=acquired, shared=shared)

    def remove(self, level, names):
        """Remove locks from the specified level.

        You must either already own the locks you are trying to remove
        exclusively or not own any lock at an upper level.

        @type level: member of locking.LEVELS_MOD
        @param level: the level at which the locks shall be removed
        @type names: list of strings
        @param names: the names of the locks which shall be removed
          (special lock names, or instance/node names)

        """
        assert level in LEVELS_MOD, "Invalid or immutable level %s" % level
        assert self._BGL_owned(), ("You must own the BGL before performing other"
                                   " operations")

        # Either the level is owned or nothing is owned from here up.
        # LockSet.remove() checks the case in which not all the needed
        # resources are owned, or the ownership is shared.
        assert self.is_owned(level) or not self._upper_owned(level), (
            "Cannot remove locks at a level while not owning it or"
            " owning some at a greater one")

        lockset = self.__keyring[level]
        return lockset.remove(names)
1878
def _MonitorSortKey(entry):
    """Sorting key function.

    Sort by name, registration order and then order of information. This
    provides a stable sort order over different providers, even if they
    return the same name.

    The tuple is unpacked in the body rather than in the signature, as
    tuple parameters are not supported by newer Python versions.

    @type entry: tuple
    @param entry: C{(item, idx, num)} as built by
      L{LockMonitor._GetLockInfo}: C{item} is a four-element tuple whose
      first element is the lock name, C{idx} the position of the item
      within its provider's result and C{num} the provider's registration
      number
    @rtype: tuple
    @return: C{(nice sort key of name, num, idx)}

    """
    (item, idx, num) = entry
    (name, _, _, _) = item

    return (utils.NiceSortKey(name), num, idx)
1891
class LockMonitor(object):
    # Name of the attribute holding the lock used by the ssynchronized
    # decorator
    _LOCK_ATTR = "_lock"

    def __init__(self):
        """Initializes this class.

        """
        self._lock = SharedLock("LockMonitor")

        # Source of registration numbers, used for stable sorting
        self._counter = itertools.count(0)

        # Tracked lock information providers, mapped to their registration
        # number. Weak references are used to avoid issues with circular
        # references and deletion.
        self._locks = weakref.WeakKeyDictionary()

    @ssynchronized(_LOCK_ATTR)
    def RegisterLock(self, provider):
        """Registers a new lock.

        @param provider: Object with a callable method named C{GetLockInfo},
          taking a single C{set} containing the requested information items
        @note: It would be nicer to only receive the function generating the
          requested information but, as it turns out, weak references to bound
          methods (e.g. C{self.GetLockInfo}) are tricky; there are several
          workarounds, but none of the ones I found works properly in
          combination with a standard C{WeakKeyDictionary}

        """
        assert provider not in self._locks, "Duplicate registration"

        # There used to be a check for duplicate names here. As it turned
        # out, when a lock is re-created with the same name in a very short
        # timeframe, the previous instance might not yet be removed from the
        # weakref dictionary. By keeping track of the order of incoming
        # registrations, a stable sort ordering can still be guaranteed.
        registration_number = self._counter.next()
        self._locks[provider] = registration_number

    def _GetLockInfo(self, requested):
        """Get information from all locks.

        @param requested: Requested information items, handed to every
          provider's C{GetLockInfo}
        @rtype: list
        @return: C{(info, idx, num)} tuples; C{idx} is the position of
          C{info} within its provider's result and C{num} the provider's
          registration number

        """
        # The lock is only held while taking the list of tracked providers;
        # the providers themselves are queried afterwards
        self._lock.acquire(shared=1)
        try:
            tracked = self._locks.items()
        finally:
            self._lock.release()

        result = []
        for (provider, num) in tracked:
            for (idx, info) in enumerate(provider.GetLockInfo(requested)):
                result.append((info, idx, num))

        return result

    def _Query(self, fields):
        """Queries information from all locks.

        @type fields: list of strings
        @param fields: List of fields to return
        @rtype: tuple
        @return: The query object and the query data built from the sorted
          lock information

        """
        qobj = query.Query(query.LOCK_FIELDS, fields)

        # Collect the information from all providers, then sort by name and
        # incoming order
        collected = self._GetLockInfo(qobj.RequestedData())
        lockinfo = sorted(collected, key=_MonitorSortKey)

        # Only the lock information itself goes into the query data
        ctx = query.LockQueryData(map(compat.fst, lockinfo))

        return (qobj, ctx)

    def QueryLocks(self, fields):
        """Queries information from all locks.

        @type fields: list of strings
        @param fields: List of fields to return

        """
        (qobj, ctx) = self._Query(fields)

        # Prepare query response
        response = query.GetQueryResponse(qobj, ctx)
        return response
1975