22 """Configuration management for Ganeti
23
24 This module provides the interface to the Ganeti cluster configuration.
25
26 The configuration data is stored on every node but is updated on the master
27 only. After each update, the master distributes the data to the other nodes.
28
29 Currently, the data storage format is JSON. YAML was slow and consuming too
30 much memory.
31
32 """
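# Illustrative usage sketch (not from the original module; names such as
# "node1.example.com" are placeholders). A typical read-modify-write cycle
# against a ConfigWriter instance "cfg" would look like:
#
#   node = cfg.GetNodeInfo("node1.example.com")
#   node.offline = True
#   cfg.Update(node, feedback_fn=None)  # bumps serials, saves, distributes
#
# The on-disk format is whatever the "serializer" module produces, i.e. JSON.
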
import os
import random
import logging
import time
import itertools

from ganeti import errors
from ganeti import locking
from ganeti import utils
from ganeti import constants
from ganeti import rpc
from ganeti import objects
from ganeti import serializer
from ganeti import uidpool
from ganeti import netutils
from ganeti import runtime


_config_lock = locking.SharedLock("ConfigWriter")


_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"


def _ValidateConfig(data):
  """Verifies that a configuration object looks valid.

  This only verifies the version of the configuration.

  @raise errors.ConfigurationError: if the version differs from what
      we expect

  """
  if data.version != constants.CONFIG_VERSION:
    raise errors.ConfigurationError("Cluster configuration version"
                                    " mismatch, got %s instead of %s" %
                                    (data.version,
                                     constants.CONFIG_VERSION))

75 """A temporary resource reservation manager.
76
77 This is used to reserve resources in a job, before using them, making sure
78 other jobs cannot get them in the meantime.
79
80 """
  def __init__(self):
    self._ec_reserved = {}

  def Reserved(self, resource):
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    if ec_id not in self._ec_reserved:
      self._ec_reserved[ec_id] = set([resource])
    else:
      self._ec_reserved[ec_id].add(resource)

  def DropECReservations(self, ec_id):
    if ec_id in self._ec_reserved:
      del self._ec_reserved[ec_id]

  def GetReserved(self):
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type.

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    retries = 64
    while retries > 0:
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able to generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource

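# Example usage of the reservation manager (illustrative only; "ec-job-1"
# stands for an execution-context id handed out by the job layer):
#
#   trm = TemporaryReservationManager()
#   trm.Reserve("ec-job-1", "aa:00:00:11:22:33")
#   trm.Reserved("aa:00:00:11:22:33")    # -> True
#   trm.GetReserved()                    # -> set(["aa:00:00:11:22:33"])
#   trm.DropECReservations("ec-job-1")   # job finished, release everything
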
130 """Wrapper around L{utils.text.MatchNameComponent}.
131
132 """
133 return utils.MatchNameComponent(short_name, names, case_sensitive=False)
134
137 """The interface to the cluster configuration.
138
139 @ivar _temporary_lvs: reservation manager for temporary LVs
140 @ivar _all_rms: a list of all temporary reservation managers
141
142 """
169
170
  def _GenerateOneMAC(self):
    """Generate one MAC address.

    """
    prefix = self._config_data.cluster.mac_prefix
    byte1 = random.randrange(0, 256)
    byte2 = random.randrange(0, 256)
    byte3 = random.randrange(0, 256)
    mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
    return mac

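  # For example, with a cluster mac_prefix of "aa:00:00" this yields
  # addresses such as "aa:00:00:3f:9c:01" (last three bytes are random;
  # values illustrative).
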
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNdParams(self, node):
    """Get the node params populated with cluster defaults.

    @type node: L{objects.Node}
    @param node: The node we want to know the params for
    @return: A dict with the filled in node params

    """
    nodegroup = self._UnlockedGetNodeGroup(node.group)
    return self._config_data.cluster.FillND(node, nodegroup)

  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateMAC(self, ec_id):
    """Generate a MAC for an instance.

    This should check the current instances for duplicates.

    """
    existing = self._AllMACs()
    return self._temporary_ids.Generate(existing, self._GenerateOneMAC, ec_id)

  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveMAC(self, mac, ec_id):
    """Reserve a MAC for an instance.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    """
    all_macs = self._AllMACs()
    if mac in all_macs:
      raise errors.ReservationError("mac already in use")
    else:
      self._temporary_macs.Reserve(ec_id, mac)

  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveLV(self, lv_name, ec_id):
    """Reserve a VG/LV pair for an instance.

    @type lv_name: string
    @param lv_name: the logical volume name to reserve

    """
    all_lvs = self._AllLVs()
    if lv_name in all_lvs:
      raise errors.ReservationError("LV already in use")
    else:
      self._temporary_lvs.Reserve(ec_id, lv_name)

  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateDRBDSecret(self, ec_id):
    """Generate a DRBD secret.

    This checks the current disks for duplicates.

    """
    return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
                                            utils.GenerateSecret,
                                            ec_id)

251 """Compute the list of all LVs.
252
253 """
254 lvnames = set()
255 for instance in self._config_data.instances.values():
256 node_data = instance.MapLVsByNode()
257 for lv_list in node_data.values():
258 lvnames.update(lv_list)
259 return lvnames
260
  def _AllIDs(self, include_temporary):
    """Compute the list of all UUIDs and names we have.

    @type include_temporary: boolean
    @param include_temporary: whether to include the _temporary_ids set
    @rtype: set
    @return: a set of IDs

    """
    existing = set()
    if include_temporary:
      existing.update(self._temporary_ids.GetReserved())
    existing.update(self._AllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
    return existing

280 """Generate an unique UUID.
281
282 This checks the current node, instances and disk names for
283 duplicates.
284
285 @rtype: string
286 @return: the unique id
287
288 """
289 existing = self._AllIDs(include_temporary=False)
290 return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id)
291
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateUniqueID(self, ec_id):
    """Generate a unique ID.

    This is just a wrapper over the unlocked version.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the id to

    """
    return self._GenerateUniqueID(ec_id)

305 """Return all MACs present in the config.
306
307 @rtype: list
308 @return: the list of all MACs
309
310 """
311 result = []
312 for instance in self._config_data.instances.values():
313 for nic in instance.nics:
314 result.append(nic.mac)
315
316 return result
317
319 """Return all DRBD secrets present in the config.
320
321 @rtype: list
322 @return: the list of all DRBD secrets
323
324 """
325 def helper(disk, result):
326 """Recursively gather secrets from this disk."""
327 if disk.dev_type == constants.DT_DRBD8:
328 result.append(disk.logical_id[5])
329 if disk.children:
330 for child in disk.children:
331 helper(child, result)
332
333 result = []
334 for instance in self._config_data.instances.values():
335 for disk in instance.disks:
336 helper(disk, result)
337
338 return result
339
341 """Compute duplicate disk IDs
342
343 @type disk: L{objects.Disk}
344 @param disk: the disk at which to start searching
345 @type l_ids: list
346 @param l_ids: list of current logical ids
347 @type p_ids: list
348 @param p_ids: list of current physical ids
349 @rtype: list
350 @return: a list of error messages
351
352 """
353 result = []
354 if disk.logical_id is not None:
355 if disk.logical_id in l_ids:
356 result.append("duplicate logical id %s" % str(disk.logical_id))
357 else:
358 l_ids.append(disk.logical_id)
359 if disk.physical_id is not None:
360 if disk.physical_id in p_ids:
361 result.append("duplicate physical id %s" % str(disk.physical_id))
362 else:
363 p_ids.append(disk.physical_id)
364
365 if disk.children:
366 for child in disk.children:
367 result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
368 return result
369
371 """Verify function.
372
373 @rtype: list
374 @return: a list of error messages; a non-empty list signifies
375 configuration errors
376
377 """
378
379 result = []
380 seen_macs = []
381 ports = {}
382 data = self._config_data
383 cluster = data.cluster
384 seen_lids = []
385 seen_pids = []
386
387
388 if not cluster.enabled_hypervisors:
389 result.append("enabled hypervisors list doesn't have any entries")
390 invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
391 if invalid_hvs:
392 result.append("enabled hypervisors contains invalid entries: %s" %
393 invalid_hvs)
394 missing_hvp = (set(cluster.enabled_hypervisors) -
395 set(cluster.hvparams.keys()))
396 if missing_hvp:
397 result.append("hypervisor parameters missing for the enabled"
398 " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
399
400 if cluster.master_node not in data.nodes:
401 result.append("cluster has invalid primary node '%s'" %
402 cluster.master_node)
403
404 def _helper(owner, attr, value, template):
405 try:
406 utils.ForceDictType(value, template)
407 except errors.GenericError, err:
408 result.append("%s has invalid %s: %s" % (owner, attr, err))
409
410 def _helper_nic(owner, params):
411 try:
412 objects.NIC.CheckParameterSyntax(params)
413 except errors.ConfigurationError, err:
414 result.append("%s has invalid nicparams: %s" % (owner, err))
415
416
417 _helper("cluster", "beparams", cluster.SimpleFillBE({}),
418 constants.BES_PARAMETER_TYPES)
419 _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
420 constants.NICS_PARAMETER_TYPES)
421 _helper_nic("cluster", cluster.SimpleFillNIC({}))
422 _helper("cluster", "ndparams", cluster.SimpleFillND({}),
423 constants.NDS_PARAMETER_TYPES)
424
425
426 for instance_name in data.instances:
427 instance = data.instances[instance_name]
428 if instance.name != instance_name:
429 result.append("instance '%s' is indexed by wrong name '%s'" %
430 (instance.name, instance_name))
431 if instance.primary_node not in data.nodes:
432 result.append("instance '%s' has invalid primary node '%s'" %
433 (instance_name, instance.primary_node))
434 for snode in instance.secondary_nodes:
435 if snode not in data.nodes:
436 result.append("instance '%s' has invalid secondary node '%s'" %
437 (instance_name, snode))
438 for idx, nic in enumerate(instance.nics):
439 if nic.mac in seen_macs:
440 result.append("instance '%s' has NIC %d mac %s duplicate" %
441 (instance_name, idx, nic.mac))
442 else:
443 seen_macs.append(nic.mac)
444 if nic.nicparams:
445 filled = cluster.SimpleFillNIC(nic.nicparams)
446 owner = "instance %s nic %d" % (instance.name, idx)
447 _helper(owner, "nicparams",
448 filled, constants.NICS_PARAMETER_TYPES)
449 _helper_nic(owner, filled)
450
451
452 if instance.beparams:
453 _helper("instance %s" % instance.name, "beparams",
454 cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
455
456
457 for dsk in instance.disks:
458 if dsk.dev_type in constants.LDS_DRBD:
459 tcp_port = dsk.logical_id[2]
460 if tcp_port not in ports:
461 ports[tcp_port] = []
462 ports[tcp_port].append((instance.name, "drbd disk %s" % dsk.iv_name))
463
464 net_port = getattr(instance, "network_port", None)
465 if net_port is not None:
466 if net_port not in ports:
467 ports[net_port] = []
468 ports[net_port].append((instance.name, "network port"))
469
470
471 for idx, disk in enumerate(instance.disks):
472 result.extend(["instance '%s' disk %d error: %s" %
473 (instance.name, idx, msg) for msg in disk.Verify()])
474 result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))
475
476
477 for free_port in cluster.tcpudp_port_pool:
478 if free_port not in ports:
479 ports[free_port] = []
480 ports[free_port].append(("cluster", "port marked as free"))
481
482
483 keys = ports.keys()
484 keys.sort()
485 for pnum in keys:
486 pdata = ports[pnum]
487 if len(pdata) > 1:
488 txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
489 result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))
490
491
492 if keys:
493 if keys[-1] > cluster.highest_used_port:
494 result.append("Highest used port mismatch, saved %s, computed %s" %
495 (cluster.highest_used_port, keys[-1]))
496
497 if not data.nodes[cluster.master_node].master_candidate:
498 result.append("Master node is not a master candidate")
499
500
501 mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
502 if mc_now < mc_max:
503 result.append("Not enough master candidates: actual %d, target %d" %
504 (mc_now, mc_max))
505
506
507 for node_name, node in data.nodes.items():
508 if node.name != node_name:
509 result.append("Node '%s' is indexed by wrong name '%s'" %
510 (node.name, node_name))
511 if [node.master_candidate, node.drained, node.offline].count(True) > 1:
512 result.append("Node %s state is invalid: master_candidate=%s,"
513 " drain=%s, offline=%s" %
514 (node.name, node.master_candidate, node.drained,
515 node.offline))
516 if node.group not in data.nodegroups:
517 result.append("Node '%s' has invalid group '%s'" %
518 (node.name, node.group))
519 else:
520 _helper("node %s" % node.name, "ndparams",
521 cluster.FillND(node, data.nodegroups[node.group]),
522 constants.NDS_PARAMETER_TYPES)
523
524
525 nodegroups_names = set()
526 for nodegroup_uuid in data.nodegroups:
527 nodegroup = data.nodegroups[nodegroup_uuid]
528 if nodegroup.uuid != nodegroup_uuid:
529 result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
530 % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
531 if utils.UUID_RE.match(nodegroup.name.lower()):
532 result.append("node group '%s' (uuid: '%s') has uuid-like name" %
533 (nodegroup.name, nodegroup.uuid))
534 if nodegroup.name in nodegroups_names:
535 result.append("duplicate node group name '%s'" % nodegroup.name)
536 else:
537 nodegroups_names.add(nodegroup.name)
538 if nodegroup.ndparams:
539 _helper("group %s" % nodegroup.name, "ndparams",
540 cluster.SimpleFillND(nodegroup.ndparams),
541 constants.NDS_PARAMETER_TYPES)
542
543
544 _, duplicates = self._UnlockedComputeDRBDMap()
545 for node, minor, instance_a, instance_b in duplicates:
546 result.append("DRBD minor %d on node %s is assigned twice to instances"
547 " %s and %s" % (minor, node, instance_a, instance_b))
548
549
550 default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
551 ips = {}
552
553 def _AddIpAddress(ip, name):
554 ips.setdefault(ip, []).append(name)
555
556 _AddIpAddress(cluster.master_ip, "cluster_ip")
557
558 for node in data.nodes.values():
559 _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
560 if node.secondary_ip != node.primary_ip:
561 _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)
562
563 for instance in data.instances.values():
564 for idx, nic in enumerate(instance.nics):
565 if nic.ip is None:
566 continue
567
568 nicparams = objects.FillDict(default_nicparams, nic.nicparams)
569 nic_mode = nicparams[constants.NIC_MODE]
570 nic_link = nicparams[constants.NIC_LINK]
571
572 if nic_mode == constants.NIC_MODE_BRIDGED:
573 link = "bridge:%s" % nic_link
574 elif nic_mode == constants.NIC_MODE_ROUTED:
575 link = "route:%s" % nic_link
576 else:
577 raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)
578
579 _AddIpAddress("%s/%s" % (link, nic.ip),
580 "instance:%s/nic:%d" % (instance.name, idx))
581
582 for ip, owners in ips.items():
583 if len(owners) > 1:
584 result.append("IP address %s is used by multiple owners: %s" %
585 (ip, utils.CommaJoin(owners)))
586
587 return result
588
  @locking.ssynchronized(_config_lock, shared=1)
  def VerifyConfig(self):
    """Verify function.

    This is just a wrapper over L{_UnlockedVerifyConfig}.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    return self._UnlockedVerifyConfig()

603 """Convert the unique ID to the ID needed on the target nodes.
604
605 This is used only for drbd, which needs ip/port configuration.
606
607 The routine descends down and updates its children also, because
608 this helps when the only the top device is passed to the remote
609 node.
610
611 This function is for internal use, when the config lock is already held.
612
613 """
614 if disk.children:
615 for child in disk.children:
616 self._UnlockedSetDiskID(child, node_name)
617
618 if disk.logical_id is None and disk.physical_id is not None:
619 return
620 if disk.dev_type == constants.LD_DRBD8:
621 pnode, snode, port, pminor, sminor, secret = disk.logical_id
622 if node_name not in (pnode, snode):
623 raise errors.ConfigurationError("DRBD device not knowing node %s" %
624 node_name)
625 pnode_info = self._UnlockedGetNodeInfo(pnode)
626 snode_info = self._UnlockedGetNodeInfo(snode)
627 if pnode_info is None or snode_info is None:
628 raise errors.ConfigurationError("Can't find primary or secondary node"
629 " for %s" % str(disk))
630 p_data = (pnode_info.secondary_ip, port)
631 s_data = (snode_info.secondary_ip, port)
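      # Seen from node_name, the physical_id assembled below reads:
      #   (own_secondary_ip, port, peer_secondary_ip, port, own_minor, secret)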
      if pnode == node_name:
        disk.physical_id = p_data + s_data + (pminor, secret)
      else:
        disk.physical_id = s_data + p_data + (sminor, secret)
    else:
      disk.physical_id = disk.logical_id
    return

  @locking.ssynchronized(_config_lock)
  def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when only the top device is passed to the remote
    node.

    """
    return self._UnlockedSetDiskID(disk, node_name)

  @locking.ssynchronized(_config_lock)
  def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    @warning: this method does not "flush" the configuration (via
        L{_WriteConfig}); callers should do that themselves once the
        configuration is stable

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._config_data.cluster.tcpudp_port_pool.add(port)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetPortList(self):
    """Returns a copy of the current port list.

    """
    return self._config_data.cluster.tcpudp_port_pool.copy()

  @locking.ssynchronized(_config_lock)
  def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    """
    # If there are TCP/IP ports configured, we use them first.
    if self._config_data.cluster.tcpudp_port_pool:
      port = self._config_data.cluster.tcpudp_port_pool.pop()
    else:
      port = self._config_data.cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      self._config_data.cluster.highest_used_port = port

    self._WriteConfig()
    return port

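  # Illustrative behaviour: with an empty port pool and highest_used_port of
  # 11000, successive AllocatePort() calls return 11001, 11002, ... up to
  # constants.LAST_DRBD_PORT; ports given back via AddTcpUdpPort() are
  # preferred and handed out again first.
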
698 """Compute the used DRBD minor/nodes.
699
700 @rtype: (dict, list)
701 @return: dictionary of node_name: dict of minor: instance_name;
702 the returned dict will have all the nodes in it (even if with
703 an empty list), and a list of duplicates; if the duplicates
704 list is not empty, the configuration is corrupted and its caller
705 should raise an exception
706
707 """
708 def _AppendUsedPorts(instance_name, disk, used):
709 duplicates = []
710 if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
711 node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
712 for node, port in ((node_a, minor_a), (node_b, minor_b)):
713 assert node in used, ("Node '%s' of instance '%s' not found"
714 " in node list" % (node, instance_name))
715 if port in used[node]:
716 duplicates.append((node, port, instance_name, used[node][port]))
717 else:
718 used[node][port] = instance_name
719 if disk.children:
720 for child in disk.children:
721 duplicates.extend(_AppendUsedPorts(instance_name, child, used))
722 return duplicates
723
724 duplicates = []
725 my_dict = dict((node, {}) for node in self._config_data.nodes)
726 for instance in self._config_data.instances.itervalues():
727 for disk in instance.disks:
728 duplicates.extend(_AppendUsedPorts(instance.name, disk, my_dict))
729 for (node, minor), instance in self._temporary_drbds.iteritems():
730 if minor in my_dict[node] and my_dict[node][minor] != instance:
731 duplicates.append((node, minor, instance, my_dict[node][minor]))
732 else:
733 my_dict[node][minor] = instance
734 return my_dict, duplicates
735
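  # Shape of the return value (illustrative): for a cluster where "inst1"
  # uses DRBD minor 0 on node1/node2 and node3 has no DRBD devices:
  #   ({"node1": {0: "inst1"}, "node2": {0: "inst1"}, "node3": {}}, [])
  # The second element lists (node, minor, inst_a, inst_b) duplicates.
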
  @locking.ssynchronized(_config_lock)
  def ComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    This is just a wrapper over L{_UnlockedComputeDRBDMap}.

    @return: dictionary of node_name: dict of minor: instance_name;
        the returned dict will have all the nodes in it (even if with
        an empty list).

    """
    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    return d_map

  @locking.ssynchronized(_config_lock)
  def AllocateDRBDMinor(self, nodes, instance):
    """Allocate a drbd minor.

    The free minor will be automatically computed from the existing
    devices. A node can be given multiple times in order to allocate
    multiple minors. The result is the list of minors, in the same
    order as the passed nodes.

    @type instance: string
    @param instance: the instance for which we allocate minors

    """
    assert isinstance(instance, basestring), \
           "Invalid argument '%s' passed to AllocateDRBDMinor" % instance

    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    result = []
    for nname in nodes:
      ndata = d_map[nname]
      if not ndata:
        # no minors used on this node yet, start at zero
        result.append(0)
        ndata[0] = instance
        self._temporary_drbds[(nname, 0)] = instance
        continue
      keys = ndata.keys()
      keys.sort()
      ffree = utils.FirstFree(keys)
      if ffree is None:
        # no free slot inside the used range, return the next minor
        minor = keys[-1] + 1
      else:
        minor = ffree

      assert minor not in d_map[nname], \
             ("Attempt to reuse allocated DRBD minor %d on node %s,"
              " already allocated to instance %s" %
              (minor, nname, d_map[nname][minor]))
      ndata[minor] = instance

      r_key = (nname, minor)
      assert r_key not in self._temporary_drbds, \
             ("Attempt to reuse reserved DRBD minor %d on node %s,"
              " reserved for instance %s" %
              (minor, nname, self._temporary_drbds[r_key]))
      self._temporary_drbds[r_key] = instance
      result.append(minor)
    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                  nodes, result)
    return result

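  # Example (illustrative): with "n1" already using minors 0 and 1 and "n2"
  # using none, AllocateDRBDMinor(["n1", "n2"], "inst1") returns [2, 0] and
  # records both reservations in _temporary_drbds.
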
810 """Release temporary drbd minors allocated for a given instance.
811
812 @type instance: string
813 @param instance: the instance for which temporary minors should be
814 released
815
816 """
817 assert isinstance(instance, basestring), \
818 "Invalid argument passed to ReleaseDRBDMinors"
819 for key, name in self._temporary_drbds.items():
820 if name == instance:
821 del self._temporary_drbds[key]
822
  @locking.ssynchronized(_config_lock)
  def ReleaseDRBDMinors(self, instance):
    """Release temporary drbd minors allocated for a given instance.

    This should be called on the error paths, on the success paths
    it's automatically called by the ConfigWriter add and update
    functions.

    This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

    @type instance: string
    @param instance: the instance for which temporary minors should be
                     released

    """
    self._UnlockedReleaseDRBDMinors(instance)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetConfigVersion(self):
    """Get the configuration version.

    @return: Config version

    """
    return self._config_data.version

  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterName(self):
    """Get cluster name.

    @return: Cluster name

    """
    return self._config_data.cluster.cluster_name

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNode(self):
    """Get the hostname of the master node for this cluster.

    @return: Master hostname

    """
    return self._config_data.cluster.master_node

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterIP(self):
    """Get the IP of the master node for this cluster.

    @return: Master IP

    """
    return self._config_data.cluster.master_ip

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetdev(self):
    """Get the master network device for this cluster.

    """
    return self._config_data.cluster.master_netdev

  @locking.ssynchronized(_config_lock, shared=1)
  def GetFileStorageDir(self):
    """Get the file storage dir for this cluster.

    """
    return self._config_data.cluster.file_storage_dir

  @locking.ssynchronized(_config_lock, shared=1)
  def GetSharedFileStorageDir(self):
    """Get the shared file storage dir for this cluster.

    """
    return self._config_data.cluster.shared_file_storage_dir

  @locking.ssynchronized(_config_lock, shared=1)
  def GetHypervisorType(self):
    """Get the default hypervisor type for this cluster.

    """
    return self._config_data.cluster.enabled_hypervisors[0]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetHostKey(self):
    """Return the rsa hostkey from the config.

    @rtype: string
    @return: the rsa hostkey

    """
    return self._config_data.cluster.rsahostkeypub

  @locking.ssynchronized(_config_lock, shared=1)
  def GetDefaultIAllocator(self):
    """Get the default instance allocator for this cluster.

    """
    return self._config_data.cluster.default_iallocator

  @locking.ssynchronized(_config_lock, shared=1)
  def GetPrimaryIPFamily(self):
    """Get cluster primary ip family.

    @return: primary ip family

    """
    return self._config_data.cluster.primary_ip_family

  @locking.ssynchronized(_config_lock)
  def AddNodeGroup(self, group, ec_id, check_uuid=True):
    """Add a node group to the configuration.

    This method calls group.UpgradeConfig() to fill any missing attributes
    according to their default values.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: add a UUID to the group if it doesn't have one or, if
        it does, ensure that it does not exist in the
        configuration already

    """
    self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
    self._WriteConfig()

951 """Add a node group to the configuration.
952
953 """
954 logging.info("Adding node group %s to configuration", group.name)
955
956
957
958
959 if check_uuid:
960 self._EnsureUUID(group, ec_id)
961
962 try:
963 existing_uuid = self._UnlockedLookupNodeGroup(group.name)
964 except errors.OpPrereqError:
965 pass
966 else:
967 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
968 " node group (UUID: %s)" %
969 (group.name, existing_uuid),
970 errors.ECODE_EXISTS)
971
972 group.serial_no = 1
973 group.ctime = group.mtime = time.time()
974 group.UpgradeConfig()
975
976 self._config_data.nodegroups[group.uuid] = group
977 self._config_data.cluster.serial_no += 1
978
  @locking.ssynchronized(_config_lock)
  def RemoveNodeGroup(self, group_uuid):
    """Remove a node group from the configuration.

    @type group_uuid: string
    @param group_uuid: the UUID of the node group to remove

    """
    logging.info("Removing node group %s from configuration", group_uuid)

    if group_uuid not in self._config_data.nodegroups:
      raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)

    assert len(self._config_data.nodegroups) != 1, \
           "Group '%s' is the only group, cannot be removed" % group_uuid

    del self._config_data.nodegroups[group_uuid]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

1000 """Lookup a node group's UUID.
1001
1002 @type target: string or None
1003 @param target: group name or UUID or None to look for the default
1004 @rtype: string
1005 @return: nodegroup UUID
1006 @raises errors.OpPrereqError: when the target group cannot be found
1007
1008 """
1009 if target is None:
1010 if len(self._config_data.nodegroups) != 1:
1011 raise errors.OpPrereqError("More than one node group exists. Target"
1012 " group must be specified explicitely.")
1013 else:
1014 return self._config_data.nodegroups.keys()[0]
1015 if target in self._config_data.nodegroups:
1016 return target
1017 for nodegroup in self._config_data.nodegroups.values():
1018 if nodegroup.name == target:
1019 return nodegroup.uuid
1020 raise errors.OpPrereqError("Node group '%s' not found" % target,
1021 errors.ECODE_NOENT)
1022
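  # Lookup semantics, by example (illustrative): passing None succeeds only
  # on single-group clusters; otherwise target may be a group UUID (returned
  # unchanged) or a group name (resolved to the matching group's UUID).
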
  @locking.ssynchronized(_config_lock, shared=1)
  def LookupNodeGroup(self, target):
    """Lookup a node group's UUID.

    This function is just a wrapper over L{_UnlockedLookupNodeGroup}.

    @type target: string or None
    @param target: group name or UUID or None to look for the default
    @rtype: string
    @return: nodegroup UUID

    """
    return self._UnlockedLookupNodeGroup(target)

1038 """Lookup a node group.
1039
1040 @type uuid: string
1041 @param uuid: group UUID
1042 @rtype: L{objects.NodeGroup} or None
1043 @return: nodegroup object, or None if not found
1044
1045 """
1046 if uuid not in self._config_data.nodegroups:
1047 return None
1048
1049 return self._config_data.nodegroups[uuid]
1050
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroup(self, uuid):
    """Lookup a node group.

    @type uuid: string
    @param uuid: group UUID
    @rtype: L{objects.NodeGroup} or None
    @return: nodegroup object, or None if not found

    """
    return self._UnlockedGetNodeGroup(uuid)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodeGroupsInfo(self):
    """Get the configuration of all node groups.

    """
    return dict(self._config_data.nodegroups)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupList(self):
    """Get a list of node groups.

    """
    return self._config_data.nodegroups.keys()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupMembersByNodes(self, nodes):
    """Get nodes which are members of the same nodegroups as the given nodes.

    """
    ngfn = lambda node_name: self._UnlockedGetNodeInfo(node_name).group
    return frozenset(member_name
                     for node_name in nodes
                     for member_name in
                       self._UnlockedGetNodeGroup(ngfn(node_name)).members)

1122 """Ensures a given object has a valid UUID.
1123
1124 @param item: the instance or node to be checked
1125 @param ec_id: the execution context id for the uuid reservation
1126
1127 """
1128 if not item.uuid:
1129 item.uuid = self._GenerateUniqueID(ec_id)
1130 elif item.uuid in self._AllIDs(include_temporary=True):
1131 raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
1132 " in use" % (item.name, item.uuid))
1133
1135 """Set the instance's status to a given value.
1136
1137 """
1138 assert isinstance(status, bool), \
1139 "Invalid status '%s' passed to SetInstanceStatus" % (status,)
1140
1141 if instance_name not in self._config_data.instances:
1142 raise errors.ConfigurationError("Unknown instance '%s'" %
1143 instance_name)
1144 instance = self._config_data.instances[instance_name]
1145 if instance.admin_up != status:
1146 instance.admin_up = status
1147 instance.serial_no += 1
1148 instance.mtime = time.time()
1149 self._WriteConfig()
1150
  @locking.ssynchronized(_config_lock)
  def MarkInstanceUp(self, instance_name):
    """Mark the instance status to up in the config.

    """
    self._SetInstanceStatus(instance_name, True)

  @locking.ssynchronized(_config_lock)
  def RemoveInstance(self, instance_name):
    """Remove the instance from the configuration.

    """
    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

    # if a network port was allocated to the instance, return it to the
    # pool of free ports
    inst = self._config_data.instances[instance_name]
    network_port = getattr(inst, "network_port", None)
    if network_port is not None:
      self._config_data.cluster.tcpudp_port_pool.add(network_port)

    del self._config_data.instances[instance_name]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock)
  def RenameInstance(self, old_name, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    """
    if old_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
    inst = self._config_data.instances[old_name]
    del self._config_data.instances[old_name]
    inst.name = new_name

    for disk in inst.disks:
      if disk.dev_type == constants.LD_FILE:
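        # Rename the file-backed disk paths: logical_id[1] holds the full
        # path (e.g. ".../file-storage/<old_name>/disk0", path illustrative),
        # so stripping its last two components yields the storage directory,
        # under which the new instance name is joined back in below.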
        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
        disk_fname = "disk%s" % disk.iv_name.split("/")[1]
        disk.physical_id = disk.logical_id = (disk.logical_id[0],
                                              utils.PathJoin(file_storage_dir,
                                                             inst.name,
                                                             disk_fname))

    # Force update of ssconf files
    self._config_data.cluster.serial_no += 1

    self._config_data.instances[inst.name] = inst
    self._WriteConfig()

  @locking.ssynchronized(_config_lock)
  def MarkInstanceDown(self, instance_name):
    """Mark the status of an instance to down in the configuration.

    """
    self._SetInstanceStatus(instance_name, False)

1216 """Get the list of instances.
1217
1218 This function is for internal use, when the config lock is already held.
1219
1220 """
1221 return self._config_data.instances.keys()
1222
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceList(self):
    """Get the list of instances.

    @return: array of instances, ex. ['instance2.example.com',
        'instance1.example.com']

    """
    return self._UnlockedGetInstanceList()

  def _UnlockedGetInstanceInfo(self, instance_name):
    """Returns information about an instance.

    This function is for internal use, when the config lock is already held.

    """
    if instance_name not in self._config_data.instances:
      return None

    return self._config_data.instances[instance_name]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceInfo(self, instance_name):
    """Returns information about an instance.

    It takes the information from the configuration file. Other information
    about an instance is taken from the live systems.

    @param instance_name: name of the instance, e.g.
        I{instance1.example.com}

    @rtype: L{objects.Instance}
    @return: the instance object

    """
    return self._UnlockedGetInstanceInfo(instance_name)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMultiInstanceInfo(self, instances):
    """Get the configuration of multiple instances.

    @param instances: list of instance names
    @rtype: list
    @return: list of tuples (instance, instance_info), where
        instance_info is what would GetInstanceInfo return for the
        node, while keeping the original order

    """
    return [(name, self._UnlockedGetInstanceInfo(name)) for name in instances]

  @locking.ssynchronized(_config_lock)
  def AddNode(self, node, ec_id):
    """Add a node to the configuration.

    @type node: L{objects.Node}
    @param node: a Node instance

    """
    logging.info("Adding node %s to configuration", node.name)

    self._EnsureUUID(node, ec_id)

    node.serial_no = 1
    node.ctime = node.mtime = time.time()
    self._UnlockedAddNodeToGroup(node.name, node.group)
    self._config_data.nodes[node.name] = node
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock)
  def RemoveNode(self, node_name):
    """Remove a node from the configuration.

    """
    logging.info("Removing node %s from configuration", node_name)

    if node_name not in self._config_data.nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    self._UnlockedRemoveNodeFromGroup(self._config_data.nodes[node_name])
    del self._config_data.nodes[node_name]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  def _UnlockedGetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    This function is for internal use, when the config lock is already
    held.

    @param node_name: the node name, e.g. I{node1.example.com}

    @rtype: L{objects.Node}
    @return: the node object

    """
    if node_name not in self._config_data.nodes:
      return None

    return self._config_data.nodes[node_name]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

    @param node_name: the node name, e.g. I{node1.example.com}

    @rtype: L{objects.Node}
    @return: the node object

    """
    return self._UnlockedGetNodeInfo(node_name)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInstances(self, node_name):
    """Get the instances of a node, as stored in the config.

    @param node_name: the node name, e.g. I{node1.example.com}

    @rtype: (list, list)
    @return: a tuple with two lists: the primary and the secondary instances

    """
    pri = []
    sec = []
    for inst in self._config_data.instances.values():
      if inst.primary_node == node_name:
        pri.append(inst.name)
      if node_name in inst.secondary_nodes:
        sec.append(inst.name)
    return (pri, sec)

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupInstances(self, uuid, primary_only=False):
    """Get the instances of a node group.

    @param uuid: Node group UUID
    @param primary_only: Whether to only consider primary nodes
    @rtype: frozenset
    @return: List of instance names in node group

    """
    if primary_only:
      nodes_fn = lambda inst: [inst.primary_node]
    else:
      nodes_fn = lambda inst: inst.all_nodes

    return frozenset(inst.name
                     for inst in self._config_data.instances.values()
                     for node_name in nodes_fn(inst)
                     if self._UnlockedGetNodeInfo(node_name).group == uuid)

1424 """Return the list of nodes which are in the configuration.
1425
1426 This function is for internal use, when the config lock is already
1427 held.
1428
1429 @rtype: list
1430
1431 """
1432 return self._config_data.nodes.keys()
1433
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeList(self):
    """Return the list of nodes which are in the configuration.

    """
    return self._UnlockedGetNodeList()

  def _UnlockedGetOnlineNodeList(self):
    """Lookup online nodes.

    """
    all_nodes = [self._UnlockedGetNodeInfo(node)
                 for node in self._UnlockedGetNodeList()]
    return [node.name for node in all_nodes if not node.offline]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetOnlineNodeList(self):
    """Return the list of nodes which are online.

    """
    return self._UnlockedGetOnlineNodeList()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMultiNodeInfo(self, nodes):
    """Get the configuration of multiple nodes.

    @param nodes: list of node names
    @rtype: list
    @return: list of tuples of (node, node_info), where node_info is
        what would GetNodeInfo return for the node, in the original
        order

    """
    return [(name, self._UnlockedGetNodeInfo(name)) for name in nodes]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodesInfo(self):
    """Get the configuration of all nodes.

    @rtype: dict
    @return: dict of (node, node_info), where node_info is what
        would GetNodeInfo return for the node

    """
    my_dict = dict([(node, self._UnlockedGetNodeInfo(node))
                    for node in self._UnlockedGetNodeList()])
    return my_dict

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeGroupsFromNodes(self, nodes):
    """Returns groups for a list of nodes.

    @type nodes: list of string
    @param nodes: List of node names
    @rtype: frozenset

    """
    return frozenset(self._UnlockedGetNodeInfo(name).group for name in nodes)

1512 """Get the number of current and maximum desired and possible candidates.
1513
1514 @type exceptions: list
1515 @param exceptions: if passed, list of nodes that should be ignored
1516 @rtype: tuple
1517 @return: tuple of (current, desired and possible, possible)
1518
1519 """
1520 mc_now = mc_should = mc_max = 0
1521 for node in self._config_data.nodes.values():
1522 if exceptions and node.name in exceptions:
1523 continue
1524 if not (node.offline or node.drained) and node.master_capable:
1525 mc_max += 1
1526 if node.master_candidate:
1527 mc_now += 1
1528 mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
1529 return (mc_now, mc_should, mc_max)
1530
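  # Worked example (illustrative): on a 5-node cluster with one offline and
  # one drained node, mc_max is 3; with candidate_pool_size of 10, mc_should
  # is min(3, 10) = 3, and mc_now counts how many of those three already are
  # master candidates.
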
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterCandidateStats(self, exceptions=None):
    """Get the number of current and maximum possible candidates.

    This is just a wrapper over L{_UnlockedGetMasterCandidateStats}.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: tuple
    @return: tuple of (current, max)

    """
    return self._UnlockedGetMasterCandidateStats(exceptions)

  @locking.ssynchronized(_config_lock)
  def MaintainCandidatePool(self, exceptions):
    """Try to grow the candidate pool to the desired size.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: list
    @return: list with the adjusted nodes (L{objects.Node} instances)

    """
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats(exceptions)
    mod_list = []
    if mc_now < mc_max:
      node_list = self._config_data.nodes.keys()
      random.shuffle(node_list)
      for name in node_list:
        if mc_now >= mc_max:
          break
        node = self._config_data.nodes[name]
        if (node.master_candidate or node.offline or node.drained or
            node.name in exceptions or not node.master_capable):
          continue
        mod_list.append(node)
        node.master_candidate = True
        node.serial_no += 1
        mc_now += 1
      if mc_now != mc_max:
        logging.warning("Warning: MaintainCandidatePool didn't manage to"
                        " fill the candidate pool (%d/%d)", mc_now, mc_max)
      if mod_list:
        self._config_data.cluster.serial_no += 1
        self._WriteConfig()

    return mod_list

  def _UnlockedAddNodeToGroup(self, node_name, nodegroup_uuid):
    """Add a given node to the specified group.

    """
    if nodegroup_uuid not in self._config_data.nodegroups:
      # This can happen if a node group gets deleted between its lookup and
      # the moment the first node is added to it, since no lock is kept in
      # the meantime; failing here is clean and safe.
      raise errors.OpExecError("Unknown node group: %s" % nodegroup_uuid)
    if node_name not in self._config_data.nodegroups[nodegroup_uuid].members:
      self._config_data.nodegroups[nodegroup_uuid].members.append(node_name)

1593
1595 """Remove a given node from its group.
1596
1597 """
1598 nodegroup = node.group
1599 if nodegroup not in self._config_data.nodegroups:
1600 logging.warning("Warning: node '%s' has unknown node group '%s'"
1601 " (while being removed from it)", node.name, nodegroup)
1602 nodegroup_obj = self._config_data.nodegroups[nodegroup]
1603 if node.name not in nodegroup_obj.members:
1604 logging.warning("Warning: node '%s' not a member of its node group '%s'"
1605 " (while being removed from it)", node.name, nodegroup)
1606 else:
1607 nodegroup_obj.members.remove(node.name)
1608
  @locking.ssynchronized(_config_lock)
  def AssignGroupNodes(self, mods):
    """Changes the group of a number of nodes.

    @type mods: list of tuples; (node name, new group UUID)
    @param mods: Node membership modifications

    """
    groups = self._config_data.nodegroups
    nodes = self._config_data.nodes

    resmod = []

    # Try to resolve names/UUIDs first
    for (node_name, new_group_uuid) in mods:
      try:
        node = nodes[node_name]
      except KeyError:
        raise errors.ConfigurationError("Unable to find node '%s'" % node_name)

      if node.group == new_group_uuid:
        # Node is being assigned to its current group
        logging.debug("Node '%s' was assigned to its current group (%s)",
                      node_name, node.group)
        continue

      # Try to find current group of node
      try:
        old_group = groups[node.group]
      except KeyError:
        raise errors.ConfigurationError("Unable to find old group '%s'" %
                                        node.group)

      # Try to find new group for node
      try:
        new_group = groups[new_group_uuid]
      except KeyError:
        raise errors.ConfigurationError("Unable to find new group '%s'" %
                                        new_group_uuid)

      assert node.name in old_group.members, \
        ("Inconsistent configuration: node '%s' not listed in members for its"
         " old group '%s'" % (node.name, old_group.uuid))
      assert node.name not in new_group.members, \
        ("Inconsistent configuration: node '%s' already listed in members for"
         " its new group '%s'" % (node.name, new_group.uuid))

      resmod.append((node, old_group, new_group))

    # Apply changes
    for (node, old_group, new_group) in resmod:
      assert node.uuid != new_group.uuid and old_group.uuid != new_group.uuid, \
        "Assigning to current group is not possible"

      node.group = new_group.uuid

      # Update members of involved groups
      if node.name in old_group.members:
        old_group.members.remove(node.name)
      if node.name not in new_group.members:
        new_group.members.append(node.name)

    # Update timestamps and serials on all modified objects
    now = time.time()
    for obj in frozenset(itertools.chain(*resmod)):
      obj.serial_no += 1
      obj.mtime = now

    # Force ssconf update
    self._config_data.cluster.serial_no += 1

    self._WriteConfig()

1683 """Bump up the serial number of the config.
1684
1685 """
1686 self._config_data.serial_no += 1
1687 self._config_data.mtime = time.time()
1688
1690 """Returns all objects with uuid attributes.
1691
1692 """
1693 return (self._config_data.instances.values() +
1694 self._config_data.nodes.values() +
1695 self._config_data.nodegroups.values() +
1696 [self._config_data.cluster])
1697
1699 """Read the config data from disk.
1700
1701 """
1702 raw_data = utils.ReadFile(self._cfg_file)
1703
1704 try:
1705 data = objects.ConfigData.FromDict(serializer.Load(raw_data))
1706 except Exception, err:
1707 raise errors.ConfigurationError(err)
1708
1709
1710 _ValidateConfig(data)
1711
1712 if (not hasattr(data, 'cluster') or
1713 not hasattr(data.cluster, 'rsahostkeypub')):
1714 raise errors.ConfigurationError("Incomplete configuration"
1715 " (missing cluster.rsahostkeypub)")
1716
1717 if data.cluster.master_node != self._my_hostname and not accept_foreign:
1718 msg = ("The configuration denotes node %s as master, while my"
1719 " hostname is %s; opening a foreign configuration is only"
1720 " possible in accept_foreign mode" %
1721 (data.cluster.master_node, self._my_hostname))
1722 raise errors.ConfigurationError(msg)
1723
1724
1725 data.UpgradeConfig()
1726
1727 self._config_data = data
1728
1729
1730 self._last_cluster_serial = -1
1731
1732
1733 self._UpgradeConfig()
1734
1735 self._cfg_id = utils.GetFileID(path=self._cfg_file)
1736
1738 """Run upgrade steps that cannot be done purely in the objects.
1739
1740 This is because some data elements need uniqueness across the
1741 whole configuration, etc.
1742
1743 @warning: this function will call L{_WriteConfig()}, but also
1744 L{DropECReservations} so it needs to be called only from a
1745 "safe" place (the constructor). If one wanted to call it with
1746 the lock held, a DropECReservationUnlocked would need to be
1747 created first, to avoid causing deadlock.
1748
1749 """
1750 modified = False
1751 for item in self._AllUUIDObjects():
1752 if item.uuid is None:
1753 item.uuid = self._GenerateUniqueID(_UPGRADE_CONFIG_JID)
1754 modified = True
1755 if not self._config_data.nodegroups:
1756 default_nodegroup_name = constants.INITIAL_NODE_GROUP_NAME
1757 default_nodegroup = objects.NodeGroup(name=default_nodegroup_name,
1758 members=[])
1759 self._UnlockedAddNodeGroup(default_nodegroup, _UPGRADE_CONFIG_JID, True)
1760 modified = True
1761 for node in self._config_data.nodes.values():
1762 if not node.group:
1763 node.group = self.LookupNodeGroup(None)
1764 modified = True
1765
1766
1767
1768
1769 self._UnlockedAddNodeToGroup(node.name, node.group)
1770 if modified:
1771 self._WriteConfig()
1772
1773
1774 self.DropECReservations(_UPGRADE_CONFIG_JID)
1775
1777 """Distribute the configuration to the other nodes.
1778
1779 Currently, this only copies the configuration file. In the future,
1780 it could be used to encapsulate the 2/3-phase update mechanism.
1781
1782 """
1783 if self._offline:
1784 return True
1785
1786 bad = False
1787
1788 node_list = []
1789 addr_list = []
1790 myhostname = self._my_hostname
1791
1792
1793
1794
1795 for node_name in self._UnlockedGetNodeList():
1796 if node_name == myhostname:
1797 continue
1798 node_info = self._UnlockedGetNodeInfo(node_name)
1799 if not node_info.master_candidate:
1800 continue
1801 node_list.append(node_info.name)
1802 addr_list.append(node_info.primary_ip)
1803
1804 result = rpc.RpcRunner.call_upload_file(node_list, self._cfg_file,
1805 address_list=addr_list)
1806 for to_node, to_result in result.items():
1807 msg = to_result.fail_msg
1808 if msg:
1809 msg = ("Copy of file %s to node %s failed: %s" %
1810 (self._cfg_file, to_node, msg))
1811 logging.error(msg)
1812
1813 if feedback_fn:
1814 feedback_fn(msg)
1815
1816 bad = True
1817
1818 return not bad
1819
  def _WriteConfig(self, destination=None, feedback_fn=None):
    """Write the configuration data to persistent storage.

    """
    assert feedback_fn is None or callable(feedback_fn)

    # Warn on config errors, but don't abort the save - the configuration
    # has already been modified and cannot be reverted; the best we can do
    # is to warn the user and still save it
    config_errors = self._UnlockedVerifyConfig()
    if config_errors:
      errmsg = ("Configuration data is not consistent: %s" %
                (utils.CommaJoin(config_errors)))
      logging.critical(errmsg)
      if feedback_fn:
        feedback_fn(errmsg)

    if destination is None:
      destination = self._cfg_file
    self._BumpSerialNo()
    txt = serializer.Dump(self._config_data.ToDict())

    getents = self._getents()
    try:
      fd = utils.SafeWriteFile(destination, self._cfg_id, data=txt,
                               close=False, gid=getents.confd_gid, mode=0640)
    except errors.LockError:
      raise errors.ConfigurationError("The configuration file has been"
                                      " modified since the last write, cannot"
                                      " update")
    try:
      self._cfg_id = utils.GetFileID(fd=fd)
    finally:
      os.close(fd)

    self.write_count += 1

    # and redistribute the config file to master candidates
    self._DistributeConfig(feedback_fn)

    # Write ssconf files on all online nodes
    if self._last_cluster_serial < self._config_data.cluster.serial_no:
      if not self._offline:
        result = rpc.RpcRunner.call_write_ssconf_files(
          self._UnlockedGetOnlineNodeList(),
          self._UnlockedGetSsconfValues())

        for nname, nresu in result.items():
          msg = nresu.fail_msg
          if msg:
            errmsg = ("Error while uploading ssconf files to"
                      " node %s: %s" % (nname, msg))
            logging.warning(errmsg)

            if feedback_fn:
              feedback_fn(errmsg)

      self._last_cluster_serial = self._config_data.cluster.serial_no

1881 """Return the values needed by ssconf.
1882
1883 @rtype: dict
1884 @return: a dictionary with keys the ssconf names and values their
1885 associated value
1886
1887 """
1888 fn = "\n".join
1889 instance_names = utils.NiceSort(self._UnlockedGetInstanceList())
1890 node_names = utils.NiceSort(self._UnlockedGetNodeList())
1891 node_info = [self._UnlockedGetNodeInfo(name) for name in node_names]
1892 node_pri_ips = ["%s %s" % (ninfo.name, ninfo.primary_ip)
1893 for ninfo in node_info]
1894 node_snd_ips = ["%s %s" % (ninfo.name, ninfo.secondary_ip)
1895 for ninfo in node_info]
1896
1897 instance_data = fn(instance_names)
1898 off_data = fn(node.name for node in node_info if node.offline)
1899 on_data = fn(node.name for node in node_info if not node.offline)
1900 mc_data = fn(node.name for node in node_info if node.master_candidate)
1901 mc_ips_data = fn(node.primary_ip for node in node_info
1902 if node.master_candidate)
1903 node_data = fn(node_names)
1904 node_pri_ips_data = fn(node_pri_ips)
1905 node_snd_ips_data = fn(node_snd_ips)
1906
1907 cluster = self._config_data.cluster
1908 cluster_tags = fn(cluster.GetTags())
1909
1910 hypervisor_list = fn(cluster.enabled_hypervisors)
1911
1912 uid_pool = uidpool.FormatUidPool(cluster.uid_pool, separator="\n")
1913
1914 nodegroups = ["%s %s" % (nodegroup.uuid, nodegroup.name) for nodegroup in
1915 self._config_data.nodegroups.values()]
1916 nodegroups_data = fn(utils.NiceSort(nodegroups))
1917
1918 ssconf_values = {
1919 constants.SS_CLUSTER_NAME: cluster.cluster_name,
1920 constants.SS_CLUSTER_TAGS: cluster_tags,
1921 constants.SS_FILE_STORAGE_DIR: cluster.file_storage_dir,
1922 constants.SS_SHARED_FILE_STORAGE_DIR: cluster.shared_file_storage_dir,
1923 constants.SS_MASTER_CANDIDATES: mc_data,
1924 constants.SS_MASTER_CANDIDATES_IPS: mc_ips_data,
1925 constants.SS_MASTER_IP: cluster.master_ip,
1926 constants.SS_MASTER_NETDEV: cluster.master_netdev,
1927 constants.SS_MASTER_NODE: cluster.master_node,
1928 constants.SS_NODE_LIST: node_data,
1929 constants.SS_NODE_PRIMARY_IPS: node_pri_ips_data,
1930 constants.SS_NODE_SECONDARY_IPS: node_snd_ips_data,
1931 constants.SS_OFFLINE_NODES: off_data,
1932 constants.SS_ONLINE_NODES: on_data,
1933 constants.SS_PRIMARY_IP_FAMILY: str(cluster.primary_ip_family),
1934 constants.SS_INSTANCE_LIST: instance_data,
1935 constants.SS_RELEASE_VERSION: constants.RELEASE_VERSION,
1936 constants.SS_HYPERVISOR_LIST: hypervisor_list,
1937 constants.SS_MAINTAIN_NODE_HEALTH: str(cluster.maintain_node_health),
1938 constants.SS_UID_POOL: uid_pool,
1939 constants.SS_NODEGROUPS: nodegroups_data,
1940 }
1941 bad_values = [(k, v) for k, v in ssconf_values.items()
1942 if not isinstance(v, (str, basestring))]
1943 if bad_values:
1944 err = utils.CommaJoin("%s=%s" % (k, v) for k, v in bad_values)
1945 raise errors.ConfigurationError("Some ssconf key(s) have non-string"
1946 " values: %s" % err)
1947 return ssconf_values
1948
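  # Each key in the returned dict ends up as a separate ssconf_* file on the
  # nodes; e.g. the constants.SS_MASTER_NODE value becomes a file containing
  # just the master's hostname, while list-valued entries are the
  # newline-joined strings built above (illustrative summary).
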
  @locking.ssynchronized(_config_lock, shared=1)
  def GetSsconfValues(self):
    """Wrapper using lock around _UnlockedGetSsconfValues().

    """
    return self._UnlockedGetSsconfValues()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetVGName(self):
    """Return the volume group name.

    """
    return self._config_data.cluster.volume_group_name

  @locking.ssynchronized(_config_lock)
  def SetVGName(self, vg_name):
    """Set the volume group name.

    """
    self._config_data.cluster.volume_group_name = vg_name
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetDRBDHelper(self):
    """Return DRBD usermode helper.

    """
    return self._config_data.cluster.drbd_usermode_helper

  @locking.ssynchronized(_config_lock)
  def SetDRBDHelper(self, drbd_helper):
    """Set DRBD usermode helper.

    """
    self._config_data.cluster.drbd_usermode_helper = drbd_helper
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMACPrefix(self):
    """Return the mac prefix.

    """
    return self._config_data.cluster.mac_prefix

  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterInfo(self):
    """Returns information about the cluster.

    @rtype: L{objects.Cluster}
    @return: the cluster object

    """
    return self._config_data.cluster

  @locking.ssynchronized(_config_lock, shared=1)
  def HasAnyDiskOfType(self, dev_type):
    """Check if there is at least one disk of the given type in the
    configuration.

    """
    return self._config_data.HasAnyDiskOfType(dev_type)

  @locking.ssynchronized(_config_lock)
  def Update(self, target, feedback_fn):
    """Notify function to be called after updates.

    This function must be called when an object (as returned by
    GetInstanceInfo, GetNodeInfo, GetCluster) has been updated and the
    caller wants the modifications saved to the backing store. Note
    that all modified objects will be saved, but the target argument
    is the one the caller wants to ensure that it's saved.

    @param target: an instance of either L{objects.Cluster},
        L{objects.Node} or L{objects.Instance} which is existing in
        the cluster
    @param feedback_fn: Callable feedback function

    """
    if self._config_data is None:
      raise errors.ProgrammerError("Configuration file not read,"
                                   " cannot save.")
    update_serial = False
    if isinstance(target, objects.Cluster):
      test = target == self._config_data.cluster
    elif isinstance(target, objects.Node):
      test = target in self._config_data.nodes.values()
      update_serial = True
    elif isinstance(target, objects.Instance):
      test = target in self._config_data.instances.values()
    elif isinstance(target, objects.NodeGroup):
      test = target in self._config_data.nodegroups.values()
    else:
      raise errors.ProgrammerError("Invalid object type (%s) passed to"
                                   " ConfigWriter.Update" % type(target))
    if not test:
      raise errors.ConfigurationError("Configuration updated since object"
                                      " has been read or unknown object")
    target.serial_no += 1
    target.mtime = now = time.time()

    if update_serial:
      # for node updates, we need to increase the cluster serial too
      self._config_data.cluster.serial_no += 1
      self._config_data.cluster.mtime = now

    if isinstance(target, objects.Instance):
      self._UnlockedReleaseDRBDMinors(target.name)

    self._WriteConfig(feedback_fn=feedback_fn)

  @locking.ssynchronized(_config_lock)
  def DropECReservations(self, ec_id):
    """Drop per-execution-context reservations.

    """
    for rm in self._all_rms:
      rm.DropECReservations(ec_id)