
Source Code for Module ganeti.client.gnt_cluster

   1  # 
   2  # 
   3   
   4  # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013, 2014 Google Inc. 
   5  # All rights reserved. 
   6  # 
   7  # Redistribution and use in source and binary forms, with or without 
   8  # modification, are permitted provided that the following conditions are 
   9  # met: 
  10  # 
  11  # 1. Redistributions of source code must retain the above copyright notice, 
  12  # this list of conditions and the following disclaimer. 
  13  # 
  14  # 2. Redistributions in binary form must reproduce the above copyright 
  15  # notice, this list of conditions and the following disclaimer in the 
  16  # documentation and/or other materials provided with the distribution. 
  17  # 
  18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
  19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
  20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
  21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
  22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
  26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
  27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
  28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  29   
  30  """Cluster related commands""" 
  31   
  32  # pylint: disable=W0401,W0613,W0614,C0103 
  33  # W0401: Wildcard import ganeti.cli 
  34  # W0613: Unused argument, since all functions follow the same API 
  35  # W0614: Unused import %s from wildcard import (since we need cli) 
  36  # C0103: Invalid name gnt-cluster 
  37   
  38  from cStringIO import StringIO 
  39  import os 
  40  import time 
  41  import OpenSSL 
  42  import tempfile 
  43  import itertools 
  44   
  45  from ganeti.cli import * 
  46  from ganeti import bootstrap 
  47  from ganeti import compat 
  48  from ganeti import constants 
  49  from ganeti import config 
  50  from ganeti import errors 
  51  from ganeti import netutils 
  52  from ganeti import objects 
  53  from ganeti import opcodes 
  54  from ganeti import pathutils 
  55  from ganeti import qlang 
  56  from ganeti import serializer 
  57  from ganeti import ssconf 
  58  from ganeti import ssh 
  59  from ganeti import uidpool 
  60  from ganeti import utils 
  61  from ganeti.client import base 
  62   
  63   
  64  ON_OPT = cli_option("--on", default=False, 
  65                      action="store_true", dest="on", 
  66                      help="Recover from an EPO") 
  67   
  68  GROUPS_OPT = cli_option("--groups", default=False, 
  69                          action="store_true", dest="groups", 
  70                          help="Arguments are node groups instead of nodes") 
  71   
  72  FORCE_FAILOVER = cli_option("--yes-do-it", dest="yes_do_it", 
  73                              help="Override interactive check for --no-voting", 
  74                              default=False, action="store_true") 
  75   
  76  FORCE_DISTRIBUTION = cli_option("--yes-do-it", dest="yes_do_it", 
  77                                  help="Unconditionally distribute the" 
  78                                  " configuration, even if the queue" 
  79                                  " is drained", 
  80                                  default=False, action="store_true") 
  81   
  82  TO_OPT = cli_option("--to", default=None, type="string", 
  83                      help="The Ganeti version to upgrade to") 
  84   
  85  RESUME_OPT = cli_option("--resume", default=False, action="store_true", 
  86                          help="Resume any pending Ganeti upgrades") 
  87   
  88  DATA_COLLECTOR_INTERVAL_OPT = cli_option( 
  89      "--data-collector-interval", default={}, type="keyval", 
  90      help="Set collection intervals in seconds of data collectors.") 
  91   
  92  _EPO_PING_INTERVAL = 30 # 30 seconds between pings 
  93  _EPO_PING_TIMEOUT = 1 # 1 second 
  94  _EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes 
95 96 97 -def _InitEnabledDiskTemplates(opts):
98 """Initialize the list of enabled disk templates. 99 100 """ 101 if opts.enabled_disk_templates: 102 return opts.enabled_disk_templates.split(",") 103 else: 104 return constants.DEFAULT_ENABLED_DISK_TEMPLATES
105
106 107 -def _InitVgName(opts, enabled_disk_templates):
108 """Initialize the volume group name. 109 110 @type enabled_disk_templates: list of strings 111 @param enabled_disk_templates: cluster-wide enabled disk templates 112 113 """ 114 vg_name = None 115 if opts.vg_name is not None: 116 vg_name = opts.vg_name 117 if vg_name: 118 if not utils.IsLvmEnabled(enabled_disk_templates): 119 ToStdout("You specified a volume group with --vg-name, but you did not" 120 " enable any disk template that uses lvm.") 121 elif utils.IsLvmEnabled(enabled_disk_templates): 122 raise errors.OpPrereqError( 123 "LVM disk templates are enabled, but vg name not set.") 124 elif utils.IsLvmEnabled(enabled_disk_templates): 125 vg_name = constants.DEFAULT_VG 126 return vg_name
127
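The volume-group decision above has three outcomes (warn, fail, or fall back to the default VG) depending on whether any LVM-based disk template is enabled and whether --vg-name was given. Below is a small standalone sketch of the same decision, using plain booleans instead of ganeti's option objects; DEFAULT_VG and the function name are illustrative stand-ins, not ganeti's own.

# Standalone sketch of the volume-group decision above (illustrative names only).
DEFAULT_VG = "xenvg"   # assumed stand-in for constants.DEFAULT_VG

def pick_vg_name(vg_name_opt, lvm_enabled):
    """Return the volume group to use, or raise if the combination is invalid."""
    if vg_name_opt:
        if not lvm_enabled:
            print("Warning: --vg-name given, but no LVM-based disk template enabled")
        return vg_name_opt
    if lvm_enabled:
        if vg_name_opt == "":       # explicitly unset while LVM templates are enabled
            raise ValueError("LVM disk templates are enabled, but vg name not set")
        return DEFAULT_VG           # no --vg-name given: fall back to the default
    return None

print(pick_vg_name(None, True))      # -> xenvg
print(pick_vg_name("my-vg", False))  # warns, then -> my-vg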
128 129 -def _InitDrbdHelper(opts, enabled_disk_templates, feedback_fn=ToStdout):
130 """Initialize the DRBD usermode helper. 131 132 """ 133 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates 134 135 if not drbd_enabled and opts.drbd_helper is not None: 136 feedback_fn("Note: You specified a DRBD usermode helper, while DRBD storage" 137 " is not enabled.") 138 139 if drbd_enabled: 140 if opts.drbd_helper is None: 141 return constants.DEFAULT_DRBD_HELPER 142 if opts.drbd_helper == '': 143 raise errors.OpPrereqError( 144 "Unsetting the drbd usermode helper while enabling DRBD is not" 145 " allowed.") 146 147 return opts.drbd_helper
148
149 150 @UsesRPC 151 -def InitCluster(opts, args):
152 """Initialize the cluster. 153 154 @param opts: the command line options selected by the user 155 @type args: list 156 @param args: should contain only one element, the desired 157 cluster name 158 @rtype: int 159 @return: the desired exit code 160 161 """ 162 enabled_disk_templates = _InitEnabledDiskTemplates(opts) 163 164 try: 165 vg_name = _InitVgName(opts, enabled_disk_templates) 166 drbd_helper = _InitDrbdHelper(opts, enabled_disk_templates) 167 except errors.OpPrereqError, e: 168 ToStderr(str(e)) 169 return 1 170 171 master_netdev = opts.master_netdev 172 if master_netdev is None: 173 nic_mode = opts.nicparams.get(constants.NIC_MODE, None) 174 if not nic_mode: 175 # default case, use bridging 176 master_netdev = constants.DEFAULT_BRIDGE 177 elif nic_mode == constants.NIC_MODE_OVS: 178 # default ovs is different from default bridge 179 master_netdev = constants.DEFAULT_OVS 180 opts.nicparams[constants.NIC_LINK] = constants.DEFAULT_OVS 181 182 hvlist = opts.enabled_hypervisors 183 if hvlist is None: 184 hvlist = constants.DEFAULT_ENABLED_HYPERVISOR 185 hvlist = hvlist.split(",") 186 187 hvparams = dict(opts.hvparams) 188 beparams = opts.beparams 189 nicparams = opts.nicparams 190 191 diskparams = dict(opts.diskparams) 192 193 # check the disk template types here, as we cannot rely on the type check done 194 # by the opcode parameter types 195 diskparams_keys = set(diskparams.keys()) 196 if not (diskparams_keys <= constants.DISK_TEMPLATES): 197 unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES) 198 ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown)) 199 return 1 200 201 # prepare beparams dict 202 beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams) 203 utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT) 204 205 # prepare nicparams dict 206 nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams) 207 utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES) 208 209 # prepare ndparams dict 210 if opts.ndparams is None: 211 ndparams = dict(constants.NDC_DEFAULTS) 212 else: 213 ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams) 214 utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES) 215 216 # prepare hvparams dict 217 for hv in constants.HYPER_TYPES: 218 if hv not in hvparams: 219 hvparams[hv] = {} 220 hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv]) 221 utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES) 222 223 # prepare diskparams dict 224 for templ in constants.DISK_TEMPLATES: 225 if templ not in diskparams: 226 diskparams[templ] = {} 227 diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ], 228 diskparams[templ]) 229 utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES) 230 231 # prepare ipolicy dict 232 ipolicy = CreateIPolicyFromOpts( 233 ispecs_mem_size=opts.ispecs_mem_size, 234 ispecs_cpu_count=opts.ispecs_cpu_count, 235 ispecs_disk_count=opts.ispecs_disk_count, 236 ispecs_disk_size=opts.ispecs_disk_size, 237 ispecs_nic_count=opts.ispecs_nic_count, 238 minmax_ispecs=opts.ipolicy_bounds_specs, 239 std_ispecs=opts.ipolicy_std_specs, 240 ipolicy_disk_templates=opts.ipolicy_disk_templates, 241 ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio, 242 ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio, 243 fill_all=True) 244 245 if opts.candidate_pool_size is None: 246 opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT 247 248 if opts.mac_prefix is None: 249 opts.mac_prefix = constants.DEFAULT_MAC_PREFIX 250 251 uid_pool = 
opts.uid_pool 252 if uid_pool is not None: 253 uid_pool = uidpool.ParseUidPool(uid_pool) 254 255 if opts.prealloc_wipe_disks is None: 256 opts.prealloc_wipe_disks = False 257 258 external_ip_setup_script = opts.use_external_mip_script 259 if external_ip_setup_script is None: 260 external_ip_setup_script = False 261 262 try: 263 primary_ip_version = int(opts.primary_ip_version) 264 except (ValueError, TypeError), err: 265 ToStderr("Invalid primary ip version value: %s" % str(err)) 266 return 1 267 268 master_netmask = opts.master_netmask 269 try: 270 if master_netmask is not None: 271 master_netmask = int(master_netmask) 272 except (ValueError, TypeError), err: 273 ToStderr("Invalid master netmask value: %s" % str(err)) 274 return 1 275 276 if opts.disk_state: 277 disk_state = utils.FlatToDict(opts.disk_state) 278 else: 279 disk_state = {} 280 281 hv_state = dict(opts.hv_state) 282 283 if opts.install_image: 284 install_image = opts.install_image 285 else: 286 install_image = "" 287 288 if opts.zeroing_image: 289 zeroing_image = opts.zeroing_image 290 else: 291 zeroing_image = "" 292 293 compression_tools = _GetCompressionTools(opts) 294 295 default_ialloc_params = opts.default_iallocator_params 296 297 if opts.enabled_user_shutdown: 298 enabled_user_shutdown = True 299 else: 300 enabled_user_shutdown = False 301 302 if opts.ssh_key_type: 303 ssh_key_type = opts.ssh_key_type 304 else: 305 ssh_key_type = constants.SSH_DEFAULT_KEY_TYPE 306 307 ssh_key_bits = ssh.DetermineKeyBits(ssh_key_type, opts.ssh_key_bits, None, 308 None) 309 310 bootstrap.InitCluster(cluster_name=args[0], 311 secondary_ip=opts.secondary_ip, 312 vg_name=vg_name, 313 mac_prefix=opts.mac_prefix, 314 master_netmask=master_netmask, 315 master_netdev=master_netdev, 316 file_storage_dir=opts.file_storage_dir, 317 shared_file_storage_dir=opts.shared_file_storage_dir, 318 gluster_storage_dir=opts.gluster_storage_dir, 319 enabled_hypervisors=hvlist, 320 hvparams=hvparams, 321 beparams=beparams, 322 nicparams=nicparams, 323 ndparams=ndparams, 324 diskparams=diskparams, 325 ipolicy=ipolicy, 326 candidate_pool_size=opts.candidate_pool_size, 327 modify_etc_hosts=opts.modify_etc_hosts, 328 modify_ssh_setup=opts.modify_ssh_setup, 329 maintain_node_health=opts.maintain_node_health, 330 drbd_helper=drbd_helper, 331 uid_pool=uid_pool, 332 default_iallocator=opts.default_iallocator, 333 default_iallocator_params=default_ialloc_params, 334 primary_ip_version=primary_ip_version, 335 prealloc_wipe_disks=opts.prealloc_wipe_disks, 336 use_external_mip_script=external_ip_setup_script, 337 hv_state=hv_state, 338 disk_state=disk_state, 339 enabled_disk_templates=enabled_disk_templates, 340 install_image=install_image, 341 zeroing_image=zeroing_image, 342 compression_tools=compression_tools, 343 enabled_user_shutdown=enabled_user_shutdown, 344 ssh_key_type=ssh_key_type, 345 ssh_key_bits=ssh_key_bits, 346 ) 347 op = opcodes.OpClusterPostInit() 348 SubmitOpCode(op, opts=opts) 349 return 0
350
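InitCluster applies the same two-step pattern to each parameter group (beparams, nicparams, ndparams, hvparams, diskparams): fill the user-supplied values over the cluster defaults, then force every known key to its declared type. The sketch below re-implements that pattern with invented defaults and type tables; fill_dict and force_dict_type are simplified stand-ins for objects.FillDict and utils.ForceDictType, not the real helpers.

# Simplified defaults-merging and type-coercion, mirroring the pattern used above.
BE_DEFAULTS = {"maxmem": 128, "minmem": 128, "auto_balance": True}   # stand-in defaults
BE_TYPES = {"maxmem": int, "minmem": int, "auto_balance": bool}      # stand-in type table

def fill_dict(defaults, custom):
    """Return a copy of the defaults updated with the user-provided values."""
    result = dict(defaults)
    result.update(custom)
    return result

def force_dict_type(params, types):
    """Coerce every known key to its declared type (in place)."""
    for key, value in params.items():
        if key in types:
            params[key] = types[key](value)

beparams = fill_dict(BE_DEFAULTS, {"maxmem": "512"})   # user passed a string
force_dict_type(beparams, BE_TYPES)
print(beparams["maxmem"])   # -> 512 (int); the other keys keep their defaults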
351 352 @UsesRPC 353 -def DestroyCluster(opts, args):
354 """Destroy the cluster. 355 356 @param opts: the command line options selected by the user 357 @type args: list 358 @param args: should be an empty list 359 @rtype: int 360 @return: the desired exit code 361 362 """ 363 if not opts.yes_do_it: 364 ToStderr("Destroying a cluster is irreversible. If you really want" 365 " destroy this cluster, supply the --yes-do-it option.") 366 return 1 367 368 op = opcodes.OpClusterDestroy() 369 master_uuid = SubmitOpCode(op, opts=opts) 370 # if we reached this, the opcode didn't fail; we can proceed to 371 # shutdown all the daemons 372 bootstrap.FinalizeClusterDestroy(master_uuid) 373 return 0
374
375 376 -def RenameCluster(opts, args):
377 """Rename the cluster. 378 379 @param opts: the command line options selected by the user 380 @type args: list 381 @param args: should contain only one element, the new cluster name 382 @rtype: int 383 @return: the desired exit code 384 385 """ 386 cl = GetClient() 387 388 (cluster_name, ) = cl.QueryConfigValues(["cluster_name"]) 389 390 new_name = args[0] 391 if not opts.force: 392 usertext = ("This will rename the cluster from '%s' to '%s'. If you are" 393 " connected over the network to the cluster name, the" 394 " operation is very dangerous as the IP address will be" 395 " removed from the node and the change may not go through." 396 " Continue?") % (cluster_name, new_name) 397 if not AskUser(usertext): 398 return 1 399 400 op = opcodes.OpClusterRename(name=new_name) 401 result = SubmitOpCode(op, opts=opts, cl=cl) 402 403 if result: 404 ToStdout("Cluster renamed from '%s' to '%s'", cluster_name, result) 405 406 return 0
407
408 409 -def ActivateMasterIp(opts, args):
410 """Activates the master IP. 411 412 """ 413 op = opcodes.OpClusterActivateMasterIp() 414 SubmitOpCode(op) 415 return 0
416
417 418 -def DeactivateMasterIp(opts, args):
419 """Deactivates the master IP. 420 421 """ 422 if not opts.confirm: 423 usertext = ("This will disable the master IP. All the open connections to" 424 " the master IP will be closed. To reach the master you will" 425 " need to use its node IP." 426 " Continue?") 427 if not AskUser(usertext): 428 return 1 429 430 op = opcodes.OpClusterDeactivateMasterIp() 431 SubmitOpCode(op) 432 return 0
433
434 435 -def RedistributeConfig(opts, args):
436 """Forces push of the cluster configuration. 437 438 @param opts: the command line options selected by the user 439 @type args: list 440 @param args: empty list 441 @rtype: int 442 @return: the desired exit code 443 444 """ 445 op = opcodes.OpClusterRedistConf() 446 if opts.yes_do_it: 447 SubmitOpCodeToDrainedQueue(op) 448 else: 449 SubmitOrSend(op, opts) 450 return 0
451
452 453 -def ShowClusterVersion(opts, args):
454 """Write version of ganeti software to the standard output. 455 456 @param opts: the command line options selected by the user 457 @type args: list 458 @param args: should be an empty list 459 @rtype: int 460 @return: the desired exit code 461 462 """ 463 cl = GetClient() 464 result = cl.QueryClusterInfo() 465 ToStdout("Software version: %s", result["software_version"]) 466 ToStdout("Internode protocol: %s", result["protocol_version"]) 467 ToStdout("Configuration format: %s", result["config_version"]) 468 ToStdout("OS api version: %s", result["os_api_version"]) 469 ToStdout("Export interface: %s", result["export_version"]) 470 ToStdout("VCS version: %s", result["vcs_version"]) 471 return 0
472
473 474 -def ShowClusterMaster(opts, args):
475 """Write name of master node to the standard output. 476 477 @param opts: the command line options selected by the user 478 @type args: list 479 @param args: should be an empty list 480 @rtype: int 481 @return: the desired exit code 482 483 """ 484 master = bootstrap.GetMaster() 485 ToStdout(master) 486 return 0
487
488 489 -def _FormatGroupedParams(paramsdict, roman=False):
490 """Format Grouped parameters (be, nic, disk) by group. 491 492 @type paramsdict: dict of dicts 493 @param paramsdict: {group: {param: value, ...}, ...} 494 @rtype: dict of dicts 495 @return: copy of the input dictionaries with strings as values 496 497 """ 498 ret = {} 499 for (item, val) in paramsdict.items(): 500 if isinstance(val, dict): 501 ret[item] = _FormatGroupedParams(val, roman=roman) 502 elif roman and isinstance(val, int): 503 ret[item] = compat.TryToRoman(val) 504 else: 505 ret[item] = str(val) 506 return ret
507
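To make the effect of the recursion above concrete, here is a self-contained run with stub data. try_to_roman below is a placeholder for compat.TryToRoman, and the parameter names are invented; only the shape of the input and output matters.

def try_to_roman(value, convert=True):
    """Placeholder for compat.TryToRoman: pretend-convert a few small ints."""
    romans = {1: "I", 2: "II", 3: "III", 4: "IV", 5: "V"}
    return romans.get(value, value) if convert else value

def format_grouped_params(paramsdict, roman=False):
    ret = {}
    for item, val in paramsdict.items():
        if isinstance(val, dict):
            ret[item] = format_grouped_params(val, roman=roman)
        elif roman and isinstance(val, int):
            ret[item] = try_to_roman(val)
        else:
            ret[item] = str(val)
    return ret

sample = {"kvm": {"vcpus": 2, "kernel_path": "/boot/vmlinuz"}}
print(format_grouped_params(sample))              # every leaf becomes a string
print(format_grouped_params(sample, roman=True))  # ints become Roman numerals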
508 509 -def _FormatDataCollectors(paramsdict):
510 """Format Grouped parameters (be, nic, disk) by group. 511 512 @type paramsdict: dict of dicts 513 @param paramsdict: response of QueryClusterInfo 514 @rtype: dict of dicts 515 @return: parameter grouped by data collector 516 517 """ 518 519 enabled = paramsdict[constants.DATA_COLLECTORS_ENABLED_NAME] 520 interval = paramsdict[constants.DATA_COLLECTORS_INTERVAL_NAME] 521 522 ret = {} 523 for key in enabled: 524 ret[key] = dict(active=enabled[key], 525 interval="%.3fs" % (interval[key] / 1e6)) 526 return ret
527
528 529 -def ShowClusterConfig(opts, args):
530 """Shows cluster information. 531 532 @param opts: the command line options selected by the user 533 @type args: list 534 @param args: should be an empty list 535 @rtype: int 536 @return: the desired exit code 537 538 """ 539 cl = GetClient() 540 result = cl.QueryClusterInfo() 541 542 if result["tags"]: 543 tags = utils.CommaJoin(utils.NiceSort(result["tags"])) 544 else: 545 tags = "(none)" 546 if result["reserved_lvs"]: 547 reserved_lvs = utils.CommaJoin(result["reserved_lvs"]) 548 else: 549 reserved_lvs = "(none)" 550 551 enabled_hv = result["enabled_hypervisors"] 552 hvparams = dict((k, v) for k, v in result["hvparams"].iteritems() 553 if k in enabled_hv) 554 555 info = [ 556 ("Cluster name", result["name"]), 557 ("Cluster UUID", result["uuid"]), 558 559 ("Creation time", utils.FormatTime(result["ctime"])), 560 ("Modification time", utils.FormatTime(result["mtime"])), 561 562 ("Master node", result["master"]), 563 564 ("Architecture (this node)", 565 "%s (%s)" % (result["architecture"][0], result["architecture"][1])), 566 567 ("Tags", tags), 568 569 ("Default hypervisor", result["default_hypervisor"]), 570 ("Enabled hypervisors", utils.CommaJoin(enabled_hv)), 571 572 ("Hypervisor parameters", _FormatGroupedParams(hvparams, 573 opts.roman_integers)), 574 575 ("OS-specific hypervisor parameters", 576 _FormatGroupedParams(result["os_hvp"], opts.roman_integers)), 577 578 ("OS parameters", _FormatGroupedParams(result["osparams"], 579 opts.roman_integers)), 580 581 ("Hidden OSes", utils.CommaJoin(result["hidden_os"])), 582 ("Blacklisted OSes", utils.CommaJoin(result["blacklisted_os"])), 583 584 ("Cluster parameters", [ 585 ("candidate pool size", 586 compat.TryToRoman(result["candidate_pool_size"], 587 convert=opts.roman_integers)), 588 ("maximal number of jobs running simultaneously", 589 compat.TryToRoman(result["max_running_jobs"], 590 convert=opts.roman_integers)), 591 ("maximal number of jobs simultaneously tracked by the scheduler", 592 compat.TryToRoman(result["max_tracked_jobs"], 593 convert=opts.roman_integers)), 594 ("mac prefix", result["mac_prefix"]), 595 ("master netdev", result["master_netdev"]), 596 ("master netmask", compat.TryToRoman(result["master_netmask"], 597 opts.roman_integers)), 598 ("use external master IP address setup script", 599 result["use_external_mip_script"]), 600 ("lvm volume group", result["volume_group_name"]), 601 ("lvm reserved volumes", reserved_lvs), 602 ("drbd usermode helper", result["drbd_usermode_helper"]), 603 ("file storage path", result["file_storage_dir"]), 604 ("shared file storage path", result["shared_file_storage_dir"]), 605 ("gluster storage path", result["gluster_storage_dir"]), 606 ("maintenance of node health", result["maintain_node_health"]), 607 ("uid pool", uidpool.FormatUidPool(result["uid_pool"])), 608 ("default instance allocator", result["default_iallocator"]), 609 ("default instance allocator parameters", 610 result["default_iallocator_params"]), 611 ("primary ip version", compat.TryToRoman(result["primary_ip_version"], 612 opts.roman_integers)), 613 ("preallocation wipe disks", result["prealloc_wipe_disks"]), 614 ("OS search path", utils.CommaJoin(pathutils.OS_SEARCH_PATH)), 615 ("ExtStorage Providers search path", 616 utils.CommaJoin(pathutils.ES_SEARCH_PATH)), 617 ("enabled disk templates", 618 utils.CommaJoin(result["enabled_disk_templates"])), 619 ("install image", result["install_image"]), 620 ("instance communication network", 621 result["instance_communication_network"]), 622 ("zeroing image", 
result["zeroing_image"]), 623 ("compression tools", result["compression_tools"]), 624 ("enabled user shutdown", result["enabled_user_shutdown"]), 625 ("modify ssh setup", result["modify_ssh_setup"]), 626 ("ssh_key_type", result["ssh_key_type"]), 627 ("ssh_key_bits", result["ssh_key_bits"]), 628 ]), 629 630 ("Default node parameters", 631 _FormatGroupedParams(result["ndparams"], roman=opts.roman_integers)), 632 633 ("Default instance parameters", 634 _FormatGroupedParams(result["beparams"], roman=opts.roman_integers)), 635 636 ("Default nic parameters", 637 _FormatGroupedParams(result["nicparams"], roman=opts.roman_integers)), 638 639 ("Default disk parameters", 640 _FormatGroupedParams(result["diskparams"], roman=opts.roman_integers)), 641 642 ("Instance policy - limits for instances", 643 FormatPolicyInfo(result["ipolicy"], None, True, opts.roman_integers)), 644 ("Data collectors", _FormatDataCollectors(result)), 645 ] 646 647 PrintGenericInfo(info) 648 return 0
649
650 651 -def ClusterCopyFile(opts, args):
652 """Copy a file from master to some nodes. 653 654 @param opts: the command line options selected by the user 655 @type args: list 656 @param args: should contain only one element, the path of 657 the file to be copied 658 @rtype: int 659 @return: the desired exit code 660 661 """ 662 filename = args[0] 663 filename = os.path.abspath(filename) 664 665 if not os.path.exists(filename): 666 raise errors.OpPrereqError("No such filename '%s'" % filename, 667 errors.ECODE_INVAL) 668 669 cl = GetClient() 670 qcl = GetClient() 671 try: 672 cluster_name = cl.QueryConfigValues(["cluster_name"])[0] 673 674 results = GetOnlineNodes(nodes=opts.nodes, cl=qcl, filter_master=True, 675 secondary_ips=opts.use_replication_network, 676 nodegroup=opts.nodegroup) 677 ports = GetNodesSshPorts(opts.nodes, qcl) 678 finally: 679 cl.Close() 680 qcl.Close() 681 682 srun = ssh.SshRunner(cluster_name) 683 for (node, port) in zip(results, ports): 684 if not srun.CopyFileToNode(node, port, filename): 685 ToStderr("Copy of file %s to node %s:%d failed", filename, node, port) 686 687 return 0
688
689 690 -def RunClusterCommand(opts, args):
691 """Run a command on some nodes. 692 693 @param opts: the command line options selected by the user 694 @type args: list 695 @param args: should contain the command to be run and its arguments 696 @rtype: int 697 @return: the desired exit code 698 699 """ 700 cl = GetClient() 701 qcl = GetClient() 702 703 command = " ".join(args) 704 705 nodes = GetOnlineNodes(nodes=opts.nodes, cl=qcl, nodegroup=opts.nodegroup) 706 ports = GetNodesSshPorts(nodes, qcl) 707 708 cluster_name, master_node = cl.QueryConfigValues(["cluster_name", 709 "master_node"]) 710 711 srun = ssh.SshRunner(cluster_name=cluster_name) 712 713 # Make sure master node is at list end 714 if master_node in nodes: 715 nodes.remove(master_node) 716 nodes.append(master_node) 717 718 for (name, port) in zip(nodes, ports): 719 result = srun.Run(name, constants.SSH_LOGIN_USER, command, port=port) 720 721 if opts.failure_only and result.exit_code == constants.EXIT_SUCCESS: 722 # Do not output anything for successful commands 723 continue 724 725 ToStdout("------------------------------------------------") 726 if opts.show_machine_names: 727 for line in result.output.splitlines(): 728 ToStdout("%s: %s", name, line) 729 else: 730 ToStdout("node: %s", name) 731 ToStdout("%s", result.output) 732 ToStdout("return code = %s", result.exit_code) 733 734 return 0
735
736 737 -def VerifyCluster(opts, args):
738 """Verify integrity of cluster, performing various test on nodes. 739 740 @param opts: the command line options selected by the user 741 @type args: list 742 @param args: should be an empty list 743 @rtype: int 744 @return: the desired exit code 745 746 """ 747 skip_checks = [] 748 749 if opts.skip_nplusone_mem: 750 skip_checks.append(constants.VERIFY_NPLUSONE_MEM) 751 752 cl = GetClient() 753 754 op = opcodes.OpClusterVerify(verbose=opts.verbose, 755 error_codes=opts.error_codes, 756 debug_simulate_errors=opts.simulate_errors, 757 skip_checks=skip_checks, 758 ignore_errors=opts.ignore_errors, 759 group_name=opts.nodegroup, 760 verify_clutter=opts.verify_clutter) 761 result = SubmitOpCode(op, cl=cl, opts=opts) 762 763 # Keep track of submitted jobs 764 jex = JobExecutor(cl=cl, opts=opts) 765 766 for (status, job_id) in result[constants.JOB_IDS_KEY]: 767 jex.AddJobId(None, status, job_id) 768 769 results = jex.GetResults() 770 771 (bad_jobs, bad_results) = \ 772 map(len, 773 # Convert iterators to lists 774 map(list, 775 # Count errors 776 map(compat.partial(itertools.ifilterfalse, bool), 777 # Convert result to booleans in a tuple 778 zip(*((job_success, len(op_results) == 1 and op_results[0]) 779 for (job_success, op_results) in results))))) 780 781 if bad_jobs == 0 and bad_results == 0: 782 rcode = constants.EXIT_SUCCESS 783 else: 784 rcode = constants.EXIT_FAILURE 785 if bad_jobs > 0: 786 ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs) 787 788 return rcode
789
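The nested map/zip expression above counts two things at once: jobs that failed outright, and jobs whose single opcode result evaluates to false. The following is an equivalent, more explicit formulation over the same (job_success, op_results) pairs, written as a readability sketch with invented sample data rather than real job results.

# Each entry mirrors what JobExecutor.GetResults() yields: (job_success, op_results).
results = [
    (True, [True]),      # job ran, opcode reported success
    (True, [False]),     # job ran, opcode reported a verification failure
    (False, ["error"]),  # the job itself failed
]

bad_jobs = sum(1 for job_success, _ in results if not job_success)
bad_results = sum(1 for _, op_results in results
                  if not (len(op_results) == 1 and op_results[0]))

print("bad jobs: %d, bad results: %d" % (bad_jobs, bad_results))  # -> bad jobs: 1, bad results: 1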
790 791 -def VerifyDisks(opts, args):
792 """Verify integrity of cluster disks. 793 794 @param opts: the command line options selected by the user 795 @type args: list 796 @param args: should be an empty list 797 @rtype: int 798 @return: the desired exit code 799 800 """ 801 cl = GetClient() 802 803 op = opcodes.OpClusterVerifyDisks(group_name=opts.nodegroup) 804 805 result = SubmitOpCode(op, cl=cl, opts=opts) 806 807 # Keep track of submitted jobs 808 jex = JobExecutor(cl=cl, opts=opts) 809 810 for (status, job_id) in result[constants.JOB_IDS_KEY]: 811 jex.AddJobId(None, status, job_id) 812 813 retcode = constants.EXIT_SUCCESS 814 815 for (status, result) in jex.GetResults(): 816 if not status: 817 ToStdout("Job failed: %s", result) 818 continue 819 820 ((bad_nodes, instances, missing), ) = result 821 822 for node, text in bad_nodes.items(): 823 ToStdout("Error gathering data on node %s: %s", 824 node, utils.SafeEncode(text[-400:])) 825 retcode = constants.EXIT_FAILURE 826 ToStdout("You need to fix these nodes first before fixing instances") 827 828 for iname in instances: 829 if iname in missing: 830 continue 831 op = opcodes.OpInstanceActivateDisks(instance_name=iname) 832 try: 833 ToStdout("Activating disks for instance '%s'", iname) 834 SubmitOpCode(op, opts=opts, cl=cl) 835 except errors.GenericError, err: 836 nret, msg = FormatError(err) 837 retcode |= nret 838 ToStderr("Error activating disks for instance %s: %s", iname, msg) 839 840 if missing: 841 for iname, ival in missing.iteritems(): 842 all_missing = compat.all(x[0] in bad_nodes for x in ival) 843 if all_missing: 844 ToStdout("Instance %s cannot be verified as it lives on" 845 " broken nodes", iname) 846 else: 847 ToStdout("Instance %s has missing logical volumes:", iname) 848 ival.sort() 849 for node, vol in ival: 850 if node in bad_nodes: 851 ToStdout("\tbroken node %s /dev/%s", node, vol) 852 else: 853 ToStdout("\t%s /dev/%s", node, vol) 854 855 ToStdout("You need to replace or recreate disks for all the above" 856 " instances if this message persists after fixing broken nodes.") 857 retcode = constants.EXIT_FAILURE 858 elif not instances: 859 ToStdout("No disks need to be activated.") 860 861 return retcode
862
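Each verify-disks job result unpacks into a single (bad_nodes, instances, missing) triple, and that shape drives the three reporting branches above. A stub of the structure with made-up node, instance, and volume names:

# Invented sample of the data one verify-disks job returns.
bad_nodes = {"node3.example.com": "rpc failure while gathering volume data"}
instances = ["web1", "db1"]        # instances whose disks are not active
missing = {                        # instance -> [(node, logical volume), ...]
    "db1": [("node3.example.com", "xenvg/db1-disk0")],
}
result = ((bad_nodes, instances, missing), )

# The same unpacking VerifyDisks performs on each successful job result:
((bad_nodes, instances, missing), ) = result
print(sorted(set(instances) - set(missing)))   # instances to reactivate -> ['web1']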
863 864 -def RepairDiskSizes(opts, args):
865 """Verify sizes of cluster disks. 866 867 @param opts: the command line options selected by the user 868 @type args: list 869 @param args: optional list of instances to restrict check to 870 @rtype: int 871 @return: the desired exit code 872 873 """ 874 op = opcodes.OpClusterRepairDiskSizes(instances=args) 875 SubmitOpCode(op, opts=opts)
876
877 878 @UsesRPC 879 -def MasterFailover(opts, args):
880 """Failover the master node. 881 882 This command, when run on a non-master node, will cause the current 883 master to cease being master, and the non-master to become new 884 master. 885 886 @param opts: the command line options selected by the user 887 @type args: list 888 @param args: should be an empty list 889 @rtype: int 890 @return: the desired exit code 891 892 """ 893 if not opts.no_voting: 894 # Verify that a majority of nodes is still healthy 895 if not bootstrap.MajorityHealthy(): 896 ToStderr("Master-failover with voting is only possible if the majority" 897 " of nodes is still healthy; use the --no-voting option after" 898 " ensuring by other means that you won't end up in a dual-master" 899 " scenario.") 900 return 1 901 if opts.no_voting and not opts.yes_do_it: 902 usertext = ("This will perform the failover even if most other nodes" 903 " are down, or if this node is outdated. This is dangerous" 904 " as it can lead to a non-consistent cluster. Check the" 905 " gnt-cluster(8) man page before proceeding. Continue?") 906 if not AskUser(usertext): 907 return 1 908 909 rvlaue, msgs = bootstrap.MasterFailover(no_voting=opts.no_voting) 910 for msg in msgs: 911 ToStderr(msg) 912 return rvlaue
913
914 915 -def MasterPing(opts, args):
916 """Checks if the master is alive. 917 918 @param opts: the command line options selected by the user 919 @type args: list 920 @param args: should be an empty list 921 @rtype: int 922 @return: the desired exit code 923 924 """ 925 try: 926 cl = GetClient() 927 cl.QueryClusterInfo() 928 return 0 929 except Exception: # pylint: disable=W0703 930 return 1
931
932 933 -def SearchTags(opts, args):
934 """Searches the tags on all the cluster. 935 936 @param opts: the command line options selected by the user 937 @type args: list 938 @param args: should contain only one element, the tag pattern 939 @rtype: int 940 @return: the desired exit code 941 942 """ 943 op = opcodes.OpTagsSearch(pattern=args[0]) 944 result = SubmitOpCode(op, opts=opts) 945 if not result: 946 return 1 947 result = list(result) 948 result.sort() 949 for path, tag in result: 950 ToStdout("%s %s", path, tag)
951
952 953 -def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
954 """Reads and verifies an X509 certificate. 955 956 @type cert_filename: string 957 @param cert_filename: the path of the file containing the certificate to 958 verify encoded in PEM format 959 @type verify_private_key: bool 960 @param verify_private_key: whether to verify the private key in addition to 961 the public certificate 962 @rtype: string 963 @return: a string containing the PEM-encoded certificate. 964 965 """ 966 try: 967 pem = utils.ReadFile(cert_filename) 968 except IOError, err: 969 raise errors.X509CertError(cert_filename, 970 "Unable to read certificate: %s" % str(err)) 971 972 try: 973 OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem) 974 except Exception, err: 975 raise errors.X509CertError(cert_filename, 976 "Unable to load certificate: %s" % str(err)) 977 978 if verify_private_key: 979 try: 980 OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem) 981 except Exception, err: 982 raise errors.X509CertError(cert_filename, 983 "Unable to load private key: %s" % str(err)) 984 985 return pem
986
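_ReadAndVerifyCert relies on pyOpenSSL parsing the PEM text to detect a broken certificate or key before anything is installed. A minimal standalone check along the same lines; load_certificate and load_privatekey are the standard pyOpenSSL entry points, while the function name and the file path in the usage comment are only examples.

import OpenSSL

def check_pem(path, with_private_key=False):
    """Return the PEM text if it parses as a certificate (and optionally a key)."""
    with open(path) as fd:          # example file handling; ganeti uses utils.ReadFile
        pem = fd.read()
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
    if with_private_key:
        OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
    return pem

# check_pem("/var/lib/ganeti/rapi.pem", with_private_key=True)  # raises on a bad file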
987 988 # pylint: disable=R0913 989 -def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911 990 rapi_cert_filename, new_spice_cert, spice_cert_filename, 991 spice_cacert_filename, new_confd_hmac_key, new_cds, 992 cds_filename, force, new_node_cert, new_ssh_keys, 993 ssh_key_type, ssh_key_bits, verbose, debug):
994 """Renews cluster certificates, keys and secrets. 995 996 @type new_cluster_cert: bool 997 @param new_cluster_cert: Whether to generate a new cluster certificate 998 @type new_rapi_cert: bool 999 @param new_rapi_cert: Whether to generate a new RAPI certificate 1000 @type rapi_cert_filename: string 1001 @param rapi_cert_filename: Path to file containing new RAPI certificate 1002 @type new_spice_cert: bool 1003 @param new_spice_cert: Whether to generate a new SPICE certificate 1004 @type spice_cert_filename: string 1005 @param spice_cert_filename: Path to file containing new SPICE certificate 1006 @type spice_cacert_filename: string 1007 @param spice_cacert_filename: Path to file containing the certificate of the 1008 CA that signed the SPICE certificate 1009 @type new_confd_hmac_key: bool 1010 @param new_confd_hmac_key: Whether to generate a new HMAC key 1011 @type new_cds: bool 1012 @param new_cds: Whether to generate a new cluster domain secret 1013 @type cds_filename: string 1014 @param cds_filename: Path to file containing new cluster domain secret 1015 @type force: bool 1016 @param force: Whether to ask user for confirmation 1017 @type new_node_cert: bool 1018 @param new_node_cert: Whether to generate new node certificates 1019 @type new_ssh_keys: bool 1020 @param new_ssh_keys: Whether to generate new node SSH keys 1021 @type ssh_key_type: One of L{constants.SSHK_ALL} 1022 @param ssh_key_type: The type of SSH key to be generated 1023 @type ssh_key_bits: int 1024 @param ssh_key_bits: The length of the key to be generated 1025 @type verbose: boolean 1026 @param verbose: Show verbose output 1027 @type debug: boolean 1028 @param debug: Show debug output 1029 1030 """ 1031 ToStdout("Updating certificates now. Running \"gnt-cluster verify\" " 1032 " is recommended after this operation.") 1033 1034 if new_rapi_cert and rapi_cert_filename: 1035 ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate" 1036 " options can be specified at the same time.") 1037 return 1 1038 1039 if new_cds and cds_filename: 1040 ToStderr("Only one of the --new-cluster-domain-secret and" 1041 " --cluster-domain-secret options can be specified at" 1042 " the same time.") 1043 return 1 1044 1045 if new_spice_cert and (spice_cert_filename or spice_cacert_filename): 1046 ToStderr("When using --new-spice-certificate, the --spice-certificate" 1047 " and --spice-ca-certificate must not be used.") 1048 return 1 1049 1050 if bool(spice_cacert_filename) ^ bool(spice_cert_filename): 1051 ToStderr("Both --spice-certificate and --spice-ca-certificate must be" 1052 " specified.") 1053 return 1 1054 1055 rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None) 1056 try: 1057 if rapi_cert_filename: 1058 rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True) 1059 if spice_cert_filename: 1060 spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True) 1061 spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename) 1062 except errors.X509CertError, err: 1063 ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1]) 1064 return 1 1065 1066 if cds_filename: 1067 try: 1068 cds = utils.ReadFile(cds_filename) 1069 except Exception, err: # pylint: disable=W0703 1070 ToStderr("Can't load new cluster domain secret from %s: %s" % 1071 (cds_filename, str(err))) 1072 return 1 1073 else: 1074 cds = None 1075 1076 if not force: 1077 usertext = ("This requires all daemons on all nodes to be restarted and" 1078 " may take some time. 
Continue?") 1079 if not AskUser(usertext): 1080 return 1 1081 1082 def _RenewCryptoInner(ctx): 1083 ctx.feedback_fn("Updating certificates and keys") 1084 1085 bootstrap.GenerateClusterCrypto(False, 1086 new_rapi_cert, 1087 new_spice_cert, 1088 new_confd_hmac_key, 1089 new_cds, 1090 False, 1091 None, 1092 rapi_cert_pem=rapi_cert_pem, 1093 spice_cert_pem=spice_cert_pem, 1094 spice_cacert_pem=spice_cacert_pem, 1095 cds=cds) 1096 1097 files_to_copy = [] 1098 1099 if new_rapi_cert or rapi_cert_pem: 1100 files_to_copy.append(pathutils.RAPI_CERT_FILE) 1101 1102 if new_spice_cert or spice_cert_pem: 1103 files_to_copy.append(pathutils.SPICE_CERT_FILE) 1104 files_to_copy.append(pathutils.SPICE_CACERT_FILE) 1105 1106 if new_confd_hmac_key: 1107 files_to_copy.append(pathutils.CONFD_HMAC_KEY) 1108 1109 if new_cds or cds: 1110 files_to_copy.append(pathutils.CLUSTER_DOMAIN_SECRET_FILE) 1111 1112 if files_to_copy: 1113 for node_name in ctx.nonmaster_nodes: 1114 port = ctx.ssh_ports[node_name] 1115 ctx.feedback_fn("Copying %s to %s:%d" % 1116 (", ".join(files_to_copy), node_name, port)) 1117 for file_name in files_to_copy: 1118 ctx.ssh.CopyFileToNode(node_name, port, file_name)
1119 1120 def _RenewClientCerts(ctx): 1121 ctx.feedback_fn("Updating client SSL certificates.") 1122 1123 cluster_name = ssconf.SimpleStore().GetClusterName() 1124 1125 for node_name in ctx.nonmaster_nodes + [ctx.master_node]: 1126 ssh_port = ctx.ssh_ports[node_name] 1127 data = { 1128 constants.NDS_CLUSTER_NAME: cluster_name, 1129 constants.NDS_NODE_DAEMON_CERTIFICATE: 1130 utils.ReadFile(pathutils.NODED_CERT_FILE), 1131 constants.NDS_NODE_NAME: node_name, 1132 constants.NDS_ACTION: constants.CRYPTO_ACTION_CREATE, 1133 } 1134 1135 ssh.RunSshCmdWithStdin( 1136 cluster_name, 1137 node_name, 1138 pathutils.SSL_UPDATE, 1139 ssh_port, 1140 data, 1141 debug=ctx.debug, 1142 verbose=ctx.verbose, 1143 use_cluster_key=True, 1144 ask_key=False, 1145 strict_host_check=True) 1146 1147 # Create a temporary ssconf file using the master's client cert digest 1148 # and the 'bootstrap' keyword to enable distribution of all nodes' digests. 1149 master_digest = utils.GetCertificateDigest() 1150 ssconf_master_candidate_certs_filename = os.path.join( 1151 pathutils.DATA_DIR, "%s%s" % 1152 (constants.SSCONF_FILEPREFIX, constants.SS_MASTER_CANDIDATES_CERTS)) 1153 utils.WriteFile( 1154 ssconf_master_candidate_certs_filename, 1155 data="%s=%s" % (constants.CRYPTO_BOOTSTRAP, master_digest)) 1156 for node_name in ctx.nonmaster_nodes: 1157 port = ctx.ssh_ports[node_name] 1158 ctx.feedback_fn("Copying %s to %s:%d" % 1159 (ssconf_master_candidate_certs_filename, node_name, port)) 1160 ctx.ssh.CopyFileToNode(node_name, port, 1161 ssconf_master_candidate_certs_filename) 1162 1163 # Write the boostrap entry to the config using wconfd. 1164 config_live_lock = utils.livelock.LiveLock("renew_crypto") 1165 cfg = config.GetConfig(None, config_live_lock) 1166 cfg.AddNodeToCandidateCerts(constants.CRYPTO_BOOTSTRAP, master_digest) 1167 cfg.Update(cfg.GetClusterInfo(), ctx.feedback_fn) 1168 1169 def _RenewServerAndClientCerts(ctx): 1170 ctx.feedback_fn("Updating the cluster SSL certificate.") 1171 1172 master_name = ssconf.SimpleStore().GetMasterNode() 1173 bootstrap.GenerateClusterCrypto(True, # cluster cert 1174 False, # rapi cert 1175 False, # spice cert 1176 False, # confd hmac key 1177 False, # cds 1178 True, # client cert 1179 master_name) 1180 1181 for node_name in ctx.nonmaster_nodes: 1182 port = ctx.ssh_ports[node_name] 1183 server_cert = pathutils.NODED_CERT_FILE 1184 ctx.feedback_fn("Copying %s to %s:%d" % 1185 (server_cert, node_name, port)) 1186 ctx.ssh.CopyFileToNode(node_name, port, server_cert) 1187 1188 _RenewClientCerts(ctx) 1189 1190 if new_rapi_cert or new_spice_cert or new_confd_hmac_key or new_cds: 1191 RunWhileClusterStopped(ToStdout, _RenewCryptoInner) 1192 1193 # If only node certficates are recreated, call _RenewClientCerts only. 1194 if new_node_cert and not new_cluster_cert: 1195 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD], 1196 _RenewClientCerts, verbose=verbose, debug=debug) 1197 1198 # If the cluster certificate are renewed, the client certificates need 1199 # to be renewed too. 
1200 if new_cluster_cert: 1201 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD], 1202 _RenewServerAndClientCerts, verbose=verbose, 1203 debug=debug) 1204 1205 if new_node_cert or new_cluster_cert or new_ssh_keys: 1206 cl = GetClient() 1207 renew_op = opcodes.OpClusterRenewCrypto( 1208 node_certificates=new_node_cert or new_cluster_cert, 1209 renew_ssh_keys=new_ssh_keys, 1210 ssh_key_type=ssh_key_type, 1211 ssh_key_bits=ssh_key_bits) 1212 SubmitOpCode(renew_op, cl=cl) 1213 1214 ToStdout("All requested certificates and keys have been replaced." 1215 " Running \"gnt-cluster verify\" now is recommended.") 1216 1217 return 0 1218
1219 1220 -def _BuildGanetiPubKeys(options, pub_key_file=pathutils.SSH_PUB_KEYS, cl=None, 1221 get_online_nodes_fn=GetOnlineNodes, 1222 get_nodes_ssh_ports_fn=GetNodesSshPorts, 1223 get_node_uuids_fn=GetNodeUUIDs, 1224 homedir_fn=None):
1225 """Recreates the 'ganeti_pub_key' file by polling all nodes. 1226 1227 """ 1228 1229 if not cl: 1230 cl = GetClient() 1231 1232 (cluster_name, master_node, modify_ssh_setup, ssh_key_type) = \ 1233 cl.QueryConfigValues(["cluster_name", "master_node", "modify_ssh_setup", 1234 "ssh_key_type"]) 1235 1236 # In case Ganeti is not supposed to modify the SSH setup, simply exit and do 1237 # not update this file. 1238 if not modify_ssh_setup: 1239 return 1240 1241 if os.path.exists(pub_key_file): 1242 utils.CreateBackup(pub_key_file) 1243 utils.RemoveFile(pub_key_file) 1244 1245 ssh.ClearPubKeyFile(pub_key_file) 1246 1247 online_nodes = get_online_nodes_fn([], cl=cl) 1248 ssh_ports = get_nodes_ssh_ports_fn(online_nodes + [master_node], cl) 1249 ssh_port_map = dict(zip(online_nodes + [master_node], ssh_ports)) 1250 1251 node_uuids = get_node_uuids_fn(online_nodes + [master_node], cl) 1252 node_uuid_map = dict(zip(online_nodes + [master_node], node_uuids)) 1253 1254 nonmaster_nodes = [name for name in online_nodes 1255 if name != master_node] 1256 1257 _, pub_key_filename, _ = \ 1258 ssh.GetUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False, 1259 kind=ssh_key_type, _homedir_fn=homedir_fn) 1260 1261 # get the key file of the master node 1262 pub_key = utils.ReadFile(pub_key_filename) 1263 ssh.AddPublicKey(node_uuid_map[master_node], pub_key, 1264 key_file=pub_key_file) 1265 1266 # get the key files of all non-master nodes 1267 for node in nonmaster_nodes: 1268 pub_key = ssh.ReadRemoteSshPubKeys(pub_key_filename, node, cluster_name, 1269 ssh_port_map[node], 1270 options.ssh_key_check, 1271 options.ssh_key_check) 1272 ssh.AddPublicKey(node_uuid_map[node], pub_key, key_file=pub_key_file)
1273
1274 1275 -def RenewCrypto(opts, args):
1276 """Renews cluster certificates, keys and secrets. 1277 1278 """ 1279 if opts.new_ssh_keys: 1280 _BuildGanetiPubKeys(opts) 1281 return _RenewCrypto(opts.new_cluster_cert, 1282 opts.new_rapi_cert, 1283 opts.rapi_cert, 1284 opts.new_spice_cert, 1285 opts.spice_cert, 1286 opts.spice_cacert, 1287 opts.new_confd_hmac_key, 1288 opts.new_cluster_domain_secret, 1289 opts.cluster_domain_secret, 1290 opts.force, 1291 opts.new_node_cert, 1292 opts.new_ssh_keys, 1293 opts.ssh_key_type, 1294 opts.ssh_key_bits, 1295 opts.verbose, 1296 opts.debug > 0)
1297
1298 1299 -def _GetEnabledDiskTemplates(opts):
1300 """Determine the list of enabled disk templates. 1301 1302 """ 1303 if opts.enabled_disk_templates: 1304 return opts.enabled_disk_templates.split(",") 1305 else: 1306 return None
1307
1308 1309 -def _GetVgName(opts, enabled_disk_templates):
1310 """Determine the volume group name. 1311 1312 @type enabled_disk_templates: list of strings 1313 @param enabled_disk_templates: cluster-wide enabled disk-templates 1314 1315 """ 1316 # consistency between vg name and enabled disk templates 1317 vg_name = None 1318 if opts.vg_name is not None: 1319 vg_name = opts.vg_name 1320 if enabled_disk_templates: 1321 if vg_name and not utils.IsLvmEnabled(enabled_disk_templates): 1322 ToStdout("You specified a volume group with --vg-name, but you did not" 1323 " enable any of the following lvm-based disk templates: %s" % 1324 utils.CommaJoin(constants.DTS_LVM)) 1325 return vg_name
1326
1327 1328 -def _GetDrbdHelper(opts, enabled_disk_templates):
1329 """Determine the DRBD usermode helper. 1330 1331 """ 1332 drbd_helper = opts.drbd_helper 1333 if enabled_disk_templates: 1334 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates 1335 if not drbd_enabled and opts.drbd_helper: 1336 ToStdout("You specified a DRBD usermode helper with " 1337 " --drbd-usermode-helper while DRBD is not enabled.") 1338 return drbd_helper
1339
1340 1341 -def _GetCompressionTools(opts):
1342 """Determine the list of custom compression tools. 1343 1344 """ 1345 if opts.compression_tools: 1346 return opts.compression_tools.split(",") 1347 elif opts.compression_tools is None: 1348 return None # To note the parameter was not provided 1349 else: 1350 return constants.IEC_DEFAULT_TOOLS # Resetting to default
1351
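_GetCompressionTools, like the other _Get* helpers above, distinguishes three cases for an option: a value given, nothing given (None, meaning "leave unchanged"), and an empty string (meaning "reset to the defaults"). A tiny sketch of that convention; DEFAULT_TOOLS is a stand-in list, not the real constants.IEC_DEFAULT_TOOLS.

DEFAULT_TOOLS = ["gzip", "gzip-fast", "gzip-slow"]   # assumed stand-in default list

def parse_compression_tools(option_value):
    """None -> leave unchanged, "" -> reset to defaults, "a,b" -> explicit list."""
    if option_value:
        return option_value.split(",")
    elif option_value is None:
        return None
    else:
        return DEFAULT_TOOLS

print(parse_compression_tools("lzop,gzip"))  # -> ['lzop', 'gzip']
print(parse_compression_tools(None))         # -> None (parameter was not provided)
print(parse_compression_tools(""))           # -> the default tool list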
1352 1353 -def SetClusterParams(opts, args):
1354 """Modify the cluster. 1355 1356 @param opts: the command line options selected by the user 1357 @type args: list 1358 @param args: should be an empty list 1359 @rtype: int 1360 @return: the desired exit code 1361 1362 """ 1363 if not (opts.vg_name is not None or 1364 opts.drbd_helper is not None or 1365 opts.enabled_hypervisors or opts.hvparams or 1366 opts.beparams or opts.nicparams or 1367 opts.ndparams or opts.diskparams or 1368 opts.candidate_pool_size is not None or 1369 opts.max_running_jobs is not None or 1370 opts.max_tracked_jobs is not None or 1371 opts.uid_pool is not None or 1372 opts.maintain_node_health is not None or 1373 opts.add_uids is not None or 1374 opts.remove_uids is not None or 1375 opts.default_iallocator is not None or 1376 opts.default_iallocator_params is not None or 1377 opts.reserved_lvs is not None or 1378 opts.mac_prefix is not None or 1379 opts.master_netdev is not None or 1380 opts.master_netmask is not None or 1381 opts.use_external_mip_script is not None or 1382 opts.prealloc_wipe_disks is not None or 1383 opts.hv_state or 1384 opts.enabled_disk_templates or 1385 opts.disk_state or 1386 opts.ipolicy_bounds_specs is not None or 1387 opts.ipolicy_std_specs is not None or 1388 opts.ipolicy_disk_templates is not None or 1389 opts.ipolicy_vcpu_ratio is not None or 1390 opts.ipolicy_spindle_ratio is not None or 1391 opts.modify_etc_hosts is not None or 1392 opts.file_storage_dir is not None or 1393 opts.install_image is not None or 1394 opts.instance_communication_network is not None or 1395 opts.zeroing_image is not None or 1396 opts.shared_file_storage_dir is not None or 1397 opts.compression_tools is not None or 1398 opts.shared_file_storage_dir is not None or 1399 opts.enabled_user_shutdown is not None or 1400 opts.data_collector_interval or 1401 opts.enabled_data_collectors): 1402 ToStderr("Please give at least one of the parameters.") 1403 return 1 1404 1405 enabled_disk_templates = _GetEnabledDiskTemplates(opts) 1406 vg_name = _GetVgName(opts, enabled_disk_templates) 1407 1408 try: 1409 drbd_helper = _GetDrbdHelper(opts, enabled_disk_templates) 1410 except errors.OpPrereqError, e: 1411 ToStderr(str(e)) 1412 return 1 1413 1414 hvlist = opts.enabled_hypervisors 1415 if hvlist is not None: 1416 hvlist = hvlist.split(",") 1417 1418 # a list of (name, dict) we can pass directly to dict() (or []) 1419 hvparams = dict(opts.hvparams) 1420 for hv_params in hvparams.values(): 1421 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) 1422 1423 diskparams = dict(opts.diskparams) 1424 1425 for dt_params in diskparams.values(): 1426 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES) 1427 1428 beparams = opts.beparams 1429 utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT) 1430 1431 nicparams = opts.nicparams 1432 utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES) 1433 1434 ndparams = opts.ndparams 1435 if ndparams is not None: 1436 utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES) 1437 1438 ipolicy = CreateIPolicyFromOpts( 1439 minmax_ispecs=opts.ipolicy_bounds_specs, 1440 std_ispecs=opts.ipolicy_std_specs, 1441 ipolicy_disk_templates=opts.ipolicy_disk_templates, 1442 ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio, 1443 ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio, 1444 ) 1445 1446 mnh = opts.maintain_node_health 1447 1448 uid_pool = opts.uid_pool 1449 if uid_pool is not None: 1450 uid_pool = uidpool.ParseUidPool(uid_pool) 1451 1452 add_uids = opts.add_uids 1453 if add_uids is not None: 1454 add_uids = 
uidpool.ParseUidPool(add_uids) 1455 1456 remove_uids = opts.remove_uids 1457 if remove_uids is not None: 1458 remove_uids = uidpool.ParseUidPool(remove_uids) 1459 1460 if opts.reserved_lvs is not None: 1461 if opts.reserved_lvs == "": 1462 opts.reserved_lvs = [] 1463 else: 1464 opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",") 1465 1466 if opts.master_netmask is not None: 1467 try: 1468 opts.master_netmask = int(opts.master_netmask) 1469 except ValueError: 1470 ToStderr("The --master-netmask option expects an int parameter.") 1471 return 1 1472 1473 ext_ip_script = opts.use_external_mip_script 1474 1475 if opts.disk_state: 1476 disk_state = utils.FlatToDict(opts.disk_state) 1477 else: 1478 disk_state = {} 1479 1480 hv_state = dict(opts.hv_state) 1481 1482 compression_tools = _GetCompressionTools(opts) 1483 1484 enabled_data_collectors = dict( 1485 (k, v.lower().startswith("t")) 1486 for k, v in opts.enabled_data_collectors.items()) 1487 1488 unrecognized_data_collectors = [ 1489 k for k in enabled_data_collectors.keys() 1490 if k not in constants.DATA_COLLECTOR_NAMES] 1491 if unrecognized_data_collectors: 1492 ToStderr("Data collector names not recognized: %s" % 1493 ", ".join(unrecognized_data_collectors)) 1494 1495 try: 1496 data_collector_interval = dict( 1497 (k, long(1e6 * float(v))) 1498 for (k, v) in opts.data_collector_interval.items()) 1499 except ValueError: 1500 ToStderr("Can't transform all values to integers: {}".format( 1501 opts.data_collector_interval)) 1502 return 1 1503 if any(v <= 0 for v in data_collector_interval): 1504 ToStderr("Some interval times where not above zero.") 1505 return 1 1506 1507 op = opcodes.OpClusterSetParams( 1508 vg_name=vg_name, 1509 drbd_helper=drbd_helper, 1510 enabled_hypervisors=hvlist, 1511 hvparams=hvparams, 1512 os_hvp=None, 1513 beparams=beparams, 1514 nicparams=nicparams, 1515 ndparams=ndparams, 1516 diskparams=diskparams, 1517 ipolicy=ipolicy, 1518 candidate_pool_size=opts.candidate_pool_size, 1519 max_running_jobs=opts.max_running_jobs, 1520 max_tracked_jobs=opts.max_tracked_jobs, 1521 maintain_node_health=mnh, 1522 modify_etc_hosts=opts.modify_etc_hosts, 1523 uid_pool=uid_pool, 1524 add_uids=add_uids, 1525 remove_uids=remove_uids, 1526 default_iallocator=opts.default_iallocator, 1527 default_iallocator_params=opts.default_iallocator_params, 1528 prealloc_wipe_disks=opts.prealloc_wipe_disks, 1529 mac_prefix=opts.mac_prefix, 1530 master_netdev=opts.master_netdev, 1531 master_netmask=opts.master_netmask, 1532 reserved_lvs=opts.reserved_lvs, 1533 use_external_mip_script=ext_ip_script, 1534 hv_state=hv_state, 1535 disk_state=disk_state, 1536 enabled_disk_templates=enabled_disk_templates, 1537 force=opts.force, 1538 file_storage_dir=opts.file_storage_dir, 1539 install_image=opts.install_image, 1540 instance_communication_network=opts.instance_communication_network, 1541 zeroing_image=opts.zeroing_image, 1542 shared_file_storage_dir=opts.shared_file_storage_dir, 1543 compression_tools=compression_tools, 1544 enabled_user_shutdown=opts.enabled_user_shutdown, 1545 enabled_data_collectors=enabled_data_collectors, 1546 data_collector_interval=data_collector_interval, 1547 ) 1548 return base.GetResult(None, opts, SubmitOrSend(op, opts))
1549
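The data-collector options in SetClusterParams are parsed from strings: enablement flags count as true when they start with "t", and intervals are given in seconds on the command line but stored in microseconds. Below is a standalone sketch of that conversion, including a positivity check over the parsed interval values; the function name is illustrative and the error handling is simplified.

def parse_collector_options(enabled_opt, interval_opt):
    """Convert raw --enabled-data-collectors / --data-collector-interval values.

    Both arguments are dicts of strings, as a keyval-style option produces.
    """
    enabled = dict((name, value.lower().startswith("t"))
                   for name, value in enabled_opt.items())
    # seconds on the command line -> microseconds internally
    intervals = dict((name, int(1e6 * float(value)))
                     for name, value in interval_opt.items())
    if any(usecs <= 0 for usecs in intervals.values()):
        raise ValueError("interval times must be above zero")
    return enabled, intervals

print(parse_collector_options({"diskstats": "True"}, {"diskstats": "30"}))
# -> ({'diskstats': True}, {'diskstats': 30000000})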
1550 1551 -def QueueOps(opts, args):
1552 """Queue operations. 1553 1554 @param opts: the command line options selected by the user 1555 @type args: list 1556 @param args: should contain only one element, the subcommand 1557 @rtype: int 1558 @return: the desired exit code 1559 1560 """ 1561 command = args[0] 1562 client = GetClient() 1563 if command in ("drain", "undrain"): 1564 drain_flag = command == "drain" 1565 client.SetQueueDrainFlag(drain_flag) 1566 elif command == "info": 1567 result = client.QueryConfigValues(["drain_flag"]) 1568 if result[0]: 1569 val = "set" 1570 else: 1571 val = "unset" 1572 ToStdout("The drain flag is %s" % val) 1573 else: 1574 raise errors.OpPrereqError("Command '%s' is not valid." % command, 1575 errors.ECODE_INVAL) 1576 1577 return 0
1578
1579 1580 -def _ShowWatcherPause(until):
1581 if until is None or until < time.time(): 1582 ToStdout("The watcher is not paused.") 1583 else: 1584 ToStdout("The watcher is paused until %s.", time.ctime(until))
1585
1586 1587 -def WatcherOps(opts, args):
1588 """Watcher operations. 1589 1590 @param opts: the command line options selected by the user 1591 @type args: list 1592 @param args: should contain only one element, the subcommand 1593 @rtype: int 1594 @return: the desired exit code 1595 1596 """ 1597 command = args[0] 1598 client = GetClient() 1599 1600 if command == "continue": 1601 client.SetWatcherPause(None) 1602 ToStdout("The watcher is no longer paused.") 1603 1604 elif command == "pause": 1605 if len(args) < 2: 1606 raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL) 1607 1608 result = client.SetWatcherPause(time.time() + ParseTimespec(args[1])) 1609 _ShowWatcherPause(result) 1610 1611 elif command == "info": 1612 result = client.QueryConfigValues(["watcher_pause"]) 1613 _ShowWatcherPause(result[0]) 1614 1615 else: 1616 raise errors.OpPrereqError("Command '%s' is not valid." % command, 1617 errors.ECODE_INVAL) 1618 1619 return 0
1620
1621 1622 -def _OobPower(opts, node_list, power):
1623 """Puts the node in the list to desired power state. 1624 1625 @param opts: The command line options selected by the user 1626 @param node_list: The list of nodes to operate on 1627 @param power: True if they should be powered on, False otherwise 1628 @return: The success of the operation (none failed) 1629 1630 """ 1631 if power: 1632 command = constants.OOB_POWER_ON 1633 else: 1634 command = constants.OOB_POWER_OFF 1635 1636 op = opcodes.OpOobCommand(node_names=node_list, 1637 command=command, 1638 ignore_status=True, 1639 timeout=opts.oob_timeout, 1640 power_delay=opts.power_delay) 1641 result = SubmitOpCode(op, opts=opts) 1642 errs = 0 1643 for node_result in result: 1644 (node_tuple, data_tuple) = node_result 1645 (_, node_name) = node_tuple 1646 (data_status, _) = data_tuple 1647 if data_status != constants.RS_NORMAL: 1648 assert data_status != constants.RS_UNAVAIL 1649 errs += 1 1650 ToStderr("There was a problem changing power for %s, please investigate", 1651 node_name) 1652 1653 if errs > 0: 1654 return False 1655 1656 return True
1657
1658 1659 -def _InstanceStart(opts, inst_list, start, no_remember=False):
1660 """Puts the instances in the list to desired state. 1661 1662 @param opts: The command line options selected by the user 1663 @param inst_list: The list of instances to operate on 1664 @param start: True if they should be started, False for shutdown 1665 @param no_remember: If the instance state should be remembered 1666 @return: The success of the operation (none failed) 1667 1668 """ 1669 if start: 1670 opcls = opcodes.OpInstanceStartup 1671 text_submit, text_success, text_failed = ("startup", "started", "starting") 1672 else: 1673 opcls = compat.partial(opcodes.OpInstanceShutdown, 1674 timeout=opts.shutdown_timeout, 1675 no_remember=no_remember) 1676 text_submit, text_success, text_failed = ("shutdown", "stopped", "stopping") 1677 1678 jex = JobExecutor(opts=opts) 1679 1680 for inst in inst_list: 1681 ToStdout("Submit %s of instance %s", text_submit, inst) 1682 op = opcls(instance_name=inst) 1683 jex.QueueJob(inst, op) 1684 1685 results = jex.GetResults() 1686 bad_cnt = len([1 for (success, _) in results if not success]) 1687 1688 if bad_cnt == 0: 1689 ToStdout("All instances have been %s successfully", text_success) 1690 else: 1691 ToStderr("There were errors while %s instances:\n" 1692 "%d error(s) out of %d instance(s)", text_failed, bad_cnt, 1693 len(results)) 1694 return False 1695 1696 return True
1697
1698 1699 -class _RunWhenNodesReachableHelper(object):
1700 """Helper class to make shared internal state sharing easier. 1701 1702 @ivar success: Indicates if all action_cb calls were successful 1703 1704 """
1705 def __init__(self, node_list, action_cb, node2ip, port, feedback_fn, 1706 _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
1707 """Init the object. 1708 1709 @param node_list: The list of nodes to be reachable 1710 @param action_cb: Callback called when a new host is reachable 1711 @type node2ip: dict 1712 @param node2ip: Node name to IP mapping 1713 @param port: The port to use for the TCP ping 1714 @param feedback_fn: The function used for feedback 1715 @param _ping_fn: Function to check reachability (for unittest use only) 1716 @param _sleep_fn: Function to sleep (for unittest use only) 1717 1718 """ 1719 self.down = set(node_list) 1720 self.up = set() 1721 self.node2ip = node2ip 1722 self.success = True 1723 self.action_cb = action_cb 1724 self.port = port 1725 self.feedback_fn = feedback_fn 1726 self._ping_fn = _ping_fn 1727 self._sleep_fn = _sleep_fn
1728
1729 def __call__(self):
1730 """When called we run action_cb. 1731 1732 @raises utils.RetryAgain: When there are still down nodes 1733 1734 """ 1735 if not self.action_cb(self.up): 1736 self.success = False 1737 1738 if self.down: 1739 raise utils.RetryAgain() 1740 else: 1741 return self.success
1742
1743 def Wait(self, secs):
1744 """Checks if a host is up or waits remaining seconds. 1745 1746 @param secs: The secs remaining 1747 1748 """ 1749 start = time.time() 1750 for node in self.down: 1751 if self._ping_fn(self.node2ip[node], self.port, timeout=_EPO_PING_TIMEOUT, 1752 live_port_needed=True): 1753 self.feedback_fn("Node %s became available" % node) 1754 self.up.add(node) 1755 self.down -= self.up 1756 # If we have a node available there is the possibility to run the 1757 # action callback successfully, therefore we don't wait and return 1758 return 1759 1760 self._sleep_fn(max(0.0, start + secs - time.time()))
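The Wait method above decides whether a node is back by attempting a TCP connection to the node daemon port. A rough stand-in for such a check using only the socket module (netutils.TcpPing itself takes more options, e.g. live_port_needed as used above):

import socket

def tcp_ping(host, port, timeout=5.0):
  # True if a TCP connection to host:port succeeds within the timeout
  try:
    sock = socket.create_connection((host, port), timeout)
  except (socket.error, OSError):
    return False
  sock.close()
  return True

# e.g. tcp_ping("node1.example.com", 1811)  -- host and port are placeholders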
1761
1762 1763 def _RunWhenNodesReachable(node_list, action_cb, interval):
1764 """Run action_cb when nodes become reachable. 1765 1766 @param node_list: The list of nodes to be reachable 1767 @param action_cb: Callback called when a new host is reachable 1768 @param interval: The earliest time to retry 1769 1770 """ 1771 client = GetClient() 1772 cluster_info = client.QueryClusterInfo() 1773 if cluster_info["primary_ip_version"] == constants.IP4_VERSION: 1774 family = netutils.IPAddress.family 1775 else: 1776 family = netutils.IP6Address.family 1777 1778 node2ip = dict((node, netutils.GetHostname(node, family=family).ip) 1779 for node in node_list) 1780 1781 port = netutils.GetDaemonPort(constants.NODED) 1782 helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip, port, 1783 ToStdout) 1784 1785 try: 1786 return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT, 1787 wait_fn=helper.Wait) 1788 except utils.RetryTimeout: 1789 ToStderr("Time exceeded while waiting for nodes to become reachable" 1790 " again:\n - %s", " - ".join(helper.down)) 1791 return False
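utils.Retry drives the helper: the helper raises RetryAgain while nodes are still down, Retry sleeps via the supplied wait function, and a RetryTimeout ends the loop. A stripped-down sketch of that control flow (simplified; the real utils.Retry is more general):

import time

class RetryAgain(Exception):
  pass

class RetryTimeout(Exception):
  pass

def retry(fn, interval, timeout, wait_fn=time.sleep):
  # call fn() until it returns; fn signals "not yet" by raising RetryAgain
  deadline = time.time() + timeout
  while True:
    try:
      return fn()
    except RetryAgain:
      remaining = deadline - time.time()
      if remaining <= 0:
        raise RetryTimeout()
      wait_fn(min(interval, remaining))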
1792
1793 1794 def _MaybeInstanceStartup(opts, inst_map, nodes_online, 1795 _instance_start_fn=_InstanceStart):
1796 """Start the instances conditionally, based on which of their nodes are online. 1797 1798 @param opts: The command line options selected by the user 1799 @param inst_map: A dict of inst -> nodes mapping 1800 @param nodes_online: The set of nodes that are currently online 1801 @param _instance_start_fn: Callback to start instances (unittest use only) 1802 @return: Success of the operation on all instances 1803 1804 """ 1805 start_inst_list = [] 1806 for (inst, nodes) in inst_map.items(): 1807 if not (nodes - nodes_online): 1808 # All nodes the instance lives on are back online 1809 start_inst_list.append(inst) 1810 1811 for inst in start_inst_list: 1812 del inst_map[inst] 1813 1814 if start_inst_list: 1815 return _instance_start_fn(opts, start_inst_list, True) 1816 1817 return True
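The readiness test above is a plain set difference: an instance qualifies once none of its nodes is missing from the online set. In isolation:

inst_map = {
    "inst1": {"node1", "node2"},
    "inst2": {"node2", "node3"},
}
nodes_online = {"node1", "node2"}

ready = [inst for (inst, nodes) in inst_map.items()
         if not (nodes - nodes_online)]
# ready == ["inst1"]: every node of inst1 is back, inst2 still waits for node3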
1818
1819 1820 def _EpoOn(opts, full_node_list, node_list, inst_map):
1821 """Does the actual power on. 1822 1823 @param opts: The command line options selected by the user 1824 @param full_node_list: All nodes to operate on (includes nodes not supporting 1825 OOB) 1826 @param node_list: The list of nodes to operate on (all need to support OOB) 1827 @param inst_map: A dict of inst -> nodes mapping 1828 @return: The desired exit status 1829 1830 """ 1831 if node_list and not _OobPower(opts, node_list, True): 1832 ToStderr("Not all nodes seem to get back up, investigate and start" 1833 " manually if needed") 1834 1835 # Wait for the nodes to be back up 1836 action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map)) 1837 1838 ToStdout("Waiting until all nodes are available again") 1839 if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL): 1840 ToStderr("Please investigate and start stopped instances manually") 1841 return constants.EXIT_FAILURE 1842 1843 return constants.EXIT_SUCCESS
1844
1845 1846 def _EpoOff(opts, node_list, inst_map):
1847 """Does the actual power off. 1848 1849 @param opts: The command line options selected by the user 1850 @param node_list: The list of nodes to operate on (all need to support OOB) 1851 @param inst_map: A dict of inst -> nodes mapping 1852 @return: The desired exit status 1853 1854 """ 1855 if not _InstanceStart(opts, inst_map.keys(), False, no_remember=True): 1856 ToStderr("Please investigate and stop instances manually before continuing") 1857 return constants.EXIT_FAILURE 1858 1859 if not node_list: 1860 return constants.EXIT_SUCCESS 1861 1862 if _OobPower(opts, node_list, False): 1863 return constants.EXIT_SUCCESS 1864 else: 1865 return constants.EXIT_FAILURE
1866
1867 1868 def Epo(opts, args, qcl=None, _on_fn=_EpoOn, _off_fn=_EpoOff, 1869 _confirm_fn=ConfirmOperation, 1870 _stdout_fn=ToStdout, _stderr_fn=ToStderr):
1871 """EPO operations. 1872 1873 @param opts: the command line options selected by the user 1874 @type args: list 1875 @param args: the names of the nodes (or node groups, with --groups) to operate on; must be empty when --all is given 1876 @rtype: int 1877 @return: the desired exit code 1878 1879 """ 1880 if opts.groups and opts.show_all: 1881 _stderr_fn("Only one of --groups or --all is allowed") 1882 return constants.EXIT_FAILURE 1883 elif args and opts.show_all: 1884 _stderr_fn("Arguments in combination with --all are not allowed") 1885 return constants.EXIT_FAILURE 1886 1887 if qcl is None: 1888 # Query client 1889 qcl = GetClient() 1890 1891 if opts.groups: 1892 node_query_list = \ 1893 itertools.chain(*qcl.QueryGroups(args, ["node_list"], False)) 1894 else: 1895 node_query_list = args 1896 1897 result = qcl.QueryNodes(node_query_list, ["name", "master", "pinst_list", 1898 "sinst_list", "powered", "offline"], 1899 False) 1900 1901 all_nodes = map(compat.fst, result) 1902 node_list = [] 1903 inst_map = {} 1904 for (node, master, pinsts, sinsts, powered, offline) in result: 1905 if not offline: 1906 for inst in (pinsts + sinsts): 1907 if inst in inst_map: 1908 if not master: 1909 inst_map[inst].add(node) 1910 elif master: 1911 inst_map[inst] = set() 1912 else: 1913 inst_map[inst] = set([node]) 1914 1915 if master and opts.on: 1916 # We ignore the master for turning on the machines, in fact we are 1917 # already operating on the master at this point :) 1918 continue 1919 elif master and not opts.show_all: 1920 _stderr_fn("%s is the master node, please do a master-failover to another" 1921 " node not affected by the EPO or use --all if you intend to" 1922 " shut down the whole cluster", node) 1923 return constants.EXIT_FAILURE 1924 elif powered is None: 1925 _stdout_fn("Node %s does not support out-of-band handling, it cannot be" 1926 " handled in a fully automated manner", node) 1927 elif powered == opts.on: 1928 _stdout_fn("Node %s is already in desired power state, skipping", node) 1929 elif not offline or (offline and powered): 1930 node_list.append(node) 1931 1932 if not (opts.force or _confirm_fn(all_nodes, "nodes", "epo")): 1933 return constants.EXIT_FAILURE 1934 1935 if opts.on: 1936 return _on_fn(opts, all_nodes, node_list, inst_map) 1937 else: 1938 return _off_fn(opts, node_list, inst_map)
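The loop above builds inst_map, a mapping from each instance to the set of nodes it lives on, skipping offline nodes and never recording the master node itself. A run of the same grouping logic over made-up query rows shows the resulting shape:

rows = [
    # (name, is_master, primary_instances, secondary_instances, offline)
    ("master1", True,  ["instA"], [],        False),
    ("node2",   False, ["instB"], ["instA"], False),
    ("node3",   False, [],        ["instB"], True),
]

inst_map = {}
for (node, master, pinsts, sinsts, offline) in rows:
  if offline:
    continue
  for inst in pinsts + sinsts:
    if inst in inst_map:
      if not master:
        inst_map[inst].add(node)
    elif master:
      inst_map[inst] = set()
    else:
      inst_map[inst] = set([node])

# inst_map == {"instA": set(["node2"]), "instB": set(["node2"])};
# the master and the offline node3 are never recorded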
1939
1940 1941 def _GetCreateCommand(info):
1942 buf = StringIO() 1943 buf.write("gnt-cluster init") 1944 PrintIPolicyCommand(buf, info["ipolicy"], False) 1945 buf.write(" ") 1946 buf.write(info["name"]) 1947 return buf.getvalue()
1948
1949 1950 def ShowCreateCommand(opts, args):
1951 """Shows the command that can be used to re-create the cluster. 1952 1953 Currently it works only for ipolicy specs. 1954 1955 """ 1956 cl = GetClient() 1957 result = cl.QueryClusterInfo() 1958 ToStdout(_GetCreateCommand(result))
1959
1960 1961 def _RunCommandAndReport(cmd):
1962 """Run a command and report its output iff it failed. 1963 1964 @param cmd: the command to execute 1965 @type cmd: list 1966 @rtype: bool 1967 @return: False if the execution failed. 1968 1969 """ 1970 result = utils.RunCmd(cmd) 1971 if result.failed: 1972 ToStderr("Command %s failed: %s; Output %s" % 1973 (cmd, result.fail_reason, result.output)) 1974 return False 1975 return True
1976
1977 1978 def _VerifyCommand(cmd):
1979 """Verify that a given command succeeds on all online nodes. 1980 1981 As this function is intended to run during upgrades, it 1982 is implemented in such a way that it still works even if all Ganeti 1983 daemons are down. 1984 @param cmd: a list of unquoted shell arguments 1985 @type cmd: list 1986 @rtype: list 1987 @return: the list of online node names where 1988 the command failed. 1989 1990 """ 1991 command = utils.text.ShellQuoteArgs([str(val) for val in cmd]) 1992 return _VerifyCommandRaw(command)
1993
1994 1995 def _VerifyCommandRaw(command):
1996 """Verify that a given command succeeds on all online nodes. 1997 1998 As this function is intended to run during upgrades, it 1999 is implemented in such a way that it still works even if all Ganeti 2000 daemons are down. 2001 @param command: a bare string to pass to SSH. The caller must do their 2002 own shell/ssh escaping. 2003 @type command: string 2004 @rtype: list 2005 @return: the list of online node names where 2006 the command failed. 2007 2008 """ 2009 2010 nodes = ssconf.SimpleStore().GetOnlineNodeList() 2011 master_node = ssconf.SimpleStore().GetMasterNode() 2012 cluster_name = ssconf.SimpleStore().GetClusterName() 2013 2014 # If master node is in 'nodes', make sure master node is at list end 2015 if master_node in nodes: 2016 nodes.remove(master_node) 2017 nodes.append(master_node) 2018 2019 failed = [] 2020 2021 srun = ssh.SshRunner(cluster_name=cluster_name) 2022 for name in nodes: 2023 result = srun.Run(name, constants.SSH_LOGIN_USER, command) 2024 if result.exit_code != 0: 2025 failed.append(name) 2026 2027 return failed
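_VerifyCommandRaw fans a single pre-quoted command string out over SSH to every online node, deliberately touching the master last, and collects the nodes where it failed. A rough stand-in that shells out to the ssh binary via subprocess (node list and login user are placeholders; the real code goes through ssh.SshRunner with the cluster's SSH parameters):

import subprocess

def run_everywhere(command, nodes, master, login_user="root"):
  # run `command` on every node via ssh; return the nodes where it failed
  ordered = [n for n in nodes if n != master]
  if master in nodes:
    ordered.append(master)        # touch the master last
  failed = []
  for node in ordered:
    rc = subprocess.call(["ssh", "-oBatchMode=yes",
                          "%s@%s" % (login_user, node), command])
    if rc != 0:
      failed.append(node)
  return failed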
2028
2029 2030 def _VerifyVersionInstalled(versionstring):
2031 """Verify that the given version of ganeti is installed on all online nodes. 2032 2033 Do nothing if this is the case; otherwise print an appropriate 2034 message to stderr. 2035 2036 @param versionstring: the version to check for 2037 @type versionstring: string 2038 @rtype: bool 2039 @return: True if the version is installed on all online nodes 2040 2041 """ 2042 badnodes = _VerifyCommand(["test", "-d", 2043 os.path.join(pathutils.PKGLIBDIR, versionstring)]) 2044 if badnodes: 2045 ToStderr("Ganeti version %s not installed on nodes %s" 2046 % (versionstring, ", ".join(badnodes))) 2047 return False 2048 2049 return True
2050
2051 2052 def _GetRunning():
2053 """Determine the number of running jobs. 2054 2055 @rtype: int 2056 @return: the number of jobs still running 2057 2058 """ 2059 cl = GetClient() 2060 qfilter = qlang.MakeSimpleFilter("status", 2061 frozenset([constants.JOB_STATUS_RUNNING])) 2062 return len(cl.Query(constants.QR_JOB, [], qfilter).data)
2063
2064 2065 def _SetGanetiVersionAndEnsure(versionstring):
2066 """Symlink the active version of ganeti to the given versionstring, 2067 and run the ensure-dirs script. 2068 2069 @type versionstring: string 2070 @rtype: list 2071 @return: the list of nodes where the version change failed 2072 2073 """ 2074 2075 # Update symlinks to point at the new version. 2076 if constants.HAS_GNU_LN: 2077 link_lib_cmd = [ 2078 "ln", "-s", "-f", "-T", 2079 os.path.join(pathutils.PKGLIBDIR, versionstring), 2080 os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")] 2081 link_share_cmd = [ 2082 "ln", "-s", "-f", "-T", 2083 os.path.join(pathutils.SHAREDIR, versionstring), 2084 os.path.join(pathutils.SYSCONFDIR, "ganeti/share")] 2085 cmds = [link_lib_cmd, link_share_cmd] 2086 else: 2087 rm_lib_cmd = [ 2088 "rm", "-f", os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")] 2089 link_lib_cmd = [ 2090 "ln", "-s", "-f", os.path.join(pathutils.PKGLIBDIR, versionstring), 2091 os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")] 2092 rm_share_cmd = [ 2093 "rm", "-f", os.path.join(pathutils.SYSCONFDIR, "ganeti/share")] 2094 ln_share_cmd = [ 2095 "ln", "-s", "-f", os.path.join(pathutils.SHAREDIR, versionstring), 2096 os.path.join(pathutils.SYSCONFDIR, "ganeti/share")] 2097 cmds = [rm_lib_cmd, link_lib_cmd, rm_share_cmd, ln_share_cmd] 2098 2099 # Run the ensure-dirs script to verify the new version is OK. 2100 cmds.append([pathutils.ENSURE_DIRS]) 2101 2102 # Submit all commands to ssh, exiting on the first failure. 2103 # The command string is a single argument that's given to ssh to submit to 2104 # the remote shell, so it only needs enough escaping to satisfy the remote 2105 # shell, rather than the 2 levels of escaping usually required when using 2106 # ssh from the commandline. 2107 quoted_cmds = [utils.text.ShellQuoteArgs(cmd) for cmd in cmds] 2108 cmd = " && ".join(quoted_cmds) 2109 failed = _VerifyCommandRaw(cmd) 2110 return list(set(failed))
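Because the joined command string is handed to the remote shell by ssh, it only needs one level of quoting. The same effect can be had with the standard library (shlex.quote on Python 3, pipes.quote on Python 2); the paths and version string below are placeholders:

try:
  from shlex import quote   # Python 3.3+
except ImportError:
  from pipes import quote   # Python 2

cmds = [
    ["ln", "-s", "-f", "-T", "/usr/lib/ganeti/2.17", "/etc/ganeti/lib"],
    ["ln", "-s", "-f", "-T", "/usr/share/ganeti/2.17", "/etc/ganeti/share"],
    ["/usr/lib/ganeti/ensure-dirs"],
]

# quote each argument once, then chain the commands for the remote shell
remote_cmd = " && ".join(" ".join(quote(arg) for arg in cmd) for cmd in cmds)
print(remote_cmd)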
2111
2112 2113 def _ExecuteCommands(fns):
2114 """Execute a list of functions, in reverse order. 2115 2116 @type fns: list of functions. 2117 @param fns: the functions to be executed. 2118 2119 """ 2120 for fn in reversed(fns): 2121 fn()
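Together with the rollback lists built by the callers below, this implements a simple undo stack: each successful step registers a parameterless callable, and on failure the callables run newest-first. The pattern in isolation (the step names are illustrative):

import sys

def undo_intent_file():
  sys.stdout.write("removing the intent-to-upgrade marker\n")

def undo_queue_drain():
  sys.stdout.write("re-enabling the job queue\n")

rollback = []
rollback.append(undo_intent_file)   # registered first
rollback.append(undo_queue_drain)   # registered last

# a later step failed: undo the newest change first
for undo in reversed(rollback):
  undo()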
2122
2123 2124 def _GetConfigVersion():
2125 """Determine the version the configuration file currently has. 2126 2127 @rtype: tuple or None 2128 @return: (major, minor, revision) if the version can be determined, 2129 None otherwise 2130 2131 """ 2132 config_data = serializer.LoadJson(utils.ReadFile(pathutils.CLUSTER_CONF_FILE)) 2133 try: 2134 config_version = config_data["version"] 2135 except KeyError: 2136 return None 2137 return utils.SplitVersion(config_version)
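utils.SplitVersion unpacks the single packed integer stored in the configuration into (major, minor, revision). Assuming the usual Ganeti packing of major * 1000000 + minor * 10000 + revision (constants.BuildVersion is the authoritative definition), a standalone equivalent is:

def split_version(packed):
  # e.g. 2110000 -> (2, 11, 0), assuming major*1000000 + minor*10000 + revision
  major = packed // 1000000
  minor = (packed // 10000) % 100
  revision = packed % 10000
  return (major, minor, revision)

assert split_version(2110000) == (2, 11, 0)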
2138
2139 2140 def _ReadIntentToUpgrade():
2141 """Read the file documenting the intent to upgrade the cluster. 2142 2143 @rtype: (string, string) or (None, None) 2144 @return: (old version, version to upgrade to), if the file exists, 2145 and (None, None) otherwise. 2146 2147 """ 2148 if not os.path.isfile(pathutils.INTENT_TO_UPGRADE): 2149 return (None, None) 2150 2151 contentstring = utils.ReadFile(pathutils.INTENT_TO_UPGRADE) 2152 contents = utils.UnescapeAndSplit(contentstring) 2153 if len(contents) != 3: 2154 # file syntactically malformed 2155 return (None, None) 2156 return (contents[0], contents[1])
2157
2158 2159 def _WriteIntentToUpgrade(version):
2160 """Write the file documenting the intent to upgrade the cluster. 2161 2162 @type version: string 2163 @param version: the version we intend to upgrade to 2164 2165 """ 2166 utils.WriteFile(pathutils.INTENT_TO_UPGRADE, 2167 data=utils.EscapeAndJoin([constants.RELEASE_VERSION, version, 2168 "%d" % os.getpid()]))
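The intent-to-upgrade file stores three escaped fields: the currently running version, the target version and the PID of the writer; the reader above tolerates a missing or malformed file. A simplified round trip using plain whitespace instead of the EscapeAndJoin/UnescapeAndSplit helpers (path and format are illustrative only):

import os

STATE_FILE = "/tmp/intent-to-upgrade.example"   # illustrative path only

def write_intent(current_version, target_version):
  with open(STATE_FILE, "w") as fd:
    fd.write("%s %s %d\n" % (current_version, target_version, os.getpid()))

def read_intent():
  if not os.path.isfile(STATE_FILE):
    return (None, None)
  with open(STATE_FILE) as fd:
    fields = fd.read().split()
  if len(fields) != 3:
    return (None, None)   # syntactically malformed
  return (fields[0], fields[1])

write_intent("2.16.0", "2.17.0")
print(read_intent())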
2169
2170 2171 def _UpgradeBeforeConfigurationChange(versionstring):
2172 """ 2173 Carry out all the tasks necessary for an upgrade that happen before 2174 the configuration file, or Ganeti version, changes. 2175 2176 @type versionstring: string 2177 @param versionstring: the version to upgrade to 2178 @rtype: (bool, list) 2179 @return: tuple of a bool indicating success and a list of rollback tasks 2180 2181 """ 2182 rollback = [] 2183 2184 ToStdoutAndLoginfo("Verifying %s present on all nodes", versionstring) 2185 if not _VerifyVersionInstalled(versionstring): 2186 return (False, rollback) 2187 2188 _WriteIntentToUpgrade(versionstring) 2189 rollback.append( 2190 lambda: utils.RunCmd(["rm", "-f", pathutils.INTENT_TO_UPGRADE])) 2191 2192 ToStdoutAndLoginfo("Draining queue") 2193 client = GetClient() 2194 client.SetQueueDrainFlag(True) 2195 2196 rollback.append(lambda: GetClient().SetQueueDrainFlag(False)) 2197 2198 if utils.SimpleRetry(0, _GetRunning, 2199 constants.UPGRADE_QUEUE_POLL_INTERVAL, 2200 constants.UPGRADE_QUEUE_DRAIN_TIMEOUT): 2201 ToStderr("Failed to completely empty the queue.") 2202 return (False, rollback) 2203 2204 ToStdoutAndLoginfo("Pausing the watcher for one hour.") 2205 rollback.append(lambda: GetClient().SetWatcherPause(None)) 2206 GetClient().SetWatcherPause(time.time() + 60 * 60) 2207 2208 ToStdoutAndLoginfo("Stopping daemons on master node.") 2209 if not _RunCommandAndReport([pathutils.DAEMON_UTIL, "stop-all"]): 2210 return (False, rollback) 2211 2212 ToStdoutAndLoginfo("Stopping daemons everywhere.") 2213 rollback.append(lambda: _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])) 2214 badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"]) 2215 if badnodes: 2216 ToStderr("Failed to stop daemons on %s." % (", ".join(badnodes),)) 2217 return (False, rollback) 2218 2219 backuptar = os.path.join(pathutils.BACKUP_DIR, "ganeti%d.tar" % time.time()) 2220 ToStdoutAndLoginfo("Backing up configuration as %s", backuptar) 2221 if not _RunCommandAndReport(["mkdir", "-p", pathutils.BACKUP_DIR]): 2222 return (False, rollback) 2223 2224 # Create the archive in a safe manner, as it contains sensitive 2225 # information. 2226 (_, tmp_name) = tempfile.mkstemp(prefix=backuptar, dir=pathutils.BACKUP_DIR) 2227 if not _RunCommandAndReport(["tar", "-cf", tmp_name, 2228 "--exclude=queue/archive", 2229 pathutils.DATA_DIR]): 2230 return (False, rollback) 2231 2232 os.rename(tmp_name, backuptar) 2233 return (True, rollback)
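The configuration backup above is written to a mkstemp-created temporary file in the backup directory and only renamed to its final name once the tar completes, so a partial archive never appears under the expected name and the file starts out with restrictive permissions. The core of that pattern, independent of tar (directory and file names are illustrative):

import os
import tempfile

def write_sensitive_file(directory, final_name, data):
  # mkstemp creates the file with mode 0600, so no other user sees the data
  (fd, tmp_path) = tempfile.mkstemp(prefix=final_name + ".", dir=directory)
  try:
    os.write(fd, data)
  finally:
    os.close(fd)
  # renaming within one filesystem is atomic: readers see either nothing
  # under the final name or the complete file
  os.rename(tmp_path, os.path.join(directory, final_name))

write_sensitive_file("/tmp", "backup.example", b"example payload\n")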
2234
2235 2236 def _VersionSpecificDowngrade():
2237 """ 2238 Perform any additional downgrade tasks that are version specific 2239 and need to be done just after the configuration downgrade. This 2240 function needs to be idempotent, so that it can be redone if the 2241 downgrade procedure gets interrupted after changing the 2242 configuration. 2243 2244 Note that this function has to be reset with every version bump. 2245 2246 @return: True upon success 2247 """ 2248 ToStdoutAndLoginfo("Performing version-specific downgrade tasks.") 2249 2250 return True
2251
2252 2253 def _SwitchVersionAndConfig(versionstring, downgrade):
2254 """ 2255 Switch to the new Ganeti version and change the configuration, 2256 in the correct order. 2257 2258 @type versionstring: string 2259 @param versionstring: the version to change to 2260 @type downgrade: bool 2261 @param downgrade: True if the configuration should be downgraded 2262 @rtype: (bool, list) 2263 @return: tuple of a bool indicating success, and a list of 2264 additional rollback tasks 2265 2266 """ 2267 rollback = [] 2268 if downgrade: 2269 ToStdoutAndLoginfo("Downgrading configuration") 2270 if not _RunCommandAndReport([pathutils.CFGUPGRADE, "--downgrade", "-f"]): 2271 return (False, rollback) 2272 # Note: version specific downgrades need to be done before switching 2273 # binaries, so that we still have the knowledgeable binary if the downgrade 2274 # process gets interrupted at this point. 2275 if not _VersionSpecificDowngrade(): 2276 return (False, rollback) 2277 2278 # Configuration change is the point of no return. From then onwards, it is 2279 # safer to push through the up/downgrade than to try to roll it back. 2280 2281 ToStdoutAndLoginfo("Switching to version %s on all nodes", versionstring) 2282 rollback.append(lambda: _SetGanetiVersionAndEnsure(constants.DIR_VERSION)) 2283 badnodes = _SetGanetiVersionAndEnsure(versionstring) 2284 if badnodes: 2285 ToStderr("Failed to switch to Ganeti version %s on nodes %s" 2286 % (versionstring, ", ".join(badnodes))) 2287 if not downgrade: 2288 return (False, rollback) 2289 2290 # Now that we have changed to the new version of Ganeti we should 2291 # not communicate over luxi any more, as luxi might have changed in 2292 # incompatible ways. Therefore, manually call the corresponding ganeti 2293 # commands using their canonical (version independent) path. 2294 2295 if not downgrade: 2296 ToStdoutAndLoginfo("Upgrading configuration") 2297 if not _RunCommandAndReport([pathutils.CFGUPGRADE, "-f"]): 2298 return (False, rollback) 2299 2300 return (True, rollback)
2301
2302 2303 def _UpgradeAfterConfigurationChange(oldversion):
2304 """ 2305 Carry out the upgrade actions necessary after switching to the new 2306 Ganeti version and updating the configuration. 2307 2308 As this part is run at a time where the new version of Ganeti is already 2309 running, no communication should happen via luxi, as this is not a stable 2310 interface. Also, as the configuration change is the point of no return, 2311 all actions are pushed through, even if some of them fail. 2312 2313 @param oldversion: the version the upgrade started from 2314 @type oldversion: string 2315 @rtype: int 2316 @return: the intended return value 2317 2318 """ 2319 returnvalue = 0 2320 2321 ToStdoutAndLoginfo("Starting daemons everywhere.") 2322 badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]) 2323 if badnodes: 2324 ToStderr("Warning: failed to start daemons on %s." % (", ".join(badnodes),)) 2325 returnvalue = 1 2326 2327 ToStdoutAndLoginfo("Redistributing the configuration.") 2328 if not _RunCommandAndReport(["gnt-cluster", "redist-conf", "--yes-do-it"]): 2329 returnvalue = 1 2330 2331 ToStdoutAndLoginfo("Restarting daemons everywhere.") 2332 badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"]) 2333 badnodes.extend(_VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])) 2334 if badnodes: 2335 ToStderr("Warning: failed to start daemons on %s." % 2336 (", ".join(list(set(badnodes))),)) 2337 returnvalue = 1 2338 2339 ToStdoutAndLoginfo("Undraining the queue.") 2340 if not _RunCommandAndReport(["gnt-cluster", "queue", "undrain"]): 2341 returnvalue = 1 2342 2343 _RunCommandAndReport(["rm", "-f", pathutils.INTENT_TO_UPGRADE]) 2344 2345 ToStdoutAndLoginfo("Running post-upgrade hooks") 2346 if not _RunCommandAndReport([pathutils.POST_UPGRADE, oldversion]): 2347 returnvalue = 1 2348 2349 ToStdoutAndLoginfo("Unpausing the watcher.") 2350 if not _RunCommandAndReport(["gnt-cluster", "watcher", "continue"]): 2351 returnvalue = 1 2352 2353 ToStdoutAndLoginfo("Verifying cluster.") 2354 if not _RunCommandAndReport(["gnt-cluster", "verify"]): 2355 returnvalue = 1 2356 2357 return returnvalue
2358
2359 2360 def UpgradeGanetiCommand(opts, args):
2361 """Upgrade a cluster to a new ganeti version. 2362 2363 @param opts: the command line options selected by the user 2364 @type args: list 2365 @param args: should be an empty list 2366 @rtype: int 2367 @return: the desired exit code 2368 2369 """ 2370 if ((not opts.resume and opts.to is None) 2371 or (opts.resume and opts.to is not None)): 2372 ToStderr("Precisely one of the options --to and --resume" 2373 " has to be given") 2374 return 1 2375 2376 # If we're not told to resume, verify there is no upgrade 2377 # in progress. 2378 if not opts.resume: 2379 oldversion, versionstring = _ReadIntentToUpgrade() 2380 if versionstring is not None: 2381 # An upgrade is going on; verify whether the target matches 2382 if versionstring == opts.to: 2383 ToStderr("An upgrade is already in progress. Target version matches," 2384 " resuming.") 2385 opts.resume = True 2386 opts.to = None 2387 else: 2388 ToStderr("An upgrade from %s to %s is in progress; use --resume to" 2389 " finish it first" % (oldversion, versionstring)) 2390 return 1 2391 2392 utils.SetupLogging(pathutils.LOG_COMMANDS, 'gnt-cluster upgrade', debug=1) 2393 2394 oldversion = constants.RELEASE_VERSION 2395 2396 if opts.resume: 2397 ssconf.CheckMaster(False) 2398 oldversion, versionstring = _ReadIntentToUpgrade() 2399 if versionstring is None: 2400 return 0 2401 version = utils.version.ParseVersion(versionstring) 2402 if version is None: 2403 return 1 2404 configversion = _GetConfigVersion() 2405 if configversion is None: 2406 return 1 2407 # If the upgrade we resume was an upgrade between compatible 2408 # versions (like 2.10.0 to 2.10.1), the correct configversion 2409 # does not guarantee that the config has been updated. 2410 # However, in the case of a compatible update with the configuration 2411 # not touched, we are running a different dirversion with the same 2412 # config version. 2413 config_already_modified = \ 2414 (utils.IsCorrectConfigVersion(version, configversion) and 2415 not (versionstring != constants.DIR_VERSION and 2416 configversion == (constants.CONFIG_MAJOR, constants.CONFIG_MINOR, 2417 constants.CONFIG_REVISION))) 2418 if not config_already_modified: 2419 # We have to start from the beginning; however, some daemons might have 2420 # already been stopped, so the only way to get into a well-defined state 2421 # is by starting all daemons again. 2422 _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]) 2423 else: 2424 versionstring = opts.to 2425 config_already_modified = False 2426 version = utils.version.ParseVersion(versionstring) 2427 if version is None: 2428 ToStderr("Could not parse version string %s" % versionstring) 2429 return 1 2430 2431 msg = utils.version.UpgradeRange(version) 2432 if msg is not None: 2433 ToStderr("Cannot upgrade to %s: %s" % (versionstring, msg)) 2434 return 1 2435 2436 if not config_already_modified: 2437 success, rollback = _UpgradeBeforeConfigurationChange(versionstring) 2438 if not success: 2439 _ExecuteCommands(rollback) 2440 return 1 2441 else: 2442 rollback = [] 2443 2444 downgrade = utils.version.ShouldCfgdowngrade(version) 2445 2446 success, additionalrollback = \ 2447 _SwitchVersionAndConfig(versionstring, downgrade) 2448 if not success: 2449 rollback.extend(additionalrollback) 2450 _ExecuteCommands(rollback) 2451 return 1 2452 2453 return _UpgradeAfterConfigurationChange(oldversion)
2454 2455 2456 commands = { 2457 "init": ( 2458 InitCluster, [ArgHost(min=1, max=1)], 2459 [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT, 2460 HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, 2461 NIC_PARAMS_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT, 2462 SECONDARY_IP_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, 2463 DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT, DEFAULT_IALLOCATOR_PARAMS_OPT, 2464 PRIMARY_IP_VERSION_OPT, PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, 2465 GLOBAL_SHARED_FILEDIR_OPT, USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT, 2466 HV_STATE_OPT, DISK_STATE_OPT, ENABLED_DISK_TEMPLATES_OPT, 2467 IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, INSTALL_IMAGE_OPT, 2468 ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT, 2469 ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT, 2470 ] 2471 + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS, 2472 "[opts...] <cluster_name>", "Initialises a new cluster configuration"), 2473 "destroy": ( 2474 DestroyCluster, ARGS_NONE, [YES_DOIT_OPT], 2475 "", "Destroy cluster"), 2476 "rename": ( 2477 RenameCluster, [ArgHost(min=1, max=1)], 2478 [FORCE_OPT, DRY_RUN_OPT], 2479 "<new_name>", 2480 "Renames the cluster"), 2481 "redist-conf": ( 2482 RedistributeConfig, ARGS_NONE, SUBMIT_OPTS + 2483 [DRY_RUN_OPT, PRIORITY_OPT, FORCE_DISTRIBUTION], 2484 "", "Forces a push of the configuration file and ssconf files" 2485 " to the nodes in the cluster"), 2486 "verify": ( 2487 VerifyCluster, ARGS_NONE, 2488 [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT, 2489 DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT, 2490 VERIFY_CLUTTER_OPT], 2491 "", "Does a check on the cluster configuration"), 2492 "verify-disks": ( 2493 VerifyDisks, ARGS_NONE, [PRIORITY_OPT, NODEGROUP_OPT], 2494 "", "Does a check on the cluster disk status"), 2495 "repair-disk-sizes": ( 2496 RepairDiskSizes, ARGS_MANY_INSTANCES, [DRY_RUN_OPT, PRIORITY_OPT], 2497 "[instance...]", "Updates mismatches in recorded disk sizes"), 2498 "master-failover": ( 2499 MasterFailover, ARGS_NONE, [NOVOTING_OPT, FORCE_FAILOVER], 2500 "", "Makes the current node the master"), 2501 "master-ping": ( 2502 MasterPing, ARGS_NONE, [], 2503 "", "Checks if the master is alive"), 2504 "version": ( 2505 ShowClusterVersion, ARGS_NONE, [], 2506 "", "Shows the cluster version"), 2507 "getmaster": ( 2508 ShowClusterMaster, ARGS_NONE, [], 2509 "", "Shows the cluster master"), 2510 "copyfile": ( 2511 ClusterCopyFile, [ArgFile(min=1, max=1)], 2512 [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT], 2513 "[-n node...] <filename>", "Copies a file to all (or only some) nodes"), 2514 "command": ( 2515 RunClusterCommand, [ArgCommand(min=1)], 2516 [NODE_LIST_OPT, NODEGROUP_OPT, SHOW_MACHINE_OPT, FAILURE_ONLY_OPT], 2517 "[-n node...] <command>", "Runs a command on all (or only some) nodes"),
2518 "info": ( 2519 ShowClusterConfig, ARGS_NONE, [ROMAN_OPT], 2520 "[--roman]", "Show cluster configuration"), 2521 "list-tags": ( 2522 ListTags, ARGS_NONE, [], "", "List the tags of the cluster"), 2523 "add-tags": ( 2524 AddTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, 2525 "tag...", "Add tags to the cluster"), 2526 "remove-tags": ( 2527 RemoveTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS, 2528 "tag...", "Remove tags from the cluster"), 2529 "search-tags": ( 2530 SearchTags, [ArgUnknown(min=1, max=1)], [PRIORITY_OPT], "", 2531 "Searches the tags on all objects on" 2532 " the cluster for a given pattern (regex)"), 2533 "queue": ( 2534 QueueOps, 2535 [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])], 2536 [], "drain|undrain|info", "Change queue properties"), 2537 "watcher": ( 2538 WatcherOps, 2539 [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]), 2540 ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])], 2541 [], 2542 "{pause <timespec>|continue|info}", "Change watcher properties"), 2543 "modify": ( 2544 SetClusterParams, ARGS_NONE, 2545 [FORCE_OPT, 2546 BACKEND_OPT, CP_SIZE_OPT, RQL_OPT, MAX_TRACK_OPT, INSTALL_IMAGE_OPT, 2547 INSTANCE_COMMUNICATION_NETWORK_OPT, ENABLED_HV_OPT, HVLIST_OPT, 2548 MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT, 2549 VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, ADD_UIDS_OPT, 2550 REMOVE_UIDS_OPT, DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT, 2551 DEFAULT_IALLOCATOR_PARAMS_OPT, RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT, 2552 PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT, 2553 DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS + 2554 [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, MODIFY_ETCHOSTS_OPT, 2555 ENABLED_USER_SHUTDOWN_OPT] + 2556 INSTANCE_POLICY_OPTS + 2557 [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT, 2558 COMPRESSION_TOOLS_OPT] + 2559 [ENABLED_DATA_COLLECTORS_OPT, DATA_COLLECTOR_INTERVAL_OPT], 2560 "[opts...]", 2561 "Alters the parameters of the cluster"), 2562 "renew-crypto": ( 2563 RenewCrypto, ARGS_NONE, 2564 [NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT, 2565 NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT, 2566 NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT, 2567 NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT, 2568 NEW_NODE_CERT_OPT, NEW_SSH_KEY_OPT, NOSSH_KEYCHECK_OPT, 2569 VERBOSE_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT], 2570 "[opts...]", 2571 "Renews cluster certificates, keys and secrets"), 2572 "epo": ( 2573 Epo, [ArgUnknown()], 2574 [FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT, 2575 SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT], 2576 "[opts...] [args]", 2577 "Performs an emergency power-off on given args"), 2578 "activate-master-ip": ( 2579 ActivateMasterIp, ARGS_NONE, [], "", "Activates the master IP"), 2580 "deactivate-master-ip": ( 2581 DeactivateMasterIp, ARGS_NONE, [CONFIRM_OPT], "", 2582 "Deactivates the master IP"), 2583 "show-ispecs-cmd": ( 2584 ShowCreateCommand, ARGS_NONE, [], "", 2585 "Show the command line to re-create the cluster"), 2586 "upgrade": ( 2587 UpgradeGanetiCommand, ARGS_NONE, [TO_OPT, RESUME_OPT], "", 2588 "Upgrade (or downgrade) to a new Ganeti version"), 2589 } 2590 2591 2592 #: dictionary with aliases for commands 2593 aliases = { 2594 "masterfailover": "master-failover", 2595 "show": "info", 2596 }
2597 2598 2599 def Main():
2600 return GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER}, 2601 aliases=aliases)
2602