Package ganeti :: Package client :: Module gnt_cluster
[hide private]
[frames] | no frames]

Source Code for Module ganeti.client.gnt_cluster

   1  # 
   2  # 
   3   
   4  # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013 Google Inc. 
   5  # All rights reserved. 
   6  # 
   7  # Redistribution and use in source and binary forms, with or without 
   8  # modification, are permitted provided that the following conditions are 
   9  # met: 
  10  # 
  11  # 1. Redistributions of source code must retain the above copyright notice, 
  12  # this list of conditions and the following disclaimer. 
  13  # 
  14  # 2. Redistributions in binary form must reproduce the above copyright 
  15  # notice, this list of conditions and the following disclaimer in the 
  16  # documentation and/or other materials provided with the distribution. 
  17  # 
  18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
  19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
  20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
  21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
  22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
  26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
  27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
  28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  29   
  30  """Cluster related commands""" 
  31   
  32  # pylint: disable=W0401,W0613,W0614,C0103 
  33  # W0401: Wildcard import ganeti.cli 
  34  # W0613: Unused argument, since all functions follow the same API 
  35  # W0614: Unused import %s from wildcard import (since we need cli) 
  36  # C0103: Invalid name gnt-cluster 
  37   
  38  from cStringIO import StringIO 
  39  import os 
  40  import time 
  41  import OpenSSL 
  42  import tempfile 
  43  import itertools 
  44   
  45  from ganeti.cli import * 
  46  from ganeti import opcodes 
  47  from ganeti import constants 
  48  from ganeti import errors 
  49  from ganeti import utils 
  50  from ganeti import bootstrap 
  51  from ganeti import ssh 
  52  from ganeti import objects 
  53  from ganeti import uidpool 
  54  from ganeti import compat 
  55  from ganeti import netutils 
  56  from ganeti import ssconf 
  57  from ganeti import pathutils 
  58  from ganeti import serializer 
  59  from ganeti import qlang 
  60   
  61   
# Command line options specific to gnt-cluster. Each is built with
# cli_option(), which comes from the wildcard import of ganeti.cli.

# Boolean flag (off by default); per its help text it selects recovery from
# an EPO.
ON_OPT = cli_option("--on", default=False,
                    action="store_true", dest="on",
                    help="Recover from an EPO")

# Boolean flag (off by default): the positional arguments name node groups
# rather than nodes.
GROUPS_OPT = cli_option("--groups", default=False,
                        action="store_true", dest="groups",
                        help="Arguments are node groups instead of nodes")

# NOTE: FORCE_FAILOVER and FORCE_DISTRIBUTION use the same option string
# ("--yes-do-it") and the same destination ("yes_do_it"); only the help text
# differs. Presumably a given command registers just one of them -- confirm
# against the command table.
FORCE_FAILOVER = cli_option("--yes-do-it", dest="yes_do_it",
                            help="Override interactive check for --no-voting",
                            default=False, action="store_true")

FORCE_DISTRIBUTION = cli_option("--yes-do-it", dest="yes_do_it",
                                help="Unconditionally distribute the"
                                " configuration, even if the queue"
                                " is drained",
                                default=False, action="store_true")

# String option (default None) naming the Ganeti version to upgrade to.
TO_OPT = cli_option("--to", default=None, type="string",
                    help="The Ganeti version to upgrade to")

# Boolean flag (off by default): resume pending upgrades.
RESUME_OPT = cli_option("--resume", default=False, action="store_true",
                        help="Resume any pending Ganeti upgrades")

# EPO timing parameters, all expressed in seconds.
_EPO_PING_INTERVAL = 30 # 30 seconds between pings
_EPO_PING_TIMEOUT = 1 # 1 second
_EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes
89 90 91 -def _CheckNoLvmStorageOptDeprecated(opts):
92 """Checks if the legacy option '--no-lvm-storage' is used. 93 94 """ 95 if not opts.lvm_storage: 96 ToStderr("The option --no-lvm-storage is no longer supported. If you want" 97 " to disable lvm-based storage cluster-wide, use the option" 98 " --enabled-disk-templates to disable all of these lvm-base disk " 99 " templates: %s" % 100 utils.CommaJoin(constants.DTS_LVM)) 101 return 1
102
103 104 -def _InitEnabledDiskTemplates(opts):
105 """Initialize the list of enabled disk templates. 106 107 """ 108 if opts.enabled_disk_templates: 109 return opts.enabled_disk_templates.split(",") 110 else: 111 return constants.DEFAULT_ENABLED_DISK_TEMPLATES
112
def _InitVgName(opts, enabled_disk_templates):
  """Work out the volume group name for a new cluster.

  @param opts: the command line options selected by the user; only
      C{vg_name} is read
  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk templates
  @return: the volume group name to use; may be C{None} or the empty string
      when LVM is not enabled
  @raise errors.OpPrereqError: if the volume group name was explicitly set
      to an empty value while an LVM disk template is enabled

  """
  lvm_enabled = utils.IsLvmEnabled(enabled_disk_templates)
  requested = opts.vg_name

  if requested is None:
    # Nothing given on the command line: fall back to the default volume
    # group, but only if LVM is actually going to be used
    if lvm_enabled:
      return constants.DEFAULT_VG
    return None

  if requested:
    if not lvm_enabled:
      ToStdout("You specified a volume group with --vg-name, but you did not"
               " enable any disk template that uses lvm.")
    return requested

  # The volume group was explicitly set to an empty value
  if lvm_enabled:
    raise errors.OpPrereqError(
        "LVM disk templates are enabled, but vg name not set.")
  return requested
134
def _InitDrbdHelper(opts, enabled_disk_templates):
  """Work out the DRBD usermode helper for a new cluster.

  @param opts: the command line options selected by the user; only
      C{drbd_helper} is read
  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk templates
  @return: C{constants.DEFAULT_DRBD_HELPER} when DRBD is enabled and no
      helper was given, otherwise the value of C{opts.drbd_helper}
  @raise errors.OpPrereqError: if DRBD is enabled and the helper was
      explicitly set to the empty string

  """
  helper = opts.drbd_helper

  if constants.DT_DRBD8 not in enabled_disk_templates:
    # A helper without DRBD is harmless, but worth pointing out
    if helper is not None:
      ToStdout("Note: You specified a DRBD usermode helper, while DRBD storage"
               " is not enabled.")
    return helper

  if helper is None:
    return constants.DEFAULT_DRBD_HELPER

  if helper == "":
    raise errors.OpPrereqError(
        "Unsetting the drbd usermode helper while enabling DRBD is not"
        " allowed.")

  return helper
155
@UsesRPC
def InitCluster(opts, args):
  """Initialize the cluster.

  Validates and completes the command line options (filling in defaults for
  everything left unset), hands them to C{bootstrap.InitCluster} and finally
  submits an C{OpClusterPostInit} opcode.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the desired
      cluster name
  @rtype: int
  @return: the desired exit code; 1 is returned (after a message on stderr)
      for the deprecated --no-lvm-storage option, failed volume group or
      DRBD helper checks, unknown disk templates in the disk parameters, or
      a non-integer primary IP version or master netmask

  """
  if _CheckNoLvmStorageOptDeprecated(opts):
    return 1

  enabled_disk_templates = _InitEnabledDiskTemplates(opts)

  # both helpers signal inconsistent options via OpPrereqError
  try:
    vg_name = _InitVgName(opts, enabled_disk_templates)
    drbd_helper = _InitDrbdHelper(opts, enabled_disk_templates)
  except errors.OpPrereqError, e:
    ToStderr(str(e))
    return 1

  # derive the master network device from the NIC mode if it was not given
  # explicitly; for any NIC mode other than "unset" or OVS it stays None
  master_netdev = opts.master_netdev
  if master_netdev is None:
    nic_mode = opts.nicparams.get(constants.NIC_MODE, None)
    if not nic_mode:
      # default case, use bridging
      master_netdev = constants.DEFAULT_BRIDGE
    elif nic_mode == constants.NIC_MODE_OVS:
      # default ovs is different from default bridge
      master_netdev = constants.DEFAULT_OVS
      # note: this modifies opts.nicparams in place, before it is read below
      opts.nicparams[constants.NIC_LINK] = constants.DEFAULT_OVS

  # the hypervisor list is a comma-separated string, whether it comes from
  # the command line or from the default
  hvlist = opts.enabled_hypervisors
  if hvlist is None:
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
  hvlist = hvlist.split(",")

  # hvparams and diskparams are copied because they are filled in per key
  # further down
  hvparams = dict(opts.hvparams)
  beparams = opts.beparams
  nicparams = opts.nicparams

  diskparams = dict(opts.diskparams)

  # check the disk template types here, as we cannot rely on the type check done
  # by the opcode parameter types
  diskparams_keys = set(diskparams.keys())
  if not (diskparams_keys <= constants.DISK_TEMPLATES):
    unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES)
    ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown))
    return 1

  # prepare beparams dict
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)

  # prepare nicparams dict
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  # prepare ndparams dict
  if opts.ndparams is None:
    ndparams = dict(constants.NDC_DEFAULTS)
  else:
    ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  # prepare hvparams dict (one entry per known hypervisor, not only the
  # enabled ones)
  for hv in constants.HYPER_TYPES:
    if hv not in hvparams:
      hvparams[hv] = {}
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)

  # prepare diskparams dict (one entry per known disk template)
  for templ in constants.DISK_TEMPLATES:
    if templ not in diskparams:
      diskparams[templ] = {}
    diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ],
                                         diskparams[templ])
    utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES)

  # prepare ipolicy dict
  ipolicy = CreateIPolicyFromOpts(
    ispecs_mem_size=opts.ispecs_mem_size,
    ispecs_cpu_count=opts.ispecs_cpu_count,
    ispecs_disk_count=opts.ispecs_disk_count,
    ispecs_disk_size=opts.ispecs_disk_size,
    ispecs_nic_count=opts.ispecs_nic_count,
    minmax_ispecs=opts.ipolicy_bounds_specs,
    std_ispecs=opts.ipolicy_std_specs,
    ipolicy_disk_templates=opts.ipolicy_disk_templates,
    ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
    ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
    fill_all=True)

  # the remaining simple options get their defaults written back into opts
  if opts.candidate_pool_size is None:
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT

  if opts.mac_prefix is None:
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX

  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

  if opts.prealloc_wipe_disks is None:
    opts.prealloc_wipe_disks = False

  external_ip_setup_script = opts.use_external_mip_script
  if external_ip_setup_script is None:
    external_ip_setup_script = False

  # TypeError is caught as well, so a value of None is reported as invalid
  try:
    primary_ip_version = int(opts.primary_ip_version)
  except (ValueError, TypeError), err:
    ToStderr("Invalid primary ip version value: %s" % str(err))
    return 1

  # unlike the IP version, the netmask may legitimately be left unset
  master_netmask = opts.master_netmask
  try:
    if master_netmask is not None:
      master_netmask = int(master_netmask)
  except (ValueError, TypeError), err:
    ToStderr("Invalid master netmask value: %s" % str(err))
    return 1

  if opts.disk_state:
    disk_state = utils.FlatToDict(opts.disk_state)
  else:
    disk_state = {}

  hv_state = dict(opts.hv_state)

  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        vg_name=vg_name,
                        mac_prefix=opts.mac_prefix,
                        master_netmask=master_netmask,
                        master_netdev=master_netdev,
                        file_storage_dir=opts.file_storage_dir,
                        shared_file_storage_dir=opts.shared_file_storage_dir,
                        enabled_hypervisors=hvlist,
                        hvparams=hvparams,
                        beparams=beparams,
                        nicparams=nicparams,
                        ndparams=ndparams,
                        diskparams=diskparams,
                        ipolicy=ipolicy,
                        candidate_pool_size=opts.candidate_pool_size,
                        modify_etc_hosts=opts.modify_etc_hosts,
                        modify_ssh_setup=opts.modify_ssh_setup,
                        maintain_node_health=opts.maintain_node_health,
                        drbd_helper=drbd_helper,
                        uid_pool=uid_pool,
                        default_iallocator=opts.default_iallocator,
                        primary_ip_version=primary_ip_version,
                        prealloc_wipe_disks=opts.prealloc_wipe_disks,
                        use_external_mip_script=external_ip_setup_script,
                        hv_state=hv_state,
                        disk_state=disk_state,
                        enabled_disk_templates=enabled_disk_templates,
                        )
  # the post-init opcode is submitted only after bootstrap.InitCluster
  # has returned
  op = opcodes.OpClusterPostInit()
  SubmitOpCode(op, opts=opts)
  return 0
325
@UsesRPC
def DestroyCluster(opts, args):
  """Destroy the cluster.

  Nothing is done unless the user confirmed the operation by passing
  --yes-do-it.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.yes_do_it:
    master_uuid = SubmitOpCode(opcodes.OpClusterDestroy(), opts=opts)
    # getting here means the opcode succeeded, so the daemons can be
    # shut down now
    bootstrap.FinalizeClusterDestroy(master_uuid)
    return 0

  ToStderr("Destroying a cluster is irreversible. If you really want"
           " destroy this cluster, supply the --yes-do-it option.")
  return 1
349
def RenameCluster(opts, args):
  """Rename the cluster.

  Unless C{opts.force} is set, the user is asked for confirmation first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code; 1 if the user declined the confirmation

  """
  client = GetClient()

  [old_name] = client.QueryConfigValues(["cluster_name"])

  target_name = args[0]
  if not opts.force:
    question = ("This will rename the cluster from '%s' to '%s'. If you are"
                " connected over the network to the cluster name, the"
                " operation is very dangerous as the IP address will be"
                " removed from the node and the change may not go through."
                " Continue?") % (old_name, target_name)
    if not AskUser(question):
      return 1

  rename_op = opcodes.OpClusterRename(name=target_name)
  renamed_to = SubmitOpCode(rename_op, opts=opts, cl=client)

  # the name reported back by the opcode is shown, not the one typed in
  if renamed_to:
    ToStdout("Cluster renamed from '%s' to '%s'", old_name, renamed_to)

  return 0
382
def ActivateMasterIp(opts, args):
  """Activates the master IP.

  @param opts: the command line options selected by the user (not used)
  @type args: list
  @param args: not used
  @rtype: int
  @return: the desired exit code

  """
  # NOTE(review): unlike most commands in this module, the options are not
  # forwarded to SubmitOpCode here -- confirm whether that is intentional
  SubmitOpCode(opcodes.OpClusterActivateMasterIp())
  return 0
391
def DeactivateMasterIp(opts, args):
  """Deactivates the master IP.

  Unless C{opts.confirm} is set, the user is asked for confirmation first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used
  @rtype: int
  @return: the desired exit code; 1 if the user declined the confirmation

  """
  question = ("This will disable the master IP. All the open connections to"
              " the master IP will be closed. To reach the master you will"
              " need to use its node IP."
              " Continue?")
  if not (opts.confirm or AskUser(question)):
    return 1

  SubmitOpCode(opcodes.OpClusterDeactivateMasterIp())
  return 0
408
def RedistributeConfig(opts, args):
  """Forces push of the cluster configuration.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  redist_op = opcodes.OpClusterRedistConf()
  if not opts.yes_do_it:
    SubmitOrSend(redist_op, opts)
  else:
    # --yes-do-it: submit via the variant meant for a drained queue
    SubmitOpCodeToDrainedQueue(redist_op)
  return 0
426
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cluster_info = GetClient(query=True).QueryClusterInfo()

  # (output format, key in the cluster info), in display order
  report = [
    ("Software version: %s", "software_version"),
    ("Internode protocol: %s", "protocol_version"),
    ("Configuration format: %s", "config_version"),
    ("OS api version: %s", "os_api_version"),
    ("Export interface: %s", "export_version"),
    ("VCS version: %s", "vcs_version"),
    ]
  for (line_format, key) in report:
    ToStdout(line_format, cluster_info[key])

  return 0
447
def ShowClusterMaster(opts, args):
  """Write name of master node to the standard output.

  The value printed is whatever C{bootstrap.GetMaster} returns.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  ToStdout(bootstrap.GetMaster())
  return 0
462
463 464 -def _FormatGroupedParams(paramsdict, roman=False):
465 """Format Grouped parameters (be, nic, disk) by group. 466 467 @type paramsdict: dict of dicts 468 @param paramsdict: {group: {param: value, ...}, ...} 469 @rtype: dict of dicts 470 @return: copy of the input dictionaries with strings as values 471 472 """ 473 ret = {} 474 for (item, val) in paramsdict.items(): 475 if isinstance(val, dict): 476 ret[item] = _FormatGroupedParams(val, roman=roman) 477 elif roman and isinstance(val, int): 478 ret[item] = compat.TryToRoman(val) 479 else: 480 ret[item] = str(val) 481 return ret
482
def ShowClusterConfig(opts, args):
  """Shows cluster information.

  Queries the cluster info once and prints it as a nested list of
  (label, value) pairs via C{PrintGenericInfo}.

  @param opts: the command line options selected by the user; only
      C{roman_integers} is read
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient(query=True)
  result = cl.QueryClusterInfo()

  # empty collections are shown as "(none)" rather than as an empty string
  if result["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
  else:
    tags = "(none)"
  if result["reserved_lvs"]:
    reserved_lvs = utils.CommaJoin(result["reserved_lvs"])
  else:
    reserved_lvs = "(none)"

  # only the parameters of enabled hypervisors are displayed
  enabled_hv = result["enabled_hypervisors"]
  hvparams = dict((k, v) for k, v in result["hvparams"].iteritems()
                  if k in enabled_hv)

  # each entry is (label, value); a value may itself be a dict or a list of
  # such pairs, which is handed as-is to PrintGenericInfo
  info = [
    ("Cluster name", result["name"]),
    ("Cluster UUID", result["uuid"]),

    ("Creation time", utils.FormatTime(result["ctime"])),
    ("Modification time", utils.FormatTime(result["mtime"])),

    ("Master node", result["master"]),

    ("Architecture (this node)",
     "%s (%s)" % (result["architecture"][0], result["architecture"][1])),

    ("Tags", tags),

    ("Default hypervisor", result["default_hypervisor"]),
    ("Enabled hypervisors", utils.CommaJoin(enabled_hv)),

    ("Hypervisor parameters", _FormatGroupedParams(hvparams)),

    ("OS-specific hypervisor parameters",
     _FormatGroupedParams(result["os_hvp"])),

    ("OS parameters", _FormatGroupedParams(result["osparams"])),

    ("Hidden OSes", utils.CommaJoin(result["hidden_os"])),
    ("Blacklisted OSes", utils.CommaJoin(result["blacklisted_os"])),

    ("Cluster parameters", [
      ("candidate pool size",
       compat.TryToRoman(result["candidate_pool_size"],
                         convert=opts.roman_integers)),
      ("master netdev", result["master_netdev"]),
      ("master netmask", result["master_netmask"]),
      ("use external master IP address setup script",
       result["use_external_mip_script"]),
      ("lvm volume group", result["volume_group_name"]),
      ("lvm reserved volumes", reserved_lvs),
      ("drbd usermode helper", result["drbd_usermode_helper"]),
      ("file storage path", result["file_storage_dir"]),
      ("shared file storage path", result["shared_file_storage_dir"]),
      ("maintenance of node health", result["maintain_node_health"]),
      ("uid pool", uidpool.FormatUidPool(result["uid_pool"])),
      ("default instance allocator", result["default_iallocator"]),
      ("primary ip version", result["primary_ip_version"]),
      ("preallocation wipe disks", result["prealloc_wipe_disks"]),
      # the two search paths come from local constants, not from the query
      ("OS search path", utils.CommaJoin(pathutils.OS_SEARCH_PATH)),
      ("ExtStorage Providers search path",
       utils.CommaJoin(pathutils.ES_SEARCH_PATH)),
      ("enabled disk templates",
       utils.CommaJoin(result["enabled_disk_templates"])),
      ]),

    ("Default node parameters",
     _FormatGroupedParams(result["ndparams"], roman=opts.roman_integers)),

    ("Default instance parameters",
     _FormatGroupedParams(result["beparams"], roman=opts.roman_integers)),

    ("Default nic parameters",
     _FormatGroupedParams(result["nicparams"], roman=opts.roman_integers)),

    ("Default disk parameters",
     _FormatGroupedParams(result["diskparams"], roman=opts.roman_integers)),

    ("Instance policy - limits for instances",
     FormatPolicyInfo(result["ipolicy"], None, True)),
    ]

  PrintGenericInfo(info)
  return 0
580
def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the file does not exist

  """
  path = os.path.abspath(args[0])

  if not os.path.exists(path):
    raise errors.OpPrereqError("No such filename '%s'" % path,
                               errors.ECODE_INVAL)

  client = GetClient()

  cluster_name = client.QueryConfigValues(["cluster_name"])[0]

  # the master is filtered out of the list of target nodes
  targets = GetOnlineNodes(nodes=opts.nodes, cl=client, filter_master=True,
                           secondary_ips=opts.use_replication_network,
                           nodegroup=opts.nodegroup)

  runner = ssh.SshRunner(cluster_name)
  for node in targets:
    if runner.CopyFileToNode(node, path):
      continue
    # NOTE(review): a failed copy is reported but does not change the exit
    # code -- confirm whether that is intentional
    ToStderr("Copy of file %s to node %s failed", path, node)

  return 0
614
def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  The command is run on one node after the other, the master node (if
  selected) being last.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  command_line = " ".join(args)

  node_names = GetOnlineNodes(nodes=opts.nodes, cl=client,
                              nodegroup=opts.nodegroup)

  (cluster_name, master_node) = \
    client.QueryConfigValues(["cluster_name", "master_node"])

  runner = ssh.SshRunner(cluster_name=cluster_name)

  # Move the master node to the end of the list
  if master_node in node_names:
    node_names.remove(master_node)
    node_names.append(master_node)

  for node_name in node_names:
    outcome = runner.Run(node_name, constants.SSH_LOGIN_USER, command_line)

    succeeded = (outcome.exit_code == constants.EXIT_SUCCESS)
    if opts.failure_only and succeeded:
      # Successful commands produce no output in this mode
      continue

    ToStdout("------------------------------------------------")
    if not opts.show_machine_names:
      ToStdout("node: %s", node_name)
      ToStdout("%s", outcome.output)
    else:
      # prefix every output line with the name of the node
      for output_line in outcome.output.splitlines():
        ToStdout("%s: %s", node_name, output_line)
    ToStdout("return code = %s", outcome.exit_code)

  return 0
659
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  Submits an C{OpClusterVerify} opcode, waits for all the jobs it spawned
  and derives the exit code from their results.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code; C{constants.EXIT_SUCCESS} only if every
      job succeeded and returned exactly one, true, opcode result

  """
  skip_checks = []

  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)

  cl = GetClient()

  op = opcodes.OpClusterVerify(verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors,
                               skip_checks=skip_checks,
                               ignore_errors=opts.ignore_errors,
                               group_name=opts.nodegroup)
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  # Count failures explicitly; this also copes with an empty result list,
  # for which unpacking the result of zip(*...) would raise ValueError
  bad_jobs = 0
  bad_results = 0
  for (job_success, op_results) in jex.GetResults():
    if not job_success:
      bad_jobs += 1
    # a job is expected to carry exactly one opcode result, which must be
    # true for the verification to count as passed
    if not (len(op_results) == 1 and op_results[0]):
      bad_results += 1

  if bad_jobs == 0 and bad_results == 0:
    rcode = constants.EXIT_SUCCESS
  else:
    rcode = constants.EXIT_FAILURE
    if bad_jobs > 0:
      ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)

  return rcode
712
def VerifyDisks(opts, args):
  """Verify integrity of cluster disks.

  Submits an C{OpClusterVerifyDisks} opcode and, for every job it spawned,
  reports nodes that could not be queried, re-activates the disks of the
  instances listed in the result and lists missing logical volumes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code; non-zero if a job failed, a node could
      not be queried, activating disks failed or volumes are missing

  """
  cl = GetClient()

  op = opcodes.OpClusterVerifyDisks()

  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  retcode = constants.EXIT_SUCCESS

  for (status, result) in jex.GetResults():
    if not status:
      ToStdout("Job failed: %s", result)
      # a failed verification job must not be reported as overall success
      retcode = constants.EXIT_FAILURE
      continue

    # each job carries exactly one opcode result, a three-element tuple
    ((bad_nodes, instances, missing), ) = result

    for node, text in bad_nodes.items():
      # only the last 400 characters of the node's error text are shown
      ToStdout("Error gathering data on node %s: %s",
               node, utils.SafeEncode(text[-400:]))
      retcode = constants.EXIT_FAILURE
      ToStdout("You need to fix these nodes first before fixing instances")

    for iname in instances:
      # instances with missing volumes are reported below, not re-activated
      if iname in missing:
        continue
      op = opcodes.OpInstanceActivateDisks(instance_name=iname)
      try:
        ToStdout("Activating disks for instance '%s'", iname)
        SubmitOpCode(op, opts=opts, cl=cl)
      except errors.GenericError as err:
        nret, msg = FormatError(err)
        retcode |= nret
        ToStderr("Error activating disks for instance %s: %s", iname, msg)

    if missing:
      for iname, ival in missing.items():
        # ival holds (node, volume) pairs
        all_missing = compat.all(x[0] in bad_nodes for x in ival)
        if all_missing:
          ToStdout("Instance %s cannot be verified as it lives on"
                   " broken nodes", iname)
        else:
          ToStdout("Instance %s has missing logical volumes:", iname)
          ival.sort()
          for node, vol in ival:
            if node in bad_nodes:
              ToStdout("\tbroken node %s /dev/%s", node, vol)
            else:
              ToStdout("\t%s /dev/%s", node, vol)

      ToStdout("You need to replace or recreate disks for all the above"
               " instances if this message persists after fixing broken nodes.")
      retcode = constants.EXIT_FAILURE
    elif not instances:
      ToStdout("No disks need to be activated.")

  return retcode
785
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  Submits an C{OpClusterRepairDiskSizes} opcode for the given instances.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # return an explicit exit code, as documented and as the other commands do,
  # instead of falling off the end of the function
  return 0
799
@UsesRPC
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code; 1 if the user declined the confirmation,
      otherwise the value returned by C{bootstrap.MasterFailover}

  """
  # --no-voting needs an interactive confirmation unless --yes-do-it was given
  needs_confirmation = opts.no_voting and not opts.yes_do_it
  if needs_confirmation:
    question = ("This will perform the failover even if most other nodes"
                " are down, or if this node is outdated. This is dangerous"
                " as it can lead to a non-consistent cluster. Check the"
                " gnt-cluster(8) man page before proceeding. Continue?")
    if not AskUser(question):
      return 1

  (exit_code, messages) = bootstrap.MasterFailover(no_voting=opts.no_voting)
  for message in messages:
    ToStderr(message)
  return exit_code
828
def MasterPing(opts, args):
  """Checks if the master is alive.

  The check consists of creating a client and querying the cluster info;
  any exception raised while doing so counts as "not alive".

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code; 0 if the query succeeded, 1 otherwise

  """
  try:
    GetClient().QueryClusterInfo()
  except Exception: # pylint: disable=W0703
    return 1
  else:
    return 0
846
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  Prints one "path tag" line per match, in sorted order.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code; 1 if nothing matched, 0 otherwise

  """
  op = opcodes.OpTagsSearch(pattern=args[0])
  result = SubmitOpCode(op, opts=opts)
  if not result:
    return 1
  for (path, tag) in sorted(result):
    ToStdout("%s %s", path, tag)
  # explicit success code, matching the documented return type
  return 0
866
def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
  """Reads and verifies an X509 certificate.

  "Verifying" means that OpenSSL is able to load the file contents as a
  PEM certificate and, if requested, as a PEM private key.

  @type cert_filename: string
  @param cert_filename: the path of the file containing the certificate to
                        verify encoded in PEM format
  @type verify_private_key: bool
  @param verify_private_key: whether to verify the private key in addition to
                             the public certificate
  @rtype: string
  @return: a string containing the PEM-encoded certificate.
  @raise errors.X509CertError: if the file cannot be read or loaded

  """
  try:
    pem_data = utils.ReadFile(cert_filename)
  except IOError as err:
    raise errors.X509CertError(cert_filename,
                               "Unable to read certificate: %s" % str(err))

  try:
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem_data)
  except Exception as err:
    raise errors.X509CertError(cert_filename,
                               "Unable to load certificate: %s" % str(err))

  if not verify_private_key:
    return pem_data

  # the private key is expected in the same file as the certificate
  try:
    OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem_data)
  except Exception as err:
    raise errors.X509CertError(cert_filename,
                               "Unable to load private key: %s" % str(err))

  return pem_data
901
def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
                 rapi_cert_filename, new_spice_cert, spice_cert_filename,
                 spice_cacert_filename, new_confd_hmac_key, new_cds,
                 cds_filename, force):
  """Renews cluster certificates, keys and secrets.

  The option combination is validated and all user-supplied files are read
  before anything is changed. The actual replacement and the copying of the
  affected files to the other nodes is done by a callback passed to
  C{RunWhileClusterStopped}.

  @type new_cluster_cert: bool
  @param new_cluster_cert: Whether to generate a new cluster certificate
  @type new_rapi_cert: bool
  @param new_rapi_cert: Whether to generate a new RAPI certificate
  @type rapi_cert_filename: string
  @param rapi_cert_filename: Path to file containing new RAPI certificate
  @type new_spice_cert: bool
  @param new_spice_cert: Whether to generate a new SPICE certificate
  @type spice_cert_filename: string
  @param spice_cert_filename: Path to file containing new SPICE certificate
  @type spice_cacert_filename: string
  @param spice_cacert_filename: Path to file containing the certificate of the
                                CA that signed the SPICE certificate
  @type new_confd_hmac_key: bool
  @param new_confd_hmac_key: Whether to generate a new HMAC key
  @type new_cds: bool
  @param new_cds: Whether to generate a new cluster domain secret
  @type cds_filename: string
  @param cds_filename: Path to file containing new cluster domain secret
  @type force: bool
  @param force: If true, the user is not asked for confirmation
  @rtype: int
  @return: the desired exit code; 1 for conflicting options, unreadable
      input files or a declined confirmation, 0 otherwise

  """
  # "generate a new one" and "use the one from this file" exclude each other
  if new_rapi_cert and rapi_cert_filename:
    ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
             " options can be specified at the same time.")
    return 1

  if new_cds and cds_filename:
    ToStderr("Only one of the --new-cluster-domain-secret and"
             " --cluster-domain-secret options can be specified at"
             " the same time.")
    return 1

  if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
    ToStderr("When using --new-spice-certificate, the --spice-certificate"
             " and --spice-ca-certificate must not be used.")
    return 1

  # the SPICE certificate and its CA certificate must be given together
  if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
    ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
             " specified.")
    return 1

  rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
  try:
    # for the RAPI and SPICE certificates the private key is verified too;
    # for the CA certificate only the certificate itself
    if rapi_cert_filename:
      rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
    if spice_cert_filename:
      spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
      spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
  except errors.X509CertError, err:
    # NOTE(review): indexing the exception object (err[0]) only works on
    # Python 2; err.args[0] would be the portable spelling
    ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
    return 1

  if cds_filename:
    try:
      cds = utils.ReadFile(cds_filename)
    except Exception, err: # pylint: disable=W0703
      ToStderr("Can't load new cluster domain secret from %s: %s" %
               (cds_filename, str(err)))
      return 1
  else:
    cds = None

  if not force:
    usertext = ("This requires all daemons on all nodes to be restarted and"
                " may take some time. Continue?")
    if not AskUser(usertext):
      return 1

  def _RenewCryptoInner(ctx):
    """Replaces the crypto material and copies it to the other nodes.

    @param ctx: context object supplied by C{RunWhileClusterStopped}; its
        C{feedback_fn}, C{nonmaster_nodes} and C{ssh} attributes are used

    """
    ctx.feedback_fn("Updating certificates and keys")
    bootstrap.GenerateClusterCrypto(new_cluster_cert,
                                    new_rapi_cert,
                                    new_spice_cert,
                                    new_confd_hmac_key,
                                    new_cds,
                                    rapi_cert_pem=rapi_cert_pem,
                                    spice_cert_pem=spice_cert_pem,
                                    spice_cacert_pem=spice_cacert_pem,
                                    cds=cds)

    # only the files that were actually replaced are copied
    files_to_copy = []

    if new_cluster_cert:
      files_to_copy.append(pathutils.NODED_CERT_FILE)

    if new_rapi_cert or rapi_cert_pem:
      files_to_copy.append(pathutils.RAPI_CERT_FILE)

    if new_spice_cert or spice_cert_pem:
      files_to_copy.append(pathutils.SPICE_CERT_FILE)
      files_to_copy.append(pathutils.SPICE_CACERT_FILE)

    if new_confd_hmac_key:
      files_to_copy.append(pathutils.CONFD_HMAC_KEY)

    if new_cds or cds:
      files_to_copy.append(pathutils.CLUSTER_DOMAIN_SECRET_FILE)

    if files_to_copy:
      for node_name in ctx.nonmaster_nodes:
        ctx.feedback_fn("Copying %s to %s" %
                        (", ".join(files_to_copy), node_name))
        for file_name in files_to_copy:
          # NOTE(review): the result of CopyFileToNode is not checked here
          ctx.ssh.CopyFileToNode(node_name, file_name)

  RunWhileClusterStopped(ToStdout, _RenewCryptoInner)

  ToStdout("All requested certificates and keys have been replaced."
           " Running \"gnt-cluster verify\" now is recommended.")

  return 0
def RenewCrypto(opts, args):
  """Renews cluster certificates, keys and secrets.

  Command entry point: unpacks the relevant command line options and
  forwards them, in positional order, to L{_RenewCrypto}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @rtype: int
  @return: the exit code returned by L{_RenewCrypto}

  """
  crypto_args = (
    opts.new_cluster_cert,
    opts.new_rapi_cert,
    opts.rapi_cert,
    opts.new_spice_cert,
    opts.spice_cert,
    opts.spice_cacert,
    opts.new_confd_hmac_key,
    opts.new_cluster_domain_secret,
    opts.cluster_domain_secret,
    opts.force,
    )
  return _RenewCrypto(*crypto_args)
1039
def _GetEnabledDiskTemplates(opts):
  """Determine the list of enabled disk templates.

  @param opts: the command line options selected by the user
  @rtype: list of strings or None
  @return: the comma-separated value of the option split into a list, or
      None if the option is unset or empty

  """
  templates = opts.enabled_disk_templates
  if not templates:
    return None
  return templates.split(",")
1049
def _GetVgName(opts, enabled_disk_templates):
  """Determine the volume group name.

  The name is taken verbatim from the options. If disk templates were
  given and none of them is LVM-based, a note is printed, but the name
  is returned nevertheless.

  @param opts: the command line options selected by the user
  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk-templates
  @return: the value of the --vg-name option (None if not given)

  """
  requested_vg = opts.vg_name

  # consistency between vg name and enabled disk templates
  lvm_unused = (enabled_disk_templates and requested_vg and
                not utils.IsLvmEnabled(enabled_disk_templates))
  if lvm_unused:
    ToStdout("You specified a volume group with --vg-name, but you did not"
             " enable any of the following lvm-based disk templates: %s" %
             utils.CommaJoin(constants.DTS_LVM))

  return requested_vg
1068
def _GetDrbdHelper(opts, enabled_disk_templates):
  """Determine the DRBD usermode helper.

  The helper is taken verbatim from the options. If disk templates were
  given and DRBD is not among them, a note is printed, but the helper is
  returned nevertheless.

  @param opts: the command line options selected by the user
  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk-templates
  @return: the value of the DRBD usermode helper option

  """
  helper = opts.drbd_helper

  if enabled_disk_templates:
    if constants.DT_DRBD8 not in enabled_disk_templates and opts.drbd_helper:
      ToStdout("You specified a DRBD usermode helper with "
               " --drbd-usermode-helper while DRBD is not enabled.")

  return helper
1081
def SetClusterParams(opts, args):
  """Modify the cluster.

  Validates and converts the command line options and submits a single
  L{opcodes.OpClusterSetParams} built from them.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  def _ParseOptionalUidPool(value):
    """Parse a uid pool specification, passing None through."""
    if value is None:
      return None
    return uidpool.ParseUidPool(value)

  anything_requested = (
    opts.vg_name is not None or
    opts.drbd_helper is not None or
    opts.enabled_hypervisors or opts.hvparams or
    opts.beparams or opts.nicparams or
    opts.ndparams or opts.diskparams or
    opts.candidate_pool_size is not None or
    opts.uid_pool is not None or
    opts.maintain_node_health is not None or
    opts.add_uids is not None or
    opts.remove_uids is not None or
    opts.default_iallocator is not None or
    opts.reserved_lvs is not None or
    opts.master_netdev is not None or
    opts.master_netmask is not None or
    opts.use_external_mip_script is not None or
    opts.prealloc_wipe_disks is not None or
    opts.hv_state or
    opts.enabled_disk_templates or
    opts.disk_state or
    opts.ipolicy_bounds_specs is not None or
    opts.ipolicy_std_specs is not None or
    opts.ipolicy_disk_templates is not None or
    opts.ipolicy_vcpu_ratio is not None or
    opts.ipolicy_spindle_ratio is not None or
    opts.modify_etc_hosts is not None or
    opts.file_storage_dir is not None or
    opts.shared_file_storage_dir is not None)
  if not anything_requested:
    ToStderr("Please give at least one of the parameters.")
    return 1

  if _CheckNoLvmStorageOptDeprecated(opts):
    return 1

  enabled_disk_templates = _GetEnabledDiskTemplates(opts)
  vg_name = _GetVgName(opts, enabled_disk_templates)

  try:
    drbd_helper = _GetDrbdHelper(opts, enabled_disk_templates)
  except errors.OpPrereqError as err:
    ToStderr(str(err))
    return 1

  enabled_hvs = opts.enabled_hypervisors
  if enabled_hvs is not None:
    enabled_hvs = enabled_hvs.split(",")

  # opts.hvparams is a list of (name, dict) pairs (or an empty list), so it
  # can be handed to dict() directly
  hvparams = dict(opts.hvparams)
  for per_hv in hvparams.values():
    utils.ForceDictType(per_hv, constants.HVS_PARAMETER_TYPES)

  diskparams = dict(opts.diskparams)
  for per_template in diskparams.values():
    utils.ForceDictType(per_template, constants.DISK_DT_TYPES)

  beparams = opts.beparams
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)

  nicparams = opts.nicparams
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  ndparams = opts.ndparams
  if ndparams is not None:
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  ipolicy = CreateIPolicyFromOpts(
    minmax_ispecs=opts.ipolicy_bounds_specs,
    std_ispecs=opts.ipolicy_std_specs,
    ipolicy_disk_templates=opts.ipolicy_disk_templates,
    ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
    ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
    )

  maintain_node_health = opts.maintain_node_health

  uid_pool = _ParseOptionalUidPool(opts.uid_pool)
  add_uids = _ParseOptionalUidPool(opts.add_uids)
  remove_uids = _ParseOptionalUidPool(opts.remove_uids)

  # The converted values are stored back on opts and read from there when
  # the opcode is built below
  raw_reserved_lvs = opts.reserved_lvs
  if raw_reserved_lvs == "":
    opts.reserved_lvs = []
  elif raw_reserved_lvs is not None:
    opts.reserved_lvs = utils.UnescapeAndSplit(raw_reserved_lvs, sep=",")

  if opts.master_netmask is not None:
    try:
      opts.master_netmask = int(opts.master_netmask)
    except ValueError:
      ToStderr("The --master-netmask option expects an int parameter.")
      return 1

  use_external_mip_script = opts.use_external_mip_script

  disk_state = utils.FlatToDict(opts.disk_state) if opts.disk_state else {}
  hv_state = dict(opts.hv_state)

  op = opcodes.OpClusterSetParams(
    vg_name=vg_name,
    drbd_helper=drbd_helper,
    enabled_hypervisors=enabled_hvs,
    hvparams=hvparams,
    os_hvp=None,
    beparams=beparams,
    nicparams=nicparams,
    ndparams=ndparams,
    diskparams=diskparams,
    ipolicy=ipolicy,
    candidate_pool_size=opts.candidate_pool_size,
    maintain_node_health=maintain_node_health,
    modify_etc_hosts=opts.modify_etc_hosts,
    uid_pool=uid_pool,
    add_uids=add_uids,
    remove_uids=remove_uids,
    default_iallocator=opts.default_iallocator,
    prealloc_wipe_disks=opts.prealloc_wipe_disks,
    master_netdev=opts.master_netdev,
    master_netmask=opts.master_netmask,
    reserved_lvs=opts.reserved_lvs,
    use_external_mip_script=use_external_mip_script,
    hv_state=hv_state,
    disk_state=disk_state,
    enabled_disk_templates=enabled_disk_templates,
    force=opts.force,
    file_storage_dir=opts.file_storage_dir,
    shared_file_storage_dir=opts.shared_file_storage_dir,
    )
  SubmitOrSend(op, opts)
  return 0
1235
def QueueOps(opts, args):
  """Queue operations.

  Supported subcommands are "drain" and "undrain", which set or clear the
  queue drain flag, and "info", which prints the current flag.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not known

  """
  subcommand = args[0]
  queue_client = GetClient()

  if subcommand == "info":
    values = queue_client.QueryConfigValues(["drain_flag"])
    state = "set" if values[0] else "unset"
    ToStdout("The drain flag is %s" % state)
  elif subcommand in ("drain", "undrain"):
    queue_client.SetQueueDrainFlag(subcommand == "drain")
  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % subcommand,
                               errors.ECODE_INVAL)

  return 0
1264
def _ShowWatcherPause(until):
  """Prints whether, and until when, the watcher is paused.

  @param until: end of the pause in seconds since the epoch, or None; a
      value that lies in the past is reported as "not paused"

  """
  still_paused = until is not None and not (until < time.time())

  if still_paused:
    ToStdout("The watcher is paused until %s.", time.ctime(until))
  else:
    ToStdout("The watcher is not paused.")
1271
def WatcherOps(opts, args):
  """Watcher operations.

  Supported subcommands are "continue" (lift the pause), "pause" (takes a
  duration as second argument) and "info" (show the current pause).

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the subcommand, followed by the pause duration for "pause"
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not known or the
      duration for "pause" is missing

  """
  subcommand = args[0]
  watcher_client = GetClient()

  if subcommand == "info":
    values = watcher_client.QueryConfigValues(["watcher_pause"])
    _ShowWatcherPause(values[0])

  elif subcommand == "pause":
    if len(args) < 2:
      raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)

    pause_end = time.time() + ParseTimespec(args[1])
    _ShowWatcherPause(watcher_client.SetWatcherPause(pause_end))

  elif subcommand == "continue":
    watcher_client.SetWatcherPause(None)
    ToStdout("The watcher is no longer paused.")

  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % subcommand,
                               errors.ECODE_INVAL)

  return 0
1306
def _OobPower(opts, node_list, power):
  """Puts the node in the list to desired power state.

  Submits one out-of-band command for all nodes and reports every node
  whose result status is not normal.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on
  @param power: True if they should be powered on, False otherwise
  @rtype: bool
  @return: The success of the operation (none failed)

  """
  oob_command = constants.OOB_POWER_ON if power else constants.OOB_POWER_OFF

  op = opcodes.OpOobCommand(node_names=node_list,
                            command=oob_command,
                            ignore_status=True,
                            timeout=opts.oob_timeout,
                            power_delay=opts.power_delay)

  all_ok = True
  # Each result row is a pair of (status, value) tuples: node, then data
  for ((_, node_name), (data_status, _)) in SubmitOpCode(op, opts=opts):
    if data_status == constants.RS_NORMAL:
      continue

    assert data_status != constants.RS_UNAVAIL
    all_ok = False
    ToStderr("There was a problem changing power for %s, please investigate",
             node_name)

  return all_ok
1343
def _InstanceStart(opts, inst_list, start, no_remember=False):
  """Puts the instances in the list to desired state.

  One job per instance is queued and all results are awaited.

  @param opts: The command line options selected by the user
  @param inst_list: The list of instances to operate on
  @param start: True if they should be started, False for shutdown
  @param no_remember: passed on to the shutdown opcode as C{no_remember};
      not used when starting instances
  @rtype: bool
  @return: The success of the operation (none failed)

  """
  if start:
    make_op = opcodes.OpInstanceStartup
    (verb, done, doing) = ("startup", "started", "starting")
  else:
    make_op = compat.partial(opcodes.OpInstanceShutdown,
                             timeout=opts.shutdown_timeout,
                             no_remember=no_remember)
    (verb, done, doing) = ("shutdown", "stopped", "stopping")

  executor = JobExecutor(opts=opts)
  for name in inst_list:
    ToStdout("Submit %s of instance %s", verb, name)
    executor.QueueJob(name, make_op(instance_name=name))

  results = executor.GetResults()
  failures = sum(1 for (ok, _) in results if not ok)

  if failures:
    ToStderr("There were errors while %s instances:\n"
             "%d error(s) out of %d instance(s)", doing, failures,
             len(results))
    return False

  ToStdout("All instances have been %s successfully", done)
  return True
1383
class _RunWhenNodesReachableHelper(object):
  """Helper class to make shared internal state sharing easier.

  Instances are callable and additionally provide L{Wait}, so the same
  object can hold the node bookkeeping for both.

  @ivar success: Indicates if all action_cb calls were successful
  @ivar down: set of nodes not yet found reachable
  @ivar up: set of nodes found reachable so far

  """
  def __init__(self, node_list, action_cb, node2ip, port, feedback_fn,
               _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
    """Init the object.

    @param node_list: The list of nodes to be reachable
    @param action_cb: Callback called when a new host is reachable
    @type node2ip: dict
    @param node2ip: Node to ip mapping
    @param port: The port to use for the TCP ping
    @param feedback_fn: The function used for feedback
    @param _ping_fn: Function to check reachabilty (for unittest use only)
    @param _sleep_fn: Function to sleep (for unittest use only)

    """
    # Collaborators
    self.action_cb = action_cb
    self.feedback_fn = feedback_fn
    self._ping_fn = _ping_fn
    self._sleep_fn = _sleep_fn

    # Connection data
    self.node2ip = node2ip
    self.port = port

    # Bookkeeping; every node starts out as down
    self.down = set(node_list)
    self.up = set()
    self.success = True

  def __call__(self):
    """When called we run action_cb.

    The callback receives the set of nodes currently up; a false result
    clears L{success} permanently.

    @raises utils.RetryAgain: When there are still down nodes
    @return: the value of L{success} once no node is down any more

    """
    callback_ok = self.action_cb(self.up)
    if not callback_ok:
      self.success = False

    if not self.down:
      return self.success

    raise utils.RetryAgain()

  def Wait(self, secs):
    """Checks if a host is up or waits remaining seconds.

    @param secs: The secs remaining

    """
    started = time.time()

    for name in self.down:
      reachable = self._ping_fn(self.node2ip[name], self.port,
                                timeout=_EPO_PING_TIMEOUT,
                                live_port_needed=True)
      if not reachable:
        continue

      self.feedback_fn("Node %s became available" % name)
      self.up.add(name)
      self.down.difference_update(self.up)
      # If we have a node available there is the possibility to run the
      # action callback successfully, therefore we don't wait and return
      return

    # Nobody answered; sleep whatever is left of the interval
    self._sleep_fn(max(0.0, started + secs - time.time()))
1447
def _RunWhenNodesReachable(node_list, action_cb, interval):
  """Run action_cb when nodes become reachable.

  The nodes are resolved using the cluster's primary IP family and then
  pinged on the node daemon port until all of them answer or
  C{_EPO_REACHABLE_TIMEOUT} expires.

  @param node_list: The list of nodes to be reachable
  @param action_cb: Callback called when a new host is reachable
  @param interval: The earliest time to retry
  @return: the result of L{utils.Retry} on the helper, or False if the
      timeout expired while nodes were still unreachable

  """
  client = GetClient()
  cluster_info = client.QueryClusterInfo()
  if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
    family = netutils.IPAddress.family
  else:
    family = netutils.IP6Address.family

  node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
                 for node in node_list)

  port = netutils.GetDaemonPort(constants.NODED)
  helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip, port,
                                        ToStdout)

  try:
    return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
                       wait_fn=helper.Wait)
  except utils.RetryTimeout:
    # One node per line, matching the "\n - " bullet that starts the list;
    # sorted because helper.down is a set and has no stable order
    ToStderr("Time exceeded while waiting for nodes to become reachable"
             " again:\n - %s", "\n - ".join(sorted(helper.down)))
    return False
1478
def _MaybeInstanceStartup(opts, inst_map, nodes_online,
                          _instance_start_fn=_InstanceStart):
  """Start the instances conditional based on node_states.

  Instances that get selected for startup are removed from C{inst_map},
  whatever the outcome of the start itself.

  @param opts: The command line options selected by the user
  @param inst_map: A dict of inst -> nodes mapping; modified in place
  @param nodes_online: The set of nodes online (must support set
      difference against the node sets in C{inst_map})
  @param _instance_start_fn: Callback to start instances (unittest use only)
  @return: Success of the operation on all instances

  """
  # An instance is ready once none of its nodes is missing from the online set
  ready = [inst for (inst, nodes) in inst_map.items()
           if not (nodes - nodes_online)]

  for inst in ready:
    del inst_map[inst]

  if not ready:
    return True

  return _instance_start_fn(opts, ready, True)
1504
def _EpoOn(opts, full_node_list, node_list, inst_map):
  """Does the actual power on.

  Powers on the OOB-capable nodes, then waits for all nodes to become
  reachable, starting instances as soon as all their nodes are back.

  @param opts: The command line options selected by the user
  @param full_node_list: All nodes to operate on (includes nodes not supporting
                         OOB)
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  # power=True: this is the power-on path. A failure here is only reported;
  # the reachability wait below decides the exit status.
  if node_list and not _OobPower(opts, node_list, True):
    ToStderr("Not all nodes seem to get back up, investigate and start"
             " manually if needed")

  # Wait for the nodes to be back up; the callback works on a copy of the
  # map, as it removes the instances it has started
  action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))

  ToStdout("Waiting until all nodes are available again")
  if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
    ToStderr("Please investigate and start stopped instances manually")
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS
1530
def _EpoOff(opts, node_list, inst_map):
  """Does the actual power off.

  All instances are shut down first; the nodes are only powered off if
  that succeeded.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  instances_stopped = _InstanceStart(opts, inst_map.keys(), False,
                                     no_remember=True)
  if not instances_stopped:
    ToStderr("Please investigate and stop instances manually before continuing")
    return constants.EXIT_FAILURE

  # Nothing to power off, or powering off worked
  if not node_list or _OobPower(opts, node_list, False):
    return constants.EXIT_SUCCESS

  return constants.EXIT_FAILURE
1552
def Epo(opts, args, cl=None, _on_fn=_EpoOn, _off_fn=_EpoOff,
        _confirm_fn=ConfirmOperation,
        _stdout_fn=ToStdout, _stderr_fn=ToStderr):
  """EPO operations.

  Queries the selected nodes, works out which of them have to change their
  power state and which instances live on them, asks for confirmation
  unless forced, and then delegates to the power-on or power-off function.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: node names, or group names if --groups was given
  @param cl: client to use; a new one is created if None
  @param _on_fn: function doing the power on (unittest use only)
  @param _off_fn: function doing the power off (unittest use only)
  @param _confirm_fn: function asking for confirmation (unittest use only)
  @param _stdout_fn: function for normal output (unittest use only)
  @param _stderr_fn: function for error output (unittest use only)
  @rtype: int
  @return: the desired exit code

  """
  if opts.show_all:
    if opts.groups:
      _stderr_fn("Only one of --groups or --all are allowed")
      return constants.EXIT_FAILURE
    if args:
      _stderr_fn("Arguments in combination with --all are not allowed")
      return constants.EXIT_FAILURE

  if cl is None:
    cl = GetClient()

  if opts.groups:
    node_query_list = \
      itertools.chain(*cl.QueryGroups(args, ["node_list"], False))
  else:
    node_query_list = args

  query_fields = ["name", "master", "pinst_list", "sinst_list", "powered",
                  "offline"]
  result = cl.QueryNodes(node_query_list, query_fields, False)

  all_nodes = map(compat.fst, result)
  node_list = []
  inst_map = {}
  for (node, master, pinsts, sinsts, powered, offline) in result:
    if not offline:
      for inst in (pinsts + sinsts):
        # Every instance gets an entry, but the master is never recorded
        # as one of the nodes an instance is waiting for
        owners = inst_map.setdefault(inst, set())
        if not master:
          owners.add(node)

    if master:
      if opts.on:
        # We ignore the master for turning on the machines, in fact we are
        # already operating on the master at this point :)
        continue
      if not opts.show_all:
        _stderr_fn("%s is the master node, please do a master-failover to"
                   " another"
                   " node not affected by the EPO or use --all if you intend to"
                   " shutdown the whole cluster", node)
        return constants.EXIT_FAILURE

    if powered is None:
      _stdout_fn("Node %s does not support out-of-band handling, it can not be"
                 " handled in a fully automated manner", node)
    elif powered == opts.on:
      _stdout_fn("Node %s is already in desired power state, skipping", node)
    elif powered or not offline:
      node_list.append(node)

  confirmed = opts.force or _confirm_fn(all_nodes, "nodes", "epo")
  if not confirmed:
    return constants.EXIT_FAILURE

  if opts.on:
    return _on_fn(opts, all_nodes, node_list, inst_map)

  return _off_fn(opts, node_list, inst_map)
1624
def _GetCreateCommand(info):
  """Builds the "gnt-cluster init" command line for a cluster.

  @type info: dict
  @param info: cluster information; the "ipolicy" and "name" entries are used
  @rtype: string
  @return: the command, consisting of "gnt-cluster init", whatever
      L{PrintIPolicyCommand} emits for the instance policy, and the
      cluster name

  """
  out = StringIO()
  out.write("gnt-cluster init")
  PrintIPolicyCommand(out, info["ipolicy"], False)
  out.write(" " + info["name"])
  return out.getvalue()
1633
def ShowCreateCommand(opts, args):
  """Shows the command that can be used to re-create the cluster.

  Currently it works only for ipolicy specs.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command

  """
  cluster_info = GetClient(query=True).QueryClusterInfo()
  ToStdout(_GetCreateCommand(cluster_info))
1644
def _RunCommandAndReport(cmd):
  """Run a command and report its output, iff it failed.

  @param cmd: the command to execute
  @type cmd: list
  @rtype: bool
  @return: False, if the execution failed; True otherwise

  """
  outcome = utils.RunCmd(cmd)
  if not outcome.failed:
    return True

  ToStderr("Command %s failed: %s; Output %s" %
           (cmd, outcome.fail_reason, outcome.output))
  return False
1661
def _VerifyCommand(cmd):
  """Verify that a given command succeeds on all online nodes.

  As this function is intended to run during upgrades, it
  is implemented in such a way that it still works, if all Ganeti
  daemons are down: node and cluster names are read from ssconf and the
  command is run over ssh.

  @param cmd: the command to execute
  @type cmd: list
  @rtype: list
  @return: the list of node names that are online where
      the command failed.

  """
  shell_cmd = utils.text.ShellQuoteArgs([str(word) for word in cmd])

  targets = ssconf.SimpleStore().GetOnlineNodeList()
  master = ssconf.SimpleStore().GetMasterNode()
  cluster = ssconf.SimpleStore().GetClusterName()

  # If the master node is among the targets, move it to the end of the list
  if master in targets:
    targets.remove(master)
    targets.append(master)

  runner = ssh.SshRunner(cluster_name=cluster)
  return [name for name in targets
          if runner.Run(name, constants.SSH_LOGIN_USER,
                        shell_cmd).exit_code != 0]
1697
def _VerifyVersionInstalled(versionstring):
  """Verify that the given version of ganeti is installed on all online nodes.

  A version counts as installed on a node if its directory below
  C{pathutils.PKGLIBDIR} exists there. Do nothing, if this is the case
  everywhere, otherwise print an appropriate message to stderr.

  @param versionstring: the version to check for
  @type versionstring: string
  @rtype: bool
  @return: True, if the version is installed on all online nodes

  """
  version_dir = os.path.join(pathutils.PKGLIBDIR, versionstring)
  missing_on = _VerifyCommand(["test", "-d", version_dir])
  if not missing_on:
    return True

  ToStderr("Ganeti version %s not installed on nodes %s"
           % (versionstring, ", ".join(missing_on)))
  return False
1719
def _GetRunning():
  """Determine the number of running jobs.

  @rtype: int
  @return: the number of jobs still running

  """
  running_only = qlang.MakeSimpleFilter(
    "status", frozenset([constants.JOB_STATUS_RUNNING]))
  # No fields are requested; only the number of result rows matters
  response = GetClient().Query(constants.QR_JOB, [], running_only)
  return len(response.data)
1732
def _SetGanetiVersion(versionstring):
  """Set the active version of ganeti to the given versionstring

  The "ganeti/lib" and "ganeti/share" symlinks below
  C{pathutils.SYSCONFDIR} are re-pointed to the directories of the given
  version on all online nodes.

  @type versionstring: string
  @param versionstring: the version to activate
  @rtype: list
  @return: the list of nodes where the version change failed

  """
  # (link target, link name) pairs, in the order they are processed
  links = [
    (os.path.join(pathutils.PKGLIBDIR, versionstring),
     os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")),
    (os.path.join(pathutils.SHAREDIR, versionstring),
     os.path.join(pathutils.SYSCONFDIR, "ganeti/share")),
    ]

  failed = []
  for (target, link_name) in links:
    if constants.HAS_GNU_LN:
      failed.extend(_VerifyCommand(["ln", "-s", "-f", "-T", target,
                                    link_name]))
    else:
      # Without the GNU-only "-T", remove the old link first
      failed.extend(_VerifyCommand(["rm", "-f", link_name]))
      failed.extend(_VerifyCommand(["ln", "-s", "-f", target, link_name]))

  # A node may have failed more than one command; report it once
  return list(set(failed))
1764
def _ExecuteCommands(fns):
  """Execute a list of functions, in reverse order.

  Each function is called without arguments and its return value is
  ignored.

  @type fns: list of functions.
  @param fns: the functions to be executed.

  """
  for callback in reversed(fns):
    callback()
1775
def _GetConfigVersion():
  """Determine the version the configuration file currently has.

  The cluster configuration file is read and parsed directly.

  @rtype: tuple or None
  @return: (major, minor, revision) if the version can be determined,
           None otherwise

  """
  raw_config = utils.ReadFile(pathutils.CLUSTER_CONF_FILE)
  config = serializer.LoadJson(raw_config)

  try:
    encoded_version = config["version"]
  except KeyError:
    return None
  else:
    return utils.SplitVersion(encoded_version)
1791
def _ReadIntentToUpgrade():
  """Read the file documenting the intent to upgrade the cluster.

  The file is expected to hold exactly three fields, of which the first
  two are returned.

  @rtype: (string, string) or (None, None)
  @return: (old version, version to upgrade to), if the file exists
      and is well-formed, and (None, None) otherwise.

  """
  nothing = (None, None)

  if not os.path.isfile(pathutils.INTENT_TO_UPGRADE):
    return nothing

  fields = utils.UnescapeAndSplit(utils.ReadFile(pathutils.INTENT_TO_UPGRADE))
  if len(fields) == 3:
    return (fields[0], fields[1])

  # file syntactically mal-formed
  return nothing
1810
def _WriteIntentToUpgrade(version):
  """Write file documenting the intent to upgrade the cluster.

  The file records the currently running release version, the target
  version and the process ID of the writer.

  @type version: string
  @param version: the version we intent to upgrade to

  """
  fields = [constants.RELEASE_VERSION, version, "%d" % os.getpid()]
  utils.WriteFile(pathutils.INTENT_TO_UPGRADE,
                  data=utils.EscapeAndJoin(fields))
1822
def _UpgradeBeforeConfigurationChange(versionstring):
  """
  Carry out all the tasks necessary for an upgrade that happen before
  the configuration file, or Ganeti version, changes.

  In order: verify the target version is installed, record the intent to
  upgrade, drain the job queue, pause the watcher, stop the daemons (on
  the master first, then everywhere) and back up the data directory.

  @type versionstring: string
  @param versionstring: the version to upgrade to
  @rtype: (bool, list)
  @return: tuple of a bool indicating success and a list of rollback tasks;
      the tasks are argument-less callables, collected in the order the
      corresponding actions were carried out

  """
  rollback = []

  if not _VerifyVersionInstalled(versionstring):
    return (False, rollback)

  _WriteIntentToUpgrade(versionstring)
  rollback.append(
    lambda: utils.RunCmd(["rm", "-f", pathutils.INTENT_TO_UPGRADE]))

  ToStdout("Draining queue")
  client = GetClient()
  client.SetQueueDrainFlag(True)

  rollback.append(lambda: GetClient().SetQueueDrainFlag(False))

  # _GetRunning returns the number of running jobs, so a true result from
  # the retry means jobs were still running when it gave up
  if utils.SimpleRetry(0, _GetRunning,
                       constants.UPGRADE_QUEUE_POLL_INTERVAL,
                       constants.UPGRADE_QUEUE_DRAIN_TIMEOUT):
    ToStderr("Failed to completely empty the queue.")
    return (False, rollback)

  ToStdout("Pausing the watcher for one hour.")
  rollback.append(lambda: GetClient().SetWatcherPause(None))
  GetClient().SetWatcherPause(time.time() + 60 * 60)

  ToStdout("Stopping daemons on master node.")
  if not _RunCommandAndReport([pathutils.DAEMON_UTIL, "stop-all"]):
    return (False, rollback)

  # Check again now that the master daemons are down; on failure they are
  # restarted right away, as no rollback task for that exists yet
  if not _VerifyVersionInstalled(versionstring):
    utils.RunCmd([pathutils.DAEMON_UTIL, "start-all"])
    return (False, rollback)

  ToStdout("Stopping daemons everywhere.")
  rollback.append(lambda: _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
  badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
  if badnodes:
    ToStderr("Failed to stop daemons on %s." % (", ".join(badnodes),))
    return (False, rollback)

  backuptar = os.path.join(pathutils.BACKUP_DIR, "ganeti%d.tar" % time.time())
  ToStdout("Backing up configuration as %s" % backuptar)
  if not _RunCommandAndReport(["mkdir", "-p", pathutils.BACKUP_DIR]):
    return (False, rollback)

  # Create the archive in a safe manner, as it contains sensitive
  # information.
  (tmp_fd, tmp_name) = tempfile.mkstemp(prefix=backuptar,
                                        dir=pathutils.BACKUP_DIR)
  # tar writes to the file by name; the descriptor is not needed and has to
  # be closed here, as nobody else will do it
  os.close(tmp_fd)
  if not _RunCommandAndReport(["tar", "-cf", tmp_name,
                               "--exclude=queue/archive",
                               pathutils.DATA_DIR]):
    return (False, rollback)

  os.rename(tmp_name, backuptar)
  return (True, rollback)
1890
def _SwitchVersionAndConfig(versionstring, downgrade):
  """
  Switch to the new Ganeti version and change the configuration,
  in correct order.

  For a downgrade the configuration is converted before the version
  switch, for an upgrade afterwards.

  @type versionstring: string
  @param versionstring: the version to change to
  @type downgrade: bool
  @param downgrade: True, if the configuration should be downgraded
  @rtype: (bool, list)
  @return: tuple of a bool indicating success, and a list of
      additional rollback tasks

  """
  rollback = []
  upgrade = not downgrade

  if downgrade:
    ToStdout("Downgrading configuration")
    if not _RunCommandAndReport([pathutils.CFGUPGRADE, "--downgrade", "-f"]):
      return (False, rollback)

  # Configuration change is the point of no return. From then onwards, it is
  # safer to push through the up/downgrade than to try to roll it back.

  ToStdout("Switching to version %s on all nodes" % versionstring)
  rollback.append(lambda: _SetGanetiVersion(constants.DIR_VERSION))
  failed_nodes = _SetGanetiVersion(versionstring)
  if failed_nodes:
    ToStderr("Failed to switch to Ganeti version %s on nodes %s"
             % (versionstring, ", ".join(failed_nodes)))
    # Only an upgrade is abandoned here; a downgrade has already changed
    # the configuration and is pushed through
    if upgrade:
      return (False, rollback)

  # Now that we have changed to the new version of Ganeti we should
  # not communicate over luxi any more, as luxi might have changed in
  # incompatible ways. Therefore, manually call the corresponding ganeti
  # commands using their canonical (version independent) path.

  if upgrade:
    ToStdout("Upgrading configuration")
    if not _RunCommandAndReport([pathutils.CFGUPGRADE, "-f"]):
      return (False, rollback)

  return (True, rollback)
1935
def _UpgradeAfterConfigurationChange(oldversion):
  """
  Carry out the upgrade actions necessary after switching to the new
  Ganeti version and updating the configuration.

  As this part is run at a time where the new version of Ganeti is already
  running, no communication should happen via luxi, as this is not a stable
  interface. Also, as the configuration change is the point of no return,
  all actions are pushed through, even if some of them fail.

  @param oldversion: the version the upgrade started from
  @type oldversion: string
  @rtype: int
  @return: the intended return value: 0 if every recorded step succeeded,
      1 otherwise

  """
  # One boolean per step that counts towards the exit code
  outcomes = []

  ToStdout("Ensuring directories everywhere.")
  failed_nodes = _VerifyCommand([pathutils.ENSURE_DIRS])
  if failed_nodes:
    ToStderr("Warning: failed to ensure directories on %s." %
             (", ".join(failed_nodes)))
  outcomes.append(not failed_nodes)

  ToStdout("Starting daemons everywhere.")
  failed_nodes = _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])
  if failed_nodes:
    ToStderr("Warning: failed to start daemons on %s." %
             (", ".join(failed_nodes),))
  outcomes.append(not failed_nodes)

  ToStdout("Redistributing the configuration.")
  outcomes.append(
    _RunCommandAndReport(["gnt-cluster", "redist-conf", "--yes-do-it"]))

  ToStdout("Restarting daemons everywhere.")
  failed_nodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
  failed_nodes.extend(_VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
  if failed_nodes:
    ToStderr("Warning: failed to start daemons on %s." %
             (", ".join(list(set(failed_nodes))),))
  outcomes.append(not failed_nodes)

  ToStdout("Undraining the queue.")
  outcomes.append(_RunCommandAndReport(["gnt-cluster", "queue", "undrain"]))

  # The result of removing the intent file does not affect the exit code
  _RunCommandAndReport(["rm", "-f", pathutils.INTENT_TO_UPGRADE])

  ToStdout("Running post-upgrade hooks")
  outcomes.append(_RunCommandAndReport([pathutils.POST_UPGRADE, oldversion]))

  ToStdout("Unpausing the watcher.")
  outcomes.append(_RunCommandAndReport(["gnt-cluster", "watcher", "continue"]))

  ToStdout("Verifying cluster.")
  outcomes.append(_RunCommandAndReport(["gnt-cluster", "verify"]))

  if all(outcomes):
    return 0
  return 1
1999
def UpgradeGanetiCommand(opts, args):
  """Upgrade (or downgrade) the cluster to another Ganeti version.

  Exactly one of C{opts.to} and C{opts.resume} has to be set. If an
  unfinished upgrade towards the very same target version is detected, the
  call turns itself into a resume (C{opts.resume} and C{opts.to} are
  modified accordingly); if an unfinished upgrade towards a different
  version is detected, the call is refused.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  # --to and --resume are mutually exclusive, and one of them is mandatory.
  if bool(opts.resume) == (opts.to is not None):
    ToStderr("Precisely one of the options --to and --resume"
             " has to be given")
    return 1

  # A fresh upgrade may only be started if no other upgrade is pending.
  if not opts.resume:
    pending_from, pending_to = _ReadIntentToUpgrade()
    if pending_to is not None:
      if pending_to != opts.to:
        ToStderr("An upgrade from %s to %s is in progress; use --resume to"
                 " finish it first" % (pending_from, pending_to))
        return 1
      # Same target as the pending upgrade: silently switch to resuming.
      ToStderr("An upgrade is already in progress. Target version matches,"
               " resuming.")
      opts.resume = True
      opts.to = None

  oldversion = constants.RELEASE_VERSION

  if not opts.resume:
    versionstring = opts.to
    config_already_modified = False
    version = utils.version.ParseVersion(versionstring)
    if version is None:
      ToStderr("Could not parse version string %s" % versionstring)
      return 1

    range_problem = utils.version.UpgradeRange(version)
    if range_problem is not None:
      ToStderr("Cannot upgrade to %s: %s" % (versionstring, range_problem))
      return 1
  else:
    ssconf.CheckMaster(False)
    oldversion, versionstring = _ReadIntentToUpgrade()
    if versionstring is None:
      # Nothing is pending, hence nothing to resume.
      return 0
    version = utils.version.ParseVersion(versionstring)
    if version is None:
      return 1
    configversion = _GetConfigVersion()
    if configversion is None:
      return 1
    # When resuming an upgrade between compatible versions (like 2.10.0 to
    # 2.10.1), a matching config version alone does not prove that the
    # configuration was already updated. In that situation with an untouched
    # configuration, however, the running directory version differs from the
    # target while the config version equals the one of the running code.
    running_config_version = (constants.CONFIG_MAJOR,
                              constants.CONFIG_MINOR,
                              constants.CONFIG_REVISION)
    config_already_modified = (
      utils.IsCorrectConfigVersion(version, configversion) and
      (versionstring == constants.DIR_VERSION or
       configversion != running_config_version))
    if not config_already_modified:
      # The upgrade has to be redone from the start; as some daemons might
      # already have been stopped, starting all of them again is the only
      # way to reach a well-defined state.
      _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])

  rollback = []
  if not config_already_modified:
    success, rollback = _UpgradeBeforeConfigurationChange(versionstring)
    if not success:
      _ExecuteCommands(rollback)
      return 1

  downgrade = utils.version.ShouldCfgdowngrade(version)

  success, additionalrollback = _SwitchVersionAndConfig(versionstring,
                                                        downgrade)
  if success:
    return _UpgradeAfterConfigurationChange(oldversion)

  # Switching failed: undo everything done so far, in the recorded order.
  rollback.extend(additionalrollback)
  _ExecuteCommands(rollback)
  return 1
# Table of gnt-cluster sub-commands, handed to GenericMain by Main().
# Every value is a 5-tuple; judging by the entries, the fields are:
#   (handler function, positional argument specification,
#    list of accepted options, usage string, short description)
commands = {
  "init": (
    InitCluster,
    [ArgHost(min=1, max=1)],
    [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
     HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT,
     NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, NOMODIFY_ETCHOSTS_OPT,
     NOMODIFY_SSH_SETUP_OPT, SECONDARY_IP_OPT, VG_NAME_OPT,
     MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, DRBD_HELPER_OPT,
     DEFAULT_IALLOCATOR_OPT, PRIMARY_IP_VERSION_OPT, PREALLOC_WIPE_DISKS_OPT,
     NODE_PARAMS_OPT, GLOBAL_SHARED_FILEDIR_OPT, USE_EXTERNAL_MIP_SCRIPT,
     DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT, ENABLED_DISK_TEMPLATES_OPT,
     IPOLICY_STD_SPECS_OPT] + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS,
    "[opts...] <cluster_name>",
    "Initialises a new cluster configuration",
    ),
  "destroy": (
    DestroyCluster,
    ARGS_NONE,
    [YES_DOIT_OPT],
    "",
    "Destroy cluster",
    ),
  "rename": (
    RenameCluster,
    [ArgHost(min=1, max=1)],
    [FORCE_OPT, DRY_RUN_OPT],
    "<new_name>",
    "Renames the cluster",
    ),
  "redist-conf": (
    RedistributeConfig,
    ARGS_NONE,
    SUBMIT_OPTS + [DRY_RUN_OPT, PRIORITY_OPT, FORCE_DISTRIBUTION],
    "",
    "Forces a push of the configuration file and ssconf files"
    " to the nodes in the cluster",
    ),
  "verify": (
    VerifyCluster,
    ARGS_NONE,
    [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
     DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT],
    "",
    "Does a check on the cluster configuration",
    ),
  "verify-disks": (
    VerifyDisks,
    ARGS_NONE,
    [PRIORITY_OPT],
    "",
    "Does a check on the cluster disk status",
    ),
  "repair-disk-sizes": (
    RepairDiskSizes,
    ARGS_MANY_INSTANCES,
    [DRY_RUN_OPT, PRIORITY_OPT],
    "[instance...]",
    "Updates mismatches in recorded disk sizes",
    ),
  "master-failover": (
    MasterFailover,
    ARGS_NONE,
    [NOVOTING_OPT, FORCE_FAILOVER],
    "",
    "Makes the current node the master",
    ),
  "master-ping": (
    MasterPing,
    ARGS_NONE,
    [],
    "",
    "Checks if the master is alive",
    ),
  "version": (
    ShowClusterVersion,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster version",
    ),
  "getmaster": (
    ShowClusterMaster,
    ARGS_NONE,
    [],
    "",
    "Shows the cluster master",
    ),
  "copyfile": (
    ClusterCopyFile,
    [ArgFile(min=1, max=1)],
    [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT],
    "[-n node...] <filename>",
    "Copies a file to all (or only some) nodes",
    ),
  "command": (
    RunClusterCommand,
    [ArgCommand(min=1)],
    [NODE_LIST_OPT, NODEGROUP_OPT, SHOW_MACHINE_OPT, FAILURE_ONLY_OPT],
    "[-n node...] <command>",
    "Runs a command on all (or only some) nodes",
    ),
  "info": (
    ShowClusterConfig,
    ARGS_NONE,
    [ROMAN_OPT],
    "[--roman]",
    "Show cluster configuration",
    ),
  "list-tags": (
    ListTags,
    ARGS_NONE,
    [],
    "",
    "List the tags of the cluster",
    ),
  "add-tags": (
    AddTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
    "tag...",
    "Add tags to the cluster",
    ),
  "remove-tags": (
    RemoveTags,
    [ArgUnknown()],
    [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
    "tag...",
    "Remove tags from the cluster",
    ),
  "search-tags": (
    SearchTags,
    [ArgUnknown(min=1, max=1)],
    [PRIORITY_OPT],
    "",
    "Searches the tags on all objects on"
    " the cluster for a given pattern (regex)",
    ),
  "queue": (
    QueueOps,
    [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
    [],
    "drain|undrain|info",
    "Change queue properties",
    ),
  "watcher": (
    WatcherOps,
    [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
     ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
    [],
    "{pause <timespec>|continue|info}",
    "Change watcher properties",
    ),
  "modify": (
    SetClusterParams,
    ARGS_NONE,
    [FORCE_OPT,
     BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, HVLIST_OPT, MASTER_NETDEV_OPT,
     MASTER_NETMASK_OPT, NIC_PARAMS_OPT, NOLVM_STORAGE_OPT, VG_NAME_OPT,
     MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, ADD_UIDS_OPT, REMOVE_UIDS_OPT,
     DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT,
     RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT, PREALLOC_WIPE_DISKS_OPT,
     NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT, HV_STATE_OPT,
     DISK_STATE_OPT] + SUBMIT_OPTS +
    [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, MODIFY_ETCHOSTS_OPT] +
    INSTANCE_POLICY_OPTS + [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT],
    "[opts...]",
    "Alters the parameters of the cluster",
    ),
  "renew-crypto": (
    RenewCrypto,
    ARGS_NONE,
    [NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
     NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
     NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
     NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT],
    "[opts...]",
    "Renews cluster certificates, keys and secrets",
    ),
  "epo": (
    Epo,
    [ArgUnknown()],
    [FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT,
     SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT],
    "[opts...] [args]",
    "Performs an emergency power-off on given args",
    ),
  "activate-master-ip": (
    ActivateMasterIp,
    ARGS_NONE,
    [],
    "",
    "Activates the master IP",
    ),
  "deactivate-master-ip": (
    DeactivateMasterIp,
    ARGS_NONE,
    [CONFIRM_OPT],
    "",
    "Deactivates the master IP",
    ),
  "show-ispecs-cmd": (
    ShowCreateCommand,
    ARGS_NONE,
    [],
    "",
    "Show the command line to re-create the cluster",
    ),
  "upgrade": (
    UpgradeGanetiCommand,
    ARGS_NONE,
    [TO_OPT, RESUME_OPT],
    "",
    "Upgrade (or downgrade) to a new Ganeti version",
    ),
  }


#: dictionary with aliases for commands
# (alternative spelling on the left, name of the entry in "commands" on the
# right)
aliases = {
  "masterfailover": "master-failover",
  "show": "info",
  }
def Main():
  """Run the gnt-cluster command line via GenericMain.

  The module-level C{commands} and C{aliases} tables are passed on, and the
  C{tag_type} setting is overridden with the cluster tag type.

  @return: whatever GenericMain returns

  """
  cluster_overrides = {"tag_type": constants.TAG_CLUSTER}
  return GenericMain(commands, override=cluster_overrides, aliases=aliases)
2229