1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 """Debugging commands"""
31
32
33
34
35
36
37 import simplejson
38 import time
39 import socket
40 import logging
41
42 from ganeti.cli import *
43 from ganeti import cli
44 from ganeti import constants
45 from ganeti import opcodes
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import compat
49 from ganeti import ht
50
51
52
53 _LIST_LOCKS_DEF_FIELDS = [
54 "name",
55 "mode",
56 "owner",
57 "pending",
58 ]
59
60
def Delay(opts, args):
  """Submits an OpTestDelay opcode for the requested duration.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the duration of
      the sleep (anything accepted by C{float})
  @rtype: int
  @return: the desired exit code

  """
  params = {
    "duration": float(args[0]),
    "on_master": opts.on_master,
    "on_nodes": opts.on_nodes,
    "repeat": opts.repeat,
    "no_locks": opts.no_locks,
    }
  SubmitOrSend(opcodes.OpTestDelay(**params), opts)

  return 0
81
82
def GenericOpCodes(opts, args):
  """Submits the opcodes stored in JSON files as jobs to the master.

  Every file named in C{args} is queued as one job and the whole set of
  files is queued C{opts.rep_job} times. Within a job, the opcode sequence
  read from the file is repeated C{opts.rep_op} times.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: paths of the files with the opcode definitions, each one
      holding a JSON list of serialized opcodes
  @rtype: int
  @return: the desired exit code

  """
  client = cli.GetClient()
  executor = cli.JobExecutor(cl=client, verbose=opts.verbose, opts=opts)
  show_timing = opts.timing_stats

  if show_timing:
    ToStdout("Loading...")

  num_jobs = 0
  num_ops = 0
  for job_idx in range(opts.rep_job):
    for fname in args:
      # The file is read and parsed again for every repetition
      serialized = simplejson.loads(utils.ReadFile(fname))
      job_ops = [opcodes.OpCode.LoadOpCode(entry) for entry in serialized]
      job_ops = job_ops * opts.rep_op
      executor.QueueJob("file %s/%d" % (fname, job_idx), *job_ops)
      num_ops += len(job_ops)
      num_jobs += 1

  if show_timing:
    t_loaded = time.time()
    ToStdout("Submitting...")

  executor.SubmitPending(each=opts.each)

  if show_timing:
    t_submitted = time.time()
    ToStdout("Executing...")

  executor.GetResults()

  if show_timing:
    t_finished = time.time()
    report = [
      "C:op %4d" % num_ops,
      "C:job %4d" % num_jobs,
      "T:submit %4.4f" % (t_submitted - t_loaded),
      "T:exec %4.4f" % (t_finished - t_submitted),
      "T:total %4.4f" % (t_finished - t_loaded),
      ]
    for line in report:
      ToStdout(line)

  return 0
130
131
def TestAllocator(opts, args):
  """Runs the test allocator opcode.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the instance names; the first element is also passed as
      the C{name} parameter of the opcode
  @rtype: int
  @return: the desired exit code (1 if a disk size can't be parsed)

  """
  try:
    disks = [{
      constants.IDISK_SIZE: utils.ParseUnit(val),
      constants.IDISK_MODE: constants.DISK_RDWR,
      } for val in opts.disks.split(",")]
  except errors.UnitParseError as err:
    ToStderr("Invalid disks parameter '%s': %s", opts.disks, err)
    return 1

  nic_dict = []
  for spec in opts.nics.split(","):
    # Each NIC is given as "mac/ip/bridge"; missing trailing fields and
    # empty fields both end up as None, anything after the third field
    # is ignored
    padded = (spec.split("/") + [None, None, None])[:3]
    (mac, ip, bridge) = [field or None for field in padded]
    nic_dict.append({
      constants.INIC_MAC: mac,
      constants.INIC_IP: ip,
      "bridge": bridge,
      })

  if opts.tags is None:
    opts.tags = []
  else:
    opts.tags = opts.tags.split(",")

  if opts.target_groups is None:
    target_groups = []
  else:
    target_groups = opts.target_groups

  op = opcodes.OpTestAllocator(mode=opts.mode,
                               name=args[0],
                               instances=args,
                               memory=opts.memory,
                               disks=disks,
                               disk_template=opts.disk_template,
                               nics=nic_dict,
                               os=opts.os,
                               vcpus=opts.vcpus,
                               tags=opts.tags,
                               direction=opts.direction,
                               iallocator=opts.iallocator,
                               evac_mode=opts.evac_mode,
                               target_groups=target_groups,
                               spindle_use=opts.spindle_use,
                               count=opts.count)
  result = SubmitOpCode(op, opts=opts)
  ToStdout("%s" % result)
  return 0
193
194
def _TestJobDependency(opts):
  """Tests job dependencies.

  First checks that a relative job ID is rejected when a single opcode is
  submitted, then submits a batch of jobs depending on each other through
  relative job IDs and verifies that the dependencies were resolved to the
  right job IDs and that the jobs ran in the right order.

  @param opts: the command line options selected by the user
  @raise errors.OpExecError: if any of the checks fails

  """
  ToStdout("Testing job dependencies")

  try:
    cl = cli.GetClient()
    SubmitOpCode(opcodes.OpTestDelay(duration=0, depends=[(-1, None)]), cl=cl)
  except errors.GenericError as err:
    if opts.debug:
      ToStdout("Ignoring error for 'wrong dependencies' test: %s", err)
  else:
    raise errors.OpExecError("Submitting plain opcode with relative job ID"
                             " did not fail as expected")

  # Jobs 1 and 2 depend on job 0, job 4 depends on job 2; jobs 0 and 3 have
  # no dependencies
  jobs = [
    [opcodes.OpTestDelay(duration=1)],
    [opcodes.OpTestDelay(duration=1,
                         depends=[(-1, [])])],
    [opcodes.OpTestDelay(duration=1,
                         depends=[(-2, [constants.JOB_STATUS_SUCCESS])])],
    [opcodes.OpTestDelay(duration=1,
                         depends=[])],
    [opcodes.OpTestDelay(duration=1,
                         depends=[(-2, [constants.JOB_STATUS_SUCCESS])])],
    ]

  # The checks below use hard-coded job indices and look at the first opcode
  # of a job only, so make sure the job list still has the expected shape
  assert len(jobs) == 5 and compat.all(len(ops) == 1 for ops in jobs)

  # Function for checking the submission result: a list of pairs made of a
  # boolean and either a non-empty string or a job ID
  check_fn = ht.TListOf(ht.TAnd(ht.TIsLength(2),
                                ht.TItems([ht.TBool,
                                           ht.TOr(ht.TNonEmptyString,
                                                  ht.TJobId)])))

  cl = cli.GetClient()
  result = cl.SubmitManyJobs(jobs)
  if not check_fn(result):
    raise errors.OpExecError("Job submission doesn't match %s: %s" %
                             (check_fn, result))

  # Wait for the jobs to finish
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result:
    jex.AddJobId(None, status, job_id)

  job_results = jex.GetResults()
  if not compat.all(row[0] for row in job_results):
    raise errors.OpExecError("At least one of the submitted jobs failed: %s" %
                             job_results)

  # Get details about the jobs
  data = cl.QueryJobs([job_id for (_, job_id) in result],
                      ["id", "opexec", "ops"])
  data_job_id = [job_id for (job_id, _, _) in data]
  data_opexec = [opexec for (_, opexec, _) in data]
  data_op = [[opcodes.OpCode.LoadOpCode(op) for op in ops]
             for (_, _, ops) in data]

  assert compat.all(not op.depends or len(op.depends) == 1
                    for ops in data_op
                    for op in ops)

  # Check that the relative job IDs were replaced by the right job IDs
  for (job_idx, res_jobdep) in [(1, data_job_id[0]),
                                (2, data_job_id[0]),
                                (4, data_job_id[2])]:
    if data_op[job_idx][0].depends[0][0] != res_jobdep:
      raise errors.OpExecError("Job %s's opcode doesn't depend on correct job"
                               " ID (%s)" % (job_idx, res_jobdep))

  # Check the execution order: a job must not have been executed before the
  # job it depends on
  if not (data_opexec[0] <= data_opexec[1] and
          data_opexec[0] <= data_opexec[2] and
          data_opexec[2] <= data_opexec[4]):
    raise errors.OpExecError("Jobs did not run in correct order: %s" % data)

  ToStdout("Job dependency tests were successful")
276
277
def _TestJobSubmission(opts):
  """Tests submitting jobs.

  Every submission made here contains one opcode whose priority is expected
  to be rejected; a submission that goes through is reported as a failure.

  @param opts: the command line options selected by the user
  @raise errors.OpExecError: if a submission with an invalid priority is
      accepted

  """
  ToStdout("Testing job submission")

  # Each entry is (number of plain opcodes before the invalid one, number of
  # plain opcodes after it, the invalid priority)
  testdata = [
    (0, 0, constants.OP_PRIO_LOWEST),
    (0, 0, constants.OP_PRIO_HIGHEST),
    ]

  for priority in (constants.OP_PRIO_SUBMIT_VALID |
                   frozenset([constants.OP_PRIO_LOWEST,
                              constants.OP_PRIO_HIGHEST])):
    for offset in [-1, +1]:
      testdata.extend([
        (0, 0, priority + offset),
        (3, 0, priority + offset),
        (0, 3, priority + offset),
        (4, 2, priority + offset),
        ])

  for before, after, failpriority in testdata:
    ops = []
    ops.extend([opcodes.OpTestDelay(duration=0) for _ in range(before)])
    ops.append(opcodes.OpTestDelay(duration=0, priority=failpriority))
    ops.extend([opcodes.OpTestDelay(duration=0) for _ in range(after)])

    # Submitting the job on its own must fail
    try:
      cl = cli.GetClient()
      cl.SubmitJob(ops)
    except errors.GenericError as err:
      if opts.debug:
        ToStdout("Ignoring error for 'wrong priority' test: %s", err)
    else:
      raise errors.OpExecError("Submitting opcode with priority %s did not"
                               " fail when it should (allowed are %s)" %
                               (failpriority, constants.OP_PRIO_SUBMIT_VALID))

    # Submitting it together with another job must fail as well
    jobs = [
      [opcodes.OpTestDelay(duration=0),
       opcodes.OpTestDelay(duration=0, dry_run=False),
       opcodes.OpTestDelay(duration=0, dry_run=True)],
      ops,
      ]
    try:
      cl = cli.GetClient()
      cl.SubmitManyJobs(jobs)
    except errors.GenericError as err:
      if opts.debug:
        ToStdout("Ignoring error for 'wrong priority' test: %s", err)
    else:
      raise errors.OpExecError("Submitting manyjobs with an incorrect one"
                               " did not fail when it should.")
  ToStdout("Job submission tests were successful")
333
334
class _JobQueueTestReporter(cli.StdioJobPollReportCb):
  """Job poll reporter which collects and checks job queue test messages.

  A single instance must only be used for a single job.

  """
  def __init__(self):
    """Initializes this class.

    """
    cli.StdioJobPollReportCb.__init__(self)
    self._expected_msgcount = 0
    # All test messages received over the lifetime of this reporter
    self._all_testmsgs = []
    # Test messages received since the last start message; None as long as
    # no start message has been seen
    self._testmsgs = None
    self._job_id = None

  def GetTestMessages(self):
    """Returns all test log messages received so far.

    """
    return self._all_testmsgs

  def GetJobId(self):
    """Returns the job ID.

    @return: ID of the job whose messages were reported, C{None} if no
        message was received yet

    """
    return self._job_id

  def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg):
    """Handles a log message.

    Job queue test notifications and log messages carrying the test message
    prefix are consumed here, everything else is passed on to the parent
    class.

    @raise errors.ProgrammerError: if messages of more than one job are
        reported to the same instance
    @raise errors.OpExecError: if a test message arrives before a start
        message

    """
    if self._job_id is None:
      self._job_id = job_id
    elif self._job_id != job_id:
      raise errors.ProgrammerError("The same reporter instance was used for"
                                   " more than one job")

    if log_type == constants.ELOG_JQUEUE_TEST:
      (sockname, test, arg) = log_msg
      return self._ProcessTestMessage(job_id, sockname, test, arg)

    elif (log_type == constants.ELOG_MESSAGE and
          log_msg.startswith(constants.JQT_MSGPREFIX)):
      if self._testmsgs is None:
        raise errors.OpExecError("Received test message without a preceding"
                                 " start message")
      testmsg = log_msg[len(constants.JQT_MSGPREFIX):]
      self._testmsgs.append(testmsg)
      self._all_testmsgs.append(testmsg)
      return

    return cli.StdioJobPollReportCb.ReportLogMessage(self, job_id, serial,
                                                     timestamp, log_type,
                                                     log_msg)

  def _ProcessTestMessage(self, job_id, sockname, test, arg):
    """Handles a job queue test message.

    @param job_id: ID of the job which sent the message
    @param sockname: path of the Unix socket to connect to
    @param test: the kind of test notification, one of
        C{constants.JQT_ALL}
    @param arg: argument of the notification; compared with the number of
        test messages received so far for C{constants.JQT_LOGMSG}
    @raise errors.OpExecError: if the notification is invalid or the job
        is not in the state expected for it

    """
    if test not in constants.JQT_ALL:
      raise errors.OpExecError("Received invalid test message %s" % test)

    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.settimeout(30.0)

      logging.debug("Connecting to %s", sockname)
      sock.connect(sockname)

      # The connection stays open while the job is examined and is closed
      # in any case once the checks are done
      logging.debug("Checking status")
      jobdetails = cli.GetClient().QueryJobs([job_id], ["status"])[0]
      if not jobdetails:
        raise errors.OpExecError("Can't find job %s" % job_id)

      status = jobdetails[0]

      logging.debug("Status of job %s is %s", job_id, status)

      if test == constants.JQT_EXPANDNAMES:
        if status != constants.JOB_STATUS_WAITING:
          raise errors.OpExecError("Job status while expanding names is '%s',"
                                   " not '%s' as expected" %
                                   (status, constants.JOB_STATUS_WAITING))
      elif test in (constants.JQT_EXEC, constants.JQT_LOGMSG):
        if status != constants.JOB_STATUS_RUNNING:
          raise errors.OpExecError("Job status while executing opcode is '%s',"
                                   " not '%s' as expected" %
                                   (status, constants.JOB_STATUS_RUNNING))

      if test == constants.JQT_STARTMSG:
        logging.debug("Expecting %s test messages", arg)
        self._testmsgs = []
      elif test == constants.JQT_LOGMSG:
        # Without a start message there is no list to count; report this the
        # same way ReportLogMessage does instead of failing in len()
        if self._testmsgs is None:
          raise errors.OpExecError("Received test message without a preceding"
                                   " start message")
        if len(self._testmsgs) != arg:
          raise errors.OpExecError("Received %s test messages when %s are"
                                   " expected" % (len(self._testmsgs), arg))
    finally:
      logging.debug("Closing socket")
      sock.close()
430
431
def TestJobqueue(opts, args):
  """Runs a few tests on the job queue.

  Besides the job submission and job dependency tests, one job is run for
  each test mode (success, multiple successful opcodes, failure, partial
  failure); the log messages, results and final status reported for it are
  compared with what the mode should produce.

  @param opts: the command line options selected by the user
  @param args: not used
  @rtype: int
  @return: the desired exit code
  @raise errors.OpExecError: if any of the checks fails

  """
  _TestJobSubmission(opts)
  _TestJobDependency(opts)

  (TM_SUCCESS,
   TM_MULTISUCCESS,
   TM_FAIL,
   TM_PARTFAIL) = range(4)
  TM_ALL = compat.UniqueFrozenset([
    TM_SUCCESS,
    TM_MULTISUCCESS,
    TM_FAIL,
    TM_PARTFAIL,
    ])

  for mode in TM_ALL:
    test_messages = [
      "Testing mode %s" % mode,
      "Hello World",
      "A",
      "",
      "B",
      "Foo|bar|baz",
      utils.TimestampForFilename(),
      ]

    fail = mode in (TM_FAIL, TM_PARTFAIL)

    # op_fails holds the "fail" setting of each opcode of the job
    if mode == TM_PARTFAIL:
      ToStdout("Testing partial job failure")
      op_fails = [False, False, True, False]
      # The opcode after the failing one reports no messages
      expect_messages = 3 * [test_messages]
      expect_opstatus = [
        constants.OP_STATUS_SUCCESS,
        constants.OP_STATUS_SUCCESS,
        constants.OP_STATUS_ERROR,
        constants.OP_STATUS_ERROR,
        ]
      expect_resultlen = 2
    elif mode == TM_MULTISUCCESS:
      ToStdout("Testing multiple successful opcodes")
      op_fails = [False, False]
      expect_messages = 2 * [test_messages]
      expect_opstatus = [
        constants.OP_STATUS_SUCCESS,
        constants.OP_STATUS_SUCCESS,
        ]
      expect_resultlen = 2
    else:
      if mode == TM_SUCCESS:
        ToStdout("Testing job success")
        expect_opstatus = [constants.OP_STATUS_SUCCESS]
      elif mode == TM_FAIL:
        ToStdout("Testing job failure")
        expect_opstatus = [constants.OP_STATUS_ERROR]
      else:
        raise errors.ProgrammerError("Unknown test mode %s" % mode)

      op_fails = [fail]
      expect_messages = [test_messages]
      expect_resultlen = 1

    ops = [opcodes.OpTestJqueue(notify_waitlock=True,
                                notify_exec=True,
                                log_messages=test_messages,
                                fail=op_fail)
           for op_fail in op_fails]

    cl = cli.GetClient()
    cli.SetGenericOpcodeOpts(ops, opts)

    # Send job to master daemon
    job_id = cli.SendJob(ops, cl=cl)

    reporter = _JobQueueTestReporter()
    results = None

    try:
      results = cli.PollJob(job_id, cl=cl, reporter=reporter)
    except errors.OpExecError as err:
      if not fail:
        raise
      ToStdout("Ignoring error for 'job fail' test: %s", err)
    else:
      if fail:
        raise errors.OpExecError("Job didn't fail when it should")

    # Check the results; a failed job must not have delivered any
    if fail:
      if results is not None:
        raise errors.OpExecError("Received result from failed job")
    elif len(results) != expect_resultlen:
      raise errors.OpExecError("Received %s results (%s), expected %s" %
                               (len(results), results, expect_resultlen))

    # Check the received log messages
    all_messages = [i for j in expect_messages for i in j]
    if reporter.GetTestMessages() != all_messages:
      raise errors.OpExecError("Received test messages don't match input"
                               " (input %r, received %r)" %
                               (all_messages, reporter.GetTestMessages()))

    # Check the job ID seen by the reporter
    reported_job_id = reporter.GetJobId()
    if reported_job_id != job_id:
      raise errors.OpExecError("Reported job ID %s doesn't match"
                               " submission job ID %s" %
                               (reported_job_id, job_id))

    jobdetails = cli.GetClient().QueryJobs([job_id], ["status", "opstatus"])[0]
    if not jobdetails:
      raise errors.OpExecError("Can't find job %s" % job_id)

    if fail:
      exp_status = constants.JOB_STATUS_ERROR
    else:
      exp_status = constants.JOB_STATUS_SUCCESS

    (final_status, final_opstatus) = jobdetails
    if final_status != exp_status:
      raise errors.OpExecError("Final job status is %s, not %s as expected" %
                               (final_status, exp_status))
    if len(final_opstatus) != len(ops):
      raise errors.OpExecError("Did not receive status for all opcodes (got %s,"
                               " expected %s)" %
                               (len(final_opstatus), len(ops)))
    if final_opstatus != expect_opstatus:
      raise errors.OpExecError("Opcode status is %s, expected %s" %
                               (final_opstatus, expect_opstatus))

  ToStdout("Job queue test successful")

  return 0
581
582
def ListLocks(opts, args):
  """Prints the list of locks, optionally repeating at a fixed interval.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list (not used)
  @rtype: int
  @return: the desired exit code

  """
  fields = ParseFields(opts.output, _LIST_LOCKS_DEF_FIELDS)

  def _DashIfNone(fn):
    """Wraps a formatter so that any false value is shown as a dash.

    """
    return lambda value: fn(value) if value else "-"

  def _FormatPending(value):
    """Format pending acquires.

    """
    entries = []
    for (mode, threads) in value:
      entries.append("%s:%s" % (mode, ",".join(str(t) for t in threads)))
    return utils.CommaJoin(entries)

  # Formatting functions for the fields needing special treatment
  fmtoverride = {
    "mode": (_DashIfNone(str), False),
    "owner": (_DashIfNone(",".join), False),
    "pending": (_DashIfNone(_FormatPending), False),
    }

  while True:
    status = GenericList(constants.QR_LOCK, fields, None, None,
                         opts.separator, not opts.no_headers,
                         format_override=fmtoverride, verbose=opts.verbose)

    if status != constants.EXIT_SUCCESS:
      return status

    # Without an interval the list is printed only once
    if not opts.interval:
      return 0

    ToStdout("")
    time.sleep(opts.interval)
631
632
# Command table; each entry holds the function implementing the command, the
# definition of its positional arguments, its options, and two strings which
# look like the usage synopsis and the one-line description
commands = {
  "delay": (
    Delay, [ArgUnknown(min=1, max=1)],
    [cli_option("--no-master", dest="on_master", default=True,
                action="store_false", help="Do not sleep in the master code"),
     cli_option("-n", dest="on_nodes", default=[],
                action="append", help="Select nodes to sleep on"),
     cli_option("-r", "--repeat", type="int", default="0", dest="repeat",
                help="Number of times to repeat the sleep"),
     cli_option("-l", "--no-locks", default=False, dest="no_locks",
                action="store_true",
                help="Don't take locks while performing the delay"),
     DRY_RUN_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
    "[opts...] <duration>", "Executes a TestDelay OpCode"),
  "submit-job": (
    GenericOpCodes, [ArgFile(min=1)],
    [VERBOSE_OPT,
     cli_option("--op-repeat", type="int", default="1", dest="rep_op",
                help="Repeat the opcode sequence this number of times"),
     cli_option("--job-repeat", type="int", default="1", dest="rep_job",
                help="Repeat the job this number of times"),
     cli_option("--timing-stats", default=False,
                action="store_true", help="Show timing stats"),
     cli_option("--each", default=False, action="store_true",
                help="Submit each job separately"),
     DRY_RUN_OPT, PRIORITY_OPT,
     ],
    "<op_list_file...>", "Submits jobs built from json files"
    " containing a list of serialized opcodes"),
  "iallocator": (
    TestAllocator, [ArgUnknown(min=1)],
    [cli_option("--dir", dest="direction", default=constants.IALLOCATOR_DIR_IN,
                choices=list(constants.VALID_IALLOCATOR_DIRECTIONS),
                help="Show allocator input (in) or allocator"
                " results (out)"),
     IALLOCATOR_OPT,
     cli_option("-m", "--mode", default="relocate",
                choices=list(constants.VALID_IALLOCATOR_MODES),
                help=("Request mode (one of %s)" %
                      utils.CommaJoin(constants.VALID_IALLOCATOR_MODES))),
     cli_option("--memory", default=128, type="unit",
                help="Memory size for the instance (MiB)"),
     cli_option("--disks", default="4096,4096",
                help="Comma separated list of disk sizes (MiB)"),
     DISK_TEMPLATE_OPT,
     cli_option("--nics", default="00:11:22:33:44:55",
                help="Comma separated list of nics, each nic"
                " definition is of form mac/ip/bridge, if"
                " missing values are replaced by None"),
     OS_OPT,
     cli_option("-p", "--vcpus", default=1, type="int",
                help="Select number of VCPUs for the instance"),
     cli_option("--tags", default=None,
                help="Comma separated list of tags"),
     cli_option("--evac-mode", default=constants.NODE_EVAC_ALL,
                choices=list(constants.NODE_EVAC_MODES),
                help=("Node evacuation mode (one of %s)" %
                      utils.CommaJoin(constants.NODE_EVAC_MODES))),
     cli_option("--target-groups", help="Target groups for relocation",
                default=[], action="append"),
     cli_option("--spindle-use", help="How many spindles to use",
                default=1, type="int"),
     cli_option("--count", help="How many instances to allocate",
                default=2, type="int"),
     DRY_RUN_OPT, PRIORITY_OPT,
     ],
    "{opts...} <instance>", "Executes a TestAllocator OpCode"),
  "test-jobqueue": (
    TestJobqueue, ARGS_NONE, [PRIORITY_OPT],
    "", "Test a few aspects of the job queue"),
  "locks": (
    ListLocks, ARGS_NONE,
    [NOHDR_OPT, SEP_OPT, FIELDS_OPT, INTERVAL_OPT, VERBOSE_OPT],
    "[--interval N]", "Show a list of locks in the master daemon"),
  }
708
709
#: Maps an alternative command name to the name of the command it stands for
aliases = {"allocator": "iallocator"}
713
714
717