Package ganeti :: Package utils :: Module text
[hide private]
[frames] | [no frames]

Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # All rights reserved. 
  6  # 
  7  # Redistribution and use in source and binary forms, with or without 
  8  # modification, are permitted provided that the following conditions are 
  9  # met: 
 10  # 
 11  # 1. Redistributions of source code must retain the above copyright notice, 
 12  # this list of conditions and the following disclaimer. 
 13  # 
 14  # 2. Redistributions in binary form must reproduce the above copyright 
 15  # notice, this list of conditions and the following disclaimer in the 
 16  # documentation and/or other materials provided with the distribution. 
 17  # 
 18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
 19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
 22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 29   
 30  """Utility functions for manipulating or working with text. 
 31   
 32  """ 
 33   
 34   
 35  import re 
 36  import os 
 37  import time 
 38  import collections 
 39   
 40  from ganeti import errors 
 41   
 42   
 43  #: Unit checker regexp 
 44  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 45   
 46  #: Characters which don't need to be quoted for shell commands 
 47  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 48   
 49  #: Shell param checker regexp 
 50  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 51   
 52  #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026) 
 53  _ASCII_ELLIPSIS = "..." 
 54   
 55  #: MAC address octet 
 56  _MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}" 
 57   
 58   
def MatchNameComponent(key, name_list, case_sensitive=True):
    """Look up a (possibly shortened) name in a list of full names.

    A key such as I{'test1'} is compared against entries like
    C{['test1.example.com', 'test2.example.com', ...]}. The key matches an
    entry if it is equal to it or if it is a prefix ending at a dot, so
    I{'test1'} and I{'test1.example'} match C{'test1.example.com'} while
    I{'test1.ex'} does not. If more than one entry matches, the result is
    treated as "no match" (e.g. I{'test1'} against C{['test1.example.com',
    'test1.example.org']}), unless the key is identical to one entry (e.g.
    I{'test1'} against C{['test1', 'test1.example.com']}).

    @type key: str
    @param key: the name to be searched
    @type name_list: list
    @param name_list: the list of strings against which to search the key
    @type case_sensitive: boolean
    @param case_sensitive: whether to provide a case-sensitive match

    @rtype: None or str
    @return: None if there is no match I{or} if there are multiple matches,
        otherwise the element from the list which matches

    """
    # A literal hit always wins, regardless of case sensitivity
    if key in name_list:
        return key

    if case_sensitive:
        flags = 0
    else:
        flags = re.IGNORECASE
        # Upper-cased so it can be compared with upper-cased candidates below
        key = key.upper()

    pattern = re.compile(r"^%s(\..*)?$" % re.escape(key), flags)
    candidates = [name for name in name_list
                  if pattern.match(name) is not None]

    if not case_sensitive:
        # Entries equal to the key when case is ignored take precedence over
        # entries which merely start with the key
        exact = [name for name in candidates if name.upper() == key]
        if len(exact) == 1:
            return exact[0]

    if len(candidates) == 1:
        return candidates[0]

    return None
106 107
def _DnsNameGlobHelper(match):
    """Helper function for L{DnsNameGlobPattern}.

    Translates one matched piece of a globbing pattern into the corresponding
    piece of a regular expression.

    @param match: Regular expression match object whose whole match (group 0)
        is the piece to translate
    @rtype: string
    @return: Regular expression fragment

    """
    wildcards = {
        "*": "[^.]*",
        "?": "[^.]",
    }

    piece = match.group(0)
    if piece in wildcards:
        return wildcards[piece]

    # Everything else is literal text and must not be interpreted as regex
    return re.escape(piece)
122 123
def DnsNameGlobPattern(pattern):
    """Converts a DNS name globbing pattern into a regular expression.

    The globbing pattern (e.g. C{*.site}) is split into wildcards and literal
    text, and each piece is translated by L{_DnsNameGlobHelper}. Escape
    sequences or ranges (e.g. [a-z]) are not supported.

    The resulting expression is anchored at the leftmost part of the name and
    allows further dot-separated parts after the pattern. An asterisk (*)
    matches any number of characters except the dot (.) separating DNS name
    parts; a question mark (?) matches exactly one character except the dot.

    @type pattern: string
    @param pattern: DNS name globbing pattern
    @rtype: string
    @return: Regular expression

    """
    translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)
    return r"^%s(\..*)?$" % translated
141 142
def FormatUnit(value, units):
    """Formats a number of MiB using the requested unit.

    With an explicit unit (C{'m'}, C{'g'} or C{'t'}) only the number is
    returned; with automatic scaling (C{'h'}) a one-letter suffix (C{M}, C{G}
    or C{T}) is appended to tell which unit was chosen.

    @type value: int
    @param value: integer representing the value in MiB (1048576)
    @type units: char
    @param units: the type of formatting we should do:
        - 'h' for automatic scaling
        - 'm' for MiBs
        - 'g' for GiBs
        - 't' for TiBs
    @rtype: str
    @return: the formatted value (with suffix)
    @raise errors.ProgrammerError: If C{units} is not one of the above

    """
    if units not in ("m", "g", "t", "h"):
        raise errors.ProgrammerError("Invalid unit specified '%s'" %
                                     str(units))

    autoscale = (units == "h")

    if units == "m" or (autoscale and value < 1024):
        suffix = "M" if autoscale else ""
        return "%d%s" % (round(value, 0), suffix)

    if units == "g" or (autoscale and value < (1024 * 1024)):
        suffix = "G" if autoscale else ""
        return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)

    # Either TiB was requested or the value is too large for GiB
    suffix = "T" if autoscale else ""
    return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
177 178
def ParseUnit(input_string):
    """Tries to extract number and scale from the given string.

    Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
    [UNIT]}. If no unit is specified, it defaults to MiB. The unit is
    case-insensitive and may be given as C{m}/C{mb}/C{mib}, C{g}/C{gb}/C{gib}
    or C{t}/C{tb}/C{tib}.

    The result is rounded up to a whole number of MiB and then up to the next
    multiple of 4.

    @param input_string: Value to parse (converted with C{str} first)
    @rtype: int
    @return: Size in MiB, always a multiple of 4
    @raise errors.UnitParseError: If the input is malformed or uses an unknown
        unit

    """
    m = _PARSEUNIT_REGEX.match(str(input_string))
    if not m:
        raise errors.UnitParseError("Invalid format")

    (number, unit) = m.groups()

    try:
        value = float(number)
    except ValueError:
        # The regular expression accepts any mix of digits and dots (e.g.
        # "1.2.3" or "."); report those like any other malformed input
        # instead of leaking a ValueError to the caller
        raise errors.UnitParseError("Invalid format")

    if unit:
        lcunit = unit.lower()
    else:
        lcunit = "m"

    if lcunit in ("m", "mb", "mib"):
        # Value already in MiB
        pass

    elif lcunit in ("g", "gb", "gib"):
        value *= 1024

    elif lcunit in ("t", "tb", "tib"):
        value *= 1024 * 1024

    else:
        raise errors.UnitParseError("Unknown unit: %s" % unit)

    # Make sure we round up
    if int(value) < value:
        value += 1

    # Round up to the next multiple of 4
    value = int(value)
    if value % 4:
        value += 4 - value % 4

    return value
222 223
def ShellQuote(value):
    """Quotes shell argument according to POSIX.

    Values made up only of characters accepted by C{_SHELL_UNQUOTED_RE} are
    returned unchanged. Everything else is wrapped in single quotes, with
    every embedded single quote replaced by the sequence C{'\\''} (close the
    quote, escaped quote, reopen the quote).

    @type value: str
    @param value: the argument to be quoted
    @rtype: str
    @return: the quoted value

    """
    if _SHELL_UNQUOTED_RE.match(value):
        return value

    escaped = value.replace("'", "'\\''")
    return "'%s'" % escaped
237 238
def ShellQuoteArgs(args):
    """Quotes a list of shell arguments.

    Each argument is passed through L{ShellQuote} individually.

    @type args: list
    @param args: list of arguments to be quoted
    @rtype: str
    @return: the quoted arguments concatenated with spaces

    """
    quoted = []
    for arg in args:
        quoted.append(ShellQuote(arg))
    return " ".join(quoted)
249 250
def ShellCombineCommands(cmdlist):
    """Out of a list of shell commands construct a single one.

    Every command (itself a list of arguments) is quoted with
    L{ShellQuoteArgs}; the commands are then chained with C{&&} and wrapped
    into a C{/bin/sh -c} invocation.

    @type cmdlist: list
    @param cmdlist: List of commands, each a list of arguments
    @rtype: list
    @return: Argument list C{["/bin/sh", "-c", script]}

    """
    script = " && ".join([ShellQuoteArgs(cmd) for cmd in cmdlist])
    return ["/bin/sh", "-c", script]
256 257
class ShellWriter(object):
    """Helper class to write scripts with indentation.

    Lines are written to a file-like object, prefixed with one copy of
    L{INDENT_STR} per indentation level.

    """
    # String written once per indentation level.
    # NOTE(review): the rendered source this was taken from shows a single
    # space; the renderer may have collapsed whitespace -- confirm against the
    # real file.
    INDENT_STR = " "

    def __init__(self, fh, indent=True):
        """Initializes this class.

        @param fh: File-like object providing C{write}
        @type indent: boolean
        @param indent: Whether lines should be indented at all

        """
        self._indent = 0
        self._indent_enabled = indent
        self._fh = fh

    def IncIndent(self):
        """Increase indentation level by 1.

        """
        self._indent = self._indent + 1

    def DecIndent(self):
        """Decrease indentation level by 1.

        The level must be greater than zero.

        """
        assert self._indent > 0
        self._indent = self._indent - 1

    def Write(self, txt, *args):
        """Write line to output file.

        @type txt: string
        @param txt: Line to write, or a format string if C{args} are given
        @param args: Optional values interpolated into C{txt} using C{%}

        """
        assert self._indent >= 0

        line = txt
        if args:
            line = txt % args

        # Indent only if there's something on the line
        if self._indent_enabled and line:
            self._fh.write(self.INDENT_STR * self._indent)

        self._fh.write(line)
        self._fh.write("\n")
303 304
def GenerateSecret(numbytes=20):
    """Generates a random secret.

    This will generate a pseudo-random secret returning an hex string
    (so that it can be used where an ASCII string is needed). The random
    bytes come from C{os.urandom}.

    @param numbytes: the number of bytes which will be represented by the
        returned string (defaulting to 20, the length of a SHA1 hash)
    @rtype: str
    @return: an hex representation of the pseudo-random sequence (two
        lowercase hexadecimal digits per byte)

    """
    # Imported locally so this function does not depend on changes to the
    # module-level imports
    import binascii

    # str.encode("hex") only exists on Python 2; hexlify works everywhere
    secret = binascii.hexlify(os.urandom(numbytes))
    if not isinstance(secret, str):
        # On Python 3 hexlify returns bytes, callers expect a native string
        secret = secret.decode("ascii")

    return secret
318 319
def _MakeMacAddrRegexp(octets):
    """Builds a regular expression for verifying MAC addresses.

    The expression matches exactly C{octets} colon-separated octets (see
    C{_MAC_ADDR_OCTET_RE}), ignoring case.

    @type octets: integer
    @param octets: How many octets to expect (1-6)
    @return: Compiled regular expression

    """
    assert octets > 0
    assert octets <= 6

    # Anchor with \Z rather than $: the latter also matches right before a
    # trailing newline and would accept e.g. "aa:bb:cc:dd:ee:ff\n"
    return re.compile(r"^%s\Z" % ":".join([_MAC_ADDR_OCTET_RE] * octets),
                      re.I)
#: Regular expression for full MAC address (six octets, as built by
#: L{_MakeMacAddrRegexp})
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Regular expression for half a MAC address (three octets, as built by
#: L{_MakeMacAddrRegexp})
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)
def _MacAddressCheck(check_re, mac, msg):
    """Checks a MAC address using a regular expression.

    @param check_re: Compiled regular expression as returned by C{re.compile}
    @type mac: string
    @param mac: MAC address to be validated
    @type msg: string
    @param msg: Error message (%s will be replaced with MAC address)
    @rtype: string
    @return: The address converted to lower case
    @raise errors.OpPrereqError: If the address doesn't match C{check_re}

    """
    if not check_re.match(mac):
        raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)

    return mac.lower()
356 357
def NormalizeAndValidateMac(mac):
    """Validates a six-octet MAC address and returns it in lower case.

    The address is checked against C{_MAC_CHECK_RE}; only the
    colon-separated format is accepted.

    @type mac: string
    @param mac: MAC address to be validated
    @rtype: string
    @return: Normalized and validated MAC address
    @raise errors.OpPrereqError: If the MAC address isn't valid

    """
    error_template = "Invalid MAC address '%s'"
    return _MacAddressCheck(_MAC_CHECK_RE, mac, error_template)
372 373
def NormalizeAndValidateThreeOctetMacPrefix(mac):
    """Validates a three-octet MAC address prefix and lower-cases it.

    The supplied string is checked against C{_MAC_PREFIX_CHECK_RE}, i.e. it
    must consist of three colon-separated octets.

    @type mac: string
    @param mac: Prefix to be validated
    @rtype: string
    @return: Normalized and validated prefix
    @raise errors.OpPrereqError: If the MAC address prefix isn't valid

    """
    error_template = "Invalid MAC address prefix '%s'"
    return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_template)
389 390
def SafeEncode(text):
    """Return a 'safe' version of a source string.

    This function mangles the input string and returns a version that
    should be safe to display/encode as ASCII. Text (unicode) input is first
    converted to ASCII using the 'backslashreplace' error handler, which gets
    rid of any non-ASCII characters. The resulting bytes are then escaped one
    by one: tab, newline and carriage return become C{\\t}, C{\\n} and
    C{\\r}, any other non-printable byte becomes C{\\xNN}, and everything else
    is kept as is.

    C{string_escape} is not used since it escapes single quotes and
    backslashes too, and that is too much; it is also not stable, i.e.
    string_escape(string_escape(x)) != string_escape(x).

    @type text: str or unicode
    @param text: input data
    @rtype: str
    @return: a safe version of text

    """
    if not isinstance(text, bytes):
        # Only text needs encoding; byte strings (plain "str" on Python 2)
        # are escaped as they are
        text = text.encode("ascii", "backslashreplace")

    pieces = []
    # bytearray yields integers on both Python 2 and Python 3
    for code in bytearray(text):
        if code == 0x09:
            pieces.append(r"\t")
        elif code == 0x0a:
            pieces.append(r"\n")
        elif code == 0x0d:
            # Used to be emitted as \'r due to a misplaced quote
            pieces.append(r"\r")
        elif code < 32 or code >= 127:  # non-printable
            pieces.append("\\x%02x" % code)
        else:
            pieces.append(chr(code))

    return "".join(pieces)
426 427
def UnescapeAndSplit(text, sep=","):
    r"""Split and unescape a string based on a given separator.

    This function splits a string based on a separator where the
    separator itself can be escaped in order to be part of an element.
    The escaping rules are (assuming comma being the separator):
      - a plain , separates the elements
      - a sequence \\, (two backslashes plus comma) is handled as a
        backslash followed by a separator comma
      - a sequence \, (backslash plus comma) is handled as a
        non-separator comma

    @type text: string
    @param text: the string to split
    @type sep: string
    @param sep: the separator
    @rtype: list
    @return: a list of strings

    """
    rlist = []
    # Element ending in an unescaped backslash, waiting to be joined with the
    # piece following it
    pending = None

    # We split by sep with no regard to escaping and then glue back together
    # the pieces which were split at an escaped separator
    for piece in text.split(sep):
        if pending is not None:
            piece = pending + sep + piece
            pending = None

        num_b = len(piece) - len(piece.rstrip("\\"))
        if num_b % 2 == 1:
            # An odd number of trailing backslashes means the separator that
            # followed was escaped; the merged element is checked again as it
            # may itself end with backslashes
            pending = piece
        else:
            rlist.append(piece)

    if pending is not None:
        # Trailing backslash with nothing left to merge with
        rlist.append(pending)

    # Finally, replace backslash-something with something; (?s) makes "."
    # match a newline too, so that an escaped newline is unescaped as well
    return [re.sub(r"(?s)\\(.)", r"\1", v) for v in rlist]
470 471
def EscapeAndJoin(slist, sep=","):
    """Encode a list in a way parsable by UnescapeAndSplit.

    Within every element each backslash is doubled and each occurrence of the
    separator is prefixed with a backslash; the escaped elements are then
    joined using the separator.

    @type slist: list of strings
    @param slist: the strings to be encoded
    @type sep: string
    @param sep: the separator
    @rtype: string
    @return: the encoding of the list as a string

    """
    escaped = []
    for item in slist:
        # Backslashes must be doubled first, otherwise the ones added for the
        # separator below would get doubled as well
        item = item.replace("\\", "\\\\")
        # Plain string replacement: the separator is taken literally, whatever
        # characters it consists of
        escaped.append(item.replace(sep, "\\" + sep))

    return sep.join(escaped)
483 484
def CommaJoin(names):
    """Nicely join a set of identifiers.

    Every item is converted with C{str} and the results are joined with a
    comma followed by a space.

    @param names: set, list or tuple
    @return: a string with the formatted results

    """
    formatted = []
    for item in names:
        formatted.append(str(item))
    return ", ".join(formatted)
493 494
def FormatTime(val, usecs=None):
    """Formats a time value.

    The timestamp is converted to local time and formatted as
    C{YYYY-MM-DD HH:MM:SS}; if C{usecs} is given it is appended as a
    six-digit fraction.

    @type val: float or None
    @param val: Timestamp as returned by time.time() (seconds since Epoch,
        1970-01-01 00:00:00 UTC)
    @type usecs: int or None
    @param usecs: Microseconds to append to the formatted time
    @return: a string value or N/A if we don't have a valid timestamp

    """
    # None is not a number either, so this also covers a missing timestamp
    if not isinstance(val, (int, float)):
        return "N/A"

    # Spelled out instead of "%F %T": those shortcuts produce the same output
    # where they are supported, but are not available on all platforms
    result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

    if usecs is not None:
        result += ".%06d" % usecs

    return result
515 516
def FormatSeconds(secs):
    """Formats seconds for easier reading.

    The value is rounded to whole seconds and split into days, hours, minutes
    and seconds. Leading units which are zero are left out; values which are
    not positive are shown as plain seconds.

    @type secs: number
    @param secs: Number of seconds
    @rtype: string
    @return: Formatted seconds (e.g. "2d 9h 19m 49s")

    """
    remaining = round(secs, 0)
    chunks = []

    # Negative values would be a bit tricky, they are not broken down
    if remaining > 0:
        for (suffix, length) in (("d", 24 * 60 * 60),
                                 ("h", 60 * 60),
                                 ("m", 60)):
            (count, remaining) = divmod(remaining, length)
            # Once a unit was shown, all smaller ones are shown as well
            if chunks or count:
                chunks.append("%d%s" % (count, suffix))

    chunks.append("%ds" % remaining)

    return " ".join(chunks)
540 541
class LineSplitter(object):
    """Splits data chunks into lines separated by newline.

    Instances provide a file-like interface (C{write}, C{flush} and
    C{close}). Data is collected until complete lines are available; each
    line is handed to a callback when the instance is flushed or closed.

    """
    def __init__(self, line_fn, *args):
        """Initializes this class.

        @type line_fn: callable
        @param line_fn: Function called for each line, first parameter is line
        @param args: Extra arguments for L{line_fn}

        """
        assert callable(line_fn)

        if not args:
            self._line_fn = line_fn
        else:
            def _CallWithArgs(line):
                """Passes the line and the extra arguments to C{line_fn}.

                """
                return line_fn(line, *args)  # pylint: disable=W0142

            self._line_fn = _CallWithArgs

        # Data after the last newline seen so far
        self._buffer = ""
        # Complete lines which were not yet passed to the callback
        self._lines = collections.deque()

    def write(self, data):
        """Adds data, queueing every line completed by it.

        @type data: string
        @param data: Chunk of data, may contain any number of newlines

        """
        pieces = (self._buffer + data).split("\n")
        # Whatever follows the last newline is an incomplete line
        self._buffer = pieces.pop()
        self._lines.extend(pieces)

    def flush(self):
        """Passes all queued lines to the callback.

        Carriage return and newline characters are stripped from the end of
        each line. Data belonging to an incomplete line is kept.

        """
        while self._lines:
            line = self._lines.popleft()
            self._line_fn(line.rstrip("\r\n"))

    def close(self):
        """Flushes queued lines and passes on any remaining data.

        The remaining (unterminated) data is passed to the callback as it is.

        """
        self.flush()
        if self._buffer:
            self._line_fn(self._buffer)
581 582
def IsValidShellParam(word):
    """Verifies if the given word is safe from the shell's p.o.v.

    A safe word can be passed to a command via the shell without altering
    the command line; it reaches the actual command unchanged. The check is
    done against C{_SHELLPARAM_REGEX}.

    Note that we are overly restrictive here, in order to be on the safe
    side.

    @type word: str
    @param word: the word to check
    @rtype: boolean
    @return: True if the word is 'safe'

    """
    return _SHELLPARAM_REGEX.match(word) is not None
600 601
def BuildShellCmd(template, *args):
    """Build a safe shell command line from the given arguments.

    Every argument is checked with L{IsValidShellParam} to make sure it is a
    valid shell parameter (i.e. it doesn't contain shell metacharacters). If
    all of them are fine, the result of template % args is returned.

    @type template: str
    @param template: the string holding the template for the
        string formatting
    @param args: Values to be interpolated into the template
    @rtype: str
    @return: the expanded command line
    @raise errors.ProgrammerError: If an argument is not a valid shell
        parameter

    """
    for arg in args:
        if IsValidShellParam(arg):
            continue
        raise errors.ProgrammerError("Shell argument '%s' contains"
                                     " invalid characters" % arg)

    return template % args
622 623
def FormatOrdinal(value):
    """Formats a number as an ordinal in the English language.

    E.g. the number 1 becomes "1st", 22 becomes "22nd" and 112 becomes
    "112th".

    @type value: integer
    @param value: Number
    @rtype: string
    @return: The number followed by its ordinal suffix

    """
    last_two = value % 100
    last = value % 10

    if 11 <= last_two <= 13:
        # Eleven, twelve and thirteen are irregular in every hundred (11th,
        # 112th, 1013th), not only below twenty
        suffix = "th"
    elif last == 1:
        suffix = "st"
    elif last == 2:
        suffix = "nd"
    elif last == 3:
        suffix = "rd"
    else:
        suffix = "th"

    return "%s%s" % (value, suffix)
648 649
def Truncate(text, length):
    """Truncate string and add ellipsis if needed.

    Values which are not strings are converted with C{str} first. Text longer
    than C{length} is cut so that, including the ellipsis
    (C{_ASCII_ELLIPSIS}), the result is exactly C{length} characters long.

    @type text: string
    @param text: Text
    @type length: integer
    @param length: Desired length, must be greater than the length of the
        ellipsis
    @rtype: string
    @return: Truncated text

    """
    assert length > len(_ASCII_ELLIPSIS)

    # Serialize if necessary
    if not isinstance(text, basestring):
        text = str(text)

    if len(text) > length:
        keep = length - len(_ASCII_ELLIPSIS)
        return text[:keep] + _ASCII_ELLIPSIS

    return text
671 672
def FilterEmptyLinesAndComments(text):
    """Filters empty lines and comments from a line-based string.

    Whitespace is removed from the beginning and end of all lines before
    they are checked; lines which are empty afterwards or which start with
    C{#} are dropped.

    @type text: string
    @param text: Input string
    @rtype: list
    @return: The remaining lines, stripped

    """
    kept = []

    for raw_line in text.splitlines():
        line = raw_line.strip()
        # Ignore empty lines and comments
        if not line or line.startswith("#"):
            continue
        kept.append(line)

    return kept
686 687
def FormatKeyValue(data):
    """Formats a dictionary as "key=value" parameters.

    The items are sorted to have a stable order.

    @type data: dict
    @param data: Dictionary to format
    @rtype: list of string
    @return: One "key=value" string per dictionary item

    """
    formatted = []

    for item in sorted(data.items()):
        formatted.append("%s=%s" % item)

    return formatted
698