Package ganeti :: Package utils :: Module text
[hide private]
[frames | no frames]

Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # All rights reserved. 
  6  # 
  7  # Redistribution and use in source and binary forms, with or without 
  8  # modification, are permitted provided that the following conditions are 
  9  # met: 
 10  # 
 11  # 1. Redistributions of source code must retain the above copyright notice, 
 12  # this list of conditions and the following disclaimer. 
 13  # 
 14  # 2. Redistributions in binary form must reproduce the above copyright 
 15  # notice, this list of conditions and the following disclaimer in the 
 16  # documentation and/or other materials provided with the distribution. 
 17  # 
 18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
 19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
 22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 29   
 30  """Utility functions for manipulating or working with text. 
 31   
 32  """ 
 33   
 34   
 35  import re 
 36  import os 
 37  import time 
 38  import collections 
 39   
 40  from ganeti import errors 
 41  from ganeti import compat 
 42   
 43   
 44  #: Unit checker regexp 
 45  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 46   
 47  #: Characters which don't need to be quoted for shell commands 
 48  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 49   
 50  #: Shell param checker regexp 
 51  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 52   
 53  #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026) 
 54  _ASCII_ELLIPSIS = "..." 
 55   
 56  #: MAC address octet 
 57  _MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}" 
 58   
 59   
def MatchNameComponent(key, name_list, case_sensitive=True):
    """Look up a (possibly abbreviated) name in a list of full names.

    A key such as I{'test1'} is compared against entries like
    C{['test1.example.com', 'test2.example.com', ...]}. The key matches an
    entry if it is equal to it or if it is a prefix ending at a dot, so
    I{'test1'} and I{'test1.example'} match C{'test1.example.com'} while
    I{'test1.ex'} does not. If more than one entry matches, the lookup is
    treated as failed, unless exactly one entry equals the key itself.

    @type key: str
    @param key: the name to be searched
    @type name_list: list
    @param name_list: the list of strings against which to search the key
    @type case_sensitive: boolean
    @param case_sensitive: whether to provide a case-sensitive match

    @rtype: None or str
    @return: None if there is no match I{or} if there are multiple matches,
        otherwise the element from the list which matches

    """
    # An exact (case-sensitive) hit always wins
    if key in name_list:
        return key

    if case_sensitive:
        flags = 0
    else:
        flags = re.IGNORECASE
        key = key.upper()

    matcher = re.compile(r"^%s(\..*)?$" % re.escape(key), flags)

    candidates = [name for name in name_list
                  if matcher.match(name) is not None]

    # Full-string matches are only tracked for case-insensitive lookups; the
    # case-sensitive ones were already handled by the membership test above
    if case_sensitive:
        exact = []
    else:
        exact = [name for name in candidates if name.upper() == key]

    for hits in (exact, candidates):
        if len(hits) == 1:
            return hits[0]

    return None
107 108
def _DnsNameGlobHelper(match):
    """Translate one token of a DNS glob into regular expression syntax.

    Used as the replacement callback of L{DnsNameGlobPattern}.

    @param match: regular expression match object whose whole match is either
        a single wildcard character or a run of literal text
    @rtype: string
    @return: regular expression fragment equivalent to the matched token

    """
    token = match.group(0)

    # Wildcards never cross the dot separating DNS name parts
    wildcards = {
        "*": "[^.]*",
        "?": "[^.]",
    }

    if token in wildcards:
        return wildcards[token]

    return re.escape(token)
123 124
def DnsNameGlobPattern(pattern):
    """Convert a DNS name globbing pattern into a regular expression.

    A globbing pattern such as C{*.site} is translated piece by piece.
    Escape sequences and ranges (e.g. [a-z]) are not supported.

    The resulting expression is anchored at the leftmost part of the name. An
    asterisk (*) stands for any number of characters other than the dot (.)
    separating DNS name parts; a question mark (?) stands for exactly one
    such character.

    @type pattern: string
    @param pattern: DNS name globbing pattern
    @rtype: string
    @return: Regular expression

    """
    # Each wildcard and each run of literal text is handed to the helper
    translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

    return r"^%s(\..*)?$" % translated
142 143
def FormatUnit(value, units, roman=False):
    """Render a number of MiB using the requested unit.

    @type value: int
    @param value: integer representing the value in MiB (1048576)
    @type units: char
    @param units: the type of formatting we should do:
        - 'h' for automatic scaling
        - 'm' for MiBs
        - 'g' for GiBs
        - 't' for TiBs
    @param roman: passed through unchanged to C{compat.RomanOrRounded}
        (presumably selects roman numeral output; confirm against that helper)
    @rtype: str
    @return: the formatted value; a unit suffix ("M", "G" or "T") is appended
        only when automatic scaling ('h') was requested
    @raise errors.ProgrammerError: if C{units} is not one of the above

    """
    if units not in ("m", "g", "t", "h"):
        raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

    autoscale = (units == "h")

    # Pick the scaled number, the number of decimals and the suffix letter
    if units == "m" or (autoscale and value < 1024):
        (scaled, digits, letter) = (value, 0, "M")
    elif units == "g" or (autoscale and value < (1024 * 1024)):
        (scaled, digits, letter) = (float(value) / 1024, 1, "G")
    else:
        (scaled, digits, letter) = (float(value) / 1024 / 1024, 1, "T")

    if autoscale:
        suffix = letter
    else:
        suffix = ""

    return "%s%s" % (compat.RomanOrRounded(scaled, digits, roman), suffix)
180 181
def ParseUnit(input_string):
    """Tries to extract number and scale from the given string.

    Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
    [UNIT]}. If no unit is specified, it defaults to MiB. Return value
    is always an int in MiB, rounded up to the next multiple of 4.

    @param input_string: value to parse; it is converted with C{str} first
    @rtype: int
    @return: the size in MiB
    @raise errors.UnitParseError: if the input is malformed or uses an
        unknown unit

    """
    m = _PARSEUNIT_REGEX.match(str(input_string))
    if not m:
        raise errors.UnitParseError("Invalid format")

    (number, unit) = m.groups()

    # The regular expression only checks for digits and dots, hence strings
    # like "1.2.3" or "." get this far; report them as parse errors instead of
    # leaking a bare ValueError to the caller
    try:
        value = float(number)
    except ValueError:
        raise errors.UnitParseError("Invalid format")

    if unit:
        lcunit = unit.lower()
    else:
        lcunit = "m"

    if lcunit in ("m", "mb", "mib"):
        # Value already in MiB
        pass

    elif lcunit in ("g", "gb", "gib"):
        value *= 1024

    elif lcunit in ("t", "tb", "tib"):
        value *= 1024 * 1024

    else:
        raise errors.UnitParseError("Unknown unit: %s" % unit)

    # Make sure we round up
    if int(value) < value:
        value += 1

    # Round up to the next multiple of 4
    value = int(value)
    if value % 4:
        value += 4 - value % 4

    return value
225 226
def ShellQuote(value):
    """Quote a single shell argument following POSIX rules.

    Values made up exclusively of characters matched by the module's
    unquoted-characters expression are returned as they are. Anything else is
    wrapped in single quotes, with embedded single quotes spelled as C{'\\''}.

    @type value: str
    @param value: the argument to be quoted
    @rtype: str
    @return: the quoted value

    """
    if not _SHELL_UNQUOTED_RE.match(value):
        # Close the quote, emit an escaped quote, reopen the quote
        escaped = value.replace("'", "'\\''")
        return "'%s'" % escaped

    return value
240 241
def ShellQuoteArgs(args):
    """Quote every argument of a list and join them into one string.

    @type args: list
    @param args: list of arguments to be quoted
    @rtype: str
    @return: the quoted arguments concatenated with spaces

    """
    quoted = []
    for arg in args:
        quoted.append(ShellQuote(arg))

    return " ".join(quoted)
252 253
def ShellCombineCommands(cmdlist):
    """Build a single shell invocation out of a list of commands.

    Each command (itself a list of arguments) is quoted and the commands are
    chained with C{&&}.

    @type cmdlist: list
    @param cmdlist: list of commands, each being a list of arguments
    @rtype: list
    @return: argument vector of the form C{["/bin/sh", "-c", script]}

    """
    quoted_commands = [ShellQuoteArgs(cmd) for cmd in cmdlist]
    script = " && ".join(quoted_commands)

    return ["/bin/sh", "-c", script]
259 260
class ShellWriter(object):
    """Small helper for emitting script lines with indentation.

    Every call to L{Write} puts one newline-terminated line on the wrapped
    file object, prefixed according to the current indentation level.

    """
    # NOTE(review): value reproduced as shown in the listing, which collapses
    # runs of whitespace; confirm the intended width of one indentation step.
    INDENT_STR = " "

    def __init__(self, fh, indent=True):
        """Sets up the writer.

        @param fh: object providing a C{write} method which receives the output
        @type indent: boolean
        @param indent: whether lines are prefixed with indentation at all

        """
        self._indent = 0
        self._indent_enabled = indent
        self._fh = fh

    def IncIndent(self):
        """Go one indentation level deeper.

        """
        self._indent = self._indent + 1

    def DecIndent(self):
        """Go back one indentation level.

        """
        assert self._indent > 0
        self._indent = self._indent - 1

    def Write(self, txt, *args):
        """Emit a single line.

        @type txt: string
        @param txt: the line, or a format string if C{args} are given
        @param args: values interpolated into C{txt} using the C{%} operator

        """
        assert self._indent >= 0

        line = txt
        if args:
            line = txt % args

        emit = self._fh.write

        # Empty lines are left without any leading whitespace
        if self._indent_enabled and line:
            emit(self._indent * self.INDENT_STR)

        emit(line)
        emit("\n")
306 307
def GenerateSecret(numbytes=20):
    """Generates a random secret.

    This reads random bytes from C{os.urandom} and returns them as a hex
    string (so that it can be used where an ASCII string is needed).

    @param numbytes: the number of bytes which will be represented by the
        returned string (defaulting to 20, the length of a SHA1 hash)
    @rtype: str
    @return: an hex representation of the random sequence, twice as many
        characters as C{numbytes}

    """
    # Imported locally to keep this function self-contained
    import binascii

    # The "hex" string codec only exists on Python 2; hexlify is available
    # everywhere but returns bytes on Python 3, hence the conversion below
    digest = binascii.hexlify(os.urandom(numbytes))
    if not isinstance(digest, str):
        digest = digest.decode("ascii")

    return digest
321 322
def _MakeMacAddrRegexp(octets):
    """Builds a regular expression for verifying MAC addresses.

    The expression matches exactly C{octets} colon-separated octets,
    regardless of case.

    @type octets: integer
    @param octets: How many octets to expect (1-6)
    @return: Compiled regular expression

    """
    assert octets > 0
    assert octets <= 6

    # Anchor with "\Z" instead of "$": the latter also matches just before a
    # trailing newline, which would let "aa:bb:cc\n" pass validation
    return re.compile(r"^%s\Z" % ":".join([_MAC_ADDR_OCTET_RE] * octets),
                      re.I)
#: Regular expression for a full MAC address (six octets)
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Regular expression for half a MAC address (three octets)
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)
def _MacAddressCheck(check_re, mac, msg):
    """Validate a MAC address (or part of one) against an expression.

    @param check_re: Compiled regular expression as returned by C{re.compile}
    @type mac: string
    @param mac: MAC address to be validated
    @type msg: string
    @param msg: Error message (%s will be replaced with MAC address)
    @rtype: string
    @return: the lower-cased address if it matches
    @raise errors.OpPrereqError: if the address does not match

    """
    if not check_re.match(mac):
        raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)

    return mac.lower()
359 360
def NormalizeAndValidateMac(mac):
    """Validate a complete, six-octet MAC address and lower-case it.

    Only the colon-separated notation is accepted.

    @type mac: string
    @param mac: MAC address to be validated
    @rtype: string
    @return: Normalized and validated MAC address
    @raise errors.OpPrereqError: If the MAC address isn't valid

    """
    error_template = "Invalid MAC address '%s'"

    return _MacAddressCheck(_MAC_CHECK_RE, mac, error_template)
375 376
def NormalizeAndValidateThreeOctetMacPrefix(mac):
    """Validate a three-octet MAC address prefix and lower-case it.

    The supplied string must consist of exactly three colon-separated octets.

    @type mac: string
    @param mac: Prefix to be validated
    @rtype: string
    @return: Normalized and validated prefix
    @raise errors.OpPrereqError: If the MAC address prefix isn't valid

    """
    error_template = "Invalid MAC address prefix '%s'"

    return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_template)
392 393
def SafeEncode(text):
    """Return a 'safe' version of a source string.

    This function mangles the input string and returns a version that
    should be safe to display/encode as ASCII. Text (unicode) strings are
    first converted to ASCII using the 'backslashreplace' error handler,
    which gets rid of any non-ASCII characters. The resulting bytes are then
    processed one by one: tab, newline and carriage return become C{\\t},
    C{\\n} and C{\\r}, any other non-printable byte becomes C{\\xNN} and
    everything else is copied verbatim. Single quotes and backslashes are
    deliberately left alone, which keeps the escaping stable for them.

    @type text: str or unicode
    @param text: input data
    @rtype: str
    @return: a safe version of text

    """
    if not isinstance(text, bytes):
        # Only text strings need encoding; byte strings are handled as-is
        text = text.encode("ascii", "backslashreplace")

    pieces = []
    # Iterating over a bytearray yields integers on every Python version
    for code in bytearray(text):
        if code == 0x09:
            pieces.append(r"\t")
        elif code == 0x0a:
            pieces.append(r"\n")
        elif code == 0x0d:
            pieces.append(r"\r")
        elif code < 32 or code >= 127: # non-printable
            pieces.append("\\x%02x" % code)
        else:
            pieces.append(chr(code))

    return "".join(pieces)
429 430
def UnescapeAndSplit(text, sep=","):
    r"""Split and unescape a string based on a given separator.

    This function splits a string based on a separator where the
    separator itself can be escaped in order to be part of an element.
    The escaping rules are (assuming comma being the separator):
      - a plain , separates the elements
      - a sequence \\\\, (double backslash plus comma) is handled as a
        backslash plus a separator comma
      - a sequence \, (backslash plus comma) is handled as a
        non-separator comma

    @type text: string
    @param text: the string to split
    @type sep: string
    @param sep: the separator
    @rtype: list
    @return: a list of strings

    """
    merged = []
    # Piece ending in an escaped separator, waiting for its continuation
    pending = None

    # Split without looking at escapes first, then glue back together every
    # piece which ended with an odd number of backslashes (i.e. whose
    # separator was escaped); done in a single pass over the pieces
    for piece in text.split(sep):
        if pending is not None:
            piece = pending + sep + piece
            pending = None

        num_b = len(piece) - len(piece.rstrip("\\"))
        if num_b % 2 == 1:
            pending = piece
        else:
            merged.append(piece)

    if pending is not None:
        # Trailing escape without anything left to join with
        merged.append(pending)

    # finally, replace backslash-something with something
    return [re.sub(r"\\(.)", r"\1", v) for v in merged]
473 474
def EscapeAndJoin(slist, sep=","):
    """Encode a list in a way parsable by UnescapeAndSplit.

    Backslashes are doubled and every occurrence of the separator inside an
    element is prefixed with a backslash before the elements are joined.

    @type slist: list of strings
    @param slist: the strings to be encoded
    @type sep: string
    @param sep: the separator placed between the elements
    @rtype: string
    @return: the encoding of the list as a string

    """
    escaped_sep = "\\" + sep

    # Plain string replacement is used on purpose: building a regular
    # expression from the separator would turn separators such as "d" or "n"
    # into the special sequences "\\d" and "\\n". Backslashes must be doubled
    # first so the ones added for the separator aren't doubled again.
    return sep.join([v.replace("\\", "\\\\").replace(sep, escaped_sep)
                     for v in slist])
486 487
def CommaJoin(names):
    """Join identifiers into a human-readable, comma-separated string.

    Every element is converted with C{str} before joining.

    @param names: set, list or tuple
    @return: a string with the formatted results

    """
    return ", ".join(map(str, names))
496 497
def FormatTime(val, usecs=None):
    """Formats a time value.

    The timestamp is rendered in local time as C{YYYY-MM-DD HH:MM:SS}.

    @type val: float or None
    @param val: Timestamp as returned by time.time() (seconds since Epoch,
        1970-01-01 00:00:00 UTC)
    @type usecs: int or None
    @param usecs: if not None, appended to the result as a zero-padded,
        six-digit fraction (e.g. ".000042")
    @return: a string value or N/A if we don't have a valid timestamp

    """
    if not isinstance(val, (int, float)):
        # Covers None as well as any other non-numeric value
        return "N/A"

    # Spelled-out equivalent of "%F %T"; those two shortcuts work on Linux
    # but are not guaranteed on all platforms
    result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

    if usecs is not None:
        result += ".%06d" % usecs

    return result
518 519
def FormatSeconds(secs):
    """Render a duration given in seconds in a readable form.

    The value is rounded to whole seconds and broken down into days, hours,
    minutes and seconds. Leading units which are zero are left out.

    @type secs: number
    @param secs: Number of seconds
    @rtype: string
    @return: Formatted seconds (e.g. "2d 9h 19m 49s")

    """
    remaining = round(secs, 0)
    chunks = []

    # Negative values would be a bit tricky, hence they're not broken down
    if remaining > 0:
        for (suffix, length) in (("d", 24 * 60 * 60), ("h", 60 * 60),
                                 ("m", 60)):
            (count, remaining) = divmod(remaining, length)
            # Once a larger unit was shown, all smaller ones are shown too
            if chunks or count:
                chunks.append("%d%s" % (count, suffix))

    chunks.append("%ds" % remaining)

    return " ".join(chunks)
543 544
class LineSplitter(object):
    """Turns arbitrary chunks of data into newline-separated lines.

    Data is handed in through a file-like interface (C{write}, C{flush},
    C{close}); complete lines are passed on to a callback.

    """
    def __init__(self, line_fn, *args):
        """Initializes this class.

        @type line_fn: callable
        @param line_fn: Function called for each line, first parameter is line
        @param args: Extra arguments for L{line_fn}

        """
        assert callable(line_fn)

        if not args:
            self._line_fn = line_fn
        else:
            # Bind the extra arguments behind the line
            def _CallWithExtraArgs(line):
                return line_fn(line, *args) # pylint: disable=W0142

            self._line_fn = _CallWithExtraArgs

        self._lines = collections.deque()
        self._buffer = ""

    def write(self, data):
        """Accept a chunk of data.

        Complete lines are queued until the next L{flush}; a trailing partial
        line is kept back until more data arrives.

        """
        pieces = (self._buffer + data).split("\n")
        # The last piece has not been terminated by a newline yet
        self._buffer = pieces.pop()
        self._lines.extend(pieces)

    def flush(self):
        """Pass all queued lines to the callback, oldest first.

        """
        queue = self._lines
        while queue:
            line = queue.popleft()
            self._line_fn(line.rstrip("\r\n"))

    def close(self):
        """Flush queued lines and then hand over a non-empty partial line.

        """
        self.flush()

        leftover = self._buffer
        if leftover:
            self._line_fn(leftover)
584 585
def IsValidShellParam(word):
    """Check whether a word is safe from the shell's point of view.

    A safe word can be passed to a command via the shell without altering the
    command line, i.e. it reaches the actual command as it is.

    The check is overly restrictive on purpose, in order to be on the safe
    side.

    @type word: str
    @param word: the word to check
    @rtype: boolean
    @return: True if the word is 'safe'

    """
    return _SHELLPARAM_REGEX.match(word) is not None
603 604
def BuildShellCmd(template, *args):
    """Expand a command line template using verified arguments.

    Every argument is checked with L{IsValidShellParam}, i.e. it must not
    contain shell metacharacters. If all of them pass, the result of
    C{template % args} is returned.

    @type template: str
    @param template: the string holding the template for the
        string formatting
    @param args: the values to be interpolated into the template
    @rtype: str
    @return: the expanded command line
    @raise errors.ProgrammerError: if an argument is not a valid shell
        parameter

    """
    for arg in args:
        if IsValidShellParam(arg):
            continue

        raise errors.ProgrammerError("Shell argument '%s' contains"
                                     " invalid characters" % arg)

    return template % args
625 626
def FormatOrdinal(value):
    """Formats a number as an ordinal in the English language.

    E.g. the number 1 becomes "1st", 22 becomes "22nd" and 111 becomes
    "111th".

    @type value: integer
    @param value: Number
    @rtype: string
    @return: the number followed by its ordinal suffix

    """
    # The "teens" take "th" no matter which hundred they belong to, so they
    # have to be detected on the last two digits rather than the whole number
    if 10 < value % 100 < 20:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")

    return "%s%s" % (value, suffix)
651 652
def Truncate(text, length):
    """Shorten a string to a maximum length, marking the cut with an ellipsis.

    Values which are not strings are converted with C{str} first. Text that
    already fits is returned unchanged.

    @type text: string
    @param text: Text
    @type length: integer
    @param length: Desired length; must be larger than the ellipsis itself
    @rtype: string
    @return: Truncated text

    """
    ellipsis_len = len(_ASCII_ELLIPSIS)
    assert length > ellipsis_len

    # Serialize if necessary
    if not isinstance(text, basestring):
        text = str(text)

    if len(text) > length:
        # Leave room for the ellipsis within the requested length
        return text[:length - ellipsis_len] + _ASCII_ELLIPSIS

    return text
674 675
def FilterEmptyLinesAndComments(text):
    """Drop empty lines and comment lines from a multi-line string.

    Leading and trailing whitespace is stripped from every line; lines which
    are empty afterwards or start with C{#} are left out.

    @type text: string
    @param text: Input string
    @rtype: list
    @return: the remaining (stripped) lines, in their original order

    """
    kept = []

    for raw_line in text.splitlines():
        stripped = raw_line.strip()

        # Ignore empty lines and comments
        if not stripped or stripped.startswith("#"):
            continue

        kept.append(stripped)

    return kept
689 690
def FormatKeyValue(data):
    """Turn a dictionary into a list of "key=value" strings.

    The items are sorted so that the result has a stable order.

    @type data: dict
    @param data: the dictionary to format
    @rtype: list of string
    @return: one "key=value" string per dictionary entry

    """
    formatted = []

    for pair in sorted(data.items()):
        formatted.append("%s=%s" % pair)

    return formatted
701