Package ganeti :: Package utils :: Module text
[hide private]
[frames] | no frames]

Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # All rights reserved. 
  6  # 
  7  # Redistribution and use in source and binary forms, with or without 
  8  # modification, are permitted provided that the following conditions are 
  9  # met: 
 10  # 
 11  # 1. Redistributions of source code must retain the above copyright notice, 
 12  # this list of conditions and the following disclaimer. 
 13  # 
 14  # 2. Redistributions in binary form must reproduce the above copyright 
 15  # notice, this list of conditions and the following disclaimer in the 
 16  # documentation and/or other materials provided with the distribution. 
 17  # 
 18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
 19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
 22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 29   
 30  """Utility functions for manipulating or working with text. 
 31   
 32  """ 
 33   
 34   
 35  import re 
 36  import os 
 37  import time 
 38  import collections 
 39   
 40  from ganeti import errors 
 41  from ganeti import compat 
 42   
 43   
#: Unit checker regexp: group 1 is a run of digits and dots, group 2 (optional,
#: after optional whitespace) is an alphabetic unit suffix. Note that the first
#: group is deliberately loose and also accepts strings such as "1.2.3".
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")

#: Characters which don't need to be quoted for shell commands; a string
#: consisting only of these characters is passed through unquoted
_SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$")

#: Shell param checker regexp; matches non-empty words made up exclusively of
#: characters from this whitelist
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$")

#: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026)
_ASCII_ELLIPSIS = "..."

#: MAC address octet (regexp fragment for two hexadecimal digits; written in
#: lower case, callers are expected to compile it case-insensitively)
_MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}"
 58   
 59   
def MatchNameComponent(key, name_list, case_sensitive=True):
  """Look up a (possibly shortened) name in a list of full names.

  A key such as I{'test1'} is matched against entries like
  C{['test1.example.com', 'test2.example.com', ...]}. The key matches an
  entry if it is equal to it or if it is a prefix ending right before a
  dot, hence I{'test1'} and I{'test1.example'} match, but I{'test1.ex'}
  does not. If more than one entry matches, the result is "no match"
  (e.g. I{'test1'} against C{['test1.example.com', 'test1.example.org']}),
  unless the key is identical to one of the entries (e.g. I{'test1'}
  against C{['test1', 'test1.example.com']}).

  @type key: str
  @param key: the name to be searched
  @type name_list: list
  @param name_list: the list of strings against which to search the key
  @type case_sensitive: boolean
  @param case_sensitive: whether to provide a case-sensitive match

  @rtype: None or str
  @return: None if there is no match I{or} if there are multiple matches,
      otherwise the element from the list which matches

  """
  # A literal hit always wins, no matter how many other candidates exist
  if key in name_list:
    return key

  if case_sensitive:
    flags = 0
  else:
    flags = re.IGNORECASE
    key = key.upper()

  # The key followed by either nothing or a dot and arbitrary further labels
  pattern = re.compile(r"^%s(\..*)?$" % re.escape(key), flags)

  prefix_hits = [name for name in name_list
                 if pattern.match(name) is not None]

  # Full-length matches differing only in case; these are only collected in
  # case-insensitive mode and take precedence over plain prefix matches
  if case_sensitive:
    whole_hits = []
  else:
    whole_hits = [name for name in prefix_hits if name.upper() == key]

  for candidates in (whole_hits, prefix_hits):
    if len(candidates) == 1:
      return candidates[0]

  return None
107 108
def _DnsNameGlobHelper(match):
  """Helper function for L{DnsNameGlobPattern}.

  Translates one token of a globbing pattern (a single wildcard or a run of
  literal characters) into the corresponding regular expression fragment.

  @param match: match object whose whole match (group 0) is the token
  @rtype: string
  @return: Regular expression fragment for the token

  """
  token = match.group(0)

  # Wildcards never cross the dot separating DNS name parts
  wildcard = {
    "*": "[^.]*",
    "?": "[^.]",
    }.get(token)

  if wildcard is None:
    # Literal text, to be matched verbatim
    return re.escape(token)

  return wildcard


def DnsNameGlobPattern(pattern):
  """Generates regular expression from DNS name globbing pattern.

  Converts a DNS name globbing pattern (e.g. C{*.site}) into a regular
  expression. Neither escape sequences nor ranges (e.g. [a-z]) are
  supported.

  The resulting expression is anchored at the leftmost part of the name and
  accepts any number of additional dot-separated parts after the pattern. An
  asterisk (*) stands for any number of characters other than the dot (.)
  separating DNS name parts, a question mark (?) for exactly one such
  character.

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return r"^%s(\..*)?$" % translated
142 143
def FormatUnit(value, units, roman=False):
  """Formats an incoming number of MiB with the appropriate unit.

  With C{units} set to 'm', 'g' or 't' the value is expressed in that unit
  and no suffix is appended. With 'h' the unit is chosen from the magnitude
  of the value (below 1024 MiB: MiB, below 1024 * 1024 MiB: GiB, otherwise
  TiB) and the matching suffix ("M", "G" or "T") is appended.

  @type value: int
  @param value: integer representing the value in MiB (1048576)
  @type units: char
  @param units: the type of formatting we should do:
      - 'h' for automatic scaling
      - 'm' for MiBs
      - 'g' for GiBs
      - 't' for TiBs
  @type roman: boolean
  @param roman: passed through unchanged to L{compat.RomanOrRounded}
  @rtype: str
  @return: the formatted value (with suffix)
  @raise errors.ProgrammerError: if the unit or the value type is invalid

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  if not isinstance(value, (int, long, float)):
    raise errors.ProgrammerError("Invalid value specified '%s (%s)'" % (
      value, type(value)))

  autoscale = (units == "h")

  # Pick the amount to display, the number of decimals and the suffix letter
  if units == "m" or (autoscale and value < 1024):
    (amount, digits, letter) = (value, 0, "M")
  elif units == "g" or (autoscale and value < (1024 * 1024)):
    (amount, digits, letter) = (float(value) / 1024, 1, "G")
  else:
    (amount, digits, letter) = (float(value) / 1024 / 1024, 1, "T")

  # The suffix is only shown when the unit was chosen automatically
  if not autoscale:
    letter = ""

  return "%s%s" % (compat.RomanOrRounded(amount, digits, roman), letter)
184 185
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Recognized units
  (case-insensitive) are m/mb/mib, g/gb/gib and t/tb/tib. Return value
  is always an int in MiB, rounded up to the next multiple of 4.

  @param input_string: the value to parse; it is converted with C{str()}
      before being examined
  @rtype: int
  @return: the size in MiB, rounded up to a multiple of 4
  @raise errors.UnitParseError: if the input is malformed or uses an
      unknown unit

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regular expression accepts any mix of digits and dots, hence
    # inputs such as "1.2.3" or "." get this far; report them the same way
    # as any other malformed input instead of leaking a ValueError
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
229 230
def ShellQuote(value):
  """Quotes shell argument according to POSIX.

  Values made up solely of characters known to be harmless are returned
  unchanged. Everything else (including the empty string) is wrapped in
  single quotes, with embedded single quotes rewritten as C{'\\''}.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if _SHELL_UNQUOTED_RE.match(value) is None:
    # Close the quoted string, emit an escaped quote, reopen the string
    return "'%s'" % value.replace("'", "'\\''")

  return value
244 245
def ShellQuoteArgs(args):
  """Quotes a list of shell arguments.

  Each argument is quoted individually using L{ShellQuote}.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments concatenated with spaces

  """
  quoted = []
  for arg in args:
    quoted.append(ShellQuote(arg))

  return " ".join(quoted)
256 257
def ShellCombineCommands(cmdlist):
  """Out of a list of shell commands construct a single one.

  Every command is quoted using L{ShellQuoteArgs}; the quoted commands are
  chained with C{&&} and handed to C{/bin/sh -c}.

  @type cmdlist: list
  @param cmdlist: list of commands, each of them a list of arguments
  @rtype: list
  @return: argument list of the form C{["/bin/sh", "-c", script]}

  """
  quoted_cmds = [ShellQuoteArgs(cmd) for cmd in cmdlist]

  return ["/bin/sh", "-c", " && ".join(quoted_cmds)]
263 264
class ShellWriter(object):
  """Helper class to write scripts with indentation.

  Lines are written to a file-like object, prefixed with one copy of
  L{INDENT_STR} per indentation level.

  """
  # NOTE(review): the rendered listing this was taken from collapses runs of
  # whitespace; confirm the exact literal against the repository
  INDENT_STR = " "

  def __init__(self, fh, indent=True):
    """Initializes this class.

    @param fh: object providing a C{write} method
    @type indent: boolean
    @param indent: whether lines should be indented at all

    """
    self._indent = 0
    self._indent_enabled = indent
    self._fh = fh

  def IncIndent(self):
    """Increase indentation level by 1.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Decrease indentation level by 1.

    The indentation level must be greater than zero.

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Write line to output file.

    @type txt: string
    @param txt: the line, or a format string if C{args} are given
    @param args: if non-empty, values to be interpolated into C{txt} using
        the C{%} operator

    """
    assert self._indent >= 0

    line = txt
    if args:
      line = txt % args

    chunks = []

    # Blank lines are never indented
    if self._indent_enabled and line:
      chunks.append(self._indent * self.INDENT_STR)

    chunks.append(line)
    chunks.append("\n")

    for chunk in chunks:
      self._fh.write(chunk)
310 311
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  This will read random bytes from C{os.urandom} and return them as a hex
  string (so that it can be used where an ASCII string is needed).

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: an hex representation of the random sequence; it is twice as
      long as C{numbytes}

  """
  # Imported locally to keep this function self-contained
  import binascii # pylint: disable=C0415

  # The "hex" codec of str.encode only exists in Python 2; hexlify is
  # available everywhere and produces the same lower-case digits
  hexdata = binascii.hexlify(os.urandom(numbytes))

  if not isinstance(hexdata, str):
    # Python 3 returns bytes, callers expect a native string
    hexdata = hexdata.decode("ascii")

  return hexdata
325 326
def _MakeMacAddrRegexp(octets):
  """Builds a regular expression for verifying MAC addresses.

  The expression matches exactly C{octets} colon-separated octets and is
  compiled case-insensitively.

  @type octets: integer
  @param octets: How many octets to expect (1-6)
  @return: Compiled regular expression

  """
  assert 0 < octets <= 6

  pattern = ":".join(octets * [_MAC_ADDR_OCTET_RE])

  return re.compile("^%s$" % pattern, re.IGNORECASE)


#: Regular expression for full MAC address (six octets)
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Regular expression for half a MAC address (three octets)
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)
def _MacAddressCheck(check_re, mac, msg):
  """Checks a MAC address using a regular expression.

  @param check_re: Compiled regular expression as returned by C{re.compile}
  @type mac: string
  @param mac: MAC address to be validated
  @type msg: string
  @param msg: Error message (%s will be replaced with MAC address)
  @rtype: string
  @return: The MAC address converted to lower case
  @raise errors.OpPrereqError: If the address doesn't match C{check_re}

  """
  if not check_re.match(mac):
    raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)

  return mac.lower()
363 364
def NormalizeAndValidateMac(mac):
  """Normalizes and check if a MAC address is valid and contains six octets.

  Only the colon-separated notation is accepted. A formally correct address
  is returned converted to lower case.

  @type mac: string
  @param mac: MAC address to be validated
  @rtype: string
  @return: Normalized and validated MAC address
  @raise errors.OpPrereqError: If the MAC address isn't valid

  """
  error_msg = "Invalid MAC address '%s'"

  return _MacAddressCheck(_MAC_CHECK_RE, mac, error_msg)
379 380
def NormalizeAndValidateThreeOctetMacPrefix(mac):
  """Normalizes a potential MAC address prefix (three octets).

  The supplied string must consist of exactly three colon-separated octets.
  A formally correct prefix is returned converted to lower case.

  @type mac: string
  @param mac: Prefix to be validated
  @rtype: string
  @return: Normalized and validated prefix
  @raise errors.OpPrereqError: If the MAC address prefix isn't valid

  """
  error_msg = "Invalid MAC address prefix '%s'"

  return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_msg)
396 397
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. Unicode text is first
  converted to ASCII using the 'backslashreplace' error handler, which
  gets rid of any non-ASCII characters. Afterwards tab, newline and
  carriage return are replaced by C{\\t}, C{\\n} and C{\\r}, and every
  other non-printable byte by a C{\\xNN} escape. Single quotes and
  backslashes are deliberately left alone (unlike with the string_escape
  codec), which makes the encoding stable, i.e.
  C{SafeEncode(SafeEncode(x)) == SafeEncode(x)}.

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    text_type = unicode
  except NameError:
    # Python 3, where "str" is the text type
    text_type = str

  if isinstance(text, text_type):
    # only if text; if a byte string already, we handle it below
    text = text.encode("ascii", "backslashreplace")

  pieces = []
  for char in text:
    # Iterating over a byte string yields integers on Python 3 and
    # one-character strings on Python 2
    if isinstance(char, int):
      code = char
    else:
      code = ord(char)

    if code == 0x09:
      pieces.append(r"\t")
    elif code == 0x0a:
      pieces.append(r"\n")
    elif code == 0x0d:
      pieces.append(r"\r")
    elif code < 32 or code >= 127: # non-printable
      pieces.append("\\x%02x" % (code & 0xff))
    else:
      pieces.append(chr(code))

  return "".join(pieces)
433 434
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  The string is split at every occurrence of the separator, except where
  the separator is escaped with a backslash and thus part of an element.
  The escaping rules are (assuming comma being the separator):
    - a plain , separates the elements
    - a sequence \\, (two backslashes plus comma) is handled as a
      backslash followed by a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  # Split without looking at the escaping first
  fragments = text.split(sep)
  last_idx = len(fragments) - 1

  # A fragment ending in an odd number of backslashes had its separator
  # escaped, hence it is glued to the following fragment (if any); the result
  # is examined again as it may itself end in an escaped separator
  merged = []
  current = None
  for (idx, fragment) in enumerate(fragments):
    if current is None:
      current = fragment
    else:
      current = current + sep + fragment

    num_backslashes = len(current) - len(current.rstrip("\\"))
    if num_backslashes % 2 == 1 and idx < last_idx:
      continue

    # All backslashes are still in place, they are reduced below
    merged.append(current)
    current = None

  # Finally replace backslash-something with something
  unescape_re = re.compile(r"\\(.)")

  return [unescape_re.sub(r"\1", item) for item in merged]
477 478
def EscapeAndJoin(slist, sep=","):
  """Encode a list in a way parsable by UnescapeAndSplit.

  In every element backslashes are doubled and occurrences of the separator
  are prefixed with a backslash; the escaped elements are then joined using
  the separator.

  @type slist: list of strings
  @param slist: the strings to be encoded
  @type sep: string
  @param sep: the separator
  @rtype: string
  @return: the encoding of the list as a string

  """
  escaped_sep = "\\" + sep

  encoded = []
  for item in slist:
    # Backslashes must be doubled before the separator is escaped, otherwise
    # the backslashes added for the separator would be doubled as well
    item = item.replace("\\", "\\\\")
    # Plain string replacement on purpose: treating backslash+separator as a
    # regular expression breaks for separators such as "d" or "1", which
    # would turn into the class "\d" or the group reference "\1"
    encoded.append(item.replace(sep, escaped_sep))

  return sep.join(encoded)
490 491
def CommaJoin(names):
  """Nicely join a set of identifiers.

  Every item is converted using C{str()}; the results are joined with a
  comma followed by a space.

  @param names: set, list or tuple
  @return: a string with the formatted results

  """
  formatted = []
  for item in names:
    formatted.append(str(item))

  return ", ".join(formatted)
500 501
def FormatTime(val, usecs=None):
  """Formats a time value.

  The timestamp is converted to local time and formatted as
  C{YYYY-MM-DD HH:MM:SS}. If C{usecs} is given, it is appended as a
  six-digit fraction (C{.UUUUUU}).

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @type usecs: int or None
  @param usecs: Microseconds to be appended to the formatted time
  @return: a string value or N/A if we don't have a valid timestamp

  """
  # This also covers None
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled-out equivalent of "%F %T"; those two codes work on Linux, but
  # they are not guaranteed on all platforms
  result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

  if usecs is not None:
    result += ".%06d" % usecs

  return result
522 523
def FormatSeconds(secs):
  """Formats seconds for easier reading.

  The value is rounded to whole seconds and split into days, hours, minutes
  and seconds. Leading units which are zero are omitted; values which are
  not positive are shown as plain seconds.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  chunks = []

  # Negative values would be a bit tricky
  if remaining > 0:
    for (suffix, unit_secs) in (("d", 86400), ("h", 3600), ("m", 60)):
      (count, remaining) = divmod(remaining, unit_secs)
      # Once a unit has been shown, all smaller ones are shown as well
      if chunks or count:
        chunks.append("%d%s" % (count, suffix))

  chunks.append("%ds" % remaining)

  return " ".join(chunks)
547 548
class LineSplitter(object):
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface. Data passed to L{write} is
  buffered; complete lines are handed to the callback by L{flush}, a
  trailing line without newline by L{close}.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines waiting to be delivered
    self._lines = collections.deque()
    # Data received after the last newline
    self._buffer = ""

  def write(self, data):
    """Buffers data and queues all lines completed by it.

    @type data: string
    @param data: Data chunk, may contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # The last part is not (yet) terminated by a newline
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Calls the line function for every complete line queued so far.

    Trailing carriage return and newline characters are removed from the
    lines.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes all lines, including a trailing one without newline.

    Calling this method more than once is harmless; the trailing line is
    delivered only once.

    """
    self.flush()
    if self._buffer:
      # Clear the buffer before invoking the callback, so that the partial
      # line can never be delivered a second time
      (tail, self._buffer) = (self._buffer, "")
      self._line_fn(tail)
588 589
def IsValidShellParam(word):
  """Verifies is the given word is safe from the shell's p.o.v.

  A safe word can be passed to a command via the shell without altering the
  command line, i.e. it reaches the actual command unchanged.

  The check is overly restrictive on purpose, in order to be on the safe
  side: only non-empty words made up of whitelisted characters pass.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
607 608
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  All arguments are checked using L{IsValidShellParam}, i.e. they must not
  contain shell metacharacters. If all of them pass, the result of
  template % args is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be interpolated into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if an argument is not a valid shell
      parameter

  """
  for word in args:
    if IsValidShellParam(word):
      continue

    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % word)

  return template % args
629 630
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd" and 112 becomes
  "112th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: The number followed by its ordinal suffix

  """
  # Numbers ending in 11 to 19 always take "th", also beyond the first
  # hundred (e.g. 111th, 212th, 1013th); hence the last two digits are
  # examined instead of the value itself
  if 10 < value % 100 < 20:
    suffix = "th"
  else:
    suffix = {
      1: "st",
      2: "nd",
      3: "rd",
      }.get(value % 10, "th")

  return "%s%s" % (value, suffix)
655 656
def Truncate(text, length):
  """Truncate string and add ellipsis if needed.

  Text longer than C{length} is cut so that it fits into C{length}
  characters including the trailing ellipsis. Values which are not strings
  are converted using C{str()} first.

  @type text: string
  @param text: Text
  @type length: integer
  @param length: Desired length; must exceed the length of the ellipsis
  @rtype: string
  @return: Truncated text

  """
  ellipsis_len = len(_ASCII_ELLIPSIS)

  assert length > ellipsis_len

  # Serialize if necessary
  if not isinstance(text, basestring):
    text = str(text)

  if len(text) > length:
    return text[:length - ellipsis_len] + _ASCII_ELLIPSIS

  return text
678 679
def FilterEmptyLinesAndComments(text):
  """Filters empty lines and comments from a line-based string.

  Whitespace is also removed from the beginning and end of all lines. Lines
  whose first non-whitespace character is C{#} are treated as comments.

  @type text: string
  @param text: Input string
  @rtype: list
  @return: The remaining lines, in their original order

  """
  kept = []

  for raw_line in text.splitlines():
    line = raw_line.strip()

    # Ignore empty lines and comments
    if not line or line.startswith("#"):
      continue

    kept.append(line)

  return kept
693 694
def FormatKeyValue(data):
  """Formats a dictionary as "key=value" parameters.

  The items are sorted before being formatted, so that the order of the
  result is stable.

  @type data: dict
  @param data: The dictionary to format
  @rtype: list of string
  @return: One "key=value" string per dictionary item

  """
  formatted = []

  for item in sorted(data.items()):
    formatted.append("%s=%s" % item)

  return formatted
705