Package ganeti :: Package utils :: Module text
[hide private]
[frames] | no frames]

Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify 
  7  # it under the terms of the GNU General Public License as published by 
  8  # the Free Software Foundation; either version 2 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # This program is distributed in the hope that it will be useful, but 
 12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 14  # General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
 19  # 02110-1301, USA. 
 20   
 21  """Utility functions for manipulating or working with text. 
 22   
 23  """ 
 24   
 25   
 26  import re 
 27  import os 
 28  import time 
 29  import collections 
 30   
 31  from ganeti import errors 
 32   
 33   
 34  #: Unit checker regexp 
 35  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 36   
 37  #: Characters which don't need to be quoted for shell commands 
 38  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 39   
 40  #: Shell param checker regexp 
 41  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 42   
 43  #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026) 
 44  _ASCII_ELLIPSIS = "..." 
 45   
 46  #: MAC address octet 
 47  _MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}" 
 48   
 49   
def MatchNameComponent(key, name_list, case_sensitive=True):
    """Look up a (possibly abbreviated) name in a list of names.

    A key matches an entry when it is equal to the entry or when the
    entry consists of the key followed by a dot and anything else; e.g.
    against C{['test1.example.com', 'test2.example.com']} both
    I{'test1'} and I{'test1.example'} match, while I{'test1.ex'} does
    not. If more than one entry matches, the lookup is treated as
    failed, unless exactly one entry is equal to the key (e.g.
    I{'test1'} against C{['test1', 'test1.example.com']}).

    @type key: str
    @param key: the name to be searched
    @type name_list: list
    @param name_list: the list of strings against which to search the key
    @type case_sensitive: boolean
    @param case_sensitive: whether to provide a case-sensitive match

    @rtype: None or str
    @return: None if there is no match I{or} if there are multiple matches,
        otherwise the element from the list which matches

    """
    # A literal hit always wins, whatever else is in the list
    if key in name_list:
        return key

    if case_sensitive:
        flags = 0
    else:
        flags = re.IGNORECASE
        key = key.upper()

    pattern = re.compile(r"^%s(\..*)?$" % re.escape(key), flags)

    # Entries equal to the key or starting with "<key>."
    prefix_hits = [name for name in name_list
                   if pattern.match(name) is not None]

    # Entries equal to the key when ignoring case; only tracked for
    # case-insensitive lookups
    exact_hits = []
    if not case_sensitive:
        exact_hits = [name for name in prefix_hits if key == name.upper()]

    # A unique full match is preferred over a unique prefix match
    for hits in (exact_hits, prefix_hits):
        if len(hits) == 1:
            return hits[0]

    return None
97 98
def _DnsNameGlobHelper(match):
    """Substitution callback used by L{DnsNameGlobPattern}.

    Translates one piece of a globbing pattern into the corresponding
    regular expression fragment: the wildcards C{*} and C{?} become
    expressions which never match a dot, everything else is escaped so
    that it is matched literally.

    @param match: match object for the piece to translate
    @rtype: string
    @return: regular expression fragment

    """
    token = match.group(0)

    wildcards = {
        "*": "[^.]*",
        "?": "[^.]",
        }

    if token in wildcards:
        return wildcards[token]

    return re.escape(token)
113 114
def DnsNameGlobPattern(pattern):
    """Converts a DNS name globbing pattern into a regular expression.

    The globbing pattern (e.g. C{*.site}) only knows two wildcards: an
    asterisk (*) stands for any number of characters other than the dot
    (.) separating DNS name parts, a question mark (?) for exactly one
    such character. Escape sequences or ranges (e.g. [a-z]) are not
    supported. The resulting expression is anchored at the start of the
    name and accepts further dot-separated parts after the pattern.

    @type pattern: string
    @param pattern: DNS name globbing pattern
    @rtype: string
    @return: Regular expression

    """
    # Split the pattern into wildcards and literal runs and translate each
    translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

    return r"^%s(\..*)?$" % translated
132 133
def FormatUnit(value, units):
    """Renders a size given in MiB using the requested unit.

    @type value: int
    @param value: integer representing the value in MiB (1048576)
    @type units: char
    @param units: the type of formatting we should do:
        - 'h' for automatic scaling (the result carries an M/G/T suffix)
        - 'm' for MiBs
        - 'g' for GiBs
        - 't' for TiBs
    @rtype: str
    @return: the formatted value (with suffix)
    @raise errors.ProgrammerError: if an unknown unit is requested

    """
    if units not in ("m", "g", "t", "h"):
        raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

    autoscale = (units == "h")

    # Choose format, scaled value and the suffix used when auto-scaling
    if units == "m" or (autoscale and value < 1024):
        (fmt, scaled, auto_suffix) = ("%d%s", round(value, 0), "M")
    elif units == "g" or (autoscale and value < (1024 * 1024)):
        (fmt, scaled, auto_suffix) = \
            ("%0.1f%s", round(float(value) / 1024, 1), "G")
    else:
        (fmt, scaled, auto_suffix) = \
            ("%0.1f%s", round(float(value) / 1024 / 1024, 1), "T")

    # Explicitly requested units are printed without a suffix
    if autoscale:
        suffix = auto_suffix
    else:
        suffix = ""

    return fmt % (scaled, suffix)
168 169
def ParseUnit(input_string):
    """Tries to extract number and scale from the given string.

    Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
    [UNIT]}. If no unit is specified, it defaults to MiB. Return value
    is always an int in MiB, rounded up to the next multiple of 4.

    @param input_string: value to parse; it is converted to a string first
    @rtype: int
    @return: the size in MiB
    @raise errors.UnitParseError: if the input doesn't have the expected
        format, the number can't be parsed or the unit is unknown

    """
    m = _PARSEUNIT_REGEX.match(str(input_string))
    if not m:
        raise errors.UnitParseError("Invalid format")

    (number, unit) = m.groups()

    try:
        value = float(number)
    except ValueError:
        # The regexp accepts any mix of digits and dots (e.g. "1.2.3" or
        # "."), not all of which are valid numbers
        raise errors.UnitParseError("Invalid format")

    if unit:
        lcunit = unit.lower()
    else:
        lcunit = "m"

    if lcunit in ("m", "mb", "mib"):
        # Value already in MiB
        pass

    elif lcunit in ("g", "gb", "gib"):
        value *= 1024

    elif lcunit in ("t", "tb", "tib"):
        value *= 1024 * 1024

    else:
        raise errors.UnitParseError("Unknown unit: %s" % unit)

    # Make sure we round up
    if int(value) < value:
        value += 1

    # Round up to the next multiple of 4
    value = int(value)
    if value % 4:
        value += 4 - value % 4

    return value
213 214
def ShellQuote(value):
    """Quotes a single shell argument according to POSIX.

    Values matching L{_SHELL_UNQUOTED_RE} are returned unchanged;
    anything else is wrapped in single quotes, with embedded single
    quotes rewritten as C{'\\''}.

    @type value: str
    @param value: the argument to be quoted
    @rtype: str
    @return: the quoted value

    """
    if _SHELL_UNQUOTED_RE.match(value):
        return value

    # Close the quoted string, emit an escaped quote and reopen it
    escaped = value.replace("'", "'\\''")

    return "'%s'" % escaped
228 229
def ShellQuoteArgs(args):
    """Quotes every argument of a list and joins them with spaces.

    @type args: list
    @param args: list of arguments to be quoted
    @rtype: str
    @return: the quoted arguments concatenated with spaces

    """
    return " ".join(map(ShellQuote, args))
240 241
class ShellWriter:
    """Small helper emitting script lines at a tracked indentation level.

    """
    # Text written once per indentation level in front of non-empty lines.
    # NOTE(review): whitespace in this extracted listing appears collapsed,
    # so the original value may have been wider - confirm against upstream.
    INDENT_STR = " "

    def __init__(self, fh, indent=True):
        """Initializes this class.

        @param fh: object with a C{write} method receiving the output
        @type indent: boolean
        @param indent: whether lines should be indented at all

        """
        self._indent = 0
        self._indent_enabled = indent
        self._fh = fh

    def IncIndent(self):
        """Moves one indentation level deeper.

        """
        self._indent += 1

    def DecIndent(self):
        """Moves one indentation level back; the level must be above zero.

        """
        assert self._indent > 0
        self._indent -= 1

    def Write(self, txt, *args):
        """Writes one line, followed by a newline, to the output file.

        @type txt: string
        @param txt: line to write; used as a format string for C{args}
            if any are given, otherwise written as-is
        @param args: optional values for the format string

        """
        assert self._indent >= 0

        line = txt
        if args:
            line = txt % args

        emit = self._fh.write

        # Empty lines are never indented
        if self._indent_enabled and line:
            emit(self._indent * self.INDENT_STR)

        emit(line)
        emit("\n")
287 288
def GenerateSecret(numbytes=20):
    """Generates a random secret.

    This reads C{numbytes} bytes from C{os.urandom} and returns them as
    an hex string (so that it can be used where an ASCII string is
    needed).

    @param numbytes: the number of bytes which will be represented by the
        returned string (defaulting to 20, the length of a SHA1 hash)
    @rtype: str
    @return: an hex representation of the random sequence (two lower-case
        hex digits per byte)

    """
    # Imported locally as nothing else in this module needs it
    import binascii

    # str.encode("hex") only exists for Python 2 byte strings; hexlify
    # yields the same digits and is available everywhere
    hexed = binascii.hexlify(os.urandom(numbytes))

    if not isinstance(hexed, str):
        # hexlify returns bytes where "str" is a text type
        hexed = hexed.decode("ascii")

    return hexed
302 303
def _MakeMacAddrRegexp(octets):
    """Builds a regular expression for verifying MAC addresses.

    The expression matches exactly C{octets} colon-separated octets,
    ignoring case.

    @type octets: integer
    @param octets: How many octets to expect (1-6)
    @return: Compiled regular expression

    """
    assert octets > 0
    assert octets <= 6

    # "\Z" only matches at the very end of the string; "$" would also
    # accept an address followed by a newline
    return re.compile(r"^%s\Z" % ":".join([_MAC_ADDR_OCTET_RE] * octets),
                      re.I)
#: Regular expression for full MAC address (six colon-separated octets)
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Regular expression for half a MAC address (three colon-separated octets)
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)
def _MacAddressCheck(check_re, mac, msg):
    """Validates a MAC address (or prefix) against a regular expression.

    @param check_re: Compiled regular expression as returned by C{re.compile}
    @type mac: string
    @param mac: MAC address to be validated
    @type msg: string
    @param msg: Error message (%s will be replaced with MAC address)
    @rtype: string
    @return: the address converted to lower case
    @raise errors.OpPrereqError: if the address doesn't match C{check_re}

    """
    if not check_re.match(mac):
        raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)

    return mac.lower()
340 341
def NormalizeAndValidateMac(mac):
    """Validates a full (six octet) MAC address and normalizes it.

    Only the colon-separated format is accepted. The address is
    returned in all lower case.

    @type mac: string
    @param mac: MAC address to be validated
    @rtype: string
    @return: Normalized and validated MAC address
    @raise errors.OpPrereqError: If the MAC address isn't valid

    """
    error_msg = "Invalid MAC address '%s'"

    return _MacAddressCheck(_MAC_CHECK_RE, mac, error_msg)
356 357
def NormalizeAndValidateThreeOctetMacPrefix(mac):
    """Validates a three octet MAC address prefix and normalizes it.

    The supplied string must consist of exactly three colon-separated
    octets. The prefix is returned in all lower case.

    @type mac: string
    @param mac: Prefix to be validated
    @rtype: string
    @return: Normalized and validated prefix
    @raise errors.OpPrereqError: If the MAC address prefix isn't valid

    """
    error_msg = "Invalid MAC address prefix '%s'"

    return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_msg)
373 374
def SafeEncode(text):
    """Return a 'safe' version of a source string.

    This function mangles the input string and returns a version that
    should be safe to display/encode as ASCII. To this end, unicode
    input is first converted to ASCII using the 'backslashreplace'
    error handler, which gets rid of any non-ASCII chars. The result is
    then processed character by character: tab, newline and carriage
    return become C{\\t}, C{\\n} and C{\\r}, any other non-printable
    character becomes a C{\\xNN} escape and everything else is kept.
    string_escape is not used since it escapes single quotes and
    backslashes too, and that is too much; and that escaping is not
    stable, i.e. string_escape(string_escape(x)) != string_escape(x).

    @type text: str or unicode
    @param text: input data
    @rtype: str
    @return: a safe version of text

    """
    try:
        text_type = unicode
    except NameError:
        # No separate unicode type: "str" itself is the unicode text type
        text_type = str

    if isinstance(text, text_type):
        # only if unicode; byte strings are handled by the loop below
        text = text.encode("ascii", "backslashreplace")

    pieces = []
    for char in text:
        if isinstance(char, int):
            # Iterating over a bytes object can yield integers
            code = char
            char = chr(code)
        else:
            code = ord(char)

        if char == "\t":
            pieces.append(r"\t")
        elif char == "\n":
            pieces.append(r"\n")
        elif char == "\r":
            pieces.append(r"\r")
        elif code < 32 or code >= 127:  # non-printable
            pieces.append("\\x%02x" % (code & 0xff))
        else:
            pieces.append(char)

    return "".join(pieces)
410 411
def UnescapeAndSplit(text, sep=","):
    """Splits a string at a separator which can be escaped.

    The separator can be protected with a backslash in order to become
    part of an element. The rules are (assuming a comma as separator):
      - a plain , separates the elements
      - a sequence \\\\, (double backslash plus comma) is handled as a
        backslash plus a separator comma
      - a sequence \\, (backslash plus comma) is handled as a
        non-separator comma

    @type text: string
    @param text: the string to split
    @type sep: string
    @param sep: the separator
    @rtype: list
    @return: a list of strings

    """
    merged = []

    # Piece ending in an escaped separator, waiting to be glued to the
    # next fragment
    pending = None

    # Split without looking at escapes first, then re-join the fragments
    # which were separated at an escaped separator
    for fragment in text.split(sep):
        if pending is not None:
            fragment = pending + sep + fragment
            pending = None

        # An odd number of trailing backslashes means the separator which
        # followed was escaped
        trailing = len(fragment) - len(fragment.rstrip("\\"))
        if trailing % 2 == 1:
            pending = fragment
        else:
            merged.append(fragment)

    if pending is not None:
        # Nothing left to join with; the backslashes are kept as they are
        merged.append(pending)

    # Finally reduce each backslash-something to just the something
    unescape = re.compile(r"\\(.)")
    return [unescape.sub(r"\1", piece) for piece in merged]
454 455
def CommaJoin(names):
    """Joins the string form of a set of identifiers with ", ".

    @param names: set, list or tuple
    @return: a string with the formatted results

    """
    return ", ".join(map(str, names))
464 465
def FormatTime(val, usecs=None):
    """Formats a time value.

    The timestamp is rendered in local time as C{YYYY-MM-DD HH:MM:SS},
    optionally followed by a dot and the microseconds.

    @type val: float or None
    @param val: Timestamp as returned by time.time() (seconds since Epoch,
        1970-01-01 00:00:00 UTC)
    @type usecs: int or None
    @param usecs: Microseconds to append (zero-padded to six digits), if any
    @return: a string value or N/A if we don't have a valid timestamp

    """
    # None is not a number either, so this also covers a missing timestamp
    if not isinstance(val, (int, float)):
        return "N/A"

    # Spelled-out equivalent of "%F %T", which is not supported on all
    # platforms
    result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

    if usecs is not None:
        result += ".%06d" % usecs

    return result
486 487
def FormatSeconds(secs):
    """Renders a number of seconds as days, hours, minutes and seconds.

    The value is rounded to whole seconds first. Leading units which are
    zero are left out; the seconds are always included.

    @type secs: number
    @param secs: Number of seconds
    @rtype: string
    @return: Formatted seconds (e.g. "2d 9h 19m 49s")

    """
    remaining = round(secs, 0)
    pieces = []

    # Negative values would be a bit tricky, hence they're only shown as
    # plain seconds
    if remaining > 0:
        for (unit_secs, label) in ((24 * 60 * 60, "d"), (60 * 60, "h"),
                                   (60, "m")):
            (count, remaining) = divmod(remaining, unit_secs)
            # Once a larger unit was shown, all smaller ones are shown too
            if pieces or count:
                pieces.append("%d%s" % (count, label))

    pieces.append("%ds" % remaining)

    return " ".join(pieces)
511 512
class LineSplitter:
    """Splits data chunks into lines separated by newline.

    Instances provide a file-like interface. Data passed to L{write} is
    only collected; the line function is called by L{flush} and
    L{close}.

    """
    def __init__(self, line_fn, *args):
        """Initializes this class.

        @type line_fn: callable
        @param line_fn: Function called for each line, first parameter is line
        @param args: Extra arguments for L{line_fn}

        """
        assert callable(line_fn)

        if args:
            # Python 2.4 doesn't have functools.partial yet
            self._line_fn = \
                lambda line: line_fn(line, *args)  # pylint: disable=W0142
        else:
            self._line_fn = line_fn

        # Complete lines not yet passed to the line function
        self._lines = collections.deque()
        # Data received after the last newline
        self._buffer = ""

    def write(self, data):
        """Adds data, queueing all lines completed by it.

        @type data: string
        @param data: chunk of data; it doesn't need to end at a line boundary

        """
        parts = (self._buffer + data).split("\n")
        # Whatever follows the last newline is an incomplete line
        self._buffer = parts.pop()
        self._lines.extend(parts)

    def flush(self):
        """Passes all queued complete lines to the line function.

        Trailing carriage returns and newlines are removed from each line.

        """
        while self._lines:
            self._line_fn(self._lines.popleft().rstrip("\r\n"))

    def close(self):
        """Flushes all lines, including a trailing incomplete one.

        The incomplete line is passed on as it is and then forgotten, so
        calling this method again doesn't deliver it a second time.

        """
        self.flush()
        if self._buffer:
            self._line_fn(self._buffer)
            # Only forget the data once it was delivered successfully
            self._buffer = ""
552 553
def IsValidShellParam(word):
    """Checks whether the given word is safe from the shell's p.o.v.

    A safe word can be passed to a command via the shell without
    altering the command line, i.e. it reaches the actual command
    unchanged.

    Note that the check is overly restrictive, in order to be on the
    safe side.

    @type word: str
    @param word: the word to check
    @rtype: boolean
    @return: True if the word is 'safe'

    """
    return _SHELLPARAM_REGEX.match(word) is not None
571 572
def BuildShellCmd(template, *args):
    """Builds a safe shell command line from a template and arguments.

    Every argument is verified to be a valid shell parameter (i.e. it
    doesn't contain shell metacharacters) before the template is
    expanded using C{template % args}.

    @type template: str
    @param template: the string holding the template for the
        string formatting
    @param args: the values to insert into the template
    @rtype: str
    @return: the expanded command line
    @raise errors.ProgrammerError: if an argument isn't a valid shell
        parameter

    """
    for arg in args:
        if IsValidShellParam(arg):
            continue

        raise errors.ProgrammerError("Shell argument '%s' contains"
                                     " invalid characters" % arg)

    return template % args
593 594
def FormatOrdinal(value):
    """Formats a number as an ordinal in the English language.

    E.g. the number 1 becomes "1st", 22 becomes "22nd" and 112 becomes
    "112th".

    @type value: integer
    @param value: Number
    @rtype: string
    @return: the number followed by its ordinal suffix

    """
    # Numbers ending in 11 to 19 always take "th", in every hundred (11th,
    # 112th, ...), hence the check on the last two digits
    if 10 < value % 100 < 20:
        suffix = "th"
    else:
        # Otherwise the last digit decides
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")

    return "%s%s" % (value, suffix)
619 620
def Truncate(text, length):
    """Shortens a string to a maximum length, marking the cut.

    Text longer than C{length} is cut so that it fits into C{length}
    characters including the ellipsis appended to it; shorter text is
    returned unchanged. Values which aren't strings are converted using
    C{str} first.

    @type text: string
    @param text: Text
    @type length: integer
    @param length: Desired length; must be larger than the length of the
        ellipsis
    @rtype: string
    @return: Truncated text

    """
    ellipsis_len = len(_ASCII_ELLIPSIS)

    assert length > ellipsis_len

    # Serialize if necessary
    if not isinstance(text, basestring):
        text = str(text)

    if len(text) > length:
        return text[:length - ellipsis_len] + _ASCII_ELLIPSIS

    return text
642 643
def FilterEmptyLinesAndComments(text):
    """Returns the lines of a string which are neither empty nor comments.

    Whitespace is removed from the beginning and end of all lines
    before they are examined; lines starting with C{#} are comments.

    @type text: string
    @param text: Input string
    @rtype: list
    @return: the remaining lines, stripped

    """
    kept = []

    for raw_line in text.splitlines():
        stripped = raw_line.strip()

        # Ignore empty lines and comments
        if not stripped or stripped.startswith("#"):
            continue

        kept.append(stripped)

    return kept
657 658
def FormatKeyValue(data):
    """Turns a dictionary into a list of "key=value" parameters.

    The entries are sorted so that the order is stable.

    @type data: dict
    @param data: the values to format
    @rtype: list of string

    """
    pairs = list(data.items())
    pairs.sort()

    return ["%s=%s" % pair for pair in pairs]
669