Package ganeti :: Package utils :: Module text
[hide private]
[frames] | no frames]

Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify 
  7  # it under the terms of the GNU General Public License as published by 
  8  # the Free Software Foundation; either version 2 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # This program is distributed in the hope that it will be useful, but 
 12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 14  # General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
 19  # 02110-1301, USA. 
 20   
 21  """Utility functions for manipulating or working with text. 
 22   
 23  """ 
 24   
 25   
 26  import re 
 27  import os 
 28  import time 
 29  import collections 
 30   
 31  from ganeti import errors 
 32   
 33   
 34  #: Unit checker regexp 
 35  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 36   
 37  #: Characters which don't need to be quoted for shell commands 
 38  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 39   
 40  #: MAC checker regexp 
 41  _MAC_CHECK_RE = re.compile("^([0-9a-f]{2}:){5}[0-9a-f]{2}$", re.I) 
 42   
 43  #: Shell param checker regexp 
 44  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 45   
 46  #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026) 
 47  _ASCII_ELLIPSIS = "..." 
 48   
 49   
def MatchNameComponent(key, name_list, case_sensitive=True):
  """Look up a (possibly shortened) name in a list of names.

  A key matches an entry when it is equal to it or when the entry
  continues with a dot right after the key, so I{'test1'} and
  I{'test1.example'} both match C{'test1.example.com'} while
  I{'test1.ex'} does not. If the key itself is an element of the list
  it is returned right away. Otherwise exactly one entry has to match;
  in case-insensitive mode a single entry which equals the key when
  ignoring case is preferred over prefix matches.

  @type key: str
  @param key: the name to be searched
  @type name_list: list
  @param name_list: the list of strings against which to search the key
  @type case_sensitive: boolean
  @param case_sensitive: whether to provide a case-sensitive match

  @rtype: None or str
  @return: None if there is no match I{or} if there are multiple matches,
      otherwise the element from the list which matches

  """
  if key in name_list:
    return key

  if case_sensitive:
    flags = 0
  else:
    flags = re.IGNORECASE
    key = key.upper()

  matcher = re.compile(r"^%s(\..*)?$" % re.escape(key), flags).match

  # Entries equal to the key or extending it by further dot-separated parts
  candidates = [name for name in name_list if matcher(name) is not None]

  # Entries equal to the key when ignoring case; only collected in
  # case-insensitive mode
  if case_sensitive:
    exact = []
  else:
    exact = [name for name in candidates if name.upper() == key]

  # A unique full match wins over a unique prefix match
  for hits in (exact, candidates):
    if len(hits) == 1:
      return hits[0]

  return None
97 98
def _DnsNameGlobHelper(match):
  """Helper function for L{DnsNameGlobPattern}.

  Translates one piece of a globbing pattern, as found by the regular
  expression in L{DnsNameGlobPattern}, into regular expression syntax.

  @param match: match object whose whole match is either C{*}, C{?} or a
    run of other characters
  @rtype: string
  @return: regular expression fragment

  """
  token = match.group(0)

  # Wildcards never cross the dot separating DNS name parts
  wildcard = {"*": "[^.]*", "?": "[^.]"}.get(token)
  if wildcard is None:
    # Everything else is matched literally
    return re.escape(token)

  return wildcard


def DnsNameGlobPattern(pattern):
  """Generates regular expression from DNS name globbing pattern.

  A DNS name globbing pattern (e.g. C{*.site}) is converted to a regular
  expression. Escape sequences or ranges (e.g. [a-z]) are not supported.

  The resulting expression is anchored at the start of the name and allows
  further dot-separated parts after the pattern. An asterisk (*) matches any
  number of characters except the dot (.) separating DNS name parts. A
  question mark (?) matches exactly one character except the dot (.).

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return r"^%s(\..*)?$" % translated
132 133
def FormatUnit(value, units):
  """Formats an incoming number of MiB with the appropriate unit.

  With a fixed unit (C{m}, C{g} or C{t}) only the converted number is
  returned; with automatic scaling (C{h}) the unit is chosen based on the
  size of the value and a C{M}, C{G} or C{T} suffix is appended.

  @type value: int
  @param value: integer representing the value in MiB (1048576)
  @type units: char
  @param units: the type of formatting we should do:
      - 'h' for automatic scaling
      - 'm' for MiBs
      - 'g' for GiBs
      - 't' for TiBs
  @rtype: str
  @return: the formatted value (with suffix)
  @raise errors.ProgrammerError: if C{units} is not one of the above

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  if units == "h":
    # Automatic scaling: use the largest unit which doesn't exceed the value
    if value < 1024:
      scale = "m"
    elif value < (1024 * 1024):
      scale = "g"
    else:
      scale = "t"
    suffix = scale.upper()
  else:
    scale = units
    suffix = ""

  if scale == "m":
    return "%d%s" % (round(value, 0), suffix)

  if scale == "g":
    return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)

  return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
168 169
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB, rounded up to the next multiple of 4.

  @param input_string: the value to parse; it is converted using C{str()}
      before being parsed
  @rtype: int
  @return: the size in MiB
  @raise errors.UnitParseError: if the input doesn't have the expected
      format, the number is malformed or the unit is not known

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  # The regular expression is loose about the number and also accepts
  # strings such as "1.2.3" or "."; report these as parse errors instead of
  # leaking a ValueError from float()
  try:
    value = float(number)
  except ValueError:
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
213 214
def ShellQuote(value):
  """Quotes shell argument according to POSIX.

  Values made up only of characters accepted by C{_SHELL_UNQUOTED_RE} are
  returned unchanged. Everything else is wrapped in single quotes, with
  embedded single quotes written as C{'\\''} (close the quoted string, add
  an escaped quote, open a new quoted string).

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if _SHELL_UNQUOTED_RE.match(value) is None:
    return "'" + value.replace("'", "'\\''") + "'"

  return value
228 229
def ShellQuoteArgs(args):
  """Quotes a list of shell arguments.

  Every argument is passed through L{ShellQuote}.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments concatenated with spaces

  """
  quoted = [ShellQuote(arg) for arg in args]

  return " ".join(quoted)
240 241
class ShellWriter:
  """Helper class to write scripts with indentation.

  Lines are written to a file-like object, prefixed with L{INDENT_STR}
  repeated once per current indentation level.

  """
  # NOTE(review): reproduced as shown in the reviewed listing, which seems to
  # collapse runs of whitespace; confirm the intended width against the
  # original file
  INDENT_STR = " "

  def __init__(self, fh):
    """Initializes this class.

    @param fh: object providing a C{write} method which receives the output

    """
    self._indent = 0
    self._fh = fh

  def IncIndent(self):
    """Increase indentation level by 1.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Decrease indentation level by 1.

    The indentation level must be greater than zero.

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Write line to output file.

    @type txt: str
    @param txt: the line, or a format string if C{args} are given
    @param args: values interpolated into C{txt} using the C{%} operator

    """
    assert self._indent >= 0

    line = txt
    if args:
      line = txt % args

    emit = self._fh.write

    if line:
      # Empty lines don't get any indentation
      emit(self.INDENT_STR * self._indent)

    emit(line)
    emit("\n")
286 287
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  The random bytes are obtained from C{os.urandom} and returned as a hex
  string (so that it can be used where an ASCII string is needed).

  @type numbytes: int
  @param numbytes: the number of bytes which will be represented by the
      returned string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: a lower-case hex representation of the random sequence, twice as
      long as C{numbytes}

  """
  # Imported here so that the function doesn't depend on changes to the
  # module-level imports
  import binascii

  # hexlify() yields the same output as the "hex" codec, which is only
  # available on Python 2 byte strings
  secret = binascii.hexlify(os.urandom(numbytes))

  if not isinstance(secret, str):
    # Interpreters on which hexlify() returns bytes: callers expect a native
    # string
    secret = secret.decode("ascii")

  return secret
301 302
def NormalizeAndValidateMac(mac):
  """Normalizes and check if a MAC address is valid.

  The address is checked against C{_MAC_CHECK_RE}, i.e. only the
  colon-separated format is accepted. Valid addresses are returned in lower
  case.

  @type mac: str
  @param mac: the MAC to be validated
  @rtype: str
  @return: returns the normalized and validated MAC.

  @raise errors.OpPrereqError: If the MAC isn't valid

  """
  if _MAC_CHECK_RE.match(mac):
    return mac.lower()

  raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
                             errors.ECODE_INVAL)
322 323
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. To this end, unicode input
  is first converted to ASCII using the 'backslashreplace' error handler,
  which gets rid of any non-ASCII chars. Afterwards tab, newline and
  carriage return are replaced by their two-character backslash escapes
  and all other non-printable characters by a hexadecimal escape. The
  string_escape codec is not used since it escapes single quotes and
  backslashes too, and that is too much; that escaping is also not
  stable, i.e. applying it twice doesn't give the same result as
  applying it once.

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    unicode_type = unicode
  except NameError:
    # Interpreters without a separate "unicode" builtin: "str" is the text
    # type there
    unicode_type = str

  if isinstance(text, unicode_type):
    # only if unicode; if str already, we handle it below
    text = text.encode("ascii", "backslashreplace")

  if unicode_type is str and isinstance(text, bytes):
    # Only reached where "str" is the text type (the second test is not
    # evaluated otherwise): map every byte to the character with the same
    # ordinal so that the loop below works on characters
    text = text.decode("latin-1")

  resu = []
  for char in text:
    c = ord(char)
    if char == "\t":
      resu.append(r"\t")
    elif char == "\n":
      resu.append(r"\n")
    elif char == "\r":
      # Backslash followed by "r"; this used to emit a stray quote
      resu.append(r"\r")
    elif c < 32 or c >= 127: # non-printable
      resu.append("\\x%02x" % (c & 0xff))
    else:
      resu.append(char)

  return "".join(resu)
359 360
def UnescapeAndSplit(text, sep=","):
  """Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be part of an element.
  The escaping rules are (assuming comma being the separator):
    - a plain comma separates the elements
    - a double backslash followed by a comma is handled as a backslash
      plus a separator comma
    - a single backslash followed by a comma is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list of string
  @return: a list of strings

  """
  # We split the text by sep (with no escaping at this stage) and glue
  # pieces back together whenever the separator between them was escaped,
  # i.e. preceded by an odd number of backslashes
  merged = []
  pending = None
  for piece in text.split(sep):
    if pending is not None:
      piece = pending + sep + piece
      pending = None

    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      # The joined piece may itself end in an escaped separator, hence it's
      # checked again together with the next piece
      pending = piece
    else:
      merged.append(piece)

  if pending is not None:
    # Trailing odd backslash with nothing left to join with
    merged.append(pending)

  # here the backslashes remain (all); replace backslash-something with
  # something
  return [re.sub(r"\\(.)", r"\1", v) for v in merged]
403 404
def CommaJoin(names):
  """Nicely join a set of identifiers.

  Each element is converted using C{str()}.

  @param names: set, list or tuple
  @return: a string with the elements separated by a comma and a space

  """
  as_text = map(str, names)

  return ", ".join(as_text)
413 414
def FormatTime(val, usecs=None):
  """Formats a time value.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @type usecs: int or None
  @param usecs: microseconds; if given they are appended as a six-digit
    fraction
  @rtype: str
  @return: the local time formatted as C{YYYY-MM-DD HH:MM:SS}, optionally
    followed by C{.UUUUUU}, or N/A if we don't have a valid timestamp

  """
  # None is neither an int nor a float, so this also covers "no timestamp"
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled-out equivalents of "%F %T", which are not guaranteed to be
  # available on all platforms
  result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

  if usecs is not None:
    result += ".%06d" % usecs

  return result
435 436
def FormatSeconds(secs):
  """Formats seconds for easier reading.

  The value is rounded to whole seconds and split into days, hours, minutes
  and seconds. Leading units which are zero are left out. Values which are
  not positive are only formatted as seconds.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  chunks = []

  if remaining > 0:
    # Negative values would be a bit tricky
    for (suffix, size) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
      (count, remaining) = divmod(remaining, size)
      # Once a larger unit has been shown, smaller ones are shown even if
      # they're zero
      if chunks or count:
        chunks.append("%d%s" % (count, suffix))

  chunks.append("%ds" % remaining)

  return " ".join(chunks)
460 461
class LineSplitter:
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface (C{write}, C{flush} and
  C{close}). Data can be written in arbitrary chunks; complete lines are
  queued and handed to the callback when flushing, an incomplete last line
  is kept until more data arrives or the splitter is closed.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines not yet passed to the callback
    self._lines = collections.deque()
    # Data after the last newline seen so far
    self._buffer = ""

  def write(self, data):
    """Adds data, queueing all lines completed by it.

    @type data: str
    @param data: chunk of data, may contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # The last part is not terminated by a newline yet
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Passes all queued lines to the callback.

    Trailing carriage returns and newlines are removed from each line.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes queued lines and passes on an incomplete last line, if any.

    Calling this method more than once doesn't report the last line again.

    """
    self.flush()
    if self._buffer:
      # Forget the remainder before calling out, otherwise another call to
      # close() would report it a second time
      leftover = self._buffer
      self._buffer = ""
      self._line_fn(leftover)
501 502
def IsValidShellParam(word):
  """Verifies is the given word is safe from the shell's p.o.v.

  A safe word can be passed to a command via the shell without altering
  the command line, i.e. it reaches the actual command as it is. The word
  is checked against C{_SHELLPARAM_REGEX}.

  Note that we are overly restrictive here, in order to be on the safe
  side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
520 521
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  All arguments are verified using L{IsValidShellParam} (i.e. they must not
  contain shell metacharacters). Only if all of them are fine, the result
  of C{template % args} is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be inserted into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if one of the arguments is not a valid
      shell parameter

  """
  for arg in args:
    if IsValidShellParam(arg):
      continue

    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % arg)

  return template % args
542 543
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd" and 112 becomes
  "112th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: the number followed by its ordinal suffix

  """
  # Numbers whose last two digits are between 11 and 19 always take "th",
  # no matter what precedes them (11th, 112th, 1013th); for all others the
  # last digit decides
  if 10 < value % 100 < 20:
    suffix = "th"
  else:
    suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")

  return "%s%s" % (value, suffix)
568 569
def Truncate(text, length):
  """Truncate string and add ellipsis if needed.

  Values which are not strings are converted using C{str()} first. Text
  longer than the desired length is cut so that it has exactly that length
  including the ellipsis.

  @type text: string
  @param text: Text
  @type length: integer
  @param length: Desired length, must be greater than the length of the
    ellipsis
  @rtype: string
  @return: Truncated text

  """
  ellipsis_len = len(_ASCII_ELLIPSIS)

  assert length > ellipsis_len

  # Serialize if necessary
  if not isinstance(text, basestring):
    text = str(text)

  if len(text) > length:
    return text[:length - ellipsis_len] + _ASCII_ELLIPSIS

  return text
591 592
def FormatKeyValue(data):
  """Formats a dictionary as "key=value" parameters.

  The items are sorted to have a stable order.

  @type data: dict
  @param data: the dictionary to format
  @rtype: list of string
  @return: one "key=value" string per dictionary item

  """
  ordered = sorted(data.items())

  # Each item is a (key, value) tuple and fills both placeholders
  return ["%s=%s" % item for item in ordered]
603