Package ganeti :: Package utils :: Module text
[hide private]
[frames | no frames]

Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify 
  7  # it under the terms of the GNU General Public License as published by 
  8  # the Free Software Foundation; either version 2 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # This program is distributed in the hope that it will be useful, but 
 12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 14  # General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
 19  # 02110-1301, USA. 
 20   
 21  """Utility functions for manipulating or working with text. 
 22   
 23  """ 
 24   
 25   
 26  import re 
 27  import os 
 28  import time 
 29  import collections 
 30   
 31  from ganeti import errors 
 32   
 33   
 34  #: Unit checker regexp 
 35  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 36   
 37  #: Characters which don't need to be quoted for shell commands 
 38  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 39   
 40  #: MAC checker regexp 
 41  _MAC_CHECK_RE = re.compile("^([0-9a-f]{2}:){5}[0-9a-f]{2}$", re.I) 
 42   
 43  #: Shell param checker regexp 
 44  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 45   
 46   
def MatchNameComponent(key, name_list, case_sensitive=True):
  """Look up a (possibly shortened) name in a list of full names.

  A key matches an entry when it equals the entry or is a prefix of it that
  ends exactly at a dot, e.g. I{'test1'} and I{'test1.example'} both match
  C{'test1.example.com'} while I{'test1.ex'} does not. If more than one
  entry matches, the lookup fails, unless exactly one entry is equal to the
  key (for case-insensitive lookups: equal when ignoring case).

  @type key: str
  @param key: the name to be searched
  @type name_list: list
  @param name_list: the list of strings against which to search the key
  @type case_sensitive: boolean
  @param case_sensitive: whether to provide a case-sensitive match

  @rtype: None or str
  @return: the single matching element of the list, or None if nothing
      matches or the match is ambiguous

  """
  # A literal hit always wins, regardless of how many longer names exist
  if key in name_list:
    return key

  if case_sensitive:
    flags = 0
  else:
    flags = re.IGNORECASE
    key = key.upper()

  matcher = re.compile(r"^%s(\..*)?$" % re.escape(key), flags)
  prefix_hits = [name for name in name_list
                 if matcher.match(name) is not None]

  if not case_sensitive:
    # Entries differing from the key only in case beat mere prefix matches
    exact_hits = [name for name in prefix_hits if name.upper() == key]
    if len(exact_hits) == 1:
      return exact_hits[0]

  if len(prefix_hits) == 1:
    return prefix_hits[0]

  return None
94 95
def _DnsNameGlobHelper(match):
  """Translates one token of a DNS name globbing pattern.

  Used as the replacement callback of L{DnsNameGlobPattern}.

  @param match: regular expression match object whose whole match is either
      a single wildcard character or a run of literal text
  @rtype: string
  @return: regular expression fragment for the matched token

  """
  wildcards = {
    "*": "[^.]*",
    "?": "[^.]",
    }

  token = match.group(0)
  if token in wildcards:
    return wildcards[token]

  # Anything else is literal text and must not be interpreted as a regexp
  return re.escape(token)
110 111
def DnsNameGlobPattern(pattern):
  """Converts a DNS name globbing pattern into a regular expression.

  The pattern (e.g. C{*.site}) is split into single wildcard characters and
  runs of literal text, each of which is translated by
  L{_DnsNameGlobHelper}. Escape sequences and ranges (e.g. [a-z]) are not
  supported.

  The resulting expression is anchored at the start of the name and may be
  followed by further dot-separated name parts. An asterisk (*) matches any
  number of characters other than the dot (.), a question mark (?) matches
  exactly one such character.

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)
  return r"^%s(\..*)?$" % translated
129 130
def FormatUnit(value, units):
  """Renders an amount given in MiB using the requested unit.

  @type value: int
  @param value: integer representing the value in MiB (1048576)
  @type units: char
  @param units: the type of formatting we should do:
      - 'h' for automatic scaling (the result carries an M/G/T suffix)
      - 'm' for MiBs
      - 'g' for GiBs
      - 't' for TiBs
  @rtype: str
  @return: the formatted value; whole numbers for MiB, one decimal place
      for GiB and TiB
  @raise errors.ProgrammerError: if C{units} is not one of the above

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  automatic = (units == "h")

  # With automatic scaling the magnitude of the value selects the unit
  if not automatic:
    scale = units
  elif value < 1024:
    scale = "m"
  elif value < (1024 * 1024):
    scale = "g"
  else:
    scale = "t"

  # Only automatically scaled values are labelled with their unit
  if automatic:
    suffix = scale.upper()
  else:
    suffix = ""

  if scale == "m":
    return "%d%s" % (round(value, 0), suffix)

  scaled = float(value) / 1024
  if scale == "t":
    scaled = scaled / 1024

  return "%0.1f%s" % (round(scaled, 1), suffix)
165 166
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Accepted units
  (case-insensitive) are m/mb/mib, g/gb/gib and t/tb/tib.

  @param input_string: the value to parse; it is converted using C{str()}
      first
  @rtype: int
  @return: the value in MiB, rounded up to the next multiple of 4
  @raise errors.UnitParseError: if the input is malformed or the unit is
      unknown

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regexp only restricts the character set of the number, hence
    # strings such as "1.2.3" or "." get this far; report them like any
    # other malformed input instead of leaking a bare ValueError
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
210 211
def ShellQuote(value):
  """Quotes a single shell argument according to POSIX.

  Values consisting solely of characters accepted by the module's
  "unquoted" regular expression are returned unchanged; everything else,
  including the empty string, is wrapped in single quotes.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if _SHELL_UNQUOTED_RE.match(value) is None:
    # Inside single quotes nothing can be escaped, so every embedded quote
    # closes the string, adds a backslash-escaped quote and reopens it
    return "'%s'" % value.replace("'", "'\\''")

  return value
225 226
def ShellQuoteArgs(args):
  """Quotes every argument of a list and joins them into one string.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the arguments, each quoted via L{ShellQuote}, separated by
      single spaces

  """
  return " ".join(map(ShellQuote, args))
237 238
class ShellWriter:
  """Writes script lines to a file object, keeping track of indentation.

  """
  # Text emitted once per indentation level in front of every line.
  # NOTE(review): this block was recovered from a listing in which runs of
  # whitespace are collapsed; confirm the width of this string against the
  # upstream file.
  INDENT_STR = " "

  def __init__(self, fh):
    """Initializes this class.

    @param fh: object with a C{write} method receiving the output

    """
    self._indent = 0
    self._fh = fh

  def IncIndent(self):
    """Moves subsequent lines one indentation level deeper.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Moves subsequent lines one indentation level back.

    Must only be called while the indentation level is positive.

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Writes one indented line, followed by a newline.

    @param txt: the line; used as a C{%} format string when C{args} are
        given and written verbatim otherwise
    @param args: values to be interpolated into C{txt}

    """
    assert self._indent >= 0

    emit = self._fh.write

    emit(self.INDENT_STR * self._indent)

    # Without arguments the text is not interpolated, which allows callers
    # to pass lines containing literal percent signs
    if not args:
      emit(txt)
    else:
      emit(txt % args)

    emit("\n")
279 280
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  The random bytes are read from C{os.urandom} and returned as a hex
  string (so that it can be used where an ASCII string is needed).

  @param numbytes: the number of bytes which will be represented by the
      returned string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: lower-case hex representation of the random bytes, i.e. a
      string of C{2 * numbytes} characters

  """
  import binascii

  # str.encode("hex") only exists on Python 2; hexlify works everywhere
  hexdata = binascii.hexlify(os.urandom(numbytes))

  if not isinstance(hexdata, str):
    # Python 3 hands back bytes, callers expect a native string
    hexdata = hexdata.decode("ascii")

  return hexdata
294 295
def NormalizeAndValidateMac(mac):
  """Validates a MAC address and returns it in lower case.

  Only the colon-separated notation (six groups of two hexadecimal digits)
  is accepted; the check itself ignores case.

  @type mac: str
  @param mac: the MAC to be validated
  @rtype: str
  @return: returns the normalized and validated MAC.

  @raise errors.OpPrereqError: If the MAC isn't valid

  """
  if _MAC_CHECK_RE.match(mac) is None:
    message = "Invalid MAC address '%s'" % mac
    raise errors.OpPrereqError(message, errors.ECODE_INVAL)

  return mac.lower()
315 316
def SafeEncode(text):
  r"""Return a 'safe' version of a source string.

  The result only contains printable ASCII characters and is therefore safe
  to display or to encode as ASCII. Unicode text is first converted to
  ASCII using the 'backslashreplace' error handler; afterwards tab, newline
  and carriage return become C{\t}, C{\n} and C{\r}, and every other
  non-printable byte becomes a C{\xNN} escape. Single quotes and
  backslashes are left alone, which keeps the transformation stable, i.e.
  C{SafeEncode(SafeEncode(x)) == SafeEncode(x)}.

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    text_type = unicode # Python 2
  except NameError:
    text_type = str # Python 3, where all text is Unicode

  if isinstance(text, text_type):
    # only if unicode; byte strings are handled below as they are
    text = text.encode("ascii", "backslashreplace")

  pieces = []
  for item in text:
    # Iterating over a byte string yields one-character strings on Python 2
    # and integers on Python 3
    if isinstance(item, int):
      code = item
    else:
      code = ord(item)

    if code == 0x09:
      pieces.append(r"\t")
    elif code == 0x0a:
      pieces.append(r"\n")
    elif code == 0x0d:
      pieces.append(r"\r")
    elif code < 32 or code >= 127: # non-printable
      pieces.append("\\x%02x" % code)
    else:
      pieces.append(chr(code))

  return "".join(pieces)
352 353
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  The separator can be escaped with a backslash in order to become part of
  an element. The escaping rules are (assuming comma being the separator):
    - a plain C{,} separates the elements
    - a sequence C{\\,} (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence C{\,} (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  elements = []

  # Element whose trailing backslash escaped the separator following it
  # and which therefore continues with the next piece
  pending = None

  # we split by sep first (with no escaping at this stage) ...
  for piece in text.split(sep):
    if pending is not None:
      piece = pending + sep + piece
      pending = None

    # ... and glue pieces back together wherever the separator was preceded
    # by an odd number of backslashes; the merged piece is examined again,
    # as it may in turn end with an escaped separator
    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      pending = piece
    else:
      elements.append(piece)

  if pending is not None:
    # Nothing left to merge with, keep the element as it is
    elements.append(pending)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", v) for v in elements]
396 397
def CommaJoin(names):
  """Joins identifiers into a comma-separated string.

  @param names: set, list or tuple; the elements are converted using
      C{str()}
  @return: a string with the elements separated by a comma and a space

  """
  return ", ".join(map(str, names))
406 407
def FormatTime(val):
  """Formats a time value.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @return: the timestamp in local time as C{YYYY-MM-DD HH:MM:SS}, or N/A
    if the value is not an int or a float (which includes None)

  """
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled-out equivalent of "%F %T"; those shorthands are not available
  # on all platforms
  return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))
422 423
def FormatSeconds(secs):
  """Renders a duration as days, hours, minutes and seconds.

  The value is rounded to whole seconds first. Leading units which are
  zero are left out; seconds are always shown.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)

  # Negative values would be a bit tricky, hence they (and zero) are shown
  # as plain seconds
  if not remaining > 0:
    return "%ds" % remaining

  chunks = []
  for (suffix, length) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
    (count, remaining) = divmod(remaining, length)
    # Once a larger unit has been shown, smaller ones follow even if zero
    if chunks or count:
      chunks.append("%d%s" % (count, suffix))

  chunks.append("%ds" % remaining)

  return " ".join(chunks)
447 448
class LineSplitter:
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface (C{write}, C{flush} and
  C{close}); complete lines are handed to a callback.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines waiting to be delivered by flush()
    self._lines = collections.deque()
    # Data received after the last newline, i.e. an incomplete line
    self._buffer = ""

  def write(self, data):
    """Adds a chunk of data.

    Complete lines are queued until the next L{flush}, a trailing partial
    line is kept until more data arrives.

    @param data: the chunk of text

    """
    parts = (self._buffer + data).split("\n")
    # The last part is whatever follows the final newline (possibly empty)
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Passes all queued lines, stripped of trailing CR/LF, to the callback.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes queued lines and delivers a trailing partial line, if any.

    The partial line is passed to the callback as it is. It is forgotten
    afterwards, hence calling this method again doesn't deliver it twice.

    """
    self.flush()
    if self._buffer:
      (tail, self._buffer) = (self._buffer, "")
      self._line_fn(tail)
488 489
def IsValidShellParam(word):
  """Checks whether a word is harmless when passed through the shell.

  A word is accepted if it matches the module's shell parameter regular
  expression. Such a word can be passed to a command via the shell without
  altering the command line, and it reaches the command unchanged.

  Note that the check is overly restrictive on purpose, in order to be on
  the safe side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
507 508
def BuildShellCmd(template, *args):
  """Builds a shell command line from a template and checked arguments.

  Every argument has to pass L{IsValidShellParam}, i.e. must not contain
  shell metacharacters. If all of them do, the result of C{template % args}
  is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be inserted into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: for the first argument failing the check

  """
  for candidate in args:
    if IsValidShellParam(candidate):
      continue

    raise errors.ProgrammerError(
      "Shell argument '%s' contains invalid characters" % candidate)

  return template % args
529 530
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd" and 112 becomes
  "112th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: the number followed by its ordinal suffix

  """
  # Numbers ending in 11, 12 or 13 are irregular and take "th" no matter
  # what precedes them (11th, 112th, 1013th), hence the last two digits
  # have to be examined before the last one
  if 10 < value % 100 < 20:
    suffix = "th"
  else:
    suffix = {
      1: "st",
      2: "nd",
      3: "rd",
      }.get(value % 10, "th")

  return "%s%s" % (value, suffix)
555