Package ganeti :: Package utils :: Module text
Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # All rights reserved. 
  6  # 
  7  # Redistribution and use in source and binary forms, with or without 
  8  # modification, are permitted provided that the following conditions are 
  9  # met: 
 10  # 
 11  # 1. Redistributions of source code must retain the above copyright notice, 
 12  # this list of conditions and the following disclaimer. 
 13  # 
 14  # 2. Redistributions in binary form must reproduce the above copyright 
 15  # notice, this list of conditions and the following disclaimer in the 
 16  # documentation and/or other materials provided with the distribution. 
 17  # 
 18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
 19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
 22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 29   
 30  """Utility functions for manipulating or working with text. 
 31   
 32  """ 
 33   
 34   
 35  import re 
 36  import os 
 37  import time 
 38  import collections 
 39   
 40  from ganeti import errors 
 41   
 42   
 43  #: Unit checker regexp 
 44  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 45   
 46  #: Characters which don't need to be quoted for shell commands 
 47  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 48   
 49  #: Shell param checker regexp 
 50  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 51   
 52  #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026) 
 53  _ASCII_ELLIPSIS = "..." 
 54   
 55  #: MAC address octet 
 56  _MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}" 
 57   
 58   
 59 -def MatchNameComponent(key, name_list, case_sensitive=True): 
 60    """Try to match a name against a list. 
 61   
 62    This function will try to match a name like test1 against a list 
 63    like C{['test1.example.com', 'test2.example.com', ...]}. Against 
 64    this list, I{'test1'} as well as I{'test1.example'} will match, but 
 65    not I{'test1.ex'}. A multiple match will be considered as no match 
 66    at all (e.g. I{'test1'} against C{['test1.example.com', 
 67    'test1.example.org']}), except when the key fully matches an entry 
 68    (e.g. I{'test1'} against C{['test1', 'test1.example.com']}). 
 69   
 70    @type key: str 
 71    @param key: the name to be searched 
 72    @type name_list: list 
 73    @param name_list: the list of strings against which to search the key 
 74    @type case_sensitive: boolean 
 75    @param case_sensitive: whether to provide a case-sensitive match 
 76   
 77    @rtype: None or str 
 78    @return: None if there is no match I{or} if there are multiple matches, 
 79        otherwise the element from the list which matches 
 80   
 81    """ 
 82    if key in name_list: 
 83      return key 
 84   
 85    re_flags = 0 
 86    if not case_sensitive: 
 87      re_flags |= re.IGNORECASE 
 88      key = key.upper() 
 89   
 90    name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags) 
 91   
 92    names_filtered = [] 
 93    string_matches = [] 
 94    for name in name_list: 
 95      if name_re.match(name) is not None: 
 96        names_filtered.append(name) 
 97        if not case_sensitive and key == name.upper(): 
 98          string_matches.append(name) 
 99   
100    if len(string_matches) == 1: 
101      return string_matches[0] 
102    if len(names_filtered) == 1: 
103      return names_filtered[0] 
104   
105    return None 
106   
107   
def _DnsNameGlobHelper(match):
  """Translates one token of a DNS glob for L{DnsNameGlobPattern}.

  @param match: regular expression match object covering the token
  @rtype: string
  @return: regular expression fragment; wildcards never cross a dot and
      everything else is escaped to be matched literally

  """
  wildcards = {
    "*": "[^.]*",
    "?": "[^.]",
    }

  token = match.group(0)
  if token in wildcards:
    return wildcards[token]

  return re.escape(token)
122   
123   
def DnsNameGlobPattern(pattern):
  """Converts a DNS name globbing pattern into a regular expression.

  Only the asterisk (*) and the question mark (?) are special: the former
  stands for any number of characters, the latter for exactly one, and
  neither matches the dot (.) separating DNS name parts. Escape sequences
  and ranges (e.g. [a-z]) are not supported. The resulting expression is
  anchored at the start of the name and accepts further dot-separated parts
  after the globbed prefix (so C{*.site} also matches C{host.site.example}).

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  # Split into single wildcards and runs of ordinary characters and translate
  # each piece separately
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return "".join(["^", translated, r"(\..*)?$"])
141   
142   
def FormatUnit(value, units):
  """Renders an amount given in MiB using the requested unit.

  @type value: int
  @param value: the amount in MiB (1 MiB = 1048576 bytes)
  @type units: char
  @param units: how to format the value:
      - 'h' picks MiB, GiB or TiB depending on the size and appends the
        matching suffix ("M", "G" or "T")
      - 'm' formats as MiB, without suffix
      - 'g' formats as GiB, without suffix
      - 't' formats as TiB, without suffix
  @rtype: str
  @return: the formatted value
  @raise errors.ProgrammerError: if C{units} is not one of the above

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  # Work out the unit actually used and whether it is shown
  if units == "h":
    if value < 1024:
      scale = "m"
    elif value < (1024 * 1024):
      scale = "g"
    else:
      scale = "t"
    suffix = scale.upper()
  else:
    scale = units
    suffix = ""

  if scale == "m":
    return "%d%s" % (round(value, 0), suffix)

  scaled = float(value) / 1024
  if scale == "t":
    scaled = scaled / 1024

  return "%0.1f%s" % (round(scaled, 1), suffix)
177   
178   
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB, rounded up to the next multiple of 4.

  @param input_string: the value to parse; it is converted using C{str}
      before being examined
  @rtype: int
  @return: the size in MiB
  @raise errors.UnitParseError: if the input is malformed, the number is
      not valid or the unit is unknown

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regular expression accepts any mix of digits and dots (e.g. "1.2.3"
    # or "."), not all of which are numbers; report those like any other
    # malformed input instead of leaking a bare ValueError
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
222   
223   
def ShellQuote(value):
  """Quotes a single shell argument according to POSIX rules.

  Values consisting only of characters known to be harmless are returned
  unchanged; everything else is wrapped in single quotes, with embedded
  single quotes replaced by the usual close-escape-reopen sequence.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if _SHELL_UNQUOTED_RE.match(value):
    return value

  escaped = value.replace("'", "'\\''")
  return "'" + escaped + "'"
237   
238   
def ShellQuoteArgs(args):
  """Quotes every argument of a list and joins them into one string.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the individually quoted arguments, separated by single spaces

  """
  quoted = []
  for arg in args:
    quoted.append(ShellQuote(arg))

  return " ".join(quoted)
249   
250   
def ShellCombineCommands(cmdlist):
  """Builds a single shell invocation out of a list of commands.

  @type cmdlist: list
  @param cmdlist: commands, each given as a list of arguments
  @rtype: list
  @return: argument list invoking C{/bin/sh -c} with all commands quoted and
      chained using C{&&}

  """
  quoted_commands = [ShellQuoteArgs(cmd) for cmd in cmdlist]

  return ["/bin/sh", "-c", " && ".join(quoted_commands)]
256   
257   
class ShellWriter(object):
  """Helper to emit script lines with automatic indentation.

  """
  INDENT_STR = "  "

  def __init__(self, fh, indent=True):
    """Sets up the writer.

    @param fh: object whose C{write} method receives the output
    @type indent: boolean
    @param indent: whether lines are prefixed according to the current
        indentation level

    """
    self._indent = 0
    self._indent_enabled = indent
    self._fh = fh

  def IncIndent(self):
    """Moves one indentation level deeper.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Moves one indentation level back (the level must be above zero).

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Writes one line, followed by a newline, to the output file.

    @type txt: string
    @param txt: the line, or a format string if C{args} are given
    @param args: values interpolated into C{txt} using the C{%} operator

    """
    assert self._indent >= 0

    line = txt
    if args:
      line = txt % args

    out = self._fh

    # Empty lines are never indented
    if self._indent_enabled and line:
      out.write(self.INDENT_STR * self._indent)

    out.write(line)

    out.write("\n")
303   
304   
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  This will generate a pseudo-random secret returning an hex string
  (so that it can be used where an ASCII string is needed).

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: an hex representation of the pseudo-random sequence (two lower-case
      hexadecimal digits per byte)

  """
  raw = os.urandom(numbytes)

  # Format byte by byte instead of using the "hex" string codec, which only
  # exists in Python 2; one-element slices are used because they can be passed
  # to ord() no matter whether indexing yields characters or integers
  return "".join(["%02x" % ord(raw[idx:idx + 1]) for idx in range(len(raw))])
318   
319   
def _MakeMacAddrRegexp(octets):
  """Creates the regular expression used to verify MAC addresses.

  @type octets: integer
  @param octets: Number of colon-separated octets to expect (1-6)
  @return: Compiled, case-insensitive regular expression

  """
  assert octets > 0
  assert octets <= 6

  octet_patterns = [_MAC_ADDR_OCTET_RE for _ in range(octets)]
  pattern = "^" + ":".join(octet_patterns) + "$"

  return re.compile(pattern, re.I)
333   
334   
#: Regular expression for a MAC address prefix (the first three octets)
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)

#: Regular expression for a complete MAC address (all six octets)
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)
340   
341   
def _MacAddressCheck(check_re, mac, msg):
  """Checks a MAC address using a regular expression.

  @param check_re: Compiled regular expression as returned by C{re.compile}
  @type mac: string
  @param mac: MAC address to be validated
  @type msg: string
  @param msg: Error message (%s will be replaced with MAC address)
  @rtype: string
  @return: The MAC address converted to lower case
  @raise errors.OpPrereqError: If the MAC address doesn't match C{check_re}

  """
  matched = check_re.match(mac)

  # A pattern anchored with "$" also matches just before a trailing newline;
  # requiring the match to cover the whole string keeps such a value from
  # being accepted and returned with the newline still attached
  if matched is not None and matched.end() == len(mac):
    return mac.lower()

  raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)
356   
357   
def NormalizeAndValidateMac(mac):
  """Validates a complete (six octets) MAC address and normalizes it.

  Only the colon-separated notation is accepted. The returned address is
  converted to lower case.

  @type mac: string
  @param mac: MAC address to be validated
  @rtype: string
  @return: Normalized and validated MAC address
  @raise errors.OpPrereqError: If the MAC address isn't valid

  """
  error_template = "Invalid MAC address '%s'"

  return _MacAddressCheck(_MAC_CHECK_RE, mac, error_template)
372   
373   
def NormalizeAndValidateThreeOctetMacPrefix(mac):
  """Validates a three-octet MAC address prefix and normalizes it.

  The prefix must consist of exactly three colon-separated octets. The
  returned prefix is converted to lower case.

  @type mac: string
  @param mac: Prefix to be validated
  @rtype: string
  @return: Normalized and validated prefix
  @raise errors.OpPrereqError: If the MAC address prefix isn't valid

  """
  error_template = "Invalid MAC address prefix '%s'"

  return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_template)
389   
390   
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. To this end, unicode input
  is first converted to ASCII using the 'backslashreplace' error handler,
  which gets rid of any non-ASCII chars. Afterwards tab, newline and
  carriage return are replaced by their usual two-character escapes and
  all other non-printable characters by a hexadecimal escape. The
  string_escape codec is not used since it escapes single quotes and
  backslashes too, and that is too much; also that escaping is not
  stable, i.e. string_escape(string_escape(x)) != string_escape(x).

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  if isinstance(text, unicode):
    # only if unicode; if str already, we handle it below
    text = text.encode("ascii", "backslashreplace")

  pieces = []
  for char in text:
    c = ord(char)
    if char == "\t":
      pieces.append(r"\t")
    elif char == "\n":
      pieces.append(r"\n")
    elif char == "\r":
      # Backslash followed by "r", like the other two escapes above
      pieces.append(r"\r")
    elif c < 32 or c >= 127: # non-printable
      pieces.append("\\x%02x" % (c & 0xff))
    else:
      pieces.append(char)

  return "".join(pieces)
426   
427   
def UnescapeAndSplit(text, sep=","):
  r"""Splits a string at a separator, honouring backslash escapes.

  The separator can be made part of an element by escaping it. Assuming
  the comma as separator, the rules are:
    - a plain , separates two elements
    - the sequence \, (backslash plus comma) is a comma belonging to the
      element
    - the sequence \\, (two backslashes plus comma) is a backslash
      belonging to the element, followed by a separating comma

  After splitting, every backslash-plus-character pair is replaced by the
  character alone.

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  # Start with a plain split which ignores escaping altogether
  pending = collections.deque(text.split(sep))

  # Pieces ending in an odd number of backslashes were cut at an escaped
  # separator and must be glued to the piece following them
  merged = []
  while pending:
    piece = pending.popleft()
    trailing = len(piece) - len(piece.rstrip("\\"))
    if trailing % 2 == 1 and pending:
      # The combined piece goes back to the front of the queue, as it might
      # itself end in an escaped separator
      pending.appendleft(piece + sep + pending.popleft())
    else:
      # All backslashes are still in place; they're reduced below
      merged.append(piece)

  # Finally drop the backslash from every backslash-something pair
  return [re.sub(r"\\(.)", r"\1", piece) for piece in merged]
470   
471   
def EscapeAndJoin(slist, sep=","):
  """Encode a list in a way parsable by UnescapeAndSplit.

  Every backslash is doubled and every occurrence of the separator inside
  an element is prefixed with a backslash before the elements are joined.

  @type slist: list of strings
  @param slist: the strings to be encoded
  @type sep: string
  @param sep: the separator placed between the elements
  @rtype: string
  @return: the encoding of the list as a string

  """
  # Plain string replacement is used on purpose: building a regular expression
  # from a backslash and the separator only works for punctuation (with "d" as
  # separator the pattern would match digits instead of the letter)
  return sep.join([v.replace("\\", "\\\\").replace(sep, "\\" + sep)
                   for v in slist])
483   
484   
def CommaJoin(names):
  """Joins a collection of identifiers into a readable, comma-separated list.

  @param names: set, list or tuple; the elements are converted using C{str}
  @return: a string with the elements separated by a comma and a space

  """
  return ", ".join(map(str, names))
493   
494   
def FormatTime(val, usecs=None):
  """Formats a time value.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @type usecs: int or None
  @param usecs: Microseconds to append to the result as a six-digit
    fraction, if given
  @rtype: string
  @return: the local time as "YYYY-MM-DD HH:MM:SS", optionally followed by
    the fraction, or N/A if we don't have a valid timestamp

  """
  if val is None or not isinstance(val, (int, float)):
    return "N/A"

  # The shortcuts "%F %T" are not guaranteed to be available on all
  # platforms, hence the date and time directives are spelled out
  result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

  if usecs is not None:
    result += ".%06d" % usecs

  return result
515   
516   
def FormatSeconds(secs):
  """Renders a duration in a human-readable way.

  The value is rounded to whole seconds and split into days, hours,
  minutes and seconds. Leading units which are zero are left out; values
  which aren't positive are shown as plain seconds.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  pieces = []

  # Negative values would be a bit tricky, hence they're not split
  if remaining > 0:
    for (suffix, length) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
      (count, remaining) = divmod(remaining, length)
      # Once a bigger unit was shown, smaller ones are shown even when zero
      if pieces or count:
        pieces.append("%d%s" % (count, suffix))

  pieces.append("%ds" % remaining)

  return " ".join(pieces)
540   
541   
class LineSplitter(object):
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface. Data passed to L{write} is only
  collected; the line function is called when L{flush} or L{close} is
  invoked.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines waiting to be passed to the line function
    self._lines = collections.deque()
    # Data received after the last newline
    self._buffer = ""

  def write(self, data):
    """Adds data, queueing all lines completed by it.

    @type data: string
    @param data: Chunk of data, can contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # Whatever follows the last newline is an incomplete line
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Calls the line function for all complete lines queued so far.

    Trailing carriage return and newline characters are removed from the
    lines.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes complete lines and emits a trailing incomplete line, if any.

    """
    self.flush()
    if self._buffer:
      # Forget the partial line before handing it out, otherwise calling
      # close() again would emit it a second time
      (last_line, self._buffer) = (self._buffer, "")
      self._line_fn(last_line)
581   
582   
def IsValidShellParam(word):
  """Checks whether a word is safe from the shell's point of view.

  A safe word can be handed to a command via the shell without altering
  the command line, i.e. it reaches the actual command unchanged. The
  check is deliberately stricter than necessary, in order to be on the
  safe side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
600   
601   
def BuildShellCmd(template, *args):
  """Expands a command template using arguments verified to be shell-safe.

  Every argument is checked using L{IsValidShellParam}, i.e. must not
  contain shell metacharacters. Only if all of them pass, the result of
  C{template % args} is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be inserted into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if an argument is not a valid shell
      parameter

  """
  for word in args:
    if IsValidShellParam(word):
      continue

    # Give up at the first offending argument
    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % word)

  return template % args
622   
623   
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd" and 111 becomes
  "111th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: The number followed by its ordinal suffix

  """
  # Numbers whose last two digits are between 11 and 19 always take "th", no
  # matter how large the number is (11th, 112th, 1013th), even though some of
  # them end in 1, 2 or 3
  if 10 < value % 100 < 20:
    suffix = "th"
  else:
    suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")

  return "%s%s" % (value, suffix)
648   
649   
def Truncate(text, length):
  """Shortens a string to a maximum length, marking the cut with an ellipsis.

  Values which aren't strings are converted using C{str} first. Text which
  already fits is returned as is; otherwise the ellipsis counts towards the
  length of the result.

  @type text: string
  @param text: Text
  @type length: integer
  @param length: Desired length; must be larger than the length of the
      ellipsis
  @rtype: string
  @return: Truncated text

  """
  assert length > len(_ASCII_ELLIPSIS)

  # Serialize if necessary
  if not isinstance(text, basestring):
    text = str(text)

  if len(text) > length:
    keep = length - len(_ASCII_ELLIPSIS)
    return text[:keep] + _ASCII_ELLIPSIS

  return text
671   
672   
def FilterEmptyLinesAndComments(text):
  """Returns the lines of a text which are neither empty nor comments.

  Whitespace at the beginning and end of every line is removed before the
  line is examined; lines starting with C{#} are comments.

  @type text: string
  @param text: Input string
  @rtype: list
  @return: the remaining lines, without surrounding whitespace

  """
  result = []

  for raw_line in text.splitlines():
    line = raw_line.strip()
    # Ignore empty lines and comments
    if not line or line.startswith("#"):
      continue
    result.append(line)

  return result
686   
687   
def FormatKeyValue(data):
  """Turns a dictionary into a list of "key=value" strings.

  The entries are ordered by key, so that the result is stable.

  @type data: dict
  @param data: the parameters to format
  @rtype: list of string
  @return: one "key=value" string per dictionary entry

  """
  formatted = []

  for key in sorted(data):
    formatted.append("%s=%s" % (key, data[key]))

  return formatted
698