Package ganeti :: Package utils :: Module text
Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # All rights reserved. 
  6  # 
  7  # Redistribution and use in source and binary forms, with or without 
  8  # modification, are permitted provided that the following conditions are 
  9  # met: 
 10  # 
 11  # 1. Redistributions of source code must retain the above copyright notice, 
 12  # this list of conditions and the following disclaimer. 
 13  # 
 14  # 2. Redistributions in binary form must reproduce the above copyright 
 15  # notice, this list of conditions and the following disclaimer in the 
 16  # documentation and/or other materials provided with the distribution. 
 17  # 
 18  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
 19  # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 20  # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 21  # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
 22  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
 23  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 24  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 25  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 26  # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 27  # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 28  # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 29   
 30  """Utility functions for manipulating or working with text. 
 31   
 32  """ 
 33   
 34   
 35  import re 
 36  import os 
 37  import time 
 38  import collections 
 39   
 40  from ganeti import errors 
 41  from ganeti import compat 
 42   
 43   
 44  #: Unit checker regexp 
 45  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 46   
 47  #: Characters which don't need to be quoted for shell commands 
 48  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 49   
 50  #: Shell param checker regexp 
 51  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 52   
 53  #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026) 
 54  _ASCII_ELLIPSIS = "..." 
 55   
 56  #: MAC address octet 
 57  _MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}" 
 58   
 59   
 60 -def MatchNameComponent(key, name_list, case_sensitive=True): 
 61    """Try to match a name against a list. 
 62   
 63    This function will try to match a name like test1 against a list 
 64    like C{['test1.example.com', 'test2.example.com', ...]}. Against 
 65    this list, I{'test1'} as well as I{'test1.example'} will match, but 
 66    not I{'test1.ex'}. A multiple match will be considered as no match 
 67    at all (e.g. I{'test1'} against C{['test1.example.com', 
 68    'test1.example.org']}), except when the key fully matches an entry 
 69    (e.g. I{'test1'} against C{['test1', 'test1.example.com']}). 
 70   
 71    @type key: str 
 72    @param key: the name to be searched 
 73    @type name_list: list 
 74    @param name_list: the list of strings against which to search the key 
 75    @type case_sensitive: boolean 
 76    @param case_sensitive: whether to provide a case-sensitive match 
 77   
 78    @rtype: None or str 
 79    @return: None if there is no match I{or} if there are multiple matches, 
 80        otherwise the element from the list which matches 
 81   
 82    """ 
 83    if key in name_list: 
 84      return key 
 85   
 86    re_flags = 0 
 87    if not case_sensitive: 
 88      re_flags |= re.IGNORECASE 
 89      key = key.upper() 
 90   
 91    name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags) 
 92   
 93    names_filtered = [] 
 94    string_matches = [] 
 95    for name in name_list: 
 96      if name_re.match(name) is not None: 
 97        names_filtered.append(name) 
 98        if not case_sensitive and key == name.upper(): 
 99          string_matches.append(name) 
100   
101    if len(string_matches) == 1: 
102      return string_matches[0] 
103    if len(names_filtered) == 1: 
104      return names_filtered[0] 
105   
106    return None 
107   
108   
def _DnsNameGlobHelper(match):
  """Translates one piece of a globbing pattern for L{DnsNameGlobPattern}.

  @param match: regular expression match object whose text is either a
      single wildcard character or a run of non-wildcard characters
  @rtype: string
  @return: regular expression fragment for the matched text

  """
  token = match.group(0)

  if token == "?":
    # Exactly one character, but never the label separator
    return "[^.]"

  if token == "*":
    # Any number of characters within a single label
    return "[^.]*"

  # Everything else is literal text
  return re.escape(token)


def DnsNameGlobPattern(pattern):
  """Converts a DNS name globbing pattern into a regular expression.

  The pattern (e.g. C{*.site}) is anchored at the leftmost part of the
  name; the resulting expression also accepts further dot-separated parts
  after the text covered by the pattern. An asterisk (*) stands for any
  number of characters other than the dot (.) separating DNS name parts, a
  question mark (?) for exactly one such character. Escape sequences and
  ranges (e.g. [a-z]) are not supported, they are taken literally.

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return r"^%s(\..*)?$" % translated
142   
143   
def FormatUnit(value, units, roman=False):
  """Formats a number of MiB using the requested unit.

  @type value: int
  @param value: integer representing the value in MiB (1048576)
  @type units: char
  @param units: the type of formatting we should do:
      - 'h' for automatic scaling (a unit suffix is appended)
      - 'm' for MiBs
      - 'g' for GiBs
      - 't' for TiBs
  @param roman: passed through to L{compat.RomanOrRounded}
  @rtype: str
  @return: the formatted value (with suffix)
  @raise errors.ProgrammerError: if the unit or the value is invalid

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  if not isinstance(value, (int, long, float)):
    raise errors.ProgrammerError("Invalid value specified '%s (%s)'" % (
        value, type(value)))

  if units == "h":
    # Automatic scaling: pick the unit by magnitude and show its suffix
    if value < 1024:
      (scale, suffix) = ("m", "M")
    elif value < (1024 * 1024):
      (scale, suffix) = ("g", "G")
    else:
      (scale, suffix) = ("t", "T")
  else:
    # Explicitly requested unit: no suffix
    (scale, suffix) = (units, "")

  if scale == "m":
    formatted = compat.RomanOrRounded(value, 0, roman)
  elif scale == "g":
    formatted = compat.RomanOrRounded(float(value) / 1024, 1, roman)
  else:
    formatted = compat.RomanOrRounded(float(value) / 1024 / 1024, 1, roman)

  return "%s%s" % (formatted, suffix)
184   
185   
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB, rounded up to the next multiple of 4.

  @param input_string: value to parse; it is converted using C{str} first
  @rtype: int
  @return: the size in MiB
  @raise errors.UnitParseError: if the input is malformed or uses an
      unknown unit

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # Text made of digits and dots is not necessarily a number (e.g. "1.2.3"
    # or "."); report it like any other malformed input
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
229   
230   
def ShellQuote(value):
  """Quotes a single shell argument according to POSIX.

  Values consisting only of characters known to be harmless are returned
  unchanged; everything else (including the empty string) is wrapped in
  single quotes, with embedded single quotes escaped.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if _SHELL_UNQUOTED_RE.match(value) is None:
    # Close the quoted string, add an escaped quote, reopen the quoted string
    escaped = value.replace("'", "'\\''")
    return "'%s'" % escaped

  return value
244   
245   
def ShellQuoteArgs(args):
  """Quotes every argument of a list and joins them with spaces.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments concatenated with spaces

  """
  quoted = map(ShellQuote, args)

  return " ".join(quoted)
256   
257   
def ShellCombineCommands(cmdlist):
  """Builds a single shell invocation out of a list of commands.

  Each command is a list of arguments. The commands are quoted and chained
  using C{&&}.

  @type cmdlist: list
  @param cmdlist: list of commands, each one a list of arguments
  @rtype: list
  @return: argument list starting with C{/bin/sh -c}

  """
  quoted_cmds = [ShellQuoteArgs(cmd) for cmd in cmdlist]

  return ["/bin/sh", "-c", " && ".join(quoted_cmds)]
263   
264   
class ShellWriter(object):
  """Writes script lines to a file object, taking care of indentation.

  """
  #: Text written once per indentation level
  INDENT_STR = "  "

  def __init__(self, fh, indent=True):
    """Initializes this class.

    @param fh: object with a C{write} method receiving the output
    @type indent: boolean
    @param indent: whether lines should be indented at all

    """
    self._indent = 0
    self._indent_enabled = indent
    self._fh = fh

  def IncIndent(self):
    """Moves one indentation level deeper.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Moves one indentation level back.

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Writes one line, followed by a newline, to the output file.

    @type txt: string
    @param txt: the line; used as a format string if C{args} are given
    @param args: values to be formatted into C{txt}

    """
    assert self._indent >= 0

    line = txt
    if args:
      line = txt % args

    emit = self._fh.write

    # Empty lines are never indented
    if self._indent_enabled and line:
      emit(self.INDENT_STR * self._indent)

    emit(line)
    emit("\n")
310   
311   
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  The secret is read from C{os.urandom} and returned as a hex string (so
  that it can be used where an ASCII string is needed).

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: a hex representation of the random bytes (two characters per
      byte)

  """
  # Imported here to keep this function self-contained
  import binascii

  # The "hex" codec is only available on Python 2 byte strings;
  # binascii.hexlify works everywhere
  hexed = binascii.hexlify(os.urandom(numbytes))

  if not isinstance(hexed, str):
    # Python 3 returns bytes, callers expect a native string
    hexed = hexed.decode("ascii")

  return hexed
325   
326   
def _MakeMacAddrRegexp(octets):
  """Builds a regular expression for verifying MAC addresses.

  The expression matches exactly C{octets} colon-separated octets of two
  hexadecimal digits each, ignoring case.

  @type octets: integer
  @param octets: How many octets to expect (1-6)
  @return: Compiled regular expression

  """
  assert octets > 0
  assert octets <= 6

  # "\Z" is used instead of "$" as the latter also matches just before a
  # trailing newline, which would let "aa:bb:cc\n" pass the check
  return re.compile(r"^%s\Z" % ":".join([_MAC_ADDR_OCTET_RE] * octets),
                    re.I)


#: Regular expression for full MAC address
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Regular expression for half a MAC address
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)
347   
348   
def _MacAddressCheck(check_re, mac, msg):
  """Validates a MAC address against a regular expression.

  @param check_re: Compiled regular expression as returned by C{re.compile}
  @type mac: string
  @param mac: MAC address to be validated
  @type msg: string
  @param msg: Error message (%s will be replaced with MAC address)
  @rtype: string
  @return: the MAC address converted to lower case
  @raise errors.OpPrereqError: if the address doesn't match the expression

  """
  if not check_re.match(mac):
    raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)

  return mac.lower()
363   
364   
def NormalizeAndValidateMac(mac):
  """Validates a full (six octets) MAC address and normalizes it.

  Only the colon-separated format is accepted. The validated address is
  returned in all lower case.

  @type mac: string
  @param mac: MAC address to be validated
  @rtype: string
  @return: Normalized and validated MAC address
  @raise errors.OpPrereqError: If the MAC address isn't valid

  """
  error_msg = "Invalid MAC address '%s'"

  return _MacAddressCheck(_MAC_CHECK_RE, mac, error_msg)
379   
380   
def NormalizeAndValidateThreeOctetMacPrefix(mac):
  """Validates a MAC address prefix (three octets) and normalizes it.

  The prefix must consist of three colon-separated octets. The validated
  prefix is returned in all lower case.

  @type mac: string
  @param mac: Prefix to be validated
  @rtype: string
  @return: Normalized and validated prefix
  @raise errors.OpPrereqError: If the MAC address prefix isn't valid

  """
  error_msg = "Invalid MAC address prefix '%s'"

  return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_msg)
396   
397   
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. Unicode input is first
  converted to ASCII using the 'backslashreplace' error handler, which
  gets rid of any non-ASCII chars. Afterwards tab, newline and carriage
  return are written as C{\\t}, C{\\n} and C{\\r}, and all other
  non-printable characters as C{\\xNN}. The 'string_escape' codec is not
  used since it escapes single quotes and backslashes too, and that is
  too much; and that escaping is not stable, i.e.
  string_escape(string_escape(x)) != string_escape(x).

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    unicode_type = unicode # pylint: disable=E0602
  except NameError:
    # Python 3 has no separate unicode type
    unicode_type = str

  if isinstance(text, unicode_type):
    # only if unicode; byte strings are handled by the loop below
    text = text.encode("ascii", "backslashreplace")

  pieces = []
  for char in text:
    if isinstance(char, int):
      # Iterating over a Python 3 byte string yields integers
      code = char
    else:
      code = ord(char)

    if code == 0x09:
      pieces.append(r"\t")
    elif code == 0x0a:
      pieces.append(r"\n")
    elif code == 0x0d:
      pieces.append(r"\r")
    elif code < 32 or code >= 127: # non-printable
      pieces.append("\\x%02x" % (code & 0xff))
    else:
      pieces.append(chr(code))

  return "".join(pieces)
433   
434   
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  The separator can be escaped with a backslash in order to become part of
  an element. Assuming a comma as separator, the rules are:
    - a plain C{,} separates two elements
    - C{\,} (backslash plus comma) is a comma belonging to the element
    - C{\\,} (double backslash plus comma) is a backslash belonging to the
      element, followed by a separating comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  merged = []

  # Piece ending in an escaped separator, waiting to be joined with the piece
  # following it
  pending = None

  # Split without looking at escapes first, then glue back together what was
  # split at an escaped separator
  for piece in text.split(sep):
    if pending is not None:
      piece = pending + sep + piece
      pending = None

    # An odd number of trailing backslashes means the last one escapes the
    # separator which followed this piece
    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      pending = piece
    else:
      merged.append(piece)

  if pending is not None:
    # Nothing left to join with; the backslashes are kept as they are
    merged.append(pending)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", v) for v in merged]
477   
478   
def EscapeAndJoin(slist, sep=","):
  """Encode a list in a way parsable by UnescapeAndSplit.

  Backslashes within the elements are doubled and occurrences of the
  separator are prefixed with a backslash before the elements are joined.

  @type slist: list of strings
  @param slist: the strings to be encoded
  @type sep: string
  @param sep: the separator
  @rtype: string
  @return: the encoding of the list as a string

  """
  # Plain string replacement is used on purpose: building a regular expression
  # out of the separator gives it a special meaning for values such as "n"
  # (newline) or "1" (group reference)
  escaped = [v.replace("\\", "\\\\").replace(sep, "\\" + sep) for v in slist]

  return sep.join(escaped)
490   
491   
def CommaJoin(names):
  """Joins identifiers into a comma-separated, human-readable string.

  Every element is converted using C{str}.

  @param names: set, list or tuple
  @return: a string with the formatted results

  """
  return ", ".join(map(str, names))
500   
501   
def FormatTime(val, usecs=None):
  """Formats a time value.

  The timestamp is shown in local time as C{YYYY-MM-DD HH:MM:SS}.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @type usecs: int or None
  @param usecs: if given, appended to the result as six-digit fraction of
    a second
  @return: a string value or N/A if we don't have a valid timestamp

  """
  if not isinstance(val, (int, float)):
    # Also covers None
    return "N/A"

  # Spelled-out form of "%F %T"; those two codes work on Linux, but they are
  # not guaranteed on all platforms
  result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

  if usecs is not None:
    result += ".%06d" % usecs

  return result
522   
523   
def FormatSeconds(secs):
  """Formats a duration given in seconds for easier reading.

  The value is rounded to whole seconds and split into days, hours,
  minutes and seconds. Leading units which are zero are left out.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  pieces = []

  # Zero and negative values are shown as plain seconds
  if remaining > 0:
    for (suffix, length) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
      (count, remaining) = divmod(remaining, length)
      # Once a bigger unit was shown, all smaller ones are shown too
      if pieces or count:
        pieces.append("%d%s" % (count, suffix))

  pieces.append("%ds" % remaining)

  return " ".join(pieces)
547   
548   
class LineSplitter(object):
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface. Data passed to L{write} is only
  collected; the callback is invoked once per line by L{flush} and
  L{close}.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines not yet passed to the callback
    self._lines = collections.deque()
    # Data received after the last newline
    self._buffer = ""

  def write(self, data):
    """Adds data, queueing every line completed by it.

    @type data: string
    @param data: chunk of data; may contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # The last part is not terminated by a newline yet
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Passes all complete lines to the callback.

    Trailing carriage returns are removed from the lines.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes complete lines and passes on a trailing partial line.

    The partial line is handed over only once, no matter how often this
    method is called.

    """
    self.flush()
    if self._buffer:
      # Reset the buffer before calling out so the data can't be delivered
      # a second time
      (tail, self._buffer) = (self._buffer, "")
      self._line_fn(tail)
588   
589   
def IsValidShellParam(word):
  """Checks whether the given word is safe from the shell's p.o.v.

  A safe word can be passed to a command via the shell without altering
  the command line; it reaches the actual command unchanged.

  The check is deliberately more restrictive than strictly necessary, in
  order to be on the safe side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
607   
608   
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  Every argument is checked using L{IsValidShellParam} first (i.e. it must
  not contain shell metacharacters). If all of them pass, the result of
  C{template % args} is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be inserted into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if an argument is not a valid shell
      parameter

  """
  for arg in args:
    if IsValidShellParam(arg):
      continue

    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % arg)

  return template % args
629   
630   
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd" and 111 becomes
  "111th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: the number followed by its ordinal suffix

  """
  # Numbers ending in 11 to 19 always take "th", no matter what the hundreds
  # are (11th, 112th, 1013th)
  if 10 < value % 100 < 20:
    suffix = "th"
  else:
    suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")

  return "%s%s" % (value, suffix)
655   
656   
def Truncate(text, length):
  """Shortens a string to a maximum length, marking the cut with an ellipsis.

  Values which are not strings are converted using C{str} first. Text
  fitting into the desired length is returned as is; longer text is cut
  so that the result, including the ellipsis, is exactly C{length}
  characters long.

  @type text: string
  @param text: Text
  @type length: integer
  @param length: Desired length; must be bigger than the length of the
      ellipsis
  @rtype: string
  @return: Truncated text

  """
  assert length > len(_ASCII_ELLIPSIS)

  # Serialize if necessary
  if not isinstance(text, basestring):
    text = str(text)

  if len(text) > length:
    keep = length - len(_ASCII_ELLIPSIS)
    return text[:keep] + _ASCII_ELLIPSIS

  return text
678   
679   
def FilterEmptyLinesAndComments(text):
  """Returns the lines of a string which are neither empty nor comments.

  Whitespace at the beginning and end of each line is removed before the
  line is examined. Lines starting with C{#} are comments.

  @type text: string
  @param text: Input string
  @rtype: list
  @return: the remaining lines, without surrounding whitespace

  """
  result = []

  for raw_line in text.splitlines():
    line = raw_line.strip()

    # Ignore empty lines and comments
    if not line or line.startswith("#"):
      continue

    result.append(line)

  return result
693   
694   
def FormatKeyValue(data):
  """Turns a dictionary into a list of "key=value" strings.

  The entries are sorted to get a stable order.

  @type data: dict
  @param data: the dictionary to format
  @rtype: list of string
  @return: one "key=value" string per dictionary entry

  """
  pairs = list(data.items())
  pairs.sort()

  return ["%s=%s" % pair for pair in pairs]
705