Package ganeti :: Package utils :: Module text
Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify 
  7  # it under the terms of the GNU General Public License as published by 
  8  # the Free Software Foundation; either version 2 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # This program is distributed in the hope that it will be useful, but 
 12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 14  # General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
 19  # 02110-1301, USA. 
 20   
 21  """Utility functions for manipulating or working with text. 
 22   
 23  """ 
 24   
 25   
 26  import re 
 27  import os 
 28  import time 
 29  import collections 
 30   
 31  from ganeti import errors 
 32   
 33   
 34  #: Unit checker regexp 
 35  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 36   
 37  #: Characters which don't need to be quoted for shell commands 
 38  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 39   
 40  #: Shell param checker regexp 
 41  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 42   
 43  #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026) 
 44  _ASCII_ELLIPSIS = "..." 
 45   
 46  #: MAC address octet 
 47  _MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}" 
 48   
 49   
 50 -def MatchNameComponent(key, name_list, case_sensitive=True): 
 51    """Try to match a name against a list. 
 52   
 53    This function will try to match a name like test1 against a list 
 54    like C{['test1.example.com', 'test2.example.com', ...]}. Against 
 55    this list, I{'test1'} as well as I{'test1.example'} will match, but 
 56    not I{'test1.ex'}. A multiple match will be considered as no match 
 57    at all (e.g. I{'test1'} against C{['test1.example.com', 
 58    'test1.example.org']}), except when the key fully matches an entry 
 59    (e.g. I{'test1'} against C{['test1', 'test1.example.com']}). 
 60   
 61    @type key: str 
 62    @param key: the name to be searched 
 63    @type name_list: list 
 64    @param name_list: the list of strings against which to search the key 
 65    @type case_sensitive: boolean 
 66    @param case_sensitive: whether to provide a case-sensitive match 
 67   
 68    @rtype: None or str 
 69    @return: None if there is no match I{or} if there are multiple matches, 
 70        otherwise the element from the list which matches 
 71   
 72    """ 
 73    if key in name_list: 
 74      return key 
 75   
 76    re_flags = 0 
 77    if not case_sensitive: 
 78      re_flags |= re.IGNORECASE 
 79      key = key.upper() 
 80   
 81    name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags) 
 82   
 83    names_filtered = [] 
 84    string_matches = [] 
 85    for name in name_list: 
 86      if name_re.match(name) is not None: 
 87        names_filtered.append(name) 
 88        if not case_sensitive and key == name.upper(): 
 89          string_matches.append(name) 
 90   
 91    if len(string_matches) == 1: 
 92      return string_matches[0] 
 93    if len(names_filtered) == 1: 
 94      return names_filtered[0] 
 95   
 96    return None 
 97   
 98   
 99 -def _DnsNameGlobHelper(match): 
100    """Helper function for L{DnsNameGlobPattern}. 
101   
102    Returns regular expression pattern for parts of the pattern. 
103   
104    """ 
105    text = match.group(0) 
106   
107    if text == "*": 
108      return "[^.]*" 
109    elif text == "?": 
110      return "[^.]" 
111    else: 
112      return re.escape(text) 
113   
114   
def DnsNameGlobPattern(pattern):
  """Converts a DNS name globbing pattern into a regular expression.

  Only the two wildcards are understood: an asterisk (*) stands for any
  number of characters and a question mark (?) for exactly one character,
  neither of which may be the dot (.) that separates DNS name parts.
  Escape sequences and ranges (e.g. [a-z]) are not supported; such text is
  matched literally.

  The resulting expression is anchored at the leftmost part of the name and
  tolerates additional dot-separated parts after the pattern (e.g. C{*.site}
  also covers names with further components following C{.site}).

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  # Split the pattern into single wildcards and runs of literal text and let
  # the helper translate each piece
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return r"^%s(\..*)?$" % translated
132   
133   
def FormatUnit(value, units):
  """Renders a size given in MiB using the requested unit.

  @type value: int
  @param value: size in MiB (1048576 bytes)
  @type units: char
  @param units: how to format the value:
      - 'h': pick MiB, GiB or TiB depending on the magnitude and append
        the matching suffix (M, G or T)
      - 'm': MiB, no suffix
      - 'g': GiB, no suffix
      - 't': TiB, no suffix
  @rtype: str
  @return: the formatted value; MiB are printed as integers, GiB and TiB
      with one decimal
  @raise errors.ProgrammerError: if C{units} is not one of the above

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  if units == "h":
    # Automatic scaling: choose the unit by magnitude, show it as suffix
    if value < 1024:
      scale = "m"
    elif value < (1024 * 1024):
      scale = "g"
    else:
      scale = "t"
    suffix = scale.upper()
  else:
    scale = units
    suffix = ""

  if scale == "m":
    return "%d%s" % (round(value, 0), suffix)

  if scale == "g":
    scaled = float(value) / 1024
  else:
    scaled = float(value) / 1024 / 1024

  return "%0.1f%s" % (round(scaled, 1), suffix)
168   
169   
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB, rounded up to the next multiple of 4.

  @param input_string: value to parse; it is converted using C{str} first
  @rtype: int
  @return: the size in MiB
  @raise errors.UnitParseError: if the input is malformed or uses an
      unknown unit

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regular expression accepts any mix of digits and dots (e.g.
    # "1.2.3" or "."); report those as a parse error like any other
    # malformed input instead of leaking a ValueError to the caller
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
213   
214   
def ShellQuote(value):
  """Makes a string safe for use as a single POSIX shell argument.

  Values consisting only of characters known to be harmless are returned
  unchanged. Everything else is wrapped in single quotes, with embedded
  single quotes rewritten as C{'\\''} (close quote, escaped quote, reopen
  quote).

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if _SHELL_UNQUOTED_RE.match(value) is None:
    return "'%s'" % value.replace("'", "'\\''")

  return value
228   
229   
def ShellQuoteArgs(args):
  """Builds a shell command line fragment from a list of arguments.

  Each argument is passed through L{ShellQuote}.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments, separated by single spaces

  """
  quoted = map(ShellQuote, args)

  return " ".join(quoted)
240   
241   
class ShellWriter:
  """Writes script lines to a file object while tracking indentation.

  """
  #: Text emitted once per indentation level
  INDENT_STR = "  "

  def __init__(self, fh, indent=True):
    """Sets up the writer.

    @param fh: File-like object providing C{write}
    @type indent: boolean
    @param indent: Whether lines should be indented at all

    """
    self._indent = 0
    self._indent_enabled = indent
    self._fh = fh

  def IncIndent(self):
    """Moves one indentation level deeper.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Moves one indentation level up.

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Emits one line, followed by a newline, to the output file.

    @type txt: string
    @param txt: Line text; used as a format string if C{args} are given
    @param args: Values interpolated into C{txt} using the C{%} operator

    """
    assert self._indent >= 0

    line = txt
    if args:
      line = txt % args

    out = self._fh

    # Empty lines are never indented, to avoid trailing whitespace
    if self._indent_enabled and line:
      out.write(self.INDENT_STR * self._indent)

    out.write(line)
    out.write("\n")
287   
288   
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  This will generate a random secret (read from C{os.urandom}) returning
  an hex string, so that it can be used where an ASCII string is needed.

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: an hex representation of the random sequence, twice as many
      characters as C{numbytes}

  """
  # Imported locally as this is the only user in this module
  import binascii

  # The "hex" codec is only available on Python 2 byte strings;
  # binascii.hexlify produces the same digits on every Python version
  hexed = binascii.hexlify(os.urandom(numbytes))

  if not isinstance(hexed, str):
    # Python 3 returns bytes, callers expect a native string
    hexed = hexed.decode("ascii")

  return hexed
302   
303   
def _MakeMacAddrRegexp(octets):
  """Builds a regular expression for verifying MAC addresses.

  The expression matches exactly C{octets} colon-separated octets and is
  case-insensitive.

  @type octets: integer
  @param octets: How many octets to expect (1-6)
  @return: Compiled regular expression

  """
  assert octets > 0
  assert octets <= 6

  # Anchor with "\Z" instead of "$": the latter also matches just before a
  # trailing newline, which would make "aa:bb:cc:dd:ee:ff\n" pass validation
  return re.compile(r"^%s\Z" % ":".join([_MAC_ADDR_OCTET_RE] * octets),
                    re.I)


#: Regular expression for full MAC address
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Regular expression for half a MAC address
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)
324   
325   
def _MacAddressCheck(check_re, mac, msg):
  """Validates a MAC address (or part of one) against a regular expression.

  @param check_re: Compiled regular expression as returned by C{re.compile}
  @type mac: string
  @param mac: MAC address to be validated
  @type msg: string
  @param msg: Error message (%s will be replaced with MAC address)
  @rtype: string
  @return: The address converted to lower case
  @raise errors.OpPrereqError: If the expression doesn't match the address

  """
  if not check_re.match(mac):
    raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)

  return mac.lower()
340   
341   
def NormalizeAndValidateMac(mac):
  """Validates a six-octet MAC address and converts it to lower case.

  Only the colon-separated notation is accepted.

  @type mac: string
  @param mac: MAC address to be validated
  @rtype: string
  @return: Normalized and validated MAC address
  @raise errors.OpPrereqError: If the MAC address isn't valid

  """
  error_msg = "Invalid MAC address '%s'"

  return _MacAddressCheck(_MAC_CHECK_RE, mac, error_msg)
356   
357   
def NormalizeAndValidateThreeOctetMacPrefix(mac):
  """Validates a three-octet MAC address prefix and lower-cases it.

  The prefix must consist of exactly three colon-separated octets.

  @type mac: string
  @param mac: Prefix to be validated
  @rtype: string
  @return: Normalized and validated prefix
  @raise errors.OpPrereqError: If the MAC address prefix isn't valid

  """
  error_msg = "Invalid MAC address prefix '%s'"

  return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_msg)
373   
374   
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. To this end, unicode input
  is first converted to ASCII using the 'backslashreplace' error handler,
  which gets rid of any non-ASCII chars, and then every byte is processed
  through a loop modelled after the string repr: tab, newline and carriage
  return become C{\\t}, C{\\n} and C{\\r}, any other non-printable byte
  becomes C{\\xNN}. Single quotes and backslashes are deliberately left
  alone (unlike the C{string_escape} codec), which also keeps the result
  stable for input without backslashes.

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    unicode_type = unicode # pylint: disable=E0602
  except NameError:
    # Python 3: "str" is the unicode text type
    unicode_type = str

  if isinstance(text, unicode_type):
    # only if unicode; byte strings are handled below as they are
    text = text.encode("ascii", "backslashreplace")

  parts = []
  for char in text:
    # Iterating a byte string yields integers on Python 3 and one-character
    # strings on Python 2
    if isinstance(char, int):
      c = char
    else:
      c = ord(char)

    if c == 9:
      parts.append(r"\t")
    elif c == 10:
      parts.append(r"\n")
    elif c == 13:
      parts.append(r"\r")
    elif c < 32 or c >= 127: # non-printable
      parts.append("\\x%02x" % (c & 0xff))
    else:
      parts.append(chr(c))

  return "".join(parts)
410   
411   
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be part of an element.
  The escaping rules are (assuming comma being the separator):
    - a plain , separates the elements
    - a sequence \\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  rlist = []

  # Piece ending with an odd number of backslashes, i.e. whose separator was
  # escaped, waiting to be joined with the piece following it
  pending = None

  # We split by sep (with no escaping at this stage) and re-join the pieces
  # which were separated at an escaped separator
  for piece in text.split(sep):
    if pending is not None:
      piece = pending + sep + piece
      pending = None

    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      # The merged piece may itself end with an escaped separator
      pending = piece
    else:
      rlist.append(piece)

  if pending is not None:
    # Trailing backslash with nothing left to join; kept as it is
    rlist.append(pending)

  # Finally, replace backslash-something with something; DOTALL is needed so
  # that an escaped newline is unescaped as well
  unescape_re = re.compile(r"\\(.)", re.DOTALL)

  return [unescape_re.sub(r"\1", v) for v in rlist]
454   
455   
def CommaJoin(names):
  """Joins identifiers into a human-readable, comma-separated string.

  Each element is converted with C{str} first.

  @param names: set, list or tuple
  @return: a string with the elements separated by a comma and a space

  """
  return ", ".join(map(str, names))
464   
465   
def FormatTime(val, usecs=None):
  """Formats a time value.

  The timestamp is shown in local time as C{YYYY-MM-DD HH:MM:SS}.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @type usecs: int or None
  @param usecs: Microseconds to append to the result as a six-digit
    fraction; nothing is appended if None
  @rtype: string
  @return: a string value or N/A if we don't have a valid timestamp

  """
  # None isn't a number either, hence no separate check for it
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled out instead of "%F %T", which mean the same but are extensions
  # not provided by strftime on all platforms
  result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

  if usecs is not None:
    result += ".%06d" % usecs

  return result
486   
487   
def FormatSeconds(secs):
  """Renders a duration in seconds as days, hours, minutes and seconds.

  The value is rounded to whole seconds first. Leading units which are
  zero are left out; once a unit is shown, all smaller ones are shown too.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  chunks = []

  # Values which aren't positive are not broken down, only the seconds part
  # is printed for them
  if remaining > 0:
    for (label, size) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
      (count, remaining) = divmod(remaining, size)
      if chunks or count:
        chunks.append("%d%s" % (count, label))

  chunks.append("%ds" % remaining)

  return " ".join(chunks)
511   
512   
class LineSplitter:
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface (C{write}, C{flush} and
  C{close}). Complete lines are queued by C{write} and handed to the
  callback, without their line ending, when C{flush} or C{close} is called.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines which were not yet passed to the callback
    self._lines = collections.deque()
    # Data received after the last newline
    self._buffer = ""

  def write(self, data):
    """Accepts a chunk of data and queues the lines completed by it.

    @type data: string
    @param data: Data chunk, may contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # Whatever follows the last newline is an incomplete line
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Passes all queued complete lines to the callback.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes queued lines and delivers a trailing incomplete line, if any.

    """
    self.flush()
    if self._buffer:
      # Clear the buffer before calling out, so that closing more than once
      # doesn't deliver the remaining data again
      (pending, self._buffer) = (self._buffer, "")
      self._line_fn(pending)
552   
553   
def IsValidShellParam(word):
  """Checks whether a word can be handed to the shell without quoting.

  A word accepted here can be put on a shell command line and will reach
  the invoked command unmodified, without altering the command line itself.
  The check is deliberately stricter than necessary, to be on the safe
  side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
571   
572   
def BuildShellCmd(template, *args):
  """Expands a command line template after vetting its arguments.

  Every argument has to pass L{IsValidShellParam}, i.e. must not contain
  shell metacharacters. If all of them do, the result of
  C{template % args} is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be inserted into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if an argument is not a valid shell
      parameter

  """
  for word in args:
    if IsValidShellParam(word):
      continue

    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % word)

  return template % args
593   
594   
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd", 111 becomes "111th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: The number followed by its ordinal suffix

  """
  last_digit = value % 10

  # Numbers ending in 11, 12 or 13 take "th" in every hundred (11th, 112th,
  # 1013th), not only below twenty
  if 11 <= value % 100 <= 13:
    suffix = "th"
  elif last_digit == 1:
    suffix = "st"
  elif last_digit == 2:
    suffix = "nd"
  elif last_digit == 3:
    suffix = "rd"
  else:
    suffix = "th"

  return "%s%s" % (value, suffix)
619   
620   
def Truncate(text, length):
  """Shortens text to a maximum length, marking the cut with an ellipsis.

  Values which are not strings are converted using C{str} first. Text
  exceeding the length is cut so that, including the appended ellipsis, it
  is exactly C{length} characters long.

  @type text: string
  @param text: Text
  @type length: integer
  @param length: Desired length, must be larger than the ellipsis
  @rtype: string
  @return: Truncated text

  """
  ellipsis_len = len(_ASCII_ELLIPSIS)

  assert length > ellipsis_len

  # Serialize if necessary
  if not isinstance(text, basestring):
    text = str(text)

  if len(text) > length:
    return text[:length - ellipsis_len] + _ASCII_ELLIPSIS

  return text
642   
643   
def FilterEmptyLinesAndComments(text):
  """Extracts the significant lines of a line-based string.

  Leading and trailing whitespace is removed from every line; lines which
  are empty afterwards or start with a hash sign (#) are dropped.

  @type text: string
  @param text: Input string
  @rtype: list
  @return: The remaining lines, in their original order

  """
  result = []

  for raw_line in text.splitlines():
    stripped = raw_line.strip()

    # Ignore empty lines and comments
    if stripped and not stripped.startswith("#"):
      result.append(stripped)

  return result
657   
658   
def FormatKeyValue(data):
  """Turns a dictionary into a list of "key=value" strings.

  The entries are sorted, which gives the result a stable order.

  @type data: dict
  @param data: The values to format
  @rtype: list of string

  """
  pairs = list(data.items())
  pairs.sort()

  return ["%s=%s" % pair for pair in pairs]
669