Package ganeti :: Package utils :: Module text
Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify 
  7  # it under the terms of the GNU General Public License as published by 
  8  # the Free Software Foundation; either version 2 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # This program is distributed in the hope that it will be useful, but 
 12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 14  # General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
 19  # 02110-1301, USA. 
 20   
 21  """Utility functions for manipulating or working with text. 
 22   
 23  """ 
 24   
 25   
 26  import re 
 27  import os 
 28  import time 
 29  import collections 
 30   
 31  from ganeti import errors 
 32   
 33   
 34  #: Unit checker regexp 
 35  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 36   
 37  #: Characters which don't need to be quoted for shell commands 
 38  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 39   
 40  #: MAC checker regexp 
 41  _MAC_CHECK_RE = re.compile("^([0-9a-f]{2}:){5}[0-9a-f]{2}$", re.I) 
 42   
 43  #: Shell param checker regexp 
 44  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 45   
 46  #: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026) 
 47  _ASCII_ELLIPSIS = "..." 
 48   
 49   
 50 -def MatchNameComponent(key, name_list, case_sensitive=True): 
 51    """Try to match a name against a list. 
 52   
 53    This function will try to match a name like test1 against a list 
 54    like C{['test1.example.com', 'test2.example.com', ...]}. Against 
 55    this list, I{'test1'} as well as I{'test1.example'} will match, but 
 56    not I{'test1.ex'}. A multiple match will be considered as no match 
 57    at all (e.g. I{'test1'} against C{['test1.example.com', 
 58    'test1.example.org']}), except when the key fully matches an entry 
 59    (e.g. I{'test1'} against C{['test1', 'test1.example.com']}). 
 60   
 61    @type key: str 
 62    @param key: the name to be searched 
 63    @type name_list: list 
 64    @param name_list: the list of strings against which to search the key 
 65    @type case_sensitive: boolean 
 66    @param case_sensitive: whether to provide a case-sensitive match 
 67   
 68    @rtype: None or str 
 69    @return: None if there is no match I{or} if there are multiple matches, 
 70        otherwise the element from the list which matches 
 71   
 72    """ 
 73    if key in name_list: 
 74      return key 
 75   
 76    re_flags = 0 
 77    if not case_sensitive: 
 78      re_flags |= re.IGNORECASE 
 79      key = key.upper() 
 80   
 81    name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags) 
 82   
 83    names_filtered = [] 
 84    string_matches = [] 
 85    for name in name_list: 
 86      if name_re.match(name) is not None: 
 87        names_filtered.append(name) 
 88        if not case_sensitive and key == name.upper(): 
 89          string_matches.append(name) 
 90   
 91    if len(string_matches) == 1: 
 92      return string_matches[0] 
 93    if len(names_filtered) == 1: 
 94      return names_filtered[0] 
 95   
 96    return None 
 97   
 98   
 99 -def _DnsNameGlobHelper(match): 
100    """Helper function for L{DnsNameGlobPattern}. 
101   
102    Returns regular expression pattern for parts of the pattern. 
103   
104    """ 
105    text = match.group(0) 
106   
107    if text == "*": 
108      return "[^.]*" 
109    elif text == "?": 
110      return "[^.]" 
111    else: 
112      return re.escape(text) 
113   
114   
def DnsNameGlobPattern(pattern):
  """Converts a DNS name globbing pattern into a regular expression.

  Only the wildcards C{*} and C{?} are understood; escape sequences or
  ranges (e.g. [a-z]) are not supported and are taken literally.

  The resulting expression is anchored at the leftmost part of the name. An
  asterisk (*) stands for any number of characters other than the dot (.)
  separating DNS name parts, a question mark (?) for exactly one such
  character. Further name parts may follow the part matched by the pattern.

  @type pattern: string
  @param pattern: DNS name globbing pattern (e.g. C{*.site})
  @rtype: string
  @return: Regular expression

  """
  # Tokenize into single wildcards and runs of literal text and translate
  # each token on its own
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return r"^%s(\..*)?$" % translated
132   
133   
def FormatUnit(value, units):
  """Formats a number of MiB using the requested unit.

  @type value: int
  @param value: integer representing the value in MiB (1048576)
  @type units: char
  @param units: the type of formatting we should do:
      - 'h' for automatic scaling (the result carries a unit suffix)
      - 'm' for MiBs
      - 'g' for GiBs
      - 't' for TiBs
  @rtype: str
  @return: the formatted value; a suffix (M, G or T) is only appended for
      automatic scaling

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  if units == "h":
    # Automatic scaling: choose the unit by magnitude and say which one
    if value < 1024:
      scale = "m"
    elif value < (1024 * 1024):
      scale = "g"
    else:
      scale = "t"
    suffix = scale.upper()
  else:
    scale = units
    suffix = ""

  if scale == "m":
    return "%d%s" % (round(value, 0), suffix)

  if scale == "g":
    return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)

  return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
168   
169   
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB, rounded up to the next multiple of 4.

  @param input_string: the value to parse; it is converted to a string
      before being examined
  @rtype: int
  @return: the size in MiB
  @raise errors.UnitParseError: if the input is malformed or the unit is
      not known

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regular expression accepts any mix of digits and dots (e.g. "1.2.3"
    # or "."); report those like any other malformed input instead of
    # leaking a ValueError to callers which only expect UnitParseError
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
213   
214   
def ShellQuote(value):
  """Quotes a single shell argument according to POSIX.

  Values consisting only of characters known to be harmless are returned
  unchanged; everything else is wrapped in single quotes.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if not _SHELL_UNQUOTED_RE.match(value):
    # A single quote can't be escaped inside single quotes, hence the quoted
    # string is closed, an escaped quote added and the string reopened
    return "'%s'" % value.replace("'", "'\\''")

  return value
228   
229   
def ShellQuoteArgs(args):
  """Quotes every argument of a list for use in a shell command.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments concatenated with spaces

  """
  return " ".join(ShellQuote(arg) for arg in args)
240   
241   
class ShellWriter:
  """Writes lines of a script to a file object, keeping track of indentation.

  """
  #: Text written once per indentation level in front of non-empty lines
  INDENT_STR = "  "

  def __init__(self, fh):
    """Initializes this class.

    @param fh: object with a C{write} method receiving the output

    """
    self._fh = fh
    self._indent = 0

  def IncIndent(self):
    """Indent following lines by one more level.

    """
    self._indent += 1

  def DecIndent(self):
    """Indent following lines by one level less.

    """
    assert self._indent > 0
    self._indent -= 1

  def Write(self, txt, *args):
    """Write one line, followed by a newline, to the output file.

    @param txt: the line; used as a format string if C{args} are given
    @param args: values for the C{%} formatting of C{txt}

    """
    assert self._indent >= 0

    if args:
      line = txt % args
    else:
      line = txt

    chunks = [line, "\n"]
    if line:
      # Empty lines are not indented to avoid trailing whitespace
      chunks.insert(0, self._indent * self.INDENT_STR)

    for chunk in chunks:
      self._fh.write(chunk)
286   
287   
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  The random bytes are read from C{os.urandom} and returned as a hex
  string (so that it can be used where an ASCII string is needed).

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: a hex representation of the random sequence, twice as long as
      C{numbytes}

  """
  # Imported here to keep this function self-contained; the "hex" string
  # codec used previously only exists in Python 2
  import binascii

  secret = binascii.hexlify(os.urandom(numbytes))

  if not isinstance(secret, str):
    # Interpreters with a separate bytes type return bytes from hexlify
    secret = secret.decode("ascii")

  return secret
301   
302   
def NormalizeAndValidateMac(mac):
  """Validates a MAC address and returns its normalized form.

  Only the colon-separated format is accepted (six groups of two
  hexadecimal digits, in either case). The normalized form is all lower
  case.

  @type mac: str
  @param mac: the MAC to be validated
  @rtype: str
  @return: returns the normalized and validated MAC.

  @raise errors.OpPrereqError: If the MAC isn't valid

  """
  if _MAC_CHECK_RE.match(mac):
    return mac.lower()

  raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
                             errors.ECODE_INVAL)
322   
323   
def SafeEncode(text):
  r"""Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. To this end, unicode input
  is first converted to ASCII using the 'backslashreplace' error handler,
  which gets rid of any non-ASCII chars. Afterwards tab, newline and
  carriage return are replaced by C{\t}, C{\n} and C{\r} and all other
  non-printable characters by C{\xNN}.

  The "string_escape" codec is not used since it escapes single quotes
  and backslashes too, and that is too much; and that escaping is not
  stable, i.e. string_escape(string_escape(x)) != string_escape(x).

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    is_unicode = isinstance(text, unicode)
  except NameError:
    # Interpreters without a separate "unicode" type: "str" is the text type
    is_unicode = isinstance(text, str)

  if is_unicode:
    # only if unicode; if str already, we handle it below
    text = text.encode("ascii", "backslashreplace")

  if not isinstance(text, str) and hasattr(text, "decode"):
    # Byte strings which are not "str" iterate as integers; map every byte to
    # the character with the same code so the loop below sees characters
    text = text.decode("latin-1")

  parts = []
  for char in text:
    c = ord(char)
    if char == "\t":
      parts.append(r"\t")
    elif char == "\n":
      parts.append(r"\n")
    elif char == "\r":
      parts.append(r"\r")
    elif c < 32 or c >= 127: # non-printable
      parts.append("\\x%02x" % (c & 0xff))
    else:
      parts.append(char)

  return "".join(parts)
359   
360   
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be part of an element.
  The escaping rules are (assuming comma being the separator):
    - a plain , separates the elements
    - a sequence \\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  # Split at every separator first, no matter whether it is escaped
  pieces = text.split(sep)
  count = len(pieces)

  merged = []
  idx = 0
  while idx < count:
    current = pieces[idx]
    idx += 1

    # An odd number of trailing backslashes means the separator which followed
    # was escaped: glue the next piece back on. The check is repeated as the
    # piece just added might end in an escaped separator, too.
    while (idx < count and
           (len(current) - len(current.rstrip("\\"))) % 2 == 1):
      current = current + sep + pieces[idx]
      idx += 1

    # All backslashes are still in place here, they are reduced below
    merged.append(current)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", item) for item in merged]
403   
404   
def CommaJoin(names):
  """Joins the string form of all items using a comma and a space.

  @param names: set, list or tuple
  @return: a string with the formatted results

  """
  return ", ".join(map(str, names))
413   
414   
def FormatTime(val, usecs=None):
  """Formats a time value.

  The timestamp is shown in local time as C{YYYY-MM-DD HH:MM:SS}.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @type usecs: int or None
  @param usecs: Microseconds to append after the seconds, if any
  @rtype: str
  @return: a string value or N/A if we don't have a valid timestamp

  """
  # None is covered by this check, too
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled-out equivalent of "%F %T"; those two codes are not guaranteed to
  # be available on all platforms
  result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

  if usecs is not None:
    result += ".%06d" % usecs

  return result
435   
436   
def FormatSeconds(secs):
  """Formats a duration given in seconds as days, hours, minutes, seconds.

  Leading units which are zero are left out. The value is rounded to
  whole seconds first.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  pieces = []

  # Zero and negative values are shown as plain seconds only
  if remaining > 0:
    for (label, size) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
      (count, remaining) = divmod(remaining, size)
      # Once a unit has been shown, all smaller ones are shown as well
      if pieces or count:
        pieces.append("%d%s" % (count, label))

  pieces.append("%ds" % remaining)

  return " ".join(pieces)
460   
461   
class LineSplitter:
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface: data is collected using
  L{write} and complete lines are handed to a callback by L{flush} and
  L{close}.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines which were not yet passed to the callback
    self._lines = collections.deque()
    # Data received after the last newline
    self._buffer = ""

  def write(self, data):
    """Adds data; lines are only queued here, not passed on.

    @type data: string
    @param data: chunk of data, may contain any number of newlines

    """
    parts = (self._buffer + data).split("\n")
    # The last part is not terminated by a newline (yet)
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Passes all complete lines to the callback.

    Trailing carriage return and newline characters are removed from the
    lines. Data after the last newline is kept back.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes all lines, including an unterminated last line.

    Calling this method again without writing new data has no effect.

    """
    self.flush()
    if self._buffer:
      # Empty the buffer before calling the callback so the unterminated
      # line is passed on exactly once, even if close is called again
      last = self._buffer
      self._buffer = ""
      self._line_fn(last)
501   
502   
def IsValidShellParam(word):
  """Checks whether a word can be passed through the shell unmodified.

  A valid word does not alter the command line it is used in and reaches
  the actual command as such.

  The check is overly restrictive on purpose, in order to be on the safe
  side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
520   
521   
def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  Every argument is verified to be a valid shell parameter (i.e. not to
  contain shell metacharacters) before the template is expanded using
  C{template % args}.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be inserted into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: if an argument is not a valid shell
      parameter

  """
  for arg in args:
    if IsValidShellParam(arg):
      continue

    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % arg)

  return template % args
542   
543   
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd" and 112 becomes
  "112th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: the number followed by its ordinal suffix

  """
  # Numbers ending in 11 to 19 always use "th" ("11th", "112th"), hence the
  # last two digits need to be examined and not just the value itself
  if 10 < (value % 100) < 20:
    suffix = "th"
  else:
    suffix = {
      1: "st",
      2: "nd",
      3: "rd",
      }.get(value % 10, "th")

  return "%s%s" % (value, suffix)
568   
569   
def Truncate(text, length):
  """Shortens a string to a maximum length, marking the cut by an ellipsis.

  Values which are not strings are converted using C{str} first. Text
  which fits into the desired length is returned as is.

  @type text: string
  @param text: Text
  @type length: integer
  @param length: Desired length, must be larger than the length of the
      ellipsis
  @rtype: string
  @return: Truncated text

  """
  assert length > len(_ASCII_ELLIPSIS)

  # Serialize if necessary
  if not isinstance(text, basestring):
    text = str(text)

  if len(text) > length:
    # The ellipsis counts towards the desired length
    return text[:length - len(_ASCII_ELLIPSIS)] + _ASCII_ELLIPSIS

  return text
591   
592   
def FormatKeyValue(data):
  """Turns a dictionary into a list of "key=value" strings.

  The entries are sorted to have a stable order.

  @type data: dict
  @param data: the dictionary to format
  @rtype: list of string
  @return: one "key=value" string per dictionary entry

  """
  pairs = sorted(data.items())

  return ["%s=%s" % pair for pair in pairs]
603