Package ganeti :: Package utils :: Module text
Source Code for Module ganeti.utils.text

  1  # 
  2  # 
  3   
  4  # Copyright (C) 2006, 2007, 2010, 2011 Google Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify 
  7  # it under the terms of the GNU General Public License as published by 
  8  # the Free Software Foundation; either version 2 of the License, or 
  9  # (at your option) any later version. 
 10  # 
 11  # This program is distributed in the hope that it will be useful, but 
 12  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 14  # General Public License for more details. 
 15  # 
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
 19  # 02110-1301, USA. 
 20   
 21  """Utility functions for manipulating or working with text. 
 22   
 23  """ 
 24   
 25   
 26  import re 
 27  import os 
 28  import time 
 29  import collections 
 30   
 31  from ganeti import errors 
 32   
 33   
 34  #: Unit checker regexp 
 35  _PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$") 
 36   
 37  #: Characters which don't need to be quoted for shell commands 
 38  _SHELL_UNQUOTED_RE = re.compile("^[-.,=:/_+@A-Za-z0-9]+$") 
 39   
 40  #: MAC checker regexp 
 41  _MAC_CHECK_RE = re.compile("^([0-9a-f]{2}:){5}[0-9a-f]{2}$", re.I) 
 42   
 43  #: Shell param checker regexp 
 44  _SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+$") 
 45   
 46   
 47 -def MatchNameComponent(key, name_list, case_sensitive=True): 
 48    """Try to match a name against a list. 
 49   
 50    This function will try to match a name like test1 against a list 
 51    like C{['test1.example.com', 'test2.example.com', ...]}. Against 
 52    this list, I{'test1'} as well as I{'test1.example'} will match, but 
 53    not I{'test1.ex'}. A multiple match will be considered as no match 
 54    at all (e.g. I{'test1'} against C{['test1.example.com', 
 55    'test1.example.org']}), except when the key fully matches an entry 
 56    (e.g. I{'test1'} against C{['test1', 'test1.example.com']}). 
 57   
 58    @type key: str 
 59    @param key: the name to be searched 
 60    @type name_list: list 
 61    @param name_list: the list of strings against which to search the key 
 62    @type case_sensitive: boolean 
 63    @param case_sensitive: whether to provide a case-sensitive match 
 64   
 65    @rtype: None or str 
 66    @return: None if there is no match I{or} if there are multiple matches, 
 67        otherwise the element from the list which matches 
 68   
 69    """ 
 70    if key in name_list: 
 71      return key 
 72   
 73    re_flags = 0 
 74    if not case_sensitive: 
 75      re_flags |= re.IGNORECASE 
 76      key = key.upper() 
 77   
 78    name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags) 
 79   
 80    names_filtered = [] 
 81    string_matches = [] 
 82    for name in name_list: 
 83      if name_re.match(name) is not None: 
 84        names_filtered.append(name) 
 85        if not case_sensitive and key == name.upper(): 
 86          string_matches.append(name) 
 87   
 88    if len(string_matches) == 1: 
 89      return string_matches[0] 
 90    if len(names_filtered) == 1: 
 91      return names_filtered[0] 
 92   
 93    return None 
 94   
 95   
 96 -def _DnsNameGlobHelper(match): 
 97    """Helper function for L{DnsNameGlobPattern}. 
 98   
 99    Returns regular expression pattern for parts of the pattern. 
100   
101    """ 
102    text = match.group(0) 
103   
104    if text == "*": 
105      return "[^.]*" 
106    elif text == "?": 
107      return "[^.]" 
108    else: 
109      return re.escape(text) 
110   
111   
def DnsNameGlobPattern(pattern):
  """Converts a DNS name globbing pattern into a regular expression.

  The pattern (e.g. C{*.site}) may contain two wildcards: an asterisk (*)
  stands for any number of characters and a question mark (?) for exactly
  one character; neither of them matches the dot (.) which separates the
  parts of a DNS name. Escape sequences and ranges (e.g. [a-z]) are not
  supported, such characters are matched literally.

  The resulting expression is anchored at the leftmost part of the name
  and accepts any number of further dot-separated parts after the
  pattern.

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  # Every wildcard and every run of ordinary characters is translated
  # separately by the helper
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return r"^%s(\..*)?$" % translated
129   
130   
def FormatUnit(value, units):
  """Formats a size given in MiB using the requested unit.

  @type value: int
  @param value: the size in MiB (1 MiB = 1048576 bytes)
  @type units: char
  @param units: selects the output format:
      - 'h' scales automatically and appends the unit as suffix
      - 'm' formats as MiB, without suffix
      - 'g' formats as GiB, without suffix
      - 't' formats as TiB, without suffix
  @rtype: str
  @return: the formatted value
  @raise errors.ProgrammerError: if C{units} is not one of the above

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  if units == "h":
    # Automatic scaling: choose the unit by magnitude and show it
    if value < 1024:
      scale = "m"
    elif value < (1024 * 1024):
      scale = "g"
    else:
      scale = "t"
    suffix = scale.upper()
  else:
    scale = units
    suffix = ""

  if scale == "m":
    return "%d%s" % (round(value, 0), suffix)

  if scale == "g":
    return "%0.1f%s" % (round(float(value) / 1024, 1), suffix)

  return "%0.1f%s" % (round(float(value) / 1024 / 1024, 1), suffix)
165   
166   
def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB, rounded up to the next multiple of 4.

  @param input_string: the value to parse; it is converted using C{str}
      before parsing
  @rtype: int
  @return: the size in MiB
  @raise errors.UnitParseError: if the string has an invalid format or
      uses an unknown unit

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regular expression accepts any mix of digits and dots, hence
    # strings like "." or "1.2.3" get this far without being numbers
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value
210   
211   
def ShellQuote(value):
  """Quotes a single shell argument according to POSIX.

  Arguments consisting only of characters known to be harmless are
  returned unchanged. Everything else is wrapped in single quotes, with
  embedded single quotes being closed, escaped and reopened.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if _SHELL_UNQUOTED_RE.match(value) is None:
    return "'%s'" % value.replace("'", "'\\''")

  return value
225   
226   
def ShellQuoteArgs(args):
  """Quotes all arguments of a shell command.

  Each argument is passed through L{ShellQuote}.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments, separated by single spaces

  """
  return " ".join(map(ShellQuote, args))
237   
238   
class ShellWriter:
  """Writes script lines to a file object, keeping track of indentation.

  """
  #: Text emitted once per indentation level in front of each line
  INDENT_STR = "  "

  def __init__(self, fh):
    """Initializes this class.

    @param fh: object with a C{write} method receiving the output

    """
    self._fh = fh
    self._indent = 0

  def IncIndent(self):
    """Indent following lines one level deeper.

    """
    self._indent = self._indent + 1

  def DecIndent(self):
    """Indent following lines one level less.

    """
    assert self._indent > 0
    self._indent = self._indent - 1

  def Write(self, txt, *args):
    """Write one line, indented to the current level.

    @param txt: the line, or a format string if C{args} are given
    @param args: values to be interpolated into C{txt}

    """
    assert self._indent >= 0

    out = self._fh

    out.write(self._indent * self.INDENT_STR)

    # Without arguments the text is written as is, i.e. "%" is not special
    if not args:
      out.write(txt)
    else:
      out.write(txt % args)

    out.write("\n")
279   
280   
def GenerateSecret(numbytes=20):
  """Generates a random secret.

  This will read random bytes using C{os.urandom} and return them as an
  hex string (so that it can be used where an ASCII string is needed).

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: an hex representation of the random sequence, twice as long as
      C{numbytes}

  """
  # Imported locally as only this function needs it
  import binascii

  # The "hex" codec of strings exists only in Python 2; hexlify is
  # available everywhere
  secret = binascii.hexlify(os.urandom(numbytes))

  if not isinstance(secret, str):
    # Python 3 returns bytes, callers expect a native string
    secret = secret.decode("ascii")

  return secret
294   
295   
def NormalizeAndValidateMac(mac):
  """Validates a MAC address and returns its normalized form.

  Only the colon-separated notation (six groups of two hexadecimal
  digits) is accepted. The normalized form is all lower case.

  @type mac: str
  @param mac: the MAC to be validated
  @rtype: str
  @return: the validated MAC, converted to lower case

  @raise errors.OpPrereqError: If the MAC isn't valid

  """
  if _MAC_CHECK_RE.match(mac):
    return mac.lower()

  raise errors.OpPrereqError("Invalid MAC address '%s'" % mac,
                             errors.ECODE_INVAL)
315   
316   
def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  is safe to display/encode as ASCII. Text (unicode) input is first
  converted to ASCII using the 'backslashreplace' error handler, which
  replaces all non-ASCII characters with escape sequences. Afterwards
  tab, newline and carriage return are written as C{\\t}, C{\\n} and
  C{\\r} and all other bytes outside the printable ASCII range as
  C{\\xNN}. Quotes and backslashes are left alone on purpose: escaping
  them as well would be too much and would make the function unstable,
  i.e. encoding an already encoded string would change it again.

  @type text: str or unicode
  @param text: input data
  @rtype: str
  @return: a safe version of text

  """
  try:
    unicode_type = unicode # pylint: disable=E0602
  except NameError:
    # Python 3, where str is the text type
    unicode_type = str

  if isinstance(text, unicode_type):
    # only text needs encoding; byte strings are handled as they are
    text = text.encode("ascii", "backslashreplace")

  pieces = []
  for char in text:
    # Iterating over a byte string yields one-character strings in
    # Python 2 and integers in Python 3
    if isinstance(char, int):
      c = char
    else:
      c = ord(char)

    if c == 9:
      pieces.append(r"\t")
    elif c == 10:
      pieces.append(r"\n")
    elif c == 13:
      pieces.append(r"\r")
    elif c < 32 or c >= 127: # non-printable
      pieces.append("\\x%02x" % (c & 0xff))
    else:
      pieces.append(chr(c))

  return "".join(pieces)
352   
353   
def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be part of an element.
  The escaping rules are (assuming comma being the separator):
    - a plain , separates the elements
    - a sequence \\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  rlist = []

  # Element which ended with an escaped separator and therefore has to be
  # continued with the next piece
  pending = None

  # we split the text by sep (with no escaping at this stage) and rejoin
  # the pieces which were separated at an escaped separator
  for piece in text.split(sep):
    if pending is not None:
      piece = pending + sep + piece
      pending = None

    # An odd number of trailing backslashes means the separator which
    # followed was escaped; an even number are escaped backslashes only
    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      pending = piece
    else:
      # here the backslashes remain (all), and will be reduced below
      rlist.append(piece)

  if pending is not None:
    # The text ended with an odd number of backslashes, nothing to join
    rlist.append(pending)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", v) for v in rlist]
396   
397   
def CommaJoin(names):
  """Joins identifiers into a comma-separated string.

  Every element is converted using C{str} first.

  @param names: set, list or tuple
  @return: a string with the elements separated by a comma and a space

  """
  return ", ".join(map(str, names))
406   
407   
def FormatTime(val):
  """Formats a time value.

  The timestamp is shown in local time as C{YYYY-MM-DD HH:MM:SS}.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @return: a string value or N/A if we don't have a valid timestamp

  """
  # None is rejected by this check as well
  if not isinstance(val, (int, float)):
    return "N/A"

  # Spelled-out equivalent of "%F %T", which is not available on all
  # platforms
  return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))
422   
423   
def FormatSeconds(secs):
  """Formats a number of seconds in a human-readable way.

  The value is rounded to full seconds and split into days, hours,
  minutes and seconds. Leading units which are zero are left out. Values
  which are not positive are shown in seconds only.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  chunks = []

  # Negative values would be a bit tricky, hence they are not split
  if remaining > 0:
    for (label, length) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
      (count, remaining) = divmod(remaining, length)
      # Once a unit has been shown, all smaller units are shown too
      if chunks or count:
        chunks.append("%d%s" % (count, label))

  chunks.append("%ds" % remaining)

  return " ".join(chunks)
447   
448   
class LineSplitter:
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface. Data passed to L{write} is
  only collected; the line function is called for complete lines by
  L{flush} and for all data by L{close}.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # Python 2.4 doesn't have functools.partial yet
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines waiting to be passed to the line function
    self._lines = collections.deque()
    # Data after the last newline seen so far
    self._buffer = ""

  def write(self, data):
    """Adds data, which may contain any number of (partial) lines.

    @param data: the data to add

    """
    parts = (self._buffer + data).split("\n")
    # The last part is not terminated by a newline yet
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Calls the line function for all complete lines collected so far.

    Trailing carriage returns are removed from the lines.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes all lines, including an unterminated last one.

    """
    self.flush()
    if self._buffer:
      # Empty the buffer before reporting its contents, otherwise closing
      # again would report the same partial line a second time
      (rest, self._buffer) = (self._buffer, "")
      self._line_fn(rest)
488   
489   
def IsValidShellParam(word):
  """Checks whether the given word is safe from the shell's p.o.v.

  A safe word can be passed to a command via the shell without altering
  the command line, i.e. it reaches the actual command unchanged.

  The check is deliberately stricter than necessary, in order to be on
  the safe side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None
507   
508   
def BuildShellCmd(template, *args):
  """Builds a safe shell command line from a template and arguments.

  All arguments are verified to be valid shell parameters (i.e. not to
  contain shell metacharacters, see L{IsValidShellParam}) before they
  are interpolated into the template.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values for the template
  @rtype: str
  @return: the expanded command line, i.e. C{template % args}
  @raise errors.ProgrammerError: if an argument is not a valid shell
      parameter

  """
  for arg in args:
    if IsValidShellParam(arg):
      continue

    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % arg)

  return template % args
529   
530   
def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd" and 111 becomes
  "111th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: the number followed by its ordinal suffix

  """
  last_digit = value % 10

  # Numbers ending in 11 to 19 always take "th"; this has to be decided
  # on the last two digits so that it also holds for 111, 212, etc.
  if 10 < value % 100 < 20:
    suffix = "th"
  elif last_digit == 1:
    suffix = "st"
  elif last_digit == 2:
    suffix = "nd"
  elif last_digit == 3:
    suffix = "rd"
  else:
    suffix = "th"

  return "%s%s" % (value, suffix)
555