Package recsql :: Module convert
[hide private]
[frames] | no frames]

Source Code for Module recsql.convert

  1  """ 
  2  :mod:`recsql.convert` --- converting entries of tables 
  3  ====================================================== 
  4   
  5  .. autoclass:: Autoconverter 
  6     :members: __init__ 
  7  .. function:: Autoconverter.convert(x) 
  8    
  9                Convert *x* (if in the active state) 
 10  .. attribute:: Autoconverter.active 
 11   
 12                 If set to ``True`` then conversion takes place; ``False`` 
 13                 returns the value unchanged (no conversion is applied). 
 14   
 15  .. autofunction:: besttype 
 16  .. autofunction:: to_unicode 
 17  """ 
 18   
 19  import re 
 20   
def to_unicode(obj, encoding='utf-8'):
    """Convert *obj* to a unicode string (if it can be converted).

    Byte strings are decoded with *encoding*; text that is already unicode
    and any non-string object are returned unchanged.

    Works on Python 2 (``str`` --> ``unicode``) as well as on Python 3
    (``bytes`` --> ``str``).

    from http://farmdev.com/talks/unicode/

    :Arguments:
      *obj*
          object to convert
      *encoding*
          encoding used to decode byte strings [utf-8]

    :Returns: unicode string, or *obj* itself if it is not a byte string
    :Raises: :exc:`UnicodeDecodeError` if a byte string cannot be decoded
             with *encoding*
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so no
    # reference to the Python 2-only names ``unicode``/``basestring`` is needed.
    text_type = type(u'')
    # ``bytes`` is an alias of ``str`` on Python 2; only byte strings need decoding.
    if isinstance(obj, bytes):
        obj = text_type(obj, encoding)
    return obj
29
class Autoconverter(object):
    """Automatically convert an input value to a special python object.

    The :meth:`Autoconverter.convert` method turns the value into a special
    python value and casts strings to the "best" type (see :func:`besttype`).

    The defaults for the conversion of a input field value to a
    special python value are:

      ===========  ===============
      value        python
      ===========  ===============
        '---'       ``None``
        'none'
        'None'
        ''

        'True'      ``True``
        'x'
        'X'
        'yes'

        'False'     ``False``
        '-'
        'no'
      ===========  ===============

    If the *sep* keyword is set to a string instead of ``False`` then
    values are split into tuples.  Probably the most convenient way to
    use this is to set *sep* = ``True`` (or ``None``) because this
    splits on all white space whereas *sep* = ' ' would split on every
    single space (so that runs of spaces produce empty entries).

    **Example**
       - With *sep* = ``True``: 'foo bar 22  boing ---' -->  ('foo', 'bar', 22, 'boing', None)
       - With *sep* = ',':       1,2,3,4 -->  (1,2,3,4)

    """

    def __init__(self, mode="fancy", mapping=None, active=True, sep=False, **kwargs):
        """Initialize the converter.

        :Arguments:
          *mode*
             defines what the converter does

             "simple"
                 convert entries with :func:`besttype`
             "singlet"
                 convert entries with :func:`besttype` and apply
                 mappings
             "fancy"
                 first splits fields into lists, tries mappings,
                 and does the stuff that "singlet" does
             "unicode"
                 convert all entries with :func:`to_unicode`

          *mapping*
              any dict-like mapping that supports lookup. If ``None`` then the
              hard-coded defaults are used
          *active* or *autoconvert*
              initial state of the :attr:`Autoconverter.active` toggle.
              ``False`` deactivates any conversion. [``True``]
          *sep*
              character to split on (produces lists); use ``True`` or ``None``
              (!) to split on all white space.
          *encoding*
              encoding of the input data [utf-8]

        All modes decode byte strings with *encoding*.  Any other keyword
        argument is accepted and ignored.

        :Raises: :exc:`KeyError` if the converter is active and *mode* is
                 not one of the modes listed above
        """
        # Dispatch table: mode name --> bound converter.  All entries are
        # methods so that every mode honours self.encoding.
        self._convertors = {
            'unicode': self._convert_unicode,
            'simple': self._convert_simple,
            'singlet': self._convert_singlet,
            'fancy': self._convert_fancy,
        }

        if mapping is None:
            mapping = {'---': None, 'None': None, 'none': None, '': None,
                       'True': True, 'x': True, 'X': True, 'yes': True,
                       'False': False, 'no': False, '-': False}
        self.mapping = mapping
        self.encoding = kwargs.pop('encoding', "utf-8")
        self.mode = mode
        self.__active = None
        # 'autoconvert' is a "strong" alias of 'active': if given, it wins.
        # Assigning to the property also installs self.convert, so mode and
        # encoding must already be set at this point.
        self.active = kwargs.pop('autoconvert', active)
        if sep is True:
            sep = None   # split on *all* white space, sep=' ' splits single spaces!
        self.sep = sep

    @property
    def active(self):
        """Toggle the state of the Autoconverter. ``True`` uses the mode, ``False`` does nothing"""
        return self.__active

    @active.setter
    def active(self, x):
        self.__active = x
        if self.__active:
            # The converter is chosen from the mode at the time of activation.
            self.convert = self._convertors[self.mode]
        else:
            self.convert = lambda x: x    # do nothing

    def _convert_unicode(self, s):
        """Decode *s* with :func:`to_unicode`, using the configured encoding."""
        return to_unicode(s, self.encoding)

    def _convert_simple(self, s):
        """Convert *s* with :func:`besttype`, using the configured encoding."""
        return besttype(s, self.encoding)

    def _convert_singlet(self, s):
        """Convert *s* with :func:`besttype` and then apply the mapping.

        Values that are not keys of the mapping are returned as converted
        by :func:`besttype`.
        """
        x = besttype(s, self.encoding)
        try:
            return self.mapping[x]
        except KeyError:
            return x

    def _convert_fancy(self, field):
        """Split *field* on *sep* (unless *sep* is ``False``) and convert each element.

        :Returns: a tuple of converted elements; a field with exactly one
                  element yields that element itself and a field without
                  any elements yields the empty string ''.
        """
        if self.sep is False:
            return self._convert_singlet(field)
        x = tuple(self._convert_singlet(s) for s in field.split(self.sep))
        if len(x) == 0:
            # only possible when splitting an empty/blank field on white space
            x = ''
        elif len(x) == 1:
            x = x[0]
        return x
150
def besttype(x, encoding="utf-8"):
    """Convert string x to the most useful type, i.e. int, float or unicode string.

    If x is a quoted string (single or double quotes) then the quotes
    are stripped and the enclosed string returned. The opening and the
    closing quote must be the same character.

    Values that are not strings (e.g. numbers or ``None``) are returned
    unchanged.

    .. Note:: Strings will be returned as Unicode strings (using
              :func:`to_unicode`), based on the *encoding* argument, which is
              utf-8 by default.

    :Arguments:
      *x*
          value to convert
      *encoding*
          encoding used to decode byte strings [utf-8]

    :Returns: int, float, or unicode string (or *x* itself if it is not a
              string)
    """
    def unicodify(x):
        return to_unicode(x, encoding)

    x = unicodify(x)       # make unicode as soon as possible
    if not isinstance(x, type(u'')):
        # Not text: there is nothing to strip or parse, and re.match() would
        # raise TypeError on a non-string argument.
        return x
    x = x.strip()
    # The back-reference makes sure that the closing quote matches the
    # opening one, so that e.g. 'abc" is not treated as a quoted string.
    m = re.match(r"""(?P<quote>['"])(?P<value>.*)(?P=quote)$""", x)
    if m is not None:
        # quoted string: never interpreted as a number
        return unicodify(m.group('value'))
    # not a quoted string, try different types
    for converter in int, float, unicodify:   # try them in increasing order of lenience
        try:
            return converter(x)
        except ValueError:
            pass
    return x
def to_int64(a):
    """Return a copy of the recarray with all int32 fields cast to int64.

    The result is produced with :meth:`astype`, i.e. it is a new array and
    not a view of *a*. Field names, byte order, sub-array shapes and nested
    record fields are preserved; only 4-byte signed integers are promoted.

    :Arguments:
      *a*
          structured array (anything with a ``dtype.descr`` and ``astype``)

    :Returns: new array with every ``i4`` field replaced by ``i8``
    """
    # build new dtype and replace i4 --> i8
    def promote_i4(typestr):
        if isinstance(typestr, list):
            # nested record: typestr is itself a descr list
            return promote_descr(typestr)
        if typestr[1:] == 'i4':
            # keep the byte-order character in front of the type code
            typestr = typestr[0] + 'i8'
        return typestr

    def promote_descr(descr):
        # descr entries are (name, type) or (name, type, shape); anything
        # after the type (the sub-array shape) is carried over unchanged.
        return [(field[0], promote_i4(field[1])) + tuple(field[2:])
                for field in descr]

    return a.astype(promote_descr(a.dtype.descr))