"""
:mod:`recsql.convert` --- converting entries of tables
======================================================

.. autoclass:: Autoconverter
   :members: __init__

   .. function:: Autoconverter.convert(x)

      Convert *x* (if in the active state)

   .. attribute:: Autoconverter.active

      If set to ``True`` then conversion takes place; ``False``
      deactivates conversion and :meth:`Autoconverter.convert`
      returns the value unchanged.

.. autofunction:: besttype
.. autofunction:: to_unicode
"""
18
19 import re
20
def to_unicode(obj, encoding='utf-8'):
    """Convert *obj* to a unicode (text) string if it can be converted.

    Adapted from http://farmdev.com/talks/unicode/

    :Arguments:
       *obj*
          object to convert
       *encoding*
          codec used to decode byte strings [utf-8]

    :Returns: the decoded text if *obj* is a byte string; every other
              object (text that is already unicode, numbers, ``None``,
              ...) is returned unchanged.

    :Raises: :exc:`UnicodeDecodeError` if a byte string is not valid
             in *encoding*.
    """
    # ``bytes`` is an alias of ``str`` on Python 2 (>= 2.6) and the real byte
    # string type on Python 3, so this single test replaces the
    # Python-2-only ``basestring``/``unicode`` checks without changing the
    # Python 2 behaviour.
    if isinstance(obj, bytes):
        obj = obj.decode(encoding)
    return obj
29
class Autoconverter(object):
    """Automatically convert an input value to a special python object.

    The :meth:`Autoconverter.convert` method turns the value into a special
    python value and casts strings to the "best" type (see :func:`besttype`).

    The defaults for the conversion of a input field value to a
    special python value are:

      ===========  ===============
        value        python
      ===========  ===============
        '---'        ``None``
        'none'
        'None'
        ''

        'True'       ``True``
        'x'
        'X'
        'yes'

        'False'      ``False``
        '-'
        'no'
      ===========  ===============

    If the *sep* keyword is set to a string instead of ``False`` then
    values are split into tuples.  Probably the most convenient way to
    use this is to set *sep* = ``True`` (or ``None``) because this
    splits on all white space whereas *sep* = ' ' would split at every
    single space (runs of spaces then produce empty fields).

    **Example**
       - With *sep* = ``True``: 'foo bar 22  boing ---' --> ('foo', 'bar', 22, 'boing', None)
       - With *sep* = ',':       1,2,3,4 --> (1,2,3,4)

    """

    def __init__(self, mode="fancy", mapping=None, active=True, sep=False, **kwargs):
        """Initialize the converter.

        :Arguments:
          *mode*
             defines what the converter does

             "simple"
                 convert entries with :func:`besttype`
             "singlet"
                 convert entries with :func:`besttype` and apply
                 mappings
             "fancy"
                 first splits fields into tuples (if *sep* is set), tries
                 mappings, and does the stuff that "singlet" does
             "unicode"
                 decode byte strings with :func:`to_unicode`; any other
                 object is turned into its text representation

          *mapping*
             any dict-like mapping that supports lookup. If ``None`` then the
             hard-coded defaults are used
          *active* or *autoconvert*
             initial state of the :attr:`Autoconverter.active` toggle.
             ``False`` deactivates any conversion. [``True``]
          *sep*
             character to split on (produces tuples); use ``True`` or ``None``
             (!) to split on all white space.
          *encoding*
             encoding of the input data [utf-8]

        :Raises: :exc:`KeyError` if the converter is active and *mode* is
                 not one of the modes listed above.
        """
        # Every mode is served by a bound method so that all of them honour
        # the *encoding* option (previously "unicode" and "simple" were wired
        # to the bare ``unicode`` builtin and :func:`besttype`, which
        # silently fell back to their default codecs).
        self._convertors = {
            'unicode': self._convert_unicode,
            'simple': self._convert_simple,
            'singlet': self._convert_singlet,
            'fancy': self._convert_fancy,
        }

        if mapping is None:
            mapping = {'---': None, 'None': None, 'none': None, '': None,
                       'True': True, 'x': True, 'X': True, 'yes': True,
                       'False': False, 'no': False, '-': False}
        self.mapping = mapping
        self.encoding = kwargs.pop('encoding', "utf-8")
        self.mode = mode
        self.__active = None
        # Assigning through the property installs the matching convert();
        # it needs _convertors and mode, so it must come after them.
        self.active = kwargs.pop('autoconvert', active)
        # True is a convenience alias for None, i.e. "split on any white space".
        self.sep = None if sep is True else sep

    @property
    def active(self):
        """Toggle the state of the Autoconverter. ``True`` uses the mode, ``False`` does nothing"""
        return self.__active

    @active.setter
    def active(self, state):
        self.__active = state
        # convert() is rebound on every toggle; the mode is looked up here,
        # so an unknown mode raises KeyError only when activating.
        if state:
            self.convert = self._convertors[self.mode]
        else:
            self.convert = self._passthrough

    @staticmethod
    def _passthrough(value):
        """Return *value* unchanged (the inactive converter)."""
        return value

    def _convert_unicode(self, value):
        """Return *value* as text, decoding byte strings with :attr:`encoding`."""
        text = to_unicode(value, self.encoding)
        text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
        if isinstance(text, text_type):
            return text
        # Non-string objects are rendered as text, as ``unicode(value)`` did.
        return text_type(text)

    def _convert_simple(self, value):
        """Convert *value* with :func:`besttype`, using :attr:`encoding`."""
        return besttype(value, self.encoding)

    def _convert_singlet(self, s):
        """Convert *s* with :func:`besttype` and then apply :attr:`mapping`.

        Values without an entry in the mapping are returned as converted.
        """
        x = besttype(s, self.encoding)
        try:
            return self.mapping[x]
        except KeyError:
            return x

    def _convert_fancy(self, field):
        """Convert to a tuple (sep is not ``False``) and convert its elements.

        With :attr:`sep` set to ``False`` the field is converted as a
        single value.  Otherwise it is split on :attr:`sep` and every piece
        is converted individually; a single piece is returned as a scalar
        and a field without any pieces as the empty string.
        """
        if self.sep is False:
            return self._convert_singlet(field)
        try:
            pieces = field.split(self.sep)
        except AttributeError:
            # Not a string (e.g. a number or None): there is nothing to split.
            return self._convert_singlet(field)
        x = tuple(self._convert_singlet(s) for s in pieces)
        if len(x) == 0:
            return ''
        if len(x) == 1:
            return x[0]
        return x
150
def besttype(x, encoding="utf-8"):
    """Convert string x to the most useful type, i.e. int, float or unicode string.

    If x is a quoted string (single or double quotes) then the quotes
    are stripped and the enclosed string returned.  The opening and the
    closing quote must be the same character; surrounding white space is
    always removed first.

    :Arguments:
       *x*
          value to convert; objects that are not strings are returned
          unchanged
       *encoding*
          codec used to decode byte strings [utf-8]

    :Returns: an ``int`` or a ``float`` if the stripped string can be
              parsed as one, otherwise the stripped (and unquoted)
              unicode string.

    .. Note:: Strings will be returned as Unicode strings (using
              :func:`to_unicode`), based on the *encoding* argument, which
              is utf-8 by default.
    """
    x = to_unicode(x, encoding)  # make unicode as soon as possible
    if not isinstance(x, type(u"")):
        # Not a string (e.g. already a number, or None): there is nothing to
        # parse, and handing it to re.match() would raise TypeError.
        return x
    x = x.strip()

    # The named backreference forces the closing quote to be the same
    # character as the opening one; the value itself may still contain
    # quote characters.
    m = re.match(r"""(?P<quote>['"])(?P<value>.*)(?P=quote)$""", x)
    if m is not None:
        # quoted string: never interpreted as a number
        return to_unicode(m.group('value'), encoding)

    # not a quoted string: try the types in order of increasing leniency
    for converter in (int, float):
        try:
            return converter(x)
        except ValueError:
            pass
    return x
180
181
def to_int64(a):
    """Return a copy of the record array *a* with all int32 fields cast to int64.

    :Arguments:
       *a*
          numpy structured/record array

    :Returns: new array (made with :meth:`numpy.ndarray.astype`) in which
              every ``i4`` field has become an ``i8`` field of the same
              byte order; all other fields keep their type.
    """
    # NOTE(review): the ``def`` line of this function was missing from the
    # reviewed source; the name and signature are reconstructed -- confirm
    # against the callers.

    def promote_i4(typestr):
        # typestr looks like '<i4': keep the byte-order character and only
        # widen the integer size.
        if typestr[1:] == 'i4':
            typestr = typestr[0] + 'i8'
        return typestr

    # Entries of dtype.descr are (name, typestr) or, for sub-array fields,
    # (name, typestr, shape); keep whatever follows the type string.
    dtype = [(field[0], promote_i4(field[1])) + tuple(field[2:])
             for field in a.dtype.descr]
    return a.astype(dtype)
192