recsql.rest

110 """Primitive parser that converts a simple reST table into ``numpy.recarray``. 111 112 The table must be the only table in the text. It must look similar to the 113 example below (variable parts in angle brackets, optional in double 114 brackets, everything else must be there, matching is case sensitive, '....' 115 signifies repetition in kind):: 116 117 Table[<NAME>]: <<CAPTION>> 118 ============ =========== ====================== .... 119 <COLNAME 1> <COLNAME 2> .... .... 120 ============ =========== ====================== .... 121 <VALUE> <VALUE> <VALUE> <VALUE> .... 122 .... 123 .... 124 ============ =========== ====================== .... 125 126 Rows may *not* span multiple lines. The column names must be single words 127 and legal python names (no spaces, no dots, not starting with a number). 128 129 Field values are converted to one of the following python types: *int*, 130 *float*, or *str*. 131 132 If a value is quoted with single or double quotation marks then the 133 outermost quotation marks are stripped and the enclosed value treated as a string. 134 135 .. Note:: Values such as 001 must be quoted as '001' or they will be 136 interpreted as integers (1 in this case). 137 """ 138

def __init__(self, string=None, **kwargs):
    """Table2array(string) --> parser

    Locate the table in the input text with the ``TABLE`` regular
    expression, store its pieces, and parse it straight away.

    :Arguments:
       *string*
          string to be parsed
       *filename*
          read from *filename* instead of string
       *autoconvert*
          EXPERIMENTAL. ``True``: replace certain values
          with special python values (see :class:`convert.Autoconverter`) and possibly
          split values into lists (see *sep*).
          ``False``: leave everything as it is (numbers as numbers and strings
          as strings).
       *mode*
          mode of the :class:`~convert.Autoconverter`
       *sep*
          If set and *autoconvert* = ``True`` then split field values on the
          separator (using :func:`split`) before possible autoconversion.
          (NOT WORKING PROPERLY YET)

    Every keyword argument except *filename* is handed on unchanged to
    :class:`convert.Autoconverter`.

    :Raises: :exc:`TypeError` if neither *string* nor *filename* is
             supplied; :exc:`ParseError` if no table is found in the text.
    """
    self.filename = kwargs.pop('filename', None)
    if self.filename:
        # A file, when given, takes precedence over *string*.
        with open(self.filename, 'rb') as f:
            string = f.read()      # encoding ??
    if string is None:
        # Fail with a clear message instead of passing None to the regex.
        raise TypeError("either 'string' or 'filename' must be provided")
    self.string = string
    m = TABLE.search(string)   # extract table from string with regular expression
    if m is None:
        raise ParseError('Table cannot be parsed.')
    self.t = m.groupdict()
    #: <NAME> of the table
    self.tablename = self.t['name']
    #: <CAPTION> of the table.
    self.caption = self.t['title']
    #: parsed table as records (populate with :meth:`Table2array.parse`)
    self.records = None
    self.names = None
    # Remaining keyword arguments configure the value converter.
    self.autoconvert = convert.Autoconverter(**kwargs).convert

    self.parse()

179

def parse(self):
    """Parse the table data string into records.

    Calls :meth:`parse_fields` first to obtain the column spans, then cuts
    every data line that does not match ``EMPTY_ROW`` at those spans and
    passes each piece through ``self.autoconvert``.  The result, a list
    with one tuple per data row, is stored in ``self.records``.
    """
    self.parse_fields()
    to_value = self.autoconvert
    spans = self.fields
    self.records = [
        # spans hold inclusive end columns, hence the +1 when slicing
        tuple(to_value(text[first:last + 1]) for first, last in spans)
        for text in self.t['data'].split('\n')
        if not EMPTY_ROW.match(text)
    ]

192

def recarray(self):
    """Return a recarray from the (parsed) string.

    The records are parsed first if that has not happened yet.  The simple
    route is :func:`numpy.rec.fromrecords`; if that raises
    :exc:`ValueError` (e.g. because records contain lists) a structured
    dtype is assembled by hand and the result is passed through
    ``convert.to_int64``.

    :Returns: a :class:`numpy.recarray` whose field names are ``self.names``
    """
    if self.records is None:
        self.parse()
    try:
        # simple
        return numpy.rec.fromrecords(self.records, names=self.names)
    except ValueError:
        # complicated because fromrecords cannot deal with records of lists
        # Quick hack: use objects for lists etc (instead of building the proper
        # data types (see docs for numpy.dtype , eg dtype('coord', (float, 3)) )
        first_record = self.records[0]     # number of fields from first record
        column_types = []
        for icol, column in enumerate(zip(*self.records)):
            # A column keeps its python type only if every row agrees on it.
            # (Testing the truthiness of the type objects, as numpy.all()
            # would, is always true and never detects mixed columns.)
            first_type = type(first_record[icol])
            is_same = all(type(value) is first_type for value in column)
            column_types.append(first_type if is_same else object)
        # list(...) so that this also works where zip() returns an iterator
        dtype = numpy.dtype(list(zip(self.names, column_types)))
        # from numpy.rec.records (for debugging...)
        retval = numpy.array(self.records, dtype=dtype)
        res = retval.view(numpy.recarray)
        ## res.dtype = numpy.dtype((numpy.rec.record, res.dtype))   # fails -- ARGH, this makes it a recarray
        return convert.to_int64(res)

219

def parse_fields(self):
    """Determine the start and end columns and names of the fields.

    The top rule (a line of ``=`` runs separated by spaces) defines the
    column layout.  On success two attributes are set:

    ``self.names``
        list of column names taken from the header line
    ``self.fields``
        list of ``(first, last)`` column index tuples, one per field;
        *last* is the index of the character that ended the field (the
        separating space, or the final ``=`` of the rule)

    :Raises: :exc:`ParseError` if the three rules are not identical (after
             stripping trailing white space) or if the number of column
             names differs from the number of fields in the rule.
    """
    rule = self.t['toprule'].rstrip()  # keep leading space for correct columns!!
    if not (rule == self.t['midrule'].rstrip() and rule == self.t['botrule'].rstrip()):
        raise ParseError("Table rules differ from each other (check white space).")
    names = self.t['fields'].split()
    nfields = len(rule.split())
    if nfields != len(names):
        # arguments in the same order as the message: names first, then fields
        raise ParseError("number of field names (%d) does not match number of fields (%d)"
                         % (len(names), nfields))
    fields = []     # list of tuples (first,last) column of the field
    in_field = rule.startswith('=')   # state
    last_column = len(rule) - 1
    start_field = 0
    for column, char in enumerate(rule):
        if not in_field and char == '=':
            start_field = column
            in_field = True
        # Deliberately not an ``elif``: a one-character field in the last
        # column must be opened and closed in the same iteration.
        if in_field and (char == ' ' or column == last_column):
            # finished field
            fields.append((start_field, column))
            in_field = False
    self.names = names
    self.fields = fields

Source Code for Module recsql.rest_table