recsql.csv

1 """ 2 :mod:`recsql.csv_table` --- Parse a simple CSV table 3 ==================================================== 4 5 Turn a CSV table into a numpy array. 6 7 Uses :mod:`csv` (requires python 2.6 or better). 8 9 .. autoclass:: Table2array 10 :members: __init__, recarray 11 .. autofunction:: make_python_name 12 """ 13 14 # notes on csv (from http://farmdev.com/talks/unicode/) 15 # encode temp. to utf-8 16 # s_bytes = s_uni.encode('utf-8') 17 # do stuff 18 # s_bytes.decode('utf-8') 19 20 try: 21 # needs python >= 2.6 22 import csv 23 except ImportError: 24 import warnings 25 warnings.warn("csv module not available (needs python >=2.6)", category=ImportWarning) 26 # ... just go ahead and fail later miserably ... 27 import numpy 28 import re 29 30 from convert import Autoconverter 31 32 # from the csv examples: http://docs.python.org/library/csv.html#csv-examples 33 import codecs 34

class UTF8Recoder(object):
    """Iterator that reads an encoded stream and re-encodes the input to UTF-8.

    :Arguments:
       *f*
           byte stream to read from; it is wrapped in the stream reader
           returned by :func:`codecs.getreader` for *encoding*
       *encoding*
           name of the encoding of the data in *f*

    Each iteration step yields the next item of the wrapped reader,
    encoded as a UTF-8 byte string.
    """

    def __init__(self, f, encoding):
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        """Return the next item of the stream, re-encoded as UTF-8 bytes."""
        # The builtin next() (python >= 2.6) dispatches to either .next() or
        # .__next__() of the wrapped reader, so this does not depend on
        # which iterator protocol the reader implements.
        return next(self.reader).encode("utf-8")

    # Python 3 iterator protocol; the Python 2 name is kept for existing callers.
    __next__ = next

47

class UnicodeReader(object):
    """A CSV reader that iterates over the rows of the CSV file *f*.

    :Arguments:
       *f*
           byte stream containing the CSV data
       *dialect*
           :mod:`csv` dialect, passed on to :func:`csv.reader`
       *encoding*
           encoding of the data in *f*
       *kwds*
           additional keyword arguments, passed on to :func:`csv.reader`

    Each iteration step returns one row as a list of unicode strings.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        if str is bytes:
            # Python 2: the csv module operates on byte strings, so feed it
            # UTF-8 encoded lines and decode the parsed fields in next().
            lines = UTF8Recoder(f, encoding)
        else:
            # Python 3: the csv module requires text lines, so decode the
            # stream up front and hand the text straight to csv.reader.
            lines = codecs.getreader(encoding)(f)
        self.reader = csv.reader(lines, dialect=dialect, **kwds)

    def next(self):
        """Return the next row as a list of unicode strings."""
        row = next(self.reader)
        # Fields are UTF-8 bytes only on the Python 2 code path.
        return [s.decode("utf-8") if isinstance(s, bytes) else s for s in row]

    # Python 3 iterator protocol; the Python 2 name is kept for existing callers.
    __next__ = next

    def __iter__(self):
        return self

64 65

def make_python_name(s, default=None, number_prefix='N', encoding="utf-8"):
    """Returns a unicode string that can be used as a legal python identifier.

    Every character outside ``[a-zA-Z0-9_]`` is replaced by an underscore.

    :Arguments:
       *s*
           string (unicode or byte string)
       *default*
           use *default* if *s* is ``None`` or empty; a non-string value
           (e.g. a column index) is converted to its string representation
       *number_prefix*
           string to prepend if *s* starts with a number
       *encoding*
           encoding used to decode *s* when it is a byte string;
           undecodable bytes end up as underscores
    """
    try:
        text_type = unicode     # Python 2
    except NameError:
        text_type = str         # Python 3

    if s in ('', b'', None):
        s = default
    if isinstance(s, bytes):
        s = s.decode(encoding, "replace")
    elif not isinstance(s, text_type):
        s = text_type(s)
    # Substitute on the text itself: forcing unicode input through str()
    # raises UnicodeEncodeError for non-ASCII characters on Python 2.
    s = re.sub(r"[^a-zA-Z0-9_]", "_", s)
    if re.match(r"\d", s) is not None:
        s = number_prefix + s
    return text_type(s)

84

class Table2array(object):
    """Read a csv file and provide conversion to a :class:`numpy.recarray`.

    * Depending on the arguments, autoconversion of values can take
      place. See :class:`recsql.convert.Autoconverter` for details.

    * Table column headers are always read from the first row of the file.

    * Empty rows are discarded.
    """

    def __init__(self, filename=None, tablename="CSV", encoding="utf-8", **kwargs):
        """
        :Arguments:
           *filename*
               CSV file (encoded with *encoding*)
           *tablename*
               name of the table
           *encoding*
               encoding of the CSV file
           *kwargs*
               passed on unchanged to :class:`~convert.Autoconverter`,
               e.g. *autoconvert* and *mode*:

               *autoconvert*
                   EXPERIMENTAL. ``True``: replace certain values
                   with special python values (see :class:`convert.Autoconverter`)
                   and possibly split values into lists (see *sep*).
                   ``False``: leave everything as it is (numbers as numbers
                   and strings as strings).
               *mode*
                   mode of the :class:`~convert.Autoconverter`

        :Raises: :exc:`TypeError` if no *filename* is given.
        """
        if filename is None:
            raise TypeError("filename is actually required")
        self.tablename = tablename
        self.autoconvert = Autoconverter(**kwargs).convert
        # The reader is lazy, so header and records must both be consumed
        # inside the ``with`` block; the file is closed on exit, also when
        # parsing or conversion raises.
        with open(filename, "rb") as stream:
            csvtab = UnicodeReader(stream, encoding=encoding)
            # first row: column headers; the column index is the fallback name
            self.names = [make_python_name(s, default=n, encoding=encoding)
                          for n, s in enumerate(next(csvtab))]
            # read the rest after the column headers; rows without any
            # non-empty field (including zero-length rows) are discarded
            self.records = [tuple(map(self.autoconvert, line)) for line in csvtab
                            if any(field != '' for field in line)]

    def recarray(self):
        """Returns data as :class:`numpy.recarray`."""
        return numpy.rec.fromrecords(self.records, names=self.names)

124

Source Code for Module recsql.csv_table