1 """
2 :mod:`recsql.csv_table` --- Parse a simple CSV table
3 ====================================================
4
5 Turn a CSV table into a numpy array.
6
7 Uses :mod:`csv` (requires python 2.6 or better).
8
9 .. autoclass:: Table2array
10 :members: __init__, recarray
11 .. autofunction:: make_python_name
12 """
13
14
15
16
17
18
19
20 try:
21
22 import csv
23 except ImportError:
24 import warnings
25 warnings.warn("csv module not available (needs python >=2.6)", category=ImportWarning)
26
27 import numpy
28 import re
29
30 from convert import Autoconverter
31
32
33 import codecs
34
36 """
37 Iterator that reads an encoded stream and reencodes the input to UTF-8
38 """
40 self.reader = codecs.getreader(encoding)(f)
41
44
46 return self.reader.next().encode("utf-8")
47
49 """
50 A CSV reader which will iterate over lines in the CSV file "f",
51 which is encoded in the given encoding.
52 """
53
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Set up a :func:`csv.reader` on top of the stream *f*.

    *f* is first wrapped in :class:`UTF8Recoder` (constructed with
    *encoding*) and the wrapped stream is handed to :func:`csv.reader`.

    :Arguments:
       *f*
          input stream, encoded with *encoding*
       *dialect*
          csv dialect passed to :func:`csv.reader`
       *encoding*
          encoding of *f*
       *kwds*
          any other keyword arguments are passed on to :func:`csv.reader`
    """
    utf8_stream = UTF8Recoder(f, encoding)
    self.reader = csv.reader(utf8_stream, dialect=dialect, **kwds)
57
59 row = self.reader.next()
60 return [unicode(s, "utf-8") for s in row]
61
64
65
67 """Returns a unicode string that can be used as a legal python identifier.
68
69 :Arguments:
70 *s*
71 string
72 *default*
73 use *default* if *s* is ``None`` or the empty string
74 *number_prefix*
75 string to prepend if *s* starts with a number
76 """
77 if s in ('', None):
78 s = default
79 s = str(s)
80 s = re.sub("[^a-zA-Z0-9_]", "_", s)
81 if not re.match('\d', s) is None:
82 s = number_prefix+s
83 return unicode(s, encoding)
84
86 """Read a csv file and provide conversion to a :class:`numpy.recarray`.
87
88 * Depending on the arguments, autoconversion of values can take
89 place. See :class:`recsql.convert.Autoconverter` for details.
90
91 * Table column headers are always read from the first row of the file.
92
93 * Empty rows are discarded.
94 """
def __init__(self, filename=None, tablename="CSV", encoding="utf-8", **kwargs):
    """Read the CSV file *filename* and store its column names and records.

    The first row provides the column names (each one is passed through
    :func:`make_python_name`); every following non-empty row becomes a
    tuple in :attr:`records`, with each field passed through the
    autoconverter.

    :Arguments:
       *filename*
          CSV file (encoded with *encoding*); required
       *tablename*
          name of the table
       *encoding*
          encoding used to read the file and to build the column names
       *autoconvert*
          EXPERIMENTAL. ``True``: replace certain values
          with special python values (see :class:`convert.Autoconverter`) and possibly
          split values into lists (see *sep*).
          ``False``: leave everything as it is (numbers as numbers and strings
          as strings).
       *mode*
          mode of the :class:`~convert.Autoconverter`

    All keyword arguments other than *filename*, *tablename* and *encoding*
    are passed on to :class:`~convert.Autoconverter`.

    :Raises: :exc:`TypeError` if *filename* is ``None``.
    """
    if filename is None:
        raise TypeError("filename is actually required")
    self.tablename = tablename
    self.autoconvert = Autoconverter(**kwargs).convert
    # The whole table is consumed inside this block, so the file handle can
    # (and should) be closed as soon as the records have been built.
    with open(filename, "rb") as csvfile:
        csvtab = UnicodeReader(csvfile, encoding=encoding)
        # Header row: a missing/empty header falls back to the column index.
        self.names = [make_python_name(s, default=n, encoding=encoding)
                      for n, s in enumerate(csvtab.next())]
        # Discard rows that have no fields or only empty-string fields.
        self.records = [tuple(map(self.autoconvert, line)) for line in csvtab
                        if any(field != '' for field in line)]
120
122 """Returns data as :class:`numpy.recarray`."""
123 return numpy.rec.fromrecords(self.records, names=self.names)
124