Package recsql :: Module csv_table
[hide private]
[frames] | no frames]

Source Code for Module recsql.csv_table

  1  """ 
  2  :mod:`recsql.csv_table` --- Parse a simple CSV table 
  3  ==================================================== 
  4   
  5  Turn a CSV table into a numpy array.  
  6   
  7  Uses :mod:`csv` (requires python 2.6 or better). 
  8   
  9  .. autoclass:: Table2array 
 10     :members: __init__, recarray 
 11  .. autofunction:: make_python_name 
 12  """ 
 13   
 14  # notes on csv (from http://farmdev.com/talks/unicode/) 
 15  # encode temp. to utf-8  
 16  #   s_bytes = s_uni.encode('utf-8') 
 17  #   do stuff  
 18  #   s_bytes.decode('utf-8') 
 19   
 20  try: 
 21      # needs python >= 2.6 
 22      import csv 
 23  except ImportError: 
 24      import warnings 
 25      warnings.warn("csv module not available (needs python >=2.6)", category=ImportWarning) 
 26      # ... just go ahead and fail later miserably ... 
 27  import numpy 
 28  import re 
 29   
 30  from convert import Autoconverter 
 31   
 32  # from the csv examples: http://docs.python.org/library/csv.html#csv-examples 
 33  import codecs 
 34   
class UTF8Recoder(object):
    """Iterator that reads an encoded stream and re-encodes the input to UTF-8.

    The byte stream *f* is wrapped in the :mod:`codecs` stream reader for
    *encoding*; every iteration step returns the next decoded line,
    re-encoded as a UTF-8 byte string.
    """

    def __init__(self, f, encoding):
        """
        :Arguments:
           *f*
               file-like object delivering bytes encoded with *encoding*
           *encoding*
               name of the codec used to decode *f*
        """
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        """Return the next line of the stream as a UTF-8 encoded byte string.

        Raises :exc:`StopIteration` when the underlying stream is exhausted.
        """
        # The builtin next() (python >= 2.6) works with the stream reader's
        # ``next`` method on Python 2 as well as ``__next__`` on Python 3,
        # whereas calling ``self.reader.next()`` only works on Python 2.
        return next(self.reader).encode("utf-8")

    # Python 3 iterator protocol; ``next`` stays available for Python 2
    # and for callers that invoke it explicitly.
    __next__ = next
47
class UnicodeReader(object):
    """A CSV reader which will iterate over lines in the CSV file *f*,
    which is encoded in the given *encoding*.

    Every row is returned as a list of unicode strings.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        """
        :Arguments:
           *f*
               file-like object opened in binary mode
           *dialect*
               :mod:`csv` dialect used for parsing
           *encoding*
               encoding of the bytes delivered by *f*
           *kwds*
               additional keyword arguments passed on to :func:`csv.reader`
        """
        if str is bytes:
            # Python 2: csv can only parse byte strings, so the input is
            # recoded to UTF-8 here and the cells are decoded in next().
            f = UTF8Recoder(f, encoding)
        else:
            # Python 3: csv requires text, so decode the stream up front.
            f = codecs.getreader(encoding)(f)
        self.reader = csv.reader(f, dialect=dialect, **kwds)

    def next(self):
        """Return the next row as a list of unicode strings.

        Raises :exc:`StopIteration` at the end of the input.
        """
        row = next(self.reader)
        # Cells are UTF-8 byte strings on Python 2 (see __init__) and are
        # already text on Python 3.
        return [cell.decode("utf-8") if isinstance(cell, bytes) else cell
                for cell in row]

    # Python 3 iterator protocol; ``next`` is kept for existing callers.
    __next__ = next

    def __iter__(self):
        return self
64 65
def make_python_name(s, default=None, number_prefix='N', encoding="utf-8"):
    """Returns a unicode string that can be used as a legal python identifier.

    Every character outside ``[a-zA-Z0-9_]`` is replaced by an underscore
    and *number_prefix* is prepended if the result starts with a digit.

    :Arguments:
      *s*
         string (unicode or byte string); any other object is converted
         with :func:`str`
      *default*
         use *default* if *s* is ``None`` or empty
      *number_prefix*
         string to prepend if *s* starts with a number
      *encoding*
         accepted for backward compatibility only; the sanitized name
         consists of ASCII characters and needs no decoding

    :Returns: the sanitized name as a unicode string
    """
    text_type = type(u"")
    if s is None or (isinstance(s, (bytes, text_type)) and len(s) == 0):
        s = default
    if not isinstance(s, (bytes, text_type)):
        s = str(s)
    if isinstance(s, bytes):
        # latin-1 maps every byte to exactly one character and never fails,
        # so each non-ASCII byte still turns into one underscore below.
        s = s.decode("latin-1")
    # Substituting on text (instead of on str(s)) avoids the
    # UnicodeEncodeError that Python 2 raises for non-ASCII unicode input.
    s = re.sub(r"[^a-zA-Z0-9_]", "_", s)
    if re.match(r"\d", s) is not None:
        s = number_prefix + s
    # The name is pure ASCII at this point; decoding it with the file's
    # encoding would garble it for codecs such as utf-16.
    return text_type(s)
84
class Table2array(object):
    """Read a csv file and provide conversion to a :class:`numpy.recarray`.

    * Depending on the arguments, autoconversion of values can take
      place. See :class:`recsql.convert.Autoconverter` for details.

    * Table column headers are always read from the first row of the file.

    * Empty rows are discarded.
    """

    def __init__(self, filename=None, tablename="CSV", encoding="utf-8", **kwargs):
        """
        :Arguments:
           *filename*
               CSV file (encoded with *encoding*)
           *tablename*
               name of the table
           *encoding*
               encoding of the CSV file
           *autoconvert*
               EXPERIMENTAL. ``True``: replace certain values
               with special python values (see :class:`convert.Autoconverter`) and possibly
               split values into lists (see *sep*).
               ``False``: leave everything as it is (numbers as numbers and strings
               as strings).
           *mode*
               mode of the :class:`~convert.Autoconverter`

        All keyword arguments other than *filename*, *tablename* and
        *encoding* are passed on to :class:`~convert.Autoconverter`.

        :Raises: :exc:`TypeError` if no *filename* is given
        """
        if filename is None:
            raise TypeError("filename is actually required")
        self.tablename = tablename
        self.autoconvert = Autoconverter(**kwargs).convert
        # The whole table is read eagerly, so the file can (and should) be
        # closed before the constructor returns.
        with open(filename, "rb") as csvfile:
            csvtab = UnicodeReader(csvfile, encoding=encoding)
            # column headers: first row of the file
            self.names = [make_python_name(s, default=n, encoding=encoding)
                          for n, s in enumerate(csvtab.next())]
            # read the rest after the column headers; rows that have no
            # cells or only empty cells are skipped
            self.records = [tuple(map(self.autoconvert, line))
                            for line in csvtab
                            if any(cell != '' for cell in line)]

    def recarray(self):
        """Returns data as :class:`numpy.recarray`."""
        return numpy.rec.fromrecords(self.records, names=self.names)
124