1
2
3
4
5
6 """
7 :mod:`gromacs.formats` -- Accessing various files
8 =================================================
9
10 This module contains classes that represent data files on
11 disk. Typically one creates an instance and
12
13 - reads from a file using a :meth:`read` method, or
14
15 - populates the instance (in the simplest case with a :meth:`set`
16 method) and then uses the :meth:`write` method to write the data to
17 disk in the appropriate format.
18
19 For function data there typically also exists a :meth:`plot` method
20 which produces a graph (using matplotlib).
21
22 The module defines some classes that are used in other modules; they
23 do *not* make use of :mod:`gromacs.tools` or :mod:`gromacs.cbook` and
24 can be safely imported at any time.
25
26
27 Classes
28 -------
29
30 .. autoclass:: XVG
31 :members:
32 .. autoclass:: NDX
33 :members:
34 .. autoclass:: uniqueNDX
35 :members:
36 .. autoclass:: GRO
37 :members:
38
39 (Not implemented yet)
40 """
41 from __future__ import with_statement
42
43 __docformat__ = "restructuredtext en"
44
45 import os
46 import re
47 import warnings
48 import errno
49 import operator
50
51 import numpy
52
53 from odict import odict
54
55 import utilities
56 from gromacs import ParseError, AutoCorrectionWarning
57
58 import logging
59
60 -class XVG(utilities.FileUtils):
61 """Class that represents the numerical data in a grace xvg file.
62
63 All data must be numerical. :const:`NAN` and :const:`INF` values are
64 supported via python's :func:`float` builtin function.
65
66 The :attr:`~XVG.array` attribute can be used to access the
67 array once it has been read and parsed. The :attr:`~XVG.ma`
68 attribute is a numpy masked array (good for plotting).
69
70 Conceptually, the file on disk and the XVG instance are considered the same
71 data. Whenever the filename for I/O (:meth:`XVG.read` and :meth:`XVG.write`) is
72 changed then the filename associated with the instance is also changed to reflect
73 the association between file and instance.
74
75 With the *permissive* = ``True`` flag one can instruct the file reader to skip
76 unparseable lines. In this case the line numbers of the skipped lines are stored
77 in :attr:`XVG.corrupted_lineno`.
78
79 .. Note:: - Only simple XY or NXY files are currently supported, not
80 Grace files that contain multiple data sets separated by '&'.
81 - Any kind of formatting (xmgrace commands) are discarded.
82 """
83
84 default_extension = "xvg"
85 logger = logging.getLogger('gromacs.formats.XVG')
86
def __init__(self, filename=None, names=None, permissive=False):
    """Initialize the class from a xvg file.

    :Arguments:
      *filename*
            is the xvg file; it can only be of type XY or
            NXY. If it is supplied then it is read and parsed
            when :attr:`XVG.array` is accessed.
      *names*
            optional labels for the columns (currently only
            written as comments to file); string with columns
            separated by commas or a list of strings
      *permissive*
            ``False`` raises a :exc:`ValueError` and logs an error
            when encountering data lines that it cannot parse.
            ``True`` ignores those lines and logs a warning---this is
            a risk because it might read a corrupted input file [``False``]

    """
    # the data cache stays empty until the array is requested for the first time
    self.__array = None
    if filename is not None:
        self._init_filename(filename)

    # a comma-separated string is split into labels; anything that has no
    # split() method (e.g. a list of strings, or the empty default) is kept as is
    labels = [] if names is None else names
    try:
        labels = labels.split(',')
    except AttributeError:
        pass
    self.names = labels

    self.permissive = permissive
    # filled with line numbers of skipped lines once the file has been parsed
    self.corrupted_lineno = None
118
119 - def read(self, filename=None):
123
def write(self, filename=None):
    """Write array to xvg file *filename* in NXY format.

    The file starts with three comment lines (the last one records the
    column names) followed by one line per data row.

    :Arguments:
      *filename*
            name of the output file; it is passed to
            :meth:`_init_filename` and the file that is written is
            :attr:`real_filename`

    .. Note:: Only plain files working at the moment, not compressed.
    """
    self._init_filename(filename)
    with utilities.openany(self.real_filename, 'w') as xvg:
        xvg.write("# xmgrace compatible NXY data file\n"
                  "# Written by gromacs.formats.XVG()\n")
        # The header line must be newline-terminated, otherwise the first data
        # row is appended to the comment and is lost when the file is read
        # again. Wrapping names in a 1-tuple keeps %-formatting correct even
        # if names is itself a tuple.
        xvg.write("# :columns: %r\n" % (self.names,))
        # array is stored column-first; transpose to iterate over data rows
        for xyy in self.array.T:
            xyy.tofile(xvg, sep=" ", format="%-8s")
            xvg.write('\n')
137
138 @property
140 """Represent xvg data as a (cached) numpy array.
141
142 The array is returned with column-first indexing, i.e. for a data file with
143 columns X Y1 Y2 Y3 ... the array a will be a[0] = X, a[1] = Y1, ... .
144 """
145 if self.__array is None:
146 self.parse()
147 return self.__array
148
149 @property
151 """Represent data as a masked array.
152
153 The array is returned with column-first indexing, i.e. for a data file with
154 columns X Y1 Y2 Y3 ... the array a will be a[0] = X, a[1] = Y1, ... .
155
156 inf and nan are filtered via :func:`numpy.isfinite`.
157 """
158 a = self.array
159 return numpy.ma.MaskedArray(a, mask=numpy.logical_not(numpy.isfinite(a)))
160
161 @property
163 """Mean value of all data columns."""
164 return self.array[1:].mean(axis=1)
165
166 @property
168 """Standard deviation from the mean of all data columns."""
169 return self.array[1:].std(axis=1)
170
171 @property
173 """Minimum of the data columns."""
174 return self.array[1:].min(axis=1)
175
176 @property
178 """Maximum of the data columns."""
179 return self.array[1:].max(axis=1)
180
182 """Read and cache the file as a numpy array.
183
184 The array is returned with column-first indexing, i.e. for a data file with
185 columns X Y1 Y2 Y3 ... the array a will be a[0] = X, a[1] = Y1, ... .
186 """
187 self.corrupted_lineno = []
188
189 with utilities.openany(self.real_filename) as xvg:
190 rows = []
191 ncol = None
192 for lineno,line in enumerate(xvg):
193 line = line.strip()
194 if line.startswith(('#', '@')) or len(line) == 0:
195 continue
196 if line.startswith('&'):
197 raise NotImplementedError('%s: Multi-data not supported, only simple NXY format.'
198 % self.real_filename)
199
200 try:
201 row = map(float, line.split())
202 except:
203 if self.permissive:
204 self.logger.warn("%s: SKIPPING unparsable line %d: %r",
205 self.real_filename, lineno+1, line)
206 self.corrupted_lineno.append(lineno+1)
207 continue
208 self.logger.error("%s: Cannot parse line %d: %r",
209 self.real_filename, lineno+1, line)
210 raise
211
212 if not ncol is None and len(row) != ncol:
213 if self.permissive:
214 self.logger.warn("%s: SKIPPING line %d with wrong number of columns: %r",
215 self.real_filename, lineno+1, line)
216 self.corrupted_lineno.append(lineno+1)
217 continue
218 errmsg = "%s: Wrong number of columns in line %d: %r" % (self.real_filename, lineno+1, line)
219 self.logger.error(errmsg)
220 raise IOError(errno.ENODATA, errmsg, self.real_filename)
221
222 ncol = len(row)
223 rows.append(row)
224 try:
225 self.__array = numpy.array(rows).transpose()
226 except:
227 self.logger.error("%s: Failed reading XVG file, possibly data corrupted. "
228 "Check the last line of the file...", self.real_filename)
229 raise
230
232 """Set the *array* data from *a* (i.e. completely replace).
233
234 No sanity checks at the moment...
235 """
236 self.__array = numpy.asarray(a)
237
def plot(self, **kwargs):
    """Plot xvg file data.

    The first column of the data is always taken as the abscissa
    X. Additional columns are plotted as ordinates Y1, Y2, ...

    In the special case that there is only a single column then this column
    is plotted against the index, i.e. (N, Y).

    :Keywords:
      *columns* : list
           Select the columns of the data to be plotted; the list
           is used as a numpy.array extended slice. The default is
           to use all columns. Columns are selected *after* a transform.
      *transform* : function
           function ``transform(array) -> array`` which transforms
           the original array; must return a 2D numpy array of
           shape [X, Y1, Y2, ...] where X, Y1, ... are column
           vectors.  By default the transformation is the
           identity [``lambda x: x``].
      *maxpoints* : int
           limit the total number of data points; matplotlib has issues processing
           png files with >100,000 points and pdfs take forever to display. Set to
           ``None`` if really all data should be displayed. At the moment we simply
           subsample the data at regular intervals. [10000]
      *kwargs*
           All other keyword arguments are passed on to :func:`pylab.plot`.
    """
    import pylab

    maxpoints_default = 10000
    # strip our own options so that only plot options remain in kwargs
    columns = kwargs.pop('columns', Ellipsis)
    maxpoints = kwargs.pop('maxpoints', maxpoints_default)
    transform = kwargs.pop('transform', lambda x: x)

    # transform first, then select columns
    data = numpy.asarray(transform(self.array))[columns]

    npoints = data.shape[-1]
    if maxpoints is not None and npoints > maxpoints:
        # reduce the data by taking every stride-th point along the last axis
        stride = int(npoints / maxpoints)
        data = data[..., ::stride]
        # only warn when the user did not choose maxpoints explicitly
        if maxpoints == maxpoints_default:
            warnings.warn("Plot had %d datapoints > maxpoints = %d; subsampled to %d regularly spaced points."
                          % (npoints, maxpoints, data.shape[-1]), category=AutoCorrectionWarning)

    if len(data.shape) == 1:
        # single column: use the running index as abscissa
        index = numpy.arange(len(data))
        data = numpy.concatenate([[index], [data]])

    # hide inf and nan from the plot
    not_finite = numpy.logical_not(numpy.isfinite(data))
    masked = numpy.ma.MaskedArray(data, mask=not_finite)

    # first row is X, all remaining rows are plotted against it
    kwargs['xdata'] = masked[0]
    pylab.plot(masked[1:].T, **kwargs)
296
298 """Quick hack: errorbar plot.
299
300 Set columns to select [x, y, dy].
301 """
302 import pylab
303
304 kwargs.setdefault('capsize', 0)
305 kwargs.setdefault('elinewidth', 1)
306 kwargs.setdefault('alpha', 0.3)
307 kwargs.setdefault('fmt', None)
308
309 maxpoints_default = 10000
310 columns = kwargs.pop('columns', Ellipsis)
311 maxpoints = kwargs.pop('maxpoints', maxpoints_default)
312 transform = kwargs.pop('transform', lambda x: x)
313 a = numpy.asarray(transform(self.array))[columns]
314
315 ny = a.shape[-1]
316 if not maxpoints is None and ny > maxpoints:
317
318
319 stepsize = int(ny / maxpoints)
320 a = a[..., ::stepsize]
321 if maxpoints == maxpoints_default:
322 warnings.warn("Plot had %d datapoints > maxpoints = %d; subsampled to %d regularly spaced points."
323 % (ny, maxpoints, a.shape[-1]), category=AutoCorrectionWarning)
324
325 if len(a.shape) == 1:
326
327
328 X = numpy.arange(len(a))
329 a = numpy.concatenate([[X], [a]])
330
331
332 ma = numpy.ma.MaskedArray(a, mask=numpy.logical_not(numpy.isfinite(a)))
333
334
335 X = ma[0]
336 Y = ma[1]
337 try:
338 kwargs['yerr'] = ma[3]
339 kwargs['xerr'] = ma[2]
340 except IndexError:
341 kwargs['yerr'] = ma[2]
342
343 pylab.errorbar(X, Y, **kwargs)
344
345
346 -class NDX(odict, utilities.FileUtils):
347 """Gromacs index file.
348
349 Represented as an ordered dict where the keys are index group names and
350 values are numpy arrays of atom numbers.
351
352 Use the :meth:`NDX.read` and :meth:`NDX.write` methods for
353 I/O. Access groups by name via the :meth:`NDX.get` and
354 :meth:`NDX.set` methods.
355
356 Alternatively, simply treat the :class:`NDX` instance as a
357 dictionary. Setting a key automatically transforms the new value
358 into a integer 1D numpy array (*not* a set, as would be the
359 :program:`make_ndx` behaviour).
360
361 .. Note:: The index entries themselves are ordered and can contain
362 duplicates so that output from NDX can be easily used for
363 :program:`g_dih` and friends. If you need set-like behaviour
364 you will have to use :class:`gromacs.formats.uniqueNDX` or
365 :class:`gromacs.cbook.IndexBuilder` (which uses
366 :program:`make_ndx` throughout).
367
368 **Example**
369
370 Read index file, make new group and write to disk::
371
372 ndx = NDX()
373 ndx.read('system.ndx')
374 print ndx['Protein']
375 ndx['my_group'] = [2, 4, 1, 5] # add new group
376 ndx.write('new.ndx')
377
378 Or quicker (replacing the input file ``system.ndx``)::
379
380 ndx = NDX('system') # suffix .ndx is automatically added
381 ndx['chi1'] = [2, 7, 8, 10]
382 ndx.write()
383
384 """
385 default_extension = "ndx"
386
387
388 SECTION = re.compile("""\s*\[\s*(?P<name>\S.*\S)\s*\]\s*""")
389
390
391 ncol = 15
392
393 format = '%6d'
394
395 - def __init__(self, filename=None, **kwargs):
401
def read(self, filename=None):
    """Read and parse index file *filename*.

    Each ``[ name ]`` header starts a new group; all whitespace-separated
    integers on the following lines are appended to that group. Empty lines
    and any lines before the first header are ignored. The parsed groups
    are passed through :meth:`_transform` and merged into the instance.
    """
    self._init_filename(filename)

    groups = odict()
    group_name = None
    with open(self.real_filename) as ndx:
        for raw in ndx:
            text = raw.strip()
            if not text:
                continue
            header = self.SECTION.match(text)
            if header:
                # a repeated header name starts over with an empty list
                group_name = header.group('name')
                groups[group_name] = []
            elif group_name is not None:
                groups[group_name].extend(map(int, text.split()))

    transformed = odict([(name, self._transform(atomnumbers))
                         for name, atomnumbers in groups.items()])
    # use the parent update() so that the values are stored as transformed above
    super(NDX, self).update(transformed)
423
425 """Write index file to *filename* (or overwrite the file that the index was read from)"""
426 with open(self.filename(filename, ext='ndx'), 'w') as ndx:
427 for name in self:
428 atomnumbers = self._getarray(name)
429 ndx.write('[ %s ]\n' % name)
430 for k in xrange(0, len(atomnumbers), ncol):
431 line = atomnumbers[k:k+ncol].astype(int)
432 n = len(line)
433 ndx.write((" ".join(n*[format])+'\n') % tuple(line))
434 ndx.write('\n')
435
def get(self, name):
    """Look up index group *name* and return its stored value.

    Equivalent to ``self[name]``.
    """
    group = self[name]
    return group
439
def set(self, name, value):
    """Store *value* as index group *name*, replacing an existing group.

    Equivalent to ``self[name] = value``; any conversion of *value* is
    done by the item assignment itself.
    """
    self.__setitem__(name, value)
443
def size(self, name):
    """Return the length of index group *name*."""
    group = self[name]
    return len(group)
447
448 @property
450 """Return a list of all groups."""
451 return self.keys()
452
453 @property
455 """Return a dict with group names and number of entries,"""
456 return dict([(name, len(atomnumbers)) for name, atomnumbers in self.items()])
457
458 @property
460 """Return a list of groups in the same format as :func:`gromacs.cbook.get_ndx_groups`.
461
462 Format:
463 [ {'name': group_name, 'natoms': number_atoms, 'nr': # group_number}, ....]
464 """
465 return [{'name': name, 'natoms': len(atomnumbers), 'nr': nr+1} for
466 nr,(name,atomnumbers) in enumerate(self.items())]
467
469 """Helper getter that is used in write().
470 Override when using a _transform that stores something that
471 cannot be indexed, e.g. when using set()s.
472 """
473 return self[name]
474
481
484
486 raise NotImplementedError
487
490 """set which defines '+' as union (OR) and '-' as intersection (AND)."""
494 return self.intersection(x)
495
498 """Index that behaves like make_ndx, i.e. entries behaves as sets,
499 not lists.
500
501 The index lists behave like sets:
502 - adding sets with '+' is equivalent to a logical OR: x + y == "x | y"
503 - subtraction '-' is AND: x - y == "x & y"
504 - see :meth:`~gromacs.formats.join` for ORing multiple groups (x+y+z+...)
505
506 **Example** ::
507 I = uniqueNDX('system.ndx')
508 I['SOLVENT'] = I['SOL'] + I['NA+'] + I['CL-']
509 """
510
def join(self, *groupnames):
    """Return an index group that contains atoms from all *groupnames*.

    Names that are not present in the index are silently skipped; the
    remaining groups are combined with :meth:`_sum` in the order given.

    **Example**

    Always make a solvent group from water and ions, even if not
    all ions are present in all simulations::

       I['SOLVENT'] = I.join('SOL', 'NA+', 'K+', 'CL-')
    """
    available = []
    for groupname in groupnames:
        if groupname in self:
            available.append(self[groupname])
    return self._sum(available)
525
526 - def _sum(self, sequence):
527 return reduce(operator.add, sequence)
528
531
533 return numpy.sort(numpy.fromiter(self[k],dtype=int,count=len(self[k])))
534
535
536
537
538
539
540
541
542
543
544
545
546 -class GRO(utilities.FileUtils):
547 """Class that represents a GROMOS (gro) structure file.
548
549
550 File format:
551 """
552 default_extension = "gro"
553 logger = logging.getLogger('gromacs.formats.GRO')
554
565
def read(self, filename=None):
    """Open the gro structure file *filename*.

    Parsing is not implemented yet: the file is only opened and closed
    again, nothing is stored on the instance.
    """
    self._init_filename(filename)

    with open(self.real_filename):
        # placeholder until a parser is written
        pass
572
573
574
575 -class MDP(odict, utilities.FileUtils):
576 """Class that represents a Gromacs mdp run input file.
577
578 The MDP instance is an ordered dictionary.
579
580 - *Parameter names* are keys in the dictionary.
581 - *Comments* are sequentially numbered with keys Comment0001,
582 Comment0002, ...
583 - *Empty lines* are similarly preserved as Blank0001, ....
584
585 When writing, the dictionary is dumped in the recorded order to a
586 file. Inserting keys at a specific position is not possible.
587
588 Currently, comments after a parameter on the same line are
589 discarded. Leading and trailing spaces are always stripped.
590
591 .. SeeAlso:: For editing a mdp file one can also use
592 :func:`gromacs.cbook.edit_mdp` (which works like a
593 poor replacement for sed).
594 """
595 default_extension = "mdp"
596 logger = logging.getLogger('gromacs.formats.MDP')
597
598 COMMENT = re.compile("""\s*;\s*(?P<value>.*)""")
599
600 PARAMETER = re.compile("""
601 \s*(?P<parameter>[^=]+?)\s*=\s* # parameter (ws-stripped), before '='
602 (?P<value>[^;]*) # value (stop before comment=;)
603 (?P<comment>\s*;.*)? # optional comment
604 """, re.VERBOSE)
605
def __init__(self, filename=None, autoconvert=True, **kwargs):
    """Initialize mdp structure.

    :Arguments:
      *filename*
          read from mdp file
      *autoconvert* : boolean
          ``True`` converts numerical values to python numerical types;
          ``False`` keeps everything as strings [``True``]
      *kwargs*
          Populate the MDP with key=value pairs. (NO SANITY CHECKS; and also
          does not work for keys that are not legal python variable names such
          as anything that includes a minus '-' sign or starts with a number).
    """
    # keyword arguments become the initial dictionary entries
    super(MDP, self).__init__(**kwargs)

    self.autoconvert = autoconvert

    if filename is None:
        # nothing to load; the instance only holds the kwargs entries
        return
    self._init_filename(filename)
    self.read(filename)
627
633
def read(self, filename=None):
    """Read and parse mdp file *filename*.

    Every line is stripped and classified in this order:

    - empty lines are stored under generated keys ``B0001``, ``B0002``, ...
    - comment lines (starting with ``;``) are stored under ``C0001``,
      ``C0002``, ... with the comment text as value
    - ``parameter = value`` lines are stored under the parameter name; a
      trailing ``; comment`` is discarded and the value is stripped of
      surrounding whitespace before it is passed to :meth:`_transform`

    The parsed entries are merged into the instance in file order.

    :Raises: :exc:`ParseError` for a line that matches none of the above.
    """
    self._init_filename(filename)

    data = odict()
    iblank = icomment = 0
    with open(self.real_filename) as mdp:
        for line in mdp:
            line = line.strip()
            if len(line) == 0:
                iblank += 1
                data["B%04d" % iblank] = ''
                continue
            m = self.COMMENT.match(line)
            if m:
                icomment += 1
                data["C%04d" % icomment] = m.group('value')
                continue

            m = self.PARAMETER.match(line)
            if m:
                parameter = m.group('parameter')
                # The value group runs up to a trailing comment and can end
                # in whitespace; strip it so that string values do not carry
                # trailing spaces.
                value = self._transform(m.group('value').strip())
                data[parameter] = value
            else:
                # Report the file that was actually opened; the *filename*
                # argument is None when the stored file name is used.
                errmsg = '%r: unknown line in mdp file, %r' % (self.real_filename, line)
                self.logger.error(errmsg)
                raise ParseError(errmsg)

    super(MDP, self).update(data)
670
671
def write(self, filename=None, skipempty=False):
    """Write mdp file to *filename*.

    Entries are written in dictionary order. Keys that are placeholders
    for blank lines (``B0001``, ``Blank0001``, ...) produce an empty line,
    placeholders for comments (``C0001``, ``Comment0001``, ...) produce a
    ``; comment`` line, and everything else is written as
    ``parameter = value``.

    :Keywords:
      *filename*
          output mdp file; default is the filename the mdp
          was read from
      *skipempty* : boolean
          ``True`` removes any parameter lines from output that
          contain empty values [``False``]

    .. Note:: Overwrites the file that the mdp was read from if no
              *filename* supplied.
    """
    # Only a B/C (or Blank/Comment) prefix followed purely by digits marks a
    # placeholder. Testing just the first character would also swallow real
    # parameters that start with a capital B or C (e.g. "Coulombtype").
    placeholder = re.compile(r'(?:(?P<blank>B(?:lank)?)|(?P<comment>C(?:omment)?))\d+$')

    with open(self.filename(filename, ext='mdp'), 'w') as mdp:
        for k, v in self.items():
            m = placeholder.match(k)
            if m and m.group('blank'):
                mdp.write("\n")
            elif m and m.group('comment'):
                mdp.write("; %s\n" % (v,))
            else:
                if skipempty and (v == '' or v is None):
                    continue
                mdp.write("%s = %s\n" % (k, v))
697
700 """Convert input to a numerical type if possible.
701
702 1. A non-string object is returned as it is
703 2. Try conversion to int, float, str.
704 """
705 if not type(s) is str:
706 return s
707 for converter in int, float, str:
708 try:
709 return converter(s)
710 except ValueError:
711 pass
712 raise ValueError("Failed to autoconvert %r" % s)
713