1
2
3
4
5
6 """
7 :mod:`gromacs.formats` -- Accessing various files
8 =================================================
9
10 This module contains classes that represent data files on
11 disk. Typically one creates an instance and
12
13 - reads from a file using a :meth:`read` method, or
14
15 - populates the instance (in the simplest case with a :meth:`set`
16 method) and then uses the :meth:`write` method to write the data to
17 disk in the appropriate format.
18
19 For function data there typically also exists a :meth:`plot` method
20 which produces a graph (using matplotlib).
21
22 The module defines some classes that are used in other modules; they
23 do *not* make use of :mod:`gromacs.tools` or :mod:`gromacs.cbook` and
24 can be safely imported at any time.
25
26
27 Classes
28 -------
29
30 .. autoclass:: XVG
31 :members:
32 .. autoclass:: NDX
33 :members:
34 .. autoclass:: uniqueNDX
35 :members:
36 .. autoclass:: GRO
37 :members:
38
39 (Not implemented yet)
40 """
41 from __future__ import with_statement
42
43 __docformat__ = "restructuredtext en"
44
45 import os
46 import re
47 import warnings
48 import errno
49 import operator
50
51 import numpy
52
53 from odict import odict
54
55 import utilities
56 from gromacs import ParseError, AutoCorrectionWarning
57
58 -class XVG(utilities.FileUtils):
59 """Class that represents the numerical data in a grace xvg file.
60
61 All data must be numerical. :const:`NAN` and :const:`INF` values are
62 supported via python's :func:`float` builtin function.
63
64 The :attr:`~XVG.array` attribute can be used to access the
65 array once it has been read and parsed. The :attr:`~XVG.ma`
66 attribute is a numpy masked array (good for plotting).
67
68 Conceptually, the file on disk and the XVG instance are considered the same
69 data. This means that whenever the filename for I/O (:meth:`XVG.read` and
70 :meth:`XVG.write`) is changed then the filename associated with the
71 instance is also changed to reflect the association between file and
72 instance.
73
74 .. Note:: - Only simple XY or NXY files are currently supported, not
75 Grace files that contain multiple data sets separated by '&'.
76 - Any kind of formatting (xmgrace commands) are discarded.
77 """
78
79 default_extension = "xvg"
80
def __init__(self, filename=None, names=None):
    """Set up the instance, optionally associated with a xvg file.

    :Arguments:
      *filename*
            the xvg file (type XY or NXY only). When given, it is
            registered through :meth:`_init_filename`; the data are
            read and parsed later, when :attr:`XVG.array` is accessed.
      *names*
            optional labels for the columns (currently only written
            as comments to file); either one string with the labels
            separated by commas or a list of strings
    """
    # Cache for the parsed data; None means "not parsed yet".
    self.__array = None
    if filename is not None:
        self._init_filename(filename)

    if names is None:
        self.names = []
        return
    try:
        labels = names.split(',')
    except AttributeError:
        # Not a string: use the object as given (e.g. a list of labels).
        labels = names
    self.names = labels
104
105 - def read(self, filename=None):
109
def write(self, filename=None):
    """Write array to xvg file *filename* in NXY format.

    The output starts with three ``#`` comment lines; the third one
    records :attr:`names` as ``# :columns: <repr of names>``. Each row
    of ``self.array.T`` is then written on a line of its own, with
    values separated by single spaces.

    :Arguments:
      *filename*
            output file; it is handed to :meth:`_init_filename` and the
            data are written to ``self.real_filename``

    .. Note:: Only plain files working at the moment, not compressed.
    """
    self._init_filename(filename)
    with utilities.openany(self.real_filename, 'w') as xvg:
        xvg.write("# xmgrace compatible NXY data file\n"
                  "# Written by gromacs.formats.XVG()\n")
        # The names are wrapped in a 1-tuple so that a tuple of names is
        # formatted as one object instead of being unpacked by '%'. The
        # trailing newline keeps the first data row off the comment line.
        xvg.write("# :columns: %r\n" % (self.names,))
        for xyy in self.array.T:
            xyy.tofile(xvg, sep=" ", format="%-8s")
            xvg.write('\n')
123
124 @property
126 """Represent xvg data as a (cached) numpy array.
127
128 The array is returned with column-first indexing, i.e. for a data file with
129 columns X Y1 Y2 Y3 ... the array a will be a[0] = X, a[1] = Y1, ... .
130 """
131 if self.__array is None:
132 self.parse()
133 return self.__array
134
135 @property
137 """Represent data as a masked array.
138
139 The array is returned with column-first indexing, i.e. for a data file with
140 columns X Y1 Y2 Y3 ... the array a will be a[0] = X, a[1] = Y1, ... .
141
142 inf and nan are filtered via :func:`numpy.isfinite`.
143 """
144 a = self.array
145 return numpy.ma.MaskedArray(a, mask=numpy.logical_not(numpy.isfinite(a)))
146
147 @property
149 """Mean value of all data columns."""
150 return self.array[1:].mean(axis=1)
151
152 @property
154 """Standard deviation from the mean of all data columns."""
155 return self.array[1:].std(axis=1)
156
157 @property
159 """Minimum of the data columns."""
160 return self.array[1:].min(axis=1)
161
162 @property
164 """Maximum of the data columns."""
165 return self.array[1:].max(axis=1)
166
168 """Read and cache the file as a numpy array.
169
170 The array is returned with column-first indexing, i.e. for a data file with
171 columns X Y1 Y2 Y3 ... the array a will be a[0] = X, a[1] = Y1, ... .
172 """
173
174 with utilities.openany(self.real_filename) as xvg:
175 rows = []
176 for line in xvg:
177 line = line.strip()
178 if line.startswith(('#', '@')) or len(line) == 0:
179 continue
180 if line.startswith('&'):
181 raise NotImplementedError('Sorry only simple NXY format is supported.')
182 rows.append(map(float, line.split()))
183 self.__array = numpy.array(rows).transpose()
184
186 """Set the *array* data from *a* (i.e. completely replace).
187
188 No sanity checks at the moment...
189 """
190 self.__array = numpy.asarray(a)
191
def plot(self, **kwargs):
    """Plot xvg file data with :func:`pylab.plot`.

    The first row of the (transformed, column-selected) array is used
    as the abscissa X, all further rows are plotted as ordinates
    Y1, Y2, ...

    If the selection leaves a single one-dimensional data set then it
    is plotted against its index, i.e. (N, Y).

    :Keywords:
      *columns* : list
           Select the columns of the data to be plotted; the list
           is used as a numpy.array extended slice. The default is
           to use all columns. Columns are selected *after* a transform.
      *transform* : function
           function ``transform(array) -> array`` which transforms
           the original array; must return a 2D numpy array of
           shape [X, Y1, Y2, ...] where X, Y1, ... are column
           vectors.  By default the transformation is the
           identity [``lambda x: x``].
      *maxpoints* : int
           upper limit for the number of data points; if there are
           more, the data are subsampled at regular intervals. A
           warning is only issued when the default limit triggered
           the subsampling. Set to ``None`` to plot all data. [10000]
      *kwargs*
           All other keyword arguments are passed on to :func:`pylab.plot`.
    """
    import pylab

    default_limit = 10000
    selection = kwargs.pop('columns', Ellipsis)
    maxpoints = kwargs.pop('maxpoints', default_limit)
    transform = kwargs.pop('transform', lambda x: x)
    data = numpy.asarray(transform(self.array))[selection]

    npoints = data.shape[-1]
    if maxpoints is not None and npoints > maxpoints:
        # Reduce the size by taking every stride-th point along the last axis.
        stride = int(npoints / maxpoints)
        data = data[..., ::stride]
        if maxpoints == default_limit:
            warnings.warn("Plot had %d datapoints > maxpoints = %d; subsampled to %d regularly spaced points."
                          % (npoints, maxpoints, data.shape[-1]), category=AutoCorrectionWarning)

    if data.ndim == 1:
        # Single data set: build an index row to serve as X.
        index = numpy.arange(len(data))
        data = numpy.concatenate([[index], [data]])

    # Mask everything that is not finite (nan, inf).
    masked = numpy.ma.MaskedArray(data, mask=numpy.logical_not(numpy.isfinite(data)))

    # X is handed over as the 'xdata' keyword; only the Y rows are positional.
    kwargs['xdata'] = masked[0]
    pylab.plot(masked[1:].T, **kwargs)
250
252 """Quick hack: errorbar plot.
253
254 Set columns to select [x, y, dy].
255 """
256 import pylab
257
258 kwargs.setdefault('capsize', 0)
259 kwargs.setdefault('elinewidth', 1)
260 kwargs.setdefault('alpha', 0.3)
261 kwargs.setdefault('fmt', None)
262
263 maxpoints_default = 10000
264 columns = kwargs.pop('columns', Ellipsis)
265 maxpoints = kwargs.pop('maxpoints', maxpoints_default)
266 transform = kwargs.pop('transform', lambda x: x)
267 a = numpy.asarray(transform(self.array))[columns]
268
269 ny = a.shape[-1]
270 if not maxpoints is None and ny > maxpoints:
271
272
273 stepsize = int(ny / maxpoints)
274 a = a[..., ::stepsize]
275 if maxpoints == maxpoints_default:
276 warnings.warn("Plot had %d datapoints > maxpoints = %d; subsampled to %d regularly spaced points."
277 % (ny, maxpoints, a.shape[-1]), category=AutoCorrectionWarning)
278
279 if len(a.shape) == 1:
280
281
282 X = numpy.arange(len(a))
283 a = numpy.concatenate([[X], [a]])
284
285
286 ma = numpy.ma.MaskedArray(a, mask=numpy.logical_not(numpy.isfinite(a)))
287
288
289 X = ma[0]
290 Y = ma[1]
291 try:
292 kwargs['yerr'] = ma[3]
293 kwargs['xerr'] = ma[2]
294 except IndexError:
295 kwargs['yerr'] = ma[2]
296
297 pylab.errorbar(X, Y, **kwargs)
298
299
300 -class NDX(odict, utilities.FileUtils):
301 """Gromacs index file.
302
303 Represented as a ordered dict where the keys are index group names and
304 values are numpy arrays of atom numbers.
305
306 Use the :meth:`NDX.read` and :meth:`NDX.write` methods for
307 I/O. Access groups by name via the :meth:`NDX.get` and
308 :meth:`NDX.set` methods.
309
310 Alternatively, simply treat the :class:`NDX` instance as a
311 dictionary. Setting a key automatically transforms the new value
312 into a integer 1D numpy array (*not* a set, as would be the
313 :program:`make_ndx` behaviour).
314
315 .. Note:: The index entries themselves are ordered and can contain
316 duplicates so that output from NDX can be easily used for
317 :program:`g_dih` and friends. If you need set-like behaviour
318 you will have to use :class:`gromacs.formats.uniqueNDX` or
319 :class:`gromacs.cbook.IndexBuilder` (which uses
320 :program:`make_ndx` throughout).
321
322 **Example**
323
324 Read index file, make new group and write to disk::
325
326 ndx = NDX()
327 ndx.read('system.ndx')
328 print ndx['Protein']
329 ndx['my_group'] = [2, 4, 1, 5] # add new group
330 ndx.write('new.ndx')
331
332 Or quicker (replacing the input file ``system.ndx``)::
333
334 ndx = NDX('system') # suffix .ndx is automatically added
335 ndx['chi1'] = [2, 7, 8, 10]
336 ndx.write()
337
338 """
339 default_extension = "ndx"
340
341
342 SECTION = re.compile("""\s*\[\s*(?P<name>\S.*\S)\s*\]\s*""")
343
344
345 ncol = 15
346
347 format = '%6d'
348
349 - def __init__(self, filename=None, **kwargs):
355
def read(self, filename=None):
    """Read and parse index file *filename*.

    Lines matching :attr:`SECTION` (``[ name ]``) start a new group;
    all whitespace-separated integers on the following lines are
    appended to that group. Empty lines, and any lines that appear
    before the first group header, are ignored. Each group is passed
    through :meth:`_transform` before it is stored.
    """
    self._init_filename(filename)

    groups = odict()
    section = None
    with open(self.real_filename) as ndx:
        for raw in ndx:
            text = raw.strip()
            if not text:
                continue
            header = self.SECTION.match(text)
            if header:
                section = header.group('name')
                groups[section] = []
            elif section is not None:
                groups[section].extend(map(int, text.split()))

    transformed = odict([(name, self._transform(atomnumbers))
                         for name, atomnumbers in groups.items()])
    super(NDX,self).update(transformed)
377
379 """Write index file to *filename* (or overwrite the file that the index was read from)"""
380 with open(self.filename(filename, ext='ndx'), 'w') as ndx:
381 for name in self:
382 atomnumbers = self._getarray(name)
383 ndx.write('[ %s ]\n' % name)
384 for k in xrange(0, len(atomnumbers), ncol):
385 line = atomnumbers[k:k+ncol].astype(int)
386 n = len(line)
387 ndx.write((" ".join(n*[format])+'\n') % tuple(line))
388 ndx.write('\n')
389
def get(self, name):
    """Look up index group *name* and return what is stored for it.

    Equivalent to ``self[name]``.
    """
    atomnumbers = self[name]
    return atomnumbers
393
def set(self, name, value):
    """Store *value* as index group *name*, adding or replacing it.

    Equivalent to ``self[name] = value``; any conversion of *value* is
    left to the item assignment of the class.
    """
    self[name] = value
397
def size(self, name):
    """Count the entries stored for index group *name*."""
    entries = self[name]
    return len(entries)
401
402 @property
404 """Return a list of all groups."""
405 return self.keys()
406
407 @property
409 """Return a dict with group names and number of entries,"""
410 return dict([(name, len(atomnumbers)) for name, atomnumbers in self.items()])
411
412 @property
414 """Return a list of groups in the same format as :func:`gromacs.cbook.get_ndx_groups`.
415
416 Format:
417 [ {'name': group_name, 'natoms': number_atoms, 'nr': # group_number}, ....]
418 """
419 return [{'name': name, 'natoms': len(atomnumbers), 'nr': nr+1} for
420 nr,(name,atomnumbers) in enumerate(self.items())]
421
423 """Helper getter that is used in write().
424 Override when using a _transform that stores something that
425 cannot be indexed, e.g. when using set()s.
426 """
427 return self[name]
428
435
438
440 raise NotImplementedError
441
444 """set which defines '+' as union (OR) and '-' as intersection (AND)."""
448 return self.intersection(x)
449
452 """Index that behaves like make_ndx, i.e. entries behaves as sets,
453 not lists.
454
455 The index lists behave like sets:
456 - adding sets with '+' is equivalent to a logical OR: x + y == "x | y"
457 - subtraction '-' is AND: x - y == "x & y"
458 - see :meth:`~gromacs.formats.join` for ORing multiple groups (x+y+z+...)
459
460 **Example** ::
461 I = uniqueNDX('system.ndx')
462 I['SOLVENT'] = I['SOL'] + I['NA+'] + I['CL-']
463 """
464
def join(self, *groupnames):
    """Combine the atoms of all *groupnames* into a single index group.

    Names that are not present in the index are skipped silently;
    the groups that are found are handed, in the order given, to
    :meth:`_sum` as a list.

    **Example**

    Always make a solvent group from water and ions, even if not
    all ions are present in all simulations::

       I['SOLVENT'] = I.join('SOL', 'NA+', 'K+', 'CL-')
    """
    present = [self[name] for name in groupnames if name in self]
    return self._sum(present)
479
def _sum(self, sequence):
    """Return all items of *sequence* combined with ``+``, left to right."""
    # No initial value is passed to reduce(), so an empty *sequence*
    # raises TypeError.
    return reduce(operator.add, sequence)
482
485
487 return numpy.sort(numpy.fromiter(self[k],dtype=int,count=len(self[k])))
488
489
490
491
492
493
494
495
496
497
498
499
500 -class GRO(utilities.FileUtils):
501 """Class that represents a GROMOS (gro) structure file.
502
503
504 File format:
505 """
506 default_extension = "gro"
507
518
def read(self, filename=None):
    """Open the gro file *filename*; parsing is not implemented yet.

    *filename* is registered via :meth:`_init_filename` and
    ``self.real_filename`` is opened, but no data are read from it.
    """
    self._init_filename(filename)

    with open(self.real_filename) as gro:
        # Placeholder: nothing is read from the file.
        pass
525
526
527
528 -class MDP(odict, utilities.FileUtils):
529 """Class that represents a Gromacs mdp run input file.
530
531 The MDP instance is an ordered dictionary.
532
533 - *Parameter names* are keys in the dictionary.
534 - *Comments* are sequentially numbered with keys Comment0001,
535 Comment0002, ...
536 - *Empty lines* are similarly preserved as Blank0001, ....
537
538 When writing, the dictionary is dumped in the recorded order to a
539 file. Inserting keys at a specific position is not possible.
540
541 Currently, comments after a parameter on the same line are
542 discarded. Leading and trailing spaces are always stripped.
543
544 .. SeeAlso:: For editing a mdp file one can also use
545 :func:`gromacs.cbook.edit_mdp` (which works like a
546 poor replacement for sed).
547 """
548 default_extension = "mdp"
549
550
551 COMMENT = re.compile("""\s*;\s*(?P<value>.*)""")
552
553 PARAMETER = re.compile("""
554 \s*(?P<parameter>[^=]+?)\s*=\s* # parameter (ws-stripped), before '='
555 (?P<value>[^;]*) # value (stop before comment=;)
556 (?P<comment>\s*;.*)? # optional comment
557 """, re.VERBOSE)
558
def __init__(self, filename=None, autoconvert=True, **kwargs):
    """Create the mdp structure and optionally fill it from a file.

    :Arguments:
      *filename*
          mdp file to read; nothing is read when ``None``
      *autoconvert* : boolean
          ``True`` converts numerical values to python numerical types;
          ``False`` keeps everything as strings [``True``]
      *kwargs*
          key=value pairs used to populate the MDP before any file is
          read. (NO SANITY CHECKS; this also cannot be used for keys
          that are not legal python variable names, such as anything
          that includes a minus '-' sign or starts with a number.)
    """
    super(MDP, self).__init__(**kwargs)
    self.autoconvert = autoconvert

    if filename is None:
        return
    self._init_filename(filename)
    self.read(filename)
580
586
def read(self, filename=None):
    """Read and parse mdp file *filename*.

    Every line is stripped of surrounding whitespace and stored in
    file order:

    - empty lines under the keys ``B0001``, ``B0002``, ... (value ``''``)
    - comment lines (starting with ``;``) under ``C0001``, ``C0002``,
      ... with the comment text as value
    - ``parameter = value`` lines under the parameter name, with the
      value passed through :meth:`_transform`; a trailing comment on
      the same line is discarded

    The parsed entries are then merged into the instance.

    :Raises: :exc:`ParseError` for a line that is neither empty, a
             comment, nor a parameter assignment.
    """
    self._init_filename(filename)

    def blank_key(i):
        return "B%04d" % i

    def comment_key(i):
        return "C%04d" % i

    data = odict()
    iblank = icomment = 0
    with open(self.real_filename) as mdp:
        for line in mdp:
            line = line.strip()
            if not line:
                iblank += 1
                data[blank_key(iblank)] = ''
                continue
            m = self.COMMENT.match(line)
            if m:
                icomment += 1
                data[comment_key(icomment)] = m.group('value')
                continue
            m = self.PARAMETER.match(line)
            if m is None:
                # Report the file that was actually opened; the *filename*
                # argument may be None when the stored filename is used.
                raise ParseError('unknown line in mdp file %r: %r'
                                 % (self.real_filename, line))
            data[m.group('parameter')] = self._transform(m.group('value'))

    super(MDP,self).update(data)
621
622
def write(self, filename=None, skipempty=False):
    """Write mdp file to *filename*.

    Entries are written in dictionary order. Keys of the form
    ``B<digits>``/``Blank<digits>`` produce an empty line, keys of the
    form ``C<digits>``/``Comment<digits>`` produce a ``; value``
    comment line, and everything else is written as ``key = value``.

    :Keywords:
       *filename*
           output mdp file; default is the filename the mdp
           was read from
       *skipempty* : boolean
           ``True`` removes any parameter lines from output that
           contain empty values [``False``]

    .. Note:: Overwrites the file that the mdp was read from if no
              *filename* supplied.
    """
    # Match the complete key, not just its first letter; otherwise a
    # parameter that merely starts with 'B' or 'C' would be written as
    # a blank line or as a comment and would be lost.
    is_blank = re.compile(r'(?:B|Blank)\d+$').match
    is_comment = re.compile(r'(?:C|Comment)\d+$').match

    with open(self.filename(filename, ext='mdp'), 'w') as mdp:
        for k, v in self.items():
            if is_blank(k):
                mdp.write("\n")
            elif is_comment(k):
                mdp.write("; %s\n" % (v,))
            else:
                if skipempty and (v == '' or v is None):
                    continue
                mdp.write("%s = %s\n" % (k, v))
648
651 """Convert input to a numerical type if possible.
652
653 1. A non-string object is returned as it is
654 2. Try conversion to int, float, str.
655 """
656 if not type(s) is str:
657 return s
658 for converter in int, float, str:
659 try:
660 return converter(s)
661 except ValueError:
662 pass
663 raise ValueError("Failed to autoconvert %r" % s)
664