Package gromacs :: Module formats
[hide private]
[frames] | no frames]

Source Code for Module gromacs.formats

  1  # GromacsWrapper: formats.py 
  2  # Copyright (c) 2009-2010 Oliver Beckstein <orbeckst@gmail.com> 
  3  # Released under the GNU Public License 3 (or higher, your choice) 
  4  # See the file COPYING for details. 
  5   
  6  """ 
  7  :mod:`gromacs.formats` -- Accessing various files 
  8  ================================================= 
  9   
 10  This module contains classes that represent data files on 
 11  disk. Typically one creates an instance and 
 12   
 13  - reads from a file using a :meth:`read` method, or 
 14   
 15  - populates the instance (in the simplest case with a :meth:`set` 
 16    method) and then uses the :meth:`write` method to write the data to 
 17    disk in the appropriate format. 
 18   
 19  For function data there typically also exists a :meth:`plot` method 
 20  which produces a graph (using matplotlib). 
 21   
 22  The module defines some classes that are used in other modules; they 
 23  do *not* make use of :mod:`gromacs.tools` or :mod:`gromacs.cbook` and 
 24  can be safely imported at any time. 
 25   
 26   
 27  Classes 
 28  ------- 
 29   
 30  .. autoclass:: XVG 
 31     :members: 
 32  .. autoclass:: NDX 
 33     :members: 
 34  .. autoclass:: uniqueNDX 
 35     :members: 
 36  .. autoclass:: GRO 
 37     :members: 
 38   
 39     (Not implemented yet) 
 40  """ 
 41  from __future__ import with_statement 
 42   
 43  __docformat__ = "restructuredtext en" 
 44   
 45  import os 
 46  import re 
 47  import warnings 
 48  import errno 
 49  import operator 
 50   
 51  import numpy 
 52   
 53  from odict import odict 
 54   
 55  import utilities 
 56  from gromacs import ParseError, AutoCorrectionWarning 
class XVG(utilities.FileUtils):
    """Class that represents the numerical data in a grace xvg file.

    All data must be numerical. :const:`NAN` and :const:`INF` values are
    supported via python's :func:`float` builtin function.

    The :attr:`~XVG.array` attribute can be used to access the
    array once it has been read and parsed. The :attr:`~XVG.ma`
    attribute is a numpy masked array (good for plotting).

    Conceptually, the file on disk and the XVG instance are considered the same
    data. This means that whenever the filename for I/O (:meth:`XVG.read` and
    :meth:`XVG.write`) is changed then the filename associated with the
    instance is also changed to reflect the association between file and
    instance.

    .. Note:: - Only simple XY or NXY files are currently supported, not
                Grace files that contain multiple data sets separated by '&'.
              - Any kind of formatting (xmgrace commands) are discarded.
    """

    default_extension = "xvg"

    def __init__(self, filename=None, names=None):
        """Initialize the class from a xvg file.

        :Arguments:
              *filename*
                    is the xvg file; it can only be of type XY or
                    NXY. If it is supplied then it is read and parsed
                    when :attr:`XVG.array` is accessed.
              *names*
                    optional labels for the columns (currently only
                    written as comments to file); string with columns
                    separated by commas or a list of strings
        """
        self.__array = None          # cache for array property
        if filename is not None:
            self._init_filename(filename)  # reading from file is delayed until required
        if names is None:
            self.names = []
        else:
            try:
                self.names = names.split(',')
            except AttributeError:
                # not a string: take it as an already split sequence of labels
                self.names = names

    def read(self, filename=None):
        """Read and parse xvg file *filename*."""
        self._init_filename(filename)
        self.parse()

    def write(self, filename=None):
        """Write array to xvg file *filename* in NXY format.

        The header consists of comment lines only; the column labels
        (:attr:`names`) are stored in a ``# :columns:`` comment line.

        .. Note:: Only plain files working at the moment, not compressed.
        """
        self._init_filename(filename)
        with utilities.openany(self.real_filename, 'w') as xvg:
            xvg.write("# xmgrace compatible NXY data file\n"
                      "# Written by gromacs.formats.XVG()\n")
            # The comment must be terminated with a newline, otherwise the
            # first data row ends up on the comment line and is lost when the
            # file is parsed again. The 1-tuple protects against *names*
            # being a tuple itself.
            xvg.write("# :columns: %r\n" % (self.names,))
            for xyy in self.array.T:
                xyy.tofile(xvg, sep=" ", format="%-8s")  # quick and dirty ascii output...--no compression!
                xvg.write('\n')

    @property
    def array(self):
        """Represent xvg data as a (cached) numpy array.

        The array is returned with column-first indexing, i.e. for a data file with
        columns X Y1 Y2 Y3 ... the array a will be a[0] = X, a[1] = Y1, ... .
        """
        if self.__array is None:
            self.parse()
        return self.__array

    @property
    def ma(self):
        """Represent data as a masked array.

        The array is returned with column-first indexing, i.e. for a data file with
        columns X Y1 Y2 Y3 ... the array a will be a[0] = X, a[1] = Y1, ... .

        inf and nan are filtered via :func:`numpy.isfinite`.
        """
        a = self.array
        return numpy.ma.MaskedArray(a, mask=numpy.logical_not(numpy.isfinite(a)))

    @property
    def mean(self):
        """Mean value of all data columns."""
        return self.array[1:].mean(axis=1)

    @property
    def std(self):
        """Standard deviation from the mean of all data columns."""
        return self.array[1:].std(axis=1)

    @property
    def min(self):
        """Minimum of the data columns."""
        return self.array[1:].min(axis=1)

    @property
    def max(self):
        """Maximum of the data columns."""
        return self.array[1:].max(axis=1)

    def parse(self):
        """Read and cache the file as a numpy array.

        Lines starting with ``#`` or ``@`` and empty lines are skipped.

        The array is stored with column-first indexing, i.e. for a data file with
        columns X Y1 Y2 Y3 ... the array a will be a[0] = X, a[1] = Y1, ... .

        :Raises: :exc:`NotImplementedError` if a data set separator ``&``
                 is encountered.
        """
        # cannot use numpy.loadtxt() because xvg can have two types of 'comment' lines
        with utilities.openany(self.real_filename) as xvg:
            rows = []
            for line in xvg:
                line = line.strip()
                if not line or line.startswith(('#', '@')):
                    continue
                if line.startswith('&'):
                    raise NotImplementedError('Sorry only simple NXY format is supported.')
                # explicit list (not map()) so that this also works when map is lazy
                rows.append([float(field) for field in line.split()])
        self.__array = numpy.array(rows).transpose()    # cache result

    def set(self, a):
        """Set the *array* data from *a* (i.e. completely replace).

        No sanity checks at the moment...
        """
        self.__array = numpy.asarray(a)

    def _prepare_plot_array(self, kwargs):
        """Return the masked array to be plotted; shared by plot() and errorbar().

        Removes the keywords *columns*, *maxpoints*, and *transform* from the
        dict *kwargs* (in place) so that the remaining entries can be handed
        to the pylab plotting function. The data are transformed, sliced,
        subsampled down to about *maxpoints* points and finally masked
        wherever they are not finite.
        """
        maxpoints_default = 10000
        columns = kwargs.pop('columns', Ellipsis)         # slice for everything
        maxpoints = kwargs.pop('maxpoints', maxpoints_default)
        transform = kwargs.pop('transform', lambda x: x)  # default is identity transformation
        a = numpy.asarray(transform(self.array))[columns] # (slice o transform)(array)

        ny = a.shape[-1]   # assume 1D or 2D array with last dimension varying fastest
        if maxpoints is not None and ny > maxpoints:
            # reduce size by subsampling (primitive --- can leave out
            # bits at the end or end up with almost twice of maxpoints)
            stepsize = int(ny / maxpoints)
            a = a[..., ::stepsize]
            if maxpoints == maxpoints_default:  # only warn if user did not set maxpoints
                warnings.warn("Plot had %d datapoints > maxpoints = %d; subsampled to %d regularly spaced points."
                              % (ny, maxpoints, a.shape[-1]), category=AutoCorrectionWarning)

        if len(a.shape) == 1:
            # special case: plot against index; plot would do this automatically but
            # we'll just produce our own xdata and pretend that this was X all along
            X = numpy.arange(len(a))
            a = numpy.concatenate([[X], [a]])  # does NOT overwrite original a but make a new one

        # now deal with infs, nans etc AFTER all transformations (needed for plotting across inf/nan)
        return numpy.ma.MaskedArray(a, mask=numpy.logical_not(numpy.isfinite(a)))

    def plot(self, **kwargs):
        """Plot xvg file data.

        The first column of the data is always taken as the abscissa
        X. Additional columns are plotted as ordinates Y1, Y2, ...

        In the special case that there is only a single column then this column
        is plotted against the index, i.e. (N, Y).

        :Keywords:
          *columns* : list
               Select the columns of the data to be plotted; the list
               is used as a numpy.array extended slice. The default is
               to use all columns. Columns are selected *after* a transform.
          *transform* : function
               function ``transform(array) -> array`` which transforms
               the original array; must return a 2D numpy array of
               shape [X, Y1, Y2, ...] where X, Y1, ... are column
               vectors.  By default the transformation is the
               identity [``lambda x: x``].
          *maxpoints* : int
               limit the total number of data points; matplotlib has issues processing
               png files with >100,000 points and pdfs take forever to display. Set to
               ``None`` if really all data should be displayed. At the moment we simply
               subsample the data at regular intervals. [10000]
          *kwargs*
               All other keyword arguments are passed on to :func:`pylab.plot`.
        """
        import pylab

        ma = self._prepare_plot_array(kwargs)

        # finally plot
        kwargs['xdata'] = ma[0]          # abscissa set separately
        pylab.plot(ma[1:].T, **kwargs)   # plot all other columns in parallel

    def errorbar(self, **kwargs):
        """Quick hack: errorbar plot.

        Set *columns* to select [x, y, dy] (or [x, y, dx, dy]: if a fourth
        row is present it is used as the y error and the third one as the
        x error).

        The keywords *columns*, *transform*, and *maxpoints* work as in
        :meth:`XVG.plot`; all other keyword arguments are passed on to
        :func:`pylab.errorbar`.
        """
        import pylab

        kwargs.setdefault('capsize', 0)
        kwargs.setdefault('elinewidth', 1)
        kwargs.setdefault('alpha', 0.3)
        kwargs.setdefault('fmt', None)

        ma = self._prepare_plot_array(kwargs)

        # finally plot
        X = ma[0]          # abscissa set separately
        Y = ma[1]
        try:
            kwargs['yerr'] = ma[3]
            kwargs['xerr'] = ma[2]
        except IndexError:
            # only three rows: the third one is the error in y
            kwargs['yerr'] = ma[2]

        pylab.errorbar(X, Y, **kwargs)
298
class NDX(odict, utilities.FileUtils):
    """Gromacs index file.

    Represented as a ordered dict where the keys are index group names and
    values are numpy arrays of atom numbers.

    Use the :meth:`NDX.read` and :meth:`NDX.write` methods for
    I/O. Access groups by name via the :meth:`NDX.get` and
    :meth:`NDX.set` methods.

    Alternatively, simply treat the :class:`NDX` instance as a
    dictionary. Setting a key automatically transforms the new value
    into a integer 1D numpy array (*not* a set, as would be the
    :program:`make_ndx` behaviour).

    .. Note:: The index entries themselves are ordered and can contain
              duplicates so that output from NDX can be easily used for
              :program:`g_dih` and friends. If you need set-like behaviour
              you will have do use :class:`gromacs.formats.uniqueNDX` or
              :class:`gromacs.cbook.IndexBuilder` (which uses
              :program:`make_ndx` throughout).

    **Example**

      Read index file, make new group and write to disk::

        ndx = NDX()
        ndx.read('system.ndx')
        print(ndx['Protein'])
        ndx['my_group'] = [2, 4, 1, 5]   # add new group
        ndx.write('new.ndx')

      Or quicker (replacing the input file ``system.ndx``)::

        ndx = NDX('system')          # suffix .ndx is automatically added
        ndx['chi1'] = [2, 7, 8, 10]
        ndx.write()

    """
    default_extension = "ndx"

    # match:  [ index_groupname ]
    # The optional tail (?:.*\S)? also admits group names that consist of
    # a single character, e.g. "[ A ]".
    SECTION = re.compile(r"""\s*\[\s*(?P<name>\S(?:.*\S)?)\s*\]\s*""")

    #: standard ndx file format: 15 columns
    ncol = 15
    #: standard ndx file format: '%6d'
    format = '%6d'

    def __init__(self, filename=None, **kwargs):
        """Set up the index, optionally reading it from *filename*.

        :Arguments:
           *filename*
               index file to read; nothing is read if ``None``
           *kwargs*
               passed to the dict constructor to populate the index
               (no sanity checks)
        """
        super(NDX, self).__init__(**kwargs)  # can use kwargs to set dict! (but no sanity checks!)

        if filename is not None:
            self._init_filename(filename)
            self.read(filename)

    def read(self, filename=None):
        """Read and parse index file *filename*.

        Groups found in the file are added to the instance (existing groups
        with the same name are replaced). Lines that occur before the first
        ``[ name ]`` header are ignored.
        """
        self._init_filename(filename)

        data = odict()
        with open(self.real_filename) as ndx:
            current_section = None
            for line in ndx:
                line = line.strip()
                if not line:
                    continue
                m = self.SECTION.match(line)
                if m:
                    current_section = m.group('name')
                    data[current_section] = []  # can fail if name not legal python key
                    continue
                if current_section is not None:
                    data[current_section].extend([int(field) for field in line.split()])

        super(NDX, self).update(odict([(name, self._transform(atomnumbers))
                                       for name, atomnumbers in data.items()]))

    def write(self, filename=None, ncol=ncol, format=format):
        """Write index file to *filename* (or overwrite the file that the index was read from)

        :Arguments:
           *filename*
               name of the output file
           *ncol*
               number of atom numbers per line [15]
           *format*
               printf-style format for a single atom number [``'%6d'``]
        """
        with open(self.filename(filename, ext='ndx'), 'w') as ndx:
            for name in self:
                atomnumbers = self._getarray(name)  # allows overriding
                ndx.write('[ %s ]\n' % name)
                for k in range(0, len(atomnumbers), ncol):
                    line = atomnumbers[k:k+ncol].astype(int)   # nice formatting in ncol-blocks
                    n = len(line)
                    ndx.write((" ".join(n*[format])+'\n') % tuple(line))
                ndx.write('\n')

    def get(self, name):
        """Return index array for index group *name*."""
        return self[name]

    def set(self, name, value):
        """Set or add group *name* as a 1D numpy array."""
        self[name] = value

    def size(self, name):
        """Return number of entries for group *name*."""
        return len(self[name])

    @property
    def groups(self):
        """Return a list of all groups."""
        return self.keys()

    @property
    def sizes(self):
        """Return a dict with group names and number of entries,"""
        return dict([(name, len(atomnumbers)) for name, atomnumbers in self.items()])

    @property
    def ndxlist(self):
        """Return a list of groups in the same format as  :func:`gromacs.cbook.get_ndx_groups`.

        Format:
           [ {'name': group_name, 'natoms': number_atoms, 'nr':  # group_number}, ....]

        Group numbers start at 1 and follow the order of the groups in the
        index.
        """
        return [{'name': name, 'natoms': len(atomnumbers), 'nr': nr+1} for
                nr, (name, atomnumbers) in enumerate(self.items())]

    def _getarray(self, name):
        """Helper getter that is used in write().
        Override when using a _transform that stores something that
        cannot be indexed, e.g. when using set()s.
        """
        return self[name]

    def _transform(self, v):
        """Transform input to the stored representation.

        Override eg with ``return set(v)`` for index lists as sets.
        """
        return numpy.ravel(v).astype(int)

    def __setitem__(self, k, v):
        """Store *v* under group name *k* after passing it through :meth:`_transform`."""
        super(NDX, self).__setitem__(k, self._transform(v))

    def setdefault(*args, **kwargs):
        """Not supported; always raises :exc:`NotImplementedError`."""
        raise NotImplementedError
441
class IndexSet(set):
    """set which defines '+' as union (OR) and '-' as intersection (AND).

    Both operators return an :class:`IndexSet` again so that expressions
    such as ``a + b + c`` can be chained.

    .. Note:: '-' is deliberately *not* the set difference of the builtin
              :class:`set`.
    """
    def __add__(self, x):
        # set.union() may hand back a plain set for subclasses (Python 3);
        # re-wrap so that the result still understands '+' and '-'.
        return self.__class__(self.union(x))

    def __sub__(self, x):
        return self.__class__(self.intersection(x))
449
class uniqueNDX(NDX):
    """Index that behaves like make_ndx, i.e. entries behaves as sets,
    not lists.

    The index lists behave like sets:
    - adding sets with '+' is equivalent to a logical OR: x + y == "x | y"
    - subtraction '-' is AND: x - y == "x & y"
    - see :meth:`~gromacs.formats.uniqueNDX.join` for ORing multiple groups (x+y+z+...)

    **Example** ::

       I = uniqueNDX('system.ndx')
       I['SOLVENT'] = I['SOL'] + I['NA+'] + I['CL-']
    """

    def join(self, *groupnames):
        """Return an index group that contains atoms from all  *groupnames*.

        The method will silently ignore any groups that are not in the
        index. If none of the groups is present then an empty group is
        returned.

        **Example**

        Always make a solvent group from water and ions, even if not
        all ions are present in all simulations::

           I['SOLVENT'] = I.join('SOL', 'NA+', 'K+', 'CL-')
        """
        return self._sum([self[k] for k in groupnames if k in self])

    def _sum(self, sequence):
        """Return the union of all sets in *sequence* as an :class:`IndexSet`.

        An empty *sequence* yields an empty :class:`IndexSet`.
        """
        return IndexSet(set().union(*sequence))

    def _transform(self, v):
        """Store index groups as :class:`IndexSet` instances."""
        return IndexSet(v)

    def _getarray(self, k):
        """Return group *k* as a sorted integer numpy array (used when writing)."""
        return numpy.sort(numpy.fromiter(self[k], dtype=int, count=len(self[k])))
488
# or use list of these?
# class IndexGroup(dict):
#     def __init__(self, groupnumber=None, name="empty", atomnumbers=None, **kwargs):
#         atomnumbers = atomnumbers or []
#         _atomnumbers = numpy.asarray(atomnumbers).astype(int)
#         super(IndexGroup, self).__init__(name=str(name),
#                                          atomnumbers=_atomnumbers,
#                                          nr=groupnumber)

class GRO(utilities.FileUtils):
    """Class that represents a GROMOS (gro) structure file.

    .. Warning:: Not implemented yet; instantiating the class raises
                 :exc:`NotImplementedError`.

    File format:
    """
    default_extension = "gro"

    def __init__(self, **kwargs):
        """Placeholder constructor; always raises :exc:`NotImplementedError`.

        :Keywords:
           *filename*
               gro file that is meant to be read on construction once the
               class is implemented
        """
        # TODO: once read() does something useful, take *filename* out of
        # kwargs, initialize the base class with the remaining kwargs and,
        # if a filename was given, call _init_filename() and read().
        raise NotImplementedError("GRO structure files are not supported yet.")

    def read(self, filename=None):
        """Read and parse gro file *filename*.

        Currently the file is only opened; nothing is parsed yet.
        """
        self._init_filename(filename)

        with open(self.real_filename) as gro:
            pass
525
class MDP(odict, utilities.FileUtils):
    """Class that represents a Gromacs mdp run input file.

    The MDP instance is an ordered dictionary.

      - *Parameter names* are keys in the dictionary.
      - *Comments* are sequentially numbered with keys C0001,
        C0002, ...
      - *Empty lines* are similarly preserved as B0001, ....

    When writing, the dictionary is dumped in the recorded order to a
    file. Inserting keys at a specific position is not possible.

    Currently, comments after a parameter on the same line are
    discarded. Leading and trailing spaces are always stripped.

    .. SeeAlso:: For editing a mdp file one can also use
                :func:`gromacs.cbook.edit_mdp` (which works like a
                poor replacement for sed).
    """
    default_extension = "mdp"

    COMMENT = re.compile(r"""\s*;\s*(?P<value>.*)""")   # eat initial ws
    # see regex in cbook.edit_mdp()
    PARAMETER = re.compile(r"""
                            \s*(?P<parameter>[^=]+?)\s*=\s*  # parameter (ws-stripped), before '='
                            (?P<value>[^;]*)                # value (stop before comment=;)
                            (?P<comment>\s*;.*)?            # optional comment
                            """, re.VERBOSE)

    # Keys of the pseudo entries that stand for blank lines and comments.
    # The whole key must match so that real parameters that merely start
    # with 'B' or 'C' are written as parameters. The long forms are accepted
    # as well because earlier documentation named the keys that way.
    _BLANK_KEY = re.compile(r"(?:B|Blank)\d{4,}$")
    _COMMENT_KEY = re.compile(r"(?:C|Comment)\d{4,}$")

    def __init__(self, filename=None, autoconvert=True, **kwargs):
        """Initialize mdp structure.

        :Arguments:
          *filename*
              read from mdp file
          *autoconvert* : boolean
              ``True`` converts numerical values to python numerical types;
              ``False`` keeps everything as strings [``True``]
          *kwargs*
              Populate the MDP with key=value pairs. (NO SANITY CHECKS; and also
              does not work for keys that are not legal python variable names such
              as anything that includes a minus '-' sign or starts with a number).
        """
        super(MDP, self).__init__(**kwargs)  # can use kwargs to set dict! (but no sanity checks!)

        self.autoconvert = autoconvert

        if filename is not None:
            self._init_filename(filename)
            self.read(filename)

    def _transform(self, value):
        """Return *value* converted with :func:`autoconvert` if enabled, else unchanged."""
        if self.autoconvert:
            return autoconvert(value)
        else:
            return value

    def read(self, filename=None):
        """Read and parse mdp file *filename*.

        Blank lines and comment lines are stored under the generated keys
        ``B0001``, ... and ``C0001``, ...; parameters are stored under their
        name. Comments that follow a value on the same line are discarded.

        :Raises: :exc:`ParseError` for a line that is neither blank, a
                 comment, nor of the form ``parameter = value``.
        """
        self._init_filename(filename)

        data = odict()
        iblank = icomment = 0
        with open(self.real_filename) as mdp:
            for line in mdp:
                line = line.strip()
                if not line:
                    iblank += 1
                    data["B%04d" % iblank] = ''
                    continue
                m = self.COMMENT.match(line)
                if m:
                    icomment += 1
                    data["C%04d" % icomment] = m.group('value')
                    continue
                # parameter
                m = self.PARAMETER.match(line)
                if m is None:
                    # report the file that was actually opened (the
                    # *filename* argument can be None)
                    raise ParseError('unknown line in mdp file %r: %r'
                                     % (self.real_filename, line))
                # check for comments after parameter?? -- currently discarded
                parameter = m.group('parameter')
                # strip: the value group keeps the white space in front of a
                # trailing comment
                data[parameter] = self._transform(m.group('value').strip())

        super(MDP, self).update(data)

    def write(self, filename=None, skipempty=False):
        """Write mdp file to *filename*.

        :Keywords:
            *filename*
                 output mdp file; default is the filename the mdp
                 was read from
            *skipempty* : boolean
                 ``True`` removes any parameter lines from output that
                 contain empty values [``False``]

        .. Note:: Overwrites the file that the mdp was read from if no
                  *filename* supplied.
        """

        with open(self.filename(filename, ext='mdp'), 'w') as mdp:
            for k, v in self.items():
                if self._BLANK_KEY.match(k):        # blank line
                    mdp.write("\n")
                elif self._COMMENT_KEY.match(k):    # comment
                    mdp.write("; %s\n" % (v,))
                else:                               # parameter = value
                    if skipempty and (v == '' or v is None):
                        continue
                    mdp.write("%s = %s\n" % (k, v))
648
def autoconvert(s):
    """Convert input to a numerical type if possible.

    1. A non-string object is returned as it is
    2. Try conversion to int, float, str.

    :Returns: an :class:`int` if *s* parses as an integer, else a
              :class:`float` if it parses as a floating point number, else
              the string itself.
    """
    if not isinstance(s, str):
        return s
    for converter in int, float:   # try them in increasing order of lenience
        try:
            return converter(s)
        except ValueError:
            pass
    # nothing numerical: keep it as a string
    return str(s)
664