Package gromacs :: Package analysis :: Package plugins :: Module dist
[hide private]
[frames] | no frames]

Source Code for Module gromacs.analysis.plugins.dist

  1  # $Id$ 
  2  # Copyright (c) 2009 Oliver Beckstein <orbeckst@gmail.com> 
  3  # Released under the GNU Public License 3 (or higher, your choice) 
  4  # See the file COPYING for details. 
  5   
  6  """ 
  7  ``analysis.plugins.dist`` --- Helper Class for ``g_dist`` 
  8  ========================================================= 
  9   
 10  :mod:`dist` contains helper classes for other analysis plugins that 
 11  want to make use of the Gromacs command ``g_dist``. 
 12   
 13   
 14  Overview 
 15  -------- 
 16   
 17  The task we are solving is to analyze output from :: 
 18   
 19     g_dist -f md.xtc -s md.tpr -n cys_ow.ndx -dist 1.0 | bzip2 -vc > mindist_C60_OW_1nm.dat.bz2  
 20   
 21  and produce a histogram of minimum contact distances. This should 
 22  provide an estimate for water accessibility of the atom (here: SG of 
 23  Cys60). 
 24   
 25  File format 
 26  ----------- 
 27     
 28  ``g_dist`` with the ``-dist CUTOFF`` option writes to stdout the 
 29  identity of all atoms within the cutoff distance and the distance 
 30  itself:: 
 31   
 32     Selected 22: 'CYSH_CYSH_60_&_SG' 
 33     Selected 25: 'OW' 
 34     .... 
 35     t: 184  6682 SOL 35993 OW  0.955138 (nm) 
 36     t: 184  10028 SOL 46031 OW  0.803889 (nm) 
 37     t: 185  6682 SOL 35993 OW  0.879949 (nm) 
 38     t: 185  10028 SOL 46031 OW  0.738299 (nm) 
 39     t: 186  6682 SOL 35993 OW  0.897016 (nm) 
 40     t: 186  10028 SOL 46031 OW  0.788268 (nm) 
 41     t: 187  6682 SOL 35993 OW  0.997688 (nm) 
 42     .... 
 43      
 44   
 45  Classes 
 46  ------- 
 47   
 48  .. autoclass:: Mindist 
 49     :members: histogram, hist, dist, edges, midpoints, plot 
 50  .. autoclass:: GdistData 
 51     :members: __iter__ 
 52   
 53  """ 
 54  __docformat__ = "restructuredtext en" 
 55   
 56  import re 
 57  import numpy 
 58   
 59  from recsql import SQLarray   # my own ReqSQL module 
 60   
 61  import gromacs.utilities 
 62   
63 -class Mindist(object):
64 """The Mindist class allows analysis of the output from ``g_dist -dist CUTOFF``. 65 66 Output is read from a file or stream. The raw data is transformed 67 into a true 'mindist' time series (available in the 68 :attr:`Mindist.distances` attribute): for each frame only the 69 shortest distance is stored (whereas ``g_dist`` provides *all* 70 distances below the cutoff). 71 72 :TODO: 73 * Save analysis to pickle or data files. 74 * Export data as simple data files for plotting in other programs. 75 76 .. Note:: :class:`gromacs.tools.G_mindist` is apparently providing 77 exactly the service that is required: a timeseries of 78 the minimum distance between two groups. Feel free to 79 use that tool instead :-). 80 81 """ 82
83 - def __init__(self,datasource,cutoff=None):
84 """Read mindist data from file or stream. 85 86 :Arguments: 87 *datasource* 88 a filename (plain, gzip, bzip2) or file object 89 *cutoff* 90 the ``-dist CUTOFF`` that was provided to ``g_dist``; if supplied 91 we work around a bug in ``g_dist`` (at least in Gromacs 4.0.2) in which 92 sometimes numbers >> CUTOFF are printed. 93 """ 94 stream, self.filename = gromacs.utilities.anyopen(datasource) 95 try: 96 M = GdistData(stream) 97 # BIG database in memory ... can be accessed via SQL 98 all_distances = SQLarray('distances', records=M, columns=('frame','distance')) 99 finally: 100 stream.close() 101 if cutoff is None: 102 cutoff_filter = "" 103 else: 104 cutoff_filter = "WHERE distance <= %d" % float(cutoff) 105 self.distances = all_distances.selection( 106 "SELECT frame, MIN(distance) AS distance FROM __self__ "+cutoff_filter+" GROUP BY frame", 107 name="mindistances", cache=False)
108
109 - def histogram(self,nbins=None,lo=None,hi=None,midpoints=False,normed=True):
110 """Returns a distribution or histogram of the minimum distances:: 111 112 If no values for the bin edges are given then they are set to 113 0.1 below and 0.1 above the minimum and maximum values seen in 114 the data. 115 116 If the number of bins is not provided then it is set so that 117 on average 100 counts come to a bin. Set nbins manually if the 118 histogram only contains a single bin (and then get more data)! 119 120 :Keywords: 121 *nbins* : int 122 number of bins 123 *lo* : float 124 lower edge of histogram 125 *hi* : float 126 upper edge of histogram 127 *midpoints* : boolean 128 False: return edges. True: return midpoints 129 *normed* : boolean 130 True: return probability distribution. False: histogram 131 """ 132 D = self.distances 133 if lo is None or hi is None: 134 dmin,dmax = D.limits('distance') 135 if lo is None: 136 lo = round(dmin - 0.1, 1) 137 if lo < 0: 138 lo = 0.0 139 if hi is None: 140 hi = round(dmax + 0.1, 1) 141 if nbins is None: 142 nbins = int(len(D)/100) 143 FUNC = 'distribution' 144 if not normed: 145 FUNC = 'histogram' 146 SQL = """SELECT %(FUNC)s(distance,%(nbins)d,%(lo)f,%(hi)f) AS "h [Object]" 147 FROM __self__""" % vars() 148 (((h,e),),) = D.sql(SQL, asrecarray=False) 149 # should probably cache this... 150 if normed: 151 self.__distribution = h 152 else: 153 self.__histogram = h 154 self.__edges = e 155 if midpoints: 156 return h, self.midpoints 157 return h, e
158
159 - def hist():
160 doc = "Histogram of the minimum distances." 161 def fget(self): 162 try: 163 return self.__histogram 164 except AttributeError: 165 raise RuntimeError("run histogram(...,normed=False) first")
166 return locals()
167 hist = property(**hist()) 168
169 - def dist():
170 doc = "Distribution of the minimum distances." 171 def fget(self): 172 try: 173 return self.__distribution 174 except AttributeError: 175 raise RuntimeError("run histogram(...,normed=True) first")
176 return locals() 177 dist = property(**dist()) 178
179 - def edges():
180 doc = "Edges of the histogram of the minimum distances." 181 def fget(self): 182 try: 183 return self.__edges 184 except AttributeError: 185 raise RuntimeError("run histogram(...) first")
186 return locals() 187 edges = property(**edges()) 188
189 - def midpoints():
190 doc = "Midpoints of the histogram of the minimum distances." 191 def fget(self): 192 e = self.__edges 193 return 0.5*(e[:-1] + e[1:])
194 return locals() 195 midpoints = property(**midpoints()) 196
197 - def plot(self,**kwargs):
198 """Plot histograms with matplotlib's plot() function:: 199 200 plot(**histogramargs, **plotargs) 201 202 Arguments for both :meth:`Mindist.histogram` and :func:`pylab.plot` can be provided (qv). 203 """ 204 import pylab 205 kwargs['midpoints'] = True 206 histogram_args = ('nbins','lo','hi','midpoints','normed') 207 histargs = kwargs.copy() 208 histargs = dict([(k, kwargs.pop(k)) for k in histargs if k in histogram_args]) 209 h,m = self.histogram(**histargs) 210 211 pylab.plot(m, h, **kwargs) 212 pylab.xlabel('minimum distance (nm)')
213
214 -class GdistData(object):
215 """Object that represents the standard output of ``g_dist -dist CUTOFF``. 216 217 Initialize from a stream (e.g. a pipe) and the iterate over the 218 instance to get the data line by line. Each line consists of a 219 tuple 220 221 (*frame*, *distance*) 222 """ 223 data_pattern = re.compile("""t:\s* # marker for beginning of line 224 (?P<FRAME>\d+)\s+ # frame number (?) 225 (?P<RESID>\d+)\s+(?P<RESNAME>\w+)\s+ # resid and residue name 226 (?P<ATOMID>\d+)\s+(?P<ATOMNAME>\w+)\s+ # atomid and atom name 227 (?P<DISTANCE>[0-9]+\.?[0-9]+) # distance 228 \s+\(nm\)""", re.VERBOSE) 229
230 - def __init__(self,stream):
231 """Initialize with an open stream to the data (eg stdin or file). 232 233 :Arguments: 234 *stream* 235 open stream (file or pipe or really any iterator 236 providing the data from ``g_dist``); the *stream* is not 237 closed automatically when the iterator completes. 238 """ 239 self.stream = stream
240
241 - def __iter__(self):
242 """Iterator that filters the input stream and returns (frame, distance) tuples.""" 243 for line in self.stream: 244 line = line.strip() 245 m = self.data_pattern.search(line) 246 if m: 247 distance = float(m.group('DISTANCE')) 248 frame = int(m.group('FRAME')) 249 yield frame,distance
250