1
2
3
4
5
6 """
7 :mod:`gromacs.utilities` -- Helper functions and classes
8 ========================================================
9
10 The module defines some convenience functions and classes that are
11 used in other modules; they do *not* make use of :mod:`gromacs.tools`
12 or :mod:`gromacs.cbook` and can be safely imported at any time.
13
14
15 Classes
16 -------
17
18 :class:`FileUtils` provides functions related to filename handling. It
19 can be used as a base or mixin class. The :class:`gromacs.analysis.Simulation`
20 class is derived from it.
21
22 .. autoclass:: FileUtils
23 :members:
24 .. autoclass:: AttributeDict
25 .. autoclass:: Timedelta
26
27 Functions
28 ---------
29
30 Some additional convenience functions that deal with files and
31 directories:
32
.. function:: openany(datasource[,mode='r'])
34
35 Context manager to open a compressed (bzip2, gzip) or plain file
36 (uses :func:`anyopen`).
37
38 .. autofunction:: anyopen
39 .. autofunction:: realpath
40 .. function:: in_dir(directory[,create=True])
41
42 Context manager to execute a code block in a directory.
43
44 * The *directory* is created if it does not exist (unless
45 *create* = ``False`` is set)
* At the end of the block, or after an exception, execution always
  returns to the directory that was the current directory before
  entering the block.
49
50 .. autofunction:: find_first
51 .. autofunction:: withextsep
52
53 Functions that improve list processing and which do *not* treat
54 strings as lists:
55
56 .. autofunction:: iterable
57 .. autofunction:: asiterable
58
59
60 Functions that help handling Gromacs files:
61
62 .. autofunction:: unlink_f
63 .. autofunction:: unlink_gmx
64 .. autofunction:: unlink_gmx_backups
65 .. autofunction:: number_pdbs
66
67 Functions that make working with matplotlib_ easier:
68
69 .. _matplotlib: http://matplotlib.sourceforge.net/
70
71 .. autofunction:: activate_subplot
72 .. autofunction:: remove_legend
73
74
75 Miscellaneous functions:
76
77 .. autofunction:: convert_aa_code
78
79
80 Data
81 ----
82
83 .. autodata:: amino_acid_codes
84
85 """
86 from __future__ import with_statement
87
88 __docformat__ = "restructuredtext en"
89
90 import os
91 import glob
92 import re
93 import warnings
94 import errno
95 import subprocess
96 from contextlib import contextmanager
97 import bz2, gzip
98 import datetime
99
100 import logging
101 logger = logging.getLogger('gromacs.utilities')
102
103 from gromacs import AutoCorrectionWarning
107 """Simple decorator wrapper to make full fledged properties.
108 See eg http://adam.gomaa.us/blog/2008/aug/11/the-python-property-builtin/
109 """
110 return property(**func())
111
114 """A dictionary with pythonic access to keys as attributes --- useful for interactive work."""
125
128
131
@contextmanager
def openany(datasource, mode='r'):
    """Context manager that yields an open stream for *datasource*.

    The stream is obtained from :func:`anyopen` and is closed when the
    ``with`` block is left, whether normally or through an exception.
    """
    handle, _name = anyopen(datasource, mode=mode)
    try:
        yield handle
    finally:
        handle.close()
140
def anyopen(datasource, mode='r'):
    """Open datasource (gzipped, bzipped, uncompressed) and return a stream.

    :Arguments:
       - *datasource*: a file name or an already opened stream
       - *mode*: a mode string starting with 'r' or 'w'

    :Returns: tuple ``(stream, filename)``. If *datasource* already is a
              stream then it is returned unchanged and *filename* is the
              stream's name in parentheses (``'(<stream>)'`` for streams
              without a ``name`` attribute).

    :Raises: :exc:`IOError` if a file name cannot be opened and
             :exc:`NotImplementedError` for any other *mode*.
    """
    # ``open`` replaces the Python 2-only ``file`` builtin.
    handlers = {'bz2': bz2.BZ2File, 'gz': gzip.open, '': open}

    if mode.startswith('r'):
        # 'next' marks a Python 2 iterator, '__next__' a Python 3 one.
        if (hasattr(datasource, 'next') or hasattr(datasource, '__next__')
                or hasattr(datasource, 'readline')):
            stream = datasource
            # in-memory streams and generators have no name
            filename = '(%s)' % getattr(stream, 'name', '<stream>')
        else:
            stream = None
            filename = datasource
            # Probe the compressed formats first, plain file last.
            for ext in ('bz2', 'gz', ''):
                stream = _get_stream(datasource, handlers[ext], mode=mode)
                if stream is not None:
                    break
            if stream is None:
                raise IOError("Cannot open %r in mode=%r." % (filename, mode))
    elif mode.startswith('w'):
        if hasattr(datasource, 'write'):
            stream = datasource
            filename = '(%s)' % getattr(stream, 'name', '<stream>')
        else:
            filename = datasource
            # For writing, the file name's extension selects the opener.
            ext = os.path.splitext(filename)[1]
            if ext.startswith('.'):
                ext = ext[1:]
            if ext not in ('bz2', 'gz'):
                ext = ''
            stream = handlers[ext](datasource, mode=mode)
            if stream is None:
                raise IOError("Cannot open %r in mode=%r with type %r."
                              % (filename, mode, ext))
    else:
        raise NotImplementedError("Sorry, mode=%r is not implemented for %r"
                                  % (mode, datasource))

    return stream, filename
187
188 -def _get_stream(filename, openfunction=file, mode='r'):
189 try:
190 stream = openfunction(filename, mode=mode)
191 except IOError:
192 return None
193
194 try:
195 stream.readline()
196 stream.close()
197 stream = openfunction(filename,'r')
198 except IOError:
199 stream.close()
200 stream = None
201 return stream
202
203
204
205
#: Mapping of 1-letter amino acid codes to 3-letter codes.
amino_acid_codes = {
    'A': 'ALA', 'C': 'CYS', 'D': 'ASP', 'E': 'GLU',
    'F': 'PHE', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE',
    'K': 'LYS', 'L': 'LEU', 'M': 'MET', 'N': 'ASN',
    'P': 'PRO', 'Q': 'GLN', 'R': 'ARG', 'S': 'SER',
    'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR',
}
#: Reverse mapping (3-letter code to 1-letter code) of :data:`amino_acid_codes`.
inverse_aa_codes = dict((three, one) for one, three in amino_acid_codes.items())


def convert_aa_code(x):
    """Converts between 3-letter and 1-letter amino acid codes.

    The lookup is case-insensitive. A 1-letter code is translated to the
    3-letter code and vice versa.

    :Raises: :exc:`ValueError` if *x* is neither 1 nor 3 characters long;
             :exc:`KeyError` if *x* has a valid length but is not a known
             code.
    """
    if len(x) == 1:
        return amino_acid_codes[x.upper()]
    if len(x) == 3:
        return inverse_aa_codes[x.upper()]
    raise ValueError("Can only convert 1-letter or 3-letter amino acid codes, "
                     "not %r" % x)
222
@contextmanager
def in_dir(directory, create=True):
    """Context manager to execute a code block in a directory.

    * The directory is created if it does not exist (unless
      create=False is set).
    * At the end of the block, or after an exception, the working
      directory is always reset to the directory that was current
      before entering the block.

    The value bound by ``with ... as`` is the current working directory
    after the change.

    :Raises: :exc:`OSError` if *directory* cannot be entered (and, with
             *create* set, cannot be created).
    """
    startdir = os.getcwd()
    try:
        try:
            os.chdir(directory)
            logger.info("Working in %r...", directory)
        except OSError as err:
            # Only a missing directory is recoverable, and only on request.
            if create and err.errno == errno.ENOENT:
                os.makedirs(directory)
                os.chdir(directory)
                logger.info("Working in %r (newly created)...", directory)
            else:
                logger.exception("Failed to start working in %r.", directory)
                raise
        yield os.getcwd()
    finally:
        os.chdir(startdir)
249
def realpath(*args):
    """Join all args and return the real path, rooted at /.

    The components are joined with :func:`os.path.join` and the result
    is passed through :func:`os.path.realpath`.

    Returns ``None`` if any of the args is ``None``.
    """
    if None in args:
        return None
    joined = os.path.join(*args)
    return os.path.realpath(joined)
258
def find_first(filename, suffices=None):
    """Find first *filename* with a suffix from *suffices*.

    :Arguments:
      *filename*
         base filename; this file name is checked first
      *suffices*
         list of suffices that are tried in turn on the root of *filename*; can contain the
         ext separator (:data:`os.path.extsep`) or not

    :Returns: The first match or ``None``.
    """
    root, extension = os.path.splitext(filename)
    if suffices is None:
        suffices = []
    else:
        suffices = withextsep(suffices)
    # The file's own extension comes first so that *filename* itself wins.
    for ext in [extension] + suffices:
        candidate = root + ext
        if os.path.exists(candidate):
            return candidate
    return None
284
def withextsep(extensions):
    """Return list in which each element is guaranteed to start with :data:`os.path.extsep`.

    *extensions* may be a single string or a sequence of strings;
    elements that already start with the separator are left unchanged.
    """
    sep = os.path.extsep
    return [x if x.startswith(sep) else sep + x
            for x in asiterable(extensions)]
292
296 """Mixin class to provide additional file-related capabilities."""
297
298
299 default_extension = None
300
302 """Initialize the current filename :attr:`FileUtils.real_filename` of the object.
303
304 Bit of a hack.
305
306 - The first invocation must have ``filename != None``; this will set a
307 default filename with suffix :attr:`FileUtils.default_extension`
308 unless another one was supplied.
309
310 - Subsequent invocations either change the filename accordingly or
311 ensure that the default filename is set with the proper suffix.
312
313 """
314
315 extension = ext or self.default_extension
316 filename = self.filename(filename, ext=extension, use_my_ext=True, set_default=True)
317
318 self.real_filename = os.path.realpath(filename)
319
320 - def filename(self,filename=None,ext=None,set_default=False,use_my_ext=False):
321 """Supply a file name for the class object.
322
323 Typical uses::
324
325 fn = filename() ---> <default_filename>
326 fn = filename('name.ext') ---> 'name'
327 fn = filename(ext='pickle') ---> <default_filename>'.pickle'
328 fn = filename('name.inp','pdf') --> 'name.pdf'
329 fn = filename('foo.pdf',ext='png',use_my_ext=True) --> 'foo.pdf'
330
331 The returned filename is stripped of the extension
332 (``use_my_ext=False``) and if provided, another extension is
333 appended. Chooses a default if no filename is given.
334
335 Raises a ``ValueError`` exception if no default file name is known.
336
337 If ``set_default=True`` then the default filename is also set.
338
339 ``use_my_ext=True`` lets the suffix of a provided filename take
340 priority over a default ``ext`` tension.
341 """
342 if filename is None:
343 if not hasattr(self,'_filename'):
344 self._filename = None
345 if self._filename:
346 filename = self._filename
347 else:
348 raise ValueError("A file name is required because no default file name was defined.")
349 my_ext = None
350 else:
351 filename, my_ext = os.path.splitext(filename)
352 if set_default:
353 self._filename = filename
354 if my_ext and use_my_ext:
355 ext = my_ext
356 if ext is not None:
357 if ext.startswith('.'):
358 ext = ext[1:]
359 filename = filename + '.' + ext
360 return filename
361
363 """If a file exists then continue with the action specified in ``resolve``.
364
365 ``resolve`` must be one of
366
367 "ignore"
368 always return ``False``
369 "indicate"
370 return ``True`` if it exists
371 "warn"
372 indicate and issue a :exc:`UserWarning`
373 "exception"
374 raise :exc:`IOError` if it exists
375
376 Alternatively, set *force* for the following behaviour (which
377 ignores *resolve*):
378
379 ``True``
380 same as *resolve* = "ignore" (will allow overwriting of files)
381 ``False``
382 same as *resolve* = "exception" (will prevent overwriting of files)
383 ``None``
384 ignored, do whatever *resolve* says
385 """
386 def _warn(x):
387 msg = "File %r already exists." % x
388 logger.warn(msg)
389 warnings.warn(msg)
390 return True
391 def _raise(x):
392 msg = "File %r already exists." % x
393 logger.error(msg)
394 raise IOError(errno.EEXIST, msg)
395 solutions = {'ignore': lambda x: False,
396 'indicate': lambda x: True,
397 'warn': _warn,
398 'warning': _warn,
399 'exception': _raise,
400 'raise': _raise,
401 }
402
403 if force is True:
404 resolve = 'ignore'
405 elif force is False:
406 resolve = 'exception'
407
408 if not os.path.isfile(filename):
409 return False
410 else:
411 return solutions[resolve](filename)
412
414 """Unless *name* is provided, insert *infix* before the extension *ext* of *default*."""
415 if name is None:
416 p, oldext = os.path.splitext(default)
417 if ext is None:
418 ext = oldext
419 if ext.startswith('.'):
420 ext = ext[1:]
421 name = self.filename(p+infix, ext=ext)
422 return name
423
425 fmt = "%s(filename=%%r)" % self.__class__.__name__
426 try:
427 fn = self.filename()
428 except ValueError:
429 fn = None
430 return fmt % fn
431
def iterable(obj):
    """Returns ``True`` if *obj* can be iterated over and is *not* a string.

    Strings of any kind (including byte strings and subclasses of the
    string types) are reported as not iterable. Iterators are iterable;
    any other object counts as iterable if :func:`len` works on it.
    """
    try:
        string_types = (basestring,)      # Python 2: str and unicode
    except NameError:
        string_types = (str, bytes)       # Python 3
    if isinstance(obj, string_types):
        return False    # avoid iterating over characters of a string

    # 'next' marks a Python 2 iterator, '__next__' a Python 3 one.
    if hasattr(obj, 'next') or hasattr(obj, '__next__'):
        return True     # any iterator will do
    try:
        len(obj)        # anything else that might work
    except TypeError:
        return False
    return True
445
def asiterable(obj):
    """Returns obj so that it can be iterated over; a string is *not* treated as iterable.

    Anything for which :func:`iterable` is false is wrapped in a
    one-element list; everything else is returned unchanged.
    """
    return obj if iterable(obj) else [obj]
451
def unlink_f(path):
    """Unlink path but do not complain if file does not exist.

    :Raises: :exc:`OSError` for any failure other than a missing file.
    """
    try:
        os.unlink(path)
    except OSError as err:
        # A missing file is the one failure that is deliberately ignored.
        if err.errno != errno.ENOENT:
            raise
462
468
def unlink_gmx_backups(*args):
    """Unlink (rm) all backup files corresponding to the listed files.

    For each path ``dir/name`` every file matching the glob pattern
    ``dir/#name.*#`` is removed with :func:`unlink_f`.
    """
    for path in args:
        dirname, filename = os.path.split(path)
        pattern = os.path.join(dirname, '#' + filename + '.*#')
        for bak in glob.glob(pattern):
            unlink_f(bak)
476
478 """Create a directory *path* with subdirs but do not complain if it exists.
479
480 This is like GNU ``mkdir -p path``.
481 """
482 try:
483 os.makedirs(path)
484 except OSError, err:
485 if err.errno != errno.EEXIST:
486 raise
487
def cat(f=None, o=None):
    """Concatenate files *f*=[...] and write to *o*.

    Nothing is done (and ``None`` is returned) if either *f* or *o* is
    ``None``. The work is delegated to the external ``cat`` command.

    :Arguments:
       - *f*: a file name or a sequence of file names
       - *o*: name of the output file (overwritten if it exists)

    :Raises: :exc:`OSError` (``errno.EIO``) if ``cat`` returns a non-zero
             exit code.
    """
    # need f, o to be compatible with trjcat and eneconv
    if f is None or o is None:
        return
    target = o
    # A real list is needed: it is concatenated with ['cat'] below, which
    # would fail for tuple input.
    infiles = list(asiterable(f))
    logger.debug("cat %s > %s ", " ".join(infiles), target)
    with open(target, 'w') as out:
        rc = subprocess.call(['cat'] + infiles, stdout=out)
    if rc != 0:
        msg = "failed with return code %d: cat %r > %r " % (rc, " ".join(infiles), target)
        # No exception is active here, so logger.exception() would log a
        # meaningless traceback.
        logger.error(msg)
        raise OSError(errno.EIO, msg, target)
502
def activate_subplot(numPlot):
    """Make subplot *numPlot* active on the canvas.

    Use this if a simple ``subplot(numRows, numCols, numPlot)``
    overwrites the subplot instead of activating it.

    *numPlot* counts from 1; the axes of the current figure are looked up
    by position in ``gcf().get_axes()``.
    """
    # see http://www.mail-archive.com/matplotlib-users@lists.sourceforge.net/msg07156.html
    from pylab import gcf, axes
    index = numPlot - 1  # index is 0-based, subplots are 1-based
    return axes(gcf().get_axes()[index])
515
def remove_legend(ax=None):
    """Remove legend for axes or gca.

    If *ax* is ``None`` then the current axes (``gca()``) are used. The
    figure is redrawn afterwards.

    See http://osdir.com/ml/python.matplotlib.general/2005-07/msg00285.html
    """
    from pylab import gca, draw
    if ax is None:
        ax = gca()
    ax.legend_ = None
    draw()
526
530 """Extension of :class:`datetime.timedelta`.
531
532 Provides attributes ddays, dhours, dminutes, dseconds to measure
533 the delta in normal time units.
534
535 ashours gives the total time in fractional hours.
536 """
537
538 @property
540 """Hours component of the timedelta."""
541 return self.seconds / 3600
542
543 @property
545 """Minutes component of the timedelta."""
546 return self.seconds/60 - 60*self.dhours
547
548 @property
550 """Seconds component of the timedelta."""
551 return self.seconds - 3600*self.dhours - 60*self.dminutes
552
553 @property
555 """Timedelta in (fractional) hours."""
556 return 24*self.days + self.seconds / 3600.0
557
559 """Primitive string formatter.
560
561 The only directives understood are the following:
562 ============ ==========================
563 Directive meaning
564 ============ ==========================
565 %d day as integer
566 %H hour [00-23]
567 %h hours including days
568 %M minute as integer [00-59]
569 %S second as integer [00-59]
570 ============ ==========================
571 """
572 substitutions = {
573 "%d": str(self.days),
574 "%H": "%02d" % self.dhours,
575 "%h": str(24*self.days + self.dhours),
576 "%M": "%02d" % self.dminutes,
577 "%S": "%02d" % self.dseconds,
578 }
579 s = fmt
580 for search, replacement in substitutions.items():
581 s = s.replace(search, replacement)
582 return s
583
584
#: Matches file names of the form ``<prefix><number>.pdb``.
NUMBERED_PDB = re.compile(r"(?P<PREFIX>.*\D)(?P<NUMBER>\d+)\.(?P<SUFFIX>pdb)")


def number_pdbs(*args, **kwargs):
    """Rename pdbs x1.pdb ... x345.pdb --> x0001.pdb ... x0345.pdb

    :Arguments:
       - *args*: filenames or glob patterns (such as "pdb/md*.pdb")
       - *format*: format string including keyword *num* ["%(num)04d"]

    Files from *all* patterns are processed. Names that do not match
    ``<prefix><number>.pdb`` are skipped. A failed rename is logged and
    does not abort the remaining renames.
    """
    num_format = kwargs.pop('format', "%(num)04d")
    name_format = "%(prefix)s" + num_format + ".%(suffix)s"
    # Collect the matches of every pattern, not only the first one.
    filenames = []
    for pattern in args:
        filenames.extend(glob.glob(pattern))
    for f in filenames:
        m = NUMBERED_PDB.search(f)
        if m is None:
            continue
        fields = {
            'num': int(m.group('NUMBER')),
            'prefix': m.group('PREFIX'),
            'suffix': m.group('SUFFIX'),
        }
        newname = name_format % fields
        logger.info("Renaming %r --> %r", f, newname)
        try:
            os.rename(f, newname)
        except OSError:
            logger.exception("renaming failed")
613