gromacs.cbook

1 # GromacsWrapper -- cbook.py 2 # Copyright (c) 2009 Oliver Beckstein <orbeckst@gmail.com> 3 # Released under the GNU Public License 3 (or higher, your choice) 4 5 """ 6 :mod:`gromacs.cbook` -- Gromacs Cook Book 7 ========================================= 8 9 The :mod:`~gromacs.cbook` (cook book) module contains short recipes for tasks 10 that are often repeated. In the simplest case this is just one of the 11 gromacs tools with a certain set of default command line options. 12 13 By abstracting and collecting these invocations here, errors can be 14 reduced and the code snippets can also serve as canonical examples for 15 how to do simple things. 16 17 Miscellaneous canned Gromacs commands 18 ------------------------------------- 19 20 Simple commands with new default options so that they solve a specific 21 problem (see also `Manipulating trajectories and structures`_): 22 23 .. function:: rmsd_backbone([s="md.tpr", f="md.xtc"[, ...]]) 24 25 Computes the RMSD of the "Backbone" atoms after fitting to the 26 "Backbone" (including both translation and rotation). 27 28 29 Manipulating trajectories and structures 30 ---------------------------------------- 31 32 Standard invocations for manipulating trajectories. 33 34 .. function:: trj_compact([s="md.tpr", f="md.xtc", o="compact.xtc"[, ...]]) 35 36 Writes an output trajectory or frame with a compact representation 37 of the system centered on the protein. It centers on the group 38 "Protein" and outputs the whole "System" group. 39 40 41 .. function:: trj_xyfitted([s="md.tpr", f="md.xtc"[, ...]]) 42 43 Writes a trajectory centered and fitted to the protein in the XY-plane only. 44 45 This is useful for membrane proteins. The system *must* be oriented so that 46 the membrane is in the XY plane. The protein backbone is used for the least 47 square fit, centering is done for the whole protein., but this can be 48 changed with the *input* = ``('backbone', 'protein','system')`` keyword. 49 50 .. Note:: Gromacs 4.x only 51 52 .. autofunction:: trj_fitandcenter 53 .. autofunction:: cat 54 .. autoclass:: Frames 55 :members: 56 .. autoclass:: Transformer 57 :members: 58 .. autofunction:: get_volume 59 60 Processing output 61 ----------------- 62 63 There are cases when a script has to to do different things depending 64 on the output from a Gromacs tool. 65 66 For instance, a common case is to check the total charge after 67 grompping a tpr file. The ``grompp_qtot`` function does just that. 68 69 .. autofunction:: grompp_qtot 70 .. autofunction:: get_volume 71 .. autofunction:: parse_ndxlist 72 73 74 Working with topologies and mdp files 75 ------------------------------------- 76 77 .. autofunction:: create_portable_topology 78 .. autofunction:: edit_mdp 79 .. autofunction:: add_mdp_includes 80 .. autofunction:: grompp_qtot 81 82 83 Working with index files 84 ------------------------ 85 86 Manipulation of index files (``ndx``) can be cumbersome because the 87 ``make_ndx`` program is not very sophisticated (yet) compared to 88 full-fledged atom selection expression as available in Charmm_, VMD_, or 89 MDAnalysis_. Some tools help in building and interpreting index files. 90 91 .. SeeAlso:: The :class:`gromacs.formats.NDX` class can solve a number 92 of index problems in a cleaner way than the classes and 93 functions here. 94 95 .. autoclass:: IndexBuilder 96 :members: combine, gmx_resid 97 98 .. autofunction:: parse_ndxlist 99 .. autofunction:: get_ndx_groups 100 .. autofunction:: make_ndx_captured 101 102 103 .. _MDAnalysis: http://mdanalysis.googlecode.com 104 .. _VMD: http://www.ks.uiuc.edu/Research/vmd/current/ug/node87.html 105 .. _Charmm: http://www.charmm.org/html/documentation/c35b1/select.html 106 107 108 File editing functions 109 ---------------------- 110 111 It is often rather useful to be able to change parts of a template 112 file. For specialized cases the two following functions are useful: 113 114 .. autofunction:: edit_mdp 115 .. autofunction:: edit_txt 116 """ 117 118 # Right now the simplest thing to do is to just create instances with pre-set 119 # values; this works fine and is succinct but has some disadvantages: 120 # * the underlying gromacs tool is executed to extract the help string; this 121 # adds to the import time 122 # * adding documentation is awkward 123 # 124 # For more complicated cases one is probably better off by properly deriving a 125 # new class and set arguments explicitly in init (using kwargs['flag'] = 126 # default) ... or I can write some meta(??) class to do this nicely 127 128 from __future__ import with_statement 129 130 __docformat__ = "restructuredtext en" 131 132 import sys 133 import os 134 import re 135 import warnings 136 import tempfile 137 import shutil 138 import glob 139 140 import logging 141 logger = logging.getLogger('gromacs.cbook') 142 143 import gromacs 144 from gromacs import GromacsError, BadParameterWarning, MissingDataWarning, GromacsValueWarning 145 import tools 146 import utilities 147 from utilities import asiterable 148 149 trj_compact = tools.Trjconv(ur='compact', center=True, boxcenter='tric', pbc='mol', 150 input=('protein','system'), 151 doc=""" 152 Writes a compact representation of the system centered on the protein""") 153 154 rmsd_backbone = tools.G_rms(what='rmsd', fit='rot+trans', 155 input=('Backbone','Backbone'), 156 doc=""" 157 Computes RMSD of backbone after fitting to the backbone.""") 158 159 # Gromacs 4.x 160 trj_xyfitted = tools.Trjconv(fit='rotxy+transxy', 161 center=True, boxcenter='rect', pbc='whole', 162 input=('backbone', 'protein','system'), 163 doc=""" 164 Writes a trajectory centered and fitted to the protein in the XY-plane only. 165 166 This is useful for membrane proteins. The system *must* be oriented so 167 that the membrane is in the XY plane. The protein backbone is used 168 for the least square fit, centering is done for the whole protein. 169 170 .. Note:: Gromacs 4.x only""")

171 172 -def trj_fitandcenter(xy=False, **kwargs):

173 """Center everything and make a compact representation (pass 1) and fit the system to a reference (pass 2). 174 175 :Keywords: 176 *s* 177 input structure file (tpr file required to make molecule whole) 178 *f* 179 input trajectory 180 *o* 181 output trajectory 182 *input* 183 A list with three groups. The default is 184 ['backbone', 'protein','system'] 185 The fit command uses all three (1st for least square fit, 186 2nd for centering, 3rd for output), the centered/make-whole stage use 187 2nd for centering and 3rd for output. 188 *input1* 189 If *input1* is supplied then *input* is used exclusively 190 for the fitting stage (pass 2) and *input1* for the centering (pass 1). 191 *n* 192 Index file used for pass 1 and pass 2. 193 *n1* 194 If *n1* is supplied then index *n1* is only used for pass 1 195 (centering) and *n* for pass 2 (fitting). 196 *xy* : boolean 197 If ``True`` then only do a rot+trans fit in the xy plane 198 (good for membrane simulations); default is ``False``. 199 *kwargs* 200 All other arguments are passed to :class:`~gromacs.tools.Trjconv`. 201 202 Note that here we first center the protein and create a compact box, using 203 ``-pbc mol -ur compact -center -boxcenter tric`` and write an intermediate 204 xtc. Then in a second pass we perform a rotation+translation fit (or 205 restricted to the xy plane if *xy* = ``True`` is set) on the intermediate 206 xtc to produce the final trajectory. Doing it in this order has the 207 disadvantage that the solvent box is rotating around the protein but the 208 opposite order (with center/compact second) produces strange artifacts 209 where columns of solvent appear cut out from the box---it probably means 210 that after rotation the information for the periodic boundaries is not 211 correct any more. 212 213 Most kwargs are passed to both invocations of 214 :class:`gromacs.tools.Trjconv` so it does not really make sense to use eg 215 *skip*; in this case do things manually. 216 217 By default the *input* to the fit command is ('backbone', 218 'protein','system'); the compact command always uses the second and third 219 group for its purposes or if this fails, prompts the user. 220 221 Both steps cannot performed in one pass; this is a known limitation of 222 ``trjconv``. An intermediate temporary XTC files is generated which should 223 be automatically cleaned up unless bad things happened. 224 225 The function tries to honour the input/output formats. For instance, if you 226 want trr output you need to supply a trr file as input and explicitly give 227 the output file also a trr suffix. 228 229 .. Note:: For big trajectories it can **take a very long time** 230 and consume a **large amount of temporary diskspace**. 231 232 We follow the `g_spatial documentation`_ in preparing the trajectories:: 233 234 trjconv -s a.tpr -f a.xtc -o b.xtc -center tric -ur compact -pbc none 235 trjconv -s a.tpr -f b.xtc -o c.xtc -fit rot+trans 236 237 .. _`g_spatial documentation`: http://oldwiki.gromacs.org/index.php/Manual:g_spatial_4.0.3 238 """ 239 if xy: 240 fitmode = 'rotxy+transxy' 241 kwargs.pop('fit', None) 242 else: 243 fitmode = kwargs.pop('fit', 'rot+trans') # user can use progressive, too 244 245 intrj = kwargs.pop('f', None) 246 # get the correct suffix for the intermediate step: only trr will 247 # keep velocities/forces! 248 suffix = os.path.splitext(intrj)[1] 249 if not suffix in ('xtc', 'trr'): 250 suffix = '.xtc' 251 outtrj = kwargs.pop('o', None) 252 253 ndx = kwargs.pop('n', None) 254 ndxcompact = kwargs.pop('n1', ndx) 255 256 257 inpfit = kwargs.pop('input', ('backbone', 'protein','system')) 258 try: 259 _inpcompact = inpfit[1:] # use 2nd and 3rd group for compact 260 except TypeError: 261 _inpcompact = None 262 inpcompact = kwargs.pop('input1', _inpcompact) # ... or the user supplied ones 263 264 fd, tmptrj = tempfile.mkstemp(suffix=suffix, prefix='fitted_') 265 266 logger.info("Input trajectory: %(intrj)r\nOutput trajectory: %(outtrj)r"% vars()) 267 logger.info("... writing temporary trajectory %(tmptrj)r (will be auto-cleaned)." % vars()) 268 sys.stdout.flush() 269 try: 270 trj_compact(f=intrj, o=tmptrj, n=ndxcompact, input=inpcompact, **kwargs) 271 trj_xyfitted(f=tmptrj, o=outtrj, n=ndx, fit=fitmode, input=inpfit, **kwargs) 272 finally: 273 utilities.unlink_gmx(tmptrj)

274

275 -def cat(prefix="md", dirname=os.path.curdir, partsdir="parts", fulldir="full", 276 resolve_multi="pass"):

277 """Concatenate all parts of a simulation. 278 279 The xtc, trr, and edr files in *dirname* such as prefix.xtc, 280 prefix.part0002.xtc, prefix.part0003.xtc, ... are 281 282 1) moved to the *partsdir* (under *dirname*) 283 2) concatenated with the Gromacs tools to yield prefix.xtc, prefix.trr, 284 prefix.edr, prefix.gro (or prefix.md) in *dirname* 285 3) Store these trajectories in *fulldir* 286 287 .. Note:: Trajectory files are *never* deleted by this function to avoid 288 data loss in case of bugs. You will have to clean up yourself 289 by deleting *dirname*/*partsdir*. 290 291 Symlinks for the trajectories are *not* handled well and 292 break the function. Use hard links instead. 293 294 .. Warning:: If an exception occurs when running this function then make 295 doubly and triply sure where your files are before running 296 this function again; otherwise you might **overwrite data**. 297 Possibly you will need to manually move the files from *partsdir* 298 back into the working directory *dirname*; this should onlu overwrite 299 generated files so far but *check carefully*! 300 301 :Keywords: 302 *prefix* 303 deffnm of the trajectories [md] 304 *resolve_multi" 305 how to deal with multiple "final" gro or pdb files: normally there should 306 only be one but in case of restarting from the checkpoint of a finished 307 simulation one can end up with multiple identical ones. 308 - "pass" : do nothing and log a warning 309 - "guess" : take prefix.pdb or prefix.gro if it exists, otherwise the one of 310 prefix.partNNNN.gro|pdb with the highes NNNN 311 *dirname* 312 change to *dirname* and assume all tarjectories are located there [.] 313 *partsdir* 314 directory where to store the input files (they are moved out of the way); 315 *partsdir* must be manually deleted [parts] 316 *fulldir* 317 directory where to store the final results [full] 318 """ 319 320 gmxcat = {'xtc': gromacs.trjcat, 321 'trr': gromacs.trjcat, 322 'edr': gromacs.eneconv, 323 'log': utilities.cat, 324 } 325 326 def _cat(prefix, ext, partsdir=partsdir, fulldir=fulldir): 327 filenames = glob_parts(prefix, ext) 328 if ext.startswith('.'): 329 ext = ext[1:] 330 outfile = os.path.join(fulldir, prefix + '.' + ext) 331 if not filenames: 332 return None 333 nonempty_files = [] 334 for f in filenames: 335 if os.stat(f).st_size == 0: 336 logger.warn("File %(f)r is empty, skipping" % vars()) 337 continue 338 if os.path.islink(f): 339 # TODO: re-write the symlink to point to the original file 340 errmsg = "Symbolic links do not work (file %(f)r), sorry. " \ 341 "CHECK LOCATION OF FILES MANUALLY BEFORE RUNNING gromacs.cbook.cat() AGAIN!" % vars() 342 logger.exception(errmsg) 343 raise NotImplementedError(errmsg) 344 shutil.move(f, partsdir) 345 nonempty_files.append(f) 346 filepaths = [os.path.join(partsdir, f) for f in nonempty_files] 347 gmxcat[ext](f=filepaths, o=outfile) 348 return outfile

349 350 _resolve_options = ("pass", "guess") 351 if not resolve_multi in _resolve_options: 352 raise ValueError("resolve_multi must be one of %(_resolve_options)r, " 353 "not %(resolve_multi)r" % vars()) 354 355 if fulldir == os.path.curdir: 356 wmsg = "Using the current directory as fulldir can potentially lead to data loss if you run this function multiple times." 357 logger.warning(wmsg) 358 warnings.warn(wmsg, category=BadParameterWarning) 359 360 with utilities.in_dir(dirname, create=False): 361 utilities.mkdir_p(partsdir) 362 utilities.mkdir_p(fulldir) 363 for ext in ('log', 'edr', 'trr', 'xtc'): 364 logger.info("[%(dirname)s] concatenating %(ext)s files...", vars()) 365 outfile = _cat(prefix, ext, partsdir) 366 logger.info("[%(dirname)s] created %(outfile)r", vars()) 367 for ext in ('gro', 'pdb'): # XXX: ugly, make method out of parts? 368 filenames = glob_parts(prefix, ext) 369 if len(filenames) == 0: 370 continue # goto next ext 371 elif len(filenames) == 1: 372 pick = filenames[0] 373 else: 374 if resolve_multi == "pass": 375 logger.warning("[%(dirname)s] too many output structures %(filenames)r, " 376 "cannot decide which one --- resolve manually!", vars()) 377 for f in filenames: 378 shutil.move(f, partsdir) 379 continue # goto next ext 380 elif resolve_multi == "guess": 381 pick = prefix + '.' + ext 382 if not pick in filenames: 383 pick = filenames[-1] # filenames are ordered with highest parts at end 384 final = os.path.join(fulldir, prefix + '.' + ext) 385 shutil.copy(pick, final) # copy2 fails on nfs with Darwin at least 386 for f in filenames: 387 shutil.move(f, partsdir) 388 logger.info("[%(dirname)s] collected final structure %(final)r " 389 "(from %(pick)r)", vars()) 390 391 392 partsdirpath = utilities.realpath(dirname, partsdir) 393 logger.warn("[%(dirname)s] cat() complete in %(fulldir)r but original files " 394 "in %(partsdirpath)r must be manually removed", vars()) 395

396 -def glob_parts(prefix, ext):

397 """Find files from a continuation run""" 398 if ext.startswith('.'): 399 ext = ext[1:] 400 files = glob.glob(prefix+'.'+ext) + glob.glob(prefix+'.part[0-9][0-9][0-9][0-9].'+ext) 401 files.sort() # at least some rough sorting... 402 return files

403

404 405 -class Frames(object):

406 """A iterator that transparently provides frames from a trajectory. 407 408 The iterator chops a trajectory into individual frames for 409 analysis tools that only work on separate structures such as 410 ``gro`` or ``pdb`` files. Instead of turning the whole trajectory 411 immediately into pdb files (and potentially filling the disk), the 412 iterator can be instructed to only provide a fixed number of 413 frames and compute more frames when needed. 414 415 .. Note:: Setting a limit on the number of frames on disk can lead 416 to longish waiting times because ``trjconv`` must 417 re-seek to the middle of the trajectory and the only way 418 it can do this at the moment is by reading frames 419 sequentially. This might still be preferrable to filling 420 up a disk, though. 421 422 .. Warning:: The *maxframes* option is not implemented yet; use 423 the *dt* option or similar to keep the number of frames 424 manageable. 425 """ 426

427 - def __init__(self, structure, trj, maxframes=None, format='pdb', **kwargs):

428 """Set up the Frames iterator. 429 430 :Arguments: 431 structure 432 name of a structure file (tpr, pdb, ...) 433 trj 434 name of the trajectory (xtc, trr, ...) 435 format 436 output format for the frames, eg "pdb" or "gro" [pdb] 437 maxframes : int 438 maximum number of frames that are extracted to disk at 439 one time; set to ``None`` to extract the whole trajectory 440 at once. [``None``] 441 kwargs 442 All other arguments are passed to 443 `class:~gromacs.tools.Trjconv`; the only options that 444 cannot be changed are *sep* and the output file name *o*. 445 446 """ 447 self.structure = structure # tpr or equivalent 448 self.trj = trj # xtc, trr, ... 449 self.maxframes = maxframes 450 if not self.maxframes is None: 451 raise NotImplementedError('sorry, maxframes feature not implemented yet') 452 453 self.framedir = tempfile.mkdtemp(prefix="Frames_", suffix='_'+format) 454 self.frameprefix = os.path.join(self.framedir, 'frame') 455 self.frametemplate = self.frameprefix + '%d' + '.' + format # depends on trjconv 456 self.frameglob = self.frameprefix + '*' + '.' + format 457 kwargs['sep'] = True 458 kwargs['o'] = self.frameprefix + '.' + format 459 kwargs.setdefault('input', ('System',)) 460 self.extractor = tools.Trjconv(s=self.structure, f=self.trj, **kwargs) 461 462 #: Holds the current frame number of the currently extracted 463 #: batch of frames. Increases when iterating. 464 self.framenumber = 0 465 #: Total number of frames read so far; only important when *maxframes* > 0 is used. 466 self.totalframes = 0

467

468 - def extract(self):

469 """Extract frames from the trajectory to the temporary directory.""" 470 # XXX: extract everything at the moment, logic for maxframes not done yet 471 self.extractor.run()

472 473 @property

474 - def all_frames(self):

475 """Unordered list of all frames currently held on disk.""" 476 return glob.glob(self.frameglob)

477 478 @property

479 - def current_framename(self):

480 return self.frametemplate % self.framenumber

481

482 - def __iter__(self):

483 """Primitive iterator.""" 484 frames = self.all_frames 485 if len(frames) == 0: 486 self.extract() 487 frames = self.all_frames 488 489 # filenames are 'Frame0.pdb', 'Frame11.pdb', ... so I must 490 # order manually because glob does not give it in sequence. 491 for i in xrange(len(frames)): 492 self.framenumber = i 493 yield self.current_framename 494 self.totalframes += len(frames)

495

496 - def delete_frames(self):

497 """Delete all frames.""" 498 for frame in glob.glob(self.frameglob): 499 os.unlink(frame)

500

501 - def cleanup(self):

502 """Clean up all temporary frames (which can be HUGE).""" 503 shutil.rmtree(self.framedir) 504 self.framedir = None

505

506 - def __del__(self):

507 if not self.framedir is None: 508 self.cleanup()

509

510 # Working with topologies 511 # ----------------------- 512 513 -def grompp_qtot(*args, **kwargs):

514 """Run ``gromacs.grompp`` and return the total charge of the system. 515 516 :Arguments: 517 The arguments are the ones one would pass to :func:`gromacs.grompp`. 518 :Returns: 519 The total charge as reported 520 521 Some things to keep in mind: 522 523 * The stdout output of grompp is not shown. This can make debugging 524 pretty hard. Try running the normal :func:`gromacs.grompp` command and 525 analyze the output if the debugging messages are not sufficient. 526 * Check that ``qtot`` is correct; because the function is based on pattern 527 matching of the output it can break when the output format changes. 528 """ 529 530 # match ' System has non-zero total charge: -4.000001e+00', 531 qtot_pattern = re.compile('System has non-zero total charge: *(?P<qtot>[-+]?\d*\.\d+[eE][-+]\d+)') 532 kwargs['stdout'] = False 533 rc, output, junk = gromacs.grompp(*args, **kwargs) 534 qtot = 0 535 for line in output.split('\n'): 536 m = qtot_pattern.search(line) 537 if m: 538 qtot = float(m.group('qtot')) 539 break 540 logger.info("system total charge qtot = %(qtot)r" % vars()) 541 return qtot

542

543 -def _mdp_include_string(dirs):

544 """Generate a string that can be added to a mdp 'include = ' line.""" 545 include_paths = [os.path.expanduser(p) for p in dirs] 546 return ' -I'.join([''] + include_paths)

547

548 -def add_mdp_includes(topology=None, kwargs=None):

549 """Set the mdp *include* key in the *kwargs* dict. 550 551 1. Add the directory containing *topology*. 552 2. Add all directories appearing under the key *includes* 553 3. Generate a string of the form "-Idir1 -Idir2 ..." that 554 is stored under the key *include* (the corresponding 555 mdp parameter) 556 557 By default, the directories ``.`` and ``..`` are also added to the 558 *include* string for the mdp; when fed into 559 :func:`gromacs.cbook.edit_mdp` it will result in a line such as :: 560 561 include = -I. -I.. -I../topology_dir .... 562 563 Note that the user can always override the behaviour by setting 564 the *include* keyword herself; in this case this function does 565 nothing. 566 567 If no *kwargs* were supplied then a dict is generated with the 568 single *include* entry. 569 570 :Arguments: 571 *topology* : top filename 572 Topology file; the name of the enclosing directory is added 573 to the include path (if supplied) [``None``] 574 *kwargs* : dict 575 Optional dictionary of mdp keywords; will be modified in place. 576 If it contains the *includes* keyword with either a single string 577 or a list of strings then these paths will be added to the 578 include statement. 579 :Returns: *kwargs* with the *include* keyword added if it did not 580 exist previously; if the keyword already existed, nothing 581 happens. 582 583 .. Note:: The *kwargs* dict is **modified in place**. This 584 function is a bit of a hack. It might be removed once 585 all setup functions become methods in a nice class. 586 """ 587 if kwargs is None: 588 kwargs = {} 589 590 if not topology is None: 591 # half-hack: find additional itps in the same directory as the 592 # topology 593 topology_dir = os.path.dirname(topology) 594 include_dirs = ['.', '..', topology_dir] # should . & .. always be added? 595 596 include_dirs.extend(asiterable(kwargs.pop('includes', []))) # includes can be a list or a string 597 598 # 1. setdefault: we do nothing if user defined include 599 # 2. modify input in place! 600 kwargs.setdefault('include', _mdp_include_string(include_dirs)) 601 return kwargs

602

603 604 -def create_portable_topology(topol, struct, **kwargs):

605 """Create a processed topology. 606 607 The processed (or portable) topology file does not contain any 608 ``#include`` statements and hence can be easily copied around. It 609 also makes it possible to re-grompp without having any special itp 610 files available. 611 612 :Arguments: 613 *topol* 614 topology file 615 *struct* 616 coordinat (structure) file 617 618 :Keywords: 619 *processed* 620 name of the new topology file; if not set then it is named like 621 *topol* but with ``pp_`` prepended 622 *includes* 623 path or list of paths of directories in which itp files are 624 searched for 625 626 :Returns: full path to the processed trajectory 627 """ 628 _topoldir, _topol = os.path.split(topol) 629 processed = kwargs.pop('processed', os.path.join(_topoldir, 'pp_'+_topol)) 630 mdp_kwargs = add_mdp_includes(topol, kwargs) 631 with tempfile.NamedTemporaryFile(suffix='.mdp') as mdp: 632 mdp.write('; empty mdp file\ninclude = %(include)s\n' % mdp_kwargs) 633 mdp.flush() 634 try: 635 gromacs.grompp(v=False, f=mdp.name, p=topol, c=struct, pp=processed) 636 finally: 637 utilities.unlink_gmx('topol.tpr', 'mdout.mdp') 638 return utilities.realpath(processed)

639

640 -def get_volume(f):

641 """Return the volume in nm^3 of structure file *f*. 642 643 (Uses :func:`gromacs.editconf`; error handling is not good) 644 """ 645 fd, temp = tempfile.mkstemp('.gro') 646 try: 647 rc,out,err = gromacs.editconf(f=f, o=temp, stdout=False) 648 finally: 649 os.unlink(temp) 650 return [float(x.split()[1]) for x in out.splitlines() 651 if x.startswith('Volume:')][0]

652

653 654 # Editing textual input files 655 # --------------------------- 656 657 -def edit_mdp(mdp, new_mdp=None, extend_parameters=None, **substitutions):

658 """Change values in a Gromacs mdp file. 659 660 Parameters and values are supplied as substitutions, eg ``nsteps=1000``. 661 662 By default the template mdp file is **overwritten in place**. 663 664 If a parameter does not exist in the template then it cannot be substituted 665 and the parameter/value pair is returned. The user has to check the 666 returned list in order to make sure that everything worked as expected. At 667 the moment it is not possible to automatically append the new values to the 668 mdp file because of ambiguities when having to replace dashes in parameter 669 names with underscores (see the notes below on dashes/underscores). 670 671 If a parameter is set to the value ``None`` then it will be ignored. 672 673 :Arguments: 674 *mdp* : filename 675 filename of input (and output filename of ``new_mdp=None``) 676 *new_mdp* : filename 677 filename of alternative output mdp file [None] 678 *extend_parameters* : string or list of strings 679 single parameter or list of parameters for which the new values 680 should be appended to the existing value in the mdp file. This 681 makes mostly sense for a single parameter, namely 'include', which 682 is set as the default. Set to ``[]`` to disable. ['include'] 683 *substitutions* 684 parameter=value pairs, where parameter is defined by the Gromacs mdp file; 685 dashes in parameter names have to be replaced by underscores. 686 687 :Returns: 688 Dict of parameters that have *not* been substituted. 689 690 **Example** :: 691 692 edit_mdp('md.mdp', new_mdp='long_md.mdp', nsteps=100000, nstxtcout=1000, lincs_iter=2) 693 694 .. Note:: 695 696 * Dashes in Gromacs mdp parameters have to be replaced by an underscore 697 when supplied as python keyword arguments (a limitation of python). For example 698 the MDP syntax is ``lincs-iter = 4`` but the corresponding keyword would be 699 ``lincs_iter = 4``. 700 * If the keyword is set as a dict key, eg ``mdp_params['lincs-iter']=4`` then one 701 does not have to substitute. 702 * Parameters *aa_bb* and *aa-bb* are considered the same (although this should 703 not be a problem in practice because there are no mdp parameters that only 704 differ by a underscore). 705 * This code is more compact in ``Perl`` as one can use ``s///`` operators: 706 ``s/^(\s*${key}\s*=\s*).*/$1${val}/`` 707 708 .. SeeAlso:: One can also load the mdp file with 709 :class:`gromacs.formats.MDP`, edit the object (a dict), and save it again. 710 """ 711 if new_mdp is None: 712 new_mdp = mdp 713 if extend_parameters is None: 714 extend_parameters = ['include'] 715 else: 716 extend_parameters = list(asiterable(extend_parameters)) 717 718 # None parameters should be ignored (simple way to keep the template defaults) 719 substitutions = dict([(k,v) for k,v in substitutions.items() if not v is None]) 720 721 params = substitutions.keys()[:] # list will be reduced for each match 722 723 def demangled(p): 724 """Return a RE string that matches the parameter.""" 725 return p.replace('_', '[-_]') # must catch either - or _

726 727 patterns = dict([(parameter, 728 re.compile("""\ 729 (?P<assignment>\s*%s\s*=\s*) # parameter == everything before the value 730 (?P<value>[^;]*) # value (stop before comment=;) 731 (?P<comment>\s*;.*)? # optional comment 732 """ % demangled(parameter), re.VERBOSE)) 733 for parameter in substitutions]) 734 735 target = tempfile.TemporaryFile() 736 with open(mdp) as src: 737 logger.info("editing mdp = %r: %r" % (mdp, substitutions.keys())) 738 for line in src: 739 new_line = line.strip() # \n must be stripped to ensure that new line is built without break 740 for p in params[:]: 741 m = patterns[p].match(new_line) 742 if m: 743 # I am too stupid to replace a specific region in the string so I rebuild it 744 # (matching a line and then replacing value requires TWO re calls) 745 #print 'line:' + new_line 746 #print m.groupdict() 747 if m.group('comment') is None: 748 comment = '' 749 else: 750 comment = " "+m.group('comment') 751 assignment = m.group('assignment') 752 if not assignment.endswith(' '): 753 assignment += ' ' 754 # build new line piece-wise: 755 new_line = assignment 756 if p in extend_parameters: 757 # keep original value and add new stuff at end 758 new_line += str(m.group('value')) + ' ' 759 new_line += str(substitutions[p]) + comment 760 params.remove(p) 761 break 762 target.write(new_line+'\n') 763 target.seek(0) 764 # XXX: Is there a danger of corrupting the original mdp if something went wrong? 765 with open(new_mdp, 'w') as final: 766 shutil.copyfileobj(target, final) 767 target.close() 768 # return all parameters that have NOT been substituted 769 if len(params) > 0: 770 logger.warn("Not substituted in %(new_mdp)r: %(params)r" % vars()) 771 return dict([(p, substitutions[p]) for p in params]) 772

773 -def edit_txt(filename, substitutions, newname=None):

774 """Primitive text file stream editor. 775 776 This function can be used to edit free-form text files such as the 777 topology file. By default it does an **in-place edit** of 778 *filename*. If *newname* is supplied then the edited 779 file is written to *newname*. 780 781 :Arguments: 782 *filename* 783 input text file 784 *substitutions* 785 substitution commands (see below for format) 786 *newname* 787 output filename; if ``None`` then *filename* is changed in 788 place [``None``] 789 790 *substitutions* is a list of triplets; the first two elements are regular 791 expression strings, the last is the substitution value. It mimics 792 ``sed`` search and replace. The rules for *substitutions*: 793 794 .. productionlist:: 795 substitutions: "[" search_replace_tuple, ... "]" 796 search_replace_tuple: "(" line_match_RE "," search_RE "," replacement ")" 797 line_match_RE: regular expression that selects the line (uses match) 798 search_RE: regular expression that is searched in the line 799 replacement: replacement string for search_RE 800 801 Running :func:`edit_txt` does pretty much what a simple :: 802 803 sed /line_match_RE/s/search_RE/replacement/ 804 805 with repeated substitution commands does. 806 807 Special replacement values: 808 - ``None``: the rule is ignored 809 - ``False``: the line is deleted (even if other rules match) 810 811 .. note:: 812 813 * No sanity checks are performed and the substitutions must be supplied 814 exactly as shown. 815 * All substitutions are applied to a line; thus the order of the substitution 816 commands may matter when one substitution generates a match for a subsequent rule. 817 * If replacement is set to ``None`` then the whole expression is ignored and 818 whatever is in the template is used. To unset values you must provided an 819 empty string or similar. 820 * Delete a matching line if replacement=``False``. 821 """ 822 if newname is None: 823 newname = filename 824 825 # No sanity checks (figure out later how to give decent diagnostics). 826 # Filter out any rules that have None in replacement. 827 _substitutions = [{'lRE': re.compile(str(lRE)), 828 'sRE': re.compile(str(sRE)), 829 'repl': repl} 830 for lRE,sRE,repl in substitutions if not repl is None] 831 832 target = tempfile.TemporaryFile() 833 with open(filename) as src: 834 logger.info("editing txt = %r (%d substitutions)" % (filename, len(substitutions))) 835 for line in src: 836 keep_line = True 837 for subst in _substitutions: 838 m = subst['lRE'].match(line) 839 if m: # apply substition to this line? 840 logger.debug('match: '+line.rstrip()) 841 if subst['repl'] is False: # special rule: delete line 842 keep_line = False 843 else: # standard replacement 844 line = subst['sRE'].sub(str(subst['repl']), line) 845 logger.debug('replaced: '+line.rstrip()) 846 if keep_line: 847 target.write(line) 848 else: 849 logger.debug("Deleting line %r", line) 850 851 target.seek(0L) 852 with open(newname, 'w') as final: 853 shutil.copyfileobj(target, final) 854 target.close() 855 logger.info("edited txt = %(newname)r" % vars())

856 857 858 # Working with index files and index groups 859 # ----------------------------------------- 860 # 861 # TODO: move this mess into a separate module (for 0.2); 862 # Once Gromacs 4.1 comes around it will be outdated/useless 863 # anyway. 864 865 #: compiled regular expression to match a list of index groups 866 #: in the output of ``make_ndx``s <Enter> (empty) command. 867 NDXLIST = re.compile(r""">\s+\n # '> ' marker line from '' input (input not echoed) 868 \n # empty line 869 (?P<LIST> # list of groups 870 ( # consists of repeats of the same pattern: 871 \s*\d+ # group number 872 \s+[^\s]+\s*: # group name, separator ':' 873 \s*\d+\satoms # number of atoms in group 874 \n 875 )+ # multiple repeats 876 )""", re.VERBOSE) 877 #: compiled regular expression to match a single line of 878 #: ``make_ndx`` output (e.g. after a successful group creation) 879 NDXGROUP = re.compile(r""" 880 \s*(?P<GROUPNUMBER>\d+) # group number 881 \s+(?P<GROUPNAME>[^\s]+)\s*: # group name, separator ':' 882 \s*(?P<NATOMS>\d+)\satoms # number of atoms in group 883 """, re.VERBOSE)

884 885 -def make_ndx_captured(**kwargs):

886 """make_ndx that captures all output 887 888 Standard :func:`~gromacs.make_ndx` command with the input and 889 output pre-set in such a way that it can be conveniently used for 890 :func:`parse_ndxlist`. 891 892 Example:: 893 ndx_groups = parse_ndxlist(make_ndx_captured(n=ndx)[0]) 894 895 Note that the convenient :func:`get_ndx_groups` function does exactly 896 that and can probably used in most cases. 897 898 :Arguments: 899 keywords are passed on to :func:`~gromacs.make_ndx` 900 :Returns: 901 (*returncode*, *output*, ``None``) 902 """ 903 kwargs['stdout']=False # required for proper output as described in doc 904 kwargs['stderr']=True # ... 905 user_input = kwargs.pop('input',[]) 906 user_input = [cmd for cmd in user_input if cmd != 'q'] # filter any quit 907 kwargs['input'] = user_input + ['', 'q'] # necessary commands 908 return gromacs.make_ndx(**kwargs)

909

910 -def get_ndx_groups(ndx, **kwargs):

911 """Return a list of index groups in the index file *ndx*. 912 913 :Arguments: 914 - *ndx* is a Gromacs index file. 915 - kwargs are passed to :func:`make_ndx_captured`. 916 917 :Returns: 918 list of groups as supplied by :func:`parse_ndxlist` 919 920 Alternatively, load the index file with 921 :class:`gromacs.formats.NDX` for full control. 922 """ 923 fd, tmp_ndx = tempfile.mkstemp(suffix='.ndx') 924 kwargs['o'] = tmp_ndx 925 try: 926 g = parse_ndxlist(make_ndx_captured(n=ndx, **kwargs)[1]) 927 finally: 928 utilities.unlink_gmx(tmp_ndx) 929 return g

930

931 -def parse_ndxlist(output):

932 """Parse output from make_ndx to build list of index groups:: 933 934 groups = parse_ndxlist(output) 935 936 output should be the standard output from ``make_ndx``, e.g.:: 937 938 rc,output,junk = gromacs.make_ndx(..., input=('', 'q'), stdout=False, stderr=True) 939 940 (or simply use 941 942 rc,output,junk = cbook.make_ndx_captured(...) 943 944 which presets input, stdout and stderr; of course input can be overriden.) 945 946 :Returns: 947 The function returns a list of dicts (``groups``) with fields 948 949 name 950 name of the groups 951 nr 952 number of the group (starts at 0) 953 natoms 954 number of atoms in the group 955 956 """ 957 958 m = NDXLIST.search(output) # make sure we pick up a proper full list 959 grouplist = m.group('LIST') 960 return parse_groups(grouplist)

961

962 -def parse_groups(output):

963 """Parse ``make_ndx`` output and return groups as a list of dicts.""" 964 groups = [] 965 for line in output.split('\n'): 966 m = NDXGROUP.match(line) 967 if m: 968 d = m.groupdict() 969 groups.append({'name': d['GROUPNAME'], 970 'nr': int(d['GROUPNUMBER']), 971 'natoms': int(d['NATOMS'])}) 972 return groups

973

974 -class IndexBuilder(object):

975 """Build an index file with specified groups and the combined group. 976 977 This is *not* a full blown selection parser a la Charmm, VMD or 978 MDAnalysis but a very quick hack. 979 980 **Example** 981 982 How to use the :class:`IndexBuilder`:: 983 984 G = gromacs.cbook.IndexBuilder('md_posres.pdb', 985 ['S312:OG','T313:OG1','A38:O','A309:O','@a62549 & r NA'], 986 offset=-9, out_ndx='selection.ndx') 987 groupname, ndx = G.combine() 988 del G 989 990 The residue numbers are given with their canonical resids from the 991 sequence or pdb. *offset=-9* says that one calculates Gromacs topology 992 resids by subtracting 9 from the canonical resid. 993 994 The combined selection is ``OR`` ed by default and written to 995 *selection.ndx*. One can also add all the groups in the initial *ndx* 996 file (or the :program:`make_ndx` default groups) to the output (see the 997 *defaultgroups* keyword for :meth:`IndexBuilder.combine`). 998 999 Generating an index file always requires calling 1000 :meth:`~IndexBuilder.combine` even if there is only a single group. 1001 1002 Deleting the class removes all temporary files associated with it (see 1003 :attr:`IndexBuilder.indexfiles`). 1004 1005 :Raises: 1006 If an empty group is detected (which does not always work) then a 1007 :exc:`gromacs.BadParameterWarning` is issued. 1008 1009 :Bugs: 1010 If ``make_ndx`` crashes with an unexpected error then this is fairly hard to 1011 diagnose. For instance, in certain cases it segmentation faults when a tpr 1012 is provided as a *struct* file and the resulting error messages becomes :: 1013 1014 GromacsError: [Errno -11] Gromacs tool failed 1015 Command invocation: make_ndx -o /tmp/tmp_Na1__NK7cT3.ndx -f md_posres.tpr 1016 1017 In this case run the command invocation manually to see what the problem 1018 could be. 1019 1020 1021 .. SeeAlso:: In some cases it might be more straightforward to use 1022 :class:`gromacs.formats.NDX`. 1023 1024 """ 1025

1026 - def __init__(self, struct=None, selections=None, names=None, name_all=None, 1027 ndx=None, out_ndx="selection.ndx", offset=0):

1028 """Build a index group from the selection arguments. 1029 1030 If selections and a structure file are supplied then the individual 1031 selections are constructed with separate calls to 1032 :func:`gromacs.make_ndx`. Use :meth:`IndexBuilder.combine` to combine 1033 them into a joint selection. 1034 1035 :Arguments: 1036 1037 struct : filename 1038 Structure file (tpr, pdb, ...) 1039 1040 selections : list 1041 The list must contain strings or tuples, which must be be one of 1042 the following constructs: 1043 1044 "<1-letter aa code><resid>[:<atom name]" 1045 1046 Selects the CA of the residue or the specified atom 1047 name. 1048 1049 example: ``"S312:OA"`` or ``"A22"`` (equivalent to ``"A22:CA"``) 1050 1051 ("<1-letter aa code><resid>", "<1-letter aa code><resid>, ["<atom name>"]) 1052 1053 Selects a *range* of residues. If only two residue 1054 identifiers are provided then all atoms are 1055 selected. With an optional third atom identifier, 1056 only this atom anme is selected for each residue 1057 in the range. [EXPERIMENTAL] 1058 1059 "@<make_ndx selection>" 1060 1061 The ``@`` letter introduces a verbatim ``make_ndx`` 1062 command. It will apply the given selection without any 1063 further processing or checks. 1064 1065 example: ``"@a 6234 - 6238"`` or ``'@"SOL"'`` (note the quoting) 1066 or ``"@r SER & r 312 & t OA"``. 1067 1068 names : list 1069 Strings to name the selections; if not supplied or if individuals 1070 are ``None`` then a default name is created. 1071 1072 offset : int, dict 1073 This number is added to the resids in the first selection scheme; this 1074 allows names to be the same as in a crystal structure. If offset is a 1075 dict then it is used to directly look up the resids. 1076 1077 ndx : filename or list of filenames 1078 Optional input index file(s). 1079 1080 out_ndx : filename 1081 Output index file. 1082 1083 """ 1084 self.structure = struct 1085 self.ndx = ndx 1086 self.output = out_ndx 1087 self.name_all = name_all 1088 #: *offset* as a number is added to the resids in the first selection 1089 #: scheme; this 1090 #: allows names to be the same as in a crystal structure. If *offset* is a 1091 #: dict then it is used to directly look up the resids. Use :meth:`gmx_resid` 1092 #: to transform a crystal resid to a gromacs resid. 1093 #: 1094 #: The attribute may be changed directly after init. 1095 self.offset = offset 1096 1097 #: Auto-labelled groups use this counter. 1098 self._command_counter = 0 1099 1100 if selections is None: 1101 selections = [] 1102 if not utilities.iterable(selections): 1103 selections = [selections] 1104 self.selections = selections 1105 if names is None: 1106 names = [None] * len(selections) 1107 1108 #: Specialized ``make_ndx`` that always uses same structure 1109 #: and redirection (can be overridden) 1110 self.make_ndx = tools.Make_ndx(f=self.structure, n=self.ndx, 1111 stdout=False, stderr=False) 1112 1113 #: dict, keyed by group name and pointing to index file for group 1114 #: (Groups are built in separate files because that is more robust 1115 #: as I can clear groups easily.) 1116 self.indexfiles = dict([self.parse_selection(selection, name) 1117 for selection, name in zip(selections, names)])

1118 1119 @property

1120 - def names(self):

1121 """Names of all generated index groups.""" 1122 return self.indexfiles.keys()

1123

1124 - def gmx_resid(self, resid):

1125 """Returns resid in the Gromacs index by transforming with offset.""" 1126 try: 1127 gmx_resid = int(self.offset[resid]) 1128 except (TypeError, IndexError): 1129 gmx_resid = resid + self.offset 1130 except KeyError: 1131 raise KeyError("offset must be a dict that contains the gmx resid for %d" % resid) 1132 return gmx_resid

1133

1134 - def combine(self, name_all=None, out_ndx=None, operation='|', defaultgroups=False):

1135 """Combine individual groups into a single one and write output. 1136 1137 :Keywords: 1138 name_all : string 1139 Name of the combined group, ``None`` generates a name. [``None``] 1140 out_ndx : filename 1141 Name of the output file that will contain the individual groups 1142 and the combined group. If ``None`` then default from the class 1143 constructor is used. [``None``] 1144 operation : character 1145 Logical operation that is used to generate the combined group from 1146 the individual groups: "|" (OR) or "&" (AND) ["|"] 1147 defaultgroups : bool 1148 ``True``: append everything to the default groups produced by 1149 :program:`make_ndx` (or rather, the groups provided in the ndx file on 1150 initialization --- if this was ``None`` then these are truly default groups); 1151 ``False``: only use the generated groups 1152 1153 :Returns: 1154 ``(combinedgroup_name, output_ndx)``, a tuple showing the 1155 actual group name and the name of the file; useful when all names are autogenerated. 1156 1157 .. Warning:: The order of the atom numbers in the combined group is 1158 *not* guaranteed to be the same as the selections on input because 1159 ``make_ndx`` sorts them ascending. Thus you should be careful when 1160 using these index files for calculations of angles and dihedrals. 1161 Use :class:`gromacs.formats.NDX` in these cases. 1162 """ 1163 if name_all is None: 1164 name_all = self.name_all or operation.join(self.indexfiles) 1165 if not operation in ('|', '&'): 1166 raise ValueError("Illegal operation %r, only '|' (OR) and '&' (AND) allowed." % 1167 operation) 1168 if out_ndx is None: 1169 out_ndx = self.output 1170 1171 if defaultgroups: 1172 # make a default file (using the original ndx where provided!!) 1173 fd, default_ndx = tempfile.mkstemp(suffix='.ndx', prefix='default__') 1174 try: 1175 self.make_ndx(o=default_ndx, input=['q']) 1176 except: 1177 utilities.unlink_gmx(default_ndx) 1178 raise 1179 ndxfiles = [default_ndx] 1180 else: 1181 ndxfiles = [] 1182 1183 ndxfiles.extend(self.indexfiles.values()) 1184 1185 # combine multiple selections and name them 1186 try: 1187 fd, tmp_ndx = tempfile.mkstemp(suffix='.ndx', prefix='combined__') 1188 # combine all selections by loading ALL temporary index files 1189 operation = ' '+operation.strip()+' ' 1190 cmd = [operation.join(['"%s"' % gname for gname in self.indexfiles]), 1191 '', 'q'] 1192 rc,out,err = self.make_ndx(n=ndxfiles, o=tmp_ndx, input=cmd) 1193 if self._is_empty_group(out): 1194 warnings.warn("No atoms found for %(cmd)r" % vars(), 1195 category=BadParameterWarning) 1196 1197 # second pass for naming, sigh (or: use NDX ?) 1198 groups = parse_ndxlist(out) 1199 last = groups[-1] 1200 # name this group 1201 name_cmd = ["name %d %s" % (last['nr'], name_all), 1202 'q'] 1203 rc,out,err = self.make_ndx(n=tmp_ndx, o=out_ndx, input=name_cmd) 1204 # For debugging, look at out and err or set stdout=True, stderr=True 1205 # TODO: check out if at least 1 atom selected 1206 ##print "DEBUG: combine()" 1207 ##print out 1208 finally: 1209 utilities.unlink_gmx(tmp_ndx) 1210 if defaultgroups: 1211 utilities.unlink_gmx(default_ndx) 1212 1213 return name_all, out_ndx

1214

1215 - def cat(self, out_ndx=None):

1216 """Concatenate input index files. 1217 1218 Generate a new index file that contains the default Gromacs index 1219 groups (if a structure file was defined) and all index groups from the 1220 input index files. 1221 1222 :Arguments: 1223 out_ndx : filename 1224 Name of the output index file; if ``None`` then use the default 1225 provided to the constructore. [``None``]. 1226 """ 1227 if out_ndx is None: 1228 out_ndx = self.output 1229 self.make_ndx(o=out_ndx, input=['q']) 1230 return out_ndx

1231

1232 - def parse_selection(self, selection, name=None):

1233 """Retuns (groupname, filename) with index group.""" 1234 1235 if type(selection) is tuple: 1236 # range 1237 process = self._process_range 1238 elif selection.startswith('@'): 1239 # verbatim make_ndx command 1240 process = self._process_command 1241 selection = selection[1:] 1242 else: 1243 process = self._process_residue 1244 return process(selection, name)

1245

1246 - def _process_command(self, command, name=None):

1247 """Process ``make_ndx`` command and return name and temp index file.""" 1248 1249 self._command_counter += 1 1250 if name is None: 1251 name = "CMD%03d" % self._command_counter 1252 1253 # Need to build it with two make_ndx calls because I cannot reliably 1254 # name the new group without knowing its number. 1255 try: 1256 fd, tmp_ndx = tempfile.mkstemp(suffix='.ndx', prefix='tmp_'+name+'__') 1257 cmd = [command, '', 'q'] # empty command '' necessary to get list 1258 # This sometimes fails with 'OSError: Broken Pipe' --- hard to debug 1259 rc,out,err = self.make_ndx(o=tmp_ndx, input=cmd) 1260 self.check_output(out, "No atoms found for selection %(command)r." % vars(), err=err) 1261 # For debugging, look at out and err or set stdout=True, stderr=True 1262 # TODO: check ' 0 r_300_&_ALA_&_O : 1 atoms' has at least 1 atom 1263 ##print "DEBUG: _process_command()" 1264 ##print out 1265 groups = parse_ndxlist(out) 1266 last = groups[-1] 1267 # reduce and name this group 1268 fd, ndx = tempfile.mkstemp(suffix='.ndx', prefix=name+'__') 1269 name_cmd = ["keep %d" % last['nr'], 1270 "name 0 %s" % name, 'q'] 1271 rc,out,err = self.make_ndx(n=tmp_ndx, o=ndx, input=name_cmd) 1272 finally: 1273 utilities.unlink_gmx(tmp_ndx) 1274 1275 return name, ndx

1276 1277 #: regular expression to match and parse a residue-atom selection 1278 RESIDUE = re.compile(""" 1279 (?P<aa>([ACDEFGHIKLMNPQRSTVWY]) # 1-letter amino acid 1280 | # or 1281 (\S\S\S) # 3-letter residue name 1282 ) 1283 (?P<resid>\d+) # resid 1284 (: # separator ':' 1285 (?P<atom>\w+) # atom name 1286 )? # possibly one 1287 """, re.VERBOSE) 1288

1289 - def _process_residue(self, selection, name=None):

1290 """Process residue/atom selection and return name and temp index file.""" 1291 1292 if name is None: 1293 name = selection.replace(':', '_') 1294 1295 # XXX: use _translate_residue() .... 1296 m = self.RESIDUE.match(selection) 1297 if not m: 1298 raise ValueError("Selection %(selection)r is not valid." % vars()) 1299 1300 gmx_resid = self.gmx_resid(int(m.group('resid'))) 1301 residue = m.group('aa') 1302 if len(residue) == 1: 1303 gmx_resname = utilities.convert_aa_code(residue) # only works for AA 1304 else: 1305 gmx_resname = residue # use 3-letter for any resname 1306 gmx_atomname = m.group('atom') 1307 if gmx_atomname is None: 1308 gmx_atomname = 'CA' 1309 1310 #: select residue <gmx_resname><gmx_resid> atom <gmx_atomname> 1311 _selection = 'r %(gmx_resid)d & r %(gmx_resname)s & a %(gmx_atomname)s' % vars() 1312 cmd = ['keep 0', 'del 0', 1313 _selection, 1314 'name 0 %(name)s' % vars(), 1315 'q'] 1316 fd, ndx = tempfile.mkstemp(suffix='.ndx', prefix=name+'__') 1317 rc,out,err = self.make_ndx(n=self.ndx, o=ndx, input=cmd) 1318 self.check_output(out, "No atoms found for " 1319 "%(selection)r --> %(_selection)r" % vars()) 1320 # For debugging, look at out and err or set stdout=True, stderr=True 1321 ##print "DEBUG: _process_residue()" 1322 ##print out 1323 1324 return name, ndx

1325

1326 - def _process_range(self, selection, name=None):

1327 """Process a range selection. 1328 1329 ("S234", "A300", "CA") --> selected all CA in this range 1330 ("S234", "A300") --> selected all atoms in this range 1331 1332 .. Note:: Ignores residue type, only cares about the resid (but still required) 1333 """ 1334 1335 try: 1336 first, last, gmx_atomname = selection 1337 except ValueError: 1338 try: 1339 first, last = selection 1340 gmx_atomname = '*' 1341 except: 1342 logger.error("%r is not a valid range selection", selection) 1343 raise 1344 if name is None: 1345 name = "%(first)s-%(last)s_%(gmx_atomname)s" % vars() 1346 1347 _first = self._translate_residue(first, default_atomname=gmx_atomname) 1348 _last = self._translate_residue(last, default_atomname=gmx_atomname) 1349 1350 _selection = 'r %d - %d & & a %s' % (_first['resid'], _last['resid'], gmx_atomname) 1351 cmd = ['keep 0', 'del 0', 1352 _selection, 1353 'name 0 %(name)s' % vars(), 1354 'q'] 1355 fd, ndx = tempfile.mkstemp(suffix='.ndx', prefix=name+'__') 1356 rc,out,err = self.make_ndx(n=self.ndx, o=ndx, input=cmd) 1357 self.check_output(out, "No atoms found for " 1358 "%(selection)r --> %(_selection)r" % vars()) 1359 # For debugging, look at out and err or set stdout=True, stderr=True 1360 ##print "DEBUG: _process_residue()" 1361 ##print out 1362 1363 return name, ndx

1364 1365

1366 - def _translate_residue(self, selection, default_atomname='CA'):

1367 """Translate selection for a single res to make_ndx syntax.""" 1368 m = self.RESIDUE.match(selection) 1369 if not m: 1370 errmsg = "Selection %(selection)r is not valid." % vars() 1371 logger.error(errmsg) 1372 raise ValueError(errmsg) 1373 1374 gmx_resid = self.gmx_resid(int(m.group('resid'))) # magic offset correction 1375 residue = m.group('aa') 1376 if len(residue) == 1: 1377 gmx_resname = utilities.convert_aa_code(residue) # only works for AA 1378 else: 1379 gmx_resname = residue # use 3-letter for any resname 1380 1381 gmx_atomname = m.group('atom') 1382 if gmx_atomname is None: 1383 gmx_atomname = default_atomname 1384 1385 return {'resname':gmx_resname, 'resid':gmx_resid, 'atomname':gmx_atomname}

1386 1387 1388

1389 - def check_output(self, make_ndx_output, message=None, err=None):

1390 """Simple tests to flag problems with a ``make_ndx`` run.""" 1391 if message is None: 1392 message = "" 1393 else: 1394 message = '\n' + message 1395 def format(output, w=60): 1396 hrule = "====[ GromacsError (diagnostic output) ]".ljust(w,"=") 1397 return hrule + '\n' + str(output) + hrule

1398 1399 rc = True 1400 if self._is_empty_group(make_ndx_output): 1401 warnings.warn("Selection produced empty group.%(message)s" 1402 % vars(), category=GromacsValueWarning) 1403 rc = False 1404 if self._has_syntax_error(make_ndx_output): 1405 rc = False 1406 out_formatted = format(make_ndx_output) 1407 raise GromacsError("make_ndx encountered a Syntax Error, " 1408 "%(message)s\noutput:\n%(out_formatted)s" % vars()) 1409 if make_ndx_output.strip() == "": 1410 rc = False 1411 out_formatted = format(err) 1412 raise GromacsError("make_ndx produced no output, " 1413 "%(message)s\nerror output:\n%(out_formatted)s" % vars()) 1414 return rc

1415

1416 - def _is_empty_group(self, make_ndx_output):

1417 m = re.search('Group is empty', make_ndx_output) 1418 return not (m is None)

1419

1420 - def _has_syntax_error(self, make_ndx_output):

1421 m = re.search('Syntax error:', make_ndx_output) 1422 return not (m is None)

1423 1424

1425 - def __del__(self):

1426 try: 1427 for path in self.indexfiles.values(): 1428 utilities.unlink_gmx(path) 1429 # Removes auto-backup files, too (which we have because mkstemp creates 1430 # an empty file and make_ndx backs that up). 1431 except (AttributeError, OSError): 1432 # all exceptions are ignored inside __del__ anyway but these 1433 # two we do not even want to be noticed off: 1434 # AttributeError: when reloading the module, OSError: when file disappeared 1435 pass

1436

1437 1438 -class Transformer(utilities.FileUtils):

1439 """Class to handle transformations of trajectories. 1440 1441 1. Center, compact, and fit to reference structure in tpr 1442 (optionally, only center in the xy plane): :meth:`~Transformer.center_fit` 1443 2. Write compact xtc and tpr with water removed: :meth:`~Transformer.strip_water` 1444 3. Write compact xtc and tpr only with protein: :meth:`~Transformer.keep_protein_only` 1445 1446 """ 1447

1448 - def __init__(self, s="topol.tpr", f="traj.xtc", n=None, force=None, dirname=os.path.curdir):

1449 """Set up Transformer with structure and trajectory. 1450 1451 Supply *n* = tpr, *f* = xtc (and *n* = ndx) relative to dirname. 1452 1453 :Keywords: 1454 *s* 1455 tpr file (or similar); note that this should not contain 1456 position restraints if it is to be used with a reduced 1457 system (see :meth:`~Transformer.strip_water`) 1458 *f* 1459 trajectory (xtc, trr, ...) 1460 *n* 1461 index file (it is typically safe to leave this as ``None``; in 1462 cases where a trajectory needs to be centered on non-standard 1463 groups this should contain those groups) 1464 *force* 1465 Set the default behaviour for handling existing files: 1466 - ``True``: overwrite existing trajectories 1467 - ``False``: throw a IOError exception 1468 - ``None``: skip existing and log a warning [default] 1469 """ 1470 1471 self.tpr = self.filename(s, ext="tpr", use_my_ext=True) 1472 self.xtc = self.filename(f, ext="xtc", use_my_ext=True) 1473 self.ndx = n 1474 self.dirname = dirname 1475 self.force = force 1476 self.nowater = {} # data for trajectory stripped from water 1477 self.proteinonly = {} # data for a protein-only trajectory 1478 1479 with utilities.in_dir(self.dirname, create=False): 1480 for f in (self.tpr, self.xtc, self.ndx): 1481 if f is None: 1482 continue 1483 if not os.path.exists(f): 1484 msg = "Possible problem: File %(f)r not found in %(dirname)r." % vars() 1485 warnings.warn(msg, category=MissingDataWarning) 1486 logger.warn(msg)

1487

1488 - def rp(self, *args):

1489 """Return canonical path to file under *dirname* with components *args* 1490 1491 If *args* form an absolute path then just return it as the absolute path. 1492 """ 1493 try: 1494 p = os.path.join(*args) 1495 if os.path.isabs(p): 1496 return p 1497 except TypeError: 1498 pass 1499 return utilities.realpath(self.dirname, *args)

1500

1501 - def center_fit(self, **kwargs):

1502 """Write compact xtc that is fitted to the tpr reference structure. 1503 1504 See :func:gromacs.cbook.trj_fitandcenter` for details and 1505 description of *kwargs*. The most important ones are listed 1506 here but in most cases the defaults should work. 1507 1508 :Keywords: 1509 *s* 1510 Input structure (typically the default tpr file but can be set to 1511 some other file with a different conformation for fitting) 1512 *n* 1513 Alternative index file. 1514 *o* 1515 Name of the output trajectory. 1516 *xy* : Boolean 1517 If ``True`` then only fit in xy-plane (useful for a membrane normal 1518 to z). The default is ``False``. 1519 *force* 1520 - ``True``: overwrite existing trajectories 1521 - ``False``: throw a IOError exception 1522 - ``None``: skip existing and log a warning [default] 1523 1524 :Returns: 1525 dictionary with keys *tpr*, *xtc*, which are the names of the 1526 the new files 1527 """ 1528 kwargs.setdefault('s', self.tpr) 1529 kwargs.setdefault('n', self.ndx) 1530 kwargs['f'] = self.xtc 1531 kwargs.setdefault('o', self.infix_filename(None, self.xtc, '_centfit', 'xtc')) 1532 force = kwargs.pop('force', self.force) 1533 1534 logger.info("Centering and fitting trajectory %(f)r..." % kwargs) 1535 with utilities.in_dir(self.dirname): 1536 if not self.check_file_exists(kwargs['o'], resolve="indicate", force=force): 1537 trj_fitandcenter(**kwargs) 1538 logger.info("Centered and fit trajectory: %(o)r." % kwargs) 1539 return {'tpr': self.rp(kwargs['s']), 'xtc': self.rp(kwargs['o'])}

1540

1541 - def fit(self, xy=False, **kwargs):

1542 """Write xtc that is fitted to the tpr reference structure. 1543 1544 See :func:`gromacs.cbook.trj_xyfitted` for details and 1545 description of *kwargs*. The most important ones are listed 1546 here but in most cases the defaults should work. 1547 1548 :Keywords: 1549 *s* 1550 Input structure (typically the default tpr file but can be set to 1551 some other file with a different conformation for fitting) 1552 *n* 1553 Alternative index file. 1554 *o* 1555 Name of the output trajectory. A default name is created. 1556 If e.g. *dt* = 100 is one of the *kwargs* then the default name includes 1557 "_dt100ps". 1558 *xy* : boolean 1559 If ``True`` then only do a rot+trans fit in the xy plane 1560 (good for membrane simulations); default is ``False``. 1561 *force* 1562 ``True``: overwrite existing trajectories 1563 ``False``: throw a IOError exception 1564 ``None``: skip existing and log a warning [default] 1565 *kwargs* 1566 kwargs are passed to :func:`~gromacs.cbook.trj_xyfitted` 1567 1568 :Returns: 1569 dictionary with keys *tpr*, *xtc*, which are the names of the 1570 the new files 1571 """ 1572 kwargs.setdefault('s', self.tpr) 1573 kwargs.setdefault('n', self.ndx) 1574 kwargs['f'] = self.xtc 1575 force = kwargs.pop('force', self.force) 1576 if xy: 1577 fitmode = 'rotxy+transxy' 1578 kwargs.pop('fit', None) 1579 infix_default = '_fitxy' 1580 else: 1581 fitmode = kwargs.pop('fit', 'rot+trans') # user can use 'progressive', too 1582 infix_default = '_fit' 1583 1584 dt = kwargs.get('dt', None) 1585 if dt: 1586 infix_default += '_dt%dps' % int(dt) # dt in ps 1587 1588 kwargs.setdefault('o', self.infix_filename(None, self.xtc, infix_default, 'xtc')) 1589 1590 logger.info("Fitting trajectory %r to with xy=%r...", kwargs['f'], xy) 1591 with utilities.in_dir(self.dirname): 1592 if self.check_file_exists(kwargs['o'], resolve="indicate", force=force): 1593 logger.warn("File %r exists; force regenerating it with force=True.", kwargs['o']) 1594 else: 1595 trj_xyfitted(fit=fitmode, **kwargs) 1596 logger.info("Fitted trajectory (fitmode=%s): %r.", fitmode, kwargs['o']) 1597 return {'tpr': self.rp(kwargs['s']), 'xtc': self.rp(kwargs['o'])}

1598

1599 - def strip_water(self, os=None, o=None, on=None, compact=False, 1600 resn="SOL", groupname="notwater", **kwargs):

1601 """Write xtc and tpr with water (by resname) removed. 1602 1603 :Keywords: 1604 *os* 1605 Name of the output tpr file; by default use the original but 1606 insert "nowater" before suffix. 1607 *o* 1608 Name of the output trajectory; by default use the original name but 1609 insert "nowater" before suffix. 1610 *on* 1611 Name of a new index file (without water). 1612 *compact* 1613 ``True``: write a compact and centered trajectory 1614 ``False``: use trajectory as it is [``False``] 1615 *resn* 1616 Residue name of the water molecules; all these residues are excluded. 1617 *groupname* 1618 Name of the group that is generated by subtracting all waters 1619 from the system. 1620 *force* : Boolean 1621 - ``True``: overwrite existing trajectories 1622 - ``False``: throw a IOError exception 1623 - ``None``: skip existing and log a warning [default] 1624 *kwargs* 1625 are passed on to :func:`gromacs.cbook.trj_compact` (unless the 1626 values have to be set to certain values such as s, f, n, o 1627 keywords). The *input* keyword is always mangled: Only the first 1628 entry (the group to centre the trajectory on) is kept, and as a 1629 second group (the output group) *groupname* is used. 1630 1631 :Returns: 1632 dictionary with keys *tpr*, *xtc*, *ndx* which are the names of the 1633 the new files 1634 1635 .. warning:: The input tpr file should *not* have *any position restraints*; 1636 otherwise Gromacs will throw a hissy-fit and say 1637 1638 *Software inconsistency error: Position restraint coordinates are 1639 missing* 1640 1641 (This appears to be a bug in Gromacs 4.x.) 1642 """ 1643 force = kwargs.pop('force', self.force) 1644 1645 newtpr = self.infix_filename(os, self.tpr, '_nowater') 1646 newxtc = self.infix_filename(o, self.xtc, '_nowater') 1647 newndx = self.infix_filename(on, self.tpr, '_nowater', 'ndx') 1648 1649 nowater_ndx = "nowater.ndx" # refers to original tpr 1650 1651 if compact: 1652 TRJCONV = trj_compact 1653 _input = kwargs.get('input', ['Protein']) 1654 kwargs['input'] = [_input[0], groupname] # [center group, write-out selection] 1655 del _input 1656 else: 1657 TRJCONV = gromacs.trjconv 1658 kwargs['input'] = [groupname] 1659 1660 NOTwater = "! r %(resn)s" % vars() # make_ndx selection ("not water residues") 1661 with utilities.in_dir(self.dirname): 1662 # ugly because I cannot break from the block 1663 if not self.check_file_exists(newxtc, resolve="indicate", force=force): 1664 # make no-water index 1665 B = IndexBuilder(struct=self.tpr, selections=['@'+NOTwater], 1666 ndx=self.ndx, out_ndx=nowater_ndx) 1667 B.combine(name_all=groupname, operation="|", defaultgroups=True) 1668 1669 logger.info("TPR file without water %(newtpr)r" % vars()) 1670 gromacs.tpbconv(s=self.tpr, o=newtpr, n=nowater_ndx, input=[groupname]) 1671 1672 logger.info("NDX of the new system %(newndx)r" % vars()) 1673 gromacs.make_ndx(f=newtpr, o=newndx, input=['q'], stderr=False, stdout=False) 1674 1675 logger.info("Trajectory without water %(newxtc)r" % vars()) 1676 kwargs['s'] = self.tpr 1677 kwargs['f'] = self.xtc 1678 kwargs['n'] = nowater_ndx 1679 kwargs['o'] = newxtc 1680 TRJCONV(**kwargs) 1681 1682 logger.info("pdb and gro for visualization") 1683 for ext in 'pdb', 'gro': 1684 try: 1685 # see warning in doc ... so we don't use the new xtc but the old one 1686 kwargs['o'] = self.filename(newtpr, ext=ext) 1687 TRJCONV(dump=0, stdout=False, stderr=False, **kwargs) # silent 1688 except: 1689 logger.exception("Failed building the water-less %(ext)s. " 1690 "Position restraints in tpr file (see docs)?" % vars()) 1691 logger.info("strip_water() complete") 1692 1693 self.nowater[newxtc] = Transformer(dirname=self.dirname, s=newtpr, 1694 f=newxtc, n=newndx) 1695 return {'tpr':self.rp(newtpr), 'xtc':self.rp(newxtc), 'ndx':self.rp(newndx)}

1696 1697 1698 # TODO: could probably unify strip_water() and keep_protein_only() 1699 # (given that the latter was produced by copy&paste+search&replace...) 1700

1701 - def keep_protein_only(self, os=None, o=None, on=None, compact=False, 1702 groupname="proteinonly", **kwargs):

1703 """Write xtc and tpr only containing the protein. 1704 1705 :Keywords: 1706 *os* 1707 Name of the output tpr file; by default use the original but 1708 insert "proteinonly" before suffix. 1709 *o* 1710 Name of the output trajectory; by default use the original name but 1711 insert "proteinonly" before suffix. 1712 *on* 1713 Name of a new index file. 1714 *compact* 1715 ``True``: write a compact and centered trajectory 1716 ``False``: use trajectory as it is [``False``] 1717 *groupname* 1718 Name of the protein-only group. 1719 *keepalso* 1720 List of literal make_ndx selections of additional groups that should 1721 be kept, e.g. ['resname DRUG', 'atom 6789']. 1722 *force* : Boolean 1723 - ``True``: overwrite existing trajectories 1724 - ``False``: throw a IOError exception 1725 - ``None``: skip existing and log a warning [default] 1726 *kwargs* 1727 are passed on to :func:`gromacs.cbook.trj_compact` (unless the 1728 values have to be set to certain values such as s, f, n, o 1729 keywords). The *input* keyword is always mangled: Only the first 1730 entry (the group to centre the trajectory on) is kept, and as a 1731 second group (the output group) *groupname* is used. 1732 1733 :Returns: 1734 dictionary with keys *tpr*, *xtc*, *ndx* which are the names of the 1735 the new files 1736 1737 .. warning:: The input tpr file should *not* have *any position restraints*; 1738 otherwise Gromacs will throw a hissy-fit and say 1739 1740 *Software inconsistency error: Position restraint coordinates are 1741 missing* 1742 1743 (This appears to be a bug in Gromacs 4.x.) 1744 """ 1745 force = kwargs.pop('force', self.force) 1746 suffix = 'proteinonly' 1747 newtpr = self.infix_filename(os, self.tpr, '_'+suffix) 1748 newxtc = self.infix_filename(o, self.xtc, '_'+suffix) 1749 newndx = self.infix_filename(on, self.tpr, '_'+suffix, 'ndx') 1750 1751 selection_ndx = suffix+".ndx" # refers to original tpr 1752 1753 if compact: 1754 TRJCONV = trj_compact 1755 _input = kwargs.get('input', ['Protein']) 1756 kwargs['input'] = [_input[0], groupname] # [center group, write-out selection] 1757 del _input 1758 else: 1759 TRJCONV = gromacs.trjconv 1760 kwargs['input'] = [groupname] 1761 1762 selections = ['@'+sel for sel in ['"Protein"'] + kwargs.pop('keepalso',[])] 1763 with utilities.in_dir(self.dirname): 1764 # ugly because I cannot break from the block 1765 if not self.check_file_exists(newxtc, resolve="indicate", force=force): 1766 # make index (overkill for 'Protein' but maybe we want to enhance 1767 # it in the future, e.g. with keeping ions/ligands as well? 1768 B = IndexBuilder(struct=self.tpr, selections=selections, 1769 ndx=self.ndx, out_ndx=selection_ndx) 1770 B.combine(name_all=groupname, operation="|", defaultgroups=True) 1771 1772 logger.info("TPR file containg the protein %(newtpr)r" % vars()) 1773 gromacs.tpbconv(s=self.tpr, o=newtpr, n=selection_ndx, input=[groupname]) 1774 1775 logger.info("NDX of the new system %(newndx)r" % vars()) 1776 gromacs.make_ndx(f=newtpr, o=newndx, input=['q'], stderr=False, stdout=False) 1777 1778 logger.info("Trajectory with only the protein %(newxtc)r" % vars()) 1779 kwargs['s'] = self.tpr 1780 kwargs['f'] = self.xtc 1781 kwargs['n'] = selection_ndx 1782 kwargs['o'] = newxtc 1783 TRJCONV(**kwargs) 1784 1785 logger.info("pdb and gro for visualization") 1786 for ext in 'pdb', 'gro': 1787 try: 1788 # see warning in doc ... so we don't use the new xtc but the old one 1789 kwargs['o'] = self.filename(newtpr, ext=ext) 1790 TRJCONV(dump=0, stdout=False, stderr=False, **kwargs) # silent 1791 except: 1792 logger.exception("Failed building the protein-only %(ext)s. " 1793 "Position restraints in tpr file (see docs)?" % vars()) 1794 logger.info("keep_protein_only() complete") 1795 1796 self.proteinonly[newxtc] = Transformer(dirname=self.dirname, s=newtpr, 1797 f=newxtc, n=newndx) 1798 return {'tpr':self.rp(newtpr), 'xtc':self.rp(newxtc), 'ndx':self.rp(newndx)}

1799

Source Code for Module gromacs.cbook