1   
  2  """ 
  3  :mod:`edPDB.cbook` -- Recipes for editing PDB files 
  4  =================================================== 
  5   
  6  The cook book contains short python functions that demonstrate how to 
  7  implement basic PDB editing functionality. They do not do exhaustive 
  8  error checking and might have to be altered for your purpose. 
  9   
 10  .. autoclass:: PDB 
 11     :members: 
 12  .. autofunction:: align_ligand 
 13  .. autofunction:: remove_overlap_water 
 14  .. autofunction:: extract_residue 
 15   
 16  """ 
 17   
 18  import os.path 
 19  from warnings import warn 
 20   
 21  import Bio.PDB 
 22   
 23  import xpdb 
 24  import selections 
 25   
 26  import logging 
 27  logger = logging.getLogger('edPDB.cbook') 
 28   
 29 -def align_ligand(protein_struct, ligand_struct, ligand_resname, output='ligand_aligned.pdb'): 
  30      """Align a ligand to the same ligand in a protein, based on the heavy atoms. 
 31   
 32      This is useful when a new ligand was generated with hydrogens but 
 33      the position in space changed. 
 34   
 35      :Arguments: 
 36        *protein_struct* 
 37           protein + ligand pdb 
 38        *ligand_struct* 
 39           ligand pdb 
 40        *ligand_resname* 
 41           residue name of the ligand in the file *protein_struct* 
 42        *ligand_aligned* 
 43           output 
 44   
 45      :Returns: RMSD of the fit in Angstroem. 
 46   
 47      .. Warning:: Assumes only heavy atoms in PDB (I think... check source!)  
 48      """ 
 49      logger.debug("align_ligand(%(protein_struct)r, %(ligand_struct)r, " 
 50                  "%(ligand_resname)r, output=%(output)r)" % vars()) 
 51   
 52      prot = xpdb.get_structure('prot', protein_struct) 
 53      lig = xpdb.get_structure('lig', ligand_struct) 
 54      plig = selections.residues_by_resname(prot, ligand_resname) 
 55   
 56      heavyMoving = [a for a in lig.get_atoms() if a.name[0] != 'H'] 
 57      heavyFixed = Bio.PDB.Selection.unfold_entities(plig, 'A')   
 58   
 59      S = Bio.PDB.Superimposer() 
 60      S.set_atoms(heavyFixed, heavyMoving)     
 61      S.apply(lig.get_atoms())                 
 62   
 63      io = Bio.PDB.PDBIO() 
 64      io.set_structure(lig) 
 65      io.save(output) 
 66   
 67      logger.info("Wrote aligned ligand  %r (RMSD = %g A)" % (ligand_aligned, S.rms)) 
 68      return S.rms 
  69   
 70   
 72      """Remove water (SOL) molecules overlapping with ligand. 
 73   
 74      :Arguments: 
 75        pdbname 
 76          pdb file that contains the ligand and the water 
 77        output 
 78          pdb output filename 
 79        ligand_resname 
 80          name of the ligand residue(s) in the pdb 
 81        distance 
 82          overlap is defined as a centre-centre distance of any solvent 
 83          OW atom with any ligand atom of less than *distance* 
 84   
 85      .. note:: The residue and atom numbering will be fairly 
 86                meaningless in the final PDB because it wraps at 100,000 
 87                or 10,000. 
 88   
 89                Also make sure that there are either consistent chain 
 90                identifiers or none (blank) because otherwise the 
 91                residue blocks migh become reordered. (This is due to the 
 92                way the Bio.PDB.PDBIO writes files.) 
 93      """ 
 94      logger.debug("remove_overlap_water(%(pdbname)r, %(output)r, %(ligand_resname)r, " 
 95                  "distance=%(distance)r)" % vars()) 
 96   
 97      structure = xpdb.get_structure(pdbname) 
 98      ligand = selections.residues_by_resname(structure, ligand_resname) 
 99   
100      w = xpdb.find_water(structure, ligand, radius=distance, water=water) 
101      logger.debug("waters found: %r" % w) 
102      logger.info("removed %d %r molecules overlapping %r",  
103                  len(w), water, ligand_resname) 
104      xpdb.write_pdb(structure, output, exclusions=w, **kwargs) 
 105   
106   
107   
109      """Write a pdb file with *resname* extracted. 
110   
111      """ 
112      warn("extract_resname() will be removed", category=DeprecationWarning) 
113      logger.debug("extract_residue(%(pdbname)r, %(output)r, %(resnames)r)" % vars()) 
114      structure = xpdb.get_structure(pdbname) 
115      residues = selections.residues_by_resname(structure, resnames) 
116      xpdb.write_pdb(structure, output, inclusions=residues, **kwargs) 
 117   
119      """Write a pdb file with the protein (i.e. all amino acids) extracted. 
120      """ 
121      warn("extract_protein() will be removed", category=DeprecationWarning) 
122      logger.debug("extract_protein(%(pdbname)r, %(output)r)" % vars()) 
123      structure = xpdb.get_structure(pdbname) 
124      residues = selections.residues_by_selection(structure, xpdb.ProteinSelect()) 
125      xpdb.write_pdb(structure, output, inclusions=residues, **kwargs) 
 126   
128      """Write a pdb file with the lipids extracted. 
129   
130      Note that resnames are also tried truncated to the first three 
131      characters, which means that POPE and POPG are identical and 
132      cannot be distinguished. 
133      """ 
134      warn("extract_lipids() will be removed", category=DeprecationWarning) 
135      logger.debug("extract_lipids(%(pdbname)r, %(output)r, lipid_resnames=%(lipid_resnames)r)" % vars()) 
136      resnames = lipid_resnames.split('|') 
137      resnames.extend([r[:3] for r in resnames]) 
138   
139      structure = xpdb.get_structure(pdbname) 
140      residues = selections.residues_by_selection(structure, xpdb.ResnameSelect(resnames)) 
141      xpdb.write_pdb(structure, output, inclusions=residues, **kwargs) 
 142   
143   
145      """Class that represents a PDB file and allows extractions of interesting parts. 
146       
147      The structure itself is never changed. In order to extract 
148      sub-parts of a structure one selects these parts and writes them 
149      as new pdb file. 
150   
151      The advantage over a simple :program:`grep` is that you will be able to 
152      read any odd pdb file and you will also able to do things like 
153      :meth:`~edPDB.cbook.PDB.extract_protein` or 
154      :meth:`~edPDB.cbook.PDB.extract_lipids`. 
155      """ 
156   
158          """Load structure from file *pdbname*.""" 
159          self.pdbname = pdbname 
160          self.structure = xpdb.get_structure(pdbname) 
161          self.logger = logging.getLogger('edPDB.PDB') 
162          self.logger.info("Loaded pdb file %(pdbname)r." % vars()) 
 163   
164 -    def write(self, filename, **kwargs): 
 165          """Write pdbfile which includes or excludes residues. 
166   
167          :Arguments: 
168              *filename* 
169                  output pdb filename 
170              *inclusions* 
171                  list of residues to include 
172              *exclusions* 
173                  list of residues to exclude 
174              *chain* 
175                  relabel the selection with a new chain identifier 
176   
177   
178           Residues must be BioPDB residues as returned by, for 
179           instance, :meth:`~edPDB.cbook.PDB.residues_by_resname`. 
180   
181           .. Note:: Currently only either *inclusions* or *exclusions* can be 
182                     supplied, not both. 
183          """ 
184          self.logger.debug("write(): file %r, args %r", filename, kwargs.keys()) 
185          xpdb.write_pdb(self.structure, filename, **kwargs) 
 186           
188          """Return a list of BioPDB residues that match *resnames*. 
189   
190          *resnames* can be a string or a list. 
191          """ 
192          return selections.residues_by_resname(self.structure, resnames) 
 193   
195          """Return  a list of BioPDB residues that are selected by *selection*. 
196   
197          *selection* must be BioPDB.PDB.PDBIO.Select instance (see for 
198          example :class:`edPDB.xpdb.ProteinSelect`). 
199          """ 
200          return selections.residues_by_selection(self.structure, selection) 
 201   
203          """Write a pdb file with *resnames* extracted.""" 
204          self.logger.info("extract_resnames(%(filename)r, %(resnames)r)" % vars()) 
205          kwargs['selection'] = selections.ResnameSelect(resnames) 
206          self.write(filename, **kwargs) 
 207   
209          """Write a pdb file with the protein (i.e. all amino acids) extracted.""" 
210          self.logger.info("extract_protein(%(filename)r)" % vars()) 
211          kwargs['selection'] = selections.ProteinSelect() 
212          self.write(filename, **kwargs) 
 213   
219   
221          """Write a pdb file with the lipids extracted. 
222   
223          Note that resnames are also tried truncated to the first three 
224          characters, which means that POPE and POPG are identical and 
225          cannot be distinguished. 
226          """ 
227          self.logger.info("extract_lipids(%(filename)r, lipid_resnames=%(lipid_resnames)r)" % vars()) 
228          resnames = lipid_resnames.split('|') 
229          resnames.extend([r[:3] for r in resnames]) 
230          kwargs['selection'] = selections.ResnameSelect(resnames) 
231          self.write(filename, **kwargs) 
  232