edPDB.cbook

1 # edPDB.cbook 2 """ 3 :mod:`edPDB.cbook` -- Recipes for editing PDB files 4 =================================================== 5 6 The cook book contains short python functions that demonstrate how to 7 implement basic PDB editing functionality. They do not do exhaustive 8 error checking and might have to be altered for your purpose. 9 10 .. autoclass:: PDB 11 :members: 12 .. autofunction:: align_ligand 13 .. autofunction:: remove_overlap_water 14 .. autofunction:: extract_residue 15 16 """ 17 18 import os.path 19 from warnings import warn 20 21 import Bio.PDB 22 23 import xpdb 24 import selections 25 26 import logging 27 logger = logging.getLogger('edPDB.cbook') 28

29 -def align_ligand(protein_struct, ligand_struct, ligand_resname, output='ligand_aligned.pdb'):

30 """Align a ligand to the same ligand in a protein, based on the heavy atoms. 31 32 This is useful when a new ligand was generated with hydrogens but 33 the position in space changed. 34 35 :Arguments: 36 *protein_struct* 37 protein + ligand pdb 38 *ligand_struct* 39 ligand pdb 40 *ligand_resname* 41 residue name of the ligand in the file *protein_struct* 42 *ligand_aligned* 43 output 44 45 :Returns: RMSD of the fit in Angstroem. 46 47 .. Warning:: Assumes only heavy atoms in PDB (I think... check source!) 48 """ 49 logger.debug("align_ligand(%(protein_struct)r, %(ligand_struct)r, " 50 "%(ligand_resname)r, output=%(output)r)" % vars()) 51 52 prot = xpdb.get_structure('prot', protein_struct) 53 lig = xpdb.get_structure('lig', ligand_struct) 54 plig = selections.residues_by_resname(prot, ligand_resname) 55 56 heavyMoving = [a for a in lig.get_atoms() if a.name[0] != 'H'] 57 heavyFixed = Bio.PDB.Selection.unfold_entities(plig, 'A') # only heavies in pdb 58 59 S = Bio.PDB.Superimposer() 60 S.set_atoms(heavyFixed, heavyMoving) # get rotation matrix 61 S.apply(lig.get_atoms()) # rotate ligand atoms 62 63 io = Bio.PDB.PDBIO() 64 io.set_structure(lig) 65 io.save(output) 66 67 logger.info("Wrote aligned ligand %r (RMSD = %g A)" % (ligand_aligned, S.rms)) 68 return S.rms

69 70

71 -def remove_overlap_water(pdbname, output, ligand_resname, distance=3.0, water="SOL", **kwargs):

72 """Remove water (SOL) molecules overlapping with ligand. 73 74 :Arguments: 75 pdbname 76 pdb file that contains the ligand and the water 77 output 78 pdb output filename 79 ligand_resname 80 name of the ligand residue(s) in the pdb 81 distance 82 overlap is defined as a centre-centre distance of any solvent 83 OW atom with any ligand atom of less than *distance* 84 85 .. note:: The residue and atom numbering will be fairly 86 meaningless in the final PDB because it wraps at 100,000 87 or 10,000. 88 89 Also make sure that there are either consistent chain 90 identifiers or none (blank) because otherwise the 91 residue blocks migh become reordered. (This is due to the 92 way the Bio.PDB.PDBIO writes files.) 93 """ 94 logger.debug("remove_overlap_water(%(pdbname)r, %(output)r, %(ligand_resname)r, " 95 "distance=%(distance)r)" % vars()) 96 97 structure = xpdb.get_structure(pdbname) 98 ligand = selections.residues_by_resname(structure, ligand_resname) 99 100 w = xpdb.find_water(structure, ligand, radius=distance, water=water) 101 logger.debug("waters found: %r" % w) 102 logger.info("removed %d %r molecules overlapping %r", 103 len(w), water, ligand_resname) 104 xpdb.write_pdb(structure, output, exclusions=w, **kwargs)

105 106 # the following are deprecated 107

108 -def extract_resnames(pdbname, output, resnames, **kwargs):

109 """Write a pdb file with *resname* extracted. 110 111 """ 112 warn("extract_resname() will be removed", category=DeprecationWarning) 113 logger.debug("extract_residue(%(pdbname)r, %(output)r, %(resnames)r)" % vars()) 114 structure = xpdb.get_structure(pdbname) 115 residues = selections.residues_by_resname(structure, resnames) 116 xpdb.write_pdb(structure, output, inclusions=residues, **kwargs)

117

118 -def extract_protein(pdbname, output, **kwargs):

119 """Write a pdb file with the protein (i.e. all amino acids) extracted. 120 """ 121 warn("extract_protein() will be removed", category=DeprecationWarning) 122 logger.debug("extract_protein(%(pdbname)r, %(output)r)" % vars()) 123 structure = xpdb.get_structure(pdbname) 124 residues = selections.residues_by_selection(structure, xpdb.ProteinSelect()) 125 xpdb.write_pdb(structure, output, inclusions=residues, **kwargs)

126

128 """Write a pdb file with the lipids extracted. 129 130 Note that resnames are also tried truncated to the first three 131 characters, which means that POPE and POPG are identical and 132 cannot be distinguished. 133 """ 134 warn("extract_lipids() will be removed", category=DeprecationWarning) 135 logger.debug("extract_lipids(%(pdbname)r, %(output)r, lipid_resnames=%(lipid_resnames)r)" % vars()) 136 resnames = lipid_resnames.split('|') 137 resnames.extend([r[:3] for r in resnames]) 138 139 structure = xpdb.get_structure(pdbname) 140 residues = selections.residues_by_selection(structure, xpdb.ResnameSelect(resnames)) 141 xpdb.write_pdb(structure, output, inclusions=residues, **kwargs)

142 143

144 -class PDB(object):

145 """Class that represents a PDB file and allows extractions of interesting parts. 146 147 The structure itself is never changed. In order to extract 148 sub-parts of a structure one selects these parts and writes them 149 as new pdb file. 150 151 The advantage over a simple :program:`grep` is that you will be able to 152 read any odd pdb file and you will also able to do things like 153 :meth:`~edPDB.cbook.PDB.extract_protein` or 154 :meth:`~edPDB.cbook.PDB.extract_lipids`. 155 """ 156

157 - def __init__(self, pdbname):

158 """Load structure from file *pdbname*.""" 159 self.pdbname = pdbname 160 self.structure = xpdb.get_structure(pdbname) 161 self.logger = logging.getLogger('edPDB.PDB') 162 self.logger.info("Loaded pdb file %(pdbname)r." % vars())

163

164 - def write(self, filename, **kwargs):

165 """Write pdbfile which includes or excludes residues. 166 167 :Arguments: 168 *filename* 169 output pdb filename 170 *inclusions* 171 list of residues to include 172 *exclusions* 173 list of residues to exclude 174 *chain* 175 relabel the selection with a new chain identifier 176 177 178 Residues must be BioPDB residues as returned by, for 179 instance, :meth:`~edPDB.cbook.PDB.residues_by_resname`. 180 181 .. Note:: Currently only either *inclusions* or *exclusions* can be 182 supplied, not both. 183 """ 184 self.logger.debug("write(): file %r, args %r", filename, kwargs.keys()) 185 xpdb.write_pdb(self.structure, filename, **kwargs)

186

187 - def residues_by_resname(self, resnames, **kwargs):

188 """Return a list of BioPDB residues that match *resnames*. 189 190 *resnames* can be a string or a list. 191 """ 192 return selections.residues_by_resname(self.structure, resnames)

193

194 - def residues_by_selection(self, selection):

195 """Return a list of BioPDB residues that are selected by *selection*. 196 197 *selection* must be BioPDB.PDB.PDBIO.Select instance (see for 198 example :class:`edPDB.xpdb.ProteinSelect`). 199 """ 200 return selections.residues_by_selection(self.structure, selection)

201

202 - def extract_resnames(self, filename, resnames, **kwargs):

203 """Write a pdb file with *resnames* extracted.""" 204 self.logger.info("extract_resnames(%(filename)r, %(resnames)r)" % vars()) 205 kwargs['selection'] = selections.ResnameSelect(resnames) 206 self.write(filename, **kwargs)

207

208 - def extract_protein(self, filename, **kwargs):

209 """Write a pdb file with the protein (i.e. all amino acids) extracted.""" 210 self.logger.info("extract_protein(%(filename)r)" % vars()) 211 kwargs['selection'] = selections.ProteinSelect() 212 self.write(filename, **kwargs)

213

214 - def extract_notprotein(self, filename, **kwargs):

215 """Write a pdb file without any amino acids extracted.""" 216 self.logger.info("extract_notprotein(%(filename)r)" % vars()) 217 kwargs['selection'] = selections.NotProteinSelect() 218 self.write(filename, **kwargs)

219

221 """Write a pdb file with the lipids extracted. 222 223 Note that resnames are also tried truncated to the first three 224 characters, which means that POPE and POPG are identical and 225 cannot be distinguished. 226 """ 227 self.logger.info("extract_lipids(%(filename)r, lipid_resnames=%(lipid_resnames)r)" % vars()) 228 resnames = lipid_resnames.split('|') 229 resnames.extend([r[:3] for r in resnames]) 230 kwargs['selection'] = selections.ResnameSelect(resnames) 231 self.write(filename, **kwargs)

232

Source Code for Module edPDB.cbook