1
2 """
3 :mod:`edPDB.cbook` -- Recipes for editing PDB files
4 ===================================================
5
6 The cook book contains short python functions that demonstrate how to
7 implement basic PDB editing functionality. They do not do exhaustive
8 error checking and might have to be altered for your purpose.
9
10 .. autoclass:: PDB
11 :members:
12 .. autofunction:: align_ligand
13 .. autofunction:: remove_overlap_water
14 .. autofunction:: extract_residue
15
16 """
17
18 import os.path
19 from warnings import warn
20
21 import Bio.PDB
22
23 import xpdb
24 import selections
25
26 import logging
27 logger = logging.getLogger('edPDB.cbook')
28
29 -def align_ligand(protein_struct, ligand_struct, ligand_resname, output='ligand_aligned.pdb'):
30 """Align a ligand to the same ligand in a protein, based on the heavy atoms.
31
32 This is useful when a new ligand was generated with hydrogens but
33 the position in space changed.
34
35 :Arguments:
36 *protein_struct*
37 protein + ligand pdb
38 *ligand_struct*
39 ligand pdb
40 *ligand_resname*
41 residue name of the ligand in the file *protein_struct*
42 *ligand_aligned*
43 output
44
45 :Returns: RMSD of the fit in Angstroem.
46
47 .. Warning:: Assumes only heavy atoms in PDB (I think... check source!)
48 """
49 logger.debug("align_ligand(%(protein_struct)r, %(ligand_struct)r, "
50 "%(ligand_resname)r, output=%(output)r)" % vars())
51
52 prot = xpdb.get_structure('prot', protein_struct)
53 lig = xpdb.get_structure('lig', ligand_struct)
54 plig = selections.residues_by_resname(prot, ligand_resname)
55
56 heavyMoving = [a for a in lig.get_atoms() if a.name[0] != 'H']
57 heavyFixed = Bio.PDB.Selection.unfold_entities(plig, 'A')
58
59 S = Bio.PDB.Superimposer()
60 S.set_atoms(heavyFixed, heavyMoving)
61 S.apply(lig.get_atoms())
62
63 io = Bio.PDB.PDBIO()
64 io.set_structure(lig)
65 io.save(output)
66
67 logger.info("Wrote aligned ligand %r (RMSD = %g A)" % (ligand_aligned, S.rms))
68 return S.rms
69
70
72 """Remove water (SOL) molecules overlapping with ligand.
73
74 :Arguments:
75 pdbname
76 pdb file that contains the ligand and the water
77 output
78 pdb output filename
79 ligand_resname
80 name of the ligand residue(s) in the pdb
81 distance
82 overlap is defined as a centre-centre distance of any solvent
83 OW atom with any ligand atom of less than *distance*
84
85 .. note:: The residue and atom numbering will be fairly
86 meaningless in the final PDB because it wraps at 100,000
87 or 10,000.
88
89 Also make sure that there are either consistent chain
90 identifiers or none (blank) because otherwise the
91 residue blocks migh become reordered. (This is due to the
92 way the Bio.PDB.PDBIO writes files.)
93 """
94 logger.debug("remove_overlap_water(%(pdbname)r, %(output)r, %(ligand_resname)r, "
95 "distance=%(distance)r)" % vars())
96
97 structure = xpdb.get_structure(pdbname)
98 ligand = selections.residues_by_resname(structure, ligand_resname)
99
100 w = xpdb.find_water(structure, ligand, radius=distance, water=water)
101 logger.debug("waters found: %r" % w)
102 logger.info("removed %d %r molecules overlapping %r",
103 len(w), water, ligand_resname)
104 xpdb.write_pdb(structure, output, exclusions=w, **kwargs)
105
106
107
109 """Write a pdb file with *resname* extracted.
110
111 """
112 warn("extract_resname() will be removed", category=DeprecationWarning)
113 logger.debug("extract_residue(%(pdbname)r, %(output)r, %(resnames)r)" % vars())
114 structure = xpdb.get_structure(pdbname)
115 residues = selections.residues_by_resname(structure, resnames)
116 xpdb.write_pdb(structure, output, inclusions=residues, **kwargs)
117
119 """Write a pdb file with the protein (i.e. all amino acids) extracted.
120 """
121 warn("extract_protein() will be removed", category=DeprecationWarning)
122 logger.debug("extract_protein(%(pdbname)r, %(output)r)" % vars())
123 structure = xpdb.get_structure(pdbname)
124 residues = selections.residues_by_selection(structure, xpdb.ProteinSelect())
125 xpdb.write_pdb(structure, output, inclusions=residues, **kwargs)
126
128 """Write a pdb file with the lipids extracted.
129
130 Note that resnames are also tried truncated to the first three
131 characters, which means that POPE and POPG are identical and
132 cannot be distinguished.
133 """
134 warn("extract_lipids() will be removed", category=DeprecationWarning)
135 logger.debug("extract_lipids(%(pdbname)r, %(output)r, lipid_resnames=%(lipid_resnames)r)" % vars())
136 resnames = lipid_resnames.split('|')
137 resnames.extend([r[:3] for r in resnames])
138
139 structure = xpdb.get_structure(pdbname)
140 residues = selections.residues_by_selection(structure, xpdb.ResnameSelect(resnames))
141 xpdb.write_pdb(structure, output, inclusions=residues, **kwargs)
142
143
145 """Class that represents a PDB file and allows extractions of interesting parts.
146
147 The structure itself is never changed. In order to extract
148 sub-parts of a structure one selects these parts and writes them
149 as new pdb file.
150
151 The advantage over a simple :program:`grep` is that you will be able to
152 read any odd pdb file and you will also able to do things like
153 :meth:`~edPDB.cbook.PDB.extract_protein` or
154 :meth:`~edPDB.cbook.PDB.extract_lipids`.
155 """
156
158 """Load structure from file *pdbname*."""
159 self.pdbname = pdbname
160 self.structure = xpdb.get_structure(pdbname)
161 self.logger = logging.getLogger('edPDB.PDB')
162 self.logger.info("Loaded pdb file %(pdbname)r." % vars())
163
164 - def write(self, filename, **kwargs):
165 """Write pdbfile which includes or excludes residues.
166
167 :Arguments:
168 *filename*
169 output pdb filename
170 *inclusions*
171 list of residues to include
172 *exclusions*
173 list of residues to exclude
174 *chain*
175 relabel the selection with a new chain identifier
176
177
178 Residues must be BioPDB residues as returned by, for
179 instance, :meth:`~edPDB.cbook.PDB.residues_by_resname`.
180
181 .. Note:: Currently only either *inclusions* or *exclusions* can be
182 supplied, not both.
183 """
184 self.logger.debug("write(): file %r, args %r", filename, kwargs.keys())
185 xpdb.write_pdb(self.structure, filename, **kwargs)
186
188 """Return a list of BioPDB residues that match *resnames*.
189
190 *resnames* can be a string or a list.
191 """
192 return selections.residues_by_resname(self.structure, resnames)
193
195 """Return a list of BioPDB residues that are selected by *selection*.
196
197 *selection* must be BioPDB.PDB.PDBIO.Select instance (see for
198 example :class:`edPDB.xpdb.ProteinSelect`).
199 """
200 return selections.residues_by_selection(self.structure, selection)
201
203 """Write a pdb file with *resnames* extracted."""
204 self.logger.info("extract_resnames(%(filename)r, %(resnames)r)" % vars())
205 kwargs['selection'] = selections.ResnameSelect(resnames)
206 self.write(filename, **kwargs)
207
209 """Write a pdb file with the protein (i.e. all amino acids) extracted."""
210 self.logger.info("extract_protein(%(filename)r)" % vars())
211 kwargs['selection'] = selections.ProteinSelect()
212 self.write(filename, **kwargs)
213
219
221 """Write a pdb file with the lipids extracted.
222
223 Note that resnames are also tried truncated to the first three
224 characters, which means that POPE and POPG are identical and
225 cannot be distinguished.
226 """
227 self.logger.info("extract_lipids(%(filename)r, lipid_resnames=%(lipid_resnames)r)" % vars())
228 resnames = lipid_resnames.split('|')
229 resnames.extend([r[:3] for r in resnames])
230 kwargs['selection'] = selections.ResnameSelect(resnames)
231 self.write(filename, **kwargs)
232