1
2 """
3 :mod:`edPDB.xpdb` -- Extensions to :mod:`Bio.PDB`
4 =================================================
5
6 Extensions to Bio.PDB, such as handling of large pdb files and some
7 useful selections (see :mod:`edPDB.selections`).
8
9 Partly published on http://biopython.org/wiki/Reading_large_PDB_files
10
11 License: like Biopython
12
13 Module content
14 --------------
15
16 .. autoclass:: SloppyStructureBuilder
17 .. autoclass:: SloppyPDBIO
18 .. autoclass:: AtomGroup
19
20 .. autofunction:: get_structure
21 .. autofunction:: write_pdb
22 """
23
24 import sys
25 import Bio.PDB
26 import Bio.PDB.StructureBuilder
27
28 from utilities import asiterable
29
30 import logging
31 logger = logging.getLogger('edPDB.xpdb')
34 """Cope with resSeq < 10,000 limitation by just incrementing internally.
35
36 Solves the follwing problem with :class:`Bio.PDB.StructureBuilder.StructureBuilder`:
37 Q: What's wrong here??
38 Some atoms or residues will be missing in the data structure.
39 WARNING: Residue (' ', 8954, ' ') redefined at line 74803.
40 PDBConstructionException: Blank altlocs in duplicate residue SOL (' ', 8954, ' ') at line 74803.
41
42 A: resSeq only goes to 9999 --> goes back to 0 (PDB format is not really good here)
43
44 .. warning:: H and W records are probably not handled yet (don't have examples to test)
45 """
46
48 Bio.PDB.StructureBuilder.StructureBuilder.__init__(self)
49 self.max_resseq = -1
50 self.verbose = verbose
51
53 """
54 Initiate a new Residue object.
55
56 Arguments:
57 o resname - string, e.g. "ASN"
58 o field - hetero flag, "W" for waters, "H" for
59 hetero residues, otherwise blanc.
60 o resseq - int, sequence identifier
61 o icode - string, insertion code
62 """
63 if field!=" ":
64 if field=="H":
65
66 field="H_"+resname
67 res_id=(field, resseq, icode)
68
69 if resseq > self.max_resseq:
70 self.max_resseq = resseq
71
72 if field==" ":
73 fudged_resseq = False
74 while (self.chain.has_id(res_id) or resseq == 0):
75
76
77
78
79
80
81
82
83 self.max_resseq += 1
84 resseq = self.max_resseq
85 res_id = (field, resseq, icode)
86 fudged_resseq = True
87
88 if fudged_resseq and self.verbose:
89 logger.debug("Residues are wrapping (Residue ('%s', %i, '%s') at line %i)."
90 % (field, resseq, icode, self.line_counter) +
91 ".... assigning new resid %d.\n" % self.max_resseq)
92 residue=Bio.PDB.Residue.Residue(res_id, resname, self.segid)
93 self.chain.add(residue)
94 self.residue=residue
95
97 """PDBIO class that can deal with large pdb files as used in MD simulations.
98
99 - resSeq simply wrap and are printed modulo 10,000.
100 - atom numbers wrap at 99,999 and are printed modulo 100,000
101 """
102
103
104 _ATOM_FORMAT_STRING="%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n"
105
106 - def _get_atom_line(self, atom, hetfield, segid, atom_number, resname,
107 resseq, icode, chain_id, element=" ", charge=" "):
108 """
109 Returns an ATOM PDB string that is guaranteed to fit into the ATOM format.
110
111 - Resid (resseq) is wrapped (modulo 10,000) to fit into %4i (4I) format
112 - Atom number (atom_number) is wrapped (modulo 100,000) to fit into %4i (4I) format
113 """
114 if hetfield!=" ":
115 record_type="HETATM"
116 else:
117 record_type="ATOM "
118 name=atom.get_fullname()
119 altloc=atom.get_altloc()
120 x, y, z=atom.get_coord()
121 bfactor=atom.get_bfactor()
122 occupancy=atom.get_occupancy()
123 args=(record_type, atom_number % 100000, name, altloc, resname, chain_id,
124 resseq % 10000, icode, x, y, z, occupancy, bfactor, segid,
125 element, charge)
126 return self._ATOM_FORMAT_STRING % args
127
128
129
130
131 sloppyparser = Bio.PDB.PDBParser(PERMISSIVE=True,structure_builder=SloppyStructureBuilder())
135
141
142 @property
144 return [(a.parent.resname, a.parent.id[1], a.name) for a in self]
145
147 return "[" + ", ".join(["%s%d:%s" % x for x in self.identifiers]) + "]"
148
149
150 from selections import ResidueSelect, NotResidueSelect
151
152 writelogger = logging.getLogger('edPDB.write_pdb')
153 -def write_pdb(structure, filename, **kwargs):
154 """Write Bio.PDB molecule *structure* to *filename*.
155
156 :Arguments:
157 *structure*
158 Bio.PDB structure instance
159 *filename*
160 pdb file
161 *selection*
162 Bio.PDB.Selection
163 *exclusions*
164 list of **residue** instances that will *not* be included
165 *inclusions*
166 list of **residue** instances that will be included
167 *chain*
168 set the chain identifier for **all** atoms written; this can be
169 useful to simply to erase all chain ids by setting it to ' '
170
171 Typical use is to supply a list of water molecules that should not
172 be written or a ligand that should be include.
173
174 .. Note:: Currently only one of *selection, *exclusions* or
175 *inclusions* is supported.
176 """
177 exclusions = kwargs.pop('exclusions', None)
178 inclusions = kwargs.pop('inclusions', None)
179 selection = kwargs.pop('selection', None)
180
181 if selection and (exclusions or inclusions):
182 raise NotImplementedError("only use selection on its own")
183 if exclusions and inclusions:
184 raise NotImplementedError("Sorry, only either exclusions OR "
185 "inclusions are supported")
186 if exclusions:
187 selection = NotResidueSelect(exclusions)
188 elif inclusions:
189 selection = ResidueSelect(inclusions)
190
191 chain = kwargs.pop('chain',None)
192 if not chain is None:
193 writelogger.info("Setting the chain id for ALL atoms to %(chain)r", vars())
194 for c in structure.get_chains():
195 c.id = chain
196
197 writelogger.debug("Starting PDBIO...")
198 io = SloppyPDBIO()
199 io.set_structure(structure)
200 io.save(filename, select=selection)
201
202 writelogger.info("Wrote pdb %(filename)r." % vars())
203