Package edPDB :: Module selections
[hide private]
[frames | no frames]

Source Code for Module edPDB.selections

  1  # edPDB.selections  
  2  """ 
  3  :mod:`edPDB.selections` --- Selections 
  4  ====================================== 
  5   
  6  Extensions to Bio.PDB, some useful selections. 
  7   
  8  Partly published on http://biopython.org/wiki/Reading_large_PDB_files 
  9   
 10  License: like Biopython 
 11   
 12  Selection classes 
 13  ----------------- 
 14   
 15  Provide an instance to PDBIO to select a subset of a structure or use 
 16  it with :func:`residues_by_selection` to obtain a list of residues. 
 17   
 18  .. autoclass:: ResnameSelect 
 19  .. autoclass:: ResidueSelect 
 20  .. autoclass:: NotResidueSelect 
 21  .. autoclass:: ProteinSelect 
 22  .. autoclass:: NotProteinSelect 
 23   
 24  Selection functions 
 25  ------------------- 
 26   
 27  Functions always act on a structure and return a list of residues. 
 28   
 29  .. autofunction::  residues_by_resname 
 30  .. autofunction::  residues_by_selection 
 31  .. autofunction::  find_water 
 32   
 33   
 34  Utility functions 
 35  ----------------- 
 36   
 37  .. autofunction::  canonical 
 38  .. autodata::      PROTEIN_RESNAMES 
 39  """ 
 40   
 41  import Bio.PDB 
 42  from Bio.PDB.PDBIO import Select 
 43  from Bio.PDB.Residue import Residue 
 44   
 45  from  utilities import asiterable 
 46   
 47   
 48  #: List of residue names that determine what is recognized as a 
 49  #: protein with :class:`ProteinSelect`. Can be extended with non-standard residues. 
 50  PROTEIN_RESNAMES = {'ALA':'A', 'ARG':'R', 'ASN':'N', 'ASP':'D', 
 51                      'CYS':'C', 'GLN':'Q', 'GLU':'E', 'GLY':'G',  
 52                      'HIS':'H', 'HSD':'H', 'HSE':'H', 'HSP':'H',  
 53                      'ILE':'I', 'LEU':'L', 'LYS':'K', 'MET':'M',  
 54                      'PHE':'F', 'PRO':'P', 'SER':'S', 'THR':'T', 
 55                      'TRP':'W', 'TYR':'Y', 'VAL':'V',  
 56                      'ALAD':'AA', 'CHO':'?', 'EAM':'?'} 
 57   
def canonical(resname):
    """Return the canonical form of *resname* used for comparisons.

    The canonical form has surrounding whitespace removed and is
    converted to upper case.
    """
    stripped = resname.strip()
    return stripped.upper()
# TODO: Why do I use these clunky complement selectors?
#       It would be cleaner to simply set up two classes;
#       maybe have NotX inherit from X but override everything.
class ResnameSelect(Select):
    """Select all atoms of residues whose name matches *resnames*.

    Residue names are compared in their :func:`canonical` form. With
    *complement* set to ``True`` the selection is inverted.
    """

    def __init__(self, resnames, complement=False):
        """Supply a *resname*, e.g. 'SOL' or 'PHE', or a list of names."""
        # Stored as a dict (every value is True) so that the membership
        # test in the accept methods works on dict keys.
        self.resnames = dict.fromkeys(
            (canonical(name) for name in asiterable(resnames)), True)
        # Pick the accept hook once, at construction time.
        self.accept_residue = (
            self._accept_not_residue if complement else self._accept_residue)

    def _accept_residue(self, residue):
        """Return True if the residue's canonical name was requested."""
        # TODO: check whether the dict lookup really is faster than a
        # list; this test seems to be a bottleneck.
        name = canonical(residue.resname)
        return name in self.resnames

    def _accept_not_residue(self, residue):
        """Return True if the residue's canonical name was *not* requested."""
        name = canonical(residue.resname)
        return name not in self.resnames
84 85
class ResidueSelect(Select):
    """Select all atoms that belong to one of the given *residues*.

    With *complement* set to ``True`` the selection is inverted.
    """

    def __init__(self, residues, complement=False):
        """Supply the Bio.PDB residues that the selection is tested against."""
        self.residues = residues
        # Pick the accept hook once, at construction time.
        self.accept_residue = (
            self._accept_not_residue if complement else self._accept_residue)

    def _accept_residue(self, residue):
        """Return True if *residue* is contained in the stored residues."""
        return residue in self.residues

    def _accept_not_residue(self, residue):
        """Return True if *residue* is not contained in the stored residues."""
        return residue not in self.residues
99
class NotResidueSelect(ResidueSelect):
    """Select all atoms that are *not* in the *residues* list.

    Equivalent to ``ResidueSelect(residues, complement=True)``; passing
    ``complement=True`` here flips the selection back again.
    """

    def __init__(self, residues, complement=False):
        """Supply a list of Bio.PDB residues that should be excluded."""
        inverted = not complement
        ResidueSelect.__init__(self, residues, complement=inverted)
108
class ProteinSelect(Select):
    """Select all amino acid residues.

    A residue counts as an amino acid when its :func:`canonical` name is
    a key of :data:`PROTEIN_RESNAMES`. With *complement* set to ``True``
    the selection is inverted.
    """

    def __init__(self, complement=False):
        """Set up the selection; *complement* inverts it."""
        # Pick the accept hook once, at construction time.
        self.accept_residue = (
            self._accept_not_residue if complement else self._accept_residue)

    def _accept_residue(self, residue):
        """Return True if the residue name is a known protein residue name."""
        name = canonical(residue.resname)
        return name in PROTEIN_RESNAMES

    def _accept_not_residue(self, residue):
        """Return True if the residue name is not a known protein residue name."""
        name = canonical(residue.resname)
        return name not in PROTEIN_RESNAMES
120
class NotProteinSelect(ProteinSelect):
    """Select all non-amino-acid residues.

    Equivalent to ``ProteinSelect(complement=True)``; passing
    ``complement=True`` here flips the selection back again.
    """

    def __init__(self, complement=False):
        """Set up the inverted protein selection; takes no residue list."""
        inverted = not complement
        ProteinSelect.__init__(self, complement=inverted)
126 127
def residues_by_resname(structure, resnames):
    """Return a list of the residues in *structure* that match *resnames*.

    *resnames* can be a single string or a list of strings.
    """
    by_name = ResnameSelect(resnames)
    return residues_by_selection(structure, by_name)
136
def residues_by_selection(structure, selection):
    """General residue selection on *structure*.

    *selection* is a Bio.PDB.PDBIO.Select instance; every residue for
    which ``selection.accept_residue(residue)`` is true is returned, in
    the order in which the structure is unfolded to residue level.
    """
    accepted = []
    for residue in Bio.PDB.Selection.unfold_entities(structure, 'R'):
        if selection.accept_residue(residue):
            accepted.append(residue)
    return accepted
141 142
def find_water(structure, ligand, radius=3.0, water='SOL'):
    """Find all water (SOL) molecules within radius of ligand.

    :Arguments:
      *structure*
          Bio.PDB structure of system with water
      *ligand* : list
          Bio.PDB list of atoms of the ligand (Bio.PDB.Atom.Atom
          instances)
      *radius* : float
          Find waters for which the ligand-atom - OW distance is <
          radius [3.0]
      *water* : string
          resname of a water molecule [SOL]

    :Returns: sorted list of residue instances (each water only once);
              empty if *structure* contains no residues named *water*
    """
    # Local import: this module defines no module-level logger.
    import logging

    logger = logging.getLogger(__name__)

    # get all SOL (water)
    solvent = residues_by_resname(structure, water)
    if not solvent:
        # No water at all: nothing can be within range, and this avoids
        # building a neighbour search over an empty atom list.
        return []

    # One oxygen ("OW") per water residue is used as the search target.
    # (The outer loop over residues must come first in the comprehension.)
    ow_atoms = [atom for residue in solvent
                for atom in residue.get_list() if atom.name == "OW"]
    # sanity check:
    assert len(solvent) == len(ow_atoms), \
        "number of OW atoms differs from number of water residues"

    # set up KDtree neighbour search (use the biggest group for the
    # tree, i.e. solvent not the ligand)
    ns = Bio.PDB.NeighborSearch(ow_atoms)

    names_centers = [(a.name, a.get_coord())
                     for a in Bio.PDB.Selection.unfold_entities(ligand, 'A')]

    # A plain set collects the OW atoms so that a water close to several
    # ligand atoms is only kept once.
    water_shell = set()
    for name, center in names_centers:
        shell = ns.search(center, radius)
        logger.debug("around %6r: %3d OW = %s", name, len(shell), shell)
        water_shell.update(shell)
    return sorted([atom.parent for atom in water_shell])
185