Package edPDB :: Module selections
[hide private]
[frames] | no frames]

Source Code for Module edPDB.selections

  1  # edPDB.selections  
  2  """ 
  3  :mod:`edPDB.selections` --- Selections 
  4  ====================================== 
  5   
  6  Extensions to Bio.PDB, some useful selections. 
  7   
  8  Partly published on http://biopython.org/wiki/Reading_large_PDB_files 
  9   
 10  License: like Biopython 
 11   
 12  Selection classes 
 13  ----------------- 
 14   
 15  Provide an instance to PDBIO to select a subset of a structure or use 
 16  it with :func:`residues_by_selection` to obtain a list of residues. 
 17   
 18  .. autoclass:: ResnameSelect 
 19  .. autoclass:: ResidueSelect 
 20  .. autoclass:: NotResidueSelect 
 21  .. autoclass:: ProteinSelect 
 22   
 23  Selection functions 
 24  ------------------- 
 25   
 26  Functions always act on a structure and return a list of residues. 
 27   
 28  .. autofunction::  residues_by_resname 
 29  .. autofunction::  residues_by_selection 
 30  .. autofunction::  find_water 
 31   
 32   
 33  Utility functions 
 34  ----------------- 
 35   
 36  .. autofunction::  canonical 
 37  .. autodata::      PROTEIN_RESNAMES 
 38  """ 
 39   
 40  import Bio.PDB 
 41  from Bio.PDB.PDBIO import Select 
 42  from Bio.PDB.Residue import Residue 
 43   
 44  from  utilities import asiterable 
 45   
 46   
 47  #: Mapping of residue names (keys) to short codes (values); the keys determine what is 
 48  #: recognized as protein by :class:`ProteinSelect`. Can be extended with non-standard residues. 
 49  PROTEIN_RESNAMES = {'ALA':'A', 'ARG':'R', 'ASN':'N', 'ASP':'D', 
 50                      'CYS':'C', 'GLN':'Q', 'GLU':'E', 'GLY':'G',  
 51                      'HIS':'H', 'HSD':'H', 'HSE':'H', 'HSP':'H',  
 52                      'ILE':'I', 'LEU':'L', 'LYS':'K', 'MET':'M',  
 53                      'PHE':'F', 'PRO':'P', 'SER':'S', 'THR':'T', 
 54                      'TRP':'W', 'TYR':'Y', 'VAL':'V',  
 55                      'ALAD':'AA', 'CHO':'?', 'EAM':'?'} 
 56   
def canonical(resname):
    """Return the canonical representation of *resname*.

    The canonical form has leading and trailing whitespace removed and
    is converted to upper case.
    """
    trimmed = resname.strip()
    return trimmed.upper()
63
class ResnameSelect(Select):
    """Select all atoms whose residue name matches one of *resnames*.

    Names are compared in their canonical form (see :func:`canonical`).
    """

    def __init__(self, resnames, complement=False):
        """Set up the selection.

        :Arguments:
          *resnames*
              a single residue name, e.g. 'SOL' or 'PHE', or a list of names
          *complement*
              if ``True``, accept the residues that do *not* match [``False``]
        """
        # Names are stored as dict keys (all values are True) because
        # membership tests on dict keys are expected to be faster than on
        # a list; the residue test was noted as a possible bottleneck.
        names = (canonical(name) for name in asiterable(resnames))
        self.resnames = dict.fromkeys(names, True)
        self.accept_residue = (self._accept_not_residue if complement
                               else self._accept_residue)

    def _accept_residue(self, residue):
        """Return ``True`` if the name of *residue* is one of the stored names."""
        name = canonical(residue.resname)
        return name in self.resnames

    def _accept_not_residue(self, residue):
        """Return ``True`` if the name of *residue* is not one of the stored names."""
        name = canonical(residue.resname)
        return name not in self.resnames
79 80
class ResidueSelect(Select):
    """Select all atoms that belong to one of the given *residues*."""

    def __init__(self, residues, complement=False):
        """Set up the selection.

        :Arguments:
          *residues*
              list of Bio.PDB residues that is searched
          *complement*
              if ``True``, accept the residues that are *not* in the
              list [``False``]
        """
        self.residues = residues
        self.accept_residue = (self._accept_not_residue if complement
                               else self._accept_residue)

    def _accept_residue(self, residue):
        """Return ``True`` if *residue* is contained in the stored residues."""
        return residue in self.residues

    def _accept_not_residue(self, residue):
        """Return ``True`` if *residue* is not contained in the stored residues."""
        return residue not in self.residues
94
class NotResidueSelect(ResidueSelect):
    """Select all atoms that are *not* in the *residues* list.

    Equivalent to :class:`ResidueSelect` with ``complement=True``.
    """

    def __init__(self, residues, complement=False):
        """Supply a list of Bio.PDB residues for the search.

        The *complement* flag is inverted before it is handed to
        :class:`ResidueSelect`, so ``complement=True`` selects the
        residues that *are* in the list.
        """
        inverted = not complement
        ResidueSelect.__init__(self, residues, complement=inverted)
103
class ProteinSelect(Select):
    """Select all amino acid residues.

    A residue is accepted if its canonical name is a key of
    :data:`PROTEIN_RESNAMES`.
    """

    def __init__(self, complement=False):
        """Set up the selection.

        :Arguments:
          *complement*
              if ``True``, accept the residues that are *not* recognized
              as protein [``False``]
        """
        self.accept_residue = (self._accept_not_residue if complement
                               else self._accept_residue)

    def _accept_residue(self, residue):
        """Return ``True`` if the name of *residue* is a known protein residue name."""
        name = canonical(residue.resname)
        return name in PROTEIN_RESNAMES

    def _accept_not_residue(self, residue):
        """Return ``True`` if the name of *residue* is not a known protein residue name."""
        name = canonical(residue.resname)
        return name not in PROTEIN_RESNAMES
115 116 117
def residues_by_resname(structure, resnames):
    """Return a list of the residue instances in *structure* that match *resnames*.

    *resnames* may be a single string or a list of strings.
    """
    by_name = ResnameSelect(resnames)
    return residues_by_selection(structure, by_name)
126
def residues_by_selection(structure, selection):
    """General residue selection.

    Supply a Bio.PDB.PDBIO.Select instance as *selection*. Returns the
    list of residues of *structure* for which
    ``selection.accept_residue`` is true.
    """
    accepted = []
    for residue in Bio.PDB.Selection.unfold_entities(structure, 'R'):
        if selection.accept_residue(residue):
            accepted.append(residue)
    return accepted
131 132
def find_water(structure, ligand, radius=3.0, water='SOL'):
    """Find all water (SOL) molecules within *radius* of *ligand*.

    :Arguments:
      *structure*
          Bio.PDB structure of system with water
      *ligand* : list
          Bio.PDB list of atoms of the ligand (Bio.PDB.Atom.Atom
          instances)
      *radius* : float
          Find waters for which the ligand-atom - OW distance is <
          radius [3.0]
      *water* : string
          resname of a water molecule [SOL]

    :Returns: sorted list of residue instances; empty if *structure*
              contains no residues named *water*

    :Raises: :exc:`AssertionError` if the number of OW atoms differs
             from the number of water residues
    """
    # Imported here because this module does not import logging (or
    # define a logger) at file level.
    import logging
    logger = logging.getLogger(__name__)

    # get all SOL (water) residues and their oxygen atoms; the residue
    # loop must be the outer (first) clause of the comprehension
    solvent = residues_by_resname(structure, water)
    solvent_OW = [a for r in solvent for a in r.get_list() if a.name == "OW"]
    # sanity check:
    assert len(solvent) == len(solvent_OW), (
        "found %d OW atoms in %d %r residues"
        % (len(solvent_OW), len(solvent), water))

    if not solvent_OW:
        # no water at all: nothing can be near the ligand, and there is
        # no point in building a neighbour search on an empty atom list
        return []

    # set up KDtree neighbour search (use the biggest group for the
    # tree, i.e. solvent not the ligand)
    ns = Bio.PDB.NeighborSearch(solvent_OW)

    # A set of OW atoms is used so that a water found near several
    # ligand atoms is only reported once.
    water_shell = set()
    for atom in Bio.PDB.Selection.unfold_entities(ligand, 'A'):
        shell = ns.search(atom.get_coord(), radius)
        logger.debug("around %6r: %3d OW = %s", atom.name, len(shell), shell)
        water_shell.update(shell)
    return sorted([a.parent for a in water_shell])
175