Spaces:
No application file
No application file
| # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) | |
| # | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Classify protein backbone structure with Kolodny et al's fragment libraries. | |
| It can be regarded as a form of objective secondary structure classification. | |
| Only fragments of length 5 or 7 are supported (ie. there is a 'central' | |
| residue). | |
| Full reference: | |
| Kolodny R, Koehl P, Guibas L, Levitt M. | |
| Small libraries of protein fragments model native protein structures accurately. | |
| J Mol Biol. 2002 323(2):297-307. | |
| The definition files of the fragments can be obtained from: | |
| http://github.com/csblab/fragments/ | |
| You need these files to use this module. | |
| The following example uses the library with 10 fragments of length 5. | |
| The library files can be found in directory 'fragment_data'. | |
| >>> from Bio.PDB.PDBParser import PDBParser | |
| >>> from Bio.PDB.FragmentMapper import FragmentMapper | |
| >>> parser = PDBParser() | |
| >>> structure = parser.get_structure("1a8o", "PDB/1A8O.pdb") | |
| >>> model = structure[0] | |
| >>> fm = FragmentMapper(model, lsize=10, flength=5, fdir="PDB") | |
| >>> chain = model['A'] | |
| >>> res152 = chain[152] | |
| >>> res157 = chain[157] | |
| >>> res152 in fm # is res152 mapped? (fragment of a C-alpha polypeptide) | |
| False | |
| >>> res157 in fm # is res157 mapped? (fragment of a C-alpha polypeptide) | |
| True | |
| """ | |
| import numpy | |
| from Bio.SVDSuperimposer import SVDSuperimposer | |
| from Bio.PDB.PDBExceptions import PDBException | |
| from Bio.PDB.Polypeptide import PPBuilder | |
| # fragment file (lib_SIZE_z_LENGTH.txt) | |
| # SIZE=number of fragments | |
| # LENGTH=length of fragment (4,5,6,7) | |
| _FRAGMENT_FILE = "lib_%s_z_%s.txt" | |
| def _read_fragments(size, length, dir="."): | |
| """Read a fragment spec file (PRIVATE). | |
| Read a fragment spec file available from | |
| http://github.com/csblab/fragments/ | |
| and return a list of Fragment objects. | |
| :param size: number of fragments in the library | |
| :type size: int | |
| :param length: length of the fragments | |
| :type length: int | |
| :param dir: directory where the fragment spec files can be found | |
| :type dir: string | |
| """ | |
| filename = (dir + "/" + _FRAGMENT_FILE) % (size, length) | |
| with open(filename) as fp: | |
| flist = [] | |
| # ID of fragment=rank in spec file | |
| fid = 0 | |
| for line in fp: | |
| # skip comment and blank lines | |
| if line[0] == "*" or line[0] == "\n": | |
| continue | |
| sl = line.split() | |
| if sl[1] == "------": | |
| # Start of fragment definition | |
| f = Fragment(length, fid) | |
| flist.append(f) | |
| # increase fragment id (rank) | |
| fid += 1 | |
| continue | |
| # Add CA coord to Fragment | |
| coord = numpy.array([float(x) for x in sl[0:3]]) | |
| # XXX= dummy residue name | |
| f.add_residue("XXX", coord) | |
| return flist | |
| class Fragment: | |
| """Represent a polypeptide C-alpha fragment.""" | |
| def __init__(self, length, fid): | |
| """Initialize fragment object. | |
| :param length: length of the fragment | |
| :type length: int | |
| :param fid: id for the fragment | |
| :type fid: int | |
| """ | |
| # nr of residues in fragment | |
| self.length = length | |
| # nr of residues added | |
| self.counter = 0 | |
| self.resname_list = [] | |
| # CA coordinate matrix | |
| self.coords_ca = numpy.zeros((length, 3), "d") | |
| self.fid = fid | |
| def get_resname_list(self): | |
| """Get residue list. | |
| :return: the residue names | |
| :rtype: [string, string,...] | |
| """ | |
| return self.resname_list | |
| def get_id(self): | |
| """Get identifier for the fragment. | |
| :return: id for the fragment | |
| :rtype: int | |
| """ | |
| return self.fid | |
| def get_coords(self): | |
| """Get the CA coordinates in the fragment. | |
| :return: the CA coords in the fragment | |
| :rtype: Numeric (Nx3) array | |
| """ | |
| return self.coords_ca | |
| def add_residue(self, resname, ca_coord): | |
| """Add a residue. | |
| :param resname: residue name (eg. GLY). | |
| :type resname: string | |
| :param ca_coord: the c-alpha coordinates of the residues | |
| :type ca_coord: Numeric array with length 3 | |
| """ | |
| if self.counter >= self.length: | |
| raise PDBException("Fragment boundary exceeded.") | |
| self.resname_list.append(resname) | |
| self.coords_ca[self.counter] = ca_coord | |
| self.counter = self.counter + 1 | |
| def __len__(self): | |
| """Return length of the fragment.""" | |
| return self.length | |
| def __sub__(self, other): | |
| """Return rmsd between two fragments. | |
| :return: rmsd between fragments | |
| :rtype: float | |
| Examples | |
| -------- | |
| This is an incomplete but illustrative example:: | |
| rmsd = fragment1 - fragment2 | |
| """ | |
| sup = SVDSuperimposer() | |
| sup.set(self.coords_ca, other.coords_ca) | |
| sup.run() | |
| return sup.get_rms() | |
| def __repr__(self): | |
| """Represent the fragment object as a string. | |
| Returns <Fragment length=L id=ID> where L=length of fragment | |
| and ID the identifier (rank in the library). | |
| """ | |
| return "<Fragment length=%i id=%i>" % (self.length, self.fid) | |
| def _make_fragment_list(pp, length): | |
| """Dice up a peptide in fragments of length "length" (PRIVATE). | |
| :param pp: a list of residues (part of one peptide) | |
| :type pp: [L{Residue}, L{Residue}, ...] | |
| :param length: fragment length | |
| :type length: int | |
| """ | |
| frag_list = [] | |
| for i in range(0, len(pp) - length + 1): | |
| f = Fragment(length, -1) | |
| for j in range(0, length): | |
| residue = pp[i + j] | |
| resname = residue.get_resname() | |
| if residue.has_id("CA"): | |
| ca = residue["CA"] | |
| else: | |
| raise PDBException("CHAINBREAK") | |
| if ca.is_disordered(): | |
| raise PDBException("CHAINBREAK") | |
| ca_coord = ca.get_coord() | |
| f.add_residue(resname, ca_coord) | |
| frag_list.append(f) | |
| return frag_list | |
| def _map_fragment_list(flist, reflist): | |
| """Map flist fragments to closest entry in reflist (PRIVATE). | |
| Map all frgaments in flist to the closest (in RMSD) fragment in reflist. | |
| Returns a list of reflist indices. | |
| :param flist: list of protein fragments | |
| :type flist: [L{Fragment}, L{Fragment}, ...] | |
| :param reflist: list of reference (ie. library) fragments | |
| :type reflist: [L{Fragment}, L{Fragment}, ...] | |
| """ | |
| mapped = [] | |
| for f in flist: | |
| rank = [] | |
| for i in range(0, len(reflist)): | |
| rf = reflist[i] | |
| rms = f - rf | |
| rank.append((rms, rf)) | |
| rank.sort() | |
| fragment = rank[0][1] | |
| mapped.append(fragment) | |
| return mapped | |
| class FragmentMapper: | |
| """Map polypeptides in a model to lists of representative fragments.""" | |
| def __init__(self, model, lsize=20, flength=5, fdir="."): | |
| """Create instance of FragmentMapper. | |
| :param model: the model that will be mapped | |
| :type model: L{Model} | |
| :param lsize: number of fragments in the library | |
| :type lsize: int | |
| :param flength: length of fragments in the library | |
| :type flength: int | |
| :param fdir: directory where the definition files are | |
| found (default=".") | |
| :type fdir: string | |
| """ | |
| if flength == 5: | |
| self.edge = 2 | |
| elif flength == 7: | |
| self.edge = 3 | |
| else: | |
| raise PDBException("Fragment length should be 5 or 7.") | |
| self.flength = flength | |
| self.lsize = lsize | |
| self.reflist = _read_fragments(lsize, flength, fdir) | |
| self.model = model | |
| self.fd = self._map(self.model) | |
| def _map(self, model): | |
| """Map (PRIVATE). | |
| :param model: the model that will be mapped | |
| :type model: L{Model} | |
| """ | |
| ppb = PPBuilder() | |
| ppl = ppb.build_peptides(model) | |
| fd = {} | |
| for pp in ppl: | |
| try: | |
| # make fragments | |
| flist = _make_fragment_list(pp, self.flength) | |
| # classify fragments | |
| mflist = _map_fragment_list(flist, self.reflist) | |
| for i in range(0, len(pp)): | |
| res = pp[i] | |
| if i < self.edge: | |
| # start residues | |
| continue | |
| elif i >= (len(pp) - self.edge): | |
| # end residues | |
| continue | |
| else: | |
| # fragment | |
| index = i - self.edge | |
| assert index >= 0 | |
| fd[res] = mflist[index] | |
| except PDBException as why: | |
| if why == "CHAINBREAK": | |
| # Funny polypeptide - skip | |
| pass | |
| else: | |
| raise PDBException(why) from None | |
| return fd | |
| def __contains__(self, res): | |
| """Check if the given residue is in any of the mapped fragments. | |
| :type res: L{Residue} | |
| """ | |
| return res in self.fd | |
| def __getitem__(self, res): | |
| """Get an entry. | |
| :type res: L{Residue} | |
| :return: fragment classification | |
| :rtype: L{Fragment} | |
| """ | |
| return self.fd[res] | |