Spaces:
No application file
No application file
| # Copyright 2019-2022 by Robert T. Miller. All rights reserved. | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """PICIO: read and write Protein Internal Coordinate (.pic) data files.""" | |
| import re | |
| from datetime import date | |
| from io import StringIO | |
| try: | |
| import numpy | |
| except ImportError: | |
| from Bio import MissingPythonDependencyError | |
| raise MissingPythonDependencyError( | |
| "Install NumPy to build proteins from internal coordinates." | |
| ) | |
| from Bio.File import as_handle | |
| from Bio.PDB.StructureBuilder import StructureBuilder | |
| from Bio.PDB.parse_pdb_header import _parse_pdb_header_list | |
| from Bio.PDB.PDBExceptions import PDBException | |
| from Bio.Data.PDBData import protein_letters_1to3 | |
| from Bio.PDB.internal_coords import ( | |
| IC_Residue, | |
| IC_Chain, | |
| Edron, | |
| Hedron, | |
| Dihedron, | |
| AtomKey, | |
| ) | |
| from Bio.PDB.ic_data import ( | |
| ic_data_backbone, | |
| ic_data_sidechains, | |
| hedra_defaults, | |
| dihedra_primary_defaults, | |
| dihedra_secondary_defaults, | |
| dihedra_secondary_xoxt_defaults, | |
| ) | |
| from typing import TextIO, Set, List, Tuple, Union, Optional | |
| from Bio.PDB.Structure import Structure | |
| from Bio.PDB.Residue import Residue | |
| from Bio import SeqIO | |
| # @profile | |
def read_PIC(
    file: TextIO,
    verbose: bool = False,
    quick: bool = False,
    defaults: bool = False,
) -> Optional[Structure]:
    """Load Protein Internal Coordinate (.pic) data from file.

    PIC file format:
        - comment lines start with #
        - (optional) PDB HEADER record
           - idcode and deposition date recommended but optional
           - deposition date in PDB format or as changed by Biopython
        - (optional) PDB TITLE record
        - repeat:
           - Biopython Residue Full ID - sets residue IDs of returned structure
           - (optional) PDB N, CA, C ATOM records for chain start
           - (optional) PIC Hedra records for residue
           - (optional) PIC Dihedra records for residue
           - (optional) BFAC records listing AtomKeys and b-factors

    An improvement would define relative positions for HOH (water) entries.

    Defaults will be supplied for any value if defaults=True.  Default values
    are supplied in ic_data.py, but structures degrade quickly with any
    deviation from true coordinates.  Experiment with
    :data:`Bio.PDB.internal_coords.IC_Residue.pic_flags` options to
    :func:`write_PIC` to verify this.

    N.B. dihedron (i-1)C-N-CA-CB is ignored in assembly if O exists.

    C-beta is by default placed using O-C-CA-CB, but O is missing
    in some PDB file residues, which means the sidechain cannot be
    placed.  The alternate CB path (i-1)C-N-CA-CB is provided to
    circumvent this, but if this is needed then it must be adjusted in
    conjunction with PHI ((i-1)C-N-CA-C) as they overlap (see :meth:`.bond_set`
    and :meth:`.bond_rotate` to handle this automatically).

    :param Bio.File file: :func:`.as_handle` file name or handle
    :param bool verbose: complain when lines not as expected
    :param bool quick: don't check residues for all dihedra (no default values)
    :param bool defaults: create di/hedra as needed from reference database.
        Amide proton created if 'H' is in IC_Residue.accept_atoms
    :returns: Biopython Structure object, Residues with .internal_coord
        attributes but no coordinates except for chain start N, CA, C atoms if
        supplied, **OR** None on parse fail (silent unless verbose=True)
    """
    proton = "H" in IC_Residue.accept_atoms

    # NOTE(review): the segid character classes below use the range ``A-z``,
    # which also matches ``[ \ ] ^ _`` and the backtick; ``A-Z`` was probably
    # intended.  Left unchanged so that no currently accepted file is rejected.
    pdb_hdr_re = re.compile(
        r"^HEADER\s{4}(?P<cf>.{1,40})"
        r"(?:\s+(?P<dd>\d\d\d\d-\d\d-\d\d|\d\d-\w\w\w-\d\d))?"
        r"(?:\s+(?P<id>[0-9A-Z]{4}))?\s*$"
    )
    pdb_ttl_re = re.compile(r"^TITLE\s{5}(?P<ttl>.+)\s*$")
    biop_id_re = re.compile(
        r"^\('(?P<pid>[^\s]*)',\s(?P<mdl>\d+),\s"
        r"'(?P<chn>\s|\w)',\s\('(?P<het>\s|[\w\s-]+)"
        r"',\s(?P<pos>-?\d+),\s'(?P<icode>\s|\w)'\)\)"
        r"\s+(?P<res>[\w]{1,3})"
        r"(\s\[(?P<segid>[a-zA-z\s]+)\])?"
        r"\s*$"
    )
    pdb_atm_re = re.compile(
        r"^ATOM\s\s(?:\s*(?P<ser>\d+))\s(?P<atm>[\w\s]{4})"
        r"(?P<alc>\w|\s)(?P<res>[\w]{3})\s(?P<chn>.)"
        r"(?P<pos>[\s\-\d]{4})(?P<icode>[A-Za-z\s])\s\s\s"
        r"(?P<x>[\s\-\d\.]{8})(?P<y>[\s\-\d\.]{8})"
        r"(?P<z>[\s\-\d\.]{8})(?P<occ>[\s\d\.]{6})"
        r"(?P<tfac>[\s\d\.]{6})\s{6}"
        r"(?P<segid>[a-zA-z\s]{4})(?P<elm>.{2})"
        r"(?P<chg>.{2})?\s*$"
    )
    # same as pdb_atm_re but with wider coordinate and occupancy columns
    pdbx_atm_re = re.compile(
        r"^ATOM\s\s(?:\s*(?P<ser>\d+))\s(?P<atm>[\w\s]{4})"
        r"(?P<alc>\w|\s)(?P<res>[\w]{3})\s(?P<chn>.)"
        r"(?P<pos>[\s\-\d]{4})(?P<icode>[A-Za-z\s])\s\s\s"
        r"(?P<x>[\s\-\d\.]{10})(?P<y>[\s\-\d\.]{10})"
        r"(?P<z>[\s\-\d\.]{10})(?P<occ>[\s\d\.]{7})"
        r"(?P<tfac>[\s\d\.]{6})\s{6}"
        r"(?P<segid>[a-zA-z\s]{4})(?P<elm>.{2})"
        r"(?P<chg>.{2})?\s*$"
    )
    # one BFAC line carries up to five "<AtomKey> <b-factor>" pairs
    bfac_re = re.compile(
        r"^BFAC:\s([^\s]+\s+[\-\d\.]+)"
        r"\s*([^\s]+\s+[\-\d\.]+)?"
        r"\s*([^\s]+\s+[\-\d\.]+)?"
        r"\s*([^\s]+\s+[\-\d\.]+)?"
        r"\s*([^\s]+\s+[\-\d\.]+)?"
    )
    bfac2_re = re.compile(r"([^\s]+)\s+([\-\d\.]+)")

    struct_builder = StructureBuilder()

    # init empty header dict
    # - could use to parse HEADER and TITLE lines except
    #   deposition_date format changed from original PDB header
    header_dict = _parse_pdb_header_list([])

    curr_SMCS = [None, None, None, None]  # struct model chain seg
    SMCS_init = [
        struct_builder.init_structure,
        struct_builder.init_model,
        struct_builder.init_chain,
        struct_builder.init_seg,
    ]

    # State shared with the nested helpers below.  The helpers only read these
    # names; the main loop rebinds the per-chain dictionaries on chain change.
    sb_res = None
    rkl = None
    sb_chain = None
    sbcic = None
    sbric = None

    akc = {}  # atomkey cache, used by akcache()
    hl12 = {}  # hedra key -> len12
    ha = {}  # -> hedra angle
    hl23 = {}  # -> len23
    da = {}  # dihedra key -> angle value
    bfacs = {}  # atomkey string -> b-factor

    orphan_aks = set()  # AtomKeys seen before their residue was read

    tr = []  # this residue
    pr = []  # previous residue

    def akcache(akstr: str) -> AtomKey:
        """Maintain dictionary of AtomKeys seen while reading this PIC file."""
        # akstr: full AtomKey string read from .pic file, includes residue info
        try:
            return akc[akstr]
        except KeyError:
            ak = akc[akstr] = AtomKey(akstr)
            return ak

    def link_residues(ppr: List[Residue], pr: List[Residue]) -> None:
        """Set next and prev links between i-1 and i-2 residues."""
        for p_r in pr:
            pric = p_r.internal_coord
            for p_p_r in ppr:
                ppric = p_p_r.internal_coord
                if p_r.id[0] == " ":  # not heteroatoms
                    if pric not in ppric.rnext:
                        ppric.rnext.append(pric)
                if p_p_r.id[0] == " ":
                    if ppric not in pric.rprev:
                        pric.rprev.append(ppric)

    def process_hedron(
        a1: str,
        a2: str,
        a3: str,
        l12: str,
        ang: str,
        l23: str,
        ric: IC_Residue,
    ) -> Optional[Tuple]:
        """Create Hedron on current (sbcic) Chain.internal_coord.

        Returns the AtomKey triple used as the hedron key, or None when any
        atom is not in IC_Residue.accept_atoms (nothing is stored then).
        """
        ek = (akcache(a1), akcache(a2), akcache(a3))
        atmNdx = AtomKey.fields.atm
        accpt = IC_Residue.accept_atoms
        if not all(ek[i].akl[atmNdx] in accpt for i in range(3)):
            return
        hl12[ek] = float(l12)
        ha[ek] = float(ang)
        hl23[ek] = float(l23)
        sbcic.hedra[ek] = ric.hedra[ek] = h = Hedron(ek)
        h.cic = sbcic
        ak_add(ek, ric)
        return ek

    def default_hedron(ek: Tuple, ric: IC_Residue) -> None:
        """Create Hedron based on same re_class hedra in ref database.

        Adds Hedron to current Chain.internal_coord, see ic_data for default
        values and reference database source.
        """
        hkey = None

        atmNdx = AtomKey.fields.atm
        resNdx = AtomKey.fields.resname
        resPos = AtomKey.fields.respos
        atomkeys = [ek[i].akl for i in range(3)]

        atpl = tuple([atomkeys[i][atmNdx] for i in range(3)])
        res = atomkeys[0][resNdx]

        if (
            atomkeys[0][resPos]
            != atomkeys[2][resPos]  # hedra crosses amide bond so not reversed
            or atpl == ("N", "CA", "C")  # or chain start tau
            or atpl in ic_data_backbone  # or found forward hedron in ic_data
            or (res not in ["A", "G"] and atpl in ic_data_sidechains[res])
        ):
            hkey = ek
            rhcl = [atomkeys[i][resNdx] + atomkeys[i][atmNdx] for i in range(3)]
            try:
                dflts = hedra_defaults["".join(rhcl)][0]
            except KeyError:
                # retry with residue "X" standing in for the atom that lies
                # in the other residue
                if atomkeys[0][resPos] == atomkeys[1][resPos]:
                    rhcl = [atomkeys[i][resNdx] + atomkeys[i][atmNdx] for i in range(2)]
                    rhc = "".join(rhcl) + "X" + atomkeys[2][atmNdx]
                else:
                    rhcl = [
                        atomkeys[i][resNdx] + atomkeys[i][atmNdx] for i in range(1, 3)
                    ]
                    rhc = "X" + atomkeys[0][atmNdx] + "".join(rhcl)
                dflts = hedra_defaults[rhc][0]
        else:
            # must be reversed or fail
            hkey = ek[::-1]
            rhcl = [atomkeys[i][resNdx] + atomkeys[i][atmNdx] for i in range(2, -1, -1)]
            dflts = hedra_defaults["".join(rhcl)][0]

        process_hedron(
            str(hkey[0]),
            str(hkey[1]),
            str(hkey[2]),
            dflts[0],
            dflts[1],
            dflts[2],
            ric,
        )

        if verbose:
            print(f" default for {ek}")

    def hedra_check(dk: Tuple, ric: IC_Residue) -> None:
        """Confirm both hedra present for dihedron key, use default if set."""
        # each hedron may be stored in forward or reversed atom order
        if dk[0:3] not in sbcic.hedra and dk[2::-1] not in sbcic.hedra:
            if defaults:
                default_hedron(dk[0:3], ric)
            else:
                print(f"{dk} missing h1")
        if dk[1:4] not in sbcic.hedra and dk[3:0:-1] not in sbcic.hedra:
            if defaults:
                default_hedron(dk[1:4], ric)
            else:
                print(f"{dk} missing h2")

    def process_dihedron(
        a1: str, a2: str, a3: str, a4: str, dangle: str, ric: IC_Residue
    ) -> Optional[Tuple]:
        """Create Dihedron on current Chain.internal_coord.

        Returns the AtomKey 4-tuple used as the dihedron key, or None when any
        atom is not in IC_Residue.accept_atoms (nothing is stored then).
        """
        ek = (
            akcache(a1),
            akcache(a2),
            akcache(a3),
            akcache(a4),
        )
        atmNdx = AtomKey.fields.atm
        accpt = IC_Residue.accept_atoms
        if not all(ek[i].akl[atmNdx] in accpt for i in range(4)):
            return
        da[ek] = float(dangle)
        sbcic.dihedra[ek] = ric.dihedra[ek] = d = Dihedron(ek)
        d.cic = sbcic
        if not quick:
            hedra_check(ek, ric)
        ak_add(ek, ric)
        return ek

    def default_dihedron(ek: List, ric: IC_Residue) -> None:
        """Create Dihedron based on same residue class dihedra in ref database.

        Adds Dihedron to current Chain.internal_coord, see ic_data for default
        values and reference database source.
        """
        atmNdx = AtomKey.fields.atm
        resNdx = AtomKey.fields.resname
        resPos = AtomKey.fields.respos

        rdclass = ""
        dclass = ""
        for ak in ek:
            dclass += ak.akl[atmNdx]
            rdclass += ak.akl[resNdx] + ak.akl[atmNdx]
        # backbone dihedra spanning two residues: residue "X" replaces the
        # neighbouring residue's code in the lookup key
        if dclass == "NCACN":
            rdclass = rdclass[0:7] + "XN"
        elif dclass == "CACNCA":
            rdclass = "XCAXC" + rdclass[5:]
        elif dclass == "CNCAC":
            rdclass = "XC" + rdclass[2:]

        if rdclass in dihedra_primary_defaults:
            process_dihedron(
                str(ek[0]),
                str(ek[1]),
                str(ek[2]),
                str(ek[3]),
                dihedra_primary_defaults[rdclass][0],
                ric,
            )

            if verbose:
                print(f" default for {ek}")

        elif rdclass in dihedra_secondary_defaults:
            # secondary dihedron = primary dihedron angle + fixed offset
            primAngle, offset = dihedra_secondary_defaults[rdclass]
            rname = ek[2].akl[resNdx]
            rnum = int(ek[2].akl[resPos])
            paKey = None
            if primAngle == ("N", "CA", "C", "N") and ek[0].ric.rnext != []:
                paKey = [
                    AtomKey((rnum, None, rname, primAngle[x], None, None))
                    for x in range(3)
                ]
                rnext = ek[0].ric.rnext
                paKey.append(
                    AtomKey(
                        (
                            rnext[0].rbase[0],
                            None,
                            rnext[0].rbase[2],
                            "N",
                            None,
                            None,
                        )
                    )
                )
                paKey = tuple(paKey)
            elif primAngle == ("CA", "C", "N", "CA"):
                # Fixed: this branch read ``pr.akl`` (``pr`` is a list of
                # Residues), called ``list.add`` and passed loose arguments
                # to AtomKey, so it could only raise.
                # NOTE(review): the previous residue is taken from ek[0],
                # assuming the dihedron's first atom lies in the residue
                # preceding ek[2]'s - confirm against ic_data.
                prname = ek[0].akl[resNdx]
                prnum = int(ek[0].akl[resPos])
                paKey = [
                    AtomKey((prnum, None, prname, primAngle[x], None, None))
                    for x in range(0, 2)
                ]
                paKey.extend(
                    AtomKey((rnum, None, rname, primAngle[x], None, None))
                    for x in range(2, 4)
                )
                paKey = tuple(paKey)
            else:
                paKey = tuple(
                    AtomKey((rnum, None, rname, atm, None, None)) for atm in primAngle
                )

            if paKey in da:
                process_dihedron(
                    str(ek[0]),
                    str(ek[1]),
                    str(ek[2]),
                    str(ek[3]),
                    da[paKey] + dihedra_secondary_defaults[rdclass][1],
                    ric,
                )

                if verbose:
                    print(f" secondary default for {ek}")

            elif rdclass in dihedra_secondary_xoxt_defaults:
                if primAngle == ("C", "N", "CA", "C"):  # primary for alt cb
                    # no way to trigger alt cb with default=True
                    # because will generate default N-CA-C-O
                    # (same fix and same NOTE(review) as the CA-C-N-CA
                    # branch above)
                    prname = ek[0].akl[resNdx]
                    prnum = int(ek[0].akl[resPos])
                    paKey = [AtomKey((prnum, None, prname, primAngle[0], None, None))]
                    paKey.extend(
                        AtomKey((rnum, None, rname, primAngle[x], None, None))
                        for x in range(1, 4)
                    )
                    paKey = tuple(paKey)
                else:
                    primAngle, offset = dihedra_secondary_xoxt_defaults[rdclass]
                    rname = ek[2].akl[resNdx]
                    rnum = int(ek[2].akl[resPos])
                    paKey = tuple(
                        AtomKey((rnum, None, rname, atm, None, None))
                        for atm in primAngle
                    )

                if paKey in da:
                    process_dihedron(
                        str(ek[0]),
                        str(ek[1]),
                        str(ek[2]),
                        str(ek[3]),
                        da[paKey] + offset,
                        ric,
                    )

                    if verbose:
                        print(f" oxt default for {ek}")
                else:
                    print(
                        f"missing primary angle {paKey} {primAngle} to "
                        f"generate {rnum}{rname} {rdclass}"
                    )
        else:
            print(
                f"missing {ek} -> {rdclass} ({dclass}) not found in primary or"
                " secondary defaults"
            )

    def dihedra_check(ric: IC_Residue) -> None:
        """Look for required dihedra in residue, generate defaults if set."""
        # rnext should be set

        def ake_recurse(akList: List) -> List:
            """Build combinatorics of AtomKey lists."""
            car = akList[0]
            if len(akList) > 1:
                retList = []
                for ak in car:
                    cdr = akList[1:]
                    # recompute per ak: the returned lists are mutated below
                    rslt = ake_recurse(cdr)
                    for r in rslt:
                        r.insert(0, ak)
                        retList.append(r)
                return retList
            else:
                if len(car) == 1:
                    return [list(car)]
                else:
                    retList = [[ak] for ak in car]
                    return retList

        def ak_expand(eLst: List) -> List:
            """Expand AtomKey list with altlocs, all combinatorics."""
            retList = []
            for edron in eLst:
                newList = []
                for ak in edron:
                    rslt = ak.ric.split_akl([ak])
                    rlst = [r[0] for r in rslt]
                    if rlst != []:
                        newList.append(rlst)
                    else:
                        newList.append([ak])
                rslt = ake_recurse(newList)
                for r in rslt:
                    retList.append(r)
            return retList

        # dihedra_check processing starts here
        # generate the list of dihedra this residue should have
        chkLst = []
        sN, sCA, sC = AtomKey(ric, "N"), AtomKey(ric, "CA"), AtomKey(ric, "C")
        sO, sCB, sH = AtomKey(ric, "O"), AtomKey(ric, "CB"), AtomKey(ric, "H")

        if ric.rnext != []:
            for rn in ric.rnext:
                nN, nCA, nC = (
                    AtomKey(rn, "N"),
                    AtomKey(rn, "CA"),
                    AtomKey(rn, "C"),
                )
                # intermediate residue, need psi, phi, omg
                chkLst.append((sN, sCA, sC, nN))  # psi
                chkLst.append((sCA, sC, nN, nCA))  # omg i+1
                chkLst.append((sC, nN, nCA, nC))  # phi i+1
        else:
            chkLst.append((sN, sCA, sC, AtomKey(ric, "OXT")))  # psi
            rn = "(no rnext)"  # only used in the verbose message below

        chkLst.append((sN, sCA, sC, sO))  # locate backbone O
        if ric.lc != "G":
            chkLst.append((sO, sC, sCA, sCB))  # locate CB
        if ric.lc == "A":
            chkLst.append((sN, sCA, sCB))  # missed for generate from seq
        if ric.rprev != [] and ric.lc != "P" and proton:
            chkLst.append((sC, sCA, sN, sH))  # amide proton

        try:
            for edron in ic_data_sidechains[ric.lc]:
                if len(edron) > 3:  # dihedra only
                    if all(not atm[0] == "H" for atm in edron):
                        akl = [AtomKey(ric, atm) for atm in edron[0:4]]
                        chkLst.append(akl)
        except KeyError:
            # residue code has no entry in ic_data_sidechains
            pass

        # now compare generated list to ric.dihedra, get defaults if set.
        chkLst = ak_expand(chkLst)
        altloc_ndx = AtomKey.fields.altloc

        for dk in chkLst:
            if tuple(dk) in ric.dihedra:
                pass
            elif sH in dk:
                pass  # ignore missing hydrogens
            elif all(atm.akl[altloc_ndx] is None for atm in dk):
                if defaults:
                    if len(dk) != 3:
                        default_dihedron(dk, ric)
                    else:
                        default_hedron(dk, ric)  # add ALA N-Ca-Cb
                else:
                    if verbose:
                        print(f"{ric}-{rn} missing {dk}")
            else:
                # print(f"skip {ek}")
                pass  # ignore missing combinatoric of altloc atoms
                # need more here?

    def ak_add(ek: Tuple, ric: IC_Residue) -> None:
        """Allocate edron key AtomKeys to current residue as appropriate.

        A hedron or dihedron may span a backbone amide bond, this routine
        allocates atoms in the (h/di)edron to the ric residue or saves them
        for a residue yet to be processed.

        :param set ek: AtomKeys in edron
        :param IC_Residue ric: current residue to assign AtomKeys to
        """
        res = ric.residue
        reskl = (
            str(res.id[1]),
            (None if res.id[2] == " " else res.id[2]),
            ric.lc,
        )
        for ak in ek:
            if ak.ric is None:
                sbcic.akset.add(ak)
                if ak.akl[0:3] == reskl:
                    ak.ric = ric
                    ric.ak_set.add(ak)
                else:
                    orphan_aks.add(ak)

    def finish_chain() -> None:
        """Do last rnext, rprev links and process chain edra data."""
        link_residues(pr, tr)
        # check/confirm completeness
        if not quick:
            for r in pr:
                dihedra_check(r.internal_coord)
            for r in tr:
                dihedra_check(r.internal_coord)

        if ha != {}:
            sha = {k: ha[k] for k in sorted(ha)}
            shl12 = {k: hl12[k] for k in sorted(hl12)}
            shl23 = {k: hl23[k] for k in sorted(hl23)}
            sbcic._hedraDict2chain(shl12, sha, shl23, da, bfacs)

    # read_PIC processing starts here:
    with as_handle(file, mode="r") as handle:
        # iterate the handle directly rather than materialising readlines()
        for line in handle:
            if line.startswith("#"):
                pass  # skip comment lines
            elif line.startswith("HEADER "):
                m = pdb_hdr_re.match(line)
                if m:
                    header_dict["head"] = m.group("cf")  # classification
                    header_dict["idcode"] = m.group("id")
                    header_dict["deposition_date"] = m.group("dd")
                elif verbose:
                    print("Reading pic file", file, "HEADER parse fail: ", line)
            elif line.startswith("TITLE "):
                m = pdb_ttl_re.match(line)
                if m:
                    header_dict["name"] = m.group("ttl").strip()
                    # print('TTL: ', m.group('ttl').strip())
                elif verbose:
                    print("Reading pic file", file, "TITLE parse fail:, ", line)
            elif line.startswith("("):  # Biopython ID line for Residue
                m = biop_id_re.match(line)
                if m:
                    # check SMCS = Structure, Model, Chain, SegID
                    segid = m.group(9)  # group 8 is the enclosing "[...]"
                    if segid is None:
                        segid = " "
                    this_SMCS = [
                        m.group(1),
                        int(m.group(2)),
                        m.group(3),
                        segid,
                    ]
                    if curr_SMCS != this_SMCS:
                        if curr_SMCS[:3] != this_SMCS[:3] and ha != {}:
                            # chain change so process current chain data
                            finish_chain()

                            akc = {}  # atomkey cache, used by akcache()
                            hl12 = {}  # hedra key -> len12
                            ha = {}  # -> hedra angle
                            hl23 = {}  # -> len23
                            da = {}  # dihedra key -> angle value
                            bfacs = {}  # atomkey string -> b-factor

                        # init new Biopython SMCS level as needed
                        for i in range(4):
                            if curr_SMCS[i] != this_SMCS[i]:
                                SMCS_init[i](this_SMCS[i])
                                curr_SMCS[i] = this_SMCS[i]
                                if i == 0:
                                    # 0 = init structure so add header
                                    struct_builder.set_header(header_dict)
                                elif i == 1:
                                    # new model means new chain and new segid
                                    curr_SMCS[2] = curr_SMCS[3] = None
                                elif i == 2:
                                    # new chain so init internal_coord
                                    sb_chain = struct_builder.chain
                                    sbcic = sb_chain.internal_coord = IC_Chain(sb_chain)

                    struct_builder.init_residue(
                        m.group("res"),
                        m.group("het"),
                        int(m.group("pos")),
                        m.group("icode"),
                    )

                    sb_res = struct_builder.residue
                    if sb_res.id[0] != " ":  # skip hetatm
                        continue
                    if 2 == sb_res.is_disordered():
                        for r in sb_res.child_dict.values():
                            if not r.internal_coord:
                                sb_res = r
                                break
                        # added to disordered res
                        tr.append(sb_res)
                    else:
                        # new res so fix up previous residue as feasible
                        link_residues(pr, tr)

                        if not quick:
                            for r in pr:
                                # create di/hedra if default for residue i-1
                                # just linked
                                dihedra_check(r.internal_coord)

                        pr = tr
                        tr = [sb_res]

                    sbric = sb_res.internal_coord = IC_Residue(
                        sb_res
                    )  # no atoms so no rak
                    sbric.cic = sbcic
                    rkl = (
                        str(sb_res.id[1]),
                        (None if sb_res.id[2] == " " else sb_res.id[2]),
                        sbric.lc,
                    )
                    sbcic.ordered_aa_ic_list.append(sbric)

                    # update AtomKeys w/o IC_Residue references, in case
                    # chain ends before di/hedra sees them (2XHE test case)
                    for ak in orphan_aks:
                        if ak.akl[0:3] == rkl:
                            ak.ric = sbric
                            sbric.ak_set.add(ak)
                            # may need altoc support here
                    orphan_aks = set(filter(lambda ak: ak.ric is None, orphan_aks))

                else:
                    if verbose:
                        print(
                            "Reading pic file",
                            file,
                            "residue ID parse fail: ",
                            line,
                        )
                    return None
            elif line.startswith("ATOM "):
                m = pdb_atm_re.match(line)
                if not m:
                    m = pdbx_atm_re.match(line)
                if m:
                    if sb_res is None:
                        # ATOM without res spec already loaded, not a pic file
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM without residue configured:, ",
                                line,
                            )
                        return None
                    if sb_res.resname != m.group("res") or sb_res.id[1] != int(
                        m.group("pos")
                    ):
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM not in configured residue (",
                                sb_res.resname,
                                str(sb_res.id),
                                "):",
                                line,
                            )
                        return None
                    coord = numpy.array(
                        (
                            float(m.group("x")),
                            float(m.group("y")),
                            float(m.group("z")),
                        ),
                        "f",
                    )
                    struct_builder.init_atom(
                        m.group("atm").strip(),
                        coord,
                        float(m.group("tfac")),
                        float(m.group("occ")),
                        m.group("alc"),
                        m.group("atm"),
                        int(m.group("ser")),
                        m.group("elm").strip(),
                    )

                    # reset because prev does not link to this residue
                    # (chainBreak)
                    pr = []

            elif line.startswith("BFAC: "):
                m = bfac_re.match(line)
                if m:
                    for bfac_pair in m.groups():
                        if bfac_pair is not None:
                            m2 = bfac2_re.match(bfac_pair)
                            bfacs[m2.group(1)] = float(m2.group(2))
                # else:
                #    print f"Reading pic file {file} B-factor fail: {line}"
            else:
                m = Edron.edron_re.match(line)
                if m and sb_res is not None:
                    if m["a4"] is None:
                        process_hedron(
                            m["a1"],
                            m["a2"],
                            m["a3"],
                            m["len12"],
                            m["angle"],
                            m["len23"],
                            sb_res.internal_coord,
                        )
                    else:
                        process_dihedron(
                            m["a1"],
                            m["a2"],
                            m["a3"],
                            m["a4"],
                            m["dihedral"],
                            sb_res.internal_coord,
                        )
                elif m:
                    print(
                        "PIC file: ",
                        file,
                        " error: no residue info before reading (di/h)edron: ",
                        line,
                    )
                    return None
                elif line.strip():
                    if verbose:
                        print(
                            "Reading PIC file",
                            file,
                            "parse fail on: .",
                            line,
                            ".",
                        )
                    return None

    # reached end of input
    finish_chain()

    # print(report_PIC(struct_builder.get_structure()))
    return struct_builder.get_structure()
def read_PIC_seq(
    seqRec: "SeqIO.SeqRecord",
    pdbid: Optional[str] = None,
    title: Optional[str] = None,
    chain: Optional[str] = None,
) -> Structure:
    """Read :class:`.SeqRecord` into Structure with default internal coords.

    Builds PIC text (HEADER, TITLE and one residue ID line per sequence
    letter) in memory and passes it to :func:`read_PIC` with defaults=True.

    :param SeqRecord seqRec: sequence to convert; ``id`` may be
        ``<pdbid>`` or ``<pdbid>:<chain>``
    :param str pdbid: PDB idcode; default taken from seqRec.id, else "0PDB"
    :param str title: title text; default taken from seqRec.description
        (with a leading "<id> " removed), else "sequence input <id>"
    :param str chain: chain ID; default taken from seqRec.id, else "A"
    :returns: result of :func:`read_PIC` on the generated text
    :raises KeyError: if a sequence letter is not in protein_letters_1to3
    """
    read_pdbid, read_title, read_chain = None, None, None
    if seqRec.id is not None:
        read_pdbid = seqRec.id
    if seqRec.description is not None:
        read_title = seqRec.description
        if read_pdbid is not None:
            # only strip the id prefix when there is an id; formatting a
            # None id would strip the literal text "None "
            read_title = read_title.replace(f"{read_pdbid} ", "")
    # guard: ``":" in None`` raises TypeError for records without an id
    if read_pdbid is not None and ":" in read_pdbid:
        read_pdbid, read_chain = read_pdbid.split(":")

    if chain is None:
        chain = read_chain if read_chain is not None else "A"
    if title is None:
        title = (
            read_title
            if read_title is not None
            else f"sequence input {seqRec.id if seqRec.id is not None else ''}"
        )
    if pdbid is None:
        pdbid = read_pdbid if read_pdbid is not None else "0PDB"

    today = date.today()
    datestr = (today.strftime("%d-%b-%y")).upper()
    # Column layout follows the PDB HEADER/TITLE records; read_PIC's regexes
    # need four blanks after HEADER and five after TITLE, otherwise the
    # header information is dropped.
    lines = [
        f"HEADER    {'GENERATED STRUCTURE':40}{datestr}   {pdbid}\n",
        f"TITLE     {title.upper():69}\n",
    ]
    # residue positions are numbered from 1
    for ndx, r in enumerate(seqRec.seq, start=1):
        lines.append(
            f"('{pdbid}', 0, '{chain}', (' ', {ndx}, ' ')) {protein_letters_1to3[r]}\n"
        )

    sp = StringIO()
    sp.write("".join(lines))
    sp.seek(0)
    return read_PIC(sp, defaults=True)
def _wpr(
    entity,
    fp,
    pdbid,
    chainid,
    picFlags: int = IC_Residue.picFlagsDefault,
    hCut: Optional[Union[float, None]] = None,
    pCut: Optional[Union[float, None]] = None,
):
    """Write one residue's PIC text to fp.

    A residue without internal_coord gets only its residue ID string.
    Otherwise a missing chainid is read from the parent chain and a missing
    pdbid from the header "idcode" of the chain's grandparent.
    """
    if not entity.internal_coord:
        # nothing but the residue ID line can be written
        fp.write(IC_Residue._residue_string(entity))
        return

    if not (chainid and pdbid):
        parent_chain = entity.parent
        if not chainid:
            chainid = parent_chain.id
        if not pdbid:
            top = parent_chain.parent.parent
            pdbid = top.header.get("idcode")

    pic_text = entity.internal_coord._write_PIC(
        pdbid, chainid, picFlags=picFlags, hCut=hCut, pCut=pCut
    )
    fp.write(pic_text)
| def _enumerate_entity_atoms(entity): | |
| need = False | |
| for atm in entity.get_atoms(): | |
| need = not atm.get_serial_number() | |
| break | |
| if need: | |
| anum = 1 | |
| for res in entity.get_residues(): | |
| if 2 == res.is_disordered(): | |
| for r in res.child_dict.values(): | |
| for atm in r.get_unpacked_list(): | |
| atm.set_serial_number(anum) | |
| anum += 1 | |
| else: | |
| for atm in res.get_unpacked_list(): | |
| atm.set_serial_number(anum) | |
| anum += 1 | |
def enumerate_atoms(entity):
    """Ensure all atoms in entity have serial_number set."""
    # climb to the top-most ancestor of whatever was passed in
    top = entity
    parent = top.get_parent()
    while parent:
        top = parent
        parent = top.get_parent()

    if "S" != top.level:
        # no Structure above this entity: number it as one unit from 1
        _enumerate_entity_atoms(top)
        return

    for mdl in top:  # each model starts with 1
        _enumerate_entity_atoms(mdl)
def pdb_date(datestr: Optional[str]) -> Optional[str]:
    """Convert yyyy-mm-dd date to dd-month-yy.

    :param str datestr: date text, may be None or empty
    :returns: "DD-MON-YY" when datestr starts with a yyyy-mm-dd date,
        otherwise datestr unchanged (including None and "").  A month field
        outside 01-12 yields the placeholder "XXX" instead of raising
        IndexError.
    """
    if datestr:
        m = re.match(r"(\d{4})-(\d{2})-(\d{2})", datestr)
        if m:
            months = (
                "XXX",  # placeholder for month "00"
                "JAN",
                "FEB",
                "MAR",
                "APR",
                "MAY",
                "JUN",
                "JUL",
                "AUG",
                "SEP",
                "OCT",
                "NOV",
                "DEC",
            )
            month_ndx = int(m.group(2))
            # two digits can encode 13..99; map those to the placeholder too
            mo = months[month_ndx] if month_ndx < len(months) else months[0]
            datestr = m.group(3) + "-" + mo + "-" + m.group(1)[-2:]
    return datestr
def write_PIC(
    entity,
    file,
    pdbid=None,
    chainid=None,
    picFlags: int = IC_Residue.picFlagsDefault,
    hCut: Optional[float] = None,
    pCut: Optional[float] = None,
):
    """Write Protein Internal Coordinates (PIC) to file.

    See :func:`read_PIC` for file format.
    See :data:`IC_Residue.pic_accuracy` to vary numeric accuracy.
    Recurses to lower entity levels (M, C, R).

    :param Entity entity: Biopython PDB Entity object: S, M, C or R
    :param Bio.File file: :func:`.as_handle` file name or handle
    :param str pdbid: PDB idcode, read from entity if not supplied
    :param char chainid: PDB Chain ID, set from C level entity.id if needed
    :param int picFlags: boolean flags controlling output, defined in
        :data:`Bio.PDB.internal_coords.IC_Residue.pic_flags`

        * "psi",
        * "omg",
        * "phi",
        * "tau",  # tau hedron (N-Ca-C)
        * "chi1",
        * "chi2",
        * "chi3",
        * "chi4",
        * "chi5",
        * "pomg",  # proline omega
        * "chi",   # chi1 through chi5
        * "classic_b",  # psi | phi | tau | pomg
        * "classic",    # classic_b | chi
        * "hedra",      # all hedra including bond lengths
        * "primary",    # all primary dihedra
        * "secondary",  # all secondary dihedra (fixed angle from primary dihedra)
        * "all",        # hedra | primary | secondary
        * "initAtoms",  # XYZ coordinates of initial Tau (N-Ca-C)
        * "bFactors"

        default is everything::

            picFlagsDefault = (
                pic_flags.all | pic_flags.initAtoms | pic_flags.bFactors
            )

        Usage in your code::

            # just primary dihedra and all hedra
            picFlags = (
                IC_Residue.pic_flags.primary | IC_Residue.pic_flags.hedra
            )

            # no B-factors:
            picFlags = IC_Residue.picFlagsDefault
            picFlags &= ~IC_Residue.pic_flags.bFactors

        :func:`read_PIC` with `(defaults=True)` will use default values for
        anything left out

    :param float hCut: default None
        only write hedra with ref db angle std dev greater than this value
    :param float pCut: default None
        only write primary dihedra with ref db angle std dev greater than this
        value

    **Default values**:

    Data averaged from Sep 2019 Dunbrack cullpdb_pc20_res2.2_R1.0.

    Please see

    `PISCES: A Protein Sequence Culling Server <https://dunbrack.fccc.edu/pisces/>`_

    'G. Wang and R. L. Dunbrack, Jr. PISCES: a protein sequence culling
    server. Bioinformatics, 19:1589-1591, 2003.'

    'primary' and 'secondary' dihedra are defined in ic_data.py.  Specifically,
    secondary dihedra can be determined as a fixed rotation from another known
    angle, for example N-Ca-C-O can be estimated from N-Ca-C-N (psi).

    Standard deviations are listed in
    <biopython distribution>/Bio/PDB/ic_data.py for default values, and can be
    used to limit which hedra and dihedra are defaulted vs. output exact
    measurements from structure (see hCut and pCut above).  Default values for
    primary dihedra (psi, phi, omega, chi1, etc.) are chosen as the most common
    integer value, not an average.

    :raises PDBException: if entity level is A (Atom) or is not recognised
    :raises Exception: if a KeyError escapes while writing; it is reported as
        "argument is not a Biopython PDB Entity" with the KeyError chained

    NOTE(review): an object lacking ``get_parent``/``level`` raises
    AttributeError (from :func:`enumerate_atoms` or the level test), which
    is not converted here.
    """
    enumerate_atoms(entity)

    # options forwarded unchanged to every recursive call / residue writer
    opts = {"picFlags": picFlags, "hCut": hCut, "pCut": pCut}

    with as_handle(file, "w") as fp:
        try:
            if "A" == entity.level:
                raise PDBException("No PIC output at Atom level")
            elif "R" == entity.level:
                if 2 == entity.is_disordered():
                    # write every alternative of a disordered residue
                    for r in entity.child_dict.values():
                        _wpr(r, fp, pdbid, chainid, **opts)
                else:
                    _wpr(entity, fp, pdbid, chainid, **opts)
            elif "C" == entity.level:
                if not chainid:
                    chainid = entity.id
                for res in entity:
                    write_PIC(res, fp, pdbid, chainid, **opts)
            elif "M" == entity.level:
                for chn in entity:
                    write_PIC(chn, fp, pdbid, chainid, **opts)
            elif "S" == entity.level:
                if not pdbid:
                    pdbid = entity.header.get("idcode", None)
                hdr = entity.header.get("head", None)
                dd = pdb_date(entity.header.get("deposition_date", None))
                if hdr:
                    # PDB HEADER column layout.  read_PIC needs four blanks
                    # after "HEADER", so a single blank is never read back.
                    fp.write(
                        ("HEADER    {:40}{:8}   {:4}\n").format(
                            hdr.upper(), (dd or ""), (pdbid or "")
                        )
                    )
                nam = entity.header.get("name", None)
                if nam:
                    # read_PIC needs five blanks after "TITLE"
                    fp.write("TITLE     " + nam.upper() + "\n")
                for mdl in entity:
                    write_PIC(mdl, fp, pdbid, chainid, **opts)
            else:
                raise PDBException("Cannot identify level: " + str(entity.level))
        except KeyError as err:
            raise Exception(
                "write_PIC: argument is not a Biopython PDB Entity " + str(entity)
            ) from err