Spaces:
No application file
No application file
| # Copyright 2001 by Gavin E. Crooks. All rights reserved. | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Handle the SCOP DEScription file. | |
| The file format is described in the SCOP | |
| "release notes.":http://scop.berkeley.edu/release-notes-1.55.html | |
| The latest DES file can be found | |
| "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/ | |
| "Release 1.55":http://scop.berkeley.edu/parse/des.cla.scop.txt_1.55 (July 2001) | |
| """ | |
class Record:
    """Hold the information for one node in the SCOP hierarchy.

    Attributes:
     - sunid - SCOP unique identifier; an int once a record line has been
       parsed, an empty string on a freshly created blank Record
     - nodetype - One of 'cl' (class), 'cf' (fold), 'sf' (superfamily),
       'fa' (family), 'dm' (protein), 'sp' (species), 'px' (domain).
       Additional node types may be added.
     - sccs - SCOP concise classification strings. e.g. b.1.2.1
     - name - The SCOP ID (sid) for domains (e.g. d1anu1), currently empty
       for other node types
     - description - e.g. "All beta proteins", "Fibronectin type III"

    """

    def __init__(self, line=None):
        """Initialize the class.

        Arguments:
         - line - optional DES record line. When given and non-empty it is
           parsed immediately; otherwise every attribute stays an empty
           string.

        """
        self.sunid = ""
        self.nodetype = ""
        self.sccs = ""
        self.name = ""
        self.description = ""
        if line:
            self._process(line)

    def _process(self, line):
        """Parse DES records (PRIVATE).

        Records consist of 5 tab-delimited fields:
        sunid, node type, sccs, node name, node description.

        A node name of "-" is stored as an empty string, and the sunid is
        stored as an int.

        Raises ValueError if the line does not hold exactly 5 fields or if
        the sunid field is not an integer.
        """
        # For example ::
        #
        # 21953   px      b.1.2.1 d1dan.1 1dan T:,U:91-106
        # 48724   cl      b       -       All beta proteins
        # 48725   cf      b.1     -       Immunoglobulin-like beta-sandwich
        # 49265   sf      b.1.2   -       Fibronectin type III
        # 49266   fa      b.1.2.1 -       Fibronectin type III
        raw = line
        line = line.rstrip()  # no trailing whitespace
        columns = line.split("\t")  # separate the tab-delimited columns
        # rstrip() also removes the tab that delimits an empty trailing
        # description (exactly what __str__ writes for such a record), so
        # put the empty field back when the stripped tail contained a tab.
        if len(columns) == 4 and "\t" in raw[len(line):]:
            columns.append("")
        if len(columns) != 5:
            raise ValueError(f"I don't understand the format of {line}")
        sunid, nodetype, sccs, name, description = columns
        # Convert first so that a non-numeric sunid raises before any
        # attribute is touched and the record is never left half-filled.
        self.sunid = int(sunid)
        self.nodetype = nodetype
        self.sccs = sccs
        self.name = "" if name == "-" else name
        self.description = description

    def __str__(self):
        """Represent the SCOP description record as a tab-separated string.

        An empty name is written as "-", and the line ends with a newline.
        """
        fields = (
            self.sunid,
            self.nodetype,
            self.sccs,
            self.name or "-",
            self.description,
        )
        return "\t".join(map(str, fields)) + "\n"
def parse(handle):
    """Iterate over a DES file, yielding one Record per data line.

    Lines that start with "#" are treated as comments and skipped; every
    other line is handed to Record for parsing.

    Arguments:
     - handle - file-like object (any iterable of lines)

    """
    yield from (Record(row) for row in handle if not row.startswith("#"))