Spaces:
No application file
No application file
| # Copyright 1999 by Jeffrey Chang. All rights reserved. | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Code to parse the keywlist.txt file from SwissProt/UniProt. | |
| See: | |
| - https://www.uniprot.org/docs/keywlist.txt | |
| Classes: | |
| - Record Stores the information about one keyword or one category | |
| in the keywlist.txt file. | |
| Functions: | |
| - parse Parses the keywlist.txt file and returns an iterator to | |
| the records it contains. | |
| """ | |
| class Record(dict): | |
| """Store information of one keyword or category from the keywords list. | |
| This record stores the information of one keyword or category in the | |
| keywlist.txt as a Python dictionary. The keys in this dictionary are | |
| the line codes that can appear in the keywlist.txt file:: | |
| --------- --------------------------- ---------------------- | |
| Line code Content Occurrence in an entry | |
| --------- --------------------------- ---------------------- | |
| ID Identifier (keyword) Once; starts a keyword entry | |
| IC Identifier (category) Once; starts a category entry | |
| AC Accession (KW-xxxx) Once | |
| DE Definition Once or more | |
| SY Synonyms Optional; once or more | |
| GO Gene ontology (GO) mapping Optional; once or more | |
| HI Hierarchy Optional; once or more | |
| WW Relevant WWW site Optional; once or more | |
| CA Category Once per keyword entry; absent | |
| in category entries | |
| """ | |
| def __init__(self): | |
| """Initialize the class.""" | |
| dict.__init__(self) | |
| for keyword in ("DE", "SY", "GO", "HI", "WW"): | |
| self[keyword] = [] | |
| def parse(handle): | |
| """Parse the keyword list from file handle. | |
| Returns a generator object which yields keyword entries as | |
| Bio.SwissProt.KeyWList.Record() object. | |
| """ | |
| record = Record() | |
| # First, skip the header - look for start of a record | |
| for line in handle: | |
| if line.startswith("ID "): | |
| # Looks like there was no header | |
| record["ID"] = line[5:].strip() | |
| break | |
| if line.startswith("IC "): | |
| # Looks like there was no header | |
| record["IC"] = line[5:].strip() | |
| break | |
| # Now parse the records | |
| for line in handle: | |
| if line.startswith("-------------------------------------"): | |
| # We have reached the footer | |
| break | |
| key = line[:2] | |
| if key == "//": | |
| record["DE"] = " ".join(record["DE"]) | |
| record["SY"] = " ".join(record["SY"]) | |
| yield record | |
| record = Record() | |
| elif line[2:5] == " ": | |
| value = line[5:].strip() | |
| if key in ("ID", "IC", "AC", "CA"): | |
| record[key] = value | |
| elif key in ("DE", "SY", "GO", "HI", "WW"): | |
| record[key].append(value) | |
| else: | |
| raise ValueError(f"Cannot parse line '{line.strip()}'") | |
| # Read the footer and throw it away | |
| for line in handle: | |
| pass | |