Spaces:
No application file
No application file
| # Copyright 2006-2013,2020 by Peter Cock. | |
| # Revisions copyright 2008-2009 by Michiel de Hoon. | |
| # All rights reserved. | |
| # | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Bio.SeqIO support for the "swiss" (aka SwissProt/UniProt) file format. | |
| You are expected to use this module via the Bio.SeqIO functions. | |
| See also the Bio.SwissProt module which offers more than just accessing | |
| the sequences as SeqRecord objects. | |
| See also Bio.SeqIO.UniprotIO.py which supports the "uniprot-xml" format. | |
| """ | |
| from Bio import SeqFeature | |
| from Bio import SwissProt | |
| from Bio.Seq import Seq | |
| from Bio.SeqRecord import SeqRecord | |
| def SwissIterator(source): | |
| """Break up a Swiss-Prot/UniProt file into SeqRecord objects. | |
| Argument source is a file-like object or a path to a file. | |
| Every section from the ID line to the terminating // becomes | |
| a single SeqRecord with associated annotation and features. | |
| This parser is for the flat file "swiss" format as used by: | |
| - Swiss-Prot aka SwissProt | |
| - TrEMBL | |
| - UniProtKB aka UniProt Knowledgebase | |
| For consistency with BioPerl and EMBOSS we call this the "swiss" | |
| format. See also the SeqIO support for "uniprot-xml" format. | |
| Rather than calling it directly, you are expected to use this | |
| parser via Bio.SeqIO.parse(..., format="swiss") instead. | |
| """ | |
| swiss_records = SwissProt.parse(source) | |
| for swiss_record in swiss_records: | |
| # Convert the SwissProt record to a SeqRecord | |
| record = SeqRecord( | |
| Seq(swiss_record.sequence), | |
| id=swiss_record.accessions[0], | |
| name=swiss_record.entry_name, | |
| description=swiss_record.description, | |
| features=swiss_record.features, | |
| ) | |
| for cross_reference in swiss_record.cross_references: | |
| if len(cross_reference) < 2: | |
| continue | |
| database, accession = cross_reference[:2] | |
| dbxref = f"{database}:{accession}" | |
| if dbxref not in record.dbxrefs: | |
| record.dbxrefs.append(dbxref) | |
| annotations = record.annotations | |
| annotations["molecule_type"] = "protein" | |
| annotations["accessions"] = swiss_record.accessions | |
| if swiss_record.protein_existence: | |
| annotations["protein_existence"] = swiss_record.protein_existence | |
| if swiss_record.created: | |
| date, version = swiss_record.created | |
| annotations["date"] = date | |
| annotations["sequence_version"] = version | |
| if swiss_record.sequence_update: | |
| date, version = swiss_record.sequence_update | |
| annotations["date_last_sequence_update"] = date | |
| annotations["sequence_version"] = version | |
| if swiss_record.annotation_update: | |
| date, version = swiss_record.annotation_update | |
| annotations["date_last_annotation_update"] = date | |
| annotations["entry_version"] = version | |
| if swiss_record.gene_name: | |
| annotations["gene_name"] = swiss_record.gene_name | |
| annotations["organism"] = swiss_record.organism.rstrip(".") | |
| annotations["taxonomy"] = swiss_record.organism_classification | |
| annotations["ncbi_taxid"] = swiss_record.taxonomy_id | |
| if swiss_record.host_organism: | |
| annotations["organism_host"] = swiss_record.host_organism | |
| if swiss_record.host_taxonomy_id: | |
| annotations["host_ncbi_taxid"] = swiss_record.host_taxonomy_id | |
| if swiss_record.comments: | |
| annotations["comment"] = "\n".join(swiss_record.comments) | |
| if swiss_record.references: | |
| annotations["references"] = [] | |
| for reference in swiss_record.references: | |
| feature = SeqFeature.Reference() | |
| feature.comment = " ".join("%s=%s;" % k_v for k_v in reference.comments) | |
| for key, value in reference.references: | |
| if key == "PubMed": | |
| feature.pubmed_id = value | |
| elif key == "MEDLINE": | |
| feature.medline_id = value | |
| elif key == "DOI": | |
| pass | |
| elif key == "AGRICOLA": | |
| pass | |
| else: | |
| raise ValueError(f"Unknown key {key} found in references") | |
| feature.authors = reference.authors | |
| feature.title = reference.title | |
| feature.journal = reference.location | |
| annotations["references"].append(feature) | |
| if swiss_record.keywords: | |
| record.annotations["keywords"] = swiss_record.keywords | |
| yield record | |