Spaces:
No application file
No application file
| # Copyright 2009 by Michiel de Hoon. All rights reserved. | |
| # This code is part of the Biopython distribution and governed by its | |
| # license. Please see the LICENSE file that should have been included | |
| # as part of this package. | |
| """Code for calling and parsing ScanProsite from ExPASy.""" | |
| # NOTE: the names below are imported without a leading underscore, but they are not intended for reuse | |
| from urllib.request import urlopen | |
| from urllib.parse import urlencode | |
| from xml.sax import handler | |
| from xml.sax.expatreader import ExpatParser | |
class Record(list):
    """Represents search results returned by ScanProsite.

    This record is a list containing the search results returned by
    ScanProsite; the parser in this module appends one dictionary per
    ``match`` element. The record also contains the data members n_match,
    n_seq, capped, and warning.

    Attributes:
     - n_match: set by the parser from the ``n_match`` attribute of the
       ``matchset`` element (as an int); None until then.
     - n_seq: set by the parser from the ``n_seq`` attribute of the
       ``matchset`` element (as an int); None until then.
     - capped: initialised to None; not assigned by the parser in this
       module.
     - warning: initialised to None; not assigned by the parser in this
       module.

    """

    def __init__(self):
        """Initialize an empty record with all data members set to None."""
        # Run the list initializer explicitly so the base class is always
        # set up, also when Record is used in cooperative inheritance.
        super().__init__()
        self.n_match = None
        self.n_seq = None
        self.capped = None
        self.warning = None
| # On October 28th 2020 it was noticed that, at some point between October 10th | |
| # and October 28th 2020, the main URL of PROSITE had changed from | |
| # https://www.expasy.org to https://prosite.expasy.org. The default mirror was | |
| # therefore changed from https://www.expasy.org to https://prosite.expasy.org. | |
def scan(seq="", mirror="https://prosite.expasy.org", output="xml", **keywords):
    """Run a ScanProsite search and return a handle to the raw results.

    Arguments:
     - mirror:   The ScanProsite mirror to be used
                 (default: https://prosite.expasy.org).
     - seq:      The query sequence, or UniProtKB (Swiss-Prot,
                 TrEMBL) accession
     - output:   Format of the search results
                 (default: xml)

    Any additional search parameter can be given as a keyword argument;
    keywords whose value is None are left out of the request. See the
    documentation for programmatic access to ScanProsite at
    https://prosite.expasy.org/scanprosite/scanprosite_doc.html
    for a description of such parameters.

    The return value is the handle obtained by opening the search URL.
    Results in the XML format can be turned into a Python object with the
    Bio.ExPASy.ScanProsite.read function.
    """
    query = {"seq": seq, "output": output}
    # Only forward the optional parameters that were actually given a value.
    query.update(
        (name, setting) for name, setting in keywords.items() if setting is not None
    )
    return urlopen(f"{mirror}/cgi-bin/prosite/PSScan.cgi?{urlencode(query)}")
def read(handle):
    """Parse search results returned by ScanProsite into a Python object.

    The handle is run through the module's SAX parser; the object returned
    is the record that the content handler created while parsing.
    """
    sax_handler = ContentHandler()
    reader = Parser()
    reader.setContentHandler(sax_handler)
    reader.parse(handle)
    # The handler creates its record when it sees the root matchset element.
    return sax_handler.record
| # The classes below are considered private | |
class Parser(ExpatParser):
    """Process the result from a ScanProsite search (PRIVATE).

    This is an Expat SAX parser that inspects the first chunk of data it is
    fed and refuses anything that does not start with an XML declaration.
    """

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # True until the first chunk of data has been checked by feed().
        self.firsttime = True

    def feed(self, data, isFinal=0):
        """Raise an Error if plain text is received in the data.

        This is to show the Error messages returned by ScanProsite.

        Arguments:
         - data: the next chunk of the document, as bytes or as str
           (SAX feeds str when parsing from a text-mode handle).
         - isFinal: passed on unchanged to ExpatParser.feed.

        Raises ValueError, with the offending chunk as its argument, if the
        first chunk does not start with ``<?xml``.
        """
        # Error messages returned by the ScanProsite server are formatted
        # as plain text instead of an XML document. To catch such error
        # messages, we override the feed method of the Expat parser.
        # The error message is (hopefully) contained in the data that was just
        # fed to the parser.
        if self.firsttime:
            # Compare in the chunk's own type instead of decoding it: str
            # has no decode() method, and decoding a 5-byte slice of bytes
            # can fail on non-UTF-8 data or on a split multi-byte character,
            # which would hide the server's message.
            xml_start = "<?xml" if isinstance(data, str) else b"<?xml"
            if data[:5] != xml_start:
                raise ValueError(data)
            self.firsttime = False
        return ExpatParser.feed(self, data, isFinal)
class ContentHandler(handler.ContentHandler):
    """Process and fill in the records, results of the search (PRIVATE).

    The handler keeps a stack of the currently open element names. When the
    root ``matchset`` element opens, a new Record is created; every
    ``match`` element directly below it adds a dictionary to the record,
    and every element directly below a ``match`` becomes a key in that
    dictionary, with the element's text as its value.
    """

    # Child elements of <match> whose text is converted to int.
    integers = ("start", "stop")
    # Child elements of <match> known to hold strings. Elements that are in
    # neither tuple are stored as strings as well.
    strings = (
        "sequence_ac",
        "sequence_id",
        "sequence_db",
        "signature_ac",
        "level",
        "level_tag",
    )

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # Stack with the names of the elements that are currently open.
        self.element = []
        # Text collected since the most recent start tag.
        self.content = ""
        # NOTE: self.record is deliberately not set here; it is created
        # when the root matchset element is encountered.

    def startElement(self, name, attrs):
        """Define the beginning of a record and stores the search record."""
        self.element.append(name)
        self.content = ""
        if self.element == ["matchset"]:
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            # Each match is stored as a dictionary, filled in by endElement.
            self.record.append({})

    def endElement(self, name):
        """Define the end of the search record."""
        # Pop outside of the assert statement: assert statements are removed
        # when Python runs with -O, and the stack must shrink regardless,
        # otherwise no element below <match> would ever be recognised.
        closed = self.element.pop()
        assert name == closed
        if self.element == ["matchset", "match"]:
            match = self.record[-1]
            if name in ContentHandler.integers:
                match[name] = int(self.content)
            else:
                # Known string fields and unknown fields alike are stored
                # as strings.
                match[name] = self.content

    def characters(self, content):
        """Store the record content."""
        # The parser may deliver the text of one element in several pieces.
        self.content += content