Spaces:
No application file
No application file
DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /SearchIO /ExonerateIO /exonerate_cigar.py
| # Copyright 2012 by Wibowo Arindrarto. All rights reserved. | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Bio.SearchIO parser for Exonerate cigar output format.""" | |
| import re | |
| from ._base import _BaseExonerateParser, _STRAND_MAP | |
| from .exonerate_vulgar import ExonerateVulgarIndexer | |
| __all__ = ("ExonerateCigarParser", "ExonerateCigarIndexer") | |
| # precompile regex | |
| _RE_CIGAR = re.compile( | |
| r"""^cigar:\s+ | |
| (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+ # query: ID, start, end, strand | |
| (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+ # hit: ID, start, end, strand | |
| (\d+)(\s+.*)$ # score, vulgar components | |
| """, | |
| re.VERBOSE, | |
| ) | |
| class ExonerateCigarParser(_BaseExonerateParser): | |
| """Parser for Exonerate cigar strings.""" | |
| _ALN_MARK = "cigar" | |
| def parse_alignment_block(self, header): | |
| """Parse alignment block for cigar format, return query results, hits, hsps.""" | |
| qresult = header["qresult"] | |
| hit = header["hit"] | |
| hsp = header["hsp"] | |
| self.read_until(lambda line: line.startswith("cigar")) | |
| cigars = re.search(_RE_CIGAR, self.line) | |
| # if the file has c4 alignments | |
| # check if cigar values match our previously parsed header values | |
| if self.has_c4_alignment: | |
| assert qresult["id"] == cigars.group(1) | |
| assert hsp["query_start"] == cigars.group(2) | |
| assert hsp["query_end"] == cigars.group(3) | |
| assert hsp["query_strand"] == cigars.group(4) | |
| assert hit["id"] == cigars.group(5) | |
| assert hsp["hit_start"] == cigars.group(6) | |
| assert hsp["hit_end"] == cigars.group(7) | |
| assert hsp["hit_strand"] == cigars.group(8) | |
| assert hsp["score"] == cigars.group(9) | |
| else: | |
| qresult["id"] = cigars.group(1) | |
| hsp["query_start"] = cigars.group(2) | |
| hsp["query_end"] = cigars.group(3) | |
| hsp["query_strand"] = cigars.group(4) | |
| hit["id"] = cigars.group(5) | |
| hsp["hit_start"] = cigars.group(6) | |
| hsp["hit_end"] = cigars.group(7) | |
| hsp["hit_strand"] = cigars.group(8) | |
| hsp["score"] = cigars.group(9) | |
| # adjust strands | |
| hsp["query_strand"] = _STRAND_MAP[hsp["query_strand"]] | |
| hsp["hit_strand"] = _STRAND_MAP[hsp["hit_strand"]] | |
| # cast coords into ints | |
| qstart = int(hsp["query_start"]) | |
| qend = int(hsp["query_end"]) | |
| hstart = int(hsp["hit_start"]) | |
| hend = int(hsp["hit_end"]) | |
| # set coords (start <= end) | |
| hsp["query_start"] = min(qstart, qend) | |
| hsp["query_end"] = max(qstart, qend) | |
| hsp["hit_start"] = min(hstart, hend) | |
| hsp["hit_end"] = max(hstart, hend) | |
| # cast score into int | |
| hsp["score"] = int(hsp["score"]) | |
| # store cigar components | |
| hsp["cigar_comp"] = cigars.group(10) | |
| # HACK: since we can't really figure out exactly when a | |
| # HSP starts or ends, we set the entire alignment as one HSP | |
| hsp["query_ranges"] = [(hsp["query_start"], hsp["query_end"])] | |
| hsp["hit_ranges"] = [(hsp["hit_start"], hsp["hit_end"])] | |
| return {"qresult": qresult, "hit": hit, "hsp": hsp} | |
| class ExonerateCigarIndexer(ExonerateVulgarIndexer): | |
| """Indexer class for exonerate cigar lines.""" | |
| _parser = ExonerateCigarParser | |
| _query_mark = b"cigar" | |
| def get_qresult_id(self, pos): | |
| """Return the query ID of the nearest cigar line.""" | |
| handle = self._handle | |
| handle.seek(pos) | |
| # get line, check if it's a vulgar line, and get query ID | |
| line = handle.readline() | |
| assert line.startswith(self._query_mark), line | |
| id = re.search(_RE_CIGAR, line.decode()) | |
| return id.group(1) | |
| # if not used as a module, run the doctest | |
| if __name__ == "__main__": | |
| from Bio._utils import run_doctest | |
| run_doctest() | |