Spaces:
No application file
No application file
| #!/usr/bin/env python | |
| # Copyright 2004-2005 by Michael Hoffman. All rights reserved. | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Run and process output from the Wise2 package tools. | |
| Bio.Wise contains modules for running and processing the output of | |
| some of the models in the Wise2 package by Ewan Birney available from: | |
| ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/ | |
| http://www.ebi.ac.uk/Wise2/ | |
| Bio.Wise.psw is for protein Smith-Waterman alignments | |
| Bio.Wise.dnal is for Smith-Waterman DNA alignments | |
| """ | |
| import os | |
| import sys | |
| import tempfile | |
| import warnings | |
| from Bio import SeqIO | |
| from Bio import BiopythonDeprecationWarning | |
| warnings.warn( | |
| "The 'Bio.Wise' module is deprecated and will be removed in a future " | |
| "release of Biopython.", | |
| BiopythonDeprecationWarning, | |
| ) | |
| def _build_align_cmdline( | |
| cmdline, pair, output_filename, kbyte=None, force_type=None, quiet=False | |
| ): | |
| """Build a command line string (PRIVATE). | |
| >>> os.environ["WISE_KBYTE"]="300000" | |
| >>> if os.isatty(sys.stderr.fileno()): | |
| ... c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"), | |
| ... "/tmp/output", kbyte=100000) | |
| ... assert c == 'dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/output', c | |
| ... c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"), | |
| ... "/tmp/output_aa") | |
| ... assert c == 'psw -kbyte 300000 seq1.faa seq2.faa > /tmp/output_aa', c | |
| ... else: | |
| ... c = _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"), | |
| ... "/tmp/output", kbyte=100000) | |
| ... assert c == 'dnal -kbyte 100000 -quiet seq1.fna seq2.fna > /tmp/output', c | |
| ... c = _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"), | |
| ... "/tmp/output_aa") | |
| ... assert c == 'psw -kbyte 300000 -quiet seq1.faa seq2.faa > /tmp/output_aa', c | |
| """ | |
| cmdline = cmdline[:] | |
| # XXX: force_type ignored | |
| if kbyte is None: | |
| try: | |
| cmdline.extend(("-kbyte", os.environ["WISE_KBYTE"])) | |
| except KeyError: | |
| pass | |
| else: | |
| cmdline.extend(("-kbyte", str(kbyte))) | |
| if not os.isatty(sys.stderr.fileno()): | |
| cmdline.append("-quiet") | |
| cmdline.extend(pair) | |
| cmdline.extend((">", output_filename)) | |
| if quiet: | |
| cmdline.extend(("2>", "/dev/null")) | |
| return " ".join(cmdline) | |
| def align( | |
| cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False, debug=False | |
| ): | |
| """Run an alignment. Returns a filehandle.""" | |
| if not pair or len(pair) != 2: | |
| raise ValueError(f"Expected pair of filename, not {pair!r}") | |
| output_file = tempfile.NamedTemporaryFile(mode="r") | |
| input_files = ( | |
| tempfile.NamedTemporaryFile(mode="w"), | |
| tempfile.NamedTemporaryFile(mode="w"), | |
| ) | |
| if dry_run: | |
| print( | |
| _build_align_cmdline( | |
| cmdline, pair, output_file.name, kbyte, force_type, quiet | |
| ) | |
| ) | |
| return | |
| for filename, input_file in zip(pair, input_files): | |
| # Pipe the file through Biopython's Fasta parser/writer | |
| # to make sure it conforms to the Fasta standard (in particular, | |
| # Wise2 may choke on long lines in the Fasta file) | |
| records = SeqIO.parse(open(filename), "fasta") | |
| SeqIO.write(records, input_file, "fasta") | |
| input_file.flush() | |
| input_file_names = [input_file.name for input_file in input_files] | |
| cmdline_str = _build_align_cmdline( | |
| cmdline, input_file_names, output_file.name, kbyte, force_type, quiet | |
| ) | |
| if debug: | |
| sys.stderr.write(f"{cmdline_str}\n") | |
| status = os.system(cmdline_str) >> 8 | |
| # `status` here will be >1 for error codes >=256 | |
| if status > 1: | |
| if kbyte != 0: # possible memory problem; could be None | |
| sys.stderr.write("INFO trying again with the linear model\n") | |
| return align(cmdline, pair, 0, force_type, dry_run, quiet, debug) | |
| else: | |
| raise OSError(f"{' '.join(cmdline)} returned {status}") | |
| return output_file | |
| def all_pairs(singles): | |
| """Generate pairs list for all-against-all alignments. | |
| >>> all_pairs(range(4)) | |
| [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)] | |
| """ | |
| pairs = [] | |
| singles = list(singles) | |
| while singles: | |
| suitor = singles.pop(0) # if sorted, stay sorted | |
| pairs.extend((suitor, single) for single in singles) | |
| return pairs | |
| def main(): | |
| """Provision for command line testing.""" | |
| pass | |
| def _test(*args, **keywds): | |
| import doctest | |
| doctest.testmod(sys.modules[__name__], *args, **keywds) | |
| if __name__ == "__main__": | |
| if __debug__: | |
| _test() | |
| main() | |