Spaces:
No application file
No application file
DrVai-Rag-Testing
/
myenv
/lib
/python3.10
/site-packages
/Bio
/Sequencing
/Applications
/_Novoalign.py
| # Copyright 2009 by Osvaldo Zagordi. All rights reserved. | |
| # Revisions copyright 2010 by Peter Cock. | |
| # | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Command line wrapper for the short read aligner Novoalign by Novocraft.""" | |
| from Bio.Application import _Option, AbstractCommandline | |
class NovoalignCommandline(AbstractCommandline):
    """Command line wrapper for novoalign by Novocraft.

    See www.novocraft.com - novoalign is a short read alignment program.

    Examples
    --------
    >>> from Bio.Sequencing.Applications import NovoalignCommandline
    >>> novoalign_cline = NovoalignCommandline(database='some_db',
    ...                                        readfile='some_seq.txt')
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt

    As with all the Biopython application wrappers, you can also add or
    change options after creating the object:

    >>> novoalign_cline.format = 'PRBnSEQ'
    >>> novoalign_cline.r_method='0.99' # limited valid values
    >>> novoalign_cline.fragment = '250 20' # must be given as a string
    >>> novoalign_cline.miRNA = 100
    >>> print(novoalign_cline)
    novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100

    You would typically run the command line with novoalign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Last checked against version: 2.05.04
    """

    def __init__(self, cmd="novoalign", **kwargs):
        """Initialize the class.

        Builds the list of supported novoalign options (``self.parameters``)
        and then hands ``cmd`` and ``kwargs`` to
        ``AbstractCommandline.__init__``.

        Arguments:
         - cmd - name or path of the executable, ``"novoalign"`` by default.
         - kwargs - initial option values, keyed by the option names declared
           below (e.g. ``database``, ``readfile``, ``format``).

        Every option is declared with ``equate=False``. Each option other
        than the two filename options carries a ``checker_function``
        predicate that returns True for acceptable values.
        """
        READ_FORMAT = ["FA", "SLXFQ", "STDFQ", "ILMFQ", "PRB", "PRBnSEQ"]
        REPORT_FORMAT = ["Native", "Pairwise", "SAM"]
        REPEAT_METHOD = ["None", "Random", "All", "Exhaustive", "0.99"]

        def is_int(value):
            """Return True if value is an int."""
            return isinstance(value, int)

        def is_str(value):
            """Return True if value is a str."""
            return isinstance(value, str)

        def is_repeat_method(value):
            """Return True if the first word of value is a known repeat method.

            Only the first whitespace-separated token is checked because
            'All' and 'Exhaustive' may be followed by a limit. Non-string,
            empty and whitespace-only values are rejected with False rather
            than raising AttributeError/IndexError from inside the checker.
            """
            if not isinstance(value, str):
                return False
            tokens = value.split()
            return bool(tokens) and tokens[0] in REPEAT_METHOD

        def is_fragment_spec(value):
            """Return True if value is a string of exactly two fields.

            Non-string values are rejected with False rather than raising
            AttributeError from inside the checker.
            """
            return isinstance(value, str) and len(value.split()) == 2

        self.parameters = [
            _Option(
                ["-d", "database"], "database filename", filename=True, equate=False
            ),
            _Option(["-f", "readfile"], "read file", filename=True, equate=False),
            _Option(
                ["-F", "format"],
                f"Format of read files.\n\nAllowed values: {', '.join(READ_FORMAT)}",
                checker_function=lambda x: x in READ_FORMAT,
                equate=False,
            ),
            # Alignment scoring options
            _Option(
                ["-t", "threshold"],
                "Threshold for alignment score",
                checker_function=is_int,
                equate=False,
            ),
            _Option(
                ["-g", "gap_open"],
                "Gap opening penalty [default: 40]",
                checker_function=is_int,
                equate=False,
            ),
            _Option(
                ["-x", "gap_extend"],
                "Gap extend penalty [default: 15]",
                checker_function=is_int,
                equate=False,
            ),
            _Option(
                ["-u", "unconverted"],
                "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                "Default: no penalty",
                checker_function=is_int,
                equate=False,
            ),
            # Quality control and read filtering
            _Option(
                ["-l", "good_bases"],
                "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                checker_function=is_int,
                equate=False,
            ),
            _Option(
                ["-h", "homopolymer"],
                "Homopolymer read filter [default: 20; disable: negative value]",
                checker_function=is_int,
                equate=False,
            ),
            # Read preprocessing options
            _Option(
                ["-a", "adapter3"],
                "Strips a 3' adapter sequence prior to alignment.\n\n"
                "With paired ends two adapters can be specified",
                checker_function=is_str,
                equate=False,
            ),
            _Option(
                ["-n", "truncate"],
                "Truncate to specific length before alignment",
                checker_function=is_int,
                equate=False,
            ),
            _Option(
                ["-s", "trimming"],
                "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                "Default: 2",
                checker_function=is_int,
                equate=False,
            ),
            _Option(
                ["-5", "adapter5"],
                "Strips a 5' adapter sequence.\n\n"
                "Similar to -a (adaptor3), but on the 5' end.",
                checker_function=is_str,
                equate=False,
            ),
            # Reporting options
            _Option(
                ["-o", "report"],
                "Specifies the report format.\n\n"
                f"Allowed values: {', '.join(REPORT_FORMAT)}\n"
                "Default: Native",
                checker_function=lambda x: x in REPORT_FORMAT,
                equate=False,
            ),
            _Option(
                ["-Q", "quality"],
                "Lower threshold for an alignment to be reported [default: 0]",
                checker_function=is_int,
                equate=False,
            ),
            _Option(
                ["-R", "repeats"],
                "If score difference is higher, report repeats.\n\n"
                "Otherwise -r read method applies [default: 5]",
                checker_function=is_int,
                equate=False,
            ),
            _Option(
                ["-r", "r_method"],
                "Methods to report reads with multiple matches.\n\n"
                f"Allowed values: {', '.join(REPEAT_METHOD)}\n"
                "'All' and 'Exhaustive' accept limits.",
                checker_function=is_repeat_method,
                equate=False,
            ),
            _Option(
                ["-e", "recorded"],
                "Alignments recorded with score equal to the best.\n\n"
                "Default: 1000 in default read method, otherwise no limit.",
                checker_function=is_int,
                equate=False,
            ),
            _Option(
                ["-q", "qual_digits"],
                "Decimal digits for quality scores [default: 0]",
                checker_function=is_int,
                equate=False,
            ),
            # Paired end options
            _Option(
                ["-i", "fragment"],
                "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                checker_function=is_fragment_spec,
                equate=False,
            ),
            _Option(
                ["-v", "variation"],
                "Structural variation penalty [default: 70]",
                checker_function=is_int,
                equate=False,
            ),
            # miRNA mode
            _Option(
                ["-m", "miRNA"],
                "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                checker_function=is_int,
                equate=False,
            ),
            # Multithreading
            _Option(
                ["-c", "cores"],
                "Number of threads, disabled on free versions [default: number of cores]",
                checker_function=is_int,
                equate=False,
            ),
            # Quality calibrations
            _Option(
                ["-k", "read_cal"],
                "Read quality calibration from file (mismatch counts)",
                checker_function=is_str,
                equate=False,
            ),
            _Option(
                ["-K", "write_cal"],
                "Accumulate mismatch counts and write to file",
                checker_function=is_str,
                equate=False,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
if __name__ == "__main__":
    # Executed directly as a script: run the doctests embedded in this
    # module's docstrings through Biopython's doctest helper.
    from Bio._utils import run_doctest as run_module_doctests

    run_module_doctests()