Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /Sequencing /Applications /_Novoalign.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 2 years ago

raw

history blame contribute delete

8.59 kB

	# Copyright 2009 by Osvaldo Zagordi. All rights reserved.
	# Revisions copyright 2010 by Peter Cock.
	#
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Command line wrapper for the short read aligner Novoalign by Novocraft."""


	from Bio.Application import _Option, AbstractCommandline


	class NovoalignCommandline(AbstractCommandline):
	    """Command line wrapper for novoalign by Novocraft.

	    See www.novocraft.com - novoalign is a short read alignment program.

	    Examples
	    --------
	    >>> from Bio.Sequencing.Applications import NovoalignCommandline
	    >>> novoalign_cline = NovoalignCommandline(database='some_db',
	    ...                                        readfile='some_seq.txt')
	    >>> print(novoalign_cline)
	    novoalign -d some_db -f some_seq.txt

	    As with all the Biopython application wrappers, you can also add or
	    change options after creating the object:

	    >>> novoalign_cline.format = 'PRBnSEQ'
	    >>> novoalign_cline.r_method='0.99' # limited valid values
	    >>> novoalign_cline.fragment = '250 20' # must be given as a string
	    >>> novoalign_cline.miRNA = 100
	    >>> print(novoalign_cline)
	    novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100

	    You would typically run the command line with novoalign_cline() or via
	    the Python subprocess module, as described in the Biopython tutorial.

	    Last checked against version: 2.05.04

	    """

	    def __init__(self, cmd="novoalign", **kwargs):
	        """Initialize the class.

	        Builds the list of supported novoalign options in ``self.parameters``
	        and then hands ``cmd`` and ``kwargs`` to ``AbstractCommandline``.

	        Parameters
	        ----------
	        cmd : str
	            Name (or path) of the executable, "novoalign" by default.
	        **kwargs
	            Initial option values keyed by option name, for example
	            ``database='some_db'`` as in the class docstring.

	        """
	        READ_FORMAT = ["FA", "SLXFQ", "STDFQ", "ILMFQ", "PRB", "PRBnSEQ"]
	        REPORT_FORMAT = ["Native", "Pairwise", "SAM"]
	        REPEAT_METHOD = ["None", "Random", "All", "Exhaustive", "0.99"]

	        # Pre-joined lists of allowed values used in the help texts below.
	        read_formats = ", ".join(READ_FORMAT)
	        report_formats = ", ".join(REPORT_FORMAT)
	        repeat_methods = ", ".join(REPEAT_METHOD)

	        # Shared value checkers; each returns True when the value is accepted.
	        def is_int(value):
	            return isinstance(value, int)

	        def is_str(value):
	            return isinstance(value, str)

	        def is_repeat_method(value):
	            # Only the first whitespace-separated token names the method;
	            # anything after it (e.g. a limit for 'All') is not inspected.
	            # Non-strings and empty strings are rejected by returning False
	            # rather than raising AttributeError/IndexError in the checker.
	            if not isinstance(value, str):
	                return False
	            tokens = value.split()
	            return bool(tokens) and tokens[0] in REPEAT_METHOD

	        def is_fragment(value):
	            # Expects one string holding exactly two whitespace-separated
	            # fields, such as '250 20'; non-strings are rejected, not raised on.
	            return isinstance(value, str) and len(value.split()) == 2

	        self.parameters = [
	            _Option(
	                ["-d", "database"], "database filename", filename=True, equate=False
	            ),
	            _Option(["-f", "readfile"], "read file", filename=True, equate=False),
	            _Option(
	                ["-F", "format"],
	                f"Format of read files.\n\nAllowed values: {read_formats}",
	                checker_function=lambda x: x in READ_FORMAT,
	                equate=False,
	            ),
	            # Alignment scoring options
	            _Option(
	                ["-t", "threshold"],
	                "Threshold for alignment score",
	                checker_function=is_int,
	                equate=False,
	            ),
	            _Option(
	                ["-g", "gap_open"],
	                "Gap opening penalty [default: 40]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            _Option(
	                ["-x", "gap_extend"],
	                "Gap extend penalty [default: 15]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            _Option(
	                ["-u", "unconverted"],
	                "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
	                "Default: no penalty",
	                checker_function=is_int,
	                equate=False,
	            ),
	            # Quality control and read filtering
	            _Option(
	                ["-l", "good_bases"],
	                "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            _Option(
	                ["-h", "homopolymer"],
	                "Homopolymer read filter [default: 20; disable: negative value]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            # Read preprocessing options
	            _Option(
	                ["-a", "adapter3"],
	                "Strips a 3' adapter sequence prior to alignment.\n\n"
	                "With paired ends two adapters can be specified",
	                checker_function=is_str,
	                equate=False,
	            ),
	            _Option(
	                ["-n", "truncate"],
	                "Truncate to specific length before alignment",
	                checker_function=is_int,
	                equate=False,
	            ),
	            _Option(
	                ["-s", "trimming"],
	                "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
	                "Default: 2",
	                checker_function=is_int,
	                equate=False,
	            ),
	            _Option(
	                ["-5", "adapter5"],
	                "Strips a 5' adapter sequence.\n\n"
	                "Similar to -a (adapter3), but on the 5' end.",
	                checker_function=is_str,
	                equate=False,
	            ),
	            # Reporting options
	            _Option(
	                ["-o", "report"],
	                "Specifies the report format.\n\n"
	                f"Allowed values: {report_formats}\nDefault: Native",
	                checker_function=lambda x: x in REPORT_FORMAT,
	                equate=False,
	            ),
	            _Option(
	                ["-Q", "quality"],
	                "Lower threshold for an alignment to be reported [default: 0]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            _Option(
	                ["-R", "repeats"],
	                "If score difference is higher, report repeats.\n\n"
	                "Otherwise -r read method applies [default: 5]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            _Option(
	                ["-r", "r_method"],
	                "Methods to report reads with multiple matches.\n\n"
	                f"Allowed values: {repeat_methods}\n"
	                "'All' and 'Exhaustive' accept limits.",
	                checker_function=is_repeat_method,
	                equate=False,
	            ),
	            _Option(
	                ["-e", "recorded"],
	                "Alignments recorded with score equal to the best.\n\n"
	                "Default: 1000 in default read method, otherwise no limit.",
	                checker_function=is_int,
	                equate=False,
	            ),
	            _Option(
	                ["-q", "qual_digits"],
	                "Decimal digits for quality scores [default: 0]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            # Paired end options
	            _Option(
	                ["-i", "fragment"],
	                "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
	                checker_function=is_fragment,
	                equate=False,
	            ),
	            _Option(
	                ["-v", "variation"],
	                "Structural variation penalty [default: 70]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            # miRNA mode
	            _Option(
	                ["-m", "miRNA"],
	                "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            # Multithreading
	            _Option(
	                ["-c", "cores"],
	                "Number of threads, disabled on free versions [default: number of cores]",
	                checker_function=is_int,
	                equate=False,
	            ),
	            # Quality calibrations
	            _Option(
	                ["-k", "read_cal"],
	                "Read quality calibration from file (mismatch counts)",
	                checker_function=is_str,
	                equate=False,
	            ),
	            _Option(
	                ["-K", "write_cal"],
	                "Accumulate mismatch counts and write to file",
	                checker_function=is_str,
	                equate=False,
	            ),
	        ]
	        # self.parameters is assigned before the base initializer runs,
	        # keeping the original statement order.
	        super().__init__(cmd, **kwargs)


	# Script entry point: executing this module directly runs its doctests.
	if __name__ == "__main__":
	    from Bio._utils import run_doctest as _run_module_doctests

	    _run_module_doctests()