Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /Align /Applications /_Dialign.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 2 years ago

raw

history blame contribute delete

10.1 kB

	# Copyright 2009 by Cymon J. Cox. All rights reserved.
	#
	# This file is part of the Biopython distribution and governed by your
	# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
	# Please see the LICENSE file that should have been included as part of this
	# package.
	"""Command line wrapper for the multiple alignment program DIALIGN2-2."""

	from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline


	class DialignCommandline(AbstractCommandline):
	    """Command line wrapper for the multiple alignment program DIALIGN2-2.

	    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

	    Notes
	    -----
	    Last checked against version: 2.2

	    Each command line flag is exposed as a keyword argument named after
	    the flag without its leading dash (e.g. ``-fa`` becomes ``fa``).
	    Flags declared as switches take no value; flags declared as options
	    (``fn``, ``lmax``, ``mot``, ``smin``, ``stars``, ``thr``) take a value
	    that is written after the flag, separated by a space.

	    ``smin`` takes an integer value (the ``-lgs`` help text spells it as
	    ``-smin 8``); it is therefore declared as an option rather than as a
	    value-less switch.

	    References
	    ----------
	    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
	    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

	    Examples
	    --------
	    To align a FASTA file (unaligned.fasta) with the output files names
	    aligned.* including a FASTA output file (aligned.fa), use:

	    >>> from Bio.Align.Applications import DialignCommandline
	    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
	    ...                                    fn="aligned", fa=True)
	    >>> print(dialign_cline)
	    dialign2-2 -fa -fn aligned unaligned.fasta

	    You would typically run the command line with dialign_cline() or via
	    the Python subprocess module, as described in the Biopython tutorial.

	    """

	    def __init__(self, cmd="dialign2-2", **kwargs):
	        """Initialize the class.

	        Parameters
	        ----------
	        cmd : str
	            Name (or path) of the DIALIGN executable; defaults to
	            ``"dialign2-2"``.
	        **kwargs
	            Values for the parameters declared below, keyed by the
	            parameter name (the flag without its leading dash, or
	            ``input`` for the sequence file). They are forwarded
	            unchanged to ``AbstractCommandline.__init__``.
	        """
	        self.program_name = cmd
	        # The parameter table must be in place before the base class
	        # initializer is called, since that is where **kwargs are applied.
	        self.parameters = [
	            _Switch(
	                ["-afc", "afc"],
	                r"Creates additional output file '\*.afc' "
	                "containing data of all fragments considered "
	                "for alignment WARNING: this file can be HUGE !",
	            ),
	            _Switch(
	                ["-afc_v", "afc_v"],
	                "Like '-afc' but verbose: fragments are explicitly "
	                "printed. WARNING: this file can be EVEN BIGGER !",
	            ),
	            _Switch(
	                ["-anc", "anc"],
	                "Anchored alignment. Requires a file <seq_file>.anc "
	                "containing anchor points.",
	            ),
	            _Switch(
	                ["-cs", "cs"],
	                "If segments are translated, not only the 'Watson "
	                "strand' but also the 'Crick strand' is looked at.",
	            ),
	            _Switch(["-cw", "cw"], "Additional output file in CLUSTAL W format."),
	            _Switch(
	                ["-ds", "ds"],
	                "'dna alignment speed up' - non-translated nucleic acid "
	                "fragments are taken into account only if they start "
	                "with at least two matches. Speeds up DNA alignment at "
	                "the expense of sensitivity.",
	            ),
	            _Switch(["-fa", "fa"], "Additional output file in FASTA format."),
	            _Switch(
	                ["-ff", "ff"],
	                r"Creates file \*.frg containing information about all "
	                "fragments that are part of the respective optimal "
	                "pairwise alignmnets plus information about "
	                "consistency in the multiple alignment",
	            ),
	            _Option(
	                ["-fn", "fn"],
	                "Output files are named <out_file>.<extension>.",
	                equate=False,
	            ),
	            _Switch(
	                ["-fop", "fop"],
	                r"Creates file \*.fop containing coordinates of all "
	                "fragments that are part of the respective pairwise alignments.",
	            ),
	            _Switch(
	                ["-fsm", "fsm"],
	                r"Creates file \*.fsm containing coordinates of all "
	                "fragments that are part of the final alignment",
	            ),
	            _Switch(
	                ["-iw", "iw"],
	                "Overlap weights switched off (by default, overlap "
	                "weights are used if up to 35 sequences are aligned). "
	                "This option speeds up the alignment but may lead "
	                "to reduced alignment quality.",
	            ),
	            _Switch(
	                ["-lgs", "lgs"],
	                "'long genomic sequences' - combines the following "
	                "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
	                "-fop, -ff, -cs, -ds, -pst ",
	            ),
	            _Switch(
	                ["-lgs_t", "lgs_t"],
	                "Like '-lgs' but with all segment pairs assessed "
	                "at the peptide level (rather than 'mixed alignments' "
	                "as with the '-lgs' option). Therefore faster than "
	                "-lgs but not very sensitive for non-coding regions.",
	            ),
	            _Option(
	                ["-lmax", "lmax"],
	                "Maximum fragment length = x (default: x = 40 or "
	                "x = 120 for 'translated' fragments). Shorter x "
	                "speeds up the program but may affect alignment quality.",
	                checker_function=lambda x: isinstance(x, int),
	                equate=False,
	            ),
	            _Switch(
	                ["-lo", "lo"],
	                r"(Long Output) Additional file \*.log with information "
	                "about fragments selected for pairwise alignment and "
	                "about consistency in multi-alignment procedure.",
	            ),
	            _Switch(
	                ["-ma", "ma"],
	                "'mixed alignments' consisting of P-fragments and "
	                "N-fragments if nucleic acid sequences are aligned.",
	            ),
	            _Switch(
	                ["-mask", "mask"],
	                "Residues not belonging to selected fragments are "
	                r"replaced by '\*' characters in output alignment "
	                "(rather than being printed in lower-case characters)",
	            ),
	            _Switch(
	                ["-mat", "mat"],
	                r"Creates file \*mat with substitution counts derived "
	                "from the fragments that have been selected for alignment.",
	            ),
	            # NOTE(review): the help text refers to a threshold "t", yet
	            # this is declared as a value-less switch, so no threshold can
	            # be supplied. Left unchanged; confirm against the DIALIGN
	            # manual whether this should be an _Option as well.
	            _Switch(
	                ["-mat_thr", "mat_thr"],
	                "Like '-mat' but only fragments with weight score "
	                "> t are considered",
	            ),
	            _Switch(
	                ["-max_link", "max_link"],
	                "'maximum linkage' clustering used to construct "
	                "sequence tree (instead of UPGMA).",
	            ),
	            _Switch(["-min_link", "min_link"], "'minimum linkage' clustering used."),
	            _Option(["-mot", "mot"], "'motif' option.", equate=False),
	            _Switch(["-msf", "msf"], "Separate output file in MSF format."),
	            _Switch(
	                ["-n", "n"],
	                "Input sequences are nucleic acid sequences. "
	                "No translation of fragments.",
	            ),
	            _Switch(
	                ["-nt", "nt"],
	                "Input sequences are nucleic acid sequences and "
	                "'nucleic acid segments' are translated to 'peptide "
	                "segments'.",
	            ),
	            _Switch(
	                ["-nta", "nta"],
	                "'no textual alignment' - textual alignment suppressed. "
	                "This option makes sense if other output files are of "
	                "interest -- e.g. the fragment files created with -ff, "
	                "-fop, -fsm or -lo.",
	            ),
	            _Switch(
	                ["-o", "o"],
	                "Fast version, resulting alignments may be slightly different.",
	            ),
	            _Switch(
	                ["-ow", "ow"],
	                "Overlap weights enforced (By default, overlap weights "
	                "are used only if up to 35 sequences are aligned since "
	                "calculating overlap weights is time consuming).",
	            ),
	            _Switch(
	                ["-pst", "pst"],
	                r"'print status'. Creates and updates a file \*.sta with "
	                "information about the current status of the program "
	                "run. This option is recommended if large data sets "
	                "are aligned since it allows the user to estimate the "
	                "remaining running time.",
	            ),
	            # -smin carries a value (cf. "-smin 8" in the -lgs help above),
	            # so it is an option, not a switch. bool is rejected explicitly
	            # because it is a subclass of int: a legacy smin=True would
	            # otherwise be rendered as the nonsensical "-smin True".
	            _Option(
	                ["-smin", "smin"],
	                "Minimum similarity value for first residue pair "
	                "(or codon pair) in fragments. Speeds up protein "
	                "alignment or alignment of translated DNA fragments "
	                "at the expense of sensitivity.",
	                checker_function=lambda x: isinstance(x, int)
	                and not isinstance(x, bool),
	                equate=False,
	            ),
	            _Option(
	                ["-stars", "stars"],
	                r"Maximum number of '\*' characters indicating degree "
	                "of local similarity among sequences. By default, no "
	                "stars are used but numbers between 0 and 9, instead.",
	                checker_function=lambda x: x in range(0, 10),
	                equate=False,
	            ),
	            _Switch(["-stdo", "stdo"], "Results written to standard output."),
	            _Switch(
	                ["-ta", "ta"],
	                "Standard textual alignment printed (overrides "
	                "suppression of textual alignments in special "
	                "options, e.g. -lgs)",
	            ),
	            _Option(
	                ["-thr", "thr"],
	                "Threshold T = x.",
	                checker_function=lambda x: isinstance(x, int),
	                equate=False,
	            ),
	            _Switch(
	                ["-xfr", "xfr"],
	                "'exclude fragments' - list of fragments can be "
	                "specified that are NOT considered for pairwise alignment",
	            ),
	            # The sequence file is a bare positional argument and is
	            # declared last in the parameter table.
	            _Argument(
	                ["input"],
	                "Input file name. Must be FASTA format",
	                filename=True,
	                is_required=True,
	            ),
	        ]
	        super().__init__(cmd, **kwargs)

	if __name__ == "__main__":
	    # When executed as a script, run the doctests embedded in this
	    # module's docstrings via Biopython's own helper.
	    import Bio._utils as _bio_utils

	    _bio_utils.run_doctest()