Spaces:
No application file
No application file
| # Copyright 2009 by Cymon J. Cox. All rights reserved. | |
| # | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Command line wrapper for the multiple alignment program DIALIGN2-2.""" | |
| from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline | |
class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Notes
    -----
    Last checked against version: 2.2

    The ``smin`` and ``mat_thr`` parameters carry a value (for example
    ``smin=8`` gives ``-smin 8``), like ``lmax``, ``thr`` and ``stars``.
    Passing a boolean to them is rejected, because a bare ``-smin`` or
    ``-mat_thr`` without its value is not a complete DIALIGN option.

    References
    ----------
    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Examples
    --------
    To align a FASTA file (unaligned.fasta) with the output files names
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Initialize the class.

        Parameters
        ----------
        cmd : str
            Name (or path) of the DIALIGN executable.
        **kwargs
            Initial values for any of the parameters declared below,
            keyed by the second name in each declaration (e.g. ``fn``,
            ``fa``, ``input``).

        """
        self.program_name = cmd
        # The order of this list is the order in which the arguments are
        # emitted on the command line; the input file must stay last.
        self.parameters = [
            _Switch(
                ["-afc", "afc"],
                r"Creates additional output file '\*.afc' "
                "containing data of all fragments considered "
                "for alignment WARNING: this file can be HUGE !",
            ),
            _Switch(
                ["-afc_v", "afc_v"],
                "Like '-afc' but verbose: fragments are explicitly "
                "printed. WARNING: this file can be EVEN BIGGER !",
            ),
            _Switch(
                ["-anc", "anc"],
                "Anchored alignment. Requires a file <seq_file>.anc "
                "containing anchor points.",
            ),
            _Switch(
                ["-cs", "cs"],
                "If segments are translated, not only the 'Watson "
                "strand' but also the 'Crick strand' is looked at.",
            ),
            _Switch(["-cw", "cw"], "Additional output file in CLUSTAL W format."),
            _Switch(
                ["-ds", "ds"],
                "'dna alignment speed up' - non-translated nucleic acid "
                "fragments are taken into account only if they start "
                "with at least two matches. Speeds up DNA alignment at "
                "the expense of sensitivity.",
            ),
            _Switch(["-fa", "fa"], "Additional output file in FASTA format."),
            _Switch(
                ["-ff", "ff"],
                r"Creates file \*.frg containing information about all "
                "fragments that are part of the respective optimal "
                "pairwise alignmnets plus information about "
                "consistency in the multiple alignment",
            ),
            _Option(
                ["-fn", "fn"],
                "Output files are named <out_file>.<extension>.",
                equate=False,
            ),
            _Switch(
                ["-fop", "fop"],
                r"Creates file \*.fop containing coordinates of all "
                "fragments that are part of the respective pairwise alignments.",
            ),
            _Switch(
                ["-fsm", "fsm"],
                r"Creates file \*.fsm containing coordinates of all "
                "fragments that are part of the final alignment",
            ),
            _Switch(
                ["-iw", "iw"],
                "Overlap weights switched off (by default, overlap "
                "weights are used if up to 35 sequences are aligned). "
                "This option speeds up the alignment but may lead "
                "to reduced alignment quality.",
            ),
            _Switch(
                ["-lgs", "lgs"],
                "'long genomic sequences' - combines the following "
                "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                "-fop, -ff, -cs, -ds, -pst ",
            ),
            _Switch(
                ["-lgs_t", "lgs_t"],
                "Like '-lgs' but with all segment pairs assessed "
                "at the peptide level (rather than 'mixed alignments' "
                "as with the '-lgs' option). Therefore faster than "
                "-lgs but not very sensitive for non-coding regions.",
            ),
            _Option(
                ["-lmax", "lmax"],
                "Maximum fragment length = x (default: x = 40 or "
                "x = 120 for 'translated' fragments). Shorter x "
                "speeds up the program but may affect alignment quality.",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Switch(
                ["-lo", "lo"],
                r"(Long Output) Additional file \*.log with information "
                "about fragments selected for pairwise alignment and "
                "about consistency in multi-alignment procedure.",
            ),
            _Switch(
                ["-ma", "ma"],
                "'mixed alignments' consisting of P-fragments and "
                "N-fragments if nucleic acid sequences are aligned.",
            ),
            _Switch(
                ["-mask", "mask"],
                "Residues not belonging to selected fragments are "
                r"replaced by '\*' characters in output alignment "
                "(rather than being printed in lower-case characters)",
            ),
            _Switch(
                ["-mat", "mat"],
                r"Creates file \*mat with substitution counts derived "
                "from the fragments that have been selected for alignment.",
            ),
            # '-mat_thr' carries the threshold t named in its help text, so
            # it is a value option rather than a bare switch. Booleans are
            # rejected so that a legacy mat_thr=True fails loudly instead of
            # emitting '-mat_thr True'.
            _Option(
                ["-mat_thr", "mat_thr"],
                "Like '-mat' but only fragments with weight score "
                "> t are considered",
                checker_function=lambda x: isinstance(x, (int, float))
                and not isinstance(x, bool),
                equate=False,
            ),
            _Switch(
                ["-max_link", "max_link"],
                "'maximum linkage' clustering used to construct "
                "sequence tree (instead of UPGMA).",
            ),
            _Switch(["-min_link", "min_link"], "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"], "'motif' option.", equate=False),
            _Switch(["-msf", "msf"], "Separate output file in MSF format."),
            _Switch(
                ["-n", "n"],
                "Input sequences are nucleic acid sequences. "
                "No translation of fragments.",
            ),
            _Switch(
                ["-nt", "nt"],
                "Input sequences are nucleic acid sequences and "
                "'nucleic acid segments' are translated to 'peptide "
                "segments'.",
            ),
            _Switch(
                ["-nta", "nta"],
                "'no textual alignment' - textual alignment suppressed. "
                "This option makes sense if other output files are of "
                "interest -- e.g. the fragment files created with -ff, "
                "-fop, -fsm or -lo.",
            ),
            _Switch(
                ["-o", "o"],
                "Fast version, resulting alignments may be slightly different.",
            ),
            _Switch(
                ["-ow", "ow"],
                "Overlap weights enforced (By default, overlap weights "
                "are used only if up to 35 sequences are aligned since "
                "calculating overlap weights is time consuming).",
            ),
            _Switch(
                ["-pst", "pst"],
                r"'print status'. Creates and updates a file \*.sta with "
                "information about the current status of the program "
                "run. This option is recommended if large data sets "
                "are aligned since it allows the user to estimate the "
                "remaining running time.",
            ),
            # '-smin' carries a value (the '-lgs' help text above expands to
            # '-smin 8'), so it is a value option rather than a bare switch.
            # Booleans are rejected so that a legacy smin=True fails loudly
            # instead of emitting '-smin True'.
            _Option(
                ["-smin", "smin"],
                "Minimum similarity value for first residue pair "
                "(or codon pair) in fragments. Speeds up protein "
                "alignment or alignment of translated DNA fragments "
                "at the expense of sensitivity.",
                checker_function=lambda x: isinstance(x, int)
                and not isinstance(x, bool),
                equate=False,
            ),
            _Option(
                ["-stars", "stars"],
                r"Maximum number of '\*' characters indicating degree "
                "of local similarity among sequences. By default, no "
                "stars are used but numbers between 0 and 9, instead.",
                checker_function=lambda x: x in range(0, 10),
                equate=False,
            ),
            _Switch(["-stdo", "stdo"], "Results written to standard output."),
            _Switch(
                ["-ta", "ta"],
                "Standard textual alignment printed (overrides "
                "suppression of textual alignments in special "
                "options, e.g. -lgs)",
            ),
            _Option(
                ["-thr", "thr"],
                "Threshold T = x.",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Switch(
                ["-xfr", "xfr"],
                "'exclude fragments' - list of fragments can be "
                "specified that are NOT considered for pairwise alignment",
            ),
            _Argument(
                ["input"],
                "Input file name. Must be FASTA format",
                filename=True,
                is_required=True,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module
    # through Biopython's shared doctest helper.
    from Bio import _utils

    _utils.run_doctest()