Spaces:
No application file
No application file
| # Copyright 2009 by Cymon J. Cox. All rights reserved. | |
| # | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Command line wrapper for the multiple alignment program Clustal W.""" | |
| import os | |
| from Bio.Application import _Option, _Switch, AbstractCommandline | |
class ClustalwCommandline(AbstractCommandline):
    """Command line wrapper for clustalw (version one or two).

    http://www.clustal.org/

    Notes
    -----
    Last checked against versions: 1.83 and 2.1

    References
    ----------
    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
    Bioinformatics, 23, 2947-2948.

    Examples
    --------
    >>> from Bio.Align.Applications import ClustalwCommandline
    >>> in_file = "unaligned.fasta"
    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
    >>> print(clustalw_cline)
    clustalw2 -infile=unaligned.fasta

    You would typically run the command line with clustalw_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.
    """

    # TODO - Should we default to cmd="clustalw2" now?
    def __init__(self, cmd="clustalw", **kwargs):
        """Initialize the class.

        Builds the list of supported Clustal W switches and options in
        ``self.parameters`` and then hands ``cmd`` and ``kwargs`` over to
        ``AbstractCommandline.__init__``.

        Each entry lists the accepted spellings of one argument (dashed,
        upper case, lower case), a short description, and, for options
        that take a value, an optional checker function and/or the
        ``filename=True`` flag.

        Parameters
        ----------
        cmd : str
            Name of (or path to) the executable; defaults to "clustalw".
        **kwargs
            Forwarded unchanged to ``AbstractCommandline.__init__``, e.g.
            ``infile="unaligned.fasta"`` as in the class level example.

        """

        def _is_int(value):
            """Return True if value is an integer."""
            return isinstance(value, int)

        def _is_number(value):
            """Return True if value is an integer or a float."""
            return isinstance(value, (int, float))

        self.parameters = [
            _Option(
                ["-infile", "-INFILE", "INFILE", "infile"],
                "Input sequences.",
                filename=True,
            ),
            _Option(
                ["-profile1", "-PROFILE1", "PROFILE1", "profile1"],
                "Profiles (old alignment).",
                filename=True,
            ),
            _Option(
                ["-profile2", "-PROFILE2", "PROFILE2", "profile2"],
                "Profiles (old alignment).",
                filename=True,
            ),
            # ################# VERBS (do things) #############################
            _Switch(
                ["-options", "-OPTIONS", "OPTIONS", "options"],
                "List the command line parameters",
            ),
            _Switch(
                ["-help", "-HELP", "HELP", "help"], "Outline the command line params."
            ),
            _Switch(
                ["-check", "-CHECK", "CHECK", "check"],
                "Outline the command line params.",
            ),
            _Switch(
                ["-fullhelp", "-FULLHELP", "FULLHELP", "fullhelp"],
                "Output full help content.",
            ),
            _Switch(
                ["-align", "-ALIGN", "ALIGN", "align"], "Do full multiple alignment."
            ),
            _Switch(["-tree", "-TREE", "TREE", "tree"], "Calculate NJ tree."),
            _Switch(
                ["-pim", "-PIM", "PIM", "pim"],
                "Output percent identity matrix (while calculating the tree).",
            ),
            _Option(
                ["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"],
                "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                checker_function=_is_int,
            ),
            _Switch(
                ["-convert", "-CONVERT", "CONVERT", "convert"],
                "Output the input sequences in a different file format.",
            ),
            # #################### PARAMETERS (set things) #########################
            # ***General settings:****
            # -interactive ("read command line, then enter normal interactive
            # menus") is deliberately not wrapped: it makes no sense in
            # Biopython.
            _Switch(
                ["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"],
                "Use FAST algorithm for the alignment guide tree",
            ),
            _Option(
                ["-type", "-TYPE", "TYPE", "type"],
                "PROTEIN or DNA sequences",
                checker_function=lambda x: x in ["PROTEIN", "DNA", "protein", "dna"],
            ),
            _Switch(
                ["-negative", "-NEGATIVE", "NEGATIVE", "negative"],
                "Protein alignment with negative values in matrix",
            ),
            _Option(
                ["-outfile", "-OUTFILE", "OUTFILE", "outfile"],
                "Output sequence alignment file name",
                filename=True,
            ),
            _Option(
                ["-output", "-OUTPUT", "OUTPUT", "output"],
                "Output format: CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA",
                checker_function=lambda x: x
                in [
                    "CLUSTAL",
                    "GCG",
                    "GDE",
                    "PHYLIP",
                    "PIR",
                    "NEXUS",
                    "FASTA",
                    "clustal",
                    "gcg",
                    "gde",
                    "phylip",
                    "pir",
                    "nexus",
                    "fasta",
                ],
            ),
            _Option(
                ["-outorder", "-OUTORDER", "OUTORDER", "outorder"],
                "Output taxon order: INPUT or ALIGNED",
                checker_function=lambda x: x
                in ["INPUT", "input", "ALIGNED", "aligned"],
            ),
            _Option(
                ["-case", "-CASE", "CASE", "case"],
                "LOWER or UPPER (for GDE output only)",
                checker_function=lambda x: x in ["UPPER", "upper", "LOWER", "lower"],
            ),
            _Option(
                ["-seqnos", "-SEQNOS", "SEQNOS", "seqnos"],
                "OFF or ON (for Clustal output only)",
                checker_function=lambda x: x in ["ON", "on", "OFF", "off"],
            ),
            _Option(
                ["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE", "seqno_range"],
                "OFF or ON (NEW- for all output formats)",
                checker_function=lambda x: x in ["ON", "on", "OFF", "off"],
            ),
            _Option(
                ["-range", "-RANGE", "RANGE", "range"],
                "Sequence range to write starting m to m+n. "
                "Input as string eg. '24,200'",
            ),
            _Option(
                ["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"],
                "Maximum allowed input sequence length",
                checker_function=_is_int,
            ),
            _Switch(
                ["-quiet", "-QUIET", "QUIET", "quiet"],
                "Reduce console output to minimum",
            ),
            _Option(
                ["-stats", "-STATS", "STATS", "stats"],
                "Log some alignment statistics to file",
                filename=True,
            ),
            # ***Fast Pairwise Alignments:***
            _Option(
                ["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"],
                "Word size",
                checker_function=_is_number,
            ),
            _Option(
                ["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"],
                "Number of best diags.",
                checker_function=_is_number,
            ),
            _Option(
                ["-window", "-WINDOW", "WINDOW", "window"],
                "Window around best diags.",
                checker_function=_is_number,
            ),
            _Option(
                ["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"],
                "Gap penalty",
                checker_function=_is_number,
            ),
            _Option(
                ["-score", "-SCORE", "SCORE", "score"],
                "Either: PERCENT or ABSOLUTE",
                checker_function=lambda x: x
                in ["percent", "PERCENT", "absolute", "ABSOLUTE"],
            ),
            # ***Slow Pairwise Alignments:***
            _Option(
                ["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                # Either one of the built-in matrix names or an existing file.
                checker_function=lambda x: (
                    x
                    in [
                        "BLOSUM",
                        "PAM",
                        "GONNET",
                        "ID",
                        "blosum",
                        "pam",
                        "gonnet",
                        "id",
                    ]
                    or os.path.exists(x)
                ),
                filename=True,
            ),
            _Option(
                ["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"],
                "DNA weight matrix=IUB, CLUSTALW or filename",
                checker_function=lambda x: (
                    x in ["IUB", "CLUSTALW", "iub", "clustalw"] or os.path.exists(x)
                ),
                filename=True,
            ),
            _Option(
                ["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"],
                "Gap opening penalty",
                checker_function=_is_number,
            ),
            _Option(
                ["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"],
                "Gap extension penalty",
                checker_function=_is_number,
            ),
            # ***Multiple Alignments:***
            _Option(
                ["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
                "Output file name for newly created guide tree",
                filename=True,
            ),
            _Option(
                ["-usetree", "-USETREE", "USETREE", "usetree"],
                "File name of guide tree",
                # The guide tree is an input, so the file must already exist.
                checker_function=os.path.exists,
                filename=True,
            ),
            _Option(
                ["-matrix", "-MATRIX", "MATRIX", "matrix"],
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                checker_function=lambda x: (
                    x
                    in [
                        "BLOSUM",
                        "PAM",
                        "GONNET",
                        "ID",
                        "blosum",
                        "pam",
                        "gonnet",
                        "id",
                    ]
                    or os.path.exists(x)
                ),
                filename=True,
            ),
            _Option(
                ["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"],
                "DNA weight matrix=IUB, CLUSTALW or filename",
                checker_function=lambda x: (
                    x in ["IUB", "CLUSTALW", "iub", "clustalw"] or os.path.exists(x)
                ),
                filename=True,
            ),
            _Option(
                ["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"],
                "Gap opening penalty",
                checker_function=_is_number,
            ),
            _Option(
                ["-gapext", "-GAPEXT", "GAPEXT", "gapext"],
                "Gap extension penalty",
                checker_function=_is_number,
            ),
            _Switch(
                ["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"],
                "No end gap separation pen.",
            ),
            _Option(
                ["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"],
                "Gap separation pen. range",
                checker_function=_is_number,
            ),
            _Switch(
                ["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"], "Residue-specific gaps off"
            ),
            _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"], "Hydrophilic gaps off"),
            # NOTE(review): the description suggests this argument may take a
            # value (a list of residues); kept as a switch so existing callers
            # are unaffected - confirm against the Clustal W help.
            _Switch(
                ["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES", "hgapresidues"],
                "List hydrophilic res.",
            ),
            _Option(
                ["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"],
                "% ident. for delay",
                checker_function=_is_number,
            ),
            # -type is already handled in the General Settings section, but
            # appears a second time under Multiple Alignments in the help.
            _Option(
                ["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"],
                "Transitions weighting",
                checker_function=_is_number,
            ),
            _Option(
                ["-iteration", "-ITERATION", "ITERATION", "iteration"],
                "NONE or TREE or ALIGNMENT",
                checker_function=lambda x: x
                in ["NONE", "TREE", "ALIGNMENT", "none", "tree", "alignment"],
            ),
            _Option(
                ["-numiter", "-NUMITER", "NUMITER", "numiter"],
                "maximum number of iterations to perform",
                checker_function=_is_int,
            ),
            _Switch(
                ["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"],
                "Disable sequence weighting",
            ),
            # ***Profile Alignments:***
            _Switch(
                ["-profile", "-PROFILE", "PROFILE", "profile"],
                "Merge two alignments by profile alignment",
            ),
            _Option(
                ["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"],
                "Output file name for new guide tree of profile1",
                filename=True,
            ),
            _Option(
                ["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"],
                "Output file for new guide tree of profile2",
                filename=True,
            ),
            _Option(
                ["-usetree1", "-USETREE1", "USETREE1", "usetree1"],
                "File name of guide tree for profile1",
                checker_function=os.path.exists,
                filename=True,
            ),
            _Option(
                ["-usetree2", "-USETREE2", "USETREE2", "usetree2"],
                "File name of guide tree for profile2",
                checker_function=os.path.exists,
                filename=True,
            ),
            # ***Sequence to Profile Alignments:***
            _Switch(
                ["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"],
                "Sequentially add profile2 sequences to profile1 alignment",
            ),
            # -newtree and -usetree are already handled in the Multiple
            # Alignments section, but appear a second time here in the help.
            # ***Structure Alignments:***
            _Switch(
                ["-nosecstr1", "-NOSECSTR1", "NOSECSTR1", "nosecstr1"],
                "Do not use secondary structure-gap penalty mask for profile 1",
            ),
            _Switch(
                ["-nosecstr2", "-NOSECSTR2", "NOSECSTR2", "nosecstr2"],
                "Do not use secondary structure-gap penalty mask for profile 2",
            ),
            _Option(
                ["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"],
                "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                checker_function=lambda x: x
                in [
                    "STRUCTURE",
                    "MASK",
                    "BOTH",
                    "NONE",
                    "structure",
                    "mask",
                    "both",
                    "none",
                ],
            ),
            _Option(
                ["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"],
                "Gap penalty for helix core residues",
                checker_function=_is_number,
            ),
            _Option(
                ["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"],
                "gap penalty for strand core residues",
                checker_function=_is_number,
            ),
            _Option(
                ["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"],
                "Gap penalty for loop regions",
                checker_function=_is_number,
            ),
            _Option(
                ["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"],
                "Gap penalty for structure termini",
                checker_function=_is_number,
            ),
            _Option(
                ["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"],
                "Number of residues inside helix to be treated as terminal",
                checker_function=_is_int,
            ),
            _Option(
                ["-helixendout", "-HELIXENDOUT", "HELIXENDOUT", "helixendout"],
                "Number of residues outside helix to be treated as terminal",
                checker_function=_is_int,
            ),
            _Option(
                ["-strandendin", "-STRANDENDIN", "STRANDENDIN", "strandendin"],
                "Number of residues inside strand to be treated as terminal",
                checker_function=_is_int,
            ),
            _Option(
                ["-strandendout", "-STRANDENDOUT", "STRANDENDOUT", "strandendout"],
                "Number of residues outside strand to be treated as terminal",
                checker_function=_is_int,
            ),
            # ***Trees:***
            _Option(
                ["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"],
                "nj OR phylip OR dist OR nexus",
                checker_function=lambda x: x
                in ["NJ", "PHYLIP", "DIST", "NEXUS", "nj", "phylip", "dist", "nexus"],
            ),
            _Option(
                ["-seed", "-SEED", "SEED", "seed"],
                "Seed number for bootstraps.",
                checker_function=_is_int,
            ),
            _Switch(
                ["-kimura", "-KIMURA", "KIMURA", "kimura"], "Use Kimura's correction."
            ),
            _Switch(
                ["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"],
                "Ignore positions with gaps.",
            ),
            _Option(
                ["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"],
                "Node OR branch position of bootstrap values in tree display",
                checker_function=lambda x: x in ["NODE", "BRANCH", "node", "branch"],
            ),
            _Option(
                ["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"],
                "NJ or UPGMA",
                checker_function=lambda x: x in ["NJ", "UPGMA", "nj", "upgma"],
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module.
    from Bio import _utils

    _utils.run_doctest()