Spaces:
No application file
No application file
DrVai-Rag-Testing
/
myenv
/lib
/python3.10
/site-packages
/Bio
/Align
/Applications
/_ClustalOmega.py
| # Copyright 2011 by Andreas Wilm. All rights reserved. | |
| # Based on ClustalW wrapper copyright 2009 by Cymon J. Cox. | |
| # | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Command line wrapper for the multiple alignment program Clustal Omega.""" | |
| from Bio.Application import _Option, _Switch, AbstractCommandline | |
class ClustalOmegaCommandline(AbstractCommandline):
    """Command line wrapper for the Clustal Omega multiple sequence aligner.

    http://www.clustal.org/omega

    Notes
    -----
    Last checked against version: 1.2.0

    References
    ----------
    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
    Fast, scalable generation of high-quality protein multiple
    sequence alignments using Clustal Omega.
    Molecular Systems Biology 7:539 https://doi.org/10.1038/msb.2011.75

    Examples
    --------
    >>> from Bio.Align.Applications import ClustalOmegaCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.fasta"
    >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
    >>> print(clustalomega_cline)
    clustalo -i unaligned.fasta -o aligned.fasta --auto -v

    You would typically run the command line with clustalomega_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="clustalo", **kwargs):
        """Declare the supported clustalo arguments and initialize the base class.

        ``cmd`` is the executable name (default ``"clustalo"``); any keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        # Format names accepted by both --infmt and --outfmt.
        alignment_formats = (
            "a2m",
            "fa",
            "fasta",
            "clu",
            "clustal",
            "msf",
            "phy",
            "phylip",
            "selex",
            "st",
            "stockholm",
            "vie",
            "vienna",
        )
        sequence_types = ("protein", "rna", "dna", "Protein", "RNA", "DNA", "PROTEIN")
        output_orders = ("input-order", "tree-order")

        def is_integer(value):
            return isinstance(value, int)

        def is_alignment_format(value):
            return value in alignment_formats

        def is_sequence_type(value):
            return value in sequence_types

        def is_output_order(value):
            return value in output_orders

        # The groups below follow the section order of ``clustalo --help``;
        # they are concatenated in that same order at the end.
        sequence_input = [
            _Option(
                ["-i", "--in", "--infile", "infile"],
                "Multiple sequence input file",
                filename=True,
                equate=False,
            ),
            _Option(
                ["--hmm-in", "HMM input", "hmm_input"],
                "HMM input files",
                filename=True,
                equate=False,
            ),
            _Switch(["--dealign", "dealign"], "Dealign input sequences"),
            _Option(
                ["--profile1", "--p1", "profile1"],
                "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                filename=True,
                equate=False,
            ),
            _Option(
                ["--profile2", "--p2", "profile2"],
                "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-t", "--seqtype", "seqtype"],
                "{Protein, RNA, DNA} Force a sequence type (default: auto).",
                equate=False,
                checker_function=is_sequence_type,
            ),
            _Switch(
                ["--is-profile", "isprofile"],
                "disable check if profile, force profile (default no)",
            ),
            _Option(
                ["--infmt", "infmt"],
                # NOTE(review): this description was a multi-line literal whose
                # interior whitespace is not recoverable from the flattened
                # source; confirm the exact text against upstream.
                "Forced sequence input file format (default: auto)\n"
                "Allowed values: a2m, fa[sta], clu[stal], msf, phy[lip], selex, st[ockholm], vie[nna]\n",
                equate=False,
                checker_function=is_alignment_format,
            ),
        ]

        clustering = [
            _Option(
                ["--distmat-in", "distmat_in"],
                "Pairwise distance matrix input file (skips distance computation).",
                filename=True,
                equate=False,
            ),
            _Option(
                ["--distmat-out", "distmat_out"],
                "Pairwise distance matrix output file.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["--guidetree-in", "guidetree_in"],
                "Guide tree input file (skips distance computation and guide-tree clustering step).",
                filename=True,
                equate=False,
            ),
            _Option(
                ["--guidetree-out", "guidetree_out"],
                "Guide tree output file.",
                filename=True,
                equate=False,
            ),
            _Switch(
                ["--full", "distmat_full"],
                "Use full distance matrix for guide-tree calculation (slow; mBed is default)",
            ),
            _Switch(
                ["--full-iter", "distmat_full_iter"],
                "Use full distance matrix for guide-tree calculation during iteration (mBed is default)",
            ),
            _Option(
                ["--cluster-size", "clustersize"],
                "soft maximum of sequences in sub-clusters",
                checker_function=is_integer,
            ),
            _Option(
                ["--clustering-out", "clusteringout"],
                "Clustering output file",
                filename=True,
            ),
            _Switch(
                ["--use-kimura", "usekimura"],
                "use Kimura distance correction for aligned sequences (default no)",
            ),
            _Switch(
                ["--percent-id", "percentid"],
                "convert distances into percent identities (default no)",
            ),
        ]

        alignment_output = [
            _Option(
                ["-o", "--out", "--outfile", "outfile"],
                "Multiple sequence alignment output file (default: stdout).",
                filename=True,
                equate=False,
            ),
            _Option(
                ["--outfmt", "outfmt"],
                "MSA output file format:"
                " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]"
                " (default: fasta).",
                equate=False,
                checker_function=is_alignment_format,
            ),
            _Switch(
                ["--residuenumber", "--resno", "residuenumber"],
                "in Clustal format print residue numbers (default no)",
            ),
            _Option(
                ["--wrap", "wrap"],
                "number of residues before line-wrap in output",
                checker_function=is_integer,
            ),
            _Option(
                ["--output-order", "outputorder"],
                "MSA output order like in input/guide-tree",
                checker_function=is_output_order,
            ),
        ]

        iteration = [
            _Option(
                ["--iterations", "--iter", "iterations"],
                "Number of (combined guide-tree/HMM) iterations",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["--max-guidetree-iterations", "max_guidetree_iterations"],
                "Maximum number of guidetree iterations",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["--max-hmm-iterations", "max_hmm_iterations"],
                "Maximum number of HMM iterations",
                equate=False,
                checker_function=is_integer,
            ),
        ]

        # Limits (will exit early, if exceeded).
        limits = [
            _Option(
                ["--maxnumseq", "maxnumseq"],
                "Maximum allowed number of sequences",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["--maxseqlen", "maxseqlen"],
                "Maximum allowed sequence length",
                equate=False,
                checker_function=is_integer,
            ),
        ]

        miscellaneous = [
            _Switch(
                ["--auto", "auto"],
                "Set options automatically (might overwrite some of your options)",
            ),
            _Option(
                ["--threads", "threads"],
                "Number of processors to use",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-l", "--log", "log"],
                "Log all non-essential output to this file.",
                filename=True,
                equate=False,
            ),
            _Switch(["-h", "--help", "help"], "Print help and exit."),
            _Switch(["-v", "--verbose", "verbose"], "Verbose output"),
            _Switch(["--version", "version"], "Print version information and exit"),
            _Switch(
                ["--long-version", "long_version"],
                "Print long version information and exit",
            ),
            _Switch(["--force", "force"], "Force file overwriting."),
        ]

        self.parameters = (
            sequence_input
            + clustering
            + alignment_output
            + iteration
            + limits
            + miscellaneous
        )
        AbstractCommandline.__init__(self, cmd, **kwargs)
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from Bio import _utils

    _utils.run_doctest()