Spaces:
No application file
No application file
| # Copyright 2011 by Eric Talevich. All rights reserved. | |
| # | |
| # This file is part of the Biopython distribution and governed by your | |
| # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
| # Please see the LICENSE file that should have been included as part of this | |
| # package. | |
| """Command-line wrapper for the tree inference program PhyML.""" | |
| from Bio.Application import _Option, _Switch, AbstractCommandline | |
| class PhymlCommandline(AbstractCommandline): | |
| """Command-line wrapper for the tree inference program PhyML. | |
| Homepage: http://www.atgc-montpellier.fr/phyml | |
| References | |
| ---------- | |
| Guindon S, Gascuel O. | |
| A simple, fast, and accurate algorithm to estimate large phylogenies by maximum | |
| likelihood. | |
| Systematic Biology, 2003 Oct;52(5):696-704. | |
| PubMed PMID: 14530136. | |
| Guindon S, Dufayard JF, Lefort V, Anisimova M, Hordijk W, Gascuel O. | |
| New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing | |
| the Performance of PhyML 3.0. | |
| Systematic Biology, 2010 59(3):307-21. | |
| """ | |
| def __init__(self, cmd="phyml", **kwargs): | |
| """Initialize the class.""" | |
| self.parameters = [ | |
| _Option( | |
| ["-i", "--input", "input"], | |
| "PHYLIP format input nucleotide or amino-acid sequence filenam.", | |
| filename=True, | |
| is_required=True, | |
| equate=False, | |
| ), | |
| _Option( | |
| ["-d", "--datatype", "datatype"], | |
| "Datatype 'nt' for nucleotide (default) or 'aa' for amino-acids.", | |
| checker_function=lambda x: x in ("nt", "aa"), | |
| equate=False, | |
| ), | |
| _Switch( | |
| ["-q", "--sequential", "sequential"], | |
| "Changes interleaved format (default) to sequential format.", | |
| ), | |
| _Option( | |
| ["-n", "--multiple", "multiple"], | |
| "Number of data sets to analyse (integer).", | |
| checker_function=(lambda x: isinstance(x, int) or x.isdigit()), | |
| equate=False, | |
| ), | |
| _Switch( | |
| ["-p", "--pars", "pars"], | |
| """Use a minimum parsimony starting tree. | |
| This option is taken into account when the '-u' option is absent | |
| and when tree topology modifications are to be done. | |
| """, | |
| ), | |
| _Option( | |
| ["-b", "--bootstrap", "bootstrap"], | |
| r"""Number of bootstrap replicates, if value is > 0. | |
| Otherwise: | |
| 0: neither approximate likelihood ratio test nor bootstrap | |
| values are computed. | |
| -1: approximate likelihood ratio test returning aLRT statistics. | |
| -2: approximate likelihood ratio test returning Chi2-based | |
| parametric branch supports. | |
| -4: SH-like branch supports alone. | |
| """, | |
| equate=False, | |
| ), | |
| _Option( | |
| ["-m", "--model", "model"], | |
| """Substitution model name. | |
| Nucleotide-based models: | |
| HKY85 (default) | JC69 | K80 | F81 | F84 | TN93 | GTR | custom | |
| For the custom option, a string of six digits identifies the | |
| model. For instance, 000000 corresponds to F81 (or JC69, | |
| provided the distribution of nucleotide frequencies is uniform). | |
| 012345 corresponds to GTR. This option can be used for encoding | |
| any model that is a nested within GTR. | |
| Amino-acid based models: | |
| LG (default) | WAG | JTT | MtREV | Dayhoff | DCMut | RtREV | | |
| CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom | |
| """, | |
| checker_function=( | |
| lambda x: x | |
| in ( | |
| # Nucleotide models: | |
| "HKY85", | |
| "JC69", | |
| "K80", | |
| "F81", | |
| "F84", | |
| "TN93", | |
| "GTR", | |
| # Amino acid models: | |
| "LG", | |
| "WAG", | |
| "JTT", | |
| "MtREV", | |
| "Dayhoff", | |
| "DCMut", | |
| "RtREV", | |
| "CpREV", | |
| "VT", | |
| "Blosum62", | |
| "MtMam", | |
| "MtArt", | |
| "HIVw", | |
| "HIVb", | |
| ) | |
| or isinstance(x, int) | |
| ), | |
| equate=False, | |
| ), | |
| _Option( | |
| ["-f", "frequencies"], | |
| """Character frequencies. | |
| -f e, m, or "fA fC fG fT" | |
| e : Empirical frequencies, determined as follows : | |
| - Nucleotide sequences: (Empirical) the equilibrium base | |
| frequencies are estimated by counting the occurrence | |
| of the different bases in the alignment. | |
| - Amino-acid sequences: (Empirical) the equilibrium | |
| amino-acid frequencies are estimated by counting the | |
| occurrence of the different amino-acids in the alignment. | |
| m : ML/model-based frequencies, determined as follows : | |
| - Nucleotide sequences: (ML) the equilibrium base | |
| frequencies are estimated using maximum likelihood | |
| - Amino-acid sequences: (Model) the equilibrium amino-acid | |
| frequencies are estimated using the frequencies defined by | |
| the substitution model. | |
| "fA fC fG fT" : only valid for nucleotide-based models. | |
| fA, fC, fG and fT are floating-point numbers that correspond | |
| to the frequencies of A, C, G and T, respectively. | |
| """, | |
| filename=True, # ensure ".25 .25 .25 .25" stays quoted | |
| equate=False, | |
| ), | |
| _Option( | |
| ["-t", "--ts/tv", "ts_tv_ratio"], | |
| """Transition/transversion ratio. (DNA sequences only.) | |
| Can be a fixed positive value (ex:4.0) or e to get the | |
| maximum-likelihood estimate. | |
| """, | |
| equate=False, | |
| ), | |
| _Option( | |
| ["-v", "--pinv", "prop_invar"], | |
| """Proportion of invariable sites. | |
| Can be a fixed value in the range [0,1], or 'e' to get the | |
| maximum-likelihood estimate. | |
| """, | |
| equate=False, | |
| ), | |
| _Option( | |
| ["-c", "--nclasses", "nclasses"], | |
| """Number of relative substitution rate categories. | |
| Default 1. Must be a positive integer. | |
| """, | |
| equate=False, | |
| ), | |
| _Option( | |
| ["-a", "--alpha", "alpha"], | |
| """Distribution of the gamma distribution shape parameter. | |
| Can be a fixed positive value, or 'e' to get the | |
| maximum-likelihood estimate. | |
| """, | |
| equate=False, | |
| ), | |
| _Option( | |
| ["-s", "--search", "search"], | |
| """Tree topology search operation option. | |
| Can be one of: | |
| NNI : default, fast | |
| SPR : a bit slower than NNI | |
| BEST : best of NNI and SPR search | |
| """, | |
| checker_function=lambda x: x in ("NNI", "SPR", "BEST"), | |
| equate=False, | |
| ), | |
| # alt name: user_tree_file | |
| _Option( | |
| ["-u", "--inputtree", "input_tree"], | |
| "Starting tree filename. The tree must be in Newick format.", | |
| filename=True, | |
| equate=False, | |
| ), | |
| _Option( | |
| ["-o", "optimize"], | |
| r"""Specific parameter optimisation. | |
| tlr : tree topology (t), branch length (l) and | |
| rate parameters (r) are optimised. | |
| tl : tree topology and branch length are optimised. | |
| lr : branch length and rate parameters are optimised. | |
| l : branch length are optimised. | |
| r : rate parameters are optimised. | |
| n : no parameter is optimised. | |
| """, | |
| equate=False, | |
| ), | |
| _Switch( | |
| ["--rand_start", "rand_start"], | |
| """Sets the initial tree to random. | |
| Only valid if SPR searches are to be performed. | |
| """, | |
| ), | |
| _Option( | |
| ["--n_rand_starts", "n_rand_starts"], | |
| """Number of initial random trees to be used. | |
| Only valid if SPR searches are to be performed. | |
| """, | |
| equate=False, | |
| ), | |
| _Option( | |
| ["--r_seed", "r_seed"], | |
| """Seed used to initiate the random number generator. | |
| Must be an integer. | |
| """, | |
| equate=False, | |
| ), | |
| _Switch( | |
| ["--print_site_lnl", "print_site_lnl"], | |
| r"Print the likelihood for each site in file \*_phyml_lk.txt.", | |
| ), | |
| _Switch( | |
| ["--print_trace", "print_trace"], | |
| r""" | |
| Print each phylogeny explored during the tree search process | |
| in file \*_phyml_trace.txt.""", | |
| ), | |
| _Option( | |
| ["--run_id", "run_id"], | |
| """Append the given string at the end of each PhyML output file. | |
| This option may be useful when running simulations involving | |
| PhyML. | |
| """, | |
| checker_function=lambda x: isinstance(x, str), | |
| equate=False, | |
| ), | |
| # XXX should this always be set to True? | |
| _Switch( | |
| ["--quiet", "quiet"], | |
| "No interactive questions (for running in batch mode).", | |
| ), | |
| ] | |
| AbstractCommandline.__init__(self, cmd, **kwargs) | |