Spaces:
Running
Running
| """T-test with permutations.""" | |
| # Authors: The MNE-Python contributors. | |
| # License: BSD-3-Clause | |
| # Copyright the MNE-Python contributors. | |
| from math import sqrt | |
| import numpy as np | |
| from ..parallel import parallel_func | |
| from ..utils import check_random_state, logger, verbose | |
| def _max_stat(X, X2, perms, dof_scaling): | |
| """Aux function for permutation_t_test (for parallel comp).""" | |
| n_samples = len(X) | |
| mus = np.dot(perms, X) / float(n_samples) | |
| stds = np.sqrt(X2[None, :] - mus * mus) * dof_scaling # std with splitting | |
| max_abs = np.max(np.abs(mus) / (stds / sqrt(n_samples)), axis=1) # t-max | |
| return max_abs | |
def permutation_t_test(
    X, n_permutations=10000, tail=0, n_jobs=None, seed=None, verbose=None
):
    """One sample/paired sample permutation test based on a t-statistic.

    This function can perform the test on one variable or
    simultaneously on multiple variables. When applying the test to multiple
    variables, the "tmax" method is used for adjusting the p-values of each
    variable for multiple comparisons. Like Bonferroni correction, this method
    adjusts p-values in a way that controls the family-wise error rate.
    However, the permutation method will be more
    powerful than Bonferroni correction when different variables in the test
    are correlated (see :footcite:`NicholsHolmes2002`).

    Parameters
    ----------
    X : array, shape (n_samples, n_tests)
        Samples (observations) by number of tests (variables).
    n_permutations : int | 'all'
        Number of permutations. If n_permutations is 'all' all possible
        permutations are tested. It's the exact test, that
        can be untractable when the number of samples is big (e.g. > 20).
        If n_permutations >= 2**n_samples then the exact test is performed.
    tail : -1 or 0 or 1 (default = 0)
        If tail is 1, the alternative hypothesis is that the
        mean of the data is greater than 0 (upper tailed test). If tail is 0,
        the alternative hypothesis is that the mean of the data is different
        than 0 (two tailed test). If tail is -1, the alternative hypothesis
        is that the mean of the data is less than 0 (lower tailed test).
    %(n_jobs)s
    %(seed)s
    %(verbose)s

    Returns
    -------
    T_obs : array of shape [n_tests]
        T-statistic observed for all variables.
    p_values : array of shape [n_tests]
        P-values for all the tests (a.k.a. variables).
    H0 : array of shape [n_permutations]
        T-statistic obtained by permutations and t-max trick for multiple
        comparison.

    Notes
    -----
    If ``n_permutations >= 2 ** (n_samples - (tail == 0))``,
    ``n_permutations`` and ``seed`` will be ignored since an exact test
    (full permutation test) will be performed.

    References
    ----------
    .. footbibliography::
    """
    from .cluster_level import _get_1samp_orders

    # Fail fast on an invalid tail; previously an unknown value fell through
    # every p-value branch below and crashed with UnboundLocalError at return.
    if tail not in (-1, 0, 1):
        raise ValueError(f"tail must be -1, 0 or 1, got {tail!r}")

    n_samples, n_tests = X.shape
    X2 = np.mean(X**2, axis=0)  # precompute moments
    mu0 = np.mean(X, axis=0)
    # Rescale the biased (1/n) std estimate to the unbiased (1/(n-1)) one.
    dof_scaling = sqrt(n_samples / (n_samples - 1.0))
    std0 = np.sqrt(X2 - mu0**2) * dof_scaling  # get std with var splitting
    T_obs = np.mean(X, axis=0) / (std0 / sqrt(n_samples))
    rng = check_random_state(seed)
    orders, _, extra = _get_1samp_orders(n_samples, n_permutations, tail, rng)
    perms = 2 * np.array(orders) - 1  # from 0, 1 -> 1, -1
    logger.info(f"Permuting {len(orders)} times{extra}...")
    # Split the permutations across jobs; each job returns its t-max values.
    parallel, my_max_stat, n_jobs = parallel_func(_max_stat, n_jobs)
    max_abs = np.concatenate(
        parallel(
            my_max_stat(X, X2, p, dof_scaling) for p in np.array_split(perms, n_jobs)
        )
    )
    # Include the observed statistic in H0 so p-values can never be zero.
    max_abs = np.concatenate((max_abs, [np.abs(T_obs).max()]))
    H0 = np.sort(max_abs)
    if tail == 0:
        p_values = (H0 >= np.abs(T_obs[:, np.newaxis])).mean(-1)
    elif tail == 1:
        p_values = (H0 >= T_obs[:, np.newaxis]).mean(-1)
    else:  # tail == -1, guaranteed by the validation above
        p_values = (-H0 <= T_obs[:, np.newaxis]).mean(-1)
    return T_obs, p_values, H0
def bootstrap_confidence_interval(
    arr, ci=0.95, n_bootstraps=2000, stat_fun="mean", random_state=None
):
    """Get confidence intervals from non-parametric bootstrap.

    Parameters
    ----------
    arr : ndarray, shape (n_samples, ...)
        The input data on which to calculate the confidence interval.
    ci : float
        Level of the confidence interval between 0 and 1.
    n_bootstraps : int
        Number of bootstraps.
    stat_fun : str | callable
        Can be "mean", "median", or a callable operating along ``axis=0``.
    random_state : int | float | array_like | None
        The seed at which to initialize the bootstrap.

    Returns
    -------
    cis : ndarray, shape (2, ...)
        Containing the lower boundary of the CI at ``cis[0, ...]`` and the
        upper boundary of the CI at ``cis[1, ...]``.
    """
    # Resolve the string shortcuts to callables operating along axis 0.
    named_funs = {
        "mean": lambda x: x.mean(axis=0),
        "median": lambda x: np.median(x, axis=0),
    }
    if not callable(stat_fun):
        try:
            stat_fun = named_funs[stat_fun]
        except (KeyError, TypeError):
            raise ValueError("stat_fun must be 'mean', 'median' or callable.")

    n_samples = arr.shape[0]
    sample_idx = np.arange(n_samples, dtype=int)  # BCA would be cool to have too
    rng = check_random_state(random_state)
    # Draw every bootstrap resample at once: (n_bootstraps, n_samples) indices.
    boot_idx = rng.choice(sample_idx, replace=True, size=(n_bootstraps, n_samples))
    boot_stats = np.array([stat_fun(arr[idx]) for idx in boot_idx])
    # Symmetric percentile interval, e.g. ci=0.95 -> (2.5, 97.5).
    alpha = (1 - ci) / 2
    lower, upper = np.percentile(boot_stats, (alpha * 100, (1 - alpha) * 100), axis=0)
    return np.array([lower, upper])
def _ci(arr, ci=0.95, method="bootstrap", n_bootstraps=2000, random_state=None):
    """Calculate confidence interval. Aux function for plot_compare_evokeds."""
    # Anything other than "bootstrap" falls back to the parametric CI.
    if method != "bootstrap":
        from .parametric import _parametric_ci

        return _parametric_ci(arr, ci=ci)
    return bootstrap_confidence_interval(
        arr, ci=ci, n_bootstraps=n_bootstraps, random_state=random_state
    )