Spaces:
Running
Running
| # Authors: The MNE-Python contributors. | |
| # License: BSD-3-Clause | |
| # Copyright the MNE-Python contributors. | |
| from functools import reduce | |
| from string import ascii_uppercase | |
| import numpy as np | |
| from scipy import stats | |
| from scipy.signal import detrend | |
| from ..utils import _check_option | |
# The ``f_oneway`` function defined later in this module is a rewriting of
# scipy.stats.f_oneway. Contrary to the scipy.stats.f_oneway implementation
# it does not copy the data while keeping the inputs unchanged.
def ttest_1samp_no_p(X, sigma=0, method="relative"):
    """Perform one-sample t-test.

    This is a modified version of :func:`scipy.stats.ttest_1samp` that avoids
    a (relatively) time-consuming p-value calculation, and can adjust
    for implausibly small variance values :footcite:`RidgwayEtAl2012`.

    Parameters
    ----------
    X : array
        Array to return t-values for.
    sigma : float
        The variance estimate will be given by ``var + sigma * max(var)`` or
        ``var + sigma``, depending on "method". By default this is 0 (no
        adjustment). See Notes for details.
    method : str
        If 'relative', the minimum variance estimate will be sigma * max(var),
        if 'absolute' the minimum variance estimate will be sigma.

    Returns
    -------
    t : array
        T-values, potentially adjusted using the hat method.

    Notes
    -----
    To use the "hat" adjustment method :footcite:`RidgwayEtAl2012`, a value
    of ``sigma=1e-3`` may be a reasonable choice.

    References
    ----------
    .. footbibliography::
    """
    _check_option("method", method, ["absolute", "relative"])
    n_obs = X.shape[0]
    sample_var = np.var(X, axis=0, ddof=1)
    if sigma > 0:
        # "hat" adjustment: add a variance floor so implausibly small
        # variance estimates cannot inflate the t-values.
        if method == "relative":
            sample_var = sample_var + sigma * np.max(sample_var)
        else:
            sample_var = sample_var + sigma
    return np.mean(X, axis=0) / np.sqrt(sample_var / n_obs)
def ttest_ind_no_p(a, b, equal_var=True, sigma=0.0):
    """Independent samples t-test without p calculation.

    This is a modified version of :func:`scipy.stats.ttest_ind`. It operates
    along the first axis. The ``sigma`` parameter provides an optional "hat"
    adjustment (see :func:`ttest_1samp_no_p` and :footcite:`RidgwayEtAl2012`).

    Parameters
    ----------
    a : array-like
        The first array.
    b : array-like
        The second array.
    equal_var : bool
        Assume equal variance. See :func:`scipy.stats.ttest_ind`.
    sigma : float
        The regularization. See :func:`ttest_1samp_no_p`.

    Returns
    -------
    t : array
        T values.

    References
    ----------
    .. footbibliography::
    """
    n1, n2 = a.shape[0], b.shape[0]
    var1 = np.var(a, axis=0, ddof=1)
    var2 = np.var(b, axis=0, ddof=1)
    if equal_var:
        # Pooled-variance (Student) formulation
        dof = n1 + n2 - 2.0
        pooled = ((n1 - 1) * var1 + (n2 - 1) * var2) / dof
        var_est = pooled * (1.0 / n1 + 1.0 / n2)
    else:
        # Welch formulation
        vn1, vn2 = var1 / n1, var2 / n2
        with np.errstate(divide="ignore", invalid="ignore"):
            dof = (vn1 + vn2) ** 2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1))
        # If dof is undefined, variances are zero (assumes n1 > 0 & n2 > 0).
        # Hence it doesn't matter what dof is as long as it's not NaN.
        dof = np.where(np.isnan(dof), 1, dof)
        var_est = vn1 + vn2
    if sigma > 0:
        # Optional "hat" variance-floor adjustment
        var_est = var_est + sigma * np.max(var_est)
    mean_diff = np.mean(a, 0) - np.mean(b, 0)
    with np.errstate(divide="ignore", invalid="ignore"):
        return np.divide(mean_diff, np.sqrt(var_est))
def f_oneway(*args):
    """Perform a 1-way ANOVA.

    The one-way ANOVA tests the null hypothesis that 2 or more groups have
    the same population mean. The test is applied to samples from two or
    more groups, possibly with differing sizes :footcite:`Lowry2014`.

    This is a modified version of :func:`scipy.stats.f_oneway` that avoids
    computing the associated p-value.

    Parameters
    ----------
    *args : array_like
        The sample measurements should be given as arguments.

    Returns
    -------
    F-value : float
        The computed F-value of the test.

    Notes
    -----
    The ANOVA test has important assumptions that must be satisfied in order
    for the associated p-value to be valid.

    1. The samples are independent
    2. Each sample is from a normally distributed population
    3. The population standard deviations of the groups are all equal. This
       property is known as homoscedasticity.

    If these assumptions are not true for a given set of data, it may still be
    possible to use the Kruskal-Wallis H-test (:func:`scipy.stats.kruskal`)
    although with some loss of power.

    The algorithm is from Heiman :footcite:`Heiman2002`, pp.394-7.

    References
    ----------
    .. footbibliography::
    """
    n_groups = len(args)
    group_sizes = np.array([len(sample) for sample in args])
    n_total = np.sum(group_sizes)
    # Sums of squares are accumulated per group to avoid copying the data.
    group_sums = [np.sum(sample, axis=0) for sample in args]
    grand_square = sum(group_sums) ** 2
    ss_alldata = sum(np.sum(sample**2, axis=0) for sample in args)
    ss_total = ss_alldata - grand_square / float(n_total)
    ss_between = sum(
        gsum**2 / gsize for gsum, gsize in zip(group_sums, group_sizes)
    )
    ss_between -= grand_square / float(n_total)
    ss_within = ss_total - ss_between
    df_between = n_groups - 1
    df_within = n_total - n_groups
    ms_between = ss_between / float(df_between)
    ms_within = ss_within / float(df_within)
    return ms_between / ms_within
def _map_effects(n_factors, effects):
    """Map effects to indices.

    Parameters
    ----------
    n_factors : int
        The number of factors (at most 26; factors are named 'A', 'B', ...).
    effects : str | list | None
        Effect specification, e.g. ``'A*B'``, ``'A+B'``, ``'A:B'``, ``'all'``,
        or a list of effect names such as ``['A', 'B', 'A:B']``.

    Returns
    -------
    selection : list of int
        Indices of the selected effects.
    names : list of str
        The corresponding effect names (factors joined by ':').
    """
    if n_factors > len(ascii_uppercase):
        raise ValueError("Maximum number of factors supported is 26")
    factor_names = list(ascii_uppercase[:n_factors])
    if isinstance(effects, str):
        if "*" in effects and ":" in effects:
            raise ValueError('Not "*" and ":" permitted in effects')
        elif "+" in effects and ":" in effects:
            raise ValueError('Not "+" and ":" permitted in effects')
        elif effects == "all":
            effects = None
        elif len(effects) == 1 or ":" in effects:
            effects = [effects]
        elif "+" in effects:
            # all main effects
            effects = effects.split("+")
        elif "*" in effects:
            pass  # handle later
        else:
            raise ValueError(f'"{effects}" is not a valid option for "effects"')
    if isinstance(effects, list):
        # Fix: previously only ``len(bad_names) > 1`` raised, so a single
        # invalid name slipped through to a cryptic ``names.index`` error
        # below. Also validate each ':'-separated component so interaction
        # names such as 'A:B' remain valid list entries.
        bad_names = [
            e
            for e in effects
            if any(factor not in factor_names for factor in e.split(":"))
        ]
        if bad_names:
            raise ValueError(
                f"Effect names: {bad_names} are not valid. They should consist of the "
                f"first `n_factors` ({n_factors}) characters from the alphabet"
            )
    # Enumerate all 2**n_factors - 1 possible effects and build their names
    # via Henson's binary factor coding.
    indices = list(np.arange(2**n_factors - 1))
    names = list()
    for this_effect in indices:
        contrast_idx = _get_contrast_indices(this_effect + 1, n_factors)
        this_code = (n_factors - 1) - np.where(contrast_idx == 1)[0]
        this_name = [factor_names[e] for e in this_code]
        this_name.sort()
        names.append(":".join(this_name))
    if effects is None or isinstance(effects, str):
        effects_ = names
    else:
        effects_ = effects
    selection = [names.index(sel) for sel in effects_]
    names = [names[sel] for sel in selection]
    if isinstance(effects, str):
        if "*" in effects:
            # hierarchical order of effects
            # the * based effect can be used as stop index
            sel_ind = names.index(effects.replace("*", ":")) + 1
            names = names[:sel_ind]
            selection = selection[:sel_ind]
    return selection, names
| def _get_contrast_indices(effect_idx, n_factors): # noqa: D401 | |
| """Henson's factor coding, see num2binvec.""" | |
| binrepr = np.binary_repr(effect_idx, n_factors) | |
| return np.array([int(i) for i in binrepr], dtype=int) | |
| def _iter_contrasts(n_subjects, factor_levels, effect_picks): | |
| """Set up contrasts.""" | |
| sc = [] | |
| n_factors = len(factor_levels) | |
| # prepare computation of Kronecker products | |
| for n_levels in factor_levels: | |
| # for each factor append | |
| # 1) column vector of length == number of levels, | |
| # 2) square matrix with diagonal == number of levels | |
| # main + interaction effects for contrasts | |
| sc.append([np.ones([n_levels, 1]), detrend(np.eye(n_levels), type="constant")]) | |
| for this_effect in effect_picks: | |
| contrast_idx = _get_contrast_indices(this_effect + 1, n_factors) | |
| c_ = sc[0][contrast_idx[n_factors - 1]] | |
| for i_contrast in range(1, n_factors): | |
| this_contrast = contrast_idx[(n_factors - 1) - i_contrast] | |
| c_ = np.kron(c_, sc[i_contrast][this_contrast]) | |
| df1 = np.linalg.matrix_rank(c_) | |
| df2 = df1 * (n_subjects - 1) | |
| yield c_, df1, df2 | |
def f_threshold_mway_rm(n_subjects, factor_levels, effects="A*B", pvalue=0.05):
    """Compute F-value thresholds for a two-way ANOVA.

    Parameters
    ----------
    n_subjects : int
        The number of subjects to be analyzed.
    factor_levels : list-like
        The number of levels per factor.
    effects : str
        A string denoting the effect to be returned. The following
        mapping is currently supported:

        * ``'A'``: main effect of A
        * ``'B'``: main effect of B
        * ``'A:B'``: interaction effect
        * ``'A+B'``: both main effects
        * ``'A*B'``: all three effects

    pvalue : float
        The p-value to be thresholded.

    Returns
    -------
    F_threshold : list | float
        List of F-values for each effect if the number of effects
        requested > 2, else float.

    See Also
    --------
    f_oneway
    f_mway_rm

    Notes
    -----
    .. versionadded:: 0.10
    """
    effect_picks, _ = _map_effects(len(factor_levels), effects)
    # One inverse-survival-function evaluation per requested effect.
    thresholds = [
        stats.f(df1, df2).isf(pvalue)
        for _, df1, df2 in _iter_contrasts(n_subjects, factor_levels, effect_picks)
    ]
    if len(thresholds) == 1:
        return thresholds[0]
    return thresholds
def f_mway_rm(data, factor_levels, effects="all", correction=False, return_pvals=True):
    """Compute M-way repeated measures ANOVA for fully balanced designs.

    Parameters
    ----------
    data : ndarray
        3D array where the first two dimensions are compliant
        with a subjects X conditions scheme where the first
        factor repeats slowest::

            A1B1 A1B2 A2B1 A2B2
            subject 1 1.34 2.53 0.97 1.74
            subject ... .... .... .... ....
            subject k 2.45 7.90 3.09 4.76

        The last dimensions is thought to carry the observations
        for mass univariate analysis.
    factor_levels : list-like
        The number of levels per factor.
    effects : str | list
        A string denoting the effect to be returned. The following
        mapping is currently supported (example with 2 factors):

        * ``'A'``: main effect of A
        * ``'B'``: main effect of B
        * ``'A:B'``: interaction effect
        * ``'A+B'``: both main effects
        * ``'A*B'``: all three effects
        * ``'all'``: all effects (equals 'A*B' in a 2 way design)

        If list, effect names are used: ``['A', 'B', 'A:B']``.
    correction : bool
        The correction method to be employed if one factor has more than two
        levels. If True, sphericity correction using the Greenhouse-Geisser
        method will be applied.
    return_pvals : bool
        If True, return p-values corresponding to F-values.

    Returns
    -------
    F_vals : ndarray
        An array of F-statistics with length corresponding to the number
        of effects estimated. The shape depends on the number of effects
        estimated.
    p_vals : ndarray
        If not requested via return_pvals, defaults to an empty array.

    See Also
    --------
    f_oneway
    f_threshold_mway_rm

    Notes
    -----
    .. versionadded:: 0.10
    """
    out_reshape = (-1,)
    if data.ndim == 2:  # general purpose support, e.g. behavioural data
        # promote to 3D with a singleton observation axis
        data = data[:, :, np.newaxis]
    elif data.ndim > 3:  # let's allow for some magic here
        # flatten trailing observation dims; restored via out_reshape at return
        out_reshape = data.shape[2:]
        data = data.reshape(data.shape[0], data.shape[1], np.prod(data.shape[2:]))
    effect_picks, _ = _map_effects(len(factor_levels), effects)
    n_obs = data.shape[2]
    n_replications = data.shape[0]
    # put last axis in front to 'iterate' over mass univariate instances.
    data = np.rollaxis(data, 2)
    fvalues, pvalues = [], []
    for c_, df1, df2 in _iter_contrasts(n_replications, factor_levels, effect_picks):
        # Project each observation's subjects-by-conditions matrix onto the
        # effect's contrast; y has shape (n_obs, n_subjects, n_contrast_cols).
        y = np.dot(data, c_)
        b = np.mean(y, axis=1)[:, np.newaxis, :]
        # ss: sum of squares explained by the effect; mse: residual mean square
        ss = np.sum(np.sum(y * b, axis=2), axis=1)
        mse = (np.sum(np.sum(y * y, axis=2), axis=1) - ss) / (df2 / df1)
        fvals = ss / mse
        fvalues.append(fvals)
        if correction:
            # sample covariances, leave off "/ (y.shape[1] - 1)" norm because
            # it falls out.
            v = np.array([np.dot(y_.T, y_) for y_ in y])
            # Greenhouse-Geisser epsilon per observation:
            # trace(V)**2 / (df1 * sum(V * V))
            v = np.array([np.trace(vv) for vv in v]) ** 2 / (
                df1 * np.sum(np.sum(v * v, axis=2), axis=1)
            )
            eps = v
        # broadcast scalar dfs to one value per observation
        df1, df2 = np.zeros(n_obs) + df1, np.zeros(n_obs) + df2
        if correction:
            # numerical imprecision can cause eps=0.99999999999999989
            # even with a single category, so never let our degrees of
            # freedom drop below 1.
            df1, df2 = (np.maximum(d[None, :] * eps, 1.0) for d in (df1, df2))
        if return_pvals:
            pvals = stats.f(df1, df2).sf(fvals)
        else:
            pvals = np.empty(0)
        pvalues.append(pvals)
    # handle single effect returns
    return [
        np.squeeze(np.asarray([v.reshape(out_reshape) for v in vv]))
        for vv in (fvalues, pvalues)
    ]
| def _parametric_ci(arr, ci=0.95): | |
| """Calculate the `ci`% parametric confidence interval for `arr`.""" | |
| mean = arr.mean(0) | |
| if len(arr) < 2: # can't compute standard error | |
| sigma = np.full_like(mean, np.nan) | |
| return mean, sigma | |
| sigma = stats.sem(arr, 0) | |
| return stats.t.interval(ci, loc=mean, scale=sigma, df=arr.shape[0]) | |