Spaces:
Running
Running
| #!/usr/bin/env python | |
| # Authors: The MNE-Python contributors. | |
| # License: BSD-3-Clause | |
| # Copyright the MNE-Python contributors. | |
| import numpy as np | |
| from scipy import ndimage, sparse | |
| from scipy.sparse.csgraph import connected_components | |
| from scipy.stats import f as fstat | |
| from scipy.stats import t as tstat | |
| from ..fixes import has_numba, jit | |
| from ..parallel import parallel_func | |
| from ..source_estimate import MixedSourceEstimate, SourceEstimate, VolSourceEstimate | |
| from ..source_space import SourceSpaces | |
| from ..utils import ( | |
| ProgressBar, | |
| _check_option, | |
| _pl, | |
| _validate_type, | |
| check_random_state, | |
| logger, | |
| split_list, | |
| verbose, | |
| warn, | |
| ) | |
| from .parametric import f_oneway, ttest_1samp_no_p | |
| def _get_buddies_fallback(r, s, neighbors, indices=None): | |
| if indices is None: | |
| buddies = np.where(r)[0] | |
| else: | |
| buddies = indices[r[indices]] | |
| buddies = buddies[np.isin(s[buddies], neighbors, assume_unique=True)] | |
| r[buddies] = False | |
| return buddies.tolist() | |
| def _get_selves_fallback(r, s, ind, inds, t, t_border, max_step): | |
| start = t_border[max(t[ind] - max_step, 0)] | |
| stop = t_border[min(t[ind] + max_step + 1, len(t_border) - 1)] | |
| indices = inds[start:stop] | |
| selves = indices[r[indices]] | |
| selves = selves[s[ind] == s[selves]] | |
| r[selves] = False | |
| return selves.tolist() | |
| def _where_first_fallback(x): | |
| # this is equivalent to np.where(r)[0] for these purposes, but it's | |
| # a little bit faster. Unfortunately there's no way to tell numpy | |
| # just to find the first instance (to save checking every one): | |
| next_ind = int(np.argmax(x)) | |
| if next_ind == 0: | |
| next_ind = -1 | |
| return next_ind | |
| if has_numba: # pragma: no cover | |
| def _get_buddies(r, s, neighbors, indices=None): | |
| buddies = list() | |
| # At some point we might be able to use the sorted-ness of s or | |
| # neighbors to further speed this up | |
| if indices is None: | |
| n_check = len(r) | |
| else: | |
| n_check = len(indices) | |
| for ii in range(n_check): | |
| if indices is None: | |
| this_idx = ii | |
| else: | |
| this_idx = indices[ii] | |
| if r[this_idx]: | |
| this_s = s[this_idx] | |
| for ni in range(len(neighbors)): | |
| if this_s == neighbors[ni]: | |
| buddies.append(this_idx) | |
| r[this_idx] = False | |
| break | |
| return buddies | |
| def _get_selves(r, s, ind, inds, t, t_border, max_step): | |
| selves = list() | |
| start = t_border[max(t[ind] - max_step, 0)] | |
| stop = t_border[min(t[ind] + max_step + 1, len(t_border) - 1)] | |
| for ii in range(start, stop): | |
| this_idx = inds[ii] | |
| if r[this_idx] and s[ind] == s[this_idx]: | |
| selves.append(this_idx) | |
| r[this_idx] = False | |
| return selves | |
| def _where_first(x): | |
| for ii in range(len(x)): | |
| if x[ii]: | |
| return ii | |
| return -1 | |
| else: # pragma: no cover | |
| # fastest ways we've found with NumPy | |
| _get_buddies = _get_buddies_fallback | |
| _get_selves = _get_selves_fallback | |
| _where_first = _where_first_fallback | |
| def _masked_sum(x, c): | |
| return np.sum(x[c]) | |
| def _masked_sum_power(x, c, t_power): | |
| return np.sum(np.sign(x[c]) * np.abs(x[c]) ** t_power) | |
| def _sum_cluster_data(data, tstep): | |
| return np.sign(data) * np.logical_not(data == 0) * tstep | |
| def _get_clusters_spatial(s, neighbors): | |
| """Form spatial clusters using neighbor lists. | |
| This is equivalent to _get_components with n_times = 1, with a properly | |
| reconfigured adjacency matrix (formed as "neighbors" list) | |
| """ | |
| # s is a vector of spatial indices that are significant, like: | |
| # s = np.where(x_in)[0] | |
| # for x_in representing a single time-instant | |
| r = np.ones(s.shape, bool) | |
| clusters = list() | |
| next_ind = 0 if s.size > 0 else -1 | |
| while next_ind >= 0: | |
| # put first point in a cluster, adjust remaining | |
| t_inds = [next_ind] | |
| r[next_ind] = 0 | |
| icount = 1 # count of nodes in the current cluster | |
| while icount <= len(t_inds): | |
| ind = t_inds[icount - 1] | |
| # look across other vertices | |
| buddies = _get_buddies(r, s, neighbors[s[ind]]) | |
| t_inds.extend(buddies) | |
| icount += 1 | |
| next_ind = _where_first(r) | |
| clusters.append(s[t_inds]) | |
| return clusters | |
| def _reassign(check, clusters, base, num): | |
| """Reassign cluster numbers.""" | |
| # reconfigure check matrix | |
| check[check == num] = base | |
| # concatenate new values into clusters array | |
| clusters[base - 1] = np.concatenate((clusters[base - 1], clusters[num - 1])) | |
| clusters[num - 1] = np.array([], dtype=int) | |
| def _get_clusters_st_1step(keepers, neighbors): | |
| """Directly calculate clusters. | |
| This uses knowledge that time points are | |
| only adjacent to immediate neighbors for data organized as time x space. | |
| This algorithm time increases linearly with the number of time points, | |
| compared to with the square for the standard (graph) algorithm. | |
| This algorithm creates clusters for each time point using a method more | |
| efficient than the standard graph method (but otherwise equivalent), then | |
| combines these clusters across time points in a reasonable way. | |
| """ | |
| n_src = len(neighbors) | |
| n_times = len(keepers) | |
| # start cluster numbering at 1 for diffing convenience | |
| enum_offset = 1 | |
| check = np.zeros((n_times, n_src), dtype=int) | |
| clusters = list() | |
| for ii, k in enumerate(keepers): | |
| c = _get_clusters_spatial(k, neighbors) | |
| for ci, cl in enumerate(c): | |
| check[ii, cl] = ci + enum_offset | |
| enum_offset += len(c) | |
| # give them the correct offsets | |
| c = [cl + ii * n_src for cl in c] | |
| clusters += c | |
| # now that each cluster has been assigned a unique number, combine them | |
| # by going through each time point | |
| for check1, check2, k in zip(check[:-1], check[1:], keepers[:-1]): | |
| # go through each one that needs reassignment | |
| inds = k[check2[k] - check1[k] > 0] | |
| check1_d = check1[inds] | |
| n = check2[inds] | |
| nexts = np.unique(n) | |
| for num in nexts: | |
| prevs = check1_d[n == num] | |
| base = np.min(prevs) | |
| for pr in np.unique(prevs[prevs != base]): | |
| _reassign(check1, clusters, base, pr) | |
| # reassign values | |
| _reassign(check2, clusters, base, num) | |
| # clean up clusters | |
| clusters = [cl for cl in clusters if len(cl) > 0] | |
| return clusters | |
| def _get_clusters_st_multistep(keepers, neighbors, max_step=1): | |
| """Directly calculate clusters. | |
| This uses knowledge that time points are | |
| only adjacent to immediate neighbors for data organized as time x space. | |
| This algorithm time increases linearly with the number of time points, | |
| compared to with the square for the standard (graph) algorithm. | |
| """ | |
| n_src = len(neighbors) | |
| n_times = len(keepers) | |
| t_border = list() | |
| t_border.append(0) | |
| for ki, k in enumerate(keepers): | |
| keepers[ki] = k + ki * n_src | |
| t_border.append(t_border[ki] + len(k)) | |
| t_border = np.array(t_border) | |
| keepers = np.concatenate(keepers) | |
| v = keepers | |
| t, s = divmod(v, n_src) | |
| r = np.ones(t.shape, dtype=bool) | |
| clusters = list() | |
| inds = np.arange(t_border[0], t_border[n_times]) | |
| next_ind = 0 if s.size > 0 else -1 | |
| while next_ind >= 0: | |
| # put first point in a cluster, adjust remaining | |
| t_inds = [next_ind] | |
| r[next_ind] = False | |
| icount = 1 # count of nodes in the current cluster | |
| # look for significant values at the next time point, | |
| # same sensor, not placed yet, and add those | |
| while icount <= len(t_inds): | |
| ind = t_inds[icount - 1] | |
| selves = _get_selves(r, s, ind, inds, t, t_border, max_step) | |
| # look at current time point across other vertices | |
| these_inds = inds[t_border[t[ind]] : t_border[t[ind] + 1]] | |
| buddies = _get_buddies(r, s, neighbors[s[ind]], these_inds) | |
| t_inds += buddies + selves | |
| icount += 1 | |
| next_ind = _where_first(r) | |
| clusters.append(v[t_inds]) | |
| return clusters | |
| def _get_clusters_st(x_in, neighbors, max_step=1): | |
| """Choose the most efficient version.""" | |
| n_src = len(neighbors) | |
| n_times = x_in.size // n_src | |
| cl_goods = np.where(x_in)[0] | |
| if len(cl_goods) > 0: | |
| keepers = [np.array([], dtype=int)] * n_times | |
| row, col = np.unravel_index(cl_goods, (n_times, n_src)) | |
| lims = [0] | |
| if isinstance(row, int): | |
| row = [row] | |
| col = [col] | |
| else: | |
| order = np.argsort(row) | |
| row = row[order] | |
| col = col[order] | |
| lims += (np.where(np.diff(row) > 0)[0] + 1).tolist() | |
| lims.append(len(row)) | |
| for start, end in zip(lims[:-1], lims[1:]): | |
| keepers[row[start]] = np.sort(col[start:end]) | |
| if max_step == 1: | |
| return _get_clusters_st_1step(keepers, neighbors) | |
| else: | |
| return _get_clusters_st_multistep(keepers, neighbors, max_step) | |
| else: | |
| return [] | |
| def _get_components(x_in, adjacency, return_list=True): | |
| """Get connected components from a mask and a adjacency matrix.""" | |
| if adjacency is False: | |
| components = np.arange(len(x_in)) | |
| else: | |
| mask = np.logical_and(x_in[adjacency.row], x_in[adjacency.col]) | |
| data = adjacency.data[mask] | |
| row = adjacency.row[mask] | |
| col = adjacency.col[mask] | |
| shape = adjacency.shape | |
| idx = np.where(x_in)[0] | |
| row = np.concatenate((row, idx)) | |
| col = np.concatenate((col, idx)) | |
| data = np.concatenate((data, np.ones(len(idx), dtype=data.dtype))) | |
| adjacency = sparse.coo_array((data, (row, col)), shape=shape) | |
| _, components = connected_components(adjacency) | |
| if return_list: | |
| start = np.min(components) | |
| stop = np.max(components) | |
| comp_list = [list() for i in range(start, stop + 1, 1)] | |
| mask = np.zeros(len(comp_list), dtype=bool) | |
| for ii, comp in enumerate(components): | |
| comp_list[comp].append(ii) | |
| mask[comp] += x_in[ii] | |
| clusters = [np.array(k) for k, m in zip(comp_list, mask) if m] | |
| return clusters | |
| else: | |
| return components | |
| def _find_clusters( | |
| x, | |
| threshold, | |
| tail=0, | |
| adjacency=None, | |
| max_step=1, | |
| include=None, | |
| partitions=None, | |
| t_power=1, | |
| show_info=False, | |
| ): | |
| """Find all clusters which are above/below a certain threshold. | |
| When doing a two-tailed test (tail == 0), only points with the same | |
| sign will be clustered together. | |
| Parameters | |
| ---------- | |
| x : 1D array | |
| Data | |
| threshold : float | dict | |
| Where to threshold the statistic. Should be negative for tail == -1, | |
| and positive for tail == 0 or 1. Can also be an dict for | |
| threshold-free cluster enhancement. | |
| tail : -1 | 0 | 1 | |
| Type of comparison | |
| adjacency : scipy.sparse.coo_array, None, or list | |
| Defines adjacency between features. The matrix is assumed to | |
| be symmetric and only the upper triangular half is used. | |
| If adjacency is a list, it is assumed that each entry stores the | |
| indices of the spatial neighbors in a spatio-temporal dataset x. | |
| Default is None, i.e, a regular lattice adjacency. | |
| False means no adjacency. | |
| max_step : int | |
| If adjacency is a list, this defines the maximal number of steps | |
| between vertices along the second dimension (typically time) to be | |
| considered adjacent. | |
| include : 1D bool array or None | |
| Mask to apply to the data of points to cluster. If None, all points | |
| are used. | |
| partitions : array of int or None | |
| An array (same size as X) of integers indicating which points belong | |
| to each partition. | |
| t_power : float | |
| Power to raise the statistical values (usually t-values) by before | |
| summing (sign will be retained). Note that t_power == 0 will give a | |
| count of nodes in each cluster, t_power == 1 will weight each node by | |
| its statistical score. | |
| show_info : bool | |
| If True, display information about thresholds used (for TFCE). Should | |
| only be done for the standard permutation. | |
| Returns | |
| ------- | |
| clusters : list of slices or list of arrays (boolean masks) | |
| We use slices for 1D signals and mask to multidimensional | |
| arrays. None is returned if threshold is a dict (TFCE) | |
| sums : array | |
| Sum of x values in clusters. | |
| """ | |
| _check_option("tail", tail, [-1, 0, 1]) | |
| x = np.asanyarray(x) | |
| if not np.isscalar(threshold): | |
| if not isinstance(threshold, dict): | |
| raise TypeError( | |
| "threshold must be a number, or a dict for " | |
| "threshold-free cluster enhancement" | |
| ) | |
| if not all(key in threshold for key in ["start", "step"]): | |
| raise KeyError('threshold, if dict, must have at least "start" and "step"') | |
| tfce = True | |
| use_x = x[np.isfinite(x)] | |
| if use_x.size == 0: | |
| raise RuntimeError( | |
| "No finite values found in the observed statistic values" | |
| ) | |
| if tail == -1: | |
| if threshold["start"] > 0: | |
| raise ValueError('threshold["start"] must be <= 0 for tail == -1') | |
| if threshold["step"] >= 0: | |
| raise ValueError('threshold["step"] must be < 0 for tail == -1') | |
| stop = np.min(use_x) | |
| elif tail == 1: | |
| stop = np.max(use_x) | |
| else: # tail == 0 | |
| stop = max(np.max(use_x), -np.min(use_x)) | |
| del use_x | |
| thresholds = np.arange(threshold["start"], stop, threshold["step"], float) | |
| h_power = threshold.get("h_power", 2) | |
| e_power = threshold.get("e_power", 0.5) | |
| if show_info is True: | |
| if len(thresholds) == 0: | |
| warn( | |
| f'threshold["start"] ({threshold["start"]}) is more extreme ' | |
| f"than data statistics with most extreme value {stop}" | |
| ) | |
| else: | |
| logger.info( | |
| "Using %d thresholds from %0.2f to %0.2f for TFCE " | |
| "computation (h_power=%0.2f, e_power=%0.2f)", | |
| len(thresholds), | |
| thresholds[0], | |
| thresholds[-1], | |
| h_power, | |
| e_power, | |
| ) | |
| scores = np.zeros(x.size) | |
| else: | |
| thresholds = [threshold] | |
| tfce = False | |
| # include all points by default | |
| if include is None: | |
| include = np.ones(x.shape, dtype=bool) | |
| if tail in [0, 1] and not np.all(np.diff(thresholds) > 0): | |
| raise ValueError("Thresholds must be monotonically increasing") | |
| if tail == -1 and not np.all(np.diff(thresholds) < 0): | |
| raise ValueError("Thresholds must be monotonically decreasing") | |
| # set these here just in case thresholds == [] | |
| clusters = list() | |
| sums = list() | |
| for ti, thresh in enumerate(thresholds): | |
| # these need to be reset on each run | |
| clusters = list() | |
| if tail == 0: | |
| x_ins = [ | |
| np.logical_and(x > thresh, include), | |
| np.logical_and(x < -thresh, include), | |
| ] | |
| elif tail == -1: | |
| x_ins = [np.logical_and(x < thresh, include)] | |
| else: # tail == 1 | |
| x_ins = [np.logical_and(x > thresh, include)] | |
| # loop over tails | |
| for x_in in x_ins: | |
| if np.any(x_in): | |
| out = _find_clusters_1dir_parts( | |
| x, x_in, adjacency, max_step, partitions, t_power, ndimage | |
| ) | |
| clusters += out[0] | |
| sums.append(out[1]) | |
| if tfce: | |
| # the score of each point is the sum of the h^H * e^E for each | |
| # supporting section "rectangle" h x e. | |
| if ti == 0: | |
| h = abs(thresh) | |
| else: | |
| h = abs(thresh - thresholds[ti - 1]) | |
| h = h**h_power | |
| for c in clusters: | |
| # triage based on cluster storage type | |
| if isinstance(c, slice): | |
| len_c = c.stop - c.start | |
| elif isinstance(c, tuple): | |
| len_c = len(c) | |
| elif c.dtype == np.dtype(bool): | |
| len_c = np.sum(c) | |
| else: | |
| len_c = len(c) | |
| scores[c] += h * (len_c**e_power) | |
| # turn sums into array | |
| sums = np.concatenate(sums) if sums else np.array([]) | |
| if tfce: | |
| sums = scores | |
| clusters = None # clusters construction is made in _permutation_cluster_test | |
| return clusters, sums | |
| def _find_clusters_1dir_parts( | |
| x, x_in, adjacency, max_step, partitions, t_power, ndimage | |
| ): | |
| """Deal with partitions, and pass the work to _find_clusters_1dir.""" | |
| if partitions is None: | |
| clusters, sums = _find_clusters_1dir( | |
| x, x_in, adjacency, max_step, t_power, ndimage | |
| ) | |
| else: | |
| # cluster each partition separately | |
| clusters = list() | |
| sums = list() | |
| for p in range(np.max(partitions) + 1): | |
| x_i = np.logical_and(x_in, partitions == p) | |
| out = _find_clusters_1dir(x, x_i, adjacency, max_step, t_power, ndimage) | |
| clusters += out[0] | |
| sums.append(out[1]) | |
| sums = np.concatenate(sums) | |
| return clusters, sums | |
| def _find_clusters_1dir(x, x_in, adjacency, max_step, t_power, ndimage): | |
| """Actually call the clustering algorithm.""" | |
| if adjacency is None: | |
| labels, n_labels = ndimage.label(x_in) | |
| if x.ndim == 1: | |
| # slices | |
| clusters = ndimage.find_objects(labels, n_labels) | |
| # equivalent to if len(clusters) == 0 but faster | |
| if not clusters: | |
| sums = list() | |
| else: | |
| index = list(range(1, n_labels + 1)) | |
| if t_power == 1: | |
| sums = ndimage.sum(x, labels, index=index) | |
| else: | |
| sums = ndimage.sum( | |
| np.sign(x) * np.abs(x) ** t_power, labels, index=index | |
| ) | |
| else: | |
| # boolean masks (raveled) | |
| clusters = list() | |
| sums = np.empty(n_labels) | |
| for label in range(n_labels): | |
| c = labels == label + 1 | |
| clusters.append(c.ravel()) | |
| if t_power == 1: | |
| sums[label] = np.sum(x[c]) | |
| else: | |
| sums[label] = np.sum(np.sign(x[c]) * np.abs(x[c]) ** t_power) | |
| else: | |
| if x.ndim > 1: | |
| raise Exception( | |
| "Data should be 1D when using a adjacency to define clusters." | |
| ) | |
| if isinstance(adjacency, sparse.spmatrix): | |
| adjacency = sparse.coo_array(adjacency) | |
| if sparse.issparse(adjacency) or adjacency is False: | |
| clusters = _get_components(x_in, adjacency) | |
| elif isinstance(adjacency, list): # use temporal adjacency | |
| clusters = _get_clusters_st(x_in, adjacency, max_step) | |
| else: | |
| raise TypeError( | |
| f"adjacency must be a sparse array or list, got {type(adjacency)}" | |
| ) | |
| if t_power == 1: | |
| sums = [_masked_sum(x, c) for c in clusters] | |
| else: | |
| sums = [_masked_sum_power(x, c, t_power) for c in clusters] | |
| return clusters, np.atleast_1d(sums) | |
| def _cluster_indices_to_mask(components, n_tot, slice_out): | |
| """Convert to the old format of clusters, which were bool arrays (or slices in 1D).""" # noqa: E501 | |
| for ci, c in enumerate(components): | |
| if not slice_out: | |
| # boolean array | |
| components[ci] = np.zeros((n_tot), dtype=bool) | |
| components[ci][c] = True | |
| else: | |
| # slice (similar as ndimage.find_object output) | |
| components[ci] = (slice(c.min(), c.max() + 1),) | |
| return components | |
| def _cluster_mask_to_indices(components, shape): | |
| """Convert to the old format of clusters, which were bool arrays.""" | |
| for ci, c in enumerate(components): | |
| if isinstance(c, np.ndarray): # mask | |
| components[ci] = np.where(c.reshape(shape)) | |
| elif isinstance(c, slice): | |
| components[ci] = np.arange(c.start, c.stop) | |
| else: | |
| assert isinstance(c, tuple), type(c) | |
| c = list(c) # tuple->list | |
| for ii, cc in enumerate(c): | |
| if isinstance(cc, slice): | |
| c[ii] = np.arange(cc.start, cc.stop) | |
| else: | |
| c[ii] = np.where(cc)[0] | |
| components[ci] = tuple(c) | |
| return components | |
| def _pval_from_histogram(T, H0, tail): | |
| """Get p-values from stats values given an H0 distribution. | |
| For each stat compute a p-value as percentile of its statistics | |
| within all statistics in surrogate data | |
| """ | |
| # from pct to fraction | |
| if tail == -1: # up tail | |
| pval = np.array([np.mean(H0 <= t) for t in T]) | |
| elif tail == 1: # low tail | |
| pval = np.array([np.mean(H0 >= t) for t in T]) | |
| else: # both tails | |
| pval = np.array([np.mean(abs(H0) >= abs(t)) for t in T]) | |
| return pval | |
| def _setup_adjacency(adjacency, n_tests, n_times): | |
| if not sparse.issparse(adjacency): | |
| raise ValueError( | |
| "If adjacency matrix is given, it must be a SciPy sparse matrix." | |
| ) | |
| if adjacency.shape[0] == n_tests: # use global algorithm | |
| adjacency = adjacency.tocoo() | |
| else: # use temporal adjacency algorithm | |
| got_times, mod = divmod(n_tests, adjacency.shape[0]) | |
| if got_times != n_times or mod != 0: | |
| raise ValueError( | |
| f"adjacency (len {adjacency.shape[0]}) must be of the correct size, " | |
| "i.e. be equal to or evenly divide the number of tests ({n_tests}).\n\n" | |
| "If adjacency was computed for a source space, try using " | |
| 'the fwd["src"] or inv["src"] as some original source space ' | |
| "vertices can be excluded during forward computation" | |
| ) | |
| # we claim to only use upper triangular part... not true here | |
| adjacency = (adjacency + adjacency.transpose()).tocsr() | |
| adjacency = [ | |
| adjacency.indices[adjacency.indptr[i] : adjacency.indptr[i + 1]] | |
| for i in range(len(adjacency.indptr) - 1) | |
| ] | |
| return adjacency | |
| def _do_permutations( | |
| X_full, | |
| slices, | |
| threshold, | |
| tail, | |
| adjacency, | |
| stat_fun, | |
| max_step, | |
| include, | |
| partitions, | |
| t_power, | |
| orders, | |
| sample_shape, | |
| buffer_size, | |
| progress_bar, | |
| ): | |
| n_samp, n_vars = X_full.shape | |
| if buffer_size is not None and n_vars <= buffer_size: | |
| buffer_size = None # don't use buffer for few variables | |
| # allocate space for output | |
| max_cluster_sums = np.empty(len(orders), dtype=np.double) | |
| if buffer_size is not None: | |
| # allocate buffer, so we don't need to allocate memory during loop | |
| X_buffer = [ | |
| np.empty((len(X_full[s]), buffer_size), dtype=X_full.dtype) for s in slices | |
| ] | |
| for seed_idx, order in enumerate(orders): | |
| # shuffle sample indices | |
| assert order is not None | |
| idx_shuffle_list = [order[s] for s in slices] | |
| if buffer_size is None: | |
| # shuffle all data at once | |
| X_shuffle_list = [X_full[idx, :] for idx in idx_shuffle_list] | |
| t_obs_surr = stat_fun(*X_shuffle_list) | |
| else: | |
| # only shuffle a small data buffer, so we need less memory | |
| t_obs_surr = np.empty(n_vars, dtype=X_full.dtype) | |
| for pos in range(0, n_vars, buffer_size): | |
| # number of variables for this loop | |
| n_var_loop = min(pos + buffer_size, n_vars) - pos | |
| # fill buffer | |
| for i, idx in enumerate(idx_shuffle_list): | |
| X_buffer[i][:, :n_var_loop] = X_full[idx, pos : pos + n_var_loop] | |
| # apply stat_fun and store result | |
| tmp = stat_fun(*X_buffer) | |
| t_obs_surr[pos : pos + n_var_loop] = tmp[:n_var_loop] | |
| # The stat should have the same shape as the samples for no adj. | |
| if adjacency is None: | |
| t_obs_surr.shape = sample_shape | |
| # Find cluster on randomized stats | |
| out = _find_clusters( | |
| t_obs_surr, | |
| threshold=threshold, | |
| tail=tail, | |
| max_step=max_step, | |
| adjacency=adjacency, | |
| partitions=partitions, | |
| include=include, | |
| t_power=t_power, | |
| ) | |
| perm_clusters_sums = out[1] | |
| if len(perm_clusters_sums) > 0: | |
| max_cluster_sums[seed_idx] = np.max(perm_clusters_sums) | |
| else: | |
| max_cluster_sums[seed_idx] = 0 | |
| progress_bar.update(seed_idx + 1) | |
| return max_cluster_sums | |
| def _do_1samp_permutations( | |
| X, | |
| slices, | |
| threshold, | |
| tail, | |
| adjacency, | |
| stat_fun, | |
| max_step, | |
| include, | |
| partitions, | |
| t_power, | |
| orders, | |
| sample_shape, | |
| buffer_size, | |
| progress_bar, | |
| ): | |
| n_samp, n_vars = X.shape | |
| assert slices is None # should be None for the 1 sample case | |
| if buffer_size is not None and n_vars <= buffer_size: | |
| buffer_size = None # don't use buffer for few variables | |
| # allocate space for output | |
| max_cluster_sums = np.empty(len(orders), dtype=np.double) | |
| if buffer_size is not None: | |
| # allocate a buffer so we don't need to allocate memory in loop | |
| X_flip_buffer = np.empty((n_samp, buffer_size), dtype=X.dtype) | |
| for seed_idx, order in enumerate(orders): | |
| assert isinstance(order, np.ndarray) | |
| # new surrogate data with specified sign flip | |
| assert order.size == n_samp # should be guaranteed by parent | |
| signs = 2 * order[:, None].astype(int) - 1 | |
| if not np.all(np.equal(np.abs(signs), 1)): | |
| raise ValueError("signs from rng must be +/- 1") | |
| if buffer_size is None: | |
| # be careful about non-writable memmap (GH#1507) | |
| if X.flags.writeable: | |
| X *= signs | |
| # Recompute statistic on randomized data | |
| t_obs_surr = stat_fun(X) | |
| # Set X back to previous state (trade memory eff. for CPU use) | |
| X *= signs | |
| else: | |
| t_obs_surr = stat_fun(X * signs) | |
| else: | |
| # only sign-flip a small data buffer, so we need less memory | |
| t_obs_surr = np.empty(n_vars, dtype=X.dtype) | |
| for pos in range(0, n_vars, buffer_size): | |
| # number of variables for this loop | |
| n_var_loop = min(pos + buffer_size, n_vars) - pos | |
| X_flip_buffer[:, :n_var_loop] = signs * X[:, pos : pos + n_var_loop] | |
| # apply stat_fun and store result | |
| tmp = stat_fun(X_flip_buffer) | |
| t_obs_surr[pos : pos + n_var_loop] = tmp[:n_var_loop] | |
| # The stat should have the same shape as the samples for no adj. | |
| if adjacency is None: | |
| t_obs_surr.shape = sample_shape | |
| # Find cluster on randomized stats | |
| out = _find_clusters( | |
| t_obs_surr, | |
| threshold=threshold, | |
| tail=tail, | |
| max_step=max_step, | |
| adjacency=adjacency, | |
| partitions=partitions, | |
| include=include, | |
| t_power=t_power, | |
| ) | |
| perm_clusters_sums = out[1] | |
| if len(perm_clusters_sums) > 0: | |
| # get max with sign info | |
| idx_max = np.argmax(np.abs(perm_clusters_sums)) | |
| max_cluster_sums[seed_idx] = perm_clusters_sums[idx_max] | |
| else: | |
| max_cluster_sums[seed_idx] = 0 | |
| progress_bar.update(seed_idx + 1) | |
| return max_cluster_sums | |
| def bin_perm_rep(ndim, a=0, b=1): | |
| """Ndim permutations with repetitions of (a,b). | |
| Returns an array with all the possible permutations with repetitions of | |
| (0,1) in ndim dimensions. The array is shaped as (2**ndim,ndim), and is | |
| ordered with the last index changing fastest. For examble, for ndim=3: | |
| Examples | |
| -------- | |
| >>> bin_perm_rep(3) | |
| array([[0, 0, 0], | |
| [0, 0, 1], | |
| [0, 1, 0], | |
| [0, 1, 1], | |
| [1, 0, 0], | |
| [1, 0, 1], | |
| [1, 1, 0], | |
| [1, 1, 1]]) | |
| """ | |
| # Create the leftmost column as 0,0,...,1,1,... | |
| nperms = 2**ndim | |
| perms = np.empty((nperms, ndim), type(a)) | |
| perms.fill(a) | |
| half_point = nperms // 2 | |
| perms[half_point:, 0] = b | |
| # Fill the rest of the table by sampling the previous column every 2 items | |
| for j in range(1, ndim): | |
| half_col = perms[::2, j - 1] | |
| perms[:half_point, j] = half_col | |
| perms[half_point:, j] = half_col | |
| # This is equivalent to something like: | |
| # orders = [np.fromiter(np.binary_repr(s + 1, ndim), dtype=int) | |
| # for s in np.arange(2 ** ndim)] | |
| return perms | |
| def _get_1samp_orders(n_samples, n_permutations, tail, rng): | |
| """Get the 1samp orders.""" | |
| max_perms = 2 ** (n_samples - (tail == 0)) - 1 | |
| extra = "" | |
| if isinstance(n_permutations, str): | |
| if n_permutations != "all": | |
| raise ValueError('n_permutations as a string must be "all"') | |
| n_permutations = max_perms | |
| n_permutations = int(n_permutations) | |
| if max_perms < n_permutations: | |
| # omit first perm b/c accounted for in H0.append() later; | |
| # convert to binary array representation | |
| extra = " (exact test)" | |
| orders = bin_perm_rep(n_samples)[1 : max_perms + 1] | |
| elif n_samples <= 20: # fast way to do it for small(ish) n_samples | |
| orders = rng.choice(max_perms, n_permutations - 1, replace=False) | |
| orders = [ | |
| np.fromiter(np.binary_repr(s + 1, n_samples), dtype=int) for s in orders | |
| ] | |
| else: # n_samples >= 64 | |
| # Here we can just use the hash-table (w/collision detection) | |
| # functionality of a dict to ensure uniqueness | |
| orders = np.zeros((n_permutations - 1, n_samples), int) | |
| hashes = {} | |
| ii = 0 | |
| # in the symmetric case, we should never flip one of the subjects | |
| # to prevent positive/negative equivalent collisions | |
| use_samples = n_samples - (tail == 0) | |
| while ii < n_permutations - 1: | |
| signs = tuple((rng.uniform(size=use_samples) < 0.5).astype(int)) | |
| if signs not in hashes: | |
| orders[ii, :use_samples] = signs | |
| if tail == 0 and rng.uniform() < 0.5: | |
| # To undo the non-flipping of the last subject in the | |
| # tail == 0 case, half the time we use the positive | |
| # last subject, half the time negative last subject | |
| orders[ii] = 1 - orders[ii] | |
| hashes[signs] = None | |
| ii += 1 | |
| return orders, n_permutations, extra | |
| def _permutation_cluster_test( | |
| X, | |
| threshold, | |
| n_permutations, | |
| tail, | |
| stat_fun, | |
| adjacency, | |
| n_jobs, | |
| seed, | |
| max_step, | |
| exclude, | |
| step_down_p, | |
| t_power, | |
| out_type, | |
| check_disjoint, | |
| buffer_size, | |
| ): | |
| """Aux Function. | |
| Note. X is required to be a list. Depending on the length of X | |
| either a 1 sample t-test or an F test / more sample permutation scheme | |
| is elicited. | |
| """ | |
| _check_option("out_type", out_type, ["mask", "indices"]) | |
| _check_option("tail", tail, [-1, 0, 1]) | |
| if not isinstance(threshold, dict): | |
| threshold = float(threshold) | |
| if ( | |
| tail < 0 | |
| and threshold > 0 | |
| or tail > 0 | |
| and threshold < 0 | |
| or tail == 0 | |
| and threshold < 0 | |
| ): | |
| raise ValueError( | |
| f"incompatible tail and threshold signs, got {tail} and {threshold}" | |
| ) | |
| # check dimensions for each group in X (a list at this stage). | |
| X = [x[:, np.newaxis] if x.ndim == 1 else x for x in X] | |
| n_samples = X[0].shape[0] | |
| n_times = X[0].shape[1] | |
| sample_shape = X[0].shape[1:] | |
| for x in X: | |
| if x.shape[1:] != sample_shape: | |
| raise ValueError("All samples mush have the same size") | |
| # flatten the last dimensions in case the data is high dimensional | |
| X = [np.reshape(x, (x.shape[0], -1)) for x in X] | |
| n_tests = X[0].shape[1] | |
| if adjacency is not None and adjacency is not False: | |
| adjacency = _setup_adjacency(adjacency, n_tests, n_times) | |
| if (exclude is not None) and not exclude.size == n_tests: | |
| raise ValueError("exclude must be the same shape as X[0]") | |
| # Step 1: Calculate t-stat for original data | |
| # ------------------------------------------------------------- | |
| t_obs = stat_fun(*X) | |
| _validate_type(t_obs, np.ndarray, "return value of stat_fun") | |
| logger.info(f"stat_fun(H1): min={np.min(t_obs)} max={np.max(t_obs)}") | |
| # test if stat_fun treats variables independently | |
| if buffer_size is not None: | |
| t_obs_buffer = np.zeros_like(t_obs) | |
| for pos in range(0, n_tests, buffer_size): | |
| t_obs_buffer[pos : pos + buffer_size] = stat_fun( | |
| *[x[:, pos : pos + buffer_size] for x in X] | |
| ) | |
| if not np.all(t_obs == t_obs_buffer): | |
| warn( | |
| "Provided stat_fun does not treat variables independently. " | |
| "Setting buffer_size to None." | |
| ) | |
| buffer_size = None | |
| # The stat should have the same shape as the samples for no adj. | |
| if t_obs.size != np.prod(sample_shape): | |
| raise ValueError( | |
| f"t_obs.shape {t_obs.shape} provided by stat_fun {stat_fun} is not " | |
| f"compatible with the sample shape {sample_shape}" | |
| ) | |
| if adjacency is None or adjacency is False: | |
| t_obs.shape = sample_shape | |
| if exclude is not None: | |
| include = np.logical_not(exclude) | |
| else: | |
| include = None | |
| # determine if adjacency itself can be separated into disjoint sets | |
| if check_disjoint is True and (adjacency is not None and adjacency is not False): | |
| partitions = _get_partitions_from_adjacency(adjacency, n_times) | |
| else: | |
| partitions = None | |
| logger.info("Running initial clustering …") | |
| out = _find_clusters( | |
| t_obs, | |
| threshold, | |
| tail, | |
| adjacency, | |
| max_step=max_step, | |
| include=include, | |
| partitions=partitions, | |
| t_power=t_power, | |
| show_info=True, | |
| ) | |
| clusters, cluster_stats = out | |
| # The stat should have the same shape as the samples | |
| t_obs.shape = sample_shape | |
| # For TFCE, return the "adjusted" statistic instead of raw scores | |
| # and for clusters, each point gets treated independently | |
| tfce = isinstance(threshold, dict) | |
| if tfce: | |
| t_obs = cluster_stats.reshape(t_obs.shape) * np.sign(t_obs) | |
| clusters = [np.array([c]) for c in range(t_obs.size)] | |
| logger.info(f"Found {len(clusters)} cluster{_pl(clusters)}") | |
| # convert clusters to old format | |
| if (adjacency is not None and adjacency is not False) or tfce: | |
| # our algorithms output lists of indices by default | |
| if out_type == "mask": | |
| slice_out = (adjacency is None) & (len(sample_shape) == 1) | |
| clusters = _cluster_indices_to_mask(clusters, n_tests, slice_out) | |
| else: | |
| # ndimage outputs slices or boolean masks by default, | |
| if out_type == "indices": | |
| clusters = _cluster_mask_to_indices(clusters, t_obs.shape) | |
| # convert our seed to orders | |
| # check to see if we can do an exact test | |
| # (for a two-tailed test, we can exploit symmetry to just do half) | |
| extra = "" | |
| rng = check_random_state(seed) | |
| del seed | |
| if len(X) == 1: # 1-sample test | |
| do_perm_func = _do_1samp_permutations | |
| X_full = X[0] | |
| slices = None | |
| orders, n_permutations, extra = _get_1samp_orders( | |
| n_samples, n_permutations, tail, rng | |
| ) | |
| else: | |
| n_permutations = int(n_permutations) | |
| do_perm_func = _do_permutations | |
| X_full = np.concatenate(X, axis=0) | |
| n_samples_per_condition = [x.shape[0] for x in X] | |
| splits_idx = np.append([0], np.cumsum(n_samples_per_condition)) | |
| slices = [slice(splits_idx[k], splits_idx[k + 1]) for k in range(len(X))] | |
| orders = [rng.permutation(len(X_full)) for _ in range(n_permutations - 1)] | |
| del rng | |
| parallel, my_do_perm_func, n_jobs = parallel_func( | |
| do_perm_func, n_jobs, verbose=False | |
| ) | |
| if len(clusters) == 0: | |
| warn("No clusters found, returning empty H0, clusters, and cluster_pv") | |
| return t_obs, np.array([]), np.array([]), np.array([]) | |
| # Step 2: If we have some clusters, repeat process on permuted data | |
| # ------------------------------------------------------------------- | |
| # Step 3: repeat permutations for step-down-in-jumps procedure | |
| n_removed = 1 # number of new clusters added | |
| total_removed = 0 | |
| step_down_include = None # start out including all points | |
| n_step_downs = 0 | |
| while n_removed > 0: | |
| # actually do the clustering for each partition | |
| if include is not None: | |
| if step_down_include is not None: | |
| this_include = np.logical_and(include, step_down_include) | |
| else: | |
| this_include = include | |
| else: | |
| this_include = step_down_include | |
| with ProgressBar( | |
| iterable=range(len(orders)), mesg=f"Permuting{extra}" | |
| ) as progress_bar: | |
| H0 = parallel( | |
| my_do_perm_func( | |
| X_full, | |
| slices, | |
| threshold, | |
| tail, | |
| adjacency, | |
| stat_fun, | |
| max_step, | |
| this_include, | |
| partitions, | |
| t_power, | |
| order, | |
| sample_shape, | |
| buffer_size, | |
| progress_bar.subset(idx), | |
| ) | |
| for idx, order in split_list(orders, n_jobs, idx=True) | |
| ) | |
| # include original (true) ordering | |
| if tail == -1: # up tail | |
| orig = cluster_stats.min() | |
| elif tail == 1: | |
| orig = cluster_stats.max() | |
| else: | |
| orig = abs(cluster_stats).max() | |
| H0.insert(0, [orig]) | |
| H0 = np.concatenate(H0) | |
| logger.debug("Computing cluster p-values") | |
| cluster_pv = _pval_from_histogram(cluster_stats, H0, tail) | |
| # figure out how many new ones will be removed for step-down | |
| to_remove = np.where(cluster_pv < step_down_p)[0] | |
| n_removed = to_remove.size - total_removed | |
| total_removed = to_remove.size | |
| step_down_include = np.ones(n_tests, dtype=bool) | |
| for ti in to_remove: | |
| step_down_include[clusters[ti]] = False | |
| if adjacency is None and adjacency is not False: | |
| step_down_include.shape = sample_shape | |
| n_step_downs += 1 | |
| if step_down_p > 0: | |
| a_text = "additional " if n_step_downs > 1 else "" | |
| logger.info( | |
| "Step-down-in-jumps iteration #%i found %i %s" | |
| "cluster%s to exclude from subsequent iterations", | |
| n_step_downs, | |
| n_removed, | |
| a_text, | |
| _pl(n_removed), | |
| ) | |
| # The clusters should have the same shape as the samples | |
| clusters = _reshape_clusters(clusters, sample_shape) | |
| return t_obs, clusters, cluster_pv, H0 | |
| def _check_fun(X, stat_fun, threshold, tail=0, kind="within"): | |
| """Check the stat_fun and threshold values.""" | |
| if kind == "within": | |
| if threshold is None: | |
| if stat_fun is not None and stat_fun is not ttest_1samp_no_p: | |
| warn( | |
| "Automatic threshold is only valid for stat_fun=None " | |
| f"(or ttest_1samp_no_p), got {stat_fun}" | |
| ) | |
| p_thresh = 0.05 / (1 + (tail == 0)) | |
| n_samples = len(X) | |
| threshold = -tstat.ppf(p_thresh, n_samples - 1) | |
| if np.sign(tail) < 0: | |
| threshold = -threshold | |
| logger.info(f"Using a threshold of {threshold:.6f}") | |
| stat_fun = ttest_1samp_no_p if stat_fun is None else stat_fun | |
| else: | |
| assert kind == "between" | |
| if threshold is None: | |
| if stat_fun is not None and stat_fun is not f_oneway: | |
| warn( | |
| "Automatic threshold is only valid for stat_fun=None " | |
| f"(or f_oneway), got {stat_fun}" | |
| ) | |
| elif tail != 1: | |
| warn('Ignoring argument "tail", performing 1-tailed F-test') | |
| p_thresh = 0.05 | |
| dfn = len(X) - 1 | |
| dfd = np.sum([len(x) for x in X]) - len(X) | |
| threshold = fstat.ppf(1.0 - p_thresh, dfn, dfd) | |
| logger.info(f"Using a threshold of {threshold:.6f}") | |
| stat_fun = f_oneway if stat_fun is None else stat_fun | |
| return stat_fun, threshold | |
| def permutation_cluster_test( | |
| X, | |
| threshold=None, | |
| n_permutations=1024, | |
| tail=0, | |
| stat_fun=None, | |
| adjacency=None, | |
| n_jobs=None, | |
| seed=None, | |
| max_step=1, | |
| exclude=None, | |
| step_down_p=0, | |
| t_power=1, | |
| out_type="indices", | |
| check_disjoint=False, | |
| buffer_size=1000, | |
| verbose=None, | |
| ): | |
| """Cluster-level statistical permutation test. | |
| For a list of :class:`NumPy arrays <numpy.ndarray>` of data, | |
| calculate some statistics corrected for multiple comparisons using | |
| permutations and cluster-level correction. Each element of the list ``X`` | |
| should contain the data for one group of observations (e.g., 2D arrays for | |
| time series, 3D arrays for time-frequency power values). Permutations are | |
| generated with random partitions of the data. For details, see | |
| :footcite:p:`MarisOostenveld2007,Sassenhagen2019`. | |
| Parameters | |
| ---------- | |
| X : list of array, shape (n_observations, p[, q][, r]) | |
| The data to be clustered. Each array in ``X`` should contain the | |
| observations for one group. The first dimension of each array is the | |
| number of observations from that group; remaining dimensions comprise | |
| the size of a single observation. For example if ``X = [X1, X2]`` | |
| with ``X1.shape = (20, 50, 4)`` and ``X2.shape = (17, 50, 4)``, then | |
| ``X`` has 2 groups with respectively 20 and 17 observations in each, | |
| and each data point is of shape ``(50, 4)``. Note: that the | |
| *last dimension* of each element of ``X`` should correspond to the | |
| dimension represented in the ``adjacency`` parameter | |
| (e.g., spectral data should be provided as | |
| ``(observations, frequencies, channels/vertices)``). | |
| %(threshold_clust_f)s | |
| %(n_permutations_clust_int)s | |
| %(tail_clust)s | |
| %(stat_fun_clust_f)s | |
| %(adjacency_clust_n)s | |
| %(n_jobs)s | |
| %(seed)s | |
| %(max_step_clust)s | |
| %(exclude_clust)s | |
| %(step_down_p_clust)s | |
| %(f_power_clust)s | |
| %(out_type_clust)s | |
| %(check_disjoint_clust)s | |
| %(buffer_size_clust)s | |
| %(verbose)s | |
| Returns | |
| ------- | |
| F_obs : array, shape (p[, q][, r]) | |
| Statistic (F by default) observed for all variables. | |
| clusters : list | |
| List type defined by out_type above. | |
| cluster_pv : array | |
| P-value for each cluster. | |
| H0 : array, shape (n_permutations,) | |
| Max cluster level stats observed under permutation. | |
| Notes | |
| ----- | |
| %(threshold_clust_f_notes)s | |
| References | |
| ---------- | |
| .. footbibliography:: | |
| """ | |
| stat_fun, threshold = _check_fun(X, stat_fun, threshold, tail, "between") | |
| return _permutation_cluster_test( | |
| X=X, | |
| threshold=threshold, | |
| n_permutations=n_permutations, | |
| tail=tail, | |
| stat_fun=stat_fun, | |
| adjacency=adjacency, | |
| n_jobs=n_jobs, | |
| seed=seed, | |
| max_step=max_step, | |
| exclude=exclude, | |
| step_down_p=step_down_p, | |
| t_power=t_power, | |
| out_type=out_type, | |
| check_disjoint=check_disjoint, | |
| buffer_size=buffer_size, | |
| ) | |
| def permutation_cluster_1samp_test( | |
| X, | |
| threshold=None, | |
| n_permutations=1024, | |
| tail=0, | |
| stat_fun=None, | |
| adjacency=None, | |
| n_jobs=None, | |
| seed=None, | |
| max_step=1, | |
| exclude=None, | |
| step_down_p=0, | |
| t_power=1, | |
| out_type="indices", | |
| check_disjoint=False, | |
| buffer_size=1000, | |
| verbose=None, | |
| ): | |
| """Non-parametric cluster-level paired t-test. | |
| For details, see :footcite:p:`MarisOostenveld2007,Sassenhagen2019`. | |
| Parameters | |
| ---------- | |
| X : array, shape (n_observations, p[, q][, r]) | |
| The data to be clustered. The first dimension should correspond to the | |
| difference between paired samples (observations) in two conditions. | |
| The subarrays ``X[k]`` can be 1D (e.g., time series), 2D (e.g., | |
| time series over channels), or 3D (e.g., time-frequencies over | |
| channels) associated with the kth observation. For spatiotemporal data, | |
| see also :func:`mne.stats.spatio_temporal_cluster_1samp_test`. | |
| %(threshold_clust_t)s | |
| %(n_permutations_clust_all)s | |
| %(tail_clust)s | |
| %(stat_fun_clust_t)s | |
| %(adjacency_clust_1)s | |
| %(n_jobs)s | |
| %(seed)s | |
| %(max_step_clust)s | |
| %(exclude_clust)s | |
| %(step_down_p_clust)s | |
| %(t_power_clust)s | |
| %(out_type_clust)s | |
| %(check_disjoint_clust)s | |
| %(buffer_size_clust)s | |
| %(verbose)s | |
| Returns | |
| ------- | |
| t_obs : array, shape (p[, q][, r]) | |
| T-statistic observed for all variables. | |
| clusters : list | |
| List type defined by out_type above. | |
| cluster_pv : array | |
| P-value for each cluster. | |
| H0 : array, shape (n_permutations,) | |
| Max cluster level stats observed under permutation. | |
| Notes | |
| ----- | |
| From an array of paired observations, e.g. a difference in signal | |
| amplitudes or power spectra in two conditions, calculate if the data | |
| distributions in the two conditions are significantly different. | |
| The procedure uses a cluster analysis with permutation test | |
| for calculating corrected p-values. Randomized data are generated with | |
| random sign flips. See :footcite:`MarisOostenveld2007` for more | |
| information. | |
| Because a 1-sample t-test on the difference in observations is | |
| mathematically equivalent to a paired t-test, internally this function | |
| computes a 1-sample t-test (by default) and uses sign flipping (always) | |
| to perform permutations. This might not be suitable for the case where | |
| there is truly a single observation under test; see :ref:`disc-stats`. | |
| %(threshold_clust_t_notes)s | |
| If ``n_permutations`` exceeds the maximum number of possible permutations | |
| given the number of observations, then ``n_permutations`` and ``seed`` | |
| will be ignored since an exact test (full permutation test) will be | |
| performed (this is the case when | |
| ``n_permutations >= 2 ** (n_observations - (tail == 0))``). | |
| If no initial clusters are found because all points in the true | |
| distribution are below the threshold, then ``clusters``, ``cluster_pv``, | |
| and ``H0`` will all be empty arrays. | |
| References | |
| ---------- | |
| .. footbibliography:: | |
| """ | |
| stat_fun, threshold = _check_fun(X, stat_fun, threshold, tail) | |
| return _permutation_cluster_test( | |
| X=[X], | |
| threshold=threshold, | |
| n_permutations=n_permutations, | |
| tail=tail, | |
| stat_fun=stat_fun, | |
| adjacency=adjacency, | |
| n_jobs=n_jobs, | |
| seed=seed, | |
| max_step=max_step, | |
| exclude=exclude, | |
| step_down_p=step_down_p, | |
| t_power=t_power, | |
| out_type=out_type, | |
| check_disjoint=check_disjoint, | |
| buffer_size=buffer_size, | |
| ) | |
| def spatio_temporal_cluster_1samp_test( | |
| X, | |
| threshold=None, | |
| n_permutations=1024, | |
| tail=0, | |
| stat_fun=None, | |
| adjacency=None, | |
| n_jobs=None, | |
| seed=None, | |
| max_step=1, | |
| spatial_exclude=None, | |
| step_down_p=0, | |
| t_power=1, | |
| out_type="indices", | |
| check_disjoint=False, | |
| buffer_size=1000, | |
| verbose=None, | |
| ): | |
| """Non-parametric cluster-level paired t-test for spatio-temporal data. | |
| This function provides a convenient wrapper for | |
| :func:`mne.stats.permutation_cluster_1samp_test`, for use with data | |
| organized in the form (observations × time × space), | |
| (observations × frequencies × space), or optionally | |
| (observations × time × frequencies × space). For details, see | |
| :footcite:p:`MarisOostenveld2007,Sassenhagen2019`. | |
| Parameters | |
| ---------- | |
| X : array, shape (n_observations, p[, q], n_vertices) | |
| The data to be clustered. The first dimension should correspond to the | |
| difference between paired samples (observations) in two conditions. | |
| The second, and optionally third, dimensions correspond to the | |
| time or time-frequency data. And, the last dimension should be spatial. | |
| %(threshold_clust_t)s | |
| %(n_permutations_clust_all)s | |
| %(tail_clust)s | |
| %(stat_fun_clust_t)s | |
| %(adjacency_clust_st1)s | |
| %(n_jobs)s | |
| %(seed)s | |
| %(max_step_clust)s | |
| spatial_exclude : list of int or None | |
| List of spatial indices to exclude from clustering. | |
| %(step_down_p_clust)s | |
| %(t_power_clust)s | |
| %(out_type_clust)s | |
| %(check_disjoint_clust)s | |
| %(buffer_size_clust)s | |
| %(verbose)s | |
| Returns | |
| ------- | |
| t_obs : array, shape (p[, q], n_vertices) | |
| T-statistic observed for all variables. | |
| clusters : list | |
| List type defined by out_type above. | |
| cluster_pv : array | |
| P-value for each cluster. | |
| H0 : array, shape (n_permutations,) | |
| Max cluster level stats observed under permutation. | |
| Notes | |
| ----- | |
| %(threshold_clust_t_notes)s | |
| References | |
| ---------- | |
| .. footbibliography:: | |
| """ | |
| # convert spatial_exclude before passing on if necessary | |
| if spatial_exclude is not None: | |
| exclude = _st_mask_from_s_inds( | |
| np.prod(X.shape[1:-1]), X.shape[-1], spatial_exclude, True | |
| ) | |
| else: | |
| exclude = None | |
| return permutation_cluster_1samp_test( | |
| X, | |
| threshold=threshold, | |
| stat_fun=stat_fun, | |
| tail=tail, | |
| n_permutations=n_permutations, | |
| adjacency=adjacency, | |
| n_jobs=n_jobs, | |
| seed=seed, | |
| max_step=max_step, | |
| exclude=exclude, | |
| step_down_p=step_down_p, | |
| t_power=t_power, | |
| out_type=out_type, | |
| check_disjoint=check_disjoint, | |
| buffer_size=buffer_size, | |
| ) | |
| def spatio_temporal_cluster_test( | |
| X, | |
| threshold=None, | |
| n_permutations=1024, | |
| tail=0, | |
| stat_fun=None, | |
| adjacency=None, | |
| n_jobs=None, | |
| seed=None, | |
| max_step=1, | |
| spatial_exclude=None, | |
| step_down_p=0, | |
| t_power=1, | |
| out_type="indices", | |
| check_disjoint=False, | |
| buffer_size=1000, | |
| verbose=None, | |
| ): | |
| """Non-parametric cluster-level test for spatio-temporal data. | |
| This function provides a convenient wrapper for | |
| :func:`mne.stats.permutation_cluster_test`, for use with data | |
| organized in the form (observations × time × space), | |
| (observations × time × space), or optionally | |
| (observations × time × frequencies × space). For more information, | |
| see :footcite:p:`MarisOostenveld2007,Sassenhagen2019`. | |
| Parameters | |
| ---------- | |
| X : list of array, shape (n_observations, p[, q], n_vertices) | |
| The data to be clustered. Each array in ``X`` should contain the | |
| observations for one group. The first dimension of each array is the | |
| number of observations from that group (and may vary between groups). | |
| The second, and optionally third, dimensions correspond to the | |
| time or time-frequency data. And, the last dimension should be spatial. | |
| All dimensions except the first should match across all groups. | |
| %(threshold_clust_f)s | |
| %(n_permutations_clust_int)s | |
| %(tail_clust)s | |
| %(stat_fun_clust_f)s | |
| %(adjacency_clust_stn)s | |
| %(n_jobs)s | |
| %(seed)s | |
| %(max_step_clust)s | |
| spatial_exclude : list of int or None | |
| List of spatial indices to exclude from clustering. | |
| %(step_down_p_clust)s | |
| %(f_power_clust)s | |
| %(out_type_clust)s | |
| %(check_disjoint_clust)s | |
| %(buffer_size_clust)s | |
| %(verbose)s | |
| Returns | |
| ------- | |
| F_obs : array, shape (p[, q], n_vertices) | |
| Statistic (F by default) observed for all variables. | |
| clusters : list | |
| List type defined by out_type above. | |
| cluster_pv: array | |
| P-value for each cluster. | |
| H0 : array, shape (n_permutations,) | |
| Max cluster level stats observed under permutation. | |
| Notes | |
| ----- | |
| %(threshold_clust_f_notes)s | |
| References | |
| ---------- | |
| .. footbibliography:: | |
| """ | |
| # convert spatial_exclude before passing on if necessary | |
| if spatial_exclude is not None: | |
| exclude = _st_mask_from_s_inds( | |
| np.prod(X[0].shape[1:-1]), X[0].shape[-1], spatial_exclude, True | |
| ) | |
| else: | |
| exclude = None | |
| return permutation_cluster_test( | |
| X, | |
| threshold=threshold, | |
| stat_fun=stat_fun, | |
| tail=tail, | |
| n_permutations=n_permutations, | |
| adjacency=adjacency, | |
| n_jobs=n_jobs, | |
| seed=seed, | |
| max_step=max_step, | |
| exclude=exclude, | |
| step_down_p=step_down_p, | |
| t_power=t_power, | |
| out_type=out_type, | |
| check_disjoint=check_disjoint, | |
| buffer_size=buffer_size, | |
| ) | |
| def _st_mask_from_s_inds(n_times, n_vertices, vertices, set_as=True): | |
| """Compute mask to apply to a spatio-temporal adjacency matrix. | |
| This can be used to include (or exclude) certain spatial coordinates. | |
| This is useful for excluding certain regions from analysis (e.g., | |
| medial wall vertices). | |
| Parameters | |
| ---------- | |
| n_times : int | |
| Number of time points. | |
| n_vertices : int | |
| Number of spatial points. | |
| vertices : list or array of int | |
| Vertex numbers to set. | |
| set_as : bool | |
| If True, all points except "vertices" are set to False (inclusion). | |
| If False, all points except "vertices" are set to True (exclusion). | |
| Returns | |
| ------- | |
| mask : array of bool | |
| A (n_times * n_vertices) array of boolean values for masking | |
| """ | |
| mask = np.zeros((n_times, n_vertices), dtype=bool) | |
| mask[:, vertices] = True | |
| mask = mask.ravel() | |
| if set_as is False: | |
| mask = np.logical_not(mask) | |
| return mask | |
| def _get_partitions_from_adjacency(adjacency, n_times, verbose=None): | |
| """Specify disjoint subsets (e.g., hemispheres) based on adjacency.""" | |
| if isinstance(adjacency, list): | |
| test = np.ones(len(adjacency)) | |
| test_adj = np.zeros((len(adjacency), len(adjacency)), dtype="bool") | |
| for vi in range(len(adjacency)): | |
| test_adj[adjacency[vi], vi] = True | |
| test_adj = sparse.coo_array(test_adj, dtype="float") | |
| else: | |
| test = np.ones(adjacency.shape[0]) | |
| test_adj = adjacency | |
| part_clusts = _find_clusters(test, 0, 1, test_adj)[0] | |
| if len(part_clusts) > 1: | |
| logger.info(f"{len(part_clusts)} disjoint adjacency sets found") | |
| partitions = np.zeros(len(test), dtype="int") | |
| for ii, pc in enumerate(part_clusts): | |
| partitions[pc] = ii | |
| if isinstance(adjacency, list): | |
| partitions = np.tile(partitions, n_times) | |
| else: | |
| logger.info("No disjoint adjacency sets found") | |
| partitions = None | |
| return partitions | |
| def _reshape_clusters(clusters, sample_shape): | |
| """Reshape cluster masks or indices to be of the correct shape.""" | |
| # format of the bool mask and indices are ndarrays | |
| if len(clusters) > 0 and isinstance(clusters[0], np.ndarray): | |
| if clusters[0].dtype == np.dtype(bool): # format of mask | |
| clusters = [c.reshape(sample_shape) for c in clusters] | |
| else: # format of indices | |
| clusters = [np.unravel_index(c, sample_shape) for c in clusters] | |
| return clusters | |
| def summarize_clusters_stc( | |
| clu, p_thresh=0.05, tstep=1.0, tmin=0, subject="fsaverage", vertices=None | |
| ): | |
| """Assemble summary SourceEstimate from spatiotemporal cluster results. | |
| This helps visualizing results from spatio-temporal-clustering | |
| permutation tests. | |
| Parameters | |
| ---------- | |
| clu : tuple | |
| The output from clustering permutation tests. | |
| p_thresh : float | |
| The significance threshold for inclusion of clusters. | |
| tstep : float | |
| The time step between samples of the original :class:`STC | |
| <mne.SourceEstimate>`, in seconds (i.e., ``1 / stc.sfreq``). Defaults | |
| to ``1``, which will yield a colormap indicating cluster duration | |
| measured in *samples* rather than *seconds*. | |
| tmin : float | int | |
| The time of the first sample. | |
| subject : str | |
| The name of the subject. | |
| vertices : list of array | instance of SourceSpaces | None | |
| The vertex numbers associated with the source space locations. Defaults | |
| to None. If None, equals ``[np.arange(10242), np.arange(10242)]``. | |
| Can also be an instance of SourceSpaces to get vertex numbers from. | |
| .. versionchanged:: 0.21 | |
| Added support for SourceSpaces. | |
| Returns | |
| ------- | |
| out : instance of SourceEstimate | |
| A summary of the clusters. The first time point in this SourceEstimate | |
| object is the summation of all the clusters. Subsequent time points | |
| contain each individual cluster. The magnitude of the activity | |
| corresponds to the duration spanned by the cluster (duration units are | |
| determined by ``tstep``). | |
| .. versionchanged:: 0.21 | |
| Added support for volume and mixed source estimates. | |
| """ | |
| _validate_type(vertices, (None, list, SourceSpaces), "vertices") | |
| if vertices is None: | |
| vertices = [np.arange(10242), np.arange(10242)] | |
| klass = SourceEstimate | |
| elif isinstance(vertices, SourceSpaces): | |
| klass = dict( | |
| surface=SourceEstimate, volume=VolSourceEstimate, mixed=MixedSourceEstimate | |
| )[vertices.kind] | |
| vertices = [s["vertno"] for s in vertices] | |
| else: | |
| klass = {1: VolSourceEstimate, 2: SourceEstimate}.get( | |
| len(vertices), MixedSourceEstimate | |
| ) | |
| n_vertices_need = sum(len(v) for v in vertices) | |
| t_obs, clusters, clu_pvals, _ = clu | |
| n_times, n_vertices = t_obs.shape | |
| if n_vertices != n_vertices_need: | |
| raise ValueError( | |
| f"Number of cluster vertices ({n_vertices}) did not match the " | |
| f"provided vertices ({n_vertices_need})" | |
| ) | |
| good_cluster_inds = np.where(clu_pvals < p_thresh)[0] | |
| # Build a convenient representation of each cluster, where each | |
| # cluster becomes a "time point" in the SourceEstimate | |
| if len(good_cluster_inds) == 0: | |
| raise RuntimeError( | |
| "No significant clusters available. Please adjust " | |
| "your threshold or check your statistical " | |
| "analysis." | |
| ) | |
| data = np.zeros((n_vertices, n_times)) | |
| data_summary = np.zeros((n_vertices, len(good_cluster_inds) + 1)) | |
| for ii, cluster_ind in enumerate(good_cluster_inds): | |
| data.fill(0) | |
| t_inds, v_inds = clusters[cluster_ind] | |
| data[v_inds, t_inds] = t_obs[t_inds, v_inds] | |
| # Store a nice visualization of the cluster by summing across time | |
| data_summary[:, ii + 1] = np.sum(_sum_cluster_data(data, tstep), axis=1) | |
| # Make the first "time point" a sum across all clusters for easy | |
| # visualization | |
| data_summary[:, 0] = np.sum(data_summary, axis=1) | |
| return klass(data_summary, vertices, tmin, tstep, subject) | |