| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import sys |
| import os.path |
|
|
| from functools import wraps, partial |
| import weakref |
|
|
| import numpy as np |
| import warnings |
| from numpy.linalg import norm |
| from numpy.testing import (verbose, assert_, |
| assert_array_equal, assert_equal, |
| assert_almost_equal, assert_allclose, |
| break_cycles, IS_PYPY) |
| import pytest |
|
|
| import scipy.spatial.distance |
|
|
| from scipy.spatial.distance import ( |
| squareform, pdist, cdist, num_obs_y, num_obs_dm, is_valid_dm, is_valid_y, |
| _validate_vector, _METRICS_NAMES) |
|
|
| |
| |
| from scipy.spatial.distance import (braycurtis, canberra, chebyshev, cityblock, |
| correlation, cosine, dice, euclidean, |
| hamming, jaccard, jensenshannon, |
| kulczynski1, mahalanobis, |
| minkowski, rogerstanimoto, |
| russellrao, seuclidean, sokalmichener, |
| sokalsneath, sqeuclidean, yule) |
| from scipy._lib._util import np_long, np_ulong |
|
|
|
|
| @pytest.fixture(params=_METRICS_NAMES, scope="session") |
| def metric(request): |
| """ |
| Fixture for all metrics in scipy.spatial.distance |
| """ |
| return request.param |
|
|
|
|
| _filenames = [ |
| "cdist-X1.txt", |
| "cdist-X2.txt", |
| "iris.txt", |
| "pdist-boolean-inp.txt", |
| "pdist-chebyshev-ml-iris.txt", |
| "pdist-chebyshev-ml.txt", |
| "pdist-cityblock-ml-iris.txt", |
| "pdist-cityblock-ml.txt", |
| "pdist-correlation-ml-iris.txt", |
| "pdist-correlation-ml.txt", |
| "pdist-cosine-ml-iris.txt", |
| "pdist-cosine-ml.txt", |
| "pdist-double-inp.txt", |
| "pdist-euclidean-ml-iris.txt", |
| "pdist-euclidean-ml.txt", |
| "pdist-hamming-ml.txt", |
| "pdist-jaccard-ml.txt", |
| "pdist-jensenshannon-ml-iris.txt", |
| "pdist-jensenshannon-ml.txt", |
| "pdist-minkowski-3.2-ml-iris.txt", |
| "pdist-minkowski-3.2-ml.txt", |
| "pdist-minkowski-5.8-ml-iris.txt", |
| "pdist-seuclidean-ml-iris.txt", |
| "pdist-seuclidean-ml.txt", |
| "pdist-spearman-ml.txt", |
| "random-bool-data.txt", |
| "random-double-data.txt", |
| "random-int-data.txt", |
| "random-uint-data.txt", |
| ] |
|
|
| _tdist = np.array([[0, 662, 877, 255, 412, 996], |
| [662, 0, 295, 468, 268, 400], |
| [877, 295, 0, 754, 564, 138], |
| [255, 468, 754, 0, 219, 869], |
| [412, 268, 564, 219, 0, 669], |
| [996, 400, 138, 869, 669, 0]], dtype='double') |
|
|
| _ytdist = squareform(_tdist) |
|
|
| |
| |
| |
| eo = {} |
|
|
|
|
| def load_testing_files(): |
| for fn in _filenames: |
| name = fn.replace(".txt", "").replace("-ml", "") |
| fqfn = os.path.join(os.path.dirname(__file__), 'data', fn) |
| fp = open(fqfn) |
| eo[name] = np.loadtxt(fp) |
| fp.close() |
| eo['pdist-boolean-inp'] = np.bool_(eo['pdist-boolean-inp']) |
| eo['random-bool-data'] = np.bool_(eo['random-bool-data']) |
| eo['random-float32-data'] = np.float32(eo['random-double-data']) |
| eo['random-int-data'] = np_long(eo['random-int-data']) |
| eo['random-uint-data'] = np_ulong(eo['random-uint-data']) |
|
|
|
|
| load_testing_files() |
|
|
|
|
| def _is_32bit(): |
| return np.intp(0).itemsize < 8 |
|
|
|
|
| def _chk_asarrays(arrays, axis=None): |
| arrays = [np.asanyarray(a) for a in arrays] |
| if axis is None: |
| |
| arrays = [np.ravel(a) if a.ndim != 1 else a |
| for a in arrays] |
| axis = 0 |
| arrays = tuple(np.atleast_1d(a) for a in arrays) |
| if axis < 0: |
| if not all(a.ndim == arrays[0].ndim for a in arrays): |
| raise ValueError("array ndim must be the same for neg axis") |
| axis = range(arrays[0].ndim)[axis] |
| return arrays + (axis,) |
|
|
|
|
| def _chk_weights(arrays, weights=None, axis=None, |
| force_weights=False, simplify_weights=True, |
| pos_only=False, neg_check=False, |
| nan_screen=False, mask_screen=False, |
| ddof=None): |
| chked = _chk_asarrays(arrays, axis=axis) |
| arrays, axis = chked[:-1], chked[-1] |
|
|
| simplify_weights = simplify_weights and not force_weights |
| if not force_weights and mask_screen: |
| force_weights = any(np.ma.getmask(a) is not np.ma.nomask for a in arrays) |
|
|
| if nan_screen: |
| has_nans = [np.isnan(np.sum(a)) for a in arrays] |
| if any(has_nans): |
| mask_screen = True |
| force_weights = True |
| arrays = tuple(np.ma.masked_invalid(a) if has_nan else a |
| for a, has_nan in zip(arrays, has_nans)) |
|
|
| if weights is not None: |
| weights = np.asanyarray(weights) |
| elif force_weights: |
| weights = np.ones(arrays[0].shape[axis]) |
| else: |
| return arrays + (weights, axis) |
|
|
| if ddof: |
| weights = _freq_weights(weights) |
|
|
| if mask_screen: |
| weights = _weight_masked(arrays, weights, axis) |
|
|
| if not all(weights.shape == (a.shape[axis],) for a in arrays): |
| raise ValueError("weights shape must match arrays along axis") |
| if neg_check and (weights < 0).any(): |
| raise ValueError("weights cannot be negative") |
|
|
| if pos_only: |
| pos_weights = np.nonzero(weights > 0)[0] |
| if pos_weights.size < weights.size: |
| arrays = tuple(np.take(a, pos_weights, axis=axis) for a in arrays) |
| weights = weights[pos_weights] |
| if simplify_weights and (weights == 1).all(): |
| weights = None |
| return arrays + (weights, axis) |
|
|
|
|
| def _freq_weights(weights): |
| if weights is None: |
| return weights |
| int_weights = weights.astype(int) |
| if (weights != int_weights).any(): |
| raise ValueError(f"frequency (integer count-type) weights required {weights}") |
| return int_weights |
|
|
|
|
| def _weight_masked(arrays, weights, axis): |
| if axis is None: |
| axis = 0 |
| weights = np.asanyarray(weights) |
| for a in arrays: |
| axis_mask = np.ma.getmask(a) |
| if axis_mask is np.ma.nomask: |
| continue |
| if a.ndim > 1: |
| not_axes = tuple(i for i in range(a.ndim) if i != axis) |
| axis_mask = axis_mask.any(axis=not_axes) |
| weights *= 1 - axis_mask.astype(int) |
| return weights |
|
|
|
|
| def _rand_split(arrays, weights, axis, split_per, seed=None): |
| |
| arrays = [arr.astype(np.float64) if np.issubdtype(arr.dtype, np.integer) |
| else arr for arr in arrays] |
|
|
| |
| weights = np.array(weights, dtype=np.float64) |
| seeded_rand = np.random.RandomState(seed) |
|
|
| def mytake(a, ix, axis): |
| record = np.asanyarray(np.take(a, ix, axis=axis)) |
| return record.reshape([a.shape[i] if i != axis else 1 |
| for i in range(a.ndim)]) |
|
|
| n_obs = arrays[0].shape[axis] |
| assert all(a.shape[axis] == n_obs for a in arrays), \ |
| "data must be aligned on sample axis" |
| for i in range(int(split_per) * n_obs): |
| split_ix = seeded_rand.randint(n_obs + i) |
| prev_w = weights[split_ix] |
| q = seeded_rand.rand() |
| weights[split_ix] = q * prev_w |
| weights = np.append(weights, (1. - q) * prev_w) |
| arrays = [np.append(a, mytake(a, split_ix, axis=axis), |
| axis=axis) for a in arrays] |
| return arrays, weights |
|
|
|
|
| assert_allclose_forgiving = partial(assert_allclose, atol=1e-5) |
|
|
|
|
| def _rough_check(a, b, compare_assert=assert_allclose_forgiving, |
| key=lambda x: x, w=None): |
| check_a = key(a) |
| check_b = key(b) |
| try: |
| if np.array(check_a != check_b).any(): |
| compare_assert(check_a, check_b) |
| except AttributeError: |
| compare_assert(check_a, check_b) |
| except (TypeError, ValueError): |
| for a_i, b_i in zip(check_a, check_b): |
| _rough_check(a_i, b_i, compare_assert=compare_assert) |
|
|
| |
| |
| |
| def _weight_checked(fn, n_args=2, default_axis=None, key=lambda x: x, weight_arg='w', |
| squeeze=True, silent=False, |
| ones_test=True, const_test=True, dup_test=True, |
| split_test=True, dud_test=True, ma_safe=False, ma_very_safe=False, |
| nan_safe=False, split_per=1.0, seed=0, |
| compare_assert=assert_allclose_forgiving): |
| """runs fn on its arguments 2 or 3 ways, checks that the results are the same, |
| then returns the same thing it would have returned before""" |
| @wraps(fn) |
| def wrapped(*args, **kwargs): |
| result = fn(*args, **kwargs) |
|
|
| arrays = args[:n_args] |
| rest = args[n_args:] |
| weights = kwargs.get(weight_arg, None) |
| axis = kwargs.get('axis', default_axis) |
|
|
| chked = _chk_weights(arrays, weights=weights, axis=axis, |
| force_weights=True, mask_screen=True) |
| arrays, weights, axis = chked[:-2], chked[-2], chked[-1] |
| if squeeze: |
| arrays = [np.atleast_1d(a.squeeze()) for a in arrays] |
|
|
| try: |
| |
| args = tuple(arrays) + rest |
| if ones_test: |
| kwargs[weight_arg] = weights |
| _rough_check(result, fn(*args, **kwargs), key=key) |
| if const_test: |
| kwargs[weight_arg] = weights * 101.0 |
| _rough_check(result, fn(*args, **kwargs), key=key) |
| kwargs[weight_arg] = weights * 0.101 |
| try: |
| _rough_check(result, fn(*args, **kwargs), key=key) |
| except Exception as e: |
| raise type(e)((e, arrays, weights)) from e |
|
|
| |
| if dud_test: |
| |
| dud_arrays, dud_weights = _rand_split(arrays, weights, axis, |
| split_per=split_per, seed=seed) |
| dud_weights[:weights.size] = weights |
| dud_weights[weights.size:] = 0 |
| dud_args = tuple(dud_arrays) + rest |
| kwargs[weight_arg] = dud_weights |
| _rough_check(result, fn(*dud_args, **kwargs), key=key) |
| |
| for a in dud_arrays: |
| indexer = [slice(None)] * a.ndim |
| indexer[axis] = slice(weights.size, None) |
| indexer = tuple(indexer) |
| a[indexer] = a[indexer] * 101 |
| dud_args = tuple(dud_arrays) + rest |
| _rough_check(result, fn(*dud_args, **kwargs), key=key) |
| |
| for a in dud_arrays: |
| indexer = [slice(None)] * a.ndim |
| indexer[axis] = slice(weights.size, None) |
| indexer = tuple(indexer) |
| a[indexer] = a[indexer] * np.nan |
| if kwargs.get("nan_policy", None) == "omit" and nan_safe: |
| dud_args = tuple(dud_arrays) + rest |
| _rough_check(result, fn(*dud_args, **kwargs), key=key) |
| |
| if ma_safe: |
| dud_arrays = [np.ma.masked_invalid(a) for a in dud_arrays] |
| dud_args = tuple(dud_arrays) + rest |
| _rough_check(result, fn(*dud_args, **kwargs), key=key) |
| if ma_very_safe: |
| kwargs[weight_arg] = None |
| _rough_check(result, fn(*dud_args, **kwargs), key=key) |
| del dud_arrays, dud_args, dud_weights |
|
|
| |
| if dup_test: |
| dup_arrays = [np.append(a, a, axis=axis) for a in arrays] |
| dup_weights = np.append(weights, weights) / 2.0 |
| dup_args = tuple(dup_arrays) + rest |
| kwargs[weight_arg] = dup_weights |
| _rough_check(result, fn(*dup_args, **kwargs), key=key) |
| del dup_args, dup_arrays, dup_weights |
|
|
| |
| if split_test and split_per > 0: |
| split = _rand_split(arrays, weights, axis, |
| split_per=split_per, seed=seed) |
| split_arrays, split_weights = split |
| split_args = tuple(split_arrays) + rest |
| kwargs[weight_arg] = split_weights |
| _rough_check(result, fn(*split_args, **kwargs), key=key) |
| except NotImplementedError as e: |
| |
| |
| if not silent: |
| warnings.warn(f"{fn.__name__} NotImplemented weights: {e}", |
| stacklevel=3) |
| return result |
| return wrapped |
|
|
|
|
| class DummyContextManager: |
| def __enter__(self): |
| pass |
| def __exit__(self, *args): |
| pass |
|
|
|
|
| def maybe_deprecated(metric: str): |
| if metric in ('kulczynski1', 'sokalmichener'): |
| return pytest.deprecated_call() |
| else: |
| return DummyContextManager() |
|
|
|
|
| wcdist = _weight_checked(cdist, default_axis=1, squeeze=False) |
| wcdist_no_const = _weight_checked(cdist, default_axis=1, |
| squeeze=False, const_test=False) |
| wpdist = _weight_checked(pdist, default_axis=1, squeeze=False, n_args=1) |
| wpdist_no_const = _weight_checked(pdist, default_axis=1, squeeze=False, |
| const_test=False, n_args=1) |
| wrogerstanimoto = _weight_checked(rogerstanimoto) |
| wmatching = whamming = _weight_checked(hamming, dud_test=False) |
| wyule = _weight_checked(yule) |
| wdice = _weight_checked(dice) |
| wcityblock = _weight_checked(cityblock) |
| wchebyshev = _weight_checked(chebyshev) |
| wcosine = _weight_checked(cosine) |
| wcorrelation = _weight_checked(correlation) |
| wkulczynski1 = _weight_checked(kulczynski1) |
| wjaccard = _weight_checked(jaccard) |
| weuclidean = _weight_checked(euclidean, const_test=False) |
| wsqeuclidean = _weight_checked(sqeuclidean, const_test=False) |
| wbraycurtis = _weight_checked(braycurtis) |
| wcanberra = _weight_checked(canberra, const_test=False) |
| wsokalsneath = _weight_checked(sokalsneath) |
| wsokalmichener = _weight_checked(sokalmichener) |
| wrussellrao = _weight_checked(russellrao) |
|
|
|
|
| class TestCdist: |
|
|
| def setup_method(self): |
| self.rnd_eo_names = ['random-float32-data', 'random-int-data', |
| 'random-uint-data', 'random-double-data', |
| 'random-bool-data'] |
| self.valid_upcasts = {'bool': [np_ulong, np_long, np.float32, np.float64], |
| 'uint': [np_long, np.float32, np.float64], |
| 'int': [np.float32, np.float64], |
| 'float32': [np.float64]} |
|
|
| @pytest.mark.thread_unsafe |
| def test_cdist_extra_args(self, metric): |
| |
|
|
| X1 = [[1., 2., 3.], [1.2, 2.3, 3.4], [2.2, 2.3, 4.4]] |
| X2 = [[7., 5., 8.], [7.5, 5.8, 8.4], [5.5, 5.8, 4.4]] |
| kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(3)} |
| args = [3.14] * 200 |
|
|
| with pytest.raises(TypeError): |
| with maybe_deprecated(metric): |
| cdist(X1, X2, metric=metric, **kwargs) |
| with pytest.raises(TypeError): |
| with maybe_deprecated(metric): |
| cdist(X1, X2, metric=eval(metric), **kwargs) |
| with pytest.raises(TypeError): |
| with maybe_deprecated(metric): |
| cdist(X1, X2, metric="test_" + metric, **kwargs) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, metric=metric, *args) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, metric=eval(metric), *args) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, metric="test_" + metric, *args) |
|
|
| def test_cdist_extra_args_custom(self): |
| |
| |
| def _my_metric(x, y, arg, kwarg=1, kwarg2=2): |
| return arg + kwarg + kwarg2 |
|
|
| X1 = [[1., 2., 3.], [1.2, 2.3, 3.4], [2.2, 2.3, 4.4]] |
| X2 = [[7., 5., 8.], [7.5, 5.8, 8.4], [5.5, 5.8, 4.4]] |
| kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(3)} |
| args = [3.14] * 200 |
|
|
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric, *args) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric, **kwargs) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric, kwarg=2.2, kwarg2=3.3) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric, 1, 2, kwarg=2.2) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric, 1, 2, kwarg=2.2) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric, 1.1, 2.2, 3.3) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric, 1.1, 2.2) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric, 1.1) |
| with pytest.raises(TypeError): |
| cdist(X1, X2, _my_metric, 1.1, kwarg=2.2, kwarg2=3.3) |
|
|
| |
| assert_allclose(cdist(X1, X2, metric=_my_metric, |
| arg=1.1, kwarg2=3.3), 5.4) |
|
|
| def test_cdist_euclidean_random_unicode(self): |
| eps = 1e-15 |
| X1 = eo['cdist-X1'] |
| X2 = eo['cdist-X2'] |
| Y1 = wcdist_no_const(X1, X2, 'euclidean') |
| Y2 = wcdist_no_const(X1, X2, 'test_euclidean') |
| assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2) |
|
|
| @pytest.mark.parametrize("p", [0.1, 0.25, 1.0, 1.23, |
| 2.0, 3.8, 4.6, np.inf]) |
| def test_cdist_minkowski_random(self, p): |
| eps = 1e-13 |
| X1 = eo['cdist-X1'] |
| X2 = eo['cdist-X2'] |
| Y1 = wcdist_no_const(X1, X2, 'minkowski', p=p) |
| Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=p) |
| assert_allclose(Y1, Y2, atol=0, rtol=eps, verbose=verbose > 2) |
|
|
| def test_cdist_cosine_random(self): |
| eps = 1e-14 |
| X1 = eo['cdist-X1'] |
| X2 = eo['cdist-X2'] |
| Y1 = wcdist(X1, X2, 'cosine') |
|
|
| |
| def norms(X): |
| return np.linalg.norm(X, axis=1).reshape(-1, 1) |
|
|
| Y2 = 1 - np.dot((X1 / norms(X1)), (X2 / norms(X2)).T) |
|
|
| assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2) |
|
|
| def test_cdist_mahalanobis(self): |
| |
| x1 = np.array([[2], [3]]) |
| x2 = np.array([[2], [5]]) |
| dist = cdist(x1, x2, metric='mahalanobis') |
| assert_allclose(dist, [[0.0, np.sqrt(4.5)], [np.sqrt(0.5), np.sqrt(2)]]) |
|
|
| |
| x1 = np.array([[0, 0], [-1, 0]]) |
| x2 = np.array([[0, 2], [1, 0], [0, -2]]) |
| dist = cdist(x1, x2, metric='mahalanobis') |
| rt2 = np.sqrt(2) |
| assert_allclose(dist, [[rt2, rt2, rt2], [2, 2 * rt2, 2]]) |
|
|
| |
| with pytest.raises(ValueError): |
| cdist([[0, 1]], [[2, 3]], metric='mahalanobis') |
|
|
| def test_cdist_custom_notdouble(self): |
| class myclass: |
| pass |
|
|
| def _my_metric(x, y): |
| if not isinstance(x[0], myclass) or not isinstance(y[0], myclass): |
| raise ValueError("Type has been changed") |
| return 1.123 |
| data = np.array([[myclass()]], dtype=object) |
| cdist_y = cdist(data, data, metric=_my_metric) |
| right_y = 1.123 |
| assert_equal(cdist_y, right_y, verbose=verbose > 2) |
|
|
| def _check_calling_conventions(self, X1, X2, metric, eps=1e-07, **kwargs): |
| |
| try: |
| y1 = cdist(X1, X2, metric=metric, **kwargs) |
| y2 = cdist(X1, X2, metric=eval(metric), **kwargs) |
| y3 = cdist(X1, X2, metric="test_" + metric, **kwargs) |
| except Exception as e: |
| e_cls = e.__class__ |
| if verbose > 2: |
| print(e_cls.__name__) |
| print(e) |
| with pytest.raises(e_cls): |
| cdist(X1, X2, metric=metric, **kwargs) |
| with pytest.raises(e_cls): |
| cdist(X1, X2, metric=eval(metric), **kwargs) |
| with pytest.raises(e_cls): |
| cdist(X1, X2, metric="test_" + metric, **kwargs) |
| else: |
| assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2) |
| assert_allclose(y1, y3, rtol=eps, verbose=verbose > 2) |
|
|
| def test_cdist_calling_conventions(self, metric): |
| |
| |
| |
| for eo_name in self.rnd_eo_names: |
| |
| |
| X1 = eo[eo_name][::5, ::-2] |
| X2 = eo[eo_name][1::5, ::2] |
| if verbose > 2: |
| print("testing: ", metric, " with: ", eo_name) |
| if metric in {'dice', 'yule', |
| 'rogerstanimoto', |
| 'russellrao', 'sokalmichener', |
| 'sokalsneath', |
| 'kulczynski1'} and 'bool' not in eo_name: |
| |
| continue |
| self._check_calling_conventions(X1, X2, metric) |
|
|
| |
| if metric == "seuclidean": |
| X12 = np.vstack([X1, X2]).astype(np.float64) |
| V = np.var(X12, axis=0, ddof=1) |
| self._check_calling_conventions(X1, X2, metric, V=V) |
| elif metric == "mahalanobis": |
| X12 = np.vstack([X1, X2]).astype(np.float64) |
| V = np.atleast_2d(np.cov(X12.T)) |
| VI = np.array(np.linalg.inv(V).T) |
| self._check_calling_conventions(X1, X2, metric, VI=VI) |
|
|
| def test_cdist_dtype_equivalence(self, metric): |
| |
| eps = 1e-07 |
| tests = [(eo['random-bool-data'], self.valid_upcasts['bool']), |
| (eo['random-uint-data'], self.valid_upcasts['uint']), |
| (eo['random-int-data'], self.valid_upcasts['int']), |
| (eo['random-float32-data'], self.valid_upcasts['float32'])] |
| for test in tests: |
| X1 = test[0][::5, ::-2] |
| X2 = test[0][1::5, ::2] |
| try: |
| y1 = cdist(X1, X2, metric=metric) |
| except Exception as e: |
| e_cls = e.__class__ |
| if verbose > 2: |
| print(e_cls.__name__) |
| print(e) |
| for new_type in test[1]: |
| X1new = new_type(X1) |
| X2new = new_type(X2) |
| with pytest.raises(e_cls): |
| cdist(X1new, X2new, metric=metric) |
| else: |
| for new_type in test[1]: |
| y2 = cdist(new_type(X1), new_type(X2), metric=metric) |
| assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2) |
|
|
| @pytest.mark.thread_unsafe |
| def test_cdist_out(self, metric): |
| |
| eps = 1e-15 |
| X1 = eo['cdist-X1'] |
| X2 = eo['cdist-X2'] |
| out_r, out_c = X1.shape[0], X2.shape[0] |
|
|
| kwargs = dict() |
| if metric == 'minkowski': |
| kwargs['p'] = 1.23 |
| out1 = np.empty((out_r, out_c), dtype=np.float64) |
| with maybe_deprecated(metric): |
| Y1 = cdist(X1, X2, metric, **kwargs) |
| with maybe_deprecated(metric): |
| Y2 = cdist(X1, X2, metric, out=out1, **kwargs) |
|
|
| |
| assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2) |
|
|
| |
| assert_(Y2 is out1) |
|
|
| |
| out2 = np.empty((out_r-1, out_c+1), dtype=np.float64) |
| with pytest.raises(ValueError): |
| with maybe_deprecated(metric): |
| cdist(X1, X2, metric, out=out2, **kwargs) |
|
|
| |
| out3 = np.empty( |
| (2 * out_r, 2 * out_c), dtype=np.float64)[::2, ::2] |
| out4 = np.empty((out_r, out_c), dtype=np.float64, order='F') |
| with pytest.raises(ValueError): |
| with maybe_deprecated(metric): |
| cdist(X1, X2, metric, out=out3, **kwargs) |
| with pytest.raises(ValueError): |
| with maybe_deprecated(metric): |
| cdist(X1, X2, metric, out=out4, **kwargs) |
|
|
| |
| out5 = np.empty((out_r, out_c), dtype=np.int64) |
| with pytest.raises(ValueError): |
| with maybe_deprecated(metric): |
| cdist(X1, X2, metric, out=out5, **kwargs) |
|
|
| @pytest.mark.thread_unsafe |
| def test_striding(self, metric): |
| |
| |
| eps = 1e-15 |
| X1 = eo['cdist-X1'][::2, ::2] |
| X2 = eo['cdist-X2'][::2, ::2] |
| X1_copy = X1.copy() |
| X2_copy = X2.copy() |
|
|
| |
| assert_equal(X1, X1_copy) |
| assert_equal(X2, X2_copy) |
| |
| assert_(not X1.flags.c_contiguous) |
| assert_(not X2.flags.c_contiguous) |
| assert_(X1_copy.flags.c_contiguous) |
| assert_(X2_copy.flags.c_contiguous) |
|
|
| kwargs = dict() |
| if metric == 'minkowski': |
| kwargs['p'] = 1.23 |
| with maybe_deprecated(metric): |
| Y1 = cdist(X1, X2, metric, **kwargs) |
| with maybe_deprecated(metric): |
| Y2 = cdist(X1_copy, X2_copy, metric, **kwargs) |
| |
| assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2) |
|
|
| @pytest.mark.thread_unsafe |
| def test_cdist_refcount(self, metric): |
| x1 = np.random.rand(10, 10) |
| x2 = np.random.rand(10, 10) |
|
|
| kwargs = dict() |
| if metric == 'minkowski': |
| kwargs['p'] = 1.23 |
|
|
| with maybe_deprecated(metric): |
| out = cdist(x1, x2, metric=metric, **kwargs) |
|
|
| |
| |
| weak_refs = [weakref.ref(v) for v in (x1, x2, out)] |
| del x1, x2, out |
|
|
| if IS_PYPY: |
| break_cycles() |
| assert all(weak_ref() is None for weak_ref in weak_refs) |
|
|
|
|
| class TestPdist: |
|
|
| def setup_method(self): |
| self.rnd_eo_names = ['random-float32-data', 'random-int-data', |
| 'random-uint-data', 'random-double-data', |
| 'random-bool-data'] |
| self.valid_upcasts = {'bool': [np_ulong, np_long, np.float32, np.float64], |
| 'uint': [np_long, np.float32, np.float64], |
| 'int': [np.float32, np.float64], |
| 'float32': [np.float64]} |
|
|
| @pytest.mark.thread_unsafe |
| def test_pdist_extra_args(self, metric): |
| |
| X1 = [[1., 2.], [1.2, 2.3], [2.2, 2.3]] |
| kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(2)} |
| args = [3.14] * 200 |
|
|
| with pytest.raises(TypeError): |
| with maybe_deprecated(metric): |
| pdist(X1, metric=metric, **kwargs) |
| with pytest.raises(TypeError): |
| with maybe_deprecated(metric): |
| pdist(X1, metric=eval(metric), **kwargs) |
| with pytest.raises(TypeError): |
| with maybe_deprecated(metric): |
| pdist(X1, metric="test_" + metric, **kwargs) |
| with pytest.raises(TypeError): |
| pdist(X1, metric=metric, *args) |
| with pytest.raises(TypeError): |
| pdist(X1, metric=eval(metric), *args) |
| with pytest.raises(TypeError): |
| pdist(X1, metric="test_" + metric, *args) |
|
|
| def test_pdist_extra_args_custom(self): |
| |
| |
| def _my_metric(x, y, arg, kwarg=1, kwarg2=2): |
| return arg + kwarg + kwarg2 |
|
|
| X1 = [[1., 2.], [1.2, 2.3], [2.2, 2.3]] |
| kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(2)} |
| args = [3.14] * 200 |
|
|
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric) |
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric, *args) |
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric, **kwargs) |
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric, kwarg=2.2, kwarg2=3.3) |
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric, 1, 2, kwarg=2.2) |
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric, 1, 2, kwarg=2.2) |
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric, 1.1, 2.2, 3.3) |
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric, 1.1, 2.2) |
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric, 1.1) |
| with pytest.raises(TypeError): |
| pdist(X1, _my_metric, 1.1, kwarg=2.2, kwarg2=3.3) |
|
|
| |
| assert_allclose(pdist(X1, metric=_my_metric, |
| arg=1.1, kwarg2=3.3), 5.4) |
|
|
| def test_pdist_euclidean_random(self): |
| eps = 1e-07 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-euclidean'] |
| Y_test1 = wpdist_no_const(X, 'euclidean') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_euclidean_random_u(self): |
| eps = 1e-07 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-euclidean'] |
| Y_test1 = wpdist_no_const(X, 'euclidean') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_euclidean_random_float32(self): |
| eps = 1e-07 |
| X = np.float32(eo['pdist-double-inp']) |
| Y_right = eo['pdist-euclidean'] |
| Y_test1 = wpdist_no_const(X, 'euclidean') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_euclidean_random_nonC(self): |
| eps = 1e-07 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-euclidean'] |
| Y_test2 = wpdist_no_const(X, 'test_euclidean') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_euclidean_iris_double(self): |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = eo['pdist-euclidean-iris'] |
| Y_test1 = wpdist_no_const(X, 'euclidean') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_euclidean_iris_float32(self): |
| eps = 1e-5 |
| X = np.float32(eo['iris']) |
| Y_right = eo['pdist-euclidean-iris'] |
| Y_test1 = wpdist_no_const(X, 'euclidean') |
| assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2) |
|
|
| @pytest.mark.slow |
| def test_pdist_euclidean_iris_nonC(self): |
| |
| |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = eo['pdist-euclidean-iris'] |
| Y_test2 = wpdist_no_const(X, 'test_euclidean') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_seuclidean_random(self): |
| eps = 1e-7 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-seuclidean'] |
| Y_test1 = pdist(X, 'seuclidean') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_seuclidean_random_float32(self): |
| eps = 1e-7 |
| X = np.float32(eo['pdist-double-inp']) |
| Y_right = eo['pdist-seuclidean'] |
| Y_test1 = pdist(X, 'seuclidean') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| |
| V = np.var(X, axis=0, ddof=1) |
| Y_test2 = pdist(X, 'seuclidean', V=V) |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_seuclidean_random_nonC(self): |
| |
| eps = 1e-07 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-seuclidean'] |
| Y_test2 = pdist(X, 'test_seuclidean') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_seuclidean_iris(self): |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = eo['pdist-seuclidean-iris'] |
| Y_test1 = pdist(X, 'seuclidean') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_seuclidean_iris_float32(self): |
| |
| eps = 1e-5 |
| X = np.float32(eo['iris']) |
| Y_right = eo['pdist-seuclidean-iris'] |
| Y_test1 = pdist(X, 'seuclidean') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_seuclidean_iris_nonC(self): |
| |
| |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = eo['pdist-seuclidean-iris'] |
| Y_test2 = pdist(X, 'test_seuclidean') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_cosine_random(self): |
| eps = 1e-7 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-cosine'] |
| Y_test1 = wpdist(X, 'cosine') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_cosine_random_float32(self): |
| eps = 1e-7 |
| X = np.float32(eo['pdist-double-inp']) |
| Y_right = eo['pdist-cosine'] |
| Y_test1 = wpdist(X, 'cosine') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_cosine_random_nonC(self): |
| |
| eps = 1e-7 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-cosine'] |
| Y_test2 = wpdist(X, 'test_cosine') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_cosine_iris(self): |
| eps = 1e-05 |
| X = eo['iris'] |
| Y_right = eo['pdist-cosine-iris'] |
| Y_test1 = wpdist(X, 'cosine') |
| assert_allclose(Y_test1, Y_right, atol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_cosine_iris_float32(self): |
| eps = 1e-05 |
| X = np.float32(eo['iris']) |
| Y_right = eo['pdist-cosine-iris'] |
| Y_test1 = wpdist(X, 'cosine') |
| assert_allclose(Y_test1, Y_right, atol=eps, verbose=verbose > 2) |
|
|
| @pytest.mark.slow |
| def test_pdist_cosine_iris_nonC(self): |
| eps = 1e-05 |
| X = eo['iris'] |
| Y_right = eo['pdist-cosine-iris'] |
| Y_test2 = wpdist(X, 'test_cosine') |
| assert_allclose(Y_test2, Y_right, atol=eps) |
|
|
| def test_pdist_cosine_bounds(self): |
| |
| |
| |
| x = np.abs(np.random.RandomState(1337).rand(91)) |
| X = np.vstack([x, x]) |
| assert_(wpdist(X, 'cosine')[0] >= 0, |
| msg='cosine distance should be non-negative') |
|
|
| def test_pdist_cityblock_random(self): |
| eps = 1e-7 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-cityblock'] |
| Y_test1 = wpdist_no_const(X, 'cityblock') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_cityblock_random_float32(self): |
| eps = 1e-7 |
| X = np.float32(eo['pdist-double-inp']) |
| Y_right = eo['pdist-cityblock'] |
| Y_test1 = wpdist_no_const(X, 'cityblock') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_cityblock_random_nonC(self): |
| eps = 1e-7 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-cityblock'] |
| Y_test2 = wpdist_no_const(X, 'test_cityblock') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_cityblock_iris(self): |
| eps = 1e-14 |
| X = eo['iris'] |
| Y_right = eo['pdist-cityblock-iris'] |
| Y_test1 = wpdist_no_const(X, 'cityblock') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_cityblock_iris_float32(self): |
| eps = 1e-5 |
| X = np.float32(eo['iris']) |
| Y_right = eo['pdist-cityblock-iris'] |
| Y_test1 = wpdist_no_const(X, 'cityblock') |
| assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2) |
|
|
| @pytest.mark.slow |
| def test_pdist_cityblock_iris_nonC(self): |
| |
| |
| eps = 1e-14 |
| X = eo['iris'] |
| Y_right = eo['pdist-cityblock-iris'] |
| Y_test2 = wpdist_no_const(X, 'test_cityblock') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_correlation_random(self): |
| eps = 1e-7 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-correlation'] |
| Y_test1 = wpdist(X, 'correlation') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_correlation_random_float32(self): |
| eps = 1e-7 |
| X = np.float32(eo['pdist-double-inp']) |
| Y_right = eo['pdist-correlation'] |
| Y_test1 = wpdist(X, 'correlation') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_correlation_random_nonC(self): |
| eps = 1e-7 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-correlation'] |
| Y_test2 = wpdist(X, 'test_correlation') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_correlation_iris(self): |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = eo['pdist-correlation-iris'] |
| Y_test1 = wpdist(X, 'correlation') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_correlation_iris_float32(self): |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = np.float32(eo['pdist-correlation-iris']) |
| Y_test1 = wpdist(X, 'correlation') |
| assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2) |
|
|
| @pytest.mark.slow |
| def test_pdist_correlation_iris_nonC(self): |
| if sys.maxsize > 2**32: |
| eps = 1e-7 |
| else: |
| pytest.skip("see gh-16456") |
| X = eo['iris'] |
| Y_right = eo['pdist-correlation-iris'] |
| Y_test2 = wpdist(X, 'test_correlation') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| @pytest.mark.parametrize("p", [0.1, 0.25, 1.0, 2.0, 3.2, np.inf]) |
| def test_pdist_minkowski_random_p(self, p): |
| eps = 1e-13 |
| X = eo['pdist-double-inp'] |
| Y1 = wpdist_no_const(X, 'minkowski', p=p) |
| Y2 = wpdist_no_const(X, 'test_minkowski', p=p) |
| assert_allclose(Y1, Y2, atol=0, rtol=eps) |
|
|
| def test_pdist_minkowski_random(self): |
| eps = 1e-7 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-minkowski-3.2'] |
| Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2) |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_minkowski_random_float32(self): |
| eps = 1e-7 |
| X = np.float32(eo['pdist-double-inp']) |
| Y_right = eo['pdist-minkowski-3.2'] |
| Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2) |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_minkowski_random_nonC(self): |
| eps = 1e-7 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-minkowski-3.2'] |
| Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2) |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_minkowski_3_2_iris(self): |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = eo['pdist-minkowski-3.2-iris'] |
| Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2) |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_minkowski_3_2_iris_float32(self): |
| eps = 1e-5 |
| X = np.float32(eo['iris']) |
| Y_right = eo['pdist-minkowski-3.2-iris'] |
| Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2) |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_minkowski_3_2_iris_nonC(self): |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = eo['pdist-minkowski-3.2-iris'] |
| Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2) |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_minkowski_5_8_iris(self): |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = eo['pdist-minkowski-5.8-iris'] |
| Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8) |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| @pytest.mark.slow |
| def test_pdist_minkowski_5_8_iris_float32(self): |
| eps = 1e-5 |
| X = np.float32(eo['iris']) |
| Y_right = eo['pdist-minkowski-5.8-iris'] |
| Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8) |
| assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2) |
|
|
| @pytest.mark.slow |
| def test_pdist_minkowski_5_8_iris_nonC(self): |
| eps = 1e-7 |
| X = eo['iris'] |
| Y_right = eo['pdist-minkowski-5.8-iris'] |
| Y_test2 = wpdist_no_const(X, 'test_minkowski', p=5.8) |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_mahalanobis(self): |
| |
| x = np.array([2.0, 2.0, 3.0, 5.0]).reshape(-1, 1) |
| dist = pdist(x, metric='mahalanobis') |
| assert_allclose(dist, [0.0, np.sqrt(0.5), np.sqrt(4.5), |
| np.sqrt(0.5), np.sqrt(4.5), np.sqrt(2.0)]) |
|
|
| |
| x = np.array([[0, 0], [-1, 0], [0, 2], [1, 0], [0, -2]]) |
| dist = pdist(x, metric='mahalanobis') |
| rt2 = np.sqrt(2) |
| assert_allclose(dist, [rt2, rt2, rt2, rt2, 2, 2 * rt2, 2, 2, 2 * rt2, 2]) |
|
|
| |
| with pytest.raises(ValueError): |
| wpdist([[0, 1], [2, 3]], metric='mahalanobis') |
|
|
| def test_pdist_hamming_random(self): |
| eps = 1e-15 |
| X = eo['pdist-boolean-inp'] |
| Y_right = eo['pdist-hamming'] |
| Y_test1 = wpdist(X, 'hamming') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_hamming_random_float32(self): |
| eps = 1e-15 |
| X = np.float32(eo['pdist-boolean-inp']) |
| Y_right = eo['pdist-hamming'] |
| Y_test1 = wpdist(X, 'hamming') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_hamming_random_nonC(self): |
| eps = 1e-15 |
| X = eo['pdist-boolean-inp'] |
| Y_right = eo['pdist-hamming'] |
| Y_test2 = wpdist(X, 'test_hamming') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_dhamming_random(self): |
| eps = 1e-15 |
| X = np.float64(eo['pdist-boolean-inp']) |
| Y_right = eo['pdist-hamming'] |
| Y_test1 = wpdist(X, 'hamming') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_dhamming_random_float32(self): |
| eps = 1e-15 |
| X = np.float32(eo['pdist-boolean-inp']) |
| Y_right = eo['pdist-hamming'] |
| Y_test1 = wpdist(X, 'hamming') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_dhamming_random_nonC(self): |
| eps = 1e-15 |
| X = np.float64(eo['pdist-boolean-inp']) |
| Y_right = eo['pdist-hamming'] |
| Y_test2 = wpdist(X, 'test_hamming') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_jensenshannon_random(self): |
| eps = 1e-11 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-jensenshannon'] |
| Y_test1 = pdist(X, 'jensenshannon') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_jensenshannon_random_float32(self): |
| eps = 1e-8 |
| X = np.float32(eo['pdist-double-inp']) |
| Y_right = eo['pdist-jensenshannon'] |
| Y_test1 = pdist(X, 'jensenshannon') |
| assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2) |
|
|
| def test_pdist_jensenshannon_random_nonC(self): |
| eps = 1e-11 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-jensenshannon'] |
| Y_test2 = pdist(X, 'test_jensenshannon') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_jensenshannon_iris(self): |
| if _is_32bit(): |
| |
| eps = 2.5e-10 |
| else: |
| eps = 1e-12 |
|
|
| X = eo['iris'] |
| Y_right = eo['pdist-jensenshannon-iris'] |
| Y_test1 = pdist(X, 'jensenshannon') |
| assert_allclose(Y_test1, Y_right, atol=eps) |
|
|
| def test_pdist_jensenshannon_iris_float32(self): |
| eps = 1e-06 |
| X = np.float32(eo['iris']) |
| Y_right = eo['pdist-jensenshannon-iris'] |
| Y_test1 = pdist(X, 'jensenshannon') |
| assert_allclose(Y_test1, Y_right, atol=eps, verbose=verbose > 2) |
|
|
| def test_pdist_jensenshannon_iris_nonC(self): |
| eps = 5e-5 |
| X = eo['iris'] |
| Y_right = eo['pdist-jensenshannon-iris'] |
| Y_test2 = pdist(X, 'test_jensenshannon') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_matching_mtica1(self): |
| |
| m = wmatching(np.array([1, 0, 1, 1, 0]), |
| np.array([1, 1, 0, 1, 1])) |
| m2 = wmatching(np.array([1, 0, 1, 1, 0], dtype=bool), |
| np.array([1, 1, 0, 1, 1], dtype=bool)) |
| assert_allclose(m, 0.6, rtol=0, atol=1e-10) |
| assert_allclose(m2, 0.6, rtol=0, atol=1e-10) |
|
|
| def test_pdist_matching_mtica2(self): |
| |
| m = wmatching(np.array([1, 0, 1]), |
| np.array([1, 1, 0])) |
| m2 = wmatching(np.array([1, 0, 1], dtype=bool), |
| np.array([1, 1, 0], dtype=bool)) |
| assert_allclose(m, 2 / 3, rtol=0, atol=1e-10) |
| assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10) |
|
|
| def test_pdist_yule_mtica1(self): |
| m = wyule(np.array([1, 0, 1, 1, 0]), |
| np.array([1, 1, 0, 1, 1])) |
| m2 = wyule(np.array([1, 0, 1, 1, 0], dtype=bool), |
| np.array([1, 1, 0, 1, 1], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 2, rtol=0, atol=1e-10) |
| assert_allclose(m2, 2, rtol=0, atol=1e-10) |
|
|
| def test_pdist_yule_mtica2(self): |
| m = wyule(np.array([1, 0, 1]), |
| np.array([1, 1, 0])) |
| m2 = wyule(np.array([1, 0, 1], dtype=bool), |
| np.array([1, 1, 0], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 2, rtol=0, atol=1e-10) |
| assert_allclose(m2, 2, rtol=0, atol=1e-10) |
|
|
| def test_pdist_dice_mtica1(self): |
| m = wdice(np.array([1, 0, 1, 1, 0]), |
| np.array([1, 1, 0, 1, 1])) |
| m2 = wdice(np.array([1, 0, 1, 1, 0], dtype=bool), |
| np.array([1, 1, 0, 1, 1], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 3 / 7, rtol=0, atol=1e-10) |
| assert_allclose(m2, 3 / 7, rtol=0, atol=1e-10) |
|
|
| def test_pdist_dice_mtica2(self): |
| m = wdice(np.array([1, 0, 1]), |
| np.array([1, 1, 0])) |
| m2 = wdice(np.array([1, 0, 1], dtype=bool), |
| np.array([1, 1, 0], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 0.5, rtol=0, atol=1e-10) |
| assert_allclose(m2, 0.5, rtol=0, atol=1e-10) |
|
|
| def test_pdist_sokalsneath_mtica1(self): |
| m = sokalsneath(np.array([1, 0, 1, 1, 0]), |
| np.array([1, 1, 0, 1, 1])) |
| m2 = sokalsneath(np.array([1, 0, 1, 1, 0], dtype=bool), |
| np.array([1, 1, 0, 1, 1], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 3 / 4, rtol=0, atol=1e-10) |
| assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10) |
|
|
| def test_pdist_sokalsneath_mtica2(self): |
| m = wsokalsneath(np.array([1, 0, 1]), |
| np.array([1, 1, 0])) |
| m2 = wsokalsneath(np.array([1, 0, 1], dtype=bool), |
| np.array([1, 1, 0], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 4 / 5, rtol=0, atol=1e-10) |
| assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10) |
|
|
| def test_pdist_rogerstanimoto_mtica1(self): |
| m = wrogerstanimoto(np.array([1, 0, 1, 1, 0]), |
| np.array([1, 1, 0, 1, 1])) |
| m2 = wrogerstanimoto(np.array([1, 0, 1, 1, 0], dtype=bool), |
| np.array([1, 1, 0, 1, 1], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 3 / 4, rtol=0, atol=1e-10) |
| assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10) |
|
|
| def test_pdist_rogerstanimoto_mtica2(self): |
| m = wrogerstanimoto(np.array([1, 0, 1]), |
| np.array([1, 1, 0])) |
| m2 = wrogerstanimoto(np.array([1, 0, 1], dtype=bool), |
| np.array([1, 1, 0], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 4 / 5, rtol=0, atol=1e-10) |
| assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10) |
|
|
| def test_pdist_russellrao_mtica1(self): |
| m = wrussellrao(np.array([1, 0, 1, 1, 0]), |
| np.array([1, 1, 0, 1, 1])) |
| m2 = wrussellrao(np.array([1, 0, 1, 1, 0], dtype=bool), |
| np.array([1, 1, 0, 1, 1], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 3 / 5, rtol=0, atol=1e-10) |
| assert_allclose(m2, 3 / 5, rtol=0, atol=1e-10) |
|
|
| def test_pdist_russellrao_mtica2(self): |
| m = wrussellrao(np.array([1, 0, 1]), |
| np.array([1, 1, 0])) |
| m2 = wrussellrao(np.array([1, 0, 1], dtype=bool), |
| np.array([1, 1, 0], dtype=bool)) |
| if verbose > 2: |
| print(m) |
| assert_allclose(m, 2 / 3, rtol=0, atol=1e-10) |
| assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10) |
|
|
| @pytest.mark.slow |
| def test_pdist_canberra_match(self): |
| D = eo['iris'] |
| if verbose > 2: |
| print(D.shape, D.dtype) |
| eps = 1e-15 |
| y1 = wpdist_no_const(D, "canberra") |
| y2 = wpdist_no_const(D, "test_canberra") |
| assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2) |
|
|
| def test_pdist_canberra_ticket_711(self): |
| |
| |
| eps = 1e-8 |
| pdist_y = wpdist_no_const(([3.3], [3.4]), "canberra") |
| right_y = 0.01492537 |
| assert_allclose(pdist_y, right_y, atol=eps, verbose=verbose > 2) |
|
|
| def test_pdist_custom_notdouble(self): |
| |
| class myclass: |
| pass |
|
|
| def _my_metric(x, y): |
| if not isinstance(x[0], myclass) or not isinstance(y[0], myclass): |
| raise ValueError("Type has been changed") |
| return 1.123 |
| data = np.array([[myclass()], [myclass()]], dtype=object) |
| pdist_y = pdist(data, metric=_my_metric) |
| right_y = 1.123 |
| assert_equal(pdist_y, right_y, verbose=verbose > 2) |
|
|
| def _check_calling_conventions(self, X, metric, eps=1e-07, **kwargs): |
| |
| try: |
| y1 = pdist(X, metric=metric, **kwargs) |
| y2 = pdist(X, metric=eval(metric), **kwargs) |
| y3 = pdist(X, metric="test_" + metric, **kwargs) |
| except Exception as e: |
| e_cls = e.__class__ |
| if verbose > 2: |
| print(e_cls.__name__) |
| print(e) |
| with pytest.raises(e_cls): |
| pdist(X, metric=metric, **kwargs) |
| with pytest.raises(e_cls): |
| pdist(X, metric=eval(metric), **kwargs) |
| with pytest.raises(e_cls): |
| pdist(X, metric="test_" + metric, **kwargs) |
| else: |
| assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2) |
| assert_allclose(y1, y3, rtol=eps, verbose=verbose > 2) |
|
|
| def test_pdist_calling_conventions(self, metric): |
| |
| |
| |
| |
| for eo_name in self.rnd_eo_names: |
| |
| |
| X = eo[eo_name][::5, ::2] |
| if verbose > 2: |
| print("testing: ", metric, " with: ", eo_name) |
| if metric in {'dice', 'yule', 'matching', |
| 'rogerstanimoto', 'russellrao', 'sokalmichener', |
| 'sokalsneath', |
| 'kulczynski1'} and 'bool' not in eo_name: |
| |
| continue |
| self._check_calling_conventions(X, metric) |
|
|
| |
| if metric == "seuclidean": |
| V = np.var(X.astype(np.float64), axis=0, ddof=1) |
| self._check_calling_conventions(X, metric, V=V) |
| elif metric == "mahalanobis": |
| V = np.atleast_2d(np.cov(X.astype(np.float64).T)) |
| VI = np.array(np.linalg.inv(V).T) |
| self._check_calling_conventions(X, metric, VI=VI) |
|
|
| def test_pdist_dtype_equivalence(self, metric): |
| |
| eps = 1e-07 |
| tests = [(eo['random-bool-data'], self.valid_upcasts['bool']), |
| (eo['random-uint-data'], self.valid_upcasts['uint']), |
| (eo['random-int-data'], self.valid_upcasts['int']), |
| (eo['random-float32-data'], self.valid_upcasts['float32'])] |
| for test in tests: |
| X1 = test[0][::5, ::2] |
| try: |
| y1 = pdist(X1, metric=metric) |
| except Exception as e: |
| e_cls = e.__class__ |
| if verbose > 2: |
| print(e_cls.__name__) |
| print(e) |
| for new_type in test[1]: |
| X2 = new_type(X1) |
| with pytest.raises(e_cls): |
| pdist(X2, metric=metric) |
| else: |
| for new_type in test[1]: |
| y2 = pdist(new_type(X1), metric=metric) |
| assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2) |
|
|
| @pytest.mark.thread_unsafe |
| def test_pdist_out(self, metric): |
| |
| eps = 1e-15 |
| X = eo['random-float32-data'][::5, ::2] |
| out_size = int((X.shape[0] * (X.shape[0] - 1)) / 2) |
|
|
| kwargs = dict() |
| if metric == 'minkowski': |
| kwargs['p'] = 1.23 |
| out1 = np.empty(out_size, dtype=np.float64) |
| with maybe_deprecated(metric): |
| Y_right = pdist(X, metric, **kwargs) |
| with maybe_deprecated(metric): |
| Y_test1 = pdist(X, metric, out=out1, **kwargs) |
|
|
| |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| |
| assert_(Y_test1 is out1) |
|
|
| |
| out2 = np.empty(out_size + 3, dtype=np.float64) |
| with pytest.raises(ValueError): |
| with maybe_deprecated(metric): |
| pdist(X, metric, out=out2, **kwargs) |
|
|
| |
| out3 = np.empty(2 * out_size, dtype=np.float64)[::2] |
| with pytest.raises(ValueError): |
| with maybe_deprecated(metric): |
| pdist(X, metric, out=out3, **kwargs) |
|
|
| |
| out5 = np.empty(out_size, dtype=np.int64) |
| with pytest.raises(ValueError): |
| with maybe_deprecated(metric): |
| pdist(X, metric, out=out5, **kwargs) |
|
|
| @pytest.mark.thread_unsafe |
| def test_striding(self, metric): |
| |
| |
| eps = 1e-15 |
| X = eo['random-float32-data'][::5, ::2] |
| X_copy = X.copy() |
|
|
| |
| assert_(not X.flags.c_contiguous) |
| assert_(X_copy.flags.c_contiguous) |
|
|
| kwargs = dict() |
| if metric == 'minkowski': |
| kwargs['p'] = 1.23 |
| with maybe_deprecated(metric): |
| Y1 = pdist(X, metric, **kwargs) |
| with maybe_deprecated(metric): |
| Y2 = pdist(X_copy, metric, **kwargs) |
| |
| assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2) |
|
|
| class TestSomeDistanceFunctions: |
|
|
| def setup_method(self): |
| |
| x = np.array([1.0, 2.0, 3.0]) |
| y = np.array([1.0, 1.0, 5.0]) |
|
|
| self.cases = [(x, y)] |
|
|
| def test_minkowski(self): |
| for x, y in self.cases: |
| dist1 = minkowski(x, y, p=1) |
| assert_almost_equal(dist1, 3.0) |
| dist1p5 = minkowski(x, y, p=1.5) |
| assert_almost_equal(dist1p5, (1.0 + 2.0**1.5)**(2. / 3)) |
| dist2 = minkowski(x, y, p=2) |
| assert_almost_equal(dist2, 5.0 ** 0.5) |
| dist0p25 = minkowski(x, y, p=0.25) |
| assert_almost_equal(dist0p25, (1.0 + 2.0 ** 0.25) ** 4) |
|
|
| |
| |
| |
| a = np.array([352, 916]) |
| b = np.array([350, 660]) |
| assert_equal(minkowski(a, b), |
| minkowski(a.astype('uint16'), b.astype('uint16'))) |
|
|
| def test_euclidean(self): |
| for x, y in self.cases: |
| dist = weuclidean(x, y) |
| assert_almost_equal(dist, np.sqrt(5)) |
|
|
| def test_sqeuclidean(self): |
| for x, y in self.cases: |
| dist = wsqeuclidean(x, y) |
| assert_almost_equal(dist, 5.0) |
|
|
| def test_cosine(self): |
| for x, y in self.cases: |
| dist = wcosine(x, y) |
| assert_almost_equal(dist, 1.0 - 18.0 / (np.sqrt(14) * np.sqrt(27))) |
|
|
| def test_cosine_output_dtype(self): |
| |
| assert isinstance(wcorrelation([1, 1], [1, 1], centered=False), float) |
| assert isinstance(wcosine([1, 1], [1, 1]), float) |
|
|
| def test_correlation(self): |
| xm = np.array([-1.0, 0, 1.0]) |
| ym = np.array([-4.0 / 3, -4.0 / 3, 5.0 - 7.0 / 3]) |
| for x, y in self.cases: |
| dist = wcorrelation(x, y) |
| assert_almost_equal(dist, 1.0 - np.dot(xm, ym) / (norm(xm) * norm(ym))) |
|
|
| def test_correlation_positive(self): |
| |
| x = np.array([0., 0., 0., 0., 0., 0., -2., 0., 0., 0., -2., -2., -2., |
| 0., -2., 0., -2., 0., 0., -1., -2., 0., 1., 0., 0., -2., |
| 0., 0., -2., 0., -2., -2., -2., -2., -2., -2., 0.]) |
| y = np.array([1., 1., 1., 1., 1., 1., -1., 1., 1., 1., -1., -1., -1., |
| 1., -1., 1., -1., 1., 1., 0., -1., 1., 2., 1., 1., -1., |
| 1., 1., -1., 1., -1., -1., -1., -1., -1., -1., 1.]) |
| dist = correlation(x, y) |
| assert 0 <= dist <= 10 * np.finfo(np.float64).eps |
|
|
| @pytest.mark.thread_unsafe |
| @pytest.mark.filterwarnings('ignore:Casting complex') |
| @pytest.mark.parametrize("func", [correlation, cosine]) |
| def test_corr_dep_complex(self, func): |
| x = [1+0j, 2+0j] |
| y = [3+0j, 4+0j] |
| with pytest.deprecated_call(match="Complex `u` and `v` are deprecated"): |
| func(x, y) |
|
|
| def test_mahalanobis(self): |
| x = np.array([1.0, 2.0, 3.0]) |
| y = np.array([1.0, 1.0, 5.0]) |
| vi = np.array([[2.0, 1.0, 0.0], [1.0, 2.0, 1.0], [0.0, 1.0, 2.0]]) |
| for x, y in self.cases: |
| dist = mahalanobis(x, y, vi) |
| assert_almost_equal(dist, np.sqrt(6.0)) |
|
|
|
|
| class TestSquareForm: |
| checked_dtypes = [np.float64, np.float32, np.int32, np.int8, bool] |
|
|
| def test_squareform_matrix(self): |
| for dtype in self.checked_dtypes: |
| self.check_squareform_matrix(dtype) |
|
|
| def test_squareform_vector(self): |
| for dtype in self.checked_dtypes: |
| self.check_squareform_vector(dtype) |
|
|
| def check_squareform_matrix(self, dtype): |
| A = np.zeros((0, 0), dtype=dtype) |
| rA = squareform(A) |
| assert_equal(rA.shape, (0,)) |
| assert_equal(rA.dtype, dtype) |
|
|
| A = np.zeros((1, 1), dtype=dtype) |
| rA = squareform(A) |
| assert_equal(rA.shape, (0,)) |
| assert_equal(rA.dtype, dtype) |
|
|
| A = np.array([[0, 4.2], [4.2, 0]], dtype=dtype) |
| rA = squareform(A) |
| assert_equal(rA.shape, (1,)) |
| assert_equal(rA.dtype, dtype) |
| assert_array_equal(rA, np.array([4.2], dtype=dtype)) |
|
|
| def check_squareform_vector(self, dtype): |
| v = np.zeros((0,), dtype=dtype) |
| rv = squareform(v) |
| assert_equal(rv.shape, (1, 1)) |
| assert_equal(rv.dtype, dtype) |
| assert_array_equal(rv, [[0]]) |
|
|
| v = np.array([8.3], dtype=dtype) |
| rv = squareform(v) |
| assert_equal(rv.shape, (2, 2)) |
| assert_equal(rv.dtype, dtype) |
| assert_array_equal(rv, np.array([[0, 8.3], [8.3, 0]], dtype=dtype)) |
|
|
| def test_squareform_multi_matrix(self): |
| for n in range(2, 5): |
| self.check_squareform_multi_matrix(n) |
|
|
| def check_squareform_multi_matrix(self, n): |
| X = np.random.rand(n, 4) |
| Y = wpdist_no_const(X) |
| assert_equal(len(Y.shape), 1) |
| A = squareform(Y) |
| Yr = squareform(A) |
| s = A.shape |
| k = 0 |
| if verbose >= 3: |
| print(A.shape, Y.shape, Yr.shape) |
| assert_equal(len(s), 2) |
| assert_equal(len(Yr.shape), 1) |
| assert_equal(s[0], s[1]) |
| for i in range(0, s[0]): |
| for j in range(i + 1, s[1]): |
| if i != j: |
| assert_equal(A[i, j], Y[k]) |
| k += 1 |
| else: |
| assert_equal(A[i, j], 0) |
|
|
|
|
| class TestNumObsY: |
|
|
| def test_num_obs_y_multi_matrix(self): |
| for n in range(2, 10): |
| X = np.random.rand(n, 4) |
| Y = wpdist_no_const(X) |
| assert_equal(num_obs_y(Y), n) |
|
|
| def test_num_obs_y_1(self): |
| |
| |
| with pytest.raises(ValueError): |
| self.check_y(1) |
|
|
| def test_num_obs_y_2(self): |
| |
| |
| assert_(self.check_y(2)) |
|
|
| def test_num_obs_y_3(self): |
| assert_(self.check_y(3)) |
|
|
| def test_num_obs_y_4(self): |
| assert_(self.check_y(4)) |
|
|
| def test_num_obs_y_5_10(self): |
| for i in range(5, 16): |
| self.minit(i) |
|
|
| def test_num_obs_y_2_100(self): |
| |
| |
| a = set() |
| for n in range(2, 16): |
| a.add(n * (n - 1) / 2) |
| for i in range(5, 105): |
| if i not in a: |
| with pytest.raises(ValueError): |
| self.bad_y(i) |
|
|
| def minit(self, n): |
| assert_(self.check_y(n)) |
|
|
| def bad_y(self, n): |
| y = np.random.rand(n) |
| return num_obs_y(y) |
|
|
| def check_y(self, n): |
| return num_obs_y(self.make_y(n)) == n |
|
|
| def make_y(self, n): |
| return np.random.rand((n * (n - 1)) // 2) |
|
|
|
|
| class TestNumObsDM: |
|
|
| def test_num_obs_dm_multi_matrix(self): |
| for n in range(1, 10): |
| X = np.random.rand(n, 4) |
| Y = wpdist_no_const(X) |
| A = squareform(Y) |
| if verbose >= 3: |
| print(A.shape, Y.shape) |
| assert_equal(num_obs_dm(A), n) |
|
|
| def test_num_obs_dm_0(self): |
| |
| assert_(self.check_D(0)) |
|
|
| def test_num_obs_dm_1(self): |
| |
| assert_(self.check_D(1)) |
|
|
| def test_num_obs_dm_2(self): |
| assert_(self.check_D(2)) |
|
|
| def test_num_obs_dm_3(self): |
| assert_(self.check_D(2)) |
|
|
| def test_num_obs_dm_4(self): |
| assert_(self.check_D(4)) |
|
|
| def check_D(self, n): |
| return num_obs_dm(self.make_D(n)) == n |
|
|
| def make_D(self, n): |
| return np.random.rand(n, n) |
|
|
|
|
| def is_valid_dm_throw(D): |
| return is_valid_dm(D, throw=True) |
|
|
|
|
| class TestIsValidDM: |
|
|
| def test_is_valid_dm_improper_shape_1D_E(self): |
| D = np.zeros((5,), dtype=np.float64) |
| with pytest.raises(ValueError): |
| is_valid_dm_throw(D) |
|
|
| def test_is_valid_dm_improper_shape_1D_F(self): |
| D = np.zeros((5,), dtype=np.float64) |
| assert_equal(is_valid_dm(D), False) |
|
|
| def test_is_valid_dm_improper_shape_3D_E(self): |
| D = np.zeros((3, 3, 3), dtype=np.float64) |
| with pytest.raises(ValueError): |
| is_valid_dm_throw(D) |
|
|
| def test_is_valid_dm_improper_shape_3D_F(self): |
| D = np.zeros((3, 3, 3), dtype=np.float64) |
| assert_equal(is_valid_dm(D), False) |
|
|
| def test_is_valid_dm_nonzero_diagonal_E(self): |
| y = np.random.rand(10) |
| D = squareform(y) |
| for i in range(0, 5): |
| D[i, i] = 2.0 |
| with pytest.raises(ValueError): |
| is_valid_dm_throw(D) |
|
|
| def test_is_valid_dm_nonzero_diagonal_F(self): |
| y = np.random.rand(10) |
| D = squareform(y) |
| for i in range(0, 5): |
| D[i, i] = 2.0 |
| assert_equal(is_valid_dm(D), False) |
|
|
| def test_is_valid_dm_asymmetric_E(self): |
| y = np.random.rand(10) |
| D = squareform(y) |
| D[1, 3] = D[3, 1] + 1 |
| with pytest.raises(ValueError): |
| is_valid_dm_throw(D) |
|
|
| def test_is_valid_dm_asymmetric_F(self): |
| y = np.random.rand(10) |
| D = squareform(y) |
| D[1, 3] = D[3, 1] + 1 |
| assert_equal(is_valid_dm(D), False) |
|
|
| def test_is_valid_dm_correct_1_by_1(self): |
| D = np.zeros((1, 1), dtype=np.float64) |
| assert_equal(is_valid_dm(D), True) |
|
|
| def test_is_valid_dm_correct_2_by_2(self): |
| y = np.random.rand(1) |
| D = squareform(y) |
| assert_equal(is_valid_dm(D), True) |
|
|
| def test_is_valid_dm_correct_3_by_3(self): |
| y = np.random.rand(3) |
| D = squareform(y) |
| assert_equal(is_valid_dm(D), True) |
|
|
| def test_is_valid_dm_correct_4_by_4(self): |
| y = np.random.rand(6) |
| D = squareform(y) |
| assert_equal(is_valid_dm(D), True) |
|
|
| def test_is_valid_dm_correct_5_by_5(self): |
| y = np.random.rand(10) |
| D = squareform(y) |
| assert_equal(is_valid_dm(D), True) |
|
|
|
|
| def is_valid_y_throw(y): |
| return is_valid_y(y, throw=True) |
|
|
|
|
| class TestIsValidY: |
| |
| |
| |
|
|
| def test_is_valid_y_improper_shape_2D_E(self): |
| y = np.zeros((3, 3,), dtype=np.float64) |
| with pytest.raises(ValueError): |
| is_valid_y_throw(y) |
|
|
| def test_is_valid_y_improper_shape_2D_F(self): |
| y = np.zeros((3, 3,), dtype=np.float64) |
| assert_equal(is_valid_y(y), False) |
|
|
| def test_is_valid_y_improper_shape_3D_E(self): |
| y = np.zeros((3, 3, 3), dtype=np.float64) |
| with pytest.raises(ValueError): |
| is_valid_y_throw(y) |
|
|
| def test_is_valid_y_improper_shape_3D_F(self): |
| y = np.zeros((3, 3, 3), dtype=np.float64) |
| assert_equal(is_valid_y(y), False) |
|
|
| def test_is_valid_y_correct_2_by_2(self): |
| y = self.correct_n_by_n(2) |
| assert_equal(is_valid_y(y), True) |
|
|
| def test_is_valid_y_correct_3_by_3(self): |
| y = self.correct_n_by_n(3) |
| assert_equal(is_valid_y(y), True) |
|
|
| def test_is_valid_y_correct_4_by_4(self): |
| y = self.correct_n_by_n(4) |
| assert_equal(is_valid_y(y), True) |
|
|
| def test_is_valid_y_correct_5_by_5(self): |
| y = self.correct_n_by_n(5) |
| assert_equal(is_valid_y(y), True) |
|
|
| def test_is_valid_y_2_100(self): |
| a = set() |
| for n in range(2, 16): |
| a.add(n * (n - 1) / 2) |
| for i in range(5, 105): |
| if i not in a: |
| with pytest.raises(ValueError): |
| self.bad_y(i) |
|
|
| def bad_y(self, n): |
| y = np.random.rand(n) |
| return is_valid_y(y, throw=True) |
|
|
| def correct_n_by_n(self, n): |
| y = np.random.rand((n * (n - 1)) // 2) |
| return y |
|
|
|
|
| @pytest.mark.parametrize("p", [-10.0, -0.5, 0.0]) |
| def test_bad_p(p): |
| |
| with pytest.raises(ValueError): |
| minkowski([1, 2], [3, 4], p) |
| with pytest.raises(ValueError): |
| minkowski([1, 2], [3, 4], p, [1, 1]) |
|
|
|
|
| def test_sokalsneath_all_false(): |
| |
| with pytest.raises(ValueError): |
| sokalsneath([False, False, False], [False, False, False]) |
|
|
|
|
| def test_canberra(): |
| |
| assert_equal(wcanberra([1, 2, 3], [2, 4, 6]), 1) |
| assert_equal(wcanberra([1, 1, 0, 0], [1, 0, 1, 0]), 2) |
|
|
|
|
| def test_braycurtis(): |
| |
| assert_almost_equal(wbraycurtis([1, 2, 3], [2, 4, 6]), 1. / 3, decimal=15) |
| assert_almost_equal(wbraycurtis([1, 1, 0, 0], [1, 0, 1, 0]), 0.5, decimal=15) |
|
|
|
|
| def test_euclideans(): |
| |
| x1 = np.array([1, 1, 1]) |
| x2 = np.array([0, 0, 0]) |
|
|
| |
| assert_almost_equal(wsqeuclidean(x1, x2), 3.0, decimal=14) |
| assert_almost_equal(weuclidean(x1, x2), np.sqrt(3), decimal=14) |
|
|
| |
| with pytest.raises(ValueError, match="Input vector should be 1-D"): |
| weuclidean(x1[np.newaxis, :], x2[np.newaxis, :]), np.sqrt(3) |
| with pytest.raises(ValueError, match="Input vector should be 1-D"): |
| wsqeuclidean(x1[np.newaxis, :], x2[np.newaxis, :]) |
| with pytest.raises(ValueError, match="Input vector should be 1-D"): |
| wsqeuclidean(x1[:, np.newaxis], x2[:, np.newaxis]) |
|
|
| |
| x = np.arange(4).reshape(2, 2) |
| with pytest.raises(ValueError): |
| weuclidean(x, x) |
| with pytest.raises(ValueError): |
| wsqeuclidean(x, x) |
|
|
| |
| rs = np.random.RandomState(1234567890) |
| x = rs.rand(10) |
| y = rs.rand(10) |
| d1 = weuclidean(x, y) |
| d2 = wsqeuclidean(x, y) |
| assert_almost_equal(d1**2, d2, decimal=14) |
|
|
|
|
| def test_hamming_unequal_length(): |
| |
| x = [0, 0, 1] |
| y = [1, 0, 1, 0] |
| |
| with pytest.raises(ValueError): |
| whamming(x, y) |
|
|
|
|
| def test_hamming_unequal_length_with_w(): |
| u = [0, 0, 1] |
| v = [0, 0, 1] |
| w = [1, 0, 1, 0] |
| msg = "'w' should have the same length as 'u' and 'v'." |
| with pytest.raises(ValueError, match=msg): |
| whamming(u, v, w) |
|
|
|
|
| def test_hamming_string_array(): |
| |
| a = np.array(['eggs', 'spam', 'spam', 'eggs', 'spam', 'spam', 'spam', |
| 'spam', 'spam', 'spam', 'spam', 'eggs', 'eggs', 'spam', |
| 'eggs', 'eggs', 'eggs', 'eggs', 'eggs', 'spam'], |
| dtype='|S4') |
| b = np.array(['eggs', 'spam', 'spam', 'eggs', 'eggs', 'spam', 'spam', |
| 'spam', 'spam', 'eggs', 'spam', 'eggs', 'spam', 'eggs', |
| 'spam', 'spam', 'eggs', 'spam', 'spam', 'eggs'], |
| dtype='|S4') |
| desired = 0.45 |
| assert_allclose(whamming(a, b), desired) |
|
|
|
|
| def test_minkowski_w(): |
| |
| arr_in = np.array([[83.33333333, 100., 83.33333333, 100., 36., |
| 60., 90., 150., 24., 48.], |
| [83.33333333, 100., 83.33333333, 100., 36., |
| 60., 90., 150., 24., 48.]]) |
| p0 = pdist(arr_in, metric='minkowski', p=1, w=None) |
| c0 = cdist(arr_in, arr_in, metric='minkowski', p=1, w=None) |
| p1 = pdist(arr_in, metric='minkowski', p=1) |
| c1 = cdist(arr_in, arr_in, metric='minkowski', p=1) |
|
|
| assert_allclose(p0, p1, rtol=1e-15) |
| assert_allclose(c0, c1, rtol=1e-15) |
|
|
|
|
| def test_sqeuclidean_dtypes(): |
| |
| |
| |
| x = [1, 2, 3] |
| y = [4, 5, 6] |
|
|
| for dtype in [np.int8, np.int16, np.int32, np.int64]: |
| d = wsqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype)) |
| assert_(np.issubdtype(d.dtype, np.floating)) |
|
|
| for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]: |
| umax = np.iinfo(dtype).max |
| d1 = wsqeuclidean([0], np.asarray([umax], dtype=dtype)) |
| d2 = wsqeuclidean(np.asarray([umax], dtype=dtype), [0]) |
|
|
| assert_equal(d1, d2) |
| assert_equal(d1, np.float64(umax)**2) |
|
|
| dtypes = [np.float32, np.float64, np.complex64, np.complex128] |
| for dtype in ['float16', 'float128']: |
| |
| |
| if hasattr(np, dtype): |
| dtypes.append(getattr(np, dtype)) |
|
|
| for dtype in dtypes: |
| d = wsqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype)) |
| assert_equal(d.dtype, dtype) |
|
|
|
|
| @pytest.mark.thread_unsafe |
| def test_sokalmichener(): |
| |
| p = [True, True, False] |
| q = [True, False, True] |
| x = [int(b) for b in p] |
| y = [int(b) for b in q] |
| with pytest.deprecated_call(): |
| dist1 = sokalmichener(p, q) |
| with pytest.deprecated_call(): |
| dist2 = sokalmichener(x, y) |
| |
| assert_equal(dist1, dist2) |
|
|
|
|
| @pytest.mark.thread_unsafe |
| def test_sokalmichener_with_weight(): |
| |
| |
| |
| ntf = 0 * 1 + 0 * 0.2 |
| nft = 0 * 1 + 1 * 0.2 |
| ntt = 1 * 1 + 0 * 0.2 |
| nff = 0 * 1 + 0 * 0.2 |
| expected = 2 * (nft + ntf) / (ntt + nff + 2 * (nft + ntf)) |
| assert_almost_equal(expected, 0.2857143) |
| with pytest.deprecated_call(): |
| actual = sokalmichener([1, 0], [1, 1], w=[1, 0.2]) |
| assert_almost_equal(expected, actual) |
|
|
| a1 = [False, False, True, True, True, False, False, True, True, True, True, |
| True, True, False, True, False, False, False, True, True] |
| a2 = [True, True, True, False, False, True, True, True, False, True, |
| True, True, True, True, False, False, False, True, True, True] |
|
|
| for w in [0.05, 0.1, 1.0, 20.0]: |
| with pytest.deprecated_call(): |
| assert_almost_equal(sokalmichener(a2, a1, [w]), 0.6666666666666666) |
|
|
|
|
| @pytest.mark.thread_unsafe |
| def test_modifies_input(metric): |
| |
| X1 = np.asarray([[1., 2., 3.], |
| [1.2, 2.3, 3.4], |
| [2.2, 2.3, 4.4], |
| [22.2, 23.3, 44.4]]) |
| X1_copy = X1.copy() |
| with maybe_deprecated(metric): |
| cdist(X1, X1, metric) |
| with maybe_deprecated(metric): |
| pdist(X1, metric) |
| assert_array_equal(X1, X1_copy) |
|
|
|
|
| @pytest.mark.thread_unsafe |
| def test_Xdist_deprecated_args(metric): |
| |
| X1 = np.asarray([[1., 2., 3.], |
| [1.2, 2.3, 3.4], |
| [2.2, 2.3, 4.4], |
| [22.2, 23.3, 44.4]]) |
|
|
| with pytest.raises(TypeError): |
| cdist(X1, X1, metric, 2.) |
|
|
| with pytest.raises(TypeError): |
| pdist(X1, metric, 2.) |
|
|
| for arg in ["p", "V", "VI"]: |
| kwargs = {arg: "foo"} |
|
|
| if ((arg == "V" and metric == "seuclidean") |
| or (arg == "VI" and metric == "mahalanobis") |
| or (arg == "p" and metric == "minkowski")): |
| continue |
|
|
| with pytest.raises(TypeError): |
| with maybe_deprecated(metric): |
| cdist(X1, X1, metric, **kwargs) |
|
|
| with pytest.raises(TypeError): |
| with maybe_deprecated(metric): |
| pdist(X1, metric, **kwargs) |
|
|
|
|
| @pytest.mark.thread_unsafe |
| def test_Xdist_non_negative_weights(metric): |
| X = eo['random-float32-data'][::5, ::2] |
| w = np.ones(X.shape[1]) |
| w[::5] = -w[::5] |
|
|
| if metric in ['seuclidean', 'mahalanobis', 'jensenshannon']: |
| pytest.skip("not applicable") |
|
|
| for m in [metric, eval(metric), "test_" + metric]: |
| with pytest.raises(ValueError): |
| with maybe_deprecated(metric): |
| pdist(X, m, w=w) |
| with pytest.raises(ValueError): |
| with maybe_deprecated(metric): |
| cdist(X, X, m, w=w) |
|
|
|
|
| def test__validate_vector(): |
| x = [1, 2, 3] |
| y = _validate_vector(x) |
| assert_array_equal(y, x) |
|
|
| y = _validate_vector(x, dtype=np.float64) |
| assert_array_equal(y, x) |
| assert_equal(y.dtype, np.float64) |
|
|
| x = [1] |
| y = _validate_vector(x) |
| assert_equal(y.ndim, 1) |
| assert_equal(y, x) |
|
|
| x = 1 |
| with pytest.raises(ValueError, match="Input vector should be 1-D"): |
| _validate_vector(x) |
|
|
| x = np.arange(5).reshape(1, -1, 1) |
| with pytest.raises(ValueError, match="Input vector should be 1-D"): |
| _validate_vector(x) |
|
|
| x = [[1, 2], [3, 4]] |
| with pytest.raises(ValueError, match="Input vector should be 1-D"): |
| _validate_vector(x) |
|
|
| def test_yule_all_same(): |
| |
| x = np.ones((2, 6), dtype=bool) |
| d = wyule(x[0], x[0]) |
| assert d == 0.0 |
|
|
| d = pdist(x, 'yule') |
| assert_equal(d, [0.0]) |
|
|
| d = cdist(x[:1], x[:1], 'yule') |
| assert_equal(d, [[0.0]]) |
|
|
|
|
| def test_jensenshannon(): |
| assert_almost_equal(jensenshannon([1.0, 0.0, 0.0], [0.0, 1.0, 0.0], 2.0), |
| 1.0) |
| assert_almost_equal(jensenshannon([1.0, 0.0], [0.5, 0.5]), |
| 0.46450140402245893) |
| assert_almost_equal(jensenshannon([1.0, 0.0, 0.0], [1.0, 0.0, 0.0]), 0.0) |
|
|
| assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=0), |
| [0.0, 0.0]) |
| assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=1), |
| [0.0649045]) |
| assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=0, |
| keepdims=True), [[0.0, 0.0]]) |
| assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=1, |
| keepdims=True), [[0.0649045]]) |
|
|
| a = np.array([[1, 2, 3, 4], |
| [5, 6, 7, 8], |
| [9, 10, 11, 12]]) |
| b = np.array([[13, 14, 15, 16], |
| [17, 18, 19, 20], |
| [21, 22, 23, 24]]) |
|
|
| assert_almost_equal(jensenshannon(a, b, axis=0), |
| [0.1954288, 0.1447697, 0.1138377, 0.0927636]) |
| assert_almost_equal(jensenshannon(a, b, axis=1), |
| [0.1402339, 0.0399106, 0.0201815]) |
|
|
|
|
| def test_gh_17703(): |
| arr_1 = np.array([1, 0, 0]) |
| arr_2 = np.array([2, 0, 0]) |
| expected = dice(arr_1, arr_2) |
| actual = pdist([arr_1, arr_2], metric='dice') |
| assert_allclose(actual, expected) |
| actual = cdist(np.atleast_2d(arr_1), |
| np.atleast_2d(arr_2), metric='dice') |
| assert_allclose(actual, expected) |
|
|
|
|
| @pytest.mark.thread_unsafe |
| def test_immutable_input(metric): |
| if metric in ("jensenshannon", "mahalanobis", "seuclidean"): |
| pytest.skip("not applicable") |
| x = np.arange(10, dtype=np.float64) |
| x.setflags(write=False) |
| with maybe_deprecated(metric): |
| getattr(scipy.spatial.distance, metric)(x, x, w=x) |
|
|
|
|
| class TestJaccard: |
|
|
| def test_pdist_jaccard_random(self): |
| eps = 1e-8 |
| X = eo['pdist-boolean-inp'] |
| Y_right = eo['pdist-jaccard'] |
| Y_test1 = wpdist(X, 'jaccard') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_jaccard_random_float32(self): |
| eps = 1e-8 |
| X = np.float32(eo['pdist-boolean-inp']) |
| Y_right = eo['pdist-jaccard'] |
| Y_test1 = wpdist(X, 'jaccard') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_jaccard_random_nonC(self): |
| eps = 1e-8 |
| X = eo['pdist-boolean-inp'] |
| Y_right = eo['pdist-jaccard'] |
| Y_test2 = wpdist(X, 'test_jaccard') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_djaccard_random(self): |
| eps = 1e-8 |
| X = np.float64(eo['pdist-boolean-inp']) |
| Y_right = eo['pdist-jaccard'] |
| Y_test1 = wpdist(X, 'jaccard') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_djaccard_random_float32(self): |
| eps = 1e-8 |
| X = np.float32(eo['pdist-boolean-inp']) |
| Y_right = eo['pdist-jaccard'] |
| Y_test1 = wpdist(X, 'jaccard') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_djaccard_allzeros(self): |
| eps = 1e-15 |
| Y = pdist(np.zeros((5, 3)), 'jaccard') |
| assert_allclose(np.zeros(10), Y, rtol=eps) |
|
|
| def test_pdist_djaccard_random_nonC(self): |
| eps = 1e-8 |
| X = np.float64(eo['pdist-boolean-inp']) |
| Y_right = eo['pdist-jaccard'] |
| Y_test2 = wpdist(X, 'test_jaccard') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_djaccard_allzeros_nonC(self): |
| eps = 1e-15 |
| Y = pdist(np.zeros((5, 3)), 'test_jaccard') |
| assert_allclose(np.zeros(10), Y, rtol=eps) |
|
|
| def test_pdist_jaccard_mtica1(self): |
| m = wjaccard(np.array([1, 0, 1, 1, 0]), |
| np.array([1, 1, 0, 1, 1])) |
| m2 = wjaccard(np.array([1, 0, 1, 1, 0], dtype=bool), |
| np.array([1, 1, 0, 1, 1], dtype=bool)) |
| assert_allclose(m, 0.6, rtol=0, atol=1e-10) |
| assert_allclose(m2, 0.6, rtol=0, atol=1e-10) |
|
|
| def test_pdist_jaccard_mtica2(self): |
| m = wjaccard(np.array([1, 0, 1]), |
| np.array([1, 1, 0])) |
| m2 = wjaccard(np.array([1, 0, 1], dtype=bool), |
| np.array([1, 1, 0], dtype=bool)) |
| assert_allclose(m, 2 / 3, rtol=0, atol=1e-10) |
| assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10) |
|
|
| def test_non_01_input(self): |
| |
| |
| x = np.array([-10, 2.5, 0]) |
| y = np.array([ 2, -5, 2]) |
| eps = np.finfo(float).eps |
| assert_allclose(jaccard(x, y), 1/3, rtol=eps) |
| assert_allclose(cdist([x], [y], 'jaccard'), [[1/3]]) |
| assert_allclose(pdist([x, y], 'jaccard'), [1/3]) |
|
|
|
|
| class TestChebyshev: |
|
|
| def test_pdist_chebyshev_random(self): |
| eps = 1e-8 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-chebyshev'] |
| Y_test1 = pdist(X, 'chebyshev') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_chebyshev_random_float32(self): |
| eps = 1e-7 |
| X = np.float32(eo['pdist-double-inp']) |
| Y_right = eo['pdist-chebyshev'] |
| Y_test1 = pdist(X, 'chebyshev') |
| assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2) |
|
|
| def test_pdist_chebyshev_random_nonC(self): |
| eps = 1e-8 |
| X = eo['pdist-double-inp'] |
| Y_right = eo['pdist-chebyshev'] |
| Y_test2 = pdist(X, 'test_chebyshev') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_pdist_chebyshev_iris(self): |
| eps = 1e-14 |
| X = eo['iris'] |
| Y_right = eo['pdist-chebyshev-iris'] |
| Y_test1 = pdist(X, 'chebyshev') |
| assert_allclose(Y_test1, Y_right, rtol=eps) |
|
|
| def test_pdist_chebyshev_iris_float32(self): |
| eps = 1e-5 |
| X = np.float32(eo['iris']) |
| Y_right = eo['pdist-chebyshev-iris'] |
| Y_test1 = pdist(X, 'chebyshev') |
| assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2) |
|
|
| def test_pdist_chebyshev_iris_nonC(self): |
| eps = 1e-14 |
| X = eo['iris'] |
| Y_right = eo['pdist-chebyshev-iris'] |
| Y_test2 = pdist(X, 'test_chebyshev') |
| assert_allclose(Y_test2, Y_right, rtol=eps) |
|
|
| def test_weighted(self): |
| |
| |
| x = [1, 2, 3] |
| y = [6, 5, 4] |
| w = [0, 1, 5] |
| assert_equal(chebyshev(x, y, w), 3) |
| assert_equal(pdist([x, y], 'chebyshev', w=w), [3]) |
| assert_equal(cdist([x], [y], 'chebyshev', w=w), [[3]]) |
|
|
| def test_zero_weight(self): |
| |
| x = [1, 2, 3] |
| y = [6, 5, 4] |
| w = [0, 0, 0] |
| assert_equal(chebyshev(x, y, w), 0) |
| assert_equal(pdist([x, y], 'chebyshev', w=w), [0]) |
| assert_equal(cdist([x], [y], 'chebyshev', w=w), [[0]]) |
|
|