| """ Test functions for stats module |
| |
| WRITTEN BY LOUIS LUANGKESORN <lluang@yahoo.com> FOR THE STATS MODULE |
| BASED ON WILKINSON'S STATISTICS QUIZ |
| https://www.stanford.edu/~clint/bench/wilk.txt |
| |
| Additional tests by a host of SciPy developers. |
| """ |
| import os |
| import re |
| import warnings |
| from collections import namedtuple |
| from itertools import product |
| import hypothesis.extra.numpy as npst |
| import hypothesis |
| import contextlib |
|
|
| from numpy.testing import (assert_, assert_equal, |
| assert_almost_equal, assert_array_almost_equal, |
| assert_array_equal, assert_approx_equal, |
| assert_allclose, suppress_warnings, |
| assert_array_less) |
| import pytest |
| from pytest import raises as assert_raises |
| import numpy.ma.testutils as mat |
| from numpy import array, arange, float32, power |
| import numpy as np |
|
|
| import scipy.stats as stats |
| import scipy.stats.mstats as mstats |
| import scipy.stats._mstats_basic as mstats_basic |
| from scipy.stats._ksstats import kolmogn |
| from scipy.special._testutils import FuncData |
| from scipy.special import binom |
| from scipy import optimize |
| from .common_tests import check_named_results |
| from scipy.stats._axis_nan_policy import (_broadcast_concatenate, SmallSampleWarning, |
| too_small_nd_omit, too_small_nd_not_omit, |
| too_small_1d_omit, too_small_1d_not_omit) |
| from scipy.stats._stats_py import (_permutation_distribution_t, _chk_asarray, _moment, |
| LinregressResult, _xp_mean, _xp_var, _SimpleChi2) |
| from scipy._lib._util import AxisError |
| from scipy.conftest import array_api_compatible, skip_xp_invalid_arg |
| from scipy._lib._array_api import (array_namespace, xp_copy, is_numpy, |
| is_torch, xp_size, SCIPY_ARRAY_API) |
| from scipy._lib._array_api_no_0d import xp_assert_close, xp_assert_equal |
|
|
| skip_xp_backends = pytest.mark.skip_xp_backends |
|
|
|
|
| """ Numbers in docstrings beginning with 'W' refer to the section numbers |
| and headings found in the STATISTICS QUIZ of Leland Wilkinson. These are |
| considered to be essential functionality. True testing and |
| evaluation of a statistics package requires use of the |
| NIST Statistical test data. See McCoullough(1999) Assessing The Reliability |
| of Statistical Software for a test methodology and its |
| implementation in testing SAS, SPSS, and S-Plus |
| """ |
|
|
| |
| |
| |
| |
| |
| X = array([1,2,3,4,5,6,7,8,9], float) |
| ZERO = array([0,0,0,0,0,0,0,0,0], float) |
| BIG = array([99999991,99999992,99999993,99999994,99999995,99999996,99999997, |
| 99999998,99999999], float) |
| LITTLE = array([0.99999991,0.99999992,0.99999993,0.99999994,0.99999995,0.99999996, |
| 0.99999997,0.99999998,0.99999999], float) |
| HUGE = array([1e+12,2e+12,3e+12,4e+12,5e+12,6e+12,7e+12,8e+12,9e+12], float) |
| TINY = array([1e-12,2e-12,3e-12,4e-12,5e-12,6e-12,7e-12,8e-12,9e-12], float) |
| ROUND = array([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5], float) |
|
|
|
|
| @array_api_compatible |
| @skip_xp_backends('jax.numpy', reason="JAX doesn't allow item assignment.") |
| @skip_xp_backends('array_api_strict', |
| reason=("`array_api_strict.where` `fillvalue` doesn't " |
| "accept Python floats. See data-apis/array-api#807.") |
| ) |
| @pytest.mark.usefixtures("skip_xp_backends") |
| class TestTrimmedStats: |
| |
| dprec = np.finfo(np.float64).precision |
|
|
| def test_tmean(self, xp): |
| x = xp.asarray(X.tolist()) |
|
|
| y = stats.tmean(x, (2, 8), (True, True)) |
| xp_assert_close(y, xp.asarray(5.0)) |
|
|
| y1 = stats.tmean(x, limits=(2, 8), inclusive=(False, False)) |
| y2 = stats.tmean(x, limits=None) |
| xp_assert_close(y1, y2) |
|
|
| x_2d = xp.reshape(xp.arange(63.), (9, 7)) |
| y = stats.tmean(x_2d, axis=None) |
| xp_assert_close(y, xp.mean(x_2d)) |
|
|
| y = stats.tmean(x_2d, axis=0) |
| xp_assert_close(y, xp.mean(x_2d, axis=0)) |
|
|
| y = stats.tmean(x_2d, axis=1) |
| xp_assert_close(y, xp.mean(x_2d, axis=1)) |
|
|
| y = stats.tmean(x_2d, limits=(2, 61), axis=None) |
| xp_assert_close(y, xp.asarray(31.5)) |
|
|
| y = stats.tmean(x_2d, limits=(2, 21), axis=0) |
| y_true = [14, 11.5, 9, 10, 11, 12, 13] |
| xp_assert_close(y, xp.asarray(y_true)) |
|
|
| y = stats.tmean(x_2d, limits=(2, 21), inclusive=(True, False), axis=0) |
| y_true = [10.5, 11.5, 9, 10, 11, 12, 13] |
| xp_assert_close(y, xp.asarray(y_true)) |
|
|
| x_2d_with_nan = xp_copy(x_2d) |
| x_2d_with_nan[-1, -3:] = xp.nan |
| y = stats.tmean(x_2d_with_nan, limits=(1, 13), axis=0) |
| y_true = [7, 4.5, 5.5, 6.5, xp.nan, xp.nan, xp.nan] |
| xp_assert_close(y, xp.asarray(y_true)) |
|
|
| y = stats.tmean(x_2d, limits=(2, 21), axis=1) |
| y_true = [4, 10, 17, 21, xp.nan, xp.nan, xp.nan, xp.nan, xp.nan] |
| xp_assert_close(y, xp.asarray(y_true)) |
|
|
| y = stats.tmean(x_2d, limits=(2, 21), |
| inclusive=(False, True), axis=1) |
| y_true = [4.5, 10, 17, 21, xp.nan, xp.nan, xp.nan, xp.nan, xp.nan] |
| xp_assert_close(y, xp.asarray(y_true)) |
|
|
| @skip_xp_backends('cupy', |
| reason="cupy/cupy#8391") |
| def test_tvar(self, xp): |
| x = xp.asarray(X.tolist()) |
| xp_test = array_namespace(x) |
|
|
| y = stats.tvar(x, limits=(2, 8), inclusive=(True, True)) |
| xp_assert_close(y, xp.asarray(4.6666666666666661)) |
|
|
| y = stats.tvar(x, limits=None) |
| xp_assert_close(y, xp_test.var(x, correction=1)) |
|
|
| x_2d = xp.reshape(xp.arange(63.), (9, 7)) |
| y = stats.tvar(x_2d, axis=None) |
| xp_assert_close(y, xp_test.var(x_2d, correction=1)) |
|
|
| y = stats.tvar(x_2d, axis=0) |
| xp_assert_close(y, xp.full((7,), 367.5)) |
|
|
| y = stats.tvar(x_2d, axis=1) |
| xp_assert_close(y, xp.full((9,), 4.66666667)) |
|
|
| |
| y = stats.tvar(x_2d, limits=(1, 5), axis=1, inclusive=(True, True)) |
| xp_assert_close(y[0], xp.asarray(2.5)) |
|
|
| |
| y = stats.tvar(x_2d, limits=(0, 6), axis=1, inclusive=(True, True)) |
| xp_assert_close(y[0], xp.asarray(4.666666666666667)) |
| xp_assert_equal(y[1], xp.asarray(xp.nan)) |
|
|
| def test_tstd(self, xp): |
| x = xp.asarray(X.tolist()) |
| xp_test = array_namespace(x) |
|
|
| y = stats.tstd(x, (2, 8), (True, True)) |
| xp_assert_close(y, xp.asarray(2.1602468994692865)) |
|
|
| y = stats.tstd(x, limits=None) |
| xp_assert_close(y, xp_test.std(x, correction=1)) |
|
|
| def test_tmin(self, xp): |
| x = xp.arange(10) |
| xp_assert_equal(stats.tmin(x), xp.asarray(0)) |
| xp_assert_equal(stats.tmin(x, lowerlimit=0), xp.asarray(0)) |
| xp_assert_equal(stats.tmin(x, lowerlimit=0, inclusive=False), xp.asarray(1)) |
|
|
| x = xp.reshape(x, (5, 2)) |
| xp_assert_equal(stats.tmin(x, lowerlimit=0, inclusive=False), |
| xp.asarray([2, 1])) |
| xp_assert_equal(stats.tmin(x, axis=1), xp.asarray([0, 2, 4, 6, 8])) |
| xp_assert_equal(stats.tmin(x, axis=None), xp.asarray(0)) |
|
|
| x = xp.arange(10.) |
| x[9] = xp.nan |
| xp_assert_equal(stats.tmin(x), xp.asarray(xp.nan)) |
|
|
| |
| |
| x = xp.arange(16).reshape(4, 4) |
| res = stats.tmin(x, lowerlimit=4, axis=1) |
| xp_assert_equal(res, xp.asarray([np.nan, 4, 8, 12])) |
|
|
| @skip_xp_backends(np_only=True, |
| reason="Only NumPy arrays support scalar input/`nan_policy`.") |
| def test_tmin_scalar_and_nanpolicy(self, xp): |
| assert_equal(stats.tmin(4), 4) |
|
|
| x = np.arange(10.) |
| x[9] = np.nan |
| with suppress_warnings() as sup: |
| sup.record(RuntimeWarning, "invalid value*") |
| assert_equal(stats.tmin(x, nan_policy='omit'), 0.) |
| msg = "The input contains nan values" |
| with assert_raises(ValueError, match=msg): |
| stats.tmin(x, nan_policy='raise') |
| msg = "nan_policy must be one of..." |
| with assert_raises(ValueError, match=msg): |
| stats.tmin(x, nan_policy='foobar') |
|
|
| def test_tmax(self, xp): |
| x = xp.arange(10) |
| xp_assert_equal(stats.tmax(x), xp.asarray(9)) |
| xp_assert_equal(stats.tmax(x, upperlimit=9), xp.asarray(9)) |
| xp_assert_equal(stats.tmax(x, upperlimit=9, inclusive=False), xp.asarray(8)) |
|
|
| x = xp.reshape(x, (5, 2)) |
| xp_assert_equal(stats.tmax(x, upperlimit=9, inclusive=False), |
| xp.asarray([8, 7])) |
| xp_assert_equal(stats.tmax(x, axis=1), xp.asarray([1, 3, 5, 7, 9])) |
| xp_assert_equal(stats.tmax(x, axis=None), xp.asarray(9)) |
|
|
| x = xp.arange(10.) |
| x[9] = xp.nan |
| xp_assert_equal(stats.tmax(x), xp.asarray(xp.nan)) |
|
|
| |
| |
| with suppress_warnings() as sup: |
| sup.filter(RuntimeWarning, "All-NaN slice encountered") |
| x = xp.reshape(xp.arange(16), (4, 4)) |
| res = stats.tmax(x, upperlimit=11, axis=1) |
| xp_assert_equal(res, xp.asarray([3, 7, 11, np.nan])) |
|
|
| @skip_xp_backends(np_only=True, |
| reason="Only NumPy arrays support scalar input/`nan_policy`.") |
| def test_tax_scalar_and_nanpolicy(self, xp): |
| assert_equal(stats.tmax(4), 4) |
|
|
| x = np.arange(10.) |
| x[6] = np.nan |
| with suppress_warnings() as sup: |
| sup.record(RuntimeWarning, "invalid value*") |
| assert_equal(stats.tmax(x, nan_policy='omit'), 9.) |
| msg = "The input contains nan values" |
| with assert_raises(ValueError, match=msg): |
| stats.tmax(x, nan_policy='raise') |
| msg = "nan_policy must be one of..." |
| with assert_raises(ValueError, match=msg): |
| stats.tmax(x, nan_policy='foobar') |
|
|
| def test_tsem(self, xp): |
| x = xp.asarray(X.tolist()) |
| xp_test = array_namespace(x) |
|
|
| y = stats.tsem(x, limits=(3, 8), inclusive=(False, True)) |
| y_ref = xp.asarray([4., 5., 6., 7., 8.]) |
| xp_assert_close(y, xp_test.std(y_ref, correction=1) / xp_size(y_ref)**0.5) |
| xp_assert_close(stats.tsem(x, limits=[-1, 10]), stats.tsem(x, limits=None)) |
|
|
|
|
| class TestPearsonrWilkinson: |
| """ W.II.D. Compute a correlation matrix on all the variables. |
| |
| All the correlations, except for ZERO and MISS, should be exactly 1. |
| ZERO and MISS should have undefined or missing correlations with the |
| other variables. The same should go for SPEARMAN correlations, if |
| your program has them. |
| """ |
|
|
| def test_pXX(self): |
| y = stats.pearsonr(X,X) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pXBIG(self): |
| y = stats.pearsonr(X,BIG) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pXLITTLE(self): |
| y = stats.pearsonr(X,LITTLE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pXHUGE(self): |
| y = stats.pearsonr(X,HUGE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pXTINY(self): |
| y = stats.pearsonr(X,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pXROUND(self): |
| y = stats.pearsonr(X,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pBIGBIG(self): |
| y = stats.pearsonr(BIG,BIG) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pBIGLITTLE(self): |
| y = stats.pearsonr(BIG,LITTLE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pBIGHUGE(self): |
| y = stats.pearsonr(BIG,HUGE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pBIGTINY(self): |
| y = stats.pearsonr(BIG,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pBIGROUND(self): |
| y = stats.pearsonr(BIG,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pLITTLELITTLE(self): |
| y = stats.pearsonr(LITTLE,LITTLE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pLITTLEHUGE(self): |
| y = stats.pearsonr(LITTLE,HUGE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pLITTLETINY(self): |
| y = stats.pearsonr(LITTLE,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pLITTLEROUND(self): |
| y = stats.pearsonr(LITTLE,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pHUGEHUGE(self): |
| y = stats.pearsonr(HUGE,HUGE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pHUGETINY(self): |
| y = stats.pearsonr(HUGE,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pHUGEROUND(self): |
| y = stats.pearsonr(HUGE,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pTINYTINY(self): |
| y = stats.pearsonr(TINY,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pTINYROUND(self): |
| y = stats.pearsonr(TINY,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_pROUNDROUND(self): |
| y = stats.pearsonr(ROUND,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @skip_xp_backends(cpu_only=True) |
| class TestPearsonr: |
| @skip_xp_backends(np_only=True) |
| def test_pearsonr_result_attributes(self): |
| res = stats.pearsonr(X, X) |
| attributes = ('correlation', 'pvalue') |
| check_named_results(res, attributes) |
| assert_equal(res.correlation, res.statistic) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| def test_r_almost_exactly_pos1(self, xp): |
| a = xp.arange(3.0) |
| r, prob = stats.pearsonr(a, a) |
|
|
| xp_assert_close(r, xp.asarray(1.0), atol=1e-15) |
| |
| |
| xp_assert_close(prob, xp.asarray(0.0), atol=np.sqrt(2*np.spacing(1.0))) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| def test_r_almost_exactly_neg1(self, xp): |
| a = xp.arange(3.0) |
| r, prob = stats.pearsonr(a, -a) |
|
|
| xp_assert_close(r, xp.asarray(-1.0), atol=1e-15) |
| |
| |
| xp_assert_close(prob, xp.asarray(0.0), atol=np.sqrt(2*np.spacing(1.0))) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| def test_basic(self, xp): |
| |
| |
| a = xp.asarray([-1, 0, 1]) |
| b = xp.asarray([0, 0, 3]) |
| r, prob = stats.pearsonr(a, b) |
| xp_assert_close(r, xp.asarray(3**0.5/2)) |
| xp_assert_close(prob, xp.asarray(1/3)) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment', |
| ) |
| def test_constant_input(self, xp): |
| |
| |
| msg = "An input array is constant" |
| with pytest.warns(stats.ConstantInputWarning, match=msg): |
| x = xp.asarray([0.667, 0.667, 0.667]) |
| y = xp.asarray([0.123, 0.456, 0.789]) |
| r, p = stats.pearsonr(x, y) |
| xp_assert_close(r, xp.asarray(xp.nan)) |
| xp_assert_close(p, xp.asarray(xp.nan)) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment', |
| ) |
| @pytest.mark.parametrize('dtype', ['float32', 'float64']) |
| def test_near_constant_input(self, xp, dtype): |
| npdtype = getattr(np, dtype) |
| dtype = getattr(xp, dtype) |
| |
| x = xp.asarray([2, 2, 2 + np.spacing(2, dtype=npdtype)], dtype=dtype) |
| y = xp.asarray([3, 3, 3 + 6*np.spacing(3, dtype=npdtype)], dtype=dtype) |
| msg = "An input array is nearly constant; the computed" |
| with pytest.warns(stats.NearConstantInputWarning, match=msg): |
| |
| |
| stats.pearsonr(x, y) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment', |
| ) |
| def test_very_small_input_values(self, xp): |
| |
| |
| |
| x = xp.asarray([0.004434375, 0.004756007, 0.003911996, 0.0038005, 0.003409971], |
| dtype=xp.float64) |
| y = xp.asarray([2.48e-188, 7.41e-181, 4.09e-208, 2.08e-223, 2.66e-245], |
| dtype=xp.float64) |
| r, p = stats.pearsonr(x, y) |
|
|
| |
| |
| xp_assert_close(r, xp.asarray(0.7272930540750450, dtype=xp.float64)) |
| xp_assert_close(p, xp.asarray(0.1637805429533202, dtype=xp.float64)) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment', |
| ) |
| def test_very_large_input_values(self, xp): |
| |
| |
| |
| x = 1e90*xp.asarray([0, 0, 0, 1, 1, 1, 1], dtype=xp.float64) |
| y = 1e90*xp.arange(7, dtype=xp.float64) |
|
|
| r, p = stats.pearsonr(x, y) |
|
|
| |
| |
| xp_assert_close(r, xp.asarray(0.8660254037844386, dtype=xp.float64)) |
| xp_assert_close(p, xp.asarray(0.011724811003954638, dtype=xp.float64)) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| def test_extremely_large_input_values(self, xp): |
| |
| |
| |
| x = xp.asarray([2.3e200, 4.5e200, 6.7e200, 8e200], dtype=xp.float64) |
| y = xp.asarray([1.2e199, 5.5e200, 3.3e201, 1.0e200], dtype=xp.float64) |
| r, p = stats.pearsonr(x, y) |
|
|
| |
| |
| xp_assert_close(r, xp.asarray(0.351312332103289, dtype=xp.float64)) |
| xp_assert_close(p, xp.asarray(0.648687667896711, dtype=xp.float64)) |
|
|
| @skip_xp_backends('jax.numpy', reason="JAX doesn't allow item assignment.") |
| def test_length_two_pos1(self, xp): |
| |
| |
| x = xp.asarray([1., 2.]) |
| y = xp.asarray([3., 5.]) |
| res = stats.pearsonr(x, y) |
| r, p = res |
| one = xp.asarray(1.) |
| xp_assert_equal(r, one) |
| xp_assert_equal(p, one) |
| low, high = res.confidence_interval() |
| xp_assert_equal(low, -one) |
| xp_assert_equal(high, one) |
|
|
| @skip_xp_backends('jax.numpy', reason="JAX doesn't allow item assignment.") |
| def test_length_two_neg1(self, xp): |
| |
| |
| x = xp.asarray([2., 1.]) |
| y = xp.asarray([3., 5.]) |
| res = stats.pearsonr(x, y) |
| r, p = res |
| one = xp.asarray(1.) |
| xp_assert_equal(r, -one) |
| xp_assert_equal(p, one) |
| low, high = res.confidence_interval() |
| xp_assert_equal(low, -one) |
| xp_assert_equal(high, one) |
|
|
| @skip_xp_backends('jax.numpy', reason="JAX doesn't allow item assignment.") |
| def test_length_two_constant_input(self, xp): |
| |
| |
| |
| msg = "An input array is constant" |
| with pytest.warns(stats.ConstantInputWarning, match=msg): |
| x = xp.asarray([0.667, 0.667]) |
| y = xp.asarray([0.123, 0.456]) |
| r, p = stats.pearsonr(x, y) |
| xp_assert_close(r, xp.asarray(xp.nan)) |
| xp_assert_close(p, xp.asarray(xp.nan)) |
|
|
| |
| |
| |
| |
| |
| |
| |
| @pytest.mark.skip_xp_backends(np_only=True) |
| @pytest.mark.parametrize('alternative, pval, rlow, rhigh, sign', |
| [('two-sided', 0.325800137536, -0.814938968841, 0.99230697523, 1), |
| ('less', 0.8370999312316, -1, 0.985600937290653, 1), |
| ('greater', 0.1629000687684, -0.6785654158217636, 1, 1), |
| ('two-sided', 0.325800137536, -0.992306975236, 0.81493896884, -1), |
| ('less', 0.1629000687684, -1.0, 0.6785654158217636, -1), |
| ('greater', 0.8370999312316, -0.985600937290653, 1.0, -1)]) |
| def test_basic_example(self, alternative, pval, rlow, rhigh, sign): |
| x = [1, 2, 3, 4] |
| y = np.array([0, 1, 0.5, 1]) * sign |
| result = stats.pearsonr(x, y, alternative=alternative) |
| assert_allclose(result.statistic, 0.6741998624632421*sign, rtol=1e-12) |
| assert_allclose(result.pvalue, pval, rtol=1e-6) |
| ci = result.confidence_interval() |
| assert_allclose(ci, (rlow, rhigh), rtol=1e-6) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| def test_negative_correlation_pvalue_gh17795(self, xp): |
| x = xp.arange(10.) |
| y = -x |
| test_greater = stats.pearsonr(x, y, alternative='greater') |
| test_less = stats.pearsonr(x, y, alternative='less') |
| xp_assert_close(test_greater.pvalue, xp.asarray(1.)) |
| xp_assert_close(test_less.pvalue, xp.asarray(0.), atol=1e-20) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| def test_length3_r_exactly_negative_one(self, xp): |
| x = xp.asarray([1., 2., 3.]) |
| y = xp.asarray([5., -4., -13.]) |
| res = stats.pearsonr(x, y) |
|
|
| |
| r, p = res |
| one = xp.asarray(1.0) |
| xp_assert_close(r, -one) |
| xp_assert_close(p, 0*one, atol=1e-7) |
| low, high = res.confidence_interval() |
| xp_assert_equal(low, -one) |
| xp_assert_equal(high, one) |
|
|
| @pytest.mark.skip_xp_backends(np_only=True) |
| def test_input_validation(self): |
| x = [1, 2, 3] |
| y = [4, 5] |
| message = '`x` and `y` must have the same length along `axis`.' |
| with pytest.raises(ValueError, match=message): |
| stats.pearsonr(x, y) |
|
|
| x = [1] |
| y = [2] |
| message = '`x` and `y` must have length at least 2.' |
| with pytest.raises(ValueError, match=message): |
| stats.pearsonr(x, y) |
|
|
| x = [-1j, -2j, -3.0j] |
| y = [-1j, -2j, -3.0j] |
| message = 'This function does not support complex data' |
| with pytest.raises(ValueError, match=message): |
| stats.pearsonr(x, y) |
|
|
| message = "`method` must be an instance of..." |
| with pytest.raises(ValueError, match=message): |
| stats.pearsonr([1, 2], [3, 4], method="asymptotic") |
|
|
| res = stats.pearsonr([1, 2], [3, 4]) |
| with pytest.raises(ValueError, match=message): |
| res.confidence_interval(method="exact") |
|
|
| @pytest.mark.fail_slow(10) |
| @pytest.mark.skip_xp_backends(np_only=True) |
| @pytest.mark.xfail_on_32bit("Monte Carlo method needs > a few kB of memory") |
| @pytest.mark.parametrize('alternative', ('less', 'greater', 'two-sided')) |
| @pytest.mark.parametrize('method_name', |
| ('permutation', 'monte_carlo', 'monte_carlo2')) |
| def test_resampling_pvalue(self, method_name, alternative): |
| rng = np.random.default_rng(24623935790378923) |
| size = (2, 100) if method_name == 'permutation' else (2, 1000) |
| x = rng.normal(size=size) |
| y = rng.normal(size=size) |
| methods = {'permutation': stats.PermutationMethod(rng=rng), |
| 'monte_carlo': stats.MonteCarloMethod(rvs=(rng.normal,)*2), |
| 'monte_carlo2': stats.MonteCarloMethod(rng=1294)} |
| method = methods[method_name] |
| res = stats.pearsonr(x, y, alternative=alternative, method=method, axis=-1) |
| ref = stats.pearsonr(x, y, alternative=alternative, axis=-1) |
| assert_allclose(res.statistic, ref.statistic, rtol=1e-15) |
| assert_allclose(res.pvalue, ref.pvalue, rtol=1e-2, atol=1e-3) |
|
|
| if method_name == 'monte_carlo2': |
| method = stats.MonteCarloMethod(rng=1294) |
| res2 = stats.pearsonr(x, y, alternative=alternative, method=method, axis=-1) |
| assert_equal(res2.statistic, res.statistic) |
| assert_equal(res2.pvalue, res.pvalue) |
|
|
| @pytest.mark.skip_xp_backends(np_only=True) |
| @pytest.mark.parametrize('alternative', ('less', 'greater', 'two-sided')) |
| def test_bootstrap_ci(self, alternative): |
| rng = np.random.default_rng(2462935790378923) |
| x = rng.normal(size=(2, 100)) |
| y = rng.normal(size=(2, 100)) |
| res = stats.pearsonr(x, y, alternative=alternative, axis=-1) |
|
|
| |
| rng = np.random.default_rng(724358723498249852) |
| method = stats.BootstrapMethod(random_state=rng) |
| res_ci = res.confidence_interval(method=method) |
| ref_ci = res.confidence_interval() |
| assert_allclose(res_ci, ref_ci, atol=1.5e-2) |
|
|
| |
| rng = np.random.default_rng(724358723498249852) |
| method = stats.BootstrapMethod(rng=rng) |
| res_ci2 = res.confidence_interval(method=method) |
| assert_allclose(res_ci2, res_ci) |
|
|
| @pytest.mark.skip_xp_backends(np_only=True) |
| @pytest.mark.parametrize('axis', [0, 1]) |
| def test_axis01(self, axis, xp): |
| rng = np.random.default_rng(38572345825) |
| shape = (9, 10) |
| x, y = rng.normal(size=(2,) + shape) |
| res = stats.pearsonr(x, y, axis=axis) |
| ci = res.confidence_interval() |
| if axis == 0: |
| x, y = x.T, y.T |
| for i in range(x.shape[0]): |
| res_i = stats.pearsonr(x[i], y[i]) |
| ci_i = res_i.confidence_interval() |
| assert_allclose(res.statistic[i], res_i.statistic) |
| assert_allclose(res.pvalue[i], res_i.pvalue) |
| assert_allclose(ci.low[i], ci_i.low) |
| assert_allclose(ci.high[i], ci_i.high) |
|
|
| @pytest.mark.skip_xp_backends(np_only=True) |
| def test_axis_None(self, xp): |
| rng = np.random.default_rng(38572345825) |
| shape = (9, 10) |
| x, y = rng.normal(size=(2,) + shape) |
| res = stats.pearsonr(x, y, axis=None) |
| ci = res.confidence_interval() |
| ref = stats.pearsonr(x.ravel(), y.ravel()) |
| ci_ref = ref.confidence_interval() |
| assert_allclose(res.statistic, ref.statistic) |
| assert_allclose(res.pvalue, ref.pvalue) |
| assert_allclose(ci, ci_ref) |
|
|
| def test_nd_input_validation(self, xp): |
| x = y = xp.ones((2, 5)) |
| message = '`axis` must be an integer.' |
| with pytest.raises(ValueError, match=message): |
| stats.pearsonr(x, y, axis=1.5) |
|
|
| message = '`x` and `y` must have the same length along `axis`' |
| with pytest.raises(ValueError, match=message): |
| stats.pearsonr(x, xp.ones((2, 1)), axis=1) |
|
|
| message = '`x` and `y` must have length at least 2.' |
| with pytest.raises(ValueError, match=message): |
| stats.pearsonr(xp.ones((2, 1)), xp.ones((2, 1)), axis=1) |
|
|
| message = '`x` and `y` must be broadcastable.' |
| with pytest.raises(ValueError, match=message): |
| stats.pearsonr(x, xp.ones((3, 5)), axis=1) |
|
|
| message = '`method` must be `None` if arguments are not NumPy arrays.' |
| if not is_numpy(xp): |
| x = xp.arange(10) |
| with pytest.raises(ValueError, match=message): |
| stats.pearsonr(x, x, method=stats.PermutationMethod()) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| def test_nd_special_cases(self, xp): |
| rng = np.random.default_rng(34989235492245) |
| x0 = xp.asarray(rng.random((3, 5))) |
| y0 = xp.asarray(rng.random((3, 5))) |
|
|
| message = 'An input array is constant' |
| with pytest.warns(stats.ConstantInputWarning, match=message): |
| x = xp_copy(x0) |
| x[0, ...] = 1 |
| res = stats.pearsonr(x, y0, axis=1) |
| ci = res.confidence_interval() |
| nan = xp.asarray(xp.nan, dtype=xp.float64) |
| xp_assert_equal(res.statistic[0], nan) |
| xp_assert_equal(res.pvalue[0], nan) |
| xp_assert_equal(ci.low[0], nan) |
| xp_assert_equal(ci.high[0], nan) |
| assert not xp.any(xp.isnan(res.statistic[1:])) |
| assert not xp.any(xp.isnan(res.pvalue[1:])) |
| assert not xp.any(xp.isnan(ci.low[1:])) |
| assert not xp.any(xp.isnan(ci.high[1:])) |
|
|
| message = 'An input array is nearly constant' |
| with pytest.warns(stats.NearConstantInputWarning, match=message): |
| x[0, 0] = 1 + 1e-15 |
| stats.pearsonr(x, y0, axis=1) |
|
|
| |
| x = xp.asarray([[1, 2], [1, 2], [2, 1], [2, 1.]]) |
| y = xp.asarray([[1, 2], [2, 1], [1, 2], [2, 1.]]) |
| ones = xp.ones(4) |
| res = stats.pearsonr(x, y, axis=-1) |
| ci = res.confidence_interval() |
| xp_assert_close(res.statistic, xp.asarray([1, -1, -1, 1.])) |
| xp_assert_close(res.pvalue, ones) |
| xp_assert_close(ci.low, -ones) |
| xp_assert_close(ci.high, ones) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| @pytest.mark.parametrize('axis', [0, 1, None]) |
| @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided']) |
| def test_array_api(self, xp, axis, alternative): |
| x, y = rng.normal(size=(2, 10, 11)) |
| res = stats.pearsonr(xp.asarray(x), xp.asarray(y), |
| axis=axis, alternative=alternative) |
| ref = stats.pearsonr(x, y, axis=axis, alternative=alternative) |
| xp_assert_close(res.statistic, xp.asarray(ref.statistic)) |
| xp_assert_close(res.pvalue, xp.asarray(ref.pvalue)) |
|
|
| res_ci = res.confidence_interval() |
| ref_ci = ref.confidence_interval() |
| xp_assert_close(res_ci.low, xp.asarray(ref_ci.low)) |
| xp_assert_close(res_ci.high, xp.asarray(ref_ci.high)) |
|
|
|
|
| class TestFisherExact: |
| """Some tests to show that fisher_exact() works correctly. |
| |
| Note that in SciPy 0.9.0 this was not working well for large numbers due to |
| inaccuracy of the hypergeom distribution (see #1218). Fixed now. |
| |
| Also note that R and SciPy have different argument formats for their |
| hypergeometric distribution functions. |
| |
| R: |
| > phyper(18999, 99000, 110000, 39000, lower.tail = FALSE) |
| [1] 1.701815e-09 |
| """ |
|
|
| def test_basic(self): |
| fisher_exact = stats.fisher_exact |
|
|
| res = fisher_exact([[14500, 20000], [30000, 40000]])[1] |
| assert_approx_equal(res, 0.01106, significant=4) |
| res = fisher_exact([[100, 2], [1000, 5]])[1] |
| assert_approx_equal(res, 0.1301, significant=4) |
| res = fisher_exact([[2, 7], [8, 2]])[1] |
| assert_approx_equal(res, 0.0230141, significant=6) |
| res = fisher_exact([[5, 1], [10, 10]])[1] |
| assert_approx_equal(res, 0.1973244, significant=6) |
| res = fisher_exact([[5, 15], [20, 20]])[1] |
| assert_approx_equal(res, 0.0958044, significant=6) |
| res = fisher_exact([[5, 16], [20, 25]])[1] |
| assert_approx_equal(res, 0.1725862, significant=6) |
| res = fisher_exact([[10, 5], [10, 1]])[1] |
| assert_approx_equal(res, 0.1973244, significant=6) |
| res = fisher_exact([[5, 0], [1, 4]])[1] |
| assert_approx_equal(res, 0.04761904, significant=6) |
| res = fisher_exact([[0, 1], [3, 2]])[1] |
| assert_approx_equal(res, 1.0) |
| res = fisher_exact([[0, 2], [6, 4]])[1] |
| assert_approx_equal(res, 0.4545454545) |
| res = fisher_exact([[2, 7], [8, 2]]) |
| assert_approx_equal(res[1], 0.0230141, significant=6) |
| assert_approx_equal(res[0], 4.0 / 56) |
|
|
| def test_precise(self): |
| |
| |
| |
| |
| |
| tablist = [ |
| ([[100, 2], [1000, 5]], (2.505583993422285e-001, 1.300759363430016e-001)), |
| ([[2, 7], [8, 2]], (8.586235135736206e-002, 2.301413756522114e-002)), |
| ([[5, 1], [10, 10]], (4.725646047336584e+000, 1.973244147157190e-001)), |
| ([[5, 15], [20, 20]], (3.394396617440852e-001, 9.580440012477637e-002)), |
| ([[5, 16], [20, 25]], (3.960558326183334e-001, 1.725864953812994e-001)), |
| ([[10, 5], [10, 1]], (2.116112781158483e-001, 1.973244147157190e-001)), |
| ([[10, 5], [10, 0]], (0.000000000000000e+000, 6.126482213438734e-002)), |
| ([[5, 0], [1, 4]], (np.inf, 4.761904761904762e-002)), |
| ([[0, 5], [1, 4]], (0.000000000000000e+000, 1.000000000000000e+000)), |
| ([[5, 1], [0, 4]], (np.inf, 4.761904761904758e-002)), |
| ([[0, 1], [3, 2]], (0.000000000000000e+000, 1.000000000000000e+000)) |
| ] |
| for table, res_r in tablist: |
| res = stats.fisher_exact(np.asarray(table)) |
| np.testing.assert_almost_equal(res[1], res_r[1], decimal=11, |
| verbose=True) |
|
|
| def test_gh4130(self): |
| |
| |
| |
| |
| |
| |
| x = [[6, 37], [108, 200]] |
| res = stats.fisher_exact(x) |
| assert_allclose(res[1], 0.005092697748126) |
|
|
| |
| |
| x = [[22, 0], [0, 102]] |
| res = stats.fisher_exact(x) |
| assert_allclose(res[1], 7.175066786244549e-25) |
|
|
| |
| x = [[94, 48], [3577, 16988]] |
| res = stats.fisher_exact(x) |
| assert_allclose(res[1], 2.069356340993818e-37) |
|
|
| def test_gh9231(self): |
| |
| |
| x = [[5829225, 5692693], [5760959, 5760959]] |
| res = stats.fisher_exact(x) |
| assert_allclose(res[1], 0, atol=1e-170) |
|
|
| @pytest.mark.slow |
| def test_large_numbers(self): |
| |
| pvals = [5.56e-11, 2.666e-11, 1.363e-11] |
| for pval, num in zip(pvals, [75, 76, 77]): |
| res = stats.fisher_exact([[17704, 496], [1065, num]])[1] |
| assert_approx_equal(res, pval, significant=4) |
|
|
| res = stats.fisher_exact([[18000, 80000], [20000, 90000]])[1] |
| assert_approx_equal(res, 0.2751, significant=4) |
|
|
| def test_raises(self): |
| |
| message = "The input `table` must have two dimensions." |
| with pytest.raises(ValueError, match=message): |
| stats.fisher_exact(np.arange(6)) |
|
|
| def test_row_or_col_zero(self): |
| tables = ([[0, 0], [5, 10]], |
| [[5, 10], [0, 0]], |
| [[0, 5], [0, 10]], |
| [[5, 0], [10, 0]]) |
| for table in tables: |
| oddsratio, pval = stats.fisher_exact(table) |
| assert_equal(pval, 1.0) |
| assert_equal(oddsratio, np.nan) |
|
|
| def test_less_greater(self): |
| tables = ( |
| |
| [[2, 7], [8, 2]], |
| [[200, 7], [8, 300]], |
| [[28, 21], [6, 1957]], |
| [[190, 800], [200, 900]], |
| |
| |
| [[0, 2], [3, 0]], |
| [[1, 1], [2, 1]], |
| [[2, 0], [1, 2]], |
| [[0, 1], [2, 3]], |
| [[1, 0], [1, 4]], |
| ) |
| pvals = ( |
| |
| [0.018521725952066501, 0.9990149169715733], |
| [1.0, 2.0056578803889148e-122], |
| [1.0, 5.7284374608319831e-44], |
| [0.7416227, 0.2959826], |
| |
| [0.1, 1.0], |
| [0.7, 0.9], |
| [1.0, 0.3], |
| [2./3, 1.0], |
| [1.0, 1./3], |
| ) |
| for table, pval in zip(tables, pvals): |
| res = [] |
| res.append(stats.fisher_exact(table, alternative="less")[1]) |
| res.append(stats.fisher_exact(table, alternative="greater")[1]) |
| assert_allclose(res, pval, atol=0, rtol=1e-7) |
|
|
| def test_gh3014(self): |
| |
| |
| odds, pvalue = stats.fisher_exact([[1, 2], [9, 84419233]]) |
|
|
| @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater']) |
| def test_result(self, alternative): |
| table = np.array([[14500, 20000], [30000, 40000]]) |
| res = stats.fisher_exact(table, alternative=alternative) |
| assert_equal((res.statistic, res.pvalue), res) |
|
|
| def test_input_validation_edge_cases_rxc(self): |
| rng = np.random.default_rng(2345783457834572345) |
| table = np.asarray([[2, 7], [8, 2]]) |
|
|
| message = r"`alternative` must be the default \(None\) unless..." |
| with pytest.raises(ValueError, match=message): |
| method = stats.PermutationMethod(rng=rng) |
| stats.fisher_exact(table, method=method, alternative='less') |
|
|
| message = "...not recognized; if provided, `method` must be an..." |
| with pytest.raises(ValueError, match=message): |
| method = stats.BootstrapMethod(rng=rng) |
| stats.fisher_exact(table, method=method) |
|
|
| message = "If the `method` argument of `fisher_exact` is an..." |
| with pytest.raises(ValueError, match=message): |
| method = stats.MonteCarloMethod(rvs=stats.norm.rvs) |
| stats.fisher_exact(table, method=method) |
|
|
| message = "`table` must have at least one row and one column." |
| with pytest.raises(ValueError, match=message): |
| stats.fisher_exact(np.zeros((0, 1))) |
|
|
| |
| |
| np.testing.assert_equal(stats.fisher_exact([[1, 2, 3]]), (1, 1)) |
| np.testing.assert_equal(stats.fisher_exact([[1], [2], [3]]), (1, 1)) |
| np.testing.assert_equal(stats.fisher_exact(np.zeros((2, 3))), (1, 1)) |
|
|
|
|
|
|
| @pytest.mark.fail_slow(10) |
| @pytest.mark.slow() |
| def test_resampling_2x2(self): |
| rng = np.random.default_rng(2345783457834572345) |
| table = np.asarray([[2, 7], [8, 2]]) |
| ref = stats.fisher_exact(table) |
| ref_pvalue = ref.pvalue |
| ref_stat = stats.random_table(table.sum(axis=1), table.sum(axis=0)).pmf(table) |
|
|
| method = stats.MonteCarloMethod(rng=rng) |
| res = stats.fisher_exact(table, method=method) |
| assert_allclose(res.pvalue, ref_pvalue, atol=0.0025) |
| assert_equal(res.statistic, ref_stat) |
|
|
| method = stats.PermutationMethod(rng=rng) |
| res = stats.fisher_exact(table, method=method) |
| assert_allclose(res.pvalue, ref.pvalue, atol=0.0025) |
| assert_equal(res.statistic, ref_stat) |
|
|
| @pytest.mark.fail_slow(10) |
| @pytest.mark.slow() |
| def test_resampling_rxc(self): |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| table = [[1, 2, 2, 1, 1, 0, 1], |
| [2, 0, 0, 2, 3, 0, 0], |
| [0, 1, 1, 1, 2, 7, 3], |
| [1, 1, 2, 0, 0, 0, 1], |
| [0, 1, 1, 1, 1, 0, 0]] |
| table = np.asarray(table) |
|
|
| ref_pvalue = 0.03928964365533 |
| rng = np.random.default_rng(3928964365533) |
|
|
| method = stats.PermutationMethod(rng=rng) |
| res = stats.fisher_exact(table, method=method) |
| assert_allclose(res.pvalue, ref_pvalue, atol=5e-4) |
|
|
| method = stats.MonteCarloMethod(rng=rng, n_resamples=99999) |
| res = stats.fisher_exact(table, method=method) |
| assert_allclose(res.pvalue, ref_pvalue, atol=5e-4) |
|
|
| @pytest.mark.xslow() |
| def test_resampling_exact_2x2(self): |
| |
| rng = np.random.default_rng(2345783457834572345) |
| method = stats.PermutationMethod(rng=rng) |
|
|
| for a in range(1, 3): |
| for b in range(1, 3): |
| for c in range(1, 3): |
| for d in range(1, 4): |
| table = np.asarray([[a, b], [c, d]]) |
| ref = stats.fisher_exact(table) |
| res = stats.fisher_exact(table, method=method) |
| assert_allclose(res.pvalue, ref.pvalue, atol=1e-14) |
|
|
|
|
| class TestCorrSpearmanr: |
| """ W.II.D. Compute a correlation matrix on all the variables. |
| |
| All the correlations, except for ZERO and MISS, should be exactly 1. |
| ZERO and MISS should have undefined or missing correlations with the |
| other variables. The same should go for SPEARMAN correlations, if |
| your program has them. |
| """ |
|
|
| def test_scalar(self): |
| y = stats.spearmanr(4., 2.) |
| assert_(np.isnan(y).all()) |
|
|
| def test_uneven_lengths(self): |
| assert_raises(ValueError, stats.spearmanr, [1, 2, 1], [8, 9]) |
| assert_raises(ValueError, stats.spearmanr, [1, 2, 1], 8) |
|
|
| def test_uneven_2d_shapes(self): |
| |
| np.random.seed(232324) |
| x = np.random.randn(4, 3) |
| y = np.random.randn(4, 2) |
| assert stats.spearmanr(x, y).statistic.shape == (5, 5) |
| assert stats.spearmanr(x.T, y.T, axis=1).pvalue.shape == (5, 5) |
|
|
| assert_raises(ValueError, stats.spearmanr, x, y, axis=1) |
| assert_raises(ValueError, stats.spearmanr, x.T, y.T) |
|
|
| def test_ndim_too_high(self): |
| np.random.seed(232324) |
| x = np.random.randn(4, 3, 2) |
| assert_raises(ValueError, stats.spearmanr, x) |
| assert_raises(ValueError, stats.spearmanr, x, x) |
| assert_raises(ValueError, stats.spearmanr, x, None, None) |
| |
| assert_allclose(stats.spearmanr(x, x, axis=None), |
| stats.spearmanr(x.flatten(), x.flatten(), axis=0)) |
|
|
| def test_nan_policy(self): |
| x = np.arange(10.) |
| x[9] = np.nan |
| assert_array_equal(stats.spearmanr(x, x), (np.nan, np.nan)) |
| assert_array_equal(stats.spearmanr(x, x, nan_policy='omit'), |
| (1.0, 0.0)) |
| assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='raise') |
| assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='foobar') |
|
|
| def test_nan_policy_bug_12458(self): |
| np.random.seed(5) |
| x = np.random.rand(5, 10) |
| k = 6 |
| x[:, k] = np.nan |
| y = np.delete(x, k, axis=1) |
| corx, px = stats.spearmanr(x, nan_policy='omit') |
| cory, py = stats.spearmanr(y) |
| corx = np.delete(np.delete(corx, k, axis=1), k, axis=0) |
| px = np.delete(np.delete(px, k, axis=1), k, axis=0) |
| assert_allclose(corx, cory, atol=1e-14) |
| assert_allclose(px, py, atol=1e-14) |
|
|
| def test_nan_policy_bug_12411(self): |
| np.random.seed(5) |
| m = 5 |
| n = 10 |
| x = np.random.randn(m, n) |
| x[1, 0] = np.nan |
| x[3, -1] = np.nan |
| corr, pvalue = stats.spearmanr(x, axis=1, nan_policy="propagate") |
| res = [[stats.spearmanr(x[i, :], x[j, :]).statistic for i in range(m)] |
| for j in range(m)] |
| assert_allclose(corr, res) |
|
|
| def test_sXX(self): |
| y = stats.spearmanr(X,X) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sXBIG(self): |
| y = stats.spearmanr(X,BIG) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sXLITTLE(self): |
| y = stats.spearmanr(X,LITTLE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sXHUGE(self): |
| y = stats.spearmanr(X,HUGE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sXTINY(self): |
| y = stats.spearmanr(X,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sXROUND(self): |
| y = stats.spearmanr(X,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sBIGBIG(self): |
| y = stats.spearmanr(BIG,BIG) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sBIGLITTLE(self): |
| y = stats.spearmanr(BIG,LITTLE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sBIGHUGE(self): |
| y = stats.spearmanr(BIG,HUGE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sBIGTINY(self): |
| y = stats.spearmanr(BIG,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sBIGROUND(self): |
| y = stats.spearmanr(BIG,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sLITTLELITTLE(self): |
| y = stats.spearmanr(LITTLE,LITTLE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sLITTLEHUGE(self): |
| y = stats.spearmanr(LITTLE,HUGE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sLITTLETINY(self): |
| y = stats.spearmanr(LITTLE,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sLITTLEROUND(self): |
| y = stats.spearmanr(LITTLE,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sHUGEHUGE(self): |
| y = stats.spearmanr(HUGE,HUGE) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sHUGETINY(self): |
| y = stats.spearmanr(HUGE,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sHUGEROUND(self): |
| y = stats.spearmanr(HUGE,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sTINYTINY(self): |
| y = stats.spearmanr(TINY,TINY) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sTINYROUND(self): |
| y = stats.spearmanr(TINY,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_sROUNDROUND(self): |
| y = stats.spearmanr(ROUND,ROUND) |
| r = y[0] |
| assert_approx_equal(r,1.0) |
|
|
| def test_spearmanr_result_attributes(self): |
| res = stats.spearmanr(X, X) |
| attributes = ('correlation', 'pvalue') |
| check_named_results(res, attributes) |
| assert_equal(res.correlation, res.statistic) |
|
|
| def test_1d_vs_2d(self): |
| x1 = [1, 2, 3, 4, 5, 6] |
| x2 = [1, 2, 3, 4, 6, 5] |
| res1 = stats.spearmanr(x1, x2) |
| res2 = stats.spearmanr(np.asarray([x1, x2]).T) |
| assert_allclose(res1, res2) |
|
|
| def test_1d_vs_2d_nans(self): |
| |
| for nan_policy in ['propagate', 'omit']: |
| x1 = [1, np.nan, 3, 4, 5, 6] |
| x2 = [1, 2, 3, 4, 6, np.nan] |
| res1 = stats.spearmanr(x1, x2, nan_policy=nan_policy) |
| res2 = stats.spearmanr(np.asarray([x1, x2]).T, nan_policy=nan_policy) |
| assert_allclose(res1, res2) |
|
|
| def test_3cols(self): |
| x1 = np.arange(6) |
| x2 = -x1 |
| x3 = np.array([0, 1, 2, 3, 5, 4]) |
| x = np.asarray([x1, x2, x3]).T |
| actual = stats.spearmanr(x) |
| expected_corr = np.array([[1, -1, 0.94285714], |
| [-1, 1, -0.94285714], |
| [0.94285714, -0.94285714, 1]]) |
| expected_pvalue = np.zeros((3, 3), dtype=float) |
| expected_pvalue[2, 0:2] = 0.00480466472 |
| expected_pvalue[0:2, 2] = 0.00480466472 |
|
|
| assert_allclose(actual.statistic, expected_corr) |
| assert_allclose(actual.pvalue, expected_pvalue) |
|
|
| def test_gh_9103(self): |
| |
| x = np.array([[np.nan, 3.0, 4.0, 5.0, 5.1, 6.0, 9.2], |
| [5.0, np.nan, 4.1, 4.8, 4.9, 5.0, 4.1], |
| [0.5, 4.0, 7.1, 3.8, 8.0, 5.1, 7.6]]).T |
| corr = np.array([[np.nan, np.nan, np.nan], |
| [np.nan, np.nan, np.nan], |
| [np.nan, np.nan, 1.]]) |
| assert_allclose(stats.spearmanr(x, nan_policy='propagate').statistic, |
| corr) |
|
|
| res = stats.spearmanr(x, nan_policy='omit').statistic |
| assert_allclose((res[0][1], res[0][2], res[1][2]), |
| (0.2051957, 0.4857143, -0.4707919), rtol=1e-6) |
|
|
| def test_gh_8111(self): |
| |
| n = 100 |
| np.random.seed(234568) |
| x = np.random.rand(n) |
| m = np.random.rand(n) > 0.7 |
|
|
| |
| a = (x > .5) |
| b = np.array(x) |
| res1 = stats.spearmanr(a, b, nan_policy='omit').statistic |
|
|
| |
| b[m] = np.nan |
| res2 = stats.spearmanr(a, b, nan_policy='omit').statistic |
|
|
| |
| a = a.astype(np.int32) |
| res3 = stats.spearmanr(a, b, nan_policy='omit').statistic |
|
|
| expected = [0.865895477, 0.866100381, 0.866100381] |
| assert_allclose([res1, res2, res3], expected) |
|
|
|
|
| class TestCorrSpearmanr2: |
| """Some further tests of the spearmanr function.""" |
|
|
| def test_spearmanr_vs_r(self): |
| |
| |
| x1 = [1, 2, 3, 4, 5] |
| x2 = [5, 6, 7, 8, 7] |
| expected = (0.82078268166812329, 0.088587005313543798) |
| res = stats.spearmanr(x1, x2) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| def test_empty_arrays(self): |
| assert_equal(stats.spearmanr([], []), (np.nan, np.nan)) |
|
|
| def test_normal_draws(self): |
| np.random.seed(7546) |
| x = np.array([np.random.normal(loc=1, scale=1, size=500), |
| np.random.normal(loc=1, scale=1, size=500)]) |
| corr = [[1.0, 0.3], |
| [0.3, 1.0]] |
| x = np.dot(np.linalg.cholesky(corr), x) |
| expected = (0.28659685838743354, 6.579862219051161e-11) |
| res = stats.spearmanr(x[0], x[1]) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| def test_corr_1(self): |
| assert_approx_equal(stats.spearmanr([1, 1, 2], [1, 1, 2])[0], 1.0) |
|
|
| def test_nan_policies(self): |
| x = np.arange(10.) |
| x[9] = np.nan |
| assert_array_equal(stats.spearmanr(x, x), (np.nan, np.nan)) |
| assert_allclose(stats.spearmanr(x, x, nan_policy='omit'), |
| (1.0, 0)) |
| assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='raise') |
| assert_raises(ValueError, stats.spearmanr, x, x, nan_policy='foobar') |
|
|
| def test_unequal_lengths(self): |
| x = np.arange(10.) |
| y = np.arange(20.) |
| assert_raises(ValueError, stats.spearmanr, x, y) |
|
|
| def test_omit_paired_value(self): |
| x1 = [1, 2, 3, 4] |
| x2 = [8, 7, 6, np.nan] |
| res1 = stats.spearmanr(x1, x2, nan_policy='omit') |
| res2 = stats.spearmanr(x1[:3], x2[:3], nan_policy='omit') |
| assert_equal(res1, res2) |
|
|
| def test_gh_issue_6061_windows_overflow(self): |
| x = list(range(2000)) |
| y = list(range(2000)) |
| y[0], y[9] = y[9], y[0] |
| y[10], y[434] = y[434], y[10] |
| y[435], y[1509] = y[1509], y[435] |
| |
| |
| |
| x.append(np.nan) |
| y.append(3.0) |
| assert_almost_equal(stats.spearmanr(x, y, nan_policy='omit')[0], 0.998) |
|
|
| def test_tie0(self): |
| |
| warn_msg = "An input array is constant" |
| with pytest.warns(stats.ConstantInputWarning, match=warn_msg): |
| r, p = stats.spearmanr([2, 2, 2], [2, 2, 2]) |
| assert_equal(r, np.nan) |
| assert_equal(p, np.nan) |
| r, p = stats.spearmanr([2, 0, 2], [2, 2, 2]) |
| assert_equal(r, np.nan) |
| assert_equal(p, np.nan) |
| r, p = stats.spearmanr([2, 2, 2], [2, 0, 2]) |
| assert_equal(r, np.nan) |
| assert_equal(p, np.nan) |
|
|
| def test_tie1(self): |
| |
| x = [1.0, 2.0, 3.0, 4.0] |
| y = [1.0, 2.0, 2.0, 3.0] |
| |
| xr = [1.0, 2.0, 3.0, 4.0] |
| yr = [1.0, 2.5, 2.5, 4.0] |
| |
| |
| sr = stats.spearmanr(x, y) |
| pr = stats.pearsonr(xr, yr) |
| assert_almost_equal(sr, pr) |
|
|
| def test_tie2(self): |
| |
| |
| x1 = [1, 2, 2.5, 2] |
| y1 = [1, 3, 2.5, 4] |
| |
| x2 = [1, 2, 2.5, 2, np.nan] |
| y2 = [1, 3, 2.5, 4, np.nan] |
|
|
| |
| sr1 = stats.spearmanr(x1, y1) |
| sr2 = stats.spearmanr(x2, y2, nan_policy='omit') |
| assert_almost_equal(sr1, sr2) |
|
|
| def test_ties_axis_1(self): |
| z1 = np.array([[1, 1, 1, 1], [1, 2, 3, 4]]) |
| z2 = np.array([[1, 2, 3, 4], [1, 1, 1, 1]]) |
| z3 = np.array([[1, 1, 1, 1], [1, 1, 1, 1]]) |
| warn_msg = "An input array is constant" |
| with pytest.warns(stats.ConstantInputWarning, match=warn_msg): |
| r, p = stats.spearmanr(z1, axis=1) |
| assert_equal(r, np.nan) |
| assert_equal(p, np.nan) |
| r, p = stats.spearmanr(z2, axis=1) |
| assert_equal(r, np.nan) |
| assert_equal(p, np.nan) |
| r, p = stats.spearmanr(z3, axis=1) |
| assert_equal(r, np.nan) |
| assert_equal(p, np.nan) |
|
|
| def test_gh_11111(self): |
| x = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) |
| y = np.array([0, 0.009783728115345005, 0, 0, 0.0019759230121848587, |
| 0.0007535430349118562, 0.0002661781514710257, 0, 0, |
| 0.0007835762419683435]) |
| warn_msg = "An input array is constant" |
| with pytest.warns(stats.ConstantInputWarning, match=warn_msg): |
| r, p = stats.spearmanr(x, y) |
| assert_equal(r, np.nan) |
| assert_equal(p, np.nan) |
|
|
| def test_index_error(self): |
| x = np.array([1.0, 7.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) |
| y = np.array([0, 0.009783728115345005, 0, 0, 0.0019759230121848587, |
| 0.0007535430349118562, 0.0002661781514710257, 0, 0, |
| 0.0007835762419683435]) |
| assert_raises(ValueError, stats.spearmanr, x, y, axis=2) |
|
|
| def test_alternative(self): |
| |
|
|
| |
| x1 = [1, 2, 3, 4, 5] |
| x2 = [5, 6, 7, 8, 7] |
|
|
| |
| expected = (0.82078268166812329, 0.088587005313543798) |
|
|
| |
| res = stats.spearmanr(x1, x2, alternative="less") |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], 1 - (expected[1] / 2)) |
|
|
| |
| res = stats.spearmanr(x1, x2, alternative="greater") |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1] / 2) |
|
|
| with pytest.raises(ValueError, match="`alternative` must be 'less'..."): |
| stats.spearmanr(x1, x2, alternative="ekki-ekki") |
|
|
| @pytest.mark.parametrize("alternative", ('two-sided', 'less', 'greater')) |
| def test_alternative_nan_policy(self, alternative): |
| |
| x1 = [1, 2, 3, 4, 5] |
| x2 = [5, 6, 7, 8, 7] |
| x1nan = x1 + [np.nan] |
| x2nan = x2 + [np.nan] |
|
|
| |
| assert_array_equal(stats.spearmanr(x1nan, x2nan), (np.nan, np.nan)) |
|
|
| |
| res_actual = stats.spearmanr(x1nan, x2nan, nan_policy='omit', |
| alternative=alternative) |
| res_expected = stats.spearmanr(x1, x2, alternative=alternative) |
| assert_allclose(res_actual, res_expected) |
|
|
| |
| message = 'The input contains nan values' |
| with pytest.raises(ValueError, match=message): |
| stats.spearmanr(x1nan, x2nan, nan_policy='raise', |
| alternative=alternative) |
|
|
| |
| message = "nan_policy must be one of..." |
| with pytest.raises(ValueError, match=message): |
| stats.spearmanr(x1nan, x2nan, nan_policy='ekki-ekki', |
| alternative=alternative) |
|
|
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
|
|
|
|
| def test_kendalltau(): |
| |
| |
| variants = ('b', 'c') |
|
|
| |
| x = [5, 2, 1, 3, 6, 4, 7, 8] |
| y = [5, 2, 6, 3, 1, 8, 7, 4] |
| |
| |
| expected = (0.0, 1.0) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| x = [0, 5, 2, 1, 3, 6, 4, 7, 8] |
| y = [5, 2, 0, 6, 3, 1, 8, 7, 4] |
| |
| |
| expected = (0.0, 1.0) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| x = [5, 2, 1, 3, 6, 4, 7] |
| y = [5, 2, 6, 3, 1, 7, 4] |
| |
| |
| expected = (-0.14285714286, 0.77261904762) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| x = [2, 1, 3, 6, 4, 7, 8] |
| y = [2, 6, 3, 1, 8, 7, 4] |
| |
| |
| expected = (0.047619047619, 1.0) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| x = np.arange(10) |
| y = np.arange(10) |
| |
| |
| expected = (1.0, 5.511463844797e-07) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| b = y[1] |
| y[1] = y[2] |
| y[2] = b |
| |
| |
| expected = (0.9555555555555556, 5.511463844797e-06) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| b = y[5] |
| y[5] = y[6] |
| y[6] = b |
| |
| |
| expected = (0.9111111111111111, 2.976190476190e-05) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| x = np.arange(10) |
| y = np.arange(10)[::-1] |
| |
| |
| expected = (-1.0, 5.511463844797e-07) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| b = y[1] |
| y[1] = y[2] |
| y[2] = b |
| |
| |
| expected = (-0.9555555555555556, 5.511463844797e-06) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| b = y[5] |
| y[5] = y[6] |
| y[6] = b |
| |
| |
| expected = (-0.9111111111111111, 2.976190476190e-05) |
| for taux in variants: |
| res = stats.kendalltau(x, y, variant=taux) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| |
| |
| x = array([1, 2, 2, 4, 4, 6, 6, 8, 9, 9]) |
| y = array([1, 2, 4, 4, 4, 4, 8, 8, 8, 10]) |
| expected = 0.85895569 |
| assert_approx_equal(stats.kendalltau(x, y, variant='b')[0], expected) |
| expected = 0.825 |
| assert_approx_equal(stats.kendalltau(x, y, variant='c')[0], expected) |
|
|
| |
| y[2] = y[1] |
| assert_raises(ValueError, stats.kendalltau, x, y, method='exact') |
|
|
| |
| assert_raises(ValueError, stats.kendalltau, x, y, method='banana') |
|
|
| |
| assert_raises(ValueError, stats.kendalltau, x, y, variant='rms') |
|
|
| |
| |
| |
| x1 = [12, 2, 1, 12, 2] |
| x2 = [1, 4, 7, 1, 0] |
| expected = (-0.47140452079103173, 0.28274545993277478) |
| res = stats.kendalltau(x1, x2) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| attributes = ('correlation', 'pvalue') |
| for taux in variants: |
| res = stats.kendalltau(x1, x2, variant=taux) |
| check_named_results(res, attributes) |
| assert_equal(res.correlation, res.statistic) |
|
|
| |
| for taux in variants: |
| assert_equal(stats.kendalltau([2, 2, 2], [2, 2, 2], variant=taux), |
| (np.nan, np.nan)) |
| assert_equal(stats.kendalltau([2, 0, 2], [2, 2, 2], variant=taux), |
| (np.nan, np.nan)) |
| assert_equal(stats.kendalltau([2, 2, 2], [2, 0, 2], variant=taux), |
| (np.nan, np.nan)) |
|
|
| |
| assert_equal(stats.kendalltau([], []), (np.nan, np.nan)) |
|
|
| |
| np.random.seed(7546) |
| x = np.array([np.random.normal(loc=1, scale=1, size=500), |
| np.random.normal(loc=1, scale=1, size=500)]) |
| corr = [[1.0, 0.3], |
| [0.3, 1.0]] |
| x = np.dot(np.linalg.cholesky(corr), x) |
| expected = (0.19291382765531062, 1.1337095377742629e-10) |
| res = stats.kendalltau(x[0], x[1]) |
| assert_approx_equal(res[0], expected[0]) |
| assert_approx_equal(res[1], expected[1]) |
|
|
| |
| assert_approx_equal(stats.kendalltau([1, 1, 2], [1, 1, 2], variant='b')[0], |
| 1.0) |
| assert_approx_equal(stats.kendalltau([1, 1, 2], [1, 1, 2], variant='c')[0], |
| 0.88888888) |
|
|
| |
| x = np.arange(10.) |
| x[9] = np.nan |
| assert_array_equal(stats.kendalltau(x, x), (np.nan, np.nan)) |
| assert_allclose(stats.kendalltau(x, x, nan_policy='omit'), |
| (1.0, 5.5114638e-6), rtol=1e-06) |
| assert_allclose(stats.kendalltau(x, x, nan_policy='omit', method='asymptotic'), |
| (1.0, 0.00017455009626808976), rtol=1e-06) |
| assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='raise') |
| assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='foobar') |
|
|
| |
| x = np.arange(10.) |
| y = np.arange(20.) |
| assert_raises(ValueError, stats.kendalltau, x, y) |
|
|
| |
| tau, p_value = stats.kendalltau([], []) |
| assert_equal(np.nan, tau) |
| assert_equal(np.nan, p_value) |
| tau, p_value = stats.kendalltau([0], [0]) |
| assert_equal(np.nan, tau) |
| assert_equal(np.nan, p_value) |
|
|
| |
| x = np.arange(2000, dtype=float) |
| x = np.ma.masked_greater(x, 1995) |
| y = np.arange(2000, dtype=float) |
| y = np.concatenate((y[1000:], y[:1000])) |
| assert_(np.isfinite(stats.kendalltau(x,y)[1])) |
|
|
|
|
| def test_kendalltau_vs_mstats_basic(): |
| np.random.seed(42) |
| for s in range(2,10): |
| a = [] |
| |
| for i in range(s): |
| a += [i]*i |
| b = list(a) |
| np.random.shuffle(a) |
| np.random.shuffle(b) |
| expected = mstats_basic.kendalltau(a, b) |
| actual = stats.kendalltau(a, b) |
| assert_approx_equal(actual[0], expected[0]) |
| assert_approx_equal(actual[1], expected[1]) |
|
|
|
|
| def test_kendalltau_nan_2nd_arg(): |
| |
| x = [1., 2., 3., 4.] |
| y = [np.nan, 2.4, 3.4, 3.4] |
|
|
| r1 = stats.kendalltau(x, y, nan_policy='omit') |
| r2 = stats.kendalltau(x[1:], y[1:]) |
| assert_allclose(r1.statistic, r2.statistic, atol=1e-15) |
|
|
|
|
| def test_kendalltau_gh18139_overflow(): |
| |
| |
| |
| import random |
| random.seed(6272161) |
| classes = [1, 2, 3, 4, 5, 6, 7] |
| n_samples = 2 * 10 ** 5 |
| x = random.choices(classes, k=n_samples) |
| y = random.choices(classes, k=n_samples) |
| res = stats.kendalltau(x, y) |
| |
| assert_allclose(res.statistic, 0.0011816493905730343) |
| |
| |
| assert_allclose(res.pvalue, 0.4894, atol=2e-3) |
|
|
|
|
| class TestKendallTauAlternative: |
| def test_kendalltau_alternative_asymptotic(self): |
| |
|
|
| |
| x1 = [1, 2, 3, 4, 5] |
| x2 = [5, 6, 7, 8, 7] |
|
|
| |
| expected = stats.kendalltau(x1, x2, alternative="two-sided") |
| assert expected[0] > 0 |
|
|
| |
| res = stats.kendalltau(x1, x2, alternative="less") |
| assert_equal(res[0], expected[0]) |
| assert_allclose(res[1], 1 - (expected[1] / 2)) |
|
|
| |
| res = stats.kendalltau(x1, x2, alternative="greater") |
| assert_equal(res[0], expected[0]) |
| assert_allclose(res[1], expected[1] / 2) |
|
|
| |
| x2.reverse() |
|
|
| |
| expected = stats.kendalltau(x1, x2, alternative="two-sided") |
| assert expected[0] < 0 |
|
|
| |
| res = stats.kendalltau(x1, x2, alternative="greater") |
| assert_equal(res[0], expected[0]) |
| assert_allclose(res[1], 1 - (expected[1] / 2)) |
|
|
| |
| res = stats.kendalltau(x1, x2, alternative="less") |
| assert_equal(res[0], expected[0]) |
| assert_allclose(res[1], expected[1] / 2) |
|
|
| with pytest.raises(ValueError, match="`alternative` must be 'less'..."): |
| stats.kendalltau(x1, x2, alternative="ekki-ekki") |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| alternatives = ('less', 'two-sided', 'greater') |
| p_n1 = [np.nan, np.nan, np.nan] |
| p_n2 = [1, 1, 0.5] |
| p_c0 = [1, 0.3333333333333, 0.1666666666667] |
| p_c1 = [0.9583333333333, 0.3333333333333, 0.1666666666667] |
| p_no_correlation = [0.5916666666667, 1, 0.5916666666667] |
| p_no_correlationb = [0.5475694444444, 1, 0.5475694444444] |
| p_n_lt_171 = [0.9624118165785, 0.1194389329806, 0.0597194664903] |
| p_n_lt_171b = [0.246236925303, 0.4924738506059, 0.755634083327] |
| p_n_lt_171c = [0.9847475308925, 0.03071385306533, 0.01535692653267] |
|
|
| def exact_test(self, x, y, alternative, rev, stat_expected, p_expected): |
| if rev: |
| y = -np.asarray(y) |
| stat_expected *= -1 |
| res = stats.kendalltau(x, y, method='exact', alternative=alternative) |
| res_expected = stat_expected, p_expected |
| assert_allclose(res, res_expected) |
|
|
| case_R_n1 = (list(zip(alternatives, p_n1, [False]*3)) |
| + list(zip(alternatives, reversed(p_n1), [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, p_expected, rev", case_R_n1) |
| def test_against_R_n1(self, alternative, p_expected, rev): |
| x, y = [1], [2] |
| stat_expected = np.nan |
| self.exact_test(x, y, alternative, rev, stat_expected, p_expected) |
|
|
| case_R_n2 = (list(zip(alternatives, p_n2, [False]*3)) |
| + list(zip(alternatives, reversed(p_n2), [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, p_expected, rev", case_R_n2) |
| def test_against_R_n2(self, alternative, p_expected, rev): |
| x, y = [1, 2], [3, 4] |
| stat_expected = 0.9999999999999998 |
| self.exact_test(x, y, alternative, rev, stat_expected, p_expected) |
|
|
| case_R_c0 = (list(zip(alternatives, p_c0, [False]*3)) |
| + list(zip(alternatives, reversed(p_c0), [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, p_expected, rev", case_R_c0) |
| def test_against_R_c0(self, alternative, p_expected, rev): |
| x, y = [1, 2, 3], [1, 2, 3] |
| stat_expected = 1 |
| self.exact_test(x, y, alternative, rev, stat_expected, p_expected) |
|
|
| case_R_c1 = (list(zip(alternatives, p_c1, [False]*3)) |
| + list(zip(alternatives, reversed(p_c1), [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, p_expected, rev", case_R_c1) |
| def test_against_R_c1(self, alternative, p_expected, rev): |
| x, y = [1, 2, 3, 4], [1, 2, 4, 3] |
| stat_expected = 0.6666666666666667 |
| self.exact_test(x, y, alternative, rev, stat_expected, p_expected) |
|
|
| case_R_no_corr = (list(zip(alternatives, p_no_correlation, [False]*3)) |
| + list(zip(alternatives, reversed(p_no_correlation), |
| [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, p_expected, rev", case_R_no_corr) |
| def test_against_R_no_correlation(self, alternative, p_expected, rev): |
| x, y = [1, 2, 3, 4, 5], [1, 5, 4, 2, 3] |
| stat_expected = 0 |
| self.exact_test(x, y, alternative, rev, stat_expected, p_expected) |
|
|
| case_no_cor_b = (list(zip(alternatives, p_no_correlationb, [False]*3)) |
| + list(zip(alternatives, reversed(p_no_correlationb), |
| [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, p_expected, rev", case_no_cor_b) |
| def test_against_R_no_correlationb(self, alternative, p_expected, rev): |
| x, y = [1, 2, 3, 4, 5, 6, 7, 8], [8, 6, 1, 3, 2, 5, 4, 7] |
| stat_expected = 0 |
| self.exact_test(x, y, alternative, rev, stat_expected, p_expected) |
|
|
| case_R_lt_171 = (list(zip(alternatives, p_n_lt_171, [False]*3)) |
| + list(zip(alternatives, reversed(p_n_lt_171), [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, p_expected, rev", case_R_lt_171) |
| def test_against_R_lt_171(self, alternative, p_expected, rev): |
| |
| |
| x = [44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1] |
| y = [2.6, 3.1, 2.5, 5.0, 3.6, 4.0, 5.2, 2.8, 3.8] |
| stat_expected = 0.4444444444444445 |
| self.exact_test(x, y, alternative, rev, stat_expected, p_expected) |
|
|
| case_R_lt_171b = (list(zip(alternatives, p_n_lt_171b, [False]*3)) |
| + list(zip(alternatives, reversed(p_n_lt_171b), |
| [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, p_expected, rev", case_R_lt_171b) |
| def test_against_R_lt_171b(self, alternative, p_expected, rev): |
| np.random.seed(0) |
| x = np.random.rand(100) |
| y = np.random.rand(100) |
| stat_expected = -0.04686868686868687 |
| self.exact_test(x, y, alternative, rev, stat_expected, p_expected) |
|
|
| case_R_lt_171c = (list(zip(alternatives, p_n_lt_171c, [False]*3)) |
| + list(zip(alternatives, reversed(p_n_lt_171c), |
| [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, p_expected, rev", case_R_lt_171c) |
| def test_against_R_lt_171c(self, alternative, p_expected, rev): |
| np.random.seed(0) |
| x = np.random.rand(170) |
| y = np.random.rand(170) |
| stat_expected = 0.1115906717716673 |
| self.exact_test(x, y, alternative, rev, stat_expected, p_expected) |
|
|
| case_gt_171 = (list(zip(alternatives, [False]*3)) + |
| list(zip(alternatives, [True]*3))) |
|
|
| @pytest.mark.parametrize("alternative, rev", case_gt_171) |
| def test_gt_171(self, alternative, rev): |
| np.random.seed(0) |
| x = np.random.rand(400) |
| y = np.random.rand(400) |
| res0 = stats.kendalltau(x, y, method='exact', |
| alternative=alternative) |
| res1 = stats.kendalltau(x, y, method='asymptotic', |
| alternative=alternative) |
| assert_equal(res0[0], res1[0]) |
| assert_allclose(res0[1], res1[1], rtol=1e-3) |
|
|
| @pytest.mark.parametrize("method", ('exact', 'asymptotic')) |
| @pytest.mark.parametrize("alternative", ('two-sided', 'less', 'greater')) |
| def test_nan_policy(self, method, alternative): |
| |
| x1 = [1, 2, 3, 4, 5] |
| x2 = [5, 6, 7, 8, 9] |
| x1nan = x1 + [np.nan] |
| x2nan = x2 + [np.nan] |
|
|
| |
| res_actual = stats.kendalltau(x1nan, x2nan, |
| method=method, alternative=alternative) |
| res_expected = (np.nan, np.nan) |
| assert_allclose(res_actual, res_expected) |
|
|
| |
| res_actual = stats.kendalltau(x1nan, x2nan, nan_policy='omit', |
| method=method, alternative=alternative) |
| res_expected = stats.kendalltau(x1, x2, method=method, |
| alternative=alternative) |
| assert_allclose(res_actual, res_expected) |
|
|
| |
| message = 'The input contains nan values' |
| with pytest.raises(ValueError, match=message): |
| stats.kendalltau(x1nan, x2nan, nan_policy='raise', |
| method=method, alternative=alternative) |
|
|
| |
| message = "nan_policy must be one of..." |
| with pytest.raises(ValueError, match=message): |
| stats.kendalltau(x1nan, x2nan, nan_policy='ekki-ekki', |
| method=method, alternative=alternative) |
|
|
|
|
| def test_weightedtau(): |
| x = [12, 2, 1, 12, 2] |
| y = [1, 4, 7, 1, 0] |
| tau, p_value = stats.weightedtau(x, y) |
| assert_approx_equal(tau, -0.56694968153682723) |
| assert_equal(np.nan, p_value) |
| tau, p_value = stats.weightedtau(x, y, additive=False) |
| assert_approx_equal(tau, -0.62205716951801038) |
| assert_equal(np.nan, p_value) |
| |
| tau, p_value = stats.weightedtau(x, y, weigher=lambda x: 1) |
| assert_approx_equal(tau, -0.47140452079103173) |
| assert_equal(np.nan, p_value) |
|
|
| |
| res = stats.weightedtau(x, y) |
| attributes = ('correlation', 'pvalue') |
| check_named_results(res, attributes) |
| assert_equal(res.correlation, res.statistic) |
|
|
| |
| tau, p_value = stats.weightedtau(x, y, rank=None) |
| assert_approx_equal(tau, -0.4157652301037516) |
| assert_equal(np.nan, p_value) |
| tau, p_value = stats.weightedtau(y, x, rank=None) |
| assert_approx_equal(tau, -0.7181341329699029) |
| assert_equal(np.nan, p_value) |
| tau, p_value = stats.weightedtau(x, y, rank=None, additive=False) |
| assert_approx_equal(tau, -0.40644850966246893) |
| assert_equal(np.nan, p_value) |
| tau, p_value = stats.weightedtau(y, x, rank=None, additive=False) |
| assert_approx_equal(tau, -0.83766582937355172) |
| assert_equal(np.nan, p_value) |
| tau, p_value = stats.weightedtau(x, y, rank=False) |
| assert_approx_equal(tau, -0.51604397940261848) |
| assert_equal(np.nan, p_value) |
| |
| tau, p_value = stats.weightedtau(x, y, rank=True, weigher=lambda x: 1) |
| assert_approx_equal(tau, -0.47140452079103173) |
| assert_equal(np.nan, p_value) |
| tau, p_value = stats.weightedtau(y, x, rank=True, weigher=lambda x: 1) |
| assert_approx_equal(tau, -0.47140452079103173) |
| assert_equal(np.nan, p_value) |
| |
| tau, p_value = stats.weightedtau(np.asarray(x, dtype=np.float64), y) |
| assert_approx_equal(tau, -0.56694968153682723) |
| tau, p_value = stats.weightedtau(np.asarray(x, dtype=np.int16), y) |
| assert_approx_equal(tau, -0.56694968153682723) |
| tau, p_value = stats.weightedtau(np.asarray(x, dtype=np.float64), |
| np.asarray(y, dtype=np.float64)) |
| assert_approx_equal(tau, -0.56694968153682723) |
| |
| tau, p_value = stats.weightedtau([], []) |
| assert_equal(np.nan, tau) |
| assert_equal(np.nan, p_value) |
| tau, p_value = stats.weightedtau([0], [0]) |
| assert_equal(np.nan, tau) |
| assert_equal(np.nan, p_value) |
| |
| assert_raises(ValueError, stats.weightedtau, [0, 1], [0, 1, 2]) |
| assert_raises(ValueError, stats.weightedtau, [0, 1], [0, 1], [0]) |
| |
| x = [12, 2, 1, 12, 2] |
| y = [1, 4, 7, 1, np.nan] |
| tau, p_value = stats.weightedtau(x, y) |
| assert_approx_equal(tau, -0.56694968153682723) |
| x = [12, 2, np.nan, 12, 2] |
| tau, p_value = stats.weightedtau(x, y) |
| assert_approx_equal(tau, -0.56694968153682723) |
| |
| x = [12.0, 2.0, 1.0, 12.0, 2.0] |
| y = [1.0, 4.0, 7.0, 1.0, np.nan] |
| tau, p_value = stats.weightedtau(x, y) |
| assert_approx_equal(tau, -0.56694968153682723) |
| x = [12.0, 2.0, np.nan, 12.0, 2.0] |
| tau, p_value = stats.weightedtau(x, y) |
| assert_approx_equal(tau, -0.56694968153682723) |
| |
| x = [12.0, 2.0, 1.0, 12.0, 1.0] |
| y = [1.0, 4.0, 7.0, 1.0, 1.0] |
| tau, p_value = stats.weightedtau(x, y) |
| assert_approx_equal(tau, -0.6615242347139803) |
| x = [12.0, 2.0, np.nan, 12.0, np.nan] |
| tau, p_value = stats.weightedtau(x, y) |
| assert_approx_equal(tau, -0.6615242347139803) |
| y = [np.nan, 4.0, 7.0, np.nan, np.nan] |
| tau, p_value = stats.weightedtau(x, y) |
| assert_approx_equal(tau, -0.6615242347139803) |
|
|
|
|
| def test_segfault_issue_9710(): |
| |
| |
| |
| stats.weightedtau([1], [1.0]) |
| stats.weightedtau([1], [1.0]) |
| |
| stats.weightedtau([np.nan], [52]) |
|
|
|
|
| def test_kendall_tau_large(): |
| n = 172 |
| |
| x = np.arange(n + 1).astype(float) |
| y = np.arange(n + 1).astype(float) |
| y[-1] = np.nan |
| _, pval = stats.kendalltau(x, y, method='exact', nan_policy='omit') |
| assert_equal(pval, 0.0) |
|
|
|
|
| def test_weightedtau_vs_quadratic(): |
| |
| def wkq(x, y, rank, weigher, add): |
| tot = conc = disc = u = v = 0 |
| for (i, j) in product(range(len(x)), range(len(x))): |
| w = weigher(rank[i]) + weigher(rank[j]) if add \ |
| else weigher(rank[i]) * weigher(rank[j]) |
| tot += w |
| if x[i] == x[j]: |
| u += w |
| if y[i] == y[j]: |
| v += w |
| if x[i] < x[j] and y[i] < y[j] or x[i] > x[j] and y[i] > y[j]: |
| conc += w |
| elif x[i] < x[j] and y[i] > y[j] or x[i] > x[j] and y[i] < y[j]: |
| disc += w |
| return (conc - disc) / np.sqrt(tot - u) / np.sqrt(tot - v) |
|
|
| def weigher(x): |
| return 1. / (x + 1) |
|
|
| np.random.seed(42) |
| for s in range(3,10): |
| a = [] |
| |
| for i in range(s): |
| a += [i]*i |
| b = list(a) |
| np.random.shuffle(a) |
| np.random.shuffle(b) |
| |
| rank = np.arange(len(a), dtype=np.intp) |
| for _ in range(2): |
| for add in [True, False]: |
| expected = wkq(a, b, rank, weigher, add) |
| actual = stats.weightedtau(a, b, rank, weigher, add).statistic |
| assert_approx_equal(expected, actual) |
| |
| np.random.shuffle(rank) |
|
|
|
|
| class TestFindRepeats: |
|
|
| def test_basic(self): |
| a = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 5] |
| message = "`scipy.stats.find_repeats` is deprecated..." |
| with pytest.deprecated_call(match=message): |
| res, nums = stats.find_repeats(a) |
| assert_array_equal(res, [1, 2, 3, 4]) |
| assert_array_equal(nums, [3, 3, 2, 2]) |
|
|
| def test_empty_result(self): |
| |
| for a in [[10, 20, 50, 30, 40], []]: |
| message = "`scipy.stats.find_repeats` is deprecated..." |
| with pytest.deprecated_call(match=message): |
| repeated, counts = stats.find_repeats(a) |
| assert_array_equal(repeated, []) |
| assert_array_equal(counts, []) |
|
|
|
|
| class TestRegression: |
|
|
| def test_one_arg_deprecation(self): |
| x = np.arange(20).reshape((2, 10)) |
| message = "Inference of the two sets..." |
| with pytest.deprecated_call(match=message): |
| stats.linregress(x) |
| stats.linregress(x[0], x[1]) |
|
|
| def test_linregressBIGX(self): |
| |
| result = stats.linregress(X, BIG) |
| assert_almost_equal(result.intercept, 99999990) |
| assert_almost_equal(result.rvalue, 1.0) |
| |
| |
| assert_almost_equal(result.stderr, 0.0) |
| assert_almost_equal(result.intercept_stderr, 0.0) |
|
|
| def test_regressXX(self): |
| |
| |
| |
| |
| result = stats.linregress(X, X) |
| assert_almost_equal(result.intercept, 0.0) |
| assert_almost_equal(result.rvalue, 1.0) |
| |
| assert_almost_equal(result.stderr, 0.0) |
| assert_almost_equal(result.intercept_stderr, 0.0) |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| def test_regressZEROX(self): |
| |
| |
| |
| |
| result = stats.linregress(X, ZERO) |
| assert_almost_equal(result.intercept, 0.0) |
| assert_almost_equal(result.rvalue, 0.0) |
|
|
| def test_regress_simple(self): |
| |
| x = np.linspace(0, 100, 100) |
| y = 0.2 * np.linspace(0, 100, 100) + 10 |
| y += np.sin(np.linspace(0, 20, 100)) |
|
|
| result = stats.linregress(x, y) |
| lr = LinregressResult |
| assert_(isinstance(result, lr)) |
| assert_almost_equal(result.stderr, 2.3957814497838803e-3) |
|
|
| def test_regress_alternative(self): |
| |
| x = np.linspace(0, 100, 100) |
| y = 0.2 * np.linspace(0, 100, 100) + 10 |
| y += np.sin(np.linspace(0, 20, 100)) |
|
|
| with pytest.raises(ValueError, match="`alternative` must be 'less'..."): |
| stats.linregress(x, y, alternative="ekki-ekki") |
|
|
| res1 = stats.linregress(x, y, alternative="two-sided") |
|
|
| |
| res2 = stats.linregress(x, y, alternative="less") |
| assert_allclose(res2.pvalue, 1 - (res1.pvalue / 2)) |
|
|
| |
| res3 = stats.linregress(x, y, alternative="greater") |
| assert_allclose(res3.pvalue, res1.pvalue / 2) |
|
|
| assert res1.rvalue == res2.rvalue == res3.rvalue |
|
|
| def test_regress_against_R(self): |
| |
| |
| |
| |
| |
| |
|
|
| x = [151, 174, 138, 186, 128, 136, 179, 163, 152, 131] |
| y = [63, 81, 56, 91, 47, 57, 76, 72, 62, 48] |
| res = stats.linregress(x, y, alternative="two-sided") |
| |
| assert_allclose(res.slope, 0.6746104491292) |
| assert_allclose(res.intercept, -38.4550870760770) |
| assert_allclose(res.rvalue, np.sqrt(0.95478224775)) |
| assert_allclose(res.pvalue, 1.16440531074e-06) |
| assert_allclose(res.stderr, 0.0519051424731) |
| assert_allclose(res.intercept_stderr, 8.0490133029927) |
|
|
| |
| |
| @pytest.mark.filterwarnings('ignore::DeprecationWarning') |
| def test_regress_simple_onearg_rows(self): |
| |
| |
| x = np.linspace(0, 100, 100) |
| y = 0.2 * np.linspace(0, 100, 100) + 10 |
| y += np.sin(np.linspace(0, 20, 100)) |
| rows = np.vstack((x, y)) |
|
|
| result = stats.linregress(rows) |
| assert_almost_equal(result.stderr, 2.3957814497838803e-3) |
| assert_almost_equal(result.intercept_stderr, 1.3866936078570702e-1) |
|
|
| |
| |
| @pytest.mark.filterwarnings('ignore::DeprecationWarning') |
| def test_regress_simple_onearg_cols(self): |
| x = np.linspace(0, 100, 100) |
| y = 0.2 * np.linspace(0, 100, 100) + 10 |
| y += np.sin(np.linspace(0, 20, 100)) |
| columns = np.hstack((np.expand_dims(x, 1), np.expand_dims(y, 1))) |
|
|
| result = stats.linregress(columns) |
| assert_almost_equal(result.stderr, 2.3957814497838803e-3) |
| assert_almost_equal(result.intercept_stderr, 1.3866936078570702e-1) |
|
|
| |
| |
| @pytest.mark.filterwarnings('ignore::DeprecationWarning') |
| def test_regress_shape_error(self): |
| |
| |
| assert_raises(ValueError, stats.linregress, np.ones((3, 3))) |
|
|
| def test_linregress(self): |
| |
| x = np.arange(11) |
| y = np.arange(5, 16) |
| y[[(1), (-2)]] -= 1 |
| y[[(0), (-1)]] += 1 |
|
|
| result = stats.linregress(x, y) |
|
|
| |
| |
| |
| |
| def assert_ae(x, y): |
| return assert_almost_equal(x, y, decimal=14) |
| assert_ae(result.slope, 1.0) |
| assert_ae(result.intercept, 5.0) |
| assert_ae(result.rvalue, 0.98229948625750) |
| assert_ae(result.pvalue, 7.45259691e-008) |
| assert_ae(result.stderr, 0.063564172616372733) |
| assert_ae(result.intercept_stderr, 0.37605071654517686) |
|
|
| def test_regress_simple_negative_cor(self): |
| |
| |
| |
| a, n = 1e-71, 100000 |
| x = np.linspace(a, 2 * a, n) |
| y = np.linspace(2 * a, a, n) |
| result = stats.linregress(x, y) |
|
|
| |
| |
| assert_(result.rvalue >= -1) |
| assert_almost_equal(result.rvalue, -1) |
|
|
| |
| assert_(not np.isnan(result.stderr)) |
| assert_(not np.isnan(result.intercept_stderr)) |
|
|
| def test_linregress_result_attributes(self): |
| x = np.linspace(0, 100, 100) |
| y = 0.2 * np.linspace(0, 100, 100) + 10 |
| y += np.sin(np.linspace(0, 20, 100)) |
| result = stats.linregress(x, y) |
|
|
| |
| lr = LinregressResult |
| assert_(isinstance(result, lr)) |
|
|
| |
| attributes = ('slope', 'intercept', 'rvalue', 'pvalue', 'stderr') |
| check_named_results(result, attributes) |
| |
| assert 'intercept_stderr' in dir(result) |
|
|
| def test_regress_two_inputs(self): |
| |
| x = np.arange(2) |
| y = np.arange(3, 5) |
| result = stats.linregress(x, y) |
|
|
| |
| assert_almost_equal(result.pvalue, 0.0) |
|
|
| |
| assert_almost_equal(result.stderr, 0.0) |
| assert_almost_equal(result.intercept_stderr, 0.0) |
|
|
| def test_regress_two_inputs_horizontal_line(self): |
| |
| x = np.arange(2) |
| y = np.ones(2) |
| result = stats.linregress(x, y) |
|
|
| |
| assert_almost_equal(result.pvalue, 1.0) |
|
|
| |
| assert_almost_equal(result.stderr, 0.0) |
| assert_almost_equal(result.intercept_stderr, 0.0) |
|
|
| def test_nist_norris(self): |
| |
| |
| |
| x = [ 0.2, 337.4, 118.2, 884.6, 10.1, 226.5, |
| 666.3, 996.3, 448.6, 777.0, 558.2, 0.4, |
| 0.6, 775.5, 666.9, 338.0, 447.5, 11.6, |
| 556.0, 228.1, 995.8, 887.6, 120.2, 0.3, |
| 0.3, 556.8, 339.1, 887.2, 999.0, 779.0, |
| 11.1, 118.3, 229.2, 669.1, 448.9, 0.5] |
|
|
| y = [ 0.1, 338.8, 118.1, 888.0, 9.2, 228.1, |
| 668.5, 998.5, 449.1, 778.9, 559.2, 0.3, |
| 0.1, 778.1, 668.8, 339.3, 448.9, 10.8, |
| 557.7, 228.3, 998.0, 888.8, 119.6, 0.3, |
| 0.6, 557.6, 339.3, 888.0, 998.5, 778.9, |
| 10.2, 117.6, 228.9, 668.4, 449.2, 0.2] |
|
|
| result = stats.linregress(x, y) |
|
|
| assert_almost_equal(result.slope, 1.00211681802045) |
| assert_almost_equal(result.intercept, -0.262323073774029) |
| assert_almost_equal(result.rvalue**2, 0.999993745883712) |
| assert_almost_equal(result.pvalue, 0.0) |
| assert_almost_equal(result.stderr, 0.00042979684820) |
| assert_almost_equal(result.intercept_stderr, 0.23281823430153) |
|
|
| def test_compare_to_polyfit(self): |
| x = np.linspace(0, 100, 100) |
| y = 0.2 * np.linspace(0, 100, 100) + 10 |
| y += np.sin(np.linspace(0, 20, 100)) |
| result = stats.linregress(x, y) |
| poly = np.polyfit(x, y, 1) |
|
|
| |
| |
| assert_almost_equal(result.slope, poly[0]) |
| assert_almost_equal(result.intercept, poly[1]) |
|
|
| def test_empty_input(self): |
| assert_raises(ValueError, stats.linregress, [], []) |
|
|
| def test_nan_input(self): |
| x = np.arange(10.) |
| x[9] = np.nan |
|
|
| with np.errstate(invalid="ignore"): |
| result = stats.linregress(x, x) |
|
|
| |
| lr = LinregressResult |
| assert_(isinstance(result, lr)) |
| assert_array_equal(result, (np.nan,)*5) |
| assert_equal(result.intercept_stderr, np.nan) |
|
|
| def test_identical_x(self): |
| x = np.zeros(10) |
| y = np.random.random(10) |
| msg = "Cannot calculate a linear regression" |
| with assert_raises(ValueError, match=msg): |
| stats.linregress(x, y) |
|
|
|
|
| def test_theilslopes(): |
| |
| slope, intercept, lower, upper = stats.theilslopes([0,1,1]) |
| assert_almost_equal(slope, 0.5) |
| assert_almost_equal(intercept, 0.5) |
|
|
| msg = ("method must be either 'joint' or 'separate'." |
| "'joint_separate' is invalid.") |
| with pytest.raises(ValueError, match=msg): |
| stats.theilslopes([0, 1, 1], method='joint_separate') |
|
|
| slope, intercept, lower, upper = stats.theilslopes([0, 1, 1], |
| method='joint') |
| assert_almost_equal(slope, 0.5) |
| assert_almost_equal(intercept, 0.0) |
|
|
| |
| x = [1, 2, 3, 4, 10, 12, 18] |
| y = [9, 15, 19, 20, 45, 55, 78] |
| slope, intercept, lower, upper = stats.theilslopes(y, x, 0.07, |
| method='separate') |
| assert_almost_equal(slope, 4) |
| assert_almost_equal(intercept, 4.0) |
| assert_almost_equal(upper, 4.38, decimal=2) |
| assert_almost_equal(lower, 3.71, decimal=2) |
|
|
| slope, intercept, lower, upper = stats.theilslopes(y, x, 0.07, |
| method='joint') |
| assert_almost_equal(slope, 4) |
| assert_almost_equal(intercept, 6.0) |
| assert_almost_equal(upper, 4.38, decimal=2) |
| assert_almost_equal(lower, 3.71, decimal=2) |
|
|
|
|
| def test_cumfreq(): |
| x = [1, 4, 2, 1, 3, 1] |
| cumfreqs, lowlim, binsize, extrapoints = stats.cumfreq(x, numbins=4) |
| assert_array_almost_equal(cumfreqs, np.array([3., 4., 5., 6.])) |
| cumfreqs, lowlim, binsize, extrapoints = stats.cumfreq( |
| x, numbins=4, defaultreallimits=(1.5, 5)) |
| assert_(extrapoints == 3) |
|
|
| |
| attributes = ('cumcount', 'lowerlimit', 'binsize', 'extrapoints') |
| res = stats.cumfreq(x, numbins=4, defaultreallimits=(1.5, 5)) |
| check_named_results(res, attributes) |
|
|
|
|
| def test_relfreq(): |
| a = np.array([1, 4, 2, 1, 3, 1]) |
| relfreqs, lowlim, binsize, extrapoints = stats.relfreq(a, numbins=4) |
| assert_array_almost_equal(relfreqs, |
| array([0.5, 0.16666667, 0.16666667, 0.16666667])) |
|
|
| |
| attributes = ('frequency', 'lowerlimit', 'binsize', 'extrapoints') |
| res = stats.relfreq(a, numbins=4) |
| check_named_results(res, attributes) |
|
|
| |
| relfreqs2, lowlim, binsize, extrapoints = stats.relfreq([1, 4, 2, 1, 3, 1], |
| numbins=4) |
| assert_array_almost_equal(relfreqs, relfreqs2) |
|
|
|
|
| class TestScoreatpercentile: |
| def setup_method(self): |
| self.a1 = [3, 4, 5, 10, -3, -5, 6] |
| self.a2 = [3, -6, -2, 8, 7, 4, 2, 1] |
| self.a3 = [3., 4, 5, 10, -3, -5, -6, 7.0] |
|
|
| def test_basic(self): |
| x = arange(8) * 0.5 |
| assert_equal(stats.scoreatpercentile(x, 0), 0.) |
| assert_equal(stats.scoreatpercentile(x, 100), 3.5) |
| assert_equal(stats.scoreatpercentile(x, 50), 1.75) |
|
|
| def test_fraction(self): |
| scoreatperc = stats.scoreatpercentile |
|
|
| |
| assert_equal(scoreatperc(list(range(10)), 50), 4.5) |
| assert_equal(scoreatperc(list(range(10)), 50, (2,7)), 4.5) |
| assert_equal(scoreatperc(list(range(100)), 50, limit=(1, 8)), 4.5) |
| assert_equal(scoreatperc(np.array([1, 10,100]), 50, (10,100)), 55) |
| assert_equal(scoreatperc(np.array([1, 10,100]), 50, (1,10)), 5.5) |
|
|
| |
| assert_equal(scoreatperc(list(range(10)), 50, interpolation_method='fraction'), |
| 4.5) |
| assert_equal(scoreatperc(list(range(10)), 50, limit=(2, 7), |
| interpolation_method='fraction'), |
| 4.5) |
| assert_equal(scoreatperc(list(range(100)), 50, limit=(1, 8), |
| interpolation_method='fraction'), |
| 4.5) |
| assert_equal(scoreatperc(np.array([1, 10,100]), 50, (10, 100), |
| interpolation_method='fraction'), |
| 55) |
| assert_equal(scoreatperc(np.array([1, 10,100]), 50, (1,10), |
| interpolation_method='fraction'), |
| 5.5) |
|
|
| def test_lower_higher(self): |
| scoreatperc = stats.scoreatpercentile |
|
|
| |
| assert_equal(scoreatperc(list(range(10)), 50, |
| interpolation_method='lower'), 4) |
| assert_equal(scoreatperc(list(range(10)), 50, |
| interpolation_method='higher'), 5) |
| assert_equal(scoreatperc(list(range(10)), 50, (2,7), |
| interpolation_method='lower'), 4) |
| assert_equal(scoreatperc(list(range(10)), 50, limit=(2,7), |
| interpolation_method='higher'), 5) |
| assert_equal(scoreatperc(list(range(100)), 50, (1,8), |
| interpolation_method='lower'), 4) |
| assert_equal(scoreatperc(list(range(100)), 50, (1,8), |
| interpolation_method='higher'), 5) |
| assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (10, 100), |
| interpolation_method='lower'), 10) |
| assert_equal(scoreatperc(np.array([1, 10, 100]), 50, limit=(10, 100), |
| interpolation_method='higher'), 100) |
| assert_equal(scoreatperc(np.array([1, 10, 100]), 50, (1, 10), |
| interpolation_method='lower'), 1) |
| assert_equal(scoreatperc(np.array([1, 10, 100]), 50, limit=(1, 10), |
| interpolation_method='higher'), 10) |
|
|
| def test_sequence_per(self): |
| x = arange(8) * 0.5 |
| expected = np.array([0, 3.5, 1.75]) |
| res = stats.scoreatpercentile(x, [0, 100, 50]) |
| assert_allclose(res, expected) |
| assert_(isinstance(res, np.ndarray)) |
| |
| assert_allclose(stats.scoreatpercentile(x, np.array([0, 100, 50])), |
| expected) |
| |
| res2 = stats.scoreatpercentile(np.arange(12).reshape((3,4)), |
| np.array([0, 1, 100, 100]), axis=1) |
| expected2 = array([[0, 4, 8], |
| [0.03, 4.03, 8.03], |
| [3, 7, 11], |
| [3, 7, 11]]) |
| assert_allclose(res2, expected2) |
|
|
| def test_axis(self): |
| scoreatperc = stats.scoreatpercentile |
| x = arange(12).reshape(3, 4) |
|
|
| assert_equal(scoreatperc(x, (25, 50, 100)), [2.75, 5.5, 11.0]) |
|
|
| r0 = [[2, 3, 4, 5], [4, 5, 6, 7], [8, 9, 10, 11]] |
| assert_equal(scoreatperc(x, (25, 50, 100), axis=0), r0) |
|
|
| r1 = [[0.75, 4.75, 8.75], [1.5, 5.5, 9.5], [3, 7, 11]] |
| assert_equal(scoreatperc(x, (25, 50, 100), axis=1), r1) |
|
|
| x = array([[1, 1, 1], |
| [1, 1, 1], |
| [4, 4, 3], |
| [1, 1, 1], |
| [1, 1, 1]]) |
| score = stats.scoreatpercentile(x, 50) |
| assert_equal(score.shape, ()) |
| assert_equal(score, 1.0) |
| score = stats.scoreatpercentile(x, 50, axis=0) |
| assert_equal(score.shape, (3,)) |
| assert_equal(score, [1, 1, 1]) |
|
|
| def test_exception(self): |
| assert_raises(ValueError, stats.scoreatpercentile, [1, 2], 56, |
| interpolation_method='foobar') |
| assert_raises(ValueError, stats.scoreatpercentile, [1], 101) |
| assert_raises(ValueError, stats.scoreatpercentile, [1], -1) |
|
|
| def test_empty(self): |
| assert_equal(stats.scoreatpercentile([], 50), np.nan) |
| assert_equal(stats.scoreatpercentile(np.array([[], []]), 50), np.nan) |
| assert_equal(stats.scoreatpercentile([], [50, 99]), [np.nan, np.nan]) |
|
|
|
|
| class TestMode: |
|
|
| def test_empty(self): |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| vals, counts = stats.mode([]) |
| assert_equal(vals, np.array([])) |
| assert_equal(counts, np.array([])) |
|
|
| def test_scalar(self): |
| vals, counts = stats.mode(4.) |
| assert_equal(vals, np.array([4.])) |
| assert_equal(counts, np.array([1])) |
|
|
| def test_basic(self): |
| data1 = [3, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6] |
| vals = stats.mode(data1) |
| assert_equal(vals[0], 6) |
| assert_equal(vals[1], 3) |
|
|
| def test_axes(self): |
| data1 = [10, 10, 30, 40] |
| data2 = [10, 10, 10, 10] |
| data3 = [20, 10, 20, 20] |
| data4 = [30, 30, 30, 30] |
| data5 = [40, 30, 30, 30] |
| arr = np.array([data1, data2, data3, data4, data5]) |
|
|
| vals = stats.mode(arr, axis=None, keepdims=True) |
| assert_equal(vals[0], np.array([[30]])) |
| assert_equal(vals[1], np.array([[8]])) |
|
|
| vals = stats.mode(arr, axis=0, keepdims=True) |
| assert_equal(vals[0], np.array([[10, 10, 30, 30]])) |
| assert_equal(vals[1], np.array([[2, 3, 3, 2]])) |
|
|
| vals = stats.mode(arr, axis=1, keepdims=True) |
| assert_equal(vals[0], np.array([[10], [10], [20], [30], [30]])) |
| assert_equal(vals[1], np.array([[2], [4], [3], [4], [3]])) |
|
|
| @pytest.mark.parametrize('axis', np.arange(-4, 0)) |
| def test_negative_axes_gh_15375(self, axis): |
| np.random.seed(984213899) |
| a = np.random.rand(10, 11, 12, 13) |
| res0 = stats.mode(a, axis=a.ndim+axis) |
| res1 = stats.mode(a, axis=axis) |
| np.testing.assert_array_equal(res0, res1) |
|
|
| def test_mode_result_attributes(self): |
| data1 = [3, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6] |
| data2 = [] |
| actual = stats.mode(data1) |
| attributes = ('mode', 'count') |
| check_named_results(actual, attributes) |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| actual2 = stats.mode(data2) |
| check_named_results(actual2, attributes) |
|
|
| def test_mode_nan(self): |
| data1 = [3, np.nan, 5, 1, 10, 23, 3, 2, 6, 8, 6, 10, 6] |
| actual = stats.mode(data1) |
| assert_equal(actual, (6, 3)) |
|
|
| actual = stats.mode(data1, nan_policy='omit') |
| assert_equal(actual, (6, 3)) |
| assert_raises(ValueError, stats.mode, data1, nan_policy='raise') |
| assert_raises(ValueError, stats.mode, data1, nan_policy='foobar') |
|
|
| @pytest.mark.parametrize("data", [ |
| [3, 5, 1, 1, 3], |
| [3, np.nan, 5, 1, 1, 3], |
| [3, 5, 1], |
| [3, np.nan, 5, 1], |
| ]) |
| @pytest.mark.parametrize('keepdims', [False, True]) |
| def test_smallest_equal(self, data, keepdims): |
| result = stats.mode(data, nan_policy='omit', keepdims=keepdims) |
| if keepdims: |
| assert_equal(result[0][0], 1) |
| else: |
| assert_equal(result[0], 1) |
|
|
| @pytest.mark.parametrize('axis', np.arange(-3, 3)) |
| def test_mode_shape_gh_9955(self, axis, dtype=np.float64): |
| rng = np.random.default_rng(984213899) |
| a = rng.uniform(size=(3, 4, 5)).astype(dtype) |
| res = stats.mode(a, axis=axis, keepdims=False) |
| reference_shape = list(a.shape) |
| reference_shape.pop(axis) |
| np.testing.assert_array_equal(res.mode.shape, reference_shape) |
| np.testing.assert_array_equal(res.count.shape, reference_shape) |
|
|
| def test_nan_policy_propagate_gh_9815(self): |
| |
| |
| a = [2, np.nan, 1, np.nan] |
| res = stats.mode(a) |
| assert np.isnan(res.mode) and res.count == 2 |
|
|
| def test_keepdims(self): |
| |
| a = np.zeros((1, 2, 3, 0)) |
|
|
| res = stats.mode(a, axis=1, keepdims=False) |
| assert res.mode.shape == res.count.shape == (1, 3, 0) |
|
|
| res = stats.mode(a, axis=1, keepdims=True) |
| assert res.mode.shape == res.count.shape == (1, 1, 3, 0) |
|
|
| |
| a = [[1, 3, 3, np.nan], [1, 1, np.nan, 1]] |
|
|
| res = stats.mode(a, axis=1, keepdims=False) |
| assert_array_equal(res.mode, [3, 1]) |
| assert_array_equal(res.count, [2, 3]) |
|
|
| res = stats.mode(a, axis=1, keepdims=True) |
| assert_array_equal(res.mode, [[3], [1]]) |
| assert_array_equal(res.count, [[2], [3]]) |
|
|
| a = np.array(a) |
| res = stats.mode(a, axis=None, keepdims=False) |
| ref = stats.mode(a.ravel(), keepdims=False) |
| assert_array_equal(res, ref) |
| assert res.mode.shape == ref.mode.shape == () |
|
|
| res = stats.mode(a, axis=None, keepdims=True) |
| ref = stats.mode(a.ravel(), keepdims=True) |
| assert_equal(res.mode.ravel(), ref.mode.ravel()) |
| assert res.mode.shape == (1, 1) |
| assert_equal(res.count.ravel(), ref.count.ravel()) |
| assert res.count.shape == (1, 1) |
|
|
| |
| a = [[1, np.nan, np.nan, np.nan, 1], |
| [np.nan, np.nan, np.nan, np.nan, 2], |
| [1, 2, np.nan, 5, 5]] |
|
|
| res = stats.mode(a, axis=1, keepdims=False, nan_policy='omit') |
| assert_array_equal(res.mode, [1, 2, 5]) |
| assert_array_equal(res.count, [2, 1, 2]) |
|
|
| res = stats.mode(a, axis=1, keepdims=True, nan_policy='omit') |
| assert_array_equal(res.mode, [[1], [2], [5]]) |
| assert_array_equal(res.count, [[2], [1], [2]]) |
|
|
| a = np.array(a) |
| res = stats.mode(a, axis=None, keepdims=False, nan_policy='omit') |
| ref = stats.mode(a.ravel(), keepdims=False, nan_policy='omit') |
| assert_array_equal(res, ref) |
| assert res.mode.shape == ref.mode.shape == () |
|
|
| res = stats.mode(a, axis=None, keepdims=True, nan_policy='omit') |
| ref = stats.mode(a.ravel(), keepdims=True, nan_policy='omit') |
| assert_equal(res.mode.ravel(), ref.mode.ravel()) |
| assert res.mode.shape == (1, 1) |
| assert_equal(res.count.ravel(), ref.count.ravel()) |
| assert res.count.shape == (1, 1) |
|
|
| @pytest.mark.parametrize("nan_policy", ['propagate', 'omit']) |
| def test_gh16955(self, nan_policy): |
| |
| shape = (4, 3) |
| data = np.ones(shape) |
| data[0, 0] = np.nan |
| res = stats.mode(a=data, axis=1, keepdims=False, nan_policy=nan_policy) |
| assert_array_equal(res.mode, [1, 1, 1, 1]) |
| assert_array_equal(res.count, [2, 3, 3, 3]) |
|
|
| |
| |
| my_dtype = np.dtype([('asdf', np.uint8), ('qwer', np.float64, (3,))]) |
| test = np.zeros(10, dtype=my_dtype) |
| message = "Argument `a` is not....|An argument has dtype..." |
| with pytest.raises(TypeError, match=message): |
| stats.mode(test, nan_policy=nan_policy) |
|
|
| def test_gh9955(self): |
| |
| |
| |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = stats.mode([]) |
| ref = (np.nan, 0) |
| assert_equal(res, ref) |
|
|
| with pytest.warns(SmallSampleWarning, match=too_small_1d_omit): |
| res = stats.mode([np.nan], nan_policy='omit') |
| assert_equal(res, ref) |
|
|
| a = [[10., 20., 20.], [np.nan, np.nan, np.nan]] |
| with pytest.warns(SmallSampleWarning, match=too_small_nd_omit): |
| res = stats.mode(a, axis=1, nan_policy='omit') |
| ref = ([20, np.nan], [2, 0]) |
| assert_equal(res, ref) |
|
|
| res = stats.mode(a, axis=1, nan_policy='propagate') |
| ref = ([20, np.nan], [2, 3]) |
| assert_equal(res, ref) |
|
|
| z = np.array([[], []]) |
| with pytest.warns(SmallSampleWarning, match=too_small_nd_not_omit): |
| res = stats.mode(z, axis=1) |
| ref = ([np.nan, np.nan], [0, 0]) |
| assert_equal(res, ref) |
|
|
| @pytest.mark.filterwarnings('ignore::RuntimeWarning') |
| @pytest.mark.parametrize('z', [np.empty((0, 1, 2)), np.empty((1, 1, 2))]) |
| def test_gh17214(self, z): |
| if z.size == 0: |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = stats.mode(z, axis=None, keepdims=True) |
| else: |
| res = stats.mode(z, axis=None, keepdims=True) |
| ref = np.mean(z, axis=None, keepdims=True) |
| assert res[0].shape == res[1].shape == ref.shape == (1, 1, 1) |
|
|
| def test_raise_non_numeric_gh18254(self): |
| message = ("...only boolean and numerical dtypes..." if SCIPY_ARRAY_API |
| else "Argument `a` is not recognized as numeric.") |
|
|
| class ArrLike: |
| def __init__(self, x): |
| self._x = x |
|
|
| def __array__(self, dtype=None, copy=None): |
| return self._x.astype(object) |
|
|
| with pytest.raises(TypeError, match=message): |
| stats.mode(ArrLike(np.arange(3))) |
| with pytest.raises(TypeError, match=message): |
| stats.mode(np.arange(3, dtype=object)) |
|
|
|
|
| @array_api_compatible |
| class TestSEM: |
|
|
| testcase = [1., 2., 3., 4.] |
| scalar_testcase = 4. |
|
|
| def test_sem_scalar(self, xp): |
| |
| |
|
|
| |
| |
| scalar_testcase = xp.asarray(self.scalar_testcase)[()] |
| if is_numpy(xp): |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| y = stats.sem(scalar_testcase) |
| else: |
| |
| with np.testing.suppress_warnings() as sup: |
| sup.filter(UserWarning) |
| sup.filter(RuntimeWarning) |
| y = stats.sem(scalar_testcase) |
| assert xp.isnan(y) |
|
|
| def test_sem(self, xp): |
| testcase = xp.asarray(self.testcase) |
| y = stats.sem(testcase) |
| xp_assert_close(y, xp.asarray(0.6454972244)) |
| n = len(self.testcase) |
| xp_assert_close(stats.sem(testcase, ddof=0) * (n/(n-2))**0.5, |
| stats.sem(testcase, ddof=2)) |
|
|
| x = xp.arange(10.) |
| x = xp.where(x == 9, xp.asarray(xp.nan), x) |
| xp_assert_equal(stats.sem(x), xp.asarray(xp.nan)) |
|
|
| @skip_xp_backends(np_only=True, |
| reason='`nan_policy` only supports NumPy backend') |
| def test_sem_nan_policy(self, xp): |
| x = np.arange(10.) |
| x[9] = np.nan |
| assert_equal(stats.sem(x, nan_policy='omit'), 0.9128709291752769) |
| assert_raises(ValueError, stats.sem, x, nan_policy='raise') |
| assert_raises(ValueError, stats.sem, x, nan_policy='foobar') |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @skip_xp_backends('jax.numpy', reason="JAX can't do item assignment") |
| class TestZmapZscore: |
|
|
| @pytest.mark.parametrize( |
| 'x, y', |
| [([1., 2., 3., 4.], [1., 2., 3., 4.]), |
| ([1., 2., 3.], [0., 1., 2., 3., 4.])] |
| ) |
| def test_zmap(self, x, y, xp): |
| |
| |
| x, y = xp.asarray(x), xp.asarray(y) |
| xp_test = array_namespace(x, y) |
| expected = (x - xp.mean(y)) / xp_test.std(y, correction=0) |
| z = stats.zmap(x, y) |
| xp_assert_close(z, expected) |
|
|
| def test_zmap_axis(self, xp): |
| |
| x = np.array([[0.0, 0.0, 1.0, 1.0], |
| [1.0, 1.0, 1.0, 2.0], |
| [2.0, 0.0, 2.0, 0.0]]) |
|
|
| t1 = 1.0/(2.0/3)**0.5 |
| t2 = 3.**0.5/3 |
| t3 = 2.**0.5 |
|
|
| z0 = stats.zmap(x, x, axis=0) |
| z1 = stats.zmap(x, x, axis=1) |
|
|
| z0_expected = [[-t1, -t3/2, -t3/2, 0.0], |
| [0.0, t3, -t3/2, t1], |
| [t1, -t3/2, t3, -t1]] |
| z1_expected = [[-1.0, -1.0, 1.0, 1.0], |
| [-t2, -t2, -t2, 3.**0.5], |
| [1.0, -1.0, 1.0, -1.0]] |
|
|
| xp_assert_close(z0, z0_expected) |
| xp_assert_close(z1, z1_expected) |
|
|
| def test_zmap_ddof(self, xp): |
| |
| x = xp.asarray([[0.0, 0.0, 1.0, 1.0], |
| [0.0, 1.0, 2.0, 3.0]]) |
|
|
| z = stats.zmap(x, x, axis=1, ddof=1) |
|
|
| z0_expected = xp.asarray([-0.5, -0.5, 0.5, 0.5])/(1.0/3**0.5) |
| z1_expected = xp.asarray([-1.5, -0.5, 0.5, 1.5])/(5./3)**0.5 |
| xp_assert_close(z[0, :], z0_expected) |
| xp_assert_close(z[1, :], z1_expected) |
|
|
| @pytest.mark.parametrize('ddof', [0, 2]) |
| def test_zmap_nan_policy_omit(self, ddof, xp): |
| |
| |
| scores = xp.asarray([-3, -1, 2, np.nan]) |
| compare = xp.asarray([-8, -3, 2, 7, 12, np.nan]) |
| z = stats.zmap(scores, compare, ddof=ddof, nan_policy='omit') |
| ref = stats.zmap(scores, compare[~xp.isnan(compare)], ddof=ddof) |
| xp_assert_close(z, ref) |
|
|
| @pytest.mark.parametrize('ddof', [0, 2]) |
| def test_zmap_nan_policy_omit_with_axis(self, ddof, xp): |
| scores = xp.reshape(xp.arange(-5.0, 9.0), (2, -1)) |
| compare = xp.reshape(xp.linspace(-8, 6, 24), (2, -1)) |
| compare[0, 4] = xp.nan |
| compare[0, 6] = xp.nan |
| compare[1, 1] = xp.nan |
| z = stats.zmap(scores, compare, nan_policy='omit', axis=1, ddof=ddof) |
| res0 = stats.zmap(scores[0, :], compare[0, :][~xp.isnan(compare[0, :])], |
| ddof=ddof) |
| res1 = stats.zmap(scores[1, :], compare[1, :][~xp.isnan(compare[1, :])], |
| ddof=ddof) |
| expected = xp.stack((res0, res1)) |
| xp_assert_close(z, expected) |
|
|
| def test_zmap_nan_policy_raise(self, xp): |
| scores = xp.asarray([1, 2, 3]) |
| compare = xp.asarray([-8, -3, 2, 7, 12, xp.nan]) |
| with pytest.raises(ValueError, match='input contains nan'): |
| stats.zmap(scores, compare, nan_policy='raise') |
|
|
| def test_zscore(self, xp): |
| |
| |
| y = stats.zscore(xp.asarray([1, 2, 3, 4])) |
| desired = [-1.3416407864999, -0.44721359549996, |
| 0.44721359549996, 1.3416407864999] |
| xp_assert_close(y, xp.asarray(desired)) |
|
|
| def test_zscore_axis(self, xp): |
| |
| x = xp.asarray([[0.0, 0.0, 1.0, 1.0], |
| [1.0, 1.0, 1.0, 2.0], |
| [2.0, 0.0, 2.0, 0.0]]) |
|
|
| t1 = 1.0/(2.0/3)**0.5 |
| t2 = 3**0.5/3 |
| t3 = 2**0.5 |
|
|
| z0 = stats.zscore(x, axis=0) |
| z1 = stats.zscore(x, axis=1) |
|
|
| z0_expected = [[-t1, -t3/2, -t3/2, 0.0], |
| [0.0, t3, -t3/2, t1], |
| [t1, -t3/2, t3, -t1]] |
| z1_expected = [[-1.0, -1.0, 1.0, 1.0], |
| [-t2, -t2, -t2, 3**0.5], |
| [1.0, -1.0, 1.0, -1.0]] |
|
|
| xp_assert_close(z0, xp.asarray(z0_expected)) |
| xp_assert_close(z1, xp.asarray(z1_expected)) |
|
|
| def test_zscore_ddof(self, xp): |
| |
| x = xp.asarray([[0.0, 0.0, 1.0, 1.0], |
| [0.0, 1.0, 2.0, 3.0]]) |
|
|
| z = stats.zscore(x, axis=1, ddof=1) |
|
|
| z0_expected = xp.asarray([-0.5, -0.5, 0.5, 0.5])/(1.0/3**0.5) |
| z1_expected = xp.asarray([-1.5, -0.5, 0.5, 1.5])/((5./3)**0.5) |
| xp_assert_close(z[0, :], z0_expected) |
| xp_assert_close(z[1, :], z1_expected) |
|
|
| @skip_xp_backends('cupy', reason="cupy/cupy#8391") |
| def test_zscore_nan_propagate(self, xp): |
| x = xp.asarray([1, 2, np.nan, 4, 5]) |
| z = stats.zscore(x, nan_policy='propagate') |
| xp_assert_equal(z, xp.full(x.shape, xp.nan)) |
|
|
| def test_zscore_nan_omit(self, xp): |
| x = xp.asarray([1, 2, xp.nan, 4, 5]) |
|
|
| z = stats.zscore(x, nan_policy='omit') |
|
|
| expected = xp.asarray([-1.2649110640673518, |
| -0.6324555320336759, |
| xp.nan, |
| 0.6324555320336759, |
| 1.2649110640673518 |
| ]) |
| xp_assert_close(z, expected) |
|
|
| def test_zscore_nan_omit_with_ddof(self, xp): |
| x = xp.asarray([xp.nan, 1.0, 3.0, 5.0, 7.0, 9.0]) |
| xp_test = array_namespace(x) |
| z = stats.zscore(x, ddof=1, nan_policy='omit') |
| expected = xp_test.concat([xp.asarray([xp.nan]), stats.zscore(x[1:], ddof=1)]) |
| xp_assert_close(z, expected) |
|
|
| def test_zscore_nan_raise(self, xp): |
| x = xp.asarray([1, 2, xp.nan, 4, 5]) |
| with pytest.raises(ValueError, match="The input contains nan..."): |
| stats.zscore(x, nan_policy='raise') |
|
|
| @skip_xp_backends('cupy', reason="cupy/cupy#8391") |
| def test_zscore_constant_input_1d(self, xp): |
| x = xp.asarray([-0.087] * 3) |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| z = stats.zscore(x) |
| xp_assert_equal(z, xp.full(x.shape, xp.nan)) |
|
|
| def test_zscore_constant_input_2d(self, xp): |
| x = xp.asarray([[10.0, 10.0, 10.0, 10.0], |
| [10.0, 11.0, 12.0, 13.0]]) |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| z0 = stats.zscore(x, axis=0) |
| xp_assert_close(z0, xp.asarray([[xp.nan, -1.0, -1.0, -1.0], |
| [xp.nan, 1.0, 1.0, 1.0]])) |
|
|
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| z1 = stats.zscore(x, axis=1) |
| xp_assert_equal(z1, xp.stack([xp.asarray([xp.nan, xp.nan, xp.nan, xp.nan]), |
| stats.zscore(x[1, :])])) |
|
|
| z = stats.zscore(x, axis=None) |
| xp_assert_equal(z, xp.reshape(stats.zscore(xp.reshape(x, (-1,))), x.shape)) |
|
|
| y = xp.ones((3, 6)) |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| z = stats.zscore(y, axis=None) |
| xp_assert_equal(z, xp.full(y.shape, xp.asarray(xp.nan))) |
|
|
| def test_zscore_constant_input_2d_nan_policy_omit(self, xp): |
| x = xp.asarray([[10.0, 10.0, 10.0, 10.0], |
| [10.0, 11.0, 12.0, xp.nan], |
| [10.0, 12.0, xp.nan, 10.0]]) |
| s = (3/2)**0.5 |
| s2 = 2**0.5 |
|
|
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| z0 = stats.zscore(x, nan_policy='omit', axis=0) |
| xp_assert_close(z0, xp.asarray([[xp.nan, -s, -1.0, xp.nan], |
| [xp.nan, 0, 1.0, xp.nan], |
| [xp.nan, s, xp.nan, xp.nan]])) |
|
|
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| z1 = stats.zscore(x, nan_policy='omit', axis=1) |
| xp_assert_close(z1, xp.asarray([[xp.nan, xp.nan, xp.nan, xp.nan], |
| [-s, 0, s, xp.nan], |
| [-s2/2, s2, xp.nan, -s2/2]])) |
|
|
| def test_zscore_2d_all_nan_row(self, xp): |
| |
| x = xp.asarray([[np.nan, np.nan, np.nan, np.nan], |
| [10.0, 10.0, 12.0, 12.0]]) |
| z = stats.zscore(x, nan_policy='omit', axis=1) |
| xp_assert_close(z, xp.asarray([[np.nan, np.nan, np.nan, np.nan], |
| [-1.0, -1.0, 1.0, 1.0]])) |
|
|
| @skip_xp_backends('cupy', reason="cupy/cupy#8391") |
| def test_zscore_2d_all_nan(self, xp): |
| |
| y = xp.full((2, 3), xp.nan) |
| z = stats.zscore(y, nan_policy='omit', axis=None) |
| xp_assert_equal(z, y) |
|
|
| @pytest.mark.parametrize('x', [np.array([]), np.zeros((3, 0, 5))]) |
| def test_zscore_empty_input(self, x, xp): |
| x = xp.asarray(x) |
| z = stats.zscore(x) |
| xp_assert_equal(z, x) |
|
|
| def test_gzscore_normal_array(self, xp): |
| x = np.asarray([1, 2, 3, 4]) |
| z = stats.gzscore(xp.asarray(x)) |
| desired = np.log(x / stats.gmean(x)) / np.log(stats.gstd(x, ddof=0)) |
| xp_assert_close(z, xp.asarray(desired, dtype=xp.asarray(1.).dtype)) |
|
|
| @skip_xp_invalid_arg |
| def test_gzscore_masked_array(self, xp): |
| x = np.array([1, 2, -1, 3, 4]) |
| mask = [0, 0, 1, 0, 0] |
| mx = np.ma.masked_array(x, mask=mask) |
| z = stats.gzscore(mx) |
| desired = ([-1.526072095151, -0.194700599824, np.inf, 0.584101799472, |
| 1.136670895503]) |
| desired = np.ma.masked_array(desired, mask=mask) |
| assert_allclose(z.compressed(), desired.compressed()) |
| assert_allclose(z.mask, desired.mask) |
| assert isinstance(z, np.ma.MaskedArray) |
|
|
| @skip_xp_invalid_arg |
| def test_zscore_masked_element_0_gh19039(self, xp): |
| |
| rng = np.random.default_rng(8675309) |
| x = rng.standard_normal(10) |
| mask = np.zeros_like(x) |
| y = np.ma.masked_array(x, mask) |
| y.mask[0] = True |
|
|
| ref = stats.zscore(x[1:]) |
| assert not np.any(np.isnan(ref)) |
| res = stats.zscore(y) |
| assert_allclose(res[1:], ref) |
| res = stats.zscore(y, axis=None) |
| assert_allclose(res[1:], ref) |
|
|
| y[1:] = y[1] |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| res = stats.zscore(y) |
| assert_equal(res[1:], np.nan) |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| res = stats.zscore(y, axis=None) |
| assert_equal(res[1:], np.nan) |
|
|
| def test_degenerate_input(self, xp): |
| scores = xp.arange(3) |
| compare = xp.ones(3) |
| ref = xp.asarray([-xp.inf, xp.nan, xp.inf]) |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| res = stats.zmap(scores, compare) |
| xp_assert_equal(res, ref) |
|
|
| @pytest.mark.skip_xp_backends('array_api_strict', reason='needs array-api#850') |
| def test_complex_gh22404(self, xp): |
| res = stats.zmap(xp.asarray([1, 2, 3, 4]), xp.asarray([1, 1j, -1, -1j])) |
| ref = xp.asarray([1.+0.j, 2.+0.j, 3.+0.j, 4.+0.j]) |
| xp_assert_close(res, ref) |
|
|
|
|
| class TestMedianAbsDeviation: |
| def setup_class(self): |
| self.dat_nan = np.array([2.20, 2.20, 2.4, 2.4, 2.5, 2.7, 2.8, 2.9, |
| 3.03, 3.03, 3.10, 3.37, 3.4, 3.4, 3.4, 3.5, |
| 3.6, 3.7, 3.7, 3.7, 3.7, 3.77, 5.28, np.nan]) |
| self.dat = np.array([2.20, 2.20, 2.4, 2.4, 2.5, 2.7, 2.8, 2.9, 3.03, |
| 3.03, 3.10, 3.37, 3.4, 3.4, 3.4, 3.5, 3.6, 3.7, |
| 3.7, 3.7, 3.7, 3.77, 5.28, 28.95]) |
|
|
| def test_median_abs_deviation(self): |
| assert_almost_equal(stats.median_abs_deviation(self.dat, axis=None), |
| 0.355) |
| dat = self.dat.reshape(6, 4) |
| mad = stats.median_abs_deviation(dat, axis=0) |
| mad_expected = np.asarray([0.435, 0.5, 0.45, 0.4]) |
| assert_array_almost_equal(mad, mad_expected) |
|
|
| def test_mad_nan_omit(self): |
| mad = stats.median_abs_deviation(self.dat_nan, nan_policy='omit') |
| assert_almost_equal(mad, 0.34) |
|
|
| def test_axis_and_nan(self): |
| x = np.array([[1.0, 2.0, 3.0, 4.0, np.nan], |
| [1.0, 4.0, 5.0, 8.0, 9.0]]) |
| mad = stats.median_abs_deviation(x, axis=1) |
| assert_equal(mad, np.array([np.nan, 3.0])) |
|
|
| def test_nan_policy_omit_with_inf(self): |
| z = np.array([1, 3, 4, 6, 99, np.nan, np.inf]) |
| mad = stats.median_abs_deviation(z, nan_policy='omit') |
| assert_equal(mad, 3.0) |
|
|
| @pytest.mark.parametrize('axis', [0, 1, 2, None]) |
| def test_size_zero_with_axis(self, axis): |
| x = np.zeros((3, 0, 4)) |
| mad = stats.median_abs_deviation(x, axis=axis) |
| assert_equal(mad, np.full_like(x.sum(axis=axis), fill_value=np.nan)) |
|
|
| @pytest.mark.parametrize('nan_policy, expected', |
| [('omit', np.array([np.nan, 1.5, 1.5])), |
| ('propagate', np.array([np.nan, np.nan, 1.5]))]) |
| def test_nan_policy_with_axis(self, nan_policy, expected): |
| x = np.array([[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], |
| [1, 5, 3, 6, np.nan, np.nan], |
| [5, 6, 7, 9, 9, 10]]) |
| mad = stats.median_abs_deviation(x, nan_policy=nan_policy, axis=1) |
| assert_equal(mad, expected) |
|
|
| @pytest.mark.parametrize('axis, expected', |
| [(1, [2.5, 2.0, 12.0]), (None, 4.5)]) |
| def test_center_mean_with_nan(self, axis, expected): |
| x = np.array([[1, 2, 4, 9, np.nan], |
| [0, 1, 1, 1, 12], |
| [-10, -10, -10, 20, 20]]) |
| mad = stats.median_abs_deviation(x, center=np.mean, nan_policy='omit', |
| axis=axis) |
| assert_allclose(mad, expected, rtol=1e-15, atol=1e-15) |
|
|
| def test_center_not_callable(self): |
| with pytest.raises(TypeError, match='callable'): |
| stats.median_abs_deviation([1, 2, 3, 5], center=99) |
|
|
|
|
| def _check_warnings(warn_list, expected_type, expected_len): |
| """ |
| Checks that all of the warnings from a list returned by |
| `warnings.catch_all(record=True)` are of the required type and that the list |
| contains expected number of warnings. |
| """ |
| assert_equal(len(warn_list), expected_len, "number of warnings") |
| for warn_ in warn_list: |
| assert_(warn_.category is expected_type) |
|
|
|
|
| class TestIQR: |
|
|
| def test_basic(self): |
| x = np.arange(8) * 0.5 |
| np.random.shuffle(x) |
| assert_equal(stats.iqr(x), 1.75) |
|
|
| def test_api(self): |
| d = np.ones((5, 5)) |
| stats.iqr(d) |
| stats.iqr(d, None) |
| stats.iqr(d, 1) |
| stats.iqr(d, (0, 1)) |
| stats.iqr(d, None, (10, 90)) |
| stats.iqr(d, None, (30, 20), 1.0) |
| stats.iqr(d, None, (25, 75), 1.5, 'propagate') |
| stats.iqr(d, None, (50, 50), 'normal', 'raise', 'linear') |
| stats.iqr(d, None, (25, 75), -0.4, 'omit', 'lower', True) |
|
|
| @pytest.mark.parametrize('x', [[], np.arange(0)]) |
| def test_empty(self, x): |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| assert_equal(stats.iqr(x), np.nan) |
|
|
| def test_constant(self): |
| |
| x = np.ones((7, 4)) |
| assert_equal(stats.iqr(x), 0.0) |
| assert_array_equal(stats.iqr(x, axis=0), np.zeros(4)) |
| assert_array_equal(stats.iqr(x, axis=1), np.zeros(7)) |
| assert_equal(stats.iqr(x, interpolation='linear'), 0.0) |
| assert_equal(stats.iqr(x, interpolation='midpoint'), 0.0) |
| assert_equal(stats.iqr(x, interpolation='nearest'), 0.0) |
| assert_equal(stats.iqr(x, interpolation='lower'), 0.0) |
| assert_equal(stats.iqr(x, interpolation='higher'), 0.0) |
|
|
| |
| |
| y = np.ones((4, 5, 6)) * np.arange(6) |
| assert_array_equal(stats.iqr(y, axis=0), np.zeros((5, 6))) |
| assert_array_equal(stats.iqr(y, axis=1), np.zeros((4, 6))) |
| assert_array_equal(stats.iqr(y, axis=2), np.full((4, 5), 2.5)) |
| assert_array_equal(stats.iqr(y, axis=(0, 1)), np.zeros(6)) |
| assert_array_equal(stats.iqr(y, axis=(0, 2)), np.full(5, 3.)) |
| assert_array_equal(stats.iqr(y, axis=(1, 2)), np.full(4, 3.)) |
|
|
| def test_scalarlike(self): |
| x = np.arange(1) + 7.0 |
| assert_equal(stats.iqr(x[0]), 0.0) |
| assert_equal(stats.iqr(x), 0.0) |
| assert_array_equal(stats.iqr(x, keepdims=True), [0.0]) |
|
|
| def test_2D(self): |
| x = np.arange(15).reshape((3, 5)) |
| assert_equal(stats.iqr(x), 7.0) |
| assert_array_equal(stats.iqr(x, axis=0), np.full(5, 5.)) |
| assert_array_equal(stats.iqr(x, axis=1), np.full(3, 2.)) |
| assert_array_equal(stats.iqr(x, axis=(0, 1)), 7.0) |
| assert_array_equal(stats.iqr(x, axis=(1, 0)), 7.0) |
|
|
| def test_axis(self): |
| |
| o = np.random.normal(size=(71, 23)) |
| x = np.dstack([o] * 10) |
| q = stats.iqr(o) |
|
|
| assert_equal(stats.iqr(x, axis=(0, 1)), q) |
| x = np.moveaxis(x, -1, 0) |
| assert_equal(stats.iqr(x, axis=(2, 1)), q) |
| x = x.swapaxes(0, 1) |
| assert_equal(stats.iqr(x, axis=(0, 2)), q) |
| x = x.swapaxes(0, 1) |
|
|
| assert_equal(stats.iqr(x, axis=(0, 1, 2)), |
| stats.iqr(x, axis=None)) |
| assert_equal(stats.iqr(x, axis=(0,)), |
| stats.iqr(x, axis=0)) |
|
|
| d = np.arange(3 * 5 * 7 * 11) |
| |
| |
| np.random.shuffle(d) |
| d = d.reshape((3, 5, 7, 11)) |
| assert_equal(stats.iqr(d, axis=(0, 1, 2))[0], |
| stats.iqr(d[:,:,:, 0].ravel())) |
| assert_equal(stats.iqr(d, axis=(0, 1, 3))[1], |
| stats.iqr(d[:,:, 1,:].ravel())) |
| assert_equal(stats.iqr(d, axis=(3, 1, -4))[2], |
| stats.iqr(d[:,:, 2,:].ravel())) |
| assert_equal(stats.iqr(d, axis=(3, 1, 2))[2], |
| stats.iqr(d[2,:,:,:].ravel())) |
| assert_equal(stats.iqr(d, axis=(3, 2))[2, 1], |
| stats.iqr(d[2, 1,:,:].ravel())) |
| assert_equal(stats.iqr(d, axis=(1, -2))[2, 1], |
| stats.iqr(d[2, :, :, 1].ravel())) |
| assert_equal(stats.iqr(d, axis=(1, 3))[2, 2], |
| stats.iqr(d[2, :, 2,:].ravel())) |
|
|
| assert_raises(AxisError, stats.iqr, d, axis=4) |
| assert_raises(ValueError, stats.iqr, d, axis=(0, 0)) |
|
|
| def test_rng(self): |
| x = np.arange(5) |
| assert_equal(stats.iqr(x), 2) |
| assert_equal(stats.iqr(x, rng=(25, 87.5)), 2.5) |
| assert_equal(stats.iqr(x, rng=(12.5, 75)), 2.5) |
| assert_almost_equal(stats.iqr(x, rng=(10, 50)), 1.6) |
|
|
| assert_raises(ValueError, stats.iqr, x, rng=(0, 101)) |
| assert_raises(ValueError, stats.iqr, x, rng=(np.nan, 25)) |
| assert_raises(TypeError, stats.iqr, x, rng=(0, 50, 60)) |
|
|
| def test_interpolation(self): |
| x = np.arange(5) |
| y = np.arange(4) |
| |
| assert_equal(stats.iqr(x), 2) |
| assert_equal(stats.iqr(y), 1.5) |
| |
| assert_equal(stats.iqr(x, interpolation='linear'), 2) |
| assert_equal(stats.iqr(y, interpolation='linear'), 1.5) |
| |
| assert_equal(stats.iqr(x, interpolation='higher'), 2) |
| assert_equal(stats.iqr(x, rng=(25, 80), interpolation='higher'), 3) |
| assert_equal(stats.iqr(y, interpolation='higher'), 2) |
| |
| assert_equal(stats.iqr(x, interpolation='lower'), 2) |
| assert_equal(stats.iqr(x, rng=(25, 80), interpolation='lower'), 2) |
| assert_equal(stats.iqr(y, interpolation='lower'), 2) |
| |
| assert_equal(stats.iqr(x, interpolation='nearest'), 2) |
| assert_equal(stats.iqr(y, interpolation='nearest'), 1) |
| |
| assert_equal(stats.iqr(x, interpolation='midpoint'), 2) |
| assert_equal(stats.iqr(x, rng=(25, 80), interpolation='midpoint'), 2.5) |
| assert_equal(stats.iqr(y, interpolation='midpoint'), 2) |
|
|
| |
| for method in ('inverted_cdf', 'averaged_inverted_cdf', |
| 'closest_observation', 'interpolated_inverted_cdf', |
| 'hazen', 'weibull', 'median_unbiased', |
| 'normal_unbiased'): |
| stats.iqr(y, interpolation=method) |
|
|
| assert_raises(ValueError, stats.iqr, x, interpolation='foobar') |
|
|
| def test_keepdims(self): |
| |
| x = np.ones((3, 5, 7, 11)) |
| assert_equal(stats.iqr(x, axis=None, keepdims=False).shape, ()) |
| assert_equal(stats.iqr(x, axis=2, keepdims=False).shape, (3, 5, 11)) |
| assert_equal(stats.iqr(x, axis=(0, 1), keepdims=False).shape, (7, 11)) |
| assert_equal(stats.iqr(x, axis=(0, 3), keepdims=False).shape, (5, 7)) |
| assert_equal(stats.iqr(x, axis=(1,), keepdims=False).shape, (3, 7, 11)) |
| assert_equal(stats.iqr(x, (0, 1, 2, 3), keepdims=False).shape, ()) |
| assert_equal(stats.iqr(x, axis=(0, 1, 3), keepdims=False).shape, (7,)) |
|
|
| assert_equal(stats.iqr(x, axis=None, keepdims=True).shape, (1, 1, 1, 1)) |
| assert_equal(stats.iqr(x, axis=2, keepdims=True).shape, (3, 5, 1, 11)) |
| assert_equal(stats.iqr(x, axis=(0, 1), keepdims=True).shape, (1, 1, 7, 11)) |
| assert_equal(stats.iqr(x, axis=(0, 3), keepdims=True).shape, (1, 5, 7, 1)) |
| assert_equal(stats.iqr(x, axis=(1,), keepdims=True).shape, (3, 1, 7, 11)) |
| assert_equal(stats.iqr(x, (0, 1, 2, 3), keepdims=True).shape, (1, 1, 1, 1)) |
| assert_equal(stats.iqr(x, axis=(0, 1, 3), keepdims=True).shape, (1, 1, 7, 1)) |
|
|
| def test_nanpolicy(self): |
| x = np.arange(15.0).reshape((3, 5)) |
|
|
| |
| assert_equal(stats.iqr(x, nan_policy='propagate'), 7) |
| assert_equal(stats.iqr(x, nan_policy='omit'), 7) |
| assert_equal(stats.iqr(x, nan_policy='raise'), 7) |
|
|
| |
| x[1, 2] = np.nan |
| with warnings.catch_warnings(record=True): |
| warnings.simplefilter("always") |
| assert_equal(stats.iqr(x, nan_policy='propagate'), |
| np.nan) |
| assert_equal(stats.iqr(x, axis=0, nan_policy='propagate'), |
| [5, 5, np.nan, 5, 5]) |
| assert_equal(stats.iqr(x, axis=1, nan_policy='propagate'), |
| [2, np.nan, 2]) |
|
|
| with warnings.catch_warnings(record=True): |
| warnings.simplefilter("always") |
| assert_equal(stats.iqr(x, nan_policy='omit'), 7.5) |
| assert_equal(stats.iqr(x, axis=0, nan_policy='omit'), np.full(5, 5)) |
| assert_equal(stats.iqr(x, axis=1, nan_policy='omit'), [2, 2.5, 2]) |
|
|
| assert_raises(ValueError, stats.iqr, x, nan_policy='raise') |
| assert_raises(ValueError, stats.iqr, x, axis=0, nan_policy='raise') |
| assert_raises(ValueError, stats.iqr, x, axis=1, nan_policy='raise') |
|
|
| |
| assert_raises(ValueError, stats.iqr, x, nan_policy='barfood') |
|
|
| def test_scale(self): |
| x = np.arange(15.0).reshape((3, 5)) |
|
|
| |
| assert_equal(stats.iqr(x, scale=1.0), 7) |
| assert_almost_equal(stats.iqr(x, scale='normal'), 7 / 1.3489795) |
| assert_equal(stats.iqr(x, scale=2.0), 3.5) |
|
|
| |
| x[1, 2] = np.nan |
| with warnings.catch_warnings(record=True): |
| warnings.simplefilter("always") |
| assert_equal(stats.iqr(x, scale=1.0, nan_policy='propagate'), np.nan) |
| assert_equal(stats.iqr(x, scale='normal', nan_policy='propagate'), np.nan) |
| assert_equal(stats.iqr(x, scale=2.0, nan_policy='propagate'), np.nan) |
| |
| assert_equal(stats.iqr(x, axis=1, scale=1.0, |
| nan_policy='propagate'), [2, np.nan, 2]) |
| assert_almost_equal(stats.iqr(x, axis=1, scale='normal', |
| nan_policy='propagate'), |
| np.array([2, np.nan, 2]) / 1.3489795) |
| assert_equal(stats.iqr(x, axis=1, scale=2.0, nan_policy='propagate'), |
| [1, np.nan, 1]) |
| |
| |
| |
|
|
| assert_equal(stats.iqr(x, scale=1.0, nan_policy='omit'), 7.5) |
| assert_almost_equal(stats.iqr(x, scale='normal', nan_policy='omit'), |
| 7.5 / 1.3489795) |
| assert_equal(stats.iqr(x, scale=2.0, nan_policy='omit'), 3.75) |
|
|
| |
| assert_raises(ValueError, stats.iqr, x, scale='foobar') |
|
|
|
|
| class TestMoments: |
| """ |
| Comparison numbers are found using R v.1.5.1 |
| note that length(testcase) = 4 |
| testmathworks comes from documentation for the |
| Statistics Toolbox for Matlab and can be found at both |
| https://www.mathworks.com/help/stats/kurtosis.html |
| https://www.mathworks.com/help/stats/skewness.html |
| Note that both test cases came from here. |
| """ |
| testcase = [1., 2., 3., 4.] |
| scalar_testcase = 4. |
| np.random.seed(1234) |
| testcase_moment_accuracy = np.random.rand(42) |
|
|
| def _assert_equal(self, actual, expect, *, shape=None, dtype=None): |
| expect = np.asarray(expect) |
| if shape is not None: |
| expect = np.broadcast_to(expect, shape) |
| assert_array_equal(actual, expect) |
| if dtype is None: |
| dtype = expect.dtype |
| assert actual.dtype == dtype |
|
|
| @array_api_compatible |
| @pytest.mark.parametrize('size', [10, (10, 2)]) |
| @pytest.mark.parametrize('m, c', product((0, 1, 2, 3), (None, 0, 1))) |
| def test_moment_center_scalar_moment(self, size, m, c, xp): |
| rng = np.random.default_rng(6581432544381372042) |
| x = xp.asarray(rng.random(size=size)) |
| res = stats.moment(x, m, center=c) |
| c = xp.mean(x, axis=0) if c is None else c |
| ref = xp.sum((x - c)**m, axis=0)/x.shape[0] |
| xp_assert_close(res, ref, atol=1e-16) |
|
|
| @array_api_compatible |
| @pytest.mark.parametrize('size', [10, (10, 2)]) |
| @pytest.mark.parametrize('c', (None, 0, 1)) |
| def test_moment_center_array_moment(self, size, c, xp): |
| rng = np.random.default_rng(1706828300224046506) |
| x = xp.asarray(rng.random(size=size)) |
| m = [0, 1, 2, 3] |
| res = stats.moment(x, m, center=c) |
| xp_test = array_namespace(x) |
| ref = xp_test.concat([stats.moment(x, i, center=c)[xp_test.newaxis, ...] |
| for i in m]) |
| xp_assert_equal(res, ref) |
|
|
| @array_api_compatible |
| def test_moment(self, xp): |
| |
| testcase = xp.asarray(self.testcase) |
|
|
| y = stats.moment(xp.asarray(self.scalar_testcase)) |
| xp_assert_close(y, xp.asarray(0.0)) |
|
|
| y = stats.moment(testcase, 0) |
| xp_assert_close(y, xp.asarray(1.0)) |
| y = stats.moment(testcase, 1) |
| xp_assert_close(y, xp.asarray(0.0)) |
| y = stats.moment(testcase, 2) |
| xp_assert_close(y, xp.asarray(1.25)) |
| y = stats.moment(testcase, 3) |
| xp_assert_close(y, xp.asarray(0.0)) |
| y = stats.moment(testcase, 4) |
| xp_assert_close(y, xp.asarray(2.5625)) |
|
|
| |
| y = stats.moment(testcase, [1, 2, 3, 4]) |
| xp_assert_close(y, xp.asarray([0, 1.25, 0, 2.5625])) |
|
|
| |
| y = stats.moment(testcase, 0.0) |
| xp_assert_close(y, xp.asarray(1.0)) |
| message = 'All elements of `order` must be integral.' |
| with pytest.raises(ValueError, match=message): |
| stats.moment(testcase, 1.2) |
| y = stats.moment(testcase, [1.0, 2, 3, 4.0]) |
| xp_assert_close(y, xp.asarray([0, 1.25, 0, 2.5625])) |
|
|
| def test_cases(): |
| y = stats.moment(xp.asarray([])) |
| xp_assert_equal(y, xp.asarray(xp.nan)) |
| y = stats.moment(xp.asarray([], dtype=xp.float32)) |
| xp_assert_equal(y, xp.asarray(xp.nan, dtype=xp.float32)) |
| y = stats.moment(xp.zeros((1, 0)), axis=0) |
| xp_assert_equal(y, xp.empty((0,))) |
| y = stats.moment(xp.asarray([[]]), axis=1) |
| xp_assert_equal(y, xp.asarray([xp.nan])) |
| y = stats.moment(xp.asarray([[]]), order=[0, 1], axis=0) |
| xp_assert_equal(y, xp.empty((2, 0))) |
|
|
| |
| if is_numpy(xp): |
| with pytest.warns(SmallSampleWarning, match="See documentation for..."): |
| test_cases() |
| else: |
| with np.testing.suppress_warnings() as sup: |
| sup.filter(RuntimeWarning, "Mean of empty slice.") |
| sup.filter(RuntimeWarning, "invalid value") |
| test_cases() |
|
|
| def test_nan_policy(self): |
| x = np.arange(10.) |
| x[9] = np.nan |
| assert_equal(stats.moment(x, 2), np.nan) |
| assert_almost_equal(stats.moment(x, nan_policy='omit'), 0.0) |
| assert_raises(ValueError, stats.moment, x, nan_policy='raise') |
| assert_raises(ValueError, stats.moment, x, nan_policy='foobar') |
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| @pytest.mark.parametrize('dtype', ['float32', 'float64', 'complex128']) |
| @pytest.mark.parametrize('expect, order', [(0, 1), (1, 0)]) |
| def test_constant_moments(self, dtype, expect, order, xp): |
| if dtype=='complex128' and is_torch(xp): |
| pytest.skip() |
| dtype = getattr(xp, dtype) |
| x = xp.asarray(np.random.rand(5), dtype=dtype) |
| y = stats.moment(x, order=order) |
| xp_assert_equal(y, xp.asarray(expect, dtype=dtype)) |
|
|
| y = stats.moment(xp.broadcast_to(x, (6, 5)), axis=0, order=order) |
| xp_assert_equal(y, xp.full((5,), expect, dtype=dtype)) |
|
|
| y = stats.moment(xp.broadcast_to(x, (1, 2, 3, 4, 5)), axis=2, |
| order=order) |
| xp_assert_equal(y, xp.full((1, 2, 4, 5), expect, dtype=dtype)) |
|
|
| y = stats.moment(xp.broadcast_to(x, (1, 2, 3, 4, 5)), axis=None, |
| order=order) |
| xp_assert_equal(y, xp.full((), expect, dtype=dtype)) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason="JAX arrays do not support item assignment") |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_moment_propagate_nan(self, xp): |
| |
| |
| a = xp.reshape(xp.arange(8.), (2, -1)) |
| a[1, 0] = np.nan |
| mm = stats.moment(a, 2, axis=1) |
| xp_assert_close(mm, xp.asarray([1.25, np.nan]), atol=1e-15) |
|
|
| @array_api_compatible |
| def test_moment_empty_order(self, xp): |
| |
| with pytest.raises(ValueError, match=r"`order` must be a scalar or a" |
| r" non-empty 1D array."): |
| stats.moment(xp.asarray([1, 2, 3, 4]), order=[]) |
|
|
| @array_api_compatible |
| def test_rename_moment_order(self, xp): |
| |
| |
| x = xp.arange(10) |
| res = stats.moment(x, moment=3) |
| ref = stats.moment(x, order=3) |
| xp_assert_equal(res, ref) |
|
|
| def test_moment_accuracy(self): |
| |
| |
| tc_no_mean = (self.testcase_moment_accuracy |
| - np.mean(self.testcase_moment_accuracy)) |
| assert_allclose(np.power(tc_no_mean, 42).mean(), |
| stats.moment(self.testcase_moment_accuracy, 42)) |
|
|
| @array_api_compatible |
| @pytest.mark.parametrize('order', [0, 1, 2, 3]) |
| @pytest.mark.parametrize('axis', [-1, 0, 1]) |
| @pytest.mark.parametrize('center', [None, 0]) |
| def test_moment_array_api(self, xp, order, axis, center): |
| rng = np.random.default_rng(34823589259425) |
| x = rng.random(size=(5, 6, 7)) |
| res = stats.moment(xp.asarray(x), order, axis=axis, center=center) |
| ref = xp.asarray(_moment(x, order, axis, mean=center)) |
| xp_assert_close(res, ref) |
|
|
|
|
| class SkewKurtosisTest: |
| scalar_testcase = 4. |
| testcase = [1., 2., 3., 4.] |
| testmathworks = [1.165, 0.6268, 0.0751, 0.3516, -0.6965] |
|
|
|
|
| class TestSkew(SkewKurtosisTest): |
| @array_api_compatible |
| @pytest.mark.parametrize('stat_fun', [stats.skew, stats.kurtosis]) |
| def test_empty_1d(self, stat_fun, xp): |
| x = xp.asarray([]) |
| if is_numpy(xp): |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = stat_fun(x) |
| else: |
| with np.testing.suppress_warnings() as sup: |
| |
| sup.filter(RuntimeWarning, "Mean of empty slice") |
| sup.filter(RuntimeWarning, "invalid value encountered") |
| res = stat_fun(x) |
| xp_assert_equal(res, xp.asarray(xp.nan)) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason="JAX arrays do not support item assignment") |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_skewness(self, xp): |
| |
| y = stats.skew(xp.asarray(self.scalar_testcase)) |
| xp_assert_close(y, xp.asarray(xp.nan)) |
| |
| |
| y = stats.skew(xp.asarray(self.testmathworks)) |
| xp_assert_close(y, xp.asarray(-0.29322304336607), atol=1e-10) |
| y = stats.skew(xp.asarray(self.testmathworks), bias=0) |
| xp_assert_close(y, xp.asarray(-0.437111105023940), atol=1e-10) |
| y = stats.skew(xp.asarray(self.testcase)) |
| xp_assert_close(y, xp.asarray(0.0), atol=1e-10) |
|
|
| def test_nan_policy(self): |
| |
| x = np.arange(10.) |
| x[9] = np.nan |
| with np.errstate(invalid='ignore'): |
| assert_equal(stats.skew(x), np.nan) |
| assert_equal(stats.skew(x, nan_policy='omit'), 0.) |
| assert_raises(ValueError, stats.skew, x, nan_policy='raise') |
| assert_raises(ValueError, stats.skew, x, nan_policy='foobar') |
|
|
| def test_skewness_scalar(self): |
| |
| assert_equal(stats.skew(arange(10)), 0.0) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason="JAX arrays do not support item assignment") |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_skew_propagate_nan(self, xp): |
| |
| |
| a = xp.arange(8.) |
| a = xp.reshape(a, (2, -1)) |
| a[1, 0] = xp.nan |
| with np.errstate(invalid='ignore'): |
| s = stats.skew(a, axis=1) |
| xp_assert_equal(s, xp.asarray([0, xp.nan])) |
|
|
| @array_api_compatible |
| def test_skew_constant_value(self, xp): |
| |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred"): |
| a = xp.asarray([-0.27829495]*10) |
| xp_assert_equal(stats.skew(a), xp.asarray(xp.nan)) |
| xp_assert_equal(stats.skew(a*2.**50), xp.asarray(xp.nan)) |
| xp_assert_equal(stats.skew(a/2.**50), xp.asarray(xp.nan)) |
| xp_assert_equal(stats.skew(a, bias=False), xp.asarray(xp.nan)) |
|
|
| |
| a = xp.asarray([14.3]*7) |
| xp_assert_equal(stats.skew(a), xp.asarray(xp.nan)) |
| a = 1. + xp.arange(-3., 4)*1e-16 |
| xp_assert_equal(stats.skew(a), xp.asarray(xp.nan)) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason="JAX arrays do not support item assignment") |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_precision_loss_gh15554(self, xp): |
| |
| |
| |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred"): |
| rng = np.random.default_rng(34095309370) |
| a = xp.asarray(rng.random(size=(100, 10))) |
| a[:, 0] = 1.01 |
| stats.skew(a) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason="JAX arrays do not support item assignment") |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| @pytest.mark.parametrize('axis', [-1, 0, 2, None]) |
| @pytest.mark.parametrize('bias', [False, True]) |
| def test_vectorization(self, xp, axis, bias): |
| |
| |
| rng = np.random.default_rng(1283413549926) |
| x = xp.asarray(rng.random((3, 4, 5))) |
|
|
| def skewness(a, axis, bias): |
| |
| if axis is None: |
| a = xp.reshape(a, (-1,)) |
| axis = 0 |
| xp_test = array_namespace(a) |
| mean = xp_test.mean(a, axis=axis, keepdims=True) |
| mu3 = xp_test.mean((a - mean)**3, axis=axis) |
| std = xp_test.std(a, axis=axis) |
| res = mu3 / std ** 3 |
| if not bias: |
| n = a.shape[axis] |
| res *= ((n - 1.0) * n) ** 0.5 / (n - 2.0) |
| return res |
|
|
| res = stats.skew(x, axis=axis, bias=bias) |
| ref = skewness(x, axis=axis, bias=bias) |
| xp_assert_close(res, ref) |
|
|
|
|
| class TestKurtosis(SkewKurtosisTest): |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_kurtosis(self, xp): |
| |
| y = stats.kurtosis(xp.asarray(self.scalar_testcase)) |
| assert xp.isnan(y) |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| y = stats.kurtosis(xp.asarray(self.testmathworks), 0, fisher=0, bias=1) |
| xp_assert_close(y, xp.asarray(2.1658856802973)) |
|
|
| |
| |
| |
| |
| y = stats.kurtosis(xp.asarray(self.testmathworks), fisher=0, bias=0) |
| xp_assert_close(y, xp.asarray(3.663542721189047)) |
| y = stats.kurtosis(xp.asarray(self.testcase), 0, 0) |
| xp_assert_close(y, xp.asarray(1.64)) |
|
|
| x = xp.arange(10.) |
| x = xp.where(x == 8, xp.asarray(xp.nan), x) |
| xp_assert_equal(stats.kurtosis(x), xp.asarray(xp.nan)) |
|
|
| def test_kurtosis_nan_policy(self): |
| |
| x = np.arange(10.) |
| x[9] = np.nan |
| assert_almost_equal(stats.kurtosis(x, nan_policy='omit'), -1.230000) |
| assert_raises(ValueError, stats.kurtosis, x, nan_policy='raise') |
| assert_raises(ValueError, stats.kurtosis, x, nan_policy='foobar') |
|
|
| def test_kurtosis_array_scalar(self): |
| |
| assert_equal(type(stats.kurtosis([1, 2, 3])), np.float64) |
|
|
| def test_kurtosis_propagate_nan(self): |
| |
| |
| |
| a = np.arange(8).reshape(2, -1).astype(float) |
| a[1, 0] = np.nan |
| k = stats.kurtosis(a, axis=1, nan_policy="propagate") |
| np.testing.assert_allclose(k, [-1.36, np.nan], atol=1e-15) |
|
|
| @array_api_compatible |
| def test_kurtosis_constant_value(self, xp): |
| |
| a = xp.asarray([-0.27829495]*10) |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred"): |
| assert xp.isnan(stats.kurtosis(a, fisher=False)) |
| assert xp.isnan(stats.kurtosis(a * float(2**50), fisher=False)) |
| assert xp.isnan(stats.kurtosis(a / float(2**50), fisher=False)) |
| assert xp.isnan(stats.kurtosis(a, fisher=False, bias=False)) |
|
|
| @skip_xp_backends('jax.numpy', |
| reason='JAX arrays do not support item assignment') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| @pytest.mark.parametrize('axis', [-1, 0, 2, None]) |
| @pytest.mark.parametrize('bias', [False, True]) |
| @pytest.mark.parametrize('fisher', [False, True]) |
| def test_vectorization(self, xp, axis, bias, fisher): |
| |
| |
| rng = np.random.default_rng(1283413549926) |
| x = xp.asarray(rng.random((4, 5, 6))) |
|
|
| def kurtosis(a, axis, bias, fisher): |
| |
| if axis is None: |
| a = xp.reshape(a, (-1,)) |
| axis = 0 |
| xp_test = array_namespace(a) |
| mean = xp_test.mean(a, axis=axis, keepdims=True) |
| mu4 = xp_test.mean((a - mean)**4, axis=axis) |
| mu2 = xp_test.var(a, axis=axis, correction=0) |
| if bias: |
| res = mu4 / mu2**2 - 3 |
| else: |
| n = a.shape[axis] |
| |
| res = (n-1) / ((n-2) * (n-3)) * ((n + 1) * mu4/mu2**2 - 3*(n-1)) |
|
|
| |
| |
| return res if fisher else res + 3 |
|
|
| res = stats.kurtosis(x, axis=axis, bias=bias, fisher=fisher) |
| ref = kurtosis(x, axis=axis, bias=bias, fisher=fisher) |
| xp_assert_close(res, ref) |
|
|
|
|
| @hypothesis.strategies.composite |
| def ttest_data_axis_strategy(draw): |
| |
| elements = dict(allow_nan=False, allow_infinity=False) |
| shape = npst.array_shapes(min_dims=1, min_side=2) |
| |
| |
| |
| data = draw(npst.arrays(dtype=np.float64, elements=elements, shape=shape)) |
|
|
| |
| ok_axes = [] |
| |
| |
| |
| with warnings.catch_warnings(): |
| warnings.simplefilter("error") |
| for axis in range(len(data.shape)): |
| with contextlib.suppress(Exception): |
| var = stats.moment(data, order=2, axis=axis) |
| if np.all(var > 0) and np.all(np.isfinite(var)): |
| ok_axes.append(axis) |
| |
| hypothesis.assume(ok_axes) |
|
|
| |
| axis = draw(hypothesis.strategies.sampled_from(ok_axes)) |
|
|
| return data, axis |
|
|
|
|
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| class TestStudentTest: |
| |
| |
| |
| |
| X1 = [-1., 0., 1.] |
| X2 = [0., 1., 2.] |
| T1_0 = 0. |
| P1_0 = 1. |
| T1_1 = -1.7320508075689 |
| P1_1 = 0.2254033307585 |
| T1_2 = -3.4641016151378 |
| P1_2 = 0.07417990022745 |
| T2_0 = 1.7320508075689 |
| P2_0 = 0.2254033307585 |
| P1_1_l = P1_1 / 2 |
| P1_1_g = 1 - (P1_1 / 2) |
|
|
| def test_onesample(self, xp): |
| with suppress_warnings() as sup, \ |
| np.errstate(invalid="ignore", divide="ignore"): |
| sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice") |
| a = xp.asarray(4.) if not is_numpy(xp) else 4. |
| t, p = stats.ttest_1samp(a, 3.) |
| xp_assert_equal(t, xp.asarray(xp.nan)) |
| xp_assert_equal(p, xp.asarray(xp.nan)) |
|
|
| t, p = stats.ttest_1samp(xp.asarray(self.X1), 0.) |
| xp_assert_close(t, xp.asarray(self.T1_0)) |
| xp_assert_close(p, xp.asarray(self.P1_0)) |
|
|
| res = stats.ttest_1samp(xp.asarray(self.X1), 0.) |
| attributes = ('statistic', 'pvalue') |
| check_named_results(res, attributes, xp=xp) |
|
|
| t, p = stats.ttest_1samp(xp.asarray(self.X2), 0.) |
|
|
| xp_assert_close(t, xp.asarray(self.T2_0)) |
| xp_assert_close(p, xp.asarray(self.P2_0)) |
|
|
| t, p = stats.ttest_1samp(xp.asarray(self.X1), 1.) |
|
|
| xp_assert_close(t, xp.asarray(self.T1_1)) |
| xp_assert_close(p, xp.asarray(self.P1_1)) |
|
|
| t, p = stats.ttest_1samp(xp.asarray(self.X1), 2.) |
|
|
| xp_assert_close(t, xp.asarray(self.T1_2)) |
| xp_assert_close(p, xp.asarray(self.P1_2)) |
|
|
| def test_onesample_nan_policy(self, xp): |
| |
| if not is_numpy(xp): |
| x = xp.asarray([1., 2., 3., xp.nan]) |
| message = "Use of `nan_policy` and `keepdims`..." |
| with pytest.raises(NotImplementedError, match=message): |
| stats.ttest_1samp(x, 1., nan_policy='omit') |
| return |
|
|
| x = stats.norm.rvs(loc=5, scale=10, size=51, random_state=7654567) |
| x[50] = np.nan |
| with np.errstate(invalid="ignore"): |
| assert_array_equal(stats.ttest_1samp(x, 5.0), (np.nan, np.nan)) |
|
|
| assert_array_almost_equal(stats.ttest_1samp(x, 5.0, nan_policy='omit'), |
| (-1.6412624074367159, 0.107147027334048005)) |
| assert_raises(ValueError, stats.ttest_1samp, x, 5.0, nan_policy='raise') |
| assert_raises(ValueError, stats.ttest_1samp, x, 5.0, |
| nan_policy='foobar') |
|
|
| def test_1samp_alternative(self, xp): |
| message = "`alternative` must be 'less', 'greater', or 'two-sided'." |
| with pytest.raises(ValueError, match=message): |
| stats.ttest_1samp(xp.asarray(self.X1), 0., alternative="error") |
|
|
| t, p = stats.ttest_1samp(xp.asarray(self.X1), 1., alternative="less") |
| xp_assert_close(p, xp.asarray(self.P1_1_l)) |
| xp_assert_close(t, xp.asarray(self.T1_1)) |
|
|
| t, p = stats.ttest_1samp(xp.asarray(self.X1), 1., alternative="greater") |
| xp_assert_close(p, xp.asarray(self.P1_1_g)) |
| xp_assert_close(t, xp.asarray(self.T1_1)) |
|
|
| @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater']) |
| def test_1samp_ci_1d(self, xp, alternative): |
| |
| rng = np.random.default_rng(8066178009154342972) |
| n = 10 |
| x = rng.normal(size=n, loc=1.5, scale=2) |
| popmean = rng.normal() |
| |
| |
| |
| |
| |
| dtype = xp.float32 if is_torch(xp) else xp.float64 |
| x = xp.asarray(x, dtype=dtype) |
| popmean = xp.asarray(popmean, dtype=dtype) |
|
|
| ref = {'two-sided': [0.3594423211709136, 2.9333455028290860], |
| 'greater': [0.7470806207371626, np.inf], |
| 'less': [-np.inf, 2.545707203262837]} |
| res = stats.ttest_1samp(x, popmean=popmean, alternative=alternative) |
| ci = res.confidence_interval(confidence_level=0.85) |
| xp_assert_close(ci.low, xp.asarray(ref[alternative][0])) |
| xp_assert_close(ci.high, xp.asarray(ref[alternative][1])) |
| xp_assert_equal(res.df, xp.asarray(n-1)) |
|
|
| def test_1samp_ci_iv(self, xp): |
| |
| res = stats.ttest_1samp(xp.arange(10.), 0.) |
| message = '`confidence_level` must be a number between 0 and 1.' |
| with pytest.raises(ValueError, match=message): |
| res.confidence_interval(confidence_level=10) |
|
|
| @pytest.mark.xslow |
| @hypothesis.given(alpha=hypothesis.strategies.floats(1e-15, 1-1e-15), |
| data_axis=ttest_data_axis_strategy()) |
| @pytest.mark.parametrize('alternative', ['less', 'greater']) |
| def test_pvalue_ci(self, alpha, data_axis, alternative, xp): |
| |
| data, axis = data_axis |
| data = xp.asarray(data) |
| res = stats.ttest_1samp(data, 0., |
| alternative=alternative, axis=axis) |
| l, u = res.confidence_interval(confidence_level=alpha) |
| popmean = l if alternative == 'greater' else u |
| xp_test = array_namespace(l) |
| popmean = xp_test.expand_dims(popmean, axis=axis) |
| res = stats.ttest_1samp(data, popmean, alternative=alternative, axis=axis) |
| shape = list(data.shape) |
| shape.pop(axis) |
| |
| ref = xp.broadcast_to(xp.asarray(1-alpha, dtype=xp.float64), shape) |
| xp_assert_close(res.pvalue, ref) |
|
|
|
|
| class TestPercentileOfScore: |
|
|
| def f(self, *args, **kwargs): |
| return stats.percentileofscore(*args, **kwargs) |
|
|
| @pytest.mark.parametrize("kind, result", [("rank", 40), |
| ("mean", 35), |
| ("strict", 30), |
| ("weak", 40)]) |
| def test_unique(self, kind, result): |
| a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] |
| assert_equal(self.f(a, 4, kind=kind), result) |
|
|
| @pytest.mark.parametrize("kind, result", [("rank", 45), |
| ("mean", 40), |
| ("strict", 30), |
| ("weak", 50)]) |
| def test_multiple2(self, kind, result): |
| a = [1, 2, 3, 4, 4, 5, 6, 7, 8, 9] |
| assert_equal(self.f(a, 4, kind=kind), result) |
|
|
| @pytest.mark.parametrize("kind, result", [("rank", 50), |
| ("mean", 45), |
| ("strict", 30), |
| ("weak", 60)]) |
| def test_multiple3(self, kind, result): |
| a = [1, 2, 3, 4, 4, 4, 5, 6, 7, 8] |
| assert_equal(self.f(a, 4, kind=kind), result) |
|
|
| @pytest.mark.parametrize("kind, result", [("rank", 30), |
| ("mean", 30), |
| ("strict", 30), |
| ("weak", 30)]) |
| def test_missing(self, kind, result): |
| a = [1, 2, 3, 5, 6, 7, 8, 9, 10, 11] |
| assert_equal(self.f(a, 4, kind=kind), result) |
|
|
| @pytest.mark.parametrize("kind, result", [("rank", 40), |
| ("mean", 35), |
| ("strict", 30), |
| ("weak", 40)]) |
| def test_large_numbers(self, kind, result): |
| a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100] |
| assert_equal(self.f(a, 40, kind=kind), result) |
|
|
| @pytest.mark.parametrize("kind, result", [("rank", 50), |
| ("mean", 45), |
| ("strict", 30), |
| ("weak", 60)]) |
| def test_large_numbers_multiple3(self, kind, result): |
| a = [10, 20, 30, 40, 40, 40, 50, 60, 70, 80] |
| assert_equal(self.f(a, 40, kind=kind), result) |
|
|
| @pytest.mark.parametrize("kind, result", [("rank", 30), |
| ("mean", 30), |
| ("strict", 30), |
| ("weak", 30)]) |
| def test_large_numbers_missing(self, kind, result): |
| a = [10, 20, 30, 50, 60, 70, 80, 90, 100, 110] |
| assert_equal(self.f(a, 40, kind=kind), result) |
|
|
| @pytest.mark.parametrize("kind, result", [("rank", [0, 10, 100, 100]), |
| ("mean", [0, 5, 95, 100]), |
| ("strict", [0, 0, 90, 100]), |
| ("weak", [0, 10, 100, 100])]) |
| def test_boundaries(self, kind, result): |
| a = [10, 20, 30, 50, 60, 70, 80, 90, 100, 110] |
| assert_equal(self.f(a, [0, 10, 110, 200], kind=kind), result) |
|
|
| @pytest.mark.parametrize("kind, result", [("rank", [0, 10, 100]), |
| ("mean", [0, 5, 95]), |
| ("strict", [0, 0, 90]), |
| ("weak", [0, 10, 100])]) |
| def test_inf(self, kind, result): |
| a = [1, 2, 3, 4, 5, 6, 7, 8, 9, +np.inf] |
| assert_equal(self.f(a, [-np.inf, 1, +np.inf], kind=kind), result) |
|
|
| cases = [("propagate", [], 1, np.nan), |
| ("propagate", [np.nan], 1, np.nan), |
| ("propagate", [np.nan], [0, 1, 2], [np.nan, np.nan, np.nan]), |
| ("propagate", [1, 2], [1, 2, np.nan], [50, 100, np.nan]), |
| ("omit", [1, 2, np.nan], [0, 1, 2], [0, 50, 100]), |
| ("omit", [1, 2], [0, 1, np.nan], [0, 50, np.nan]), |
| ("omit", [np.nan, np.nan], [0, 1, 2], [np.nan, np.nan, np.nan])] |
|
|
| @pytest.mark.parametrize("policy, a, score, result", cases) |
| def test_nans_ok(self, policy, a, score, result): |
| assert_equal(self.f(a, score, nan_policy=policy), result) |
|
|
| cases = [ |
| ("raise", [1, 2, 3, np.nan], [1, 2, 3], |
| "The input contains nan values"), |
| ("raise", [1, 2, 3], [1, 2, 3, np.nan], |
| "The input contains nan values"), |
| ] |
|
|
| @pytest.mark.parametrize("policy, a, score, message", cases) |
| def test_nans_fail(self, policy, a, score, message): |
| with assert_raises(ValueError, match=message): |
| self.f(a, score, nan_policy=policy) |
|
|
| @pytest.mark.parametrize("shape", [ |
| (6, ), |
| (2, 3), |
| (2, 1, 3), |
| (2, 1, 1, 3), |
| ]) |
| def test_nd(self, shape): |
| a = np.array([0, 1, 2, 3, 4, 5]) |
| scores = a.reshape(shape) |
| results = scores*10 |
| a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] |
| assert_equal(self.f(a, scores, kind="rank"), results) |
|
|
|
|
| PowerDivCase = namedtuple('Case', |
| ['f_obs', 'f_exp', 'ddof', 'axis', |
| 'chi2', |
| 'log', |
| 'mod_log', |
| 'cr', |
| ]) |
|
|
| |
| |
| |
| power_div_1d_cases = [ |
| |
| PowerDivCase(f_obs=[4, 8, 12, 8], f_exp=None, ddof=0, axis=None, |
| chi2=4, |
| log=2*(4*np.log(4/8) + 12*np.log(12/8)), |
| mod_log=2*(8*np.log(8/4) + 8*np.log(8/12)), |
| cr=(4*((4/8)**(2/3) - 1) + 12*((12/8)**(2/3) - 1))/(5/9)), |
| |
| PowerDivCase(f_obs=[4, 8, 12, 8], f_exp=[2, 16, 12, 2], ddof=0, axis=None, |
| chi2=24, |
| log=2*(4*np.log(4/2) + 8*np.log(8/16) + 8*np.log(8/2)), |
| mod_log=2*(2*np.log(2/4) + 16*np.log(16/8) + 2*np.log(2/8)), |
| cr=(4*((4/2)**(2/3) - 1) + 8*((8/16)**(2/3) - 1) + |
| 8*((8/2)**(2/3) - 1))/(5/9)), |
| |
| PowerDivCase(f_obs=[4, 8, 12, 8], f_exp=8, ddof=0, axis=None, |
| chi2=4, |
| log=2*(4*np.log(4/8) + 12*np.log(12/8)), |
| mod_log=2*(8*np.log(8/4) + 8*np.log(8/12)), |
| cr=(4*((4/8)**(2/3) - 1) + 12*((12/8)**(2/3) - 1))/(5/9)), |
| |
| PowerDivCase(f_obs=[3, 5, 7, 9], f_exp=[3, 5, 7, 9], ddof=0, axis=0, |
| chi2=0, log=0, mod_log=0, cr=0), |
| ] |
|
|
|
|
| power_div_empty_cases = [ |
| |
| |
| PowerDivCase(f_obs=[], |
| f_exp=None, ddof=0, axis=0, |
| chi2=0, log=0, mod_log=0, cr=0), |
| |
| |
| PowerDivCase(f_obs=np.array([[],[],[]]).T, |
| f_exp=None, ddof=0, axis=0, |
| chi2=[0, 0, 0], |
| log=[0, 0, 0], |
| mod_log=[0, 0, 0], |
| cr=[0, 0, 0]), |
| |
| |
| |
| PowerDivCase(f_obs=np.array([[],[],[]]), |
| f_exp=None, ddof=0, axis=0, |
| chi2=[], |
| log=[], |
| mod_log=[], |
| cr=[]), |
| ] |
|
|
|
|
| @array_api_compatible |
| class TestPowerDivergence: |
|
|
| def check_power_divergence(self, f_obs, f_exp, ddof, axis, lambda_, |
| expected_stat, xp): |
| dtype = xp.asarray(1.).dtype |
|
|
| f_obs = xp.asarray(f_obs, dtype=dtype) |
| f_exp = xp.asarray(f_exp, dtype=dtype) if f_exp is not None else f_exp |
|
|
| if axis is None: |
| num_obs = xp_size(f_obs) |
| else: |
| xp_test = array_namespace(f_obs) |
| arrays = (xp_test.broadcast_arrays(f_obs, f_exp) if f_exp is not None |
| else (f_obs,)) |
| num_obs = arrays[0].shape[axis] |
|
|
| with suppress_warnings() as sup: |
| sup.filter(RuntimeWarning, "Mean of empty slice") |
| stat, p = stats.power_divergence( |
| f_obs=f_obs, f_exp=f_exp, ddof=ddof, |
| axis=axis, lambda_=lambda_) |
| xp_assert_close(stat, xp.asarray(expected_stat, dtype=dtype)) |
|
|
| if lambda_ == 1 or lambda_ == "pearson": |
| |
| stat, p = stats.chisquare(f_obs=f_obs, f_exp=f_exp, ddof=ddof, |
| axis=axis) |
| xp_assert_close(stat, xp.asarray(expected_stat, dtype=dtype)) |
|
|
| ddof = np.asarray(ddof) |
| expected_p = stats.distributions.chi2.sf(expected_stat, |
| num_obs - 1 - ddof) |
| xp_assert_close(p, xp.asarray(expected_p, dtype=dtype)) |
|
|
| @pytest.mark.parametrize('case', power_div_1d_cases) |
| @pytest.mark.parametrize('lambda_stat', |
| [(None, 'chi2'), ('pearson', 'chi2'), (1, 'chi2'), |
| ('log-likelihood', 'log'), ('mod-log-likelihood', 'mod_log'), |
| ('cressie-read', 'cr'), (2/3, 'cr')]) |
| def test_basic(self, case, lambda_stat, xp): |
| lambda_, attr = lambda_stat |
| expected_stat = getattr(case, attr) |
| self.check_power_divergence(case.f_obs, case.f_exp, case.ddof, case.axis, |
| lambda_, expected_stat, xp) |
|
|
| def test_axis(self, xp): |
| case0 = power_div_1d_cases[0] |
| case1 = power_div_1d_cases[1] |
| f_obs = np.vstack((case0.f_obs, case1.f_obs)) |
| f_exp = np.vstack((np.ones_like(case0.f_obs)*np.mean(case0.f_obs), |
| case1.f_exp)) |
| |
| |
| f_obs = xp.asarray(f_obs) |
| f_exp = xp.asarray(f_exp) if f_exp is not None else f_exp |
| self.check_power_divergence( |
| f_obs, f_exp, 0, 1, |
| "pearson", [case0.chi2, case1.chi2], xp=xp) |
| self.check_power_divergence( |
| f_obs, f_exp, 0, 1, |
| "log-likelihood", [case0.log, case1.log], xp=xp) |
| self.check_power_divergence( |
| f_obs, f_exp, 0, 1, |
| "mod-log-likelihood", [case0.mod_log, case1.mod_log], xp=xp) |
| self.check_power_divergence( |
| f_obs, f_exp, 0, 1, |
| "cressie-read", [case0.cr, case1.cr], xp=xp) |
| |
| |
| f_obs_reshape = xp.reshape(xp.asarray(case0.f_obs), (2, 2)) |
| self.check_power_divergence( |
| f_obs_reshape, None, 0, None, |
| "pearson", case0.chi2, xp=xp) |
|
|
| def test_ddof_broadcasting(self, xp): |
| |
| |
| |
| |
|
|
| case0 = power_div_1d_cases[0] |
| case1 = power_div_1d_cases[1] |
| |
| f_obs = np.vstack((case0.f_obs, case1.f_obs)).T |
| f_exp = np.vstack((np.ones_like(case0.f_obs)*np.mean(case0.f_obs), |
| case1.f_exp)).T |
|
|
| expected_chi2 = [case0.chi2, case1.chi2] |
|
|
| dtype = xp.asarray(1.).dtype |
| f_obs = xp.asarray(f_obs, dtype=dtype) |
| f_exp = xp.asarray(f_exp, dtype=dtype) |
| expected_chi2 = xp.asarray(expected_chi2, dtype=dtype) |
|
|
| |
| |
| ddof = xp.asarray([[0], [1]]) |
|
|
| stat, p = stats.power_divergence(f_obs, f_exp, ddof=ddof) |
| xp_assert_close(stat, expected_chi2) |
|
|
| |
| stat0, p0 = stats.power_divergence(f_obs, f_exp, ddof=ddof[0, 0]) |
| stat1, p1 = stats.power_divergence(f_obs, f_exp, ddof=ddof[1, 0]) |
|
|
| xp_test = array_namespace(f_obs) |
| expected_p = xp_test.concat((p0[xp_test.newaxis, :], |
| p1[xp_test.newaxis, :]), |
| axis=0) |
| xp_assert_close(p, expected_p) |
|
|
| @pytest.mark.parametrize('case', power_div_empty_cases) |
| @pytest.mark.parametrize('lambda_stat', |
| [('pearson', 'chi2'), ('log-likelihood', 'log'), |
| ('mod-log-likelihood', 'mod_log'), |
| ('cressie-read', 'cr'), (2/3, 'cr')]) |
| def test_empty_cases(self, case, lambda_stat, xp): |
| lambda_, attr = lambda_stat |
| expected_stat = getattr(case, attr) |
| with warnings.catch_warnings(): |
| self.check_power_divergence( |
| case.f_obs, case.f_exp, case.ddof, case.axis, |
| lambda_, expected_stat, xp) |
|
|
| def test_power_divergence_result_attributes(self, xp): |
| f_obs = power_div_1d_cases[0].f_obs |
| f_exp = power_div_1d_cases[0].f_exp |
| ddof = power_div_1d_cases[0].ddof |
| axis = power_div_1d_cases[0].axis |
| dtype = xp.asarray(1.).dtype |
| f_obs = xp.asarray(f_obs, dtype=dtype) |
| |
|
|
| res = stats.power_divergence(f_obs=f_obs, f_exp=f_exp, ddof=ddof, |
| axis=axis, lambda_="pearson") |
| attributes = ('statistic', 'pvalue') |
| check_named_results(res, attributes, xp=xp) |
|
|
| def test_power_divergence_gh_12282(self, xp): |
| |
| f_obs = xp.asarray([[10., 20.], [30., 20.]]) |
| f_exp = xp.asarray([[5., 15.], [35., 25.]]) |
| message = 'For each axis slice...' |
| with pytest.raises(ValueError, match=message): |
| stats.power_divergence(f_obs=f_obs, f_exp=xp.asarray([30., 60.])) |
| with pytest.raises(ValueError, match=message): |
| stats.power_divergence(f_obs=f_obs, f_exp=f_exp, axis=1) |
| stat, pval = stats.power_divergence(f_obs=f_obs, f_exp=f_exp) |
| xp_assert_close(stat, xp.asarray([5.71428571, 2.66666667])) |
| xp_assert_close(pval, xp.asarray([0.01682741, 0.10247043])) |
|
|
| def test_power_divergence_against_cressie_read_data(self, xp): |
| |
| |
| |
| |
|
|
| |
| |
| |
| obs = xp.asarray([15., 11., 14., 17., 5., 11., 10., 4., 8., |
| 10., 7., 9., 11., 3., 6., 1., 1., 4.]) |
| beta = -0.083769 |
| i = xp.arange(1., obs.shape[0] + 1.) |
| alpha = xp.log(xp.sum(obs) / xp.sum(xp.exp(beta*i))) |
| expected_counts = xp.exp(alpha + beta*i) |
|
|
| |
| xp_test = array_namespace(obs) |
| table4 = xp_test.concat((obs[xp_test.newaxis, :], |
| expected_counts[xp_test.newaxis, :])).T |
|
|
| table5 = xp.asarray([ |
| |
| -10.0, 72.2e3, |
| -5.0, 28.9e1, |
| -3.0, 65.6, |
| -2.0, 40.6, |
| -1.5, 34.0, |
| -1.0, 29.5, |
| -0.5, 26.5, |
| 0.0, 24.6, |
| 0.5, 23.4, |
| 0.67, 23.1, |
| 1.0, 22.7, |
| 1.5, 22.6, |
| 2.0, 22.9, |
| 3.0, 24.8, |
| 5.0, 35.5, |
| 10.0, 21.4e1, |
| ]) |
| table5 = xp.reshape(table5, (-1, 2)) |
|
|
| for i in range(table5.shape[0]): |
| lambda_, expected_stat = table5[i, 0], table5[i, 1] |
| stat, p = stats.power_divergence(table4[:,0], table4[:,1], |
| lambda_=lambda_) |
| xp_assert_close(stat, expected_stat, rtol=5e-3) |
|
|
|
|
| @array_api_compatible |
| class TestChisquare: |
| def test_chisquare_12282a(self, xp): |
| |
| |
| |
| with assert_raises(ValueError, match='For each axis slice...'): |
| f_obs = xp.asarray([10., 20.]) |
| f_exp = xp.asarray([30., 60.]) |
| stats.chisquare(f_obs=f_obs, f_exp=f_exp) |
|
|
| def test_chisquare_12282b(self, xp): |
| |
| |
| |
| rng = np.random.default_rng(3843874358728234) |
| n = 10 |
| lam = rng.uniform(1000, 2000, size=n) |
| x = rng.poisson(lam) |
| lam = xp.asarray(lam) |
| x = xp.asarray(x, dtype=lam.dtype) |
| res = stats.chisquare(f_obs=x, f_exp=lam, ddof=-1, sum_check=False) |
| |
| z = (x - lam) / xp.sqrt(lam) |
| statistic = xp.sum(z**2) |
| xp_assert_close(res.statistic, statistic) |
| |
| X2 = _SimpleChi2(xp.asarray(n, dtype=statistic.dtype)) |
| xp_assert_close(res.pvalue, X2.sf(statistic)) |
|
|
| @pytest.mark.parametrize("n, dtype", [(200, 'uint8'), (1000000, 'int32')]) |
| def test_chiquare_data_types_attributes(self, n, dtype, xp): |
| |
| dtype = getattr(xp, dtype) |
| obs = xp.asarray([n, 0], dtype=dtype) |
| exp = xp.asarray([n // 2, n // 2], dtype=dtype) |
| res = stats.chisquare(obs, exp) |
| stat, p = res |
| xp_assert_close(stat, xp.asarray(n, dtype=xp.asarray(1.).dtype), rtol=1e-13) |
| |
| xp_assert_equal(res.statistic, stat) |
| xp_assert_equal(res.pvalue, p) |
|
|
|
|
| @skip_xp_invalid_arg |
| class TestChisquareMA: |
| @pytest.mark.filterwarnings('ignore::DeprecationWarning') |
| def test_chisquare_masked_arrays(self): |
| |
| obs = np.array([[8, 8, 16, 32, -1], [-1, -1, 3, 4, 5]]).T |
| mask = np.array([[0, 0, 0, 0, 1], [1, 1, 0, 0, 0]]).T |
| mobs = np.ma.masked_array(obs, mask) |
| expected_chisq = np.array([24.0, 0.5]) |
| expected_g = np.array([2*(2*8*np.log(0.5) + 32*np.log(2.0)), |
| 2*(3*np.log(0.75) + 5*np.log(1.25))]) |
|
|
| chi2 = stats.distributions.chi2 |
|
|
| chisq, p = stats.chisquare(mobs) |
| mat.assert_array_equal(chisq, expected_chisq) |
| mat.assert_array_almost_equal(p, chi2.sf(expected_chisq, |
| mobs.count(axis=0) - 1)) |
|
|
| g, p = stats.power_divergence(mobs, lambda_='log-likelihood') |
| mat.assert_array_almost_equal(g, expected_g, decimal=15) |
| mat.assert_array_almost_equal(p, chi2.sf(expected_g, |
| mobs.count(axis=0) - 1)) |
|
|
| chisq, p = stats.chisquare(mobs.T, axis=1) |
| mat.assert_array_equal(chisq, expected_chisq) |
| mat.assert_array_almost_equal(p, chi2.sf(expected_chisq, |
| mobs.T.count(axis=1) - 1)) |
| g, p = stats.power_divergence(mobs.T, axis=1, lambda_="log-likelihood") |
| mat.assert_array_almost_equal(g, expected_g, decimal=15) |
| mat.assert_array_almost_equal(p, chi2.sf(expected_g, |
| mobs.count(axis=0) - 1)) |
|
|
| obs1 = np.ma.array([3, 5, 6, 99, 10], mask=[0, 0, 0, 1, 0]) |
| exp1 = np.ma.array([2, 4, 8, 10, 99], mask=[0, 0, 0, 0, 1]) |
| chi2, p = stats.chisquare(obs1, f_exp=exp1) |
| |
| |
| |
| mat.assert_array_equal(chi2, 1/2 + 1/4 + 4/8) |
|
|
| |
| chisq, p = stats.chisquare(np.ma.array([1,2,3]), axis=None) |
| assert_(isinstance(chisq, np.float64)) |
| assert_(isinstance(p, np.float64)) |
| assert_equal(chisq, 1.0) |
| assert_almost_equal(p, stats.distributions.chi2.sf(1.0, 2)) |
|
|
| |
| |
| with np.errstate(invalid='ignore'): |
| with suppress_warnings() as sup: |
| sup.filter(RuntimeWarning, "Mean of empty slice") |
| chisq, p = stats.chisquare(np.ma.array([])) |
| assert_(isinstance(chisq, np.ma.MaskedArray)) |
| assert_equal(chisq.shape, ()) |
| assert_(chisq.mask) |
|
|
| empty3 = np.ma.array([[],[],[]]) |
|
|
| |
| |
| chisq, p = stats.chisquare(empty3) |
| assert_(isinstance(chisq, np.ma.MaskedArray)) |
| mat.assert_array_equal(chisq, []) |
|
|
| |
| |
| with np.errstate(invalid='ignore'): |
| with suppress_warnings() as sup: |
| sup.filter(RuntimeWarning, "Mean of empty slice") |
| chisq, p = stats.chisquare(empty3.T) |
|
|
| assert_(isinstance(chisq, np.ma.MaskedArray)) |
| assert_equal(chisq.shape, (3,)) |
| assert_(np.all(chisq.mask)) |
|
|
| def test_deprecation_warning(self): |
| a = np.asarray([1., 2., 3.]) |
| ma = np.ma.masked_array(a) |
| message = "`power_divergence` and `chisquare` support for masked..." |
| with pytest.warns(DeprecationWarning, match=message): |
| stats.chisquare(ma) |
| with pytest.warns(DeprecationWarning, match=message): |
| stats.chisquare(a, ma) |
|
|
|
|
| def test_friedmanchisquare(): |
| |
| |
| |
| |
| x1 = [array([0.763, 0.599, 0.954, 0.628, 0.882, 0.936, 0.661, 0.583, |
| 0.775, 1.0, 0.94, 0.619, 0.972, 0.957]), |
| array([0.768, 0.591, 0.971, 0.661, 0.888, 0.931, 0.668, 0.583, |
| 0.838, 1.0, 0.962, 0.666, 0.981, 0.978]), |
| array([0.771, 0.590, 0.968, 0.654, 0.886, 0.916, 0.609, 0.563, |
| 0.866, 1.0, 0.965, 0.614, 0.9751, 0.946]), |
| array([0.798, 0.569, 0.967, 0.657, 0.898, 0.931, 0.685, 0.625, |
| 0.875, 1.0, 0.962, 0.669, 0.975, 0.970])] |
|
|
| |
| x2 = [array([4,3,5,3,5,3,2,5,4,4,4,3]), |
| array([2,2,1,2,3,1,2,3,2,1,1,3]), |
| array([2,4,3,3,4,3,3,4,4,1,2,1]), |
| array([3,5,4,3,4,4,3,3,3,4,4,4])] |
|
|
| |
| |
| |
| |
| x3 = [array([7.0,9.9,8.5,5.1,10.3]), |
| array([5.3,5.7,4.7,3.5,7.7]), |
| array([4.9,7.6,5.5,2.8,8.4]), |
| array([8.8,8.9,8.1,3.3,9.1])] |
|
|
| assert_array_almost_equal(stats.friedmanchisquare(x1[0],x1[1],x1[2],x1[3]), |
| (10.2283464566929, 0.0167215803284414)) |
| assert_array_almost_equal(stats.friedmanchisquare(x2[0],x2[1],x2[2],x2[3]), |
| (18.9428571428571, 0.000280938375189499)) |
| assert_array_almost_equal(stats.friedmanchisquare(x3[0],x3[1],x3[2],x3[3]), |
| (10.68, 0.0135882729582176)) |
| assert_raises(ValueError, stats.friedmanchisquare,x3[0],x3[1]) |
|
|
| |
| attributes = ('statistic', 'pvalue') |
| res = stats.friedmanchisquare(*x1) |
| check_named_results(res, attributes) |
|
|
| |
| assert_array_almost_equal(mstats.friedmanchisquare(x1[0], x1[1], |
| x1[2], x1[3]), |
| (10.2283464566929, 0.0167215803284414)) |
| |
| |
| |
| assert_array_almost_equal(mstats.friedmanchisquare(x3[0], x3[1], |
| x3[2], x3[3]), |
| (10.68, 0.0135882729582176)) |
| assert_raises(ValueError, mstats.friedmanchisquare,x3[0],x3[1]) |
|
|
|
|
| class TestKSTest: |
| """Tests kstest and ks_1samp agree with K-S various sizes, alternatives, modes.""" |
|
|
| def _testOne(self, x, alternative, expected_statistic, expected_prob, |
| mode='auto', decimal=14): |
| result = stats.kstest(x, 'norm', alternative=alternative, mode=mode) |
| expected = np.array([expected_statistic, expected_prob]) |
| assert_array_almost_equal(np.array(result), expected, decimal=decimal) |
|
|
| def _test_kstest_and_ks1samp(self, x, alternative, mode='auto', decimal=14): |
| result = stats.kstest(x, 'norm', alternative=alternative, mode=mode) |
| result_1samp = stats.ks_1samp(x, stats.norm.cdf, |
| alternative=alternative, mode=mode) |
| assert_array_almost_equal(np.array(result), result_1samp, decimal=decimal) |
|
|
| def test_namedtuple_attributes(self): |
| x = np.linspace(-1, 1, 9) |
| |
| attributes = ('statistic', 'pvalue') |
| res = stats.kstest(x, 'norm') |
| check_named_results(res, attributes) |
|
|
| def test_agree_with_ks_1samp(self): |
| x = np.linspace(-1, 1, 9) |
| self._test_kstest_and_ks1samp(x, 'two-sided') |
|
|
| x = np.linspace(-15, 15, 9) |
| self._test_kstest_and_ks1samp(x, 'two-sided') |
|
|
| x = [-1.23, 0.06, -0.60, 0.17, 0.66, -0.17, -0.08, 0.27, -0.98, -0.99] |
| self._test_kstest_and_ks1samp(x, 'two-sided') |
| self._test_kstest_and_ks1samp(x, 'greater', mode='exact') |
| self._test_kstest_and_ks1samp(x, 'less', mode='exact') |
|
|
| def test_pm_inf_gh20386(self): |
| |
| |
| vals = [-np.inf, 0, 1, np.inf] |
| res = stats.kstest(vals, stats.cauchy.cdf) |
| ref = stats.kstest(vals, stats.cauchy.cdf, _no_deco=True) |
| assert np.all(np.isfinite(res)) |
| assert_equal(res, ref) |
| assert not np.isnan(res.statistic) |
| assert not np.isnan(res.pvalue) |
|
|
| |
|
|
|
|
| class TestKSOneSample: |
| """ |
| Tests kstest and ks_samp 1-samples with K-S various sizes, alternatives, modes. |
| """ |
|
|
| def _testOne(self, x, alternative, expected_statistic, expected_prob, |
| mode='auto', decimal=14): |
| result = stats.ks_1samp(x, stats.norm.cdf, alternative=alternative, mode=mode) |
| expected = np.array([expected_statistic, expected_prob]) |
| assert_array_almost_equal(np.array(result), expected, decimal=decimal) |
|
|
| def test_namedtuple_attributes(self): |
| x = np.linspace(-1, 1, 9) |
| |
| attributes = ('statistic', 'pvalue') |
| res = stats.ks_1samp(x, stats.norm.cdf) |
| check_named_results(res, attributes) |
|
|
| def test_agree_with_r(self): |
| |
| x = np.linspace(-1, 1, 9) |
| self._testOne(x, 'two-sided', 0.15865525393145705, 0.95164069201518386) |
|
|
| x = np.linspace(-15, 15, 9) |
| self._testOne(x, 'two-sided', 0.44435602715924361, 0.038850140086788665) |
|
|
| x = [-1.23, 0.06, -0.60, 0.17, 0.66, -0.17, -0.08, 0.27, -0.98, -0.99] |
| self._testOne(x, 'two-sided', 0.293580126801961, 0.293408463684361) |
| self._testOne(x, 'greater', 0.293580126801961, 0.146988835042376, mode='exact') |
| self._testOne(x, 'less', 0.109348552425692, 0.732768892470675, mode='exact') |
|
|
| def test_known_examples(self): |
| |
| x = stats.norm.rvs(loc=0.2, size=100, random_state=987654321) |
| self._testOne(x, 'two-sided', 0.12464329735846891, 0.089444888711820769, |
| mode='asymp') |
| self._testOne(x, 'less', 0.12464329735846891, 0.040989164077641749) |
| self._testOne(x, 'greater', 0.0072115233216310994, 0.98531158590396228) |
|
|
| def test_ks1samp_allpaths(self): |
| |
| assert_(np.isnan(kolmogn(np.nan, 1, True))) |
| with assert_raises(ValueError, match='n is not integral: 1.5'): |
| kolmogn(1.5, 1, True) |
| assert_(np.isnan(kolmogn(-1, 1, True))) |
|
|
| dataset = np.asarray([ |
| |
| (101, 1, True, 1.0), |
| (101, 1.1, True, 1.0), |
| (101, 0, True, 0.0), |
| (101, -0.1, True, 0.0), |
|
|
| (32, 1.0 / 64, True, 0.0), |
| (32, 1.0 / 64, False, 1.0), |
|
|
| |
| (32, 0.5, True, 0.9999999363163307), |
| |
| (32, 0.5, False, 6.368366937916623e-08), |
|
|
| |
| (32, 1.0 / 8, True, 0.34624229979775223), |
| (32, 1.0 / 4, True, 0.9699508336558085), |
| (1600, 0.49, False, 0.0), |
| |
| (1600, 1 / 16.0, False, 7.0837876229702195e-06), |
| |
| (1600, 14 / 1600, False, 0.99962357317602), |
| |
| (1600, 1 / 32, False, 0.08603386296651416), |
| ]) |
| FuncData(kolmogn, dataset, (0, 1, 2), 3).check(dtypes=[int, float, bool]) |
|
|
| @pytest.mark.parametrize("ksfunc", [stats.kstest, stats.ks_1samp]) |
| @pytest.mark.parametrize("alternative, x6val, ref_location, ref_sign", |
| [('greater', 6, 6, +1), |
| ('less', 7, 7, -1), |
| ('two-sided', 6, 6, +1), |
| ('two-sided', 7, 7, -1)]) |
| def test_location_sign(self, ksfunc, alternative, |
| x6val, ref_location, ref_sign): |
| |
| |
| x = np.arange(10) + 0.5 |
| x[6] = x6val |
| cdf = stats.uniform(scale=10).cdf |
| res = ksfunc(x, cdf, alternative=alternative) |
| assert_allclose(res.statistic, 0.1, rtol=1e-15) |
| assert res.statistic_location == ref_location |
| assert res.statistic_sign == ref_sign |
|
|
| |
|
|
|
|
| class TestKSTwoSamples: |
| """Tests 2-samples with K-S various sizes, alternatives, modes.""" |
|
|
| def _testOne(self, x1, x2, alternative, expected_statistic, expected_prob, |
| mode='auto'): |
| result = stats.ks_2samp(x1, x2, alternative, mode=mode) |
| expected = np.array([expected_statistic, expected_prob]) |
| assert_array_almost_equal(np.array(result), expected) |
|
|
| def testSmall(self): |
| self._testOne([0], [1], 'two-sided', 1.0/1, 1.0) |
| self._testOne([0], [1], 'greater', 1.0/1, 0.5) |
| self._testOne([0], [1], 'less', 0.0/1, 1.0) |
| self._testOne([1], [0], 'two-sided', 1.0/1, 1.0) |
| self._testOne([1], [0], 'greater', 0.0/1, 1.0) |
| self._testOne([1], [0], 'less', 1.0/1, 0.5) |
|
|
| def testTwoVsThree(self): |
| data1 = np.array([1.0, 2.0]) |
| data1p = data1 + 0.01 |
| data1m = data1 - 0.01 |
| data2 = np.array([1.0, 2.0, 3.0]) |
| self._testOne(data1p, data2, 'two-sided', 1.0 / 3, 1.0) |
| self._testOne(data1p, data2, 'greater', 1.0 / 3, 0.7) |
| self._testOne(data1p, data2, 'less', 1.0 / 3, 0.7) |
| self._testOne(data1m, data2, 'two-sided', 2.0 / 3, 0.6) |
| self._testOne(data1m, data2, 'greater', 2.0 / 3, 0.3) |
| self._testOne(data1m, data2, 'less', 0, 1.0) |
|
|
| def testTwoVsFour(self): |
| data1 = np.array([1.0, 2.0]) |
| data1p = data1 + 0.01 |
| data1m = data1 - 0.01 |
| data2 = np.array([1.0, 2.0, 3.0, 4.0]) |
| self._testOne(data1p, data2, 'two-sided', 2.0 / 4, 14.0/15) |
| self._testOne(data1p, data2, 'greater', 2.0 / 4, 8.0/15) |
| self._testOne(data1p, data2, 'less', 1.0 / 4, 12.0/15) |
|
|
| self._testOne(data1m, data2, 'two-sided', 3.0 / 4, 6.0/15) |
| self._testOne(data1m, data2, 'greater', 3.0 / 4, 3.0/15) |
| self._testOne(data1m, data2, 'less', 0, 1.0) |
|
|
| def test100_100(self): |
| x100 = np.linspace(1, 100, 100) |
| x100_2_p1 = x100 + 2 + 0.1 |
| x100_2_m1 = x100 + 2 - 0.1 |
| self._testOne(x100, x100_2_p1, 'two-sided', 3.0 / 100, 0.9999999999962055) |
| self._testOne(x100, x100_2_p1, 'greater', 3.0 / 100, 0.9143290114276248) |
| self._testOne(x100, x100_2_p1, 'less', 0, 1.0) |
| self._testOne(x100, x100_2_m1, 'two-sided', 2.0 / 100, 1.0) |
| self._testOne(x100, x100_2_m1, 'greater', 2.0 / 100, 0.960978450786184) |
| self._testOne(x100, x100_2_m1, 'less', 0, 1.0) |
|
|
| def test100_110(self): |
| x100 = np.linspace(1, 100, 100) |
| x110 = np.linspace(1, 100, 110) |
| x110_20_p1 = x110 + 20 + 0.1 |
| x110_20_m1 = x110 + 20 - 0.1 |
| |
| self._testOne(x100, x110_20_p1, 'two-sided', 232.0 / 1100, 0.015739183865607353) |
| self._testOne(x100, x110_20_p1, 'greater', 232.0 / 1100, 0.007869594319053203) |
| self._testOne(x100, x110_20_p1, 'less', 0, 1) |
| self._testOne(x100, x110_20_m1, 'two-sided', 229.0 / 1100, 0.017803803861026313) |
| self._testOne(x100, x110_20_m1, 'greater', 229.0 / 1100, 0.008901905958245056) |
| self._testOne(x100, x110_20_m1, 'less', 0.0, 1.0) |
|
|
| def testRepeatedValues(self): |
| x2233 = np.array([2] * 3 + [3] * 4 + [5] * 5 + [6] * 4, dtype=int) |
| x3344 = x2233 + 1 |
| x2356 = np.array([2] * 3 + [3] * 4 + [5] * 10 + [6] * 4, dtype=int) |
| x3467 = np.array([3] * 10 + [4] * 2 + [6] * 10 + [7] * 4, dtype=int) |
| self._testOne(x2233, x3344, 'two-sided', 5.0/16, 0.4262934613454952) |
| self._testOne(x2233, x3344, 'greater', 5.0/16, 0.21465428276573786) |
| self._testOne(x2233, x3344, 'less', 0.0/16, 1.0) |
| self._testOne(x2356, x3467, 'two-sided', 190.0/21/26, 0.0919245790168125) |
| self._testOne(x2356, x3467, 'greater', 190.0/21/26, 0.0459633806858544) |
| self._testOne(x2356, x3467, 'less', 70.0/21/26, 0.6121593130022775) |
|
|
| def testEqualSizes(self): |
| data2 = np.array([1.0, 2.0, 3.0]) |
| self._testOne(data2, data2+1, 'two-sided', 1.0/3, 1.0) |
| self._testOne(data2, data2+1, 'greater', 1.0/3, 0.75) |
| self._testOne(data2, data2+1, 'less', 0.0/3, 1.) |
| self._testOne(data2, data2+0.5, 'two-sided', 1.0/3, 1.0) |
| self._testOne(data2, data2+0.5, 'greater', 1.0/3, 0.75) |
| self._testOne(data2, data2+0.5, 'less', 0.0/3, 1.) |
| self._testOne(data2, data2-0.5, 'two-sided', 1.0/3, 1.0) |
| self._testOne(data2, data2-0.5, 'greater', 0.0/3, 1.0) |
| self._testOne(data2, data2-0.5, 'less', 1.0/3, 0.75) |
|
|
| @pytest.mark.slow |
| def testMiddlingBoth(self): |
| |
| n1, n2 = 500, 600 |
| delta = 1.0/n1/n2/2/2 |
| x = np.linspace(1, 200, n1) - delta |
| y = np.linspace(2, 200, n2) |
| self._testOne(x, y, 'two-sided', 2000.0 / n1 / n2, 1.0, |
| mode='auto') |
| self._testOne(x, y, 'two-sided', 2000.0 / n1 / n2, 1.0, |
| mode='asymp') |
| self._testOne(x, y, 'greater', 2000.0 / n1 / n2, 0.9697596024683929, |
| mode='asymp') |
| self._testOne(x, y, 'less', 500.0 / n1 / n2, 0.9968735843165021, |
| mode='asymp') |
| with suppress_warnings() as sup: |
| message = "ks_2samp: Exact calculation unsuccessful." |
| sup.filter(RuntimeWarning, message) |
| self._testOne(x, y, 'greater', 2000.0 / n1 / n2, 0.9697596024683929, |
| mode='exact') |
| self._testOne(x, y, 'less', 500.0 / n1 / n2, 0.9968735843165021, |
| mode='exact') |
| with warnings.catch_warnings(record=True) as w: |
| warnings.simplefilter("always") |
| self._testOne(x, y, 'less', 500.0 / n1 / n2, 0.9968735843165021, |
| mode='exact') |
| _check_warnings(w, RuntimeWarning, 1) |
|
|
| @pytest.mark.slow |
| def testMediumBoth(self): |
| |
| n1, n2 = 1000, 1100 |
| delta = 1.0/n1/n2/2/2 |
| x = np.linspace(1, 200, n1) - delta |
| y = np.linspace(2, 200, n2) |
| self._testOne(x, y, 'two-sided', 6600.0 / n1 / n2, 1.0, |
| mode='asymp') |
| self._testOne(x, y, 'two-sided', 6600.0 / n1 / n2, 1.0, |
| mode='auto') |
| self._testOne(x, y, 'greater', 6600.0 / n1 / n2, 0.9573185808092622, |
| mode='asymp') |
| self._testOne(x, y, 'less', 1000.0 / n1 / n2, 0.9982410869433984, |
| mode='asymp') |
|
|
| with suppress_warnings() as sup: |
| message = "ks_2samp: Exact calculation unsuccessful." |
| sup.filter(RuntimeWarning, message) |
| self._testOne(x, y, 'greater', 6600.0 / n1 / n2, 0.9573185808092622, |
| mode='exact') |
| self._testOne(x, y, 'less', 1000.0 / n1 / n2, 0.9982410869433984, |
| mode='exact') |
| with warnings.catch_warnings(record=True) as w: |
| warnings.simplefilter("always") |
| self._testOne(x, y, 'less', 1000.0 / n1 / n2, 0.9982410869433984, |
| mode='exact') |
| _check_warnings(w, RuntimeWarning, 1) |
|
|
| def testLarge(self): |
| |
| n1, n2 = 10000, 110 |
| lcm = n1*11.0 |
| delta = 1.0/n1/n2/2/2 |
| x = np.linspace(1, 200, n1) - delta |
| y = np.linspace(2, 100, n2) |
| self._testOne(x, y, 'two-sided', 55275.0 / lcm, 4.2188474935755949e-15) |
| self._testOne(x, y, 'greater', 561.0 / lcm, 0.99115454582047591) |
| self._testOne(x, y, 'less', 55275.0 / lcm, 3.1317328311518713e-26) |
|
|
| def test_gh11184(self): |
| |
| np.random.seed(123456) |
| x = np.random.normal(size=3000) |
| y = np.random.normal(size=3001) * 1.5 |
| self._testOne(x, y, 'two-sided', 0.11292880151060758, 2.7755575615628914e-15, |
| mode='asymp') |
| self._testOne(x, y, 'two-sided', 0.11292880151060758, 2.7755575615628914e-15, |
| mode='exact') |
|
|
| @pytest.mark.xslow |
| def test_gh11184_bigger(self): |
| |
| np.random.seed(123456) |
| x = np.random.normal(size=10000) |
| y = np.random.normal(size=10001) * 1.5 |
| self._testOne(x, y, 'two-sided', 0.10597913208679133, 3.3149311398483503e-49, |
| mode='asymp') |
| self._testOne(x, y, 'two-sided', 0.10597913208679133, 2.7755575615628914e-15, |
| mode='exact') |
| self._testOne(x, y, 'greater', 0.10597913208679133, 2.7947433906389253e-41, |
| mode='asymp') |
| self._testOne(x, y, 'less', 0.09658002199780022, 2.7947433906389253e-41, |
| mode='asymp') |
|
|
| @pytest.mark.xslow |
| def test_gh12999(self): |
| np.random.seed(123456) |
| for x in range(1000, 12000, 1000): |
| vals1 = np.random.normal(size=(x)) |
| vals2 = np.random.normal(size=(x + 10), loc=0.5) |
| exact = stats.ks_2samp(vals1, vals2, mode='exact').pvalue |
| asymp = stats.ks_2samp(vals1, vals2, mode='asymp').pvalue |
| |
| assert_array_less(exact, 3 * asymp) |
| assert_array_less(asymp, 3 * exact) |
|
|
| @pytest.mark.slow |
| def testLargeBoth(self): |
| |
| n1, n2 = 10000, 11000 |
| lcm = n1*11.0 |
| delta = 1.0/n1/n2/2/2 |
| x = np.linspace(1, 200, n1) - delta |
| y = np.linspace(2, 200, n2) |
| self._testOne(x, y, 'two-sided', 563.0 / lcm, 0.9990660108966576, |
| mode='asymp') |
| self._testOne(x, y, 'two-sided', 563.0 / lcm, 0.9990456491488628, |
| mode='exact') |
| self._testOne(x, y, 'two-sided', 563.0 / lcm, 0.9990660108966576, |
| mode='auto') |
| self._testOne(x, y, 'greater', 563.0 / lcm, 0.7561851877420673) |
| self._testOne(x, y, 'less', 10.0 / lcm, 0.9998239693191724) |
| with suppress_warnings() as sup: |
| message = "ks_2samp: Exact calculation unsuccessful." |
| sup.filter(RuntimeWarning, message) |
| self._testOne(x, y, 'greater', 563.0 / lcm, 0.7561851877420673, |
| mode='exact') |
| self._testOne(x, y, 'less', 10.0 / lcm, 0.9998239693191724, |
| mode='exact') |
|
|
| def testNamedAttributes(self): |
| |
| attributes = ('statistic', 'pvalue') |
| res = stats.ks_2samp([1, 2], [3]) |
| check_named_results(res, attributes) |
|
|
| @pytest.mark.slow |
| def test_some_code_paths(self): |
| |
| from scipy.stats._stats_py import ( |
| _count_paths_outside_method, |
| _compute_outer_prob_inside_method |
| ) |
|
|
| _compute_outer_prob_inside_method(1, 1, 1, 1) |
| _count_paths_outside_method(1000, 1, 1, 1001) |
|
|
| with np.errstate(invalid='raise'): |
| assert_raises(FloatingPointError, _count_paths_outside_method, |
| 1100, 1099, 1, 1) |
| assert_raises(FloatingPointError, _count_paths_outside_method, |
| 2000, 1000, 1, 1) |
|
|
| @pytest.mark.parametrize('case', (([], [1]), ([1], []), ([], []))) |
| def test_argument_checking(self, case): |
| |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = stats.ks_2samp(*case) |
| assert_equal(res.statistic, np.nan) |
| assert_equal(res.pvalue, np.nan) |
|
|
| @pytest.mark.xslow |
| def test_gh12218(self): |
| """Ensure gh-12218 is fixed.""" |
| |
| |
| np.random.seed(12345678) |
| n1 = 2097152 |
| rvs1 = stats.uniform.rvs(size=n1, loc=0., scale=1) |
| rvs2 = rvs1 + 1 |
| stats.ks_2samp(rvs1, rvs2, alternative='greater', mode='asymp') |
| stats.ks_2samp(rvs1, rvs2, alternative='less', mode='asymp') |
| stats.ks_2samp(rvs1, rvs2, alternative='two-sided', mode='asymp') |
|
|
| def test_warnings_gh_14019(self): |
| |
| |
| rng = np.random.RandomState(seed=23493549) |
| |
| data1 = rng.random(size=881) + 0.5 |
| data2 = rng.random(size=369) |
| message = "ks_2samp: Exact calculation unsuccessful" |
| with pytest.warns(RuntimeWarning, match=message): |
| res = stats.ks_2samp(data1, data2, alternative='less') |
| assert_allclose(res.pvalue, 0, atol=1e-14) |
|
|
| @pytest.mark.parametrize("ksfunc", [stats.kstest, stats.ks_2samp]) |
| @pytest.mark.parametrize("alternative, x6val, ref_location, ref_sign", |
| [('greater', 5.9, 5.9, +1), |
| ('less', 6.1, 6.0, -1), |
| ('two-sided', 5.9, 5.9, +1), |
| ('two-sided', 6.1, 6.0, -1)]) |
| def test_location_sign(self, ksfunc, alternative, |
| x6val, ref_location, ref_sign): |
| |
| |
| x = np.arange(10, dtype=np.float64) |
| y = x.copy() |
| x[6] = x6val |
| res = stats.ks_2samp(x, y, alternative=alternative) |
| assert res.statistic == 0.1 |
| assert res.statistic_location == ref_location |
| assert res.statistic_sign == ref_sign |
|
|
|
|
| def test_ttest_rel(): |
| |
| tr,pr = 0.81248591389165692, 0.41846234511362157 |
| tpr = ([tr,-tr],[pr,pr]) |
|
|
| rvs1 = np.linspace(1,100,100) |
| rvs2 = np.linspace(1.01,99.989,100) |
| rvs1_2D = np.array([np.linspace(1,100,100), np.linspace(1.01,99.989,100)]) |
| rvs2_2D = np.array([np.linspace(1.01,99.989,100), np.linspace(1,100,100)]) |
|
|
| t,p = stats.ttest_rel(rvs1, rvs2, axis=0) |
| assert_array_almost_equal([t,p],(tr,pr)) |
| t,p = stats.ttest_rel(rvs1_2D.T, rvs2_2D.T, axis=0) |
| assert_array_almost_equal([t,p],tpr) |
| t,p = stats.ttest_rel(rvs1_2D, rvs2_2D, axis=1) |
| assert_array_almost_equal([t,p],tpr) |
|
|
| |
| with suppress_warnings() as sup, \ |
| np.errstate(invalid="ignore", divide="ignore"): |
| sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice") |
| t, p = stats.ttest_rel(4., 3.) |
| assert_(np.isnan(t)) |
| assert_(np.isnan(p)) |
|
|
| |
| attributes = ('statistic', 'pvalue') |
| res = stats.ttest_rel(rvs1, rvs2, axis=0) |
| check_named_results(res, attributes) |
|
|
| |
| rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D]) |
| rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D]) |
| t,p = stats.ttest_rel(rvs1_3D, rvs2_3D, axis=1) |
| assert_array_almost_equal(np.abs(t), tr) |
| assert_array_almost_equal(np.abs(p), pr) |
| assert_equal(t.shape, (2, 3)) |
|
|
| t, p = stats.ttest_rel(np.moveaxis(rvs1_3D, 2, 0), |
| np.moveaxis(rvs2_3D, 2, 0), |
| axis=2) |
| assert_array_almost_equal(np.abs(t), tr) |
| assert_array_almost_equal(np.abs(p), pr) |
| assert_equal(t.shape, (3, 2)) |
|
|
| |
| assert_raises(ValueError, stats.ttest_rel, rvs1, rvs2, alternative="error") |
|
|
| t, p = stats.ttest_rel(rvs1, rvs2, axis=0, alternative="less") |
| assert_allclose(p, 1 - pr/2) |
| assert_allclose(t, tr) |
|
|
| t, p = stats.ttest_rel(rvs1, rvs2, axis=0, alternative="greater") |
| assert_allclose(p, pr/2) |
| assert_allclose(t, tr) |
|
|
| |
| rng = np.random.RandomState(12345678) |
| x = stats.norm.rvs(loc=5, scale=10, size=501, random_state=rng) |
| x[500] = np.nan |
| y = (stats.norm.rvs(loc=5, scale=10, size=501, random_state=rng) + |
| stats.norm.rvs(scale=0.2, size=501, random_state=rng)) |
| y[500] = np.nan |
|
|
| with np.errstate(invalid="ignore"): |
| assert_array_equal(stats.ttest_rel(x, x), (np.nan, np.nan)) |
|
|
| assert_array_almost_equal(stats.ttest_rel(x, y, nan_policy='omit'), |
| (0.25299925303978066, 0.8003729814201519)) |
| assert_raises(ValueError, stats.ttest_rel, x, y, nan_policy='raise') |
| assert_raises(ValueError, stats.ttest_rel, x, y, nan_policy='foobar') |
|
|
| |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred"): |
| t, p = stats.ttest_rel([0, 0, 0], [1, 1, 1]) |
| assert_equal((np.abs(t), p), (np.inf, 0)) |
| with np.errstate(invalid="ignore"): |
| assert_equal(stats.ttest_rel([0, 0, 0], [0, 0, 0]), (np.nan, np.nan)) |
|
|
| |
| anan = np.array([[1, np.nan], [-1, 1]]) |
| assert_equal(stats.ttest_rel(anan, np.zeros((2, 2))), |
| ([0, np.nan], [1, np.nan])) |
|
|
| |
| x = np.arange(24) |
| assert_raises(ValueError, stats.ttest_rel, x.reshape((8, 3)), |
| x.reshape((2, 3, 4))) |
|
|
| |
| def convert(t, p, alt): |
| if (t < 0 and alt == "less") or (t > 0 and alt == "greater"): |
| return p / 2 |
| return 1 - (p / 2) |
| converter = np.vectorize(convert) |
|
|
| rvs1_2D[:, 20:30] = np.nan |
| rvs2_2D[:, 15:25] = np.nan |
|
|
| with pytest.warns(SmallSampleWarning, match=too_small_nd_omit): |
| tr, pr = stats.ttest_rel(rvs1_2D, rvs2_2D, 0, nan_policy='omit') |
|
|
| with pytest.warns(SmallSampleWarning, match=too_small_nd_omit): |
| t, p = stats.ttest_rel(rvs1_2D, rvs2_2D, 0, |
| nan_policy='omit', alternative='less') |
| assert_allclose(t, tr, rtol=1e-14) |
| with np.errstate(invalid='ignore'): |
| assert_allclose(p, converter(tr, pr, 'less'), rtol=1e-14) |
|
|
| with pytest.warns(SmallSampleWarning, match=too_small_nd_omit): |
| t, p = stats.ttest_rel(rvs1_2D, rvs2_2D, 0, |
| nan_policy='omit', alternative='greater') |
| assert_allclose(t, tr, rtol=1e-14) |
| with np.errstate(invalid='ignore'): |
| assert_allclose(p, converter(tr, pr, 'greater'), rtol=1e-14) |
|
|
|
|
| def test_ttest_rel_nan_2nd_arg(): |
| |
| x = [np.nan, 2.0, 3.0, 4.0] |
| y = [1.0, 2.0, 1.0, 2.0] |
|
|
| r1 = stats.ttest_rel(x, y, nan_policy='omit') |
| r2 = stats.ttest_rel(y, x, nan_policy='omit') |
| assert_allclose(r2.statistic, -r1.statistic, atol=1e-15) |
| assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15) |
|
|
| |
| r3 = stats.ttest_rel(y[1:], x[1:]) |
| assert_allclose(r2, r3, atol=1e-15) |
|
|
| |
| |
| |
| |
| assert_allclose(r2, (-2, 0.1835), atol=1e-4) |
|
|
|
|
| def test_ttest_rel_empty_1d_returns_nan(): |
| |
| |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| result = stats.ttest_rel([], []) |
| assert isinstance(result, stats._stats_py.TtestResult) |
| assert_equal(result, (np.nan, np.nan)) |
|
|
|
|
| @pytest.mark.parametrize('b, expected_shape', |
| [(np.empty((1, 5, 0)), (3, 5)), |
| (np.empty((1, 0, 0)), (3, 0))]) |
| def test_ttest_rel_axis_size_zero(b, expected_shape): |
| |
| |
| |
| a = np.empty((3, 1, 0)) |
| with np.testing.suppress_warnings() as sup: |
| |
| sup.filter(SmallSampleWarning, too_small_nd_not_omit) |
| result = stats.ttest_rel(a, b, axis=-1) |
| assert isinstance(result, stats._stats_py.TtestResult) |
| expected_value = np.full(expected_shape, fill_value=np.nan) |
| assert_equal(result.statistic, expected_value) |
| assert_equal(result.pvalue, expected_value) |
|
|
|
|
| def test_ttest_rel_nonaxis_size_zero(): |
| |
| |
| |
| |
| a = np.empty((1, 8, 0)) |
| b = np.empty((5, 8, 1)) |
| result = stats.ttest_rel(a, b, axis=1) |
| assert isinstance(result, stats._stats_py.TtestResult) |
| assert_equal(result.statistic.shape, (5, 0)) |
| assert_equal(result.pvalue.shape, (5, 0)) |
|
|
|
|
| @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater']) |
| def test_ttest_rel_ci_1d(alternative): |
| |
| rng = np.random.default_rng(3749065329432213059) |
| n = 10 |
| x = rng.normal(size=n, loc=1.5, scale=2) |
| y = rng.normal(size=n, loc=2, scale=2) |
| |
| |
| |
| |
| |
| |
| |
|
|
| ref = {'two-sided': [-1.912194489914035, 0.400169725914035], |
| 'greater': [-1.563944820311475, np.inf], |
| 'less': [-np.inf, 0.05192005631147523]} |
| res = stats.ttest_rel(x, y, alternative=alternative) |
| ci = res.confidence_interval(confidence_level=0.85) |
| assert_allclose(ci, ref[alternative]) |
| assert_equal(res.df, n-1) |
|
|
|
|
| @pytest.mark.parametrize("test_fun, args", |
| [(stats.ttest_1samp, (np.arange(10), 0)), |
| (stats.ttest_rel, (np.arange(10), np.arange(10)))]) |
| def test_ttest_ci_iv(test_fun, args): |
| |
| res = test_fun(*args) |
| message = '`confidence_level` must be a number between 0 and 1.' |
| with pytest.raises(ValueError, match=message): |
| res.confidence_interval(confidence_level=10) |
|
|
|
|
| def _desc_stats(x1, x2, axis=0, *, xp=None): |
| xp = array_namespace(x1, x2) if xp is None else xp |
|
|
| def _stats(x, axis=0): |
| x = xp.asarray(x) |
| mu = xp.mean(x, axis=axis) |
| std = xp.std(x, axis=axis, correction=1) |
| nobs = x.shape[axis] |
| return mu, std, nobs |
|
|
| return _stats(x1, axis) + _stats(x2, axis) |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_ttest_ind(xp): |
| |
| tr = xp.asarray(1.0912746897927283) |
| pr = xp.asarray(0.27647818616351882) |
| tr_2D = xp.asarray([tr, -tr]) |
| pr_2D = xp.asarray([pr, pr]) |
|
|
| rvs1 = xp.linspace(5, 105, 100) |
| rvs2 = xp.linspace(1, 100, 100) |
| rvs1_2D = xp.stack([rvs1, rvs2]) |
| rvs2_2D = xp.stack([rvs2, rvs1]) |
|
|
| res = stats.ttest_ind(rvs1, rvs2, axis=0) |
| t, p = res |
| xp_assert_close(t, tr) |
| xp_assert_close(p, pr) |
|
|
| res = stats.ttest_ind_from_stats(*_desc_stats(rvs1, rvs2)) |
| t, p = res |
| xp_assert_close(t, tr) |
| xp_assert_close(p, pr) |
|
|
| res = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0) |
| xp_assert_close(res.statistic, tr_2D) |
| xp_assert_close(res.pvalue, pr_2D) |
|
|
| res = stats.ttest_ind_from_stats(*_desc_stats(rvs1_2D.T, rvs2_2D.T)) |
| xp_assert_close(res.statistic, tr_2D) |
| xp_assert_close(res.pvalue, pr_2D) |
|
|
| res = stats.ttest_ind(rvs1_2D, rvs2_2D, axis=1) |
| xp_assert_close(res.statistic, tr_2D) |
| xp_assert_close(res.pvalue, pr_2D) |
|
|
| res = stats.ttest_ind_from_stats(*_desc_stats(rvs1_2D, rvs2_2D, axis=1)) |
| xp_assert_close(res.statistic, tr_2D) |
| xp_assert_close(res.pvalue, pr_2D) |
|
|
| |
| |
|
|
| |
| message = "`alternative` must be 'less', 'greater', or 'two-sided'." |
| with pytest.raises(ValueError, match=message): |
| stats.ttest_ind(rvs1, rvs2, alternative = "error") |
|
|
| args = _desc_stats(rvs1_2D.T, rvs2_2D.T) |
| with pytest.raises(ValueError, match=message): |
| stats.ttest_ind_from_stats(*args, alternative = "error") |
|
|
| t, p = stats.ttest_ind(rvs1, rvs2, alternative="less") |
| xp_assert_close(p, 1 - (pr/2)) |
| xp_assert_close(t, tr) |
|
|
| t, p = stats.ttest_ind(rvs1, rvs2, alternative="greater") |
| xp_assert_close(p, pr/2) |
| xp_assert_close(t, tr) |
|
|
| |
| res1 = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0, alternative="less") |
| args = _desc_stats(rvs1_2D.T, rvs2_2D.T) |
| res2 = stats.ttest_ind_from_stats(*args, alternative="less") |
| xp_assert_close(res1.statistic, res2.statistic) |
| xp_assert_close(res1.pvalue, res2.pvalue) |
|
|
| res1 = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0, alternative="less") |
| args = _desc_stats(rvs1_2D.T, rvs2_2D.T) |
| res2 = stats.ttest_ind_from_stats(*args, alternative="less") |
| xp_assert_close(res1.statistic, res2.statistic) |
| xp_assert_close(res1.pvalue, res2.pvalue) |
|
|
| |
| NaN = xp.asarray(xp.nan) |
| rvs1 = xp.where(xp.arange(rvs1.shape[0]) == 0, NaN, rvs1) |
|
|
| res = stats.ttest_ind(rvs1, rvs2, axis=0) |
| xp_assert_equal(res.statistic, NaN) |
| xp_assert_equal(res.pvalue, NaN) |
|
|
| res = stats.ttest_ind_from_stats(*_desc_stats(rvs1, rvs2)) |
| xp_assert_equal(res.statistic, NaN) |
| xp_assert_equal(res.pvalue, NaN) |
|
|
|
|
| def test_ttest_ind_nan_policy(): |
| rvs1 = np.linspace(5, 105, 100) |
| rvs2 = np.linspace(1, 100, 100) |
| rvs1_2D = np.array([rvs1, rvs2]) |
| rvs2_2D = np.array([rvs2, rvs1]) |
| rvs1_3D = np.dstack([rvs1_2D, rvs1_2D, rvs1_2D]) |
| rvs2_3D = np.dstack([rvs2_2D, rvs2_2D, rvs2_2D]) |
|
|
| |
| rng = np.random.RandomState(12345678) |
| x = stats.norm.rvs(loc=5, scale=10, size=501, random_state=rng) |
| x[500] = np.nan |
| y = stats.norm.rvs(loc=5, scale=10, size=500, random_state=rng) |
|
|
| with np.errstate(invalid="ignore"): |
| assert_array_equal(stats.ttest_ind(x, y), (np.nan, np.nan)) |
|
|
| assert_array_almost_equal(stats.ttest_ind(x, y, nan_policy='omit'), |
| (0.24779670949091914, 0.80434267337517906)) |
| assert_raises(ValueError, stats.ttest_ind, x, y, nan_policy='raise') |
| assert_raises(ValueError, stats.ttest_ind, x, y, nan_policy='foobar') |
|
|
| |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred"): |
| t, p = stats.ttest_ind([0, 0, 0], [1, 1, 1]) |
| assert_equal((np.abs(t), p), (np.inf, 0)) |
|
|
| with np.errstate(invalid="ignore"): |
| assert_equal(stats.ttest_ind([0, 0, 0], [0, 0, 0]), (np.nan, np.nan)) |
|
|
| |
| anan = np.array([[1, np.nan], [-1, 1]]) |
| assert_equal(stats.ttest_ind(anan, np.zeros((2, 2))), |
| ([0, np.nan], [1, np.nan])) |
|
|
| rvs1_3D[:, :, 10:15] = np.nan |
| rvs2_3D[:, :, 6:12] = np.nan |
|
|
| |
| def convert(t, p, alt): |
| if (t < 0 and alt == "less") or (t > 0 and alt == "greater"): |
| return p / 2 |
| return 1 - (p / 2) |
| converter = np.vectorize(convert) |
|
|
| tr, pr = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=0, nan_policy='omit') |
|
|
| t, p = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=0, nan_policy='omit', |
| alternative='less') |
| assert_allclose(t, tr, rtol=1e-14) |
| assert_allclose(p, converter(tr, pr, 'less'), rtol=1e-14) |
|
|
| t, p = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=0, nan_policy='omit', |
| alternative='greater') |
| assert_allclose(t, tr, rtol=1e-14) |
| assert_allclose(p, converter(tr, pr, 'greater'), rtol=1e-14) |
|
|
|
|
| def test_ttest_ind_scalar(): |
| |
| with suppress_warnings() as sup, np.errstate(invalid="ignore"): |
| sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice") |
| t, p = stats.ttest_ind(4., 3.) |
| assert np.isnan(t) |
| assert np.isnan(p) |
|
|
|
|
| @pytest.mark.filterwarnings("ignore:Arguments...:DeprecationWarning") |
| class Test_ttest_ind_permutations: |
| N = 20 |
|
|
| |
| np.random.seed(0) |
| a = np.vstack((np.arange(3*N//4), np.random.random(3*N//4))) |
| b = np.vstack((np.arange(N//4) + 100, np.random.random(N//4))) |
|
|
| |
| a2 = np.arange(10) |
| b2 = np.arange(10) + 100 |
|
|
| |
| a3 = [1, 2] |
| b3 = [3, 4] |
|
|
| |
| np.random.seed(0) |
| rvs1 = stats.norm.rvs(loc=5, scale=10, |
| size=500).reshape(100, 5).T |
| rvs2 = stats.norm.rvs(loc=8, scale=20, size=100) |
|
|
| p_d = [1/1001, (676+1)/1001] |
| p_d_gen = [1/1001, (672 + 1)/1001] |
| p_d_big = [(993+1)/1001, (685+1)/1001, (840+1)/1001, |
| (955+1)/1001, (255+1)/1001] |
|
|
| params = [ |
| (a, b, {"axis": 1}, p_d), |
| (a.T, b.T, {'axis': 0}, p_d), |
| (a[0, :], b[0, :], {'axis': None}, p_d[0]), |
| (a[0, :].tolist(), b[0, :].tolist(), {'axis': None}, p_d[0]), |
| |
| (a, b, {'random_state': 0, "axis": 1}, p_d), |
| (a, b, {'random_state': np.random.RandomState(0), "axis": 1}, p_d), |
| (a2, b2, {'equal_var': True}, 1/1001), |
| (rvs1, rvs2, {'axis': -1, 'random_state': 0}, p_d_big), |
| (a3, b3, {}, 1/3), |
| (a, b, {'random_state': np.random.default_rng(0), "axis": 1}, p_d_gen), |
| ] |
|
|
| @pytest.mark.parametrize("a,b,update,p_d", params) |
| def test_ttest_ind_permutations(self, a, b, update, p_d): |
| options_a = {'axis': None, 'equal_var': False} |
| options_p = {'axis': None, 'equal_var': False, |
| 'permutations': 1000, 'random_state': 0} |
| options_a.update(update) |
| options_p.update(update) |
|
|
| stat_a, _ = stats.ttest_ind(a, b, **options_a) |
| stat_p, pvalue = stats.ttest_ind(a, b, **options_p) |
| assert_array_almost_equal(stat_a, stat_p, 5) |
| assert_array_almost_equal(pvalue, p_d) |
|
|
| def test_ttest_ind_exact_alternative(self): |
| np.random.seed(0) |
| N = 3 |
| a = np.random.rand(2, N, 2) |
| b = np.random.rand(2, N, 2) |
|
|
| options_p = {'axis': 1, 'permutations': 1000} |
|
|
| options_p.update(alternative="greater") |
| res_g_ab = stats.ttest_ind(a, b, **options_p) |
| res_g_ba = stats.ttest_ind(b, a, **options_p) |
|
|
| options_p.update(alternative="less") |
| res_l_ab = stats.ttest_ind(a, b, **options_p) |
| res_l_ba = stats.ttest_ind(b, a, **options_p) |
|
|
| options_p.update(alternative="two-sided") |
| res_2_ab = stats.ttest_ind(a, b, **options_p) |
| res_2_ba = stats.ttest_ind(b, a, **options_p) |
|
|
| |
| assert_equal(res_g_ab.statistic, res_l_ab.statistic) |
| assert_equal(res_g_ab.statistic, res_2_ab.statistic) |
|
|
| |
| assert_equal(res_g_ab.statistic, -res_g_ba.statistic) |
| assert_equal(res_l_ab.statistic, -res_l_ba.statistic) |
| assert_equal(res_2_ab.statistic, -res_2_ba.statistic) |
|
|
| |
| assert_equal(res_2_ab.pvalue, res_2_ba.pvalue) |
|
|
| |
| |
| assert_equal(res_g_ab.pvalue, res_l_ba.pvalue) |
| assert_equal(res_l_ab.pvalue, res_g_ba.pvalue) |
| mask = res_g_ab.pvalue <= 0.5 |
| assert_equal(res_g_ab.pvalue[mask] + res_l_ba.pvalue[mask], |
| res_2_ab.pvalue[mask]) |
| assert_equal(res_l_ab.pvalue[~mask] + res_g_ba.pvalue[~mask], |
| res_2_ab.pvalue[~mask]) |
|
|
| def test_ttest_ind_exact_selection(self): |
| |
| np.random.seed(0) |
| N = 3 |
| a = np.random.rand(N) |
| b = np.random.rand(N) |
| res0 = stats.ttest_ind(a, b) |
| res1 = stats.ttest_ind(a, b, permutations=1000) |
| res2 = stats.ttest_ind(a, b, permutations=0) |
| res3 = stats.ttest_ind(a, b, permutations=np.inf) |
| assert res1.pvalue != res0.pvalue |
| assert res2.pvalue == res0.pvalue |
| assert res3.pvalue == res1.pvalue |
|
|
| def test_ttest_ind_exact_distribution(self): |
| |
| |
| |
| np.random.seed(0) |
| a = np.random.rand(3) |
| b = np.random.rand(4) |
|
|
| data = np.concatenate((a, b)) |
| na, nb = len(a), len(b) |
|
|
| permutations = 100000 |
| t_stat, _, _ = _permutation_distribution_t(data, permutations, na, |
| True) |
|
|
| n_unique = len(set(t_stat)) |
| assert n_unique == binom(na + nb, na) |
| assert len(t_stat) == n_unique |
|
|
| def test_ttest_ind_randperm_alternative(self): |
| np.random.seed(0) |
| N = 50 |
| a = np.random.rand(2, 3, N) |
| b = np.random.rand(3, N) |
| options_p = {'axis': -1, 'permutations': 1000, "random_state": 0} |
|
|
| options_p.update(alternative="greater") |
| res_g_ab = stats.ttest_ind(a, b, **options_p) |
| res_g_ba = stats.ttest_ind(b, a, **options_p) |
|
|
| options_p.update(alternative="less") |
| res_l_ab = stats.ttest_ind(a, b, **options_p) |
| res_l_ba = stats.ttest_ind(b, a, **options_p) |
|
|
| |
| assert_equal(res_g_ab.statistic, res_l_ab.statistic) |
|
|
| |
| assert_equal(res_g_ab.statistic, -res_g_ba.statistic) |
| assert_equal(res_l_ab.statistic, -res_l_ba.statistic) |
|
|
| |
| |
| assert_equal(res_g_ab.pvalue + res_l_ab.pvalue, |
| 1 + 1/(options_p['permutations'] + 1)) |
| assert_equal(res_g_ba.pvalue + res_l_ba.pvalue, |
| 1 + 1/(options_p['permutations'] + 1)) |
|
|
| @pytest.mark.slow() |
| def test_ttest_ind_randperm_alternative2(self): |
| np.random.seed(0) |
| N = 50 |
| a = np.random.rand(N, 4) |
| b = np.random.rand(N, 4) |
| options_p = {'permutations': 20000, "random_state": 0} |
|
|
| options_p.update(alternative="greater") |
| res_g_ab = stats.ttest_ind(a, b, **options_p) |
|
|
| options_p.update(alternative="less") |
| res_l_ab = stats.ttest_ind(a, b, **options_p) |
|
|
| options_p.update(alternative="two-sided") |
| res_2_ab = stats.ttest_ind(a, b, **options_p) |
|
|
| |
| |
| assert_equal(res_g_ab.pvalue + res_l_ab.pvalue, |
| 1 + 1/(options_p['permutations'] + 1)) |
|
|
| |
| |
| mask = res_g_ab.pvalue <= 0.5 |
| assert_allclose(2 * res_g_ab.pvalue[mask], |
| res_2_ab.pvalue[mask], atol=2e-2) |
| assert_allclose(2 * (1-res_g_ab.pvalue[~mask]), |
| res_2_ab.pvalue[~mask], atol=2e-2) |
| assert_allclose(2 * res_l_ab.pvalue[~mask], |
| res_2_ab.pvalue[~mask], atol=2e-2) |
| assert_allclose(2 * (1-res_l_ab.pvalue[mask]), |
| res_2_ab.pvalue[mask], atol=2e-2) |
|
|
| def test_ttest_ind_permutation_nanpolicy(self): |
| np.random.seed(0) |
| N = 50 |
| a = np.random.rand(N, 5) |
| b = np.random.rand(N, 5) |
| a[5, 1] = np.nan |
| b[8, 2] = np.nan |
| a[9, 3] = np.nan |
| b[9, 3] = np.nan |
| options_p = {'permutations': 1000, "random_state": 0} |
|
|
| |
| options_p.update(nan_policy="raise") |
| with assert_raises(ValueError, match="The input contains nan values"): |
| res = stats.ttest_ind(a, b, **options_p) |
|
|
| |
| with suppress_warnings() as sup: |
| sup.record(RuntimeWarning, "invalid value*") |
| options_p.update(nan_policy="propagate") |
| res = stats.ttest_ind(a, b, **options_p) |
|
|
| mask = np.isnan(a).any(axis=0) | np.isnan(b).any(axis=0) |
| res2 = stats.ttest_ind(a[:, ~mask], b[:, ~mask], **options_p) |
|
|
| assert_equal(res.pvalue[mask], np.nan) |
| assert_equal(res.statistic[mask], np.nan) |
|
|
| assert_allclose(res.pvalue[~mask], res2.pvalue) |
| assert_allclose(res.statistic[~mask], res2.statistic) |
|
|
| |
| res = stats.ttest_ind(a.ravel(), b.ravel(), **options_p) |
| assert np.isnan(res.pvalue) |
| assert np.isnan(res.statistic) |
|
|
| def test_ttest_ind_permutation_check_inputs(self): |
| with assert_raises(ValueError, match="Permutations must be"): |
| stats.ttest_ind(self.a2, self.b2, permutations=-3) |
| with assert_raises(ValueError, match="Permutations must be"): |
| stats.ttest_ind(self.a2, self.b2, permutations=1.5) |
| with assert_raises(ValueError, match="'hello' cannot be used"): |
| stats.ttest_ind(self.a, self.b, permutations=1, |
| random_state='hello', axis=1) |
|
|
| def test_ttest_ind_permutation_check_p_values(self): |
| |
| N = 10 |
| a = np.random.rand(N, 20) |
| b = np.random.rand(N, 20) |
| p_values = stats.ttest_ind(a, b, permutations=1).pvalue |
| print(0.0 not in p_values) |
| assert 0.0 not in p_values |
|
|
| @pytest.mark.parametrize("alternative", ['less', 'greater', 'two-sided']) |
| @pytest.mark.parametrize("shape", [(12,), (2, 12)]) |
| def test_permutation_method(self, alternative, shape): |
| rng = np.random.default_rng(2348934579834565) |
| x = rng.random(size=shape) |
| y = rng.random(size=13) |
|
|
| kwargs = dict(n_resamples=999) |
|
|
| |
| rng = np.random.default_rng(348934579834565) |
| method = stats.PermutationMethod(rng=rng, **kwargs) |
| res = stats.ttest_ind(x, y, axis=-1, alternative=alternative, method=method) |
|
|
| |
| def statistic(x, y, axis): return stats.ttest_ind(x, y, axis=axis).statistic |
| rng = np.random.default_rng(348934579834565) |
| ref = stats.permutation_test((x, y), statistic, axis=-1, rng=rng, |
| alternative=alternative, **kwargs) |
|
|
| assert_equal(res.statistic, ref.statistic) |
| assert_equal(res.pvalue, ref.pvalue) |
|
|
| |
| ref = stats.ttest_ind(x, y, axis=-1, alternative=alternative) |
| assert_equal(res.statistic, ref.statistic) |
| assert_allclose(res.pvalue, ref.pvalue, rtol=3e-2) |
|
|
| @pytest.mark.parametrize("alternative", ['less', 'greater', 'two-sided']) |
| @pytest.mark.parametrize("shape", [(12,), (2, 12)]) |
| def test_monte_carlo_method(self, alternative, shape): |
| rng = np.random.default_rng(2348934579834565) |
| x = rng.random(size=shape) |
| y = rng.random(size=13) |
|
|
| kwargs = dict(n_resamples=999) |
|
|
| |
| def statistic(x, y, axis): return stats.ttest_ind(x, y, axis=axis).statistic |
| rng = np.random.default_rng(348934579834565) |
| rvs = [rng.standard_normal, rng.standard_normal] |
| ref = stats.monte_carlo_test((x, y), rvs=rvs, statistic=statistic, axis=-1, |
| alternative=alternative, **kwargs) |
|
|
| |
| rng = np.random.default_rng(348934579834565) |
| rvs = [rng.standard_normal, rng.standard_normal] |
| method = stats.MonteCarloMethod(rvs=rvs, **kwargs) |
| res = stats.ttest_ind(x, y, axis=-1, alternative=alternative, method=method) |
| assert_equal(res.statistic, ref.statistic) |
| assert_equal(res.pvalue, ref.pvalue) |
|
|
| |
| method = stats.MonteCarloMethod(rng=348934579834565, **kwargs) |
| res = stats.ttest_ind(x, y, axis=-1, alternative=alternative, method=method) |
| assert_equal(res.statistic, ref.statistic) |
| assert_equal(res.pvalue, ref.pvalue) |
|
|
| |
| ref = stats.ttest_ind(x, y, axis=-1, alternative=alternative) |
| assert_equal(res.statistic, ref.statistic) |
| assert_allclose(res.pvalue, ref.pvalue, rtol=6e-2) |
|
|
| def test_resampling_input_validation(self): |
| message = "`method` must be an instance of `PermutationMethod`, an instance..." |
| with pytest.raises(ValueError, match=message): |
| stats.ttest_ind([1, 2, 3], [4, 5, 6], method='migratory') |
|
|
| @array_api_compatible |
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_permutation_not_implement_for_xp(self, xp): |
| a2, b2 = xp.asarray(self.a2), xp.asarray(self.b2) |
|
|
| message = "Use of `permutations` is compatible only with NumPy arrays." |
| if is_numpy(xp): |
| stats.ttest_ind(a2, b2, permutations=10) |
| else: |
| with pytest.raises(NotImplementedError, match=message): |
| stats.ttest_ind(a2, b2, permutations=10) |
|
|
| message = "Use of resampling methods is compatible only with NumPy arrays." |
| rng = np.random.default_rng(7457345872572348) |
| method = stats.PermutationMethod(rng=rng) |
| if is_numpy(xp): |
| stats.ttest_ind(a2, b2, method=method) |
| else: |
| with pytest.raises(NotImplementedError, match=message): |
| stats.ttest_ind(a2, b2, method=method) |
|
|
|
|
| @pytest.mark.filterwarnings("ignore:Arguments...:DeprecationWarning") |
| class Test_ttest_ind_common: |
| |
| |
| @pytest.mark.xslow() |
| @pytest.mark.parametrize("kwds", [{'permutations': 200, 'random_state': 0}, |
| {'trim': .2}, {}], |
| ids=["permutations", "trim", "basic"]) |
| @pytest.mark.parametrize('equal_var', [True, False], |
| ids=['equal_var', 'unequal_var']) |
| def test_ttest_many_dims(self, kwds, equal_var): |
| |
| np.random.seed(0) |
| a = np.random.rand(5, 4, 4, 7, 1, 6) |
| b = np.random.rand(4, 1, 8, 2, 6) |
| res = stats.ttest_ind(a, b, axis=-3, **kwds) |
|
|
| |
| i, j, k = 2, 3, 1 |
| a2 = a[i, :, j, :, 0, :] |
| b2 = b[:, 0, :, k, :] |
| res2 = stats.ttest_ind(a2, b2, axis=-2, **kwds) |
| assert_equal(res.statistic[i, :, j, k, :], |
| res2.statistic) |
| assert_equal(res.pvalue[i, :, j, k, :], |
| res2.pvalue) |
|
|
| |
|
|
| |
| x = np.moveaxis(np.tile(a, (1, 1, 1, 1, 2, 1)), -3, -1) |
| y = np.moveaxis(np.tile(b, (5, 1, 4, 1, 1, 1)), -3, -1) |
| shape = x.shape[:-1] |
| statistics = np.zeros(shape) |
| pvalues = np.zeros(shape) |
| for indices in product(*(range(i) for i in shape)): |
| xi = x[indices] |
| yi = y[indices] |
| res3 = stats.ttest_ind(xi, yi, axis=-1, **kwds) |
| statistics[indices] = res3.statistic |
| pvalues[indices] = res3.pvalue |
|
|
| assert_allclose(statistics, res.statistic) |
| assert_allclose(pvalues, res.pvalue) |
|
|
| @pytest.mark.parametrize("kwds", [{'permutations': 200, 'random_state': 0}, |
| {'trim': .2}, {}], |
| ids=["trim", "permutations", "basic"]) |
| @pytest.mark.parametrize("axis", [-1, 0]) |
| def test_nans_on_axis(self, kwds, axis): |
| |
| |
| a = np.random.randint(10, size=(5, 3, 10)).astype('float') |
| b = np.random.randint(10, size=(5, 3, 10)).astype('float') |
| |
| a[0][2][3] = np.nan |
| b[2][0][6] = np.nan |
|
|
| |
| |
| expected = np.isnan(np.sum(a + b, axis=axis)) |
| |
| |
| with suppress_warnings() as sup, np.errstate(invalid="ignore"): |
| sup.filter(RuntimeWarning, |
| "invalid value encountered in less_equal") |
| sup.filter(RuntimeWarning, "Precision loss occurred") |
| res = stats.ttest_ind(a, b, axis=axis, **kwds) |
| p_nans = np.isnan(res.pvalue) |
| assert_array_equal(p_nans, expected) |
| statistic_nans = np.isnan(res.statistic) |
| assert_array_equal(statistic_nans, expected) |
|
|
|
|
| class Test_ttest_trim: |
| params = [ |
| [[1, 2, 3], [1.1, 2.9, 4.2], 0.53619490753126731, -0.6864951273557258, |
| .2], |
| [[56, 128.6, 12, 123.8, 64.34, 78, 763.3], [1.1, 2.9, 4.2], |
| 0.00998909252078421, 4.591598691181999, .2], |
| [[56, 128.6, 12, 123.8, 64.34, 78, 763.3], [1.1, 2.9, 4.2], |
| 0.10512380092302633, 2.832256715395378, .32], |
| [[2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9], |
| [6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1], |
| 0.002878909511344, -4.2461168970325, .2], |
| [[-0.84504783, 0.13366078, 3.53601757, -0.62908581, 0.54119466, |
| -1.16511574, -0.08836614, 1.18495416, 2.48028757, -1.58925028, |
| -1.6706357, 0.3090472, -2.12258305, 0.3697304, -1.0415207, |
| -0.57783497, -0.90997008, 1.09850192, 0.41270579, -1.4927376], |
| [1.2725522, 1.1657899, 2.7509041, 1.2389013, -0.9490494, -1.0752459, |
| 1.1038576, 2.9912821, 3.5349111, 0.4171922, 1.0168959, -0.7625041, |
| -0.4300008, 3.0431921, 1.6035947, 0.5285634, -0.7649405, 1.5575896, |
| 1.3670797, 1.1726023], 0.005293305834235, -3.0983317739483, .2]] |
|
|
| @pytest.mark.parametrize("a,b,pr,tr,trim", params) |
| def test_ttest_compare_r(self, a, b, pr, tr, trim): |
| ''' |
| Using PairedData's yuen.t.test method. Something to note is that there |
| are at least 3 R packages that come with a trimmed t-test method, and |
| comparisons were made between them. It was found that PairedData's |
| method's results match this method, SAS, and one of the other R |
| methods. A notable discrepancy was the DescTools implementation of the |
| function, which only sometimes agreed with SAS, WRS2, PairedData and |
| this implementation. For this reason, most comparisons in R are made |
| against PairedData's method. |
| |
| Rather than providing the input and output for all evaluations, here is |
| a representative example: |
| > library(PairedData) |
| > a <- c(1, 2, 3) |
| > b <- c(1.1, 2.9, 4.2) |
| > options(digits=16) |
| > yuen.t.test(a, b, tr=.2) |
| |
| Two-sample Yuen test, trim=0.2 |
| |
| data: x and y |
| t = -0.68649512735573, df = 3.4104431643464, p-value = 0.5361949075313 |
| alternative hypothesis: true difference in trimmed means is not equal |
| to 0 |
| 95 percent confidence interval: |
| -3.912777195645217 2.446110528978550 |
| sample estimates: |
| trimmed mean of x trimmed mean of y |
| 2.000000000000000 2.73333333333333 |
| ''' |
| statistic, pvalue = stats.ttest_ind(a, b, trim=trim, equal_var=False) |
| assert_allclose(statistic, tr, atol=1e-15) |
| assert_allclose(pvalue, pr, atol=1e-15) |
|
|
| def test_compare_SAS(self): |
| |
| |
| a = [12, 14, 18, 25, 32, 44, 12, 14, 18, 25, 32, 44] |
| b = [17, 22, 14, 12, 30, 29, 19, 17, 22, 14, 12, 30, 29, 19] |
| |
| |
| |
| |
| |
| |
| statistic, pvalue = stats.ttest_ind(a, b, trim=.09, equal_var=False) |
| assert_allclose(pvalue, 0.514522, atol=1e-6) |
| assert_allclose(statistic, 0.669169, atol=1e-6) |
|
|
| def test_equal_var(self): |
| ''' |
| The PairedData library only supports unequal variances. To compare |
| samples with equal variances, the multicon library is used. |
| > library(multicon) |
| > a <- c(2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9) |
| > b <- c(6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1) |
| > dv = c(a,b) |
| > iv = c(rep('a', length(a)), rep('b', length(b))) |
| > yuenContrast(dv~ iv, EQVAR = TRUE) |
| $Ms |
| N M wgt |
| a 11 2.442857142857143 1 |
| b 11 5.385714285714286 -1 |
| |
| $test |
| stat df crit p |
| results -4.246116897032513 12 2.178812829667228 0.00113508833897713 |
| ''' |
| a = [2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9] |
| b = [6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1] |
| |
| statistic, pvalue = stats.ttest_ind(a, b, trim=.2) |
| assert_allclose(pvalue, 0.00113508833897713, atol=1e-10) |
| assert_allclose(statistic, -4.246116897032513, atol=1e-10) |
|
|
| @pytest.mark.parametrize('alt,pr,tr', |
| (('greater', 0.9985605452443, -4.2461168970325), |
| ('less', 0.001439454755672, -4.2461168970325),), |
| ) |
| def test_alternatives(self, alt, pr, tr): |
| ''' |
| > library(PairedData) |
| > a <- c(2.7,2.7,1.1,3.0,1.9,3.0,3.8,3.8,0.3,1.9,1.9) |
| > b <- c(6.5,5.4,8.1,3.5,0.5,3.8,6.8,4.9,9.5,6.2,4.1) |
| > options(digits=16) |
| > yuen.t.test(a, b, alternative = 'greater') |
| ''' |
| a = [2.7, 2.7, 1.1, 3.0, 1.9, 3.0, 3.8, 3.8, 0.3, 1.9, 1.9] |
| b = [6.5, 5.4, 8.1, 3.5, 0.5, 3.8, 6.8, 4.9, 9.5, 6.2, 4.1] |
|
|
| statistic, pvalue = stats.ttest_ind(a, b, trim=.2, equal_var=False, |
| alternative=alt) |
| assert_allclose(pvalue, pr, atol=1e-10) |
| assert_allclose(statistic, tr, atol=1e-10) |
|
|
| def test_errors_unsupported(self): |
| |
| match = "Use of `permutations` is incompatible with with use of `trim`." |
| with assert_raises(NotImplementedError, match=match): |
| message = "Arguments {'permutations'} are deprecated, whether..." |
| with pytest.warns(DeprecationWarning, match=message): |
| stats.ttest_ind([1, 2], [2, 3], trim=.2, permutations=2) |
|
|
| with assert_raises(NotImplementedError, match=match): |
| message = "Arguments {.*'random_state'.*} are deprecated, whether..." |
| with pytest.warns(DeprecationWarning, match=message): |
| stats.ttest_ind([1, 2], [2, 3], trim=.2, permutations=2, |
| random_state=2) |
|
|
| @array_api_compatible |
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_permutation_not_implement_for_xp(self, xp): |
| message = "Use of `trim` is compatible only with NumPy arrays." |
| a, b = xp.arange(10), xp.arange(10)+1 |
| if is_numpy(xp): |
| stats.ttest_ind(a, b, trim=0.1) |
| else: |
| with pytest.raises(NotImplementedError, match=message): |
| stats.ttest_ind(a, b, trim=0.1) |
|
|
| @pytest.mark.parametrize("trim", [-.2, .5, 1]) |
| def test_trim_bounds_error(self, trim): |
| match = "Trimming percentage should be 0 <= `trim` < .5." |
| with assert_raises(ValueError, match=match): |
| stats.ttest_ind([1, 2], [2, 1], trim=trim) |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| class Test_ttest_CI: |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| r = np.empty(shape=(3, 2, 2, 5)) |
| r[0, 0, 0] = [-0.2314607, 19.894435, 0.8193209, -0.247220294, 0.188729943] |
| r[1, 0, 0] = [-0.2314607, 19.894435, 0.40966045, -np.inf, 0.1382426469] |
| r[2, 0, 0] = [-0.2314607, 19.894435, 0.5903395, -0.1967329982, np.inf] |
| r[0, 0, 1] = [-0.2452886, 11.427896, 0.8105823, -0.34057446, 0.25847383] |
| r[1, 0, 1] = [-0.2452886, 11.427896, 0.40529115, -np.inf, 0.1865829074] |
| r[2, 0, 1] = [-0.2452886, 11.427896, 0.5947089, -0.268683541, np.inf] |
| |
| r[0, 1, 0] = [-0.2345625322555006, 22, 0.8167175905643815, np.nan, np.nan] |
| r[1, 1, 0] = [-0.2345625322555006, 22, 0.4083587952821908, np.nan, np.nan] |
| r[2, 1, 0] = [-0.2345625322555006, 22, 0.5916412047178092, np.nan, np.nan] |
| r[0, 1, 1] = [-0.2505369406507428, 14, 0.8058115135702835, np.nan, np.nan] |
| r[1, 1, 1] = [-0.2505369406507428, 14, 0.4029057567851417, np.nan, np.nan] |
| r[2, 1, 1] = [-0.2505369406507428, 14, 0.5970942432148583, np.nan, np.nan] |
| @pytest.mark.parametrize('alternative', ['two-sided', 'less', 'greater']) |
| @pytest.mark.parametrize('equal_var', [False, True]) |
| @pytest.mark.parametrize('trim', [0, 0.2]) |
| def test_confidence_interval(self, alternative, equal_var, trim, xp): |
| if equal_var and trim: |
| pytest.xfail('Discrepancy in `main`; needs further investigation.') |
|
|
| if trim and not is_numpy(xp): |
| pytest.skip('`trim` is only compatible with NumPy input') |
|
|
| rng = np.random.default_rng(3810954496107292580) |
| x = xp.asarray(rng.random(11)) |
| y = xp.asarray(rng.random(13)) |
|
|
| res = stats.ttest_ind(x, y, alternative=alternative, |
| equal_var=equal_var, trim=trim) |
|
|
| alternatives = {'two-sided': 0, 'less': 1, 'greater': 2} |
| ref = self.r[alternatives[alternative], int(equal_var), int(np.ceil(trim))] |
| statistic, df, pvalue, low, high = ref |
|
|
| rtol = 1e-7 |
| xp_assert_close(res.statistic, xp.asarray(statistic), rtol=rtol) |
| xp_assert_close(res.df, xp.asarray(df), rtol=rtol) |
| xp_assert_close(res.pvalue, xp.asarray(pvalue), rtol=rtol) |
|
|
| if not equal_var: |
| ci = res.confidence_interval(0.9) |
| xp_assert_close(ci.low, xp.asarray(low), rtol=rtol) |
| xp_assert_close(ci.high, xp.asarray(high), rtol=rtol) |
|
|
|
|
| def test__broadcast_concatenate(): |
| |
| |
| np.random.seed(0) |
| a = np.random.rand(5, 4, 4, 3, 1, 6) |
| b = np.random.rand(4, 1, 8, 2, 6) |
| c = _broadcast_concatenate((a, b), axis=-3) |
| |
| a = np.tile(a, (1, 1, 1, 1, 2, 1)) |
| b = np.tile(b[None, ...], (5, 1, 4, 1, 1, 1)) |
| for index in product(*(range(i) for i in c.shape)): |
| i, j, k, l, m, n = index |
| if l < a.shape[-3]: |
| assert a[i, j, k, l, m, n] == c[i, j, k, l, m, n] |
| else: |
| assert b[i, j, k, l - a.shape[-3], m, n] == c[i, j, k, l, m, n] |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_ttest_ind_with_uneq_var(xp): |
| |
| |
| |
| |
| |
|
|
| a = xp.asarray([1., 2., 3.]) |
| b = xp.asarray([1.1, 2.9, 4.2]) |
| pr = xp.asarray(0.53619490753126686) |
| tr = xp.asarray(-0.686495127355726265) |
|
|
| t, p = stats.ttest_ind(a, b, equal_var=False) |
| xp_assert_close(t, tr) |
| xp_assert_close(p, pr) |
|
|
| t, p = stats.ttest_ind_from_stats(*_desc_stats(a, b), equal_var=False) |
| xp_assert_close(t, tr) |
| xp_assert_close(p, pr) |
|
|
| a = xp.asarray([1., 2., 3., 4.]) |
| pr = xp.asarray(0.84354139131608252) |
| tr = xp.asarray(-0.210866331595072315) |
|
|
| t, p = stats.ttest_ind(a, b, equal_var=False) |
| xp_assert_close(t, tr) |
| xp_assert_close(p, pr) |
|
|
| t, p = stats.ttest_ind_from_stats(*_desc_stats(a, b), equal_var=False) |
| xp_assert_close(t, tr) |
| xp_assert_close(p, pr) |
|
|
| |
| tr = xp.asarray(1.0912746897927283) |
| tr_uneq_n = xp.asarray(0.66745638708050492) |
| pr = xp.asarray(0.27647831993021388) |
| pr_uneq_n = xp.asarray(0.50873585065616544) |
| tr_2D = xp.asarray([tr, -tr]) |
| pr_2D = xp.asarray([pr, pr]) |
|
|
| rvs3 = xp.linspace(1, 100, 25) |
| rvs2 = xp.linspace(1, 100, 100) |
| rvs1 = xp.linspace(5, 105, 100) |
| rvs1_2D = xp.stack([rvs1, rvs2]) |
| rvs2_2D = xp.stack([rvs2, rvs1]) |
|
|
| t, p = stats.ttest_ind(rvs1, rvs2, axis=0, equal_var=False) |
| xp_assert_close(t, tr) |
| xp_assert_close(p, pr) |
|
|
| t, p = stats.ttest_ind_from_stats(*_desc_stats(rvs1, rvs2), equal_var=False) |
| xp_assert_close(t, tr) |
| xp_assert_close(p, pr) |
|
|
| t, p = stats.ttest_ind(rvs1, rvs3, axis=0, equal_var=False) |
| xp_assert_close(t, tr_uneq_n) |
| xp_assert_close(p, pr_uneq_n) |
|
|
| t, p = stats.ttest_ind_from_stats(*_desc_stats(rvs1, rvs3), equal_var=False) |
| xp_assert_close(t, tr_uneq_n) |
| xp_assert_close(p, pr_uneq_n) |
|
|
| res = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0, equal_var=False) |
| xp_assert_close(res.statistic, tr_2D) |
| xp_assert_close(res.pvalue, pr_2D) |
|
|
| args = _desc_stats(rvs1_2D.T, rvs2_2D.T) |
| res = stats.ttest_ind_from_stats(*args, equal_var=False) |
| xp_assert_close(res.statistic, tr_2D) |
| xp_assert_close(res.pvalue, pr_2D) |
|
|
| res = stats.ttest_ind(rvs1_2D, rvs2_2D, axis=1, equal_var=False) |
| xp_assert_close(res.statistic, tr_2D) |
| xp_assert_close(res.pvalue, pr_2D) |
|
|
| args = _desc_stats(rvs1_2D, rvs2_2D, axis=1) |
| res = stats.ttest_ind_from_stats(*args, equal_var=False) |
| xp_assert_close(res.statistic, tr_2D) |
| xp_assert_close(res.pvalue, pr_2D) |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_ttest_ind_zero_division(xp): |
| |
| x = xp.zeros(3) |
| y = xp.ones(3) |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred"): |
| t, p = stats.ttest_ind(x, y, equal_var=False) |
| xp_assert_equal(t, xp.asarray(-xp.inf)) |
| xp_assert_equal(p, xp.asarray(0.)) |
|
|
| with np.errstate(all='ignore'): |
| t, p = stats.ttest_ind(x, x, equal_var=False) |
| xp_assert_equal(t, xp.asarray(xp.nan)) |
| xp_assert_equal(p, xp.asarray(xp.nan)) |
|
|
| |
| anan = xp.asarray([[1, xp.nan], [-1, 1]]) |
| t, p = stats.ttest_ind(anan, xp.zeros((2, 2)), equal_var=False) |
| xp_assert_equal(t, xp.asarray([0., np.nan])) |
| xp_assert_equal(p, xp.asarray([1., np.nan])) |
|
|
|
|
| def test_ttest_ind_nan_2nd_arg(): |
| |
| x = [np.nan, 2.0, 3.0, 4.0] |
| y = [1.0, 2.0, 1.0, 2.0] |
|
|
| r1 = stats.ttest_ind(x, y, nan_policy='omit') |
| r2 = stats.ttest_ind(y, x, nan_policy='omit') |
| assert_allclose(r2.statistic, -r1.statistic, atol=1e-15) |
| assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15) |
|
|
| |
| r3 = stats.ttest_ind(y, x[1:]) |
| assert_allclose(r2, r3, atol=1e-15) |
|
|
| |
| |
| |
| |
| assert_allclose(r2, (-2.5354627641855498, 0.052181400457057901), |
| atol=1e-15) |
|
|
|
|
| @array_api_compatible |
| def test_ttest_ind_empty_1d_returns_nan(xp): |
| |
| |
| if is_numpy(xp): |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = stats.ttest_ind(xp.asarray([]), xp.asarray([])) |
| else: |
| res = stats.ttest_ind(xp.asarray([]), xp.asarray([])) |
| assert isinstance(res, stats._stats_py.TtestResult) |
| NaN = xp.asarray(xp.nan)[()] |
| xp_assert_equal(res.statistic, NaN) |
| xp_assert_equal(res.pvalue, NaN) |
|
|
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| @pytest.mark.parametrize('b, expected_shape', |
| [(np.empty((1, 5, 0)), (3, 5)), |
| (np.empty((1, 0, 0)), (3, 0))]) |
| def test_ttest_ind_axis_size_zero(b, expected_shape, xp): |
| |
| |
| |
| a = xp.empty((3, 1, 0)) |
| b = xp.asarray(b) |
| with np.testing.suppress_warnings() as sup: |
| |
| sup.filter(SmallSampleWarning, too_small_nd_not_omit) |
| res = stats.ttest_ind(a, b, axis=-1) |
| assert isinstance(res, stats._stats_py.TtestResult) |
| expected_value = xp.full(expected_shape, fill_value=xp.nan) |
| xp_assert_equal(res.statistic, expected_value) |
| xp_assert_equal(res.pvalue, expected_value) |
|
|
|
|
| @array_api_compatible |
| def test_ttest_ind_nonaxis_size_zero(xp): |
| |
| |
| |
| |
| a = xp.empty((1, 8, 0)) |
| b = xp.empty((5, 8, 1)) |
| res = stats.ttest_ind(a, b, axis=1) |
| assert isinstance(res, stats._stats_py.TtestResult) |
| assert res.statistic.shape ==(5, 0) |
| assert res.pvalue.shape == (5, 0) |
|
|
|
|
| @array_api_compatible |
| def test_ttest_ind_nonaxis_size_zero_different_lengths(xp): |
| |
| |
| |
| |
| |
| a = xp.empty((1, 7, 0)) |
| b = xp.empty((5, 8, 1)) |
| res = stats.ttest_ind(a, b, axis=1) |
| assert isinstance(res, stats._stats_py.TtestResult) |
| assert res.statistic.shape ==(5, 0) |
| assert res.pvalue.shape == (5, 0) |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.skip_xp_backends(np_only=True, |
| reason="Other backends don't like integers") |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_gh5686(xp): |
| mean1, mean2 = xp.asarray([1, 2]), xp.asarray([3, 4]) |
| std1, std2 = xp.asarray([5, 3]), xp.asarray([4, 5]) |
| nobs1, nobs2 = xp.asarray([130, 140]), xp.asarray([100, 150]) |
| |
| stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2) |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_ttest_ind_from_stats_inputs_zero(xp): |
| |
| zero = xp.asarray(0.) |
| six = xp.asarray(6.) |
| NaN = xp.asarray(xp.nan) |
| res = stats.ttest_ind_from_stats(zero, zero, six, zero, zero, six, equal_var=False) |
| xp_assert_equal(res.statistic, NaN) |
| xp_assert_equal(res.pvalue, NaN) |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_ttest_uniform_pvalues(xp): |
| |
| rng = np.random.default_rng(246834602926842) |
| x = xp.asarray(rng.normal(size=(10000, 2))) |
| y = xp.asarray(rng.normal(size=(10000, 1))) |
| q = rng.uniform(size=100) |
|
|
| res = stats.ttest_ind(x, y, equal_var=True, axis=-1) |
| pvalue = np.asarray(res.pvalue) |
| assert stats.ks_1samp(pvalue, stats.uniform().cdf).pvalue > 0.1 |
| assert_allclose(np.quantile(pvalue, q), q, atol=1e-2) |
|
|
| res = stats.ttest_ind(y, x, equal_var=True, axis=-1) |
| pvalue = np.asarray(res.pvalue) |
| assert stats.ks_1samp(pvalue, stats.uniform().cdf).pvalue > 0.1 |
| assert_allclose(np.quantile(pvalue, q), q, atol=1e-2) |
|
|
| |
| |
| |
| x, y = xp.asarray([2, 3, 5]), xp.asarray([1.5]) |
|
|
| res = stats.ttest_ind(x, y, equal_var=True) |
| rtol = 1e-6 if is_torch(xp) else 1e-10 |
| xp_assert_close(res.statistic, xp.asarray(1.0394023007754), rtol=rtol) |
| xp_assert_close(res.pvalue, xp.asarray(0.407779907736), rtol=rtol) |
|
|
|
|
| def _convert_pvalue_alternative(t, p, alt, xp): |
| |
| |
| less = xp.asarray(alt == "less") |
| greater = xp.asarray(alt == "greater") |
| i = ((t < 0) & less) | ((t > 0) & greater) |
| return xp.where(i, p/2, 1 - p/2) |
|
|
|
|
| @pytest.mark.slow |
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_ttest_1samp_new(xp): |
| n1, n2, n3 = (10, 15, 20) |
| rvn1 = stats.norm.rvs(loc=5, scale=10, size=(n1, n2, n3)) |
| rvn1 = xp.asarray(rvn1) |
|
|
| |
| |
| popmean = xp.ones((1, n2, n3)) |
| t1, p1 = stats.ttest_1samp(rvn1, popmean, axis=0) |
| t2, p2 = stats.ttest_1samp(rvn1, 1., axis=0) |
| t3, p3 = stats.ttest_1samp(rvn1[:, 0, 0], 1.) |
| xp_assert_close(t1, t2, rtol=1e-14) |
| xp_assert_close(t1[0, 0], t3, rtol=1e-14) |
| assert_equal(t1.shape, (n2, n3)) |
|
|
| popmean = xp.ones((n1, 1, n3)) |
| t1, p1 = stats.ttest_1samp(rvn1, popmean, axis=1) |
| t2, p2 = stats.ttest_1samp(rvn1, 1., axis=1) |
| t3, p3 = stats.ttest_1samp(rvn1[0, :, 0], 1.) |
| xp_assert_close(t1, t2, rtol=1e-14) |
| xp_assert_close(t1[0, 0], t3, rtol=1e-14) |
| assert_equal(t1.shape, (n1, n3)) |
|
|
| popmean = xp.ones((n1, n2, 1)) |
| t1, p1 = stats.ttest_1samp(rvn1, popmean, axis=2) |
| t2, p2 = stats.ttest_1samp(rvn1, 1., axis=2) |
| t3, p3 = stats.ttest_1samp(rvn1[0, 0, :], 1.) |
| xp_assert_close(t1, t2, rtol=1e-14) |
| xp_assert_close(t1[0, 0], t3, rtol=1e-14) |
| assert_equal(t1.shape, (n1, n2)) |
|
|
| |
| t, p = stats.ttest_1samp(xp.asarray([0., 0., 0.]), 1.) |
| xp_assert_equal(xp.abs(t), xp.asarray(xp.inf)) |
| xp_assert_equal(p, xp.asarray(0.)) |
|
|
| tr, pr = stats.ttest_1samp(rvn1[:, :, :], 1.) |
|
|
| t, p = stats.ttest_1samp(rvn1[:, :, :], 1., alternative="greater") |
| pc = _convert_pvalue_alternative(tr, pr, "greater", xp) |
| xp_assert_close(p, pc) |
| xp_assert_close(t, tr) |
|
|
| t, p = stats.ttest_1samp(rvn1[:, :, :], 1., alternative="less") |
| pc = _convert_pvalue_alternative(tr, pr, "less", xp) |
| xp_assert_close(p, pc) |
| xp_assert_close(t, tr) |
|
|
| with np.errstate(all='ignore'): |
| res = stats.ttest_1samp(xp.asarray([0., 0., 0.]), 0.) |
| xp_assert_equal(res.statistic, xp.asarray(xp.nan)) |
| xp_assert_equal(res.pvalue, xp.asarray(xp.nan)) |
|
|
| |
| anan = xp.asarray([[1., np.nan], [-1., 1.]]) |
| res = stats.ttest_1samp(anan, 0.) |
| xp_assert_equal(res.statistic, xp.asarray([0., xp.nan])) |
| xp_assert_equal(res.pvalue, xp.asarray([1., xp.nan])) |
|
|
|
|
| @pytest.mark.skip_xp_backends(np_only=True, |
| reason="Only NumPy has nan_policy='omit' for now") |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_ttest_1samp_new_omit(xp): |
| n1, n2, n3 = (5, 10, 15) |
| rvn1 = stats.norm.rvs(loc=5, scale=10, size=(n1, n2, n3)) |
| rvn1 = xp.asarray(rvn1) |
|
|
| rvn1[0:2, 1:3, 4:8] = xp.nan |
|
|
| tr, pr = stats.ttest_1samp(rvn1[:, :, :], 1., nan_policy='omit') |
|
|
| t, p = stats.ttest_1samp(rvn1[:, :, :], 1., nan_policy='omit', |
| alternative="greater") |
| pc = _convert_pvalue_alternative(tr, pr, "greater", xp) |
| xp_assert_close(p, pc) |
| xp_assert_close(t, tr) |
|
|
| t, p = stats.ttest_1samp(rvn1[:, :, :], 1., nan_policy='omit', |
| alternative="less") |
| pc = _convert_pvalue_alternative(tr, pr, "less", xp) |
| xp_assert_close(p, pc) |
| xp_assert_close(t, tr) |
|
|
|
|
| @pytest.mark.skip_xp_backends(cpu_only=True, |
| reason='Uses NumPy for pvalue, CI') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_ttest_1samp_popmean_array(xp): |
| |
| |
| |
| rng = np.random.default_rng(2913300596553337193) |
| x = rng.random(size=(1, 15, 20)) |
| x = xp.asarray(x) |
|
|
| message = r"`popmean.shape\[axis\]` must equal 1." |
| popmean = xp.asarray(rng.random(size=(5, 2, 20))) |
| with pytest.raises(ValueError, match=message): |
| stats.ttest_1samp(x, popmean=popmean, axis=-2) |
|
|
| popmean = xp.asarray(rng.random(size=(5, 1, 20))) |
| res = stats.ttest_1samp(x, popmean=popmean, axis=-2) |
| assert res.statistic.shape == (5, 20) |
|
|
| xp_test = array_namespace(x) |
| l, u = res.confidence_interval() |
| l = xp_test.expand_dims(l, axis=-2) |
| u = xp_test.expand_dims(u, axis=-2) |
|
|
| res = stats.ttest_1samp(x, popmean=l, axis=-2) |
| ref = xp.broadcast_to(xp.asarray(0.05, dtype=xp.float64), res.pvalue.shape) |
| xp_assert_close(res.pvalue, ref) |
|
|
| res = stats.ttest_1samp(x, popmean=u, axis=-2) |
| xp_assert_close(res.pvalue, ref) |
|
|
|
|
| class TestDescribe: |
| @array_api_compatible |
| def test_describe_scalar(self, xp): |
| with suppress_warnings() as sup, \ |
| np.errstate(invalid="ignore", divide="ignore"): |
| sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice") |
| n, mm, m, v, sk, kurt = stats.describe(xp.asarray(4.)[()]) |
| assert n == 1 |
| xp_assert_equal(mm[0], xp.asarray(4.0)) |
| xp_assert_equal(mm[1], xp.asarray(4.0)) |
| xp_assert_equal(m, xp.asarray(4.0)) |
| xp_assert_equal(v ,xp.asarray(xp.nan)) |
| xp_assert_equal(sk, xp.asarray(xp.nan)) |
| xp_assert_equal(kurt, xp.asarray(xp.nan)) |
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_describe_numbers(self, xp): |
| xp_test = array_namespace(xp.asarray(1.)) |
| x = xp_test.concat((xp.ones((3, 4)), xp.full((2, 4), 2.))) |
| nc = 5 |
| mmc = (xp.asarray([1., 1., 1., 1.]), xp.asarray([2., 2., 2., 2.])) |
| mc = xp.asarray([1.4, 1.4, 1.4, 1.4]) |
| vc = xp.asarray([0.3, 0.3, 0.3, 0.3]) |
| skc = xp.asarray([0.40824829046386357] * 4) |
| kurtc = xp.asarray([-1.833333333333333] * 4) |
| n, mm, m, v, sk, kurt = stats.describe(x) |
| assert n == nc |
| xp_assert_equal(mm[0], mmc[0]) |
| xp_assert_equal(mm[1], mmc[1]) |
| xp_assert_close(m, mc, rtol=4 * xp.finfo(m.dtype).eps) |
| xp_assert_close(v, vc, rtol=4 * xp.finfo(m.dtype).eps) |
| xp_assert_close(sk, skc) |
| xp_assert_close(kurt, kurtc) |
|
|
| n, mm, m, v, sk, kurt = stats.describe(x.T, axis=1) |
| assert n == nc |
| xp_assert_equal(mm[0], mmc[0]) |
| xp_assert_equal(mm[1], mmc[1]) |
| xp_assert_close(m, mc, rtol=4 * xp.finfo(m.dtype).eps) |
| xp_assert_close(v, vc, rtol=4 * xp.finfo(m.dtype).eps) |
| xp_assert_close(sk, skc) |
| xp_assert_close(kurt, kurtc) |
|
|
| def describe_nan_policy_omit_test(self): |
| x = np.arange(10.) |
| x[9] = np.nan |
|
|
| nc, mmc = (9, (0.0, 8.0)) |
| mc = 4.0 |
| vc = 7.5 |
| skc = 0.0 |
| kurtc = -1.2300000000000002 |
| n, mm, m, v, sk, kurt = stats.describe(x, nan_policy='omit') |
| assert_equal(n, nc) |
| assert_equal(mm, mmc) |
| assert_equal(m, mc) |
| assert_equal(v, vc) |
| assert_array_almost_equal(sk, skc) |
| assert_array_almost_equal(kurt, kurtc, decimal=13) |
|
|
| @array_api_compatible |
| def test_describe_nan_policy_other(self, xp): |
| x = xp.arange(10.) |
| x = xp.where(x==9, xp.asarray(xp.nan), x) |
|
|
| message = 'The input contains nan values' |
| with pytest.raises(ValueError, match=message): |
| stats.describe(x, nan_policy='raise') |
|
|
| n, mm, m, v, sk, kurt = stats.describe(x, nan_policy='propagate') |
| ref = xp.asarray(xp.nan)[()] |
| assert n == 10 |
| xp_assert_equal(mm[0], ref) |
| xp_assert_equal(mm[1], ref) |
| xp_assert_equal(m, ref) |
| xp_assert_equal(v, ref) |
| xp_assert_equal(sk, ref) |
| xp_assert_equal(kurt, ref) |
|
|
| if is_numpy(xp): |
| self.describe_nan_policy_omit_test() |
| else: |
| message = "`nan_policy='omit' is incompatible with non-NumPy arrays." |
| with pytest.raises(ValueError, match=message): |
| stats.describe(x, nan_policy='omit') |
|
|
| message = 'nan_policy must be one of...' |
| with pytest.raises(ValueError, match=message): |
| stats.describe(x, nan_policy='foobar') |
|
|
| def test_describe_result_attributes(self): |
| |
| |
| actual = stats.describe(np.arange(5.)) |
| attributes = ('nobs', 'minmax', 'mean', 'variance', 'skewness', 'kurtosis') |
| check_named_results(actual, attributes) |
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_describe_ddof(self, xp): |
| xp_test = array_namespace(xp.asarray(1.)) |
| x = xp_test.concat((xp.ones((3, 4)), xp.full((2, 4), 2.))) |
| nc = 5 |
| mmc = (xp.asarray([1., 1., 1., 1.]), xp.asarray([2., 2., 2., 2.])) |
| mc = xp.asarray([1.4, 1.4, 1.4, 1.4]) |
| vc = xp.asarray([0.24, 0.24, 0.24, 0.24]) |
| skc = xp.asarray([0.40824829046386357] * 4) |
| kurtc = xp.asarray([-1.833333333333333] * 4) |
| n, mm, m, v, sk, kurt = stats.describe(x, ddof=0) |
| assert n == nc |
| xp_assert_equal(mm[0], mmc[0]) |
| xp_assert_equal(mm[1], mmc[1]) |
| xp_assert_close(m, mc) |
| xp_assert_close(v, vc) |
| xp_assert_close(sk, skc) |
| xp_assert_close(kurt, kurtc) |
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_describe_axis_none(self, xp): |
| xp_test = array_namespace(xp.asarray(1.)) |
| x = xp_test.concat((xp.ones((3, 4)), xp.full((2, 4), 2.))) |
|
|
| |
| nc = 20 |
| mmc = (xp.asarray(1.0), xp.asarray(2.0)) |
| mc = xp.asarray(1.3999999999999999) |
| vc = xp.asarray(0.25263157894736848) |
| skc = xp.asarray(0.4082482904638634) |
| kurtc = xp.asarray(-1.8333333333333333) |
|
|
| |
| n, mm, m, v, sk, kurt = stats.describe(x, axis=None) |
|
|
| assert n == nc |
| xp_assert_equal(mm[0], mmc[0]) |
| xp_assert_equal(mm[1], mmc[1]) |
| xp_assert_close(m, mc) |
| xp_assert_close(v, vc) |
| xp_assert_close(sk, skc) |
| xp_assert_close(kurt, kurtc) |
|
|
| @array_api_compatible |
| def test_describe_empty(self, xp): |
| message = "The input must not be empty." |
| with pytest.raises(ValueError, match=message): |
| stats.describe(xp.asarray([])) |
|
|
|
|
| @array_api_compatible |
| class NormalityTests: |
| def test_too_small(self, xp): |
| |
| test_fun = getattr(stats, self.test_name) |
| x = xp.asarray(4.) |
| if is_numpy(xp): |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = test_fun(x) |
| NaN = xp.asarray(xp.nan) |
| xp_assert_equal(res.statistic, NaN) |
| xp_assert_equal(res.pvalue, NaN) |
| else: |
| message = "...requires at least..." |
| with pytest.raises(ValueError, match=message): |
| test_fun(x) |
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater']) |
| def test_against_R(self, alternative, xp): |
| |
| |
| |
| |
| |
| |
| |
| |
| test_name = self.test_name |
| test_fun = getattr(stats, test_name) |
| ref_statistic= xp.asarray(self.case_ref[0]) |
| ref_pvalue = xp.asarray(self.case_ref[1]) |
|
|
| kwargs = {} |
| if alternative in {'less', 'greater'}: |
| if test_name in {'skewtest', 'kurtosistest'}: |
| ref_pvalue = ref_pvalue/2 if alternative == "less" else 1-ref_pvalue/2 |
| ref_pvalue = 1-ref_pvalue if test_name == 'skewtest' else ref_pvalue |
| kwargs['alternative'] = alternative |
| else: |
| pytest.skip('`alternative` not available for `normaltest`') |
|
|
| x = xp.asarray((-2, -1, 0, 1, 2, 3.)*4)**2 |
| res = test_fun(x, **kwargs) |
| res_statistic, res_pvalue = res |
| xp_assert_close(res_statistic, ref_statistic) |
| xp_assert_close(res_pvalue, ref_pvalue) |
| check_named_results(res, ('statistic', 'pvalue'), xp=xp) |
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_nan(self, xp): |
| |
| test_fun = getattr(stats, self.test_name) |
| x = xp.arange(30.) |
| NaN = xp.asarray(xp.nan, dtype=x.dtype) |
| x = xp.where(x == 29, NaN, x) |
| with np.errstate(invalid="ignore"): |
| res = test_fun(x) |
| xp_assert_equal(res.statistic, NaN) |
| xp_assert_equal(res.pvalue, NaN) |
|
|
| class TestSkewTest(NormalityTests): |
| test_name = 'skewtest' |
| case_ref = (1.98078826090875881, 0.04761502382843208) |
|
|
| def test_intuitive(self, xp): |
| |
| a1 = stats.skewnorm.rvs(a=1, size=10000, random_state=123) |
| a1_xp = xp.asarray(a1) |
| pval = stats.skewtest(a1_xp, alternative='greater').pvalue |
| xp_assert_close(pval, xp.asarray(0.0, dtype=a1_xp.dtype), atol=9e-6) |
|
|
| def test_skewtest_too_few_observations(self, xp): |
| |
| |
| stats.skewtest(xp.arange(8.0)) |
|
|
| x = xp.arange(7.0) |
| if is_numpy(xp): |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = stats.skewtest(x) |
| NaN = xp.asarray(xp.nan) |
| xp_assert_equal(res.statistic, NaN) |
| xp_assert_equal(res.pvalue, NaN) |
| else: |
| message = "`skewtest` requires at least 8 observations" |
| with pytest.raises(ValueError, match=message): |
| stats.skewtest(x) |
|
|
|
|
| class TestKurtosisTest(NormalityTests): |
| test_name = 'kurtosistest' |
| case_ref = (-0.01403734404759738, 0.98880018772590561) |
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_intuitive(self, xp): |
| |
| a2 = stats.laplace.rvs(size=10000, random_state=123) |
| a2_xp = xp.asarray(a2) |
| pval = stats.kurtosistest(a2_xp, alternative='greater').pvalue |
| xp_assert_close(pval, xp.asarray(0.0, dtype=a2_xp.dtype), atol=1e-15) |
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_gh9033_regression(self, xp): |
| |
| |
| counts = [128, 0, 58, 7, 0, 41, 16, 0, 0, 167] |
| x = np.hstack([np.full(c, i) for i, c in enumerate(counts)]) |
| x = xp.asarray(x, dtype=xp.float64) |
| assert stats.kurtosistest(x)[1] < 0.01 |
|
|
| @skip_xp_backends('cupy', reason='cupy/cupy#8391') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_kurtosistest_too_few_observations(self, xp): |
| |
| |
| |
| stats.kurtosistest(xp.arange(20.0)) |
|
|
| message = "`kurtosistest` p-value may be inaccurate..." |
| with pytest.warns(UserWarning, match=message): |
| stats.kurtosistest(xp.arange(5.0)) |
| with pytest.warns(UserWarning, match=message): |
| stats.kurtosistest(xp.arange(19.0)) |
|
|
| x = xp.arange(4.0) |
| if is_numpy(xp): |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = stats.skewtest(x) |
| NaN = xp.asarray(xp.nan) |
| xp_assert_equal(res.statistic, NaN) |
| xp_assert_equal(res.pvalue, NaN) |
| else: |
| message = "`kurtosistest` requires at least 5 observations" |
| with pytest.raises(ValueError, match=message): |
| stats.kurtosistest(x) |
|
|
|
|
| class TestNormalTest(NormalityTests): |
| test_name = 'normaltest' |
| case_ref = (3.92371918158185551, 0.14059672529747502) |
|
|
|
|
| class TestRankSums: |
|
|
| np.random.seed(0) |
| x, y = np.random.rand(2, 10) |
|
|
| @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided']) |
| def test_ranksums_result_attributes(self, alternative): |
| |
| res1 = stats.ranksums(self.x, self.y, |
| alternative=alternative).pvalue |
| res2 = stats.mannwhitneyu(self.x, self.y, use_continuity=False, |
| alternative=alternative).pvalue |
| assert_allclose(res1, res2) |
|
|
| def test_ranksums_named_results(self): |
| res = stats.ranksums(self.x, self.y) |
| check_named_results(res, ('statistic', 'pvalue')) |
|
|
| def test_input_validation(self): |
| with assert_raises(ValueError, match="`alternative` must be 'less'"): |
| stats.ranksums(self.x, self.y, alternative='foobar') |
|
|
|
|
| @array_api_compatible |
| class TestJarqueBera: |
| def test_jarque_bera_against_R(self, xp): |
| |
| |
| |
| |
| x = [-0.160104223201523288, 1.131262000934478040, -0.001235254523709458, |
| -0.776440091309490987, -2.072959999533182884] |
| x = xp.asarray(x) |
| ref = xp.asarray([0.17651605223752, 0.9155246169805]) |
| res = stats.jarque_bera(x) |
| xp_assert_close(res.statistic, ref[0]) |
| xp_assert_close(res.pvalue, ref[1]) |
|
|
| @skip_xp_backends(np_only=True) |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_jarque_bera_array_like(self): |
| |
| np.random.seed(987654321) |
| x = np.random.normal(0, 1, 100000) |
|
|
| jb_test1 = JB1, p1 = stats.jarque_bera(list(x)) |
| jb_test2 = JB2, p2 = stats.jarque_bera(tuple(x)) |
| jb_test3 = JB3, p3 = stats.jarque_bera(x.reshape(2, 50000)) |
|
|
| assert JB1 == JB2 == JB3 == jb_test1.statistic == jb_test2.statistic == jb_test3.statistic |
| assert p1 == p2 == p3 == jb_test1.pvalue == jb_test2.pvalue == jb_test3.pvalue |
|
|
| def test_jarque_bera_size(self, xp): |
| x = xp.asarray([]) |
| if is_numpy(xp): |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = stats.jarque_bera(x) |
| NaN = xp.asarray(xp.nan) |
| xp_assert_equal(res.statistic, NaN) |
| xp_assert_equal(res.pvalue, NaN) |
| else: |
| message = "At least one observation is required." |
| with pytest.raises(ValueError, match=message): |
| res = stats.jarque_bera(x) |
|
|
| def test_axis(self, xp): |
| rng = np.random.RandomState(seed=122398129) |
| x = xp.asarray(rng.random(size=(2, 45))) |
|
|
| res = stats.jarque_bera(x, axis=None) |
| ref = stats.jarque_bera(xp.reshape(x, (-1,))) |
| xp_assert_equal(res.statistic, ref.statistic) |
| xp_assert_equal(res.pvalue, ref.pvalue) |
|
|
| res = stats.jarque_bera(x, axis=1) |
| s0, p0 = stats.jarque_bera(x[0, :]) |
| s1, p1 = stats.jarque_bera(x[1, :]) |
| xp_assert_close(res.statistic, xp.asarray([s0, s1])) |
| xp_assert_close(res.pvalue, xp.asarray([p0, p1])) |
|
|
| resT = stats.jarque_bera(x.T, axis=0) |
| xp_assert_close(res.statistic, resT.statistic) |
| xp_assert_close(res.pvalue, resT.pvalue) |
|
|
|
|
| class TestMannWhitneyU: |
| X = [19.8958398126694, 19.5452691647182, 19.0577309166425, 21.716543054589, |
| 20.3269502208702, 20.0009273294025, 19.3440043632957, 20.4216806548105, |
| 19.0649894736528, 18.7808043120398, 19.3680942943298, 19.4848044069953, |
| 20.7514611265663, 19.0894948874598, 19.4975522356628, 18.9971170734274, |
| 20.3239606288208, 20.6921298083835, 19.0724259532507, 18.9825187935021, |
| 19.5144462609601, 19.8256857844223, 20.5174677102032, 21.1122407995892, |
| 17.9490854922535, 18.2847521114727, 20.1072217648826, 18.6439891962179, |
| 20.4970638083542, 19.5567594734914] |
|
|
| Y = [19.2790668029091, 16.993808441865, 18.5416338448258, 17.2634018833575, |
| 19.1577183624616, 18.5119655377495, 18.6068455037221, 18.8358343362655, |
| 19.0366413269742, 18.1135025515417, 19.2201873866958, 17.8344909022841, |
| 18.2894380745856, 18.6661374133922, 19.9688601693252, 16.0672254617636, |
| 19.00596360572, 19.201561539032, 19.0487501090183, 19.0847908674356] |
|
|
| significant = 14 |
|
|
| def test_mannwhitneyu_one_sided(self): |
| u1, p1 = stats.mannwhitneyu(self.X, self.Y, alternative='less') |
| u2, p2 = stats.mannwhitneyu(self.Y, self.X, alternative='greater') |
| u3, p3 = stats.mannwhitneyu(self.X, self.Y, alternative='greater') |
| u4, p4 = stats.mannwhitneyu(self.Y, self.X, alternative='less') |
|
|
| assert_equal(p1, p2) |
| assert_equal(p3, p4) |
| assert_(p1 != p3) |
| assert_equal(u1, 498) |
| assert_equal(u2, 102) |
| assert_equal(u3, 498) |
| assert_equal(u4, 102) |
| assert_approx_equal(p1, 0.999957683256589, significant=self.significant) |
| assert_approx_equal(p3, 4.5941632666275e-05, significant=self.significant) |
|
|
| def test_mannwhitneyu_two_sided(self): |
| u1, p1 = stats.mannwhitneyu(self.X, self.Y, alternative='two-sided') |
| u2, p2 = stats.mannwhitneyu(self.Y, self.X, alternative='two-sided') |
|
|
| assert_equal(p1, p2) |
| assert_equal(u1, 498) |
| assert_equal(u2, 102) |
| assert_approx_equal(p1, 9.188326533255e-05, |
| significant=self.significant) |
|
|
| def test_mannwhitneyu_no_correct_one_sided(self): |
| u1, p1 = stats.mannwhitneyu(self.X, self.Y, False, |
| alternative='less') |
| u2, p2 = stats.mannwhitneyu(self.Y, self.X, False, |
| alternative='greater') |
| u3, p3 = stats.mannwhitneyu(self.X, self.Y, False, |
| alternative='greater') |
| u4, p4 = stats.mannwhitneyu(self.Y, self.X, False, |
| alternative='less') |
|
|
| assert_equal(p1, p2) |
| assert_equal(p3, p4) |
| assert_(p1 != p3) |
| assert_equal(u1, 498) |
| assert_equal(u2, 102) |
| assert_equal(u3, 498) |
| assert_equal(u4, 102) |
| assert_approx_equal(p1, 0.999955905990004, significant=self.significant) |
| assert_approx_equal(p3, 4.40940099958089e-05, significant=self.significant) |
|
|
| def test_mannwhitneyu_no_correct_two_sided(self): |
| u1, p1 = stats.mannwhitneyu(self.X, self.Y, False, |
| alternative='two-sided') |
| u2, p2 = stats.mannwhitneyu(self.Y, self.X, False, |
| alternative='two-sided') |
|
|
| assert_equal(p1, p2) |
| assert_equal(u1, 498) |
| assert_equal(u2, 102) |
| assert_approx_equal(p1, 8.81880199916178e-05, |
| significant=self.significant) |
|
|
| def test_mannwhitneyu_ones(self): |
| |
| x = np.array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 3., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1.]) |
|
|
| y = np.array([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1., |
| 2., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2., 1., 1., 3., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., |
| 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2., |
| 2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2., |
| 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 1., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., |
| 2., 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., |
| 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 1., |
| 1., 1., 1., 1.]) |
|
|
| |
| assert_allclose(stats.mannwhitneyu(x, y, alternative='less'), |
| (16980.5, 2.8214327656317373e-005)) |
| |
| assert_allclose(stats.mannwhitneyu(x, y, alternative='greater'), |
| (16980.5, 0.9999719954296)) |
| assert_allclose(stats.mannwhitneyu(x, y, alternative='two-sided'), |
| (16980.5, 5.642865531266e-05)) |
|
|
| def test_mannwhitneyu_result_attributes(self): |
| |
| attributes = ('statistic', 'pvalue') |
| res = stats.mannwhitneyu(self.X, self.Y, alternative="less") |
| check_named_results(res, attributes) |
|
|
|
|
| def test_pointbiserial(): |
| |
| |
| x = [1,0,1,1,1,1,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0, |
| 0,0,0,0,1] |
| y = [14.8,13.8,12.4,10.1,7.1,6.1,5.8,4.6,4.3,3.5,3.3,3.2,3.0, |
| 2.8,2.8,2.5,2.4,2.3,2.1,1.7,1.7,1.5,1.3,1.3,1.2,1.2,1.1, |
| 0.8,0.7,0.6,0.5,0.2,0.2,0.1] |
| assert_almost_equal(stats.pointbiserialr(x, y)[0], 0.36149, 5) |
|
|
| |
| attributes = ('correlation', 'pvalue') |
| res = stats.pointbiserialr(x, y) |
| check_named_results(res, attributes) |
| assert_equal(res.correlation, res.statistic) |
|
|
|
|
| def test_obrientransform(): |
| |
| x1 = np.array([0, 2, 4]) |
| t1 = stats.obrientransform(x1) |
| expected = [7, -2, 7] |
| assert_allclose(t1[0], expected) |
|
|
| x2 = np.array([0, 3, 6, 9]) |
| t2 = stats.obrientransform(x2) |
| expected = np.array([30, 0, 0, 30]) |
| assert_allclose(t2[0], expected) |
|
|
| |
| a, b = stats.obrientransform(x1, x2) |
| assert_equal(a, t1[0]) |
| assert_equal(b, t2[0]) |
|
|
| |
| a, b, c = stats.obrientransform(x1, x2, x1) |
| assert_equal(a, t1[0]) |
| assert_equal(b, t2[0]) |
| assert_equal(c, t1[0]) |
|
|
| |
| |
| x1 = np.arange(5) |
| result = np.array( |
| [[5.41666667, 1.04166667, -0.41666667, 1.04166667, 5.41666667], |
| [21.66666667, 4.16666667, -1.66666667, 4.16666667, 21.66666667]]) |
| assert_array_almost_equal(stats.obrientransform(x1, 2*x1), result, decimal=8) |
|
|
| |
| |
| values = range(5, 11) |
| reps = np.array([5, 11, 9, 3, 2, 2]) |
| data = np.repeat(values, reps) |
| transformed_values = np.array([3.1828, 0.5591, 0.0344, |
| 1.6086, 5.2817, 11.0538]) |
| expected = np.repeat(transformed_values, reps) |
| result = stats.obrientransform(data) |
| assert_array_almost_equal(result[0], expected, decimal=4) |
|
|
|
|
| def check_equal_xmean(*args, xp, mean_fun, axis=None, dtype=None, |
| rtol=1e-7, weights=None): |
| |
| dtype = dtype or xp.float64 |
| if len(args) == 2: |
| array_like, desired = args |
| else: |
| array_like, p, desired = args |
| array_like = xp.asarray(array_like, dtype=dtype) |
| desired = xp.asarray(desired, dtype=dtype) |
| weights = xp.asarray(weights, dtype=dtype) if weights is not None else weights |
| args = (array_like,) if len(args) == 2 else (array_like, p) |
| x = mean_fun(*args, axis=axis, dtype=dtype, weights=weights) |
| xp_assert_close(x, desired, rtol=rtol) |
|
|
|
|
| def check_equal_gmean(*args, **kwargs): |
| return check_equal_xmean(*args, mean_fun=stats.gmean, **kwargs) |
|
|
|
|
| def check_equal_hmean(*args, **kwargs): |
| return check_equal_xmean(*args, mean_fun=stats.hmean, **kwargs) |
|
|
|
|
| def check_equal_pmean(*args, **kwargs): |
| return check_equal_xmean(*args, mean_fun=stats.pmean, **kwargs) |
|
|
|
|
| @array_api_compatible |
| class TestHMean: |
| def test_0(self, xp): |
| a = [1, 0, 2] |
| desired = 0 |
| check_equal_hmean(a, desired, xp=xp) |
|
|
| def test_1d(self, xp): |
| |
| a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100] |
| desired = 34.1417152147 |
| check_equal_hmean(a, desired, xp=xp) |
|
|
| a = [1, 2, 3, 4] |
| desired = 4. / (1. / 1 + 1. / 2 + 1. / 3 + 1. / 4) |
| check_equal_hmean(a, desired, xp=xp) |
|
|
| def test_1d_with_zero(self, xp): |
| a = np.array([1, 0]) |
| desired = 0.0 |
| check_equal_hmean(a, desired, xp=xp, rtol=0.0) |
|
|
| @skip_xp_backends('array_api_strict', |
| reason=("`array_api_strict.where` `fillvalue` doesn't " |
| "accept Python scalars. See data-apis/array-api#807.")) |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_1d_with_negative_value(self, xp): |
| |
| a = np.array([1, 0, -1]) |
| message = "The harmonic mean is only defined..." |
| with pytest.warns(RuntimeWarning, match=message): |
| check_equal_hmean(a, xp.nan, xp=xp, rtol=0.0) |
|
|
| |
| def test_2d(self, xp): |
| |
| a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] |
| desired = 38.6696271841 |
| check_equal_hmean(np.array(a), desired, xp=xp) |
|
|
| def test_2d_axis0(self, xp): |
| |
| a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] |
| desired = np.array([22.88135593, 39.13043478, 52.90076336, 65.45454545]) |
| check_equal_hmean(a, desired, axis=0, xp=xp) |
|
|
| def test_2d_axis0_with_zero(self, xp): |
| a = [[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] |
| desired = np.array([22.88135593, 0.0, 52.90076336, 65.45454545]) |
| check_equal_hmean(a, desired, axis=0, xp=xp) |
|
|
| def test_2d_axis1(self, xp): |
| |
| a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] |
| desired = np.array([19.2, 63.03939962, 103.80078637]) |
| check_equal_hmean(a, desired, axis=1, xp=xp) |
|
|
| def test_2d_axis1_with_zero(self, xp): |
| a = [[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] |
| desired = np.array([0.0, 63.03939962, 103.80078637]) |
| check_equal_hmean(a, desired, axis=1, xp=xp) |
|
|
| @pytest.mark.skip_xp_backends( |
| np_only=True, |
| reason='array-likes only supported for NumPy backend', |
| ) |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_weights_1d_list(self, xp): |
| |
| |
| a = [2, 10, 6] |
| weights = [10, 5, 3] |
| desired = 3. |
| |
| |
| |
| res = stats.hmean(a, weights=weights) |
| xp_assert_close(res, np.asarray(desired), rtol=1e-5) |
|
|
| def test_weights_1d(self, xp): |
| |
| |
| a = np.asarray([2, 10, 6]) |
| weights = np.asarray([10, 5, 3]) |
| desired = 3 |
| check_equal_hmean(a, desired, weights=weights, rtol=1e-5, xp=xp) |
|
|
| def test_weights_2d_axis0(self, xp): |
| |
| |
| a = np.array([[2, 5], [10, 5], [6, 5]]) |
| weights = np.array([[10, 1], [5, 1], [3, 1]]) |
| desired = np.array([3, 5]) |
| check_equal_hmean(a, desired, axis=0, weights=weights, rtol=1e-5, xp=xp) |
|
|
| def test_weights_2d_axis1(self, xp): |
| |
| |
| a = np.array([[2, 10, 6], [7, 7, 7]]) |
| weights = np.array([[10, 5, 3], [1, 1, 1]]) |
| desired = np.array([3, 7]) |
| check_equal_hmean(a, desired, axis=1, weights=weights, rtol=1e-5, xp=xp) |
|
|
| @skip_xp_invalid_arg |
| def test_weights_masked_1d_array(self, xp): |
| |
| |
| a = np.array([2, 10, 6, 42]) |
| weights = np.ma.array([10, 5, 3, 42], mask=[0, 0, 0, 1]) |
| desired = 3 |
| xp = np.ma |
| check_equal_hmean(a, desired, weights=weights, rtol=1e-5, |
| dtype=np.float64, xp=xp) |
|
|
|
|
| @array_api_compatible |
| class TestGMean: |
| def test_0(self, xp): |
| a = [1, 0, 2] |
| desired = 0 |
| check_equal_gmean(a, desired, xp=xp) |
|
|
| def test_1d(self, xp): |
| |
| a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100] |
| desired = 45.2872868812 |
| check_equal_gmean(a, desired, xp=xp) |
|
|
| a = [1, 2, 3, 4] |
| desired = power(1 * 2 * 3 * 4, 1. / 4.) |
| check_equal_gmean(a, desired, rtol=1e-14, xp=xp) |
|
|
| a = array([1, 2, 3, 4], float32) |
| desired = power(1 * 2 * 3 * 4, 1. / 4.) |
| check_equal_gmean(a, desired, dtype=xp.float32, xp=xp) |
|
|
| |
| def test_2d(self, xp): |
| |
| a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] |
| desired = 52.8885199 |
| check_equal_gmean(a, desired, xp=xp) |
|
|
| def test_2d_axis0(self, xp): |
| |
| a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] |
| desired = np.array([35.56893304, 49.32424149, 61.3579244, 72.68482371]) |
| check_equal_gmean(a, desired, axis=0, xp=xp) |
|
|
| a = array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]) |
| desired = array([1, 2, 3, 4]) |
| check_equal_gmean(a, desired, axis=0, rtol=1e-14, xp=xp) |
|
|
| def test_2d_axis1(self, xp): |
| |
| a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] |
| desired = np.array([22.13363839, 64.02171746, 104.40086817]) |
| check_equal_gmean(a, desired, axis=1, xp=xp) |
|
|
| a = array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]) |
| v = power(1 * 2 * 3 * 4, 1. / 4.) |
| desired = array([v, v, v]) |
| check_equal_gmean(a, desired, axis=1, rtol=1e-14, xp=xp) |
|
|
| def test_large_values(self, xp): |
| a = array([1e100, 1e200, 1e300]) |
| desired = 1e200 |
| check_equal_gmean(a, desired, rtol=1e-13, xp=xp) |
|
|
| def test_1d_with_0(self, xp): |
| |
| a = [10, 20, 30, 40, 50, 60, 70, 80, 90, 0] |
| desired = 0.0 |
| with np.errstate(all='ignore'): |
| check_equal_gmean(a, desired, xp=xp) |
|
|
| def test_1d_neg(self, xp): |
| |
| a = [10, 20, 30, 40, 50, 60, 70, 80, 90, -1] |
| desired = np.nan |
| with np.errstate(invalid='ignore'): |
| check_equal_gmean(a, desired, xp=xp) |
|
|
| @pytest.mark.skip_xp_backends( |
| np_only=True, |
| reason='array-likes only supported for NumPy backend', |
| ) |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_weights_1d_list(self, xp): |
| |
| |
| a = [1, 2, 3, 4, 5] |
| weights = [2, 5, 6, 4, 3] |
| desired = 2.77748 |
|
|
| |
| |
| |
| res = stats.gmean(a, weights=weights) |
| xp_assert_close(res, np.asarray(desired), rtol=1e-5) |
|
|
| def test_weights_1d(self, xp): |
| |
| |
| a = np.array([1, 2, 3, 4, 5]) |
| weights = np.array([2, 5, 6, 4, 3]) |
| desired = 2.77748 |
| check_equal_gmean(a, desired, weights=weights, rtol=1e-5, xp=xp) |
|
|
| @skip_xp_invalid_arg |
| def test_weights_masked_1d_array(self, xp): |
| |
| |
| a = np.array([1, 2, 3, 4, 5, 6]) |
| weights = np.ma.array([2, 5, 6, 4, 3, 5], mask=[0, 0, 0, 0, 0, 1]) |
| desired = 2.77748 |
| xp = np.ma |
| check_equal_gmean(a, desired, weights=weights, rtol=1e-5, |
| dtype=np.float64, xp=xp) |
|
|
|
|
| @array_api_compatible |
| class TestPMean: |
|
|
| def pmean_reference(a, p): |
| return (np.sum(a**p) / a.size)**(1/p) |
|
|
| def wpmean_reference(a, p, weights): |
| return (np.sum(weights * a**p) / np.sum(weights))**(1/p) |
|
|
| def test_bad_exponent(self, xp): |
| with pytest.raises(ValueError, match='Power mean only defined for'): |
| stats.pmean(xp.asarray([1, 2, 3]), xp.asarray([0])) |
| with pytest.raises(ValueError, match='Power mean only defined for'): |
| stats.pmean(xp.asarray([1, 2, 3]), xp.asarray([0])) |
|
|
| def test_1d(self, xp): |
| a, p = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], 3.5 |
| desired = TestPMean.pmean_reference(np.array(a), p) |
| check_equal_pmean(a, p, desired, xp=xp) |
|
|
| a, p = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], -2.5 |
| desired = TestPMean.pmean_reference(np.array(a), p) |
| check_equal_pmean(a, p, desired, xp=xp) |
|
|
| a, p = [1, 2, 3, 4], 2 |
| desired = np.sqrt((1**2 + 2**2 + 3**2 + 4**2) / 4) |
| check_equal_pmean(a, p, desired, xp=xp) |
|
|
| def test_1d_with_zero(self, xp): |
| a, p = np.array([1, 0]), -1 |
| desired = 0.0 |
| check_equal_pmean(a, p, desired, rtol=0.0, xp=xp) |
|
|
| @skip_xp_backends('array_api_strict', |
| reason=("`array_api_strict.where` `fillvalue` doesn't " |
| "accept Python scalars. See data-apis/array-api#807.")) |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_1d_with_negative_value(self, xp): |
| a, p = np.array([1, 0, -1]), 1.23 |
| message = "The power mean is only defined..." |
| with pytest.warns(RuntimeWarning, match=message): |
| check_equal_pmean(a, p, xp.nan, xp=xp) |
|
|
| @pytest.mark.parametrize( |
| ("a", "p"), |
| [([[10, 20], [50, 60], [90, 100]], -0.5), |
| (np.array([[10, 20], [50, 60], [90, 100]]), 0.5)] |
| ) |
| def test_2d_axisnone(self, a, p, xp): |
| desired = TestPMean.pmean_reference(np.array(a), p) |
| check_equal_pmean(a, p, desired, xp=xp) |
|
|
| @pytest.mark.parametrize( |
| ("a", "p"), |
| [([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], -0.5), |
| ([[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], 0.5)] |
| ) |
| def test_2d_axis0(self, a, p, xp): |
| desired = [ |
| TestPMean.pmean_reference( |
| np.array([a[i][j] for i in range(len(a))]), p |
| ) |
| for j in range(len(a[0])) |
| ] |
| check_equal_pmean(a, p, desired, axis=0, xp=xp) |
|
|
| @pytest.mark.parametrize( |
| ("a", "p"), |
| [([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], -0.5), |
| ([[10, 0, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]], 0.5)] |
| ) |
| def test_2d_axis1(self, a, p, xp): |
| desired = [TestPMean.pmean_reference(np.array(a_), p) for a_ in a] |
| check_equal_pmean(a, p, desired, axis=1, xp=xp) |
|
|
| def test_weights_1d(self, xp): |
| a, p = [2, 10, 6], -1.23456789 |
| weights = [10, 5, 3] |
| desired = TestPMean.wpmean_reference(np.array(a), p, weights) |
| check_equal_pmean(a, p, desired, weights=weights, rtol=1e-5, xp=xp) |
|
|
| @pytest.mark.skip_xp_backends( |
| np_only=True, |
| reason='array-likes only supported for NumPy backend', |
| ) |
| @pytest.mark.usefixtures("skip_xp_backends") |
| def test_weights_1d_list(self, xp): |
| a, p = [2, 10, 6], -1.23456789 |
| weights = [10, 5, 3] |
| desired = TestPMean.wpmean_reference(np.array(a), p, weights) |
| |
| |
| |
| res = stats.pmean(a, p, weights=weights) |
| xp_assert_close(res, np.asarray(desired), rtol=1e-5) |
|
|
| @skip_xp_invalid_arg |
| def test_weights_masked_1d_array(self, xp): |
| a, p = np.array([2, 10, 6, 42]), 1 |
| weights = np.ma.array([10, 5, 3, 42], mask=[0, 0, 0, 1]) |
| desired = np.average(a, weights=weights) |
| xp = np.ma |
| check_equal_pmean(a, p, desired, weights=weights, rtol=1e-5, |
| dtype=np.float64, xp=xp) |
|
|
| @pytest.mark.parametrize( |
| ("axis", "fun_name", "p"), |
| [(None, "wpmean_reference", 9.87654321), |
| (0, "gmean", 0), |
| (1, "hmean", -1)] |
| ) |
| def test_weights_2d(self, axis, fun_name, p, xp): |
| if fun_name == 'wpmean_reference': |
| def fun(a, axis, weights): |
| return TestPMean.wpmean_reference(a, p, weights) |
| else: |
| fun = getattr(stats, fun_name) |
| a = np.array([[2, 5], [10, 5], [6, 5]]) |
| weights = np.array([[10, 1], [5, 1], [3, 1]]) |
| desired = fun(a, axis=axis, weights=weights) |
| check_equal_pmean(a, p, desired, axis=axis, weights=weights, rtol=1e-5, xp=xp) |
|
|
|
|
| class TestGSTD: |
| |
| array_1d = np.arange(2 * 3 * 4) + 1 |
| gstd_array_1d = 2.294407613602 |
| array_3d = array_1d.reshape(2, 3, 4) |
|
|
| def test_1d_array(self): |
| gstd_actual = stats.gstd(self.array_1d) |
| assert_allclose(gstd_actual, self.gstd_array_1d) |
|
|
| def test_1d_numeric_array_like_input(self): |
| gstd_actual = stats.gstd(tuple(self.array_1d)) |
| assert_allclose(gstd_actual, self.gstd_array_1d) |
|
|
| def test_raises_value_error_non_numeric_input(self): |
| |
| with pytest.raises(TypeError, match="ufunc 'log' not supported"): |
| stats.gstd('You cannot take the logarithm of a string.') |
|
|
| @pytest.mark.parametrize('bad_value', (0, -1, np.inf, np.nan)) |
| def test_returns_nan_invalid_value(self, bad_value): |
| x = np.append(self.array_1d, [bad_value]) |
| if np.isfinite(bad_value): |
| message = "The geometric standard deviation is only defined..." |
| with pytest.warns(RuntimeWarning, match=message): |
| res = stats.gstd(x) |
| else: |
| res = stats.gstd(x) |
| assert_equal(res, np.nan) |
|
|
| def test_propagates_nan_values(self): |
| a = array([[1, 1, 1, 16], [np.nan, 1, 2, 3]]) |
| gstd_actual = stats.gstd(a, axis=1) |
| assert_allclose(gstd_actual, np.array([4, np.nan])) |
|
|
| def test_ddof_equal_to_number_of_observations(self): |
| with pytest.warns(RuntimeWarning, match='Degrees of freedom <= 0'): |
| assert_equal(stats.gstd(self.array_1d, ddof=self.array_1d.size), np.inf) |
|
|
| def test_3d_array(self): |
| gstd_actual = stats.gstd(self.array_3d, axis=None) |
| assert_allclose(gstd_actual, self.gstd_array_1d) |
|
|
| def test_3d_array_axis_type_tuple(self): |
| gstd_actual = stats.gstd(self.array_3d, axis=(1,2)) |
| assert_allclose(gstd_actual, [2.12939215, 1.22120169]) |
|
|
| def test_3d_array_axis_0(self): |
| gstd_actual = stats.gstd(self.array_3d, axis=0) |
| gstd_desired = np.array([ |
| [6.1330555493918, 3.958900210120, 3.1206598248344, 2.6651441426902], |
| [2.3758135028411, 2.174581428192, 2.0260062829505, 1.9115518327308], |
| [1.8205343606803, 1.746342404566, 1.6846557065742, 1.6325269194382] |
| ]) |
| assert_allclose(gstd_actual, gstd_desired) |
|
|
| def test_3d_array_axis_1(self): |
| gstd_actual = stats.gstd(self.array_3d, axis=1) |
| gstd_desired = np.array([ |
| [3.118993630946, 2.275985934063, 1.933995977619, 1.742896469724], |
| [1.271693593916, 1.254158641801, 1.238774141609, 1.225164057869] |
| ]) |
| assert_allclose(gstd_actual, gstd_desired) |
|
|
| def test_3d_array_axis_2(self): |
| gstd_actual = stats.gstd(self.array_3d, axis=2) |
| gstd_desired = np.array([ |
| [1.8242475707664, 1.2243686572447, 1.1318311657788], |
| [1.0934830582351, 1.0724479791887, 1.0591498540749] |
| ]) |
| assert_allclose(gstd_actual, gstd_desired) |
|
|
| def test_masked_3d_array(self): |
| ma = np.ma.masked_where(self.array_3d > 16, self.array_3d) |
| message = "`gstd` support for masked array input was deprecated in..." |
| with pytest.warns(DeprecationWarning, match=message): |
| gstd_actual = stats.gstd(ma, axis=2) |
| gstd_desired = stats.gstd(self.array_3d, axis=2) |
| mask = [[0, 0, 0], [0, 1, 1]] |
| assert_allclose(gstd_actual, gstd_desired) |
| assert_equal(gstd_actual.mask, mask) |
|
|
|
|
| def test_binomtest(): |
| |
| pp = np.concatenate((np.linspace(0.1, 0.2, 5), |
| np.linspace(0.45, 0.65, 5), |
| np.linspace(0.85, 0.95, 5))) |
| n = 501 |
| x = 450 |
| results = [0.0, 0.0, 1.0159969301994141e-304, |
| 2.9752418572150531e-275, 7.7668382922535275e-250, |
| 2.3381250925167094e-099, 7.8284591587323951e-081, |
| 9.9155947819961383e-065, 2.8729390725176308e-050, |
| 1.7175066298388421e-037, 0.0021070691951093692, |
| 0.12044570587262322, 0.88154763174802508, 0.027120993063129286, |
| 2.6102587134694721e-006] |
|
|
| for p, res in zip(pp, results): |
| assert_approx_equal(stats.binomtest(x, n, p).pvalue, res, |
| significant=12, err_msg=f'fail forp={p}') |
| assert_approx_equal(stats.binomtest(50, 100, 0.1).pvalue, |
| 5.8320387857343647e-024, |
| significant=12) |
|
|
|
|
| def test_binomtest2(): |
| |
| res2 = [ |
| [1.0, 1.0], |
| [0.5, 1.0, 0.5], |
| [0.25, 1.00, 1.00, 0.25], |
| [0.125, 0.625, 1.000, 0.625, 0.125], |
| [0.0625, 0.3750, 1.0000, 1.0000, 0.3750, 0.0625], |
| [0.03125, 0.21875, 0.68750, 1.00000, 0.68750, 0.21875, 0.03125], |
| [0.015625, 0.125000, 0.453125, 1.000000, 1.000000, 0.453125, 0.125000, |
| 0.015625], |
| [0.0078125, 0.0703125, 0.2890625, 0.7265625, 1.0000000, 0.7265625, |
| 0.2890625, 0.0703125, 0.0078125], |
| [0.00390625, 0.03906250, 0.17968750, 0.50781250, 1.00000000, |
| 1.00000000, 0.50781250, 0.17968750, 0.03906250, 0.00390625], |
| [0.001953125, 0.021484375, 0.109375000, 0.343750000, 0.753906250, |
| 1.000000000, 0.753906250, 0.343750000, 0.109375000, 0.021484375, |
| 0.001953125] |
| ] |
| for k in range(1, 11): |
| res1 = [stats.binomtest(v, k, 0.5).pvalue for v in range(k + 1)] |
| assert_almost_equal(res1, res2[k-1], decimal=10) |
|
|
|
|
| def test_binomtest3(): |
| |
| |
| res3 = [stats.binomtest(v, v*k, 1./k).pvalue |
| for v in range(1, 11) for k in range(2, 11)] |
| assert_equal(res3, np.ones(len(res3), int)) |
|
|
| |
| |
| |
| |
| |
| |
| |
| binom_testm1 = np.array([ |
| 0.5, 0.5555555555555556, 0.578125, 0.5904000000000003, |
| 0.5981224279835393, 0.603430543396034, 0.607304096221924, |
| 0.610255656871054, 0.612579511000001, 0.625, 0.670781893004115, |
| 0.68853759765625, 0.6980101120000006, 0.703906431368616, |
| 0.70793209416498, 0.7108561134173507, 0.713076544331419, |
| 0.714820192935702, 0.6875, 0.7268709038256367, 0.7418963909149174, |
| 0.74986110468096, 0.7548015520398076, 0.7581671424768577, |
| 0.760607984787832, 0.762459425024199, 0.7639120677676575, 0.7265625, |
| 0.761553963657302, 0.774800934828818, 0.7818005980538996, |
| 0.78613491480358, 0.789084353140195, 0.7912217659828884, |
| 0.79284214559524, 0.794112956558801, 0.75390625, 0.7856929451142176, |
| 0.7976688481430754, 0.8039848974727624, 0.807891868948366, |
| 0.8105487660137676, 0.812473307174702, 0.8139318233591120, |
| 0.815075399104785, 0.7744140625, 0.8037322594985427, |
| 0.814742863657656, 0.8205425178645808, 0.8241275984172285, |
| 0.8265645374416, 0.8283292196088257, 0.829666291102775, |
| 0.8307144686362666, 0.7905273437499996, 0.8178712053954738, |
| 0.828116983756619, 0.833508948940494, 0.8368403871552892, |
| 0.839104213210105, 0.840743186196171, 0.84198481438049, |
| 0.8429580531563676, 0.803619384765625, 0.829338573944648, |
| 0.8389591907548646, 0.84401876783902, 0.84714369697889, |
| 0.8492667010581667, 0.850803474598719, 0.851967542858308, |
| 0.8528799045949524, 0.8145294189453126, 0.838881732845347, |
| 0.847979024541911, 0.852760894015685, 0.8557134656773457, |
| 0.8577190131799202, 0.85917058278431, 0.860270010472127, |
| 0.861131648404582, 0.823802947998047, 0.846984756807511, |
| 0.855635653643743, 0.860180994825685, 0.86298688573253, |
| 0.864892525675245, 0.866271647085603, 0.867316125625004, |
| 0.8681346531755114 |
| ]) |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| binom_testp1 = np.array([ |
| 0.5, 0.259259259259259, 0.26171875, 0.26272, 0.2632244513031551, |
| 0.2635138663069203, 0.2636951804161073, 0.2638162407564354, |
| 0.2639010709000002, 0.625, 0.4074074074074074, 0.42156982421875, |
| 0.4295746560000003, 0.43473045988554, 0.4383309503172684, |
| 0.4409884859402103, 0.4430309389962837, 0.444649849401104, 0.6875, |
| 0.4927602499618962, 0.5096031427383425, 0.5189636628480, |
| 0.5249280070771274, 0.5290623300865124, 0.5320974248125793, |
| 0.5344204730474308, 0.536255847400756, 0.7265625, 0.5496019313526808, |
| 0.5669248746708034, 0.576436455045805, 0.5824538812831795, |
| 0.5866053321547824, 0.589642781414643, 0.5919618019300193, |
| 0.593790427805202, 0.75390625, 0.590868349763505, 0.607983393277209, |
| 0.617303847446822, 0.623172512167948, 0.627208862156123, |
| 0.6301556891501057, 0.632401894928977, 0.6341708982290303, |
| 0.7744140625, 0.622562037497196, 0.639236102912278, 0.648263335014579, |
| 0.65392850011132, 0.657816519817211, 0.660650782947676, |
| 0.662808780346311, 0.6645068560246006, 0.7905273437499996, |
| 0.6478843304312477, 0.6640468318879372, 0.6727589686071775, |
| 0.6782129857784873, 0.681950188903695, 0.684671508668418, |
| 0.686741824999918, 0.688369886732168, 0.803619384765625, |
| 0.668716055304315, 0.684360013879534, 0.6927642396829181, |
| 0.6980155964704895, 0.701609591890657, 0.7042244320992127, |
| 0.7062125081341817, 0.707775152962577, 0.8145294189453126, |
| 0.686243374488305, 0.7013873696358975, 0.709501223328243, |
| 0.714563595144314, 0.718024953392931, 0.7205416252126137, |
| 0.722454130389843, 0.723956813292035, 0.823802947998047, |
| 0.701255953767043, 0.715928221686075, 0.723772209289768, |
| 0.7286603031173616, 0.7319999279787631, 0.7344267920995765, |
| 0.736270323773157, 0.737718376096348 |
| ]) |
|
|
| res4_p1 = [stats.binomtest(v+1, v*k, 1./k).pvalue |
| for v in range(1, 11) for k in range(2, 11)] |
| res4_m1 = [stats.binomtest(v-1, v*k, 1./k).pvalue |
| for v in range(1, 11) for k in range(2, 11)] |
|
|
| assert_almost_equal(res4_p1, binom_testp1, decimal=13) |
| assert_almost_equal(res4_m1, binom_testm1, decimal=13) |
|
|
|
|
| class TestTrim: |
| |
| def test_trim1(self): |
| a = np.arange(11) |
| assert_equal(np.sort(stats.trim1(a, 0.1)), np.arange(10)) |
| assert_equal(np.sort(stats.trim1(a, 0.2)), np.arange(9)) |
| assert_equal(np.sort(stats.trim1(a, 0.2, tail='left')), |
| np.arange(2, 11)) |
| assert_equal(np.sort(stats.trim1(a, 3/11., tail='left')), |
| np.arange(3, 11)) |
| assert_equal(stats.trim1(a, 1.0), []) |
| assert_equal(stats.trim1(a, 1.0, tail='left'), []) |
|
|
| |
| assert_equal(stats.trim1([], 0.1), []) |
| assert_equal(stats.trim1([], 3/11., tail='left'), []) |
| assert_equal(stats.trim1([], 4/6.), []) |
|
|
| |
| a = np.arange(24).reshape(6, 4) |
| ref = np.arange(4, 24).reshape(5, 4) |
|
|
| axis = 0 |
| trimmed = stats.trim1(a, 0.2, tail='left', axis=axis) |
| assert_equal(np.sort(trimmed, axis=axis), ref) |
|
|
| axis = 1 |
| trimmed = stats.trim1(a.T, 0.2, tail='left', axis=axis) |
| assert_equal(np.sort(trimmed, axis=axis), ref.T) |
|
|
| def test_trimboth(self): |
| a = np.arange(11) |
| assert_equal(np.sort(stats.trimboth(a, 3/11.)), np.arange(3, 8)) |
| assert_equal(np.sort(stats.trimboth(a, 0.2)), |
| np.array([2, 3, 4, 5, 6, 7, 8])) |
| assert_equal(np.sort(stats.trimboth(np.arange(24).reshape(6, 4), 0.2)), |
| np.arange(4, 20).reshape(4, 4)) |
| assert_equal(np.sort(stats.trimboth(np.arange(24).reshape(4, 6).T, |
| 2/6.)), |
| np.array([[2, 8, 14, 20], [3, 9, 15, 21]])) |
| assert_raises(ValueError, stats.trimboth, |
| np.arange(24).reshape(4, 6).T, 4/6.) |
|
|
| |
| assert_equal(stats.trimboth([], 0.1), []) |
| assert_equal(stats.trimboth([], 3/11.), []) |
| assert_equal(stats.trimboth([], 4/6.), []) |
|
|
| def test_trim_mean(self): |
| |
| a = np.array([4, 8, 2, 0, 9, 5, 10, 1, 7, 3, 6]) |
| idx = np.array([3, 5, 0, 1, 2, 4]) |
| a2 = np.arange(24).reshape(6, 4)[idx, :] |
| a3 = np.arange(24).reshape(6, 4, order='F')[idx, :] |
| assert_equal(stats.trim_mean(a3, 2/6.), |
| np.array([2.5, 8.5, 14.5, 20.5])) |
| assert_equal(stats.trim_mean(a2, 2/6.), |
| np.array([10., 11., 12., 13.])) |
| idx4 = np.array([1, 0, 3, 2]) |
| a4 = np.arange(24).reshape(4, 6)[idx4, :] |
| assert_equal(stats.trim_mean(a4, 2/6.), |
| np.array([9., 10., 11., 12., 13., 14.])) |
| |
| a = [7, 11, 12, 21, 16, 6, 22, 1, 5, 0, 18, 10, 17, 9, 19, 15, 23, |
| 20, 2, 14, 4, 13, 8, 3] |
| assert_equal(stats.trim_mean(a, 2/6.), 11.5) |
| assert_equal(stats.trim_mean([5,4,3,1,2,0], 2/6.), 2.5) |
|
|
| |
| np.random.seed(1234) |
| a = np.random.randint(20, size=(5, 6, 4, 7)) |
| for axis in [0, 1, 2, 3, -1]: |
| res1 = stats.trim_mean(a, 2/6., axis=axis) |
| res2 = stats.trim_mean(np.moveaxis(a, axis, 0), 2/6.) |
| assert_equal(res1, res2) |
|
|
| res1 = stats.trim_mean(a, 2/6., axis=None) |
| res2 = stats.trim_mean(a.ravel(), 2/6.) |
| assert_equal(res1, res2) |
|
|
| assert_raises(ValueError, stats.trim_mean, a, 0.6) |
|
|
| |
| assert_equal(stats.trim_mean([], 0.0), np.nan) |
| assert_equal(stats.trim_mean([], 0.6), np.nan) |
|
|
|
|
| class TestSigmaClip: |
| def test_sigmaclip1(self): |
| a = np.concatenate((np.linspace(9.5, 10.5, 31), np.linspace(0, 20, 5))) |
| fact = 4 |
| c, low, upp = stats.sigmaclip(a) |
| assert_(c.min() > low) |
| assert_(c.max() < upp) |
| assert_equal(low, c.mean() - fact*c.std()) |
| assert_equal(upp, c.mean() + fact*c.std()) |
| assert_equal(c.size, a.size) |
|
|
| def test_sigmaclip2(self): |
| a = np.concatenate((np.linspace(9.5, 10.5, 31), np.linspace(0, 20, 5))) |
| fact = 1.5 |
| c, low, upp = stats.sigmaclip(a, fact, fact) |
| assert_(c.min() > low) |
| assert_(c.max() < upp) |
| assert_equal(low, c.mean() - fact*c.std()) |
| assert_equal(upp, c.mean() + fact*c.std()) |
| assert_equal(c.size, 4) |
| assert_equal(a.size, 36) |
|
|
| def test_sigmaclip3(self): |
| a = np.concatenate((np.linspace(9.5, 10.5, 11), |
| np.linspace(-100, -50, 3))) |
| fact = 1.8 |
| c, low, upp = stats.sigmaclip(a, fact, fact) |
| assert_(c.min() > low) |
| assert_(c.max() < upp) |
| assert_equal(low, c.mean() - fact*c.std()) |
| assert_equal(upp, c.mean() + fact*c.std()) |
| assert_equal(c, np.linspace(9.5, 10.5, 11)) |
|
|
| def test_sigmaclip_result_attributes(self): |
| a = np.concatenate((np.linspace(9.5, 10.5, 11), |
| np.linspace(-100, -50, 3))) |
| fact = 1.8 |
| res = stats.sigmaclip(a, fact, fact) |
| attributes = ('clipped', 'lower', 'upper') |
| check_named_results(res, attributes) |
|
|
| def test_std_zero(self): |
| |
| x = np.ones(10) |
| assert_equal(stats.sigmaclip(x)[0], x) |
|
|
|
|
| class TestAlexanderGovern: |
| def test_compare_dtypes(self): |
| args = [[13, 13, 13, 13, 13, 13, 13, 12, 12], |
| [14, 13, 12, 12, 12, 12, 12, 11, 11], |
| [14, 14, 13, 13, 13, 13, 13, 12, 12], |
| [15, 14, 13, 13, 13, 12, 12, 12, 11]] |
| args_int16 = [np.asarray(arg, dtype=np.int16) for arg in args] |
| args_int32 = [np.asarray(arg, dtype=np.int32) for arg in args] |
| args_uint8 = [np.asarray(arg, dtype=np.uint8) for arg in args] |
| args_float64 = [np.asarray(arg, dtype=np.float64) for arg in args] |
|
|
| res_int16 = stats.alexandergovern(*args_int16) |
| res_int32 = stats.alexandergovern(*args_int32) |
| res_uint8 = stats.alexandergovern(*args_uint8) |
| res_float64 = stats.alexandergovern(*args_float64) |
|
|
| assert (res_int16.pvalue == res_int32.pvalue == |
| res_uint8.pvalue == res_float64.pvalue) |
| assert (res_int16.statistic == res_int32.statistic == |
| res_uint8.statistic == res_float64.statistic) |
|
|
| @pytest.mark.parametrize('case',[([1, 2], []), ([1, 2], 2), ([1, 2], [2])]) |
| def test_too_small_inputs(self, case): |
| |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| res = stats.alexandergovern(*case) |
| assert_equal(res.statistic, np.nan) |
| assert_equal(res.pvalue, np.nan) |
|
|
| def test_bad_inputs(self): |
| |
| with np.errstate(invalid='ignore'): |
| res = stats.alexandergovern([1, 2], [np.inf, np.inf]) |
| assert_equal(res.statistic, np.nan) |
| assert_equal(res.pvalue, np.nan) |
|
|
| def test_compare_r(self): |
| ''' |
| Data generated in R with |
| > set.seed(1) |
| > library("onewaytests") |
| > library("tibble") |
| > y <- c(rnorm(40, sd=10), |
| + rnorm(30, sd=15), |
| + rnorm(20, sd=20)) |
| > x <- c(rep("one", times=40), |
| + rep("two", times=30), |
| + rep("eight", times=20)) |
| > x <- factor(x) |
| > ag.test(y ~ x, tibble(y,x)) |
| |
| Alexander-Govern Test (alpha = 0.05) |
| ------------------------------------------------------------- |
| data : y and x |
| |
| statistic : 1.359941 |
| parameter : 2 |
| p.value : 0.5066321 |
| |
| Result : Difference is not statistically significant. |
| ------------------------------------------------------------- |
| Example adapted from: |
| https://eval-serv2.metpsy.uni-jena.de/wiki-metheval-hp/index.php/R_FUN_Alexander-Govern |
| |
| ''' |
| one = [-6.264538107423324, 1.8364332422208225, -8.356286124100471, |
| 15.952808021377916, 3.295077718153605, -8.204683841180152, |
| 4.874290524284853, 7.383247051292173, 5.757813516534923, |
| -3.0538838715635603, 15.11781168450848, 3.898432364114311, |
| -6.2124058054180376, -22.146998871774997, 11.249309181431082, |
| -0.4493360901523085, -0.16190263098946087, 9.438362106852992, |
| 8.212211950980885, 5.939013212175088, 9.189773716082183, |
| 7.821363007310671, 0.745649833651906, -19.89351695863373, |
| 6.198257478947102, -0.5612873952900078, -1.557955067053293, |
| -14.707523838992744, -4.781500551086204, 4.179415601997024, |
| 13.58679551529044, -1.0278772734299553, 3.876716115593691, |
| -0.5380504058290512, -13.770595568286065, -4.149945632996798, |
| -3.942899537103493, -0.5931339671118566, 11.000253719838831, |
| 7.631757484575442] |
|
|
| two = [-2.4678539438038034, -3.8004252020476135, 10.454450631071062, |
| 8.34994798010486, -10.331335418242798, -10.612427354431794, |
| 5.468729432052455, 11.527993867731237, -1.6851931822534207, |
| 13.216615896813222, 5.971588205506021, -9.180395898761569, |
| 5.116795371366372, -16.94044644121189, 21.495355525515556, |
| 29.7059984775879, -5.508322146997636, -15.662019394747961, |
| 8.545794411636193, -2.0258190582123654, 36.024266407571645, |
| -0.5886000409975387, 10.346090436761651, 0.4200323817099909, |
| -11.14909813323608, 2.8318844927151434, -27.074379433365568, |
| 21.98332292344329, 2.2988000731784655, 32.58917505543229] |
|
|
| eight = [9.510190577993251, -14.198928618436291, 12.214527069781099, |
| -18.68195263288503, -25.07266800478204, 5.828924710349257, |
| -8.86583746436866, 0.02210703263248262, 1.4868264830332811, |
| -11.79041892376144, -11.37337465637004, -2.7035723024766414, |
| 23.56173993146409, -30.47133600859524, 11.878923752568431, |
| 6.659007424270365, 21.261996745527256, -6.083678472686013, |
| 7.400376198325763, 5.341975815444621] |
|
|
| one, two, eight = np.asarray(one), np.asarray(two), np.asarray(eight) |
| soln = stats.alexandergovern(one, two, eight) |
| assert_allclose(soln.statistic, 1.3599405447999450836) |
| assert_allclose(soln.pvalue, 0.50663205309676440091) |
|
|
| def test_compare_scholar(self): |
| ''' |
| Data taken from 'The Modification and Evaluation of the |
| Alexander-Govern Test in Terms of Power' by Kingsley Ochuko, T., |
| Abdullah, S., Binti Zain, Z., & Soaad Syed Yahaya, S. (2015). |
| ''' |
| young = [482.43, 484.36, 488.84, 495.15, 495.24, 502.69, 504.62, |
| 518.29, 519.1, 524.1, 524.12, 531.18, 548.42, 572.1, 584.68, |
| 609.09, 609.53, 666.63, 676.4] |
| middle = [335.59, 338.43, 353.54, 404.27, 437.5, 469.01, 485.85, |
| 487.3, 493.08, 494.31, 499.1, 886.41] |
| old = [519.01, 528.5, 530.23, 536.03, 538.56, 538.83, 557.24, 558.61, |
| 558.95, 565.43, 586.39, 594.69, 629.22, 645.69, 691.84] |
| young, middle, old = np.asarray(young), np.asarray(middle), np.asarray(old) |
| soln = stats.alexandergovern(young, middle, old) |
| assert_allclose(soln.statistic, 5.3237, atol=1e-3) |
| assert_allclose(soln.pvalue, 0.06982, atol=1e-4) |
|
|
| |
| ''' |
| > library("onewaytests") |
| > library("tibble") |
| > young <- c(482.43, 484.36, 488.84, 495.15, 495.24, 502.69, 504.62, |
| + 518.29, 519.1, 524.1, 524.12, 531.18, 548.42, 572.1, |
| + 584.68, 609.09, 609.53, 666.63, 676.4) |
| > middle <- c(335.59, 338.43, 353.54, 404.27, 437.5, 469.01, 485.85, |
| + 487.3, 493.08, 494.31, 499.1, 886.41) |
| > old <- c(519.01, 528.5, 530.23, 536.03, 538.56, 538.83, 557.24, |
| + 558.61, 558.95, 565.43, 586.39, 594.69, 629.22, |
| + 645.69, 691.84) |
| > young_fct <- c(rep("young", times=19)) |
| > middle_fct <-c(rep("middle", times=12)) |
| > old_fct <- c(rep("old", times=15)) |
| > ag.test(a ~ b, tibble(a=c(young, middle, old), b=factor(c(young_fct, |
| + middle_fct, old_fct)))) |
| |
| Alexander-Govern Test (alpha = 0.05) |
| ------------------------------------------------------------- |
| data : a and b |
| |
| statistic : 5.324629 |
| parameter : 2 |
| p.value : 0.06978651 |
| |
| Result : Difference is not statistically significant. |
| ------------------------------------------------------------- |
| |
| ''' |
| assert_allclose(soln.statistic, 5.324629) |
| assert_allclose(soln.pvalue, 0.06978651) |
|
|
| def test_compare_scholar3(self): |
| ''' |
| Data taken from 'Robustness And Comparative Power Of WelchAspin, |
| Alexander-Govern And Yuen Tests Under Non-Normality And Variance |
| Heteroscedasticity', by Ayed A. Almoied. 2017. Page 34-37. |
| https://digitalcommons.wayne.edu/cgi/viewcontent.cgi?article=2775&context=oa_dissertations |
| ''' |
| x1 = [-1.77559, -1.4113, -0.69457, -0.54148, -0.18808, -0.07152, |
| 0.04696, 0.051183, 0.148695, 0.168052, 0.422561, 0.458555, |
| 0.616123, 0.709968, 0.839956, 0.857226, 0.929159, 0.981442, |
| 0.999554, 1.642958] |
| x2 = [-1.47973, -1.2722, -0.91914, -0.80916, -0.75977, -0.72253, |
| -0.3601, -0.33273, -0.28859, -0.09637, -0.08969, -0.01824, |
| 0.260131, 0.289278, 0.518254, 0.683003, 0.877618, 1.172475, |
| 1.33964, 1.576766] |
| x1, x2 = np.asarray(x1), np.asarray(x2) |
| soln = stats.alexandergovern(x1, x2) |
| assert_allclose(soln.statistic, 0.713526, atol=1e-5) |
| assert_allclose(soln.pvalue, 0.398276, atol=1e-5) |
|
|
| ''' |
| tested in ag.test in R: |
| > library("onewaytests") |
| > library("tibble") |
| > x1 <- c(-1.77559, -1.4113, -0.69457, -0.54148, -0.18808, -0.07152, |
| + 0.04696, 0.051183, 0.148695, 0.168052, 0.422561, 0.458555, |
| + 0.616123, 0.709968, 0.839956, 0.857226, 0.929159, 0.981442, |
| + 0.999554, 1.642958) |
| > x2 <- c(-1.47973, -1.2722, -0.91914, -0.80916, -0.75977, -0.72253, |
| + -0.3601, -0.33273, -0.28859, -0.09637, -0.08969, -0.01824, |
| + 0.260131, 0.289278, 0.518254, 0.683003, 0.877618, 1.172475, |
| + 1.33964, 1.576766) |
| > x1_fact <- c(rep("x1", times=20)) |
| > x2_fact <- c(rep("x2", times=20)) |
| > a <- c(x1, x2) |
| > b <- factor(c(x1_fact, x2_fact)) |
| > ag.test(a ~ b, tibble(a, b)) |
| Alexander-Govern Test (alpha = 0.05) |
| ------------------------------------------------------------- |
| data : a and b |
| |
| statistic : 0.7135182 |
| parameter : 1 |
| p.value : 0.3982783 |
| |
| Result : Difference is not statistically significant. |
| ------------------------------------------------------------- |
| ''' |
| assert_allclose(soln.statistic, 0.7135182) |
| assert_allclose(soln.pvalue, 0.3982783) |
|
|
| def test_nan_policy_propagate(self): |
| args = np.asarray([1, 2, 3, 4]), np.asarray([1, np.nan]) |
| |
| res = stats.alexandergovern(*args) |
| assert_equal(res.pvalue, np.nan) |
| assert_equal(res.statistic, np.nan) |
|
|
| def test_nan_policy_raise(self): |
| args = np.asarray([1, 2, 3, 4]), np.asarray([1, np.nan]) |
| with assert_raises(ValueError, match="The input contains nan values"): |
| stats.alexandergovern(*args, nan_policy='raise') |
|
|
| def test_nan_policy_omit(self): |
| args_nan = np.asarray([1, 2, 3, np.nan, 4]), np.asarray([1, np.nan, 19, 25]) |
| args_no_nan = np.asarray([1, 2, 3, 4]), np.asarray([1, 19, 25]) |
| res_nan = stats.alexandergovern(*args_nan, nan_policy='omit') |
| res_no_nan = stats.alexandergovern(*args_no_nan) |
| assert_equal(res_nan.pvalue, res_no_nan.pvalue) |
| assert_equal(res_nan.statistic, res_no_nan.statistic) |
|
|
| def test_constant_input(self): |
| |
| x1 = np.asarray([0.667, 0.667, 0.667]) |
| x2 = np.asarray([0.123, 0.456, 0.789]) |
| with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): |
| res = stats.alexandergovern(x1, x2) |
| assert_equal(res.statistic, np.nan) |
| assert_equal(res.pvalue, np.nan) |
|
|
|
|
| class TestFOneWay: |
|
|
| def test_trivial(self): |
| |
| F, p = stats.f_oneway([0, 2], [0, 2]) |
| assert_equal(F, 0.0) |
| assert_equal(p, 1.0) |
|
|
| def test_basic(self): |
| |
| |
| F, p = stats.f_oneway([0, 2], [2, 4]) |
| assert_equal(F, 2.0) |
| assert_allclose(p, 1 - np.sqrt(0.5), rtol=1e-14) |
|
|
| def test_known_exact(self): |
| |
| |
| F, p = stats.f_oneway([2], [2], [2, 3, 4]) |
| |
| |
| |
| assert_equal(F, 3/5) |
| assert_equal(p, 5/8) |
|
|
| def test_large_integer_array(self): |
| a = np.array([655, 788], dtype=np.uint16) |
| b = np.array([789, 772], dtype=np.uint16) |
| F, p = stats.f_oneway(a, b) |
| |
| |
| assert_allclose(F, 0.77450216931805540, rtol=1e-14) |
|
|
| def test_result_attributes(self): |
| a = np.array([655, 788], dtype=np.uint16) |
| b = np.array([789, 772], dtype=np.uint16) |
| res = stats.f_oneway(a, b) |
| attributes = ('statistic', 'pvalue') |
| check_named_results(res, attributes) |
|
|
| def test_nist(self): |
| |
| |
| filenames = ['SiRstv.dat', 'SmLs01.dat', 'SmLs02.dat', 'SmLs03.dat', |
| 'AtmWtAg.dat', 'SmLs04.dat', 'SmLs05.dat', 'SmLs06.dat', |
| 'SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat'] |
|
|
| for test_case in filenames: |
| rtol = 1e-7 |
| fname = os.path.abspath(os.path.join(os.path.dirname(__file__), |
| 'data/nist_anova', test_case)) |
| with open(fname) as f: |
| content = f.read().split('\n') |
| certified = [line.split() for line in content[40:48] |
| if line.strip()] |
| dataf = np.loadtxt(fname, skiprows=60) |
| y, x = dataf.T |
| y = y.astype(int) |
| caty = np.unique(y) |
| f = float(certified[0][-1]) |
|
|
| xlist = [x[y == i] for i in caty] |
| res = stats.f_oneway(*xlist) |
|
|
| |
| hard_tc = ('SmLs07.dat', 'SmLs08.dat', 'SmLs09.dat') |
| if test_case in hard_tc: |
| rtol = 1e-4 |
|
|
| assert_allclose(res[0], f, rtol=rtol, |
| err_msg=f'Failing testcase: {test_case}') |
|
|
| @pytest.mark.parametrize("a, b, expected", [ |
| (np.array([42, 42, 42]), np.array([7, 7, 7]), (np.inf, 0)), |
| (np.array([42, 42, 42]), np.array([42, 42, 42]), (np.nan, np.nan)) |
| ]) |
| def test_constant_input(self, a, b, expected): |
| |
| msg = "Each of the input arrays is constant;" |
| with pytest.warns(stats.ConstantInputWarning, match=msg): |
| f, p = stats.f_oneway(a, b) |
| assert f, p == expected |
|
|
| @pytest.mark.parametrize('axis', [-2, -1, 0, 1]) |
| def test_2d_inputs(self, axis): |
| a = np.array([[1, 4, 3, 3], |
| [2, 5, 3, 3], |
| [3, 6, 3, 3], |
| [2, 3, 3, 3], |
| [1, 4, 3, 3]]) |
| b = np.array([[3, 1, 5, 3], |
| [4, 6, 5, 3], |
| [4, 3, 5, 3], |
| [1, 5, 5, 3], |
| [5, 5, 5, 3], |
| [2, 3, 5, 3], |
| [8, 2, 5, 3], |
| [2, 2, 5, 3]]) |
| c = np.array([[4, 3, 4, 3], |
| [4, 2, 4, 3], |
| [5, 4, 4, 3], |
| [5, 4, 4, 3]]) |
|
|
| if axis in [-1, 1]: |
| a = a.T |
| b = b.T |
| c = c.T |
| take_axis = 0 |
| else: |
| take_axis = 1 |
|
|
| warn_msg = "Each of the input arrays is constant;" |
| with pytest.warns(stats.ConstantInputWarning, match=warn_msg): |
| f, p = stats.f_oneway(a, b, c, axis=axis) |
|
|
| |
| |
| for j in [0, 1]: |
| fj, pj = stats.f_oneway(np.take(a, j, take_axis), |
| np.take(b, j, take_axis), |
| np.take(c, j, take_axis)) |
| assert_allclose(f[j], fj, rtol=1e-14) |
| assert_allclose(p[j], pj, rtol=1e-14) |
| for j in [2, 3]: |
| with pytest.warns(stats.ConstantInputWarning, match=warn_msg): |
| fj, pj = stats.f_oneway(np.take(a, j, take_axis), |
| np.take(b, j, take_axis), |
| np.take(c, j, take_axis)) |
| assert_equal(f[j], fj) |
| assert_equal(p[j], pj) |
|
|
| def test_3d_inputs(self): |
| |
| a = 1/np.arange(1.0, 4*5*7 + 1).reshape(4, 5, 7) |
| b = 2/np.arange(1.0, 4*8*7 + 1).reshape(4, 8, 7) |
| c = np.cos(1/np.arange(1.0, 4*4*7 + 1).reshape(4, 4, 7)) |
|
|
| f, p = stats.f_oneway(a, b, c, axis=1) |
|
|
| assert f.shape == (4, 7) |
| assert p.shape == (4, 7) |
|
|
| for i in range(a.shape[0]): |
| for j in range(a.shape[2]): |
| fij, pij = stats.f_oneway(a[i, :, j], b[i, :, j], c[i, :, j]) |
| assert_allclose(fij, f[i, j]) |
| assert_allclose(pij, p[i, j]) |
|
|
| def test_length0_1d_error(self): |
| |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| result = stats.f_oneway([1, 2, 3], [], [4, 5, 6, 7]) |
| assert_equal(result, (np.nan, np.nan)) |
|
|
| def test_length0_2d_error(self): |
| with pytest.warns(SmallSampleWarning, match=too_small_nd_not_omit): |
| ncols = 3 |
| a = np.ones((4, ncols)) |
| b = np.ones((0, ncols)) |
| c = np.ones((5, ncols)) |
| f, p = stats.f_oneway(a, b, c) |
| nans = np.full((ncols,), fill_value=np.nan) |
| assert_equal(f, nans) |
| assert_equal(p, nans) |
|
|
| def test_all_length_one(self): |
| with pytest.warns(SmallSampleWarning): |
| result = stats.f_oneway([10], [11], [12], [13]) |
| assert_equal(result, (np.nan, np.nan)) |
|
|
| @pytest.mark.parametrize('args', [(), ([1, 2, 3],)]) |
| def test_too_few_inputs(self, args): |
| message = "At least two samples are required..." |
| with assert_raises(TypeError, match=message): |
| stats.f_oneway(*args) |
|
|
| def test_axis_error(self): |
| a = np.ones((3, 4)) |
| b = np.ones((5, 4)) |
| with assert_raises(AxisError): |
| stats.f_oneway(a, b, axis=2) |
|
|
| def test_bad_shapes(self): |
| a = np.ones((3, 4)) |
| b = np.ones((5, 4)) |
| with assert_raises(ValueError): |
| stats.f_oneway(a, b, axis=1) |
|
|
|
|
| class TestKruskal: |
| def test_simple(self): |
| x = [1] |
| y = [2] |
| h, p = stats.kruskal(x, y) |
| assert_equal(h, 1.0) |
| assert_approx_equal(p, stats.distributions.chi2.sf(h, 1)) |
| h, p = stats.kruskal(np.array(x), np.array(y)) |
| assert_equal(h, 1.0) |
| assert_approx_equal(p, stats.distributions.chi2.sf(h, 1)) |
|
|
| def test_basic(self): |
| x = [1, 3, 5, 7, 9] |
| y = [2, 4, 6, 8, 10] |
| h, p = stats.kruskal(x, y) |
| assert_approx_equal(h, 3./11, significant=10) |
| assert_approx_equal(p, stats.distributions.chi2.sf(3./11, 1)) |
| h, p = stats.kruskal(np.array(x), np.array(y)) |
| assert_approx_equal(h, 3./11, significant=10) |
| assert_approx_equal(p, stats.distributions.chi2.sf(3./11, 1)) |
|
|
| def test_simple_tie(self): |
| x = [1] |
| y = [1, 2] |
| h_uncorr = 1.5**2 + 2*2.25**2 - 12 |
| corr = 0.75 |
| expected = h_uncorr / corr |
| h, p = stats.kruskal(x, y) |
| |
| |
| assert_equal(h, expected) |
|
|
| def test_another_tie(self): |
| x = [1, 1, 1, 2] |
| y = [2, 2, 2, 2] |
| h_uncorr = (12. / 8. / 9.) * 4 * (3**2 + 6**2) - 3 * 9 |
| corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8) |
| expected = h_uncorr / corr |
| h, p = stats.kruskal(x, y) |
| assert_approx_equal(h, expected) |
|
|
| def test_three_groups(self): |
| |
| x = [1, 1, 1] |
| y = [2, 2, 2] |
| z = [2, 2] |
| h_uncorr = (12. / 8. / 9.) * (3*2**2 + 3*6**2 + 2*6**2) - 3 * 9 |
| corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8) |
| expected = h_uncorr / corr |
| h, p = stats.kruskal(x, y, z) |
| assert_approx_equal(h, expected) |
| assert_approx_equal(p, stats.distributions.chi2.sf(h, 2)) |
|
|
| def test_empty(self): |
| |
| x = [1, 1, 1] |
| y = [2, 2, 2] |
| z = [] |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| assert_equal(stats.kruskal(x, y, z), (np.nan, np.nan)) |
|
|
| def test_kruskal_result_attributes(self): |
| x = [1, 3, 5, 7, 9] |
| y = [2, 4, 6, 8, 10] |
| res = stats.kruskal(x, y) |
| attributes = ('statistic', 'pvalue') |
| check_named_results(res, attributes) |
|
|
| def test_nan_policy(self): |
| x = np.arange(10.) |
| x[9] = np.nan |
| assert_equal(stats.kruskal(x, x), (np.nan, np.nan)) |
| assert_almost_equal(stats.kruskal(x, x, nan_policy='omit'), (0.0, 1.0)) |
| assert_raises(ValueError, stats.kruskal, x, x, nan_policy='raise') |
| assert_raises(ValueError, stats.kruskal, x, x, nan_policy='foobar') |
|
|
| def test_large_no_samples(self): |
| |
| n = 50000 |
| x = np.random.randn(n) |
| y = np.random.randn(n) + 50 |
| h, p = stats.kruskal(x, y) |
| expected = 0 |
| assert_approx_equal(p, expected) |
|
|
| def test_no_args_gh20661(self): |
| message = r"Need at least two groups in stats.kruskal\(\)" |
| with pytest.raises(ValueError, match=message): |
| stats.kruskal() |
|
|
|
|
| @skip_xp_backends(cpu_only=True, |
| exceptions=['cupy', 'jax.numpy'], |
| reason=('Delegation for `special.stdtr` only implemented ' |
| 'for CuPy and JAX.')) |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| class TestCombinePvalues: |
| |
| |
| |
| |
| |
| |
| |
| |
| @pytest.mark.parametrize( |
| "method, expected_statistic, expected_pvalue", |
| [("fisher", 14.83716180549625 , 0.02156175132483465), |
| ("stouffer", 2.131790594240385, 0.01651203260896294), |
| ("pearson", -1.179737662212887, 1-0.9778736999143087), |
| ("tippett", 0.01, 0.02970100000000002), |
| |
| ("mudholkar_george", 6.828712071641684, 0.01654551838539527)]) |
| def test_reference_values(self, xp, method, expected_statistic, expected_pvalue): |
| x = [.01, .2, .3] |
| res = stats.combine_pvalues(xp.asarray(x), method=method) |
| xp_assert_close(res.statistic, xp.asarray(expected_statistic)) |
| xp_assert_close(res.pvalue, xp.asarray(expected_pvalue)) |
|
|
| @pytest.mark.parametrize( |
| |
| |
| |
| |
| |
| |
| "weights, expected_statistic, expected_pvalue", |
| [([1., 1., 1.], 2.131790594240385, 0.01651203260896294), |
| ([1., 4., 9.], 1.051815015753598, 0.1464422142261314)]) |
| def test_weighted_stouffer(self, xp, weights, expected_statistic, expected_pvalue): |
| x = xp.asarray([.01, .2, .3]) |
| res = stats.combine_pvalues(x, method='stouffer', weights=xp.asarray(weights)) |
| xp_assert_close(res.statistic, xp.asarray(expected_statistic)) |
| xp_assert_close(res.pvalue, xp.asarray(expected_pvalue)) |
|
|
| methods = ["fisher", "pearson", "tippett", "stouffer", "mudholkar_george"] |
|
|
| @pytest.mark.parametrize("variant", ["single", "all", "random"]) |
| @pytest.mark.parametrize("method", methods) |
| def test_monotonicity(self, variant, method, xp): |
| xp_test = array_namespace(xp.asarray(1)) |
| |
| m, n = 10, 7 |
| rng = np.random.default_rng(278448169958891062669391462690811630763) |
|
|
| |
| |
| |
| |
| if variant == "single": |
| pvaluess = xp.broadcast_to(xp.asarray(rng.random(n)), (m, n)) |
| pvaluess = xp_test.concat([xp.reshape(xp.linspace(0.1, 0.9, m), (-1, 1)), |
| pvaluess[:, 1:]], axis=1) |
| elif variant == "all": |
| pvaluess = xp.broadcast_to(xp.linspace(0.1, 0.9, m), (n, m)).T |
| elif variant == "random": |
| pvaluess = xp_test.sort(xp.asarray(rng.uniform(0, 1, size=(m, n))), axis=0) |
|
|
| combined_pvalues = xp.asarray([ |
| stats.combine_pvalues(pvaluess[i, :], method=method)[1] |
| for i in range(pvaluess.shape[0]) |
| ]) |
| assert xp.all(combined_pvalues[1:] - combined_pvalues[:-1] >= 0) |
|
|
| @pytest.mark.parametrize("method", methods) |
| def test_result(self, method, xp): |
| res = stats.combine_pvalues(xp.asarray([.01, .2, .3]), method=method) |
| xp_assert_equal(res.statistic, res[0]) |
| xp_assert_equal(res.pvalue, res[1]) |
|
|
| @pytest.mark.parametrize("method", methods) |
| |
| |
| @pytest.mark.parametrize("axis", [0, 1]) |
| def test_axis(self, method, axis, xp): |
| rng = np.random.default_rng(234892349810482) |
| x = xp.asarray(rng.random(size=(2, 10))) |
| x = x.T if (axis == 0) else x |
| res = stats.combine_pvalues(x, axis=axis, method=method) |
|
|
| if axis is None: |
| x = xp.reshape(x, (-1,)) |
| ref = stats.combine_pvalues(x, method=method) |
| xp_assert_close(res.statistic, ref.statistic) |
| xp_assert_close(res.pvalue, ref.pvalue) |
| return |
|
|
| x = x.T if (axis == 0) else x |
| x0, x1 = x[0, :], x[1, :] |
| ref0 = stats.combine_pvalues(x0, method=method) |
| ref1 = stats.combine_pvalues(x1, method=method) |
|
|
| xp_assert_close(res.statistic[0], ref0.statistic) |
| xp_assert_close(res.statistic[1], ref1.statistic) |
| xp_assert_close(res.pvalue[0], ref0.pvalue) |
| xp_assert_close(res.pvalue[1], ref1.pvalue) |
|
|
|
|
| class TestCdfDistanceValidation: |
| """ |
| Test that _cdf_distance() (via wasserstein_distance()) raises ValueErrors |
| for bad inputs. |
| """ |
|
|
| def test_distinct_value_and_weight_lengths(self): |
| |
| |
| assert_raises(ValueError, stats.wasserstein_distance, |
| [1], [2], [4], [3, 1]) |
| assert_raises(ValueError, stats.wasserstein_distance, [1], [2], [1, 0]) |
|
|
| def test_zero_weight(self): |
| |
| |
| assert_raises(ValueError, stats.wasserstein_distance, |
| [0, 1], [2], [0, 0]) |
| assert_raises(ValueError, stats.wasserstein_distance, |
| [0, 1], [2], [3, 1], [0]) |
|
|
| def test_negative_weights(self): |
| |
| assert_raises(ValueError, stats.wasserstein_distance, |
| [0, 1], [2, 2], [1, 1], [3, -1]) |
|
|
| def test_empty_distribution(self): |
| |
| |
| assert_raises(ValueError, stats.wasserstein_distance, [], [2, 2]) |
| assert_raises(ValueError, stats.wasserstein_distance, [1], []) |
|
|
| def test_inf_weight(self): |
| |
| assert_raises(ValueError, stats.wasserstein_distance, |
| [1, 2, 1], [1, 1], [1, np.inf, 1], [1, 1]) |
|
|
|
|
| class TestWassersteinDistanceND: |
| """ Tests for wasserstein_distance_nd() output values. |
| """ |
|
|
| def test_published_values(self): |
| |
| |
| |
| |
| u = [(1,1), (1,1), (1,1), (1,1), (1,1), (1,1), (1,1), (1,1), (1,1), (1,1), |
| (4,2), (6,1), (6,1)] |
| v = [(2,1), (2,1), (3,2), (3,2), (3,2), (5,1), (5,1), (5,1), (5,1), (5,1), |
| (5,1), (5,1), (7,1)] |
|
|
| res = stats.wasserstein_distance_nd(u, v) |
| |
| |
| |
| flow = np.array([2., 3., 5., 1., 1., 1.]) |
| dist = np.array([1.00, 5**0.5, 4.00, 2**0.5, 1.00, 1.00]) |
| ref = np.sum(flow * dist)/np.sum(flow) |
| assert_allclose(res, ref) |
|
|
| @pytest.mark.parametrize('n_value', (4, 15, 35)) |
| @pytest.mark.parametrize('ndim', (3, 4, 7)) |
| @pytest.mark.parametrize('max_repeats', (5, 10)) |
| def test_same_distribution_nD(self, ndim, n_value, max_repeats): |
| |
| |
| rng = np.random.default_rng(363836384995579937222333) |
| repeats = rng.integers(1, max_repeats, size=n_value, dtype=int) |
|
|
| u_values = rng.random(size=(n_value, ndim)) |
| v_values = np.repeat(u_values, repeats, axis=0) |
| v_weights = rng.random(np.sum(repeats)) |
| range_repeat = np.repeat(np.arange(len(repeats)), repeats) |
| u_weights = np.bincount(range_repeat, weights=v_weights) |
| index = rng.permutation(len(v_weights)) |
| v_values, v_weights = v_values[index], v_weights[index] |
|
|
| res = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights) |
| assert_allclose(res, 0, atol=1e-15) |
|
|
| @pytest.mark.parametrize('nu', (8, 9, 38)) |
| @pytest.mark.parametrize('nv', (8, 12, 17)) |
| @pytest.mark.parametrize('ndim', (3, 5, 23)) |
| def test_collapse_nD(self, nu, nv, ndim): |
| |
| |
| |
| rng = np.random.default_rng(38573488467338826109) |
| u_values = rng.random(size=(nu, ndim)) |
| v_values = np.zeros((nv, ndim)) |
| u_weights = rng.random(size=nu) |
| v_weights = rng.random(size=nv) |
| ref = np.average(np.linalg.norm(u_values, axis=1), weights=u_weights) |
| res = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights) |
| assert_allclose(res, ref) |
|
|
| @pytest.mark.parametrize('nu', (8, 16, 32)) |
| @pytest.mark.parametrize('nv', (8, 16, 32)) |
| @pytest.mark.parametrize('ndim', (1, 2, 6)) |
| def test_zero_weight_nD(self, nu, nv, ndim): |
| |
| rng = np.random.default_rng(38573488467338826109) |
| u_values = rng.random(size=(nu, ndim)) |
| v_values = rng.random(size=(nv, ndim)) |
| u_weights = rng.random(size=nu) |
| v_weights = rng.random(size=nv) |
| ref = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights) |
|
|
| add_row, nrows = rng.integers(0, nu, size=2) |
| add_value = rng.random(size=(nrows, ndim)) |
| u_values = np.insert(u_values, add_row, add_value, axis=0) |
| u_weights = np.insert(u_weights, add_row, np.zeros(nrows), axis=0) |
| res = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights) |
| assert_allclose(res, ref) |
|
|
| def test_inf_values(self): |
| |
| |
| uv, vv, uw = [[1, 1], [2, 1]], [[np.inf, -np.inf]], [1, 1] |
| distance = stats.wasserstein_distance_nd(uv, vv, uw) |
| assert_equal(distance, np.inf) |
| with np.errstate(invalid='ignore'): |
| uv, vv = [[np.inf, np.inf]], [[np.inf, -np.inf]] |
| distance = stats.wasserstein_distance_nd(uv, vv) |
| assert_equal(distance, np.nan) |
|
|
| @pytest.mark.parametrize('nu', (10, 15, 20)) |
| @pytest.mark.parametrize('nv', (10, 15, 20)) |
| @pytest.mark.parametrize('ndim', (1, 3, 5)) |
| def test_multi_dim_nD(self, nu, nv, ndim): |
| |
| rng = np.random.default_rng(2736495738494849509) |
| u_values = rng.random(size=(nu, ndim)) |
| v_values = rng.random(size=(nv, ndim)) |
| u_weights = rng.random(size=nu) |
| v_weights = rng.random(size=nv) |
| ref = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights) |
|
|
| add_dim = rng.integers(0, ndim) |
| add_value = rng.random() |
|
|
| u_values = np.insert(u_values, add_dim, add_value, axis=1) |
| v_values = np.insert(v_values, add_dim, add_value, axis=1) |
| res = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights) |
| assert_allclose(res, ref) |
|
|
| @pytest.mark.parametrize('nu', (7, 13, 19)) |
| @pytest.mark.parametrize('nv', (7, 13, 19)) |
| @pytest.mark.parametrize('ndim', (2, 4, 7)) |
| def test_orthogonal_nD(self, nu, nv, ndim): |
| |
| |
| rng = np.random.default_rng(34746837464536) |
| u_values = rng.random(size=(nu, ndim)) |
| v_values = rng.random(size=(nv, ndim)) |
| u_weights = rng.random(size=nu) |
| v_weights = rng.random(size=nv) |
| ref = stats.wasserstein_distance_nd(u_values, v_values, u_weights, v_weights) |
|
|
| dist = stats.ortho_group(ndim) |
| transform = dist.rvs(random_state=rng) |
| shift = rng.random(size=ndim) |
| res = stats.wasserstein_distance_nd(u_values @ transform + shift, |
| v_values @ transform + shift, |
| u_weights, v_weights) |
| assert_allclose(res, ref) |
|
|
| def test_error_code(self): |
| rng = np.random.default_rng(52473644737485644836320101) |
| with pytest.raises(ValueError, match='Invalid input values. The inputs'): |
| u_values = rng.random(size=(4, 10, 15)) |
| v_values = rng.random(size=(6, 2, 7)) |
| _ = stats.wasserstein_distance_nd(u_values, v_values) |
| with pytest.raises(ValueError, match='Invalid input values. Dimensions'): |
| u_values = rng.random(size=(15,)) |
| v_values = rng.random(size=(3, 15)) |
| _ = stats.wasserstein_distance_nd(u_values, v_values) |
| with pytest.raises(ValueError, |
| match='Invalid input values. If two-dimensional'): |
| u_values = rng.random(size=(2, 10)) |
| v_values = rng.random(size=(2, 2)) |
| _ = stats.wasserstein_distance_nd(u_values, v_values) |
|
|
| @pytest.mark.parametrize('u_size', [1, 10, 50]) |
| @pytest.mark.parametrize('v_size', [1, 10, 50]) |
| def test_optimization_vs_analytical(self, u_size, v_size): |
| rng = np.random.default_rng(45634745675) |
| |
| u_values = rng.random(size=(u_size, 1)) |
| v_values = rng.random(size=(v_size, 1)) |
| u_values_flat = u_values.ravel() |
| v_values_flat = v_values.ravel() |
| |
| |
| d1 = stats.wasserstein_distance(u_values_flat, v_values_flat) |
| d2 = stats.wasserstein_distance_nd(u_values, v_values) |
| d3 = stats.wasserstein_distance_nd(u_values_flat, v_values_flat) |
| assert_allclose(d2, d1) |
| assert_allclose(d3, d1) |
| |
| u_weights = rng.random(size=u_size) |
| v_weights = rng.random(size=v_size) |
| d1 = stats.wasserstein_distance(u_values_flat, v_values_flat, |
| u_weights, v_weights) |
| d2 = stats.wasserstein_distance_nd(u_values, v_values, |
| u_weights, v_weights) |
| d3 = stats.wasserstein_distance_nd(u_values_flat, v_values_flat, |
| u_weights, v_weights) |
| assert_allclose(d2, d1) |
| assert_allclose(d3, d1) |
|
|
|
|
| class TestWassersteinDistance: |
| """ Tests for wasserstein_distance() output values. |
| """ |
|
|
| def test_simple(self): |
| |
| |
| assert_allclose( |
| stats.wasserstein_distance([0, 1], [0], [1, 1], [1]), |
| .5) |
| assert_allclose(stats.wasserstein_distance( |
| [0, 1], [0], [3, 1], [1]), |
| .25) |
| assert_allclose(stats.wasserstein_distance( |
| [0, 2], [0], [1, 1], [1]), |
| 1) |
| assert_allclose(stats.wasserstein_distance( |
| [0, 1, 2], [1, 2, 3]), |
| 1) |
|
|
| def test_same_distribution(self): |
| |
| |
| assert_equal(stats.wasserstein_distance([1, 2, 3], [2, 1, 3]), 0) |
| assert_equal( |
| stats.wasserstein_distance([1, 1, 1, 4], [4, 1], |
| [1, 1, 1, 1], [1, 3]), |
| 0) |
|
|
| def test_shift(self): |
| |
| |
| assert_allclose(stats.wasserstein_distance([0], [1]), 1) |
| assert_allclose(stats.wasserstein_distance([-5], [5]), 10) |
| assert_allclose( |
| stats.wasserstein_distance([1, 2, 3, 4, 5], [11, 12, 13, 14, 15]), |
| 10) |
| assert_allclose( |
| stats.wasserstein_distance([4.5, 6.7, 2.1], [4.6, 7, 9.2], |
| [3, 1, 1], [1, 3, 1]), |
| 2.5) |
|
|
| def test_combine_weights(self): |
| |
| |
| assert_allclose( |
| stats.wasserstein_distance( |
| [0, 0, 1, 1, 1, 1, 5], [0, 3, 3, 3, 3, 4, 4], |
| [1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1]), |
| stats.wasserstein_distance([5, 0, 1], [0, 4, 3], |
| [1, 2, 4], [1, 2, 4])) |
|
|
| def test_collapse(self): |
| |
| |
| |
| u = np.arange(-10, 30, 0.3) |
| v = np.zeros_like(u) |
| assert_allclose( |
| stats.wasserstein_distance(u, v), |
| np.mean(np.abs(u))) |
|
|
| u_weights = np.arange(len(u)) |
| v_weights = u_weights[::-1] |
| assert_allclose( |
| stats.wasserstein_distance(u, v, u_weights, v_weights), |
| np.average(np.abs(u), weights=u_weights)) |
|
|
| def test_zero_weight(self): |
| |
| assert_allclose( |
| stats.wasserstein_distance([1, 2, 100000], [1, 1], |
| [1, 1, 0], [1, 1]), |
| stats.wasserstein_distance([1, 2], [1, 1], [1, 1], [1, 1])) |
|
|
| def test_inf_values(self): |
| |
| |
| assert_equal( |
| stats.wasserstein_distance([1, 2, np.inf], [1, 1]), |
| np.inf) |
| assert_equal( |
| stats.wasserstein_distance([1, 2, np.inf], [-np.inf, 1]), |
| np.inf) |
| assert_equal( |
| stats.wasserstein_distance([1, -np.inf, np.inf], [1, 1]), |
| np.inf) |
| with suppress_warnings() as sup: |
| sup.record(RuntimeWarning, "invalid value*") |
| assert_equal( |
| stats.wasserstein_distance([1, 2, np.inf], [np.inf, 1]), |
| np.nan) |
|
|
|
|
| class TestEnergyDistance: |
| """ Tests for energy_distance() output values. |
| """ |
|
|
| def test_simple(self): |
| |
| |
| assert_almost_equal( |
| stats.energy_distance([0, 1], [0], [1, 1], [1]), |
| np.sqrt(2) * .5) |
| assert_almost_equal(stats.energy_distance( |
| [0, 1], [0], [3, 1], [1]), |
| np.sqrt(2) * .25) |
| assert_almost_equal(stats.energy_distance( |
| [0, 2], [0], [1, 1], [1]), |
| 2 * .5) |
| assert_almost_equal( |
| stats.energy_distance([0, 1, 2], [1, 2, 3]), |
| np.sqrt(2) * (3*(1./3**2))**.5) |
|
|
| def test_same_distribution(self): |
| |
| |
| assert_equal(stats.energy_distance([1, 2, 3], [2, 1, 3]), 0) |
| assert_equal( |
| stats.energy_distance([1, 1, 1, 4], [4, 1], [1, 1, 1, 1], [1, 3]), |
| 0) |
|
|
| def test_shift(self): |
| |
| |
| assert_almost_equal(stats.energy_distance([0], [1]), np.sqrt(2)) |
| assert_almost_equal( |
| stats.energy_distance([-5], [5]), |
| np.sqrt(2) * 10**.5) |
|
|
| def test_combine_weights(self): |
| |
| |
| assert_almost_equal( |
| stats.energy_distance([0, 0, 1, 1, 1, 1, 5], [0, 3, 3, 3, 3, 4, 4], |
| [1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1]), |
| stats.energy_distance([5, 0, 1], [0, 4, 3], [1, 2, 4], [1, 2, 4])) |
|
|
| def test_zero_weight(self): |
| |
| assert_almost_equal( |
| stats.energy_distance([1, 2, 100000], [1, 1], [1, 1, 0], [1, 1]), |
| stats.energy_distance([1, 2], [1, 1], [1, 1], [1, 1])) |
|
|
| def test_inf_values(self): |
| |
| |
| assert_equal(stats.energy_distance([1, 2, np.inf], [1, 1]), np.inf) |
| assert_equal( |
| stats.energy_distance([1, 2, np.inf], [-np.inf, 1]), |
| np.inf) |
| assert_equal( |
| stats.energy_distance([1, -np.inf, np.inf], [1, 1]), |
| np.inf) |
| with suppress_warnings() as sup: |
| sup.record(RuntimeWarning, "invalid value*") |
| assert_equal( |
| stats.energy_distance([1, 2, np.inf], [np.inf, 1]), |
| np.nan) |
|
|
|
|
| class TestBrunnerMunzel: |
| |
| X = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1] |
| Y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4] |
| significant = 13 |
|
|
| def test_brunnermunzel_one_sided(self): |
| |
| u1, p1 = stats.brunnermunzel(self.X, self.Y, alternative='less') |
| u2, p2 = stats.brunnermunzel(self.Y, self.X, alternative='greater') |
| u3, p3 = stats.brunnermunzel(self.X, self.Y, alternative='greater') |
| u4, p4 = stats.brunnermunzel(self.Y, self.X, alternative='less') |
|
|
| assert_approx_equal(p1, p2, significant=self.significant) |
| assert_approx_equal(p3, p4, significant=self.significant) |
| assert_(p1 != p3) |
| assert_approx_equal(u1, 3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(u2, -3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(u3, 3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(u4, -3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(p1, 0.0028931043330757342, |
| significant=self.significant) |
| assert_approx_equal(p3, 0.99710689566692423, |
| significant=self.significant) |
|
|
| def test_brunnermunzel_two_sided(self): |
| |
| u1, p1 = stats.brunnermunzel(self.X, self.Y, alternative='two-sided') |
| u2, p2 = stats.brunnermunzel(self.Y, self.X, alternative='two-sided') |
|
|
| assert_approx_equal(p1, p2, significant=self.significant) |
| assert_approx_equal(u1, 3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(u2, -3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(p1, 0.0057862086661515377, |
| significant=self.significant) |
|
|
| def test_brunnermunzel_default(self): |
| |
| u1, p1 = stats.brunnermunzel(self.X, self.Y) |
| u2, p2 = stats.brunnermunzel(self.Y, self.X) |
|
|
| assert_approx_equal(p1, p2, significant=self.significant) |
| assert_approx_equal(u1, 3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(u2, -3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(p1, 0.0057862086661515377, |
| significant=self.significant) |
|
|
| def test_brunnermunzel_alternative_error(self): |
| alternative = "error" |
| distribution = "t" |
| nan_policy = "propagate" |
| assert_(alternative not in ["two-sided", "greater", "less"]) |
| assert_raises(ValueError, |
| stats.brunnermunzel, |
| self.X, |
| self.Y, |
| alternative, |
| distribution, |
| nan_policy) |
|
|
| def test_brunnermunzel_distribution_norm(self): |
| u1, p1 = stats.brunnermunzel(self.X, self.Y, distribution="normal") |
| u2, p2 = stats.brunnermunzel(self.Y, self.X, distribution="normal") |
| assert_approx_equal(p1, p2, significant=self.significant) |
| assert_approx_equal(u1, 3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(u2, -3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(p1, 0.0017041417600383024, |
| significant=self.significant) |
|
|
| def test_brunnermunzel_distribution_error(self): |
| alternative = "two-sided" |
| distribution = "error" |
| nan_policy = "propagate" |
| assert_(alternative not in ["t", "normal"]) |
| assert_raises(ValueError, |
| stats.brunnermunzel, |
| self.X, |
| self.Y, |
| alternative, |
| distribution, |
| nan_policy) |
|
|
| @pytest.mark.parametrize("kwarg_update", [{'y': []}, {'x': []}, |
| {'x': [], 'y': []}]) |
| def test_brunnermunzel_empty_imput(self, kwarg_update): |
| kwargs = {'x': self.X, 'y': self.Y} |
| kwargs.update(kwarg_update) |
| with pytest.warns(SmallSampleWarning, match=too_small_1d_not_omit): |
| statistic, pvalue = stats.brunnermunzel(**kwargs) |
| assert_equal(statistic, np.nan) |
| assert_equal(pvalue, np.nan) |
|
|
| def test_brunnermunzel_nan_input_propagate(self): |
| X = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, np.nan] |
| Y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4] |
| u1, p1 = stats.brunnermunzel(X, Y, nan_policy="propagate") |
| u2, p2 = stats.brunnermunzel(Y, X, nan_policy="propagate") |
|
|
| assert_equal(u1, np.nan) |
| assert_equal(p1, np.nan) |
| assert_equal(u2, np.nan) |
| assert_equal(p2, np.nan) |
|
|
| def test_brunnermunzel_nan_input_raise(self): |
| X = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, np.nan] |
| Y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4] |
| alternative = "two-sided" |
| distribution = "t" |
| nan_policy = "raise" |
|
|
| assert_raises(ValueError, |
| stats.brunnermunzel, |
| X, |
| Y, |
| alternative, |
| distribution, |
| nan_policy) |
| assert_raises(ValueError, |
| stats.brunnermunzel, |
| Y, |
| X, |
| alternative, |
| distribution, |
| nan_policy) |
|
|
| def test_brunnermunzel_nan_input_omit(self): |
| X = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, np.nan] |
| Y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4] |
| u1, p1 = stats.brunnermunzel(X, Y, nan_policy="omit") |
| u2, p2 = stats.brunnermunzel(Y, X, nan_policy="omit") |
|
|
| assert_approx_equal(p1, p2, significant=self.significant) |
| assert_approx_equal(u1, 3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(u2, -3.1374674823029505, |
| significant=self.significant) |
| assert_approx_equal(p1, 0.0057862086661515377, |
| significant=self.significant) |
|
|
| def test_brunnermunzel_return_nan(self): |
| """ tests that a warning is emitted when p is nan |
| p-value with t-distributions can be nan (0/0) (see gh-15843) |
| """ |
| x = [1, 2, 3] |
| y = [5, 6, 7, 8, 9] |
|
|
| msg = "p-value cannot be estimated|divide by zero|invalid value encountered" |
| with pytest.warns(RuntimeWarning, match=msg): |
| stats.brunnermunzel(x, y, distribution="t") |
|
|
| def test_brunnermunzel_normal_dist(self): |
| """ tests that a p is 0 for datasets that cause p->nan |
| when t-distribution is used (see gh-15843) |
| """ |
| x = [1, 2, 3] |
| y = [5, 6, 7, 8, 9] |
|
|
| with pytest.warns(RuntimeWarning, match='divide by zero'): |
| _, p = stats.brunnermunzel(x, y, distribution="normal") |
| assert_equal(p, 0) |
|
|
|
|
| class TestQuantileTest: |
| r""" Test the non-parametric quantile test, |
| including the computation of confidence intervals |
| """ |
|
|
| def test_quantile_test_iv(self): |
| x = [1, 2, 3] |
|
|
| message = "`x` must be a one-dimensional array of numbers." |
| with pytest.raises(ValueError, match=message): |
| stats.quantile_test([x]) |
|
|
| message = "`q` must be a scalar." |
| with pytest.raises(ValueError, match=message): |
| stats.quantile_test(x, q=[1, 2]) |
|
|
| message = "`p` must be a float strictly between 0 and 1." |
| with pytest.raises(ValueError, match=message): |
| stats.quantile_test(x, p=[0.5, 0.75]) |
| with pytest.raises(ValueError, match=message): |
| stats.quantile_test(x, p=2) |
| with pytest.raises(ValueError, match=message): |
| stats.quantile_test(x, p=-0.5) |
|
|
| message = "`alternative` must be one of..." |
| with pytest.raises(ValueError, match=message): |
| stats.quantile_test(x, alternative='one-sided') |
|
|
| message = "`confidence_level` must be a number between 0 and 1." |
| with pytest.raises(ValueError, match=message): |
| stats.quantile_test(x).confidence_interval(1) |
|
|
| @pytest.mark.parametrize( |
| 'p, alpha, lb, ub, alternative', |
| [[0.3, 0.95, 1.221402758160170, 1.476980793882643, 'two-sided'], |
| [0.5, 0.9, 1.506817785112854, 1.803988415397857, 'two-sided'], |
| [0.25, 0.95, -np.inf, 1.39096812846378, 'less'], |
| [0.8, 0.9, 2.117000016612675, np.inf, 'greater']] |
| ) |
| def test_R_ci_quantile(self, p, alpha, lb, ub, alternative): |
| |
| |
| |
| |
| |
| |
| |
| |
| x = np.exp(np.arange(0, 1.01, 0.01)) |
| res = stats.quantile_test(x, p=p, alternative=alternative) |
| assert_allclose(res.confidence_interval(alpha), [lb, ub], rtol=1e-15) |
|
|
| @pytest.mark.parametrize( |
| 'q, p, alternative, ref', |
| [[1.2, 0.3, 'two-sided', 0.01515567517648], |
| [1.8, 0.5, 'two-sided', 0.1109183496606]] |
| ) |
| def test_R_pvalue(self, q, p, alternative, ref): |
| |
| |
| |
| |
| |
| x = np.exp(np.arange(0, 1.01, 0.01)) |
| res = stats.quantile_test(x, q=q, p=p, alternative=alternative) |
| assert_allclose(res.pvalue, ref, rtol=1e-12) |
|
|
| @pytest.mark.parametrize('case', ['continuous', 'discrete']) |
| @pytest.mark.parametrize('alternative', ['less', 'greater']) |
| @pytest.mark.parametrize('alpha', [0.9, 0.95]) |
| def test_pval_ci_match(self, case, alternative, alpha): |
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
|
|
| seed = int((7**len(case) + len(alternative))*alpha) |
| rng = np.random.default_rng(seed) |
| if case == 'continuous': |
| p, q = rng.random(size=2) |
| rvs = rng.random(size=100) |
| else: |
| rvs = rng.integers(1, 11, size=100) |
| p = rng.random() |
| q = rng.integers(1, 11) |
|
|
| res = stats.quantile_test(rvs, q=q, p=p, alternative=alternative) |
| ci = res.confidence_interval(confidence_level=alpha) |
|
|
| |
| if alternative == 'less': |
| i_inside = rvs <= ci.high |
| else: |
| i_inside = rvs >= ci.low |
|
|
| for x in rvs[i_inside]: |
| res = stats.quantile_test(rvs, q=x, p=p, alternative=alternative) |
| assert res.pvalue > 1 - alpha |
|
|
| for x in rvs[~i_inside]: |
| res = stats.quantile_test(rvs, q=x, p=p, alternative=alternative) |
| assert res.pvalue < 1 - alpha |
|
|
| def test_match_conover_examples(self): |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| x = [189, 233, 195, 160, 212, 176, 231, 185, 199, 213, 202, 193, |
| 174, 166, 248] |
| pvalue_expected = 0.0346 |
| res = stats.quantile_test(x, q=193, p=0.75, alternative='two-sided') |
| assert_allclose(res.pvalue, pvalue_expected, rtol=1e-5) |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| x = [59]*8 + [61]*(112-8) |
| pvalue_expected = stats.binom(p=0.5, n=112).pmf(k=8) |
| res = stats.quantile_test(x, q=60, p=0.5, alternative='greater') |
| assert_allclose(res.pvalue, pvalue_expected, atol=1e-10) |
|
|
|
|
| class TestPageTrendTest: |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| np.random.seed(0) |
| data_3_25 = np.random.rand(3, 25) |
| data_10_26 = np.random.rand(10, 26) |
|
|
| ts = [ |
| (12805, 0.3886487053947608, False, 'asymptotic', data_3_25), |
| (49140, 0.02888978556179862, False, 'asymptotic', data_10_26), |
| (12332, 0.7722477197436702, False, 'asymptotic', |
| [[72, 47, 73, 35, 47, 96, 30, 59, 41, 36, 56, 49, 81, |
| 43, 70, 47, 28, 28, 62, 20, 61, 20, 80, 24, 50], |
| [68, 52, 60, 34, 44, 20, 65, 88, 21, 81, 48, 31, 31, |
| 67, 69, 94, 30, 24, 40, 87, 70, 43, 50, 96, 43], |
| [81, 13, 85, 35, 79, 12, 92, 86, 21, 64, 16, 64, 68, |
| 17, 16, 89, 71, 43, 43, 36, 54, 13, 66, 51, 55]]), |
| (266, 4.121656378600823e-05, False, 'exact', |
| [[1.5, 4., 8.3, 5, 19, 11], |
| [5, 4, 3.5, 10, 20, 21], |
| [8.4, 3.2, 10, 12, 14, 15]]), |
| (332, 0.9566400920502488, True, 'exact', |
| [[4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1], |
| [4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1], [4, 3, 2, 1], |
| [3, 4, 1, 2], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], |
| [1, 2, 3, 4], [1, 2, 3, 4]]), |
| (241, 0.9622210164861476, True, 'exact', |
| [[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], |
| [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], |
| [3, 2, 1], [2, 1, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], |
| [1, 2, 3], [1, 2, 3], [1, 2, 3]]), |
| (197, 0.9619432897162209, True, 'exact', |
| [[6, 5, 4, 3, 2, 1], [6, 5, 4, 3, 2, 1], [1, 3, 4, 5, 2, 6]]), |
| (423, 0.9590458306880073, True, 'exact', |
| [[5, 4, 3, 2, 1], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1], |
| [5, 4, 3, 2, 1], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1], |
| [4, 1, 3, 2, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], |
| [1, 2, 3, 4, 5]]), |
| (217, 0.9693058575034678, True, 'exact', |
| [[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], |
| [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], |
| [2, 1, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], |
| [1, 2, 3]]), |
| (395, 0.991530289351305, True, 'exact', |
| [[7, 6, 5, 4, 3, 2, 1], [7, 6, 5, 4, 3, 2, 1], |
| [6, 5, 7, 4, 3, 2, 1], [1, 2, 3, 4, 5, 6, 7]]), |
| (117, 0.9997817843373017, True, 'exact', |
| [[3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], [3, 2, 1], |
| [3, 2, 1], [3, 2, 1], [3, 2, 1], [2, 1, 3], [1, 2, 3]]), |
| ] |
|
|
| @pytest.mark.parametrize("L, p, ranked, method, data", ts) |
| def test_accuracy(self, L, p, ranked, method, data): |
| np.random.seed(42) |
| res = stats.page_trend_test(data, ranked=ranked, method=method) |
| assert_equal(L, res.statistic) |
| assert_allclose(p, res.pvalue) |
| assert_equal(method, res.method) |
|
|
| ts2 = [ |
| (542, 0.9481266260876332, True, 'exact', |
| [[10, 9, 8, 7, 6, 5, 4, 3, 2, 1], |
| [1, 8, 4, 7, 6, 5, 9, 3, 2, 10]]), |
| (1322, 0.9993113928199309, True, 'exact', |
| [[10, 9, 8, 7, 6, 5, 4, 3, 2, 1], [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], |
| [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], [9, 2, 8, 7, 6, 5, 4, 3, 10, 1], |
| [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]), |
| (2286, 0.9908688345484833, True, 'exact', |
| [[8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1], |
| [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1], |
| [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1], |
| [8, 7, 6, 5, 4, 3, 2, 1], [8, 7, 6, 5, 4, 3, 2, 1], |
| [8, 7, 6, 5, 4, 3, 2, 1], [1, 3, 5, 6, 4, 7, 2, 8], |
| [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8], |
| [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8], |
| [1, 2, 3, 4, 5, 6, 7, 8]]), |
| ] |
|
|
| |
| |
| @pytest.mark.parametrize("L, p, ranked, method, data", ts) |
| @pytest.mark.slow() |
| def test_accuracy2(self, L, p, ranked, method, data): |
| np.random.seed(42) |
| res = stats.page_trend_test(data, ranked=ranked, method=method) |
| assert_equal(L, res.statistic) |
| assert_allclose(p, res.pvalue) |
| assert_equal(method, res.method) |
|
|
| def test_options(self): |
| np.random.seed(42) |
| m, n = 10, 20 |
| predicted_ranks = np.arange(1, n+1) |
| perm = np.random.permutation(np.arange(n)) |
| data = np.random.rand(m, n) |
| ranks = stats.rankdata(data, axis=1) |
| res1 = stats.page_trend_test(ranks) |
| res2 = stats.page_trend_test(ranks, ranked=True) |
| res3 = stats.page_trend_test(data, ranked=False) |
| res4 = stats.page_trend_test(ranks, predicted_ranks=predicted_ranks) |
| res5 = stats.page_trend_test(ranks[:, perm], |
| predicted_ranks=predicted_ranks[perm]) |
| assert_equal(res1.statistic, res2.statistic) |
| assert_equal(res1.statistic, res3.statistic) |
| assert_equal(res1.statistic, res4.statistic) |
| assert_equal(res1.statistic, res5.statistic) |
|
|
| def test_Ames_assay(self): |
| |
| np.random.seed(42) |
|
|
| data = [[101, 117, 111], [91, 90, 107], [103, 133, 121], |
| [136, 140, 144], [190, 161, 201], [146, 120, 116]] |
| data = np.array(data).T |
| predicted_ranks = np.arange(1, 7) |
|
|
| res = stats.page_trend_test(data, ranked=False, |
| predicted_ranks=predicted_ranks, |
| method="asymptotic") |
| assert_equal(res.statistic, 257) |
| assert_almost_equal(res.pvalue, 0.0035, decimal=4) |
|
|
| res = stats.page_trend_test(data, ranked=False, |
| predicted_ranks=predicted_ranks, |
| method="exact") |
| assert_equal(res.statistic, 257) |
| assert_almost_equal(res.pvalue, 0.0023, decimal=4) |
|
|
| def test_input_validation(self): |
| |
| with assert_raises(ValueError, match="`data` must be a 2d array."): |
| stats.page_trend_test(None) |
| with assert_raises(ValueError, match="`data` must be a 2d array."): |
| stats.page_trend_test([]) |
| with assert_raises(ValueError, match="`data` must be a 2d array."): |
| stats.page_trend_test([1, 2]) |
| with assert_raises(ValueError, match="`data` must be a 2d array."): |
| stats.page_trend_test([[[1]]]) |
|
|
| |
| with assert_raises(ValueError, match="Page's L is only appropriate"): |
| stats.page_trend_test(np.random.rand(1, 3)) |
| with assert_raises(ValueError, match="Page's L is only appropriate"): |
| stats.page_trend_test(np.random.rand(2, 2)) |
|
|
| |
| message = "`predicted_ranks` must include each integer" |
| with assert_raises(ValueError, match=message): |
| stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]], |
| predicted_ranks=[0, 1, 2]) |
| with assert_raises(ValueError, match=message): |
| stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]], |
| predicted_ranks=[1.1, 2, 3]) |
| with assert_raises(ValueError, match=message): |
| stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]], |
| predicted_ranks=[1, 2, 3, 3]) |
| with assert_raises(ValueError, match=message): |
| stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]], |
| predicted_ranks="invalid") |
|
|
| |
| with assert_raises(ValueError, match="`data` is not properly ranked"): |
| stats.page_trend_test([[0, 2, 3], [1, 2, 3]], True) |
| with assert_raises(ValueError, match="`data` is not properly ranked"): |
| stats.page_trend_test([[1, 2, 3], [1, 2, 4]], True) |
|
|
| |
| with assert_raises(ValueError, match="`data` contains NaNs"): |
| stats.page_trend_test([[1, 2, 3], [1, 2, np.nan]], |
| ranked=False) |
| with assert_raises(ValueError, match="`method` must be in"): |
| stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]], |
| method="ekki") |
| with assert_raises(TypeError, match="`ranked` must be boolean."): |
| stats.page_trend_test(data=[[1, 2, 3], [1, 2, 3]], |
| ranked="ekki") |
|
|
|
|
| rng = np.random.default_rng(902340982) |
| x = rng.random(10) |
| y = rng.random(10) |
|
|
|
|
| @pytest.mark.parametrize("fun, args", |
| [(stats.wilcoxon, (x,)), |
| (stats.ks_1samp, (x, stats.norm.cdf)), |
| (stats.ks_2samp, (x, y)), |
| (stats.kstest, (x, y)), |
| ]) |
| def test_rename_mode_method(fun, args): |
|
|
| res = fun(*args, method='exact') |
| res2 = fun(*args, mode='exact') |
| assert_equal(res, res2) |
|
|
| err = rf"{fun.__name__}() got multiple values for argument" |
| with pytest.raises(TypeError, match=re.escape(err)): |
| fun(*args, method='exact', mode='exact') |
|
|
|
|
| class TestExpectile: |
| def test_same_as_mean(self): |
| rng = np.random.default_rng(42) |
| x = rng.random(size=20) |
| assert_allclose(stats.expectile(x, alpha=0.5), np.mean(x)) |
|
|
| def test_minimum(self): |
| rng = np.random.default_rng(42) |
| x = rng.random(size=20) |
| assert_allclose(stats.expectile(x, alpha=0), np.amin(x)) |
|
|
| def test_maximum(self): |
| rng = np.random.default_rng(42) |
| x = rng.random(size=20) |
| assert_allclose(stats.expectile(x, alpha=1), np.amax(x)) |
|
|
| def test_weights(self): |
| |
| |
| |
| |
| rng = np.random.default_rng(1856392524598679138) |
|
|
| def fun(u, a, alpha, weights): |
| w = np.full_like(a, fill_value=alpha) |
| w[a <= u] = 1 - alpha |
| return np.sum(w * weights * (a - u)**2) |
|
|
| def expectile2(a, alpha, weights): |
| bracket = np.min(a), np.max(a) |
| return optimize.minimize_scalar(fun, bracket=bracket, |
| args=(a, alpha, weights)).x |
|
|
| n = 10 |
| a = rng.random(n) |
| alpha = rng.random() |
| weights = rng.random(n) |
|
|
| res = stats.expectile(a, alpha, weights=weights) |
| ref = expectile2(a, alpha, weights) |
| assert_allclose(res, ref) |
|
|
| @pytest.mark.parametrize( |
| "alpha", [0.2, 0.5 - 1e-12, 0.5, 0.5 + 1e-12, 0.8] |
| ) |
| @pytest.mark.parametrize("n", [20, 2000]) |
| def test_expectile_properties(self, alpha, n): |
| """ |
| See Section 6 of |
| I. Steinwart, C. Pasin, R.C. Williamson & S. Zhang (2014). |
| "Elicitation and Identification of Properties". COLT. |
| http://proceedings.mlr.press/v35/steinwart14.html |
| |
| and |
| |
| Propositions 5, 6, 7 of |
| F. Bellini, B. Klar, and A. Müller and E. Rosazza Gianin (2013). |
| "Generalized Quantiles as Risk Measures" |
| http://doi.org/10.2139/ssrn.2225751 |
| """ |
| rng = np.random.default_rng(42) |
| x = rng.normal(size=n) |
|
|
| |
| |
| |
| for c in [-5, 0, 0.5]: |
| assert_allclose( |
| stats.expectile(np.full(shape=n, fill_value=c), alpha=alpha), |
| c |
| ) |
|
|
| |
| |
| c = rng.exponential() |
| assert_allclose( |
| stats.expectile(x + c, alpha=alpha), |
| stats.expectile(x, alpha=alpha) + c, |
| ) |
| assert_allclose( |
| stats.expectile(x - c, alpha=alpha), |
| stats.expectile(x, alpha=alpha) - c, |
| ) |
|
|
| |
| |
| assert_allclose( |
| stats.expectile(c * x, alpha=alpha), |
| c * stats.expectile(x, alpha=alpha), |
| ) |
|
|
| |
| |
| |
| |
| |
| |
| y = rng.logistic(size=n, loc=10) |
| if alpha == 0.5: |
| def assert_op(a, b): |
| assert_allclose(a, b) |
|
|
| elif alpha > 0.5: |
| def assert_op(a, b): |
| assert a < b |
|
|
| else: |
| def assert_op(a, b): |
| assert a > b |
|
|
| assert_op( |
| stats.expectile(np.r_[x + y], alpha=alpha), |
| stats.expectile(x, alpha=alpha) |
| + stats.expectile(y, alpha=alpha) |
| ) |
|
|
| |
| |
| |
| |
| y = rng.normal(size=n, loc=5) |
| assert ( |
| stats.expectile(x, alpha=alpha) <= stats.expectile(y, alpha=alpha) |
| ) |
|
|
| |
| |
| |
| y = rng.logistic(size=n, loc=10) |
| for c in [0.1, 0.5, 0.8]: |
| assert_op( |
| stats.expectile((1-c)*x + c*y, alpha=alpha), |
| (1-c) * stats.expectile(x, alpha=alpha) + |
| c * stats.expectile(y, alpha=alpha) |
| ) |
|
|
| |
| |
| assert_allclose( |
| stats.expectile(-x, alpha=alpha), |
| -stats.expectile(x, alpha=1-alpha), |
| ) |
|
|
| @pytest.mark.parametrize("n", [20, 2000]) |
| def test_monotonicity_in_alpha(self, n): |
| rng = np.random.default_rng(42) |
| x = rng.pareto(a=2, size=n) |
| e_list = [] |
| alpha_seq = np.logspace(-15, np.log10(0.5), 100) |
| |
| for alpha in np.r_[0, alpha_seq, 1 - alpha_seq[:-1:-1], 1]: |
| e_list.append(stats.expectile(x, alpha=alpha)) |
| assert np.all(np.diff(e_list) > 0) |
|
|
|
|
| class TestLMoment: |
| |
| data = [0.87, 0.87, 1.29, 1.5, 1.7, 0.66, 1.5, 0.5, 1., 1.25, 2.3, |
| 1.03, 2.85, 0.68, 1.74, 1.94, 0.63, 2.04, 1.2, 0.64, 2.05, 0.97, |
| 2.81, 1.02, 2.76, 0.86, 1.36, 1.29, 1.68, 0.72, 1.67, 1.15, 3.26, |
| 0.93, 0.83, 0.91, 0.92, 2.32, 1.12, 3.21, 1.23, 1.22, 1.29, 2.08, |
| 0.64, 2.83, 2.68, 1.77, 0.69, 1.69, 0.7, 1.83, 2.25, 1.23, 1.17, |
| 0.94, 1.22, 0.76, 0.69, 0.48, 1.04, 2.49, 1.38, 1.57, 1.79, 1.59, |
| 1.3, 1.54, 1.07, 1.03, 0.76, 2.35, 2.05, 2.02, 2.36, 1.59, 0.97, |
| 1.63, 1.66, 0.94, 1.45, 1.26, 1.25, 0.68, 2.96, 0.8, 1.16, 0.82, |
| 0.64, 0.87, 1.33, 1.28, 1.26, 1.19, 1.24, 1.12, 1.45, 1.03, 1.37, |
| 1.4, 1.35, 1.28, 1.04, 1.31, 0.87, 0.96, 2.55, 1.72, 1.05, 1.15, |
| 1.73, 1.03, 1.53, 2.41, 1.36, 2.08, 0.92, 0.73, 1.56, 1.94, 0.78] |
|
|
| not_integers = [1.5, [1, 2, 3.5], np.nan, np.inf, 'a duck'] |
|
|
| def test_dtype_iv(self): |
| message = '`sample` must be an array of real numbers.' |
| with pytest.raises(ValueError, match=message): |
| stats.lmoment(np.array(self.data, dtype=np.complex128)) |
|
|
| @skip_xp_invalid_arg |
| def test_dtype_iv_non_numeric(self): |
| message = '`sample` must be an array of real numbers.' |
| with pytest.raises(ValueError, match=message): |
| stats.lmoment(np.array(self.data, dtype=object)) |
|
|
| @pytest.mark.parametrize('order', not_integers + [0, -1, [], [[1, 2, 3]]]) |
| def test_order_iv(self, order): |
| message = '`order` must be a scalar or a non-empty...' |
| with pytest.raises(ValueError, match=message): |
| stats.lmoment(self.data, order=order) |
|
|
| @pytest.mark.parametrize('axis', not_integers) |
| def test_axis_iv(self, axis): |
| message = '`axis` must be an integer, a tuple' |
| with pytest.raises(ValueError, match=message): |
| stats.lmoment(self.data, axis=axis) |
|
|
| @pytest.mark.parametrize('sorted', not_integers) |
| def test_sorted_iv(self, sorted): |
| message = '`sorted` must be True or False.' |
| with pytest.raises(ValueError, match=message): |
| stats.lmoment(self.data, sorted=sorted) |
|
|
| @pytest.mark.parametrize('standardize', not_integers) |
| def test_standardize_iv(self, standardize): |
| message = '`standardize` must be True or False.' |
| with pytest.raises(ValueError, match=message): |
| stats.lmoment(self.data, standardize=standardize) |
|
|
| @pytest.mark.parametrize('order', [1, 4, [1, 2, 3, 4]]) |
| @pytest.mark.parametrize('standardize', [False, True]) |
| @pytest.mark.parametrize('sorted', [False, True]) |
| def test_lmoment(self, order, standardize, sorted): |
| |
| |
| |
| |
| |
| ref = np.asarray([1.4087603305785130, 0.3415936639118458, |
| 0.2189964482831403, 0.1328186463415905]) |
|
|
| if not standardize: |
| ref[2:] *= ref[1] |
|
|
| data = np.sort(self.data) if sorted else self.data |
|
|
| res = stats.lmoment(data, order, standardize=standardize, sorted=sorted) |
| assert_allclose(res, ref[np.asarray(order)-1]) |
|
|
| def test_dtype(self): |
| dtype = np.float32 |
| sample = np.asarray(self.data) |
| res = stats.lmoment(sample.astype(dtype)) |
| ref = stats.lmoment(sample) |
| assert res.dtype.type == dtype |
| assert_allclose(res, ref, rtol=1e-4) |
|
|
| dtype = np.int64 |
| sample = np.asarray([1, 2, 3, 4, 5]) |
| res = stats.lmoment(sample.astype(dtype)) |
| ref = stats.lmoment(sample.astype(np.float64)) |
| assert res.dtype.type == np.float64 |
| assert_allclose(res, ref, rtol=1e-15) |
|
|
|
|
| @array_api_compatible |
| class TestXP_Mean: |
| @pytest.mark.parametrize('axis', [None, 1, -1, (-2, 2)]) |
| @pytest.mark.parametrize('weights', [None, True]) |
| @pytest.mark.parametrize('keepdims', [False, True]) |
| def test_xp_mean_basic(self, xp, axis, weights, keepdims): |
| rng = np.random.default_rng(90359458245906) |
| x = rng.random((3, 4, 5)) |
| x_xp = xp.asarray(x) |
| w = w_xp = None |
|
|
| if weights: |
| w = rng.random((1, 5)) |
| w_xp = xp.asarray(w) |
| x, w = np.broadcast_arrays(x, w) |
|
|
| res = _xp_mean(x_xp, weights=w_xp, axis=axis, keepdims=keepdims) |
| ref = np.average(x, weights=w, axis=axis, keepdims=keepdims) |
|
|
| xp_assert_close(res, xp.asarray(ref)) |
|
|
| def test_non_broadcastable(self, xp): |
| |
| x, w = xp.arange(10.), xp.zeros(5) |
| message = "Array shapes are incompatible for broadcasting." |
| with pytest.raises(ValueError, match=message): |
| _xp_mean(x, weights=w) |
|
|
| def test_special_cases(self, xp): |
| |
| weights = xp.asarray([-1., 0., 1.]) |
|
|
| res = _xp_mean(xp.asarray([1., 1., 1.]), weights=weights) |
| xp_assert_close(res, xp.asarray(xp.nan)) |
|
|
| res = _xp_mean(xp.asarray([2., 1., 1.]), weights=weights) |
| xp_assert_close(res, xp.asarray(-np.inf)) |
|
|
| res = _xp_mean(xp.asarray([1., 1., 2.]), weights=weights) |
| xp_assert_close(res, xp.asarray(np.inf)) |
|
|
| def test_nan_policy(self, xp): |
| x = xp.arange(10.) |
| mask = (x == 3) |
| x = xp.where(mask, xp.asarray(xp.nan), x) |
|
|
| |
| message = 'The input contains nan values' |
| with pytest.raises(ValueError, match=message): |
| _xp_mean(x, nan_policy='raise') |
|
|
| |
| res1 = _xp_mean(x) |
| res2 = _xp_mean(x, nan_policy='propagate') |
| ref = xp.asarray(xp.nan) |
| xp_assert_equal(res1, ref) |
| xp_assert_equal(res2, ref) |
|
|
| |
| res = _xp_mean(x, nan_policy='omit') |
| ref = xp.mean(x[~mask]) |
| xp_assert_close(res, ref) |
|
|
| |
| weights = xp.ones(10) |
| weights = xp.where(mask, xp.asarray(xp.nan), weights) |
| res = _xp_mean(xp.arange(10.), weights=weights, nan_policy='omit') |
| ref = xp.mean(x[~mask]) |
| xp_assert_close(res, ref) |
|
|
| |
| message = 'After omitting NaNs...' |
| with pytest.warns(RuntimeWarning, match=message): |
| res = _xp_mean(x * np.nan, nan_policy='omit') |
| ref = xp.asarray(xp.nan) |
| xp_assert_equal(res, ref) |
|
|
| def test_empty(self, xp): |
| message = 'One or more sample arguments is too small...' |
| with pytest.warns(SmallSampleWarning, match=message): |
| res = _xp_mean(xp.asarray([])) |
| ref = xp.asarray(xp.nan) |
| xp_assert_equal(res, ref) |
|
|
| message = "All axis-slices of one or more sample arguments..." |
| with pytest.warns(SmallSampleWarning, match=message): |
| res = _xp_mean(xp.asarray([[]]), axis=1) |
| ref = xp.asarray([xp.nan]) |
| xp_assert_equal(res, ref) |
|
|
| res = _xp_mean(xp.asarray([[]]), axis=0) |
| ref = xp.asarray([]) |
| xp_assert_equal(res, ref) |
|
|
| def test_dtype(self, xp): |
| max = xp.finfo(xp.float32).max |
| x_np = np.asarray([max, max], dtype=np.float32) |
| x_xp = xp.asarray(x_np) |
|
|
| |
| with np.errstate(over='ignore'): |
| res = _xp_mean(x_xp) |
| ref = np.mean(x_np) |
| np.testing.assert_equal(ref, np.inf) |
| xp_assert_close(res, xp.asarray(ref)) |
|
|
| |
| res = _xp_mean(x_xp, dtype=xp.float64) |
| ref = xp.asarray(np.mean(np.asarray(x_np, dtype=np.float64))) |
| xp_assert_close(res, ref) |
|
|
| def test_integer(self, xp): |
| |
| x = xp.arange(10) |
| y = xp.arange(10.) |
| xp_assert_equal(_xp_mean(x), _xp_mean(y)) |
| xp_assert_equal(_xp_mean(y, weights=x), _xp_mean(y, weights=y)) |
|
|
| def test_complex_gh22404(self, xp): |
| rng = np.random.default_rng(90359458245906) |
| x, y, wx, wy = rng.random((4, 20)) |
| res = _xp_mean(xp.asarray(x + y*1j), weights=xp.asarray(wx + wy*1j)) |
| ref = np.average(x + y*1j, weights=wx + wy*1j) |
| xp_assert_close(res, xp.asarray(ref)) |
|
|
|
|
| @array_api_compatible |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @skip_xp_backends('jax.numpy', reason='JAX arrays do not support item assignment') |
| class TestXP_Var: |
| @pytest.mark.parametrize('axis', [None, 1, -1, (-2, 2)]) |
| @pytest.mark.parametrize('keepdims', [False, True]) |
| @pytest.mark.parametrize('correction', [0, 1]) |
| @pytest.mark.parametrize('nan_policy', ['propagate', 'omit']) |
| def test_xp_var_basic(self, xp, axis, keepdims, correction, nan_policy): |
| rng = np.random.default_rng(90359458245906) |
| x = rng.random((3, 4, 5)) |
| var_ref = np.var |
|
|
| if nan_policy == 'omit': |
| nan_mask = rng.random(size=x.shape) > 0.5 |
| x[nan_mask] = np.nan |
| var_ref = np.nanvar |
|
|
| x_xp = xp.asarray(x) |
|
|
| res = _xp_var(x_xp, axis=axis, keepdims=keepdims, correction=correction, |
| nan_policy=nan_policy) |
|
|
| with suppress_warnings() as sup: |
| sup.filter(RuntimeWarning, "Degrees of freedom <= 0 for slice") |
| ref = var_ref(x, axis=axis, keepdims=keepdims, ddof=correction) |
|
|
| xp_assert_close(res, xp.asarray(ref)) |
|
|
| def test_special_cases(self, xp): |
| |
| res = _xp_var(xp.asarray([1., 2.]), correction=3) |
| xp_assert_close(res, xp.asarray(xp.nan)) |
|
|
| def test_nan_policy(self, xp): |
| x = xp.arange(10.) |
| mask = (x == 3) |
| x = xp.where(mask, xp.asarray(xp.nan), x) |
|
|
| |
| message = 'The input contains nan values' |
| with pytest.raises(ValueError, match=message): |
| _xp_var(x, nan_policy='raise') |
|
|
| |
| res1 = _xp_var(x) |
| res2 = _xp_var(x, nan_policy='propagate') |
| ref = xp.asarray(xp.nan) |
| xp_assert_equal(res1, ref) |
| xp_assert_equal(res2, ref) |
|
|
| |
| res = _xp_var(x, nan_policy='omit') |
| xp_test = array_namespace(x) |
| ref = xp_test.var(x[~mask]) |
| xp_assert_close(res, ref) |
|
|
| |
| message = 'After omitting NaNs...' |
| with pytest.warns(RuntimeWarning, match=message): |
| res = _xp_var(x * np.nan, nan_policy='omit') |
| ref = xp.asarray(xp.nan) |
| xp_assert_equal(res, ref) |
|
|
| def test_empty(self, xp): |
| message = 'One or more sample arguments is too small...' |
| with pytest.warns(SmallSampleWarning, match=message): |
| res = _xp_var(xp.asarray([])) |
| ref = xp.asarray(xp.nan) |
| xp_assert_equal(res, ref) |
|
|
| message = "All axis-slices of one or more sample arguments..." |
| with pytest.warns(SmallSampleWarning, match=message): |
| res = _xp_var(xp.asarray([[]]), axis=1) |
| ref = xp.asarray([xp.nan]) |
| xp_assert_equal(res, ref) |
|
|
| res = _xp_var(xp.asarray([[]]), axis=0) |
| ref = xp.asarray([]) |
| xp_assert_equal(res, ref) |
|
|
| def test_dtype(self, xp): |
| max = xp.finfo(xp.float32).max |
| x_np = np.asarray([max, max/2], dtype=np.float32) |
| x_xp = xp.asarray(x_np) |
|
|
| |
| with np.errstate(over='ignore'): |
| res = _xp_var(x_xp) |
| ref = np.var(x_np) |
| np.testing.assert_equal(ref, np.inf) |
| xp_assert_close(res, xp.asarray(ref)) |
|
|
| |
| res = _xp_var(x_xp, dtype=xp.float64) |
| ref = xp.asarray(np.var(np.asarray(x_np, dtype=np.float64))) |
| xp_assert_close(res, ref) |
|
|
| def test_integer(self, xp): |
| |
| x = xp.arange(10) |
| y = xp.arange(10.) |
| xp_assert_equal(_xp_var(x), _xp_var(y)) |
|
|
| @pytest.mark.skip_xp_backends('array_api_strict', reason='needs array-api#850') |
| def test_complex_gh22404(self, xp): |
| rng = np.random.default_rng(90359458245906) |
| x, y = rng.random((2, 20)) |
| res = _xp_var(xp.asarray(x + y*1j)) |
| ref = np.var(x + y*1j) |
| xp_assert_close(res, xp.asarray(ref), check_dtype=False) |
|
|
|
|
| @array_api_compatible |
| def test_chk_asarray(xp): |
| rng = np.random.default_rng(2348923425434) |
| x0 = rng.random(size=(2, 3, 4)) |
| x = xp.asarray(x0) |
|
|
| axis = 1 |
| x_out, axis_out = _chk_asarray(x, axis=axis, xp=xp) |
| xp_assert_equal(x_out, xp.asarray(x0)) |
| assert_equal(axis_out, axis) |
|
|
| axis = None |
| x_out, axis_out = _chk_asarray(x, axis=axis, xp=xp) |
| xp_assert_equal(x_out, xp.asarray(x0.ravel())) |
| assert_equal(axis_out, 0) |
|
|
| axis = 2 |
| x_out, axis_out = _chk_asarray(x[0, 0, 0], axis=axis, xp=xp) |
| xp_assert_equal(x_out, xp.asarray(np.atleast_1d(x0[0, 0, 0]))) |
| assert_equal(axis_out, axis) |
|
|
|
|
| @pytest.mark.skip_xp_backends('numpy', |
| reason='These parameters *are* compatible with NumPy') |
| @pytest.mark.usefixtures("skip_xp_backends") |
| @array_api_compatible |
| def test_axis_nan_policy_keepdims_nanpolicy(xp): |
| |
| |
| |
| |
| x = xp.asarray([1, 2, 3, 4]) |
| message = "Use of `nan_policy` and `keepdims`..." |
| with pytest.raises(NotImplementedError, match=message): |
| stats.skew(x, nan_policy='omit') |
| with pytest.raises(NotImplementedError, match=message): |
| stats.skew(x, keepdims=True) |
|
|