| from math import sqrt |
| import numpy as np |
| from scipy._lib._util import _validate_int |
| from scipy.optimize import brentq |
| from scipy.special import ndtri |
| from ._discrete_distns import binom |
| from ._common import ConfidenceInterval |
|
|
|
|
class BinomTestResult:
    """
    Result of `scipy.stats.binomtest`.

    Attributes
    ----------
    k : int
        The number of successes (copied from `binomtest` input).
    n : int
        The number of trials (copied from `binomtest` input).
    alternative : str
        Indicates the alternative hypothesis specified in the input
        to `binomtest`.  It will be one of ``'two-sided'``, ``'greater'``,
        or ``'less'``.
    statistic : float
        The estimate of the proportion of successes.
    pvalue : float
        The p-value of the hypothesis test.
    proportion_estimate : float
        The same value as ``statistic``, stored under a second name.

    """

    def __init__(self, k, n, alternative, statistic, pvalue):
        self.k, self.n = k, n
        self.alternative = alternative
        self.statistic = statistic
        self.pvalue = pvalue
        # Second attribute name holding the very same estimate.
        self.proportion_estimate = statistic

    def __repr__(self):
        fields = (
            f"k={self.k}",
            f"n={self.n}",
            f"alternative={self.alternative!r}",
            f"statistic={self.statistic}",
            f"pvalue={self.pvalue}",
        )
        return "BinomTestResult(" + ", ".join(fields) + ")"

    def proportion_ci(self, confidence_level=0.95, method='exact'):
        """
        Compute the confidence interval for ``statistic``.

        The interval is one- or two-sided according to the ``alternative``
        stored on this result.

        Parameters
        ----------
        confidence_level : float, optional
            Confidence level for the computed confidence interval
            of the estimated proportion. Must lie in ``[0, 1]``.
            Default is 0.95.
        method : {'exact', 'wilson', 'wilsoncc'}, optional
            Selects the method used to compute the confidence interval
            for the estimate of the proportion:

            'exact' :
                Use the Clopper-Pearson exact method [1]_.
            'wilson' :
                Wilson's method, without continuity correction ([2]_, [3]_).
            'wilsoncc' :
                Wilson's method, with continuity correction ([2]_, [3]_).

            Default is ``'exact'``.

        Returns
        -------
        ci : ``ConfidenceInterval`` object
            The object has attributes ``low`` and ``high`` that hold the
            lower and upper bounds of the confidence interval.

        Raises
        ------
        ValueError
            If `method` is not one of the supported names, or if
            `confidence_level` is outside ``[0, 1]``.

        References
        ----------
        .. [1] C. J. Clopper and E. S. Pearson, The use of confidence or
               fiducial limits illustrated in the case of the binomial,
               Biometrika, Vol. 26, No. 4, pp 404-413 (Dec. 1934).
        .. [2] E. B. Wilson, Probable inference, the law of succession, and
               statistical inference, J. Amer. Stat. Assoc., 22, pp 209-212
               (1927).
        .. [3] Robert G. Newcombe, Two-sided confidence intervals for the
               single proportion: comparison of seven methods, Statistics
               in Medicine, 17, pp 857-872 (1998).

        Examples
        --------
        >>> from scipy.stats import binomtest
        >>> result = binomtest(k=7, n=50, p=0.1)
        >>> result.statistic
        0.14
        >>> result.proportion_ci()
        ConfidenceInterval(low=0.05819170033997342, high=0.26739600249700846)
        """
        # Validate the method name first, then the level, so that the
        # error reported for doubly-invalid input stays the same.
        if method not in ('exact', 'wilson', 'wilsoncc'):
            raise ValueError(f"method ('{method}') must be one of 'exact', "
                             "'wilson' or 'wilsoncc'.")
        if not (0 <= confidence_level <= 1):
            raise ValueError(f'confidence_level ({confidence_level}) must be in '
                             'the interval [0, 1].')

        if method == 'exact':
            bounds = _binom_exact_conf_int(self.k, self.n,
                                           confidence_level,
                                           self.alternative)
        else:
            # 'wilson' and 'wilsoncc' share one implementation; only the
            # continuity-correction flag differs.
            bounds = _binom_wilson_conf_int(self.k, self.n,
                                            confidence_level,
                                            self.alternative,
                                            correction=method == 'wilsoncc')
        low, high = bounds
        return ConfidenceInterval(low=low, high=high)
|
|
|
|
| def _findp(func): |
| try: |
| p = brentq(func, 0, 1) |
| except RuntimeError: |
| raise RuntimeError('numerical solver failed to converge when ' |
| 'computing the confidence limits') from None |
| except ValueError as exc: |
| raise ValueError('brentq raised a ValueError; report this to the ' |
| 'SciPy developers') from exc |
| return p |
|
|
|
|
| def _binom_exact_conf_int(k, n, confidence_level, alternative): |
| """ |
| Compute the estimate and confidence interval for the binomial test. |
| |
| Returns proportion, prop_low, prop_high |
| """ |
| if alternative == 'two-sided': |
| alpha = (1 - confidence_level) / 2 |
| if k == 0: |
| plow = 0.0 |
| else: |
| plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha) |
| if k == n: |
| phigh = 1.0 |
| else: |
| phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha) |
| elif alternative == 'less': |
| alpha = 1 - confidence_level |
| plow = 0.0 |
| if k == n: |
| phigh = 1.0 |
| else: |
| phigh = _findp(lambda p: binom.cdf(k, n, p) - alpha) |
| elif alternative == 'greater': |
| alpha = 1 - confidence_level |
| if k == 0: |
| plow = 0.0 |
| else: |
| plow = _findp(lambda p: binom.sf(k-1, n, p) - alpha) |
| phigh = 1.0 |
| return plow, phigh |
|
|
|
|
| def _binom_wilson_conf_int(k, n, confidence_level, alternative, correction): |
| |
| |
| |
| p = k / n |
| if alternative == 'two-sided': |
| z = ndtri(0.5 + 0.5*confidence_level) |
| else: |
| z = ndtri(confidence_level) |
|
|
| |
| |
| denom = 2*(n + z**2) |
| center = (2*n*p + z**2)/denom |
| q = 1 - p |
| if correction: |
| if alternative == 'less' or k == 0: |
| lo = 0.0 |
| else: |
| dlo = (1 + z*sqrt(z**2 - 2 - 1/n + 4*p*(n*q + 1))) / denom |
| lo = center - dlo |
| if alternative == 'greater' or k == n: |
| hi = 1.0 |
| else: |
| dhi = (1 + z*sqrt(z**2 + 2 - 1/n + 4*p*(n*q - 1))) / denom |
| hi = center + dhi |
| else: |
| delta = z/denom * sqrt(4*n*p*q + z**2) |
| if alternative == 'less' or k == 0: |
| lo = 0.0 |
| else: |
| lo = center - delta |
| if alternative == 'greater' or k == n: |
| hi = 1.0 |
| else: |
| hi = center + delta |
|
|
| return lo, hi |
|
|
|
|
def binomtest(k, n, p=0.5, alternative='two-sided'):
    """
    Perform a test that the probability of success is p.

    The binomial test [1]_ is a test of the null hypothesis that the
    probability of success in a Bernoulli experiment is `p`.

    Details of the test can be found in many texts on statistics, such
    as section 24.5 of [2]_.

    Parameters
    ----------
    k : int
        The number of successes.
    n : int
        The number of trials.
    p : float, optional
        The hypothesized probability of success, i.e. the expected
        proportion of successes.  The value must be in the interval
        ``0 <= p <= 1``. The default value is ``p = 0.5``.
    alternative : {'two-sided', 'greater', 'less'}, optional
        Indicates the alternative hypothesis. The default value is
        'two-sided'.

    Returns
    -------
    result : `~scipy.stats._result_classes.BinomTestResult` instance
        The return value is an object with the following attributes:

        k : int
            The number of successes (copied from `binomtest` input).
        n : int
            The number of trials (copied from `binomtest` input).
        alternative : str
            Indicates the alternative hypothesis specified in the input
            to `binomtest`.  It will be one of ``'two-sided'``, ``'greater'``,
            or ``'less'``.
        statistic : float
            The estimate of the proportion of successes.
        pvalue : float
            The p-value of the hypothesis test.

        The object has the following methods:

        proportion_ci(confidence_level=0.95, method='exact') :
            Compute the confidence interval for ``statistic``.

    Raises
    ------
    ValueError
        If `k` is greater than `n`, if `p` is outside ``[0, 1]``, or if
        `alternative` is not one of the recognized names.

    Notes
    -----
    .. versionadded:: 1.7.0

    References
    ----------
    .. [1] Binomial test, https://en.wikipedia.org/wiki/Binomial_test
    .. [2] Jerrold H. Zar, Biostatistical Analysis (fifth edition),
           Prentice Hall, Upper Saddle River, New Jersey USA (2010)

    Examples
    --------
    >>> from scipy.stats import binomtest

    A car manufacturer claims that no more than 10% of their cars are unsafe.
    15 cars are inspected for safety, 3 were found to be unsafe. Test the
    manufacturer's claim:

    >>> result = binomtest(3, n=15, p=0.1, alternative='greater')
    >>> result.pvalue
    0.18406106910639114

    The null hypothesis cannot be rejected at the 5% level of significance
    because the returned p-value is greater than the critical value of 5%.

    The test statistic is equal to the estimated proportion, which is simply
    ``3/15``:

    >>> result.statistic
    0.2

    We can use the `proportion_ci()` method of the result to compute the
    confidence interval of the estimate:

    >>> result.proportion_ci(confidence_level=0.95)
    ConfidenceInterval(low=0.05684686759024681, high=1.0)

    """
    k = _validate_int(k, 'k', minimum=0)
    n = _validate_int(n, 'n', minimum=1)
    if k > n:
        raise ValueError(f'k ({k}) must not be greater than n ({n}).')

    if not (0 <= p <= 1):
        raise ValueError(f"p ({p}) must be in range [0,1]")

    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError(f"alternative ('{alternative}') not recognized; \n"
                         "must be 'two-sided', 'less' or 'greater'")

    if alternative == 'less':
        pvalue = binom.cdf(k, n, p)
    elif alternative == 'greater':
        pvalue = binom.sf(k-1, n, p)
    else:
        # Two-sided: add the tail containing k to the opposite-tail outcomes
        # whose probability does not exceed that of k.  The cutoff is
        # inflated by a small relative factor, presumably so that outcomes
        # tied with k up to rounding error are still counted.
        cutoff = binom.pmf(k, n, p) * (1 + 1e-7)
        expected = p * n
        if k == expected:
            pvalue = 1.
        elif k < expected:
            # Search the upper side, where the negated pmf is ascending.
            ix = _binary_search_for_binom_tst(
                lambda x1: -binom.pmf(x1, n, p),
                -cutoff, np.ceil(expected), n)
            # Outcomes above `ix` are counted; `ix` itself is added only
            # when its pmf equals the cutoff exactly.
            n_upper = n - ix + int(cutoff == binom.pmf(ix, n, p))
            pvalue = binom.cdf(k, n, p) + binom.sf(n - n_upper, n, p)
        else:
            # Search the lower side, where the pmf itself is ascending.
            ix = _binary_search_for_binom_tst(
                lambda x1: binom.pmf(x1, n, p),
                cutoff, 0, np.floor(expected))
            # Outcomes 0..ix are counted, i.e. ix + 1 terms.
            n_lower = ix + 1
            pvalue = binom.cdf(n_lower - 1, n, p) + binom.sf(k-1, n, p)

        # The sum of the two tails is capped at 1.
        pvalue = min(1.0, pvalue)

    return BinomTestResult(k=k, n=n, alternative=alternative,
                           statistic=k/n, pvalue=pvalue)
|
|
|
|
| def _binary_search_for_binom_tst(a, d, lo, hi): |
| """ |
| Conducts an implicit binary search on a function specified by `a`. |
| |
| Meant to be used on the binomial PMF for the case of two-sided tests |
| to obtain the value on the other side of the mode where the tail |
| probability should be computed. The values on either side of |
| the mode are always in order, meaning binary search is applicable. |
| |
| Parameters |
| ---------- |
| a : callable |
| The function over which to perform binary search. Its values |
| for inputs lo and hi should be in ascending order. |
| d : float |
| The value to search. |
| lo : int |
| The lower end of range to search. |
| hi : int |
| The higher end of the range to search. |
| |
| Returns |
| ------- |
| int |
| The index, i between lo and hi |
| such that a(i)<=d<a(i+1) |
| """ |
| while lo < hi: |
| mid = lo + (hi-lo)//2 |
| midval = a(mid) |
| if midval < d: |
| lo = mid+1 |
| elif midval > d: |
| hi = mid-1 |
| else: |
| return mid |
| if a(lo) <= d: |
| return lo |
| else: |
| return lo-1 |
|
|