import math
import numpy as np
from scipy._lib._util import _asarray_validated
from scipy._lib._array_api import (
    array_namespace,
    xp_size,
    xp_broadcast_promote,
    xp_real,
    xp_copy,
    xp_float_to_complex,
)
from scipy._lib import array_api_extra as xpx


__all__ = ["logsumexp", "softmax", "log_softmax"]


def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False):
    r"""Compute the log of the sum of exponentials of input elements.

    Parameters
    ----------
    a : array_like
        Input array.
    axis : None or int or tuple of ints, optional
        Axis or axes over which the sum is taken. By default `axis` is None,
        and all elements are summed.

        .. versionadded:: 0.11.0
    b : array_like, optional
        Scaling factor for exp(`a`); must be of the same shape as `a` or
        broadcastable to `a`. These values may be negative in order to
        implement subtraction.

        .. versionadded:: 0.12.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in the
        result as dimensions with size one. With this option, the result
        will broadcast correctly against the original array.

        .. versionadded:: 0.15.0
    return_sign : bool, optional
        If this is set to True, the result will be a pair containing sign
        information; if False, results that are negative will be returned
        as NaN. Default is False (no sign information).

        .. versionadded:: 0.16.0

    Returns
    -------
    res : ndarray
        The result, ``np.log(np.sum(np.exp(a)))`` calculated in a numerically
        more stable way. If `b` is given then ``np.log(np.sum(b*np.exp(a)))``
        is returned. If ``return_sign`` is True, ``res`` contains the log of
        the absolute value of the argument.
    sgn : ndarray
        If ``return_sign`` is True, this will be an array of floating-point
        numbers matching ``res``, containing +1, 0, or -1 (for real-valued
        inputs) or a complex phase (for complex inputs). This gives the sign
        of the argument of the logarithm in ``res``.
        If ``return_sign`` is False, only one result is returned.

    See Also
    --------
    numpy.logaddexp, numpy.logaddexp2

    Notes
    -----
    NumPy has a logaddexp function which is very similar to `logsumexp`, but
    only handles two arguments. `logaddexp.reduce` is similar to this
    function, but may be less stable.

    The logarithm is a multivalued function: for each :math:`x` there is an
    infinite number of :math:`z` such that :math:`\exp(z) = x`. The convention
    is to return the :math:`z` whose imaginary part lies in
    :math:`(-\pi, \pi]`.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.special import logsumexp
    >>> a = np.arange(10)
    >>> logsumexp(a)
    9.4586297444267107
    >>> np.log(np.sum(np.exp(a)))
    9.4586297444267107

    With weights

    >>> a = np.arange(10)
    >>> b = np.arange(10, 0, -1)
    >>> logsumexp(a, b=b)
    9.9170178533034665
    >>> np.log(np.sum(b*np.exp(a)))
    9.9170178533034647

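    With ``keepdims=True``, the reduced axes are retained with size one so
    that the result broadcasts against the original array (the 2-by-5 input
    below is just an illustration):

    >>> res = logsumexp(np.arange(10.).reshape(2, 5), axis=1, keepdims=True)
    >>> res.shape
    (2, 1)
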
    Returning a sign flag

    >>> logsumexp([1,2],b=[1,-1],return_sign=True)
    (1.5413248546129181, -1.0)

    Notice that `logsumexp` does not directly support masked arrays. To use it
    on a masked array, convert the mask into zero weights:

    >>> a = np.ma.array([np.log(2), 2, np.log(3)],
    ...                 mask=[False, True, False])
    >>> b = (~a.mask).astype(int)
    >>> logsumexp(a.data, b=b), np.log(5)
    (1.6094379124341005, 1.6094379124341005)
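
    The convention for complex input described in the Notes means that the
    imaginary part of the result is reported within :math:`(-\pi, \pi]`; for
    example (the value ``1 + 4j`` is chosen only for illustration):

    >>> z = np.array([1 + 4j])
    >>> np.allclose(logsumexp(z), 1 + (4 - 2*np.pi)*1j)
    True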

    """
    xp = array_namespace(a, b)
    a, b = xp_broadcast_promote(a, b, ensure_writeable=True, force_floating=True,
                                xp=xp)
    a = xpx.atleast_nd(a, ndim=1, xp=xp)
    b = xpx.atleast_nd(b, ndim=1, xp=xp) if b is not None else b
    axis = tuple(range(a.ndim)) if axis is None else axis

    if xp_size(a) != 0:
        with np.errstate(divide='ignore', invalid='ignore'):
            out, sgn = _logsumexp(a, b, axis=axis, return_sign=return_sign, xp=xp)
    else:
        # The sum over an empty slice is zero, so the result is log(0) = -inf.
        # NumPy is used here only for convenient shape manipulation.
        shape = np.asarray(a.shape)
        shape[np.asarray(axis)] = 1  # handles both integer and tuple `axis`
        out = xp.full(tuple(shape), -xp.inf, dtype=a.dtype)
        sgn = xp.sign(out)

    # For complex output, report the imaginary part of the result (or the
    # phase of the sign) following the principal-branch convention, i.e.
    # within (-pi, pi].
    if xp.isdtype(out.dtype, 'complex floating'):
        if return_sign:
            real = xp.real(sgn)
            imag = xp_float_to_complex(_wrap_radians(xp.imag(sgn), xp))
            sgn = real + imag*1j
        else:
            real = xp.real(out)
            imag = xp_float_to_complex(_wrap_radians(xp.imag(out), xp))
            out = real + imag*1j

    out = xp.squeeze(out, axis=axis) if not keepdims else out
    sgn = xp.squeeze(sgn, axis=axis) if (sgn is not None and not keepdims) else sgn
    out = out[()] if out.ndim == 0 else out
    sgn = sgn[()] if (sgn is not None and sgn.ndim == 0) else sgn

    return (out, sgn) if return_sign else out


def _wrap_radians(x, xp=None):
    """Wrap the elements of `x` (in radians) into the interval (-pi, pi]."""
    xp = array_namespace(x) if xp is None else xp

    out = -((-x + math.pi) % (2 * math.pi) - math.pi)

    # Leave values that are already strictly inside (-pi, pi) untouched so the
    # modular arithmetic cannot introduce floating-point error there.
    no_wrap = xp.abs(x) < xp.pi
    out[no_wrap] = x[no_wrap]
    return out


def _elements_and_indices_with_max_real(a, axis=-1, xp=None):
    """Extract the element of `a` with maximal real part along `axis`.

    Returns the maximal element (with ``keepdims=True``) together with a
    boolean mask of its location. For real input, the mask marks every element
    equal to the maximum; for complex input, ties in the real part are broken
    by taking the last such element along the axis, so exactly one element per
    reduced slice is marked and returned (including its imaginary part).
    """
    xp = array_namespace(a) if xp is None else xp

    if xp.isdtype(a.dtype, "complex floating"):
        # Mask of all elements whose real part attains the maximum.
        real_a = xp.real(a)
        max = xp.max(real_a, axis=axis, keepdims=True)
        mask = real_a == max

        # Among those, keep only the element with the greatest flat index so
        # that a single complex element is selected per reduced slice.
        i = xp.reshape(xp.arange(xp_size(a)), a.shape)
        i[~mask] = -1
        max_i = xp.max(i, axis=axis, keepdims=True)
        mask = i == max_i

        # Zero out everything else and sum to extract that element while
        # keeping the reduced dimensions.
        a = xp_copy(a)
        a[~mask] = 0
        max = xp.sum(a, axis=axis, dtype=a.dtype, keepdims=True)
    else:
        max = xp.max(a, axis=axis, keepdims=True)
        mask = a == max

    return xp.asarray(max), xp.asarray(mask)


def _sign(x, xp):
    # Sign that also works for complex input: x / |x| (a unit phase), with the
    # sign of zero defined to be zero.
    return x / xp.where(x == 0, xp.asarray(1, dtype=x.dtype), xp.abs(x))


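# The helper below performs the actual reduction. With ``a_max`` the element
# of maximal real part, ``m`` the total (weighted) multiplicity of that
# element, and ``s`` the scaled sum of the remaining terms, the sum can be
# written as
#
#     sum(b * exp(a)) = m * (1 + s) * exp(a_max)
#
# so that
#
#     log(sum(b * exp(a))) = log1p(s) + log(m) + a_max
#
# which avoids overflow in ``exp`` and loss of precision in ``log``. Signs
# (or complex phases) and non-finite values are handled separately below.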
def _logsumexp(a, b, axis, return_sign, xp):
    # Terms with zero weight must contribute exactly zero, even where `a` is
    # infinite; setting those elements of `a` to -inf makes exp() return 0.
    if b is not None:
        a[b == 0] = -xp.inf

    # Element with the greatest real part and a mask of its location(s);
    # shifting by it avoids overflow in exp() below.
    a_max, i_max = _elements_and_indices_with_max_real(a, axis=axis, xp=xp)

    # Remove the maximal element(s) from the sum; they are accounted for
    # separately through `m`.
    a[i_max] = -xp.inf
    i_max_dt = xp.astype(i_max, a.dtype)

    # `m` is the total (weighted) multiplicity of the maximal element.
    m = (xp.sum(i_max_dt, axis=axis, keepdims=True, dtype=a.dtype) if b is None
         else xp.sum(b * i_max_dt, axis=axis, keepdims=True, dtype=a.dtype))

    # Where the maximum is not finite, subtracting it would produce nan
    # (e.g. inf - inf), so shift by zero there instead.
    shift = xp.where(xp.isfinite(a_max), a_max, xp.asarray(0, dtype=a_max.dtype))

    # Sum of the remaining shifted terms, scaled by `m`. When the sum is
    # exactly zero the division is skipped to avoid 0/0 in case `m` is zero.
    exp = b * xp.exp(a - shift) if b is not None else xp.exp(a - shift)
    s = xp.sum(exp, axis=axis, keepdims=True, dtype=exp.dtype)
    s = xp.where(s == 0, s, s/m)

    sgn = None
    if return_sign:
        # Sign (or complex phase) of the argument of the logarithm.
        sgn = _sign(s + 1, xp=xp) * _sign(m, xp=xp)

        if xp.isdtype(s.dtype, "real floating"):
            # Make the arguments of log1p and log below non-negative; the sign
            # information is carried by `sgn`.
            s = xp.where(s < -1, -s - 2, s)
            m = xp.abs(m)
        else:
            # For complex input, fold the phase of the maximal element into
            # the reported sign.
            j = xp.asarray(1j, dtype=a_max.dtype)
            sgn = sgn * xp.exp(xp.imag(a_max) * j)

    # log(sum(b * exp(a))) = log(m * (1 + s) * exp(a_max))
    out = xp.log1p(s) + xp.log(m) + a_max

    # When the sign is reported separately, the result itself is real.
    out = xp_real(out) if return_sign else out

    return out, sgn


def softmax(x, axis=None):
    r"""Compute the softmax function.

    The softmax function transforms each element of a collection by
    computing the exponential of each element divided by the sum of the
    exponentials of all the elements. That is, if `x` is a one-dimensional
    numpy array::

        softmax(x) = np.exp(x)/sum(np.exp(x))

    Parameters
    ----------
    x : array_like
        Input array.
    axis : int or tuple of ints, optional
        Axis to compute values along. Default is None and softmax will be
        computed over the entire array `x`.

    Returns
    -------
    s : ndarray
        An array the same shape as `x`. The result will sum to 1 along the
        specified axis.

    Notes
    -----
    The formula for the softmax function :math:`\sigma(x)` for a vector
    :math:`x = \{x_0, x_1, ..., x_{n-1}\}` is

    .. math:: \sigma(x)_j = \frac{e^{x_j}}{\sum_k e^{x_k}}

    The `softmax` function is the gradient of `logsumexp`.

    The implementation uses shifting to avoid overflow. See [1]_ for more
    details.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] P. Blanchard, D.J. Higham, N.J. Higham, "Accurately computing the
       log-sum-exp and softmax functions", IMA Journal of Numerical Analysis,
       Vol. 41(4), :doi:`10.1093/imanum/draa038`.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.special import softmax
    >>> np.set_printoptions(precision=5)

    >>> x = np.array([[1, 0.5, 0.2, 3],
    ...               [1, -1, 7, 3],
    ...               [2, 12, 13, 3]])
    ...

    Compute the softmax transformation over the entire array.

    >>> m = softmax(x)
    >>> m
    array([[ 4.48309e-06, 2.71913e-06, 2.01438e-06, 3.31258e-05],
           [ 4.48309e-06, 6.06720e-07, 1.80861e-03, 3.31258e-05],
           [ 1.21863e-05, 2.68421e-01, 7.29644e-01, 3.31258e-05]])

    >>> m.sum()
    1.0

    Compute the softmax transformation along the first axis (i.e., the
    columns).

    >>> m = softmax(x, axis=0)

    >>> m
    array([[ 2.11942e-01, 1.01300e-05, 2.75394e-06, 3.33333e-01],
           [ 2.11942e-01, 2.26030e-06, 2.47262e-03, 3.33333e-01],
           [ 5.76117e-01, 9.99988e-01, 9.97525e-01, 3.33333e-01]])

    >>> m.sum(axis=0)
    array([ 1., 1., 1., 1.])

    Compute the softmax transformation along the second axis (i.e., the rows).

    >>> m = softmax(x, axis=1)
    >>> m
    array([[ 1.05877e-01, 6.42177e-02, 4.75736e-02, 7.82332e-01],
           [ 2.42746e-03, 3.28521e-04, 9.79307e-01, 1.79366e-02],
           [ 1.22094e-05, 2.68929e-01, 7.31025e-01, 3.31885e-05]])

    >>> m.sum(axis=1)
    array([ 1., 1., 1.])
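
    Because the implementation shifts `x` by its maximum, very large entries
    do not overflow:

    >>> softmax(np.array([1000.0, 1.0]))
    array([1., 0.])

    `softmax` is the gradient of `logsumexp`, which can be checked with a
    forward finite difference (the step ``eps`` below is an arbitrary
    illustrative choice):

    >>> from scipy.special import logsumexp
    >>> x = np.array([1.0, 2.0, 3.0])
    >>> eps = 1e-6
    >>> grad = np.array([(logsumexp(x + eps*np.eye(3)[i]) - logsumexp(x)) / eps
    ...                  for i in range(3)])
    >>> np.allclose(grad, softmax(x), atol=1e-5)
    True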

    """
    x = _asarray_validated(x, check_finite=False)
    # Shift by the maximum to avoid overflow in exp.
    x_max = np.amax(x, axis=axis, keepdims=True)
    exp_x_shifted = np.exp(x - x_max)
    return exp_x_shifted / np.sum(exp_x_shifted, axis=axis, keepdims=True)


def log_softmax(x, axis=None):
    r"""Compute the logarithm of the softmax function.

    In principle::

        log_softmax(x) = log(softmax(x))

    but using a more accurate implementation.

    Parameters
    ----------
    x : array_like
        Input array.
    axis : int or tuple of ints, optional
        Axis to compute values along. Default is None and softmax will be
        computed over the entire array `x`.

    Returns
    -------
    s : ndarray or scalar
        An array with the same shape as `x`. Exponential of the result will
        sum to 1 along the specified axis. If `x` is a scalar, a scalar is
        returned.

    Notes
    -----
    `log_softmax` is more accurate than ``np.log(softmax(x))`` with inputs
    that make `softmax` saturate (see examples below).

    .. versionadded:: 1.5.0

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.special import log_softmax
    >>> from scipy.special import softmax
    >>> np.set_printoptions(precision=5)

    >>> x = np.array([1000.0, 1.0])

    >>> y = log_softmax(x)
    >>> y
    array([ 0., -999.])

    >>> with np.errstate(divide='ignore'):
    ...     y = np.log(softmax(x))
    ...
    >>> y
    array([ 0., -inf])
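
    Exponentiating the result recovers the softmax values, which sum to one
    along the reduced axis:

    >>> float(np.exp(log_softmax(x)).sum())
    1.0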

    """
    x = _asarray_validated(x, check_finite=False)

    x_max = np.amax(x, axis=axis, keepdims=True)

    # Shift by the maximum to avoid overflow in exp; replace non-finite maxima
    # by zero so that the shift itself cannot produce nan.
    if x_max.ndim > 0:
        x_max[~np.isfinite(x_max)] = 0
    elif not np.isfinite(x_max):
        x_max = 0

    tmp = x - x_max
    exp_tmp = np.exp(tmp)

    # Suppress warnings about log of zero.
    with np.errstate(divide='ignore'):
        s = np.sum(exp_tmp, axis=axis, keepdims=True)
        out = np.log(s)

    out = tmp - out
    return out