# -*- coding: utf-8 -*-
from scipy import stats
import numpy as np
import warnings
from ...compat import check_is_fitted, pmdarima as pm_compat
from .base import BaseEndogTransformer
# Names exported by ``from <this module> import *``.
__all__ = ['BoxCoxEndogTransformer']
class BoxCoxEndogTransformer(BaseEndogTransformer):
    r"""Apply the Box-Cox transformation to an endogenous array

    The Box-Cox transformation is applied to non-normal data to coerce it more
    towards a normal distribution. It's specified as::

        (((y + lmbda2) ** lmbda) - 1) / lmbda, if lmbda != 0, else
        log(y + lmbda2)

    Parameters
    ----------
    lmbda : float or None, optional (default=None)
        The lambda value for the Box-Cox transformation, if known. If not
        specified, it will be estimated via MLE.

    lmbda2 : float, optional (default=0.)
        The value to add to ``y`` to make it positive. Must be non-negative,
        otherwise ``fit`` raises a ValueError. If, after adding ``lmbda2``,
        there are still values ``<= 0``, they are handled in ``transform``
        according to ``neg_action``.

    neg_action : str, optional (default="raise")
        How to respond if any values in ``y <= 0`` after adding ``lmbda2``.
        One of ('raise', 'warn', 'ignore'). If anything other than 'raise',
        values <= 0 will be truncated to the value of ``floor``.

    floor : float, optional (default=1e-16)
        A positive value to which values are truncated if there are values in
        ``y`` that are zero or negative and ``neg_action`` is not 'raise'.
        Note that if values are truncated, invertibility will not be
        preserved, and the transformed array may not be perfectly
        inverse-transformed.
    """

    def __init__(self, lmbda=None, lmbda2=0, neg_action="raise", floor=1e-16):
        self.lmbda = lmbda
        self.lmbda2 = lmbda2
        self.neg_action = neg_action
        self.floor = floor

    def fit(self, y, X=None, **kwargs):  # TODO: kwargs go away
        """Fit the transformer

        Learns the value of ``lmbda``, if not specified in the constructor.
        If defined in the constructor, is not re-learned.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The endogenous (time-series) array.

        X : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        self : BoxCoxEndogTransformer
            The fitted transformer, with ``lam1_`` and ``lam2_`` set.

        Raises
        ------
        ValueError
            If ``lmbda2`` is negative.

        Notes
        -----
        When ``lmbda`` must be estimated, ``y + lmbda2`` is handed directly to
        ``scipy.stats.boxcox``; the ``neg_action``/``floor`` handling used by
        ``transform`` is not applied here, so the data must already be
        acceptable to SciPy (which documents that the input must be positive).
        """
        lam1 = self.lmbda
        lam2 = self.lmbda2

        # Temporary shim until we remove `exogenous` support completely
        X, _ = pm_compat.get_X(X, **kwargs)

        if lam2 < 0:
            raise ValueError("lmbda2 must be a non-negative scalar value")

        # Only estimate lambda when the user did not supply one
        if lam1 is None:
            y, _ = self._check_y_X(y, X)
            _, lam1 = stats.boxcox(y + lam2, lmbda=None, alpha=None)

        self.lam1_ = lam1
        self.lam2_ = lam2
        return self

    def transform(self, y, X=None, **kwargs):
        """Transform the new array

        Apply the Box-Cox transformation to the array after learning the
        lambda parameter.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The endogenous (time-series) array.

        X : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        y_transform : array-like or None
            The Box-Cox transformed y array

        X : array-like or None
            The X array

        Raises
        ------
        ValueError
            If any shifted value is ``<= 0`` and ``neg_action`` is 'raise'.
        """
        check_is_fitted(self, "lam1_")

        # Temporary shim until we remove `exogenous` support completely
        X, _ = pm_compat.get_X(X, **kwargs)

        lam1 = self.lam1_
        lam2 = self.lam2_

        y, exog = self._check_y_X(y, X)

        # Shift out-of-place: an in-place ``+=`` would write into whatever
        # buffer the validation helper returned (possibly the caller's own
        # array) and fails under NumPy's in-place casting rules when that
        # buffer is integer-typed and ``lam2`` is a float.
        y = y + lam2

        neg_mask = y <= 0.
        if neg_mask.any():
            action = self.neg_action
            msg = "Negative or zero values present in y"
            if action == "raise":
                raise ValueError(msg)
            elif action == "warn":
                warnings.warn(msg, UserWarning)
            # Any action other than 'raise' truncates offending values
            y[neg_mask] = self.floor

        # lambda == 0 is the log special case of the Box-Cox family
        if lam1 == 0:
            return np.log(y), exog
        return (y ** lam1 - 1) / lam1, exog

    def inverse_transform(self, y, X=None, **kwargs):  # TODO: kwargs go away
        """Inverse transform a transformed array

        Inverse the Box-Cox transformation on the transformed array. Note that
        if truncation happened in the ``transform`` method, invertibility will
        not be preserved, and the transformed array may not be perfectly
        inverse-transformed.

        Parameters
        ----------
        y : array-like or None, shape=(n_samples,)
            The transformed endogenous (time-series) array.

        X : array-like or None, shape=(n_samples, n_features), optional
            The exogenous array of additional covariates. Not used for
            endogenous transformers. Default is None, and non-None values will
            serve as pass-through arrays.

        Returns
        -------
        y : array-like or None
            The inverse-transformed y array

        X : array-like or None
            The X array; no inverse transformation is applied to it here.
        """
        check_is_fitted(self, "lam1_")

        # Temporary shim until we remove `exogenous` support completely
        X, _ = pm_compat.get_X(X, **kwargs)

        lam1 = self.lam1_
        lam2 = self.lam2_

        y, exog = self._check_y_X(y, X)

        # Inverse of the log special case
        if lam1 == 0:
            return np.exp(y) - lam2, exog

        numer = y * lam1  # remove denominator
        numer += 1.  # add 1 back to it
        de_exp = numer ** (1. / lam1)  # de-exponentiate
        return de_exp - lam2, exog
|