Spaces:

slenk
/

codewraith

Sleeping

App Files Files Community

codewraith / data /source_files /clean /0b4d66d202fe.py

slenk

Upload folder using huggingface_hub

eeef81e verified about 1 month ago

raw

history blame contribute delete

5.9 kB

	# -*- coding: utf-8 -*-

	from scipy import stats

	import numpy as np
	import warnings

	from ...compat import check_is_fitted, pmdarima as pm_compat
	from .base import BaseEndogTransformer

	__all__ = ['BoxCoxEndogTransformer']


	class BoxCoxEndogTransformer(BaseEndogTransformer):
	    r"""Apply the Box-Cox transformation to an endogenous array

	    The Box-Cox transformation is applied to non-normal data to coerce it more
	    towards a normal distribution. It's specified as::

	        (((y + lmbda2) ** lmbda) - 1) / lmbda,  if lmbda != 0, else
	        log(y + lmbda2)

	    Parameters
	    ----------
	    lmbda : float or None, optional (default=None)
	        The lambda value for the Box-Cox transformation, if known. If not
	        specified, it will be estimated via MLE.

	    lmbda2 : float, optional (default=0)
	        The value to add to ``y`` to make it positive. Must itself be
	        non-negative. If, after adding ``lmbda2``, there are still values
	        <= 0, they are handled according to ``neg_action``.

	    neg_action : str, optional (default="raise")
	        How to respond if any values in ``y <= 0`` after adding ``lmbda2``.
	        One of ('raise', 'warn', 'ignore'). If anything other than 'raise',
	        values <= 0 will be truncated to the value of ``floor``.

	    floor : float, optional (default=1e-16)
	        A positive value to which values in ``y`` are truncated when they
	        are zero or negative and ``neg_action`` is not 'raise'. Note that
	        if values are truncated, invertibility will not be preserved, and the
	        transformed array may not be perfectly inverse-transformed.

	    Attributes
	    ----------
	    lam1_ : float
	        The lambda used by ``transform``/``inverse_transform``: either the
	        ``lmbda`` given to the constructor or the value estimated in ``fit``.

	    lam2_ : float
	        The shift applied to ``y``; a copy of ``lmbda2`` stored in ``fit``.
	    """
	    def __init__(self, lmbda=None, lmbda2=0, neg_action="raise", floor=1e-16):

	        self.lmbda = lmbda
	        self.lmbda2 = lmbda2
	        self.neg_action = neg_action
	        self.floor = floor

	    def fit(self, y, X=None, **kwargs):  # TODO: kwargs go away
	        """Fit the transformer

	        Learns the value of ``lmbda``, if not specified in the constructor.
	        If defined in the constructor, is not re-learned.

	        Parameters
	        ----------
	        y : array-like or None, shape=(n_samples,)
	            The endogenous (time-series) array.

	        X : array-like or None, shape=(n_samples, n_features), optional
	            The exogenous array of additional covariates. Not used for
	            endogenous transformers. Default is None, and non-None values will
	            serve as pass-through arrays.

	        **kwargs : keyword args
	            Forwarded to the ``get_X`` compatibility shim (presumably to
	            carry the deprecated ``exogenous`` keyword).

	        Returns
	        -------
	        self : BoxCoxEndogTransformer
	            The fitted transformer, with ``lam1_`` and ``lam2_`` set.

	        Raises
	        ------
	        ValueError
	            If ``lmbda2`` is negative.

	        Notes
	        -----
	        ``neg_action`` and ``floor`` are not applied here. When ``lmbda`` is
	        estimated, ``y + lmbda2`` is handed directly to
	        ``scipy.stats.boxcox``, which is documented to require positive
	        data.
	        """
	        lam1 = self.lmbda
	        lam2 = self.lmbda2

	        # Temporary shim until we remove `exogenous` support completely
	        X, _ = pm_compat.get_X(X, **kwargs)

	        if lam2 < 0:
	            raise ValueError("lmbda2 must be a non-negative scalar value")

	        # Only estimate lambda when the user did not pin it
	        if lam1 is None:
	            y, _ = self._check_y_X(y, X)
	            # With lmbda=None, boxcox returns (transformed, mle_lambda);
	            # only the estimate is needed here
	            _, lam1 = stats.boxcox(y + lam2, lmbda=None, alpha=None)

	        self.lam1_ = lam1
	        self.lam2_ = lam2
	        return self

	    def transform(self, y, X=None, **kwargs):
	        """Transform the new array

	        Apply the Box-Cox transformation to the array after learning the
	        lambda parameter.

	        Parameters
	        ----------
	        y : array-like or None, shape=(n_samples,)
	            The endogenous (time-series) array.

	        X : array-like or None, shape=(n_samples, n_features), optional
	            The exogenous array of additional covariates. Not used for
	            endogenous transformers. Default is None, and non-None values will
	            serve as pass-through arrays.

	        **kwargs : keyword args
	            Forwarded to the ``get_X`` compatibility shim (presumably to
	            carry the deprecated ``exogenous`` keyword).

	        Returns
	        -------
	        y_transform : array-like or None
	            The Box-Cox transformed y array

	        X : array-like or None
	            The X array

	        Raises
	        ------
	        ValueError
	            If any value of ``y + lmbda2`` is <= 0 and ``neg_action`` is
	            'raise'.
	        """
	        check_is_fitted(self, "lam1_")

	        # Temporary shim until we remove `exogenous` support completely
	        X, _ = pm_compat.get_X(X, **kwargs)

	        lam1 = self.lam1_
	        lam2 = self.lam2_

	        y, exog = self._check_y_X(y, X)
	        # Shift out-of-place: never writes into the array returned by
	        # `_check_y_X` (which may alias the caller's data) and lets an
	        # integer array be promoted when `lam2` is a float
	        y = y + lam2

	        neg_mask = y <= 0.
	        if neg_mask.any():
	            action = self.neg_action
	            msg = "Negative or zero values present in y"
	            if action == "raise":
	                raise ValueError(msg)
	            elif action == "warn":
	                warnings.warn(msg, UserWarning)
	            # Any action other than 'raise' truncates to the floor so that
	            # the log / power below is defined
	            y[neg_mask] = self.floor

	        if lam1 == 0:
	            return np.log(y), exog
	        return (y ** lam1 - 1) / lam1, exog

	    def inverse_transform(self, y, X=None, **kwargs):  # TODO: kwargs go away
	        """Inverse transform a transformed array

	        Inverse the Box-Cox transformation on the transformed array. Note that
	        if truncation happened in the ``transform`` method, invertibility will
	        not be preserved, and the transformed array may not be perfectly
	        inverse-transformed.

	        Parameters
	        ----------
	        y : array-like or None, shape=(n_samples,)
	            The transformed endogenous (time-series) array.

	        X : array-like or None, shape=(n_samples, n_features), optional
	            The exogenous array of additional covariates. Not used for
	            endogenous transformers. Default is None, and non-None values will
	            serve as pass-through arrays.

	        **kwargs : keyword args
	            Forwarded to the ``get_X`` compatibility shim (presumably to
	            carry the deprecated ``exogenous`` keyword).

	        Returns
	        -------
	        y : array-like or None
	            The inverse-transformed y array

	        X : array-like or None
	            The X array; it is passed through and not inverse-transformed
	            by this method.
	        """
	        check_is_fitted(self, "lam1_")

	        # Temporary shim until we remove `exogenous` support completely
	        X, _ = pm_compat.get_X(X, **kwargs)

	        lam1 = self.lam1_
	        lam2 = self.lam2_

	        y, exog = self._check_y_X(y, X)
	        if lam1 == 0:
	            # Inverse of log(y + lam2)
	            return np.exp(y) - lam2, exog

	        # Inverse of ((y + lam2) ** lam1 - 1) / lam1, undone step by step
	        numer = y * lam1  # remove denominator
	        numer = numer + 1.  # add 1 back to it
	        de_exp = numer ** (1. / lam1)  # de-exponentiate
	        return de_exp - lam2, exog