| import warnings |
|
|
| import numpy as np |
| import pytest |
|
|
| from sklearn import config_context, datasets |
| from sklearn.base import BaseEstimator, TransformerMixin, clone |
| from sklearn.compose import TransformedTargetRegressor |
| from sklearn.dummy import DummyRegressor |
| from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit |
| from sklearn.pipeline import Pipeline |
| from sklearn.preprocessing import FunctionTransformer, StandardScaler |
| from sklearn.utils._testing import assert_allclose |
|
|
# Shared (X, y) regression dataset, generated once with a fixed seed so every
# test in this module that unpacks `friedman` works on the same data.
friedman = datasets.make_friedman1(random_state=0)
|
|
|
|
def test_transform_target_regressor_error():
    """Check the exceptions raised by `fit` for invalid configurations."""
    data, target = friedman

    # A transformer and a func/inverse_func pair cannot be given together.
    conflicting = TransformedTargetRegressor(
        regressor=LinearRegression(),
        transformer=StandardScaler(),
        func=np.exp,
        inverse_func=np.log,
    )
    expected_msg = (
        "'transformer' and functions 'func'/'inverse_func' cannot both be set."
    )
    with pytest.raises(ValueError, match=expected_msg):
        conflicting.fit(data, target)

    # Passing `sample_weight` with an OrthogonalMatchingPursuit regressor is
    # expected to surface as a TypeError about an unexpected keyword argument.
    unweighted = TransformedTargetRegressor(
        regressor=OrthogonalMatchingPursuit(), transformer=StandardScaler()
    )
    unit_weights = np.ones((target.shape[0],))
    expected_msg = r"fit\(\) got an unexpected keyword argument 'sample_weight'"
    with pytest.raises(TypeError, match=expected_msg):
        unweighted.fit(data, target, sample_weight=unit_weights)

    # `func` without `inverse_func` is rejected.
    only_forward = TransformedTargetRegressor(func=np.exp)
    expected_msg = "When 'func' is provided, 'inverse_func' must also be provided"
    with pytest.raises(ValueError, match=expected_msg):
        only_forward.fit(data, target)

    # `inverse_func` without `func` is rejected as well.
    only_inverse = TransformedTargetRegressor(inverse_func=np.log)
    expected_msg = "When 'inverse_func' is provided, 'func' must also be provided"
    with pytest.raises(ValueError, match=expected_msg):
        only_inverse.fit(data, target)
|
|
|
|
def test_transform_target_regressor_invertible():
    """Check that the inverse-consistency warning follows `check_inverse`."""
    data, target = friedman
    # `np.sqrt` and `np.log` are deliberately not inverses of each other.
    mismatched = {"func": np.sqrt, "inverse_func": np.log}

    # With the check enabled, fitting must warn about the mismatch.
    checked = TransformedTargetRegressor(
        regressor=LinearRegression(), check_inverse=True, **mismatched
    )
    warning_pattern = (
        r"The provided functions.* are not strictly inverse of each other"
    )
    with pytest.warns(UserWarning, match=warning_pattern):
        checked.fit(data, target)

    # With the check disabled, any UserWarning is promoted to an error, so the
    # fit below only succeeds if no such warning is emitted.
    unchecked = TransformedTargetRegressor(
        regressor=LinearRegression(), **mismatched
    )
    unchecked.set_params(check_inverse=False)
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        unchecked.fit(data, target)
|
|
|
|
def _check_standard_scaled(y, y_pred):
    """Assert that `y_pred` equals `y` centered and scaled along axis 0."""
    centered = y - np.mean(y, axis=0)
    assert_allclose(centered / np.std(y, axis=0), y_pred)
|
|
|
|
def _check_shifted_by_one(y, y_pred):
    """Assert that `y_pred` equals `y` with one added to every entry."""
    expected = y + 1
    assert_allclose(expected, y_pred)
|
|
|
|
def test_transform_target_regressor_functions():
    """Check a log/exp function pair on the 1D friedman target."""
    data, target = friedman
    model = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    model.fit(data, target)
    predictions = model.predict(data)

    # Forward transform: the target is reshaped to a column before it is handed
    # to the fitted transformer, then squeezed back for the comparison.
    target_column = target.reshape(-1, 1)
    transformed = model.transformer_.transform(target_column).squeeze()
    assert_allclose(np.log(target), transformed)

    # The inverse transform must recover the original target.
    restored = model.transformer_.inverse_transform(transformed.reshape(-1, 1))
    assert_allclose(target, restored.squeeze())

    # Predictions keep the target's shape and equal the inverse function
    # applied to the inner regressor's predictions.
    assert target.shape == predictions.shape
    inner_predictions = model.regressor_.predict(data)
    assert_allclose(predictions, model.inverse_func(inner_predictions))

    # The inner regressor must match one fitted directly on func(target).
    reference = LinearRegression().fit(data, model.func(target))
    assert_allclose(model.regressor_.coef_.ravel(), reference.coef_.ravel())
|
|
|
|
def test_transform_target_regressor_functions_multioutput():
    """Check a log/exp function pair on a two-column target."""
    data = friedman[0]
    base_target = friedman[1]
    # Two outputs: the friedman target and its square plus one.
    target = np.vstack((base_target, base_target**2 + 1)).T

    model = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    model.fit(data, target)
    predictions = model.predict(data)

    # Forward and inverse transforms applied directly to the 2D target.
    transformed = model.transformer_.transform(target)
    assert_allclose(np.log(target), transformed)
    restored = model.transformer_.inverse_transform(transformed)
    assert_allclose(target, restored)

    # Predictions keep the target's shape and equal the inverse function
    # applied to the inner regressor's predictions.
    assert target.shape == predictions.shape
    inner_predictions = model.regressor_.predict(data)
    assert_allclose(predictions, model.inverse_func(inner_predictions))

    # The inner regressor must match one fitted directly on func(target).
    reference = LinearRegression().fit(data, model.func(target))
    assert_allclose(model.regressor_.coef_.ravel(), reference.coef_.ravel())
|
|
|
|
@pytest.mark.parametrize(
    "X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
)
def test_transform_target_regressor_1d_transformer(X, y):
    """Check a `FunctionTransformer` target transformer on 1D and 2D targets.

    The test is parametrized with the 1D friedman target and with a two-column
    target built from it. The transformer is called on `y` as given, without
    any reshaping by the test.
    """
    transformer = FunctionTransformer(
        func=lambda x: x + 1, inverse_func=lambda x: x - 1
    )
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    # Predictions must have the same shape as the target that was passed in.
    assert y.shape == y_pred.shape

    # Forward transform performed by the fitted transformer.
    y_tran = regr.transformer_.transform(y)
    _check_shifted_by_one(y, y_tran)

    # The inverse transform must recover the original target.
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())

    # Reproduce the meta-estimator by hand with a cloned transformer and compare
    # both the predictions and the fitted coefficients.
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
|
|
|
|
@pytest.mark.parametrize(
    "X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
)
def test_transform_target_regressor_2d_transformer(X, y):
    """Check a `StandardScaler` target transformer on 1D and 2D targets.

    The test is parametrized with the 1D friedman target and with a two-column
    target built from it. Whenever the test calls the scaler directly on a 1D
    target, it first reshapes the target to a single column.
    """
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    # Predictions must have the same shape as the target that was passed in.
    assert y.shape == y_pred.shape

    # Forward transform performed by the fitted transformer.
    if y.ndim == 1:
        y_tran = regr.transformer_.transform(y.reshape(-1, 1))
    else:
        y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran.squeeze())

    # The inverse transform must recover the original target.
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())

    # Reproduce the meta-estimator by hand with a cloned transformer; a 1D
    # target is routed through a column vector and squeezed back.
    lr = LinearRegression()
    transformer2 = clone(transformer)
    if y.ndim == 1:
        lr.fit(X, transformer2.fit_transform(y.reshape(-1, 1)).squeeze())
        y_lr_pred = lr.predict(X).reshape(-1, 1)
        y_pred2 = transformer2.inverse_transform(y_lr_pred).squeeze()
    else:
        lr.fit(X, transformer2.fit_transform(y))
        y_lr_pred = lr.predict(X)
        y_pred2 = transformer2.inverse_transform(y_lr_pred)

    assert_allclose(y_pred, y_pred2)
    assert_allclose(regr.regressor_.coef_, lr.coef_)
|
|
|
|
def test_transform_target_regressor_2d_transformer_multioutput():
    """Check a `StandardScaler` target transformer on a two-column target."""
    X = friedman[0]
    # Two outputs: the friedman target and its square plus one.
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    # Predictions must have the same shape as the target that was passed in.
    assert y.shape == y_pred.shape

    # Forward transform performed by the fitted transformer.
    y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)

    # The inverse transform must recover the original target.
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())

    # Reproduce the meta-estimator by hand with a cloned transformer and compare
    # both the predictions and the fitted coefficients.
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
|
|
|
|
def test_transform_target_regressor_3d_target():
    """Check that predictions keep the shape of a 3D target.

    The transformer flattens the target to 2D on the way in and restores the
    trailing dimension of size 2 on the way out.
    """
    features = friedman[0]
    # Repeat the friedman target into an (n_samples, 3, 2) array.
    target = np.tile(friedman[1].reshape(-1, 1, 1), [1, 3, 2])

    def flatten_data(data):
        n_rows = data.shape[0]
        return data.reshape(n_rows, -1)

    def unflatten_data(data):
        n_rows = data.shape[0]
        return data.reshape(n_rows, -1, 2)

    reshaper = FunctionTransformer(func=flatten_data, inverse_func=unflatten_data)
    model = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=reshaper
    )
    model.fit(features, target)
    predictions = model.predict(features)
    assert target.shape == predictions.shape
|
|
|
|
def test_transform_target_regressor_multi_to_single():
    """Check a `func` that reduces a two-column target to a single output.

    The reduction is tried once returning a column vector and once returning a
    1D array; both variants must yield predictions of shape (100, 1) that agree
    with each other.
    """
    X = friedman[0]
    y = np.transpose([friedman[1], (friedman[1] ** 2 + 1)])

    def norm_as_column(y):
        out = np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)
        return out[:, np.newaxis]

    def norm_as_vector(y):
        return np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)

    def identity(y):
        return y

    predictions = []
    for forward in (norm_as_column, norm_as_vector):
        tt = TransformedTargetRegressor(
            func=forward, inverse_func=identity, check_inverse=False
        )
        tt.fit(X, y)
        y_pred = tt.predict(X)
        assert y_pred.shape == (100, 1)
        predictions.append(y_pred)

    y_pred_2d_func, y_pred_1d_func = predictions
    assert_allclose(y_pred_1d_func, y_pred_2d_func)
|
|
|
|
class DummyCheckerArrayTransformer(TransformerMixin, BaseEstimator):
    """Identity transformer asserting that every input is a NumPy array."""

    @staticmethod
    def _assert_ndarray(X):
        """Fail with an AssertionError unless `X` is an `np.ndarray`."""
        assert isinstance(X, np.ndarray)

    def fit(self, X, y=None):
        """Check the type of `X` and return the estimator itself."""
        self._assert_ndarray(X)
        return self

    def transform(self, X):
        """Check the type of `X` and return it unchanged."""
        self._assert_ndarray(X)
        return X

    def inverse_transform(self, X):
        """Check the type of `X` and return it unchanged."""
        self._assert_ndarray(X)
        return X
|
|
|
|
class DummyCheckerListRegressor(DummyRegressor):
    """`DummyRegressor` asserting that `X` is a Python list in fit and predict."""

    def fit(self, X, y, sample_weight=None):
        """Check that `X` is a list, then delegate to `DummyRegressor.fit`."""
        assert isinstance(X, list)
        fitted = super().fit(X, y, sample_weight)
        return fitted

    def predict(self, X):
        """Check that `X` is a list, then delegate to `DummyRegressor.predict`."""
        assert isinstance(X, list)
        predictions = super().predict(X)
        return predictions
|
|
|
|
def test_transform_target_regressor_ensure_y_array():
    """Check how list inputs reach the transformer and the regressor.

    The transformer used here asserts it receives an ndarray and the regressor
    asserts it receives `X` as a list. Fitting and predicting with lists must
    therefore succeed, while passing `X` as an ndarray must trip the
    regressor's assertion.
    """
    X, y = friedman
    X_as_list = X.tolist()
    y_as_list = y.tolist()

    tt = TransformedTargetRegressor(
        transformer=DummyCheckerArrayTransformer(),
        regressor=DummyCheckerListRegressor(),
        check_inverse=False,
    )

    # List inputs satisfy both dummy checkers.
    tt.fit(X_as_list, y_as_list)
    tt.predict(X_as_list)

    # An ndarray `X` is rejected by the list-checking regressor.
    with pytest.raises(AssertionError):
        tt.fit(X, y_as_list)
    with pytest.raises(AssertionError):
        tt.predict(X)
|
|
|
|
class DummyTransformer(TransformerMixin, BaseEstimator):
    """Identity transformer that counts how many times `fit` was called."""

    def __init__(self, fit_counter=0):
        self.fit_counter = fit_counter

    def fit(self, X, y=None):
        """Increment `fit_counter` by one and return the estimator itself."""
        self.fit_counter = self.fit_counter + 1
        return self

    def transform(self, X):
        """Return `X` unchanged."""
        return X

    def inverse_transform(self, X):
        """Return `X` unchanged."""
        return X
|
|
|
|
@pytest.mark.parametrize("check_inverse", [False, True])
def test_transform_target_regressor_count_fit(check_inverse):
    """Check the transformer is fitted exactly once, whatever `check_inverse` is."""
    data, target = friedman
    counting_transformer = DummyTransformer()
    ttr = TransformedTargetRegressor(
        transformer=counting_transformer, check_inverse=check_inverse
    )
    ttr.fit(data, target)
    n_fits = ttr.transformer_.fit_counter
    assert n_fits == 1
|
|
|
|
class DummyRegressorWithExtraFitParams(DummyRegressor):
    """`DummyRegressor` whose `fit` takes an extra `check_input` parameter."""

    def fit(self, X, y, sample_weight=None, check_input=True):
        """Assert `check_input` is falsy, then delegate to `DummyRegressor.fit`.

        The default is `True`, so the assertion only passes when the caller
        explicitly forwarded a falsy `check_input`.
        """
        assert not check_input
        fitted = super().fit(X, y, sample_weight)
        return fitted
|
|
|
|
def test_transform_target_regressor_pass_fit_parameters():
    """Check extra `fit` keyword arguments are forwarded to the regressor."""
    data, target = friedman
    inner_regressor = DummyRegressorWithExtraFitParams()
    regr = TransformedTargetRegressor(
        regressor=inner_regressor, transformer=DummyTransformer()
    )

    # The inner regressor asserts that `check_input` arrives as a falsy value.
    extra_fit_params = {"check_input": False}
    regr.fit(data, target, **extra_fit_params)
    assert regr.transformer_.fit_counter == 1
|
|
|
|
def test_transform_target_regressor_route_pipeline():
    """Check a step-prefixed fit parameter reaches the regressor via a Pipeline."""
    data, target = friedman

    regr = TransformedTargetRegressor(
        regressor=DummyRegressorWithExtraFitParams(), transformer=DummyTransformer()
    )
    pip = Pipeline([("normalize", StandardScaler()), ("est", regr)])

    # `est__check_input` targets the "est" step; the inner regressor asserts
    # that `check_input` arrives as a falsy value.
    pip.fit(data, target, est__check_input=False)

    assert regr.transformer_.fit_counter == 1
|
|
|
|
class DummyRegressorWithExtraPredictParams(DummyRegressor):
    """`DummyRegressor` whose `predict` takes an extra `check_input` parameter."""

    def predict(self, X, check_input=True):
        """Record the call, assert `check_input` is falsy, then delegate.

        `predict_called` is set before the assertion, so it is recorded even
        when the assertion fails.
        """
        self.predict_called = True
        assert not check_input
        predictions = super().predict(X)
        return predictions
|
|
|
|
def test_transform_target_regressor_pass_extra_predict_parameters():
    """Check extra `predict` keyword arguments are forwarded to the regressor."""
    data, target = friedman
    inner_regressor = DummyRegressorWithExtraPredictParams()
    regr = TransformedTargetRegressor(
        regressor=inner_regressor, transformer=DummyTransformer()
    )

    regr.fit(data, target)
    # The inner regressor asserts that `check_input` arrives as a falsy value.
    extra_predict_params = {"check_input": False}
    regr.predict(data, **extra_predict_params)
    assert regr.regressor_.predict_called
|
|
|
|
@pytest.mark.parametrize("output_format", ["pandas", "polars"])
def test_transform_target_regressor_not_warns_with_global_output_set(output_format):
    """Test that TransformedTargetRegressor will not raise warnings if
    set_config(transform_output="pandas"/"polars") is set globally; regression test for
    issue #29361."""
    # Fix the seed: every warning is promoted to an error below, so the data
    # must be identical from run to run for failures to be reproducible.
    X, y = datasets.make_regression(random_state=0)
    # Make the target strictly positive so that `np.log` is well defined.
    y = np.abs(y) + 1
    with config_context(transform_output=output_format):
        with warnings.catch_warnings():
            # Any warning emitted during fit fails the test.
            warnings.simplefilter("error")
            TransformedTargetRegressor(
                regressor=LinearRegression(), func=np.log, inverse_func=np.exp
            ).fit(X, y)
|
|