| import warnings |
|
|
| import numpy as np |
| import pytest |
| import scipy.sparse as sp |
|
|
| from sklearn.base import clone |
| from sklearn.dummy import DummyClassifier, DummyRegressor |
| from sklearn.exceptions import NotFittedError |
| from sklearn.utils._testing import ( |
| assert_almost_equal, |
| assert_array_almost_equal, |
| assert_array_equal, |
| ) |
| from sklearn.utils.fixes import CSC_CONTAINERS |
| from sklearn.utils.stats import _weighted_percentile |
|
|
|
|
| def _check_predict_proba(clf, X, y): |
| proba = clf.predict_proba(X) |
|
|
| |
| with warnings.catch_warnings(): |
| warnings.filterwarnings("ignore", "divide by zero encountered in log") |
| log_proba = clf.predict_log_proba(X) |
|
|
| y = np.atleast_1d(y) |
| if y.ndim == 1: |
| y = np.reshape(y, (-1, 1)) |
|
|
| n_outputs = y.shape[1] |
| n_samples = len(X) |
|
|
| if n_outputs == 1: |
| proba = [proba] |
| log_proba = [log_proba] |
|
|
| for k in range(n_outputs): |
| assert proba[k].shape[0] == n_samples |
| assert proba[k].shape[1] == len(np.unique(y[:, k])) |
| assert_array_almost_equal(proba[k].sum(axis=1), np.ones(len(X))) |
| |
| with warnings.catch_warnings(): |
| warnings.filterwarnings("ignore", "divide by zero encountered in log") |
| assert_array_almost_equal(np.log(proba[k]), log_proba[k]) |
|
|
|
|
| def _check_behavior_2d(clf): |
| |
| X = np.array([[0], [0], [0], [0]]) |
| y = np.array([1, 2, 1, 1]) |
| est = clone(clf) |
| est.fit(X, y) |
| y_pred = est.predict(X) |
| assert y.shape == y_pred.shape |
|
|
| |
| y = np.array([[1, 0], [2, 0], [1, 0], [1, 3]]) |
| est = clone(clf) |
| est.fit(X, y) |
| y_pred = est.predict(X) |
| assert y.shape == y_pred.shape |
|
|
|
|
| def _check_behavior_2d_for_constant(clf): |
| |
| X = np.array([[0], [0], [0], [0]]) |
| y = np.array([[1, 0, 5, 4, 3], [2, 0, 1, 2, 5], [1, 0, 4, 5, 2], [1, 3, 3, 2, 0]]) |
| est = clone(clf) |
| est.fit(X, y) |
| y_pred = est.predict(X) |
| assert y.shape == y_pred.shape |
|
|
|
|
| def _check_equality_regressor(statistic, y_learn, y_pred_learn, y_test, y_pred_test): |
| assert_array_almost_equal(np.tile(statistic, (y_learn.shape[0], 1)), y_pred_learn) |
| assert_array_almost_equal(np.tile(statistic, (y_test.shape[0], 1)), y_pred_test) |
|
|
|
|
| def test_feature_names_in_and_n_features_in_(global_random_seed, n_samples=10): |
| pd = pytest.importorskip("pandas") |
|
|
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X = pd.DataFrame([[0]] * n_samples, columns=["feature_1"]) |
| y = random_state.rand(n_samples) |
|
|
| est = DummyRegressor().fit(X, y) |
| assert hasattr(est, "feature_names_in_") |
| assert hasattr(est, "n_features_in_") |
|
|
| est = DummyClassifier().fit(X, y) |
| assert hasattr(est, "feature_names_in_") |
| assert hasattr(est, "n_features_in_") |
|
|
|
|
| def test_most_frequent_and_prior_strategy(): |
| X = [[0], [0], [0], [0]] |
| y = [1, 2, 1, 1] |
|
|
| for strategy in ("most_frequent", "prior"): |
| clf = DummyClassifier(strategy=strategy, random_state=0) |
| clf.fit(X, y) |
| assert_array_equal(clf.predict(X), np.ones(len(X))) |
| _check_predict_proba(clf, X, y) |
|
|
| if strategy == "prior": |
| assert_array_almost_equal( |
| clf.predict_proba([X[0]]), clf.class_prior_.reshape((1, -1)) |
| ) |
| else: |
| assert_array_almost_equal( |
| clf.predict_proba([X[0]]), clf.class_prior_.reshape((1, -1)) > 0.5 |
| ) |
|
|
|
|
| def test_most_frequent_and_prior_strategy_with_2d_column_y(): |
| |
| |
| X = [[0], [0], [0], [0]] |
| y_1d = [1, 2, 1, 1] |
| y_2d = [[1], [2], [1], [1]] |
|
|
| for strategy in ("most_frequent", "prior"): |
| clf_1d = DummyClassifier(strategy=strategy, random_state=0) |
| clf_2d = DummyClassifier(strategy=strategy, random_state=0) |
|
|
| clf_1d.fit(X, y_1d) |
| clf_2d.fit(X, y_2d) |
| assert_array_equal(clf_1d.predict(X), clf_2d.predict(X)) |
|
|
|
|
| def test_most_frequent_and_prior_strategy_multioutput(): |
| X = [[0], [0], [0], [0]] |
| y = np.array([[1, 0], [2, 0], [1, 0], [1, 3]]) |
|
|
| n_samples = len(X) |
|
|
| for strategy in ("prior", "most_frequent"): |
| clf = DummyClassifier(strategy=strategy, random_state=0) |
| clf.fit(X, y) |
| assert_array_equal( |
| clf.predict(X), |
| np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))]), |
| ) |
| _check_predict_proba(clf, X, y) |
| _check_behavior_2d(clf) |
|
|
|
|
| def test_stratified_strategy(global_random_seed): |
| X = [[0]] * 5 |
| y = [1, 2, 1, 1, 2] |
| clf = DummyClassifier(strategy="stratified", random_state=global_random_seed) |
| clf.fit(X, y) |
|
|
| X = [[0]] * 500 |
| y_pred = clf.predict(X) |
| p = np.bincount(y_pred) / float(len(X)) |
| assert_almost_equal(p[1], 3.0 / 5, decimal=1) |
| assert_almost_equal(p[2], 2.0 / 5, decimal=1) |
| _check_predict_proba(clf, X, y) |
|
|
|
|
| def test_stratified_strategy_multioutput(global_random_seed): |
| X = [[0]] * 5 |
| y = np.array([[2, 1], [2, 2], [1, 1], [1, 2], [1, 1]]) |
|
|
| clf = DummyClassifier(strategy="stratified", random_state=global_random_seed) |
| clf.fit(X, y) |
|
|
| X = [[0]] * 500 |
| y_pred = clf.predict(X) |
|
|
| for k in range(y.shape[1]): |
| p = np.bincount(y_pred[:, k]) / float(len(X)) |
| assert_almost_equal(p[1], 3.0 / 5, decimal=1) |
| assert_almost_equal(p[2], 2.0 / 5, decimal=1) |
| _check_predict_proba(clf, X, y) |
|
|
| _check_behavior_2d(clf) |
|
|
|
|
| def test_uniform_strategy(global_random_seed): |
| X = [[0]] * 4 |
| y = [1, 2, 1, 1] |
| clf = DummyClassifier(strategy="uniform", random_state=global_random_seed) |
| clf.fit(X, y) |
|
|
| X = [[0]] * 500 |
| y_pred = clf.predict(X) |
| p = np.bincount(y_pred) / float(len(X)) |
| assert_almost_equal(p[1], 0.5, decimal=1) |
| assert_almost_equal(p[2], 0.5, decimal=1) |
| _check_predict_proba(clf, X, y) |
|
|
|
|
| def test_uniform_strategy_multioutput(global_random_seed): |
| X = [[0]] * 4 |
| y = np.array([[2, 1], [2, 2], [1, 2], [1, 1]]) |
| clf = DummyClassifier(strategy="uniform", random_state=global_random_seed) |
| clf.fit(X, y) |
|
|
| X = [[0]] * 500 |
| y_pred = clf.predict(X) |
|
|
| for k in range(y.shape[1]): |
| p = np.bincount(y_pred[:, k]) / float(len(X)) |
| assert_almost_equal(p[1], 0.5, decimal=1) |
| assert_almost_equal(p[2], 0.5, decimal=1) |
| _check_predict_proba(clf, X, y) |
|
|
| _check_behavior_2d(clf) |
|
|
|
|
| def test_string_labels(): |
| X = [[0]] * 5 |
| y = ["paris", "paris", "tokyo", "amsterdam", "berlin"] |
| clf = DummyClassifier(strategy="most_frequent") |
| clf.fit(X, y) |
| assert_array_equal(clf.predict(X), ["paris"] * 5) |
|
|
|
|
| @pytest.mark.parametrize( |
| "y,y_test", |
| [ |
| ([2, 1, 1, 1], [2, 2, 1, 1]), |
| ( |
| np.array([[2, 2], [1, 1], [1, 1], [1, 1]]), |
| np.array([[2, 2], [2, 2], [1, 1], [1, 1]]), |
| ), |
| ], |
| ) |
| def test_classifier_score_with_None(y, y_test): |
| clf = DummyClassifier(strategy="most_frequent") |
| clf.fit(None, y) |
| assert clf.score(None, y_test) == 0.5 |
|
|
|
|
| @pytest.mark.parametrize( |
| "strategy", ["stratified", "most_frequent", "prior", "uniform", "constant"] |
| ) |
| def test_classifier_prediction_independent_of_X(strategy, global_random_seed): |
| y = [0, 2, 1, 1] |
| X1 = [[0]] * 4 |
| clf1 = DummyClassifier( |
| strategy=strategy, random_state=global_random_seed, constant=0 |
| ) |
| clf1.fit(X1, y) |
| predictions1 = clf1.predict(X1) |
|
|
| X2 = [[1]] * 4 |
| clf2 = DummyClassifier( |
| strategy=strategy, random_state=global_random_seed, constant=0 |
| ) |
| clf2.fit(X2, y) |
| predictions2 = clf2.predict(X2) |
|
|
| assert_array_equal(predictions1, predictions2) |
|
|
|
|
| def test_mean_strategy_regressor(global_random_seed): |
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X = [[0]] * 4 |
| y = random_state.randn(4) |
|
|
| reg = DummyRegressor() |
| reg.fit(X, y) |
| assert_array_equal(reg.predict(X), [np.mean(y)] * len(X)) |
|
|
|
|
| def test_mean_strategy_multioutput_regressor(global_random_seed): |
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X_learn = random_state.randn(10, 10) |
| y_learn = random_state.randn(10, 5) |
|
|
| mean = np.mean(y_learn, axis=0).reshape((1, -1)) |
|
|
| X_test = random_state.randn(20, 10) |
| y_test = random_state.randn(20, 5) |
|
|
| |
| est = DummyRegressor() |
| est.fit(X_learn, y_learn) |
| y_pred_learn = est.predict(X_learn) |
| y_pred_test = est.predict(X_test) |
|
|
| _check_equality_regressor(mean, y_learn, y_pred_learn, y_test, y_pred_test) |
| _check_behavior_2d(est) |
|
|
|
|
| def test_regressor_exceptions(): |
| reg = DummyRegressor() |
| with pytest.raises(NotFittedError): |
| reg.predict([]) |
|
|
|
|
| def test_median_strategy_regressor(global_random_seed): |
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X = [[0]] * 5 |
| y = random_state.randn(5) |
|
|
| reg = DummyRegressor(strategy="median") |
| reg.fit(X, y) |
| assert_array_equal(reg.predict(X), [np.median(y)] * len(X)) |
|
|
|
|
| def test_median_strategy_multioutput_regressor(global_random_seed): |
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X_learn = random_state.randn(10, 10) |
| y_learn = random_state.randn(10, 5) |
|
|
| median = np.median(y_learn, axis=0).reshape((1, -1)) |
|
|
| X_test = random_state.randn(20, 10) |
| y_test = random_state.randn(20, 5) |
|
|
| |
| est = DummyRegressor(strategy="median") |
| est.fit(X_learn, y_learn) |
| y_pred_learn = est.predict(X_learn) |
| y_pred_test = est.predict(X_test) |
|
|
| _check_equality_regressor(median, y_learn, y_pred_learn, y_test, y_pred_test) |
| _check_behavior_2d(est) |
|
|
|
|
| def test_quantile_strategy_regressor(global_random_seed): |
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X = [[0]] * 5 |
| y = random_state.randn(5) |
|
|
| reg = DummyRegressor(strategy="quantile", quantile=0.5) |
| reg.fit(X, y) |
| assert_array_equal(reg.predict(X), [np.median(y)] * len(X)) |
|
|
| reg = DummyRegressor(strategy="quantile", quantile=0) |
| reg.fit(X, y) |
| assert_array_equal(reg.predict(X), [np.min(y)] * len(X)) |
|
|
| reg = DummyRegressor(strategy="quantile", quantile=1) |
| reg.fit(X, y) |
| assert_array_equal(reg.predict(X), [np.max(y)] * len(X)) |
|
|
| reg = DummyRegressor(strategy="quantile", quantile=0.3) |
| reg.fit(X, y) |
| assert_array_equal(reg.predict(X), [np.percentile(y, q=30)] * len(X)) |
|
|
|
|
| def test_quantile_strategy_multioutput_regressor(global_random_seed): |
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X_learn = random_state.randn(10, 10) |
| y_learn = random_state.randn(10, 5) |
|
|
| median = np.median(y_learn, axis=0).reshape((1, -1)) |
| quantile_values = np.percentile(y_learn, axis=0, q=80).reshape((1, -1)) |
|
|
| X_test = random_state.randn(20, 10) |
| y_test = random_state.randn(20, 5) |
|
|
| |
| est = DummyRegressor(strategy="quantile", quantile=0.5) |
| est.fit(X_learn, y_learn) |
| y_pred_learn = est.predict(X_learn) |
| y_pred_test = est.predict(X_test) |
|
|
| _check_equality_regressor(median, y_learn, y_pred_learn, y_test, y_pred_test) |
| _check_behavior_2d(est) |
|
|
| |
| est = DummyRegressor(strategy="quantile", quantile=0.8) |
| est.fit(X_learn, y_learn) |
| y_pred_learn = est.predict(X_learn) |
| y_pred_test = est.predict(X_test) |
|
|
| _check_equality_regressor( |
| quantile_values, y_learn, y_pred_learn, y_test, y_pred_test |
| ) |
| _check_behavior_2d(est) |
|
|
|
|
| def test_quantile_invalid(): |
| X = [[0]] * 5 |
| y = [0] * 5 |
|
|
| est = DummyRegressor(strategy="quantile", quantile=None) |
| err_msg = ( |
| "When using `strategy='quantile', you have to specify the desired quantile" |
| ) |
| with pytest.raises(ValueError, match=err_msg): |
| est.fit(X, y) |
|
|
|
|
| def test_quantile_strategy_empty_train(): |
| est = DummyRegressor(strategy="quantile", quantile=0.4) |
| with pytest.raises(IndexError): |
| est.fit([], []) |
|
|
|
|
| def test_constant_strategy_regressor(global_random_seed): |
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X = [[0]] * 5 |
| y = random_state.randn(5) |
|
|
| reg = DummyRegressor(strategy="constant", constant=[43]) |
| reg.fit(X, y) |
| assert_array_equal(reg.predict(X), [43] * len(X)) |
|
|
| reg = DummyRegressor(strategy="constant", constant=43) |
| reg.fit(X, y) |
| assert_array_equal(reg.predict(X), [43] * len(X)) |
|
|
| |
| assert not isinstance(reg.constant, np.ndarray) |
|
|
|
|
| def test_constant_strategy_multioutput_regressor(global_random_seed): |
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X_learn = random_state.randn(10, 10) |
| y_learn = random_state.randn(10, 5) |
|
|
| |
| constants = random_state.randn(5) |
|
|
| X_test = random_state.randn(20, 10) |
| y_test = random_state.randn(20, 5) |
|
|
| |
| est = DummyRegressor(strategy="constant", constant=constants) |
| est.fit(X_learn, y_learn) |
| y_pred_learn = est.predict(X_learn) |
| y_pred_test = est.predict(X_test) |
|
|
| _check_equality_regressor(constants, y_learn, y_pred_learn, y_test, y_pred_test) |
| _check_behavior_2d_for_constant(est) |
|
|
|
|
| def test_y_mean_attribute_regressor(): |
| X = [[0]] * 5 |
| y = [1, 2, 4, 6, 8] |
| |
| est = DummyRegressor(strategy="mean") |
| est.fit(X, y) |
|
|
| assert est.constant_ == np.mean(y) |
|
|
|
|
| def test_constants_not_specified_regressor(): |
| X = [[0]] * 5 |
| y = [1, 2, 4, 6, 8] |
|
|
| est = DummyRegressor(strategy="constant") |
| err_msg = "Constant target value has to be specified" |
| with pytest.raises(TypeError, match=err_msg): |
| est.fit(X, y) |
|
|
|
|
| def test_constant_size_multioutput_regressor(global_random_seed): |
| random_state = np.random.RandomState(seed=global_random_seed) |
| X = random_state.randn(10, 10) |
| y = random_state.randn(10, 5) |
|
|
| est = DummyRegressor(strategy="constant", constant=[1, 2, 3, 4]) |
| err_msg = r"Constant target value should have shape \(5, 1\)." |
| with pytest.raises(ValueError, match=err_msg): |
| est.fit(X, y) |
|
|
|
|
| def test_constant_strategy(): |
| X = [[0], [0], [0], [0]] |
| y = [2, 1, 2, 2] |
|
|
| clf = DummyClassifier(strategy="constant", random_state=0, constant=1) |
| clf.fit(X, y) |
| assert_array_equal(clf.predict(X), np.ones(len(X))) |
| _check_predict_proba(clf, X, y) |
|
|
| X = [[0], [0], [0], [0]] |
| y = ["two", "one", "two", "two"] |
| clf = DummyClassifier(strategy="constant", random_state=0, constant="one") |
| clf.fit(X, y) |
| assert_array_equal(clf.predict(X), np.array(["one"] * 4)) |
| _check_predict_proba(clf, X, y) |
|
|
|
|
| def test_constant_strategy_multioutput(): |
| X = [[0], [0], [0], [0]] |
| y = np.array([[2, 3], [1, 3], [2, 3], [2, 0]]) |
|
|
| n_samples = len(X) |
|
|
| clf = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0]) |
| clf.fit(X, y) |
| assert_array_equal( |
| clf.predict(X), np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))]) |
| ) |
| _check_predict_proba(clf, X, y) |
|
|
|
|
| @pytest.mark.parametrize( |
| "y, params, err_msg", |
| [ |
| ([2, 1, 2, 2], {"random_state": 0}, "Constant.*has to be specified"), |
| ([2, 1, 2, 2], {"constant": [2, 0]}, "Constant.*should have shape"), |
| ( |
| np.transpose([[2, 1, 2, 2], [2, 1, 2, 2]]), |
| {"constant": 2}, |
| "Constant.*should have shape", |
| ), |
| ( |
| [2, 1, 2, 2], |
| {"constant": "my-constant"}, |
| "constant=my-constant.*Possible values.*\\[1, 2]", |
| ), |
| ( |
| np.transpose([[2, 1, 2, 2], [2, 1, 2, 2]]), |
| {"constant": [2, "unknown"]}, |
| "constant=\\[2, 'unknown'].*Possible values.*\\[1, 2]", |
| ), |
| ], |
| ids=[ |
| "no-constant", |
| "too-many-constant", |
| "not-enough-output", |
| "single-output", |
| "multi-output", |
| ], |
| ) |
| def test_constant_strategy_exceptions(y, params, err_msg): |
| X = [[0], [0], [0], [0]] |
|
|
| clf = DummyClassifier(strategy="constant", **params) |
| with pytest.raises(ValueError, match=err_msg): |
| clf.fit(X, y) |
|
|
|
|
| def test_classification_sample_weight(): |
| X = [[0], [0], [1]] |
| y = [0, 1, 0] |
| sample_weight = [0.1, 1.0, 0.1] |
|
|
| clf = DummyClassifier(strategy="stratified").fit(X, y, sample_weight) |
| assert_array_almost_equal(clf.class_prior_, [0.2 / 1.2, 1.0 / 1.2]) |
|
|
|
|
| @pytest.mark.parametrize("csc_container", CSC_CONTAINERS) |
| def test_constant_strategy_sparse_target(csc_container): |
| X = [[0]] * 5 |
| y = csc_container(np.array([[0, 1], [4, 0], [1, 1], [1, 4], [1, 1]])) |
|
|
| n_samples = len(X) |
|
|
| clf = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0]) |
| clf.fit(X, y) |
| y_pred = clf.predict(X) |
| assert sp.issparse(y_pred) |
| assert_array_equal( |
| y_pred.toarray(), np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))]) |
| ) |
|
|
|
|
| @pytest.mark.parametrize("csc_container", CSC_CONTAINERS) |
| def test_uniform_strategy_sparse_target_warning(global_random_seed, csc_container): |
| X = [[0]] * 5 |
| y = csc_container(np.array([[2, 1], [2, 2], [1, 4], [4, 2], [1, 1]])) |
|
|
| clf = DummyClassifier(strategy="uniform", random_state=global_random_seed) |
| with pytest.warns(UserWarning, match="the uniform strategy would not save memory"): |
| clf.fit(X, y) |
|
|
| X = [[0]] * 500 |
| y_pred = clf.predict(X) |
|
|
| for k in range(y.shape[1]): |
| p = np.bincount(y_pred[:, k]) / float(len(X)) |
| assert_almost_equal(p[1], 1 / 3, decimal=1) |
| assert_almost_equal(p[2], 1 / 3, decimal=1) |
| assert_almost_equal(p[4], 1 / 3, decimal=1) |
|
|
|
|
| @pytest.mark.parametrize("csc_container", CSC_CONTAINERS) |
| def test_stratified_strategy_sparse_target(global_random_seed, csc_container): |
| X = [[0]] * 5 |
| y = csc_container(np.array([[4, 1], [0, 0], [1, 1], [1, 4], [1, 1]])) |
|
|
| clf = DummyClassifier(strategy="stratified", random_state=global_random_seed) |
| clf.fit(X, y) |
|
|
| X = [[0]] * 500 |
| y_pred = clf.predict(X) |
| assert sp.issparse(y_pred) |
| y_pred = y_pred.toarray() |
|
|
| for k in range(y.shape[1]): |
| p = np.bincount(y_pred[:, k]) / float(len(X)) |
| assert_almost_equal(p[1], 3.0 / 5, decimal=1) |
| assert_almost_equal(p[0], 1.0 / 5, decimal=1) |
| assert_almost_equal(p[4], 1.0 / 5, decimal=1) |
|
|
|
|
| @pytest.mark.parametrize("csc_container", CSC_CONTAINERS) |
| def test_most_frequent_and_prior_strategy_sparse_target(csc_container): |
| X = [[0]] * 5 |
| y = csc_container(np.array([[1, 0], [1, 3], [4, 0], [0, 1], [1, 0]])) |
|
|
| n_samples = len(X) |
| y_expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))]) |
| for strategy in ("most_frequent", "prior"): |
| clf = DummyClassifier(strategy=strategy, random_state=0) |
| clf.fit(X, y) |
|
|
| y_pred = clf.predict(X) |
| assert sp.issparse(y_pred) |
| assert_array_equal(y_pred.toarray(), y_expected) |
|
|
|
|
| def test_dummy_regressor_sample_weight(global_random_seed, n_samples=10): |
| random_state = np.random.RandomState(seed=global_random_seed) |
|
|
| X = [[0]] * n_samples |
| y = random_state.rand(n_samples) |
| sample_weight = random_state.rand(n_samples) |
|
|
| est = DummyRegressor(strategy="mean").fit(X, y, sample_weight) |
| assert est.constant_ == np.average(y, weights=sample_weight) |
|
|
| est = DummyRegressor(strategy="median").fit(X, y, sample_weight) |
| assert est.constant_ == _weighted_percentile(y, sample_weight, 50.0) |
|
|
| est = DummyRegressor(strategy="quantile", quantile=0.95).fit(X, y, sample_weight) |
| assert est.constant_ == _weighted_percentile(y, sample_weight, 95.0) |
|
|
|
|
| def test_dummy_regressor_on_3D_array(): |
| X = np.array([[["foo"]], [["bar"]], [["baz"]]]) |
| y = np.array([2, 2, 2]) |
| y_expected = np.array([2, 2, 2]) |
| cls = DummyRegressor() |
| cls.fit(X, y) |
| y_pred = cls.predict(X) |
| assert_array_equal(y_pred, y_expected) |
|
|
|
|
| def test_dummy_classifier_on_3D_array(): |
| X = np.array([[["foo"]], [["bar"]], [["baz"]]]) |
| y = [2, 2, 2] |
| y_expected = [2, 2, 2] |
| y_proba_expected = [[1], [1], [1]] |
| cls = DummyClassifier(strategy="stratified") |
| cls.fit(X, y) |
| y_pred = cls.predict(X) |
| y_pred_proba = cls.predict_proba(X) |
| assert_array_equal(y_pred, y_expected) |
| assert_array_equal(y_pred_proba, y_proba_expected) |
|
|
|
|
| def test_dummy_regressor_return_std(): |
| X = [[0]] * 3 |
| y = np.array([2, 2, 2]) |
| y_std_expected = np.array([0, 0, 0]) |
| cls = DummyRegressor() |
| cls.fit(X, y) |
| y_pred_list = cls.predict(X, return_std=True) |
| |
| assert len(y_pred_list) == 2 |
| |
| assert_array_equal(y_pred_list[1], y_std_expected) |
|
|
|
|
| @pytest.mark.parametrize( |
| "y,y_test", |
| [ |
| ([1, 1, 1, 2], [1.25] * 4), |
| (np.array([[2, 2], [1, 1], [1, 1], [1, 1]]), [[1.25, 1.25]] * 4), |
| ], |
| ) |
| def test_regressor_score_with_None(y, y_test): |
| reg = DummyRegressor() |
| reg.fit(None, y) |
| assert reg.score(None, y_test) == 1.0 |
|
|
|
|
| @pytest.mark.parametrize("strategy", ["mean", "median", "quantile", "constant"]) |
| def test_regressor_prediction_independent_of_X(strategy): |
| y = [0, 2, 1, 1] |
| X1 = [[0]] * 4 |
| reg1 = DummyRegressor(strategy=strategy, constant=0, quantile=0.7) |
| reg1.fit(X1, y) |
| predictions1 = reg1.predict(X1) |
|
|
| X2 = [[1]] * 4 |
| reg2 = DummyRegressor(strategy=strategy, constant=0, quantile=0.7) |
| reg2.fit(X2, y) |
| predictions2 = reg2.predict(X2) |
|
|
| assert_array_equal(predictions1, predictions2) |
|
|
|
|
| @pytest.mark.parametrize( |
| "strategy", ["stratified", "most_frequent", "prior", "uniform", "constant"] |
| ) |
| def test_dtype_of_classifier_probas(strategy): |
| y = [0, 2, 1, 1] |
| X = np.zeros(4) |
| model = DummyClassifier(strategy=strategy, random_state=0, constant=0) |
| probas = model.fit(X, y).predict_proba(X) |
|
|
| assert probas.dtype == np.float64 |
|
|