| """Testing for the VotingClassifier and VotingRegressor""" |
|
|
| import re |
|
|
| import numpy as np |
| import pytest |
|
|
| from sklearn import config_context, datasets |
| from sklearn.base import BaseEstimator, ClassifierMixin, clone |
| from sklearn.datasets import make_multilabel_classification |
| from sklearn.dummy import DummyRegressor |
| from sklearn.ensemble import ( |
| RandomForestClassifier, |
| RandomForestRegressor, |
| VotingClassifier, |
| VotingRegressor, |
| ) |
| from sklearn.exceptions import NotFittedError |
| from sklearn.linear_model import LinearRegression, LogisticRegression |
| from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split |
| from sklearn.multiclass import OneVsRestClassifier |
| from sklearn.naive_bayes import GaussianNB |
| from sklearn.neighbors import KNeighborsClassifier |
| from sklearn.preprocessing import StandardScaler |
| from sklearn.svm import SVC |
| from sklearn.tests.metadata_routing_common import ( |
| ConsumingClassifier, |
| ConsumingRegressor, |
| _Registry, |
| check_recorded_metadata, |
| ) |
| from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor |
| from sklearn.utils._testing import ( |
| assert_almost_equal, |
| assert_array_almost_equal, |
| assert_array_equal, |
| ) |
|
|
| |
| iris = datasets.load_iris() |
| X, y = iris.data[:, 1:3], iris.target |
| |
| X_scaled = StandardScaler().fit_transform(X) |
|
|
| X_r, y_r = datasets.load_diabetes(return_X_y=True) |
|
|
|
|


@pytest.mark.parametrize(
    "params, err_msg",
    [
        (
            {"estimators": []},
            "Invalid 'estimators' attribute, 'estimators' should be a non-empty list",
        ),
        (
            {"estimators": [("lr", LogisticRegression())], "weights": [1, 2]},
            "Number of `estimators` and weights must be equal",
        ),
    ],
)
def test_voting_classifier_estimator_init(params, err_msg):
    ensemble = VotingClassifier(**params)
    with pytest.raises(ValueError, match=err_msg):
        ensemble.fit(X, y)


def test_predictproba_hardvoting():
    """Check that predict_proba is not exposed when voting='hard'."""
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()), ("lr2", LogisticRegression())],
        voting="hard",
    )

    inner_msg = "predict_proba is not available when voting='hard'"
    outer_msg = "'VotingClassifier' has no attribute 'predict_proba'"
    with pytest.raises(AttributeError, match=outer_msg) as exec_info:
        eclf.predict_proba
    assert isinstance(exec_info.value.__cause__, AttributeError)
    assert inner_msg in str(exec_info.value.__cause__)

    assert not hasattr(eclf, "predict_proba")
    eclf.fit(X_scaled, y)
    assert not hasattr(eclf, "predict_proba")
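

# Contrast sketch, not part of the original suite (the test name below is ours):
# a minimal illustration that, unlike the hard-voting case above, an ensemble
# with voting="soft" does expose predict_proba.
def test_predictproba_softvoting_sketch():
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()), ("lr2", LogisticRegression())],
        voting="soft",
    ).fit(X_scaled, y)
    assert hasattr(eclf, "predict_proba")
    # iris has 3 classes, so probabilities have one column per class
    assert eclf.predict_proba(X_scaled).shape == (X_scaled.shape[0], 3)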


def test_notfitted():
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()), ("lr2", LogisticRegression())],
        voting="soft",
    )
    ereg = VotingRegressor([("dr", DummyRegressor())])
    msg = (
        "This %s instance is not fitted yet. Call 'fit'"
        " with appropriate arguments before using this estimator."
    )
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.predict(X)
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.predict_proba(X)
    with pytest.raises(NotFittedError, match=msg % "VotingClassifier"):
        eclf.transform(X)
    with pytest.raises(NotFittedError, match=msg % "VotingRegressor"):
        ereg.predict(X_r)
    with pytest.raises(NotFittedError, match=msg % "VotingRegressor"):
        ereg.transform(X_r)


def test_majority_label_iris(global_random_seed):
    """Check classification by majority label on dataset iris."""
    clf1 = LogisticRegression(solver="liblinear", random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()
    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="hard"
    )
    scores = cross_val_score(eclf, X, y, scoring="accuracy")

    assert scores.mean() >= 0.9


def test_tie_situation():
    """Check voting classifier selects smaller class label in tie situation."""
    clf1 = LogisticRegression(random_state=123, solver="liblinear")
    clf2 = RandomForestClassifier(random_state=123)
    eclf = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2)], voting="hard")
    assert clf1.fit(X, y).predict(X)[73] == 2
    assert clf2.fit(X, y).predict(X)[73] == 1
    assert eclf.fit(X, y).predict(X)[73] == 1
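

# Illustrative sketch, not part of the original suite (the test name is ours):
# it mirrors the tie-breaking rule the test above relies on. Majority voting
# takes an argmax over vote counts, and numpy's argmax returns the *first*
# maximal index, i.e. the smallest label-encoded class wins a tie.
def test_hard_voting_tie_break_sketch():
    votes = np.array([0, 1])  # one vote for each of two encoded classes
    assert np.argmax(np.bincount(votes)) == 0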


def test_weights_iris(global_random_seed):
    """Check classification by average probabilities on dataset iris."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()
    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        weights=[1, 2, 10],
    )
    scores = cross_val_score(eclf, X_scaled, y, scoring="accuracy")
    assert scores.mean() >= 0.9
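

# Sketch, not part of the original suite (name and toy numbers are ours): soft
# voting predicts the argmax of the weighted average of per-estimator class
# probabilities, which the test above exercises end to end.
def test_soft_voting_weighted_average_sketch():
    # shape: (n_estimators, n_samples, n_classes)
    proba = np.array([[[0.9, 0.1]], [[0.2, 0.8]]])
    avg = np.average(proba, axis=0, weights=[1, 3])
    # (0.9 * 1 + 0.2 * 3) / 4 = 0.375 and (0.1 * 1 + 0.8 * 3) / 4 = 0.625
    assert_array_almost_equal(avg, [[0.375, 0.625]])
    assert_array_equal(np.argmax(avg, axis=1), [1])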


def test_weights_regressor():
    """Check weighted average regression prediction on diabetes dataset."""
    reg1 = DummyRegressor(strategy="mean")
    reg2 = DummyRegressor(strategy="median")
    reg3 = DummyRegressor(strategy="quantile", quantile=0.2)
    ereg = VotingRegressor(
        [("mean", reg1), ("median", reg2), ("quantile", reg3)], weights=[1, 2, 10]
    )

    X_r_train, X_r_test, y_r_train, y_r_test = train_test_split(
        X_r, y_r, test_size=0.25
    )

    reg1_pred = reg1.fit(X_r_train, y_r_train).predict(X_r_test)
    reg2_pred = reg2.fit(X_r_train, y_r_train).predict(X_r_test)
    reg3_pred = reg3.fit(X_r_train, y_r_train).predict(X_r_test)
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    avg = np.average(
        np.asarray([reg1_pred, reg2_pred, reg3_pred]), axis=0, weights=[1, 2, 10]
    )
    assert_almost_equal(ereg_pred, avg, decimal=2)

    ereg_weights_none = VotingRegressor(
        [("mean", reg1), ("median", reg2), ("quantile", reg3)], weights=None
    )
    ereg_weights_equal = VotingRegressor(
        [("mean", reg1), ("median", reg2), ("quantile", reg3)], weights=[1, 1, 1]
    )
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
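

# Sketch, not part of the original suite (the test name is ours): before
# averaging, VotingRegressor.transform stacks the per-estimator predictions
# column-wise, one column per non-dropped estimator.
def test_voting_regressor_transform_shape_sketch():
    ereg = VotingRegressor(
        [("mean", DummyRegressor()), ("median", DummyRegressor(strategy="median"))]
    ).fit(X_r, y_r)
    assert ereg.transform(X_r).shape == (X_r.shape[0], 2)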


def test_predict_on_toy_problem(global_random_seed):
    """Manually check predicted class labels for toy dataset."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()

    X = np.array(
        [[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2], [2.1, 1.4], [3.1, 2.3]]
    )
    y = np.array([1, 1, 1, 2, 2, 2])

    assert_array_equal(clf1.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    assert_array_equal(clf2.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    assert_array_equal(clf3.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])

    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="hard",
        weights=[1, 1, 1],
    )
    assert_array_equal(eclf.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])

    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        weights=[1, 1, 1],
    )
    assert_array_equal(eclf.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])


def test_predict_proba_on_toy_problem():
    """Calculate predicted probabilities on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    clf1_res = np.array(
        [
            [0.59790391, 0.40209609],
            [0.57622162, 0.42377838],
            [0.50728456, 0.49271544],
            [0.40241774, 0.59758226],
        ]
    )

    clf2_res = np.array([[0.8, 0.2], [0.8, 0.2], [0.2, 0.8], [0.3, 0.7]])

    clf3_res = np.array(
        [[0.9985082, 0.0014918], [0.99845843, 0.00154157], [0.0, 1.0], [0.0, 1.0]]
    )

    t00 = (2 * clf1_res[0][0] + clf2_res[0][0] + clf3_res[0][0]) / 4
    t11 = (2 * clf1_res[1][1] + clf2_res[1][1] + clf3_res[1][1]) / 4
    t21 = (2 * clf1_res[2][1] + clf2_res[2][1] + clf3_res[2][1]) / 4
    t31 = (2 * clf1_res[3][1] + clf2_res[3][1] + clf3_res[3][1]) / 4

    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        weights=[2, 1, 1],
    )
    eclf_res = eclf.fit(X, y).predict_proba(X)

    assert_almost_equal(t00, eclf_res[0][0], decimal=1)
    assert_almost_equal(t11, eclf_res[1][1], decimal=1)
    assert_almost_equal(t21, eclf_res[2][1], decimal=1)
    assert_almost_equal(t31, eclf_res[3][1], decimal=1)

    inner_msg = "predict_proba is not available when voting='hard'"
    outer_msg = "'VotingClassifier' has no attribute 'predict_proba'"
    with pytest.raises(AttributeError, match=outer_msg) as exec_info:
        eclf = VotingClassifier(
            estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="hard"
        )
        eclf.fit(X, y).predict_proba(X)

    assert isinstance(exec_info.value.__cause__, AttributeError)
    assert inner_msg in str(exec_info.value.__cause__)


def test_multilabel():
    """Check if error is raised for multilabel classification."""
    X, y = make_multilabel_classification(
        n_classes=2, n_labels=1, allow_unlabeled=False, random_state=123
    )
    clf = OneVsRestClassifier(SVC(kernel="linear"))

    eclf = VotingClassifier(estimators=[("ovr", clf)], voting="hard")

    # Multilabel targets are not supported; fitting is allowed either to
    # succeed or to raise NotImplementedError.
    try:
        eclf.fit(X, y)
    except NotImplementedError:
        return


def test_gridsearch():
    """Check GridSearch support."""
    clf1 = LogisticRegression(random_state=1)
    clf2 = RandomForestClassifier(random_state=1, n_estimators=3)
    clf3 = GaussianNB()
    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft"
    )

    params = {
        "lr__C": [1.0, 100.0],
        "voting": ["soft", "hard"],
        "weights": [[0.5, 0.5, 0.5], [1.0, 0.5, 0.5]],
    }

    grid = GridSearchCV(estimator=eclf, param_grid=params, cv=2)
    grid.fit(X_scaled, y)
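

# Sketch, not part of the original suite (the test name is ours): the nested
# "lr__C" names used in the grid above address parameters of the named
# sub-estimators through the standard set_params/get_params machinery.
def test_voting_nested_param_names_sketch():
    eclf = VotingClassifier(estimators=[("lr", LogisticRegression())], voting="soft")
    eclf.set_params(lr__C=10.0)
    assert eclf.get_params()["lr__C"] == 10.0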


def test_parallel_fit(global_random_seed):
    """Check parallel backend of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft", n_jobs=1
    ).fit(X, y)
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft", n_jobs=2
    ).fit(X, y)

    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))


@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_sample_weight(global_random_seed):
    """Tests sample_weight parameter of VotingClassifier."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = SVC(probability=True, random_state=global_random_seed)
    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("svc", clf3)], voting="soft"
    ).fit(X_scaled, y, sample_weight=np.ones((len(y),)))
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("svc", clf3)], voting="soft"
    ).fit(X_scaled, y)
    assert_array_equal(eclf1.predict(X_scaled), eclf2.predict(X_scaled))
    assert_array_almost_equal(
        eclf1.predict_proba(X_scaled), eclf2.predict_proba(X_scaled)
    )

    sample_weight = np.random.RandomState(global_random_seed).uniform(size=(len(y),))
    eclf3 = VotingClassifier(estimators=[("lr", clf1)], voting="soft")
    eclf3.fit(X_scaled, y, sample_weight)
    clf1.fit(X_scaled, y, sample_weight)
    assert_array_equal(eclf3.predict(X_scaled), clf1.predict(X_scaled))
    assert_array_almost_equal(
        eclf3.predict_proba(X_scaled), clf1.predict_proba(X_scaled)
    )

    # Check that an error is raised when the underlying estimator does not
    # support sample weights.
    clf4 = KNeighborsClassifier()
    eclf3 = VotingClassifier(
        estimators=[("lr", clf1), ("svc", clf3), ("knn", clf4)], voting="soft"
    )
    msg = "Underlying estimator KNeighborsClassifier does not support sample weights."
    with pytest.raises(TypeError, match=msg):
        eclf3.fit(X_scaled, y, sample_weight)

    # Check that errors unrelated to sample_weight are propagated unchanged.
    class ClassifierErrorFit(ClassifierMixin, BaseEstimator):
        def fit(self, X, y, sample_weight):
            raise TypeError("Error unrelated to sample_weight.")

    clf = ClassifierErrorFit()
    with pytest.raises(TypeError, match="Error unrelated to sample_weight"):
        clf.fit(X_scaled, y, sample_weight=sample_weight)


def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs."""

    class MockClassifier(ClassifierMixin, BaseEstimator):
        """Mock Classifier to check that sample_weight is received as kwargs."""

        def fit(self, X, y, *args, **sample_weight):
            assert "sample_weight" in sample_weight

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[("mock", clf)], voting="soft")

    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),)))


def test_voting_classifier_set_params(global_random_seed):
    """Check that set_params() can replace a named sub-estimator in place."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(
        n_estimators=10, random_state=global_random_seed, max_depth=None
    )
    clf3 = GaussianNB()

    eclf1 = VotingClassifier(
        [("lr", clf1), ("rf", clf2)], voting="soft", weights=[1, 2]
    ).fit(X_scaled, y)
    eclf2 = VotingClassifier(
        [("lr", clf1), ("nb", clf3)], voting="soft", weights=[1, 2]
    )
    eclf2.set_params(nb=clf2).fit(X_scaled, y)

    assert_array_equal(eclf1.predict(X_scaled), eclf2.predict(X_scaled))
    assert_array_almost_equal(
        eclf1.predict_proba(X_scaled), eclf2.predict_proba(X_scaled)
    )
    assert eclf2.estimators[0][1].get_params() == clf1.get_params()
    assert eclf2.estimators[1][1].get_params() == clf2.get_params()


def test_set_estimator_drop():
    """Check that a sub-estimator can be set to 'drop' and is excluded."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=123)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("nb", clf3)],
        voting="hard",
        weights=[1, 0, 0.5],
    ).fit(X, y)

    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("nb", clf3)],
        voting="hard",
        weights=[1, 1, 0.5],
    )
    eclf2.set_params(rf="drop").fit(X, y)

    assert_array_equal(eclf1.predict(X), eclf2.predict(X))

    assert dict(eclf2.estimators)["rf"] == "drop"
    assert len(eclf2.estimators_) == 2
    assert all(
        isinstance(est, (LogisticRegression, GaussianNB)) for est in eclf2.estimators_
    )
    assert eclf2.get_params()["rf"] == "drop"

    eclf1.set_params(voting="soft").fit(X, y)
    eclf2.set_params(voting="soft").fit(X, y)

    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    msg = "All estimators are dropped. At least one is required"
    with pytest.raises(ValueError, match=msg):
        eclf2.set_params(lr="drop", rf="drop", nb="drop").fit(X, y)

    # Test soft voting transform with a dropped estimator.
    X1 = np.array([[1], [2]])
    y1 = np.array([1, 2])
    eclf1 = VotingClassifier(
        estimators=[("rf", clf2), ("nb", clf3)],
        voting="soft",
        weights=[0, 0.5],
        flatten_transform=False,
    ).fit(X1, y1)

    eclf2 = VotingClassifier(
        estimators=[("rf", clf2), ("nb", clf3)],
        voting="soft",
        weights=[1, 0.5],
        flatten_transform=False,
    )
    eclf2.set_params(rf="drop").fit(X1, y1)
    assert_array_almost_equal(
        eclf1.transform(X1),
        np.array([[[0.7, 0.3], [0.3, 0.7]], [[1.0, 0.0], [0.0, 1.0]]]),
    )
    assert_array_almost_equal(eclf2.transform(X1), np.array([[[1.0, 0.0], [0.0, 1.0]]]))
    eclf1.set_params(voting="hard")
    eclf2.set_params(voting="hard")
    assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]]))
    assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))


def test_estimator_weights_format(global_random_seed):
    """Check that estimator weights can be given as a list or an array."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2)], weights=[1, 2], voting="soft"
    )
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2)], weights=np.array((1, 2)), voting="soft"
    )
    eclf1.fit(X_scaled, y)
    eclf2.fit(X_scaled, y)
    assert_array_almost_equal(
        eclf1.predict_proba(X_scaled), eclf2.predict_proba(X_scaled)
    )


def test_transform(global_random_seed):
    """Check transform method of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft"
    ).fit(X, y)
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        flatten_transform=True,
    ).fit(X, y)
    eclf3 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        flatten_transform=False,
    ).fit(X, y)

    # flatten_transform=True (the default) yields (n_samples, n_classifiers *
    # n_classes); flatten_transform=False yields (n_classifiers, n_samples,
    # n_classes).
    assert_array_equal(eclf1.transform(X).shape, (4, 6))
    assert_array_equal(eclf2.transform(X).shape, (4, 6))
    assert_array_equal(eclf3.transform(X).shape, (3, 4, 2))
    assert_array_almost_equal(eclf1.transform(X), eclf2.transform(X))
    assert_array_almost_equal(
        eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)), eclf2.transform(X)
    )


@pytest.mark.parametrize(
    "X, y, voter",
    [
        (
            X,
            y,
            VotingClassifier(
                [
                    ("lr", LogisticRegression()),
                    ("rf", RandomForestClassifier(n_estimators=5)),
                ]
            ),
        ),
        (
            X_r,
            y_r,
            VotingRegressor(
                [
                    ("lr", LinearRegression()),
                    ("rf", RandomForestRegressor(n_estimators=5)),
                ]
            ),
        ),
    ],
)
def test_none_estimator_with_weights(X, y, voter):
    # Check that an estimator can be set to "drop" while sample weights are
    # passed, and that refitting after the drop still works.
    voter = clone(voter)
    X_scaled = StandardScaler().fit_transform(X)
    voter.fit(X_scaled, y, sample_weight=np.ones(y.shape))
    voter.set_params(lr="drop")
    voter.fit(X_scaled, y, sample_weight=np.ones(y.shape))
    y_pred = voter.predict(X_scaled)
    assert y_pred.shape == y.shape


@pytest.mark.parametrize(
    "est",
    [
        VotingRegressor(
            estimators=[
                ("lr", LinearRegression()),
                ("tree", DecisionTreeRegressor(random_state=0)),
            ]
        ),
        VotingClassifier(
            estimators=[
                ("lr", LogisticRegression(random_state=0)),
                ("tree", DecisionTreeClassifier(random_state=0)),
            ]
        ),
    ],
    ids=["VotingRegressor", "VotingClassifier"],
)
def test_n_features_in(est):
    X = [[1, 2], [3, 4], [5, 6]]
    y = [0, 1, 2]

    assert not hasattr(est, "n_features_in_")
    est.fit(X, y)
    assert est.n_features_in_ == 2


@pytest.mark.parametrize(
    "estimator",
    [
        VotingRegressor(
            estimators=[
                ("lr", LinearRegression()),
                ("rf", RandomForestRegressor(random_state=123)),
            ],
            verbose=True,
        ),
        VotingClassifier(
            estimators=[
                ("lr", LogisticRegression(random_state=123)),
                ("rf", RandomForestClassifier(random_state=123)),
            ],
            verbose=True,
        ),
    ],
)
def test_voting_verbose(estimator, capsys):
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    pattern = (
        r"\[Voting\].*\(1 of 2\) Processing lr, total=.*\n"
        r"\[Voting\].*\(2 of 2\) Processing rf, total=.*\n$"
    )
    clone(estimator).fit(X, y)
    assert re.match(pattern, capsys.readouterr()[0])


def test_get_features_names_out_regressor():
    """Check get_feature_names_out output for regressor."""
    X = [[1, 2], [3, 4], [5, 6]]
    y = [0, 1, 2]

    voting = VotingRegressor(
        estimators=[
            ("lr", LinearRegression()),
            ("tree", DecisionTreeRegressor(random_state=0)),
            ("ignore", "drop"),
        ]
    )
    voting.fit(X, y)

    names_out = voting.get_feature_names_out()
    expected_names = ["votingregressor_lr", "votingregressor_tree"]
    assert_array_equal(names_out, expected_names)
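

# Sketch, not part of the original suite (the test name is ours): output names
# are prefixed with the lowercased class name of the ensemble, as asserted in
# the expected names above.
def test_feature_names_prefix_sketch():
    voting = VotingRegressor([("lr", LinearRegression())]).fit([[0], [1]], [0, 1])
    names = voting.get_feature_names_out()
    assert all(name.startswith("votingregressor_") for name in names)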


@pytest.mark.parametrize(
    "kwargs, expected_names",
    [
        (
            {"voting": "soft", "flatten_transform": True},
            [
                "votingclassifier_lr0",
                "votingclassifier_lr1",
                "votingclassifier_lr2",
                "votingclassifier_tree0",
                "votingclassifier_tree1",
                "votingclassifier_tree2",
            ],
        ),
        ({"voting": "hard"}, ["votingclassifier_lr", "votingclassifier_tree"]),
    ],
)
def test_get_features_names_out_classifier(kwargs, expected_names):
    """Check get_feature_names_out for classifier for different settings."""
    X = [[1, 2], [3, 4], [5, 6], [1, 1.2]]
    y = [0, 1, 2, 0]

    voting = VotingClassifier(
        estimators=[
            ("lr", LogisticRegression(random_state=0)),
            ("tree", DecisionTreeClassifier(random_state=0)),
        ],
        **kwargs,
    )
    voting.fit(X, y)
    X_trans = voting.transform(X)
    names_out = voting.get_feature_names_out()

    assert X_trans.shape[1] == len(expected_names)
    assert_array_equal(names_out, expected_names)


def test_get_features_names_out_classifier_error():
    """Check that error is raised when voting="soft" and flatten_transform=False."""
    X = [[1, 2], [3, 4], [5, 6]]
    y = [0, 1, 2]

    voting = VotingClassifier(
        estimators=[
            ("lr", LogisticRegression(random_state=0)),
            ("tree", DecisionTreeClassifier(random_state=0)),
        ],
        voting="soft",
        flatten_transform=False,
    )
    voting.fit(X, y)

    msg = (
        "get_feature_names_out is not supported when `voting='soft'` and "
        "`flatten_transform=False`"
    )
    with pytest.raises(ValueError, match=msg):
        voting.get_feature_names_out()


# Metadata Routing Tests
# ======================


@pytest.mark.parametrize(
    "Estimator, Child",
    [(VotingClassifier, ConsumingClassifier), (VotingRegressor, ConsumingRegressor)],
)
def test_routing_passed_metadata_not_supported(Estimator, Child):
    """Test that the right error message is raised when metadata is passed while
    not supported when `enable_metadata_routing=False`."""
    X = np.array([[0, 1], [2, 2], [4, 6]])
    y = [1, 2, 3]

    with pytest.raises(
        ValueError, match="is only supported if enable_metadata_routing=True"
    ):
        Estimator([("clf", Child())]).fit(X, y, sample_weight=[1, 1, 1], metadata="a")


@pytest.mark.parametrize(
    "Estimator, Child",
    [(VotingClassifier, ConsumingClassifier), (VotingRegressor, ConsumingRegressor)],
)
@config_context(enable_metadata_routing=True)
def test_get_metadata_routing_without_fit(Estimator, Child):
    """Test that get_metadata_routing() does not raise when called before fit."""
    est = Estimator([("sub_est", Child())])
    est.get_metadata_routing()


@pytest.mark.parametrize(
    "Estimator, Child",
    [(VotingClassifier, ConsumingClassifier), (VotingRegressor, ConsumingRegressor)],
)
@pytest.mark.parametrize("prop", ["sample_weight", "metadata"])
@config_context(enable_metadata_routing=True)
def test_metadata_routing_for_voting_estimators(Estimator, Child, prop):
    """Test that metadata is routed correctly for Voting*."""
    X = np.array([[0, 1], [2, 2], [4, 6]])
    y = [1, 2, 3]
    sample_weight, metadata = [1, 1, 1], "a"

    est = Estimator(
        [
            (
                "sub_est1",
                Child(registry=_Registry()).set_fit_request(**{prop: True}),
            ),
            (
                "sub_est2",
                Child(registry=_Registry()).set_fit_request(**{prop: True}),
            ),
        ]
    )

    est.fit(X, y, **{prop: sample_weight if prop == "sample_weight" else metadata})

    for estimator in est.estimators:
        if prop == "sample_weight":
            kwargs = {prop: sample_weight}
        else:
            kwargs = {prop: metadata}

        # Each sub-estimator's registry records what was routed to its fit.
        registry = estimator[1].registry
        assert len(registry)
        for sub_est in registry:
            check_recorded_metadata(obj=sub_est, method="fit", parent="fit", **kwargs)


@pytest.mark.parametrize(
    "Estimator, Child",
    [(VotingClassifier, ConsumingClassifier), (VotingRegressor, ConsumingRegressor)],
)
@config_context(enable_metadata_routing=True)
def test_metadata_routing_error_for_voting_estimators(Estimator, Child):
    """Test that the right error is raised when metadata is not requested."""
    X = np.array([[0, 1], [2, 2], [4, 6]])
    y = [1, 2, 3]
    sample_weight, metadata = [1, 1, 1], "a"

    est = Estimator([("sub_est", Child())])

    error_message = (
        "[sample_weight, metadata] are passed but are not explicitly set as requested"
        f" or not requested for {Child.__name__}.fit"
    )

    with pytest.raises(ValueError, match=re.escape(error_message)):
        est.fit(X, y, sample_weight=sample_weight, metadata=metadata)