import numpy as np
import pytest
from numpy.testing import assert_allclose

from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.calibration import (
    CalibratedClassifierCV,
    CalibrationDisplay,
    _CalibratedClassifier,
    _sigmoid_calibration,
    _SigmoidCalibration,
    calibration_curve,
)
from sklearn.datasets import load_iris, make_blobs, make_classification
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    VotingClassifier,
)
from sklearn.exceptions import NotFittedError
from sklearn.feature_extraction import DictVectorizer
from sklearn.frozen import FrozenEstimator
from sklearn.impute import SimpleImputer
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import (
    KFold,
    LeaveOneOut,
    check_cv,
    cross_val_predict,
    cross_val_score,
    train_test_split,
)
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._mocking import CheckingClassifier
from sklearn.utils._testing import (
    _convert_container,
    assert_almost_equal,
    assert_array_almost_equal,
    assert_array_equal,
    ignore_warnings,
)
from sklearn.utils.extmath import softmax
from sklearn.utils.fixes import CSR_CONTAINERS


N_SAMPLES = 200


@pytest.fixture(scope="module")
def data():
    X, y = make_classification(n_samples=N_SAMPLES, n_features=6, random_state=42)
    return X, y
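

# The calibration tests below mostly exercise the following pattern (rough
# orientation sketch only, not an actual test):
#
#     cal_clf = CalibratedClassifierCV(estimator, method="sigmoid", cv=5, ensemble=True)
#     cal_clf.fit(X_train, y_train)
#     proba = cal_clf.predict_proba(X_test)
#
# `method` selects the calibrator ("sigmoid" for Platt scaling or "isotonic"
# for isotonic regression), `cv` controls the internal cross-validation and
# `ensemble` chooses between keeping one calibrated classifier per CV split
# and fitting a single calibrator on out-of-fold predictions.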


@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
@pytest.mark.parametrize("ensemble", [True, False])
def test_calibration(data, method, csr_container, ensemble):
    # Test calibration with the sigmoid and isotonic methods, on dense and
    # sparse input.
    n_samples = N_SAMPLES // 2
    X, y = data
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    # MultinomialNB only allows non-negative features
    X -= X.min()

    # split train and test
    X_train, y_train, sw_train = X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive Bayes baseline without calibration
    clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Requesting more CV folds than there are samples must raise an error
    cal_clf = CalibratedClassifierCV(clf, cv=y.size + 1, ensemble=ensemble)
    with pytest.raises(ValueError):
        cal_clf.fit(X, y)

    # Naive Bayes with calibration, on dense and sparse input
    for this_X_train, this_X_test in [
        (X_train, X_test),
        (csr_container(X_train), csr_container(X_test)),
    ]:
        cal_clf = CalibratedClassifierCV(clf, method=method, cv=5, ensemble=ensemble)
        cal_clf.fit(this_X_train, y_train, sample_weight=sw_train)
        prob_pos_cal_clf = cal_clf.predict_proba(this_X_test)[:, 1]

        # Check that the Brier score improved after calibration
        assert brier_score_loss(y_test, prob_pos_clf) > brier_score_loss(
            y_test, prob_pos_cal_clf
        )

        # Check invariance against relabeling [0, 1] -> [1, 2]
        cal_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train)
        prob_pos_cal_clf_relabeled = cal_clf.predict_proba(this_X_test)[:, 1]
        assert_array_almost_equal(prob_pos_cal_clf, prob_pos_cal_clf_relabeled)

        # Check invariance against relabeling [0, 1] -> [-1, 1]
        cal_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train)
        prob_pos_cal_clf_relabeled = cal_clf.predict_proba(this_X_test)[:, 1]
        assert_array_almost_equal(prob_pos_cal_clf, prob_pos_cal_clf_relabeled)

        # Check behaviour when swapping the labels [0, 1] -> [1, 0]
        cal_clf.fit(this_X_train, (y_train + 1) % 2, sample_weight=sw_train)
        prob_pos_cal_clf_relabeled = cal_clf.predict_proba(this_X_test)[:, 1]
        if method == "sigmoid":
            # The sigmoid calibrator mirrors the probabilities under label swap
            assert_array_almost_equal(prob_pos_cal_clf, 1 - prob_pos_cal_clf_relabeled)
        else:
            # Isotonic calibration is not symmetric under label swapping, so
            # only check that the Brier score still improves
            assert brier_score_loss(y_test, prob_pos_clf) > brier_score_loss(
                (y_test + 1) % 2, prob_pos_cal_clf_relabeled
            )


def test_calibration_default_estimator(data):
    # Check that the default estimator is a LinearSVC
    X, y = data
    calib_clf = CalibratedClassifierCV(cv=2)
    calib_clf.fit(X, y)

    base_est = calib_clf.calibrated_classifiers_[0].estimator
    assert isinstance(base_est, LinearSVC)


@pytest.mark.parametrize("ensemble", [True, False])
def test_calibration_cv_splitter(data, ensemble):
    # Check that a cross-validation splitter is accepted for `cv`
    X, y = data

    splits = 5
    kfold = KFold(n_splits=splits)
    calib_clf = CalibratedClassifierCV(cv=kfold, ensemble=ensemble)
    assert isinstance(calib_clf.cv, KFold)
    assert calib_clf.cv.n_splits == splits

    calib_clf.fit(X, y)
    expected_n_clf = splits if ensemble else 1
    assert len(calib_clf.calibrated_classifiers_) == expected_n_clf


def test_calibration_cv_nfold(data):
    # Check that an error is raised when the number of folds exceeds the
    # number of samples of the smallest class, or when LeaveOneOut is used
    X, y = data

    kfold = KFold(n_splits=101)
    calib_clf = CalibratedClassifierCV(cv=kfold, ensemble=True)
    with pytest.raises(ValueError, match="Requesting 101-fold cross-validation"):
        calib_clf.fit(X, y)

    calib_clf = CalibratedClassifierCV(cv=LeaveOneOut(), ensemble=True)
    with pytest.raises(ValueError, match="LeaveOneOut cross-validation does"):
        calib_clf.fit(X, y)


@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
@pytest.mark.parametrize("ensemble", [True, False])
def test_sample_weight(data, method, ensemble):
    n_samples = N_SAMPLES // 2
    X, y = data

    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train, y_train, sw_train = X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test = X[n_samples:]

    estimator = LinearSVC(random_state=42)
    calibrated_clf = CalibratedClassifierCV(estimator, method=method, ensemble=ensemble)
    calibrated_clf.fit(X_train, y_train, sample_weight=sw_train)
    probs_with_sw = calibrated_clf.predict_proba(X_test)

    # As the weights are used for the calibration, the weighted and the
    # unweighted fits should yield different predictions
    calibrated_clf.fit(X_train, y_train)
    probs_without_sw = calibrated_clf.predict_proba(X_test)

    diff = np.linalg.norm(probs_with_sw - probs_without_sw)
    assert diff > 0.1


@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
@pytest.mark.parametrize("ensemble", [True, False])
def test_parallel_execution(data, method, ensemble):
    """Test parallel calibration"""
    X, y = data
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    estimator = make_pipeline(StandardScaler(), LinearSVC(random_state=42))

    cal_clf_parallel = CalibratedClassifierCV(
        estimator, method=method, n_jobs=2, ensemble=ensemble
    )
    cal_clf_parallel.fit(X_train, y_train)
    probs_parallel = cal_clf_parallel.predict_proba(X_test)

    cal_clf_sequential = CalibratedClassifierCV(
        estimator, method=method, n_jobs=1, ensemble=ensemble
    )
    cal_clf_sequential.fit(X_train, y_train)
    probs_sequential = cal_clf_sequential.predict_proba(X_test)

    assert_allclose(probs_parallel, probs_sequential)


@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
@pytest.mark.parametrize("ensemble", [True, False])
# Use a couple of RNG seeds to check that the test is statistically stable
@pytest.mark.parametrize("seed", range(2))
def test_calibration_multiclass(method, ensemble, seed):
    def multiclass_brier(y_true, proba_pred, n_classes):
        Y_onehot = np.eye(n_classes)[y_true]
        return np.sum((Y_onehot - proba_pred) ** 2) / Y_onehot.shape[0]

    # Test calibration for multiclass with a classifier that implements
    # only a decision function.
    clf = LinearSVC(random_state=7)
    X, y = make_blobs(
        n_samples=500, n_features=100, random_state=seed, centers=10, cluster_std=15.0
    )

    # Create an unbalanced dataset by collapsing 8 of the 10 clusters into a
    # single class, so that naively mapping the decision function through a
    # softmax is unlikely to yield well-calibrated probabilities.
    y[y > 2] = 2
    n_classes = np.unique(y).shape[0]
    X_train, y_train = X[::2], y[::2]
    X_test, y_test = X[1::2], y[1::2]

    clf.fit(X_train, y_train)

    cal_clf = CalibratedClassifierCV(clf, method=method, cv=5, ensemble=ensemble)
    cal_clf.fit(X_train, y_train)
    probas = cal_clf.predict_proba(X_test)
    # Check that the probabilities sum to one
    assert_allclose(np.sum(probas, axis=1), np.ones(len(X_test)))

    # Check that the dataset is not too trivial, otherwise it's hard to get
    # interesting calibration data during the internal cross-validation loop.
    assert 0.65 < clf.score(X_test, y_test) < 0.95

    # Check that the accuracy of the calibrated model is never degraded too
    # much compared to the original classifier.
    assert cal_clf.score(X_test, y_test) > 0.95 * clf.score(X_test, y_test)

    # Check that the Brier loss of the calibrated classifier is not much
    # worse than the loss obtained by naively turning the OvR decision
    # function into probabilities via a softmax.
    uncalibrated_brier = multiclass_brier(
        y_test, softmax(clf.decision_function(X_test)), n_classes=n_classes
    )
    calibrated_brier = multiclass_brier(y_test, probas, n_classes=n_classes)

    assert calibrated_brier < 1.1 * uncalibrated_brier

    # Repeat the check with a classifier that natively exposes predict_proba
    clf = RandomForestClassifier(n_estimators=30, random_state=42)
    clf.fit(X_train, y_train)
    clf_probs = clf.predict_proba(X_test)
    uncalibrated_brier = multiclass_brier(y_test, clf_probs, n_classes=n_classes)

    cal_clf = CalibratedClassifierCV(clf, method=method, cv=5, ensemble=ensemble)
    cal_clf.fit(X_train, y_train)
    cal_clf_probs = cal_clf.predict_proba(X_test)
    calibrated_brier = multiclass_brier(y_test, cal_clf_probs, n_classes=n_classes)
    assert calibrated_brier < 1.1 * uncalibrated_brier
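

# Note on the test above: with more than two classes, CalibratedClassifierCV
# calibrates each class in a one-vs-rest fashion and then renormalizes the
# per-class outputs, which is why the row sums of `predict_proba` are
# explicitly checked to be one.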


def test_calibration_zero_probability():
    # Test an edge case where _CalibratedClassifier avoids numerical errors
    # in the multiclass normalization step when all calibrators output zero
    # for a given sample, and falls back to uniform probabilities instead.
    class ZeroCalibrator:
        # This calibrator is called from _CalibratedClassifier.predict_proba.
        def predict(self, X):
            return np.zeros(X.shape[0])

    X, y = make_blobs(
        n_samples=50, n_features=10, random_state=7, centers=10, cluster_std=15.0
    )
    clf = DummyClassifier().fit(X, y)
    calibrator = ZeroCalibrator()
    cal_clf = _CalibratedClassifier(
        estimator=clf, calibrators=[calibrator], classes=clf.classes_
    )

    probas = cal_clf.predict_proba(X)

    # Check that all probabilities are uniformly 1. / clf.n_classes_
    assert_allclose(probas, 1.0 / clf.n_classes_)


@ignore_warnings(category=FutureWarning)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_calibration_prefit(csr_container):
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6, random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    # MultinomialNB only allows non-negative features
    X -= X.min()

    # split into train, calibration and test sets
    X_train, y_train, sw_train = X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_calib, y_calib, sw_calib = (
        X[n_samples : 2 * n_samples],
        y[n_samples : 2 * n_samples],
        sample_weight[n_samples : 2 * n_samples],
    )
    X_test, y_test = X[2 * n_samples :], y[2 * n_samples :]

    # Naive Bayes
    clf = MultinomialNB()
    # Check that an error is raised when the classifier is not actually prefit
    unfit_clf = CalibratedClassifierCV(clf, cv="prefit")
    with pytest.raises(NotFittedError):
        unfit_clf.fit(X_calib, y_calib)

    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Naive Bayes with calibration, on dense and sparse input
    for this_X_calib, this_X_test in [
        (X_calib, X_test),
        (csr_container(X_calib), csr_container(X_test)),
    ]:
        for method in ["isotonic", "sigmoid"]:
            cal_clf_prefit = CalibratedClassifierCV(clf, method=method, cv="prefit")
            cal_clf_frozen = CalibratedClassifierCV(FrozenEstimator(clf), method=method)

            for sw in [sw_calib, None]:
                cal_clf_prefit.fit(this_X_calib, y_calib, sample_weight=sw)
                cal_clf_frozen.fit(this_X_calib, y_calib, sample_weight=sw)

                y_prob_prefit = cal_clf_prefit.predict_proba(this_X_test)
                y_prob_frozen = cal_clf_frozen.predict_proba(this_X_test)
                y_pred_prefit = cal_clf_prefit.predict(this_X_test)
                y_pred_frozen = cal_clf_frozen.predict(this_X_test)
                prob_pos_cal_clf_prefit = y_prob_prefit[:, 1]
                prob_pos_cal_clf_frozen = y_prob_frozen[:, 1]
                assert_array_equal(y_pred_prefit, y_pred_frozen)
                assert_array_equal(
                    y_pred_prefit, np.array([0, 1])[np.argmax(y_prob_prefit, axis=1)]
                )
                assert brier_score_loss(y_test, prob_pos_clf) > brier_score_loss(
                    y_test, prob_pos_cal_clf_frozen
                )


@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
def test_calibration_ensemble_false(data, method):
    # Test that `ensemble=False` is the same as using the predictions from
    # `cross_val_predict` to train the calibrator.
    X, y = data
    clf = LinearSVC(random_state=7)

    cal_clf = CalibratedClassifierCV(clf, method=method, cv=3, ensemble=False)
    cal_clf.fit(X, y)
    cal_probas = cal_clf.predict_proba(X)

    # Compute the reference probabilities manually
    unbiased_preds = cross_val_predict(clf, X, y, cv=3, method="decision_function")
    if method == "isotonic":
        calibrator = IsotonicRegression(out_of_bounds="clip")
    else:
        calibrator = _SigmoidCalibration()
    calibrator.fit(unbiased_preds, y)
    # Use `clf` fit on all the data
    clf.fit(X, y)
    clf_df = clf.decision_function(X)
    manual_probas = calibrator.predict(clf_df)
    assert_allclose(cal_probas[:, 1], manual_probas)


def test_sigmoid_calibration():
    """Test calibration values with Platt sigmoid model"""
    exF = np.array([5, -4, 1.0])
    exY = np.array([1, -1, -1])
    # Reference (A, B) values, named after LIBSVM's implementation of Platt
    # scaling for a linear kernel
    AB_lin_libsvm = np.array([-0.20261354391187855, 0.65236314980010512])
    assert_array_almost_equal(AB_lin_libsvm, _sigmoid_calibration(exF, exY), 3)
    lin_prob = 1.0 / (1.0 + np.exp(AB_lin_libsvm[0] * exF + AB_lin_libsvm[1]))
    sk_prob = _SigmoidCalibration().fit(exF, exY).predict(exF)
    assert_array_almost_equal(lin_prob, sk_prob, 6)

    # Check that _SigmoidCalibration().fit rejects inputs that are neither
    # 1-d arrays nor 2-d column arrays
    with pytest.raises(ValueError):
        _SigmoidCalibration().fit(np.vstack((exF, exF)), exY)
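

# For reference, the sigmoid (Platt) calibrator checked above maps a decision
# value `f` to a probability via
#
#     P(y = 1 | f) = 1 / (1 + exp(A * f + B))
#
# where `(A, B)` is the pair returned by `_sigmoid_calibration`. Illustrative
# sketch only (mirrors the assertions in the test above):
#
#     A, B = _sigmoid_calibration(exF, exY)
#     proba = 1.0 / (1.0 + np.exp(A * exF + B))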


def test_calibration_curve():
    """Check calibration_curve function"""
    y_true = np.array([0, 0, 0, 1, 1, 1])
    y_pred = np.array([0.0, 0.1, 0.2, 0.8, 0.9, 1.0])
    prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=2)
    assert len(prob_true) == len(prob_pred)
    assert len(prob_true) == 2
    assert_almost_equal(prob_true, [0, 1])
    assert_almost_equal(prob_pred, [0.1, 0.9])
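    # Sanity check of the expected values: with n_bins=2 and the default
    # "uniform" strategy the bins are [0, 0.5) and [0.5, 1]; the first bin
    # holds the predictions {0.0, 0.1, 0.2} (mean 0.1, all with label 0) and
    # the second holds {0.8, 0.9, 1.0} (mean 0.9, all with label 1).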

    # Probabilities outside [0, 1] should raise a ValueError
    with pytest.raises(ValueError):
        calibration_curve([1], [-0.1])

    # Test the quantile strategy
    y_true2 = np.array([0, 0, 0, 0, 1, 1])
    y_pred2 = np.array([0.0, 0.1, 0.2, 0.5, 0.9, 1.0])
    prob_true_quantile, prob_pred_quantile = calibration_curve(
        y_true2, y_pred2, n_bins=2, strategy="quantile"
    )

    assert len(prob_true_quantile) == len(prob_pred_quantile)
    assert len(prob_true_quantile) == 2
    assert_almost_equal(prob_true_quantile, [0, 2 / 3])
    assert_almost_equal(prob_pred_quantile, [0.1, 0.8])

    # An invalid strategy name should raise a ValueError
    with pytest.raises(ValueError):
        calibration_curve(y_true2, y_pred2, strategy="percentile")


@pytest.mark.parametrize("ensemble", [True, False])
def test_calibration_nan_imputer(ensemble):
    """Test that calibration can accept nan"""
    X, y = make_classification(
        n_samples=10, n_features=2, n_informative=2, n_redundant=0, random_state=42
    )
    X[0, 0] = np.nan
    clf = Pipeline(
        [("imputer", SimpleImputer()), ("rf", RandomForestClassifier(n_estimators=1))]
    )
    clf_c = CalibratedClassifierCV(clf, cv=2, method="isotonic", ensemble=ensemble)
    clf_c.fit(X, y)
    clf_c.predict(X)


@pytest.mark.parametrize("ensemble", [True, False])
def test_calibration_prob_sum(ensemble):
    # Test that the predicted probabilities sum to one on a small,
    # hand-crafted binary problem.
    X, _ = make_classification(n_samples=10, n_features=5, n_classes=2)
    y = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
    clf = LinearSVC(C=1.0, random_state=7)

    clf_prob = CalibratedClassifierCV(
        clf, method="sigmoid", cv=KFold(n_splits=3), ensemble=ensemble
    )
    clf_prob.fit(X, y)
    assert_allclose(clf_prob.predict_proba(X).sum(axis=1), 1.0)


@pytest.mark.parametrize("ensemble", [True, False])
def test_calibration_less_classes(ensemble):
    # Test that calibration works when the training set of a CV split does
    # not contain all classes. With 12 ordered samples and KFold(3), the
    # first split never sees class 0 during training and the last split
    # never sees class 3.
    X = np.random.randn(12, 5)
    y = [0, 0, 0, 1] + [1, 1, 2, 2] + [2, 3, 3, 3]
    clf = DecisionTreeClassifier(random_state=7)
    cal_clf = CalibratedClassifierCV(
        clf, method="sigmoid", cv=KFold(3), ensemble=ensemble
    )
    cal_clf.fit(X, y)

    if ensemble:
        classes = np.arange(4)
        for calib_i, class_i in zip([0, 2], [0, 3]):
            proba = cal_clf.calibrated_classifiers_[calib_i].predict_proba(X)
            # The class unseen during training gets zero probability
            assert_array_equal(proba[:, class_i], np.zeros(len(y)))
            # All the other classes get a strictly positive probability
            assert np.all(proba[:, classes != class_i] > 0)

    # When `ensemble=False`, `cross_val_predict` is used to compute the
    # predictions used to fit a single `calibrated_classifiers_` entry.
    else:
        proba = cal_clf.calibrated_classifiers_[0].predict_proba(X)
        assert_array_almost_equal(proba.sum(axis=1), np.ones(proba.shape[0]))


@pytest.mark.parametrize(
    "X",
    [
        np.random.RandomState(42).randn(15, 5, 2),
        np.random.RandomState(42).randn(15, 5, 2, 6),
    ],
)
def test_calibration_accepts_ndarray(X):
    """Test that calibration accepts n-dimensional arrays as input"""
    y = [1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0]

    class MockTensorClassifier(ClassifierMixin, BaseEstimator):
        """A toy estimator that accepts tensor inputs"""

        def fit(self, X, y):
            self.classes_ = np.unique(y)
            return self

        def decision_function(self, X):
            # Flatten the tensor features and sum them per sample
            return X.reshape(X.shape[0], -1).sum(axis=1)

    calibrated_clf = CalibratedClassifierCV(MockTensorClassifier())
    # Smoke test: fitting should not raise an error
    calibrated_clf.fit(X, y)


@pytest.fixture
def dict_data():
    dict_data = [
        {"state": "NY", "age": "adult"},
        {"state": "TX", "age": "adult"},
        {"state": "VT", "age": "child"},
        {"state": "CT", "age": "adult"},
        {"state": "BR", "age": "child"},
    ]
    text_labels = [1, 0, 1, 1, 0]
    return dict_data, text_labels


@pytest.fixture
def dict_data_pipeline(dict_data):
    X, y = dict_data
    pipeline_prefit = Pipeline(
        [("vectorizer", DictVectorizer()), ("clf", RandomForestClassifier())]
    )
    return pipeline_prefit.fit(X, y)


def test_calibration_dict_pipeline(dict_data, dict_data_pipeline):
    """Test that calibration works in prefit pipeline with transformer

    `X` is not array-like, sparse matrix or dataframe at the start.
    See https://github.com/scikit-learn/scikit-learn/issues/8710

    Also test it can predict without running into validation errors.
    See https://github.com/scikit-learn/scikit-learn/issues/19637
    """
    X, y = dict_data
    clf = dict_data_pipeline
    calib_clf = CalibratedClassifierCV(FrozenEstimator(clf), cv=2)
    calib_clf.fit(X, y)
    # Check that the attributes are obtained from the fitted estimator
    assert_array_equal(calib_clf.classes_, clf.classes_)

    # Neither the pipeline nor the calibration meta-estimator expose
    # `n_features_in_` since `X` is not a matrix-like container
    assert not hasattr(clf, "n_features_in_")
    assert not hasattr(calib_clf, "n_features_in_")

    # Ensure that no error is raised by predict and predict_proba
    calib_clf.predict(X)
    calib_clf.predict_proba(X)


@pytest.mark.parametrize(
    "clf, cv",
    [
        pytest.param(LinearSVC(C=1), 2),
        pytest.param(LinearSVC(C=1), "prefit"),
    ],
)
def test_calibration_attributes(clf, cv):
    # Check that `n_features_in_` and `classes_` are set correctly
    X, y = make_classification(n_samples=10, n_features=5, n_classes=2, random_state=7)
    if cv == "prefit":
        clf = clf.fit(X, y)
    calib_clf = CalibratedClassifierCV(clf, cv=cv)
    calib_clf.fit(X, y)

    if cv == "prefit":
        assert_array_equal(calib_clf.classes_, clf.classes_)
        assert calib_clf.n_features_in_ == clf.n_features_in_
    else:
        classes = LabelEncoder().fit(y).classes_
        assert_array_equal(calib_clf.classes_, classes)
        assert calib_clf.n_features_in_ == X.shape[1]


def test_calibration_inconsistent_prefit_n_features_in():
    # Check that an error is raised when the number of features passed to
    # `fit` does not match `n_features_in_` of the prefit estimator
    X, y = make_classification(n_samples=10, n_features=5, n_classes=2, random_state=7)
    clf = LinearSVC(C=1).fit(X, y)
    calib_clf = CalibratedClassifierCV(FrozenEstimator(clf))

    msg = "X has 3 features, but LinearSVC is expecting 5 features as input."
    with pytest.raises(ValueError, match=msg):
        calib_clf.fit(X[:, :3], y)


def test_calibration_votingclassifier():
    # Check that CalibratedClassifierCV works with VotingClassifier.
    # The `predict_proba` method of VotingClassifier is dynamically defined
    # via a property and only exists when voting="soft".
    X, y = make_classification(n_samples=10, n_features=5, n_classes=2, random_state=7)
    vote = VotingClassifier(
        estimators=[("lr" + str(i), LogisticRegression()) for i in range(3)],
        voting="soft",
    )
    vote.fit(X, y)

    calib_clf = CalibratedClassifierCV(estimator=FrozenEstimator(vote))
    # Smoke test: fitting should not raise an error
    calib_clf.fit(X, y)


@pytest.fixture(scope="module")
def iris_data():
    return load_iris(return_X_y=True)


@pytest.fixture(scope="module")
def iris_data_binary(iris_data):
    X, y = iris_data
    return X[y < 2], y[y < 2]


@pytest.mark.parametrize("n_bins", [5, 10])
@pytest.mark.parametrize("strategy", ["uniform", "quantile"])
def test_calibration_display_compute(pyplot, iris_data_binary, n_bins, strategy):
    # Ensure `CalibrationDisplay.from_estimator` and `calibration_curve`
    # compute the same results. Also check the attributes of the
    # `CalibrationDisplay` object.
    X, y = iris_data_binary

    lr = LogisticRegression().fit(X, y)

    viz = CalibrationDisplay.from_estimator(
        lr, X, y, n_bins=n_bins, strategy=strategy, alpha=0.8
    )

    y_prob = lr.predict_proba(X)[:, 1]
    prob_true, prob_pred = calibration_curve(
        y, y_prob, n_bins=n_bins, strategy=strategy
    )

    assert_allclose(viz.prob_true, prob_true)
    assert_allclose(viz.prob_pred, prob_pred)
    assert_allclose(viz.y_prob, y_prob)

    assert viz.estimator_name == "LogisticRegression"

    # Check the matplotlib-related attributes
    import matplotlib as mpl

    assert isinstance(viz.line_, mpl.lines.Line2D)
    assert viz.line_.get_alpha() == 0.8
    assert isinstance(viz.ax_, mpl.axes.Axes)
    assert isinstance(viz.figure_, mpl.figure.Figure)

    assert viz.ax_.get_xlabel() == "Mean predicted probability (Positive class: 1)"
    assert viz.ax_.get_ylabel() == "Fraction of positives (Positive class: 1)"

    expected_legend_labels = ["LogisticRegression", "Perfectly calibrated"]
    legend_labels = viz.ax_.get_legend().get_texts()
    assert len(legend_labels) == len(expected_legend_labels)
    for labels in legend_labels:
        assert labels.get_text() in expected_legend_labels


def test_plot_calibration_curve_pipeline(pyplot, iris_data_binary):
    # Ensure pipelines are supported by CalibrationDisplay.from_estimator
    X, y = iris_data_binary
    clf = make_pipeline(StandardScaler(), LogisticRegression())
    clf.fit(X, y)
    viz = CalibrationDisplay.from_estimator(clf, X, y)

    expected_legend_labels = [viz.estimator_name, "Perfectly calibrated"]
    legend_labels = viz.ax_.get_legend().get_texts()
    assert len(legend_labels) == len(expected_legend_labels)
    for labels in legend_labels:
        assert labels.get_text() in expected_legend_labels


@pytest.mark.parametrize(
    "name, expected_label", [(None, "_line1"), ("my_est", "my_est")]
)
def test_calibration_display_default_labels(pyplot, name, expected_label):
    prob_true = np.array([0, 1, 1, 0])
    prob_pred = np.array([0.2, 0.8, 0.8, 0.4])
    y_prob = np.array([])

    viz = CalibrationDisplay(prob_true, prob_pred, y_prob, estimator_name=name)
    viz.plot()

    expected_legend_labels = [] if name is None else [name]
    expected_legend_labels.append("Perfectly calibrated")
    legend_labels = viz.ax_.get_legend().get_texts()
    assert len(legend_labels) == len(expected_legend_labels)
    for labels in legend_labels:
        assert labels.get_text() in expected_legend_labels


def test_calibration_display_label_class_plot(pyplot):
    # Check the legend label when instantiating `CalibrationDisplay` directly
    # and then calling `plot` with a different name.
    prob_true = np.array([0, 1, 1, 0])
    prob_pred = np.array([0.2, 0.8, 0.8, 0.4])
    y_prob = np.array([])

    name = "name one"
    viz = CalibrationDisplay(prob_true, prob_pred, y_prob, estimator_name=name)
    assert viz.estimator_name == name
    name = "name two"
    viz.plot(name=name)

    expected_legend_labels = [name, "Perfectly calibrated"]
    legend_labels = viz.ax_.get_legend().get_texts()
    assert len(legend_labels) == len(expected_legend_labels)
    for labels in legend_labels:
        assert labels.get_text() in expected_legend_labels


@pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"])
def test_calibration_display_name_multiple_calls(
    constructor_name, pyplot, iris_data_binary
):
    # Check that the `name` given to `CalibrationDisplay.from_estimator` or
    # `CalibrationDisplay.from_predictions` is used consistently when several
    # `plot` calls are made on the same display object.
    X, y = iris_data_binary
    clf_name = "my hand-crafted name"
    clf = LogisticRegression().fit(X, y)
    y_prob = clf.predict_proba(X)[:, 1]

    constructor = getattr(CalibrationDisplay, constructor_name)
    params = (clf, X, y) if constructor_name == "from_estimator" else (y, y_prob)

    viz = constructor(*params, name=clf_name)
    assert viz.estimator_name == clf_name
    pyplot.close("all")
    viz.plot()

    expected_legend_labels = [clf_name, "Perfectly calibrated"]
    legend_labels = viz.ax_.get_legend().get_texts()
    assert len(legend_labels) == len(expected_legend_labels)
    for labels in legend_labels:
        assert labels.get_text() in expected_legend_labels

    pyplot.close("all")
    clf_name = "another_name"
    viz.plot(name=clf_name)
    assert len(legend_labels) == len(expected_legend_labels)
    for labels in legend_labels:
        assert labels.get_text() in expected_legend_labels


def test_calibration_display_ref_line(pyplot, iris_data_binary):
    # Check that the "Perfectly calibrated" reference line is only plotted
    # once when several displays share the same axes.
    X, y = iris_data_binary
    lr = LogisticRegression().fit(X, y)
    dt = DecisionTreeClassifier().fit(X, y)

    viz = CalibrationDisplay.from_estimator(lr, X, y)
    viz2 = CalibrationDisplay.from_estimator(dt, X, y, ax=viz.ax_)

    labels = viz2.ax_.get_legend_handles_labels()[1]
    assert labels.count("Perfectly calibrated") == 1


@pytest.mark.parametrize("dtype_y_str", [str, object])
def test_calibration_curve_pos_label_error_str(dtype_y_str):
    """Check error message when a `pos_label` is not specified with `str` targets."""
    rng = np.random.RandomState(42)
    y1 = np.array(["spam"] * 3 + ["eggs"] * 2, dtype=dtype_y_str)
    y2 = rng.randint(0, 2, size=y1.size)

    err_msg = (
        "y_true takes value in {'eggs', 'spam'} and pos_label is not "
        "specified: either make y_true take value in {0, 1} or {-1, 1} or "
        "pass pos_label explicitly"
    )
    with pytest.raises(ValueError, match=err_msg):
        calibration_curve(y1, y2)


@pytest.mark.parametrize("dtype_y_str", [str, object])
def test_calibration_curve_pos_label(dtype_y_str):
    """Check the behaviour when passing explicitly `pos_label`."""
    y_true = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1])
    classes = np.array(["spam", "egg"], dtype=dtype_y_str)
    y_true_str = classes[y_true]
    y_pred = np.array([0.1, 0.2, 0.3, 0.4, 0.65, 0.7, 0.8, 0.9, 1.0])

    # default case: the positive class is inferred to be 1
    prob_true, _ = calibration_curve(y_true, y_pred, n_bins=4)
    assert_allclose(prob_true, [0, 0.5, 1, 1])
    # with string targets, `pos_label` has to be given explicitly
    prob_true, _ = calibration_curve(y_true_str, y_pred, n_bins=4, pos_label="egg")
    assert_allclose(prob_true, [0, 0.5, 1, 1])

    # using the negative class with complemented scores gives the mirrored curve
    prob_true, _ = calibration_curve(y_true, 1 - y_pred, n_bins=4, pos_label=0)
    assert_allclose(prob_true, [0, 0, 0.5, 1])
    prob_true, _ = calibration_curve(y_true_str, 1 - y_pred, n_bins=4, pos_label="spam")
    assert_allclose(prob_true, [0, 0, 0.5, 1])


@pytest.mark.parametrize(
    "kwargs",
    [
        {"c": "red", "lw": 2, "ls": "-."},
        {"color": "red", "linewidth": 2, "linestyle": "-."},
    ],
)
def test_calibration_display_kwargs(pyplot, iris_data_binary, kwargs):
    """Check that matplotlib aliases are handled."""
    X, y = iris_data_binary

    lr = LogisticRegression().fit(X, y)
    viz = CalibrationDisplay.from_estimator(lr, X, y, **kwargs)

    assert viz.line_.get_color() == "red"
    assert viz.line_.get_linewidth() == 2
    assert viz.line_.get_linestyle() == "-."


@pytest.mark.parametrize("pos_label, expected_pos_label", [(None, 1), (0, 0), (1, 1)])
def test_calibration_display_pos_label(
    pyplot, iris_data_binary, pos_label, expected_pos_label
):
    """Check the behaviour of `pos_label` in the `CalibrationDisplay`."""
    X, y = iris_data_binary

    lr = LogisticRegression().fit(X, y)
    viz = CalibrationDisplay.from_estimator(lr, X, y, pos_label=pos_label)

    y_prob = lr.predict_proba(X)[:, expected_pos_label]
    prob_true, prob_pred = calibration_curve(y, y_prob, pos_label=pos_label)

    assert_allclose(viz.prob_true, prob_true)
    assert_allclose(viz.prob_pred, prob_pred)
    assert_allclose(viz.y_prob, y_prob)

    assert (
        viz.ax_.get_xlabel()
        == f"Mean predicted probability (Positive class: {expected_pos_label})"
    )
    assert (
        viz.ax_.get_ylabel()
        == f"Fraction of positives (Positive class: {expected_pos_label})"
    )

    expected_legend_labels = [lr.__class__.__name__, "Perfectly calibrated"]
    legend_labels = viz.ax_.get_legend().get_texts()
    assert len(legend_labels) == len(expected_legend_labels)
    for labels in legend_labels:
        assert labels.get_text() in expected_legend_labels


@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
@pytest.mark.parametrize("ensemble", [True, False])
def test_calibrated_classifier_cv_double_sample_weights_equivalence(method, ensemble):
    """Check that repeating the dataset `X` twice is equivalent to passing a
    `sample_weight` with a factor 2."""
    X, y = load_iris(return_X_y=True)
    # Scale the data to avoid any convergence issue
    X = StandardScaler().fit_transform(X)
    # Only use the first 2 classes
    X, y = X[:100], y[:100]
    sample_weight = np.ones_like(y) * 2

    # Duplicate the dataset with interleaved rows so that fitting on `X_twice`
    # mimics fitting on `X` with a sample weight of 2 for each sample
    X_twice = np.zeros((X.shape[0] * 2, X.shape[1]), dtype=X.dtype)
    X_twice[::2, :] = X
    X_twice[1::2, :] = X
    y_twice = np.zeros(y.shape[0] * 2, dtype=y.dtype)
    y_twice[::2] = y
    y_twice[1::2] = y

    estimator = LogisticRegression()
    calibrated_clf_without_weights = CalibratedClassifierCV(
        estimator,
        method=method,
        ensemble=ensemble,
        cv=2,
    )
    calibrated_clf_with_weights = clone(calibrated_clf_without_weights)

    calibrated_clf_with_weights.fit(X, y, sample_weight=sample_weight)
    calibrated_clf_without_weights.fit(X_twice, y_twice)

    # Check that the underlying fitted estimators have the same coefficients
    for est_with_weights, est_without_weights in zip(
        calibrated_clf_with_weights.calibrated_classifiers_,
        calibrated_clf_without_weights.calibrated_classifiers_,
    ):
        assert_allclose(
            est_with_weights.estimator.coef_,
            est_without_weights.estimator.coef_,
        )

    # Check that the predictions are the same
    y_pred_with_weights = calibrated_clf_with_weights.predict_proba(X)
    y_pred_without_weights = calibrated_clf_without_weights.predict_proba(X)

    assert_allclose(y_pred_with_weights, y_pred_without_weights)


@pytest.mark.parametrize("fit_params_type", ["list", "array"])
def test_calibration_with_fit_params(fit_params_type, data):
    """Tests that fit_params are passed to the underlying base estimator.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12384
    """
    X, y = data
    fit_params = {
        "a": _convert_container(y, fit_params_type),
        "b": _convert_container(y, fit_params_type),
    }

    clf = CheckingClassifier(expected_fit_params=["a", "b"])
    pc_clf = CalibratedClassifierCV(clf)

    pc_clf.fit(X, y, **fit_params)


@pytest.mark.parametrize(
    "sample_weight",
    [
        [1.0] * N_SAMPLES,
        np.ones(N_SAMPLES),
    ],
)
def test_calibration_with_sample_weight_estimator(sample_weight, data):
    """Tests that sample_weight is passed to the underlying base
    estimator.
    """
    X, y = data
    clf = CheckingClassifier(expected_sample_weight=True)
    pc_clf = CalibratedClassifierCV(clf)

    pc_clf.fit(X, y, sample_weight=sample_weight)


def test_calibration_without_sample_weight_estimator(data):
    """Check that even if the estimator doesn't support
    sample_weight, fitting with sample_weight still works.

    There should be a warning, since the sample_weight is not passed
    on to the estimator.
    """
    X, y = data
    sample_weight = np.ones_like(y)

    class ClfWithoutSampleWeight(CheckingClassifier):
        def fit(self, X, y, **fit_params):
            assert "sample_weight" not in fit_params
            return super().fit(X, y, **fit_params)

    clf = ClfWithoutSampleWeight()
    pc_clf = CalibratedClassifierCV(clf)

    with pytest.warns(UserWarning):
        pc_clf.fit(X, y, sample_weight=sample_weight)


def test_calibration_with_non_sample_aligned_fit_param(data):
    """Check that CalibratedClassifierCV does not enforce sample alignment
    for fit parameters."""

    class TestClassifier(LogisticRegression):
        def fit(self, X, y, sample_weight=None, fit_param=None):
            assert fit_param is not None
            return super().fit(X, y, sample_weight=sample_weight)

    CalibratedClassifierCV(estimator=TestClassifier()).fit(
        *data, fit_param=np.ones(len(data[1]) + 1)
    )


def test_calibrated_classifier_cv_works_with_large_confidence_scores(
    global_random_seed,
):
    """Test that :class:`CalibratedClassifierCV` works with large confidence
    scores when using the `sigmoid` method, particularly with the
    :class:`SGDClassifier`.

    Non-regression test for issue #26766.
    """
    prob = 0.67
    n = 1000
    random_noise = np.random.default_rng(global_random_seed).normal(size=n)

    y = np.array([1] * int(n * prob) + [0] * (n - int(n * prob)))
    X = 1e5 * y.reshape((-1, 1)) + random_noise

    # Check that the decision function of SGDClassifier produces very large
    # values on at least some of the test samples of each CV split.
    cv = check_cv(cv=None, y=y, classifier=True)
    indices = cv.split(X, y)
    for train, test in indices:
        X_train, y_train = X[train], y[train]
        X_test = X[test]
        sgd_clf = SGDClassifier(loss="squared_hinge", random_state=global_random_seed)
        sgd_clf.fit(X_train, y_train)
        predictions = sgd_clf.decision_function(X_test)
        assert (predictions > 1e4).any()

    # Cross-validated ROC AUC of the sigmoid-calibrated classifier: if the
    # large decision values broke the sigmoid calibration, this score would
    # degrade.
    clf_sigmoid = CalibratedClassifierCV(
        SGDClassifier(loss="squared_hinge", random_state=global_random_seed),
        method="sigmoid",
    )
    score_sigmoid = cross_val_score(clf_sigmoid, X, y, scoring="roc_auc")

    # The isotonic method does not involve exponentiating the decision values
    # and serves as a reference here.
    clf_isotonic = CalibratedClassifierCV(
        SGDClassifier(loss="squared_hinge", random_state=global_random_seed),
        method="isotonic",
    )
    score_isotonic = cross_val_score(clf_isotonic, X, y, scoring="roc_auc")

    # Both calibration methods should give the same ROC AUC on this easy,
    # well-separated problem.
    assert_allclose(score_sigmoid, score_isotonic)
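

# Note on the test above: the raw decision values are on the order of 1e5, and
# `np.exp` of values that large overflows float64, so a sigmoid fit on the raw
# values could not work. Presumably this is handled by rescaling the decision
# values before fitting the sigmoid, which the `max_abs_prediction_threshold`
# test below exercises directly.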


def test_sigmoid_calibration_max_abs_prediction_threshold(global_random_seed):
    random_state = np.random.RandomState(seed=global_random_seed)
    n = 100
    y = random_state.randint(0, 2, size=n)

    # Predictions that span a small range of values
    predictions_small = random_state.uniform(low=-2, high=2, size=100)

    # A threshold lower than the maximum absolute value of the predictions
    # enables the internal rescaling of the predictions
    threshold_1 = 0.1
    a1, b1 = _sigmoid_calibration(
        predictions=predictions_small,
        y=y,
        max_abs_prediction_threshold=threshold_1,
    )

    # A larger threshold disables the rescaling
    threshold_2 = 10
    a2, b2 = _sigmoid_calibration(
        predictions=predictions_small,
        y=y,
        max_abs_prediction_threshold=threshold_2,
    )

    # Same as above, using the default threshold
    a3, b3 = _sigmoid_calibration(
        predictions=predictions_small,
        y=y,
    )

    # The rescaling should not have a significant impact on the fitted
    # sigmoid parameters, irrespective of the threshold used
    atol = 1e-6
    assert_allclose(a1, a2, atol=atol)
    assert_allclose(a2, a3, atol=atol)
    assert_allclose(b1, b2, atol=atol)
    assert_allclose(b2, b3, atol=atol)


def test_float32_predict_proba(data):
    """Check that CalibratedClassifierCV works with float32 predict proba.

    Non-regression test for gh-28245.
    """

    class DummyClassifier32(DummyClassifier):
        def predict_proba(self, X):
            return super().predict_proba(X).astype(np.float32)

    model = DummyClassifier32()
    calibrator = CalibratedClassifierCV(model)
    # Smoke test: fitting should not raise an error
    calibrator.fit(*data)


def test_error_less_class_samples_than_folds():
    """Check that CalibratedClassifierCV works with string targets.

    Non-regression test for issue #28841.
    """
    X = np.random.normal(size=(20, 3))
    y = ["a"] * 10 + ["b"] * 10

    CalibratedClassifierCV(cv=3).fit(X, y)