| """Weight Boosting. |
| |
| This module contains weight boosting estimators for both classification and |
| regression. |
| |
| The module structure is the following: |
| |
| - The `BaseWeightBoosting` base class implements a common ``fit`` method |
| for all the estimators in the module. Regression and classification |
| only differ from each other in the loss function that is optimized. |
| |
| - :class:`~sklearn.ensemble.AdaBoostClassifier` implements adaptive boosting |
| (AdaBoost-SAMME) for classification problems. |
| |
| - :class:`~sklearn.ensemble.AdaBoostRegressor` implements adaptive boosting |
| (AdaBoost.R2) for regression problems. |
| """ |
|
|
| |
| |
|
|
| import warnings |
| from abc import ABCMeta, abstractmethod |
| from numbers import Integral, Real |
|
|
| import numpy as np |
|
|
| from ..base import ( |
| ClassifierMixin, |
| RegressorMixin, |
| _fit_context, |
| is_classifier, |
| is_regressor, |
| ) |
| from ..metrics import accuracy_score, r2_score |
| from ..tree import DecisionTreeClassifier, DecisionTreeRegressor |
| from ..utils import _safe_indexing, check_random_state |
| from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions |
| from ..utils.extmath import softmax, stable_cumsum |
| from ..utils.metadata_routing import ( |
| _raise_for_unsupported_routing, |
| _RoutingNotSupportedMixin, |
| ) |
| from ..utils.validation import ( |
| _check_sample_weight, |
| _num_samples, |
| check_is_fitted, |
| has_fit_parameter, |
| validate_data, |
| ) |
| from ._base import BaseEnsemble |
|
|
| __all__ = [ |
| "AdaBoostClassifier", |
| "AdaBoostRegressor", |
| ] |
|
|
|
|
| class BaseWeightBoosting(BaseEnsemble, metaclass=ABCMeta): |
| """Base class for AdaBoost estimators. |
| |
| Warning: This class should not be used directly. Use derived classes |
| instead. |
| """ |
|
|
| _parameter_constraints: dict = { |
| "estimator": [HasMethods(["fit", "predict"]), None], |
| "n_estimators": [Interval(Integral, 1, None, closed="left")], |
| "learning_rate": [Interval(Real, 0, None, closed="neither")], |
| "random_state": ["random_state"], |
| } |
|
|
| @abstractmethod |
| def __init__( |
| self, |
| estimator=None, |
| *, |
| n_estimators=50, |
| estimator_params=tuple(), |
| learning_rate=1.0, |
| random_state=None, |
| ): |
| super().__init__( |
| estimator=estimator, |
| n_estimators=n_estimators, |
| estimator_params=estimator_params, |
| ) |
|
|
| self.learning_rate = learning_rate |
| self.random_state = random_state |
|
|
| def _check_X(self, X): |
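        """Validate the input samples, checking consistency with the fitted state."""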
| |
| return validate_data( |
| self, |
| X, |
| accept_sparse=["csr", "csc"], |
| ensure_2d=True, |
| allow_nd=True, |
| dtype=None, |
| reset=False, |
| ) |
|
|
| @_fit_context( |
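        # AdaBoost*.estimator is not validated yet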
        prefer_skip_nested_validation=False
| ) |
| def fit(self, X, y, sample_weight=None): |
| """Build a boosted classifier/regressor from the training set (X, y). |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
| The training input samples. Sparse matrix can be CSC, CSR, COO, |
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| y : array-like of shape (n_samples,) |
| The target values. |
| |
| sample_weight : array-like of shape (n_samples,), default=None |
| Sample weights. If None, the sample weights are initialized to |
| 1 / n_samples. |
| |
| Returns |
| ------- |
| self : object |
| Fitted estimator. |
| """ |
| _raise_for_unsupported_routing(self, "fit", sample_weight=sample_weight) |
| X, y = validate_data( |
| self, |
| X, |
| y, |
| accept_sparse=["csr", "csc"], |
| ensure_2d=True, |
| allow_nd=True, |
| dtype=None, |
| y_numeric=is_regressor(self), |
| ) |
|
|
| sample_weight = _check_sample_weight( |
| sample_weight, X, np.float64, copy=True, ensure_non_negative=True |
| ) |
| sample_weight /= sample_weight.sum() |
|
|
| |
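        # Check parameters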
| self._validate_estimator() |
|
|
| |
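        # Clear any previous fit results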
| self.estimators_ = [] |
| self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64) |
| self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64) |
|
|
| |
| |
| random_state = check_random_state(self.random_state) |
| epsilon = np.finfo(sample_weight.dtype).eps |
|
|
| zero_weight_mask = sample_weight == 0.0 |
| for iboost in range(self.n_estimators): |
| |
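            # Avoid extremely small sample weights by clipping them to machine epsilon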
| sample_weight = np.clip(sample_weight, a_min=epsilon, a_max=None) |
| |
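            # Do not clip sample weights that were exactly zero originally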
| sample_weight[zero_weight_mask] = 0.0 |
|
|
| |
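            # Boosting step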
| sample_weight, estimator_weight, estimator_error = self._boost( |
| iboost, X, y, sample_weight, random_state |
| ) |
|
|
| |
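            # Early termination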
| if sample_weight is None: |
| break |
| self.estimator_weights_[iboost] = estimator_weight |
| self.estimator_errors_[iboost] = estimator_error |
|
|
| |
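            # Stop if error is zero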
| if estimator_error == 0: |
| break |
|
|
| sample_weight_sum = np.sum(sample_weight) |
|
|
| if not np.isfinite(sample_weight_sum): |
| warnings.warn( |
| ( |
| "Sample weights have reached infinite values," |
| f" at iteration {iboost}, causing overflow. " |
| "Iterations stopped. Try lowering the learning rate." |
| ), |
| stacklevel=2, |
| ) |
| break |
|
|
| |
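            # Stop if the sum of sample weights has become non-positive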
| if sample_weight_sum <= 0: |
| break |
|
|
| if iboost < self.n_estimators - 1: |
| |
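                # Normalize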
| sample_weight /= sample_weight_sum |
|
|
| return self |
|
|
| @abstractmethod |
| def _boost(self, iboost, X, y, sample_weight, random_state): |
| """Implement a single boost. |
| |
| Warning: This method needs to be overridden by subclasses. |
| |
| Parameters |
| ---------- |
| iboost : int |
| The index of the current boost iteration. |
| |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
| The training input samples. Sparse matrix can be CSC, CSR, COO, |
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| y : array-like of shape (n_samples,) |
| The target values (class labels). |
| |
| sample_weight : array-like of shape (n_samples,) |
| The current sample weights. |
| |
        random_state : RandomState instance
            The current random number generator.
| |
| Returns |
| ------- |
| sample_weight : array-like of shape (n_samples,) or None |
| The reweighted sample weights. |
| If None then boosting has terminated early. |
| |
| estimator_weight : float |
| The weight for the current boost. |
| If None then boosting has terminated early. |
| |
| error : float |
| The classification error for the current boost. |
| If None then boosting has terminated early. |
| """ |
| pass |
|
|
| def staged_score(self, X, y, sample_weight=None): |
| """Return staged scores for X, y. |
| |
| This generator method yields the ensemble score after each iteration of |
| boosting and therefore allows monitoring, such as to determine the |
| score on a test set after each boost. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
| The training input samples. Sparse matrix can be CSC, CSR, COO, |
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| y : array-like of shape (n_samples,) |
| Labels for X. |
| |
| sample_weight : array-like of shape (n_samples,), default=None |
| Sample weights. |
| |
| Yields |
| ------ |
        z : float
            The ensemble score after each iteration of boosting.
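
        Examples
        --------
        The generator yields one score per fitted boosting stage:

        >>> from sklearn.datasets import make_classification
        >>> from sklearn.ensemble import AdaBoostClassifier
        >>> X, y = make_classification(random_state=0)
        >>> clf = AdaBoostClassifier(n_estimators=3, random_state=0).fit(X, y)
        >>> scores = list(clf.staged_score(X, y))
        >>> len(scores) == len(clf.estimators_)
        True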
| """ |
| X = self._check_X(X) |
|
|
| for y_pred in self.staged_predict(X): |
| if is_classifier(self): |
| yield accuracy_score(y, y_pred, sample_weight=sample_weight) |
| else: |
| yield r2_score(y, y_pred, sample_weight=sample_weight) |
|
|
| @property |
| def feature_importances_(self): |
| """The impurity-based feature importances. |
| |
| The higher, the more important the feature. |
| The importance of a feature is computed as the (normalized) |
| total reduction of the criterion brought by that feature. It is also |
| known as the Gini importance. |
| |
| Warning: impurity-based feature importances can be misleading for |
| high cardinality features (many unique values). See |
| :func:`sklearn.inspection.permutation_importance` as an alternative. |
| |
| Returns |
| ------- |
| feature_importances_ : ndarray of shape (n_features,) |
| The feature importances. |
| """ |
| if self.estimators_ is None or len(self.estimators_) == 0: |
| raise ValueError( |
| "Estimator not fitted, call `fit` before `feature_importances_`." |
| ) |
|
|
| try: |
| norm = self.estimator_weights_.sum() |
| return ( |
| sum( |
| weight * clf.feature_importances_ |
| for weight, clf in zip(self.estimator_weights_, self.estimators_) |
| ) |
| / norm |
| ) |
|
|
| except AttributeError as e: |
| raise AttributeError( |
| "Unable to compute feature importances " |
| "since estimator does not have a " |
| "feature_importances_ attribute" |
| ) from e |
|
|
| def __sklearn_tags__(self): |
| tags = super().__sklearn_tags__() |
| tags.input_tags.sparse = True |
| return tags |
|
|
|
|
| def _samme_proba(estimator, n_classes, X): |
| """Calculate algorithm 4, step 2, equation c) of Zhu et al [1]. |
| |
| References |
| ---------- |
| .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009. |
| |
| """ |
| proba = estimator.predict_proba(X) |
|
|
| |
| |
| |
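    # Displace zero probabilities so the log is defined.
    # Also fix negative elements which may occur with
    # negative sample weights.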
| np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba) |
| log_proba = np.log(proba) |
|
|
| return (n_classes - 1) * ( |
| log_proba - (1.0 / n_classes) * log_proba.sum(axis=1)[:, np.newaxis] |
| ) |
|
|
|
|
| class AdaBoostClassifier( |
| _RoutingNotSupportedMixin, ClassifierMixin, BaseWeightBoosting |
| ): |
| """An AdaBoost classifier. |
| |
| An AdaBoost [1]_ classifier is a meta-estimator that begins by fitting a |
| classifier on the original dataset and then fits additional copies of the |
| classifier on the same dataset but where the weights of incorrectly |
| classified instances are adjusted such that subsequent classifiers focus |
| more on difficult cases. |
| |
| This class implements the algorithm based on [2]_. |
| |
| Read more in the :ref:`User Guide <adaboost>`. |
| |
| .. versionadded:: 0.14 |
| |
| Parameters |
| ---------- |
| estimator : object, default=None |
| The base estimator from which the boosted ensemble is built. |
| Support for sample weighting is required, as well as proper |
| ``classes_`` and ``n_classes_`` attributes. If ``None``, then |
| the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier` |
| initialized with `max_depth=1`. |
| |
| .. versionadded:: 1.2 |
| `base_estimator` was renamed to `estimator`. |
| |
| n_estimators : int, default=50 |
| The maximum number of estimators at which boosting is terminated. |
| In case of perfect fit, the learning procedure is stopped early. |
| Values must be in the range `[1, inf)`. |
| |
| learning_rate : float, default=1.0 |
| Weight applied to each classifier at each boosting iteration. A higher |
| learning rate increases the contribution of each classifier. There is |
| a trade-off between the `learning_rate` and `n_estimators` parameters. |
| Values must be in the range `(0.0, inf)`. |
| |
| algorithm : {'SAMME'}, default='SAMME' |
| Use the SAMME discrete boosting algorithm. |
| |
| .. deprecated:: 1.6 |
| `algorithm` is deprecated and will be removed in version 1.8. This |
| estimator only implements the 'SAMME' algorithm. |
| |
| random_state : int, RandomState instance or None, default=None |
        Controls the random seed given to each `estimator` at each
| boosting iteration. |
| Thus, it is only used when `estimator` exposes a `random_state`. |
| Pass an int for reproducible output across multiple function calls. |
| See :term:`Glossary <random_state>`. |
| |
| Attributes |
| ---------- |
| estimator_ : estimator |
| The base estimator from which the ensemble is grown. |
| |
| .. versionadded:: 1.2 |
| `base_estimator_` was renamed to `estimator_`. |
| |
| estimators_ : list of classifiers |
| The collection of fitted sub-estimators. |
| |
| classes_ : ndarray of shape (n_classes,) |
        The class labels.
| |
| n_classes_ : int |
| The number of classes. |
| |
| estimator_weights_ : ndarray of floats |
| Weights for each estimator in the boosted ensemble. |
| |
| estimator_errors_ : ndarray of floats |
| Classification error for each estimator in the boosted |
| ensemble. |
| |
| feature_importances_ : ndarray of shape (n_features,) |
| The impurity-based feature importances if supported by the |
| ``estimator`` (when based on decision trees). |
| |
| Warning: impurity-based feature importances can be misleading for |
| high cardinality features (many unique values). See |
| :func:`sklearn.inspection.permutation_importance` as an alternative. |
| |
| n_features_in_ : int |
| Number of features seen during :term:`fit`. |
| |
| .. versionadded:: 0.24 |
| |
| feature_names_in_ : ndarray of shape (`n_features_in_`,) |
| Names of features seen during :term:`fit`. Defined only when `X` |
| has feature names that are all strings. |
| |
| .. versionadded:: 1.0 |
| |
| See Also |
| -------- |
| AdaBoostRegressor : An AdaBoost regressor that begins by fitting a |
| regressor on the original dataset and then fits additional copies of |
| the regressor on the same dataset but where the weights of instances |
| are adjusted according to the error of the current prediction. |
| |
| GradientBoostingClassifier : GB builds an additive model in a forward |
| stage-wise fashion. Regression trees are fit on the negative gradient |
| of the binomial or multinomial deviance loss function. Binary |
| classification is a special case where only a single regression tree is |
| induced. |
| |
| sklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning |
| method used for classification. |
| Creates a model that predicts the value of a target variable by |
| learning simple decision rules inferred from the data features. |
| |
| References |
| ---------- |
| .. [1] Y. Freund, R. Schapire, "A Decision-Theoretic Generalization of |
           On-Line Learning and an Application to Boosting", 1995.
| |
| .. [2] :doi:`J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class adaboost." |
| Statistics and its Interface 2.3 (2009): 349-360. |
| <10.4310/SII.2009.v2.n3.a8>` |
| |
| Examples |
| -------- |
| >>> from sklearn.ensemble import AdaBoostClassifier |
| >>> from sklearn.datasets import make_classification |
| >>> X, y = make_classification(n_samples=1000, n_features=4, |
| ... n_informative=2, n_redundant=0, |
| ... random_state=0, shuffle=False) |
| >>> clf = AdaBoostClassifier(n_estimators=100, random_state=0) |
| >>> clf.fit(X, y) |
| AdaBoostClassifier(n_estimators=100, random_state=0) |
| >>> clf.predict([[0, 0, 0, 0]]) |
| array([1]) |
| >>> clf.score(X, y) |
| 0.96... |
| |
| For a detailed example of using AdaBoost to fit a sequence of DecisionTrees |
    as weak learners, please refer to
| :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. |
| |
    For a detailed example of using AdaBoost to fit a non-linearly separable
| classification dataset composed of two Gaussian quantiles clusters, please |
| refer to :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py`. |
| """ |
|
|
| |
| _parameter_constraints: dict = { |
| **BaseWeightBoosting._parameter_constraints, |
| "algorithm": [StrOptions({"SAMME"}), Hidden(StrOptions({"deprecated"}))], |
| } |
|
|
| def __init__( |
| self, |
| estimator=None, |
| *, |
| n_estimators=50, |
| learning_rate=1.0, |
| algorithm="deprecated", |
| random_state=None, |
| ): |
| super().__init__( |
| estimator=estimator, |
| n_estimators=n_estimators, |
| learning_rate=learning_rate, |
| random_state=random_state, |
| ) |
|
|
| self.algorithm = algorithm |
|
|
| def _validate_estimator(self): |
| """Check the estimator and set the estimator_ attribute.""" |
| super()._validate_estimator(default=DecisionTreeClassifier(max_depth=1)) |
|
|
| if self.algorithm != "deprecated": |
| warnings.warn( |
| "The parameter 'algorithm' is deprecated in 1.6 and has no effect. " |
| "It will be removed in version 1.8.", |
| FutureWarning, |
| ) |
|
|
| if not has_fit_parameter(self.estimator_, "sample_weight"): |
| raise ValueError( |
| f"{self.estimator.__class__.__name__} doesn't support sample_weight." |
| ) |
|
|
| def _boost(self, iboost, X, y, sample_weight, random_state): |
| """Implement a single boost. |
| |
| Perform a single boost according to the discrete SAMME algorithm and return the |
| updated sample weights. |
| |
| Parameters |
| ---------- |
| iboost : int |
| The index of the current boost iteration. |
| |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
| The training input samples. |
| |
| y : array-like of shape (n_samples,) |
| The target values (class labels). |
| |
| sample_weight : array-like of shape (n_samples,) |
| The current sample weights. |
| |
| random_state : RandomState instance |
| The RandomState instance used if the base estimator accepts a |
| `random_state` attribute. |
| |
| Returns |
| ------- |
| sample_weight : array-like of shape (n_samples,) or None |
| The reweighted sample weights. |
| If None then boosting has terminated early. |
| |
| estimator_weight : float |
| The weight for the current boost. |
| If None then boosting has terminated early. |
| |
| estimator_error : float |
| The classification error for the current boost. |
| If None then boosting has terminated early. |
| """ |
| estimator = self._make_estimator(random_state=random_state) |
|
|
| estimator.fit(X, y, sample_weight=sample_weight) |
|
|
| y_predict = estimator.predict(X) |
|
|
| if iboost == 0: |
| self.classes_ = getattr(estimator, "classes_", None) |
| self.n_classes_ = len(self.classes_) |
|
|
| |
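        # Instances incorrectly classified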
| incorrect = y_predict != y |
|
|
| |
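        # Error fraction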
| estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0)) |
|
|
| |
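        # Stop if classification is perfect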
| if estimator_error <= 0: |
| return sample_weight, 1.0, 0.0 |
|
|
| n_classes = self.n_classes_ |
|
|
| |
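        # Stop if the error is at least as bad as random guessing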
| if estimator_error >= 1.0 - (1.0 / n_classes): |
| self.estimators_.pop(-1) |
| if len(self.estimators_) == 0: |
| raise ValueError( |
| "BaseClassifier in AdaBoostClassifier " |
| "ensemble is worse than random, ensemble " |
| "can not be fit." |
| ) |
| return None, None, None |
|
|
| |
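        # Boost weight using the multi-class AdaBoost SAMME algorithm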
| estimator_weight = self.learning_rate * ( |
| np.log((1.0 - estimator_error) / estimator_error) + np.log(n_classes - 1.0) |
| ) |
|
|
| |
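        # Only boost the weights if it will fit again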
        if iboost != self.n_estimators - 1:
| |
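            # Only boost positive weights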
| sample_weight = np.exp( |
| np.log(sample_weight) |
| + estimator_weight * incorrect * (sample_weight > 0) |
| ) |
|
|
| return sample_weight, estimator_weight, estimator_error |
|
|
| def predict(self, X): |
| """Predict classes for X. |
| |
| The predicted class of an input sample is computed as the weighted mean |
| prediction of the classifiers in the ensemble. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
            The input samples. Sparse matrix can be CSC, CSR, COO,
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| Returns |
| ------- |
| y : ndarray of shape (n_samples,) |
| The predicted classes. |
| """ |
| pred = self.decision_function(X) |
|
|
| if self.n_classes_ == 2: |
| return self.classes_.take(pred > 0, axis=0) |
|
|
| return self.classes_.take(np.argmax(pred, axis=1), axis=0) |
|
|
| def staged_predict(self, X): |
| """Return staged predictions for X. |
| |
| The predicted class of an input sample is computed as the weighted mean |
| prediction of the classifiers in the ensemble. |
| |
| This generator method yields the ensemble prediction after each |
| iteration of boosting and therefore allows monitoring, such as to |
| determine the prediction on a test set after each boost. |
| |
| Parameters |
| ---------- |
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
| The input samples. Sparse matrix can be CSC, CSR, COO, |
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| Yields |
| ------ |
| y : generator of ndarray of shape (n_samples,) |
| The predicted classes. |
| """ |
| X = self._check_X(X) |
|
|
| n_classes = self.n_classes_ |
| classes = self.classes_ |
|
|
| if n_classes == 2: |
| for pred in self.staged_decision_function(X): |
| yield np.array(classes.take(pred > 0, axis=0)) |
|
|
| else: |
| for pred in self.staged_decision_function(X): |
| yield np.array(classes.take(np.argmax(pred, axis=1), axis=0)) |
|
|
| def decision_function(self, X): |
| """Compute the decision function of ``X``. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
            The input samples. Sparse matrix can be CSC, CSR, COO,
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| Returns |
| ------- |
        score : ndarray of shape (n_samples, k)
            The decision function of the input samples. The order of
            outputs is the same as that of the :term:`classes_` attribute.
            Binary classification is a special case with ``k == 1``,
            otherwise ``k == n_classes``. For binary classification,
| values closer to -1 or 1 mean more like the first or second |
| class in ``classes_``, respectively. |
| """ |
| check_is_fitted(self) |
| X = self._check_X(X) |
|
|
| n_classes = self.n_classes_ |
| classes = self.classes_[:, np.newaxis] |
|
|
| if n_classes == 1: |
| return np.zeros_like(X, shape=(X.shape[0], 1)) |
|
|
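        # Each estimator votes +w for its predicted class and
        # -w / (n_classes - 1) for every other class.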
| pred = sum( |
| np.where( |
| (estimator.predict(X) == classes).T, |
| w, |
| -1 / (n_classes - 1) * w, |
| ) |
| for estimator, w in zip(self.estimators_, self.estimator_weights_) |
| ) |
|
|
| pred /= self.estimator_weights_.sum() |
| if n_classes == 2: |
| pred[:, 0] *= -1 |
| return pred.sum(axis=1) |
| return pred |
|
|
| def staged_decision_function(self, X): |
| """Compute decision function of ``X`` for each boosting iteration. |
| |
        This method allows monitoring (i.e. determining the error on a test set)
| after each boosting iteration. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
            The input samples. Sparse matrix can be CSC, CSR, COO,
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| Yields |
| ------ |
| score : generator of ndarray of shape (n_samples, k) |
| The decision function of the input samples. The order of |
            outputs is the same as that of the :term:`classes_` attribute.
            Binary classification is a special case with ``k == 1``,
            otherwise ``k == n_classes``. For binary classification,
| values closer to -1 or 1 mean more like the first or second |
| class in ``classes_``, respectively. |
| """ |
| check_is_fitted(self) |
| X = self._check_X(X) |
|
|
| n_classes = self.n_classes_ |
| classes = self.classes_[:, np.newaxis] |
| pred = None |
| norm = 0.0 |
|
|
| for weight, estimator in zip(self.estimator_weights_, self.estimators_): |
| norm += weight |
|
|
| current_pred = np.where( |
| (estimator.predict(X) == classes).T, |
| weight, |
| -1 / (n_classes - 1) * weight, |
| ) |
|
|
| if pred is None: |
| pred = current_pred |
| else: |
| pred += current_pred |
|
|
| if n_classes == 2: |
| tmp_pred = np.copy(pred) |
| tmp_pred[:, 0] *= -1 |
| yield (tmp_pred / norm).sum(axis=1) |
| else: |
| yield pred / norm |
|
|
| @staticmethod |
| def _compute_proba_from_decision(decision, n_classes): |
| """Compute probabilities from the decision function. |
| |
        This is based on eq. (15) of [1] where:
            p(y=c|X) = exp((1 / (K-1)) f_c(X)) / sum_k(exp((1 / (K-1)) f_k(X)))
                     = softmax((1 / (K-1)) * f(X))
| |
| References |
| ---------- |
| .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", |
| 2009. |
| """ |
| if n_classes == 2: |
| decision = np.vstack([-decision, decision]).T / 2 |
| else: |
| decision /= n_classes - 1 |
| return softmax(decision, copy=False) |
|
|
| def predict_proba(self, X): |
| """Predict class probabilities for X. |
| |
        The predicted class probabilities of an input sample are computed as
| the weighted mean predicted class probabilities of the classifiers |
| in the ensemble. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
            The input samples. Sparse matrix can be CSC, CSR, COO,
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| Returns |
| ------- |
| p : ndarray of shape (n_samples, n_classes) |
| The class probabilities of the input samples. The order of |
            outputs is the same as that of the :term:`classes_` attribute.
| """ |
| check_is_fitted(self) |
| n_classes = self.n_classes_ |
|
|
| if n_classes == 1: |
| return np.ones((_num_samples(X), 1)) |
|
|
| decision = self.decision_function(X) |
| return self._compute_proba_from_decision(decision, n_classes) |
|
|
| def staged_predict_proba(self, X): |
| """Predict class probabilities for X. |
| |
        The predicted class probabilities of an input sample are computed as
| the weighted mean predicted class probabilities of the classifiers |
| in the ensemble. |
| |
| This generator method yields the ensemble predicted class probabilities |
| after each iteration of boosting and therefore allows monitoring, such |
| as to determine the predicted class probabilities on a test set after |
| each boost. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
            The input samples. Sparse matrix can be CSC, CSR, COO,
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| Yields |
| ------ |
        p : generator of ndarray of shape (n_samples, n_classes)
            The class probabilities of the input samples. The order of
            outputs is the same as that of the :term:`classes_` attribute.
| """ |
|
|
| n_classes = self.n_classes_ |
|
|
| for decision in self.staged_decision_function(X): |
| yield self._compute_proba_from_decision(decision, n_classes) |
|
|
| def predict_log_proba(self, X): |
| """Predict class log-probabilities for X. |
| |
        The predicted class log-probabilities of an input sample are computed as
| the weighted mean predicted class log-probabilities of the classifiers |
| in the ensemble. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
            The input samples. Sparse matrix can be CSC, CSR, COO,
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| Returns |
| ------- |
        p : ndarray of shape (n_samples, n_classes)
            The class log-probabilities of the input samples. The order of
            outputs is the same as that of the :term:`classes_` attribute.
| """ |
| return np.log(self.predict_proba(X)) |
|
|
|
|
| class AdaBoostRegressor(_RoutingNotSupportedMixin, RegressorMixin, BaseWeightBoosting): |
| """An AdaBoost regressor. |
| |
    An AdaBoost [1]_ regressor is a meta-estimator that begins by fitting a
| regressor on the original dataset and then fits additional copies of the |
| regressor on the same dataset but where the weights of instances are |
| adjusted according to the error of the current prediction. As such, |
| subsequent regressors focus more on difficult cases. |
| |
    This class implements the algorithm known as AdaBoost.R2 [2]_.
| |
| Read more in the :ref:`User Guide <adaboost>`. |
| |
| .. versionadded:: 0.14 |
| |
| Parameters |
| ---------- |
| estimator : object, default=None |
| The base estimator from which the boosted ensemble is built. |
| If ``None``, then the base estimator is |
| :class:`~sklearn.tree.DecisionTreeRegressor` initialized with |
| `max_depth=3`. |
| |
| .. versionadded:: 1.2 |
| `base_estimator` was renamed to `estimator`. |
| |
| n_estimators : int, default=50 |
| The maximum number of estimators at which boosting is terminated. |
| In case of perfect fit, the learning procedure is stopped early. |
| Values must be in the range `[1, inf)`. |
| |
| learning_rate : float, default=1.0 |
| Weight applied to each regressor at each boosting iteration. A higher |
| learning rate increases the contribution of each regressor. There is |
| a trade-off between the `learning_rate` and `n_estimators` parameters. |
| Values must be in the range `(0.0, inf)`. |
| |
| loss : {'linear', 'square', 'exponential'}, default='linear' |
| The loss function to use when updating the weights after each |
| boosting iteration. |
| |
| random_state : int, RandomState instance or None, default=None |
        Controls the random seed given to each `estimator` at each
| boosting iteration. |
| Thus, it is only used when `estimator` exposes a `random_state`. |
| In addition, it controls the bootstrap of the weights used to train the |
| `estimator` at each boosting iteration. |
| Pass an int for reproducible output across multiple function calls. |
| See :term:`Glossary <random_state>`. |
| |
| Attributes |
| ---------- |
| estimator_ : estimator |
| The base estimator from which the ensemble is grown. |
| |
| .. versionadded:: 1.2 |
| `base_estimator_` was renamed to `estimator_`. |
| |
| estimators_ : list of regressors |
| The collection of fitted sub-estimators. |
| |
| estimator_weights_ : ndarray of floats |
| Weights for each estimator in the boosted ensemble. |
| |
| estimator_errors_ : ndarray of floats |
| Regression error for each estimator in the boosted ensemble. |
| |
| feature_importances_ : ndarray of shape (n_features,) |
| The impurity-based feature importances if supported by the |
| ``estimator`` (when based on decision trees). |
| |
| Warning: impurity-based feature importances can be misleading for |
| high cardinality features (many unique values). See |
| :func:`sklearn.inspection.permutation_importance` as an alternative. |
| |
| n_features_in_ : int |
| Number of features seen during :term:`fit`. |
| |
| .. versionadded:: 0.24 |
| |
| feature_names_in_ : ndarray of shape (`n_features_in_`,) |
| Names of features seen during :term:`fit`. Defined only when `X` |
| has feature names that are all strings. |
| |
| .. versionadded:: 1.0 |
| |
| See Also |
| -------- |
| AdaBoostClassifier : An AdaBoost classifier. |
    GradientBoostingRegressor : Gradient Boosting Regression Tree.
| sklearn.tree.DecisionTreeRegressor : A decision tree regressor. |
| |
| References |
| ---------- |
| .. [1] Y. Freund, R. Schapire, "A Decision-Theoretic Generalization of |
           On-Line Learning and an Application to Boosting", 1995.
| |
| .. [2] H. Drucker, "Improving Regressors using Boosting Techniques", 1997. |
| |
| Examples |
| -------- |
| >>> from sklearn.ensemble import AdaBoostRegressor |
| >>> from sklearn.datasets import make_regression |
| >>> X, y = make_regression(n_features=4, n_informative=2, |
| ... random_state=0, shuffle=False) |
| >>> regr = AdaBoostRegressor(random_state=0, n_estimators=100) |
| >>> regr.fit(X, y) |
| AdaBoostRegressor(n_estimators=100, random_state=0) |
| >>> regr.predict([[0, 0, 0, 0]]) |
| array([4.7972...]) |
| >>> regr.score(X, y) |
| 0.9771... |
| |
| For a detailed example of utilizing :class:`~sklearn.ensemble.AdaBoostRegressor` |
| to fit a sequence of decision trees as weak learners, please refer to |
| :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py`. |
| """ |
|
|
| _parameter_constraints: dict = { |
| **BaseWeightBoosting._parameter_constraints, |
| "loss": [StrOptions({"linear", "square", "exponential"})], |
| } |
|
|
| def __init__( |
| self, |
| estimator=None, |
| *, |
| n_estimators=50, |
| learning_rate=1.0, |
| loss="linear", |
| random_state=None, |
| ): |
| super().__init__( |
| estimator=estimator, |
| n_estimators=n_estimators, |
| learning_rate=learning_rate, |
| random_state=random_state, |
| ) |
|
|
        self.loss = loss
|
|
| def _validate_estimator(self): |
| """Check the estimator and set the estimator_ attribute.""" |
| super()._validate_estimator(default=DecisionTreeRegressor(max_depth=3)) |
|
|
| def _boost(self, iboost, X, y, sample_weight, random_state): |
| """Implement a single boost for regression |
| |
| Perform a single boost according to the AdaBoost.R2 algorithm and |
| return the updated sample weights. |
| |
| Parameters |
| ---------- |
| iboost : int |
| The index of the current boost iteration. |
| |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
| The training input samples. |
| |
| y : array-like of shape (n_samples,) |
            The target values (real numbers).
| |
| sample_weight : array-like of shape (n_samples,) |
| The current sample weights. |
| |
        random_state : RandomState instance
| The RandomState instance used if the base estimator accepts a |
| `random_state` attribute. |
| Controls also the bootstrap of the weights used to train the weak |
| learner. |
| |
| Returns |
| ------- |
| sample_weight : array-like of shape (n_samples,) or None |
| The reweighted sample weights. |
| If None then boosting has terminated early. |
| |
| estimator_weight : float |
| The weight for the current boost. |
| If None then boosting has terminated early. |
| |
| estimator_error : float |
| The regression error for the current boost. |
| If None then boosting has terminated early. |
| """ |
| estimator = self._make_estimator(random_state=random_state) |
|
|
| |
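        # Weighted sampling of the training set with replacement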
| bootstrap_idx = random_state.choice( |
| np.arange(_num_samples(X)), |
| size=_num_samples(X), |
| replace=True, |
| p=sample_weight, |
| ) |
|
|
| |
| |
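        # Fit on the bootstrapped sample and obtain a prediction
        # for all samples in the training set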
| X_ = _safe_indexing(X, bootstrap_idx) |
| y_ = _safe_indexing(y, bootstrap_idx) |
| estimator.fit(X_, y_) |
| y_predict = estimator.predict(X) |
|
|
| error_vect = np.abs(y_predict - y) |
| sample_mask = sample_weight > 0 |
| masked_sample_weight = sample_weight[sample_mask] |
| masked_error_vector = error_vect[sample_mask] |
|
|
| error_max = masked_error_vector.max() |
| if error_max != 0: |
| masked_error_vector /= error_max |
|
|
| if self.loss == "square": |
| masked_error_vector **= 2 |
| elif self.loss == "exponential": |
| masked_error_vector = 1.0 - np.exp(-masked_error_vector) |
|
|
| |
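        # Calculate the average loss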
| estimator_error = (masked_sample_weight * masked_error_vector).sum() |
|
|
| if estimator_error <= 0: |
| |
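            # Stop if fit is perfect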
| return sample_weight, 1.0, 0.0 |
|
|
| elif estimator_error >= 0.5: |
| |
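            # Discard the current estimator only if it isn't the only one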
| if len(self.estimators_) > 1: |
| self.estimators_.pop(-1) |
| return None, None, None |
|
|
| beta = estimator_error / (1.0 - estimator_error) |
|
|
| |
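        # Boost weight using the AdaBoost.R2 algorithm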
| estimator_weight = self.learning_rate * np.log(1.0 / beta) |
|
|
        if iboost != self.n_estimators - 1:
| sample_weight[sample_mask] *= np.power( |
| beta, (1.0 - masked_error_vector) * self.learning_rate |
| ) |
|
|
| return sample_weight, estimator_weight, estimator_error |
|
|
| def _get_median_predict(self, X, limit): |
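        """Compute the weighted median prediction over the first ``limit`` estimators."""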
| |
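        # Evaluate predictions of all estimators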
| predictions = np.array([est.predict(X) for est in self.estimators_[:limit]]).T |
|
|
| |
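        # Sort the predictions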
| sorted_idx = np.argsort(predictions, axis=1) |
|
|
| |
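        # Find index of median prediction for each sample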
| weight_cdf = stable_cumsum(self.estimator_weights_[sorted_idx], axis=1) |
| median_or_above = weight_cdf >= 0.5 * weight_cdf[:, -1][:, np.newaxis] |
| median_idx = median_or_above.argmax(axis=1) |
|
|
| median_estimators = sorted_idx[np.arange(_num_samples(X)), median_idx] |
|
|
| |
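        # Return median predictions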
| return predictions[np.arange(_num_samples(X)), median_estimators] |
|
|
| def predict(self, X): |
| """Predict regression value for X. |
| |
| The predicted regression value of an input sample is computed |
| as the weighted median prediction of the regressors in the ensemble. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
            The input samples. Sparse matrix can be CSC, CSR, COO,
| DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
| |
| Returns |
| ------- |
| y : ndarray of shape (n_samples,) |
| The predicted regression values. |
| """ |
| check_is_fitted(self) |
| X = self._check_X(X) |
|
|
| return self._get_median_predict(X, len(self.estimators_)) |
|
|
| def staged_predict(self, X): |
| """Return staged predictions for X. |
| |
| The predicted regression value of an input sample is computed |
| as the weighted median prediction of the regressors in the ensemble. |
| |
| This generator method yields the ensemble prediction after each |
| iteration of boosting and therefore allows monitoring, such as to |
| determine the prediction on a test set after each boost. |
| |
| Parameters |
| ---------- |
| X : {array-like, sparse matrix} of shape (n_samples, n_features) |
            The input samples.
| |
| Yields |
| ------ |
| y : generator of ndarray of shape (n_samples,) |
| The predicted regression values. |
| """ |
| check_is_fitted(self) |
| X = self._check_X(X) |
|
|
| for i, _ in enumerate(self.estimators_, 1): |
| yield self._get_median_predict(X, limit=i) |
|
|