Spaces:
Runtime error
Runtime error
| """ | |
| The code for GridSearchReduction wraps the source class | |
| fairlearn.reductions.GridSearch | |
| available in the https://github.com/fairlearn/fairlearn library | |
| licensed under the MIT License, Copyright Microsoft Corporation | |
| """ | |
| try: | |
| import fairlearn.reductions as red | |
| except ImportError as error: | |
| from logging import warning | |
| warning("{}: GridSearchReduction will be unavailable. To install, run:\n" | |
| "pip install 'aif360[Reductions]'".format(error)) | |
| from sklearn.base import BaseEstimator, ClassifierMixin, clone | |
| from sklearn.preprocessing import LabelEncoder | |
class GridSearchReduction(BaseEstimator, ClassifierMixin):
    """Grid search reduction for fair classification or regression.

    Grid search is an in-processing technique that can be used for fair
    classification or fair regression. For classification it reduces fair
    classification to a sequence of cost-sensitive classification problems,
    returning the deterministic classifier with the lowest empirical error
    subject to fair classification constraints [#agarwal18]_ among the
    candidates searched. For regression it uses the same principle to return a
    deterministic regressor with the lowest empirical error subject to the
    constraint of bounded group loss [#agarwal19]_.

    This class is a thin scikit-learn style wrapper: :meth:`fit` builds a
    ``fairlearn.reductions.GridSearch`` object from the constructor
    parameters and delegates training and prediction to it.

    References:
        .. [#agarwal18] `A. Agarwal, A. Beygelzimer, M. Dudik, J. Langford, and
           H. Wallach, "A Reductions Approach to Fair Classification,"
           International Conference on Machine Learning, 2018.
           <https://arxiv.org/abs/1803.02453>`_
        .. [#agarwal19] `A. Agarwal, M. Dudik, and Z. Wu, "Fair Regression:
           Quantitative Definitions and Reduction-based Algorithms,"
           International Conference on Machine Learning, 2019.
           <https://arxiv.org/abs/1905.12843>`_
    """

    def __init__(self,
                 prot_attr,
                 estimator,
                 constraints,
                 constraint_weight=0.5,
                 grid_size=10,
                 grid_limit=2.0,
                 grid=None,
                 drop_prot_attr=True,
                 loss="ZeroOne",
                 min_val=None,
                 max_val=None
                 ):
        """
        Args:
            prot_attr: String or array-like column indices or column names
                of protected attributes.
            estimator: An estimator implementing methods ``fit(X, y,
                sample_weight)`` and ``predict(X)``, where ``X`` is the matrix
                of features, ``y`` is the vector of labels, and
                ``sample_weight`` is a vector of weights; labels ``y`` and
                predictions returned by ``predict(X)`` are either 0 or 1 -- e.g.
                scikit-learn classifiers/regressors.
            constraints (str or fairlearn.reductions.Moment): If string, keyword
                denoting the :class:`fairlearn.reductions.Moment` object
                defining the disparity constraints. Recognized keywords are
                "DemographicParity", "EqualizedOdds",
                "TruePositiveRateParity", "FalsePositiveRateParity",
                "ErrorRateParity" and "BoundedGroupLoss". Otherwise, provide
                the desired :class:`~fairlearn.reductions.Moment` object
                defining the disparity constraints.
            constraint_weight: When the ``selection_rule`` is
                "tradeoff_optimization" (default, no other option currently)
                this float specifies the relative weight put on the constraint
                violation when selecting the best model. The weight placed on
                the error rate will be ``1-constraint_weight``.
            grid_size (int): The number of Lagrange multipliers to generate in
                the grid.
            grid_limit (float): The largest Lagrange multiplier to generate. The
                grid will contain values distributed between ``-grid_limit`` and
                ``grid_limit`` by default.
            grid (pandas.DataFrame): Instead of supplying a size and limit for
                the grid, users may specify the exact set of Lagrange
                multipliers they desire using this argument in a DataFrame.
            drop_prot_attr (bool): Flag indicating whether to drop protected
                attributes from training data.
            loss (str): String identifying loss function for constraints.
                Options include "ZeroOne", "Square", and "Absolute." Only
                consulted when ``constraints`` is the string
                "BoundedGroupLoss".
            min_val: Loss function parameter for "Square" and "Absolute,"
                typically the minimum of the range of y values.
            max_val: Loss function parameter for "Square" and "Absolute,"
                typically the maximum of the range of y values.
        """
        # Parameters are stored verbatim; all validation and object
        # construction is deferred to fit().
        self.prot_attr = prot_attr
        self.estimator = estimator
        self.constraints = constraints
        self.constraint_weight = constraint_weight
        self.grid_size = grid_size
        self.grid_limit = grid_limit
        self.grid = grid
        self.drop_prot_attr = drop_prot_attr
        self.loss = loss
        self.min_val = min_val
        self.max_val = max_val

    def _resolve_moment(self):
        """Turn ``self.constraints`` into a fairlearn ``Moment`` object.

        A string is looked up among the supported keywords and instantiated;
        for "BoundedGroupLoss" the loss object is built first and stored on
        ``self.loss_``. A ``Moment`` instance is returned as given.

        Returns:
            fairlearn.reductions.Moment: The constraint object to hand to
            ``GridSearch``.

        Raises:
            ValueError: If ``constraints`` is an unrecognized keyword or is
                neither a string nor a ``Moment``, or if ``loss`` is not a
                recognized keyword when "BoundedGroupLoss" is requested.
        """
        moments = {
            "DemographicParity": red.DemographicParity,
            "EqualizedOdds": red.EqualizedOdds,
            "TruePositiveRateParity": red.TruePositiveRateParity,
            "FalsePositiveRateParity": red.FalsePositiveRateParity,
            "ErrorRateParity": red.ErrorRateParity,
            "BoundedGroupLoss": red.BoundedGroupLoss,
        }

        if isinstance(self.constraints, str):
            if self.constraints not in moments:
                raise ValueError(f"Constraint not recognized: {self.constraints}")
            if self.constraints != "BoundedGroupLoss":
                return moments[self.constraints]()

            losses = {
                "ZeroOne": red.ZeroOneLoss,
                "Square": red.SquareLoss,
                "Absolute": red.AbsoluteLoss,
            }
            # Fail with a descriptive error, like the constraint check above,
            # instead of leaking a bare KeyError from the lookup below.
            if self.loss not in losses:
                raise ValueError(f"Loss not recognized: {self.loss}")
            if self.loss == "ZeroOne":
                self.loss_ = losses[self.loss]()
            else:
                # The other losses are constructed with the value range.
                self.loss_ = losses[self.loss](self.min_val, self.max_val)
            return moments[self.constraints](loss=self.loss_)

        if isinstance(self.constraints, red.Moment):
            return self.constraints

        raise ValueError("constraints must be a string or Moment object.")

    def fit(self, X, y):
        """Train a less biased classifier or regressor with the given training
        data.

        Sets ``estimator_`` (a clone of ``estimator``), ``moment_``,
        ``model_`` (the underlying ``GridSearch``) and, when the constraint is
        a classification moment, ``classes_``. ``loss_`` is set only for the
        "BoundedGroupLoss" keyword.

        Args:
            X (pandas.DataFrame): Training samples.
            y (array-like): Training output.

        Returns:
            self

        Raises:
            ValueError: If ``constraints`` or ``loss`` cannot be resolved (see
                the constructor arguments).
        """
        self.estimator_ = clone(self.estimator)
        self.moment_ = self._resolve_moment()

        self.model_ = red.GridSearch(estimator=self.estimator_,
                                     constraints=self.moment_,
                                     constraint_weight=self.constraint_weight,
                                     grid_size=self.grid_size,
                                     grid_limit=self.grid_limit,
                                     grid=self.grid)

        # Extract the protected attributes before they are (optionally)
        # removed from the feature matrix.
        A = X[self.prot_attr]
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        if isinstance(self.model_.constraints, red.ClassificationMoment):
            # Classification moments are trained on label-encoded targets;
            # the original labels are kept in classes_.
            le = LabelEncoder()
            y = le.fit_transform(y)
            self.classes_ = le.classes_

        self.model_.fit(X, y, sensitive_features=A)
        return self

    def predict(self, X):
        """Predict output for the given samples.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: Predicted output per sample.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)
        # NOTE(review): fit() label-encodes y for classification moments and
        # the wrapped model's output is returned unchanged here, so
        # predictions are presumably in the encoded label space -- confirm
        # whether they should be mapped back through ``classes_``.
        return self.model_.predict(X)

    def predict_proba(self, X):
        """Probability estimates.

        The returned estimates for all classes are ordered by the label of
        classes for classification.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: returns the probability of the sample for each class
            in the model, where classes are ordered as they are in
            ``self.classes_``.

        Raises:
            NotImplementedError: If the fitted constraint is not a
                classification moment.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        if isinstance(self.model_.constraints, red.ClassificationMoment):
            return self.model_.predict_proba(X)

        raise NotImplementedError("Underlying model does not support "
                                  "predict_proba")