Spaces:
Runtime error
Runtime error
| """ | |
| The code for GridSearchReduction wraps the source class | |
| fairlearn.reductions.GridSearch | |
| available in the https://github.com/fairlearn/fairlearn library | |
| licensed under the MIT Licencse, Copyright Microsoft Corporation | |
| """ | |
| from logging import warning | |
| try: | |
| import fairlearn.reductions as red | |
| except ImportError as error: | |
| warning("{}: GridSearchReduction will be unavailable. To install, run:\n" | |
| "pip install 'aif360[Reductions]'".format(error)) | |
| import pandas as pd | |
| from aif360.algorithms import Transformer | |
| from aif360.sklearn.inprocessing import GridSearchReduction as skGridSearchRed | |
| class GridSearchReduction(Transformer): | |
| """Grid search reduction for fair classification or regression. | |
| Grid search is an in-processing technique that can be used for fair | |
| classification or fair regression. For classification it reduces fair | |
| classification to a sequence of cost-sensitive classification problems, | |
| returning the deterministic classifier with the lowest empirical error | |
| subject to fair classification constraints [#agarwal18]_ among the | |
| candidates searched. For regression it uses the same priniciple to return a | |
| deterministic regressor with the lowest empirical error subject to the | |
| constraint of bounded group loss [#agarwal19]_. | |
| References: | |
| .. [#agarwal18] `A. Agarwal, A. Beygelzimer, M. Dudik, J. Langford, and | |
| H. Wallach, "A Reductions Approach to Fair Classification," | |
| International Conference on Machine Learning, 2018. | |
| <https://arxiv.org/abs/1803.02453>`_ | |
| .. [#agarwal19] `A. Agarwal, M. Dudik, and Z. Wu, "Fair Regression: | |
| Quantitative Definitions and Reduction-based Algorithms," | |
| International Conference on Machine Learning, 2019. | |
| <https://arxiv.org/abs/1905.12843>`_ | |
| """ | |
| def __init__(self, | |
| estimator, | |
| constraints, | |
| prot_attr=None, | |
| constraint_weight=0.5, | |
| grid_size=10, | |
| grid_limit=2.0, | |
| grid=None, | |
| drop_prot_attr=True, | |
| loss="ZeroOne", | |
| min_val=None, | |
| max_val=None): | |
| """ | |
| Args: | |
| estimator: An estimator implementing methods ``fit(X, y, | |
| sample_weight)`` and ``predict(X)``, where ``X`` is the matrix | |
| of features, ``y`` is the vector of labels, and | |
| ``sample_weight`` is a vector of weights; labels ``y`` and | |
| predictions returned by ``predict(X)`` are either 0 or 1 -- e.g. | |
| scikit-learn classifiers/regressors. | |
| constraints (str or fairlearn.reductions.Moment): If string, keyword | |
| denoting the :class:`fairlearn.reductions.Moment` object | |
| defining the disparity constraints -- e.g., "DemographicParity" | |
| or "EqualizedOdds". For a full list of possible options see | |
| `self.model.moments`. Otherwise, provide the desired | |
| :class:`~fairlearn.reductions.Moment` object defining the | |
| disparity constraints. | |
| prot_attr: String or array-like column indices or column names | |
| of protected attributes. | |
| constraint_weight: When the ``selection_rule`` is | |
| "tradeoff_optimization" (default, no other option currently) | |
| this float specifies the relative weight put on the constraint | |
| violation when selecting the best model. The weight placed on | |
| the error rate will be ``1-constraint_weight``. | |
| grid_size (int): The number of Lagrange multipliers to generate in | |
| the grid. | |
| grid_limit (float): The largest Lagrange multiplier to generate. The | |
| grid will contain values distributed between ``-grid_limit`` and | |
| ``grid_limit`` by default. | |
| grid (pandas.DataFrame): Instead of supplying a size and limit for | |
| the grid, users may specify the exact set of Lagrange | |
| multipliers they desire using this argument in a DataFrame. | |
| drop_prot_attr (bool): Flag indicating whether to drop protected | |
| attributes from training data. | |
| loss (str): String identifying loss function for constraints. | |
| Options include "ZeroOne", "Square", and "Absolute." | |
| min_val: Loss function parameter for "Square" and "Absolute," | |
| typically the minimum of the range of y values. | |
| max_val: Loss function parameter for "Square" and "Absolute," | |
| typically the maximum of the range of y values. | |
| """ | |
| super(GridSearchReduction, self).__init__() | |
| #init model, set prot_attr during fit | |
| if prot_attr is None: | |
| prot_attr = [] | |
| self.model = skGridSearchRed(prot_attr, estimator, constraints, | |
| constraint_weight, grid_size, grid_limit, grid, drop_prot_attr, | |
| loss, min_val, max_val) | |
| def fit(self, dataset): | |
| """Learns model with less bias | |
| Args: | |
| dataset : Dataset containing true output. | |
| Returns: | |
| GridSearchReduction: Returns self. | |
| """ | |
| #set prot_attr | |
| if len(self.model.prot_attr) == 0: | |
| self.model.prot_attr = dataset.protected_attribute_names | |
| X_df = pd.DataFrame(dataset.features, columns=dataset.feature_names) | |
| Y = dataset.labels | |
| self.model.fit(X_df, Y) | |
| return self | |
| def predict(self, dataset): | |
| """Obtain the predictions for the provided dataset using the model | |
| learned. | |
| Args: | |
| dataset: Dataset containing output values that need to be | |
| transformed. | |
| Returns: | |
| dataset: Transformed dataset. | |
| """ | |
| X_df = pd.DataFrame(dataset.features, columns=dataset.feature_names) | |
| dataset_new = dataset.copy() | |
| dataset_new.labels = self.model.predict(X_df).reshape(-1, 1) | |
| if isinstance(self.model.moment, red.ClassificationMoment): | |
| fav = int(dataset.favorable_label) | |
| try: | |
| # Probability of favorable label | |
| scores = self.model.predict_proba(X_df)[:, fav] | |
| dataset_new.scores = scores.reshape(-1, 1) | |
| except (AttributeError, NotImplementedError): | |
| warning("dataset.scores not updated, underlying model does not " | |
| "support predict_proba") | |
| return dataset_new | |