Spaces:
Runtime error
Runtime error
| """ | |
| The code for ExponentiatedGradientReduction wraps the source class | |
| fairlearn.reductions.ExponentiatedGradient | |
| available in the https://github.com/fairlearn/fairlearn library | |
| licensed under the MIT Licencse, Copyright Microsoft Corporation | |
| """ | |
| try: | |
| import fairlearn.reductions as red | |
| except ImportError as error: | |
| from logging import warning | |
| warning("{}: ExponentiatedGradientReduction will be unavailable. To install, run:\n" | |
| "pip install 'aif360[Reductions]'".format(error)) | |
| from sklearn.base import BaseEstimator, ClassifierMixin, clone | |
| from sklearn.preprocessing import LabelEncoder | |
class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin):
    """Exponentiated gradient reduction for fair classification.

    Exponentiated gradient reduction is an in-processing technique that reduces
    fair classification to a sequence of cost-sensitive classification problems,
    returning a randomized classifier with the lowest empirical error subject to
    fair classification constraints [#agarwal18]_.

    References:
        .. [#agarwal18] `A. Agarwal, A. Beygelzimer, M. Dudik, J. Langford, and
           H. Wallach, "A Reductions Approach to Fair Classification,"
           International Conference on Machine Learning, 2018.
           <https://arxiv.org/abs/1803.02453>`_
    """

    def __init__(self,
                 prot_attr,
                 estimator,
                 constraints,
                 eps=0.01,
                 max_iter=50,
                 nu=None,
                 eta0=2.0,
                 run_linprog_step=True,
                 drop_prot_attr=True):
        """
        Args:
            prot_attr: String or array-like column indices or column names of
                protected attributes.
            estimator: An estimator implementing methods
                ``fit(X, y, sample_weight)`` and ``predict(X)``, where ``X`` is
                the matrix of features, ``y`` is the vector of labels, and
                ``sample_weight`` is a vector of weights; labels ``y`` and
                predictions returned by ``predict(X)`` are either 0 or 1 -- e.g.
                scikit-learn classifiers.
            constraints (str or fairlearn.reductions.Moment): If string, keyword
                denoting the :class:`fairlearn.reductions.Moment` object
                defining the disparity constraints -- e.g., "DemographicParity"
                or "EqualizedOdds". For a full list of possible options see
                `self.model.moments`. Otherwise, provide the desired
                :class:`~fairlearn.reductions.Moment` object defining the
                disparity constraints.
            eps: Allowed fairness constraint violation; the solution is
                guaranteed to have the error within ``2*best_gap`` of the best
                error under constraint eps; the constraint violation is at most
                ``2*(eps+best_gap)``.
            max_iter: Maximum number of iterations.
            nu: Convergence threshold for the duality gap, corresponding to a
                conservative automatic setting based on the statistical
                uncertainty in measuring classification error.
            eta0: Initial setting of the learning rate.
            run_linprog_step: If True each step of exponentiated gradient is
                followed by the saddle point optimization over the convex hull
                of classifiers returned so far.
            drop_prot_attr: Boolean flag indicating whether to drop protected
                attributes from training data.
        """
        # Per sklearn convention, __init__ only stores hyperparameters;
        # all work happens in fit().
        self.prot_attr = prot_attr
        self.estimator = estimator
        self.constraints = constraints
        self.eps = eps
        self.max_iter = max_iter
        self.nu = nu
        self.eta0 = eta0
        self.run_linprog_step = run_linprog_step
        self.drop_prot_attr = drop_prot_attr

    def fit(self, X, y):
        """Learns randomized model with less bias

        Args:
            X (pandas.DataFrame): Training samples.
            y (array-like): Training labels.

        Returns:
            self

        Raises:
            ValueError: If ``constraints`` is an unrecognized string or is
                neither a string nor a :class:`fairlearn.reductions.Moment`.
        """
        self.estimator_ = clone(self.estimator)

        # Map keyword strings to the corresponding fairlearn Moment classes.
        moments = {
            "DemographicParity": red.DemographicParity,
            "EqualizedOdds": red.EqualizedOdds,
            "TruePositiveRateParity": red.TruePositiveRateParity,
            "FalsePositiveRateParity": red.FalsePositiveRateParity,
            "ErrorRateParity": red.ErrorRateParity,
        }
        if isinstance(self.constraints, str):
            if self.constraints not in moments:
                raise ValueError(f"Constraint not recognized: {self.constraints}")
            self.moment_ = moments[self.constraints]()
        elif isinstance(self.constraints, red.Moment):
            self.moment_ = self.constraints
        else:
            raise ValueError("constraints must be a string or Moment object.")

        # BUG FIX: previously `run_linprog_step` was stored in __init__ but
        # never forwarded here, so the flag was silently ignored.
        self.model_ = red.ExponentiatedGradient(
            self.estimator_, self.moment_, eps=self.eps,
            max_iter=self.max_iter, nu=self.nu, eta0=self.eta0,
            run_linprog_step=self.run_linprog_step)

        # Extract sensitive features before (optionally) dropping them from X.
        A = X[self.prot_attr]
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        # fairlearn expects 0/1 integer labels; remember the original classes
        # so predict() can map back.
        le = LabelEncoder()
        y = le.fit_transform(y)
        self.classes_ = le.classes_

        self.model_.fit(X, y, sensitive_features=A)

        return self

    def predict(self, X):
        """Predict class labels for the given samples.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: Predicted class label per sample.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        # Map encoded 0/1 predictions back to the original labels.
        return self.classes_[self.model_.predict(X)]

    def predict_proba(self, X):
        """Probability estimates.

        The returned estimates for all classes are ordered by the label of
        classes.

        Args:
            X (pandas.DataFrame): Test samples.

        Returns:
            numpy.ndarray: Returns the probability of the sample for each class
            in the model, where classes are ordered as they are in
            ``self.classes_``.
        """
        if self.drop_prot_attr:
            X = X.drop(self.prot_attr, axis=1)

        # NOTE: _pmf_predict is a private fairlearn API; it returns the
        # probability mass over the randomized classifier's label predictions.
        return self.model_._pmf_predict(X)