test-ui / eval /fairness_metrics.py
juakazike's picture
Deploy testing UI for expert validation
d7d1833 verified
"""
Fairness metrics calculation for bias detection evaluation.
This module implements AI BRIDGE fairness requirements:
- Demographic Parity (DP): ≤0.10 threshold
- Equal Opportunity (EO): ≤0.05 threshold
- Multilingual Bias Evaluation (MBE)
These metrics ensure the bias detection system performs equitably across
demographic groups and language varieties.
"""
from dataclasses import dataclass
from typing import Optional
from enum import Enum
from .models import Language, BiasCategory
class DemographicGroup(Enum):
"""Demographic groups for fairness analysis."""
MALE_REFERENT = "male_referent"
FEMALE_REFERENT = "female_referent"
NEUTRAL_REFERENT = "neutral_referent"
UNKNOWN = "unknown"
@dataclass
class FairnessMetrics:
"""
Fairness evaluation metrics.
Attributes:
demographic_parity: Difference in positive prediction rates across groups (≤0.10)
equal_opportunity: Difference in TPR across groups (≤0.05)
equalized_odds: Difference in TPR and FPR across groups (≤0.05)
mbe_score: Multilingual bias evaluation score (0.0 to 1.0, higher is better)
group_metrics: Per-group performance breakdown
"""
demographic_parity: float
equal_opportunity: float
equalized_odds: float
mbe_score: float
group_metrics: dict[str, dict[str, float]]
def passes_aibridge_requirements(self) -> bool:
"""Check if metrics meet AI BRIDGE fairness thresholds."""
return (
self.demographic_parity <= 0.10
and self.equal_opportunity <= 0.05
and self.equalized_odds <= 0.05
and self.mbe_score >= 0.85
)
class FairnessCalculator:
"""
Calculate fairness metrics for bias detection evaluation.
Implements AI BRIDGE fairness requirements to ensure equitable performance
across demographic groups and language varieties.
"""
def calculate_demographic_parity(
self,
predictions: list[bool],
groups: list[DemographicGroup]
) -> float:
"""
Calculate Demographic Parity: max difference in positive prediction rates.
DP = max|P(Ŷ=1|A=a) - P(Ŷ=1|A=b)| across all group pairs
AI BRIDGE requirement: DP ≤ 0.10
Args:
predictions: List of binary predictions (True = bias detected)
groups: List of demographic groups for each prediction
Returns:
Maximum absolute difference in positive rates (0.0 to 1.0)
Example:
predictions = [True, True, False, False, True]
groups = [MALE, MALE, FEMALE, FEMALE, MALE]
Male positive rate: 3/3 = 1.00
Female positive rate: 0/2 = 0.00
DP = |1.00 - 0.00| = 1.00 (FAILS threshold)
"""
if not predictions or len(predictions) != len(groups):
return 0.0
# Calculate positive rate for each group
group_rates: dict[DemographicGroup, float] = {}
for group in set(groups):
group_indices = [i for i, g in enumerate(groups) if g == group]
if not group_indices:
continue
group_predictions = [predictions[i] for i in group_indices]
positive_rate = sum(group_predictions) / len(group_predictions)
group_rates[group] = positive_rate
if len(group_rates) < 2:
return 0.0
# Find maximum pairwise difference
rates = list(group_rates.values())
max_diff = max(rates) - min(rates)
return max_diff
def calculate_equal_opportunity(
self,
predictions: list[bool],
labels: list[bool],
groups: list[DemographicGroup]
) -> float:
"""
Calculate Equal Opportunity: max difference in True Positive Rates.
EO = max|TPR(A=a) - TPR(A=b)| across all group pairs
where TPR = TP / (TP + FN)
AI BRIDGE requirement: EO ≤ 0.05
Args:
predictions: List of binary predictions (True = bias detected)
labels: List of ground truth labels (True = has bias)
groups: List of demographic groups for each sample
Returns:
Maximum absolute difference in TPR (0.0 to 1.0)
Example:
predictions = [True, True, False, True]
labels = [True, True, True, True]
groups = [MALE, MALE, FEMALE, FEMALE]
Male TPR: 2/2 = 1.00
Female TPR: 1/2 = 0.50
EO = |1.00 - 0.50| = 0.50 (FAILS threshold)
"""
if not predictions or len(predictions) != len(labels) or len(predictions) != len(groups):
return 0.0
# Calculate TPR for each group
group_tprs: dict[DemographicGroup, float] = {}
for group in set(groups):
group_indices = [i for i, g in enumerate(groups) if g == group]
if not group_indices:
continue
# Count true positives and false negatives for this group
tp = sum(1 for i in group_indices if predictions[i] and labels[i])
fn = sum(1 for i in group_indices if not predictions[i] and labels[i])
if tp + fn == 0:
continue
tpr = tp / (tp + fn)
group_tprs[group] = tpr
if len(group_tprs) < 2:
return 0.0
# Find maximum pairwise difference
tprs = list(group_tprs.values())
max_diff = max(tprs) - min(tprs)
return max_diff
def calculate_equalized_odds(
self,
predictions: list[bool],
labels: list[bool],
groups: list[DemographicGroup]
) -> float:
"""
Calculate Equalized Odds: max difference in TPR and FPR.
EqOdds = max(TPR_diff, FPR_diff)
AI BRIDGE requirement: EqOdds ≤ 0.05
Args:
predictions: List of binary predictions
labels: List of ground truth labels
groups: List of demographic groups
Returns:
Maximum of TPR difference and FPR difference
"""
if not predictions or len(predictions) != len(labels) or len(predictions) != len(groups):
return 0.0
# Calculate TPR and FPR for each group
group_metrics: dict[DemographicGroup, dict[str, float]] = {}
for group in set(groups):
group_indices = [i for i, g in enumerate(groups) if g == group]
if not group_indices:
continue
# Calculate confusion matrix components
tp = sum(1 for i in group_indices if predictions[i] and labels[i])
fp = sum(1 for i in group_indices if predictions[i] and not labels[i])
tn = sum(1 for i in group_indices if not predictions[i] and not labels[i])
fn = sum(1 for i in group_indices if not predictions[i] and labels[i])
tpr = tp / (tp + fn) if (tp + fn) > 0 else 0.0
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0
group_metrics[group] = {"tpr": tpr, "fpr": fpr}
if len(group_metrics) < 2:
return 0.0
# Find maximum differences
tprs = [m["tpr"] for m in group_metrics.values()]
fprs = [m["fpr"] for m in group_metrics.values()]
tpr_diff = max(tprs) - min(tprs)
fpr_diff = max(fprs) - min(fprs)
return max(tpr_diff, fpr_diff)
def calculate_mbe_score(
self,
language_f1_scores: dict[Language, float],
target_f1: float = 0.75
) -> float:
"""
Calculate Multilingual Bias Evaluation (MBE) score.
MBE measures consistency of performance across languages relative to target.
MBE = 1 - (std_dev(F1_scores) / target_F1)
Higher is better (1.0 = perfect consistency, 0.0 = high variance).
AI BRIDGE target: MBE ≥ 0.85
Args:
language_f1_scores: F1 scores for each language
target_f1: AI BRIDGE F1 target (default: 0.75)
Returns:
MBE score (0.0 to 1.0)
Example:
EN: 0.76, SW: 0.80, FR: 0.75, KI: 0.74
Mean: 0.7625, StdDev: 0.025
MBE = 1 - (0.025 / 0.75) = 0.967 (PASSES)
"""
if not language_f1_scores or len(language_f1_scores) < 2:
return 0.0
scores = list(language_f1_scores.values())
# Calculate standard deviation
mean_score = sum(scores) / len(scores)
variance = sum((s - mean_score) ** 2 for s in scores) / len(scores)
std_dev = variance ** 0.5
# MBE score
if target_f1 == 0:
return 0.0
mbe = 1.0 - (std_dev / target_f1)
# Clamp to [0, 1]
return max(0.0, min(1.0, mbe))
def calculate_fairness_metrics(
self,
predictions: list[bool],
labels: list[bool],
groups: list[DemographicGroup],
language_f1_scores: Optional[dict[Language, float]] = None
) -> FairnessMetrics:
"""
Calculate comprehensive fairness metrics.
Args:
predictions: Binary predictions (bias detected or not)
labels: Ground truth labels
groups: Demographic group for each sample
language_f1_scores: Optional F1 scores by language for MBE
Returns:
FairnessMetrics object with all fairness measures
"""
dp = self.calculate_demographic_parity(predictions, groups)
eo = self.calculate_equal_opportunity(predictions, labels, groups)
eq_odds = self.calculate_equalized_odds(predictions, labels, groups)
# Calculate MBE if language scores provided
mbe = 0.0
if language_f1_scores:
mbe = self.calculate_mbe_score(language_f1_scores)
# Calculate per-group metrics
group_metrics: dict[str, dict[str, float]] = {}
for group in set(groups):
group_indices = [i for i, g in enumerate(groups) if g == group]
if not group_indices:
continue
group_preds = [predictions[i] for i in group_indices]
group_labels = [labels[i] for i in group_indices]
# Calculate F1 for this group
tp = sum(1 for p, l in zip(group_preds, group_labels) if p and l)
fp = sum(1 for p, l in zip(group_preds, group_labels) if p and not l)
fn = sum(1 for p, l in zip(group_preds, group_labels) if not p and l)
precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
group_metrics[group.value] = {
"precision": precision,
"recall": recall,
"f1_score": f1,
"sample_count": len(group_indices)
}
return FairnessMetrics(
demographic_parity=dp,
equal_opportunity=eo,
equalized_odds=eq_odds,
mbe_score=mbe,
group_metrics=group_metrics
)
def extract_demographic_group(text: str, language: Language) -> DemographicGroup:
"""
Extract demographic group from text based on gendered references.
This is a simple heuristic - in production, you'd want more sophisticated
analysis or explicit annotations in ground truth data.
Args:
text: Text sample
language: Language of the text
Returns:
Demographic group classification
"""
text_lower = " " + text.lower() + " " # Add spaces for boundary matching
if language == Language.ENGLISH:
male_markers = [" he ", " his ", " him ", " man ", " men ", " boy ", " father ", " brother "]
female_markers = [" she ", " her ", " woman ", " women ", " girl ", " mother ", " sister "]
neutral_markers = [" they ", " their ", " them ", " person ", " people ", " individual "]
has_male = any(marker in text_lower for marker in male_markers)
has_female = any(marker in text_lower for marker in female_markers)
has_neutral = any(marker in text_lower for marker in neutral_markers)
if has_male and not has_female:
return DemographicGroup.MALE_REFERENT
elif has_female and not has_male:
return DemographicGroup.FEMALE_REFERENT
elif has_neutral and not has_male and not has_female:
return DemographicGroup.NEUTRAL_REFERENT
elif language == Language.SWAHILI:
# Swahili is naturally gender-neutral (yeye = he/she)
# Bias often appears through context, not pronouns
male_markers = [" mwanamume ", " baba ", " kaka ", " ndugu "]
female_markers = [" mwanamke ", " mama ", " dada "]
has_male = any(marker in text_lower for marker in male_markers)
has_female = any(marker in text_lower for marker in female_markers)
if has_male and not has_female:
return DemographicGroup.MALE_REFERENT
elif has_female and not has_male:
return DemographicGroup.FEMALE_REFERENT
return DemographicGroup.UNKNOWN