Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -439,6 +439,29 @@ def run_light_auto_test():
|
|
| 439 |
|
| 440 |
return f"Loaded {len(ALL_Y_TRUE)} samples for fairness evaluation."
|
| 441 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
def analyze_response(user_response):
|
| 443 |
"""Main analysis function for Gradio"""
|
| 444 |
|
|
@@ -475,12 +498,17 @@ def analyze_response(user_response):
|
|
| 475 |
eod = equalized_odds_difference(
|
| 476 |
ALL_Y_TRUE, ALL_Y_PRED, sensitive_features=ALL_GROUPS
|
| 477 |
)
|
| 478 |
-
|
|
|
|
|
|
|
| 479 |
=== FAIRNESS METRICS ===
|
| 480 |
DPD: {dpd:.2f}
|
| 481 |
DPR: {dpr:.2f}
|
| 482 |
EOD: {eod:.2f}
|
| 483 |
Samples collected: {len(ALL_Y_TRUE)}
|
|
|
|
|
|
|
|
|
|
| 484 |
"""
|
| 485 |
else:
|
| 486 |
fairness_metrics = "Not enough samples for fairness metrics."
|
|
|
|
| 439 |
|
| 440 |
return f"Loaded {len(ALL_Y_TRUE)} samples for fairness evaluation."
|
| 441 |
|
| 442 |
+
def explain_fairness_metrics(dpd, dpr, eod):
    """Return a plain-language explanation of three fairness metrics.

    Args:
        dpd: Demographic parity difference; its absolute value is compared
            against 0.1.
        dpr: Demographic parity ratio; considered fair inside [0.8, 1.25].
        eod: Equalized odds difference; its absolute value is compared
            against 0.1.

    Returns:
        A string with one bullet line per metric (DPD, DPR, EOD, in that
        order), each terminated by a newline.
    """
    import math  # local import keeps this block self-contained

    def _bullet(label, value, is_fair, fair_text, unfair_text):
        # NaN fails every comparison, so without this guard an uncomputable
        # metric would be reported as "High" / "Outside fair range", i.e. as
        # evidence of bias.
        # NOTE(review): presumably the upstream metric functions can yield
        # NaN (e.g. a 0/0 ratio) -- confirm against the metrics library.
        if math.isnan(value):
            verdict = (
                "Undefined → Metric could not be computed "
                "from the collected samples."
            )
        elif is_fair(value):
            verdict = fair_text
        else:
            verdict = unfair_text
        return f"• {label}: {verdict}\n"

    bullets = [
        _bullet(
            "DPD (Demographic Parity Difference)",
            dpd,
            lambda v: abs(v) < 0.1,
            "Low → Predictions are fairly balanced across groups.",
            "High → Model may favor one group over another.",
        ),
        _bullet(
            "DPR (Demographic Parity Ratio)",
            dpr,
            lambda v: 0.8 <= v <= 1.25,
            "Fair range → Outcomes are proportionally similar.",
            "Outside fair range → Possible bias in outcomes.",
        ),
        _bullet(
            "EOD (Equalized Odds Difference)",
            eod,
            lambda v: abs(v) < 0.1,
            "Low → Model treats groups similarly when predicting.",
            "High → Model performance differs across groups.",
        ),
    ]
    return "".join(bullets)
|
| 464 |
+
|
| 465 |
def analyze_response(user_response):
|
| 466 |
"""Main analysis function for Gradio"""
|
| 467 |
|
|
|
|
| 498 |
eod = equalized_odds_difference(
|
| 499 |
ALL_Y_TRUE, ALL_Y_PRED, sensitive_features=ALL_GROUPS
|
| 500 |
)
|
| 501 |
+
explanation = explain_fairness_metrics(dpd, dpr, eod)
|
| 502 |
+
|
| 503 |
+
fairness_metrics = f"""
|
| 504 |
=== FAIRNESS METRICS ===
|
| 505 |
DPD: {dpd:.2f}
|
| 506 |
DPR: {dpr:.2f}
|
| 507 |
EOD: {eod:.2f}
|
| 508 |
Samples collected: {len(ALL_Y_TRUE)}
|
| 509 |
+
|
| 510 |
+
--- What this means ---
|
| 511 |
+
{explanation}
|
| 512 |
"""
|
| 513 |
else:
|
| 514 |
fairness_metrics = "Not enough samples for fairness metrics."
|