Upload src/bias_guard.py with huggingface_hub
Browse files- src/bias_guard.py +197 -0
src/bias_guard.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Bias Mitigation Layer
|
| 3 |
+
|
| 4 |
+
Scans inputs, features, and outputs for potential bias risks.
|
| 5 |
+
Flags issues without blocking — humans make final decisions.
|
| 6 |
+
|
| 7 |
+
Principles:
|
| 8 |
+
- Never use demographic traits (age, gender, race, ethnicity, nationality)
|
| 9 |
+
- Never use university prestige as a direct signal
|
| 10 |
+
- Never use name-based inferences
|
| 11 |
+
- Flag proxy variables that correlate with protected characteristics
|
| 12 |
+
- Log all flagged items for audit
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import re
|
| 16 |
+
from dataclasses import dataclass, field
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@dataclass
class BiasFlag:
    """One bias risk detected during an audit (advisory — never blocks on its own)."""

    category: str  # e.g. demographic_leak, proxy_variable, prestige_bias
    severity: str  # one of: info, warning, critical
    description: str  # human-readable explanation of what was detected
    field_path: str  # JSON path to the flagged field
    recommendation: str  # suggested remediation for the human reviewer
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@dataclass
class BiasAuditResult:
    """Aggregate outcome of a single bias-audit pass.

    Collects every BiasFlag raised during a scan. The result remains
    `passed` unless at least one critical flag is recorded; info and
    warning flags are purely advisory.
    """

    flags: list[BiasFlag] = field(default_factory=list)
    passed: bool = True
    summary: str = ""

    def add_flag(self, flag: BiasFlag):
        """Record `flag`; a critical severity fails the whole audit."""
        self.flags.append(flag)
        # Once failed, stay failed; only a critical flag can flip us.
        self.passed = self.passed and flag.severity != "critical"
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# Known prestige indicators that should not influence scoring.
# All patterns in this module are applied with re.IGNORECASE by BiasGuard,
# so they are written in lowercase.
PRESTIGE_INDICATORS = [
    r"\b(ivy league|oxbridge|russell group)\b",
    r"\b(harvard|stanford|mit|princeton|yale|columbia|caltech|oxford|cambridge)\b",
    r"\b(top\s*\d+\s*(university|school|program))\b",
    r"\b(elite|prestigious|tier[- ]?1|world[- ]?class)\b.*\b(university|school|institution)\b",
]

# Demographic signal patterns that must never be used.
# Each entry is (regex, signal_type).
# NOTE(review): the family_status and disability patterns omit a trailing \b,
# so inflected forms ("singles", "handicapped") also match — presumably
# intentional stemming; confirm before tightening.
DEMOGRAPHIC_PATTERNS = [
    (r"\b(age|born|year of birth|dob)\s*[:=]\s*\d+", "age_signal"),
    (r"\b(gender|sex)\s*[:=]\s*\w+", "gender_signal"),
    (r"\b(race|ethnicity|national origin)\s*[:=]", "race_signal"),
    (r"\b(married|single|divorced|children|pregnant)", "family_status"),
    (r"\b(disability|disabled|handicap)", "disability_signal"),
    (r"\b(veteran|military service)\b", "veteran_status"),  # info only, not critical
    (r"\b(religion|religious|church|mosque|temple|synagogue)\b", "religion_signal"),
]

# Proxy variables that may correlate with protected characteristics.
# Each entry is (regex, proxy_type, recommendation shown to the reviewer).
PROXY_PATTERNS = [
    (r"\b(graduation year|class of \d{4})\b", "age_proxy",
     "Graduation year can be used to infer age. Do not use in scoring."),
    (r"\b(native speaker|native english|accent)\b", "national_origin_proxy",
     "Language nativity can proxy for national origin. Focus on communication skill evidence instead."),
    (r"\b(cultural fit)\b", "affinity_proxy",
     "'Cultural fit' is a known proxy for in-group bias. Use 'values alignment' with specific criteria instead."),
    (r"\b(commute|neighborhood|zip code|postal code)\b", "socioeconomic_proxy",
     "Location granularity can proxy for socioeconomic status."),
]
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class BiasGuard:
    """Scans for and flags potential bias in inputs and outputs.

    Every audit returns a BiasAuditResult. Flags are advisory; only
    critical flags mark a result as failed — humans make final decisions.
    """

    def audit_inputs(self, job_description: str, resume_text: str) -> BiasAuditResult:
        """Scan raw inputs for bias risks before processing.

        The job description is checked for demographic language and proxy
        variables. The resume is checked for demographic leaks only —
        resumes legitimately contain dates and locations that the proxy
        patterns would over-flag.

        Args:
            job_description: raw job-description text.
            resume_text: raw resume text.

        Returns:
            BiasAuditResult with any flags and a one-line summary.
        """
        result = BiasAuditResult()

        # Check JD for biased language.
        self._check_demographic_signals(job_description, "job_description", result)
        self._check_proxy_variables(job_description, "job_description", result)

        # Check resume for demographic leaks.
        self._check_demographic_signals(resume_text, "resume", result)

        result.summary = self._summarize(result)
        return result

    def audit_features(self, role_features: dict, candidate_features: dict) -> BiasAuditResult:
        """Scan extracted features for bias risks.

        Args:
            role_features: extracted role features (not inspected yet; kept
                in the signature for future role-side checks).
            candidate_features: extracted candidate features. Reads
                "education" (list of dicts with an "institution" key) and
                "experience_profile" -> "positions" (dicts that may carry
                a "start_year").

        Returns:
            BiasAuditResult with prestige and age-proxy flags.
        """
        result = BiasAuditResult()

        # Check whether education prestige leaked into the features.
        edu = candidate_features.get("education", [])
        for i, entry in enumerate(edu):
            inst = entry.get("institution", "")
            for pattern in PRESTIGE_INDICATORS:
                if re.search(pattern, inst, re.IGNORECASE):
                    result.add_flag(BiasFlag(
                        category="prestige_bias",
                        severity="warning",
                        description=f"University prestige detected: '{inst}'. Must not influence scoring.",
                        field_path=f"candidate_features.education[{i}].institution",
                        recommendation="Use degree field and level only, not institution name.",
                    ))

        # Check for age-inferrable data: the earliest start year bounds the
        # candidate's age from below.
        positions = candidate_features.get("experience_profile", {}).get("positions", [])
        if positions:
            # 9999 acts as a sentinel for "no usable start_year found".
            earliest = min(
                (p.get("start_year", 9999) for p in positions if p.get("start_year")),
                default=9999,
            )
            if earliest < 9999:
                result.add_flag(BiasFlag(
                    category="age_proxy",
                    severity="info",
                    description=f"Earliest career date ({earliest}) can be used to infer age.",
                    field_path="candidate_features.experience_profile.positions",
                    recommendation="Use total_years and relevant_years for scoring, not start dates.",
                ))

        result.summary = self._summarize(result)
        return result

    def audit_output(self, final_output: dict) -> BiasAuditResult:
        """Scan final output for bias in reasoning.

        Reads "reasoning_summary", "positive_signals", and "risk_signals"
        from final_output and scans the combined text for prestige and
        demographic references. Prestige or demographic mentions in the
        output are critical (they prove the signal influenced reasoning),
        except veteran status, which stays info-only.
        """
        result = BiasAuditResult()

        # Combine reasoning text and all signal strings for scanning.
        reasoning = final_output.get("reasoning_summary", "")
        signals = (
            final_output.get("positive_signals", [])
            + final_output.get("risk_signals", [])
        )
        # Bug fix: join every piece with a space. The previous
        # `reasoning + " ".join(signals)` fused the last word of the
        # reasoning with the first signal, which could hide a match whose
        # \b word-boundary straddles that seam.
        all_text = " ".join([reasoning, *signals])

        for pattern in PRESTIGE_INDICATORS:
            if re.search(pattern, all_text, re.IGNORECASE):
                result.add_flag(BiasFlag(
                    category="prestige_bias",
                    severity="critical",
                    description="University prestige mentioned in output reasoning.",
                    field_path="reasoning/signals",
                    recommendation="Remove prestige references. Use skill and experience evidence only.",
                ))

        for pattern, signal_type in DEMOGRAPHIC_PATTERNS:
            if re.search(pattern, all_text, re.IGNORECASE):
                # Consistency fix: veteran status is documented as info-only
                # in DEMOGRAPHIC_PATTERNS (and treated as info by
                # _check_demographic_signals), so it must not fail the
                # output audit either.
                severity = "info" if signal_type == "veteran_status" else "critical"
                result.add_flag(BiasFlag(
                    category="demographic_leak",
                    severity=severity,
                    description=f"Demographic signal ({signal_type}) found in output.",
                    field_path="reasoning/signals",
                    recommendation="Remove all demographic references from output.",
                ))

        result.summary = self._summarize(result)
        return result

    def _check_demographic_signals(self, text: str, source: str, result: BiasAuditResult) -> None:
        """Flag any demographic signal patterns found in `text`.

        Veteran status is downgraded to "info" per the note in
        DEMOGRAPHIC_PATTERNS; everything else is a "warning" at the input
        stage, since inputs may legitimately contain such text — scoring
        simply must not use it.
        """
        for pattern, signal_type in DEMOGRAPHIC_PATTERNS:
            if re.search(pattern, text, re.IGNORECASE):
                severity = "info" if signal_type == "veteran_status" else "warning"
                result.add_flag(BiasFlag(
                    category="demographic_leak",
                    severity=severity,
                    description=f"Demographic signal ({signal_type}) detected in {source}.",
                    field_path=source,
                    recommendation=f"Ensure {signal_type} is not used in scoring.",
                ))

    def _check_proxy_variables(self, text: str, source: str, result: BiasAuditResult) -> None:
        """Flag any proxy-variable patterns found in `text` (always warnings)."""
        for pattern, proxy_type, recommendation in PROXY_PATTERNS:
            if re.search(pattern, text, re.IGNORECASE):
                result.add_flag(BiasFlag(
                    category="proxy_variable",
                    severity="warning",
                    description=f"Proxy variable ({proxy_type}) detected in {source}.",
                    field_path=source,
                    recommendation=recommendation,
                ))

    def _summarize(self, result: BiasAuditResult) -> str:
        """Return a one-line tally of flags grouped by severity."""
        if not result.flags:
            return "No bias risks detected."
        critical = sum(1 for f in result.flags if f.severity == "critical")
        warnings = sum(1 for f in result.flags if f.severity == "warning")
        info = sum(1 for f in result.flags if f.severity == "info")
        parts = []
        if critical:
            parts.append(f"{critical} critical")
        if warnings:
            parts.append(f"{warnings} warnings")
        if info:
            parts.append(f"{info} info")
        return f"Bias audit: {', '.join(parts)} flag(s) found."
|