# recruitment-intelligence/src/bias_guard.py
"""
Bias Mitigation Layer
Scans inputs, features, and outputs for potential bias risks.
Flags issues without blocking — humans make final decisions.
Principles:
- Never use demographic traits (age, gender, race, ethnicity, nationality)
- Never use university prestige as a direct signal
- Never use name-based inferences
- Flag proxy variables that correlate with protected characteristics
- Log all flagged items for audit
"""
import re
from dataclasses import dataclass, field
@dataclass
class BiasFlag:
category: str # demographic_leak, proxy_variable, prestige_bias, etc.
severity: str # info, warning, critical
description: str
field_path: str # JSON path to the flagged field
recommendation: str
@dataclass
class BiasAuditResult:
flags: list[BiasFlag] = field(default_factory=list)
passed: bool = True
summary: str = ""
def add_flag(self, flag: BiasFlag):
self.flags.append(flag)
if flag.severity == "critical":
self.passed = False
# Known prestige indicators that should not influence scoring
PRESTIGE_INDICATORS = [
r"\b(ivy league|oxbridge|russell group)\b",
r"\b(harvard|stanford|mit|princeton|yale|columbia|caltech|oxford|cambridge)\b",
r"\b(top\s*\d+\s*(university|school|program))\b",
r"\b(elite|prestigious|tier[- ]?1|world[- ]?class)\b.*\b(university|school|institution)\b",
]
# Demographic signal patterns that must never be used
DEMOGRAPHIC_PATTERNS = [
(r"\b(age|born|year of birth|dob)\s*[:=]\s*\d+", "age_signal"),
(r"\b(gender|sex)\s*[:=]\s*\w+", "gender_signal"),
(r"\b(race|ethnicity|national origin)\s*[:=]", "race_signal"),
(r"\b(married|single|divorced|children|pregnant)", "family_status"),
(r"\b(disability|disabled|handicap)", "disability_signal"),
(r"\b(veteran|military service)\b", "veteran_status"), # info only, not critical
(r"\b(religion|religious|church|mosque|temple|synagogue)\b", "religion_signal"),
]
# Proxy variables that may correlate with protected characteristics
PROXY_PATTERNS = [
(r"\b(graduation year|class of \d{4})\b", "age_proxy",
"Graduation year can be used to infer age. Do not use in scoring."),
(r"\b(native speaker|native english|accent)\b", "national_origin_proxy",
"Language nativity can proxy for national origin. Focus on communication skill evidence instead."),
(r"\b(cultural fit)\b", "affinity_proxy",
"'Cultural fit' is a known proxy for in-group bias. Use 'values alignment' with specific criteria instead."),
(r"\b(commute|neighborhood|zip code|postal code)\b", "socioeconomic_proxy",
"Location granularity can proxy for socioeconomic status."),
]
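# Illustrative example: a phrase such as "Class of 2019 graduate, native English
# speaker, great cultural fit" would trigger the age_proxy, national_origin_proxy,
# and affinity_proxy patterns above (the sample phrase is hypothetical).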
class BiasGuard:
"""Scans for and flags potential bias in inputs and outputs."""
def audit_inputs(self, job_description: str, resume_text: str) -> BiasAuditResult:
"""Scan raw inputs for bias risks before processing."""
result = BiasAuditResult()
# Check JD for biased language
self._check_demographic_signals(job_description, "job_description", result)
self._check_proxy_variables(job_description, "job_description", result)
# Check resume for demographic leaks
self._check_demographic_signals(resume_text, "resume", result)
result.summary = self._summarize(result)
return result
def audit_features(self, role_features: dict, candidate_features: dict) -> BiasAuditResult:
"""Scan extracted features for bias risks."""
result = BiasAuditResult()
# Check if education prestige leaked into features
edu = candidate_features.get("education", [])
for i, entry in enumerate(edu):
inst = entry.get("institution", "")
for pattern in PRESTIGE_INDICATORS:
if re.search(pattern, inst, re.IGNORECASE):
result.add_flag(BiasFlag(
category="prestige_bias",
severity="warning",
description=f"University prestige detected: '{inst}'. Must not influence scoring.",
field_path=f"candidate_features.education[{i}].institution",
recommendation="Use degree field and level only, not institution name.",
))
# Check for age-inferrable data
positions = candidate_features.get("experience_profile", {}).get("positions", [])
if positions:
earliest = min(
(p.get("start_year", 9999) for p in positions if p.get("start_year")),
default=9999,
)
if earliest < 9999:
result.add_flag(BiasFlag(
category="age_proxy",
severity="info",
description=f"Earliest career date ({earliest}) can be used to infer age.",
field_path="candidate_features.experience_profile.positions",
recommendation="Use total_years and relevant_years for scoring, not start dates.",
))
result.summary = self._summarize(result)
return result
def audit_output(self, final_output: dict) -> BiasAuditResult:
"""Scan final output for bias in reasoning."""
result = BiasAuditResult()
# Scan reasoning text for prestige or demographic mentions
reasoning = final_output.get("reasoning_summary", "")
signals = (
final_output.get("positive_signals", [])
+ final_output.get("risk_signals", [])
)
        # Join with spaces so patterns cannot match across fragment boundaries.
        all_text = " ".join([reasoning, *signals])
for pattern in PRESTIGE_INDICATORS:
if re.search(pattern, all_text, re.IGNORECASE):
result.add_flag(BiasFlag(
category="prestige_bias",
severity="critical",
description="University prestige mentioned in output reasoning.",
field_path="reasoning/signals",
recommendation="Remove prestige references. Use skill and experience evidence only.",
))
for pattern, signal_type in DEMOGRAPHIC_PATTERNS:
if re.search(pattern, all_text, re.IGNORECASE):
result.add_flag(BiasFlag(
category="demographic_leak",
severity="critical",
description=f"Demographic signal ({signal_type}) found in output.",
field_path="reasoning/signals",
recommendation="Remove all demographic references from output.",
))
result.summary = self._summarize(result)
return result
def _check_demographic_signals(self, text: str, source: str, result: BiasAuditResult):
for pattern, signal_type in DEMOGRAPHIC_PATTERNS:
if re.search(pattern, text, re.IGNORECASE):
severity = "info" if signal_type == "veteran_status" else "warning"
result.add_flag(BiasFlag(
category="demographic_leak",
severity=severity,
description=f"Demographic signal ({signal_type}) detected in {source}.",
field_path=source,
recommendation=f"Ensure {signal_type} is not used in scoring.",
))
def _check_proxy_variables(self, text: str, source: str, result: BiasAuditResult):
for pattern, proxy_type, recommendation in PROXY_PATTERNS:
if re.search(pattern, text, re.IGNORECASE):
result.add_flag(BiasFlag(
category="proxy_variable",
severity="warning",
description=f"Proxy variable ({proxy_type}) detected in {source}.",
field_path=source,
recommendation=recommendation,
))
def _summarize(self, result: BiasAuditResult) -> str:
if not result.flags:
return "No bias risks detected."
critical = sum(1 for f in result.flags if f.severity == "critical")
warnings = sum(1 for f in result.flags if f.severity == "warning")
info = sum(1 for f in result.flags if f.severity == "info")
parts = []
if critical:
parts.append(f"{critical} critical")
if warnings:
parts.append(f"{warnings} warnings")
if info:
parts.append(f"{info} info")
return f"Bias audit: {', '.join(parts)} flag(s) found."