| """ |
| Bias detection service for evaluating gender bias in text. |
| |
| This module provides a clean interface for bias detection using rules-based matching. |
| Implements AI BRIDGE bias constructs: stereotype, counter-stereotype, derogation, neutral. |
| |
| Enhanced with context-aware correction to preserve meaning when gender terms are used |
| for accuracy (biographical, historical, medical, etc.) rather than bias. |
| """ |
| import logging |
| import re |
| from typing import List, Dict, Any, Optional |
| from pathlib import Path |
|
|
| from .models import ( |
| Language, BiasDetectionResult, BiasLabel, StereotypeCategory, |
| TargetGender, Explicitness |
| ) |
| from .data_loader import RulesLoader, DataLoadError |
| from .ngeli_tracker import NgeliTracker, NounClass |
| from .context_checker import ContextChecker, ContextCheckResult |
|
|
|
|
| |
| logger = logging.getLogger(__name__) |
|
|
|
|
class BiasDetectionError(Exception):
    """Error raised by this module when bias detection cannot be completed."""
|
|
|
|
class BiasDetector:
    """
    Service for detecting gender bias in text using a rules-based approach.

    Detection runs in three stages, in this order (see ``detect_bias``):

    1. Derogation patterns: a match is reported as biased, with a single
       "requires manual review" edit covering the whole text.
    2. Counter-stereotype patterns: a match is reported as not biased,
       with no edits (counter-stereotypes are preserved, not corrected).
    3. Lexicon rules obtained from ``RulesLoader``: every rule whose biased
       term occurs in the text yields an edit, unless the optional context
       checker says the term should not be corrected.

    Implements the AI BRIDGE bias constructs: stereotype, counter-stereotype,
    derogation, neutral.
    """

    # Placeholder used when a rule's term cannot be compiled. An empty
    # negative lookahead can never match, so the rule is effectively disabled
    # while the pattern list stays index-aligned with the rule list.
    _NEVER_MATCH = re.compile(r'(?!)')

    # Per-language counter-stereotype patterns.
    # Each entry is (regex source, stereotype category, target gender).
    COUNTER_STEREOTYPE_PATTERNS = {
        Language.ENGLISH: [
            # Family roles
            (r'\b(father|dad|husband)\b.*(caregiver|nurtur|cook|clean|homemaker|stay.at.home)',
             StereotypeCategory.FAMILY_ROLE, TargetGender.MALE),
            (r'\b(mother|mom|wife)\b.*(breadwinner|provider|work.*(full.time|office)|career)',
             StereotypeCategory.FAMILY_ROLE, TargetGender.FEMALE),
            # Professions
            (r'\b(female|woman|she)\b.*(engineer|mechanic|pilot|ceo|surgeon|firefighter)',
             StereotypeCategory.PROFESSION, TargetGender.FEMALE),
            (r'\b(male|man|he)\b.*(nurse|secretary|receptionist|kindergarten|nanny)',
             StereotypeCategory.PROFESSION, TargetGender.MALE),
            # Leadership
            (r'\b(she|her|woman|female)\b.*(lead|command|chief|director|president|boss)',
             StereotypeCategory.LEADERSHIP, TargetGender.FEMALE),
        ],
        Language.SWAHILI: [
            # Family roles
            (r'\bbaba\b.+\b(anale[zl]a|anapika|anasafisha|anakaa\s+nyumbani)',
             StereotypeCategory.FAMILY_ROLE, TargetGender.MALE),
            (r'\bmama\b.+\b(anafanya\s+kazi\s+ofisi|ni\s+mkurugenzi|anaongoza)',
             StereotypeCategory.FAMILY_ROLE, TargetGender.FEMALE),
            # Professions
            (r'\bmwanamke\b.+\b(mhandisi|rubani|fundi\s+wa\s+magari)',
             StereotypeCategory.PROFESSION, TargetGender.FEMALE),
            (r'\bmwanamume\b.+\b(muuguzi|mkunga|mlezi\s+wa\s+watoto)',
             StereotypeCategory.PROFESSION, TargetGender.MALE),
        ],
    }

    # Per-language derogation patterns, same tuple layout as above.
    # Order matters: the first matching pattern decides the reported category.
    DEROGATION_PATTERNS = {
        Language.ENGLISH: [
            (r'\b(just|only|merely)\s+a\s+(woman|girl|female|housewife)',
             StereotypeCategory.CAPABILITY, TargetGender.FEMALE),
            # Must come before the generic "woman ... cannot" pattern below:
            # that pattern matches everything this one matches, so listing it
            # first would make the LEADERSHIP category unreachable.
            (r'\b(women|woman)\s+(cannot|can\'t)\s+be\s+(good|great|effective)',
             StereotypeCategory.LEADERSHIP, TargetGender.FEMALE),
            (r'\b(woman|women|female|girl).*(can\'t|cannot|unable|incapable|shouldn\'t|could\s+never)',
             StereotypeCategory.CAPABILITY, TargetGender.FEMALE),
            (r'\b(like\s+a\s+girl|throw.like.a.girl|cry.like)',
             StereotypeCategory.CAPABILITY, TargetGender.FEMALE),
            (r'\b(too\s+emotional|hysterical|overreact)',
             StereotypeCategory.CAPABILITY, TargetGender.FEMALE),
            (r'\b(real\s+men\s+don\'t|man\s+up|be\s+a\s+man)',
             StereotypeCategory.CAPABILITY, TargetGender.MALE),
        ],
        Language.SWAHILI: [
            (r'\b(tu|basi)\s+(mwanamke|msichana)',
             StereotypeCategory.CAPABILITY, TargetGender.FEMALE),
            (r'\b(mwanamke|msichana).*(hawezi|haiwezekani|dhaifu)',
             StereotypeCategory.CAPABILITY, TargetGender.FEMALE),
            (r'\b(kama\s+msichana|kama\s+mwanamke)',
             StereotypeCategory.CAPABILITY, TargetGender.FEMALE),
        ],
    }

    def __init__(
        self,
        rules_dir: Path = Path("rules"),
        enable_ngeli_tracking: bool = True,
        enable_context_checking: bool = True
    ):
        """
        Initialize the bias detector.

        Args:
            rules_dir: Directory containing bias detection rules
            enable_ngeli_tracking: Enable Swahili noun class tracking (default: True)
            enable_context_checking: Enable context-aware correction (default: True)
        """
        self.rules_loader = RulesLoader(rules_dir)

        # Lazily filled per-language caches (see _get_rules and
        # _get_compiled_patterns); emptied by clear_cache().
        self._rules_cache: Dict[Language, List[Dict[str, str]]] = {}
        self._compiled_patterns: Dict[Language, List[re.Pattern]] = {}

        # Compiled forms of the class-level pattern tables.
        self._counter_stereotype_patterns: Dict[Language, List[tuple]] = {}
        self._derogation_patterns: Dict[Language, List[tuple]] = {}

        # Optional collaborators; each is None when its feature is disabled.
        self.enable_ngeli_tracking = enable_ngeli_tracking
        self.ngeli_tracker = NgeliTracker() if enable_ngeli_tracking else None

        self.enable_context_checking = enable_context_checking
        self.context_checker = ContextChecker() if enable_context_checking else None

        self._compile_special_patterns()

    def _compile_special_patterns(self) -> None:
        """Compile the counter-stereotype and derogation regex tables (case-insensitive)."""
        tables = (
            (self.COUNTER_STEREOTYPE_PATTERNS, self._counter_stereotype_patterns),
            (self.DEROGATION_PATTERNS, self._derogation_patterns),
        )
        for source, compiled in tables:
            for lang, patterns in source.items():
                compiled[lang] = [
                    (re.compile(p[0], re.IGNORECASE), p[1], p[2]) for p in patterns
                ]

    @staticmethod
    def _first_pattern_match(
        text: str,
        patterns: List[tuple],
        bias_label: BiasLabel
    ) -> Optional[Dict[str, Any]]:
        """
        Return a description of the first pattern that matches ``text``.

        Args:
            text: Text to search
            patterns: Sequence of (compiled pattern, category, gender) tuples
            bias_label: Label to put into the returned description

        Returns:
            Dict with 'bias_label', 'stereotype_category', 'target_gender',
            'explicitness' and 'matched_pattern', or None if nothing matches
        """
        for pattern, category, gender in patterns:
            if pattern.search(text):
                return {
                    'bias_label': bias_label,
                    'stereotype_category': category,
                    'target_gender': gender,
                    'explicitness': Explicitness.EXPLICIT,
                    'matched_pattern': pattern.pattern
                }
        return None

    def _detect_counter_stereotype(self, text: str, language: Language) -> Optional[Dict[str, Any]]:
        """
        Detect counter-stereotype patterns in text.

        Counter-stereotypes challenge or contradict common gender stereotypes.
        These should be preserved, not corrected.

        Returns:
            Match description for the first matching pattern, or None. A
            language without a pattern table never matches.
        """
        return self._first_pattern_match(
            text,
            self._counter_stereotype_patterns.get(language, []),
            BiasLabel.COUNTER_STEREOTYPE
        )

    def _detect_derogation(self, text: str, language: Language) -> Optional[Dict[str, Any]]:
        """
        Detect derogatory language patterns in text.

        Derogation is language that demeans or disparages a gender group.

        Returns:
            Match description for the first matching pattern, or None. A
            language without a pattern table never matches.
        """
        return self._first_pattern_match(
            text,
            self._derogation_patterns.get(language, []),
            BiasLabel.DEROGATION
        )

    def detect_bias(self, text: str, language: Language) -> BiasDetectionResult:
        """
        Detect bias in a text sample.

        Implements AI BRIDGE bias construct detection:
        - stereotype: Reinforces common gender beliefs
        - counter-stereotype: Challenges gender stereotypes (preserved, not corrected)
        - derogation: Language that demeans a gender group
        - neutral: No bias present

        Args:
            text: Text to analyze for bias
            language: Language of the text

        Returns:
            BiasDetectionResult with detection results and AI BRIDGE classifications

        Raises:
            BiasDetectionError: If detection fails. An error that already is a
                BiasDetectionError (e.g. a rule loading failure) is propagated
                unchanged; any other exception is wrapped.
        """
        try:
            # Stage 1: derogation takes priority over everything else.
            derogation_result = self._detect_derogation(text, language)
            if derogation_result:
                return BiasDetectionResult(
                    text=text,
                    has_bias_detected=True,
                    detected_edits=[{
                        'from': text,
                        'to': '[DEROGATORY - requires manual review]',
                        'severity': 'high',
                        'bias_type': 'derogation'
                    }],
                    bias_label=BiasLabel.DEROGATION,
                    stereotype_category=derogation_result['stereotype_category'],
                    target_gender=derogation_result['target_gender'],
                    explicitness=Explicitness.EXPLICIT,
                    confidence=0.9
                )

            # Stage 2: counter-stereotypes are labelled but left untouched.
            counter_result = self._detect_counter_stereotype(text, language)
            if counter_result:
                return BiasDetectionResult(
                    text=text,
                    has_bias_detected=False,
                    detected_edits=[],
                    bias_label=BiasLabel.COUNTER_STEREOTYPE,
                    stereotype_category=counter_result['stereotype_category'],
                    target_gender=counter_result['target_gender'],
                    explicitness=Explicitness.EXPLICIT,
                    confidence=0.85
                )

            # Stage 3: lexicon rules.
            return self._detect_with_rules(text, language)

        except BiasDetectionError:
            # Already the documented error type; wrapping it again would only
            # nest one "Failed to ..." message inside another.
            raise
        except Exception as e:
            raise BiasDetectionError(f"Failed to detect bias in text: {e}") from e

    def _context_skip_record(self, text: str, rule: Dict[str, str]) -> Optional[Dict[str, Any]]:
        """
        Ask the context checker whether a matched rule should be left alone.

        The checker is consulted only when it is enabled and the rule carries
        an 'avoid_when' or 'constraints' value.

        Returns:
            A skipped-edit record if the correction is blocked, otherwise None
        """
        avoid_when = rule.get('avoid_when', '')
        constraints = rule.get('constraints', '')
        if not self.context_checker or not (avoid_when or constraints):
            return None

        biased_term = rule['biased']
        context_result = self.context_checker.check_context(
            text=text,
            biased_term=biased_term,
            avoid_when=avoid_when,
            constraints=constraints
        )
        if context_result.should_correct:
            return None

        logger.debug(
            "Skipped correction for '%s': %s",
            biased_term, context_result.reason
        )
        return {
            'term': biased_term,
            'reason': context_result.reason,
            'blocked_by': context_result.blocked_by.value if context_result.blocked_by else None,
            'confidence': context_result.confidence
        }

    def _detect_with_rules(self, text: str, language: Language) -> BiasDetectionResult:
        """
        Run the lexicon rules for ``language`` against ``text``.

        Returns:
            A STEREOTYPE result if at least one edit was produced, otherwise
            a NEUTRAL result. Swahili noun class analysis and skipped edits
            are attached to the result as ``_ngeli_analysis`` and
            ``_skipped_edits`` when present.
        """
        rules = self._get_rules(language)
        patterns = self._get_compiled_patterns(language)
        track_ngeli = language == Language.SWAHILI and self.ngeli_tracker

        detected_edits = []
        detected_categories = []
        skipped_edits = []

        # patterns[i] belongs to rules[i]; _get_compiled_patterns keeps the
        # two lists the same length.
        for rule, pattern in zip(rules, patterns):
            if not pattern.search(text):
                continue

            # A rule whose replacement equals the biased term would be a no-op edit.
            if rule['biased'] == rule['neutral_primary']:
                continue

            skipped = self._context_skip_record(text, rule)
            if skipped is not None:
                skipped_edits.append(skipped)
                continue

            category = rule.get('stereotype_category', 'profession')
            edit = {
                'from': rule['biased'],
                'to': rule['neutral_primary'],
                'severity': rule['severity'],
                'bias_type': rule.get('bias_label', 'stereotype'),
                'stereotype_category': category
            }

            if track_ngeli:
                ngeli = rule.get('ngeli', '')
                if ngeli:
                    edit['ngeli'] = ngeli
                    self.ngeli_tracker.track_noun(rule['biased'])

            detected_edits.append(edit)
            if category:
                detected_categories.append(category)

        # The first detected category is the primary one; values that are not
        # valid StereotypeCategory members fall back to PROFESSION.
        primary_category = None
        if detected_categories:
            try:
                primary_category = StereotypeCategory(detected_categories[0])
            except (ValueError, KeyError):
                primary_category = StereotypeCategory.PROFESSION

        ngeli_analysis = None
        if track_ngeli:
            ngeli_analysis = self.ngeli_tracker.analyze_text(text)

        has_bias = bool(detected_edits)
        result = BiasDetectionResult(
            text=text,
            has_bias_detected=has_bias,
            detected_edits=detected_edits,
            bias_label=BiasLabel.STEREOTYPE if has_bias else BiasLabel.NEUTRAL,
            stereotype_category=primary_category,
            target_gender=None,
            explicitness=Explicitness.EXPLICIT if has_bias else None,
            confidence=0.85 if has_bias else 0.7
        )

        if ngeli_analysis:
            result._ngeli_analysis = ngeli_analysis

        if skipped_edits:
            result._skipped_edits = skipped_edits

        return result

    def _get_rules(self, language: Language) -> List[Dict[str, str]]:
        """
        Get rules for a language, loading and caching if necessary.

        Raises:
            BiasDetectionError: If the rules loader raises DataLoadError
        """
        if language not in self._rules_cache:
            try:
                self._rules_cache[language] = self.rules_loader.load_rules(language)
            except DataLoadError as e:
                raise BiasDetectionError(f"Failed to load rules for {language}: {e}") from e

        return self._rules_cache[language]

    def _get_compiled_patterns(self, language: Language) -> List[re.Pattern]:
        """
        Get compiled regex patterns for a language, compiling and caching if necessary.

        Every rule's 'biased' term is escaped and matched case-insensitively
        as a whole word or phrase (``\\b`` on both sides).

        Returns:
            One pattern per rule, in rule order. The list always has the same
            length as the rule list: a term that fails to compile gets a
            never-matching placeholder instead of being dropped, so callers
            can safely pair rules and patterns by position.
        """
        if language not in self._compiled_patterns:
            patterns = []
            for rule in self._get_rules(language):
                biased_term = rule['biased']
                try:
                    compiled_pattern = re.compile(
                        r'\b' + re.escape(biased_term) + r'\b', re.IGNORECASE
                    )
                except re.error as e:
                    logger.warning(
                        "Invalid regex pattern for '%s': %s",
                        biased_term, e
                    )
                    compiled_pattern = self._NEVER_MATCH
                patterns.append(compiled_pattern)

            self._compiled_patterns[language] = patterns

        return self._compiled_patterns[language]

    def get_ngeli_statistics(self) -> Optional[Dict[str, int]]:
        """
        Get noun class statistics from tracked Swahili nouns.

        Returns:
            Dictionary mapping noun class codes to counts, or None if tracking disabled
        """
        if self.ngeli_tracker:
            return self.ngeli_tracker.get_statistics()
        return None

    def clear_cache(self) -> None:
        """Clear the rules and patterns cache."""
        self._rules_cache.clear()
        self._compiled_patterns.clear()
|
|
|
|
class BaselineDetector:
    """
    Simple baseline detector for comparison purposes.

    Flags every gendered term from a fixed per-language word list that occurs
    in the text, without rules or context checks.
    """

    def __init__(self):
        """Initialize the baseline detector with its per-language term lists."""
        self.gendered_terms = {
            Language.ENGLISH: ['he', 'she', 'his', 'her', 'him', 'man', 'woman', 'male', 'female', 'boy', 'girl'],
            Language.SWAHILI: ['yeye', 'mwanaume', 'mwanamke', 'mvulana', 'msichana', 'baba', 'mama']
        }

    def detect_bias(self, text: str, language: Language) -> BiasDetectionResult:
        """
        Detect bias using simple gendered term matching.

        Terms are matched case-insensitively as whole words. Plain substring
        matching would report 'he' for "the", 'man' for "woman" or "human",
        and 'male' for "female".

        Args:
            text: Text to analyze
            language: Language of the text

        Returns:
            BiasDetectionResult with one edit per term found, in word-list
            order. A language without a term list yields no edits.
        """
        terms = self.gendered_terms.get(language, [])

        detected_terms = [
            {
                'from': term,
                'to': '[gendered_term]',
                'severity': 'baseline'
            }
            for term in terms
            if re.search(r'\b' + re.escape(term) + r'\b', text, re.IGNORECASE)
        ]

        return BiasDetectionResult(
            text=text,
            has_bias_detected=bool(detected_terms),
            detected_edits=detected_terms
        )