| """ |
| ML-based bias detector using transformer models for African languages |
| """ |
| import re |
| from typing import Dict, List, Optional |
| from .models import BiasDetectionResult, Language |
|
|
class MLBiasDetector:
    """Keyword-driven stand-in for a machine-learning bias detector.

    No model is loaded or executed here: ``_load_models`` only records a
    model identifier per language, and the "prediction" is simulated by
    counting how many known gendered terms occur in the text.

    Attributes:
        models: Mapping from language to the model identifier recorded for it.
        bias_threshold: Score a text must strictly exceed to be reported as
            biased.
    """

    # Score contributed by each distinct matching term; the sum is capped at 1.0.
    _SCORE_PER_MATCH = 0.4

    # Default cut-off. With 0.4 per match, a single matching term (0.4) stays
    # below it and two matching terms (0.8) exceed it.
    bias_threshold: float = 0.7

    # Model identifier recorded for each supported language.
    _MODEL_NAMES: Dict[Language, str] = {
        Language.ENGLISH: "distilbert-base-uncased",
        Language.SWAHILI: "xlm-roberta-base",
        Language.FRENCH: "xlm-roberta-base",
        Language.GIKUYU: "xlm-roberta-base",
    }

    # Lower-case terms whose presence raises the simulated bias score.
    # Accented terms are stored as real Unicode text (not mis-decoded bytes)
    # so that they can actually match incoming text.
    _BIAS_PATTERNS: Dict[Language, List[str]] = {
        Language.ENGLISH: ['chairman', 'businessman', 'policeman', 'fireman'],
        Language.SWAHILI: ['mwanaume', 'bwana'],
        Language.FRENCH: ['président', 'directeur', 'policier'],
        Language.GIKUYU: ['mũndũ mũrũme', 'mũrũme'],
    }

    # Suggested replacement for each biased term. Only English and Swahili
    # have entries; other languages produce no edits.
    _CORRECTIONS: Dict[Language, Dict[str, str]] = {
        Language.ENGLISH: {
            'chairman': 'chair',
            'businessman': 'businessperson',
            'policeman': 'police officer',
            'fireman': 'firefighter',
        },
        Language.SWAHILI: {
            'mwanaume': 'mtu',
            'bwana': 'mkuu',
        },
    }

    def __init__(self, bias_threshold: float = 0.7):
        """Create a detector.

        Args:
            bias_threshold: Score that must be strictly exceeded for a text
                to be flagged. Defaults to 0.7, the previously hard-coded
                value.
        """
        self.bias_threshold = bias_threshold
        self.models = self._load_models()

    def _load_models(self) -> Dict[Language, str]:
        """Return a fresh mapping of language to model identifier."""
        return dict(self._MODEL_NAMES)

    def detect_bias(self, text: str, language: Language) -> BiasDetectionResult:
        """Score ``text`` and report whether it exceeds the bias threshold.

        Args:
            text: Text to analyse.
            language: Language used to select the term tables.

        Returns:
            A ``BiasDetectionResult`` carrying the original text. When the
            score exceeds ``bias_threshold`` it is marked as biased and
            carries the suggested edits; otherwise it is marked unbiased
            with an empty edit list. For languages without correction
            entries a flagged text still carries an empty edit list.
        """
        score = self._predict_bias_score(text, language)

        if score > self.bias_threshold:
            return BiasDetectionResult(
                text=text,
                has_bias_detected=True,
                detected_edits=self._extract_biased_terms(text, language),
            )

        return BiasDetectionResult(
            text=text,
            has_bias_detected=False,
            detected_edits=[],
        )

    def _predict_bias_score(self, text: str, language: Language) -> float:
        """Simulate a model score from the number of known terms present.

        Matching is a case-insensitive substring test, so a term also
        matches inside longer words. Each distinct term counts once,
        however often it occurs.

        Args:
            text: Text to analyse.
            language: Language whose term list is used; a language without
                a list scores 0.0.

        Returns:
            ``0.4`` per matching term, capped at ``1.0``.
        """
        text_lower = text.lower()
        patterns = self._BIAS_PATTERNS.get(language, [])
        matches = sum(1 for pattern in patterns if pattern in text_lower)
        return min(matches * self._SCORE_PER_MATCH, 1.0)

    def _extract_biased_terms(self, text: str, language: Language) -> List[Dict[str, str]]:
        """List the known biased terms found in ``text`` with replacements.

        Args:
            text: Text to analyse.
            language: Language whose correction table is used.

        Returns:
            One dict per matching term, in table order, with keys ``from``
            (the table's lower-case term, not the casing used in the text),
            ``to`` (the suggested replacement) and ``severity`` (always
            ``'replace'``). Empty when the language has no correction table.
        """
        # Lower-case once instead of once per candidate term.
        text_lower = text.lower()
        lang_corrections = self._CORRECTIONS.get(language, {})

        return [
            {
                'from': biased_term,
                'to': correction,
                'severity': 'replace',
            }
            for biased_term, correction in lang_corrections.items()
            if biased_term.lower() in text_lower
        ]