#!/usr/bin/env python3
import csv
from pathlib import Path
from config import lexicon_filename, ground_truth_filename
def load_rules(lang):
    """Load the lowercased bias terms for one language.

    Reads the CSV lexicon at ``rules/<lexicon_filename(lang)>`` (relative to
    the current working directory) and collects its ``biased`` column.

    Args:
        lang: Language code passed to ``lexicon_filename``.

    Returns:
        A list of lowercased strings, one for every row whose ``biased``
        cell is non-empty, in file order.

    Raises:
        OSError: If the lexicon file cannot be opened.
        UnicodeDecodeError: If the lexicon file is not valid UTF-8.
    """
    rules_path = Path("rules") / lexicon_filename(lang)
    # Explicit UTF-8 so non-ASCII terms decode the same on every platform
    # instead of depending on the locale's default encoding; newline="" is
    # what the csv module asks for so quoted line breaks are parsed correctly.
    with open(rules_path, "r", encoding="utf-8", newline="") as f:
        return [
            row["biased"].lower()
            for row in csv.DictReader(f)
            if row.get("biased")  # skips rows with a missing or empty cell
        ]
def detect_bias_simple(text, lang, rules=None):
    """Report whether *text* contains any bias term as a substring.

    The text is lowercased and each rule is tested with a plain substring
    check, so a rule also matches inside longer words.

    Args:
        text: The text to inspect.
        lang: Language code; used to load the lexicon via ``load_rules``
            when *rules* is not supplied.
        rules: Optional iterable of already-lowercased terms. Passing it
            lets a caller that checks many texts load the lexicon once
            instead of re-reading it from disk on every call. When
            ``None`` (the default) the lexicon for *lang* is loaded.

    Returns:
        ``True`` if at least one rule occurs in the lowercased text,
        ``False`` otherwise (including when there are no rules).
    """
    if rules is None:
        rules = load_rules(lang)
    text_lower = text.lower()
    return any(rule in text_lower for rule in rules)
def analyze_failures(langs=None):
    """Print the false negatives of the rule-based detector per language.

    For each language, the ground-truth CSV at
    ``eval/<ground_truth_filename(lang)>`` is read, every sample labelled as
    biased is run through ``detect_bias_simple``, and the samples that were
    not detected are reported: a count, the first five texts, and a summary
    of how many more there are.

    Args:
        langs: Optional iterable of language codes to analyze. Defaults to
            ``['en', 'sw', 'ha', 'yo', 'ig']``.

    Returns:
        None. Results are written to standard output.

    Raises:
        OSError: If a ground-truth file cannot be opened.
        KeyError: If a row lacks the ``text`` or ``has_bias`` column.
    """
    if langs is None:
        langs = ['en', 'sw', 'ha', 'yo', 'ig']

    for lang in langs:
        print(f"\n=== {lang.upper()} FAILURE ANALYSIS ===")

        # Load ground truth. UTF-8 is forced so non-ASCII sample text does
        # not depend on the platform's default encoding; newline='' is the
        # csv module's documented way to open files.
        gt_path = Path("eval") / ground_truth_filename(lang)
        with open(gt_path, 'r', encoding='utf-8', newline='') as f:
            samples = [
                {
                    'text': row['text'].strip('"'),
                    # Tolerate stray whitespace around the label so
                    # " true" is not silently read as False.
                    'expected': row['has_bias'].strip().lower() == 'true',
                }
                for row in csv.DictReader(f)
            ]

        # Find false negatives: labelled biased but not flagged. The
        # detector is only invoked for samples expected to be biased.
        false_negatives = [
            sample['text']
            for sample in samples
            if sample['expected']
            and not detect_bias_simple(sample['text'], lang)
        ]

        print(f"False Negatives: {len(false_negatives)}")

        # Show top 5
        for i, text in enumerate(false_negatives[:5], 1):
            print(f"{i}. \"{text}\"")

        if len(false_negatives) > 5:
            print(f"... and {len(false_negatives) - 5} more")
if __name__ == "__main__":
    # Run the analysis only when executed as a script, not on import.
    analyze_failures()