File size: 2,917 Bytes
d7d1833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python3
"""
Simple baseline gender bias detector using basic keyword matching.
Used as sanity check baseline for comparison with rule-based approach.
"""

import csv
import re
import unicodedata
from typing import List, Tuple, Dict

class SimpleBaselineDetector:
    """Basic keyword-based bias detector used as a baseline.

    A text is flagged as soon as it contains any of the gendered keywords
    registered for its language as a whole word (case-insensitively).
    """

    def __init__(self):
        # Language code -> gendered keywords that trigger a positive detection.
        self.gendered_keywords = {
            'en': ['he', 'she', 'his', 'her', 'him', 'chairman', 'waitress', 'policeman', 'businessman'],
            'sw': ['yeye', 'mwanaume', 'mwanamke', 'baba', 'mama'],
            'ha': ['shi', 'ita', 'namiji', 'mace'],
            'ig': ['nwoke', 'nwanyi', 'ya', 'o'],
            'yo': ['ọkunrin', 'obinrin', 'o', 'oun']
        }

    @staticmethod
    def _normalize(value: str) -> str:
        """Lower-case *value* and compose it to Unicode NFC form."""
        # Without a common normal form, a decomposed "o" + combining dot below
        # never equals the precomposed letter, and the bare "o" before the
        # combining mark is wrongly seen as a standalone word by ``\b``.
        return unicodedata.normalize('NFC', value.lower())

    def detect_bias(self, text: str, language: str) -> bool:
        """Return True if *text* contains any gendered keyword for *language*.

        Args:
            text: The sentence to inspect.
            language: Key into ``self.gendered_keywords`` (e.g. ``'en'``).

        Returns:
            True when at least one keyword occurs as a whole word in *text*;
            False otherwise, including when *language* has no keyword list.
        """
        keywords = self.gendered_keywords.get(language)
        if not keywords:
            return False

        haystack = self._normalize(text)
        # re.escape keeps keywords literal even if one contains a regex
        # metacharacter; the re module caches the compiled patterns.
        return any(
            re.search(r'\b' + re.escape(self._normalize(keyword)) + r'\b', haystack)
            for keyword in keywords
        )

def evaluate_baseline(ground_truth_file: str, language: str) -> Dict:
    """Evaluate the baseline detector against a ground-truth CSV file.

    The CSV must have a header row with at least the columns ``text`` and
    ``has_bias``. A row counts as biased when its ``has_bias`` value is
    ``true`` (case-insensitive, surrounding whitespace ignored).

    Args:
        ground_truth_file: Path to the ground-truth CSV file.
        language: Language code passed to the detector for every row.

    Returns:
        A dict with the keys ``language``, ``precision``, ``recall``, ``f1``
        and the confusion-matrix counts ``tp``, ``fp``, ``tn``, ``fn``.
        Precision, recall and F1 are 0 when their denominator is zero.

    Raises:
        FileNotFoundError: If *ground_truth_file* does not exist.
        KeyError: If a required column is missing from the file.
    """
    detector = SimpleBaselineDetector()

    tp = fp = tn = fn = 0

    # newline='' is what the csv module requires so that newlines embedded in
    # quoted fields are parsed correctly; utf-8-sig transparently drops a
    # leading BOM that would otherwise become part of the first column name.
    with open(ground_truth_file, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            text = row['text'].strip('"')
            # DictReader fills missing trailing fields with None, hence `or ''`.
            actual_bias = (row['has_bias'] or '').strip().lower() == 'true'
            predicted_bias = detector.detect_bias(text, language)

            if actual_bias and predicted_bias:
                tp += 1
            elif not actual_bias and predicted_bias:
                fp += 1
            elif not actual_bias and not predicted_bias:
                tn += 1
            else:  # actual_bias and not predicted_bias
                fn += 1

    # Guard every ratio against an empty denominator (e.g. no positives).
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    return {
        'language': language,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'tp': tp,
        'fp': fp,
        'tn': tn,
        'fn': fn
    }

if __name__ == "__main__":
    # Print one summary line per supported language; a missing ground-truth
    # file is reported instead of aborting the whole run.
    print("Baseline Evaluation Results:")
    print("=" * 50)

    for code in ('en', 'sw', 'ha', 'ig', 'yo'):
        label = code.upper()
        try:
            metrics = evaluate_baseline(f'ground_truth_{code}.csv', code)
        except FileNotFoundError:
            print(f"{label}: File not found")
        else:
            print(f"{label}: F1={metrics['f1']:.3f}, P={metrics['precision']:.3f}, R={metrics['recall']:.3f}")