File size: 5,820 Bytes
d7d1833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
"""
Simplified data models for bias evaluation framework without external dependencies.

This module defines the data structures used throughout the evaluation system
using only standard library components.

AI BRIDGE Compliance: Implements bias constructs from the AI BRIDGE guidelines
including stereotype, counter-stereotype, derogation, and neutral classifications.
"""
from enum import Enum
from typing import List, Dict, Any, Optional
from dataclasses import dataclass, field


class BiasCategory(str, Enum):
    """Enumeration of bias categories for classification (detection mechanism).

    These name *how* bias manifests linguistically (the detection mechanism),
    as opposed to BiasLabel, which names the *type* of representational bias.
    Inherits from str so members compare equal to their plain-string values
    and serialize transparently.
    """
    OCCUPATION = "occupation"
    PRONOUN_ASSUMPTION = "pronoun_assumption"
    PRONOUN_GENERIC = "pronoun_generic"
    HONORIFIC = "honorific"
    MORPHOLOGY = "morphology"
    NONE = "none"
    # Kept after NONE to preserve the original declaration (iteration) order.
    STEREOTYPE = "stereotype"


class BiasLabel(str, Enum):
    """
    AI BRIDGE bias label classification.

    Defines the type of representational bias present in text:
    - stereotype: Reinforces common, often oversimplified beliefs about a group
    - counter_stereotype: Challenges or contradicts common stereotypes
    - derogation: Language that demeans or disparages a group
    - neutral: No bias or stereotype present

    Inherits from str so members compare equal to their plain-string values.
    """
    STEREOTYPE = "stereotype"
    # NOTE: the serialized value uses a hyphen ("counter-stereotype"), not an
    # underscore — keep this in mind when round-tripping from external data.
    COUNTER_STEREOTYPE = "counter-stereotype"
    DEROGATION = "derogation"
    NEUTRAL = "neutral"


class StereotypeCategory(str, Enum):
    """
    AI BRIDGE stereotype category classification.

    Thematic areas where gender stereotypes commonly manifest. Use NONE when
    a sample carries no stereotype (e.g. when BiasLabel is neutral).
    """
    PROFESSION = "profession"
    FAMILY_ROLE = "family_role"
    LEADERSHIP = "leadership"
    EDUCATION = "education"
    RELIGION_CULTURE = "religion_culture"
    PROVERB_IDIOM = "proverb_idiom"
    DAILY_LIFE = "daily_life"
    APPEARANCE = "appearance"
    CAPABILITY = "capability"
    NONE = "none"


class TargetGender(str, Enum):
    """
    AI BRIDGE target gender classification.

    Who is being talked about, referenced, or implied in the text.
    MIXED covers text referencing multiple genders; UNKNOWN is for cases
    where the referent's gender cannot be determined from the text.
    """
    FEMALE = "female"
    MALE = "male"
    NEUTRAL = "neutral"
    MIXED = "mixed"
    NONBINARY = "nonbinary"
    UNKNOWN = "unknown"


class Explicitness(str, Enum):
    """
    AI BRIDGE explicitness classification.

    Whether the bias is directly stated (explicit) or implied through
    context (implicit).
    """
    EXPLICIT = "explicit"
    IMPLICIT = "implicit"


class Sentiment(str, Enum):
    """Emotional tone toward the gendered referent (see
    GroundTruthSample.sentiment_toward_referent)."""
    POSITIVE = "positive"
    NEUTRAL = "neutral"
    NEGATIVE = "negative"


class SafetyFlag(str, Enum):
    """Content safety classification for a sample.

    SAFE: publishable as-is; SENSITIVE: handle with care;
    REJECT: excluded from the dataset.
    """
    SAFE = "safe"
    SENSITIVE = "sensitive"
    REJECT = "reject"


class QAStatus(str, Enum):
    """Quality assurance status for annotations.

    GOLD marks reference-quality annotations; PASSED/NEEDS_REVIEW/REJECTED
    track the review pipeline state of ordinary annotations.
    """
    GOLD = "gold"
    PASSED = "passed"
    NEEDS_REVIEW = "needs_review"
    REJECTED = "rejected"


class Language(str, Enum):
    """Supported languages for bias detection.

    Values are ISO 639-1 two-letter language codes.
    """
    ENGLISH = "en"
    SWAHILI = "sw"
    FRENCH = "fr"
    GIKUYU = "ki"


@dataclass
class GroundTruthSample:
    """
    Single ground truth test case for evaluation.

    Supports both legacy 4-field format and full AI BRIDGE 29-field format:
    only the four core fields are required; every extended field defaults to
    None so legacy records still construct without them.
    """
    # Core required fields (legacy 4-field format)
    text: str                    # the sample text being evaluated
    has_bias: bool               # ground truth: whether the text contains bias
    bias_category: BiasCategory  # detection-mechanism category (see BiasCategory)
    expected_correction: str     # reference corrected version of `text`

    # AI BRIDGE extended fields (optional for backward compatibility)
    id: Optional[str] = None               # unique sample identifier
    language: Optional[str] = None         # plain string, not the Language enum — presumably an ISO code; confirm against loader
    script: Optional[str] = None           # writing system — TODO confirm expected code format
    country: Optional[str] = None
    region_dialect: Optional[str] = None
    source_type: Optional[str] = None      # provenance kind of the sample text
    source_ref: Optional[str] = None       # reference/citation for the source
    collection_date: Optional[str] = None  # stored as a string, not a date — format not enforced here
    translation: Optional[str] = None      # presumably an English gloss of `text`; verify against dataset docs
    domain: Optional[str] = None
    topic: Optional[str] = None
    theme: Optional[str] = None
    sensitive_characteristic: Optional[str] = None

    # AI BRIDGE bias annotation fields
    target_gender: Optional[TargetGender] = None
    bias_label: Optional[BiasLabel] = None
    stereotype_category: Optional[StereotypeCategory] = None
    explicitness: Optional[Explicitness] = None
    bias_severity: Optional[int] = None  # 1-3 scale (not validated here)
    sentiment_toward_referent: Optional[Sentiment] = None
    device: Optional[str] = None  # metaphor, proverb, sarcasm, etc.

    # Quality and safety fields
    safety_flag: Optional[SafetyFlag] = None
    pii_removed: Optional[bool] = None     # whether PII was scrubbed from `text`
    annotator_id: Optional[str] = None
    qa_status: Optional[QAStatus] = None
    approver_id: Optional[str] = None
    cohen_kappa: Optional[float] = None    # inter-annotator agreement score
    notes: Optional[str] = None
    eval_split: Optional[str] = None  # train, validation, test


@dataclass
class BiasDetectionResult:
    """Result of bias detection on a single text sample.

    Core fields mirror the legacy detection output; the AI BRIDGE fields are
    optional so detectors that only report presence/edits remain compatible.
    """
    text: str                             # the input text that was analyzed
    has_bias_detected: bool               # detector's verdict
    # One dict per proposed edit; key/value schema is not defined in this
    # module — presumably original-span/replacement pairs; confirm against
    # the detector that produces these.
    detected_edits: List[Dict[str, str]]

    # AI BRIDGE extended detection results (None when the detector does not
    # produce the corresponding classification)
    bias_label: Optional[BiasLabel] = None
    stereotype_category: Optional[StereotypeCategory] = None
    target_gender: Optional[TargetGender] = None
    explicitness: Optional[Explicitness] = None
    confidence: Optional[float] = None    # detector confidence — range not enforced here; presumably 0.0-1.0


@dataclass
class EvaluationMetrics:
    """Evaluation metrics for bias detection performance.

    Holds standard binary-classification metrics alongside the raw
    confusion-matrix counts they summarize. Use :meth:`from_counts` to
    derive precision/recall/F1 consistently from the counts.
    """
    precision: float
    recall: float
    f1_score: float
    true_positives: int
    false_positives: int
    false_negatives: int
    true_negatives: int

    @classmethod
    def from_counts(
        cls,
        true_positives: int,
        false_positives: int,
        false_negatives: int,
        true_negatives: int = 0,
    ) -> "EvaluationMetrics":
        """Build metrics from raw confusion-matrix counts.

        Precision, recall, and F1 fall back to 0.0 when their denominator
        is zero (no predicted positives / no actual positives / both
        precision and recall zero), avoiding ZeroDivisionError.

        Args:
            true_positives: biased samples correctly flagged.
            false_positives: unbiased samples incorrectly flagged.
            false_negatives: biased samples missed by the detector.
            true_negatives: unbiased samples correctly passed (default 0).

        Returns:
            A fully-populated EvaluationMetrics instance.
        """
        predicted_positives = true_positives + false_positives
        actual_positives = true_positives + false_negatives
        precision = true_positives / predicted_positives if predicted_positives else 0.0
        recall = true_positives / actual_positives if actual_positives else 0.0
        pr_sum = precision + recall
        f1 = (2 * precision * recall / pr_sum) if pr_sum else 0.0
        return cls(
            precision=precision,
            recall=recall,
            f1_score=f1,
            true_positives=true_positives,
            false_positives=false_positives,
            false_negatives=false_negatives,
            true_negatives=true_negatives,
        )


@dataclass
class LanguageEvaluationResult:
    """Complete evaluation results for a single language.

    Aggregates an overall metric set plus a per-BiasCategory breakdown
    for all samples evaluated in one language.
    """
    language: Language                                   # which language these results cover
    overall_metrics: EvaluationMetrics                   # metrics across all categories
    category_metrics: Dict[BiasCategory, EvaluationMetrics]  # per-category breakdown
    total_samples: int                                   # number of samples evaluated


@dataclass
class FailureCase:
    """Analysis of a failed prediction case.

    Captures one expected-vs-predicted mismatch together with a free-text
    diagnosis for error analysis.
    """
    failure_type: str        # free-text failure classification — values not enumerated in this module
    input_text: str          # the sample text that was mispredicted
    expected: bool           # ground-truth has_bias value
    predicted: bool          # detector's has_bias verdict
    category: BiasCategory   # ground-truth bias category of the sample
    diagnosis: str           # human/automated explanation of the failure
    language: Language       # language of the sample