"""
Simplified data models for bias evaluation framework without external dependencies.
This module defines the data structures used throughout the evaluation system
using only standard library components.
AI BRIDGE Compliance: Implements bias constructs from the AI BRIDGE guidelines
including stereotype, counter-stereotype, derogation, and neutral classifications.
"""
from enum import Enum
from typing import List, Dict, Any, Optional
from dataclasses import dataclass, field
class BiasCategory(str, Enum):
    """Enumeration of bias categories for classification (detection mechanism).

    Each member names the linguistic mechanism through which bias was
    detected; NONE marks samples with no detected mechanism.
    """

    OCCUPATION = "occupation"
    PRONOUN_ASSUMPTION = "pronoun_assumption"
    PRONOUN_GENERIC = "pronoun_generic"
    HONORIFIC = "honorific"
    MORPHOLOGY = "morphology"
    NONE = "none"
    # Kept after NONE to preserve the original enum iteration order for callers.
    # Fixed: was `STEREOTYPE="stereotype"` (missing spaces around "=", PEP 8).
    STEREOTYPE = "stereotype"
class BiasLabel(str, Enum):
    """AI BRIDGE representational-bias label for a text sample."""

    # Reinforces common, often oversimplified beliefs about a group.
    STEREOTYPE = "stereotype"
    # Challenges or contradicts common stereotypes.
    COUNTER_STEREOTYPE = "counter-stereotype"
    # Language that demeans or disparages a group.
    DEROGATION = "derogation"
    # No bias or stereotype present.
    NEUTRAL = "neutral"
class StereotypeCategory(str, Enum):
    """AI BRIDGE stereotype category: the thematic area where a gender
    stereotype manifests. NONE marks samples with no applicable theme."""

    PROFESSION = "profession"
    FAMILY_ROLE = "family_role"
    LEADERSHIP = "leadership"
    EDUCATION = "education"
    RELIGION_CULTURE = "religion_culture"
    PROVERB_IDIOM = "proverb_idiom"
    DAILY_LIFE = "daily_life"
    APPEARANCE = "appearance"
    CAPABILITY = "capability"
    NONE = "none"
class TargetGender(str, Enum):
    """AI BRIDGE target gender: who is talked about, referenced, or
    implied in the text."""

    FEMALE = "female"
    MALE = "male"
    NEUTRAL = "neutral"
    MIXED = "mixed"      # more than one gender referenced
    NONBINARY = "nonbinary"
    UNKNOWN = "unknown"  # referent's gender cannot be determined
class Explicitness(str, Enum):
    """AI BRIDGE explicitness: whether the bias is directly stated
    (EXPLICIT) or only implied through context (IMPLICIT)."""

    EXPLICIT = "explicit"
    IMPLICIT = "implicit"
class Sentiment(str, Enum):
    """Emotional tone expressed toward the gendered referent."""

    POSITIVE = "positive"
    NEUTRAL = "neutral"
    NEGATIVE = "negative"
class SafetyFlag(str, Enum):
    """Content safety classification for a sample."""

    SAFE = "safe"            # publishable as-is
    SENSITIVE = "sensitive"  # usable with care
    REJECT = "reject"        # excluded from use
class QAStatus(str, Enum):
    """Quality-assurance status assigned to an annotation."""

    GOLD = "gold"                  # highest-trust reference annotation
    PASSED = "passed"              # cleared QA review
    NEEDS_REVIEW = "needs_review"  # flagged for another look
    REJECTED = "rejected"          # failed QA review
class Language(str, Enum):
    """Languages supported for bias detection (values are ISO 639-1 codes)."""

    ENGLISH = "en"
    SWAHILI = "sw"
    FRENCH = "fr"
    GIKUYU = "ki"
@dataclass
class GroundTruthSample:
    """
    Single ground truth test case for evaluation.
    Supports both legacy 4-field format and full AI BRIDGE 29-field format.

    Only the four core fields are required; every AI BRIDGE extension field
    defaults to None so legacy records remain constructible unchanged.
    Field order is part of the positional-constructor interface — do not reorder.
    """
    # Core required fields (legacy 4-field format)
    text: str  # raw sample text being evaluated
    has_bias: bool  # ground-truth label: text contains bias
    bias_category: BiasCategory  # detection mechanism (see BiasCategory)
    expected_correction: str  # reference corrected version of `text`
    # AI BRIDGE extended fields (optional for backward compatibility)
    id: Optional[str] = None  # sample identifier; shadows builtin id() within class scope
    language: Optional[str] = None  # language code — presumably matches Language values; confirm with callers
    script: Optional[str] = None  # writing script of the text
    country: Optional[str] = None
    region_dialect: Optional[str] = None
    source_type: Optional[str] = None  # provenance category of the sample
    source_ref: Optional[str] = None  # citation/reference for the source
    collection_date: Optional[str] = None  # date collected; format not enforced here
    translation: Optional[str] = None  # translation of `text`; target language not recorded here
    domain: Optional[str] = None
    topic: Optional[str] = None
    theme: Optional[str] = None
    sensitive_characteristic: Optional[str] = None
    # AI BRIDGE bias annotation fields
    target_gender: Optional[TargetGender] = None
    bias_label: Optional[BiasLabel] = None
    stereotype_category: Optional[StereotypeCategory] = None
    explicitness: Optional[Explicitness] = None
    bias_severity: Optional[int] = None  # 1-3 scale
    sentiment_toward_referent: Optional[Sentiment] = None
    device: Optional[str] = None  # rhetorical device: metaphor, proverb, sarcasm, etc.
    # Quality and safety fields
    safety_flag: Optional[SafetyFlag] = None
    pii_removed: Optional[bool] = None  # whether PII was scrubbed from the text
    annotator_id: Optional[str] = None
    qa_status: Optional[QAStatus] = None
    approver_id: Optional[str] = None  # NOTE(review): presumably set when qa_status is approved — confirm
    cohen_kappa: Optional[float] = None  # inter-annotator agreement score
    notes: Optional[str] = None
    eval_split: Optional[str] = None  # train, validation, test
@dataclass
class BiasDetectionResult:
    """Result of bias detection on a single text sample.

    Core fields are always populated; the AI BRIDGE extension fields default
    to None for detectors that only produce a binary decision plus edits.
    """
    text: str  # the input text that was analyzed
    has_bias_detected: bool  # detector's binary verdict
    detected_edits: List[Dict[str, str]]  # suggested edits; dict schema defined by the detector — not visible here
    # AI BRIDGE extended detection results
    bias_label: Optional[BiasLabel] = None
    stereotype_category: Optional[StereotypeCategory] = None
    target_gender: Optional[TargetGender] = None
    explicitness: Optional[Explicitness] = None
    confidence: Optional[float] = None  # detector confidence; range not enforced here (presumably 0-1)
@dataclass
class EvaluationMetrics:
    """Binary-classification performance metrics for bias detection.

    Holds the summary scores together with the raw confusion-matrix counts.
    Field order is part of the positional-constructor interface.
    """
    precision: float   # TP / (TP + FP)
    recall: float      # TP / (TP + FN)
    f1_score: float    # harmonic mean of precision and recall
    true_positives: int
    false_positives: int
    false_negatives: int
    true_negatives: int
@dataclass
class LanguageEvaluationResult:
    """Complete evaluation results for a single language.

    Aggregates overall metrics with a per-category breakdown.
    """
    language: Language  # language these results were computed for
    overall_metrics: EvaluationMetrics  # metrics across all samples for this language
    category_metrics: Dict[BiasCategory, EvaluationMetrics]  # per-category breakdown
    total_samples: int  # number of samples evaluated
@dataclass
class FailureCase:
    """Analysis of a failed prediction case.

    Captures the disagreement between expected and predicted labels along
    with a human-readable diagnosis.
    """
    failure_type: str  # failure classification; vocabulary defined by the analysis code, not here
    input_text: str  # the text that was misclassified
    expected: bool  # ground-truth has_bias label
    predicted: bool  # detector's has_bias verdict
    category: BiasCategory  # bias category of the ground-truth sample
    diagnosis: str  # free-text explanation of why the prediction failed
    language: Language  # language of the sample