File size: 13,439 Bytes
d7d1833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
"""
Data loading utilities for bias evaluation framework.

This module handles all file I/O operations with proper error handling and validation.
Supports both legacy 4-field format and full AI BRIDGE 29-field schema.
Includes automatic lexicon validation on load.
"""
import csv
import json
from pathlib import Path
from typing import List, Dict, Any, Optional

from .models import (
    GroundTruthSample, Language, BiasCategory, BiasLabel,
    StereotypeCategory, TargetGender, Explicitness, Sentiment,
    SafetyFlag, QAStatus
)
from .lexicon_validator import (
    LexiconValidator, ValidationReport, LexiconValidationError,
    validate_lexicon_on_load
)
from config import lexicon_filename, ground_truth_filename


class DataLoadError(Exception):
    """Raised when a data file cannot be read or contains invalid records."""


class GroundTruthLoader:
    """Handles loading and validation of ground truth datasets.

    Reads per-language CSV files and converts each row into a
    GroundTruthSample. Supports both the legacy 4-field format and the
    extended AI BRIDGE schema (detected per-row by the presence of
    'target_gender' or 'bias_label' columns).
    """

    def __init__(self, data_dir: Path = Path("eval")):
        """
        Initialize the ground truth loader.

        Args:
            data_dir: Directory containing ground truth files
        """
        self.data_dir = data_dir

    def load_ground_truth(self, language: Language) -> List[GroundTruthSample]:
        """
        Load ground truth samples for a specific language.

        Args:
            language: Language to load ground truth for

        Returns:
            List of validated ground truth samples

        Raises:
            DataLoadError: If file cannot be loaded or data is invalid
        """
        file_path = self._get_ground_truth_path(language)

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                samples = []

                # Row numbers start at 2: row 1 of the file is the CSV header.
                for row_num, row in enumerate(reader, start=2):
                    try:
                        samples.append(self._parse_ground_truth_row(row))
                    except Exception as e:
                        raise DataLoadError(
                            f"Invalid data in {file_path} at row {row_num}: {e}"
                        ) from e

                return samples

        except FileNotFoundError:
            raise DataLoadError(f"Ground truth file not found: {file_path}")
        except DataLoadError:
            # Row-level errors already carry the precise file/row context;
            # re-raise as-is instead of wrapping them a second time (the
            # previous code let the generic handler below double-wrap them).
            raise
        except Exception as e:
            raise DataLoadError(f"Failed to load ground truth from {file_path}: {e}") from e

    def _get_ground_truth_path(self, language: Language) -> Path:
        """Get the file path for ground truth data."""
        filename = ground_truth_filename(language.value)
        return self.data_dir / filename

    def _parse_ground_truth_row(self, row: Dict[str, str]) -> GroundTruthSample:
        """
        Parse a single CSV row into a GroundTruthSample.

        Supports both legacy 4-field format and full AI BRIDGE schema.
        """
        # Core required fields. NOTE(review): csv.DictReader already removes
        # CSV quoting; this strip('"') additionally removes literal leading/
        # trailing quote characters embedded in the text itself — presumably
        # intentional for this dataset, confirm against the source files.
        text = row['text'].strip('"')
        has_bias = row['has_bias'].lower() == 'true'
        bias_category = BiasCategory(row['bias_category'])
        expected_correction = row.get('expected_correction', '')

        # Presence of either column marks the AI BRIDGE extended format.
        is_extended = 'target_gender' in row or 'bias_label' in row

        if is_extended:
            return GroundTruthSample(
                text=text,
                has_bias=has_bias,
                bias_category=bias_category,
                expected_correction=expected_correction,
                # AI BRIDGE metadata fields (free-text; missing columns -> None)
                id=row.get('id'),
                language=row.get('language'),
                script=row.get('script'),
                country=row.get('country'),
                region_dialect=row.get('region_dialect'),
                source_type=row.get('source_type'),
                source_ref=row.get('source_ref'),
                collection_date=row.get('collection_date'),
                translation=row.get('translation'),
                domain=row.get('domain'),
                topic=row.get('topic'),
                theme=row.get('theme'),
                sensitive_characteristic=row.get('sensitive_characteristic'),
                # AI BRIDGE bias annotation fields (parsed leniently: any
                # unrecognized value becomes None rather than failing the row)
                target_gender=self._parse_enum(row.get('target_gender'), TargetGender),
                bias_label=self._parse_enum(row.get('bias_label'), BiasLabel),
                stereotype_category=self._parse_enum(row.get('stereotype_category'), StereotypeCategory),
                explicitness=self._parse_enum(row.get('explicitness'), Explicitness),
                bias_severity=self._parse_int(row.get('bias_severity')),
                sentiment_toward_referent=self._parse_enum(row.get('sentiment_toward_referent'), Sentiment),
                device=row.get('device'),
                # Quality and safety fields
                safety_flag=self._parse_enum(row.get('safety_flag'), SafetyFlag),
                pii_removed=self._parse_bool(row.get('pii_removed')),
                annotator_id=row.get('annotator_id'),
                qa_status=self._parse_enum(row.get('qa_status'), QAStatus),
                approver_id=row.get('approver_id'),
                cohen_kappa=self._parse_float(row.get('cohen_kappa')),
                notes=row.get('notes'),
                eval_split=row.get('eval_split')
            )
        else:
            # Legacy 4-field format
            return GroundTruthSample(
                text=text,
                has_bias=has_bias,
                bias_category=bias_category,
                expected_correction=expected_correction
            )

    def _parse_enum(self, value: Optional[str], enum_class) -> Optional[Any]:
        """Parse a string into an enum member, returning None if absent/invalid.

        Matching is case-insensitive against both each member's value and its
        name. Underscores in the input also match hyphens in member values
        (e.g. input "NON_BINARY" matches a member value of "non-binary").
        """
        if not value or value.upper() in ('NEEDS_ANNOTATION', 'N/A', 'NONE'):
            return None
        normalized = value.lower()
        hyphenated = normalized.replace('_', '-')
        for member in enum_class:
            # Values may legitimately use hyphens, so try both spellings.
            if member.value.lower() in (normalized, hyphenated):
                return member
            # BUG FIX: the previous code compared the hyphenated input against
            # member.name, but Python enum names can never contain hyphens, so
            # multi-word names (e.g. NEEDS_REVIEW) could never match by name.
            if member.name.lower() == normalized:
                return member
        return None

    def _parse_int(self, value: Optional[str]) -> Optional[int]:
        """Parse a string to int, returning None if empty/'N/A'/invalid."""
        if not value or value in ('', 'N/A'):
            return None
        try:
            return int(value)
        except ValueError:
            return None

    def _parse_float(self, value: Optional[str]) -> Optional[float]:
        """Parse a string to float, returning None if empty/'N/A'/invalid."""
        if not value or value in ('', 'N/A'):
            return None
        try:
            return float(value)
        except ValueError:
            return None

    def _parse_bool(self, value: Optional[str]) -> Optional[bool]:
        """Parse a string to bool ('true'/'1'/'yes' => True), None if absent."""
        if not value or value in ('', 'N/A'):
            return None
        return value.lower() in ('true', '1', 'yes')


class RulesLoader:
    """Handles loading bias detection rules from CSV files with validation."""

    def __init__(self, rules_dir: Path = Path("rules"), validate: bool = True,
                 strict_validation: bool = False):
        """
        Initialize the rules loader.

        Args:
            rules_dir: Directory containing rule files
            validate: If True, validates lexicons before loading
            strict_validation: If True, warnings become errors during validation
        """
        self.rules_dir = rules_dir
        self.validate = validate
        self.strict_validation = strict_validation
        self._validator = LexiconValidator(strict_mode=strict_validation)
        self._validation_reports: Dict[str, ValidationReport] = {}

    def get_validation_report(self, language: Language) -> Optional[ValidationReport]:
        """Get the validation report for a language if available."""
        return self._validation_reports.get(language.value)

    def load_rules(self, language: Language) -> List[Dict[str, str]]:
        """
        Load bias detection rules for a specific language.

        Args:
            language: Language to load rules for

        Returns:
            List of rule dictionaries with AI BRIDGE extended fields

        Raises:
            DataLoadError: If rules cannot be loaded
            LexiconValidationError: If validation fails (when validate=True)
        """
        file_path = self._get_rules_path(language)

        if self.validate:
            self._check_lexicon(language, file_path)

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                # Keep every row that names a biased term; neutral_primary may
                # be empty for deletion patterns.
                return [
                    self._row_to_rule(row)
                    for row in csv.DictReader(f)
                    if row.get('biased')
                ]

        except FileNotFoundError:
            raise DataLoadError(f"Rules file not found: {file_path}")
        except Exception as e:
            raise DataLoadError(f"Failed to load rules from {file_path}: {e}") from e

    def _check_lexicon(self, language: Language, file_path: Path) -> None:
        """Validate the lexicon file, record the report, and raise on errors."""
        report = self._validator.validate_file(file_path)
        self._validation_reports[language.value] = report

        if not report.is_valid:
            # Surface the error-level issues before failing.
            print(f"\n⚠️  Lexicon validation issues for {language.value}:")
            for issue in report.issues:
                if issue.severity.value == "error":
                    print(f"   ❌ Row {issue.row_number}: {issue.message}")
            raise LexiconValidationError(report)

        if report.warning_count > 0:
            print(f"\n⚠️  Lexicon warnings for {language.value}: {report.warning_count} warnings")

    def _row_to_rule(self, row: Dict[str, str]) -> Dict[str, str]:
        """Convert one CSV row into a rule dict, filling schema defaults."""
        return {
            'biased': row['biased'],
            'neutral_primary': row.get('neutral_primary', ''),
            'severity': row.get('severity', 'replace'),
            'pos': row.get('pos', 'noun'),
            'tags': row.get('tags', ''),
            # AI BRIDGE extended fields
            'bias_label': row.get('bias_label', 'stereotype'),
            'stereotype_category': row.get('stereotype_category', 'profession'),
            'explicitness': row.get('explicitness', 'explicit'),
            # Language-specific fields
            'ngeli': row.get('ngeli', ''),
            'number': row.get('number', ''),
            'requires_agreement': row.get('requires_agreement', 'false'),
            'scope': row.get('scope', ''),
            'register': row.get('register', 'formal'),
        }

    def _get_rules_path(self, language: Language) -> Path:
        """Get the file path for rules data."""
        return self.rules_dir / lexicon_filename(language.value)


class ResultsWriter:
    """Handles writing evaluation results (CSV and JSON reports) to files."""

    def __init__(self, results_dir: Path = Path("eval/results")):
        """
        Initialize the results writer.

        Args:
            results_dir: Directory to write results to (created if missing)
        """
        self.results_dir = results_dir
        self.results_dir.mkdir(parents=True, exist_ok=True)

    def write_csv_report(self, results: List[Dict[str, Any]], filename: str) -> Path:
        """
        Write evaluation results to CSV file.

        The header is derived from the keys of the first result dict; all
        rows are assumed to share that schema. An empty ``results`` list
        produces an empty file (no header row).

        Args:
            results: List of result dictionaries (uniform keys)
            filename: Name of output file

        Returns:
            Path to written file

        Raises:
            DataLoadError: If file cannot be written
        """
        file_path = self.results_dir / filename

        try:
            with open(file_path, 'w', newline='', encoding='utf-8') as f:
                if results:
                    writer = csv.DictWriter(f, fieldnames=list(results[0].keys()))
                    writer.writeheader()
                    writer.writerows(results)

            return file_path

        except Exception as e:
            raise DataLoadError(f"Failed to write CSV report to {file_path}: {e}") from e

    def write_json_report(self, data: Dict[str, Any], filename: str) -> Path:
        """
        Write data to JSON file (UTF-8, pretty-printed, non-ASCII preserved).

        Args:
            data: Data to write (must be JSON-serializable)
            filename: Name of output file

        Returns:
            Path to written file

        Raises:
            DataLoadError: If file cannot be written or data is not serializable
        """
        file_path = self.results_dir / filename

        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)

            return file_path

        except Exception as e:
            raise DataLoadError(f"Failed to write JSON report to {file_path}: {e}") from e