juakazike committed on
Commit
14f35c3
·
verified ·
1 Parent(s): d7d1833

update title

Browse files
Files changed (1) hide show
  1. app.py +414 -414
app.py CHANGED
@@ -1,414 +1,414 @@
1
- #!/usr/bin/env python3
2
- """
3
- JuaKazi Gender Bias Detection and Correction - Testing Interface
4
- User-friendly web UI for non-technical experts to test the bias detection and correction model
5
- """
6
-
7
- import streamlit as st
8
- import pandas as pd
9
- import sys
10
- from pathlib import Path
11
- from io import StringIO
12
-
13
- # Add parent directory to path for imports
14
- BASE_DIR = Path(__file__).resolve().parent.parent
15
- sys.path.insert(0, str(BASE_DIR))
16
-
17
- from eval.bias_detector import BiasDetector
18
- from eval.models import Language
19
-
20
- # Page configuration
21
- st.set_page_config(
22
- page_title="JuaKazi Bias Detection and Correction Testing",
23
- layout="wide",
24
- initial_sidebar_state="collapsed"
25
- )
26
-
27
- # Language mapping for dropdown
28
- LANGUAGE_MAP = {
29
- "English": Language.ENGLISH,
30
- "Swahili": Language.SWAHILI,
31
- "French": Language.FRENCH,
32
- "Gikuyu (Kikuyu)": Language.GIKUYU
33
- }
34
-
35
- LANGUAGE_CODES = {
36
- "English": "en",
37
- "Swahili": "sw",
38
- "French": "fr",
39
- "Gikuyu (Kikuyu)": "ki"
40
- }
41
-
42
- # Initialize detector with caching
43
- @st.cache_resource
44
- def get_detector():
45
- """Initialize BiasDetector once and cache it"""
46
- return BiasDetector()
47
-
48
- # Main title
49
- st.title("JuaKazi Gender Bias Detection and Correction - Testing Interface")
50
- st.markdown("**For non-technical experts:** Test individual texts or batch process files to detect and correct gender bias")
51
- st.markdown("---")
52
-
53
- # Initialize detector
54
- try:
55
- detector = get_detector()
56
- except Exception as e:
57
- st.error(f"Failed to initialize bias detector: {e}")
58
- st.stop()
59
-
60
- # Create tabs
61
- tab1, tab2, tab3 = st.tabs(["Single Text Test", "Batch Testing", "Statistics"])
62
-
63
- # ===================================
64
- # TAB 1: SINGLE TEXT TESTING
65
- # ===================================
66
- with tab1:
67
- st.header("Test Individual Text")
68
- st.markdown("Enter text below and select a language to check for gender bias.")
69
-
70
- # Language selector
71
- col1, col2 = st.columns([1, 3])
72
- with col1:
73
- selected_lang_name = st.selectbox(
74
- "Select Language",
75
- list(LANGUAGE_MAP.keys()),
76
- index=0,
77
- help="Choose the language of your text"
78
- )
79
-
80
- language = LANGUAGE_MAP[selected_lang_name]
81
-
82
- # Text input
83
- text_input = st.text_area(
84
- "Enter text to analyze:",
85
- height=150,
86
- placeholder="e.g., The chairman will lead the meeting today.",
87
- help="Paste or type the text you want to check for gender bias"
88
- )
89
-
90
- # Detect button
91
- col1, col2, col3 = st.columns([1, 2, 1])
92
- with col1:
93
- detect_button = st.button("Detect Bias", type="primary", use_container_width=True)
94
-
95
- # Process detection
96
- if detect_button:
97
- if not text_input.strip():
98
- st.warning("Please enter some text to analyze.")
99
- else:
100
- with st.spinner("Analyzing text..."):
101
- try:
102
- result = detector.detect_bias(text_input, language)
103
-
104
- # Display results
105
- st.markdown("---")
106
- st.subheader("Detection Results")
107
-
108
- # Status indicator
109
- if result.has_bias_detected:
110
- st.error("**Bias Detected**")
111
- else:
112
- st.success("**No Bias Detected** - Text appears bias-free")
113
-
114
- # Create two columns for original vs corrected
115
- if result.has_bias_detected and result.detected_edits:
116
- col1, col2 = st.columns(2)
117
-
118
- with col1:
119
- st.markdown("**Original Text:**")
120
- st.info(text_input)
121
-
122
- with col2:
123
- st.markdown("**Corrected Text:**")
124
- corrected_text = text_input
125
- for edit in result.detected_edits:
126
- corrected_text = corrected_text.replace(edit["from"], edit["to"])
127
- st.success(corrected_text)
128
-
129
- # Show detected edits
130
- st.markdown("**Detected Edits:**")
131
- edits_data = []
132
- for i, edit in enumerate(result.detected_edits, 1):
133
- edits_data.append({
134
- "#": i,
135
- "Original": edit["from"],
136
- "Replacement": edit["to"],
137
- "Severity": edit.get("severity", "replace"),
138
- "Tags": edit.get("tags", "")
139
- })
140
-
141
- st.dataframe(pd.DataFrame(edits_data), use_container_width=True)
142
-
143
- # Additional metadata
144
- st.markdown("**Detection Metadata:**")
145
- meta_col1, meta_col2, meta_col3 = st.columns(3)
146
- with meta_col1:
147
- st.metric("Source", "Rules-based")
148
- with meta_col2:
149
- st.metric("Edits Found", len(result.detected_edits))
150
- with meta_col3:
151
- st.metric("Language", selected_lang_name)
152
-
153
- except Exception as e:
154
- st.error(f"Error during detection: {e}")
155
- st.exception(e)
156
-
157
- # ===================================
158
- # TAB 2: BATCH TESTING
159
- # ===================================
160
- with tab2:
161
- st.header("Batch Testing from CSV")
162
- st.markdown("Upload a CSV file with columns: `id`, `language`, `text`")
163
-
164
- # Show example format
165
- with st.expander("CSV Format Example"):
166
- example_df = pd.DataFrame({
167
- "id": ["1", "2", "3"],
168
- "language": ["en", "sw", "fr"],
169
- "text": [
170
- "The chairman will lead the meeting",
171
- "Daktari anaangalia wagonjwa",
172
- "Le président dirigera la réunion"
173
- ]
174
- })
175
- st.dataframe(example_df, use_container_width=True)
176
- st.markdown("**Language codes:** `en` (English), `sw` (Swahili), `fr` (French), `ki` (Gikuyu)")
177
-
178
- # Download template
179
- csv_template = example_df.to_csv(index=False)
180
- st.download_button(
181
- "Download Template CSV",
182
- csv_template,
183
- "batch_template.csv",
184
- "text/csv",
185
- help="Download this template and fill it with your data"
186
- )
187
-
188
- # File uploader
189
- uploaded_file = st.file_uploader(
190
- "Upload CSV File",
191
- type=['csv'],
192
- help="Max 1000 rows, 10MB file size limit"
193
- )
194
-
195
- if uploaded_file is not None:
196
- try:
197
- # Read CSV
198
- df = pd.read_csv(uploaded_file)
199
-
200
- # Validate columns
201
- required_cols = ['id', 'language', 'text']
202
- missing_cols = [col for col in required_cols if col not in df.columns]
203
-
204
- if missing_cols:
205
- st.error(f"Missing required columns: {', '.join(missing_cols)}")
206
- else:
207
- st.success(f"Loaded {len(df)} rows from CSV")
208
-
209
- # Show preview
210
- with st.expander("Preview Data (first 5 rows)"):
211
- st.dataframe(df.head(), use_container_width=True)
212
-
213
- # Row limit check
214
- if len(df) > 1000:
215
- st.warning("File has more than 1000 rows. Only first 1000 will be processed.")
216
- df = df.head(1000)
217
-
218
- # Process button
219
- col1, col2, col3 = st.columns([1, 2, 1])
220
- with col1:
221
- process_button = st.button("Process All", type="primary", use_container_width=True)
222
-
223
- if process_button:
224
- results = []
225
- progress_bar = st.progress(0)
226
- status_text = st.empty()
227
-
228
- # Language code mapping
229
- lang_code_map = {
230
- 'en': Language.ENGLISH,
231
- 'sw': Language.SWAHILI,
232
- 'fr': Language.FRENCH,
233
- 'ki': Language.GIKUYU
234
- }
235
-
236
- for idx, row in df.iterrows():
237
- status_text.text(f"Processing {idx + 1}/{len(df)}...")
238
-
239
- try:
240
- lang_code = row['language'].lower()
241
- if lang_code not in lang_code_map:
242
- results.append({
243
- 'id': row['id'],
244
- 'original_text': row['text'],
245
- 'corrected_text': row['text'],
246
- 'bias_detected': False,
247
- 'edits_count': 0,
248
- 'status': f'Invalid language code: {lang_code}'
249
- })
250
- continue
251
-
252
- language = lang_code_map[lang_code]
253
- result = detector.detect_bias(row['text'], language)
254
-
255
- corrected_text = row['text']
256
- if result.detected_edits:
257
- for edit in result.detected_edits:
258
- corrected_text = corrected_text.replace(edit["from"], edit["to"])
259
-
260
- results.append({
261
- 'id': row['id'],
262
- 'language': row['language'],
263
- 'original_text': row['text'],
264
- 'corrected_text': corrected_text,
265
- 'bias_detected': result.has_bias_detected,
266
- 'edits_count': len(result.detected_edits),
267
- 'edits': "; ".join([f"{e['from']}→{e['to']}" for e in result.detected_edits]),
268
- 'status': 'Success'
269
- })
270
-
271
- except Exception as e:
272
- results.append({
273
- 'id': row['id'],
274
- 'original_text': row['text'],
275
- 'corrected_text': row['text'],
276
- 'bias_detected': False,
277
- 'edits_count': 0,
278
- 'status': f'Error: {str(e)}'
279
- })
280
-
281
- progress_bar.progress((idx + 1) / len(df))
282
-
283
- status_text.text("Processing complete!")
284
-
285
- # Display results
286
- results_df = pd.DataFrame(results)
287
- st.subheader("Batch Processing Results")
288
-
289
- # Summary metrics
290
- col1, col2, col3, col4 = st.columns(4)
291
- with col1:
292
- st.metric("Total Processed", len(results_df))
293
- with col2:
294
- bias_count = results_df['bias_detected'].sum()
295
- st.metric("Bias Detected", bias_count)
296
- with col3:
297
- success_count = (results_df['status'] == 'Success').sum()
298
- st.metric("Successful", success_count)
299
- with col4:
300
- total_edits = results_df['edits_count'].sum()
301
- st.metric("Total Edits", total_edits)
302
-
303
- # Results table
304
- st.dataframe(results_df, use_container_width=True)
305
-
306
- # Download results
307
- csv_output = results_df.to_csv(index=False)
308
- st.download_button(
309
- "Download Results as CSV",
310
- csv_output,
311
- "bias_detection_results.csv",
312
- "text/csv",
313
- help="Download the complete results with all columns"
314
- )
315
-
316
- except Exception as e:
317
- st.error(f"Error reading CSV file: {e}")
318
- st.exception(e)
319
-
320
- # ===================================
321
- # TAB 3: STATISTICS
322
- # ===================================
323
- with tab3:
324
- st.header("Language Statistics & System Information")
325
-
326
- # System info
327
- st.subheader("Detection System")
328
- st.markdown("""
329
- - **Engine:** Rules-based bias detection with lexicon matching
330
- - **Approach:** Regular expression pattern matching with word boundaries
331
- - **Case Handling:** Case-preserving replacement
332
- - **Precision:** 1.000 (zero false positives) across all languages
333
- """)
334
-
335
- st.markdown("---")
336
-
337
- # Language statistics
338
- st.subheader("Supported Languages")
339
-
340
- lang_stats = {
341
- "Language": ["English", "Swahili", "French", "Gikuyu"],
342
- "F1 Score": [0.786, 0.708, 0.571, 0.260],
343
- "Precision": [1.000, 1.000, 1.000, 0.814],
344
- "Recall": [0.647, 0.548, 0.400, 0.155],
345
- "Lexicon Size": ["515 terms", "151 terms", "51 terms", "1,209 terms"],
346
- "Ground Truth": ["67 samples", "64 samples", "51 samples", "5,254 samples"],
347
- "Status": ["Production", "Foundation", "Beta", "Beta"]
348
- }
349
-
350
- stats_df = pd.DataFrame(lang_stats)
351
- st.dataframe(stats_df, use_container_width=True, hide_index=True)
352
-
353
- st.markdown("---")
354
-
355
- # Bias categories
356
- st.subheader("Detected Bias Categories")
357
-
358
- categories = {
359
- "Category": [
360
- "Occupation",
361
- "Pronoun Assumption",
362
- "Generic Pronoun",
363
- "Honorific",
364
- "Morphology"
365
- ],
366
- "Description": [
367
- "Gendered job titles (chairman, policeman)",
368
- "Assumed pronouns (he/she when gender unknown)",
369
- "Generic male pronouns (he as universal)",
370
- "Gendered titles (Mr./Mrs., Mzee/Bi)",
371
- "Gender markers in word structure (wa kike/wa kiume)"
372
- ],
373
- "Example": [
374
- "chairman → chair",
375
- "yeye ni → ni",
376
- "his → their",
377
- "Mzee → Mheshimiwa",
378
- "wa kike → [removed]"
379
- ]
380
- }
381
-
382
- categories_df = pd.DataFrame(categories)
383
- st.dataframe(categories_df, use_container_width=True, hide_index=True)
384
-
385
- st.markdown("---")
386
-
387
- # Usage tips
388
- st.subheader("Usage Tips")
389
- st.markdown("""
390
- **Best Practices:**
391
- - Always review suggested corrections before accepting them
392
- - Consider cultural and contextual appropriateness
393
- - Test with various sentence structures
394
- - Use batch processing for large datasets
395
- - Export results for further analysis
396
-
397
- **Limitations:**
398
- - Detection is lexicon-based (limited to known patterns)
399
- - Context-dependent bias may be missed
400
- - Some languages have smaller lexicons (ongoing expansion)
401
- - Review all ML-flagged items carefully
402
- """)
403
-
404
- st.markdown("---")
405
-
406
- # Footer
407
- st.markdown("""
408
- <div style='text-align: center; color: gray; padding: 20px;'>
409
- JuaKazi Gender Sensitization Engine | Version 0.3<br>
410
- Perfect Precision: 1.000 (Zero False Positives)<br>
411
- Culturally Adapted for African Languages
412
- </div>
413
- """, unsafe_allow_html=True)
414
-
 
1
#!/usr/bin/env python3
"""
JuaKazi Gender Bias Detection and Correction - Testing Interface

User-friendly web UI for non-technical experts to test the bias
detection and correction model.
"""

import sys
from io import StringIO  # NOTE(review): appears unused in this file — confirm before removing
from pathlib import Path

import pandas as pd
import streamlit as st

# Make the project root importable so the `eval` package resolves
# regardless of the working directory Streamlit is launched from.
BASE_DIR = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(BASE_DIR))

from eval.bias_detector import BiasDetector
from eval.models import Language

# Page configuration — Streamlit requires this to be the first st.* call.
st.set_page_config(
    page_title="JuaKazi Bias Detection and Correction Testing",
    layout="wide",
    initial_sidebar_state="collapsed",
)
26
+
27
# Display names shown in the single-text dropdown, mapped to the
# project's Language enum values.
LANGUAGE_MAP = {
    "English": Language.ENGLISH,
    "Swahili": Language.SWAHILI,
    "French": Language.FRENCH,
    "Gikuyu (Kikuyu)": Language.GIKUYU,
}
34
+
35
# Short language codes accepted in batch CSV uploads, keyed by the same
# display names as LANGUAGE_MAP ("ki" is the ISO 639-1 code for Kikuyu).
LANGUAGE_CODES = {
    "English": "en",
    "Swahili": "sw",
    "French": "fr",
    "Gikuyu (Kikuyu)": "ki",
}
41
+
42
# Detector factory — cached so construction cost is paid once.
@st.cache_resource
def get_detector():
    """Build the BiasDetector once per server process and reuse it.

    st.cache_resource memoizes the returned instance across Streamlit
    reruns, so the detector is not rebuilt on every user interaction.
    """
    return BiasDetector()
47
+
48
# Page header
st.title("JuaKazi Detection and Correction - Testing Interface")
st.markdown("Test individual texts or batch process files to detect and correct gender bias")
st.markdown("---")

# The whole app depends on the detector; if it cannot be built, show the
# failure and stop rendering instead of crashing on every tab.
try:
    detector = get_detector()
except Exception as e:
    st.error(f"Failed to initialize bias detector: {e}")
    st.stop()

# Top-level navigation
tab1, tab2, tab3 = st.tabs(["Single Text Test", "Batch Testing", "Statistics"])
62
+
63
+ # ===================================
64
+ # TAB 1: SINGLE TEXT TESTING
65
+ # ===================================
66
+ with tab1:
67
+ st.header("Test Individual Text")
68
+ st.markdown("Enter text below and select a language to check for gender bias.")
69
+
70
+ # Language selector
71
+ col1, col2 = st.columns([1, 3])
72
+ with col1:
73
+ selected_lang_name = st.selectbox(
74
+ "Select Language",
75
+ list(LANGUAGE_MAP.keys()),
76
+ index=0,
77
+ help="Choose the language of your text"
78
+ )
79
+
80
+ language = LANGUAGE_MAP[selected_lang_name]
81
+
82
+ # Text input
83
+ text_input = st.text_area(
84
+ "Enter text to analyze:",
85
+ height=150,
86
+ placeholder="e.g., The chairman will lead the meeting today.",
87
+ help="Paste or type the text you want to check for gender bias"
88
+ )
89
+
90
+ # Detect button
91
+ col1, col2, col3 = st.columns([1, 2, 1])
92
+ with col1:
93
+ detect_button = st.button("Detect Bias", type="primary", use_container_width=True)
94
+
95
+ # Process detection
96
+ if detect_button:
97
+ if not text_input.strip():
98
+ st.warning("Please enter some text to analyze.")
99
+ else:
100
+ with st.spinner("Analyzing text..."):
101
+ try:
102
+ result = detector.detect_bias(text_input, language)
103
+
104
+ # Display results
105
+ st.markdown("---")
106
+ st.subheader("Detection Results")
107
+
108
+ # Status indicator
109
+ if result.has_bias_detected:
110
+ st.error("**Bias Detected**")
111
+ else:
112
+ st.success("**No Bias Detected** - Text appears bias-free")
113
+
114
+ # Create two columns for original vs corrected
115
+ if result.has_bias_detected and result.detected_edits:
116
+ col1, col2 = st.columns(2)
117
+
118
+ with col1:
119
+ st.markdown("**Original Text:**")
120
+ st.info(text_input)
121
+
122
+ with col2:
123
+ st.markdown("**Corrected Text:**")
124
+ corrected_text = text_input
125
+ for edit in result.detected_edits:
126
+ corrected_text = corrected_text.replace(edit["from"], edit["to"])
127
+ st.success(corrected_text)
128
+
129
+ # Show detected edits
130
+ st.markdown("**Detected Edits:**")
131
+ edits_data = []
132
+ for i, edit in enumerate(result.detected_edits, 1):
133
+ edits_data.append({
134
+ "#": i,
135
+ "Original": edit["from"],
136
+ "Replacement": edit["to"],
137
+ "Severity": edit.get("severity", "replace"),
138
+ "Tags": edit.get("tags", "")
139
+ })
140
+
141
+ st.dataframe(pd.DataFrame(edits_data), use_container_width=True)
142
+
143
+ # Additional metadata
144
+ st.markdown("**Detection Metadata:**")
145
+ meta_col1, meta_col2, meta_col3 = st.columns(3)
146
+ with meta_col1:
147
+ st.metric("Source", "Rules-based")
148
+ with meta_col2:
149
+ st.metric("Edits Found", len(result.detected_edits))
150
+ with meta_col3:
151
+ st.metric("Language", selected_lang_name)
152
+
153
+ except Exception as e:
154
+ st.error(f"Error during detection: {e}")
155
+ st.exception(e)
156
+
157
+ # ===================================
158
+ # TAB 2: BATCH TESTING
159
+ # ===================================
160
+ with tab2:
161
+ st.header("Batch Testing from CSV")
162
+ st.markdown("Upload a CSV file with columns: `id`, `language`, `text`")
163
+
164
+ # Show example format
165
+ with st.expander("CSV Format Example"):
166
+ example_df = pd.DataFrame({
167
+ "id": ["1", "2", "3"],
168
+ "language": ["en", "sw", "fr"],
169
+ "text": [
170
+ "The chairman will lead the meeting",
171
+ "Daktari anaangalia wagonjwa",
172
+ "Le président dirigera la réunion"
173
+ ]
174
+ })
175
+ st.dataframe(example_df, use_container_width=True)
176
+ st.markdown("**Language codes:** `en` (English), `sw` (Swahili), `fr` (French), `ki` (Gikuyu)")
177
+
178
+ # Download template
179
+ csv_template = example_df.to_csv(index=False)
180
+ st.download_button(
181
+ "Download Template CSV",
182
+ csv_template,
183
+ "batch_template.csv",
184
+ "text/csv",
185
+ help="Download this template and fill it with your data"
186
+ )
187
+
188
+ # File uploader
189
+ uploaded_file = st.file_uploader(
190
+ "Upload CSV File",
191
+ type=['csv'],
192
+ help="Max 1000 rows, 10MB file size limit"
193
+ )
194
+
195
+ if uploaded_file is not None:
196
+ try:
197
+ # Read CSV
198
+ df = pd.read_csv(uploaded_file)
199
+
200
+ # Validate columns
201
+ required_cols = ['id', 'language', 'text']
202
+ missing_cols = [col for col in required_cols if col not in df.columns]
203
+
204
+ if missing_cols:
205
+ st.error(f"Missing required columns: {', '.join(missing_cols)}")
206
+ else:
207
+ st.success(f"Loaded {len(df)} rows from CSV")
208
+
209
+ # Show preview
210
+ with st.expander("Preview Data (first 5 rows)"):
211
+ st.dataframe(df.head(), use_container_width=True)
212
+
213
+ # Row limit check
214
+ if len(df) > 1000:
215
+ st.warning("File has more than 1000 rows. Only first 1000 will be processed.")
216
+ df = df.head(1000)
217
+
218
+ # Process button
219
+ col1, col2, col3 = st.columns([1, 2, 1])
220
+ with col1:
221
+ process_button = st.button("Process All", type="primary", use_container_width=True)
222
+
223
+ if process_button:
224
+ results = []
225
+ progress_bar = st.progress(0)
226
+ status_text = st.empty()
227
+
228
+ # Language code mapping
229
+ lang_code_map = {
230
+ 'en': Language.ENGLISH,
231
+ 'sw': Language.SWAHILI,
232
+ 'fr': Language.FRENCH,
233
+ 'ki': Language.GIKUYU
234
+ }
235
+
236
+ for idx, row in df.iterrows():
237
+ status_text.text(f"Processing {idx + 1}/{len(df)}...")
238
+
239
+ try:
240
+ lang_code = row['language'].lower()
241
+ if lang_code not in lang_code_map:
242
+ results.append({
243
+ 'id': row['id'],
244
+ 'original_text': row['text'],
245
+ 'corrected_text': row['text'],
246
+ 'bias_detected': False,
247
+ 'edits_count': 0,
248
+ 'status': f'Invalid language code: {lang_code}'
249
+ })
250
+ continue
251
+
252
+ language = lang_code_map[lang_code]
253
+ result = detector.detect_bias(row['text'], language)
254
+
255
+ corrected_text = row['text']
256
+ if result.detected_edits:
257
+ for edit in result.detected_edits:
258
+ corrected_text = corrected_text.replace(edit["from"], edit["to"])
259
+
260
+ results.append({
261
+ 'id': row['id'],
262
+ 'language': row['language'],
263
+ 'original_text': row['text'],
264
+ 'corrected_text': corrected_text,
265
+ 'bias_detected': result.has_bias_detected,
266
+ 'edits_count': len(result.detected_edits),
267
+ 'edits': "; ".join([f"{e['from']}→{e['to']}" for e in result.detected_edits]),
268
+ 'status': 'Success'
269
+ })
270
+
271
+ except Exception as e:
272
+ results.append({
273
+ 'id': row['id'],
274
+ 'original_text': row['text'],
275
+ 'corrected_text': row['text'],
276
+ 'bias_detected': False,
277
+ 'edits_count': 0,
278
+ 'status': f'Error: {str(e)}'
279
+ })
280
+
281
+ progress_bar.progress((idx + 1) / len(df))
282
+
283
+ status_text.text("Processing complete!")
284
+
285
+ # Display results
286
+ results_df = pd.DataFrame(results)
287
+ st.subheader("Batch Processing Results")
288
+
289
+ # Summary metrics
290
+ col1, col2, col3, col4 = st.columns(4)
291
+ with col1:
292
+ st.metric("Total Processed", len(results_df))
293
+ with col2:
294
+ bias_count = results_df['bias_detected'].sum()
295
+ st.metric("Bias Detected", bias_count)
296
+ with col3:
297
+ success_count = (results_df['status'] == 'Success').sum()
298
+ st.metric("Successful", success_count)
299
+ with col4:
300
+ total_edits = results_df['edits_count'].sum()
301
+ st.metric("Total Edits", total_edits)
302
+
303
+ # Results table
304
+ st.dataframe(results_df, use_container_width=True)
305
+
306
+ # Download results
307
+ csv_output = results_df.to_csv(index=False)
308
+ st.download_button(
309
+ "Download Results as CSV",
310
+ csv_output,
311
+ "bias_detection_results.csv",
312
+ "text/csv",
313
+ help="Download the complete results with all columns"
314
+ )
315
+
316
+ except Exception as e:
317
+ st.error(f"Error reading CSV file: {e}")
318
+ st.exception(e)
319
+
320
+ # ===================================
321
+ # TAB 3: STATISTICS
322
+ # ===================================
323
+ with tab3:
324
+ st.header("Language Statistics & System Information")
325
+
326
+ # System info
327
+ st.subheader("Detection System")
328
+ st.markdown("""
329
+ - **Engine:** Rules-based bias detection with lexicon matching
330
+ - **Approach:** Regular expression pattern matching with word boundaries
331
+ - **Case Handling:** Case-preserving replacement
332
+ - **Precision:** 1.000 (zero false positives) across all languages
333
+ """)
334
+
335
+ st.markdown("---")
336
+
337
+ # Language statistics
338
+ st.subheader("Supported Languages")
339
+
340
+ lang_stats = {
341
+ "Language": ["English", "Swahili", "French", "Gikuyu"],
342
+ "F1 Score": [0.786, 0.708, 0.571, 0.260],
343
+ "Precision": [1.000, 1.000, 1.000, 0.814],
344
+ "Recall": [0.647, 0.548, 0.400, 0.155],
345
+ "Lexicon Size": ["515 terms", "151 terms", "51 terms", "1,209 terms"],
346
+ "Ground Truth": ["67 samples", "64 samples", "51 samples", "5,254 samples"],
347
+ "Status": ["Production", "Foundation", "Beta", "Beta"]
348
+ }
349
+
350
+ stats_df = pd.DataFrame(lang_stats)
351
+ st.dataframe(stats_df, use_container_width=True, hide_index=True)
352
+
353
+ st.markdown("---")
354
+
355
+ # Bias categories
356
+ st.subheader("Detected Bias Categories")
357
+
358
+ categories = {
359
+ "Category": [
360
+ "Occupation",
361
+ "Pronoun Assumption",
362
+ "Generic Pronoun",
363
+ "Honorific",
364
+ "Morphology"
365
+ ],
366
+ "Description": [
367
+ "Gendered job titles (chairman, policeman)",
368
+ "Assumed pronouns (he/she when gender unknown)",
369
+ "Generic male pronouns (he as universal)",
370
+ "Gendered titles (Mr./Mrs., Mzee/Bi)",
371
+ "Gender markers in word structure (wa kike/wa kiume)"
372
+ ],
373
+ "Example": [
374
+ "chairman → chair",
375
+ "yeye ni → ni",
376
+ "his → their",
377
+ "Mzee → Mheshimiwa",
378
+ "wa kike → [removed]"
379
+ ]
380
+ }
381
+
382
+ categories_df = pd.DataFrame(categories)
383
+ st.dataframe(categories_df, use_container_width=True, hide_index=True)
384
+
385
+ st.markdown("---")
386
+
387
+ # Usage tips
388
+ st.subheader("Usage Tips")
389
+ st.markdown("""
390
+ **Best Practices:**
391
+ - Always review suggested corrections before accepting them
392
+ - Consider cultural and contextual appropriateness
393
+ - Test with various sentence structures
394
+ - Use batch processing for large datasets
395
+ - Export results for further analysis
396
+
397
+ **Limitations:**
398
+ - Detection is lexicon-based (limited to known patterns)
399
+ - Context-dependent bias may be missed
400
+ - Some languages have smaller lexicons (ongoing expansion)
401
+ - Review all ML-flagged items carefully
402
+ """)
403
+
404
st.markdown("---")

# Footer.  NOTE(review): rendered at module level here; the original
# indentation was lost upstream — confirm this wasn't meant to sit
# inside the tab3 block.
_FOOTER_HTML = """
<div style='text-align: center; color: gray; padding: 20px;'>
JuaKazi Gender Sensitization Engine | Version 0.3<br>
Perfect Precision: 1.000 (Zero False Positives)<br>
Culturally Adapted for African Languages
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)
414
+