#!/usr/bin/env python3 """ JuaKazi Gender Bias Detection and Correction - Testing Interface User-friendly web UI for non-technical experts to test the bias detection and correction model """ import streamlit as st import pandas as pd import sys from pathlib import Path from io import StringIO # Add parent directory to path for imports BASE_DIR = Path(__file__).resolve().parent.parent sys.path.insert(0, str(BASE_DIR)) from eval.bias_detector import BiasDetector from eval.models import Language # Page configuration st.set_page_config( page_title="JuaKazi Bias Detection and Correction Testing", layout="wide", initial_sidebar_state="collapsed" ) # Language mapping for dropdown LANGUAGE_MAP = { "English": Language.ENGLISH, "Swahili": Language.SWAHILI, "French": Language.FRENCH, "Gikuyu (Kikuyu)": Language.GIKUYU } LANGUAGE_CODES = { "English": "en", "Swahili": "sw", "French": "fr", "Gikuyu (Kikuyu)": "ki" } # Initialize detector with caching @st.cache_resource def get_detector(): """Initialize BiasDetector once and cache it""" return BiasDetector() # Main title st.title("JuaKazi Detection and Correction - Testing Interface") st.markdown("Test individual texts or batch process files to detect and correct gender bias") st.markdown("---") # Initialize detector try: detector = get_detector() except Exception as e: st.error(f"Failed to initialize bias detector: {e}") st.stop() # Create tabs tab1, tab2, tab3 = st.tabs(["Single Text Test", "Batch Testing", "Statistics"]) # =================================== # TAB 1: SINGLE TEXT TESTING # =================================== with tab1: st.header("Test Individual Text") st.markdown("Enter text below and select a language to check for gender bias.") # Language selector col1, col2 = st.columns([1, 3]) with col1: selected_lang_name = st.selectbox( "Select Language", list(LANGUAGE_MAP.keys()), index=0, help="Choose the language of your text" ) language = LANGUAGE_MAP[selected_lang_name] # Text input text_input = st.text_area( "Enter text to analyze:", height=150, placeholder="e.g., The chairman will lead the meeting today.", help="Paste or type the text you want to check for gender bias" ) # Detect button col1, col2, col3 = st.columns([1, 2, 1]) with col1: detect_button = st.button("Detect Bias", type="primary", use_container_width=True) # Process detection if detect_button: if not text_input.strip(): st.warning("Please enter some text to analyze.") else: with st.spinner("Analyzing text..."): try: result = detector.detect_bias(text_input, language) # Display results st.markdown("---") st.subheader("Detection Results") # Status indicator if result.has_bias_detected: st.error("**Bias Detected**") else: st.success("**No Bias Detected** - Text appears bias-free") # Create two columns for original vs corrected if result.has_bias_detected and result.detected_edits: col1, col2 = st.columns(2) with col1: st.markdown("**Original Text:**") st.info(text_input) with col2: st.markdown("**Corrected Text:**") corrected_text = text_input for edit in result.detected_edits: corrected_text = corrected_text.replace(edit["from"], edit["to"]) st.success(corrected_text) # Show detected edits st.markdown("**Detected Edits:**") edits_data = [] for i, edit in enumerate(result.detected_edits, 1): edits_data.append({ "#": i, "Original": edit["from"], "Replacement": edit["to"], "Severity": edit.get("severity", "replace"), "Tags": edit.get("tags", "") }) st.dataframe(pd.DataFrame(edits_data), use_container_width=True) # Additional metadata st.markdown("**Detection Metadata:**") meta_col1, meta_col2, meta_col3 = st.columns(3) with meta_col1: st.metric("Source", "Rules-based") with meta_col2: st.metric("Edits Found", len(result.detected_edits)) with meta_col3: st.metric("Language", selected_lang_name) except Exception as e: st.error(f"Error during detection: {e}") st.exception(e) # =================================== # TAB 2: BATCH TESTING # =================================== with tab2: st.header("Batch Testing from CSV") st.markdown("Upload a CSV file with columns: `id`, `language`, `text`") # Show example format with st.expander("CSV Format Example"): example_df = pd.DataFrame({ "id": ["1", "2", "3"], "language": ["en", "sw", "fr"], "text": [ "The chairman will lead the meeting", "Daktari anaangalia wagonjwa", "Le président dirigera la réunion" ] }) st.dataframe(example_df, use_container_width=True) st.markdown("**Language codes:** `en` (English), `sw` (Swahili), `fr` (French), `ki` (Gikuyu)") # Download template csv_template = example_df.to_csv(index=False) st.download_button( "Download Template CSV", csv_template, "batch_template.csv", "text/csv", help="Download this template and fill it with your data" ) # File uploader uploaded_file = st.file_uploader( "Upload CSV File", type=['csv'], help="Max 1000 rows, 10MB file size limit" ) if uploaded_file is not None: try: # Read CSV df = pd.read_csv(uploaded_file) # Validate columns required_cols = ['id', 'language', 'text'] missing_cols = [col for col in required_cols if col not in df.columns] if missing_cols: st.error(f"Missing required columns: {', '.join(missing_cols)}") else: st.success(f"Loaded {len(df)} rows from CSV") # Show preview with st.expander("Preview Data (first 5 rows)"): st.dataframe(df.head(), use_container_width=True) # Row limit check if len(df) > 1000: st.warning("File has more than 1000 rows. Only first 1000 will be processed.") df = df.head(1000) # Process button col1, col2, col3 = st.columns([1, 2, 1]) with col1: process_button = st.button("Process All", type="primary", use_container_width=True) if process_button: results = [] progress_bar = st.progress(0) status_text = st.empty() # Language code mapping lang_code_map = { 'en': Language.ENGLISH, 'sw': Language.SWAHILI, 'fr': Language.FRENCH, 'ki': Language.GIKUYU } for idx, row in df.iterrows(): status_text.text(f"Processing {idx + 1}/{len(df)}...") try: lang_code = row['language'].lower() if lang_code not in lang_code_map: results.append({ 'id': row['id'], 'original_text': row['text'], 'corrected_text': row['text'], 'bias_detected': False, 'edits_count': 0, 'status': f'Invalid language code: {lang_code}' }) continue language = lang_code_map[lang_code] result = detector.detect_bias(row['text'], language) corrected_text = row['text'] if result.detected_edits: for edit in result.detected_edits: corrected_text = corrected_text.replace(edit["from"], edit["to"]) results.append({ 'id': row['id'], 'language': row['language'], 'original_text': row['text'], 'corrected_text': corrected_text, 'bias_detected': result.has_bias_detected, 'edits_count': len(result.detected_edits), 'edits': "; ".join([f"{e['from']}→{e['to']}" for e in result.detected_edits]), 'status': 'Success' }) except Exception as e: results.append({ 'id': row['id'], 'original_text': row['text'], 'corrected_text': row['text'], 'bias_detected': False, 'edits_count': 0, 'status': f'Error: {str(e)}' }) progress_bar.progress((idx + 1) / len(df)) status_text.text("Processing complete!") # Display results results_df = pd.DataFrame(results) st.subheader("Batch Processing Results") # Summary metrics col1, col2, col3, col4 = st.columns(4) with col1: st.metric("Total Processed", len(results_df)) with col2: bias_count = results_df['bias_detected'].sum() st.metric("Bias Detected", bias_count) with col3: success_count = (results_df['status'] == 'Success').sum() st.metric("Successful", success_count) with col4: total_edits = results_df['edits_count'].sum() st.metric("Total Edits", total_edits) # Results table st.dataframe(results_df, use_container_width=True) # Download results csv_output = results_df.to_csv(index=False) st.download_button( "Download Results as CSV", csv_output, "bias_detection_results.csv", "text/csv", help="Download the complete results with all columns" ) except Exception as e: st.error(f"Error reading CSV file: {e}") st.exception(e) # =================================== # TAB 3: STATISTICS # =================================== with tab3: st.header("Language Statistics & System Information") # System info st.subheader("Detection System") st.markdown(""" - **Engine:** Rules-based bias detection with lexicon matching - **Approach:** Regular expression pattern matching with word boundaries - **Case Handling:** Case-preserving replacement - **Precision:** 1.000 (zero false positives) across all languages """) st.markdown("---") # Language statistics st.subheader("Supported Languages") lang_stats = { "Language": ["English", "Swahili", "French", "Gikuyu"], "F1 Score": [0.786, 0.708, 0.571, 0.260], "Precision": [1.000, 1.000, 1.000, 0.814], "Recall": [0.647, 0.548, 0.400, 0.155], "Lexicon Size": ["515 terms", "151 terms", "51 terms", "1,209 terms"], "Ground Truth": ["67 samples", "64 samples", "51 samples", "5,254 samples"], "Status": ["Production", "Foundation", "Beta", "Beta"] } stats_df = pd.DataFrame(lang_stats) st.dataframe(stats_df, use_container_width=True, hide_index=True) st.markdown("---") # Bias categories st.subheader("Detected Bias Categories") categories = { "Category": [ "Occupation", "Pronoun Assumption", "Generic Pronoun", "Honorific", "Morphology" ], "Description": [ "Gendered job titles (chairman, policeman)", "Assumed pronouns (he/she when gender unknown)", "Generic male pronouns (he as universal)", "Gendered titles (Mr./Mrs., Mzee/Bi)", "Gender markers in word structure (wa kike/wa kiume)" ], "Example": [ "chairman → chair", "yeye ni → ni", "his → their", "Mzee → Mheshimiwa", "wa kike → [removed]" ] } categories_df = pd.DataFrame(categories) st.dataframe(categories_df, use_container_width=True, hide_index=True) st.markdown("---") # Usage tips st.subheader("Usage Tips") st.markdown(""" **Best Practices:** - Always review suggested corrections before accepting them - Consider cultural and contextual appropriateness - Test with various sentence structures - Use batch processing for large datasets - Export results for further analysis **Limitations:** - Detection is lexicon-based (limited to known patterns) - Context-dependent bias may be missed - Some languages have smaller lexicons (ongoing expansion) - Review all ML-flagged items carefully """) st.markdown("---") # Footer st.markdown("""