#!/usr/bin/env python3
"""
JuaKazi Gender Bias Detection and Correction - Testing Interface
User-friendly web UI for non-technical experts to test the bias detection and correction model
"""
import streamlit as st
import pandas as pd
import sys
from pathlib import Path
# Add parent directory to path for imports
BASE_DIR = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(BASE_DIR))
from eval.bias_detector import BiasDetector
from eval.models import Language
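# Assumed interface, inferred from how this UI uses the detector (not
# guaranteed by eval.bias_detector / eval.models):
#   detector.detect_bias(text, language) returns a result object exposing
#     .has_bias_detected -> bool
#     .detected_edits    -> list of dicts with keys "from" and "to",
#                           plus optional "severity" and "tags"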
# Page configuration
st.set_page_config(
    page_title="JuaKazi Bias Detection and Correction Testing",
    layout="wide",
    initial_sidebar_state="collapsed"
)
# Language mapping for dropdown
LANGUAGE_MAP = {
    "English": Language.ENGLISH,
    "Swahili": Language.SWAHILI,
    "French": Language.FRENCH,
    "Gikuyu (Kikuyu)": Language.GIKUYU
}
LANGUAGE_CODES = {
    "English": "en",
    "Swahili": "sw",
    "French": "fr",
    "Gikuyu (Kikuyu)": "ki"
}
# Initialize detector with caching
@st.cache_resource
def get_detector():
    """Initialize BiasDetector once and cache it."""
    return BiasDetector()
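# Minimal sketch of a shared helper that mirrors the two inline replacement
# loops below (Tab 1 and Tab 2). It is an illustrative consolidation, not part
# of the original flow. Note that str.replace swaps *every* occurrence of a
# term, and edits are applied in list order, so later edits see earlier ones.
def apply_edits(text: str, edits: list) -> str:
    """Apply detected edits sequentially, replacing all occurrences of each term."""
    corrected = text
    for edit in edits:
        corrected = corrected.replace(edit["from"], edit["to"])
    return corrected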
# Main title
st.title("JuaKazi Detection and Correction - Testing Interface")
st.markdown("Test individual texts or batch process files to detect and correct gender bias")
st.markdown("---")
# Initialize detector
try:
    detector = get_detector()
except Exception as e:
    st.error(f"Failed to initialize bias detector: {e}")
    st.stop()
# Create tabs
tab1, tab2, tab3 = st.tabs(["Single Text Test", "Batch Testing", "Statistics"])
# ===================================
# TAB 1: SINGLE TEXT TESTING
# ===================================
with tab1:
    st.header("Test Individual Text")
    st.markdown("Enter text below and select a language to check for gender bias.")

    # Language selector
    col1, col2 = st.columns([1, 3])
    with col1:
        selected_lang_name = st.selectbox(
            "Select Language",
            list(LANGUAGE_MAP.keys()),
            index=0,
            help="Choose the language of your text"
        )
    language = LANGUAGE_MAP[selected_lang_name]

    # Text input
    text_input = st.text_area(
        "Enter text to analyze:",
        height=150,
        placeholder="e.g., The chairman will lead the meeting today.",
        help="Paste or type the text you want to check for gender bias"
    )

    # Detect button
    col1, col2, col3 = st.columns([1, 2, 1])
    with col1:
        detect_button = st.button("Detect Bias", type="primary", use_container_width=True)

    # Process detection
    if detect_button:
        if not text_input.strip():
            st.warning("Please enter some text to analyze.")
        else:
            with st.spinner("Analyzing text..."):
                try:
                    result = detector.detect_bias(text_input, language)

                    # Display results
                    st.markdown("---")
                    st.subheader("Detection Results")

                    # Status indicator
                    if result.has_bias_detected:
                        st.error("**Bias Detected**")
                    else:
                        st.success("**No Bias Detected** - Text appears bias-free")

                    # Create two columns for original vs corrected
                    if result.has_bias_detected and result.detected_edits:
                        col1, col2 = st.columns(2)
                        with col1:
                            st.markdown("**Original Text:**")
                            st.info(text_input)
                        with col2:
                            st.markdown("**Corrected Text:**")
                            corrected_text = text_input
                            for edit in result.detected_edits:
                                corrected_text = corrected_text.replace(edit["from"], edit["to"])
                            st.success(corrected_text)

                        # Show detected edits
                        st.markdown("**Detected Edits:**")
                        edits_data = []
                        for i, edit in enumerate(result.detected_edits, 1):
                            edits_data.append({
                                "#": i,
                                "Original": edit["from"],
                                "Replacement": edit["to"],
                                "Severity": edit.get("severity", "replace"),
                                "Tags": edit.get("tags", "")
                            })
                        st.dataframe(pd.DataFrame(edits_data), use_container_width=True)

                    # Additional metadata
                    st.markdown("**Detection Metadata:**")
                    meta_col1, meta_col2, meta_col3 = st.columns(3)
                    with meta_col1:
                        st.metric("Source", "Rules-based")
                    with meta_col2:
                        st.metric("Edits Found", len(result.detected_edits))
                    with meta_col3:
                        st.metric("Language", selected_lang_name)
                except Exception as e:
                    st.error(f"Error during detection: {e}")
                    st.exception(e)
# ===================================
# TAB 2: BATCH TESTING
# ===================================
with tab2:
    st.header("Batch Testing from CSV")
    st.markdown("Upload a CSV file with columns: `id`, `language`, `text`")

    # Show example format
    with st.expander("CSV Format Example"):
        example_df = pd.DataFrame({
            "id": ["1", "2", "3"],
            "language": ["en", "sw", "fr"],
            "text": [
                "The chairman will lead the meeting",
                "Daktari anaangalia wagonjwa",
                "Le président dirigera la réunion"
            ]
        })
        st.dataframe(example_df, use_container_width=True)
        st.markdown("**Language codes:** `en` (English), `sw` (Swahili), `fr` (French), `ki` (Gikuyu)")

        # Download template
        csv_template = example_df.to_csv(index=False)
        st.download_button(
            "Download Template CSV",
            csv_template,
            "batch_template.csv",
            "text/csv",
            help="Download this template and fill it with your data"
        )

    # File uploader
    uploaded_file = st.file_uploader(
        "Upload CSV File",
        type=['csv'],
        help="Max 1000 rows, 10MB file size limit"
    )

    if uploaded_file is not None:
        try:
            # Read CSV
            df = pd.read_csv(uploaded_file)

            # Validate columns
            required_cols = ['id', 'language', 'text']
            missing_cols = [col for col in required_cols if col not in df.columns]
            if missing_cols:
                st.error(f"Missing required columns: {', '.join(missing_cols)}")
            else:
                st.success(f"Loaded {len(df)} rows from CSV")

                # Show preview
                with st.expander("Preview Data (first 5 rows)"):
                    st.dataframe(df.head(), use_container_width=True)

                # Row limit check
                if len(df) > 1000:
                    st.warning("File has more than 1000 rows. Only the first 1000 will be processed.")
                    df = df.head(1000)

                # Process button
                col1, col2, col3 = st.columns([1, 2, 1])
                with col1:
                    process_button = st.button("Process All", type="primary", use_container_width=True)

                if process_button:
                    results = []
                    progress_bar = st.progress(0)
                    status_text = st.empty()

                    # Language code mapping
                    lang_code_map = {
                        'en': Language.ENGLISH,
                        'sw': Language.SWAHILI,
                        'fr': Language.FRENCH,
                        'ki': Language.GIKUYU
                    }

                    for idx, row in df.iterrows():
                        status_text.text(f"Processing {idx + 1}/{len(df)}...")
                        try:
                            # Coerce to str so non-string cells (e.g. NaN) fail gracefully
                            lang_code = str(row['language']).strip().lower()
                            if lang_code not in lang_code_map:
                                results.append({
                                    'id': row['id'],
                                    'language': row['language'],
                                    'original_text': row['text'],
                                    'corrected_text': row['text'],
                                    'bias_detected': False,
                                    'edits_count': 0,
                                    'edits': '',
                                    'status': f'Invalid language code: {lang_code}'
                                })
                                continue

                            language = lang_code_map[lang_code]
                            result = detector.detect_bias(row['text'], language)

                            corrected_text = row['text']
                            if result.detected_edits:
                                for edit in result.detected_edits:
                                    corrected_text = corrected_text.replace(edit["from"], edit["to"])

                            results.append({
                                'id': row['id'],
                                'language': row['language'],
                                'original_text': row['text'],
                                'corrected_text': corrected_text,
                                'bias_detected': result.has_bias_detected,
                                'edits_count': len(result.detected_edits),
                                'edits': "; ".join([f"{e['from']}→{e['to']}" for e in result.detected_edits]),
                                'status': 'Success'
                            })
                        except Exception as e:
                            results.append({
                                'id': row['id'],
                                'language': row.get('language', ''),
                                'original_text': row['text'],
                                'corrected_text': row['text'],
                                'bias_detected': False,
                                'edits_count': 0,
                                'edits': '',
                                'status': f'Error: {str(e)}'
                            })
                        progress_bar.progress((idx + 1) / len(df))

                    status_text.text("Processing complete!")

                    # Display results
                    results_df = pd.DataFrame(results)
                    st.subheader("Batch Processing Results")

                    # Summary metrics
                    col1, col2, col3, col4 = st.columns(4)
                    with col1:
                        st.metric("Total Processed", len(results_df))
                    with col2:
                        bias_count = results_df['bias_detected'].sum()
                        st.metric("Bias Detected", bias_count)
                    with col3:
                        success_count = (results_df['status'] == 'Success').sum()
                        st.metric("Successful", success_count)
                    with col4:
                        total_edits = results_df['edits_count'].sum()
                        st.metric("Total Edits", total_edits)

                    # Results table
                    st.dataframe(results_df, use_container_width=True)

                    # Download results
                    csv_output = results_df.to_csv(index=False)
                    st.download_button(
                        "Download Results as CSV",
                        csv_output,
                        "bias_detection_results.csv",
                        "text/csv",
                        help="Download the complete results with all columns"
                    )
        except Exception as e:
            st.error(f"Error reading CSV file: {e}")
            st.exception(e)
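# For quick sanity checks outside the UI, the same detector can be exercised
# directly (illustrative example; actual output depends on the lexicons):
#   detector = BiasDetector()
#   result = detector.detect_bias("The chairman will lead the meeting", Language.ENGLISH)
#   print(result.has_bias_detected, result.detected_edits)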
# ===================================
# TAB 3: STATISTICS
# ===================================
with tab3:
    st.header("Language Statistics & System Information")

    # System info
    st.subheader("Detection System")
    st.markdown("""
    - **Engine:** Rules-based bias detection with lexicon matching
    - **Approach:** Regular expression pattern matching with word boundaries
    - **Case Handling:** Case-preserving replacement
    - **Precision:** 1.000 (zero false positives) for English, Swahili, and French; 0.814 for Gikuyu
    """)

    st.markdown("---")

    # Language statistics
    st.subheader("Supported Languages")
    lang_stats = {
        "Language": ["English", "Swahili", "French", "Gikuyu"],
        "F1 Score": [0.786, 0.708, 0.571, 0.260],
        "Precision": [1.000, 1.000, 1.000, 0.814],
        "Recall": [0.647, 0.548, 0.400, 0.155],
        "Lexicon Size": ["515 terms", "151 terms", "51 terms", "1,209 terms"],
        "Ground Truth": ["67 samples", "64 samples", "51 samples", "5,254 samples"],
        "Status": ["Production", "Foundation", "Beta", "Beta"]
    }
    stats_df = pd.DataFrame(lang_stats)
    st.dataframe(stats_df, use_container_width=True, hide_index=True)

    st.markdown("---")

    # Bias categories
    st.subheader("Detected Bias Categories")
    categories = {
        "Category": [
            "Occupation",
            "Pronoun Assumption",
            "Generic Pronoun",
            "Honorific",
            "Morphology"
        ],
        "Description": [
            "Gendered job titles (chairman, policeman)",
            "Assumed pronouns (he/she when gender unknown)",
            "Generic male pronouns (he as universal)",
            "Gendered titles (Mr./Mrs., Mzee/Bi)",
            "Gender markers in word structure (wa kike/wa kiume)"
        ],
        "Example": [
            "chairman → chair",
            "yeye ni → ni",
            "his → their",
            "Mzee → Mheshimiwa",
            "wa kike → [removed]"
        ]
    }
    categories_df = pd.DataFrame(categories)
    st.dataframe(categories_df, use_container_width=True, hide_index=True)

    st.markdown("---")

    # Usage tips
    st.subheader("Usage Tips")
    st.markdown("""
    **Best Practices:**
    - Always review suggested corrections before accepting them
    - Consider cultural and contextual appropriateness
    - Test with various sentence structures
    - Use batch processing for large datasets
    - Export results for further analysis

    **Limitations:**
    - Detection is lexicon-based (limited to known patterns)
    - Context-dependent bias may be missed
    - Some languages have smaller lexicons (ongoing expansion)
    - Review all flagged items carefully
    """)
st.markdown("---")
# Footer
st.markdown("""
<div style='text-align: center; color: gray; padding: 20px;'>
JuaKazi Gender Sensitization Engine | Version 0.3<br>
Perfect Precision: 1.000 (Zero False Positives)<br>
Culturally Adapted for African Languages
</div>
""", unsafe_allow_html=True)
|