# test-ui / app.py
# juakazike's picture
# update title
# 14f35c3 verified
#!/usr/bin/env python3
"""
JuaKazi Gender Bias Detection and Correction - Testing Interface
User-friendly web UI for non-technical experts to test the bias detection and correction model
"""
import re
import sys
from io import StringIO
from pathlib import Path

import pandas as pd
import streamlit as st
# Add parent directory to path for imports
BASE_DIR = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(BASE_DIR))
from eval.bias_detector import BiasDetector
from eval.models import Language
# Page configuration: browser-tab title, wide layout, sidebar collapsed on load.
_PAGE_SETTINGS = {
    "page_title": "JuaKazi Bias Detection and Correction Testing",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)
# Supported languages, one row each: (dropdown label, short code, enum member).
# Both lookup tables below are derived from this single list so they cannot
# drift apart.
_SUPPORTED_LANGUAGES = (
    ("English", "en", Language.ENGLISH),
    ("Swahili", "sw", Language.SWAHILI),
    ("French", "fr", Language.FRENCH),
    ("Gikuyu (Kikuyu)", "ki", Language.GIKUYU),
)

# Dropdown label -> Language enum member
LANGUAGE_MAP = {label: member for label, _code, member in _SUPPORTED_LANGUAGES}

# Dropdown label -> short language code
LANGUAGE_CODES = {label: code for label, code, _member in _SUPPORTED_LANGUAGES}
# Detector factory; st.cache_resource keeps the constructed object so the
# constructor is not re-run on every script rerun.
@st.cache_resource
def get_detector():
    """Return the shared BiasDetector, constructing it on first use."""
    shared_detector = BiasDetector()
    return shared_detector
# Page heading and one-line introduction
st.title("JuaKazi Detection and Correction - Testing Interface")
st.markdown("Test individual texts or batch process files to detect and correct gender bias")
st.markdown("---")

# Fetch the cached detector. Nothing below works without it, so on failure
# report the problem and end this script run.
try:
    detector = get_detector()
except Exception as init_error:
    st.error(f"Failed to initialize bias detector: {init_error}")
    st.stop()

# One tab per workflow
_TAB_LABELS = ["Single Text Test", "Batch Testing", "Statistics"]
tab1, tab2, tab3 = st.tabs(_TAB_LABELS)
# ===================================
# TAB 1: SINGLE TEXT TESTING
# ===================================
def _apply_edits(text, edits):
    """Return *text* with every detected edit applied as a whole-term swap.

    Each edit is a mapping with ``"from"`` and ``"to"`` string entries.
    A plain ``str.replace`` would also rewrite the term when it occurs inside
    a longer word (``"his"`` inside ``"this"``), so a match only counts when
    it is neither preceded nor followed by a word character. Matching stays
    case-sensitive, exactly like ``str.replace``.
    """
    for edit in edits:
        biased_term = edit["from"]
        if not biased_term:
            # An empty search term would match at every position.
            continue
        replacement = edit["to"]
        pattern = r"(?<!\w)" + re.escape(biased_term) + r"(?!\w)"
        # A callable replacement inserts the text literally (no backslash
        # or group-reference interpretation).
        text = re.sub(pattern, lambda _match: replacement, text)
    return text


with tab1:
    st.header("Test Individual Text")
    st.markdown("Enter text below and select a language to check for gender bias.")

    # Language selector (narrow left column; the right column is spacing)
    col1, col2 = st.columns([1, 3])
    with col1:
        selected_lang_name = st.selectbox(
            "Select Language",
            list(LANGUAGE_MAP.keys()),
            index=0,
            help="Choose the language of your text",
        )
    language = LANGUAGE_MAP[selected_lang_name]

    # Text input
    text_input = st.text_area(
        "Enter text to analyze:",
        height=150,
        placeholder="e.g., The chairman will lead the meeting today.",
        help="Paste or type the text you want to check for gender bias",
    )

    # Detect button
    col1, col2, col3 = st.columns([1, 2, 1])
    with col1:
        detect_button = st.button("Detect Bias", type="primary", use_container_width=True)

    # Process detection
    if detect_button:
        if not text_input.strip():
            st.warning("Please enter some text to analyze.")
        else:
            with st.spinner("Analyzing text..."):
                try:
                    result = detector.detect_bias(text_input, language)
                    detected_edits = result.detected_edits

                    # Display results
                    st.markdown("---")
                    st.subheader("Detection Results")

                    # Status indicator
                    if result.has_bias_detected:
                        st.error("**Bias Detected**")
                    else:
                        st.success("**No Bias Detected** - Text appears bias-free")

                    # Details are only shown when there are edits to show.
                    if result.has_bias_detected and detected_edits:
                        # Original and corrected text side by side
                        col1, col2 = st.columns(2)
                        with col1:
                            st.markdown("**Original Text:**")
                            st.info(text_input)
                        with col2:
                            st.markdown("**Corrected Text:**")
                            st.success(_apply_edits(text_input, detected_edits))

                        # Show detected edits
                        st.markdown("**Detected Edits:**")
                        edits_data = [
                            {
                                "#": number,
                                "Original": edit["from"],
                                "Replacement": edit["to"],
                                "Severity": edit.get("severity", "replace"),
                                "Tags": edit.get("tags", ""),
                            }
                            for number, edit in enumerate(detected_edits, 1)
                        ]
                        st.dataframe(pd.DataFrame(edits_data), use_container_width=True)

                        # Additional metadata
                        st.markdown("**Detection Metadata:**")
                        meta_col1, meta_col2, meta_col3 = st.columns(3)
                        with meta_col1:
                            st.metric("Source", "Rules-based")
                        with meta_col2:
                            st.metric("Edits Found", len(detected_edits))
                        with meta_col3:
                            st.metric("Language", selected_lang_name)
                except Exception as e:
                    # Top-level UI boundary: surface the failure to the tester
                    # instead of aborting the page.
                    st.error(f"Error during detection: {e}")
                    st.exception(e)
# ===================================
# TAB 2: BATCH TESTING
# ===================================
_MAX_BATCH_ROWS = 1000
_REQUIRED_BATCH_COLUMNS = ['id', 'language', 'text']


def _replace_whole_terms(text, edits):
    """Return *text* with each edit's ``"from"`` term swapped for its ``"to"``.

    A term only matches when it is not directly preceded or followed by a
    word character, so a short term is never rewritten inside a longer word
    (``"his"`` inside ``"this"``). Matching is case-sensitive.
    """
    for edit in edits:
        biased_term = edit["from"]
        if not biased_term:
            # An empty search term would match at every position.
            continue
        replacement = edit["to"]
        pattern = r"(?<!\w)" + re.escape(biased_term) + r"(?!\w)"
        # Callable replacement: inserted literally, no escape processing.
        text = re.sub(pattern, lambda _match: replacement, text)
    return text


def _process_batch_row(row, code_to_language):
    """Run detection on one CSV row and return its result record.

    Every record carries the same keys, so the results table and the exported
    CSV keep stable columns no matter which rows fail. A row with an unknown
    language code or a detection error is returned with its text unchanged
    and the reason in ``status``.
    """
    record = {
        'id': row['id'],
        'language': row['language'],
        'original_text': row['text'],
        'corrected_text': row['text'],
        'bias_detected': False,
        'edits_count': 0,
        'edits': "",
        'status': 'Success',
    }
    try:
        # str() tolerates empty (NaN) or numeric cells; strip() tolerates
        # stray spaces around the code.
        lang_code = str(row['language']).strip().lower()
        if lang_code not in code_to_language:
            record['status'] = f'Invalid language code: {lang_code}'
            return record

        result = detector.detect_bias(row['text'], code_to_language[lang_code])
        edits = result.detected_edits or []
        record['corrected_text'] = _replace_whole_terms(row['text'], edits)
        record['bias_detected'] = bool(result.has_bias_detected)
        record['edits_count'] = len(edits)
        record['edits'] = "; ".join(f"{e['from']}→{e['to']}" for e in edits)
    except Exception as e:
        # One bad row must not abort the batch: reset any partially filled
        # fields and record the failure.
        record.update(
            corrected_text=row['text'],
            bias_detected=False,
            edits_count=0,
            edits="",
            status=f'Error: {str(e)}',
        )
    return record


def _run_batch(df):
    """Process every row of *df* with a progress display; return the records."""
    # Short code -> Language member, derived from the module-level tables so
    # the accepted codes stay in sync with the single-text dropdown.
    code_to_language = {
        code: LANGUAGE_MAP[name] for name, code in LANGUAGE_CODES.items()
    }
    progress_bar = st.progress(0)
    status_text = st.empty()
    total_rows = len(df)
    records = []
    # Count rows by position; the DataFrame index label is not guaranteed to
    # be a 0-based integer.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        status_text.text(f"Processing {position}/{total_rows}...")
        records.append(_process_batch_row(row, code_to_language))
        progress_bar.progress(position / total_rows)
    status_text.text("Processing complete!")
    return records


def _render_batch_results(results_df):
    """Show summary metrics, the full results table and a CSV download."""
    st.subheader("Batch Processing Results")

    # Summary metrics (cast to int so plain Python numbers reach st.metric)
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Processed", len(results_df))
    with col2:
        st.metric("Bias Detected", int(results_df['bias_detected'].sum()))
    with col3:
        st.metric("Successful", int((results_df['status'] == 'Success').sum()))
    with col4:
        st.metric("Total Edits", int(results_df['edits_count'].sum()))

    # Results table
    st.dataframe(results_df, use_container_width=True)

    # Download results
    csv_output = results_df.to_csv(index=False)
    st.download_button(
        "Download Results as CSV",
        csv_output,
        "bias_detection_results.csv",
        "text/csv",
        help="Download the complete results with all columns",
    )


with tab2:
    st.header("Batch Testing from CSV")
    st.markdown("Upload a CSV file with columns: `id`, `language`, `text`")

    # Show example format
    with st.expander("CSV Format Example"):
        example_df = pd.DataFrame({
            "id": ["1", "2", "3"],
            "language": ["en", "sw", "fr"],
            "text": [
                "The chairman will lead the meeting",
                "Daktari anaangalia wagonjwa",
                "Le président dirigera la réunion",
            ],
        })
        st.dataframe(example_df, use_container_width=True)
        st.markdown("**Language codes:** `en` (English), `sw` (Swahili), `fr` (French), `ki` (Gikuyu)")

        # Download template
        csv_template = example_df.to_csv(index=False)
        st.download_button(
            "Download Template CSV",
            csv_template,
            "batch_template.csv",
            "text/csv",
            help="Download this template and fill it with your data",
        )

    # File uploader
    uploaded_file = st.file_uploader(
        "Upload CSV File",
        type=['csv'],
        help="Max 1000 rows, 10MB file size limit",
    )

    if uploaded_file is not None:
        # Only the parse step is reported as a read error; later failures get
        # their own message.
        try:
            df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error reading CSV file: {e}")
            st.exception(e)
        else:
            # Validate columns
            missing_cols = [col for col in _REQUIRED_BATCH_COLUMNS if col not in df.columns]
            if missing_cols:
                st.error(f"Missing required columns: {', '.join(missing_cols)}")
            elif df.empty:
                # A header-only file has nothing to process.
                st.warning("The uploaded CSV has no data rows to process.")
            else:
                st.success(f"Loaded {len(df)} rows from CSV")

                # Show preview
                with st.expander("Preview Data (first 5 rows)"):
                    st.dataframe(df.head(), use_container_width=True)

                # Row limit check
                if len(df) > _MAX_BATCH_ROWS:
                    st.warning(
                        f"File has more than {_MAX_BATCH_ROWS} rows. "
                        f"Only first {_MAX_BATCH_ROWS} will be processed."
                    )
                    df = df.head(_MAX_BATCH_ROWS)

                # Process button
                col1, col2, col3 = st.columns([1, 2, 1])
                with col1:
                    process_button = st.button("Process All", type="primary", use_container_width=True)

                if process_button:
                    try:
                        results_df = pd.DataFrame(_run_batch(df))
                        _render_batch_results(results_df)
                    except Exception as e:
                        st.error(f"Error during batch processing: {e}")
                        st.exception(e)
# ===================================
# TAB 3: STATISTICS
# ===================================
with tab3:
    st.header("Language Statistics & System Information")

    # System info. The precision bullet is worded to agree with the
    # per-language table below (Gikuyu is 0.814, not 1.000).
    st.subheader("Detection System")
    st.markdown("""
    - **Engine:** Rules-based bias detection with lexicon matching
    - **Approach:** Regular expression pattern matching with word boundaries
    - **Case Handling:** Case-preserving replacement
    - **Precision:** 1.000 (zero false positives) for English, Swahili and French; 0.814 for Gikuyu
    """)
    st.markdown("---")

    # Language statistics (static figures; one list entry per language, in
    # the order given by the "Language" column)
    st.subheader("Supported Languages")
    lang_stats = {
        "Language": ["English", "Swahili", "French", "Gikuyu"],
        "F1 Score": [0.786, 0.708, 0.571, 0.260],
        "Precision": [1.000, 1.000, 1.000, 0.814],
        "Recall": [0.647, 0.548, 0.400, 0.155],
        "Lexicon Size": ["515 terms", "151 terms", "51 terms", "1,209 terms"],
        "Ground Truth": ["67 samples", "64 samples", "51 samples", "5,254 samples"],
        "Status": ["Production", "Foundation", "Beta", "Beta"],
    }
    stats_df = pd.DataFrame(lang_stats)
    st.dataframe(stats_df, use_container_width=True, hide_index=True)
    st.markdown("---")

    # Bias categories
    st.subheader("Detected Bias Categories")
    categories = {
        "Category": [
            "Occupation",
            "Pronoun Assumption",
            "Generic Pronoun",
            "Honorific",
            "Morphology",
        ],
        "Description": [
            "Gendered job titles (chairman, policeman)",
            "Assumed pronouns (he/she when gender unknown)",
            "Generic male pronouns (he as universal)",
            "Gendered titles (Mr./Mrs., Mzee/Bi)",
            "Gender markers in word structure (wa kike/wa kiume)",
        ],
        "Example": [
            "chairman → chair",
            "yeye ni → ni",
            "his → their",
            "Mzee → Mheshimiwa",
            "wa kike → [removed]",
        ],
    }
    categories_df = pd.DataFrame(categories)
    st.dataframe(categories_df, use_container_width=True, hide_index=True)
    st.markdown("---")

    # Usage tips. The blank lines keep each bold heading a separate paragraph
    # instead of a continuation of the preceding list item.
    st.subheader("Usage Tips")
    st.markdown("""
    **Best Practices:**

    - Always review suggested corrections before accepting them
    - Consider cultural and contextual appropriateness
    - Test with various sentence structures
    - Use batch processing for large datasets
    - Export results for further analysis

    **Limitations:**

    - Detection is lexicon-based (limited to known patterns)
    - Context-dependent bias may be missed
    - Some languages have smaller lexicons (ongoing expansion)
    - Review all ML-flagged items carefully
    """)
    st.markdown("---")
# Footer, rendered below the tabs as raw HTML. The precision line names the
# languages it applies to, matching the per-language figures in the
# Statistics tab (Gikuyu precision there is 0.814).
st.markdown("""
<div style='text-align: center; color: gray; padding: 20px;'>
JuaKazi Gender Sensitization Engine | Version 0.3<br>
Precision: 1.000 (Zero False Positives) for English, Swahili and French<br>
Culturally Adapted for African Languages
</div>
""", unsafe_allow_html=True)