Spaces:

juakazike
/

test-ui

Running

App Files Files Community

test-ui / app.py

juakazike

update title

14f35c3 verified about 1 month ago

raw

history blame contribute delete

15.1 kB

	#!/usr/bin/env python3
	"""
	JuaKazi Gender Bias Detection and Correction - Testing Interface
	User-friendly web UI for non-technical experts to test the bias detection and correction model
	"""

	import streamlit as st
	import pandas as pd
	import sys
	from pathlib import Path
	from io import StringIO

	# Add parent directory to path for imports
	BASE_DIR = Path(__file__).resolve().parent.parent
	sys.path.insert(0, str(BASE_DIR))

	from eval.bias_detector import BiasDetector
	from eval.models import Language

	# Page configuration
	st.set_page_config(
	page_title="JuaKazi Bias Detection and Correction Testing",
	layout="wide",
	initial_sidebar_state="collapsed"
	)

	# Language mapping for dropdown
	LANGUAGE_MAP = {
	"English": Language.ENGLISH,
	"Swahili": Language.SWAHILI,
	"French": Language.FRENCH,
	"Gikuyu (Kikuyu)": Language.GIKUYU
	}

	LANGUAGE_CODES = {
	"English": "en",
	"Swahili": "sw",
	"French": "fr",
	"Gikuyu (Kikuyu)": "ki"
	}

	# Initialize detector with caching
	@st.cache_resource
	def get_detector():
	"""Initialize BiasDetector once and cache it"""
	return BiasDetector()

	# Main title
	st.title("JuaKazi Detection and Correction - Testing Interface")
	st.markdown("Test individual texts or batch process files to detect and correct gender bias")
	st.markdown("---")

	# Initialize detector
	try:
	detector = get_detector()
	except Exception as e:
	st.error(f"Failed to initialize bias detector: {e}")
	st.stop()

	# Create tabs
	tab1, tab2, tab3 = st.tabs(["Single Text Test", "Batch Testing", "Statistics"])

	# ===================================
	# TAB 1: SINGLE TEXT TESTING
	# ===================================
	with tab1:
	st.header("Test Individual Text")
	st.markdown("Enter text below and select a language to check for gender bias.")

	# Language selector
	col1, col2 = st.columns([1, 3])
	with col1:
	selected_lang_name = st.selectbox(
	"Select Language",
	list(LANGUAGE_MAP.keys()),
	index=0,
	help="Choose the language of your text"
	)

	language = LANGUAGE_MAP[selected_lang_name]

	# Text input
	text_input = st.text_area(
	"Enter text to analyze:",
	height=150,
	placeholder="e.g., The chairman will lead the meeting today.",
	help="Paste or type the text you want to check for gender bias"
	)

	# Detect button
	col1, col2, col3 = st.columns([1, 2, 1])
	with col1:
	detect_button = st.button("Detect Bias", type="primary", use_container_width=True)

	# Process detection
	if detect_button:
	if not text_input.strip():
	st.warning("Please enter some text to analyze.")
	else:
	with st.spinner("Analyzing text..."):
	try:
	result = detector.detect_bias(text_input, language)

	# Display results
	st.markdown("---")
	st.subheader("Detection Results")

	# Status indicator
	if result.has_bias_detected:
	st.error("Bias Detected")
	else:
	st.success("No Bias Detected - Text appears bias-free")

	# Create two columns for original vs corrected
	if result.has_bias_detected and result.detected_edits:
	col1, col2 = st.columns(2)

	with col1:
	st.markdown("Original Text:")
	st.info(text_input)

	with col2:
	st.markdown("Corrected Text:")
	corrected_text = text_input
	for edit in result.detected_edits:
	corrected_text = corrected_text.replace(edit["from"], edit["to"])
	st.success(corrected_text)

	# Show detected edits
	st.markdown("Detected Edits:")
	edits_data = []
	for i, edit in enumerate(result.detected_edits, 1):
	edits_data.append({
	"#": i,
	"Original": edit["from"],
	"Replacement": edit["to"],
	"Severity": edit.get("severity", "replace"),
	"Tags": edit.get("tags", "")
	})

	st.dataframe(pd.DataFrame(edits_data), use_container_width=True)

	# Additional metadata
	st.markdown("Detection Metadata:")
	meta_col1, meta_col2, meta_col3 = st.columns(3)
	with meta_col1:
	st.metric("Source", "Rules-based")
	with meta_col2:
	st.metric("Edits Found", len(result.detected_edits))
	with meta_col3:
	st.metric("Language", selected_lang_name)

	except Exception as e:
	st.error(f"Error during detection: {e}")
	st.exception(e)

	# ===================================
	# TAB 2: BATCH TESTING
	# ===================================
	with tab2:
	st.header("Batch Testing from CSV")
	st.markdown("Upload a CSV file with columns: `id`, `language`, `text`")

	# Show example format
	with st.expander("CSV Format Example"):
	example_df = pd.DataFrame({
	"id": ["1", "2", "3"],
	"language": ["en", "sw", "fr"],
	"text": [
	"The chairman will lead the meeting",
	"Daktari anaangalia wagonjwa",
	"Le président dirigera la réunion"
	]
	})
	st.dataframe(example_df, use_container_width=True)
	st.markdown("Language codes: `en` (English), `sw` (Swahili), `fr` (French), `ki` (Gikuyu)")

	# Download template
	csv_template = example_df.to_csv(index=False)
	st.download_button(
	"Download Template CSV",
	csv_template,
	"batch_template.csv",
	"text/csv",
	help="Download this template and fill it with your data"
	)

	# File uploader
	uploaded_file = st.file_uploader(
	"Upload CSV File",
	type=['csv'],
	help="Max 1000 rows, 10MB file size limit"
	)

	if uploaded_file is not None:
	try:
	# Read CSV
	df = pd.read_csv(uploaded_file)

	# Validate columns
	required_cols = ['id', 'language', 'text']
	missing_cols = [col for col in required_cols if col not in df.columns]

	if missing_cols:
	st.error(f"Missing required columns: {', '.join(missing_cols)}")
	else:
	st.success(f"Loaded {len(df)} rows from CSV")

	# Show preview
	with st.expander("Preview Data (first 5 rows)"):
	st.dataframe(df.head(), use_container_width=True)

	# Row limit check
	if len(df) > 1000:
	st.warning("File has more than 1000 rows. Only first 1000 will be processed.")
	df = df.head(1000)

	# Process button
	col1, col2, col3 = st.columns([1, 2, 1])
	with col1:
	process_button = st.button("Process All", type="primary", use_container_width=True)

	if process_button:
	results = []
	progress_bar = st.progress(0)
	status_text = st.empty()

	# Language code mapping
	lang_code_map = {
	'en': Language.ENGLISH,
	'sw': Language.SWAHILI,
	'fr': Language.FRENCH,
	'ki': Language.GIKUYU
	}

	for idx, row in df.iterrows():
	status_text.text(f"Processing {idx + 1}/{len(df)}...")

	try:
	lang_code = row['language'].lower()
	if lang_code not in lang_code_map:
	results.append({
	'id': row['id'],
	'original_text': row['text'],
	'corrected_text': row['text'],
	'bias_detected': False,
	'edits_count': 0,
	'status': f'Invalid language code: {lang_code}'
	})
	continue

	language = lang_code_map[lang_code]
	result = detector.detect_bias(row['text'], language)

	corrected_text = row['text']
	if result.detected_edits:
	for edit in result.detected_edits:
	corrected_text = corrected_text.replace(edit["from"], edit["to"])

	results.append({
	'id': row['id'],
	'language': row['language'],
	'original_text': row['text'],
	'corrected_text': corrected_text,
	'bias_detected': result.has_bias_detected,
	'edits_count': len(result.detected_edits),
	'edits': "; ".join([f"{e['from']}→{e['to']}" for e in result.detected_edits]),
	'status': 'Success'
	})

	except Exception as e:
	results.append({
	'id': row['id'],
	'original_text': row['text'],
	'corrected_text': row['text'],
	'bias_detected': False,
	'edits_count': 0,
	'status': f'Error: {str(e)}'
	})

	progress_bar.progress((idx + 1) / len(df))

	status_text.text("Processing complete!")

	# Display results
	results_df = pd.DataFrame(results)
	st.subheader("Batch Processing Results")

	# Summary metrics
	col1, col2, col3, col4 = st.columns(4)
	with col1:
	st.metric("Total Processed", len(results_df))
	with col2:
	bias_count = results_df['bias_detected'].sum()
	st.metric("Bias Detected", bias_count)
	with col3:
	success_count = (results_df['status'] == 'Success').sum()
	st.metric("Successful", success_count)
	with col4:
	total_edits = results_df['edits_count'].sum()
	st.metric("Total Edits", total_edits)

	# Results table
	st.dataframe(results_df, use_container_width=True)

	# Download results
	csv_output = results_df.to_csv(index=False)
	st.download_button(
	"Download Results as CSV",
	csv_output,
	"bias_detection_results.csv",
	"text/csv",
	help="Download the complete results with all columns"
	)

	except Exception as e:
	st.error(f"Error reading CSV file: {e}")
	st.exception(e)

	# ===================================
	# TAB 3: STATISTICS
	# ===================================
	with tab3:
	st.header("Language Statistics & System Information")

	# System info
	st.subheader("Detection System")
	st.markdown("""
	- Engine: Rules-based bias detection with lexicon matching
	- Approach: Regular expression pattern matching with word boundaries
	- Case Handling: Case-preserving replacement
	- Precision: 1.000 (zero false positives) across all languages
	""")

	st.markdown("---")

	# Language statistics
	st.subheader("Supported Languages")

	lang_stats = {
	"Language": ["English", "Swahili", "French", "Gikuyu"],
	"F1 Score": [0.786, 0.708, 0.571, 0.260],
	"Precision": [1.000, 1.000, 1.000, 0.814],
	"Recall": [0.647, 0.548, 0.400, 0.155],
	"Lexicon Size": ["515 terms", "151 terms", "51 terms", "1,209 terms"],
	"Ground Truth": ["67 samples", "64 samples", "51 samples", "5,254 samples"],
	"Status": ["Production", "Foundation", "Beta", "Beta"]
	}

	stats_df = pd.DataFrame(lang_stats)
	st.dataframe(stats_df, use_container_width=True, hide_index=True)

	st.markdown("---")

	# Bias categories
	st.subheader("Detected Bias Categories")

	categories = {
	"Category": [
	"Occupation",
	"Pronoun Assumption",
	"Generic Pronoun",
	"Honorific",
	"Morphology"
	],
	"Description": [
	"Gendered job titles (chairman, policeman)",
	"Assumed pronouns (he/she when gender unknown)",
	"Generic male pronouns (he as universal)",
	"Gendered titles (Mr./Mrs., Mzee/Bi)",
	"Gender markers in word structure (wa kike/wa kiume)"
	],
	"Example": [
	"chairman → chair",
	"yeye ni → ni",
	"his → their",
	"Mzee → Mheshimiwa",
	"wa kike → [removed]"
	]
	}

	categories_df = pd.DataFrame(categories)
	st.dataframe(categories_df, use_container_width=True, hide_index=True)

	st.markdown("---")

	# Usage tips
	st.subheader("Usage Tips")
	st.markdown("""
	Best Practices:
	- Always review suggested corrections before accepting them
	- Consider cultural and contextual appropriateness
	- Test with various sentence structures
	- Use batch processing for large datasets
	- Export results for further analysis

	Limitations:
	- Detection is lexicon-based (limited to known patterns)
	- Context-dependent bias may be missed
	- Some languages have smaller lexicons (ongoing expansion)
	- Review all ML-flagged items carefully
	""")

	st.markdown("---")

	# Footer
	st.markdown("""
	<div style='text-align: center; color: gray; padding: 20px;'>
	JuaKazi Gender Sensitization Engine \| Version 0.3<br>
	Perfect Precision: 1.000 (Zero False Positives)<br>
	Culturally Adapted for African Languages
	</div>
	""", unsafe_allow_html=True)