# File: src/dashboard/pages/1_Data_And_Insights.py (project: hackathon_code4change)
# NOTE: removed non-code residue from a web file-view capture (author line,
# commit message "refactored project structure. renamed scheduler dir to src",
# commit hash 6a28f91) so the module docstring below is the first statement.
"""Data & Insights page - Historical analysis, interactive exploration, and parameters.
This page provides three views:
1. Historical Analysis - Pre-generated visualizations from EDA pipeline
2. Interactive Exploration - Dynamic filtering and custom analysis
3. Parameter Summary - Extracted parameters from historical data
"""
from __future__ import annotations
import re
from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import streamlit.components.v1 as components
from src.dashboard.utils import (
get_case_statistics,
load_cleaned_data,
load_cleaned_hearings,
load_param_loader,
)
# Page configuration -- st.set_page_config must be the first Streamlit call
# on the page, so keep it ahead of any other st.* usage.
st.set_page_config(
    page_title="Data & Insights",
    page_icon="chart",
    layout="wide",
)

st.title("Data & Insights")
st.markdown("Historical case data analysis and extracted parameters")

# Data source info (collapsed by default; explains the 50k sampling policy
# applied in load_dashboard_data below)
with st.expander("Data Source Information", expanded=False):
    st.info("""
Data loaded from latest EDA output (`reports/figures/v*/`).
**Performance Note**: For optimal loading speed, both cases and hearings data are sampled to 50,000 rows if larger.
All statistics and visualizations remain representative of the full dataset.
""")
# Load data with sampling for performance
@st.cache_data(ttl=3600)
def load_dashboard_data(max_rows: int = 50_000):
    """Load cases/hearings data, down-sampling each frame to *max_rows* rows.

    The sample-size cap, previously a hard-coded 50 000, is now a parameter
    (default unchanged) so callers can tune the speed/fidelity trade-off.

    Returns:
        Tuple of (cases, hearings, params, stats, total_cases_count,
        total_hearings_count) where the two counts reflect the datasets
        BEFORE sampling, for honest display in the overview metrics.
    """
    cases = load_cleaned_data()
    hearings = load_cleaned_hearings()

    # Record full-dataset sizes before sampling so the UI can report them.
    total_cases_count = len(cases)
    total_hearings_count = len(hearings)

    # Down-sample for dashboard responsiveness; the fixed seed keeps the
    # sample (and every chart derived from it) stable across reruns.
    if len(cases) > max_rows:
        cases = cases.sample(n=max_rows, random_state=42)
    if len(hearings) > max_rows:
        hearings = hearings.sample(n=max_rows, random_state=42)

    params = load_param_loader()
    stats = get_case_statistics(cases) if not cases.empty else {}
    return cases, hearings, params, stats, total_cases_count, total_hearings_count
with st.spinner("Loading data..."):
    try:
        # Sampled frames plus the unsampled totals for the overview metrics.
        cases_df, hearings_df, params, stats, total_cases, total_hearings = (
            load_dashboard_data()
        )
    except Exception as e:
        st.error(f"Error loading data: {e}")
        st.info("Please run the EDA pipeline first: `uv run court-scheduler eda`")
        st.stop()

# Onboarding path: no processed data yet -> offer to run the EDA pipeline.
if cases_df.empty and hearings_df.empty:
    st.warning(
        "No data available. The EDA pipeline needs to be run first to process historical court data."
    )
    st.markdown("""
**The EDA pipeline will:**
- Load raw court data (cases and hearings)
- Clean and validate the data
- Extract statistical parameters (distributions, transition probabilities, durations)
- Generate analysis visualizations
- Save processed data for dashboard use
**Processing time**: ~2-5 minutes depending on data size
""")
    col1, col2 = st.columns([1, 2])
    with col1:
        if st.button("Run EDA Pipeline Now", type="primary", use_container_width=True):
            # Imported lazily so the page loads even when the EDA package is
            # heavy. NOTE(review): the file's other local imports use the
            # `src.` prefix (src.core.ripeness, src.data.config) -- confirm
            # `eda` is still the correct package path after the src/ rename.
            from eda.load_clean import run_load_and_clean
            from eda.exploration import run_exploration
            from eda.parameters import run_parameter_export

            with st.spinner("Running EDA pipeline... This will take a few minutes."):
                try:
                    run_load_and_clean()      # Step 1: load & clean data
                    run_exploration()         # Step 2: generate visualizations
                    run_parameter_export()    # Step 3: extract parameters
                    st.success("EDA pipeline completed successfully!")
                    st.info("Reload this page to see the updated data.")
                    # BUG FIX: the previous nested "Reload Page" st.button
                    # could never fire -- clicking any button reruns the
                    # script, the outer button's condition turns False, and
                    # this branch is skipped entirely. The info message above
                    # already tells the user to reload manually.
                except Exception as e:
                    # Surface a headline error in addition to the details.
                    st.error(f"EDA pipeline failed: {e}")
                    with st.expander("Error details"):
                        st.exception(e)
    with col2:
        with st.expander("Alternative: Run via CLI"):
            st.code("uv run court-scheduler eda", language="bash")
            st.caption("Run this command in your terminal, then refresh this page.")
    st.stop()
# Overview metrics: five headline cards across the top of the page.
st.markdown("### Overview")
col1, col2, col3, col4, col5 = st.columns(5)
with col1:
    # total_cases is the count captured BEFORE sampling, so it reflects the
    # full dataset even when cases_df itself is a 50k sample.
    st.metric("Total Cases", f"{total_cases:,}")
    if "YEAR_FILED" in cases_df.columns:
        year_range = (
            f"{cases_df['YEAR_FILED'].min():.0f}-{cases_df['YEAR_FILED'].max():.0f}"
        )
        st.caption(f"Years: {year_range}")
with col2:
    st.metric("Total Hearings", f"{total_hearings:,}")
    if total_cases > 0:
        # Average computed from the unsampled totals.
        avg_hearings = total_hearings / total_cases
        st.caption(f"Avg: {avg_hearings:.1f}/case")
with col3:
    # Column name differs between data versions; try both spellings.
    if "CASE_TYPE" in cases_df.columns:
        n_case_types = len(cases_df["CASE_TYPE"].unique())
    elif "CaseType" in cases_df.columns:
        n_case_types = len(cases_df["CaseType"].unique())
    else:
        n_case_types = 0
    st.metric("Case Types", n_case_types)
    st.caption("Categories")
with col4:
    # Stage information lives on the hearings frame, not the cases frame.
    if "Remappedstages" in hearings_df.columns:
        n_stages = len(hearings_df["Remappedstages"].dropna().unique())
    else:
        n_stages = 0
    st.metric("Court Stages", n_stages)
    st.caption("Phases")
with col5:
    # Prefer disposal time; fall back to hearing count if unavailable.
    if "DISPOSALTIME_ADJ" in cases_df.columns:
        avg_disposal = cases_df["DISPOSALTIME_ADJ"].median()
        st.metric("Median Disposal", f"{avg_disposal:.0f} days")
        st.caption("Time to resolve")
    elif "N_HEARINGS" in cases_df.columns:
        avg_n_hearings = cases_df["N_HEARINGS"].median()
        st.metric("Median Hearings", f"{avg_n_hearings:.0f}")
        st.caption("Per case")
st.markdown("---")
# Main tabs
tab1, tab2, tab3 = st.tabs(
    ["Historical Analysis", "Interactive Exploration", "Parameters"]
)


def _clean_figure_name(stem: str) -> str:
    """Turn a figure filename stem into a human-readable display title.

    Strips a leading numeric prefix such as ``1_`` or ``11B_`` (digits,
    optional letters, then one underscore) and title-cases the remainder.
    """
    # BUG FIX: the previous pattern "^[\d\w]+_" was broken -- "\w" matches
    # "_" itself, so the greedy match stripped everything up to the LAST
    # underscore ("1_case_type_distribution" became just "Distribution").
    cleaned = re.sub(r"^\d+[A-Za-z]*_", "", stem)
    return cleaned.replace("_", " ").title()


def _render_figure_section(header: str, fig_paths: list[Path]) -> None:
    """Render a category header plus one collapsed expander per HTML figure.

    Replaces four byte-identical copies of the same loop that previously
    lived inline in this tab.
    """
    st.markdown(header)
    for fig_path in fig_paths:
        with st.expander(_clean_figure_name(fig_path.stem), expanded=False):
            html_content = fig_path.read_text(encoding="utf-8")
            components.html(html_content, height=600, scrolling=True)


# TAB 1: Historical Analysis - pre-generated figures from the EDA pipeline.
with tab1:
    st.markdown("""
### Historical Analysis
Pre-generated visualizations from EDA pipeline based on historical court case data.
""")
    figures_dir = Path("reports/figures")
    if not figures_dir.exists():
        st.warning(
            "EDA figures not found. Run the EDA pipeline to generate visualizations."
        )
        st.code("uv run court-scheduler eda")
    else:
        # Find the versioned output directories (reports/figures/v*/).
        version_dirs = [
            d for d in figures_dir.iterdir() if d.is_dir() and d.name.startswith("v")
        ]
        if not version_dirs:
            st.warning(
                "No EDA output directories found. Run the EDA pipeline to generate visualizations."
            )
            st.code("uv run court-scheduler eda")
        else:
            # Use the most recently modified version directory.
            latest_dir = max(version_dirs, key=lambda p: p.stat().st_mtime)
            st.caption(f"Showing visualizations from: {latest_dir.name}")
            # List available figures, excluding deprecated/removed visuals
            # like the monthly waterfall.
            figure_files = [
                f
                for f in sorted(latest_dir.glob("*.html"))
                if "waterfall" not in f.name.lower()
            ]
            if not figure_files:
                st.info(f"No figures found in {latest_dir.name}")
            else:
                st.markdown(f"**{len(figure_files)} visualizations available**")
                # Bucket figures into categories by filename keywords; a
                # figure matching multiple keyword sets lands in the first
                # matching category, everything else falls to "other".
                distribution_figs = [
                    f
                    for f in figure_files
                    if any(x in f.name for x in ["distribution", "filed", "type"])
                ]
                stage_figs = [
                    f
                    for f in figure_files
                    if any(x in f.name for x in ["stage", "sankey", "transition"])
                ]
                time_figs = [
                    f
                    for f in figure_files
                    if any(x in f.name for x in ["monthly", "load", "gap"])
                ]
                other_figs = [
                    f
                    for f in figure_files
                    if f not in distribution_figs + stage_figs + time_figs
                ]
                if distribution_figs:
                    _render_figure_section("#### Case Distributions", distribution_figs)
                if stage_figs:
                    _render_figure_section("#### Stage Analysis", stage_figs)
                if time_figs:
                    _render_figure_section("#### Time-based Analysis", time_figs)
                if other_figs:
                    _render_figure_section("#### Additional Analysis", other_figs)
# TAB 2: Interactive Exploration -- sidebar filters plus summary metrics.
with tab2:
    st.markdown("""
### Interactive Exploration
Apply filters and explore the data dynamically.
""")
    # Sidebar filters (scoped to this tab, hence the explicit header).
    st.sidebar.markdown("---")
    st.sidebar.header("Filters (Interactive Tab)")
    # Column names differ between data versions; probe both spellings.
    case_type_col = (
        "CASE_TYPE"
        if "CASE_TYPE" in cases_df.columns
        else ("CaseType" if "CaseType" in cases_df.columns else None)
    )
    stage_col = "Remappedstages" if "Remappedstages" in hearings_df.columns else None
    # Case type filter (from cases); defaults to the first 5 types.
    if case_type_col:
        available_case_types = cases_df[case_type_col].unique().tolist()
        selected_case_types = st.sidebar.multiselect(
            "Case Types",
            options=available_case_types,
            default=available_case_types[:5]
            if len(available_case_types) > 5
            else available_case_types,
            key="case_type_filter",
        )
    else:
        selected_case_types = []
        st.sidebar.info("No case type data available")
    # Stage filter (from hearings); defaults to the first 10 stages.
    if stage_col:
        available_stages = hearings_df[stage_col].unique().tolist()
        selected_stages = st.sidebar.multiselect(
            "Stages",
            options=available_stages,
            default=available_stages[:10]
            if len(available_stages) > 10
            else available_stages,
            key="stage_filter",
        )
    else:
        selected_stages = []
        st.sidebar.info("No stage data available")
    # Apply filters; .copy() keeps downstream mutations off the cached frames.
    if selected_case_types and case_type_col:
        filtered_cases = cases_df[
            cases_df[case_type_col].isin(selected_case_types)
        ].copy()
    else:
        filtered_cases = cases_df.copy()
    if selected_stages and stage_col:
        filtered_hearings = hearings_df[
            hearings_df[stage_col].isin(selected_stages)
        ].copy()
    else:
        filtered_hearings = hearings_df.copy()
    # Filtered metrics
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        # BUG FIX: the delta previously subtracted `total_cases` (the
        # UNSAMPLED count), so it showed a large negative number even with
        # no filters whenever the data was sampled to 50k rows. Compare
        # against the loaded frame instead.
        st.metric(
            "Filtered Cases",
            f"{len(filtered_cases):,}",
            delta=f"{len(filtered_cases) - len(cases_df)}",
        )
        st.caption(f"Hearings: {len(filtered_hearings):,}")
    with col2:
        if case_type_col and case_type_col in filtered_cases.columns:
            n_types_filtered = len(filtered_cases[case_type_col].unique())
        else:
            n_types_filtered = 0
        st.metric("Case Types", n_types_filtered)
    with col3:
        if stage_col and stage_col in filtered_hearings.columns:
            n_stages_filtered = len(filtered_hearings[stage_col].unique())
        else:
            n_stages_filtered = 0
        st.metric("Stages", n_stages_filtered)
    with col4:
        # Share of hearings whose outcome is exactly "ADJOURNED".
        if "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0:
            adj_rate_filtered = (
                filtered_hearings["Outcome"] == "ADJOURNED"
            ).sum() / len(filtered_hearings)
            st.metric("Adjournment Rate", f"{adj_rate_filtered:.1%}")
        else:
            st.metric("Adjournment Rate", "N/A")
    st.markdown("---")
    # Sub-tabs for different analyses
    sub_tab1, sub_tab2, sub_tab3, sub_tab4 = st.tabs(
        ["Case Distribution", "Stage Analysis", "Adjournment Patterns", "Raw Data"]
    )
    # Sub-tab 1: bar + pie charts of case counts per case type.
    with sub_tab1:
        st.markdown("#### Case Distribution by Type")
        if (
            case_type_col
            and case_type_col in filtered_cases.columns
            and len(filtered_cases) > 0
        ):
            # Compute value counts and ensure proper structure
            case_type_counts = (
                filtered_cases[case_type_col].value_counts().reset_index()
            )
            # Rename columns for clarity (works across pandas versions,
            # which name the value_counts columns differently)
            case_type_counts.columns = ["CaseType", "Count"]
            # Debug data preview (collapsed; intentionally left visible)
            with st.expander("Data Preview (Debug)", expanded=False):
                st.write(f"Total rows: {len(case_type_counts)}")
                st.dataframe(case_type_counts.head(10))
            col1, col2 = st.columns(2)
            with col1:
                fig = px.bar(
                    case_type_counts,
                    x="CaseType",
                    y="Count",
                    title="Cases by Type",
                    labels={"CaseType": "Case Type", "Count": "Count"},
                    color="Count",
                    color_continuous_scale="Blues",
                )
                fig.update_layout(xaxis_tickangle=-45, height=400)
                st.plotly_chart(fig, use_container_width=True)
            with col2:
                fig_pie = px.pie(
                    case_type_counts,
                    values="Count",
                    names="CaseType",
                    title="Case Type Distribution",
                )
                fig_pie.update_layout(height=400)
                st.plotly_chart(fig_pie, use_container_width=True)
        else:
            st.info("No data available for selected filters")
    # Sub-tab 2: horizontal bar chart of the 15 most frequent stages.
    with sub_tab2:
        st.markdown("#### Stage Analysis")
        if (
            stage_col
            and stage_col in filtered_hearings.columns
            and len(filtered_hearings) > 0
        ):
            stage_counts = filtered_hearings[stage_col].value_counts().reset_index()
            stage_counts.columns = ["Stage", "Count"]
            fig = px.bar(
                stage_counts.head(15),
                x="Count",
                y="Stage",
                orientation="h",
                title="Top 15 Stages by Case Count",
                labels={"Stage": "Stage", "Count": "Count"},
                color="Count",
                color_continuous_scale="Greens",
            )
            fig.update_layout(height=600)
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No data available for selected filters")
with sub_tab3:
st.markdown("#### Adjournment Patterns")
if (
"Outcome" in filtered_hearings.columns
and len(filtered_hearings) > 0
and case_type_col
and stage_col
):
col1, col2 = st.columns(2)
with col1:
st.markdown("**Overall Adjournment Rate**")
total_hearings = len(filtered_hearings)
adjourned = (filtered_hearings["Outcome"] == "ADJOURNED").sum()
not_adjourned = total_hearings - adjourned
outcome_df = pd.DataFrame(
{
"Outcome": ["ADJOURNED", "NOT ADJOURNED"],
"Count": [adjourned, not_adjourned],
}
)
fig_pie = px.pie(
outcome_df,
values="Count",
names="Outcome",
title=f"Outcome Distribution (Total: {total_hearings:,})",
color="Outcome",
color_discrete_map={
"ADJOURNED": "#ef4444",
"NOT ADJOURNED": "#22c55e",
},
)
fig_pie.update_layout(height=400)
st.plotly_chart(fig_pie, use_container_width=True)
with col2:
st.markdown("**By Stage**")
adj_by_stage = (
filtered_hearings.groupby(stage_col)["Outcome"]
.apply(
lambda x: (x == "ADJOURNED").sum() / len(x) if len(x) > 0 else 0
)
.reset_index()
)
adj_by_stage.columns = ["Stage", "Rate"]
adj_by_stage["Rate"] = adj_by_stage["Rate"] * 100
fig = px.bar(
adj_by_stage.sort_values("Rate", ascending=False).head(10),
x="Rate",
y="Stage",
orientation="h",
title="Top 10 Stages by Adjournment Rate",
labels={"Stage": "Stage", "Rate": "Rate (%)"},
color="Rate",
color_continuous_scale="Oranges",
)
fig.update_layout(height=400)
st.plotly_chart(fig, use_container_width=True)
else:
st.info("No data available for selected filters")
with sub_tab4:
st.markdown("#### Raw Data")
data_view = st.radio(
"Select data to view:", ["Cases", "Hearings"], horizontal=True
)
if data_view == "Cases":
st.dataframe(
filtered_cases.head(500),
use_container_width=True,
height=600,
)
st.markdown(
f"**Showing first 500 of {len(filtered_cases):,} filtered cases**"
)
# Download button
csv = filtered_cases.to_csv(index=False).encode("utf-8")
st.download_button(
label="Download filtered cases as CSV",
data=csv,
file_name="filtered_cases.csv",
mime="text/csv",
)
else:
st.dataframe(
filtered_hearings.head(500),
use_container_width=True,
height=600,
)
st.markdown(
f"**Showing first 500 of {len(filtered_hearings):,} filtered hearings**"
)
# Download button
csv = filtered_hearings.to_csv(index=False).encode("utf-8")
st.download_button(
label="Download filtered hearings as CSV",
data=csv,
file_name="filtered_hearings.csv",
mime="text/csv",
)
# TAB 3: Parameter Summary -- statistics extracted by the EDA pipeline.
with tab3:
    st.markdown("""
### Parameter Summary
Statistical parameters extracted from historical data, used throughout the system.
""")
    if not params:
        st.warning("Parameters not loaded. Run EDA pipeline to extract parameters.")
        st.code("uv run court-scheduler eda")
    else:
        # Case Types
        st.markdown("#### Case Types")
        if "case_types" in params and params["case_types"]:
            case_types_df = pd.DataFrame(
                {
                    "Case Type": params["case_types"],
                    "Index": range(len(params["case_types"])),
                }
            )
            st.dataframe(case_types_df, use_container_width=True, hide_index=True)
            st.caption(f"Total: {len(params['case_types'])} case types")
        else:
            st.info("No case types found")
        st.markdown("---")
        # Stages
        st.markdown("#### Stages")
        if "stages" in params and params["stages"]:
            stages_df = pd.DataFrame(
                {"Stage": params["stages"], "Index": range(len(params["stages"]))}
            )
            st.dataframe(stages_df, use_container_width=True, hide_index=True)
            st.caption(f"Total: {len(params['stages'])} stages")
        else:
            st.info("No stages found")
        st.markdown("---")
        # Stage Transitions
        st.markdown("#### Stage Transition Graph")
        if "stage_graph" in params and params["stage_graph"]:
            st.markdown("**Sample transitions from each stage:**")
            # Only the first 5 stages are shown, to keep the page light.
            sample_stages = list(params["stage_graph"].keys())[:5]
            for stage in sample_stages:
                transitions = params["stage_graph"][stage]
                if transitions:
                    with st.expander(f"From: {stage}"):
                        trans_df = pd.DataFrame(transitions)
                        if not trans_df.empty:
                            st.dataframe(
                                trans_df, use_container_width=True, hide_index=True
                            )
            st.caption(
                f"Total: {len(params['stage_graph'])} stages with transition data"
            )
        else:
            st.info("No stage transition data found")
        st.markdown("---")
        # Adjournment Statistics
        st.markdown("#### Adjournment Probabilities")
        if "adjournment_stats" in params and params["adjournment_stats"]:
            st.markdown("**Adjournment probability by stage and case type:**")
            # Create a heatmap, capped in both dimensions for readability.
            adj_stats = params["adjournment_stats"]
            stages_list = list(adj_stats.keys())[
                :20
            ]  # Limit to 20 stages for readability
            case_types_list = params.get("case_types", [])[
                :15
            ]  # Limit to 15 case types
            if stages_list and case_types_list:
                heatmap_data = []
                for stage in stages_list:
                    row = []
                    for ct in case_types_list:
                        # Missing stage/case-type pairs default to probability 0.
                        prob = adj_stats.get(stage, {}).get(ct, 0)
                        row.append(prob * 100)
                    heatmap_data.append(row)
                fig = go.Figure(
                    data=go.Heatmap(
                        z=heatmap_data,
                        x=case_types_list,
                        y=stages_list,
                        colorscale="RdYlGn_r",
                        text=[[f"{val:.1f}%" for val in row] for row in heatmap_data],
                        texttemplate="%{text}",
                        textfont={"size": 8},
                        colorbar=dict(title="Adj. Prob. (%)"),
                    )
                )
                fig.update_layout(
                    title="Adjournment Probability by Stage and Case Type",
                    xaxis_title="Case Type",
                    yaxis_title="Stage",
                    height=700,
                )
                st.plotly_chart(fig, use_container_width=True)
                st.caption("Showing top 20 stages and top 15 case types")
            else:
                st.info("Insufficient data for heatmap")
        else:
            st.info("No adjournment statistics found")
    st.markdown("---")
    # System Configuration Section
    # NOTE(review): indentation was lost in this copy; this section is
    # assumed to render inside the Parameters tab (it documents the same
    # parameter system) -- confirm nesting against the repository.
    st.markdown("### System Configuration")
    st.info("""
These parameters control how the system analyzes historical data and generates simulation cases.
Most are derived from historical data patterns, while some are configurable thresholds.
""")
    config_tab1, config_tab2, config_tab3, config_tab4 = st.tabs(
        [
            "EDA Parameters",
            "Ripeness Classifier",
            "Case Generator",
            "Simulation Defaults",
        ]
    )
    # Config tab 1: static documentation of the EDA analysis parameters.
    with config_tab1:
        st.markdown("#### EDA Analysis Parameters")
        st.markdown("**These parameters control historical data analysis:**")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Readiness Score Calculation**")
            st.code(
                """
Readiness Score =
0.4 * (hearings / 50) [capped at 1.0]
+ 0.3 * (100 / gap_median) [capped at 1.0]
+ 0.3 if stage in [ARGUMENTS, EVIDENCE, ORDERS/JUDGMENT]
+ 0.1 otherwise
""",
                language="text",
            )
            st.caption("Weights: 40% hearing count, 30% gap, 30% stage")
            st.markdown("**Alert Thresholds**")
            st.code(
                """
ALERT_P90_TYPE: Disposal time > P90 within case type
ALERT_HEARING_HEAVY: Hearing count > P90 within case type
ALERT_LONG_GAP: Median gap > P90 within case type
""",
                language="text",
            )
        with col2:
            st.markdown("**Adjournment Proxy Detection**")
            st.code(
                """
Gap threshold: 1.3x median gap for that stage
If hearing_gap > 1.3 * stage_median_gap:
is_adjourn_proxy = True
""",
                language="python",
            )
            st.markdown("**Not-Reached Keywords**")
            st.code(
                """
"NOT REACHED", "NR",
"NOT TAKEN UP", "NOT HEARD"
""",
                language="text",
            )
        st.markdown("---")
        st.markdown("**Stage Order (for transition analysis)**")
        st.code(
            """
1. PRE-ADMISSION
2. ADMISSION
3. FRAMING OF CHARGES
4. EVIDENCE
5. ARGUMENTS
6. INTERLOCUTORY APPLICATION
7. SETTLEMENT
8. ORDERS / JUDGMENT
9. FINAL DISPOSAL
10. OTHER
""",
            language="text",
        )
        st.caption("Only forward transitions are counted (by index order)")
    # Config tab 2: ripeness thresholds, read live from the classifier.
    with config_tab2:
        st.markdown("#### Ripeness Classification Thresholds")
        st.markdown("""
These thresholds determine if a case is RIPE (ready for hearing) or UNRIPE (has bottlenecks).
""")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Classification Thresholds**")
            # Imported lazily; pulls current threshold values at render time.
            from src.core.ripeness import RipenessClassifier
            thresholds = RipenessClassifier.get_current_thresholds()
            thresh_df = pd.DataFrame(
                [
                    {
                        "Parameter": "MIN_SERVICE_HEARINGS",
                        "Value": thresholds["MIN_SERVICE_HEARINGS"],
                        "Description": "Minimum hearings to confirm service/compliance",
                    },
                    {
                        "Parameter": "MIN_STAGE_DAYS",
                        "Value": thresholds["MIN_STAGE_DAYS"],
                        "Description": "Minimum days in stage to show compliance efforts",
                    },
                    {
                        "Parameter": "MIN_CASE_AGE_DAYS",
                        "Value": thresholds["MIN_CASE_AGE_DAYS"],
                        "Description": "Minimum case maturity before assuming readiness",
                    },
                ]
            )
            st.dataframe(thresh_df, use_container_width=True, hide_index=True)
            st.markdown("**ADMISSION Stage Rule**")
            st.code(
                """
if stage == ADMISSION and hearing_count < 3:
return UNRIPE_SUMMONS
""",
                language="python",
            )
            st.markdown("**Stuck Case Detection**")
            st.code(
                """
if hearing_count > 10:
avg_gap = age_days / hearing_count
if avg_gap > 60 days:
return UNRIPE_PARTY
""",
                language="python",
            )
        with col2:
            st.markdown("**Ripeness Priority Multipliers**")
            st.code(
                """
RIPE cases: 1.5x priority
UNRIPE cases: 0.7x priority
""",
                language="text",
            )
            st.markdown("**Bottleneck Keywords**")
            bottleneck_df = pd.DataFrame(
                [
                    {"Keyword": "SUMMONS", "Type": "UNRIPE_SUMMONS"},
                    {"Keyword": "NOTICE", "Type": "UNRIPE_SUMMONS"},
                    {"Keyword": "ISSUE", "Type": "UNRIPE_SUMMONS"},
                    {"Keyword": "SERVICE", "Type": "UNRIPE_SUMMONS"},
                    {"Keyword": "STAY", "Type": "UNRIPE_DEPENDENT"},
                    {"Keyword": "PENDING", "Type": "UNRIPE_DEPENDENT"},
                ]
            )
            st.dataframe(bottleneck_df, use_container_width=True, hide_index=True)
            st.markdown("**Ripe Stage Keywords**")
            st.code(
                '"ARGUMENTS", "HEARING", "FINAL", "JUDGMENT", "ORDERS", "DISPOSAL"',
                language="text",
            )
        st.markdown("---")
        st.markdown("**Ripening Time Estimates (days)**")
        ripening_df = pd.DataFrame(
            [
                {"Bottleneck Type": "UNRIPE_SUMMONS", "Estimated Days": 30},
                {"Bottleneck Type": "UNRIPE_DEPENDENT", "Estimated Days": 60},
                {"Bottleneck Type": "UNRIPE_PARTY", "Estimated Days": 14},
                {"Bottleneck Type": "UNRIPE_DOCUMENT", "Estimated Days": 21},
            ]
        )
        st.dataframe(ripening_df, use_container_width=True, hide_index=True)
    # Config tab 3: synthetic case generator settings, read from config.
    with config_tab3:
        st.markdown("#### Case Generator Configuration")
        st.markdown("""
These parameters control synthetic case generation for simulations.
""")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Default Case Type Distribution**")
            from src.data.config import CASE_TYPE_DISTRIBUTION
            dist_df = pd.DataFrame(
                [
                    {"Case Type": ct, "Probability": f"{p * 100:.1f}%"}
                    for ct, p in CASE_TYPE_DISTRIBUTION.items()
                ]
            )
            st.dataframe(dist_df, use_container_width=True, hide_index=True)
            st.caption("Based on historical distribution from EDA")
            st.markdown("**Urgent Case Percentage**")
            from src.data.config import URGENT_CASE_PERCENTAGE
            st.metric("Urgent Cases", f"{URGENT_CASE_PERCENTAGE * 100:.1f}%")
        with col2:
            st.markdown("**Monthly Seasonality Factors**")
            from src.data.config import MONTHLY_SEASONALITY
            season_df = pd.DataFrame(
                [
                    # Months without an explicit factor default to 1.0 (average).
                    {"Month": i, "Factor": MONTHLY_SEASONALITY.get(i, 1.0)}
                    for i in range(1, 13)
                ]
            )
            st.dataframe(season_df, use_container_width=True, hide_index=True)
            st.caption("1.0 = average, >1.0 = more cases, <1.0 = fewer cases")
        st.markdown("---")
        st.markdown("**Initial Case State Generation**")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Hearing History Simulation**")
            st.code(
                """
if days_since_filed > 30:
hearing_count = max(1, days_since_filed // 30)
# Last hearing: 7-30 days before sim start
days_before_end = random(7, 30)
last_hearing_date = end_date - days_before_end
days_since_last_hearing = days_before_end
""",
                language="python",
            )
            st.caption("Ensures staggered eligibility, not all at once")
        with col2:
            st.markdown("**Ripeness Purpose Assignment**")
            st.code(
                """
Bottleneck purposes (20% probability):
- ISSUE SUMMONS, FOR NOTICE
- AWAIT SERVICE OF NOTICE
- STAY APPLICATION PENDING
- FOR ORDERS
Ripe purposes (80% probability):
- ARGUMENTS, HEARING
- FINAL ARGUMENTS, FOR JUDGMENT
- EVIDENCE
""",
                language="text",
            )
            st.caption(
                "Early ADMISSION: 40% bottleneck, Advanced stages: mostly ripe"
            )
    # Config tab 4: simulation defaults; capacity stats come from params.
    with config_tab4:
        st.markdown("#### Simulation Defaults")
        st.markdown("""
Default values used in simulation when not explicitly configured by user.
""")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Duration Estimation**")
            st.code(
                """
Method: lognormal
- Uses historical median and P90
- Ensures realistic variance
- Min duration: 1 day
Formula:
sigma = (log(p90) - log(median)) / 1.2816
mu = log(median)
duration = exp(mu + sigma * randn())
""",
                language="text",
            )
            st.markdown("**Courtroom Capacity**")
            if params and "court_capacity_global" in params:
                cap = params["court_capacity_global"]
                # The 151 / 200 fallbacks apply only when the stats dict is
                # missing the expected keys.
                st.metric(
                    "Median slots/day", f"{cap.get('slots_median_global', 151):.0f}"
                )
                st.metric(
                    "P90 slots/day", f"{cap.get('slots_p90_global', 200):.0f}"
                )
            else:
                st.info("Run EDA to load capacity statistics")
        with col2:
            st.markdown("**Policy Defaults**")
            st.code(
                """
READINESS policy weights:
- age: 0.2
- hearings: 0.2
- urgency: 0.3
- stage: 0.3
Minimum hearing gap: 7 days
RL policy:
- Model: latest from models/ directory
- Fallback: readiness policy
""",
                language="text",
            )
            st.markdown("**Working Days**")
            st.code(
                """
Excludes:
- Weekends (Saturday, Sunday)
- National holidays (loaded from config)
- Court closure days
""",
                language="text",
            )

# Footer
st.markdown("---")
st.caption("Data loaded from EDA pipeline. Use refresh button to reload.")