# File: src/dashboard/pages/1_Data_And_Insights.py (project: hackathon_code4change)
# NOTE: removed non-code residue from a web file-view capture (author line,
# commit message "refactored project structure. renamed scheduler dir to src",
# commit hash 6a28f91) so the module docstring below is the first statement.
"""Data & Insights page - Historical analysis, interactive exploration, and parameters.
This page provides three views:
1. Historical Analysis - Pre-generated visualizations from EDA pipeline
2. Interactive Exploration - Dynamic filtering and custom analysis
3. Parameter Summary - Extracted parameters from historical data
"""
from __future__ import annotations
import re
from pathlib import Path
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import streamlit.components.v1 as components
from src.dashboard.utils import (
get_case_statistics,
load_cleaned_data,
load_cleaned_hearings,
load_param_loader,
)
# Page configuration -- st.set_page_config must be the first Streamlit call
# on the page, so keep it ahead of any other st.* usage.
st.set_page_config(
    page_title="Data & Insights",
    page_icon="chart",
    layout="wide",
)

st.title("Data & Insights")
st.markdown("Historical case data analysis and extracted parameters")

# Data source info (collapsed by default; explains the 50k sampling policy
# applied in load_dashboard_data below)
with st.expander("Data Source Information", expanded=False):
    st.info("""
Data loaded from latest EDA output (`reports/figures/v*/`).
**Performance Note**: For optimal loading speed, both cases and hearings data are sampled to 50,000 rows if larger.
All statistics and visualizations remain representative of the full dataset.
""")
# Load data with sampling for performance
@st.cache_data(ttl=3600)
def load_dashboard_data(max_rows: int = 50_000):
    """Load cases/hearings data, down-sampling each frame to *max_rows* rows.

    The sample-size cap, previously a hard-coded 50 000, is now a parameter
    (default unchanged) so callers can tune the speed/fidelity trade-off.

    Returns:
        Tuple of (cases, hearings, params, stats, total_cases_count,
        total_hearings_count) where the two counts reflect the datasets
        BEFORE sampling, for honest display in the overview metrics.
    """
    cases = load_cleaned_data()
    hearings = load_cleaned_hearings()

    # Record full-dataset sizes before sampling so the UI can report them.
    total_cases_count = len(cases)
    total_hearings_count = len(hearings)

    # Down-sample for dashboard responsiveness; the fixed seed keeps the
    # sample (and every chart derived from it) stable across reruns.
    if len(cases) > max_rows:
        cases = cases.sample(n=max_rows, random_state=42)
    if len(hearings) > max_rows:
        hearings = hearings.sample(n=max_rows, random_state=42)

    params = load_param_loader()
    stats = get_case_statistics(cases) if not cases.empty else {}
    return cases, hearings, params, stats, total_cases_count, total_hearings_count
with st.spinner("Loading data..."):
    try:
        # Sampled frames plus the unsampled totals for the overview metrics.
        cases_df, hearings_df, params, stats, total_cases, total_hearings = (
            load_dashboard_data()
        )
    except Exception as e:
        st.error(f"Error loading data: {e}")
        st.info("Please run the EDA pipeline first: `uv run court-scheduler eda`")
        st.stop()

# Onboarding path: no processed data yet -> offer to run the EDA pipeline.
if cases_df.empty and hearings_df.empty:
    st.warning(
        "No data available. The EDA pipeline needs to be run first to process historical court data."
    )
    st.markdown("""
**The EDA pipeline will:**
- Load raw court data (cases and hearings)
- Clean and validate the data
- Extract statistical parameters (distributions, transition probabilities, durations)
- Generate analysis visualizations
- Save processed data for dashboard use
**Processing time**: ~2-5 minutes depending on data size
""")
    col1, col2 = st.columns([1, 2])
    with col1:
        if st.button("Run EDA Pipeline Now", type="primary", use_container_width=True):
            # Imported lazily so the page loads even when the EDA package is
            # heavy. NOTE(review): the file's other local imports use the
            # `src.` prefix (src.core.ripeness, src.data.config) -- confirm
            # `eda` is still the correct package path after the src/ rename.
            from eda.load_clean import run_load_and_clean
            from eda.exploration import run_exploration
            from eda.parameters import run_parameter_export

            with st.spinner("Running EDA pipeline... This will take a few minutes."):
                try:
                    run_load_and_clean()      # Step 1: load & clean data
                    run_exploration()         # Step 2: generate visualizations
                    run_parameter_export()    # Step 3: extract parameters
                    st.success("EDA pipeline completed successfully!")
                    st.info("Reload this page to see the updated data.")
                    # BUG FIX: the previous nested "Reload Page" st.button
                    # could never fire -- clicking any button reruns the
                    # script, the outer button's condition turns False, and
                    # this branch is skipped entirely. The info message above
                    # already tells the user to reload manually.
                except Exception as e:
                    # Surface a headline error in addition to the details.
                    st.error(f"EDA pipeline failed: {e}")
                    with st.expander("Error details"):
                        st.exception(e)
    with col2:
        with st.expander("Alternative: Run via CLI"):
            st.code("uv run court-scheduler eda", language="bash")
            st.caption("Run this command in your terminal, then refresh this page.")
    st.stop()
# Overview metrics: five headline cards across the top of the page.
st.markdown("### Overview")
col1, col2, col3, col4, col5 = st.columns(5)
with col1:
    # total_cases is the count captured BEFORE sampling, so it reflects the
    # full dataset even when cases_df itself is a 50k sample.
    st.metric("Total Cases", f"{total_cases:,}")
    if "YEAR_FILED" in cases_df.columns:
        year_range = (
            f"{cases_df['YEAR_FILED'].min():.0f}-{cases_df['YEAR_FILED'].max():.0f}"
        )
        st.caption(f"Years: {year_range}")
with col2:
    st.metric("Total Hearings", f"{total_hearings:,}")
    if total_cases > 0:
        # Average computed from the unsampled totals.
        avg_hearings = total_hearings / total_cases
        st.caption(f"Avg: {avg_hearings:.1f}/case")
with col3:
    # Column name differs between data versions; try both spellings.
    if "CASE_TYPE" in cases_df.columns:
        n_case_types = len(cases_df["CASE_TYPE"].unique())
    elif "CaseType" in cases_df.columns:
        n_case_types = len(cases_df["CaseType"].unique())
    else:
        n_case_types = 0
    st.metric("Case Types", n_case_types)
    st.caption("Categories")
with col4:
    # Stage information lives on the hearings frame, not the cases frame.
    if "Remappedstages" in hearings_df.columns:
        n_stages = len(hearings_df["Remappedstages"].dropna().unique())
    else:
        n_stages = 0
    st.metric("Court Stages", n_stages)
    st.caption("Phases")
with col5:
    # Prefer disposal time; fall back to hearing count if unavailable.
    if "DISPOSALTIME_ADJ" in cases_df.columns:
        avg_disposal = cases_df["DISPOSALTIME_ADJ"].median()
        st.metric("Median Disposal", f"{avg_disposal:.0f} days")
        st.caption("Time to resolve")
    elif "N_HEARINGS" in cases_df.columns:
        avg_n_hearings = cases_df["N_HEARINGS"].median()
        st.metric("Median Hearings", f"{avg_n_hearings:.0f}")
        st.caption("Per case")
st.markdown("---")
# Main tabs
tab1, tab2, tab3 = st.tabs(
    ["Historical Analysis", "Interactive Exploration", "Parameters"]
)


def _clean_figure_name(stem: str) -> str:
    """Turn a figure filename stem into a human-readable display title.

    Strips a leading numeric prefix such as ``1_`` or ``11B_`` (digits,
    optional letters, then one underscore) and title-cases the remainder.
    """
    # BUG FIX: the previous pattern "^[\d\w]+_" was broken -- "\w" matches
    # "_" itself, so the greedy match stripped everything up to the LAST
    # underscore ("1_case_type_distribution" became just "Distribution").
    cleaned = re.sub(r"^\d+[A-Za-z]*_", "", stem)
    return cleaned.replace("_", " ").title()


def _render_figure_section(header: str, fig_paths: list[Path]) -> None:
    """Render a category header plus one collapsed expander per HTML figure.

    Replaces four byte-identical copies of the same loop that previously
    lived inline in this tab.
    """
    st.markdown(header)
    for fig_path in fig_paths:
        with st.expander(_clean_figure_name(fig_path.stem), expanded=False):
            html_content = fig_path.read_text(encoding="utf-8")
            components.html(html_content, height=600, scrolling=True)


# TAB 1: Historical Analysis - pre-generated figures from the EDA pipeline.
with tab1:
    st.markdown("""
### Historical Analysis
Pre-generated visualizations from EDA pipeline based on historical court case data.
""")
    figures_dir = Path("reports/figures")
    if not figures_dir.exists():
        st.warning(
            "EDA figures not found. Run the EDA pipeline to generate visualizations."
        )
        st.code("uv run court-scheduler eda")
    else:
        # Find the versioned output directories (reports/figures/v*/).
        version_dirs = [
            d for d in figures_dir.iterdir() if d.is_dir() and d.name.startswith("v")
        ]
        if not version_dirs:
            st.warning(
                "No EDA output directories found. Run the EDA pipeline to generate visualizations."
            )
            st.code("uv run court-scheduler eda")
        else:
            # Use the most recently modified version directory.
            latest_dir = max(version_dirs, key=lambda p: p.stat().st_mtime)
            st.caption(f"Showing visualizations from: {latest_dir.name}")
            # List available figures, excluding deprecated/removed visuals
            # like the monthly waterfall.
            figure_files = [
                f
                for f in sorted(latest_dir.glob("*.html"))
                if "waterfall" not in f.name.lower()
            ]
            if not figure_files:
                st.info(f"No figures found in {latest_dir.name}")
            else:
                st.markdown(f"**{len(figure_files)} visualizations available**")
                # Bucket figures into categories by filename keywords; a
                # figure matching multiple keyword sets lands in the first
                # matching category, everything else falls to "other".
                distribution_figs = [
                    f
                    for f in figure_files
                    if any(x in f.name for x in ["distribution", "filed", "type"])
                ]
                stage_figs = [
                    f
                    for f in figure_files
                    if any(x in f.name for x in ["stage", "sankey", "transition"])
                ]
                time_figs = [
                    f
                    for f in figure_files
                    if any(x in f.name for x in ["monthly", "load", "gap"])
                ]
                other_figs = [
                    f
                    for f in figure_files
                    if f not in distribution_figs + stage_figs + time_figs
                ]
                if distribution_figs:
                    _render_figure_section("#### Case Distributions", distribution_figs)
                if stage_figs:
                    _render_figure_section("#### Stage Analysis", stage_figs)
                if time_figs:
                    _render_figure_section("#### Time-based Analysis", time_figs)
                if other_figs:
                    _render_figure_section("#### Additional Analysis", other_figs)
# TAB 2: Interactive Exploration -- sidebar filters plus summary metrics.
with tab2:
    st.markdown("""
### Interactive Exploration
Apply filters and explore the data dynamically.
""")
    # Sidebar filters (scoped to this tab, hence the explicit header).
    st.sidebar.markdown("---")
    st.sidebar.header("Filters (Interactive Tab)")
    # Column names differ between data versions; probe both spellings.
    case_type_col = (
        "CASE_TYPE"
        if "CASE_TYPE" in cases_df.columns
        else ("CaseType" if "CaseType" in cases_df.columns else None)
    )
    stage_col = "Remappedstages" if "Remappedstages" in hearings_df.columns else None
    # Case type filter (from cases); defaults to the first 5 types.
    if case_type_col:
        available_case_types = cases_df[case_type_col].unique().tolist()
        selected_case_types = st.sidebar.multiselect(
            "Case Types",
            options=available_case_types,
            default=available_case_types[:5]
            if len(available_case_types) > 5
            else available_case_types,
            key="case_type_filter",
        )
    else:
        selected_case_types = []
        st.sidebar.info("No case type data available")
    # Stage filter (from hearings); defaults to the first 10 stages.
    if stage_col:
        available_stages = hearings_df[stage_col].unique().tolist()
        selected_stages = st.sidebar.multiselect(
            "Stages",
            options=available_stages,
            default=available_stages[:10]
            if len(available_stages) > 10
            else available_stages,
            key="stage_filter",
        )
    else:
        selected_stages = []
        st.sidebar.info("No stage data available")
    # Apply filters; .copy() keeps downstream mutations off the cached frames.
    if selected_case_types and case_type_col:
        filtered_cases = cases_df[
            cases_df[case_type_col].isin(selected_case_types)
        ].copy()
    else:
        filtered_cases = cases_df.copy()
    if selected_stages and stage_col:
        filtered_hearings = hearings_df[
            hearings_df[stage_col].isin(selected_stages)
        ].copy()
    else:
        filtered_hearings = hearings_df.copy()
    # Filtered metrics
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        # BUG FIX: the delta previously subtracted `total_cases` (the
        # UNSAMPLED count), so it showed a large negative number even with
        # no filters whenever the data was sampled to 50k rows. Compare
        # against the loaded frame instead.
        st.metric(
            "Filtered Cases",
            f"{len(filtered_cases):,}",
            delta=f"{len(filtered_cases) - len(cases_df)}",
        )
        st.caption(f"Hearings: {len(filtered_hearings):,}")
    with col2:
        if case_type_col and case_type_col in filtered_cases.columns:
            n_types_filtered = len(filtered_cases[case_type_col].unique())
        else:
            n_types_filtered = 0
        st.metric("Case Types", n_types_filtered)
    with col3:
        if stage_col and stage_col in filtered_hearings.columns:
            n_stages_filtered = len(filtered_hearings[stage_col].unique())
        else:
            n_stages_filtered = 0
        st.metric("Stages", n_stages_filtered)
    with col4:
        # Share of hearings whose outcome is exactly "ADJOURNED".
        if "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0:
            adj_rate_filtered = (
                filtered_hearings["Outcome"] == "ADJOURNED"
            ).sum() / len(filtered_hearings)
            st.metric("Adjournment Rate", f"{adj_rate_filtered:.1%}")
        else:
            st.metric("Adjournment Rate", "N/A")
    st.markdown("---")
    # Sub-tabs for different analyses
    sub_tab1, sub_tab2, sub_tab3, sub_tab4 = st.tabs(
        ["Case Distribution", "Stage Analysis", "Adjournment Patterns", "Raw Data"]
    )
    # Sub-tab 1: bar + pie charts of case counts per case type.
    with sub_tab1:
        st.markdown("#### Case Distribution by Type")
        if (
            case_type_col
            and case_type_col in filtered_cases.columns
            and len(filtered_cases) > 0
        ):
            # Compute value counts and ensure proper structure
            case_type_counts = (
                filtered_cases[case_type_col].value_counts().reset_index()
            )
            # Rename columns for clarity (works across pandas versions,
            # which name the value_counts columns differently)
            case_type_counts.columns = ["CaseType", "Count"]
            # Debug data preview (collapsed; intentionally left visible)
            with st.expander("Data Preview (Debug)", expanded=False):
                st.write(f"Total rows: {len(case_type_counts)}")
                st.dataframe(case_type_counts.head(10))
            col1, col2 = st.columns(2)
            with col1:
                fig = px.bar(
                    case_type_counts,
                    x="CaseType",
                    y="Count",
                    title="Cases by Type",
                    labels={"CaseType": "Case Type", "Count": "Count"},
                    color="Count",
                    color_continuous_scale="Blues",
                )
                fig.update_layout(xaxis_tickangle=-45, height=400)
                st.plotly_chart(fig, use_container_width=True)
            with col2:
                fig_pie = px.pie(
                    case_type_counts,
                    values="Count",
                    names="CaseType",
                    title="Case Type Distribution",
                )
                fig_pie.update_layout(height=400)
                st.plotly_chart(fig_pie, use_container_width=True)
        else:
            st.info("No data available for selected filters")
    # Sub-tab 2: horizontal bar chart of the 15 most frequent stages.
    with sub_tab2:
        st.markdown("#### Stage Analysis")
        if (
            stage_col
            and stage_col in filtered_hearings.columns
            and len(filtered_hearings) > 0
        ):
            stage_counts = filtered_hearings[stage_col].value_counts().reset_index()
            stage_counts.columns = ["Stage", "Count"]
            fig = px.bar(
                stage_counts.head(15),
                x="Count",
                y="Stage",
                orientation="h",
                title="Top 15 Stages by Case Count",
                labels={"Stage": "Stage", "Count": "Count"},
                color="Count",
                color_continuous_scale="Greens",
            )
            fig.update_layout(height=600)
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No data available for selected filters")
with sub_tab3:
st.markdown("#### Adjournment Patterns")
if (
"Outcome" in filtered_hearings.columns
and len(filtered_hearings) > 0
and case_type_col
and stage_col
):
col1, col2 = st.columns(2)
with col1:
st.markdown("**Overall Adjournment Rate**")
total_hearings = len(filtered_hearings)
adjourned = (filtered_hearings["Outcome"] == "ADJOURNED").sum()
not_adjourned = total_hearings - adjourned
outcome_df = pd.DataFrame(
{
"Outcome": ["ADJOURNED", "NOT ADJOURNED"],
"Count": [adjourned, not_adjourned],
}
)
fig_pie = px.pie(
outcome_df,
values="Count",
names="Outcome",
title=f"Outcome Distribution (Total: {total_hearings:,})",
color="Outcome",
color_discrete_map={
"ADJOURNED": "#ef4444",
"NOT ADJOURNED": "#22c55e",
},
)
fig_pie.update_layout(height=400)
st.plotly_chart(fig_pie, use_container_width=True)
with col2:
st.markdown("**By Stage**")
adj_by_stage = (
filtered_hearings.groupby(stage_col)["Outcome"]
.apply(
lambda x: (x == "ADJOURNED").sum() / len(x) if len(x) > 0 else 0
)
.reset_index()
)
adj_by_stage.columns = ["Stage", "Rate"]
adj_by_stage["Rate"] = adj_by_stage["Rate"] * 100
fig = px.bar(
adj_by_stage.sort_values("Rate", ascending=False).head(10),
x="Rate",
y="Stage",
orientation="h",
title="Top 10 Stages by Adjournment Rate",
labels={"Stage": "Stage", "Rate": "Rate (%)"},
color="Rate",
color_continuous_scale="Oranges",
)
fig.update_layout(height=400)
st.plotly_chart(fig, use_container_width=True)
else:
st.info("No data available for selected filters")
with sub_tab4:
st.markdown("#### Raw Data")
data_view = st.radio(
"Select data to view:", ["Cases", "Hearings"], horizontal=True
)
if data_view == "Cases":
st.dataframe(
filtered_cases.head(500),
use_container_width=True,
height=600,
)
st.markdown(
f"**Showing first 500 of {len(filtered_cases):,} filtered cases**"
)
# Download button
csv = filtered_cases.to_csv(index=False).encode("utf-8")
st.download_button(
label="Download filtered cases as CSV",
data=csv,
file_name="filtered_cases.csv",
mime="text/csv",
)
else:
st.dataframe(
filtered_hearings.head(500),
use_container_width=True,
height=600,
)
st.markdown(
f"**Showing first 500 of {len(filtered_hearings):,} filtered hearings**"
)
# Download button
csv = filtered_hearings.to_csv(index=False).encode("utf-8")
st.download_button(
label="Download filtered hearings as CSV",
data=csv,
file_name="filtered_hearings.csv",
mime="text/csv",
)
# TAB 3: Parameter Summary -- statistics extracted by the EDA pipeline.
with tab3:
    st.markdown("""
### Parameter Summary
Statistical parameters extracted from historical data, used throughout the system.
""")
    if not params:
        st.warning("Parameters not loaded. Run EDA pipeline to extract parameters.")
        st.code("uv run court-scheduler eda")
    else:
        # Case Types
        st.markdown("#### Case Types")
        if "case_types" in params and params["case_types"]:
            case_types_df = pd.DataFrame(
                {
                    "Case Type": params["case_types"],
                    "Index": range(len(params["case_types"])),
                }
            )
            st.dataframe(case_types_df, use_container_width=True, hide_index=True)
            st.caption(f"Total: {len(params['case_types'])} case types")
        else:
            st.info("No case types found")
        st.markdown("---")
        # Stages
        st.markdown("#### Stages")
        if "stages" in params and params["stages"]:
            stages_df = pd.DataFrame(
                {"Stage": params["stages"], "Index": range(len(params["stages"]))}
            )
            st.dataframe(stages_df, use_container_width=True, hide_index=True)
            st.caption(f"Total: {len(params['stages'])} stages")
        else:
            st.info("No stages found")
        st.markdown("---")
        # Stage Transitions
        st.markdown("#### Stage Transition Graph")
        if "stage_graph" in params and params["stage_graph"]:
            st.markdown("**Sample transitions from each stage:**")
            # Only the first 5 stages are shown, to keep the page light.
            sample_stages = list(params["stage_graph"].keys())[:5]
            for stage in sample_stages:
                transitions = params["stage_graph"][stage]
                if transitions:
                    with st.expander(f"From: {stage}"):
                        trans_df = pd.DataFrame(transitions)
                        if not trans_df.empty:
                            st.dataframe(
                                trans_df, use_container_width=True, hide_index=True
                            )
            st.caption(
                f"Total: {len(params['stage_graph'])} stages with transition data"
            )
        else:
            st.info("No stage transition data found")
        st.markdown("---")
        # Adjournment Statistics
        st.markdown("#### Adjournment Probabilities")
        if "adjournment_stats" in params and params["adjournment_stats"]:
            st.markdown("**Adjournment probability by stage and case type:**")
            # Create a heatmap, capped in both dimensions for readability.
            adj_stats = params["adjournment_stats"]
            stages_list = list(adj_stats.keys())[
                :20
            ]  # Limit to 20 stages for readability
            case_types_list = params.get("case_types", [])[
                :15
            ]  # Limit to 15 case types
            if stages_list and case_types_list:
                heatmap_data = []
                for stage in stages_list:
                    row = []
                    for ct in case_types_list:
                        # Missing stage/case-type pairs default to probability 0.
                        prob = adj_stats.get(stage, {}).get(ct, 0)
                        row.append(prob * 100)
                    heatmap_data.append(row)
                fig = go.Figure(
                    data=go.Heatmap(
                        z=heatmap_data,
                        x=case_types_list,
                        y=stages_list,
                        colorscale="RdYlGn_r",
                        text=[[f"{val:.1f}%" for val in row] for row in heatmap_data],
                        texttemplate="%{text}",
                        textfont={"size": 8},
                        colorbar=dict(title="Adj. Prob. (%)"),
                    )
                )
                fig.update_layout(
                    title="Adjournment Probability by Stage and Case Type",
                    xaxis_title="Case Type",
                    yaxis_title="Stage",
                    height=700,
                )
                st.plotly_chart(fig, use_container_width=True)
                st.caption("Showing top 20 stages and top 15 case types")
            else:
                st.info("Insufficient data for heatmap")
        else:
            st.info("No adjournment statistics found")
    st.markdown("---")
    # System Configuration Section
    # NOTE(review): indentation was lost in this copy; this section is
    # assumed to render inside the Parameters tab (it documents the same
    # parameter system) -- confirm nesting against the repository.
    st.markdown("### System Configuration")
    st.info("""
These parameters control how the system analyzes historical data and generates simulation cases.
Most are derived from historical data patterns, while some are configurable thresholds.
""")
    config_tab1, config_tab2, config_tab3, config_tab4 = st.tabs(
        [
            "EDA Parameters",
            "Ripeness Classifier",
            "Case Generator",
            "Simulation Defaults",
        ]
    )
    # Config tab 1: static documentation of the EDA analysis parameters.
    with config_tab1:
        st.markdown("#### EDA Analysis Parameters")
        st.markdown("**These parameters control historical data analysis:**")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Readiness Score Calculation**")
            st.code(
                """
Readiness Score =
0.4 * (hearings / 50) [capped at 1.0]
+ 0.3 * (100 / gap_median) [capped at 1.0]
+ 0.3 if stage in [ARGUMENTS, EVIDENCE, ORDERS/JUDGMENT]
+ 0.1 otherwise
""",
                language="text",
            )
            st.caption("Weights: 40% hearing count, 30% gap, 30% stage")
            st.markdown("**Alert Thresholds**")
            st.code(
                """
ALERT_P90_TYPE: Disposal time > P90 within case type
ALERT_HEARING_HEAVY: Hearing count > P90 within case type
ALERT_LONG_GAP: Median gap > P90 within case type
""",
                language="text",
            )
        with col2:
            st.markdown("**Adjournment Proxy Detection**")
            st.code(
                """
Gap threshold: 1.3x median gap for that stage
If hearing_gap > 1.3 * stage_median_gap:
is_adjourn_proxy = True
""",
                language="python",
            )
            st.markdown("**Not-Reached Keywords**")
            st.code(
                """
"NOT REACHED", "NR",
"NOT TAKEN UP", "NOT HEARD"
""",
                language="text",
            )
        st.markdown("---")
        st.markdown("**Stage Order (for transition analysis)**")
        st.code(
            """
1. PRE-ADMISSION
2. ADMISSION
3. FRAMING OF CHARGES
4. EVIDENCE
5. ARGUMENTS
6. INTERLOCUTORY APPLICATION
7. SETTLEMENT
8. ORDERS / JUDGMENT
9. FINAL DISPOSAL
10. OTHER
""",
            language="text",
        )
        st.caption("Only forward transitions are counted (by index order)")
    # Config tab 2: ripeness thresholds, read live from the classifier.
    with config_tab2:
        st.markdown("#### Ripeness Classification Thresholds")
        st.markdown("""
These thresholds determine if a case is RIPE (ready for hearing) or UNRIPE (has bottlenecks).
""")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Classification Thresholds**")
            # Imported lazily; pulls current threshold values at render time.
            from src.core.ripeness import RipenessClassifier
            thresholds = RipenessClassifier.get_current_thresholds()
            thresh_df = pd.DataFrame(
                [
                    {
                        "Parameter": "MIN_SERVICE_HEARINGS",
                        "Value": thresholds["MIN_SERVICE_HEARINGS"],
                        "Description": "Minimum hearings to confirm service/compliance",
                    },
                    {
                        "Parameter": "MIN_STAGE_DAYS",
                        "Value": thresholds["MIN_STAGE_DAYS"],
                        "Description": "Minimum days in stage to show compliance efforts",
                    },
                    {
                        "Parameter": "MIN_CASE_AGE_DAYS",
                        "Value": thresholds["MIN_CASE_AGE_DAYS"],
                        "Description": "Minimum case maturity before assuming readiness",
                    },
                ]
            )
            st.dataframe(thresh_df, use_container_width=True, hide_index=True)
            st.markdown("**ADMISSION Stage Rule**")
            st.code(
                """
if stage == ADMISSION and hearing_count < 3:
return UNRIPE_SUMMONS
""",
                language="python",
            )
            st.markdown("**Stuck Case Detection**")
            st.code(
                """
if hearing_count > 10:
avg_gap = age_days / hearing_count
if avg_gap > 60 days:
return UNRIPE_PARTY
""",
                language="python",
            )
        with col2:
            st.markdown("**Ripeness Priority Multipliers**")
            st.code(
                """
RIPE cases: 1.5x priority
UNRIPE cases: 0.7x priority
""",
                language="text",
            )
            st.markdown("**Bottleneck Keywords**")
            bottleneck_df = pd.DataFrame(
                [
                    {"Keyword": "SUMMONS", "Type": "UNRIPE_SUMMONS"},
                    {"Keyword": "NOTICE", "Type": "UNRIPE_SUMMONS"},
                    {"Keyword": "ISSUE", "Type": "UNRIPE_SUMMONS"},
                    {"Keyword": "SERVICE", "Type": "UNRIPE_SUMMONS"},
                    {"Keyword": "STAY", "Type": "UNRIPE_DEPENDENT"},
                    {"Keyword": "PENDING", "Type": "UNRIPE_DEPENDENT"},
                ]
            )
            st.dataframe(bottleneck_df, use_container_width=True, hide_index=True)
            st.markdown("**Ripe Stage Keywords**")
            st.code(
                '"ARGUMENTS", "HEARING", "FINAL", "JUDGMENT", "ORDERS", "DISPOSAL"',
                language="text",
            )
        st.markdown("---")
        st.markdown("**Ripening Time Estimates (days)**")
        ripening_df = pd.DataFrame(
            [
                {"Bottleneck Type": "UNRIPE_SUMMONS", "Estimated Days": 30},
                {"Bottleneck Type": "UNRIPE_DEPENDENT", "Estimated Days": 60},
                {"Bottleneck Type": "UNRIPE_PARTY", "Estimated Days": 14},
                {"Bottleneck Type": "UNRIPE_DOCUMENT", "Estimated Days": 21},
            ]
        )
        st.dataframe(ripening_df, use_container_width=True, hide_index=True)
    # Config tab 3: synthetic case generator settings, read from config.
    with config_tab3:
        st.markdown("#### Case Generator Configuration")
        st.markdown("""
These parameters control synthetic case generation for simulations.
""")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Default Case Type Distribution**")
            from src.data.config import CASE_TYPE_DISTRIBUTION
            dist_df = pd.DataFrame(
                [
                    {"Case Type": ct, "Probability": f"{p * 100:.1f}%"}
                    for ct, p in CASE_TYPE_DISTRIBUTION.items()
                ]
            )
            st.dataframe(dist_df, use_container_width=True, hide_index=True)
            st.caption("Based on historical distribution from EDA")
            st.markdown("**Urgent Case Percentage**")
            from src.data.config import URGENT_CASE_PERCENTAGE
            st.metric("Urgent Cases", f"{URGENT_CASE_PERCENTAGE * 100:.1f}%")
        with col2:
            st.markdown("**Monthly Seasonality Factors**")
            from src.data.config import MONTHLY_SEASONALITY
            season_df = pd.DataFrame(
                [
                    # Months without an explicit factor default to 1.0 (average).
                    {"Month": i, "Factor": MONTHLY_SEASONALITY.get(i, 1.0)}
                    for i in range(1, 13)
                ]
            )
            st.dataframe(season_df, use_container_width=True, hide_index=True)
            st.caption("1.0 = average, >1.0 = more cases, <1.0 = fewer cases")
        st.markdown("---")
        st.markdown("**Initial Case State Generation**")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Hearing History Simulation**")
            st.code(
                """
if days_since_filed > 30:
hearing_count = max(1, days_since_filed // 30)
# Last hearing: 7-30 days before sim start
days_before_end = random(7, 30)
last_hearing_date = end_date - days_before_end
days_since_last_hearing = days_before_end
""",
                language="python",
            )
            st.caption("Ensures staggered eligibility, not all at once")
        with col2:
            st.markdown("**Ripeness Purpose Assignment**")
            st.code(
                """
Bottleneck purposes (20% probability):
- ISSUE SUMMONS, FOR NOTICE
- AWAIT SERVICE OF NOTICE
- STAY APPLICATION PENDING
- FOR ORDERS
Ripe purposes (80% probability):
- ARGUMENTS, HEARING
- FINAL ARGUMENTS, FOR JUDGMENT
- EVIDENCE
""",
                language="text",
            )
            st.caption(
                "Early ADMISSION: 40% bottleneck, Advanced stages: mostly ripe"
            )
    # Config tab 4: simulation defaults; capacity stats come from params.
    with config_tab4:
        st.markdown("#### Simulation Defaults")
        st.markdown("""
Default values used in simulation when not explicitly configured by user.
""")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Duration Estimation**")
            st.code(
                """
Method: lognormal
- Uses historical median and P90
- Ensures realistic variance
- Min duration: 1 day
Formula:
sigma = (log(p90) - log(median)) / 1.2816
mu = log(median)
duration = exp(mu + sigma * randn())
""",
                language="text",
            )
            st.markdown("**Courtroom Capacity**")
            if params and "court_capacity_global" in params:
                cap = params["court_capacity_global"]
                # The 151 / 200 fallbacks apply only when the stats dict is
                # missing the expected keys.
                st.metric(
                    "Median slots/day", f"{cap.get('slots_median_global', 151):.0f}"
                )
                st.metric(
                    "P90 slots/day", f"{cap.get('slots_p90_global', 200):.0f}"
                )
            else:
                st.info("Run EDA to load capacity statistics")
        with col2:
            st.markdown("**Policy Defaults**")
            st.code(
                """
READINESS policy weights:
- age: 0.2
- hearings: 0.2
- urgency: 0.3
- stage: 0.3
Minimum hearing gap: 7 days
RL policy:
- Model: latest from models/ directory
- Fallback: readiness policy
""",
                language="text",
            )
            st.markdown("**Working Days**")
            st.code(
                """
Excludes:
- Weekends (Saturday, Sunday)
- National holidays (loaded from config)
- Court closure days
""",
                language="text",
            )

# Footer
st.markdown("---")
st.caption("Data loaded from EDA pipeline. Use refresh button to reload.")