"""Data & Insights page - Historical analysis, interactive exploration, and parameters. This page provides three views: 1. Historical Analysis - Pre-generated visualizations from EDA pipeline 2. Interactive Exploration - Dynamic filtering and custom analysis 3. Parameter Summary - Extracted parameters from historical data """ from __future__ import annotations import re from pathlib import Path import pandas as pd import plotly.express as px import plotly.graph_objects as go import streamlit as st import streamlit.components.v1 as components from src.dashboard.utils import ( get_case_statistics, load_cleaned_data, load_cleaned_hearings, load_param_loader, ) # Page configuration st.set_page_config( page_title="Data & Insights", page_icon="chart", layout="wide", ) st.title("Data & Insights") st.markdown("Historical case data analysis and extracted parameters") # Data source info with st.expander("Data Source Information", expanded=False): st.info(""" Data loaded from latest EDA output (`reports/figures/v*/`). **Performance Note**: For optimal loading speed, both cases and hearings data are sampled to 50,000 rows if larger. All statistics and visualizations remain representative of the full dataset. """) # Load data with sampling for performance @st.cache_data(ttl=3600) def load_dashboard_data(): """Load and sample data for dashboard performance.""" cases = load_cleaned_data() hearings = load_cleaned_hearings() # Track original counts before sampling total_cases_count = len(cases) total_hearings_count = len(hearings) # Sample both cases and hearings if too large for better performance if len(cases) > 50000: cases = cases.sample(n=50000, random_state=42) if len(hearings) > 50000: hearings = hearings.sample(n=50000, random_state=42) params = load_param_loader() stats = get_case_statistics(cases) if not cases.empty else {} return cases, hearings, params, stats, total_cases_count, total_hearings_count with st.spinner("Loading data..."): try: cases_df, hearings_df, params, stats, total_cases, total_hearings = ( load_dashboard_data() ) except Exception as e: st.error(f"Error loading data: {e}") st.info("Please run the EDA pipeline first: `uv run court-scheduler eda`") st.stop() if cases_df.empty and hearings_df.empty: st.warning( "No data available. The EDA pipeline needs to be run first to process historical court data." ) st.markdown(""" **The EDA pipeline will:** - Load raw court data (cases and hearings) - Clean and validate the data - Extract statistical parameters (distributions, transition probabilities, durations) - Generate analysis visualizations - Save processed data for dashboard use **Processing time**: ~2-5 minutes depending on data size """) col1, col2 = st.columns([1, 2]) with col1: if st.button("Run EDA Pipeline Now", type="primary", use_container_width=True): from eda.load_clean import run_load_and_clean from eda.exploration import run_exploration from eda.parameters import run_parameter_export with st.spinner("Running EDA pipeline... This will take a few minutes."): try: # Step 1: Load & clean data run_load_and_clean() # Step 2: Generate visualizations run_exploration() # Step 3: Extract parameters run_parameter_export() st.success("EDA pipeline completed successfully!") st.info("Reload this page to see the updated data.") if st.button("Reload Page"): st.rerun() except Exception as e: with st.expander("Error details"): st.exception(e) with col2: with st.expander("Alternative: Run via CLI"): st.code("uv run court-scheduler eda", language="bash") st.caption("Run this command in your terminal, then refresh this page.") st.stop() # Overview metrics st.markdown("### Overview") col1, col2, col3, col4, col5 = st.columns(5) with col1: st.metric("Total Cases", f"{total_cases:,}") if "YEAR_FILED" in cases_df.columns: year_range = ( f"{cases_df['YEAR_FILED'].min():.0f}-{cases_df['YEAR_FILED'].max():.0f}" ) st.caption(f"Years: {year_range}") with col2: st.metric("Total Hearings", f"{total_hearings:,}") if total_cases > 0: avg_hearings = total_hearings / total_cases st.caption(f"Avg: {avg_hearings:.1f}/case") with col3: # Try both uppercase and mixed case if "CASE_TYPE" in cases_df.columns: n_case_types = len(cases_df["CASE_TYPE"].unique()) elif "CaseType" in cases_df.columns: n_case_types = len(cases_df["CaseType"].unique()) else: n_case_types = 0 st.metric("Case Types", n_case_types) st.caption("Categories") with col4: # Get stages from hearings data if "Remappedstages" in hearings_df.columns: n_stages = len(hearings_df["Remappedstages"].dropna().unique()) else: n_stages = 0 st.metric("Court Stages", n_stages) st.caption("Phases") with col5: # Average disposal time if available if "DISPOSALTIME_ADJ" in cases_df.columns: avg_disposal = cases_df["DISPOSALTIME_ADJ"].median() st.metric("Median Disposal", f"{avg_disposal:.0f} days") st.caption("Time to resolve") elif "N_HEARINGS" in cases_df.columns: avg_n_hearings = cases_df["N_HEARINGS"].median() st.metric("Median Hearings", f"{avg_n_hearings:.0f}") st.caption("Per case") st.markdown("---") # Main tabs tab1, tab2, tab3 = st.tabs( ["Historical Analysis", "Interactive Exploration", "Parameters"] ) # TAB 1: Historical Analysis - Pre-generated figures with tab1: st.markdown(""" ### Historical Analysis Pre-generated visualizations from EDA pipeline based on historical court case data. """) figures_dir = Path("reports/figures") if not figures_dir.exists(): st.warning( "EDA figures not found. Run the EDA pipeline to generate visualizations." ) st.code("uv run court-scheduler eda") else: # Find latest versioned directory version_dirs = [ d for d in figures_dir.iterdir() if d.is_dir() and d.name.startswith("v") ] if not version_dirs: st.warning( "No EDA output directories found. Run the EDA pipeline to generate visualizations." ) st.code("uv run court-scheduler eda") else: # Use the most recent version directory latest_dir = max(version_dirs, key=lambda p: p.stat().st_mtime) st.caption(f"Showing visualizations from: {latest_dir.name}") # List available figures from the versioned directory # Exclude deprecated/removed visuals like the monthly waterfall figure_files = [ f for f in sorted(latest_dir.glob("*.html")) if "waterfall" not in f.name.lower() ] if not figure_files: st.info(f"No figures found in {latest_dir.name}") else: st.markdown(f"**{len(figure_files)} visualizations available**") # Organize figures by category distribution_figs = [ f for f in figure_files if any(x in f.name for x in ["distribution", "filed", "type"]) ] stage_figs = [ f for f in figure_files if any(x in f.name for x in ["stage", "sankey", "transition"]) ] time_figs = [ f for f in figure_files if any(x in f.name for x in ["monthly", "load", "gap"]) ] other_figs = [ f for f in figure_files if f not in distribution_figs + stage_figs + time_figs ] # Category 1: Case Distributions if distribution_figs: st.markdown("#### Case Distributions") for fig_path in distribution_figs: # Clean name: remove alphanumeric prefixes (e.g., 1_, 11B_) and underscores clean_name = re.sub(r"^[\d\w]+_", "", fig_path.stem) clean_name = clean_name.replace("_", " ").title() with st.expander(clean_name, expanded=False): with open(fig_path, "r", encoding="utf-8") as f: html_content = f.read() components.html(html_content, height=600, scrolling=True) # Category 2: Stage Analysis if stage_figs: st.markdown("#### Stage Analysis") for fig_path in stage_figs: # Clean name: remove alphanumeric prefixes (e.g., 1_, 11B_) and underscores clean_name = re.sub(r"^[\d\w]+_", "", fig_path.stem) clean_name = clean_name.replace("_", " ").title() with st.expander(clean_name, expanded=False): with open(fig_path, "r", encoding="utf-8") as f: html_content = f.read() components.html(html_content, height=600, scrolling=True) # Category 3: Time-based Analysis if time_figs: st.markdown("#### Time-based Analysis") for fig_path in time_figs: # Clean name: remove alphanumeric prefixes (e.g., 1_, 11B_) and underscores clean_name = re.sub(r"^[\d\w]+_", "", fig_path.stem) clean_name = clean_name.replace("_", " ").title() with st.expander(clean_name, expanded=False): with open(fig_path, "r", encoding="utf-8") as f: html_content = f.read() components.html(html_content, height=600, scrolling=True) # Category 4: Other Analysis if other_figs: st.markdown("#### Additional Analysis") for fig_path in other_figs: # Clean name: remove alphanumeric prefixes (e.g., 1_, 11B_) and underscores clean_name = re.sub(r"^[\d\w]+_", "", fig_path.stem) clean_name = clean_name.replace("_", " ").title() with st.expander(clean_name, expanded=False): with open(fig_path, "r", encoding="utf-8") as f: html_content = f.read() components.html(html_content, height=600, scrolling=True) # TAB 2: Interactive Exploration with tab2: st.markdown(""" ### Interactive Exploration Apply filters and explore the data dynamically. """) # Sidebar filters st.sidebar.markdown("---") st.sidebar.header("Filters (Interactive Tab)") # Determine actual column names case_type_col = ( "CASE_TYPE" if "CASE_TYPE" in cases_df.columns else ("CaseType" if "CaseType" in cases_df.columns else None) ) stage_col = "Remappedstages" if "Remappedstages" in hearings_df.columns else None # Case type filter (from cases) if case_type_col: available_case_types = cases_df[case_type_col].unique().tolist() selected_case_types = st.sidebar.multiselect( "Case Types", options=available_case_types, default=available_case_types[:5] if len(available_case_types) > 5 else available_case_types, key="case_type_filter", ) else: selected_case_types = [] st.sidebar.info("No case type data available") # Stage filter (from hearings) if stage_col: available_stages = hearings_df[stage_col].unique().tolist() selected_stages = st.sidebar.multiselect( "Stages", options=available_stages, default=available_stages[:10] if len(available_stages) > 10 else available_stages, key="stage_filter", ) else: selected_stages = [] st.sidebar.info("No stage data available") # Apply filters with copy to ensure clean dataframes if selected_case_types and case_type_col: filtered_cases = cases_df[ cases_df[case_type_col].isin(selected_case_types) ].copy() else: filtered_cases = cases_df.copy() if selected_stages and stage_col: filtered_hearings = hearings_df[ hearings_df[stage_col].isin(selected_stages) ].copy() else: filtered_hearings = hearings_df.copy() # Filtered metrics col1, col2, col3, col4 = st.columns(4) with col1: st.metric( "Filtered Cases", f"{len(filtered_cases):,}", delta=f"{len(filtered_cases) - total_cases}", ) st.caption(f"Hearings: {len(filtered_hearings):,}") with col2: if case_type_col and case_type_col in filtered_cases.columns: n_types_filtered = len(filtered_cases[case_type_col].unique()) else: n_types_filtered = 0 st.metric("Case Types", n_types_filtered) with col3: if stage_col and stage_col in filtered_hearings.columns: n_stages_filtered = len(filtered_hearings[stage_col].unique()) else: n_stages_filtered = 0 st.metric("Stages", n_stages_filtered) with col4: if "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0: adj_rate_filtered = ( filtered_hearings["Outcome"] == "ADJOURNED" ).sum() / len(filtered_hearings) st.metric("Adjournment Rate", f"{adj_rate_filtered:.1%}") else: st.metric("Adjournment Rate", "N/A") st.markdown("---") # Sub-tabs for different analyses sub_tab1, sub_tab2, sub_tab3, sub_tab4 = st.tabs( ["Case Distribution", "Stage Analysis", "Adjournment Patterns", "Raw Data"] ) with sub_tab1: st.markdown("#### Case Distribution by Type") if ( case_type_col and case_type_col in filtered_cases.columns and len(filtered_cases) > 0 ): # Compute value counts and ensure proper structure case_type_counts = ( filtered_cases[case_type_col].value_counts().reset_index() ) # Rename columns for clarity (works across pandas versions) case_type_counts.columns = ["CaseType", "Count"] # Debug data preview with st.expander("Data Preview (Debug)", expanded=False): st.write(f"Total rows: {len(case_type_counts)}") st.dataframe(case_type_counts.head(10)) col1, col2 = st.columns(2) with col1: fig = px.bar( case_type_counts, x="CaseType", y="Count", title="Cases by Type", labels={"CaseType": "Case Type", "Count": "Count"}, color="Count", color_continuous_scale="Blues", ) fig.update_layout(xaxis_tickangle=-45, height=400) st.plotly_chart(fig, use_container_width=True) with col2: fig_pie = px.pie( case_type_counts, values="Count", names="CaseType", title="Case Type Distribution", ) fig_pie.update_layout(height=400) st.plotly_chart(fig_pie, use_container_width=True) else: st.info("No data available for selected filters") with sub_tab2: st.markdown("#### Stage Analysis") if ( stage_col and stage_col in filtered_hearings.columns and len(filtered_hearings) > 0 ): stage_counts = filtered_hearings[stage_col].value_counts().reset_index() stage_counts.columns = ["Stage", "Count"] fig = px.bar( stage_counts.head(15), x="Count", y="Stage", orientation="h", title="Top 15 Stages by Case Count", labels={"Stage": "Stage", "Count": "Count"}, color="Count", color_continuous_scale="Greens", ) fig.update_layout(height=600) st.plotly_chart(fig, use_container_width=True) else: st.info("No data available for selected filters") with sub_tab3: st.markdown("#### Adjournment Patterns") if ( "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0 and case_type_col and stage_col ): col1, col2 = st.columns(2) with col1: st.markdown("**Overall Adjournment Rate**") total_hearings = len(filtered_hearings) adjourned = (filtered_hearings["Outcome"] == "ADJOURNED").sum() not_adjourned = total_hearings - adjourned outcome_df = pd.DataFrame( { "Outcome": ["ADJOURNED", "NOT ADJOURNED"], "Count": [adjourned, not_adjourned], } ) fig_pie = px.pie( outcome_df, values="Count", names="Outcome", title=f"Outcome Distribution (Total: {total_hearings:,})", color="Outcome", color_discrete_map={ "ADJOURNED": "#ef4444", "NOT ADJOURNED": "#22c55e", }, ) fig_pie.update_layout(height=400) st.plotly_chart(fig_pie, use_container_width=True) with col2: st.markdown("**By Stage**") adj_by_stage = ( filtered_hearings.groupby(stage_col)["Outcome"] .apply( lambda x: (x == "ADJOURNED").sum() / len(x) if len(x) > 0 else 0 ) .reset_index() ) adj_by_stage.columns = ["Stage", "Rate"] adj_by_stage["Rate"] = adj_by_stage["Rate"] * 100 fig = px.bar( adj_by_stage.sort_values("Rate", ascending=False).head(10), x="Rate", y="Stage", orientation="h", title="Top 10 Stages by Adjournment Rate", labels={"Stage": "Stage", "Rate": "Rate (%)"}, color="Rate", color_continuous_scale="Oranges", ) fig.update_layout(height=400) st.plotly_chart(fig, use_container_width=True) else: st.info("No data available for selected filters") with sub_tab4: st.markdown("#### Raw Data") data_view = st.radio( "Select data to view:", ["Cases", "Hearings"], horizontal=True ) if data_view == "Cases": st.dataframe( filtered_cases.head(500), use_container_width=True, height=600, ) st.markdown( f"**Showing first 500 of {len(filtered_cases):,} filtered cases**" ) # Download button csv = filtered_cases.to_csv(index=False).encode("utf-8") st.download_button( label="Download filtered cases as CSV", data=csv, file_name="filtered_cases.csv", mime="text/csv", ) else: st.dataframe( filtered_hearings.head(500), use_container_width=True, height=600, ) st.markdown( f"**Showing first 500 of {len(filtered_hearings):,} filtered hearings**" ) # Download button csv = filtered_hearings.to_csv(index=False).encode("utf-8") st.download_button( label="Download filtered hearings as CSV", data=csv, file_name="filtered_hearings.csv", mime="text/csv", ) # TAB 3: Parameter Summary with tab3: st.markdown(""" ### Parameter Summary Statistical parameters extracted from historical data, used throughout the system. """) if not params: st.warning("Parameters not loaded. Run EDA pipeline to extract parameters.") st.code("uv run court-scheduler eda") else: # Case Types st.markdown("#### Case Types") if "case_types" in params and params["case_types"]: case_types_df = pd.DataFrame( { "Case Type": params["case_types"], "Index": range(len(params["case_types"])), } ) st.dataframe(case_types_df, use_container_width=True, hide_index=True) st.caption(f"Total: {len(params['case_types'])} case types") else: st.info("No case types found") st.markdown("---") # Stages st.markdown("#### Stages") if "stages" in params and params["stages"]: stages_df = pd.DataFrame( {"Stage": params["stages"], "Index": range(len(params["stages"]))} ) st.dataframe(stages_df, use_container_width=True, hide_index=True) st.caption(f"Total: {len(params['stages'])} stages") else: st.info("No stages found") st.markdown("---") # Stage Transitions st.markdown("#### Stage Transition Graph") if "stage_graph" in params and params["stage_graph"]: st.markdown("**Sample transitions from each stage:**") # Show sample transitions sample_stages = list(params["stage_graph"].keys())[:5] for stage in sample_stages: transitions = params["stage_graph"][stage] if transitions: with st.expander(f"From: {stage}"): trans_df = pd.DataFrame(transitions) if not trans_df.empty: st.dataframe( trans_df, use_container_width=True, hide_index=True ) st.caption( f"Total: {len(params['stage_graph'])} stages with transition data" ) else: st.info("No stage transition data found") st.markdown("---") # Adjournment Statistics st.markdown("#### Adjournment Probabilities") if "adjournment_stats" in params and params["adjournment_stats"]: st.markdown("**Adjournment probability by stage and case type:**") # Create heatmap adj_stats = params["adjournment_stats"] stages_list = list(adj_stats.keys())[ :20 ] # Limit to 20 stages for readability case_types_list = params.get("case_types", [])[ :15 ] # Limit to 15 case types if stages_list and case_types_list: heatmap_data = [] for stage in stages_list: row = [] for ct in case_types_list: prob = adj_stats.get(stage, {}).get(ct, 0) row.append(prob * 100) heatmap_data.append(row) fig = go.Figure( data=go.Heatmap( z=heatmap_data, x=case_types_list, y=stages_list, colorscale="RdYlGn_r", text=[[f"{val:.1f}%" for val in row] for row in heatmap_data], texttemplate="%{text}", textfont={"size": 8}, colorbar=dict(title="Adj. Prob. (%)"), ) ) fig.update_layout( title="Adjournment Probability by Stage and Case Type", xaxis_title="Case Type", yaxis_title="Stage", height=700, ) st.plotly_chart(fig, use_container_width=True) st.caption("Showing top 20 stages and top 15 case types") else: st.info("Insufficient data for heatmap") else: st.info("No adjournment statistics found") st.markdown("---") # System Configuration Section st.markdown("### System Configuration") st.info(""" These parameters control how the system analyzes historical data and generates simulation cases. Most are derived from historical data patterns, while some are configurable thresholds. """) config_tab1, config_tab2, config_tab3, config_tab4 = st.tabs( [ "EDA Parameters", "Ripeness Classifier", "Case Generator", "Simulation Defaults", ] ) with config_tab1: st.markdown("#### EDA Analysis Parameters") st.markdown("**These parameters control historical data analysis:**") col1, col2 = st.columns(2) with col1: st.markdown("**Readiness Score Calculation**") st.code( """ Readiness Score = 0.4 * (hearings / 50) [capped at 1.0] + 0.3 * (100 / gap_median) [capped at 1.0] + 0.3 if stage in [ARGUMENTS, EVIDENCE, ORDERS/JUDGMENT] + 0.1 otherwise """, language="text", ) st.caption("Weights: 40% hearing count, 30% gap, 30% stage") st.markdown("**Alert Thresholds**") st.code( """ ALERT_P90_TYPE: Disposal time > P90 within case type ALERT_HEARING_HEAVY: Hearing count > P90 within case type ALERT_LONG_GAP: Median gap > P90 within case type """, language="text", ) with col2: st.markdown("**Adjournment Proxy Detection**") st.code( """ Gap threshold: 1.3x median gap for that stage If hearing_gap > 1.3 * stage_median_gap: is_adjourn_proxy = True """, language="python", ) st.markdown("**Not-Reached Keywords**") st.code( """ "NOT REACHED", "NR", "NOT TAKEN UP", "NOT HEARD" """, language="text", ) st.markdown("---") st.markdown("**Stage Order (for transition analysis)**") st.code( """ 1. PRE-ADMISSION 2. ADMISSION 3. FRAMING OF CHARGES 4. EVIDENCE 5. ARGUMENTS 6. INTERLOCUTORY APPLICATION 7. SETTLEMENT 8. ORDERS / JUDGMENT 9. FINAL DISPOSAL 10. OTHER """, language="text", ) st.caption("Only forward transitions are counted (by index order)") with config_tab2: st.markdown("#### Ripeness Classification Thresholds") st.markdown(""" These thresholds determine if a case is RIPE (ready for hearing) or UNRIPE (has bottlenecks). """) col1, col2 = st.columns(2) with col1: st.markdown("**Classification Thresholds**") from src.core.ripeness import RipenessClassifier thresholds = RipenessClassifier.get_current_thresholds() thresh_df = pd.DataFrame( [ { "Parameter": "MIN_SERVICE_HEARINGS", "Value": thresholds["MIN_SERVICE_HEARINGS"], "Description": "Minimum hearings to confirm service/compliance", }, { "Parameter": "MIN_STAGE_DAYS", "Value": thresholds["MIN_STAGE_DAYS"], "Description": "Minimum days in stage to show compliance efforts", }, { "Parameter": "MIN_CASE_AGE_DAYS", "Value": thresholds["MIN_CASE_AGE_DAYS"], "Description": "Minimum case maturity before assuming readiness", }, ] ) st.dataframe(thresh_df, use_container_width=True, hide_index=True) st.markdown("**ADMISSION Stage Rule**") st.code( """ if stage == ADMISSION and hearing_count < 3: return UNRIPE_SUMMONS """, language="python", ) st.markdown("**Stuck Case Detection**") st.code( """ if hearing_count > 10: avg_gap = age_days / hearing_count if avg_gap > 60 days: return UNRIPE_PARTY """, language="python", ) with col2: st.markdown("**Ripeness Priority Multipliers**") st.code( """ RIPE cases: 1.5x priority UNRIPE cases: 0.7x priority """, language="text", ) st.markdown("**Bottleneck Keywords**") bottleneck_df = pd.DataFrame( [ {"Keyword": "SUMMONS", "Type": "UNRIPE_SUMMONS"}, {"Keyword": "NOTICE", "Type": "UNRIPE_SUMMONS"}, {"Keyword": "ISSUE", "Type": "UNRIPE_SUMMONS"}, {"Keyword": "SERVICE", "Type": "UNRIPE_SUMMONS"}, {"Keyword": "STAY", "Type": "UNRIPE_DEPENDENT"}, {"Keyword": "PENDING", "Type": "UNRIPE_DEPENDENT"}, ] ) st.dataframe(bottleneck_df, use_container_width=True, hide_index=True) st.markdown("**Ripe Stage Keywords**") st.code( '"ARGUMENTS", "HEARING", "FINAL", "JUDGMENT", "ORDERS", "DISPOSAL"', language="text", ) st.markdown("---") st.markdown("**Ripening Time Estimates (days)**") ripening_df = pd.DataFrame( [ {"Bottleneck Type": "UNRIPE_SUMMONS", "Estimated Days": 30}, {"Bottleneck Type": "UNRIPE_DEPENDENT", "Estimated Days": 60}, {"Bottleneck Type": "UNRIPE_PARTY", "Estimated Days": 14}, {"Bottleneck Type": "UNRIPE_DOCUMENT", "Estimated Days": 21}, ] ) st.dataframe(ripening_df, use_container_width=True, hide_index=True) with config_tab3: st.markdown("#### Case Generator Configuration") st.markdown(""" These parameters control synthetic case generation for simulations. """) col1, col2 = st.columns(2) with col1: st.markdown("**Default Case Type Distribution**") from src.data.config import CASE_TYPE_DISTRIBUTION dist_df = pd.DataFrame( [ {"Case Type": ct, "Probability": f"{p * 100:.1f}%"} for ct, p in CASE_TYPE_DISTRIBUTION.items() ] ) st.dataframe(dist_df, use_container_width=True, hide_index=True) st.caption("Based on historical distribution from EDA") st.markdown("**Urgent Case Percentage**") from src.data.config import URGENT_CASE_PERCENTAGE st.metric("Urgent Cases", f"{URGENT_CASE_PERCENTAGE * 100:.1f}%") with col2: st.markdown("**Monthly Seasonality Factors**") from src.data.config import MONTHLY_SEASONALITY season_df = pd.DataFrame( [ {"Month": i, "Factor": MONTHLY_SEASONALITY.get(i, 1.0)} for i in range(1, 13) ] ) st.dataframe(season_df, use_container_width=True, hide_index=True) st.caption("1.0 = average, >1.0 = more cases, <1.0 = fewer cases") st.markdown("---") st.markdown("**Initial Case State Generation**") col1, col2 = st.columns(2) with col1: st.markdown("**Hearing History Simulation**") st.code( """ if days_since_filed > 30: hearing_count = max(1, days_since_filed // 30) # Last hearing: 7-30 days before sim start days_before_end = random(7, 30) last_hearing_date = end_date - days_before_end days_since_last_hearing = days_before_end """, language="python", ) st.caption("Ensures staggered eligibility, not all at once") with col2: st.markdown("**Ripeness Purpose Assignment**") st.code( """ Bottleneck purposes (20% probability): - ISSUE SUMMONS, FOR NOTICE - AWAIT SERVICE OF NOTICE - STAY APPLICATION PENDING - FOR ORDERS Ripe purposes (80% probability): - ARGUMENTS, HEARING - FINAL ARGUMENTS, FOR JUDGMENT - EVIDENCE """, language="text", ) st.caption( "Early ADMISSION: 40% bottleneck, Advanced stages: mostly ripe" ) with config_tab4: st.markdown("#### Simulation Defaults") st.markdown(""" Default values used in simulation when not explicitly configured by user. """) col1, col2 = st.columns(2) with col1: st.markdown("**Duration Estimation**") st.code( """ Method: lognormal - Uses historical median and P90 - Ensures realistic variance - Min duration: 1 day Formula: sigma = (log(p90) - log(median)) / 1.2816 mu = log(median) duration = exp(mu + sigma * randn()) """, language="text", ) st.markdown("**Courtroom Capacity**") if params and "court_capacity_global" in params: cap = params["court_capacity_global"] st.metric( "Median slots/day", f"{cap.get('slots_median_global', 151):.0f}" ) st.metric( "P90 slots/day", f"{cap.get('slots_p90_global', 200):.0f}" ) else: st.info("Run EDA to load capacity statistics") with col2: st.markdown("**Policy Defaults**") st.code( """ READINESS policy weights: - age: 0.2 - hearings: 0.2 - urgency: 0.3 - stage: 0.3 Minimum hearing gap: 7 days RL policy: - Model: latest from models/ directory - Fallback: readiness policy """, language="text", ) st.markdown("**Working Days**") st.code( """ Excludes: - Weekends (Saturday, Sunday) - National holidays (loaded from config) - Court closure days """, language="text", ) # Footer st.markdown("---") st.caption("Data loaded from EDA pipeline. Use refresh button to reload.")