"""Data & Insights page - Historical analysis, interactive exploration, and parameters.

This page provides three views:
1. Historical Analysis - Pre-generated visualizations from EDA pipeline
2. Interactive Exploration - Dynamic filtering and custom analysis
3. Parameter Summary - Extracted parameters from historical data
"""

from __future__ import annotations

import re
from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import streamlit.components.v1 as components

from src.dashboard.utils import (
    get_case_statistics,
    load_cleaned_data,
    load_cleaned_hearings,
    load_param_loader,
)

# Page configuration
# NOTE(review): "chart" is neither an emoji character nor a ":shortcode:" —
# confirm the page icon renders as intended.
st.set_page_config(
    page_title="Data & Insights",
    page_icon="chart",
    layout="wide",
)

# Page heading and one-line description.
st.title("Data & Insights")
st.markdown("Historical case data analysis and extracted parameters")

# Data source info
# Collapsed by default; the 50,000-row figure quoted in the text mirrors the
# sampling limit hard-coded in load_dashboard_data() and must be kept in sync.
with st.expander("Data Source Information", expanded=False):
    st.info("""
    Data loaded from latest EDA output (`reports/figures/v*/`).

    **Performance Note**: For optimal loading speed, both cases and hearings data are sampled to 50,000 rows if larger.
    All statistics and visualizations remain representative of the full dataset.
    """)


# Load data with sampling for performance
@st.cache_data(ttl=3600)
def load_dashboard_data():
    """Fetch the cleaned datasets, cap their size, and bundle what the page needs.

    Returns:
        A 6-tuple ``(cases, hearings, params, stats, total_cases_count,
        total_hearings_count)``. Each frame is down-sampled to at most 50,000
        rows (fixed ``random_state=42``); the two counts are the row totals
        measured *before* sampling. ``stats`` is ``{}`` when the cases frame
        is empty, otherwise the result of ``get_case_statistics`` on the
        (possibly sampled) cases.
    """

    def _cap_rows(frame):
        # Fixed seed keeps the sampled subset stable between cache refreshes.
        if len(frame) > 50000:
            return frame.sample(n=50000, random_state=42)
        return frame

    raw_cases = load_cleaned_data()
    raw_hearings = load_cleaned_hearings()

    # Totals come from the unsampled frames so the headline metrics stay exact.
    n_cases_before = len(raw_cases)
    n_hearings_before = len(raw_hearings)

    cases = _cap_rows(raw_cases)
    hearings = _cap_rows(raw_hearings)

    params = load_param_loader()
    stats = {} if cases.empty else get_case_statistics(cases)

    return cases, hearings, params, stats, n_cases_before, n_hearings_before


# Load everything the page needs up front. The unpacked names below are the
# module-level inputs used by every section that follows.
with st.spinner("Loading data..."):
    try:
        cases_df, hearings_df, params, stats, total_cases, total_hearings = (
            load_dashboard_data()
        )
    except Exception as e:
        # Broad catch at the page boundary: show the error and a hint about
        # the EDA command, then halt this script run.
        st.error(f"Error loading data: {e}")
        st.info("Please run the EDA pipeline first: `uv run court-scheduler eda`")
        st.stop()

# Nothing to display yet: explain the EDA pipeline, offer to run it in-process
# (or via the CLI), and halt the page so the sections below never see empty data.
if cases_df.empty and hearings_df.empty:
    st.warning(
        "No data available. The EDA pipeline needs to be run first to process historical court data."
    )

    st.markdown("""
    **The EDA pipeline will:**
    - Load raw court data (cases and hearings)
    - Clean and validate the data
    - Extract statistical parameters (distributions, transition probabilities, durations)
    - Generate analysis visualizations
    - Save processed data for dashboard use

    **Processing time**: ~2-5 minutes depending on data size
    """)

    col1, col2 = st.columns([1, 2])

    with col1:
        if st.button("Run EDA Pipeline Now", type="primary", use_container_width=True):
            with st.spinner("Running EDA pipeline... This will take a few minutes."):
                try:
                    # Imported lazily so the page loads without the EDA package,
                    # and inside the try so an import failure is reported like
                    # any other pipeline failure instead of crashing the page.
                    from eda.load_clean import run_load_and_clean
                    from eda.exploration import run_exploration
                    from eda.parameters import run_parameter_export

                    # Step 1: Load & clean data
                    run_load_and_clean()
                    # Step 2: Generate visualizations
                    run_exploration()
                    # Step 3: Extract parameters
                    run_parameter_export()
                except Exception as e:
                    st.error(f"EDA pipeline failed: {e}")
                    with st.expander("Error details"):
                        st.exception(e)
                else:
                    # The empty result that led here is cached (ttl=3600);
                    # without clearing it a reload would keep showing
                    # "No data available" for up to an hour.
                    st.cache_data.clear()
                    st.success("EDA pipeline completed successfully!")
                    st.info("Reload this page to see the updated data.")
                    # Clicking any widget reruns the script, which *is* the
                    # reload. Code nested under this button could never run,
                    # because the outer button is False on that rerun.
                    st.button("Reload Page")

    with col2:
        with st.expander("Alternative: Run via CLI"):
            st.code("uv run court-scheduler eda", language="bash")
            st.caption("Run this command in your terminal, then refresh this page.")

    st.stop()

# Overview metrics
# Headline numbers. Totals use the unsampled counts; everything else is
# computed from the (possibly sampled) frames.
st.markdown("### Overview")
col1, col2, col3, col4, col5 = st.columns(5)

with col1:
    st.metric("Total Cases", f"{total_cases:,}")
    if "YEAR_FILED" in cases_df.columns:
        # Ignore missing years; with no valid year there is no range to show
        # (previously this rendered "nan-nan").
        filing_years = cases_df["YEAR_FILED"].dropna()
        if not filing_years.empty:
            year_range = f"{filing_years.min():.0f}-{filing_years.max():.0f}"
            st.caption(f"Years: {year_range}")

with col2:
    st.metric("Total Hearings", f"{total_hearings:,}")
    if total_cases > 0:
        avg_hearings = total_hearings / total_cases
        st.caption(f"Avg: {avg_hearings:.1f}/case")

with col3:
    # Try both uppercase and mixed case column spellings.
    # nunique() skips missing values, matching how stages are counted below.
    if "CASE_TYPE" in cases_df.columns:
        n_case_types = cases_df["CASE_TYPE"].nunique()
    elif "CaseType" in cases_df.columns:
        n_case_types = cases_df["CaseType"].nunique()
    else:
        n_case_types = 0
    st.metric("Case Types", n_case_types)
    st.caption("Categories")

with col4:
    # Get stages from hearings data (missing values are not a stage).
    if "Remappedstages" in hearings_df.columns:
        n_stages = hearings_df["Remappedstages"].nunique()
    else:
        n_stages = 0
    st.metric("Court Stages", n_stages)
    st.caption("Phases")

with col5:
    # Median disposal time if available, otherwise median hearings per case.
    # A median over no valid values is NaN; show "N/A" rather than "nan".
    if "DISPOSALTIME_ADJ" in cases_df.columns:
        avg_disposal = cases_df["DISPOSALTIME_ADJ"].median()
        st.metric(
            "Median Disposal",
            f"{avg_disposal:.0f} days" if pd.notna(avg_disposal) else "N/A",
        )
        st.caption("Time to resolve")
    elif "N_HEARINGS" in cases_df.columns:
        avg_n_hearings = cases_df["N_HEARINGS"].median()
        st.metric(
            "Median Hearings",
            f"{avg_n_hearings:.0f}" if pd.notna(avg_n_hearings) else "N/A",
        )
        st.caption("Per case")

st.markdown("---")

# Main tabs
# The three top-level sections of the page; each `with tabN:` block further
# down renders into one of these containers.
tab1, tab2, tab3 = st.tabs(
    ["Historical Analysis", "Interactive Exploration", "Parameters"]
)

# TAB 1: Historical Analysis - Pre-generated figures

# Leading figure index such as "1_" or "11B_": digits, optional letters, one
# underscore. The earlier pattern used a greedy `\w` class, which also matches
# "_" and therefore swallowed everything up to the LAST underscore
# ("1_case_type_distribution" -> "Distribution").
_FIGURE_PREFIX_RE = re.compile(r"^\d+[A-Za-z]*_")


def _figure_title(stem: str) -> str:
    """Turn a figure file stem into a human-readable expander title.

    Only the leading index prefix (e.g. ``1_`` or ``11B_``) is stripped; the
    remaining underscores become spaces and the result is title-cased.
    """
    return _FIGURE_PREFIX_RE.sub("", stem).replace("_", " ").title()


def _figures_matching(figures: list[Path], keywords: tuple[str, ...]) -> list[Path]:
    """Return the figures whose file name contains any of *keywords* (case-sensitive)."""
    return [f for f in figures if any(word in f.name for word in keywords)]


def _render_figure_section(heading: str, figures: list[Path]) -> None:
    """Render *heading* plus one collapsed expander per HTML figure; no-op when empty."""
    if not figures:
        return
    st.markdown(heading)
    for fig_path in figures:
        with st.expander(_figure_title(fig_path.stem), expanded=False):
            html_content = fig_path.read_text(encoding="utf-8")
            components.html(html_content, height=600, scrolling=True)


with tab1:
    st.markdown("""
    ### Historical Analysis
    Pre-generated visualizations from EDA pipeline based on historical court case data.
    """)

    figures_dir = Path("reports/figures")

    if not figures_dir.exists():
        st.warning(
            "EDA figures not found. Run the EDA pipeline to generate visualizations."
        )
        st.code("uv run court-scheduler eda")
    else:
        # Find latest versioned directory
        version_dirs = [
            d for d in figures_dir.iterdir() if d.is_dir() and d.name.startswith("v")
        ]

        if not version_dirs:
            st.warning(
                "No EDA output directories found. Run the EDA pipeline to generate visualizations."
            )
            st.code("uv run court-scheduler eda")
        else:
            # Use the most recently modified version directory
            latest_dir = max(version_dirs, key=lambda p: p.stat().st_mtime)
            st.caption(f"Showing visualizations from: {latest_dir.name}")

            # List available figures from the versioned directory
            # Exclude deprecated/removed visuals like the monthly waterfall
            figure_files = [
                f
                for f in sorted(latest_dir.glob("*.html"))
                if "waterfall" not in f.name.lower()
            ]

            if not figure_files:
                st.info(f"No figures found in {latest_dir.name}")
            else:
                st.markdown(f"**{len(figure_files)} visualizations available**")

                # Organize figures by file-name keyword.
                # NOTE(review): categories are not mutually exclusive — a name
                # containing e.g. both "type" and "stage" is shown in both
                # sections (unchanged from the previous behavior).
                distribution_figs = _figures_matching(
                    figure_files, ("distribution", "filed", "type")
                )
                stage_figs = _figures_matching(
                    figure_files, ("stage", "sankey", "transition")
                )
                time_figs = _figures_matching(
                    figure_files, ("monthly", "load", "gap")
                )
                categorized = {*distribution_figs, *stage_figs, *time_figs}
                other_figs = [f for f in figure_files if f not in categorized]

                _render_figure_section("#### Case Distributions", distribution_figs)
                _render_figure_section("#### Stage Analysis", stage_figs)
                _render_figure_section("#### Time-based Analysis", time_figs)
                _render_figure_section("#### Additional Analysis", other_figs)

# TAB 2: Interactive Exploration
with tab2:
    st.markdown("""
    ### Interactive Exploration
    Apply filters and explore the data dynamically.
    """)

    # Sidebar filters
    st.sidebar.markdown("---")
    st.sidebar.header("Filters (Interactive Tab)")

    # Resolve which column spellings are present (None when the column is absent).
    case_type_col = next(
        (name for name in ("CASE_TYPE", "CaseType") if name in cases_df.columns),
        None,
    )
    stage_col = "Remappedstages" if "Remappedstages" in hearings_df.columns else None

    # Case type filter (from cases)
    if not case_type_col:
        selected_case_types = []
        st.sidebar.info("No case type data available")
    else:
        available_case_types = cases_df[case_type_col].unique().tolist()
        selected_case_types = st.sidebar.multiselect(
            "Case Types",
            options=available_case_types,
            # Pre-select the first five; a slice already returns the whole
            # list when it holds five or fewer entries.
            default=available_case_types[:5],
            key="case_type_filter",
        )

    # Stage filter (from hearings)
    if not stage_col:
        selected_stages = []
        st.sidebar.info("No stage data available")
    else:
        available_stages = hearings_df[stage_col].unique().tolist()
        selected_stages = st.sidebar.multiselect(
            "Stages",
            options=available_stages,
            # Same idea as above, with the first ten stages pre-selected.
            default=available_stages[:10],
            key="stage_filter",
        )

    # Apply filters; an empty selection (or a missing column) means "no filter".
    # Always copy so later sections never work on a view of the cached frames.
    filtered_cases = (
        cases_df[cases_df[case_type_col].isin(selected_case_types)]
        if selected_case_types and case_type_col
        else cases_df
    ).copy()

    filtered_hearings = (
        hearings_df[hearings_df[stage_col].isin(selected_stages)]
        if selected_stages and stage_col
        else hearings_df
    ).copy()

    # Filtered metrics
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        # The delta is measured against the loaded frame the filter was applied
        # to. Comparing with the unsampled `total_cases` reported a large drop
        # even with no filter active whenever the data had been sampled.
        cases_delta = len(filtered_cases) - len(cases_df)
        st.metric(
            "Filtered Cases",
            f"{len(filtered_cases):,}",
            # No delta indicator when the filter excludes nothing.
            delta=f"{cases_delta}" if cases_delta else None,
        )
        st.caption(f"Hearings: {len(filtered_hearings):,}")

    with col2:
        if case_type_col and case_type_col in filtered_cases.columns:
            n_types_filtered = len(filtered_cases[case_type_col].unique())
        else:
            n_types_filtered = 0
        st.metric("Case Types", n_types_filtered)

    with col3:
        if stage_col and stage_col in filtered_hearings.columns:
            n_stages_filtered = len(filtered_hearings[stage_col].unique())
        else:
            n_stages_filtered = 0
        st.metric("Stages", n_stages_filtered)

    with col4:
        # Share of filtered hearings whose Outcome is exactly "ADJOURNED".
        if "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0:
            adj_rate_filtered = (
                filtered_hearings["Outcome"] == "ADJOURNED"
            ).sum() / len(filtered_hearings)
            st.metric("Adjournment Rate", f"{adj_rate_filtered:.1%}")
        else:
            st.metric("Adjournment Rate", "N/A")

    st.markdown("---")

    # Sub-tabs for different analyses
    sub_tab1, sub_tab2, sub_tab3, sub_tab4 = st.tabs(
        ["Case Distribution", "Stage Analysis", "Adjournment Patterns", "Raw Data"]
    )

    with sub_tab1:
        st.markdown("#### Case Distribution by Type")

        if not (
            case_type_col
            and case_type_col in filtered_cases.columns
            and len(filtered_cases) > 0
        ):
            st.info("No data available for selected filters")
        else:
            # Frequency table with fixed column names, independent of how the
            # installed pandas version labels value_counts() output.
            case_type_counts = (
                filtered_cases[case_type_col]
                .value_counts()
                .rename_axis("CaseType")
                .reset_index(name="Count")
            )

            # Debug data preview
            with st.expander("Data Preview (Debug)", expanded=False):
                st.write(f"Total rows: {len(case_type_counts)}")
                st.dataframe(case_type_counts.head(10))

            # Build both charts first, then place them side by side.
            fig = px.bar(
                case_type_counts,
                x="CaseType",
                y="Count",
                title="Cases by Type",
                labels={"CaseType": "Case Type", "Count": "Count"},
                color="Count",
                color_continuous_scale="Blues",
            )
            fig.update_layout(xaxis_tickangle=-45, height=400)

            fig_pie = px.pie(
                case_type_counts,
                values="Count",
                names="CaseType",
                title="Case Type Distribution",
            )
            fig_pie.update_layout(height=400)

            col1, col2 = st.columns(2)
            with col1:
                st.plotly_chart(fig, use_container_width=True)
            with col2:
                st.plotly_chart(fig_pie, use_container_width=True)

    with sub_tab2:
        st.markdown("#### Stage Analysis")

        if not (
            stage_col
            and stage_col in filtered_hearings.columns
            and len(filtered_hearings) > 0
        ):
            st.info("No data available for selected filters")
        else:
            # Rows per stage, most frequent first (value_counts order).
            stage_counts = (
                filtered_hearings[stage_col]
                .value_counts()
                .rename_axis("Stage")
                .reset_index(name="Count")
            )
            top_stage_counts = stage_counts.head(15)

            fig = px.bar(
                top_stage_counts,
                x="Count",
                y="Stage",
                orientation="h",
                title="Top 15 Stages by Case Count",
                labels={"Stage": "Stage", "Count": "Count"},
                color="Count",
                color_continuous_scale="Greens",
            )
            fig.update_layout(height=600)
            st.plotly_chart(fig, use_container_width=True)

    with sub_tab3:
        st.markdown("#### Adjournment Patterns")

        # Only the Outcome column is needed for the overall chart. The case
        # type column is not used anywhere in this tab, so it is no longer a
        # precondition; the per-stage chart additionally needs a stage column.
        if "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0:
            col1, col2 = st.columns(2)

            with col1:
                st.markdown("**Overall Adjournment Rate**")
                # Dedicated name: assigning to `total_hearings` here would
                # overwrite the page-level unsampled hearing total.
                n_filtered_hearings = len(filtered_hearings)
                adjourned = (filtered_hearings["Outcome"] == "ADJOURNED").sum()
                not_adjourned = n_filtered_hearings - adjourned

                outcome_df = pd.DataFrame(
                    {
                        "Outcome": ["ADJOURNED", "NOT ADJOURNED"],
                        "Count": [adjourned, not_adjourned],
                    }
                )

                fig_pie = px.pie(
                    outcome_df,
                    values="Count",
                    names="Outcome",
                    title=f"Outcome Distribution (Total: {n_filtered_hearings:,})",
                    color="Outcome",
                    color_discrete_map={
                        "ADJOURNED": "#ef4444",
                        "NOT ADJOURNED": "#22c55e",
                    },
                )
                fig_pie.update_layout(height=400)
                st.plotly_chart(fig_pie, use_container_width=True)

            with col2:
                st.markdown("**By Stage**")
                if stage_col:
                    # Fraction of ADJOURNED outcomes within each stage; the
                    # length guard covers groups that contain no rows.
                    adj_by_stage = (
                        filtered_hearings.groupby(stage_col)["Outcome"]
                        .apply(
                            lambda x: (x == "ADJOURNED").sum() / len(x)
                            if len(x) > 0
                            else 0
                        )
                        .reset_index()
                    )
                    adj_by_stage.columns = ["Stage", "Rate"]
                    adj_by_stage["Rate"] = adj_by_stage["Rate"] * 100

                    fig = px.bar(
                        adj_by_stage.sort_values("Rate", ascending=False).head(10),
                        x="Rate",
                        y="Stage",
                        orientation="h",
                        title="Top 10 Stages by Adjournment Rate",
                        labels={"Stage": "Stage", "Rate": "Rate (%)"},
                        color="Rate",
                        color_continuous_scale="Oranges",
                    )
                    fig.update_layout(height=400)
                    st.plotly_chart(fig, use_container_width=True)
                else:
                    st.info("No stage data available")
        else:
            st.info("No data available for selected filters")

    with sub_tab4:
        st.markdown("#### Raw Data")

        data_view = st.radio(
            "Select data to view:", ["Cases", "Hearings"], horizontal=True
        )

        # Both views share one layout; only the frame and the noun differ.
        if data_view == "Cases":
            raw_frame, raw_noun = filtered_cases, "cases"
        else:
            raw_frame, raw_noun = filtered_hearings, "hearings"

        preview_limit = 500
        st.dataframe(
            raw_frame.head(preview_limit),
            use_container_width=True,
            height=600,
        )

        # Report the number of rows actually displayed; the text used to claim
        # "first 500" even when fewer rows matched the filters.
        rows_shown = min(preview_limit, len(raw_frame))
        st.markdown(
            f"**Showing first {rows_shown:,} of {len(raw_frame):,} filtered {raw_noun}**"
        )

        # Download button (exports every filtered row, not just the preview)
        csv = raw_frame.to_csv(index=False).encode("utf-8")
        st.download_button(
            label=f"Download filtered {raw_noun} as CSV",
            data=csv,
            file_name=f"filtered_{raw_noun}.csv",
            mime="text/csv",
        )

# TAB 3: Parameter Summary
with tab3:
    st.markdown("""
    ### Parameter Summary
    Statistical parameters extracted from historical data, used throughout the system.
    """)

    if not params:
        st.warning("Parameters not loaded. Run EDA pipeline to extract parameters.")
        st.code("uv run court-scheduler eda")
    else:
        # Case Types
        st.markdown("#### Case Types")
        case_type_names = params["case_types"] if "case_types" in params else None
        if not case_type_names:
            st.info("No case types found")
        else:
            # Two-column table: the name and its position in the parameter list.
            case_types_df = pd.DataFrame(
                {
                    "Case Type": case_type_names,
                    "Index": range(len(case_type_names)),
                }
            )
            st.dataframe(case_types_df, use_container_width=True, hide_index=True)
            st.caption(f"Total: {len(case_type_names)} case types")

        st.markdown("---")

        # Stages
        st.markdown("#### Stages")
        stage_names = params["stages"] if "stages" in params else None
        if not stage_names:
            st.info("No stages found")
        else:
            stages_df = pd.DataFrame(
                {"Stage": stage_names, "Index": range(len(stage_names))}
            )
            st.dataframe(stages_df, use_container_width=True, hide_index=True)
            st.caption(f"Total: {len(stage_names)} stages")

        st.markdown("---")

        # Stage Transitions
        st.markdown("#### Stage Transition Graph")
        stage_graph = params["stage_graph"] if "stage_graph" in params else None
        if not stage_graph:
            st.info("No stage transition data found")
        else:
            st.markdown("**Sample transitions from each stage:**")

            # Only the first five source stages are shown as a sample.
            sample_stages = list(stage_graph.keys())[:5]
            for stage in sample_stages:
                transitions = stage_graph[stage]
                if not transitions:
                    continue
                with st.expander(f"From: {stage}"):
                    trans_df = pd.DataFrame(transitions)
                    if not trans_df.empty:
                        st.dataframe(
                            trans_df, use_container_width=True, hide_index=True
                        )

            st.caption(f"Total: {len(stage_graph)} stages with transition data")

        st.markdown("---")

        # Adjournment Statistics
        st.markdown("#### Adjournment Probabilities")
        if not ("adjournment_stats" in params and params["adjournment_stats"]):
            st.info("No adjournment statistics found")
        else:
            st.markdown("**Adjournment probability by stage and case type:**")

            # Create heatmap; both axes are truncated for readability.
            adj_stats = params["adjournment_stats"]
            stages_list = list(adj_stats.keys())[:20]
            case_types_list = params.get("case_types", [])[:15]

            if not (stages_list and case_types_list):
                st.info("Insufficient data for heatmap")
            else:
                # One row per stage, one cell per case type, in percent.
                # A missing (stage, case type) pair is shown as 0.
                heatmap_data = [
                    [adj_stats.get(stage, {}).get(ct, 0) * 100 for ct in case_types_list]
                    for stage in stages_list
                ]
                cell_labels = [[f"{val:.1f}%" for val in row] for row in heatmap_data]

                heatmap_trace = go.Heatmap(
                    z=heatmap_data,
                    x=case_types_list,
                    y=stages_list,
                    colorscale="RdYlGn_r",
                    text=cell_labels,
                    texttemplate="%{text}",
                    textfont={"size": 8},
                    colorbar=dict(title="Adj. Prob. (%)"),
                )
                fig = go.Figure(data=heatmap_trace)
                fig.update_layout(
                    title="Adjournment Probability by Stage and Case Type",
                    xaxis_title="Case Type",
                    yaxis_title="Stage",
                    height=700,
                )
                st.plotly_chart(fig, use_container_width=True)
                st.caption("Showing top 20 stages and top 15 case types")

        st.markdown("---")

        # System Configuration Section
        # Heading, a short explanatory note, and the four sub-tabs that the
        # `with config_tabN:` blocks below render into.
        st.markdown("### System Configuration")
        st.info("""
        These parameters control how the system analyzes historical data and generates simulation cases.
        Most are derived from historical data patterns, while some are configurable thresholds.
        """)

        config_tab1, config_tab2, config_tab3, config_tab4 = st.tabs(
            [
                "EDA Parameters",
                "Ripeness Classifier",
                "Case Generator",
                "Simulation Defaults",
            ]
        )

        # EDA parameters tab. Everything shown here is literal display text
        # written in this file; none of it is read from the EDA code at
        # runtime, so it has to be kept in sync by hand.
        with config_tab1:
            st.markdown("#### EDA Analysis Parameters")
            st.markdown("**These parameters control historical data analysis:**")

            col1, col2 = st.columns(2)

            with col1:
                # Left column: readiness score formula and alert definitions.
                st.markdown("**Readiness Score Calculation**")
                st.code(
                    """
Readiness Score = 
  0.4 * (hearings / 50)  [capped at 1.0]
+ 0.3 * (100 / gap_median)  [capped at 1.0]
+ 0.3  if stage in [ARGUMENTS, EVIDENCE, ORDERS/JUDGMENT]
+ 0.1  otherwise
                """,
                    language="text",
                )
                st.caption("Weights: 40% hearing count, 30% gap, 30% stage")

                st.markdown("**Alert Thresholds**")
                st.code(
                    """
ALERT_P90_TYPE: Disposal time > P90 within case type
ALERT_HEARING_HEAVY: Hearing count > P90 within case type
ALERT_LONG_GAP: Median gap > P90 within case type
                """,
                    language="text",
                )

            with col2:
                # Right column: adjournment proxy rule and not-reached keywords.
                st.markdown("**Adjournment Proxy Detection**")
                st.code(
                    """
Gap threshold: 1.3x median gap for that stage
If hearing_gap > 1.3 * stage_median_gap:
  is_adjourn_proxy = True
                """,
                    language="python",
                )

                st.markdown("**Not-Reached Keywords**")
                st.code(
                    """
"NOT REACHED", "NR", 
"NOT TAKEN UP", "NOT HEARD"
                """,
                    language="text",
                )

            st.markdown("---")

            # Full-width list of the stage ordering, followed by a caption
            # about how transitions are counted.
            st.markdown("**Stage Order (for transition analysis)**")
            st.code(
                """
1. PRE-ADMISSION
2. ADMISSION
3. FRAMING OF CHARGES
4. EVIDENCE
5. ARGUMENTS
6. INTERLOCUTORY APPLICATION
7. SETTLEMENT
8. ORDERS / JUDGMENT
9. FINAL DISPOSAL
10. OTHER
            """,
                language="text",
            )
            st.caption("Only forward transitions are counted (by index order)")

        with config_tab2:
            st.markdown("#### Ripeness Classification Thresholds")
            st.markdown("""
            These thresholds determine if a case is RIPE (ready for hearing) or UNRIPE (has bottlenecks).
            """)

            col1, col2 = st.columns(2)

            with col1:
                st.markdown("**Classification Thresholds**")
                from src.core.ripeness import RipenessClassifier

                thresholds = RipenessClassifier.get_current_thresholds()

                thresh_df = pd.DataFrame(
                    [
                        {
                            "Parameter": "MIN_SERVICE_HEARINGS",
                            "Value": thresholds["MIN_SERVICE_HEARINGS"],
                            "Description": "Minimum hearings to confirm service/compliance",
                        },
                        {
                            "Parameter": "MIN_STAGE_DAYS",
                            "Value": thresholds["MIN_STAGE_DAYS"],
                            "Description": "Minimum days in stage to show compliance efforts",
                        },
                        {
                            "Parameter": "MIN_CASE_AGE_DAYS",
                            "Value": thresholds["MIN_CASE_AGE_DAYS"],
                            "Description": "Minimum case maturity before assuming readiness",
                        },
                    ]
                )
                st.dataframe(thresh_df, use_container_width=True, hide_index=True)

                st.markdown("**ADMISSION Stage Rule**")
                st.code(
                    """
if stage == ADMISSION and hearing_count < 3:
  return UNRIPE_SUMMONS
                """,
                    language="python",
                )

                st.markdown("**Stuck Case Detection**")
                st.code(
                    """
if hearing_count > 10:
  avg_gap = age_days / hearing_count
  if avg_gap > 60 days:
    return UNRIPE_PARTY
                """,
                    language="python",
                )

            with col2:
                st.markdown("**Ripeness Priority Multipliers**")
                st.code(
                    """
RIPE cases: 1.5x priority
UNRIPE cases: 0.7x priority
                """,
                    language="text",
                )

                st.markdown("**Bottleneck Keywords**")
                bottleneck_df = pd.DataFrame(
                    [
                        {"Keyword": "SUMMONS", "Type": "UNRIPE_SUMMONS"},
                        {"Keyword": "NOTICE", "Type": "UNRIPE_SUMMONS"},
                        {"Keyword": "ISSUE", "Type": "UNRIPE_SUMMONS"},
                        {"Keyword": "SERVICE", "Type": "UNRIPE_SUMMONS"},
                        {"Keyword": "STAY", "Type": "UNRIPE_DEPENDENT"},
                        {"Keyword": "PENDING", "Type": "UNRIPE_DEPENDENT"},
                    ]
                )
                st.dataframe(bottleneck_df, use_container_width=True, hide_index=True)

                st.markdown("**Ripe Stage Keywords**")
                st.code(
                    '"ARGUMENTS", "HEARING", "FINAL", "JUDGMENT", "ORDERS", "DISPOSAL"',
                    language="text",
                )

            st.markdown("---")

            st.markdown("**Ripening Time Estimates (days)**")
            ripening_df = pd.DataFrame(
                [
                    {"Bottleneck Type": "UNRIPE_SUMMONS", "Estimated Days": 30},
                    {"Bottleneck Type": "UNRIPE_DEPENDENT", "Estimated Days": 60},
                    {"Bottleneck Type": "UNRIPE_PARTY", "Estimated Days": 14},
                    {"Bottleneck Type": "UNRIPE_DOCUMENT", "Estimated Days": 21},
                ]
            )
            st.dataframe(ripening_df, use_container_width=True, hide_index=True)

        with config_tab3:
            st.markdown("#### Case Generator Configuration")
            st.markdown("""
            These parameters control synthetic case generation for simulations.
            """)

            col1, col2 = st.columns(2)

            with col1:
                st.markdown("**Default Case Type Distribution**")
                from src.data.config import CASE_TYPE_DISTRIBUTION

                dist_df = pd.DataFrame(
                    [
                        {"Case Type": ct, "Probability": f"{p * 100:.1f}%"}
                        for ct, p in CASE_TYPE_DISTRIBUTION.items()
                    ]
                )
                st.dataframe(dist_df, use_container_width=True, hide_index=True)
                st.caption("Based on historical distribution from EDA")

                st.markdown("**Urgent Case Percentage**")
                from src.data.config import URGENT_CASE_PERCENTAGE

                st.metric("Urgent Cases", f"{URGENT_CASE_PERCENTAGE * 100:.1f}%")

            with col2:
                st.markdown("**Monthly Seasonality Factors**")
                from src.data.config import MONTHLY_SEASONALITY

                season_df = pd.DataFrame(
                    [
                        {"Month": i, "Factor": MONTHLY_SEASONALITY.get(i, 1.0)}
                        for i in range(1, 13)
                    ]
                )
                st.dataframe(season_df, use_container_width=True, hide_index=True)
                st.caption("1.0 = average, >1.0 = more cases, <1.0 = fewer cases")

            st.markdown("---")

            st.markdown("**Initial Case State Generation**")
            col1, col2 = st.columns(2)

            with col1:
                st.markdown("**Hearing History Simulation**")
                st.code(
                    """
if days_since_filed > 30:
  hearing_count = max(1, days_since_filed // 30)
  
  # Last hearing: 7-30 days before sim start
  days_before_end = random(7, 30)
  last_hearing_date = end_date - days_before_end
  days_since_last_hearing = days_before_end
                """,
                    language="python",
                )
                st.caption("Ensures staggered eligibility, not all at once")

            with col2:
                st.markdown("**Ripeness Purpose Assignment**")
                st.code(
                    """
Bottleneck purposes (20% probability):
- ISSUE SUMMONS, FOR NOTICE
- AWAIT SERVICE OF NOTICE
- STAY APPLICATION PENDING
- FOR ORDERS

Ripe purposes (80% probability):
- ARGUMENTS, HEARING
- FINAL ARGUMENTS, FOR JUDGMENT
- EVIDENCE
                """,
                    language="text",
                )
                st.caption(
                    "Early ADMISSION: 40% bottleneck, Advanced stages: mostly ripe"
                )

        with config_tab4:
            st.markdown("#### Simulation Defaults")
            st.markdown("""
            Default values used in simulation when not explicitly configured by user.
            """)

            col1, col2 = st.columns(2)

            with col1:
                st.markdown("**Duration Estimation**")
                st.code(
                    """
Method: lognormal
  - Uses historical median and P90
  - Ensures realistic variance
  - Min duration: 1 day

Formula:
  sigma = (log(p90) - log(median)) / 1.2816
  mu = log(median)
  duration = exp(mu + sigma * randn())
                """,
                    language="text",
                )

                st.markdown("**Courtroom Capacity**")
                if params and "court_capacity_global" in params:
                    cap = params["court_capacity_global"]
                    st.metric(
                        "Median slots/day", f"{cap.get('slots_median_global', 151):.0f}"
                    )
                    st.metric(
                        "P90 slots/day", f"{cap.get('slots_p90_global', 200):.0f}"
                    )
                else:
                    st.info("Run EDA to load capacity statistics")

            with col2:
                st.markdown("**Policy Defaults**")
                st.code(
                    """
READINESS policy weights:
- age: 0.2
- hearings: 0.2
- urgency: 0.3
- stage: 0.3

Minimum hearing gap: 7 days

RL policy:
- Model: latest from models/ directory
- Fallback: readiness policy
                """,
                    language="text",
                )

                st.markdown("**Working Days**")
                st.code(
                    """
Excludes:
- Weekends (Saturday, Sunday)
- National holidays (loaded from config)
- Court closure days
                """,
                    language="text",
                )

# Page footer: a horizontal rule followed by a short note about the data source.
footer_note = "Data loaded from EDA pipeline. Use refresh button to reload."
st.markdown("---")
st.caption(footer_note)