RoyAalekh commited on
Commit
f9934a8
·
2 Parent(s): 2f87460 b43adf7

Merge pull request #8 from RoyAalekh/dev/removing_cli_from_dashboard_backend

Browse files
cli/main.py CHANGED
@@ -36,9 +36,15 @@ console = Console(legacy_windows=False)
36
 
37
  @app.command()
38
  def eda(
39
- skip_clean: bool = typer.Option(False, "--skip-clean", help="Skip data loading and cleaning"),
40
- skip_viz: bool = typer.Option(False, "--skip-viz", help="Skip visualization generation"),
41
- skip_params: bool = typer.Option(False, "--skip-params", help="Skip parameter extraction"),
 
 
 
 
 
 
42
  ) -> None:
43
  """Run the EDA pipeline (load, explore, extract parameters)."""
44
  console.print("[bold blue]Running EDA Pipeline[/bold blue]")
@@ -60,7 +66,9 @@ def eda(
60
  console.print("Data loaded and cleaned")
61
 
62
  if not skip_viz:
63
- task = progress.add_task("Step 2/3: Generate visualizations...", total=None)
 
 
64
  run_exploration()
65
  progress.update(task, completed=True)
66
  console.print("Visualizations generated")
@@ -92,8 +100,12 @@ def generate(
92
  interactive: bool = typer.Option(
93
  False, "--interactive", help="Prompt for parameters interactively"
94
  ),
95
- n_cases: int = typer.Option(10000, "--cases", "-n", help="Number of cases to generate"),
96
- start_date: str = typer.Option("2022-01-01", "--start", help="Start date (YYYY-MM-DD)"),
 
 
 
 
97
  end_date: str = typer.Option("2023-12-31", "--end", help="End date (YYYY-MM-DD)"),
98
  output: str = typer.Option(
99
  "data/generated/cases.csv", "--output", "-o", help="Output CSV file"
@@ -178,7 +190,9 @@ def generate(
178
  user_dist = _parse_case_type_dist(case_type_dist)
179
 
180
  gen = CaseGenerator(start=start, end=end, seed=seed)
181
- cases = gen.generate(n_cases, stage_mix_auto=True, case_type_distribution=user_dist)
 
 
182
  # Write primary cases file
183
  CaseGenerator.to_csv(cases, output_path)
184
  # Also write detailed hearings history alongside, for the dashboard/classifier
@@ -209,14 +223,22 @@ def simulate(
209
  interactive: bool = typer.Option(
210
  False, "--interactive", help="Prompt for parameters interactively"
211
  ),
212
- cases_csv: str = typer.Option("data/generated/cases.csv", "--cases", help="Input cases CSV"),
213
- days: int = typer.Option(384, "--days", "-d", help="Number of working days to simulate"),
214
- start_date: str = typer.Option(None, "--start", help="Simulation start date (YYYY-MM-DD)"),
 
 
 
 
 
 
215
  policy: str = typer.Option(
216
  "readiness", "--policy", "-p", help="Scheduling policy (fifo/age/readiness)"
217
  ),
218
  seed: int = typer.Option(42, "--seed", help="Random seed"),
219
- log_dir: str = typer.Option(None, "--log-dir", "-o", help="Output directory for logs"),
 
 
220
  ) -> None:
221
  """Run court scheduling simulation."""
222
  console.print(f"[bold blue]Running {days}-day simulation[/bold blue]")
@@ -238,7 +260,9 @@ def simulate(
238
  update={
239
  "cases": Path(cases_csv) if cases_csv else scfg.cases,
240
  "days": days if days else scfg.days,
241
- "start": (date_cls.fromisoformat(start_date) if start_date else scfg.start),
 
 
242
  "policy": policy if policy else scfg.policy,
243
  "seed": seed if seed else scfg.seed,
244
  "log_dir": (Path(log_dir) if log_dir else scfg.log_dir),
@@ -249,12 +273,16 @@ def simulate(
249
  cases_csv = typer.prompt("Cases CSV", default=cases_csv)
250
  days = typer.prompt("Days to simulate", default=days)
251
  start_date = (
252
- typer.prompt("Start date (YYYY-MM-DD) or blank", default=start_date or "")
 
 
253
  or None
254
  )
255
  policy = typer.prompt("Policy [readiness|fifo|age]", default=policy)
256
  seed = typer.prompt("Random seed", default=seed)
257
- log_dir = typer.prompt("Log dir (or blank)", default=log_dir or "") or None
 
 
258
  scfg = SimulateConfig(
259
  cases=Path(cases_csv),
260
  days=days,
@@ -268,9 +296,13 @@ def simulate(
268
  path = scfg.cases
269
  if path.exists():
270
  cases = CaseGenerator.from_csv(path)
271
- start = scfg.start or (max(c.filed_date for c in cases) if cases else date_cls.today())
 
 
272
  else:
273
- console.print(f"[yellow]Warning:[/yellow] {path} not found. Generating test cases...")
 
 
274
  start = scfg.start or date_cls.today().replace(day=1)
275
  gen = CaseGenerator(start=start, end=start.replace(day=28), seed=scfg.seed)
276
  cases = gen.generate(n_cases=5 * 151)
@@ -315,7 +347,9 @@ def simulate(
315
  gini_disp = gini(disp_times) if disp_times else 0.0
316
 
317
  console.print("\n[bold]Disposal Metrics:[/bold]")
318
- console.print(f" Cases disposed: {res.disposals:,} ({res.disposals / len(cases):.1%})")
 
 
319
  console.print(f" Gini coefficient: {gini_disp:.3f}")
320
 
321
  console.print("\n[bold]Efficiency:[/bold]")
@@ -333,14 +367,15 @@ def simulate(
333
  raise typer.Exit(code=1)
334
 
335
 
336
- # RL training command removed
337
-
338
-
339
  @app.command()
340
  def workflow(
341
- n_cases: int = typer.Option(10000, "--cases", "-n", help="Number of cases to generate"),
 
 
342
  sim_days: int = typer.Option(384, "--days", "-d", help="Simulation days"),
343
- output_dir: str = typer.Option("data/workflow_run", "--output", "-o", help="Output directory"),
 
 
344
  seed: int = typer.Option(42, "--seed", help="Random seed"),
345
  ) -> None:
346
  """Run full workflow: EDA -> Generate -> Simulate -> Report."""
@@ -417,7 +452,9 @@ def dashboard(
417
  app_path = Path(__file__).parent.parent / "scheduler" / "dashboard" / "app.py"
418
 
419
  if not app_path.exists():
420
- console.print(f"[bold red]Error:[/bold red] Dashboard app not found at {app_path}")
 
 
421
  raise typer.Exit(code=1)
422
 
423
  # Run streamlit
 
36
 
37
  @app.command()
38
  def eda(
39
+ skip_clean: bool = typer.Option(
40
+ False, "--skip-clean", help="Skip data loading and cleaning"
41
+ ),
42
+ skip_viz: bool = typer.Option(
43
+ False, "--skip-viz", help="Skip visualization generation"
44
+ ),
45
+ skip_params: bool = typer.Option(
46
+ False, "--skip-params", help="Skip parameter extraction"
47
+ ),
48
  ) -> None:
49
  """Run the EDA pipeline (load, explore, extract parameters)."""
50
  console.print("[bold blue]Running EDA Pipeline[/bold blue]")
 
66
  console.print("Data loaded and cleaned")
67
 
68
  if not skip_viz:
69
+ task = progress.add_task(
70
+ "Step 2/3: Generate visualizations...", total=None
71
+ )
72
  run_exploration()
73
  progress.update(task, completed=True)
74
  console.print("Visualizations generated")
 
100
  interactive: bool = typer.Option(
101
  False, "--interactive", help="Prompt for parameters interactively"
102
  ),
103
+ n_cases: int = typer.Option(
104
+ 10000, "--cases", "-n", help="Number of cases to generate"
105
+ ),
106
+ start_date: str = typer.Option(
107
+ "2022-01-01", "--start", help="Start date (YYYY-MM-DD)"
108
+ ),
109
  end_date: str = typer.Option("2023-12-31", "--end", help="End date (YYYY-MM-DD)"),
110
  output: str = typer.Option(
111
  "data/generated/cases.csv", "--output", "-o", help="Output CSV file"
 
190
  user_dist = _parse_case_type_dist(case_type_dist)
191
 
192
  gen = CaseGenerator(start=start, end=end, seed=seed)
193
+ cases = gen.generate(
194
+ n_cases, stage_mix_auto=True, case_type_distribution=user_dist
195
+ )
196
  # Write primary cases file
197
  CaseGenerator.to_csv(cases, output_path)
198
  # Also write detailed hearings history alongside, for the dashboard/classifier
 
223
  interactive: bool = typer.Option(
224
  False, "--interactive", help="Prompt for parameters interactively"
225
  ),
226
+ cases_csv: str = typer.Option(
227
+ "data/generated/cases.csv", "--cases", help="Input cases CSV"
228
+ ),
229
+ days: int = typer.Option(
230
+ 384, "--days", "-d", help="Number of working days to simulate"
231
+ ),
232
+ start_date: str = typer.Option(
233
+ None, "--start", help="Simulation start date (YYYY-MM-DD)"
234
+ ),
235
  policy: str = typer.Option(
236
  "readiness", "--policy", "-p", help="Scheduling policy (fifo/age/readiness)"
237
  ),
238
  seed: int = typer.Option(42, "--seed", help="Random seed"),
239
+ log_dir: str = typer.Option(
240
+ None, "--log-dir", "-o", help="Output directory for logs"
241
+ ),
242
  ) -> None:
243
  """Run court scheduling simulation."""
244
  console.print(f"[bold blue]Running {days}-day simulation[/bold blue]")
 
260
  update={
261
  "cases": Path(cases_csv) if cases_csv else scfg.cases,
262
  "days": days if days else scfg.days,
263
+ "start": (
264
+ date_cls.fromisoformat(start_date) if start_date else scfg.start
265
+ ),
266
  "policy": policy if policy else scfg.policy,
267
  "seed": seed if seed else scfg.seed,
268
  "log_dir": (Path(log_dir) if log_dir else scfg.log_dir),
 
273
  cases_csv = typer.prompt("Cases CSV", default=cases_csv)
274
  days = typer.prompt("Days to simulate", default=days)
275
  start_date = (
276
+ typer.prompt(
277
+ "Start date (YYYY-MM-DD) or blank", default=start_date or ""
278
+ )
279
  or None
280
  )
281
  policy = typer.prompt("Policy [readiness|fifo|age]", default=policy)
282
  seed = typer.prompt("Random seed", default=seed)
283
+ log_dir = (
284
+ typer.prompt("Log dir (or blank)", default=log_dir or "") or None
285
+ )
286
  scfg = SimulateConfig(
287
  cases=Path(cases_csv),
288
  days=days,
 
296
  path = scfg.cases
297
  if path.exists():
298
  cases = CaseGenerator.from_csv(path)
299
+ start = scfg.start or (
300
+ max(c.filed_date for c in cases) if cases else date_cls.today()
301
+ )
302
  else:
303
+ console.print(
304
+ f"[yellow]Warning:[/yellow] {path} not found. Generating test cases..."
305
+ )
306
  start = scfg.start or date_cls.today().replace(day=1)
307
  gen = CaseGenerator(start=start, end=start.replace(day=28), seed=scfg.seed)
308
  cases = gen.generate(n_cases=5 * 151)
 
347
  gini_disp = gini(disp_times) if disp_times else 0.0
348
 
349
  console.print("\n[bold]Disposal Metrics:[/bold]")
350
+ console.print(
351
+ f" Cases disposed: {res.disposals:,} ({res.disposals / len(cases):.1%})"
352
+ )
353
  console.print(f" Gini coefficient: {gini_disp:.3f}")
354
 
355
  console.print("\n[bold]Efficiency:[/bold]")
 
367
  raise typer.Exit(code=1)
368
 
369
 
 
 
 
370
  @app.command()
371
  def workflow(
372
+ n_cases: int = typer.Option(
373
+ 10000, "--cases", "-n", help="Number of cases to generate"
374
+ ),
375
  sim_days: int = typer.Option(384, "--days", "-d", help="Simulation days"),
376
+ output_dir: str = typer.Option(
377
+ "data/workflow_run", "--output", "-o", help="Output directory"
378
+ ),
379
  seed: int = typer.Option(42, "--seed", help="Random seed"),
380
  ) -> None:
381
  """Run full workflow: EDA -> Generate -> Simulate -> Report."""
 
452
  app_path = Path(__file__).parent.parent / "scheduler" / "dashboard" / "app.py"
453
 
454
  if not app_path.exists():
455
+ console.print(
456
+ f"[bold red]Error:[/bold red] Dashboard app not found at {app_path}"
457
+ )
458
  raise typer.Exit(code=1)
459
 
460
  # Run streamlit
configs/simulate.sample.toml CHANGED
@@ -4,7 +4,7 @@ days = 384
4
  # start = "2024-01-01" # optional; if omitted, uses max filed_date in cases
5
  policy = "readiness" # readiness|fifo|age
6
  seed = 42
7
- # duration_percentile = "median" # median|p90
8
- # courtrooms = 5 # optional; uses engine default if omitted
9
- # daily_capacity = 151 # optional; uses engine default if omitted
10
  # log_dir = "data/sim_runs/example"
 
4
  # start = "2024-01-01" # optional; if omitted, uses max filed_date in cases
5
  policy = "readiness" # readiness|fifo|age
6
  seed = 42
7
+ duration_percentile = "median" # median|p90
8
+ courtrooms = 5 # optional; uses engine default if omitted
9
+ daily_capacity = 151 # optional; uses engine default if omitted
10
  # log_dir = "data/sim_runs/example"
eda/load_clean.py CHANGED
@@ -60,6 +60,7 @@ def _null_summary(df: pl.DataFrame, name: str) -> None:
60
  def load_raw() -> tuple[pl.DataFrame, pl.DataFrame]:
61
  try:
62
  import duckdb
 
63
  if DUCKDB_FILE.exists():
64
  print(f"Loading raw data from DuckDB: {DUCKDB_FILE}")
65
  conn = duckdb.connect(str(DUCKDB_FILE))
@@ -72,6 +73,8 @@ def load_raw() -> tuple[pl.DataFrame, pl.DataFrame]:
72
  except Exception as e:
73
  print(f"[WARN] DuckDB load failed ({e}), falling back to CSV...")
74
  print("Loading raw data from CSVs (fallback)...")
 
 
75
  cases = pl.read_csv(
76
  CASES_FILE,
77
  try_parse_dates=True,
@@ -95,7 +98,9 @@ def clean_and_augment(
95
  # Standardise date columns if needed
96
  for col in ["DATE_FILED", "DECISION_DATE", "REGISTRATION_DATE", "LAST_SYNC_TIME"]:
97
  if col in cases.columns and cases[col].dtype == pl.Utf8:
98
- cases = cases.with_columns(pl.col(col).str.strptime(pl.Date, "%d-%m-%Y", strict=False))
 
 
99
 
100
  # Deduplicate on keys
101
  if "CNR_NUMBER" in cases.columns:
@@ -158,7 +163,10 @@ def clean_and_augment(
158
  hearings.filter(pl.col("BusinessOnDate").is_not_null())
159
  .sort(["CNR_NUMBER", "BusinessOnDate"])
160
  .with_columns(
161
- ((pl.col("BusinessOnDate") - pl.col("BusinessOnDate").shift(1)) / timedelta(days=1))
 
 
 
162
  .over("CNR_NUMBER")
163
  .alias("HEARING_GAP_DAYS")
164
  )
@@ -175,7 +183,14 @@ def clean_and_augment(
175
  )
176
  cases = cases.join(gap_stats, on="CNR_NUMBER", how="left")
177
  else:
178
- for col in ["GAP_MEAN", "GAP_MEDIAN", "GAP_P25", "GAP_P75", "GAP_STD", "N_GAPS"]:
 
 
 
 
 
 
 
179
  cases = cases.with_columns(pl.lit(None).alias(col))
180
 
181
  # Fill some basics
 
60
  def load_raw() -> tuple[pl.DataFrame, pl.DataFrame]:
61
  try:
62
  import duckdb
63
+
64
  if DUCKDB_FILE.exists():
65
  print(f"Loading raw data from DuckDB: {DUCKDB_FILE}")
66
  conn = duckdb.connect(str(DUCKDB_FILE))
 
73
  except Exception as e:
74
  print(f"[WARN] DuckDB load failed ({e}), falling back to CSV...")
75
  print("Loading raw data from CSVs (fallback)...")
76
+ if not CASES_FILE.exists() or not HEAR_FILE.exists():
77
+ raise FileNotFoundError("One or both CSV files are missing.")
78
  cases = pl.read_csv(
79
  CASES_FILE,
80
  try_parse_dates=True,
 
98
  # Standardise date columns if needed
99
  for col in ["DATE_FILED", "DECISION_DATE", "REGISTRATION_DATE", "LAST_SYNC_TIME"]:
100
  if col in cases.columns and cases[col].dtype == pl.Utf8:
101
+ cases = cases.with_columns(
102
+ pl.col(col).str.strptime(pl.Date, "%d-%m-%Y", strict=False)
103
+ )
104
 
105
  # Deduplicate on keys
106
  if "CNR_NUMBER" in cases.columns:
 
163
  hearings.filter(pl.col("BusinessOnDate").is_not_null())
164
  .sort(["CNR_NUMBER", "BusinessOnDate"])
165
  .with_columns(
166
+ (
167
+ (pl.col("BusinessOnDate") - pl.col("BusinessOnDate").shift(1))
168
+ / timedelta(days=1)
169
+ )
170
  .over("CNR_NUMBER")
171
  .alias("HEARING_GAP_DAYS")
172
  )
 
183
  )
184
  cases = cases.join(gap_stats, on="CNR_NUMBER", how="left")
185
  else:
186
+ for col in [
187
+ "GAP_MEAN",
188
+ "GAP_MEDIAN",
189
+ "GAP_P25",
190
+ "GAP_P75",
191
+ "GAP_STD",
192
+ "N_GAPS",
193
+ ]:
194
  cases = cases.with_columns(pl.lit(None).alias(col))
195
 
196
  # Fill some basics
scheduler/dashboard/app.py CHANGED
@@ -1,7 +1,7 @@
1
  """Main dashboard application for Court Scheduling System.
2
 
3
  This is the entry point for the Streamlit multi-page dashboard.
4
- Launch with: uv run court-scheduler dashboard
5
  """
6
 
7
  from __future__ import annotations
@@ -12,8 +12,6 @@ ROOT = Path("/app") # absolute, unambiguous
12
  if str(ROOT) not in sys.path:
13
  sys.path.insert(0, str(ROOT))
14
 
15
- import subprocess
16
- from pathlib import Path
17
 
18
  import streamlit as st
19
 
@@ -27,29 +25,17 @@ st.set_page_config(
27
  initial_sidebar_state="expanded",
28
  )
29
 
30
- # Enforce `uv` availability for all dashboard-triggered commands
31
- try:
32
- uv_check = subprocess.run(["uv", "--version"], capture_output=True, text=True)
33
- if uv_check.returncode != 0:
34
- raise RuntimeError(uv_check.stderr or "uv not available")
35
- except Exception:
36
- import streamlit as st
37
-
38
- st.error(
39
- "'uv' is required to run this dashboard's commands. Please install uv and rerun.\n\n"
40
- "Install on macOS/Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh`\n"
41
- "Install on Windows (PowerShell): `irm https://astral.sh/uv/install.ps1 | iex`"
42
- )
43
- st.stop()
44
-
45
  # Main page content
46
  st.title("Court Scheduling System Dashboard")
47
- st.markdown("**Karnataka High Court - Algorithmic Decision Support for Fair Scheduling**")
 
 
48
 
49
  st.markdown("---")
50
 
51
  # Introduction
52
- st.markdown("""
 
53
  ### Overview
54
 
55
  This system provides data-driven scheduling recommendations while maintaining judicial control and autonomy.
@@ -63,7 +49,8 @@ This system provides data-driven scheduling recommendations while maintaining ju
63
  - Reinforcement learning optimization
64
 
65
  Use the sidebar to navigate between sections.
66
- """)
 
67
 
68
  # System status
69
  status_header_col1, status_header_col2 = st.columns([3, 1])
@@ -99,47 +86,57 @@ with col3:
99
  st.caption("Run EDA pipeline to generate visualizations")
100
 
101
  # Setup Controls
102
- eda_ready = data_status["cleaned_data"] and data_status["parameters"] and data_status["eda_figures"]
 
 
 
 
103
 
104
  if not eda_ready:
105
  st.markdown("---")
106
  st.markdown("### Initial Setup")
107
- st.warning("Run the EDA pipeline to process historical data and extract parameters.")
 
 
108
 
109
  col1, col2 = st.columns([2, 1])
110
 
111
  with col1:
112
- st.markdown("""
 
113
  The EDA pipeline:
114
  - Loads and cleans historical court case data
115
  - Extracts statistical parameters (distributions, transition probabilities)
116
  - Generates analysis visualizations
117
 
118
  This is required before using other dashboard features.
119
- """)
 
120
 
121
  with col2:
122
  if st.button("Run EDA Pipeline", type="primary", use_container_width=True):
123
- import subprocess
 
 
124
 
125
  with st.spinner("Running EDA pipeline... This may take a few minutes."):
126
  try:
127
- result = subprocess.run(
128
- ["uv", "run", "court-scheduler", "eda"],
129
- capture_output=True,
130
- text=True,
131
- cwd=str(Path.cwd()),
132
- )
133
-
134
- if result.returncode == 0:
135
- st.success("EDA pipeline completed")
136
- st.rerun()
137
- else:
138
- st.error(f"Pipeline failed with error code {result.returncode}")
139
- with st.expander("Show error details"):
140
- st.code(result.stderr, language="text")
141
  except Exception as e:
142
- st.error(f"Error running pipeline: {e}")
 
 
143
 
144
  with st.expander("Run manually via CLI"):
145
  st.code("uv run court-scheduler eda", language="bash")
@@ -154,7 +151,8 @@ st.markdown("### Dashboard Sections")
154
  col1, col2 = st.columns(2)
155
 
156
  with col1:
157
- st.markdown("""
 
158
  #### 1. Data & Insights
159
  Explore historical case data, view analysis visualizations, and review extracted parameters.
160
 
@@ -163,10 +161,12 @@ with col1:
163
 
164
  #### 3. Simulation Workflow
165
  Generate cases, configure simulation parameters, run scheduling simulations, and view results.
166
- """)
 
167
 
168
  with col2:
169
- st.markdown("""
 
170
  #### 4. Cause Lists & Overrides
171
  View generated cause lists, make judge overrides, and track modification history.
172
 
@@ -175,13 +175,15 @@ with col2:
175
 
176
  #### 6. Analytics & Reports
177
  Compare simulation runs, analyze performance metrics, and export comprehensive reports.
178
- """)
 
179
 
180
  st.markdown("---")
181
 
182
  # Typical Workflow
183
  with st.expander("Typical Usage Workflow"):
184
- st.markdown("""
 
185
  **Step 1: Initial Setup**
186
  - Run EDA pipeline to process historical data (one-time setup)
187
 
@@ -208,7 +210,8 @@ with st.expander("Typical Usage Workflow"):
208
  - Use Analytics & Reports to evaluate fairness and efficiency
209
  - Compare different scheduling policies
210
  - Identify bottlenecks and improvement opportunities
211
- """)
 
212
 
213
  # Footer
214
  st.markdown("---")
 
1
  """Main dashboard application for Court Scheduling System.
2
 
3
  This is the entry point for the Streamlit multi-page dashboard.
4
+ Launch with: uv run court-scheduler dashboard (or `streamlit run` directly)
5
  """
6
 
7
  from __future__ import annotations
 
12
  if str(ROOT) not in sys.path:
13
  sys.path.insert(0, str(ROOT))
14
 
 
 
15
 
16
  import streamlit as st
17
 
 
25
  initial_sidebar_state="expanded",
26
  )
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  # Main page content
29
  st.title("Court Scheduling System Dashboard")
30
+ st.markdown(
31
+ "**Karnataka High Court - Algorithmic Decision Support for Fair Scheduling**"
32
+ )
33
 
34
  st.markdown("---")
35
 
36
  # Introduction
37
+ st.markdown(
38
+ """
39
  ### Overview
40
 
41
  This system provides data-driven scheduling recommendations while maintaining judicial control and autonomy.
 
49
  - Reinforcement learning optimization
50
 
51
  Use the sidebar to navigate between sections.
52
+ """
53
+ )
54
 
55
  # System status
56
  status_header_col1, status_header_col2 = st.columns([3, 1])
 
86
  st.caption("Run EDA pipeline to generate visualizations")
87
 
88
  # Setup Controls
89
+ eda_ready = (
90
+ data_status["cleaned_data"]
91
+ and data_status["parameters"]
92
+ and data_status["eda_figures"]
93
+ )
94
 
95
  if not eda_ready:
96
  st.markdown("---")
97
  st.markdown("### Initial Setup")
98
+ st.warning(
99
+ "Run the EDA pipeline to process historical data and extract parameters."
100
+ )
101
 
102
  col1, col2 = st.columns([2, 1])
103
 
104
  with col1:
105
+ st.markdown(
106
+ """
107
  The EDA pipeline:
108
  - Loads and cleans historical court case data
109
  - Extracts statistical parameters (distributions, transition probabilities)
110
  - Generates analysis visualizations
111
 
112
  This is required before using other dashboard features.
113
+ """
114
+ )
115
 
116
  with col2:
117
  if st.button("Run EDA Pipeline", type="primary", use_container_width=True):
118
+ from eda.load_clean import run_load_and_clean
119
+ from eda.exploration import run_exploration
120
+ from eda.parameters import run_parameter_export
121
 
122
  with st.spinner("Running EDA pipeline... This may take a few minutes."):
123
  try:
124
+ # Step 1: Load & clean data
125
+ run_load_and_clean()
126
+
127
+ # Step 2: Generate visualizations
128
+ run_exploration()
129
+
130
+ # Step 3: Extract parameters
131
+ run_parameter_export()
132
+
133
+ st.success("EDA pipeline completed")
134
+ st.rerun()
135
+
 
 
136
  except Exception as e:
137
+ st.error("Pipeline failed while running inside the dashboard.")
138
+ with st.expander("Show error details"):
139
+ st.exception(e)
140
 
141
  with st.expander("Run manually via CLI"):
142
  st.code("uv run court-scheduler eda", language="bash")
 
151
  col1, col2 = st.columns(2)
152
 
153
  with col1:
154
+ st.markdown(
155
+ """
156
  #### 1. Data & Insights
157
  Explore historical case data, view analysis visualizations, and review extracted parameters.
158
 
 
161
 
162
  #### 3. Simulation Workflow
163
  Generate cases, configure simulation parameters, run scheduling simulations, and view results.
164
+ """
165
+ )
166
 
167
  with col2:
168
+ st.markdown(
169
+ """
170
  #### 4. Cause Lists & Overrides
171
  View generated cause lists, make judge overrides, and track modification history.
172
 
 
175
 
176
  #### 6. Analytics & Reports
177
  Compare simulation runs, analyze performance metrics, and export comprehensive reports.
178
+ """
179
+ )
180
 
181
  st.markdown("---")
182
 
183
  # Typical Workflow
184
  with st.expander("Typical Usage Workflow"):
185
+ st.markdown(
186
+ """
187
  **Step 1: Initial Setup**
188
  - Run EDA pipeline to process historical data (one-time setup)
189
 
 
210
  - Use Analytics & Reports to evaluate fairness and efficiency
211
  - Compare different scheduling policies
212
  - Identify bottlenecks and improvement opportunities
213
+ """
214
+ )
215
 
216
  # Footer
217
  st.markdown("---")
scheduler/dashboard/pages/1_Data_And_Insights.py CHANGED
@@ -70,7 +70,9 @@ def load_dashboard_data():
70
 
71
  with st.spinner("Loading data..."):
72
  try:
73
- cases_df, hearings_df, params, stats, total_cases, total_hearings = load_dashboard_data()
 
 
74
  except Exception as e:
75
  st.error(f"Error loading data: {e}")
76
  st.info("Please run the EDA pipeline first: `uv run court-scheduler eda`")
@@ -96,28 +98,25 @@ if cases_df.empty and hearings_df.empty:
96
 
97
  with col1:
98
  if st.button("Run EDA Pipeline Now", type="primary", use_container_width=True):
99
- import subprocess
 
 
100
 
101
  with st.spinner("Running EDA pipeline... This will take a few minutes."):
102
  try:
103
- result = subprocess.run(
104
- ["uv", "run", "court-scheduler", "eda"],
105
- capture_output=True,
106
- text=True,
107
- cwd=str(Path.cwd()),
108
- )
109
-
110
- if result.returncode == 0:
111
- st.success("EDA pipeline completed successfully!")
112
- st.info("Reload this page to see the data.")
113
- if st.button("Reload Page"):
114
- st.rerun()
115
- else:
116
- st.error(f"Pipeline failed with error code {result.returncode}")
117
- with st.expander("Error details"):
118
- st.code(result.stderr, language="text")
119
  except Exception as e:
120
- st.error(f"Error: {e}")
 
121
 
122
  with col2:
123
  with st.expander("Alternative: Run via CLI"):
@@ -133,7 +132,9 @@ col1, col2, col3, col4, col5 = st.columns(5)
133
  with col1:
134
  st.metric("Total Cases", f"{total_cases:,}")
135
  if "YEAR_FILED" in cases_df.columns:
136
- year_range = f"{cases_df['YEAR_FILED'].min():.0f}-{cases_df['YEAR_FILED'].max():.0f}"
 
 
137
  st.caption(f"Years: {year_range}")
138
 
139
  with col2:
@@ -176,7 +177,9 @@ with col5:
176
  st.markdown("---")
177
 
178
  # Main tabs
179
- tab1, tab2, tab3 = st.tabs(["Historical Analysis", "Interactive Exploration", "Parameters"])
 
 
180
 
181
  # TAB 1: Historical Analysis - Pre-generated figures
182
  with tab1:
@@ -188,11 +191,15 @@ with tab1:
188
  figures_dir = Path("reports/figures")
189
 
190
  if not figures_dir.exists():
191
- st.warning("EDA figures not found. Run the EDA pipeline to generate visualizations.")
 
 
192
  st.code("uv run court-scheduler eda")
193
  else:
194
  # Find latest versioned directory
195
- version_dirs = [d for d in figures_dir.iterdir() if d.is_dir() and d.name.startswith("v")]
 
 
196
 
197
  if not version_dirs:
198
  st.warning(
@@ -207,7 +214,9 @@ with tab1:
207
  # List available figures from the versioned directory
208
  # Exclude deprecated/removed visuals like the monthly waterfall
209
  figure_files = [
210
- f for f in sorted(latest_dir.glob("*.html")) if "waterfall" not in f.name.lower()
 
 
211
  ]
212
 
213
  if not figure_files:
@@ -227,10 +236,14 @@ with tab1:
227
  if any(x in f.name for x in ["stage", "sankey", "transition"])
228
  ]
229
  time_figs = [
230
- f for f in figure_files if any(x in f.name for x in ["monthly", "load", "gap"])
 
 
231
  ]
232
  other_figs = [
233
- f for f in figure_files if f not in distribution_figs + stage_figs + time_figs
 
 
234
  ]
235
 
236
  # Category 1: Case Distributions
@@ -325,7 +338,9 @@ with tab2:
325
  selected_stages = st.sidebar.multiselect(
326
  "Stages",
327
  options=available_stages,
328
- default=available_stages[:10] if len(available_stages) > 10 else available_stages,
 
 
329
  key="stage_filter",
330
  )
331
  else:
@@ -334,12 +349,16 @@ with tab2:
334
 
335
  # Apply filters with copy to ensure clean dataframes
336
  if selected_case_types and case_type_col:
337
- filtered_cases = cases_df[cases_df[case_type_col].isin(selected_case_types)].copy()
 
 
338
  else:
339
  filtered_cases = cases_df.copy()
340
 
341
  if selected_stages and stage_col:
342
- filtered_hearings = hearings_df[hearings_df[stage_col].isin(selected_stages)].copy()
 
 
343
  else:
344
  filtered_hearings = hearings_df.copy()
345
 
@@ -370,9 +389,9 @@ with tab2:
370
 
371
  with col4:
372
  if "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0:
373
- adj_rate_filtered = (filtered_hearings["Outcome"] == "ADJOURNED").sum() / len(
374
- filtered_hearings
375
- )
376
  st.metric("Adjournment Rate", f"{adj_rate_filtered:.1%}")
377
  else:
378
  st.metric("Adjournment Rate", "N/A")
@@ -387,9 +406,15 @@ with tab2:
387
  with sub_tab1:
388
  st.markdown("#### Case Distribution by Type")
389
 
390
- if case_type_col and case_type_col in filtered_cases.columns and len(filtered_cases) > 0:
 
 
 
 
391
  # Compute value counts and ensure proper structure
392
- case_type_counts = filtered_cases[case_type_col].value_counts().reset_index()
 
 
393
  # Rename columns for clarity (works across pandas versions)
394
  case_type_counts.columns = ["CaseType", "Count"]
395
 
@@ -428,7 +453,11 @@ with tab2:
428
  with sub_tab2:
429
  st.markdown("#### Stage Analysis")
430
 
431
- if stage_col and stage_col in filtered_hearings.columns and len(filtered_hearings) > 0:
 
 
 
 
432
  stage_counts = filtered_hearings[stage_col].value_counts().reset_index()
433
  stage_counts.columns = ["Stage", "Count"]
434
 
@@ -465,7 +494,10 @@ with tab2:
465
  not_adjourned = total_hearings - adjourned
466
 
467
  outcome_df = pd.DataFrame(
468
- {"Outcome": ["ADJOURNED", "NOT ADJOURNED"], "Count": [adjourned, not_adjourned]}
 
 
 
469
  )
470
 
471
  fig_pie = px.pie(
@@ -474,7 +506,10 @@ with tab2:
474
  names="Outcome",
475
  title=f"Outcome Distribution (Total: {total_hearings:,})",
476
  color="Outcome",
477
- color_discrete_map={"ADJOURNED": "#ef4444", "NOT ADJOURNED": "#22c55e"},
 
 
 
478
  )
479
  fig_pie.update_layout(height=400)
480
  st.plotly_chart(fig_pie, use_container_width=True)
@@ -483,7 +518,9 @@ with tab2:
483
  st.markdown("**By Stage**")
484
  adj_by_stage = (
485
  filtered_hearings.groupby(stage_col)["Outcome"]
486
- .apply(lambda x: (x == "ADJOURNED").sum() / len(x) if len(x) > 0 else 0)
 
 
487
  .reset_index()
488
  )
489
  adj_by_stage.columns = ["Stage", "Rate"]
@@ -507,7 +544,9 @@ with tab2:
507
  with sub_tab4:
508
  st.markdown("#### Raw Data")
509
 
510
- data_view = st.radio("Select data to view:", ["Cases", "Hearings"], horizontal=True)
 
 
511
 
512
  if data_view == "Cases":
513
  st.dataframe(
@@ -516,7 +555,9 @@ with tab2:
516
  height=600,
517
  )
518
 
519
- st.markdown(f"**Showing first 500 of {len(filtered_cases):,} filtered cases**")
 
 
520
 
521
  # Download button
522
  csv = filtered_cases.to_csv(index=False).encode("utf-8")
@@ -533,7 +574,9 @@ with tab2:
533
  height=600,
534
  )
535
 
536
- st.markdown(f"**Showing first 500 of {len(filtered_hearings):,} filtered hearings**")
 
 
537
 
538
  # Download button
539
  csv = filtered_hearings.to_csv(index=False).encode("utf-8")
@@ -559,7 +602,10 @@ with tab3:
559
  st.markdown("#### Case Types")
560
  if "case_types" in params and params["case_types"]:
561
  case_types_df = pd.DataFrame(
562
- {"Case Type": params["case_types"], "Index": range(len(params["case_types"]))}
 
 
 
563
  )
564
  st.dataframe(case_types_df, use_container_width=True, hide_index=True)
565
  st.caption(f"Total: {len(params['case_types'])} case types")
@@ -594,9 +640,13 @@ with tab3:
594
  with st.expander(f"From: {stage}"):
595
  trans_df = pd.DataFrame(transitions)
596
  if not trans_df.empty:
597
- st.dataframe(trans_df, use_container_width=True, hide_index=True)
 
 
598
 
599
- st.caption(f"Total: {len(params['stage_graph'])} stages with transition data")
 
 
600
  else:
601
  st.info("No stage transition data found")
602
 
@@ -609,8 +659,12 @@ with tab3:
609
 
610
  # Create heatmap
611
  adj_stats = params["adjournment_stats"]
612
- stages_list = list(adj_stats.keys())[:20] # Limit to 20 stages for readability
613
- case_types_list = params.get("case_types", [])[:15] # Limit to 15 case types
 
 
 
 
614
 
615
  if stages_list and case_types_list:
616
  heatmap_data = []
@@ -656,7 +710,12 @@ with tab3:
656
  """)
657
 
658
  config_tab1, config_tab2, config_tab3, config_tab4 = st.tabs(
659
- ["EDA Parameters", "Ripeness Classifier", "Case Generator", "Simulation Defaults"]
 
 
 
 
 
660
  )
661
 
662
  with config_tab1:
@@ -857,7 +916,10 @@ UNRIPE cases: 0.7x priority
857
  from scheduler.data.config import MONTHLY_SEASONALITY
858
 
859
  season_df = pd.DataFrame(
860
- [{"Month": i, "Factor": MONTHLY_SEASONALITY.get(i, 1.0)} for i in range(1, 13)]
 
 
 
861
  )
862
  st.dataframe(season_df, use_container_width=True, hide_index=True)
863
  st.caption("1.0 = average, >1.0 = more cases, <1.0 = fewer cases")
@@ -900,7 +962,9 @@ Ripe purposes (80% probability):
900
  """,
901
  language="text",
902
  )
903
- st.caption("Early ADMISSION: 40% bottleneck, Advanced stages: mostly ripe")
 
 
904
 
905
  with config_tab4:
906
  st.markdown("#### Simulation Defaults")
@@ -930,8 +994,12 @@ Formula:
930
  st.markdown("**Courtroom Capacity**")
931
  if params and "court_capacity_global" in params:
932
  cap = params["court_capacity_global"]
933
- st.metric("Median slots/day", f"{cap.get('slots_median_global', 151):.0f}")
934
- st.metric("P90 slots/day", f"{cap.get('slots_p90_global', 200):.0f}")
 
 
 
 
935
  else:
936
  st.info("Run EDA to load capacity statistics")
937
 
 
70
 
71
  with st.spinner("Loading data..."):
72
  try:
73
+ cases_df, hearings_df, params, stats, total_cases, total_hearings = (
74
+ load_dashboard_data()
75
+ )
76
  except Exception as e:
77
  st.error(f"Error loading data: {e}")
78
  st.info("Please run the EDA pipeline first: `uv run court-scheduler eda`")
 
98
 
99
  with col1:
100
  if st.button("Run EDA Pipeline Now", type="primary", use_container_width=True):
101
+ from eda.load_clean import run_load_and_clean
102
+ from eda.exploration import run_exploration
103
+ from eda.parameters import run_parameter_export
104
 
105
  with st.spinner("Running EDA pipeline... This will take a few minutes."):
106
  try:
107
+ # Step 1: Load & clean data
108
+ run_load_and_clean()
109
+ # Step 2: Generate visualizations
110
+ run_exploration()
111
+ # Step 3: Extract parameters
112
+ run_parameter_export()
113
+ st.success("EDA pipeline completed successfully!")
114
+ st.info("Reload this page to see the updated data.")
115
+ if st.button("Reload Page"):
116
+ st.rerun()
 
 
 
 
 
 
117
  except Exception as e:
118
+ with st.expander("Error details"):
119
+ st.exception(e)
120
 
121
  with col2:
122
  with st.expander("Alternative: Run via CLI"):
 
132
  with col1:
133
  st.metric("Total Cases", f"{total_cases:,}")
134
  if "YEAR_FILED" in cases_df.columns:
135
+ year_range = (
136
+ f"{cases_df['YEAR_FILED'].min():.0f}-{cases_df['YEAR_FILED'].max():.0f}"
137
+ )
138
  st.caption(f"Years: {year_range}")
139
 
140
  with col2:
 
177
  st.markdown("---")
178
 
179
  # Main tabs
180
+ tab1, tab2, tab3 = st.tabs(
181
+ ["Historical Analysis", "Interactive Exploration", "Parameters"]
182
+ )
183
 
184
  # TAB 1: Historical Analysis - Pre-generated figures
185
  with tab1:
 
191
  figures_dir = Path("reports/figures")
192
 
193
  if not figures_dir.exists():
194
+ st.warning(
195
+ "EDA figures not found. Run the EDA pipeline to generate visualizations."
196
+ )
197
  st.code("uv run court-scheduler eda")
198
  else:
199
  # Find latest versioned directory
200
+ version_dirs = [
201
+ d for d in figures_dir.iterdir() if d.is_dir() and d.name.startswith("v")
202
+ ]
203
 
204
  if not version_dirs:
205
  st.warning(
 
214
  # List available figures from the versioned directory
215
  # Exclude deprecated/removed visuals like the monthly waterfall
216
  figure_files = [
217
+ f
218
+ for f in sorted(latest_dir.glob("*.html"))
219
+ if "waterfall" not in f.name.lower()
220
  ]
221
 
222
  if not figure_files:
 
236
  if any(x in f.name for x in ["stage", "sankey", "transition"])
237
  ]
238
  time_figs = [
239
+ f
240
+ for f in figure_files
241
+ if any(x in f.name for x in ["monthly", "load", "gap"])
242
  ]
243
  other_figs = [
244
+ f
245
+ for f in figure_files
246
+ if f not in distribution_figs + stage_figs + time_figs
247
  ]
248
 
249
  # Category 1: Case Distributions
 
338
  selected_stages = st.sidebar.multiselect(
339
  "Stages",
340
  options=available_stages,
341
+ default=available_stages[:10]
342
+ if len(available_stages) > 10
343
+ else available_stages,
344
  key="stage_filter",
345
  )
346
  else:
 
349
 
350
  # Apply filters with copy to ensure clean dataframes
351
  if selected_case_types and case_type_col:
352
+ filtered_cases = cases_df[
353
+ cases_df[case_type_col].isin(selected_case_types)
354
+ ].copy()
355
  else:
356
  filtered_cases = cases_df.copy()
357
 
358
  if selected_stages and stage_col:
359
+ filtered_hearings = hearings_df[
360
+ hearings_df[stage_col].isin(selected_stages)
361
+ ].copy()
362
  else:
363
  filtered_hearings = hearings_df.copy()
364
 
 
389
 
390
  with col4:
391
  if "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0:
392
+ adj_rate_filtered = (
393
+ filtered_hearings["Outcome"] == "ADJOURNED"
394
+ ).sum() / len(filtered_hearings)
395
  st.metric("Adjournment Rate", f"{adj_rate_filtered:.1%}")
396
  else:
397
  st.metric("Adjournment Rate", "N/A")
 
406
  with sub_tab1:
407
  st.markdown("#### Case Distribution by Type")
408
 
409
+ if (
410
+ case_type_col
411
+ and case_type_col in filtered_cases.columns
412
+ and len(filtered_cases) > 0
413
+ ):
414
  # Compute value counts and ensure proper structure
415
+ case_type_counts = (
416
+ filtered_cases[case_type_col].value_counts().reset_index()
417
+ )
418
  # Rename columns for clarity (works across pandas versions)
419
  case_type_counts.columns = ["CaseType", "Count"]
420
 
 
453
  with sub_tab2:
454
  st.markdown("#### Stage Analysis")
455
 
456
+ if (
457
+ stage_col
458
+ and stage_col in filtered_hearings.columns
459
+ and len(filtered_hearings) > 0
460
+ ):
461
  stage_counts = filtered_hearings[stage_col].value_counts().reset_index()
462
  stage_counts.columns = ["Stage", "Count"]
463
 
 
494
  not_adjourned = total_hearings - adjourned
495
 
496
  outcome_df = pd.DataFrame(
497
+ {
498
+ "Outcome": ["ADJOURNED", "NOT ADJOURNED"],
499
+ "Count": [adjourned, not_adjourned],
500
+ }
501
  )
502
 
503
  fig_pie = px.pie(
 
506
  names="Outcome",
507
  title=f"Outcome Distribution (Total: {total_hearings:,})",
508
  color="Outcome",
509
+ color_discrete_map={
510
+ "ADJOURNED": "#ef4444",
511
+ "NOT ADJOURNED": "#22c55e",
512
+ },
513
  )
514
  fig_pie.update_layout(height=400)
515
  st.plotly_chart(fig_pie, use_container_width=True)
 
518
  st.markdown("**By Stage**")
519
  adj_by_stage = (
520
  filtered_hearings.groupby(stage_col)["Outcome"]
521
+ .apply(
522
+ lambda x: (x == "ADJOURNED").sum() / len(x) if len(x) > 0 else 0
523
+ )
524
  .reset_index()
525
  )
526
  adj_by_stage.columns = ["Stage", "Rate"]
 
544
  with sub_tab4:
545
  st.markdown("#### Raw Data")
546
 
547
+ data_view = st.radio(
548
+ "Select data to view:", ["Cases", "Hearings"], horizontal=True
549
+ )
550
 
551
  if data_view == "Cases":
552
  st.dataframe(
 
555
  height=600,
556
  )
557
 
558
+ st.markdown(
559
+ f"**Showing first 500 of {len(filtered_cases):,} filtered cases**"
560
+ )
561
 
562
  # Download button
563
  csv = filtered_cases.to_csv(index=False).encode("utf-8")
 
574
  height=600,
575
  )
576
 
577
+ st.markdown(
578
+ f"**Showing first 500 of {len(filtered_hearings):,} filtered hearings**"
579
+ )
580
 
581
  # Download button
582
  csv = filtered_hearings.to_csv(index=False).encode("utf-8")
 
602
  st.markdown("#### Case Types")
603
  if "case_types" in params and params["case_types"]:
604
  case_types_df = pd.DataFrame(
605
+ {
606
+ "Case Type": params["case_types"],
607
+ "Index": range(len(params["case_types"])),
608
+ }
609
  )
610
  st.dataframe(case_types_df, use_container_width=True, hide_index=True)
611
  st.caption(f"Total: {len(params['case_types'])} case types")
 
640
  with st.expander(f"From: {stage}"):
641
  trans_df = pd.DataFrame(transitions)
642
  if not trans_df.empty:
643
+ st.dataframe(
644
+ trans_df, use_container_width=True, hide_index=True
645
+ )
646
 
647
+ st.caption(
648
+ f"Total: {len(params['stage_graph'])} stages with transition data"
649
+ )
650
  else:
651
  st.info("No stage transition data found")
652
 
 
659
 
660
  # Create heatmap
661
  adj_stats = params["adjournment_stats"]
662
+ stages_list = list(adj_stats.keys())[
663
+ :20
664
+ ] # Limit to 20 stages for readability
665
+ case_types_list = params.get("case_types", [])[
666
+ :15
667
+ ] # Limit to 15 case types
668
 
669
  if stages_list and case_types_list:
670
  heatmap_data = []
 
710
  """)
711
 
712
  config_tab1, config_tab2, config_tab3, config_tab4 = st.tabs(
713
+ [
714
+ "EDA Parameters",
715
+ "Ripeness Classifier",
716
+ "Case Generator",
717
+ "Simulation Defaults",
718
+ ]
719
  )
720
 
721
  with config_tab1:
 
916
  from scheduler.data.config import MONTHLY_SEASONALITY
917
 
918
  season_df = pd.DataFrame(
919
+ [
920
+ {"Month": i, "Factor": MONTHLY_SEASONALITY.get(i, 1.0)}
921
+ for i in range(1, 13)
922
+ ]
923
  )
924
  st.dataframe(season_df, use_container_width=True, hide_index=True)
925
  st.caption("1.0 = average, >1.0 = more cases, <1.0 = fewer cases")
 
962
  """,
963
  language="text",
964
  )
965
+ st.caption(
966
+ "Early ADMISSION: 40% bottleneck, Advanced stages: mostly ripe"
967
+ )
968
 
969
  with config_tab4:
970
  st.markdown("#### Simulation Defaults")
 
994
  st.markdown("**Courtroom Capacity**")
995
  if params and "court_capacity_global" in params:
996
  cap = params["court_capacity_global"]
997
+ st.metric(
998
+ "Median slots/day", f"{cap.get('slots_median_global', 151):.0f}"
999
+ )
1000
+ st.metric(
1001
+ "P90 slots/day", f"{cap.get('slots_p90_global', 200):.0f}"
1002
+ )
1003
  else:
1004
  st.info("Run EDA to load capacity statistics")
1005
 
scheduler/dashboard/pages/2_Ripeness_Classifier.py CHANGED
@@ -99,7 +99,9 @@ RipenessClassifier.set_thresholds(
99
  )
100
 
101
  # Main content
102
- tab1, tab2, tab3 = st.tabs(["Current Configuration", "Interactive Testing", "Batch Classification"])
 
 
103
 
104
  with tab1:
105
  st.markdown("### Current Classifier Configuration")
@@ -153,7 +155,10 @@ with tab1:
153
  stage_rules = {
154
  "PRE-TRIAL": {"min_days": 60, "keywords": ["affidavit filed", "reply filed"]},
155
  "TRIAL": {"min_days": 45, "keywords": ["evidence complete", "cross complete"]},
156
- "POST-TRIAL": {"min_days": 30, "keywords": ["arguments complete", "written note"]},
 
 
 
157
  "FINAL DISPOSAL": {"min_days": 15, "keywords": ["disposed", "judgment"]},
158
  }
159
 
@@ -190,8 +195,12 @@ with tab2:
190
  service_hearings_count = st.number_input(
191
  "Service Hearings", min_value=0, max_value=20, value=3
192
  )
193
- days_in_stage = st.number_input("Days in Stage", min_value=0, max_value=365, value=45)
194
- case_age = st.number_input("Case Age (days)", min_value=0, max_value=3650, value=120)
 
 
 
 
195
 
196
  # Keywords
197
  has_keywords = st.multiselect(
@@ -213,7 +222,7 @@ with tab2:
213
 
214
  test_case = Case(
215
  case_id=case_id,
216
- case_type=case_type, # Use string directly instead of CaseType enum
217
  filed_date=filed_date,
218
  current_stage=case_stage,
219
  status=CaseStatus.PENDING,
@@ -286,15 +295,25 @@ with tab3:
286
 
287
  with col1:
288
  pct = classifications["RIPE"] / len(cases) * 100
289
- st.metric("RIPE Cases", f"{classifications['RIPE']:,}", f"{pct:.1f}%")
 
 
290
 
291
  with col2:
292
  pct = classifications["UNKNOWN"] / len(cases) * 100
293
- st.metric("UNKNOWN Cases", f"{classifications['UNKNOWN']:,}", f"{pct:.1f}%")
 
 
 
 
294
 
295
  with col3:
296
  pct = classifications["UNRIPE"] / len(cases) * 100
297
- st.metric("UNRIPE Cases", f"{classifications['UNRIPE']:,}", f"{pct:.1f}%")
 
 
 
 
298
 
299
  # Pie chart
300
  fig = px.pie(
@@ -302,7 +321,11 @@ with tab3:
302
  names=list(classifications.keys()),
303
  title="Classification Distribution",
304
  color=list(classifications.keys()),
305
- color_discrete_map={"RIPE": "green", "UNKNOWN": "orange", "UNRIPE": "red"},
 
 
 
 
306
  )
307
  st.plotly_chart(fig, use_container_width=True)
308
 
@@ -311,4 +334,6 @@ with tab3:
311
 
312
  # Footer
313
  st.markdown("---")
314
- st.markdown("*Adjust thresholds in the sidebar to see real-time impact on classification*")
 
 
 
99
  )
100
 
101
  # Main content
102
+ tab1, tab2, tab3 = st.tabs(
103
+ ["Current Configuration", "Interactive Testing", "Batch Classification"]
104
+ )
105
 
106
  with tab1:
107
  st.markdown("### Current Classifier Configuration")
 
155
  stage_rules = {
156
  "PRE-TRIAL": {"min_days": 60, "keywords": ["affidavit filed", "reply filed"]},
157
  "TRIAL": {"min_days": 45, "keywords": ["evidence complete", "cross complete"]},
158
+ "POST-TRIAL": {
159
+ "min_days": 30,
160
+ "keywords": ["arguments complete", "written note"],
161
+ },
162
  "FINAL DISPOSAL": {"min_days": 15, "keywords": ["disposed", "judgment"]},
163
  }
164
 
 
195
  service_hearings_count = st.number_input(
196
  "Service Hearings", min_value=0, max_value=20, value=3
197
  )
198
+ days_in_stage = st.number_input(
199
+ "Days in Stage", min_value=0, max_value=365, value=45
200
+ )
201
+ case_age = st.number_input(
202
+ "Case Age (days)", min_value=0, max_value=3650, value=120
203
+ )
204
 
205
  # Keywords
206
  has_keywords = st.multiselect(
 
222
 
223
  test_case = Case(
224
  case_id=case_id,
225
+ case_type=case_type,
226
  filed_date=filed_date,
227
  current_stage=case_stage,
228
  status=CaseStatus.PENDING,
 
295
 
296
  with col1:
297
  pct = classifications["RIPE"] / len(cases) * 100
298
+ st.metric(
299
+ "RIPE Cases", f"{classifications['RIPE']:,}", f"{pct:.1f}%"
300
+ )
301
 
302
  with col2:
303
  pct = classifications["UNKNOWN"] / len(cases) * 100
304
+ st.metric(
305
+ "UNKNOWN Cases",
306
+ f"{classifications['UNKNOWN']:,}",
307
+ f"{pct:.1f}%",
308
+ )
309
 
310
  with col3:
311
  pct = classifications["UNRIPE"] / len(cases) * 100
312
+ st.metric(
313
+ "UNRIPE Cases",
314
+ f"{classifications['UNRIPE']:,}",
315
+ f"{pct:.1f}%",
316
+ )
317
 
318
  # Pie chart
319
  fig = px.pie(
 
321
  names=list(classifications.keys()),
322
  title="Classification Distribution",
323
  color=list(classifications.keys()),
324
+ color_discrete_map={
325
+ "RIPE": "green",
326
+ "UNKNOWN": "orange",
327
+ "UNRIPE": "red",
328
+ },
329
  )
330
  st.plotly_chart(fig, use_container_width=True)
331
 
 
334
 
335
  # Footer
336
  st.markdown("---")
337
+ st.markdown(
338
+ "*Adjust thresholds in the sidebar to see real-time impact on classification*"
339
+ )
scheduler/dashboard/pages/3_Simulation_Workflow.py CHANGED
@@ -9,7 +9,6 @@ Multi-step workflow:
9
 
10
  from __future__ import annotations
11
 
12
- import subprocess
13
  from datetime import date, datetime
14
  from pathlib import Path
15
 
@@ -107,11 +106,15 @@ if st.session_state.workflow_step == 1:
107
  )
108
 
109
  start_date = st.date_input(
110
- "Filing period start", value=date(2022, 1, 1), help="Start date for case filings"
 
 
111
  )
112
 
113
  end_date = st.date_input(
114
- "Filing period end", value=date(2023, 12, 31), help="End date for case filings"
 
 
115
  )
116
 
117
  with col2:
@@ -124,7 +127,9 @@ if st.session_state.workflow_step == 1:
124
  )
125
 
126
  output_dir = st.text_input(
127
- "Output directory", value="data/generated", help="Directory to save generated cases"
 
 
128
  )
129
 
130
  st.info(f"Cases will be saved to: {output_dir}/cases.csv")
@@ -142,13 +147,21 @@ if st.session_state.workflow_step == 1:
142
  col_a, col_b, col_c = st.columns(3)
143
 
144
  with col_a:
145
- rsa_pct = st.number_input("RSA %", 0, 100, 20, help="Regular Second Appeal")
146
- rfa_pct = st.number_input("RFA %", 0, 100, 17, help="Regular First Appeal")
147
- crp_pct = st.number_input("CRP %", 0, 100, 20, help="Civil Revision Petition")
 
 
 
 
 
 
148
 
149
  with col_b:
150
  ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal")
151
- ccc_pct = st.number_input("CCC %", 0, 100, 11, help="Civil Contempt")
 
 
152
  cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition")
153
 
154
  with col_c:
@@ -156,55 +169,92 @@ if st.session_state.workflow_step == 1:
156
  "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition"
157
  )
158
 
159
- total_pct = rsa_pct + rfa_pct + crp_pct + ca_pct + ccc_pct + cp_pct + cmp_pct
 
 
 
 
 
 
 
 
160
  if total_pct != 100:
161
  st.error(f"Total: {total_pct}% (must be 100%)")
162
  else:
163
  st.success(f"Total: {total_pct}%")
164
  else:
165
  st.info("Using default distribution from historical data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  if st.button("Generate Cases", type="primary", use_container_width=True):
168
  with st.spinner(f"Generating {n_cases:,} cases..."):
169
  try:
170
- # Ensure output directory exists
171
- output_path = Path(output_dir)
172
- output_path.mkdir(parents=True, exist_ok=True)
173
- cases_file = output_path / "cases.csv"
174
-
175
- # Run generation via CLI
176
- result = subprocess.run(
177
- [
178
- "uv",
179
- "run",
180
- "court-scheduler",
181
- "generate",
182
- "--cases",
183
- str(n_cases),
184
- "--start",
185
- start_date.isoformat(),
186
- "--end",
187
- end_date.isoformat(),
188
- "--output",
189
- str(cases_file),
190
- "--seed",
191
- str(seed),
192
- ],
193
- capture_output=True,
194
- text=True,
195
- cwd=str(Path.cwd()),
196
  )
197
 
198
- if result.returncode == 0:
199
- st.success(f"Generated {n_cases:,} cases successfully")
200
- st.session_state.cases_ready = True
201
- st.session_state.cases_path = str(cases_file)
202
- st.session_state.workflow_step = 2
203
- st.rerun()
204
- else:
205
- st.error(f"Generation failed with error code {result.returncode}")
206
- with st.expander("Show error details"):
207
- st.code(result.stderr, language="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  except Exception as e:
209
  st.error(f"Error generating cases: {e}")
210
 
@@ -253,7 +303,9 @@ if st.session_state.workflow_step == 1:
253
  cases_file = temp_path / "uploaded_cases.csv"
254
  df.to_csv(cases_file, index=False)
255
 
256
- if st.button("Use This Dataset", type="primary", use_container_width=True):
 
 
257
  st.session_state.cases_ready = True
258
  st.session_state.cases_path = str(cases_file)
259
  st.session_state.workflow_step = 2
@@ -305,7 +357,11 @@ elif st.session_state.workflow_step == 2:
305
  )
306
 
307
  seed_sim = st.number_input(
308
- "Random seed", min_value=0, max_value=9999, value=42, help="Seed for reproducibility"
 
 
 
 
309
  )
310
 
311
  log_dir = st.text_input(
@@ -394,7 +450,9 @@ elif st.session_state.workflow_step == 2:
394
  st.rerun()
395
 
396
  with col2:
397
- if st.button("Next: Run Simulation ->", type="primary", use_container_width=True):
 
 
398
  st.session_state.workflow_step = 3
399
  st.rerun()
400
 
@@ -425,98 +483,71 @@ elif st.session_state.workflow_step == 3:
425
  if st.button("Start Simulation", type="primary", use_container_width=True):
426
  with st.spinner("Running simulation... This may take several minutes."):
427
  try:
428
- # Create a unique per-run directory under the selected base output folder
429
- ts = datetime.now().strftime("%Y%m%d_%H%M%S")
430
- base_out_dir = (
431
- Path(config["log_dir"])
432
- if config.get("log_dir")
433
- else Path("outputs") / "simulation_runs"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  )
 
 
 
435
  run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}"
436
  run_dir.mkdir(parents=True, exist_ok=True)
437
 
438
- # Persist effective run directory
439
  st.session_state.sim_config["log_dir"] = str(run_dir)
440
 
441
- # Build command
442
- cmd = [
443
- "uv",
444
- "run",
445
- "court-scheduler",
446
- "simulate",
447
- "--cases",
448
- config["cases"],
449
- "--days",
450
- str(config["days"]),
451
- "--policy",
452
- config["policy"],
453
- "--seed",
454
- str(config["seed"]),
455
- ]
456
-
457
- if config.get("start"):
458
- cmd.extend(["--start", config["start"]])
459
-
460
- # Always pass the per-run output directory
461
- cmd.extend(["--log-dir", str(run_dir)])
462
-
463
- # Run simulation
464
- result = subprocess.run(
465
- cmd,
466
- capture_output=True,
467
- text=True,
468
- cwd=str(Path.cwd()),
469
- )
470
-
471
- if result.returncode == 0:
472
- st.success("Simulation completed successfully")
473
 
474
- # Parse output to extract results
475
- st.session_state.sim_results = {
476
- "success": True,
477
- "output": result.stdout,
478
- "log_dir": str(run_dir),
479
- "completed_at": datetime.now().isoformat(),
480
- }
481
 
482
- # Auto-generate Daily Cause Lists from events.csv
483
- try:
484
- log_dir_path = (
485
- Path(st.session_state.sim_results["log_dir"])
486
- if st.session_state.sim_results.get("log_dir")
487
- else run_dir
488
- )
489
- events_path = log_dir_path / "events.csv"
490
- if events_path.exists():
491
- generator = CauseListGenerator(events_path)
492
- # Save directly in the run directory (no subfolder)
493
- compiled_path = generator.generate_daily_lists(log_dir_path)
494
- summary_path = log_dir_path / "daily_summaries.csv"
495
- # Store generated paths for display in Step 4
496
- st.session_state.sim_results["cause_lists"] = {
497
- "compiled": str(compiled_path),
498
- "summary": str(summary_path),
499
- }
500
- st.info(f"Daily cause lists generated in {log_dir_path}")
501
- else:
502
- st.warning(
503
- f"events.csv not found at {events_path}. Skipping cause list generation."
504
- )
505
- except Exception as gen_err:
506
- st.warning(f"Failed to generate daily cause lists: {gen_err}")
507
 
508
- st.session_state.workflow_step = 4
509
- st.rerun()
510
- else:
511
- st.error(f"Simulation failed with error code {result.returncode}")
512
- with st.expander("Show error details"):
513
- st.code(result.stderr, language="text")
514
 
515
- st.session_state.sim_results = {
516
- "success": False,
517
- "error": result.stderr,
518
  }
519
 
 
 
 
520
  except Exception as e:
521
  st.error(f"Error running simulation: {e}")
522
  st.session_state.sim_results = {
@@ -565,7 +596,9 @@ elif st.session_state.workflow_step == 4:
565
  for file in files:
566
  col1, col2 = st.columns([3, 1])
567
  with col1:
568
- st.markdown(f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)")
 
 
569
  with col2:
570
  if file.suffix in [".csv", ".txt"]:
571
  with open(file, "rb") as f:
@@ -573,7 +606,9 @@ elif st.session_state.workflow_step == 4:
573
  label="Download",
574
  data=f.read(),
575
  file_name=file.name,
576
- mime="text/csv" if file.suffix == ".csv" else "text/plain",
 
 
577
  key=f"download_{file.name}",
578
  )
579
 
@@ -594,7 +629,10 @@ elif st.session_state.workflow_step == 4:
594
  x=metrics_df.index,
595
  y="disposal_rate",
596
  title="Disposal Rate Over Time",
597
- labels={"x": "Day", "disposal_rate": "Disposal Rate"},
 
 
 
598
  )
599
  st.plotly_chart(fig, use_container_width=True)
600
 
@@ -611,7 +649,9 @@ elif st.session_state.workflow_step == 4:
611
 
612
  # Show summary statistics
613
  st.markdown("### Summary Statistics")
614
- st.dataframe(metrics_df.describe(), use_container_width=True)
 
 
615
 
616
  except Exception as e:
617
  st.warning(f"Could not load metrics: {e}")
@@ -660,7 +700,9 @@ elif st.session_state.workflow_step == 4:
660
  else None
661
  )
662
  if events_csv and events_csv.exists():
663
- if st.button("Generate Daily Cause Lists Now", use_container_width=False):
 
 
664
  try:
665
  # Save directly alongside events.csv (run directory root)
666
  out_dir = events_csv.parent
 
9
 
10
  from __future__ import annotations
11
 
 
12
  from datetime import date, datetime
13
  from pathlib import Path
14
 
 
106
  )
107
 
108
  start_date = st.date_input(
109
+ "Filing period start",
110
+ value=date(2022, 1, 1),
111
+ help="Start date for case filings",
112
  )
113
 
114
  end_date = st.date_input(
115
+ "Filing period end",
116
+ value=date(2023, 12, 31),
117
+ help="End date for case filings",
118
  )
119
 
120
  with col2:
 
127
  )
128
 
129
  output_dir = st.text_input(
130
+ "Output directory",
131
+ value="data/generated",
132
+ help="Directory to save generated cases",
133
  )
134
 
135
  st.info(f"Cases will be saved to: {output_dir}/cases.csv")
 
147
  col_a, col_b, col_c = st.columns(3)
148
 
149
  with col_a:
150
+ rsa_pct = st.number_input(
151
+ "RSA %", 0, 100, 20, help="Regular Second Appeal"
152
+ )
153
+ rfa_pct = st.number_input(
154
+ "RFA %", 0, 100, 17, help="Regular First Appeal"
155
+ )
156
+ crp_pct = st.number_input(
157
+ "CRP %", 0, 100, 20, help="Civil Revision Petition"
158
+ )
159
 
160
  with col_b:
161
  ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal")
162
+ ccc_pct = st.number_input(
163
+ "CCC %", 0, 100, 11, help="Civil Contempt"
164
+ )
165
  cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition")
166
 
167
  with col_c:
 
169
  "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition"
170
  )
171
 
172
+ total_pct = (
173
+ rsa_pct
174
+ + rfa_pct
175
+ + crp_pct
176
+ + ca_pct
177
+ + ccc_pct
178
+ + cp_pct
179
+ + cmp_pct
180
+ )
181
  if total_pct != 100:
182
  st.error(f"Total: {total_pct}% (must be 100%)")
183
  else:
184
  st.success(f"Total: {total_pct}%")
185
  else:
186
  st.info("Using default distribution from historical data")
187
+ from scheduler.dashboard.utils.ui_input_parser import (
188
+ build_case_type_distribution,
189
+ merge_with_default_config,
190
+ )
191
+
192
+ case_type_dist_dict = None
193
+ if use_custom_dist:
194
+ case_type_dist_dict = build_case_type_distribution(
195
+ rsa_pct,
196
+ rfa_pct,
197
+ crp_pct,
198
+ ca_pct,
199
+ ccc_pct,
200
+ cp_pct,
201
+ cmp_pct,
202
+ )
203
 
204
  if st.button("Generate Cases", type="primary", use_container_width=True):
205
  with st.spinner(f"Generating {n_cases:,} cases..."):
206
  try:
207
+ from cli.config import load_generate_config
208
+ from scheduler.data.case_generator import CaseGenerator
209
+
210
+ DEFAULT_GENERATE_CFG_PATH = Path("configs/generate.sample.toml")
211
+ config_from_file = None
212
+
213
+ if DEFAULT_GENERATE_CFG_PATH.exists():
214
+ config_from_file = load_generate_config(
215
+ DEFAULT_GENERATE_CFG_PATH
216
+ )
217
+ cfg = merge_with_default_config(
218
+ config_from_file,
219
+ n_cases=n_cases,
220
+ start_date=start_date,
221
+ end_date=end_date,
222
+ output_dir=output_dir,
223
+ seed=seed,
 
 
 
 
 
 
 
 
 
224
  )
225
 
226
+ # Prepare output dir
227
+ cfg.output.parent.mkdir(parents=True, exist_ok=True)
228
+
229
+ case_type_dist_dict = None
230
+ if use_custom_dist:
231
+ from scheduler.dashboard.utils.ui_input_parser import (
232
+ build_case_type_distribution,
233
+ )
234
+
235
+ case_type_dist_dict = build_case_type_distribution(
236
+ rsa_pct, rfa_pct, crp_pct, ca_pct, ccc_pct, cp_pct, cmp_pct
237
+ )
238
+
239
+ gen = CaseGenerator(start=cfg.start, end=cfg.end, seed=cfg.seed)
240
+
241
+ cases = gen.generate(
242
+ cfg.n_cases,
243
+ stage_mix_auto=True,
244
+ case_type_distribution=case_type_dist_dict,
245
+ )
246
+
247
+ # Save files
248
+ CaseGenerator.to_csv(cases, cfg.output)
249
+ hearings_path = cfg.output.parent / "hearings.csv"
250
+ CaseGenerator.to_hearings_csv(cases, hearings_path)
251
+
252
+ st.success(f"Generated {len(cases):,} cases successfully!")
253
+ st.session_state.cases_ready = True
254
+ st.session_state.cases_path = str(cfg.output)
255
+ st.session_state.workflow_step = 2
256
+ st.rerun()
257
+
258
  except Exception as e:
259
  st.error(f"Error generating cases: {e}")
260
 
 
303
  cases_file = temp_path / "uploaded_cases.csv"
304
  df.to_csv(cases_file, index=False)
305
 
306
+ if st.button(
307
+ "Use This Dataset", type="primary", use_container_width=True
308
+ ):
309
  st.session_state.cases_ready = True
310
  st.session_state.cases_path = str(cases_file)
311
  st.session_state.workflow_step = 2
 
357
  )
358
 
359
  seed_sim = st.number_input(
360
+ "Random seed",
361
+ min_value=0,
362
+ max_value=9999,
363
+ value=42,
364
+ help="Seed for reproducibility",
365
  )
366
 
367
  log_dir = st.text_input(
 
450
  st.rerun()
451
 
452
  with col2:
453
+ if st.button(
454
+ "Next: Run Simulation ->", type="primary", use_container_width=True
455
+ ):
456
  st.session_state.workflow_step = 3
457
  st.rerun()
458
 
 
483
  if st.button("Start Simulation", type="primary", use_container_width=True):
484
  with st.spinner("Running simulation... This may take several minutes."):
485
  try:
486
+ from cli.config import load_simulate_config
487
+ from scheduler.dashboard.utils.simulation_runner import (
488
+ merge_simulation_config,
489
+ run_simulation_dashboard,
490
+ )
491
+
492
+ DEFAULT_SIM_CFG_PATH = Path("configs/simulate.sample.toml")
493
+ if DEFAULT_SIM_CFG_PATH.exists():
494
+ default_cfg = load_simulate_config(DEFAULT_SIM_CFG_PATH)
495
+ else:
496
+ default_cfg = (
497
+ load_simulate_config(Path("parameter_sweep.toml"))
498
+ if Path("parameter_sweep.toml").exists()
499
+ else None
500
+ )
501
+
502
+ if default_cfg is None:
503
+ st.error("No default simulate config found.")
504
+ st.stop()
505
+
506
+ merged_cfg = merge_simulation_config(
507
+ default_cfg,
508
+ cases_path=config["cases"],
509
+ days=config["days"],
510
+ start_date=date.fromisoformat(config["start"])
511
+ if config.get("start")
512
+ else None,
513
+ policy=config["policy"],
514
+ seed=config["seed"],
515
+ log_dir=config["log_dir"],
516
  )
517
+
518
+ ts = datetime.now().strftime("%Y%m%d_%H%M%S")
519
+ base_out_dir = Path(config["log_dir"])
520
  run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}"
521
  run_dir.mkdir(parents=True, exist_ok=True)
522
 
523
+ # Update session config
524
  st.session_state.sim_config["log_dir"] = str(run_dir)
525
 
526
+ result = run_simulation_dashboard(merged_cfg, run_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
 
528
+ st.success("Simulation completed successfully!")
 
 
 
 
 
 
529
 
530
+ st.session_state.sim_results = {
531
+ "success": True,
532
+ "output": result["summary"],
533
+ "log_dir": str(run_dir),
534
+ "completed_at": datetime.now().isoformat(),
535
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
 
537
+ events_path = result["events_path"]
538
+ if events_path.exists():
539
+ generator = CauseListGenerator(events_path)
540
+ compiled_path = generator.generate_daily_lists(run_dir)
541
+ summary_path = run_dir / "daily_summaries.csv"
 
542
 
543
+ st.session_state.sim_results["cause_lists"] = {
544
+ "compiled": str(compiled_path),
545
+ "summary": str(summary_path),
546
  }
547
 
548
+ st.session_state.workflow_step = 4
549
+ st.rerun()
550
+
551
  except Exception as e:
552
  st.error(f"Error running simulation: {e}")
553
  st.session_state.sim_results = {
 
596
  for file in files:
597
  col1, col2 = st.columns([3, 1])
598
  with col1:
599
+ st.markdown(
600
+ f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)"
601
+ )
602
  with col2:
603
  if file.suffix in [".csv", ".txt"]:
604
  with open(file, "rb") as f:
 
606
  label="Download",
607
  data=f.read(),
608
  file_name=file.name,
609
+ mime="text/csv"
610
+ if file.suffix == ".csv"
611
+ else "text/plain",
612
  key=f"download_{file.name}",
613
  )
614
 
 
629
  x=metrics_df.index,
630
  y="disposal_rate",
631
  title="Disposal Rate Over Time",
632
+ labels={
633
+ "x": "Day",
634
+ "disposal_rate": "Disposal Rate",
635
+ },
636
  )
637
  st.plotly_chart(fig, use_container_width=True)
638
 
 
649
 
650
  # Show summary statistics
651
  st.markdown("### Summary Statistics")
652
+ st.dataframe(
653
+ metrics_df.describe(), use_container_width=True
654
+ )
655
 
656
  except Exception as e:
657
  st.warning(f"Could not load metrics: {e}")
 
700
  else None
701
  )
702
  if events_csv and events_csv.exists():
703
+ if st.button(
704
+ "Generate Daily Cause Lists Now", use_container_width=False
705
+ ):
706
  try:
707
  # Save directly alongside events.csv (run directory root)
708
  out_dir = events_csv.parent
scheduler/dashboard/utils/simulation_runner.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from pathlib import Path
3
+ from datetime import date
4
+
5
+ from cli.config import SimulateConfig
6
+ from scheduler.data.case_generator import CaseGenerator
7
+ from scheduler.simulation.engine import CourtSim, CourtSimConfig
8
+ from scheduler.core.case import CaseStatus
9
+ from scheduler.metrics.basic import gini
10
+
11
+
12
def merge_simulation_config(
    default_cfg: SimulateConfig,
    cases_path: str | None,
    days: int | None,
    start_date: date | None,
    policy: str | None,
    seed: int | None,
    log_dir: str | None,
) -> SimulateConfig:
    """Merge UI inputs with the default simulation config.

    Every falsy UI value (empty string, ``0``, ``None``) falls back to the
    corresponding field on *default_cfg*. ``seed`` is the one exception:
    it falls back only when it is ``None``, so an explicit seed of 0 from
    the UI is respected.

    Parameters
    ----------
    default_cfg:
        Repository default ``SimulateConfig`` providing fallback values.
    cases_path, days, start_date, policy, seed, log_dir:
        Raw values collected from the dashboard widgets; any of them may
        be unset.

    Returns
    -------
    SimulateConfig
        A new config object; *default_cfg* is never mutated.
    """
    return SimulateConfig(
        cases=Path(cases_path) if cases_path else default_cfg.cases,
        days=days or default_cfg.days,
        start=start_date or default_cfg.start,
        policy=policy or default_cfg.policy,
        # `is not None` (not truthiness) so seed=0 is a valid choice.
        seed=seed if seed is not None else default_cfg.seed,
        log_dir=Path(log_dir) if log_dir else default_cfg.log_dir,
    )
30
+
31
+
32
def run_simulation_dashboard(scfg: SimulateConfig, run_dir: Path) -> dict:
    """Execute a simulation for the dashboard and summarise the results.

    Mirrors the CLI ``simulate`` command: loads cases from ``scfg.cases``
    (synthesising a small batch when the CSV is missing), runs ``CourtSim``,
    writes a human-readable ``report.txt`` into *run_dir*, and returns a
    dict with the summary text, the simulation end date, and the paths of
    any CSV artefacts the engine produced (``None`` when a file was not
    written).
    """
    # ------------------------------------------------------------------
    # Load case data
    # ------------------------------------------------------------------
    path = scfg.cases
    if path.exists():
        cases = CaseGenerator.from_csv(path)
        # Default the horizon start to the latest filing date so every
        # loaded case exists before the simulation begins.
        start = scfg.start or (
            max(c.filed_date for c in cases) if cases else date.today()
        )
    else:
        # Fallback (CLI fallback behaviour): synthesise one month of cases.
        start = scfg.start or date.today().replace(day=1)
        gen = CaseGenerator(start=start, end=start.replace(day=28), seed=scfg.seed)
        cases = gen.generate(n_cases=5 * 151)

    # ------------------------------------------------------------------
    # Build CourtSimConfig
    # ------------------------------------------------------------------
    cfg = CourtSimConfig(
        start=start,
        days=scfg.days,
        seed=scfg.seed,
        policy=scfg.policy,
        duration_percentile=scfg.duration_percentile,
        log_dir=run_dir,
    )

    # ------------------------------------------------------------------
    # Run simulation
    # ------------------------------------------------------------------
    sim = CourtSim(cfg, cases)
    res = sim.run()

    # ------------------------------------------------------------------
    # Collect metrics exactly like CLI
    # ------------------------------------------------------------------
    disp_times = [
        (c.disposal_date - c.filed_date).days
        for c in cases
        if c.disposal_date is not None and c.status == CaseStatus.DISPOSED
    ]
    gini_disp = gini(disp_times) if disp_times else 0.0

    # Every ratio guards its denominator with max(1, ...); previously the
    # disposal rate divided by len(cases) unguarded and raised
    # ZeroDivisionError when an empty cases CSV was supplied.
    summary_text = f"""
Simulation Complete!
Horizon: {cfg.start} -> {res.end_date} ({cfg.days} days)

Hearing Metrics:
Total: {res.hearings_total}
Heard: {res.hearings_heard} ({res.hearings_heard / max(1, res.hearings_total):.1%})
Adjourned: {res.hearings_adjourned} ({res.hearings_adjourned / max(1, res.hearings_total):.1%})

Disposal Metrics:
Disposed: {res.disposals} ({res.disposals / max(1, len(cases)):.1%})
Gini coefficient: {gini_disp:.3f}

Efficiency:
Utilization: {res.utilization:.2%}
Avg hearings/day: {res.hearings_total / max(1, cfg.days):.2f}
"""

    (run_dir / "report.txt").write_text(summary_text, encoding="utf-8")

    # -------------------------------------------------------
    # Locate generated CSVs (if they exist)
    # -------------------------------------------------------
    metrics_path = run_dir / "metrics.csv"
    events_path = run_dir / "events.csv"

    return {
        "summary": summary_text,
        "end_date": res.end_date,
        "metrics_path": metrics_path if metrics_path.exists() else None,
        "events_path": events_path if events_path.exists() else None,
    }
scheduler/dashboard/utils/ui_input_parser.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from cli.config import GenerateConfig
3
+
4
+
5
def merge_with_default_config(
    default_cfg: GenerateConfig,
    n_cases: int,
    start_date,
    end_date,
    output_dir: str,
    seed: int,
) -> GenerateConfig:
    """Overlay the dashboard form values onto the repo's default config.

    A falsy form value (``0``, empty string, ``None``) falls back to the
    matching field on *default_cfg*; ``seed`` falls back only when it is
    ``None`` so an explicit seed of 0 is kept. *default_cfg* itself is
    never mutated — a fresh ``GenerateConfig`` is returned.
    """
    # Resolve each field up front so the constructor call stays readable.
    merged_output = (
        Path(output_dir) / "cases.csv" if output_dir else default_cfg.output
    )
    merged_seed = default_cfg.seed if seed is None else seed
    return GenerateConfig(
        n_cases=n_cases if n_cases else default_cfg.n_cases,
        start=start_date if start_date else default_cfg.start,
        end=end_date if end_date else default_cfg.end,
        output=merged_output,
        seed=merged_seed,
    )
21
+
22
+
23
def build_case_type_distribution(
    rsa_pct: int,
    rfa_pct: int,
    crp_pct: int,
    ca_pct: int,
    ccc_pct: int,
    cp_pct: int,
    cmp_pct: int,
) -> dict[str, float]:
    """Normalise per-case-type percentage inputs into a probability dict.

    The seven percentages are divided by their sum so the returned values
    add up to 1.0. When every percentage is zero an empty dict is
    returned, signalling "no distribution specified" to the caller.
    """
    # Table-driven form: one mapping drives both the total and the output,
    # so the case-type codes are listed exactly once.
    weights = {
        "RSA": rsa_pct,
        "RFA": rfa_pct,
        "CRP": crp_pct,
        "CA": ca_pct,
        "CCC": ccc_pct,
        "CP": cp_pct,
        "CMP": cmp_pct,
    }
    total = sum(weights.values())
    if not total:
        return {}
    return {code: share / total for code, share in weights.items()}