RoyAalekh commited on
Commit
b43adf7
·
1 Parent(s): 7a39bba

Fixed all pages

Browse files
cli/main.py CHANGED
@@ -36,9 +36,15 @@ console = Console(legacy_windows=False)
36
 
37
  @app.command()
38
  def eda(
39
- skip_clean: bool = typer.Option(False, "--skip-clean", help="Skip data loading and cleaning"),
40
- skip_viz: bool = typer.Option(False, "--skip-viz", help="Skip visualization generation"),
41
- skip_params: bool = typer.Option(False, "--skip-params", help="Skip parameter extraction"),
 
 
 
 
 
 
42
  ) -> None:
43
  """Run the EDA pipeline (load, explore, extract parameters)."""
44
  console.print("[bold blue]Running EDA Pipeline[/bold blue]")
@@ -60,7 +66,9 @@ def eda(
60
  console.print("Data loaded and cleaned")
61
 
62
  if not skip_viz:
63
- task = progress.add_task("Step 2/3: Generate visualizations...", total=None)
 
 
64
  run_exploration()
65
  progress.update(task, completed=True)
66
  console.print("Visualizations generated")
@@ -92,8 +100,12 @@ def generate(
92
  interactive: bool = typer.Option(
93
  False, "--interactive", help="Prompt for parameters interactively"
94
  ),
95
- n_cases: int = typer.Option(10000, "--cases", "-n", help="Number of cases to generate"),
96
- start_date: str = typer.Option("2022-01-01", "--start", help="Start date (YYYY-MM-DD)"),
 
 
 
 
97
  end_date: str = typer.Option("2023-12-31", "--end", help="End date (YYYY-MM-DD)"),
98
  output: str = typer.Option(
99
  "data/generated/cases.csv", "--output", "-o", help="Output CSV file"
@@ -178,7 +190,9 @@ def generate(
178
  user_dist = _parse_case_type_dist(case_type_dist)
179
 
180
  gen = CaseGenerator(start=start, end=end, seed=seed)
181
- cases = gen.generate(n_cases, stage_mix_auto=True, case_type_distribution=user_dist)
 
 
182
  # Write primary cases file
183
  CaseGenerator.to_csv(cases, output_path)
184
  # Also write detailed hearings history alongside, for the dashboard/classifier
@@ -209,14 +223,22 @@ def simulate(
209
  interactive: bool = typer.Option(
210
  False, "--interactive", help="Prompt for parameters interactively"
211
  ),
212
- cases_csv: str = typer.Option("data/generated/cases.csv", "--cases", help="Input cases CSV"),
213
- days: int = typer.Option(384, "--days", "-d", help="Number of working days to simulate"),
214
- start_date: str = typer.Option(None, "--start", help="Simulation start date (YYYY-MM-DD)"),
 
 
 
 
 
 
215
  policy: str = typer.Option(
216
  "readiness", "--policy", "-p", help="Scheduling policy (fifo/age/readiness)"
217
  ),
218
  seed: int = typer.Option(42, "--seed", help="Random seed"),
219
- log_dir: str = typer.Option(None, "--log-dir", "-o", help="Output directory for logs"),
 
 
220
  ) -> None:
221
  """Run court scheduling simulation."""
222
  console.print(f"[bold blue]Running {days}-day simulation[/bold blue]")
@@ -238,7 +260,9 @@ def simulate(
238
  update={
239
  "cases": Path(cases_csv) if cases_csv else scfg.cases,
240
  "days": days if days else scfg.days,
241
- "start": (date_cls.fromisoformat(start_date) if start_date else scfg.start),
 
 
242
  "policy": policy if policy else scfg.policy,
243
  "seed": seed if seed else scfg.seed,
244
  "log_dir": (Path(log_dir) if log_dir else scfg.log_dir),
@@ -249,12 +273,16 @@ def simulate(
249
  cases_csv = typer.prompt("Cases CSV", default=cases_csv)
250
  days = typer.prompt("Days to simulate", default=days)
251
  start_date = (
252
- typer.prompt("Start date (YYYY-MM-DD) or blank", default=start_date or "")
 
 
253
  or None
254
  )
255
  policy = typer.prompt("Policy [readiness|fifo|age]", default=policy)
256
  seed = typer.prompt("Random seed", default=seed)
257
- log_dir = typer.prompt("Log dir (or blank)", default=log_dir or "") or None
 
 
258
  scfg = SimulateConfig(
259
  cases=Path(cases_csv),
260
  days=days,
@@ -268,9 +296,13 @@ def simulate(
268
  path = scfg.cases
269
  if path.exists():
270
  cases = CaseGenerator.from_csv(path)
271
- start = scfg.start or (max(c.filed_date for c in cases) if cases else date_cls.today())
 
 
272
  else:
273
- console.print(f"[yellow]Warning:[/yellow] {path} not found. Generating test cases...")
 
 
274
  start = scfg.start or date_cls.today().replace(day=1)
275
  gen = CaseGenerator(start=start, end=start.replace(day=28), seed=scfg.seed)
276
  cases = gen.generate(n_cases=5 * 151)
@@ -315,7 +347,9 @@ def simulate(
315
  gini_disp = gini(disp_times) if disp_times else 0.0
316
 
317
  console.print("\n[bold]Disposal Metrics:[/bold]")
318
- console.print(f" Cases disposed: {res.disposals:,} ({res.disposals / len(cases):.1%})")
 
 
319
  console.print(f" Gini coefficient: {gini_disp:.3f}")
320
 
321
  console.print("\n[bold]Efficiency:[/bold]")
@@ -333,14 +367,15 @@ def simulate(
333
  raise typer.Exit(code=1)
334
 
335
 
336
- # RL training command removed
337
-
338
-
339
  @app.command()
340
  def workflow(
341
- n_cases: int = typer.Option(10000, "--cases", "-n", help="Number of cases to generate"),
 
 
342
  sim_days: int = typer.Option(384, "--days", "-d", help="Simulation days"),
343
- output_dir: str = typer.Option("data/workflow_run", "--output", "-o", help="Output directory"),
 
 
344
  seed: int = typer.Option(42, "--seed", help="Random seed"),
345
  ) -> None:
346
  """Run full workflow: EDA -> Generate -> Simulate -> Report."""
@@ -417,7 +452,9 @@ def dashboard(
417
  app_path = Path(__file__).parent.parent / "scheduler" / "dashboard" / "app.py"
418
 
419
  if not app_path.exists():
420
- console.print(f"[bold red]Error:[/bold red] Dashboard app not found at {app_path}")
 
 
421
  raise typer.Exit(code=1)
422
 
423
  # Run streamlit
 
36
 
37
  @app.command()
38
  def eda(
39
+ skip_clean: bool = typer.Option(
40
+ False, "--skip-clean", help="Skip data loading and cleaning"
41
+ ),
42
+ skip_viz: bool = typer.Option(
43
+ False, "--skip-viz", help="Skip visualization generation"
44
+ ),
45
+ skip_params: bool = typer.Option(
46
+ False, "--skip-params", help="Skip parameter extraction"
47
+ ),
48
  ) -> None:
49
  """Run the EDA pipeline (load, explore, extract parameters)."""
50
  console.print("[bold blue]Running EDA Pipeline[/bold blue]")
 
66
  console.print("Data loaded and cleaned")
67
 
68
  if not skip_viz:
69
+ task = progress.add_task(
70
+ "Step 2/3: Generate visualizations...", total=None
71
+ )
72
  run_exploration()
73
  progress.update(task, completed=True)
74
  console.print("Visualizations generated")
 
100
  interactive: bool = typer.Option(
101
  False, "--interactive", help="Prompt for parameters interactively"
102
  ),
103
+ n_cases: int = typer.Option(
104
+ 10000, "--cases", "-n", help="Number of cases to generate"
105
+ ),
106
+ start_date: str = typer.Option(
107
+ "2022-01-01", "--start", help="Start date (YYYY-MM-DD)"
108
+ ),
109
  end_date: str = typer.Option("2023-12-31", "--end", help="End date (YYYY-MM-DD)"),
110
  output: str = typer.Option(
111
  "data/generated/cases.csv", "--output", "-o", help="Output CSV file"
 
190
  user_dist = _parse_case_type_dist(case_type_dist)
191
 
192
  gen = CaseGenerator(start=start, end=end, seed=seed)
193
+ cases = gen.generate(
194
+ n_cases, stage_mix_auto=True, case_type_distribution=user_dist
195
+ )
196
  # Write primary cases file
197
  CaseGenerator.to_csv(cases, output_path)
198
  # Also write detailed hearings history alongside, for the dashboard/classifier
 
223
  interactive: bool = typer.Option(
224
  False, "--interactive", help="Prompt for parameters interactively"
225
  ),
226
+ cases_csv: str = typer.Option(
227
+ "data/generated/cases.csv", "--cases", help="Input cases CSV"
228
+ ),
229
+ days: int = typer.Option(
230
+ 384, "--days", "-d", help="Number of working days to simulate"
231
+ ),
232
+ start_date: str = typer.Option(
233
+ None, "--start", help="Simulation start date (YYYY-MM-DD)"
234
+ ),
235
  policy: str = typer.Option(
236
  "readiness", "--policy", "-p", help="Scheduling policy (fifo/age/readiness)"
237
  ),
238
  seed: int = typer.Option(42, "--seed", help="Random seed"),
239
+ log_dir: str = typer.Option(
240
+ None, "--log-dir", "-o", help="Output directory for logs"
241
+ ),
242
  ) -> None:
243
  """Run court scheduling simulation."""
244
  console.print(f"[bold blue]Running {days}-day simulation[/bold blue]")
 
260
  update={
261
  "cases": Path(cases_csv) if cases_csv else scfg.cases,
262
  "days": days if days else scfg.days,
263
+ "start": (
264
+ date_cls.fromisoformat(start_date) if start_date else scfg.start
265
+ ),
266
  "policy": policy if policy else scfg.policy,
267
  "seed": seed if seed else scfg.seed,
268
  "log_dir": (Path(log_dir) if log_dir else scfg.log_dir),
 
273
  cases_csv = typer.prompt("Cases CSV", default=cases_csv)
274
  days = typer.prompt("Days to simulate", default=days)
275
  start_date = (
276
+ typer.prompt(
277
+ "Start date (YYYY-MM-DD) or blank", default=start_date or ""
278
+ )
279
  or None
280
  )
281
  policy = typer.prompt("Policy [readiness|fifo|age]", default=policy)
282
  seed = typer.prompt("Random seed", default=seed)
283
+ log_dir = (
284
+ typer.prompt("Log dir (or blank)", default=log_dir or "") or None
285
+ )
286
  scfg = SimulateConfig(
287
  cases=Path(cases_csv),
288
  days=days,
 
296
  path = scfg.cases
297
  if path.exists():
298
  cases = CaseGenerator.from_csv(path)
299
+ start = scfg.start or (
300
+ max(c.filed_date for c in cases) if cases else date_cls.today()
301
+ )
302
  else:
303
+ console.print(
304
+ f"[yellow]Warning:[/yellow] {path} not found. Generating test cases..."
305
+ )
306
  start = scfg.start or date_cls.today().replace(day=1)
307
  gen = CaseGenerator(start=start, end=start.replace(day=28), seed=scfg.seed)
308
  cases = gen.generate(n_cases=5 * 151)
 
347
  gini_disp = gini(disp_times) if disp_times else 0.0
348
 
349
  console.print("\n[bold]Disposal Metrics:[/bold]")
350
+ console.print(
351
+ f" Cases disposed: {res.disposals:,} ({res.disposals / len(cases):.1%})"
352
+ )
353
  console.print(f" Gini coefficient: {gini_disp:.3f}")
354
 
355
  console.print("\n[bold]Efficiency:[/bold]")
 
367
  raise typer.Exit(code=1)
368
 
369
 
 
 
 
370
  @app.command()
371
  def workflow(
372
+ n_cases: int = typer.Option(
373
+ 10000, "--cases", "-n", help="Number of cases to generate"
374
+ ),
375
  sim_days: int = typer.Option(384, "--days", "-d", help="Simulation days"),
376
+ output_dir: str = typer.Option(
377
+ "data/workflow_run", "--output", "-o", help="Output directory"
378
+ ),
379
  seed: int = typer.Option(42, "--seed", help="Random seed"),
380
  ) -> None:
381
  """Run full workflow: EDA -> Generate -> Simulate -> Report."""
 
452
  app_path = Path(__file__).parent.parent / "scheduler" / "dashboard" / "app.py"
453
 
454
  if not app_path.exists():
455
+ console.print(
456
+ f"[bold red]Error:[/bold red] Dashboard app not found at {app_path}"
457
+ )
458
  raise typer.Exit(code=1)
459
 
460
  # Run streamlit
configs/simulate.sample.toml CHANGED
@@ -4,7 +4,7 @@ days = 384
4
  # start = "2024-01-01" # optional; if omitted, uses max filed_date in cases
5
  policy = "readiness" # readiness|fifo|age
6
  seed = 42
7
- # duration_percentile = "median" # median|p90
8
- # courtrooms = 5 # optional; uses engine default if omitted
9
- # daily_capacity = 151 # optional; uses engine default if omitted
10
  # log_dir = "data/sim_runs/example"
 
4
  # start = "2024-01-01" # optional; if omitted, uses max filed_date in cases
5
  policy = "readiness" # readiness|fifo|age
6
  seed = 42
7
+ duration_percentile = "median" # median|p90
8
+ courtrooms = 5 # optional; uses engine default if omitted
9
+ daily_capacity = 151 # optional; uses engine default if omitted
10
  # log_dir = "data/sim_runs/example"
eda/load_clean.py CHANGED
@@ -60,6 +60,7 @@ def _null_summary(df: pl.DataFrame, name: str) -> None:
60
  def load_raw() -> tuple[pl.DataFrame, pl.DataFrame]:
61
  try:
62
  import duckdb
 
63
  if DUCKDB_FILE.exists():
64
  print(f"Loading raw data from DuckDB: {DUCKDB_FILE}")
65
  conn = duckdb.connect(str(DUCKDB_FILE))
@@ -72,6 +73,8 @@ def load_raw() -> tuple[pl.DataFrame, pl.DataFrame]:
72
  except Exception as e:
73
  print(f"[WARN] DuckDB load failed ({e}), falling back to CSV...")
74
  print("Loading raw data from CSVs (fallback)...")
 
 
75
  cases = pl.read_csv(
76
  CASES_FILE,
77
  try_parse_dates=True,
@@ -95,7 +98,9 @@ def clean_and_augment(
95
  # Standardise date columns if needed
96
  for col in ["DATE_FILED", "DECISION_DATE", "REGISTRATION_DATE", "LAST_SYNC_TIME"]:
97
  if col in cases.columns and cases[col].dtype == pl.Utf8:
98
- cases = cases.with_columns(pl.col(col).str.strptime(pl.Date, "%d-%m-%Y", strict=False))
 
 
99
 
100
  # Deduplicate on keys
101
  if "CNR_NUMBER" in cases.columns:
@@ -158,7 +163,10 @@ def clean_and_augment(
158
  hearings.filter(pl.col("BusinessOnDate").is_not_null())
159
  .sort(["CNR_NUMBER", "BusinessOnDate"])
160
  .with_columns(
161
- ((pl.col("BusinessOnDate") - pl.col("BusinessOnDate").shift(1)) / timedelta(days=1))
 
 
 
162
  .over("CNR_NUMBER")
163
  .alias("HEARING_GAP_DAYS")
164
  )
@@ -175,7 +183,14 @@ def clean_and_augment(
175
  )
176
  cases = cases.join(gap_stats, on="CNR_NUMBER", how="left")
177
  else:
178
- for col in ["GAP_MEAN", "GAP_MEDIAN", "GAP_P25", "GAP_P75", "GAP_STD", "N_GAPS"]:
 
 
 
 
 
 
 
179
  cases = cases.with_columns(pl.lit(None).alias(col))
180
 
181
  # Fill some basics
 
60
  def load_raw() -> tuple[pl.DataFrame, pl.DataFrame]:
61
  try:
62
  import duckdb
63
+
64
  if DUCKDB_FILE.exists():
65
  print(f"Loading raw data from DuckDB: {DUCKDB_FILE}")
66
  conn = duckdb.connect(str(DUCKDB_FILE))
 
73
  except Exception as e:
74
  print(f"[WARN] DuckDB load failed ({e}), falling back to CSV...")
75
  print("Loading raw data from CSVs (fallback)...")
76
+ if not CASES_FILE.exists() or not HEAR_FILE.exists():
77
+ raise FileNotFoundError("One or both CSV files are missing.")
78
  cases = pl.read_csv(
79
  CASES_FILE,
80
  try_parse_dates=True,
 
98
  # Standardise date columns if needed
99
  for col in ["DATE_FILED", "DECISION_DATE", "REGISTRATION_DATE", "LAST_SYNC_TIME"]:
100
  if col in cases.columns and cases[col].dtype == pl.Utf8:
101
+ cases = cases.with_columns(
102
+ pl.col(col).str.strptime(pl.Date, "%d-%m-%Y", strict=False)
103
+ )
104
 
105
  # Deduplicate on keys
106
  if "CNR_NUMBER" in cases.columns:
 
163
  hearings.filter(pl.col("BusinessOnDate").is_not_null())
164
  .sort(["CNR_NUMBER", "BusinessOnDate"])
165
  .with_columns(
166
+ (
167
+ (pl.col("BusinessOnDate") - pl.col("BusinessOnDate").shift(1))
168
+ / timedelta(days=1)
169
+ )
170
  .over("CNR_NUMBER")
171
  .alias("HEARING_GAP_DAYS")
172
  )
 
183
  )
184
  cases = cases.join(gap_stats, on="CNR_NUMBER", how="left")
185
  else:
186
+ for col in [
187
+ "GAP_MEAN",
188
+ "GAP_MEDIAN",
189
+ "GAP_P25",
190
+ "GAP_P75",
191
+ "GAP_STD",
192
+ "N_GAPS",
193
+ ]:
194
  cases = cases.with_columns(pl.lit(None).alias(col))
195
 
196
  # Fill some basics
scheduler/dashboard/pages/3_Simulation_Workflow.py CHANGED
@@ -9,7 +9,6 @@ Multi-step workflow:
9
 
10
  from __future__ import annotations
11
 
12
- import subprocess
13
  from datetime import date, datetime
14
  from pathlib import Path
15
 
@@ -107,11 +106,15 @@ if st.session_state.workflow_step == 1:
107
  )
108
 
109
  start_date = st.date_input(
110
- "Filing period start", value=date(2022, 1, 1), help="Start date for case filings"
 
 
111
  )
112
 
113
  end_date = st.date_input(
114
- "Filing period end", value=date(2023, 12, 31), help="End date for case filings"
 
 
115
  )
116
 
117
  with col2:
@@ -124,7 +127,9 @@ if st.session_state.workflow_step == 1:
124
  )
125
 
126
  output_dir = st.text_input(
127
- "Output directory", value="data/generated", help="Directory to save generated cases"
 
 
128
  )
129
 
130
  st.info(f"Cases will be saved to: {output_dir}/cases.csv")
@@ -142,13 +147,21 @@ if st.session_state.workflow_step == 1:
142
  col_a, col_b, col_c = st.columns(3)
143
 
144
  with col_a:
145
- rsa_pct = st.number_input("RSA %", 0, 100, 20, help="Regular Second Appeal")
146
- rfa_pct = st.number_input("RFA %", 0, 100, 17, help="Regular First Appeal")
147
- crp_pct = st.number_input("CRP %", 0, 100, 20, help="Civil Revision Petition")
 
 
 
 
 
 
148
 
149
  with col_b:
150
  ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal")
151
- ccc_pct = st.number_input("CCC %", 0, 100, 11, help="Civil Contempt")
 
 
152
  cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition")
153
 
154
  with col_c:
@@ -156,55 +169,92 @@ if st.session_state.workflow_step == 1:
156
  "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition"
157
  )
158
 
159
- total_pct = rsa_pct + rfa_pct + crp_pct + ca_pct + ccc_pct + cp_pct + cmp_pct
 
 
 
 
 
 
 
 
160
  if total_pct != 100:
161
  st.error(f"Total: {total_pct}% (must be 100%)")
162
  else:
163
  st.success(f"Total: {total_pct}%")
164
  else:
165
  st.info("Using default distribution from historical data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  if st.button("Generate Cases", type="primary", use_container_width=True):
168
  with st.spinner(f"Generating {n_cases:,} cases..."):
169
  try:
170
- # Ensure output directory exists
171
- output_path = Path(output_dir)
172
- output_path.mkdir(parents=True, exist_ok=True)
173
- cases_file = output_path / "cases.csv"
174
-
175
- # Run generation via CLI
176
- result = subprocess.run(
177
- [
178
- "uv",
179
- "run",
180
- "court-scheduler",
181
- "generate",
182
- "--cases",
183
- str(n_cases),
184
- "--start",
185
- start_date.isoformat(),
186
- "--end",
187
- end_date.isoformat(),
188
- "--output",
189
- str(cases_file),
190
- "--seed",
191
- str(seed),
192
- ],
193
- capture_output=True,
194
- text=True,
195
- cwd=str(Path.cwd()),
196
  )
197
 
198
- if result.returncode == 0:
199
- st.success(f"Generated {n_cases:,} cases successfully")
200
- st.session_state.cases_ready = True
201
- st.session_state.cases_path = str(cases_file)
202
- st.session_state.workflow_step = 2
203
- st.rerun()
204
- else:
205
- st.error(f"Generation failed with error code {result.returncode}")
206
- with st.expander("Show error details"):
207
- st.code(result.stderr, language="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  except Exception as e:
209
  st.error(f"Error generating cases: {e}")
210
 
@@ -253,7 +303,9 @@ if st.session_state.workflow_step == 1:
253
  cases_file = temp_path / "uploaded_cases.csv"
254
  df.to_csv(cases_file, index=False)
255
 
256
- if st.button("Use This Dataset", type="primary", use_container_width=True):
 
 
257
  st.session_state.cases_ready = True
258
  st.session_state.cases_path = str(cases_file)
259
  st.session_state.workflow_step = 2
@@ -305,7 +357,11 @@ elif st.session_state.workflow_step == 2:
305
  )
306
 
307
  seed_sim = st.number_input(
308
- "Random seed", min_value=0, max_value=9999, value=42, help="Seed for reproducibility"
 
 
 
 
309
  )
310
 
311
  log_dir = st.text_input(
@@ -394,7 +450,9 @@ elif st.session_state.workflow_step == 2:
394
  st.rerun()
395
 
396
  with col2:
397
- if st.button("Next: Run Simulation ->", type="primary", use_container_width=True):
 
 
398
  st.session_state.workflow_step = 3
399
  st.rerun()
400
 
@@ -425,98 +483,71 @@ elif st.session_state.workflow_step == 3:
425
  if st.button("Start Simulation", type="primary", use_container_width=True):
426
  with st.spinner("Running simulation... This may take several minutes."):
427
  try:
428
- # Create a unique per-run directory under the selected base output folder
429
- ts = datetime.now().strftime("%Y%m%d_%H%M%S")
430
- base_out_dir = (
431
- Path(config["log_dir"])
432
- if config.get("log_dir")
433
- else Path("outputs") / "simulation_runs"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  )
 
 
 
435
  run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}"
436
  run_dir.mkdir(parents=True, exist_ok=True)
437
 
438
- # Persist effective run directory
439
  st.session_state.sim_config["log_dir"] = str(run_dir)
440
 
441
- # Build command
442
- cmd = [
443
- "uv",
444
- "run",
445
- "court-scheduler",
446
- "simulate",
447
- "--cases",
448
- config["cases"],
449
- "--days",
450
- str(config["days"]),
451
- "--policy",
452
- config["policy"],
453
- "--seed",
454
- str(config["seed"]),
455
- ]
456
-
457
- if config.get("start"):
458
- cmd.extend(["--start", config["start"]])
459
-
460
- # Always pass the per-run output directory
461
- cmd.extend(["--log-dir", str(run_dir)])
462
-
463
- # Run simulation
464
- result = subprocess.run(
465
- cmd,
466
- capture_output=True,
467
- text=True,
468
- cwd=str(Path.cwd()),
469
- )
470
-
471
- if result.returncode == 0:
472
- st.success("Simulation completed successfully")
473
 
474
- # Parse output to extract results
475
- st.session_state.sim_results = {
476
- "success": True,
477
- "output": result.stdout,
478
- "log_dir": str(run_dir),
479
- "completed_at": datetime.now().isoformat(),
480
- }
481
 
482
- # Auto-generate Daily Cause Lists from events.csv
483
- try:
484
- log_dir_path = (
485
- Path(st.session_state.sim_results["log_dir"])
486
- if st.session_state.sim_results.get("log_dir")
487
- else run_dir
488
- )
489
- events_path = log_dir_path / "events.csv"
490
- if events_path.exists():
491
- generator = CauseListGenerator(events_path)
492
- # Save directly in the run directory (no subfolder)
493
- compiled_path = generator.generate_daily_lists(log_dir_path)
494
- summary_path = log_dir_path / "daily_summaries.csv"
495
- # Store generated paths for display in Step 4
496
- st.session_state.sim_results["cause_lists"] = {
497
- "compiled": str(compiled_path),
498
- "summary": str(summary_path),
499
- }
500
- st.info(f"Daily cause lists generated in {log_dir_path}")
501
- else:
502
- st.warning(
503
- f"events.csv not found at {events_path}. Skipping cause list generation."
504
- )
505
- except Exception as gen_err:
506
- st.warning(f"Failed to generate daily cause lists: {gen_err}")
507
 
508
- st.session_state.workflow_step = 4
509
- st.rerun()
510
- else:
511
- st.error(f"Simulation failed with error code {result.returncode}")
512
- with st.expander("Show error details"):
513
- st.code(result.stderr, language="text")
514
 
515
- st.session_state.sim_results = {
516
- "success": False,
517
- "error": result.stderr,
518
  }
519
 
 
 
 
520
  except Exception as e:
521
  st.error(f"Error running simulation: {e}")
522
  st.session_state.sim_results = {
@@ -565,7 +596,9 @@ elif st.session_state.workflow_step == 4:
565
  for file in files:
566
  col1, col2 = st.columns([3, 1])
567
  with col1:
568
- st.markdown(f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)")
 
 
569
  with col2:
570
  if file.suffix in [".csv", ".txt"]:
571
  with open(file, "rb") as f:
@@ -573,7 +606,9 @@ elif st.session_state.workflow_step == 4:
573
  label="Download",
574
  data=f.read(),
575
  file_name=file.name,
576
- mime="text/csv" if file.suffix == ".csv" else "text/plain",
 
 
577
  key=f"download_{file.name}",
578
  )
579
 
@@ -594,7 +629,10 @@ elif st.session_state.workflow_step == 4:
594
  x=metrics_df.index,
595
  y="disposal_rate",
596
  title="Disposal Rate Over Time",
597
- labels={"x": "Day", "disposal_rate": "Disposal Rate"},
 
 
 
598
  )
599
  st.plotly_chart(fig, use_container_width=True)
600
 
@@ -611,7 +649,9 @@ elif st.session_state.workflow_step == 4:
611
 
612
  # Show summary statistics
613
  st.markdown("### Summary Statistics")
614
- st.dataframe(metrics_df.describe(), use_container_width=True)
 
 
615
 
616
  except Exception as e:
617
  st.warning(f"Could not load metrics: {e}")
@@ -660,7 +700,9 @@ elif st.session_state.workflow_step == 4:
660
  else None
661
  )
662
  if events_csv and events_csv.exists():
663
- if st.button("Generate Daily Cause Lists Now", use_container_width=False):
 
 
664
  try:
665
  # Save directly alongside events.csv (run directory root)
666
  out_dir = events_csv.parent
 
9
 
10
  from __future__ import annotations
11
 
 
12
  from datetime import date, datetime
13
  from pathlib import Path
14
 
 
106
  )
107
 
108
  start_date = st.date_input(
109
+ "Filing period start",
110
+ value=date(2022, 1, 1),
111
+ help="Start date for case filings",
112
  )
113
 
114
  end_date = st.date_input(
115
+ "Filing period end",
116
+ value=date(2023, 12, 31),
117
+ help="End date for case filings",
118
  )
119
 
120
  with col2:
 
127
  )
128
 
129
  output_dir = st.text_input(
130
+ "Output directory",
131
+ value="data/generated",
132
+ help="Directory to save generated cases",
133
  )
134
 
135
  st.info(f"Cases will be saved to: {output_dir}/cases.csv")
 
147
  col_a, col_b, col_c = st.columns(3)
148
 
149
  with col_a:
150
+ rsa_pct = st.number_input(
151
+ "RSA %", 0, 100, 20, help="Regular Second Appeal"
152
+ )
153
+ rfa_pct = st.number_input(
154
+ "RFA %", 0, 100, 17, help="Regular First Appeal"
155
+ )
156
+ crp_pct = st.number_input(
157
+ "CRP %", 0, 100, 20, help="Civil Revision Petition"
158
+ )
159
 
160
  with col_b:
161
  ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal")
162
+ ccc_pct = st.number_input(
163
+ "CCC %", 0, 100, 11, help="Civil Contempt"
164
+ )
165
  cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition")
166
 
167
  with col_c:
 
169
  "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition"
170
  )
171
 
172
+ total_pct = (
173
+ rsa_pct
174
+ + rfa_pct
175
+ + crp_pct
176
+ + ca_pct
177
+ + ccc_pct
178
+ + cp_pct
179
+ + cmp_pct
180
+ )
181
  if total_pct != 100:
182
  st.error(f"Total: {total_pct}% (must be 100%)")
183
  else:
184
  st.success(f"Total: {total_pct}%")
185
  else:
186
  st.info("Using default distribution from historical data")
187
+ from scheduler.dashboard.utils.ui_input_parser import (
188
+ build_case_type_distribution,
189
+ merge_with_default_config,
190
+ )
191
+
192
+ case_type_dist_dict = None
193
+ if use_custom_dist:
194
+ case_type_dist_dict = build_case_type_distribution(
195
+ rsa_pct,
196
+ rfa_pct,
197
+ crp_pct,
198
+ ca_pct,
199
+ ccc_pct,
200
+ cp_pct,
201
+ cmp_pct,
202
+ )
203
 
204
  if st.button("Generate Cases", type="primary", use_container_width=True):
205
  with st.spinner(f"Generating {n_cases:,} cases..."):
206
  try:
207
+ from cli.config import load_generate_config
208
+ from scheduler.data.case_generator import CaseGenerator
209
+
210
+ DEFAULT_GENERATE_CFG_PATH = Path("configs/generate.sample.toml")
211
+ config_from_file = None
212
+
213
+ if DEFAULT_GENERATE_CFG_PATH.exists():
214
+ config_from_file = load_generate_config(
215
+ DEFAULT_GENERATE_CFG_PATH
216
+ )
217
+ cfg = merge_with_default_config(
218
+ config_from_file,
219
+ n_cases=n_cases,
220
+ start_date=start_date,
221
+ end_date=end_date,
222
+ output_dir=output_dir,
223
+ seed=seed,
 
 
 
 
 
 
 
 
 
224
  )
225
 
226
+ # Prepare output dir
227
+ cfg.output.parent.mkdir(parents=True, exist_ok=True)
228
+
229
+ case_type_dist_dict = None
230
+ if use_custom_dist:
231
+ from scheduler.dashboard.utils.ui_input_parser import (
232
+ build_case_type_distribution,
233
+ )
234
+
235
+ case_type_dist_dict = build_case_type_distribution(
236
+ rsa_pct, rfa_pct, crp_pct, ca_pct, ccc_pct, cp_pct, cmp_pct
237
+ )
238
+
239
+ gen = CaseGenerator(start=cfg.start, end=cfg.end, seed=cfg.seed)
240
+
241
+ cases = gen.generate(
242
+ cfg.n_cases,
243
+ stage_mix_auto=True,
244
+ case_type_distribution=case_type_dist_dict,
245
+ )
246
+
247
+ # Save files
248
+ CaseGenerator.to_csv(cases, cfg.output)
249
+ hearings_path = cfg.output.parent / "hearings.csv"
250
+ CaseGenerator.to_hearings_csv(cases, hearings_path)
251
+
252
+ st.success(f"Generated {len(cases):,} cases successfully!")
253
+ st.session_state.cases_ready = True
254
+ st.session_state.cases_path = str(cfg.output)
255
+ st.session_state.workflow_step = 2
256
+ st.rerun()
257
+
258
  except Exception as e:
259
  st.error(f"Error generating cases: {e}")
260
 
 
303
  cases_file = temp_path / "uploaded_cases.csv"
304
  df.to_csv(cases_file, index=False)
305
 
306
+ if st.button(
307
+ "Use This Dataset", type="primary", use_container_width=True
308
+ ):
309
  st.session_state.cases_ready = True
310
  st.session_state.cases_path = str(cases_file)
311
  st.session_state.workflow_step = 2
 
357
  )
358
 
359
  seed_sim = st.number_input(
360
+ "Random seed",
361
+ min_value=0,
362
+ max_value=9999,
363
+ value=42,
364
+ help="Seed for reproducibility",
365
  )
366
 
367
  log_dir = st.text_input(
 
450
  st.rerun()
451
 
452
  with col2:
453
+ if st.button(
454
+ "Next: Run Simulation ->", type="primary", use_container_width=True
455
+ ):
456
  st.session_state.workflow_step = 3
457
  st.rerun()
458
 
 
483
  if st.button("Start Simulation", type="primary", use_container_width=True):
484
  with st.spinner("Running simulation... This may take several minutes."):
485
  try:
486
+ from cli.config import load_simulate_config
487
+ from scheduler.dashboard.utils.simulation_runner import (
488
+ merge_simulation_config,
489
+ run_simulation_dashboard,
490
+ )
491
+
492
+ DEFAULT_SIM_CFG_PATH = Path("configs/simulate.sample.toml")
493
+ if DEFAULT_SIM_CFG_PATH.exists():
494
+ default_cfg = load_simulate_config(DEFAULT_SIM_CFG_PATH)
495
+ else:
496
+ default_cfg = (
497
+ load_simulate_config(Path("parameter_sweep.toml"))
498
+ if Path("parameter_sweep.toml").exists()
499
+ else None
500
+ )
501
+
502
+ if default_cfg is None:
503
+ st.error("No default simulate config found.")
504
+ st.stop()
505
+
506
+ merged_cfg = merge_simulation_config(
507
+ default_cfg,
508
+ cases_path=config["cases"],
509
+ days=config["days"],
510
+ start_date=date.fromisoformat(config["start"])
511
+ if config.get("start")
512
+ else None,
513
+ policy=config["policy"],
514
+ seed=config["seed"],
515
+ log_dir=config["log_dir"],
516
  )
517
+
518
+ ts = datetime.now().strftime("%Y%m%d_%H%M%S")
519
+ base_out_dir = Path(config["log_dir"])
520
  run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}"
521
  run_dir.mkdir(parents=True, exist_ok=True)
522
 
523
+ # Update session config
524
  st.session_state.sim_config["log_dir"] = str(run_dir)
525
 
526
+ result = run_simulation_dashboard(merged_cfg, run_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
 
528
+ st.success("Simulation completed successfully!")
 
 
 
 
 
 
529
 
530
+ st.session_state.sim_results = {
531
+ "success": True,
532
+ "output": result["summary"],
533
+ "log_dir": str(run_dir),
534
+ "completed_at": datetime.now().isoformat(),
535
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
 
537
+ events_path = result["events_path"]
538
+ if events_path.exists():
539
+ generator = CauseListGenerator(events_path)
540
+ compiled_path = generator.generate_daily_lists(run_dir)
541
+ summary_path = run_dir / "daily_summaries.csv"
 
542
 
543
+ st.session_state.sim_results["cause_lists"] = {
544
+ "compiled": str(compiled_path),
545
+ "summary": str(summary_path),
546
  }
547
 
548
+ st.session_state.workflow_step = 4
549
+ st.rerun()
550
+
551
  except Exception as e:
552
  st.error(f"Error running simulation: {e}")
553
  st.session_state.sim_results = {
 
596
  for file in files:
597
  col1, col2 = st.columns([3, 1])
598
  with col1:
599
+ st.markdown(
600
+ f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)"
601
+ )
602
  with col2:
603
  if file.suffix in [".csv", ".txt"]:
604
  with open(file, "rb") as f:
 
606
  label="Download",
607
  data=f.read(),
608
  file_name=file.name,
609
+ mime="text/csv"
610
+ if file.suffix == ".csv"
611
+ else "text/plain",
612
  key=f"download_{file.name}",
613
  )
614
 
 
629
  x=metrics_df.index,
630
  y="disposal_rate",
631
  title="Disposal Rate Over Time",
632
+ labels={
633
+ "x": "Day",
634
+ "disposal_rate": "Disposal Rate",
635
+ },
636
  )
637
  st.plotly_chart(fig, use_container_width=True)
638
 
 
649
 
650
  # Show summary statistics
651
  st.markdown("### Summary Statistics")
652
+ st.dataframe(
653
+ metrics_df.describe(), use_container_width=True
654
+ )
655
 
656
  except Exception as e:
657
  st.warning(f"Could not load metrics: {e}")
 
700
  else None
701
  )
702
  if events_csv and events_csv.exists():
703
+ if st.button(
704
+ "Generate Daily Cause Lists Now", use_container_width=False
705
+ ):
706
  try:
707
  # Save directly alongside events.csv (run directory root)
708
  out_dir = events_csv.parent
scheduler/dashboard/utils/simulation_runner.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from pathlib import Path
3
+ from datetime import date
4
+
5
+ from cli.config import SimulateConfig
6
+ from scheduler.data.case_generator import CaseGenerator
7
+ from scheduler.simulation.engine import CourtSim, CourtSimConfig
8
+ from scheduler.core.case import CaseStatus
9
+ from scheduler.metrics.basic import gini
10
+
11
+
12
def merge_simulation_config(
    default_cfg: SimulateConfig,
    cases_path: str,
    days: int,
    start_date: date | None,
    policy: str,
    seed: int,
    log_dir: str,
) -> SimulateConfig:
    """Overlay values collected from the Streamlit UI onto the default
    simulation config, falling back to the default for any blank field.
    """
    merged_cases = Path(cases_path) if cases_path else default_cfg.cases
    merged_log_dir = Path(log_dir) if log_dir else default_cfg.log_dir
    # Fall back to the default seed only when the UI supplied none at all;
    # a seed of 0 is a legitimate value and must be preserved.
    merged_seed = default_cfg.seed if seed is None else seed
    return SimulateConfig(
        cases=merged_cases,
        days=days or default_cfg.days,
        start=start_date or default_cfg.start,
        policy=policy or default_cfg.policy,
        seed=merged_seed,
        log_dir=merged_log_dir,
    )
30
+
31
+
32
def run_simulation_dashboard(scfg: SimulateConfig, run_dir: Path):
    """Execute a court simulation for the Streamlit dashboard.

    Mirrors the CLI `simulate` command: loads (or synthesizes) cases,
    runs the engine, computes summary metrics, writes ``report.txt``
    into *run_dir*, and returns the artefact paths the dashboard renders.

    Parameters
    ----------
    scfg : SimulateConfig
        Merged UI/default simulation configuration.
    run_dir : Path
        Directory where the engine writes its logs and the report.

    Returns
    -------
    dict
        Keys: ``summary`` (str), ``end_date`` (date), ``metrics_path``
        and ``events_path`` (Path, or None when the engine did not emit
        the corresponding CSV).
    """

    # ------------------------------------------------------------------
    # Load case data
    # ------------------------------------------------------------------
    path = scfg.cases
    if path.exists():
        cases = CaseGenerator.from_csv(path)
        # Default the horizon start to the latest filing date so no case
        # is filed "in the future" relative to the simulation clock.
        start = scfg.start or (
            max(c.filed_date for c in cases) if cases else date.today()
        )
    else:
        # Fallback (CLI fallback behaviour): synthesize a small docket so
        # the dashboard still runs without a pre-generated cases file.
        start = scfg.start or date.today().replace(day=1)
        gen = CaseGenerator(start=start, end=start.replace(day=28), seed=scfg.seed)
        cases = gen.generate(n_cases=5 * 151)

    # ------------------------------------------------------------------
    # Build CourtSimConfig
    # ------------------------------------------------------------------
    cfg = CourtSimConfig(
        start=start,
        days=scfg.days,
        seed=scfg.seed,
        policy=scfg.policy,
        duration_percentile=scfg.duration_percentile,
        log_dir=run_dir,
    )

    # ------------------------------------------------------------------
    # Run simulation
    # ------------------------------------------------------------------
    sim = CourtSim(cfg, cases)
    res = sim.run()

    # ------------------------------------------------------------------
    # Collect metrics exactly like CLI
    # ------------------------------------------------------------------
    disp_times = [
        (c.disposal_date - c.filed_date).days
        for c in cases
        if c.disposal_date is not None and c.status == CaseStatus.DISPOSED
    ]
    gini_disp = gini(disp_times) if disp_times else 0.0

    # Guard every ratio against a zero denominator. The previous version
    # divided by len(cases) unguarded, which raised ZeroDivisionError on
    # an empty docket; use the same max(1, ...) guard as the other ratios.
    summary_text = f"""
Simulation Complete!
Horizon: {cfg.start} -> {res.end_date} ({cfg.days} days)

Hearing Metrics:
Total: {res.hearings_total}
Heard: {res.hearings_heard} ({res.hearings_heard / max(1, res.hearings_total):.1%})
Adjourned: {res.hearings_adjourned} ({res.hearings_adjourned / max(1, res.hearings_total):.1%})

Disposal Metrics:
Disposed: {res.disposals} ({res.disposals / max(1, len(cases)):.1%})
Gini coefficient: {gini_disp:.3f}

Efficiency:
Utilization: {res.utilization:.2%}
Avg hearings/day: {res.hearings_total / max(1, cfg.days):.2f}
"""

    (run_dir / "report.txt").write_text(summary_text, encoding="utf-8")

    # -------------------------------------------------------
    # Locate generated CSVs (if they exist)
    # -------------------------------------------------------
    metrics_path = run_dir / "metrics.csv"
    events_path = run_dir / "events.csv"

    return {
        "summary": summary_text,
        "end_date": res.end_date,
        "metrics_path": metrics_path if metrics_path.exists() else None,
        "events_path": events_path if events_path.exists() else None,
    }
scheduler/dashboard/utils/ui_input_parser.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from cli.config import GenerateConfig
3
+
4
+
5
def merge_with_default_config(
    default_cfg: GenerateConfig,
    n_cases: int,
    start_date,
    end_date,
    output_dir: str,
    seed: int,
) -> GenerateConfig:
    """Overlay UI-supplied generation settings on the repo defaults.

    Falsy UI values fall back to the corresponding default; the seed
    falls back only when it is None so an explicit seed of 0 survives.
    """
    if output_dir:
        output_path = Path(output_dir) / "cases.csv"
    else:
        output_path = default_cfg.output
    return GenerateConfig(
        n_cases=n_cases or default_cfg.n_cases,
        start=start_date or default_cfg.start,
        end=end_date or default_cfg.end,
        output=output_path,
        seed=default_cfg.seed if seed is None else seed,
    )
21
+
22
+
23
def build_case_type_distribution(
    rsa_pct: int,
    rfa_pct: int,
    crp_pct: int,
    ca_pct: int,
    ccc_pct: int,
    cp_pct: int,
    cmp_pct: int,
) -> dict[str, float]:
    """Normalize percentage sliders into a case-type probability map.

    Each argument is the raw percentage for one case type; the result
    divides each weight by the sum so the values add up to 1.0. An
    all-zero input yields an empty dict (no distribution).
    """
    weights = {
        "RSA": rsa_pct,
        "RFA": rfa_pct,
        "CRP": crp_pct,
        "CA": ca_pct,
        "CCC": ccc_pct,
        "CP": cp_pct,
        "CMP": cmp_pct,
    }
    total = sum(weights.values())
    if total == 0:
        return {}
    return {code: weight / total for code, weight in weights.items()}