RoyAalekh commited on
Commit
f9934a8
·
2 Parent(s): 2f87460 b43adf7

Merge pull request #8 from RoyAalekh/dev/removing_cli_from_dashboard_backend

Browse files
cli/main.py CHANGED
@@ -36,9 +36,15 @@ console = Console(legacy_windows=False)
36
 
37
  @app.command()
38
  def eda(
39
- skip_clean: bool = typer.Option(False, "--skip-clean", help="Skip data loading and cleaning"),
40
- skip_viz: bool = typer.Option(False, "--skip-viz", help="Skip visualization generation"),
41
- skip_params: bool = typer.Option(False, "--skip-params", help="Skip parameter extraction"),
 
 
 
 
 
 
42
  ) -> None:
43
  """Run the EDA pipeline (load, explore, extract parameters)."""
44
  console.print("[bold blue]Running EDA Pipeline[/bold blue]")
@@ -60,7 +66,9 @@ def eda(
60
  console.print("Data loaded and cleaned")
61
 
62
  if not skip_viz:
63
- task = progress.add_task("Step 2/3: Generate visualizations...", total=None)
 
 
64
  run_exploration()
65
  progress.update(task, completed=True)
66
  console.print("Visualizations generated")
@@ -92,8 +100,12 @@ def generate(
92
  interactive: bool = typer.Option(
93
  False, "--interactive", help="Prompt for parameters interactively"
94
  ),
95
- n_cases: int = typer.Option(10000, "--cases", "-n", help="Number of cases to generate"),
96
- start_date: str = typer.Option("2022-01-01", "--start", help="Start date (YYYY-MM-DD)"),
 
 
 
 
97
  end_date: str = typer.Option("2023-12-31", "--end", help="End date (YYYY-MM-DD)"),
98
  output: str = typer.Option(
99
  "data/generated/cases.csv", "--output", "-o", help="Output CSV file"
@@ -178,7 +190,9 @@ def generate(
178
  user_dist = _parse_case_type_dist(case_type_dist)
179
 
180
  gen = CaseGenerator(start=start, end=end, seed=seed)
181
- cases = gen.generate(n_cases, stage_mix_auto=True, case_type_distribution=user_dist)
 
 
182
  # Write primary cases file
183
  CaseGenerator.to_csv(cases, output_path)
184
  # Also write detailed hearings history alongside, for the dashboard/classifier
@@ -209,14 +223,22 @@ def simulate(
209
  interactive: bool = typer.Option(
210
  False, "--interactive", help="Prompt for parameters interactively"
211
  ),
212
- cases_csv: str = typer.Option("data/generated/cases.csv", "--cases", help="Input cases CSV"),
213
- days: int = typer.Option(384, "--days", "-d", help="Number of working days to simulate"),
214
- start_date: str = typer.Option(None, "--start", help="Simulation start date (YYYY-MM-DD)"),
 
 
 
 
 
 
215
  policy: str = typer.Option(
216
  "readiness", "--policy", "-p", help="Scheduling policy (fifo/age/readiness)"
217
  ),
218
  seed: int = typer.Option(42, "--seed", help="Random seed"),
219
- log_dir: str = typer.Option(None, "--log-dir", "-o", help="Output directory for logs"),
 
 
220
  ) -> None:
221
  """Run court scheduling simulation."""
222
  console.print(f"[bold blue]Running {days}-day simulation[/bold blue]")
@@ -238,7 +260,9 @@ def simulate(
238
  update={
239
  "cases": Path(cases_csv) if cases_csv else scfg.cases,
240
  "days": days if days else scfg.days,
241
- "start": (date_cls.fromisoformat(start_date) if start_date else scfg.start),
 
 
242
  "policy": policy if policy else scfg.policy,
243
  "seed": seed if seed else scfg.seed,
244
  "log_dir": (Path(log_dir) if log_dir else scfg.log_dir),
@@ -249,12 +273,16 @@ def simulate(
249
  cases_csv = typer.prompt("Cases CSV", default=cases_csv)
250
  days = typer.prompt("Days to simulate", default=days)
251
  start_date = (
252
- typer.prompt("Start date (YYYY-MM-DD) or blank", default=start_date or "")
 
 
253
  or None
254
  )
255
  policy = typer.prompt("Policy [readiness|fifo|age]", default=policy)
256
  seed = typer.prompt("Random seed", default=seed)
257
- log_dir = typer.prompt("Log dir (or blank)", default=log_dir or "") or None
 
 
258
  scfg = SimulateConfig(
259
  cases=Path(cases_csv),
260
  days=days,
@@ -268,9 +296,13 @@ def simulate(
268
  path = scfg.cases
269
  if path.exists():
270
  cases = CaseGenerator.from_csv(path)
271
- start = scfg.start or (max(c.filed_date for c in cases) if cases else date_cls.today())
 
 
272
  else:
273
- console.print(f"[yellow]Warning:[/yellow] {path} not found. Generating test cases...")
 
 
274
  start = scfg.start or date_cls.today().replace(day=1)
275
  gen = CaseGenerator(start=start, end=start.replace(day=28), seed=scfg.seed)
276
  cases = gen.generate(n_cases=5 * 151)
@@ -315,7 +347,9 @@ def simulate(
315
  gini_disp = gini(disp_times) if disp_times else 0.0
316
 
317
  console.print("\n[bold]Disposal Metrics:[/bold]")
318
- console.print(f" Cases disposed: {res.disposals:,} ({res.disposals / len(cases):.1%})")
 
 
319
  console.print(f" Gini coefficient: {gini_disp:.3f}")
320
 
321
  console.print("\n[bold]Efficiency:[/bold]")
@@ -333,14 +367,15 @@ def simulate(
333
  raise typer.Exit(code=1)
334
 
335
 
336
- # RL training command removed
337
-
338
-
339
  @app.command()
340
  def workflow(
341
- n_cases: int = typer.Option(10000, "--cases", "-n", help="Number of cases to generate"),
 
 
342
  sim_days: int = typer.Option(384, "--days", "-d", help="Simulation days"),
343
- output_dir: str = typer.Option("data/workflow_run", "--output", "-o", help="Output directory"),
 
 
344
  seed: int = typer.Option(42, "--seed", help="Random seed"),
345
  ) -> None:
346
  """Run full workflow: EDA -> Generate -> Simulate -> Report."""
@@ -417,7 +452,9 @@ def dashboard(
417
  app_path = Path(__file__).parent.parent / "scheduler" / "dashboard" / "app.py"
418
 
419
  if not app_path.exists():
420
- console.print(f"[bold red]Error:[/bold red] Dashboard app not found at {app_path}")
 
 
421
  raise typer.Exit(code=1)
422
 
423
  # Run streamlit
 
36
 
37
  @app.command()
38
  def eda(
39
+ skip_clean: bool = typer.Option(
40
+ False, "--skip-clean", help="Skip data loading and cleaning"
41
+ ),
42
+ skip_viz: bool = typer.Option(
43
+ False, "--skip-viz", help="Skip visualization generation"
44
+ ),
45
+ skip_params: bool = typer.Option(
46
+ False, "--skip-params", help="Skip parameter extraction"
47
+ ),
48
  ) -> None:
49
  """Run the EDA pipeline (load, explore, extract parameters)."""
50
  console.print("[bold blue]Running EDA Pipeline[/bold blue]")
 
66
  console.print("Data loaded and cleaned")
67
 
68
  if not skip_viz:
69
+ task = progress.add_task(
70
+ "Step 2/3: Generate visualizations...", total=None
71
+ )
72
  run_exploration()
73
  progress.update(task, completed=True)
74
  console.print("Visualizations generated")
 
100
  interactive: bool = typer.Option(
101
  False, "--interactive", help="Prompt for parameters interactively"
102
  ),
103
+ n_cases: int = typer.Option(
104
+ 10000, "--cases", "-n", help="Number of cases to generate"
105
+ ),
106
+ start_date: str = typer.Option(
107
+ "2022-01-01", "--start", help="Start date (YYYY-MM-DD)"
108
+ ),
109
  end_date: str = typer.Option("2023-12-31", "--end", help="End date (YYYY-MM-DD)"),
110
  output: str = typer.Option(
111
  "data/generated/cases.csv", "--output", "-o", help="Output CSV file"
 
190
  user_dist = _parse_case_type_dist(case_type_dist)
191
 
192
  gen = CaseGenerator(start=start, end=end, seed=seed)
193
+ cases = gen.generate(
194
+ n_cases, stage_mix_auto=True, case_type_distribution=user_dist
195
+ )
196
  # Write primary cases file
197
  CaseGenerator.to_csv(cases, output_path)
198
  # Also write detailed hearings history alongside, for the dashboard/classifier
 
223
  interactive: bool = typer.Option(
224
  False, "--interactive", help="Prompt for parameters interactively"
225
  ),
226
+ cases_csv: str = typer.Option(
227
+ "data/generated/cases.csv", "--cases", help="Input cases CSV"
228
+ ),
229
+ days: int = typer.Option(
230
+ 384, "--days", "-d", help="Number of working days to simulate"
231
+ ),
232
+ start_date: str = typer.Option(
233
+ None, "--start", help="Simulation start date (YYYY-MM-DD)"
234
+ ),
235
  policy: str = typer.Option(
236
  "readiness", "--policy", "-p", help="Scheduling policy (fifo/age/readiness)"
237
  ),
238
  seed: int = typer.Option(42, "--seed", help="Random seed"),
239
+ log_dir: str = typer.Option(
240
+ None, "--log-dir", "-o", help="Output directory for logs"
241
+ ),
242
  ) -> None:
243
  """Run court scheduling simulation."""
244
  console.print(f"[bold blue]Running {days}-day simulation[/bold blue]")
 
260
  update={
261
  "cases": Path(cases_csv) if cases_csv else scfg.cases,
262
  "days": days if days else scfg.days,
263
+ "start": (
264
+ date_cls.fromisoformat(start_date) if start_date else scfg.start
265
+ ),
266
  "policy": policy if policy else scfg.policy,
267
  "seed": seed if seed else scfg.seed,
268
  "log_dir": (Path(log_dir) if log_dir else scfg.log_dir),
 
273
  cases_csv = typer.prompt("Cases CSV", default=cases_csv)
274
  days = typer.prompt("Days to simulate", default=days)
275
  start_date = (
276
+ typer.prompt(
277
+ "Start date (YYYY-MM-DD) or blank", default=start_date or ""
278
+ )
279
  or None
280
  )
281
  policy = typer.prompt("Policy [readiness|fifo|age]", default=policy)
282
  seed = typer.prompt("Random seed", default=seed)
283
+ log_dir = (
284
+ typer.prompt("Log dir (or blank)", default=log_dir or "") or None
285
+ )
286
  scfg = SimulateConfig(
287
  cases=Path(cases_csv),
288
  days=days,
 
296
  path = scfg.cases
297
  if path.exists():
298
  cases = CaseGenerator.from_csv(path)
299
+ start = scfg.start or (
300
+ max(c.filed_date for c in cases) if cases else date_cls.today()
301
+ )
302
  else:
303
+ console.print(
304
+ f"[yellow]Warning:[/yellow] {path} not found. Generating test cases..."
305
+ )
306
  start = scfg.start or date_cls.today().replace(day=1)
307
  gen = CaseGenerator(start=start, end=start.replace(day=28), seed=scfg.seed)
308
  cases = gen.generate(n_cases=5 * 151)
 
347
  gini_disp = gini(disp_times) if disp_times else 0.0
348
 
349
  console.print("\n[bold]Disposal Metrics:[/bold]")
350
+ console.print(
351
+ f" Cases disposed: {res.disposals:,} ({res.disposals / len(cases):.1%})"
352
+ )
353
  console.print(f" Gini coefficient: {gini_disp:.3f}")
354
 
355
  console.print("\n[bold]Efficiency:[/bold]")
 
367
  raise typer.Exit(code=1)
368
 
369
 
 
 
 
370
  @app.command()
371
  def workflow(
372
+ n_cases: int = typer.Option(
373
+ 10000, "--cases", "-n", help="Number of cases to generate"
374
+ ),
375
  sim_days: int = typer.Option(384, "--days", "-d", help="Simulation days"),
376
+ output_dir: str = typer.Option(
377
+ "data/workflow_run", "--output", "-o", help="Output directory"
378
+ ),
379
  seed: int = typer.Option(42, "--seed", help="Random seed"),
380
  ) -> None:
381
  """Run full workflow: EDA -> Generate -> Simulate -> Report."""
 
452
  app_path = Path(__file__).parent.parent / "scheduler" / "dashboard" / "app.py"
453
 
454
  if not app_path.exists():
455
+ console.print(
456
+ f"[bold red]Error:[/bold red] Dashboard app not found at {app_path}"
457
+ )
458
  raise typer.Exit(code=1)
459
 
460
  # Run streamlit
configs/simulate.sample.toml CHANGED
@@ -4,7 +4,7 @@ days = 384
4
  # start = "2024-01-01" # optional; if omitted, uses max filed_date in cases
5
  policy = "readiness" # readiness|fifo|age
6
  seed = 42
7
- # duration_percentile = "median" # median|p90
8
- # courtrooms = 5 # optional; uses engine default if omitted
9
- # daily_capacity = 151 # optional; uses engine default if omitted
10
  # log_dir = "data/sim_runs/example"
 
4
  # start = "2024-01-01" # optional; if omitted, uses max filed_date in cases
5
  policy = "readiness" # readiness|fifo|age
6
  seed = 42
7
+ duration_percentile = "median" # median|p90
8
+ courtrooms = 5 # optional; uses engine default if omitted
9
+ daily_capacity = 151 # optional; uses engine default if omitted
10
  # log_dir = "data/sim_runs/example"
eda/load_clean.py CHANGED
@@ -60,6 +60,7 @@ def _null_summary(df: pl.DataFrame, name: str) -> None:
60
  def load_raw() -> tuple[pl.DataFrame, pl.DataFrame]:
61
  try:
62
  import duckdb
 
63
  if DUCKDB_FILE.exists():
64
  print(f"Loading raw data from DuckDB: {DUCKDB_FILE}")
65
  conn = duckdb.connect(str(DUCKDB_FILE))
@@ -72,6 +73,8 @@ def load_raw() -> tuple[pl.DataFrame, pl.DataFrame]:
72
  except Exception as e:
73
  print(f"[WARN] DuckDB load failed ({e}), falling back to CSV...")
74
  print("Loading raw data from CSVs (fallback)...")
 
 
75
  cases = pl.read_csv(
76
  CASES_FILE,
77
  try_parse_dates=True,
@@ -95,7 +98,9 @@ def clean_and_augment(
95
  # Standardise date columns if needed
96
  for col in ["DATE_FILED", "DECISION_DATE", "REGISTRATION_DATE", "LAST_SYNC_TIME"]:
97
  if col in cases.columns and cases[col].dtype == pl.Utf8:
98
- cases = cases.with_columns(pl.col(col).str.strptime(pl.Date, "%d-%m-%Y", strict=False))
 
 
99
 
100
  # Deduplicate on keys
101
  if "CNR_NUMBER" in cases.columns:
@@ -158,7 +163,10 @@ def clean_and_augment(
158
  hearings.filter(pl.col("BusinessOnDate").is_not_null())
159
  .sort(["CNR_NUMBER", "BusinessOnDate"])
160
  .with_columns(
161
- ((pl.col("BusinessOnDate") - pl.col("BusinessOnDate").shift(1)) / timedelta(days=1))
 
 
 
162
  .over("CNR_NUMBER")
163
  .alias("HEARING_GAP_DAYS")
164
  )
@@ -175,7 +183,14 @@ def clean_and_augment(
175
  )
176
  cases = cases.join(gap_stats, on="CNR_NUMBER", how="left")
177
  else:
178
- for col in ["GAP_MEAN", "GAP_MEDIAN", "GAP_P25", "GAP_P75", "GAP_STD", "N_GAPS"]:
 
 
 
 
 
 
 
179
  cases = cases.with_columns(pl.lit(None).alias(col))
180
 
181
  # Fill some basics
 
60
  def load_raw() -> tuple[pl.DataFrame, pl.DataFrame]:
61
  try:
62
  import duckdb
63
+
64
  if DUCKDB_FILE.exists():
65
  print(f"Loading raw data from DuckDB: {DUCKDB_FILE}")
66
  conn = duckdb.connect(str(DUCKDB_FILE))
 
73
  except Exception as e:
74
  print(f"[WARN] DuckDB load failed ({e}), falling back to CSV...")
75
  print("Loading raw data from CSVs (fallback)...")
76
+ if not CASES_FILE.exists() or not HEAR_FILE.exists():
77
+ raise FileNotFoundError("One or both CSV files are missing.")
78
  cases = pl.read_csv(
79
  CASES_FILE,
80
  try_parse_dates=True,
 
98
  # Standardise date columns if needed
99
  for col in ["DATE_FILED", "DECISION_DATE", "REGISTRATION_DATE", "LAST_SYNC_TIME"]:
100
  if col in cases.columns and cases[col].dtype == pl.Utf8:
101
+ cases = cases.with_columns(
102
+ pl.col(col).str.strptime(pl.Date, "%d-%m-%Y", strict=False)
103
+ )
104
 
105
  # Deduplicate on keys
106
  if "CNR_NUMBER" in cases.columns:
 
163
  hearings.filter(pl.col("BusinessOnDate").is_not_null())
164
  .sort(["CNR_NUMBER", "BusinessOnDate"])
165
  .with_columns(
166
+ (
167
+ (pl.col("BusinessOnDate") - pl.col("BusinessOnDate").shift(1))
168
+ / timedelta(days=1)
169
+ )
170
  .over("CNR_NUMBER")
171
  .alias("HEARING_GAP_DAYS")
172
  )
 
183
  )
184
  cases = cases.join(gap_stats, on="CNR_NUMBER", how="left")
185
  else:
186
+ for col in [
187
+ "GAP_MEAN",
188
+ "GAP_MEDIAN",
189
+ "GAP_P25",
190
+ "GAP_P75",
191
+ "GAP_STD",
192
+ "N_GAPS",
193
+ ]:
194
  cases = cases.with_columns(pl.lit(None).alias(col))
195
 
196
  # Fill some basics
scheduler/dashboard/app.py CHANGED
@@ -1,7 +1,7 @@
1
  """Main dashboard application for Court Scheduling System.
2
 
3
  This is the entry point for the Streamlit multi-page dashboard.
4
- Launch with: uv run court-scheduler dashboard
5
  """
6
 
7
  from __future__ import annotations
@@ -12,8 +12,6 @@ ROOT = Path("/app") # absolute, unambiguous
12
  if str(ROOT) not in sys.path:
13
  sys.path.insert(0, str(ROOT))
14
 
15
- import subprocess
16
- from pathlib import Path
17
 
18
  import streamlit as st
19
 
@@ -27,29 +25,17 @@ st.set_page_config(
27
  initial_sidebar_state="expanded",
28
  )
29
 
30
- # Enforce `uv` availability for all dashboard-triggered commands
31
- try:
32
- uv_check = subprocess.run(["uv", "--version"], capture_output=True, text=True)
33
- if uv_check.returncode != 0:
34
- raise RuntimeError(uv_check.stderr or "uv not available")
35
- except Exception:
36
- import streamlit as st
37
-
38
- st.error(
39
- "'uv' is required to run this dashboard's commands. Please install uv and rerun.\n\n"
40
- "Install on macOS/Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh`\n"
41
- "Install on Windows (PowerShell): `irm https://astral.sh/uv/install.ps1 | iex`"
42
- )
43
- st.stop()
44
-
45
  # Main page content
46
  st.title("Court Scheduling System Dashboard")
47
- st.markdown("**Karnataka High Court - Algorithmic Decision Support for Fair Scheduling**")
 
 
48
 
49
  st.markdown("---")
50
 
51
  # Introduction
52
- st.markdown("""
 
53
  ### Overview
54
 
55
  This system provides data-driven scheduling recommendations while maintaining judicial control and autonomy.
@@ -63,7 +49,8 @@ This system provides data-driven scheduling recommendations while maintaining ju
63
  - Reinforcement learning optimization
64
 
65
  Use the sidebar to navigate between sections.
66
- """)
 
67
 
68
  # System status
69
  status_header_col1, status_header_col2 = st.columns([3, 1])
@@ -99,47 +86,57 @@ with col3:
99
  st.caption("Run EDA pipeline to generate visualizations")
100
 
101
  # Setup Controls
102
- eda_ready = data_status["cleaned_data"] and data_status["parameters"] and data_status["eda_figures"]
 
 
 
 
103
 
104
  if not eda_ready:
105
  st.markdown("---")
106
  st.markdown("### Initial Setup")
107
- st.warning("Run the EDA pipeline to process historical data and extract parameters.")
 
 
108
 
109
  col1, col2 = st.columns([2, 1])
110
 
111
  with col1:
112
- st.markdown("""
 
113
  The EDA pipeline:
114
  - Loads and cleans historical court case data
115
  - Extracts statistical parameters (distributions, transition probabilities)
116
  - Generates analysis visualizations
117
 
118
  This is required before using other dashboard features.
119
- """)
 
120
 
121
  with col2:
122
  if st.button("Run EDA Pipeline", type="primary", use_container_width=True):
123
- import subprocess
 
 
124
 
125
  with st.spinner("Running EDA pipeline... This may take a few minutes."):
126
  try:
127
- result = subprocess.run(
128
- ["uv", "run", "court-scheduler", "eda"],
129
- capture_output=True,
130
- text=True,
131
- cwd=str(Path.cwd()),
132
- )
133
-
134
- if result.returncode == 0:
135
- st.success("EDA pipeline completed")
136
- st.rerun()
137
- else:
138
- st.error(f"Pipeline failed with error code {result.returncode}")
139
- with st.expander("Show error details"):
140
- st.code(result.stderr, language="text")
141
  except Exception as e:
142
- st.error(f"Error running pipeline: {e}")
 
 
143
 
144
  with st.expander("Run manually via CLI"):
145
  st.code("uv run court-scheduler eda", language="bash")
@@ -154,7 +151,8 @@ st.markdown("### Dashboard Sections")
154
  col1, col2 = st.columns(2)
155
 
156
  with col1:
157
- st.markdown("""
 
158
  #### 1. Data & Insights
159
  Explore historical case data, view analysis visualizations, and review extracted parameters.
160
 
@@ -163,10 +161,12 @@ with col1:
163
 
164
  #### 3. Simulation Workflow
165
  Generate cases, configure simulation parameters, run scheduling simulations, and view results.
166
- """)
 
167
 
168
  with col2:
169
- st.markdown("""
 
170
  #### 4. Cause Lists & Overrides
171
  View generated cause lists, make judge overrides, and track modification history.
172
 
@@ -175,13 +175,15 @@ with col2:
175
 
176
  #### 6. Analytics & Reports
177
  Compare simulation runs, analyze performance metrics, and export comprehensive reports.
178
- """)
 
179
 
180
  st.markdown("---")
181
 
182
  # Typical Workflow
183
  with st.expander("Typical Usage Workflow"):
184
- st.markdown("""
 
185
  **Step 1: Initial Setup**
186
  - Run EDA pipeline to process historical data (one-time setup)
187
 
@@ -208,7 +210,8 @@ with st.expander("Typical Usage Workflow"):
208
  - Use Analytics & Reports to evaluate fairness and efficiency
209
  - Compare different scheduling policies
210
  - Identify bottlenecks and improvement opportunities
211
- """)
 
212
 
213
  # Footer
214
  st.markdown("---")
 
1
  """Main dashboard application for Court Scheduling System.
2
 
3
  This is the entry point for the Streamlit multi-page dashboard.
4
+ Launch with: uv run court-scheduler dashboard (or `streamlit run` directly)
5
  """
6
 
7
  from __future__ import annotations
 
12
  if str(ROOT) not in sys.path:
13
  sys.path.insert(0, str(ROOT))
14
 
 
 
15
 
16
  import streamlit as st
17
 
 
25
  initial_sidebar_state="expanded",
26
  )
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  # Main page content
29
  st.title("Court Scheduling System Dashboard")
30
+ st.markdown(
31
+ "**Karnataka High Court - Algorithmic Decision Support for Fair Scheduling**"
32
+ )
33
 
34
  st.markdown("---")
35
 
36
  # Introduction
37
+ st.markdown(
38
+ """
39
  ### Overview
40
 
41
  This system provides data-driven scheduling recommendations while maintaining judicial control and autonomy.
 
49
  - Reinforcement learning optimization
50
 
51
  Use the sidebar to navigate between sections.
52
+ """
53
+ )
54
 
55
  # System status
56
  status_header_col1, status_header_col2 = st.columns([3, 1])
 
86
  st.caption("Run EDA pipeline to generate visualizations")
87
 
88
  # Setup Controls
89
+ eda_ready = (
90
+ data_status["cleaned_data"]
91
+ and data_status["parameters"]
92
+ and data_status["eda_figures"]
93
+ )
94
 
95
  if not eda_ready:
96
  st.markdown("---")
97
  st.markdown("### Initial Setup")
98
+ st.warning(
99
+ "Run the EDA pipeline to process historical data and extract parameters."
100
+ )
101
 
102
  col1, col2 = st.columns([2, 1])
103
 
104
  with col1:
105
+ st.markdown(
106
+ """
107
  The EDA pipeline:
108
  - Loads and cleans historical court case data
109
  - Extracts statistical parameters (distributions, transition probabilities)
110
  - Generates analysis visualizations
111
 
112
  This is required before using other dashboard features.
113
+ """
114
+ )
115
 
116
  with col2:
117
  if st.button("Run EDA Pipeline", type="primary", use_container_width=True):
118
+ from eda.load_clean import run_load_and_clean
119
+ from eda.exploration import run_exploration
120
+ from eda.parameters import run_parameter_export
121
 
122
  with st.spinner("Running EDA pipeline... This may take a few minutes."):
123
  try:
124
+ # Step 1: Load & clean data
125
+ run_load_and_clean()
126
+
127
+ # Step 2: Generate visualizations
128
+ run_exploration()
129
+
130
+ # Step 3: Extract parameters
131
+ run_parameter_export()
132
+
133
+ st.success("EDA pipeline completed")
134
+ st.rerun()
135
+
 
 
136
  except Exception as e:
137
+ st.error("Pipeline failed while running inside the dashboard.")
138
+ with st.expander("Show error details"):
139
+ st.exception(e)
140
 
141
  with st.expander("Run manually via CLI"):
142
  st.code("uv run court-scheduler eda", language="bash")
 
151
  col1, col2 = st.columns(2)
152
 
153
  with col1:
154
+ st.markdown(
155
+ """
156
  #### 1. Data & Insights
157
  Explore historical case data, view analysis visualizations, and review extracted parameters.
158
 
 
161
 
162
  #### 3. Simulation Workflow
163
  Generate cases, configure simulation parameters, run scheduling simulations, and view results.
164
+ """
165
+ )
166
 
167
  with col2:
168
+ st.markdown(
169
+ """
170
  #### 4. Cause Lists & Overrides
171
  View generated cause lists, make judge overrides, and track modification history.
172
 
 
175
 
176
  #### 6. Analytics & Reports
177
  Compare simulation runs, analyze performance metrics, and export comprehensive reports.
178
+ """
179
+ )
180
 
181
  st.markdown("---")
182
 
183
  # Typical Workflow
184
  with st.expander("Typical Usage Workflow"):
185
+ st.markdown(
186
+ """
187
  **Step 1: Initial Setup**
188
  - Run EDA pipeline to process historical data (one-time setup)
189
 
 
210
  - Use Analytics & Reports to evaluate fairness and efficiency
211
  - Compare different scheduling policies
212
  - Identify bottlenecks and improvement opportunities
213
+ """
214
+ )
215
 
216
  # Footer
217
  st.markdown("---")
scheduler/dashboard/pages/1_Data_And_Insights.py CHANGED
@@ -70,7 +70,9 @@ def load_dashboard_data():
70
 
71
  with st.spinner("Loading data..."):
72
  try:
73
- cases_df, hearings_df, params, stats, total_cases, total_hearings = load_dashboard_data()
 
 
74
  except Exception as e:
75
  st.error(f"Error loading data: {e}")
76
  st.info("Please run the EDA pipeline first: `uv run court-scheduler eda`")
@@ -96,28 +98,25 @@ if cases_df.empty and hearings_df.empty:
96
 
97
  with col1:
98
  if st.button("Run EDA Pipeline Now", type="primary", use_container_width=True):
99
- import subprocess
 
 
100
 
101
  with st.spinner("Running EDA pipeline... This will take a few minutes."):
102
  try:
103
- result = subprocess.run(
104
- ["uv", "run", "court-scheduler", "eda"],
105
- capture_output=True,
106
- text=True,
107
- cwd=str(Path.cwd()),
108
- )
109
-
110
- if result.returncode == 0:
111
- st.success("EDA pipeline completed successfully!")
112
- st.info("Reload this page to see the data.")
113
- if st.button("Reload Page"):
114
- st.rerun()
115
- else:
116
- st.error(f"Pipeline failed with error code {result.returncode}")
117
- with st.expander("Error details"):
118
- st.code(result.stderr, language="text")
119
  except Exception as e:
120
- st.error(f"Error: {e}")
 
121
 
122
  with col2:
123
  with st.expander("Alternative: Run via CLI"):
@@ -133,7 +132,9 @@ col1, col2, col3, col4, col5 = st.columns(5)
133
  with col1:
134
  st.metric("Total Cases", f"{total_cases:,}")
135
  if "YEAR_FILED" in cases_df.columns:
136
- year_range = f"{cases_df['YEAR_FILED'].min():.0f}-{cases_df['YEAR_FILED'].max():.0f}"
 
 
137
  st.caption(f"Years: {year_range}")
138
 
139
  with col2:
@@ -176,7 +177,9 @@ with col5:
176
  st.markdown("---")
177
 
178
  # Main tabs
179
- tab1, tab2, tab3 = st.tabs(["Historical Analysis", "Interactive Exploration", "Parameters"])
 
 
180
 
181
  # TAB 1: Historical Analysis - Pre-generated figures
182
  with tab1:
@@ -188,11 +191,15 @@ with tab1:
188
  figures_dir = Path("reports/figures")
189
 
190
  if not figures_dir.exists():
191
- st.warning("EDA figures not found. Run the EDA pipeline to generate visualizations.")
 
 
192
  st.code("uv run court-scheduler eda")
193
  else:
194
  # Find latest versioned directory
195
- version_dirs = [d for d in figures_dir.iterdir() if d.is_dir() and d.name.startswith("v")]
 
 
196
 
197
  if not version_dirs:
198
  st.warning(
@@ -207,7 +214,9 @@ with tab1:
207
  # List available figures from the versioned directory
208
  # Exclude deprecated/removed visuals like the monthly waterfall
209
  figure_files = [
210
- f for f in sorted(latest_dir.glob("*.html")) if "waterfall" not in f.name.lower()
 
 
211
  ]
212
 
213
  if not figure_files:
@@ -227,10 +236,14 @@ with tab1:
227
  if any(x in f.name for x in ["stage", "sankey", "transition"])
228
  ]
229
  time_figs = [
230
- f for f in figure_files if any(x in f.name for x in ["monthly", "load", "gap"])
 
 
231
  ]
232
  other_figs = [
233
- f for f in figure_files if f not in distribution_figs + stage_figs + time_figs
 
 
234
  ]
235
 
236
  # Category 1: Case Distributions
@@ -325,7 +338,9 @@ with tab2:
325
  selected_stages = st.sidebar.multiselect(
326
  "Stages",
327
  options=available_stages,
328
- default=available_stages[:10] if len(available_stages) > 10 else available_stages,
 
 
329
  key="stage_filter",
330
  )
331
  else:
@@ -334,12 +349,16 @@ with tab2:
334
 
335
  # Apply filters with copy to ensure clean dataframes
336
  if selected_case_types and case_type_col:
337
- filtered_cases = cases_df[cases_df[case_type_col].isin(selected_case_types)].copy()
 
 
338
  else:
339
  filtered_cases = cases_df.copy()
340
 
341
  if selected_stages and stage_col:
342
- filtered_hearings = hearings_df[hearings_df[stage_col].isin(selected_stages)].copy()
 
 
343
  else:
344
  filtered_hearings = hearings_df.copy()
345
 
@@ -370,9 +389,9 @@ with tab2:
370
 
371
  with col4:
372
  if "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0:
373
- adj_rate_filtered = (filtered_hearings["Outcome"] == "ADJOURNED").sum() / len(
374
- filtered_hearings
375
- )
376
  st.metric("Adjournment Rate", f"{adj_rate_filtered:.1%}")
377
  else:
378
  st.metric("Adjournment Rate", "N/A")
@@ -387,9 +406,15 @@ with tab2:
387
  with sub_tab1:
388
  st.markdown("#### Case Distribution by Type")
389
 
390
- if case_type_col and case_type_col in filtered_cases.columns and len(filtered_cases) > 0:
 
 
 
 
391
  # Compute value counts and ensure proper structure
392
- case_type_counts = filtered_cases[case_type_col].value_counts().reset_index()
 
 
393
  # Rename columns for clarity (works across pandas versions)
394
  case_type_counts.columns = ["CaseType", "Count"]
395
 
@@ -428,7 +453,11 @@ with tab2:
428
  with sub_tab2:
429
  st.markdown("#### Stage Analysis")
430
 
431
- if stage_col and stage_col in filtered_hearings.columns and len(filtered_hearings) > 0:
 
 
 
 
432
  stage_counts = filtered_hearings[stage_col].value_counts().reset_index()
433
  stage_counts.columns = ["Stage", "Count"]
434
 
@@ -465,7 +494,10 @@ with tab2:
465
  not_adjourned = total_hearings - adjourned
466
 
467
  outcome_df = pd.DataFrame(
468
- {"Outcome": ["ADJOURNED", "NOT ADJOURNED"], "Count": [adjourned, not_adjourned]}
 
 
 
469
  )
470
 
471
  fig_pie = px.pie(
@@ -474,7 +506,10 @@ with tab2:
474
  names="Outcome",
475
  title=f"Outcome Distribution (Total: {total_hearings:,})",
476
  color="Outcome",
477
- color_discrete_map={"ADJOURNED": "#ef4444", "NOT ADJOURNED": "#22c55e"},
 
 
 
478
  )
479
  fig_pie.update_layout(height=400)
480
  st.plotly_chart(fig_pie, use_container_width=True)
@@ -483,7 +518,9 @@ with tab2:
483
  st.markdown("**By Stage**")
484
  adj_by_stage = (
485
  filtered_hearings.groupby(stage_col)["Outcome"]
486
- .apply(lambda x: (x == "ADJOURNED").sum() / len(x) if len(x) > 0 else 0)
 
 
487
  .reset_index()
488
  )
489
  adj_by_stage.columns = ["Stage", "Rate"]
@@ -507,7 +544,9 @@ with tab2:
507
  with sub_tab4:
508
  st.markdown("#### Raw Data")
509
 
510
- data_view = st.radio("Select data to view:", ["Cases", "Hearings"], horizontal=True)
 
 
511
 
512
  if data_view == "Cases":
513
  st.dataframe(
@@ -516,7 +555,9 @@ with tab2:
516
  height=600,
517
  )
518
 
519
- st.markdown(f"**Showing first 500 of {len(filtered_cases):,} filtered cases**")
 
 
520
 
521
  # Download button
522
  csv = filtered_cases.to_csv(index=False).encode("utf-8")
@@ -533,7 +574,9 @@ with tab2:
533
  height=600,
534
  )
535
 
536
- st.markdown(f"**Showing first 500 of {len(filtered_hearings):,} filtered hearings**")
 
 
537
 
538
  # Download button
539
  csv = filtered_hearings.to_csv(index=False).encode("utf-8")
@@ -559,7 +602,10 @@ with tab3:
559
  st.markdown("#### Case Types")
560
  if "case_types" in params and params["case_types"]:
561
  case_types_df = pd.DataFrame(
562
- {"Case Type": params["case_types"], "Index": range(len(params["case_types"]))}
 
 
 
563
  )
564
  st.dataframe(case_types_df, use_container_width=True, hide_index=True)
565
  st.caption(f"Total: {len(params['case_types'])} case types")
@@ -594,9 +640,13 @@ with tab3:
594
  with st.expander(f"From: {stage}"):
595
  trans_df = pd.DataFrame(transitions)
596
  if not trans_df.empty:
597
- st.dataframe(trans_df, use_container_width=True, hide_index=True)
 
 
598
 
599
- st.caption(f"Total: {len(params['stage_graph'])} stages with transition data")
 
 
600
  else:
601
  st.info("No stage transition data found")
602
 
@@ -609,8 +659,12 @@ with tab3:
609
 
610
  # Create heatmap
611
  adj_stats = params["adjournment_stats"]
612
- stages_list = list(adj_stats.keys())[:20] # Limit to 20 stages for readability
613
- case_types_list = params.get("case_types", [])[:15] # Limit to 15 case types
 
 
 
 
614
 
615
  if stages_list and case_types_list:
616
  heatmap_data = []
@@ -656,7 +710,12 @@ with tab3:
656
  """)
657
 
658
  config_tab1, config_tab2, config_tab3, config_tab4 = st.tabs(
659
- ["EDA Parameters", "Ripeness Classifier", "Case Generator", "Simulation Defaults"]
 
 
 
 
 
660
  )
661
 
662
  with config_tab1:
@@ -857,7 +916,10 @@ UNRIPE cases: 0.7x priority
857
  from scheduler.data.config import MONTHLY_SEASONALITY
858
 
859
  season_df = pd.DataFrame(
860
- [{"Month": i, "Factor": MONTHLY_SEASONALITY.get(i, 1.0)} for i in range(1, 13)]
 
 
 
861
  )
862
  st.dataframe(season_df, use_container_width=True, hide_index=True)
863
  st.caption("1.0 = average, >1.0 = more cases, <1.0 = fewer cases")
@@ -900,7 +962,9 @@ Ripe purposes (80% probability):
900
  """,
901
  language="text",
902
  )
903
- st.caption("Early ADMISSION: 40% bottleneck, Advanced stages: mostly ripe")
 
 
904
 
905
  with config_tab4:
906
  st.markdown("#### Simulation Defaults")
@@ -930,8 +994,12 @@ Formula:
930
  st.markdown("**Courtroom Capacity**")
931
  if params and "court_capacity_global" in params:
932
  cap = params["court_capacity_global"]
933
- st.metric("Median slots/day", f"{cap.get('slots_median_global', 151):.0f}")
934
- st.metric("P90 slots/day", f"{cap.get('slots_p90_global', 200):.0f}")
 
 
 
 
935
  else:
936
  st.info("Run EDA to load capacity statistics")
937
 
 
70
 
71
  with st.spinner("Loading data..."):
72
  try:
73
+ cases_df, hearings_df, params, stats, total_cases, total_hearings = (
74
+ load_dashboard_data()
75
+ )
76
  except Exception as e:
77
  st.error(f"Error loading data: {e}")
78
  st.info("Please run the EDA pipeline first: `uv run court-scheduler eda`")
 
98
 
99
  with col1:
100
  if st.button("Run EDA Pipeline Now", type="primary", use_container_width=True):
101
+ from eda.load_clean import run_load_and_clean
102
+ from eda.exploration import run_exploration
103
+ from eda.parameters import run_parameter_export
104
 
105
  with st.spinner("Running EDA pipeline... This will take a few minutes."):
106
  try:
107
+ # Step 1: Load & clean data
108
+ run_load_and_clean()
109
+ # Step 2: Generate visualizations
110
+ run_exploration()
111
+ # Step 3: Extract parameters
112
+ run_parameter_export()
113
+ st.success("EDA pipeline completed successfully!")
114
+ st.info("Reload this page to see the updated data.")
115
+ if st.button("Reload Page"):
116
+ st.rerun()
 
 
 
 
 
 
117
  except Exception as e:
118
+ with st.expander("Error details"):
119
+ st.exception(e)
120
 
121
  with col2:
122
  with st.expander("Alternative: Run via CLI"):
 
132
  with col1:
133
  st.metric("Total Cases", f"{total_cases:,}")
134
  if "YEAR_FILED" in cases_df.columns:
135
+ year_range = (
136
+ f"{cases_df['YEAR_FILED'].min():.0f}-{cases_df['YEAR_FILED'].max():.0f}"
137
+ )
138
  st.caption(f"Years: {year_range}")
139
 
140
  with col2:
 
177
  st.markdown("---")
178
 
179
  # Main tabs
180
+ tab1, tab2, tab3 = st.tabs(
181
+ ["Historical Analysis", "Interactive Exploration", "Parameters"]
182
+ )
183
 
184
  # TAB 1: Historical Analysis - Pre-generated figures
185
  with tab1:
 
191
  figures_dir = Path("reports/figures")
192
 
193
  if not figures_dir.exists():
194
+ st.warning(
195
+ "EDA figures not found. Run the EDA pipeline to generate visualizations."
196
+ )
197
  st.code("uv run court-scheduler eda")
198
  else:
199
  # Find latest versioned directory
200
+ version_dirs = [
201
+ d for d in figures_dir.iterdir() if d.is_dir() and d.name.startswith("v")
202
+ ]
203
 
204
  if not version_dirs:
205
  st.warning(
 
214
  # List available figures from the versioned directory
215
  # Exclude deprecated/removed visuals like the monthly waterfall
216
  figure_files = [
217
+ f
218
+ for f in sorted(latest_dir.glob("*.html"))
219
+ if "waterfall" not in f.name.lower()
220
  ]
221
 
222
  if not figure_files:
 
236
  if any(x in f.name for x in ["stage", "sankey", "transition"])
237
  ]
238
  time_figs = [
239
+ f
240
+ for f in figure_files
241
+ if any(x in f.name for x in ["monthly", "load", "gap"])
242
  ]
243
  other_figs = [
244
+ f
245
+ for f in figure_files
246
+ if f not in distribution_figs + stage_figs + time_figs
247
  ]
248
 
249
  # Category 1: Case Distributions
 
338
  selected_stages = st.sidebar.multiselect(
339
  "Stages",
340
  options=available_stages,
341
+ default=available_stages[:10]
342
+ if len(available_stages) > 10
343
+ else available_stages,
344
  key="stage_filter",
345
  )
346
  else:
 
349
 
350
  # Apply filters with copy to ensure clean dataframes
351
  if selected_case_types and case_type_col:
352
+ filtered_cases = cases_df[
353
+ cases_df[case_type_col].isin(selected_case_types)
354
+ ].copy()
355
  else:
356
  filtered_cases = cases_df.copy()
357
 
358
  if selected_stages and stage_col:
359
+ filtered_hearings = hearings_df[
360
+ hearings_df[stage_col].isin(selected_stages)
361
+ ].copy()
362
  else:
363
  filtered_hearings = hearings_df.copy()
364
 
 
389
 
390
  with col4:
391
  if "Outcome" in filtered_hearings.columns and len(filtered_hearings) > 0:
392
+ adj_rate_filtered = (
393
+ filtered_hearings["Outcome"] == "ADJOURNED"
394
+ ).sum() / len(filtered_hearings)
395
  st.metric("Adjournment Rate", f"{adj_rate_filtered:.1%}")
396
  else:
397
  st.metric("Adjournment Rate", "N/A")
 
406
  with sub_tab1:
407
  st.markdown("#### Case Distribution by Type")
408
 
409
+ if (
410
+ case_type_col
411
+ and case_type_col in filtered_cases.columns
412
+ and len(filtered_cases) > 0
413
+ ):
414
  # Compute value counts and ensure proper structure
415
+ case_type_counts = (
416
+ filtered_cases[case_type_col].value_counts().reset_index()
417
+ )
418
  # Rename columns for clarity (works across pandas versions)
419
  case_type_counts.columns = ["CaseType", "Count"]
420
 
 
453
  with sub_tab2:
454
  st.markdown("#### Stage Analysis")
455
 
456
+ if (
457
+ stage_col
458
+ and stage_col in filtered_hearings.columns
459
+ and len(filtered_hearings) > 0
460
+ ):
461
  stage_counts = filtered_hearings[stage_col].value_counts().reset_index()
462
  stage_counts.columns = ["Stage", "Count"]
463
 
 
494
  not_adjourned = total_hearings - adjourned
495
 
496
  outcome_df = pd.DataFrame(
497
+ {
498
+ "Outcome": ["ADJOURNED", "NOT ADJOURNED"],
499
+ "Count": [adjourned, not_adjourned],
500
+ }
501
  )
502
 
503
  fig_pie = px.pie(
 
506
  names="Outcome",
507
  title=f"Outcome Distribution (Total: {total_hearings:,})",
508
  color="Outcome",
509
+ color_discrete_map={
510
+ "ADJOURNED": "#ef4444",
511
+ "NOT ADJOURNED": "#22c55e",
512
+ },
513
  )
514
  fig_pie.update_layout(height=400)
515
  st.plotly_chart(fig_pie, use_container_width=True)
 
518
  st.markdown("**By Stage**")
519
  adj_by_stage = (
520
  filtered_hearings.groupby(stage_col)["Outcome"]
521
+ .apply(
522
+ lambda x: (x == "ADJOURNED").sum() / len(x) if len(x) > 0 else 0
523
+ )
524
  .reset_index()
525
  )
526
  adj_by_stage.columns = ["Stage", "Rate"]
 
544
  with sub_tab4:
545
  st.markdown("#### Raw Data")
546
 
547
+ data_view = st.radio(
548
+ "Select data to view:", ["Cases", "Hearings"], horizontal=True
549
+ )
550
 
551
  if data_view == "Cases":
552
  st.dataframe(
 
555
  height=600,
556
  )
557
 
558
+ st.markdown(
559
+ f"**Showing first 500 of {len(filtered_cases):,} filtered cases**"
560
+ )
561
 
562
  # Download button
563
  csv = filtered_cases.to_csv(index=False).encode("utf-8")
 
574
  height=600,
575
  )
576
 
577
+ st.markdown(
578
+ f"**Showing first 500 of {len(filtered_hearings):,} filtered hearings**"
579
+ )
580
 
581
  # Download button
582
  csv = filtered_hearings.to_csv(index=False).encode("utf-8")
 
602
  st.markdown("#### Case Types")
603
  if "case_types" in params and params["case_types"]:
604
  case_types_df = pd.DataFrame(
605
+ {
606
+ "Case Type": params["case_types"],
607
+ "Index": range(len(params["case_types"])),
608
+ }
609
  )
610
  st.dataframe(case_types_df, use_container_width=True, hide_index=True)
611
  st.caption(f"Total: {len(params['case_types'])} case types")
 
640
  with st.expander(f"From: {stage}"):
641
  trans_df = pd.DataFrame(transitions)
642
  if not trans_df.empty:
643
+ st.dataframe(
644
+ trans_df, use_container_width=True, hide_index=True
645
+ )
646
 
647
+ st.caption(
648
+ f"Total: {len(params['stage_graph'])} stages with transition data"
649
+ )
650
  else:
651
  st.info("No stage transition data found")
652
 
 
659
 
660
  # Create heatmap
661
  adj_stats = params["adjournment_stats"]
662
+ stages_list = list(adj_stats.keys())[
663
+ :20
664
+ ] # Limit to 20 stages for readability
665
+ case_types_list = params.get("case_types", [])[
666
+ :15
667
+ ] # Limit to 15 case types
668
 
669
  if stages_list and case_types_list:
670
  heatmap_data = []
 
710
  """)
711
 
712
  config_tab1, config_tab2, config_tab3, config_tab4 = st.tabs(
713
+ [
714
+ "EDA Parameters",
715
+ "Ripeness Classifier",
716
+ "Case Generator",
717
+ "Simulation Defaults",
718
+ ]
719
  )
720
 
721
  with config_tab1:
 
916
  from scheduler.data.config import MONTHLY_SEASONALITY
917
 
918
  season_df = pd.DataFrame(
919
+ [
920
+ {"Month": i, "Factor": MONTHLY_SEASONALITY.get(i, 1.0)}
921
+ for i in range(1, 13)
922
+ ]
923
  )
924
  st.dataframe(season_df, use_container_width=True, hide_index=True)
925
  st.caption("1.0 = average, >1.0 = more cases, <1.0 = fewer cases")
 
962
  """,
963
  language="text",
964
  )
965
+ st.caption(
966
+ "Early ADMISSION: 40% bottleneck, Advanced stages: mostly ripe"
967
+ )
968
 
969
  with config_tab4:
970
  st.markdown("#### Simulation Defaults")
 
994
  st.markdown("**Courtroom Capacity**")
995
  if params and "court_capacity_global" in params:
996
  cap = params["court_capacity_global"]
997
+ st.metric(
998
+ "Median slots/day", f"{cap.get('slots_median_global', 151):.0f}"
999
+ )
1000
+ st.metric(
1001
+ "P90 slots/day", f"{cap.get('slots_p90_global', 200):.0f}"
1002
+ )
1003
  else:
1004
  st.info("Run EDA to load capacity statistics")
1005
 
scheduler/dashboard/pages/2_Ripeness_Classifier.py CHANGED
@@ -99,7 +99,9 @@ RipenessClassifier.set_thresholds(
99
  )
100
 
101
  # Main content
102
- tab1, tab2, tab3 = st.tabs(["Current Configuration", "Interactive Testing", "Batch Classification"])
 
 
103
 
104
  with tab1:
105
  st.markdown("### Current Classifier Configuration")
@@ -153,7 +155,10 @@ with tab1:
153
  stage_rules = {
154
  "PRE-TRIAL": {"min_days": 60, "keywords": ["affidavit filed", "reply filed"]},
155
  "TRIAL": {"min_days": 45, "keywords": ["evidence complete", "cross complete"]},
156
- "POST-TRIAL": {"min_days": 30, "keywords": ["arguments complete", "written note"]},
 
 
 
157
  "FINAL DISPOSAL": {"min_days": 15, "keywords": ["disposed", "judgment"]},
158
  }
159
 
@@ -190,8 +195,12 @@ with tab2:
190
  service_hearings_count = st.number_input(
191
  "Service Hearings", min_value=0, max_value=20, value=3
192
  )
193
- days_in_stage = st.number_input("Days in Stage", min_value=0, max_value=365, value=45)
194
- case_age = st.number_input("Case Age (days)", min_value=0, max_value=3650, value=120)
 
 
 
 
195
 
196
  # Keywords
197
  has_keywords = st.multiselect(
@@ -213,7 +222,7 @@ with tab2:
213
 
214
  test_case = Case(
215
  case_id=case_id,
216
- case_type=case_type, # Use string directly instead of CaseType enum
217
  filed_date=filed_date,
218
  current_stage=case_stage,
219
  status=CaseStatus.PENDING,
@@ -286,15 +295,25 @@ with tab3:
286
 
287
  with col1:
288
  pct = classifications["RIPE"] / len(cases) * 100
289
- st.metric("RIPE Cases", f"{classifications['RIPE']:,}", f"{pct:.1f}%")
 
 
290
 
291
  with col2:
292
  pct = classifications["UNKNOWN"] / len(cases) * 100
293
- st.metric("UNKNOWN Cases", f"{classifications['UNKNOWN']:,}", f"{pct:.1f}%")
 
 
 
 
294
 
295
  with col3:
296
  pct = classifications["UNRIPE"] / len(cases) * 100
297
- st.metric("UNRIPE Cases", f"{classifications['UNRIPE']:,}", f"{pct:.1f}%")
 
 
 
 
298
 
299
  # Pie chart
300
  fig = px.pie(
@@ -302,7 +321,11 @@ with tab3:
302
  names=list(classifications.keys()),
303
  title="Classification Distribution",
304
  color=list(classifications.keys()),
305
- color_discrete_map={"RIPE": "green", "UNKNOWN": "orange", "UNRIPE": "red"},
 
 
 
 
306
  )
307
  st.plotly_chart(fig, use_container_width=True)
308
 
@@ -311,4 +334,6 @@ with tab3:
311
 
312
  # Footer
313
  st.markdown("---")
314
- st.markdown("*Adjust thresholds in the sidebar to see real-time impact on classification*")
 
 
 
99
  )
100
 
101
  # Main content
102
+ tab1, tab2, tab3 = st.tabs(
103
+ ["Current Configuration", "Interactive Testing", "Batch Classification"]
104
+ )
105
 
106
  with tab1:
107
  st.markdown("### Current Classifier Configuration")
 
155
  stage_rules = {
156
  "PRE-TRIAL": {"min_days": 60, "keywords": ["affidavit filed", "reply filed"]},
157
  "TRIAL": {"min_days": 45, "keywords": ["evidence complete", "cross complete"]},
158
+ "POST-TRIAL": {
159
+ "min_days": 30,
160
+ "keywords": ["arguments complete", "written note"],
161
+ },
162
  "FINAL DISPOSAL": {"min_days": 15, "keywords": ["disposed", "judgment"]},
163
  }
164
 
 
195
  service_hearings_count = st.number_input(
196
  "Service Hearings", min_value=0, max_value=20, value=3
197
  )
198
+ days_in_stage = st.number_input(
199
+ "Days in Stage", min_value=0, max_value=365, value=45
200
+ )
201
+ case_age = st.number_input(
202
+ "Case Age (days)", min_value=0, max_value=3650, value=120
203
+ )
204
 
205
  # Keywords
206
  has_keywords = st.multiselect(
 
222
 
223
  test_case = Case(
224
  case_id=case_id,
225
+ case_type=case_type,
226
  filed_date=filed_date,
227
  current_stage=case_stage,
228
  status=CaseStatus.PENDING,
 
295
 
296
  with col1:
297
  pct = classifications["RIPE"] / len(cases) * 100
298
+ st.metric(
299
+ "RIPE Cases", f"{classifications['RIPE']:,}", f"{pct:.1f}%"
300
+ )
301
 
302
  with col2:
303
  pct = classifications["UNKNOWN"] / len(cases) * 100
304
+ st.metric(
305
+ "UNKNOWN Cases",
306
+ f"{classifications['UNKNOWN']:,}",
307
+ f"{pct:.1f}%",
308
+ )
309
 
310
  with col3:
311
  pct = classifications["UNRIPE"] / len(cases) * 100
312
+ st.metric(
313
+ "UNRIPE Cases",
314
+ f"{classifications['UNRIPE']:,}",
315
+ f"{pct:.1f}%",
316
+ )
317
 
318
  # Pie chart
319
  fig = px.pie(
 
321
  names=list(classifications.keys()),
322
  title="Classification Distribution",
323
  color=list(classifications.keys()),
324
+ color_discrete_map={
325
+ "RIPE": "green",
326
+ "UNKNOWN": "orange",
327
+ "UNRIPE": "red",
328
+ },
329
  )
330
  st.plotly_chart(fig, use_container_width=True)
331
 
 
334
 
335
  # Footer
336
  st.markdown("---")
337
+ st.markdown(
338
+ "*Adjust thresholds in the sidebar to see real-time impact on classification*"
339
+ )
scheduler/dashboard/pages/3_Simulation_Workflow.py CHANGED
@@ -9,7 +9,6 @@ Multi-step workflow:
9
 
10
  from __future__ import annotations
11
 
12
- import subprocess
13
  from datetime import date, datetime
14
  from pathlib import Path
15
 
@@ -107,11 +106,15 @@ if st.session_state.workflow_step == 1:
107
  )
108
 
109
  start_date = st.date_input(
110
- "Filing period start", value=date(2022, 1, 1), help="Start date for case filings"
 
 
111
  )
112
 
113
  end_date = st.date_input(
114
- "Filing period end", value=date(2023, 12, 31), help="End date for case filings"
 
 
115
  )
116
 
117
  with col2:
@@ -124,7 +127,9 @@ if st.session_state.workflow_step == 1:
124
  )
125
 
126
  output_dir = st.text_input(
127
- "Output directory", value="data/generated", help="Directory to save generated cases"
 
 
128
  )
129
 
130
  st.info(f"Cases will be saved to: {output_dir}/cases.csv")
@@ -142,13 +147,21 @@ if st.session_state.workflow_step == 1:
142
  col_a, col_b, col_c = st.columns(3)
143
 
144
  with col_a:
145
- rsa_pct = st.number_input("RSA %", 0, 100, 20, help="Regular Second Appeal")
146
- rfa_pct = st.number_input("RFA %", 0, 100, 17, help="Regular First Appeal")
147
- crp_pct = st.number_input("CRP %", 0, 100, 20, help="Civil Revision Petition")
 
 
 
 
 
 
148
 
149
  with col_b:
150
  ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal")
151
- ccc_pct = st.number_input("CCC %", 0, 100, 11, help="Civil Contempt")
 
 
152
  cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition")
153
 
154
  with col_c:
@@ -156,55 +169,92 @@ if st.session_state.workflow_step == 1:
156
  "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition"
157
  )
158
 
159
- total_pct = rsa_pct + rfa_pct + crp_pct + ca_pct + ccc_pct + cp_pct + cmp_pct
 
 
 
 
 
 
 
 
160
  if total_pct != 100:
161
  st.error(f"Total: {total_pct}% (must be 100%)")
162
  else:
163
  st.success(f"Total: {total_pct}%")
164
  else:
165
  st.info("Using default distribution from historical data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  if st.button("Generate Cases", type="primary", use_container_width=True):
168
  with st.spinner(f"Generating {n_cases:,} cases..."):
169
  try:
170
- # Ensure output directory exists
171
- output_path = Path(output_dir)
172
- output_path.mkdir(parents=True, exist_ok=True)
173
- cases_file = output_path / "cases.csv"
174
-
175
- # Run generation via CLI
176
- result = subprocess.run(
177
- [
178
- "uv",
179
- "run",
180
- "court-scheduler",
181
- "generate",
182
- "--cases",
183
- str(n_cases),
184
- "--start",
185
- start_date.isoformat(),
186
- "--end",
187
- end_date.isoformat(),
188
- "--output",
189
- str(cases_file),
190
- "--seed",
191
- str(seed),
192
- ],
193
- capture_output=True,
194
- text=True,
195
- cwd=str(Path.cwd()),
196
  )
197
 
198
- if result.returncode == 0:
199
- st.success(f"Generated {n_cases:,} cases successfully")
200
- st.session_state.cases_ready = True
201
- st.session_state.cases_path = str(cases_file)
202
- st.session_state.workflow_step = 2
203
- st.rerun()
204
- else:
205
- st.error(f"Generation failed with error code {result.returncode}")
206
- with st.expander("Show error details"):
207
- st.code(result.stderr, language="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  except Exception as e:
209
  st.error(f"Error generating cases: {e}")
210
 
@@ -253,7 +303,9 @@ if st.session_state.workflow_step == 1:
253
  cases_file = temp_path / "uploaded_cases.csv"
254
  df.to_csv(cases_file, index=False)
255
 
256
- if st.button("Use This Dataset", type="primary", use_container_width=True):
 
 
257
  st.session_state.cases_ready = True
258
  st.session_state.cases_path = str(cases_file)
259
  st.session_state.workflow_step = 2
@@ -305,7 +357,11 @@ elif st.session_state.workflow_step == 2:
305
  )
306
 
307
  seed_sim = st.number_input(
308
- "Random seed", min_value=0, max_value=9999, value=42, help="Seed for reproducibility"
 
 
 
 
309
  )
310
 
311
  log_dir = st.text_input(
@@ -394,7 +450,9 @@ elif st.session_state.workflow_step == 2:
394
  st.rerun()
395
 
396
  with col2:
397
- if st.button("Next: Run Simulation ->", type="primary", use_container_width=True):
 
 
398
  st.session_state.workflow_step = 3
399
  st.rerun()
400
 
@@ -425,98 +483,71 @@ elif st.session_state.workflow_step == 3:
425
  if st.button("Start Simulation", type="primary", use_container_width=True):
426
  with st.spinner("Running simulation... This may take several minutes."):
427
  try:
428
- # Create a unique per-run directory under the selected base output folder
429
- ts = datetime.now().strftime("%Y%m%d_%H%M%S")
430
- base_out_dir = (
431
- Path(config["log_dir"])
432
- if config.get("log_dir")
433
- else Path("outputs") / "simulation_runs"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  )
 
 
 
435
  run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}"
436
  run_dir.mkdir(parents=True, exist_ok=True)
437
 
438
- # Persist effective run directory
439
  st.session_state.sim_config["log_dir"] = str(run_dir)
440
 
441
- # Build command
442
- cmd = [
443
- "uv",
444
- "run",
445
- "court-scheduler",
446
- "simulate",
447
- "--cases",
448
- config["cases"],
449
- "--days",
450
- str(config["days"]),
451
- "--policy",
452
- config["policy"],
453
- "--seed",
454
- str(config["seed"]),
455
- ]
456
-
457
- if config.get("start"):
458
- cmd.extend(["--start", config["start"]])
459
-
460
- # Always pass the per-run output directory
461
- cmd.extend(["--log-dir", str(run_dir)])
462
-
463
- # Run simulation
464
- result = subprocess.run(
465
- cmd,
466
- capture_output=True,
467
- text=True,
468
- cwd=str(Path.cwd()),
469
- )
470
-
471
- if result.returncode == 0:
472
- st.success("Simulation completed successfully")
473
 
474
- # Parse output to extract results
475
- st.session_state.sim_results = {
476
- "success": True,
477
- "output": result.stdout,
478
- "log_dir": str(run_dir),
479
- "completed_at": datetime.now().isoformat(),
480
- }
481
 
482
- # Auto-generate Daily Cause Lists from events.csv
483
- try:
484
- log_dir_path = (
485
- Path(st.session_state.sim_results["log_dir"])
486
- if st.session_state.sim_results.get("log_dir")
487
- else run_dir
488
- )
489
- events_path = log_dir_path / "events.csv"
490
- if events_path.exists():
491
- generator = CauseListGenerator(events_path)
492
- # Save directly in the run directory (no subfolder)
493
- compiled_path = generator.generate_daily_lists(log_dir_path)
494
- summary_path = log_dir_path / "daily_summaries.csv"
495
- # Store generated paths for display in Step 4
496
- st.session_state.sim_results["cause_lists"] = {
497
- "compiled": str(compiled_path),
498
- "summary": str(summary_path),
499
- }
500
- st.info(f"Daily cause lists generated in {log_dir_path}")
501
- else:
502
- st.warning(
503
- f"events.csv not found at {events_path}. Skipping cause list generation."
504
- )
505
- except Exception as gen_err:
506
- st.warning(f"Failed to generate daily cause lists: {gen_err}")
507
 
508
- st.session_state.workflow_step = 4
509
- st.rerun()
510
- else:
511
- st.error(f"Simulation failed with error code {result.returncode}")
512
- with st.expander("Show error details"):
513
- st.code(result.stderr, language="text")
514
 
515
- st.session_state.sim_results = {
516
- "success": False,
517
- "error": result.stderr,
518
  }
519
 
 
 
 
520
  except Exception as e:
521
  st.error(f"Error running simulation: {e}")
522
  st.session_state.sim_results = {
@@ -565,7 +596,9 @@ elif st.session_state.workflow_step == 4:
565
  for file in files:
566
  col1, col2 = st.columns([3, 1])
567
  with col1:
568
- st.markdown(f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)")
 
 
569
  with col2:
570
  if file.suffix in [".csv", ".txt"]:
571
  with open(file, "rb") as f:
@@ -573,7 +606,9 @@ elif st.session_state.workflow_step == 4:
573
  label="Download",
574
  data=f.read(),
575
  file_name=file.name,
576
- mime="text/csv" if file.suffix == ".csv" else "text/plain",
 
 
577
  key=f"download_{file.name}",
578
  )
579
 
@@ -594,7 +629,10 @@ elif st.session_state.workflow_step == 4:
594
  x=metrics_df.index,
595
  y="disposal_rate",
596
  title="Disposal Rate Over Time",
597
- labels={"x": "Day", "disposal_rate": "Disposal Rate"},
 
 
 
598
  )
599
  st.plotly_chart(fig, use_container_width=True)
600
 
@@ -611,7 +649,9 @@ elif st.session_state.workflow_step == 4:
611
 
612
  # Show summary statistics
613
  st.markdown("### Summary Statistics")
614
- st.dataframe(metrics_df.describe(), use_container_width=True)
 
 
615
 
616
  except Exception as e:
617
  st.warning(f"Could not load metrics: {e}")
@@ -660,7 +700,9 @@ elif st.session_state.workflow_step == 4:
660
  else None
661
  )
662
  if events_csv and events_csv.exists():
663
- if st.button("Generate Daily Cause Lists Now", use_container_width=False):
 
 
664
  try:
665
  # Save directly alongside events.csv (run directory root)
666
  out_dir = events_csv.parent
 
9
 
10
  from __future__ import annotations
11
 
 
12
  from datetime import date, datetime
13
  from pathlib import Path
14
 
 
106
  )
107
 
108
  start_date = st.date_input(
109
+ "Filing period start",
110
+ value=date(2022, 1, 1),
111
+ help="Start date for case filings",
112
  )
113
 
114
  end_date = st.date_input(
115
+ "Filing period end",
116
+ value=date(2023, 12, 31),
117
+ help="End date for case filings",
118
  )
119
 
120
  with col2:
 
127
  )
128
 
129
  output_dir = st.text_input(
130
+ "Output directory",
131
+ value="data/generated",
132
+ help="Directory to save generated cases",
133
  )
134
 
135
  st.info(f"Cases will be saved to: {output_dir}/cases.csv")
 
147
  col_a, col_b, col_c = st.columns(3)
148
 
149
  with col_a:
150
+ rsa_pct = st.number_input(
151
+ "RSA %", 0, 100, 20, help="Regular Second Appeal"
152
+ )
153
+ rfa_pct = st.number_input(
154
+ "RFA %", 0, 100, 17, help="Regular First Appeal"
155
+ )
156
+ crp_pct = st.number_input(
157
+ "CRP %", 0, 100, 20, help="Civil Revision Petition"
158
+ )
159
 
160
  with col_b:
161
  ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal")
162
+ ccc_pct = st.number_input(
163
+ "CCC %", 0, 100, 11, help="Civil Contempt"
164
+ )
165
  cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition")
166
 
167
  with col_c:
 
169
  "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition"
170
  )
171
 
172
+ total_pct = (
173
+ rsa_pct
174
+ + rfa_pct
175
+ + crp_pct
176
+ + ca_pct
177
+ + ccc_pct
178
+ + cp_pct
179
+ + cmp_pct
180
+ )
181
  if total_pct != 100:
182
  st.error(f"Total: {total_pct}% (must be 100%)")
183
  else:
184
  st.success(f"Total: {total_pct}%")
185
  else:
186
  st.info("Using default distribution from historical data")
187
+ from scheduler.dashboard.utils.ui_input_parser import (
188
+ build_case_type_distribution,
189
+ merge_with_default_config,
190
+ )
191
+
192
+ case_type_dist_dict = None
193
+ if use_custom_dist:
194
+ case_type_dist_dict = build_case_type_distribution(
195
+ rsa_pct,
196
+ rfa_pct,
197
+ crp_pct,
198
+ ca_pct,
199
+ ccc_pct,
200
+ cp_pct,
201
+ cmp_pct,
202
+ )
203
 
204
  if st.button("Generate Cases", type="primary", use_container_width=True):
205
  with st.spinner(f"Generating {n_cases:,} cases..."):
206
  try:
207
+ from cli.config import load_generate_config
208
+ from scheduler.data.case_generator import CaseGenerator
209
+
210
+ DEFAULT_GENERATE_CFG_PATH = Path("configs/generate.sample.toml")
211
+ config_from_file = None
212
+
213
+ if DEFAULT_GENERATE_CFG_PATH.exists():
214
+ config_from_file = load_generate_config(
215
+ DEFAULT_GENERATE_CFG_PATH
216
+ )
217
+ cfg = merge_with_default_config(
218
+ config_from_file,
219
+ n_cases=n_cases,
220
+ start_date=start_date,
221
+ end_date=end_date,
222
+ output_dir=output_dir,
223
+ seed=seed,
 
 
 
 
 
 
 
 
 
224
  )
225
 
226
+ # Prepare output dir
227
+ cfg.output.parent.mkdir(parents=True, exist_ok=True)
228
+
229
+ case_type_dist_dict = None
230
+ if use_custom_dist:
231
+ from scheduler.dashboard.utils.ui_input_parser import (
232
+ build_case_type_distribution,
233
+ )
234
+
235
+ case_type_dist_dict = build_case_type_distribution(
236
+ rsa_pct, rfa_pct, crp_pct, ca_pct, ccc_pct, cp_pct, cmp_pct
237
+ )
238
+
239
+ gen = CaseGenerator(start=cfg.start, end=cfg.end, seed=cfg.seed)
240
+
241
+ cases = gen.generate(
242
+ cfg.n_cases,
243
+ stage_mix_auto=True,
244
+ case_type_distribution=case_type_dist_dict,
245
+ )
246
+
247
+ # Save files
248
+ CaseGenerator.to_csv(cases, cfg.output)
249
+ hearings_path = cfg.output.parent / "hearings.csv"
250
+ CaseGenerator.to_hearings_csv(cases, hearings_path)
251
+
252
+ st.success(f"Generated {len(cases):,} cases successfully!")
253
+ st.session_state.cases_ready = True
254
+ st.session_state.cases_path = str(cfg.output)
255
+ st.session_state.workflow_step = 2
256
+ st.rerun()
257
+
258
  except Exception as e:
259
  st.error(f"Error generating cases: {e}")
260
 
 
303
  cases_file = temp_path / "uploaded_cases.csv"
304
  df.to_csv(cases_file, index=False)
305
 
306
+ if st.button(
307
+ "Use This Dataset", type="primary", use_container_width=True
308
+ ):
309
  st.session_state.cases_ready = True
310
  st.session_state.cases_path = str(cases_file)
311
  st.session_state.workflow_step = 2
 
357
  )
358
 
359
  seed_sim = st.number_input(
360
+ "Random seed",
361
+ min_value=0,
362
+ max_value=9999,
363
+ value=42,
364
+ help="Seed for reproducibility",
365
  )
366
 
367
  log_dir = st.text_input(
 
450
  st.rerun()
451
 
452
  with col2:
453
+ if st.button(
454
+ "Next: Run Simulation ->", type="primary", use_container_width=True
455
+ ):
456
  st.session_state.workflow_step = 3
457
  st.rerun()
458
 
 
483
  if st.button("Start Simulation", type="primary", use_container_width=True):
484
  with st.spinner("Running simulation... This may take several minutes."):
485
  try:
486
+ from cli.config import load_simulate_config
487
+ from scheduler.dashboard.utils.simulation_runner import (
488
+ merge_simulation_config,
489
+ run_simulation_dashboard,
490
+ )
491
+
492
+ DEFAULT_SIM_CFG_PATH = Path("configs/simulate.sample.toml")
493
+ if DEFAULT_SIM_CFG_PATH.exists():
494
+ default_cfg = load_simulate_config(DEFAULT_SIM_CFG_PATH)
495
+ else:
496
+ default_cfg = (
497
+ load_simulate_config(Path("parameter_sweep.toml"))
498
+ if Path("parameter_sweep.toml").exists()
499
+ else None
500
+ )
501
+
502
+ if default_cfg is None:
503
+ st.error("No default simulate config found.")
504
+ st.stop()
505
+
506
+ merged_cfg = merge_simulation_config(
507
+ default_cfg,
508
+ cases_path=config["cases"],
509
+ days=config["days"],
510
+ start_date=date.fromisoformat(config["start"])
511
+ if config.get("start")
512
+ else None,
513
+ policy=config["policy"],
514
+ seed=config["seed"],
515
+ log_dir=config["log_dir"],
516
  )
517
+
518
+ ts = datetime.now().strftime("%Y%m%d_%H%M%S")
519
+ base_out_dir = Path(config["log_dir"])
520
  run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}"
521
  run_dir.mkdir(parents=True, exist_ok=True)
522
 
523
+ # Update session config
524
  st.session_state.sim_config["log_dir"] = str(run_dir)
525
 
526
+ result = run_simulation_dashboard(merged_cfg, run_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
 
528
+ st.success("Simulation completed successfully!")
 
 
 
 
 
 
529
 
530
+ st.session_state.sim_results = {
531
+ "success": True,
532
+ "output": result["summary"],
533
+ "log_dir": str(run_dir),
534
+ "completed_at": datetime.now().isoformat(),
535
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
 
537
+ events_path = result["events_path"]
538
+ if events_path.exists():
539
+ generator = CauseListGenerator(events_path)
540
+ compiled_path = generator.generate_daily_lists(run_dir)
541
+ summary_path = run_dir / "daily_summaries.csv"
 
542
 
543
+ st.session_state.sim_results["cause_lists"] = {
544
+ "compiled": str(compiled_path),
545
+ "summary": str(summary_path),
546
  }
547
 
548
+ st.session_state.workflow_step = 4
549
+ st.rerun()
550
+
551
  except Exception as e:
552
  st.error(f"Error running simulation: {e}")
553
  st.session_state.sim_results = {
 
596
  for file in files:
597
  col1, col2 = st.columns([3, 1])
598
  with col1:
599
+ st.markdown(
600
+ f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)"
601
+ )
602
  with col2:
603
  if file.suffix in [".csv", ".txt"]:
604
  with open(file, "rb") as f:
 
606
  label="Download",
607
  data=f.read(),
608
  file_name=file.name,
609
+ mime="text/csv"
610
+ if file.suffix == ".csv"
611
+ else "text/plain",
612
  key=f"download_{file.name}",
613
  )
614
 
 
629
  x=metrics_df.index,
630
  y="disposal_rate",
631
  title="Disposal Rate Over Time",
632
+ labels={
633
+ "x": "Day",
634
+ "disposal_rate": "Disposal Rate",
635
+ },
636
  )
637
  st.plotly_chart(fig, use_container_width=True)
638
 
 
649
 
650
  # Show summary statistics
651
  st.markdown("### Summary Statistics")
652
+ st.dataframe(
653
+ metrics_df.describe(), use_container_width=True
654
+ )
655
 
656
  except Exception as e:
657
  st.warning(f"Could not load metrics: {e}")
 
700
  else None
701
  )
702
  if events_csv and events_csv.exists():
703
+ if st.button(
704
+ "Generate Daily Cause Lists Now", use_container_width=False
705
+ ):
706
  try:
707
  # Save directly alongside events.csv (run directory root)
708
  out_dir = events_csv.parent
scheduler/dashboard/utils/simulation_runner.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from pathlib import Path
3
+ from datetime import date
4
+
5
+ from cli.config import SimulateConfig
6
+ from scheduler.data.case_generator import CaseGenerator
7
+ from scheduler.simulation.engine import CourtSim, CourtSimConfig
8
+ from scheduler.core.case import CaseStatus
9
+ from scheduler.metrics.basic import gini
10
+
11
+
12
def merge_simulation_config(
    default_cfg: SimulateConfig,
    cases_path: str | None,
    days: int | None,
    start_date: date | None,
    policy: str | None,
    seed: int | None,
    log_dir: str | None,
) -> SimulateConfig:
    """Merge UI inputs with the default simulation config.

    Every falsy UI value (empty string, ``0``, ``None``) falls back to the
    corresponding field on *default_cfg*. ``seed`` is the one exception:
    it falls back only when it is ``None``, so an explicit seed of 0 from
    the UI is respected.

    Parameters
    ----------
    default_cfg:
        Repository default ``SimulateConfig`` providing fallback values.
    cases_path, days, start_date, policy, seed, log_dir:
        Raw values collected from the dashboard widgets; any of them may
        be unset.

    Returns
    -------
    SimulateConfig
        A new config object; *default_cfg* is never mutated.
    """
    return SimulateConfig(
        cases=Path(cases_path) if cases_path else default_cfg.cases,
        days=days or default_cfg.days,
        start=start_date or default_cfg.start,
        policy=policy or default_cfg.policy,
        # `is not None` (not truthiness) so seed=0 is a valid choice.
        seed=seed if seed is not None else default_cfg.seed,
        log_dir=Path(log_dir) if log_dir else default_cfg.log_dir,
    )
30
+
31
+
32
def run_simulation_dashboard(scfg: SimulateConfig, run_dir: Path) -> dict:
    """Execute a simulation for the dashboard and summarise the results.

    Mirrors the CLI ``simulate`` command: loads cases from ``scfg.cases``
    (synthesising a small batch when the CSV is missing), runs ``CourtSim``,
    writes a human-readable ``report.txt`` into *run_dir*, and returns a
    dict with the summary text, the simulation end date, and the paths of
    any CSV artefacts the engine produced (``None`` when a file was not
    written).
    """
    # ------------------------------------------------------------------
    # Load case data
    # ------------------------------------------------------------------
    path = scfg.cases
    if path.exists():
        cases = CaseGenerator.from_csv(path)
        # Default the horizon start to the latest filing date so every
        # loaded case exists before the simulation begins.
        start = scfg.start or (
            max(c.filed_date for c in cases) if cases else date.today()
        )
    else:
        # Fallback (CLI fallback behaviour): synthesise one month of cases.
        start = scfg.start or date.today().replace(day=1)
        gen = CaseGenerator(start=start, end=start.replace(day=28), seed=scfg.seed)
        cases = gen.generate(n_cases=5 * 151)

    # ------------------------------------------------------------------
    # Build CourtSimConfig
    # ------------------------------------------------------------------
    cfg = CourtSimConfig(
        start=start,
        days=scfg.days,
        seed=scfg.seed,
        policy=scfg.policy,
        duration_percentile=scfg.duration_percentile,
        log_dir=run_dir,
    )

    # ------------------------------------------------------------------
    # Run simulation
    # ------------------------------------------------------------------
    sim = CourtSim(cfg, cases)
    res = sim.run()

    # ------------------------------------------------------------------
    # Collect metrics exactly like CLI
    # ------------------------------------------------------------------
    disp_times = [
        (c.disposal_date - c.filed_date).days
        for c in cases
        if c.disposal_date is not None and c.status == CaseStatus.DISPOSED
    ]
    gini_disp = gini(disp_times) if disp_times else 0.0

    # Every ratio guards its denominator with max(1, ...); previously the
    # disposal rate divided by len(cases) unguarded and raised
    # ZeroDivisionError when an empty cases CSV was supplied.
    summary_text = f"""
Simulation Complete!
Horizon: {cfg.start} -> {res.end_date} ({cfg.days} days)

Hearing Metrics:
Total: {res.hearings_total}
Heard: {res.hearings_heard} ({res.hearings_heard / max(1, res.hearings_total):.1%})
Adjourned: {res.hearings_adjourned} ({res.hearings_adjourned / max(1, res.hearings_total):.1%})

Disposal Metrics:
Disposed: {res.disposals} ({res.disposals / max(1, len(cases)):.1%})
Gini coefficient: {gini_disp:.3f}

Efficiency:
Utilization: {res.utilization:.2%}
Avg hearings/day: {res.hearings_total / max(1, cfg.days):.2f}
"""

    (run_dir / "report.txt").write_text(summary_text, encoding="utf-8")

    # -------------------------------------------------------
    # Locate generated CSVs (if they exist)
    # -------------------------------------------------------
    metrics_path = run_dir / "metrics.csv"
    events_path = run_dir / "events.csv"

    return {
        "summary": summary_text,
        "end_date": res.end_date,
        "metrics_path": metrics_path if metrics_path.exists() else None,
        "events_path": events_path if events_path.exists() else None,
    }
scheduler/dashboard/utils/ui_input_parser.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from cli.config import GenerateConfig
3
+
4
+
5
def merge_with_default_config(
    default_cfg: GenerateConfig,
    n_cases: int,
    start_date,
    end_date,
    output_dir: str,
    seed: int,
) -> GenerateConfig:
    """Overlay the dashboard form values onto the repo's default config.

    A falsy form value (``0``, empty string, ``None``) falls back to the
    matching field on *default_cfg*; ``seed`` falls back only when it is
    ``None`` so an explicit seed of 0 is kept. *default_cfg* itself is
    never mutated — a fresh ``GenerateConfig`` is returned.
    """
    # Resolve each field up front so the constructor call stays readable.
    merged_output = (
        Path(output_dir) / "cases.csv" if output_dir else default_cfg.output
    )
    merged_seed = default_cfg.seed if seed is None else seed
    return GenerateConfig(
        n_cases=n_cases if n_cases else default_cfg.n_cases,
        start=start_date if start_date else default_cfg.start,
        end=end_date if end_date else default_cfg.end,
        output=merged_output,
        seed=merged_seed,
    )
21
+
22
+
23
def build_case_type_distribution(
    rsa_pct: int,
    rfa_pct: int,
    crp_pct: int,
    ca_pct: int,
    ccc_pct: int,
    cp_pct: int,
    cmp_pct: int,
) -> dict[str, float]:
    """Normalise per-case-type percentage inputs into a probability dict.

    The seven percentages are divided by their sum so the returned values
    add up to 1.0. When every percentage is zero an empty dict is
    returned, signalling "no distribution specified" to the caller.
    """
    # Table-driven form: one mapping drives both the total and the output,
    # so the case-type codes are listed exactly once.
    weights = {
        "RSA": rsa_pct,
        "RFA": rfa_pct,
        "CRP": crp_pct,
        "CA": ca_pct,
        "CCC": ccc_pct,
        "CP": cp_pct,
        "CMP": cmp_pct,
    }
    total = sum(weights.values())
    if not total:
        return {}
    return {code: share / total for code, share in weights.items()}