Spaces:
Sleeping
Sleeping
| """Simulation Workflow page - End-to-end scheduling simulation. | |
| Multi-step workflow: | |
| 1. Data Preparation - Generate or upload cases | |
| 2. Configuration - Set simulation parameters and policy | |
| 3. Run Simulation - Execute simulation with progress tracking | |
| 4. Results - View metrics, charts, and download outputs | |
| """ | |
| from __future__ import annotations | |
| from datetime import date, datetime | |
| from pathlib import Path | |
| import pandas as pd | |
| import plotly.express as px | |
| import streamlit as st | |
| from src.output.cause_list import CauseListGenerator | |
| from src.config.paths import get_runs_base | |
| CLI_VERSION = "1.0.0" | |
# Page configuration and heading.
_PAGE_TITLE = "Simulation Workflow"
st.set_page_config(page_title=_PAGE_TITLE, page_icon="gear", layout="wide")
st.title(_PAGE_TITLE)
st.markdown("Run scheduling simulations with configurable parameters")

# Seed the per-session workflow state. A key is only written when it is not
# already present, so values set by an earlier script run are kept as they are.
_SESSION_DEFAULTS = {
    "workflow_step": 1,  # which of the four workflow steps is rendered
    "cases_ready": False,  # set to True by Step 1 once a dataset is chosen
    "sim_config": {},  # filled in by Step 2
    "sim_results": None,  # filled in by Step 3
    "cases_path": None,  # path of the generated/uploaded cases CSV
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# Progress indicator
def _step_status(step: int, current: int, *, is_last: bool = False) -> str:
    """Return the text marker shown in front of workflow step *step*.

    Args:
        step: 1-based number of the step being labelled.
        current: The step the session is currently on.
        is_last: Whether *step* is the final step of the workflow.

    Returns:
        ``"[DONE]"`` when the session has moved past *step*, ``"[NOW]"`` when
        the session is on *step*, and ``"[ ]"`` otherwise. The final step is
        reported as ``"[DONE]"`` as soon as it is reached, which is what the
        page rendered before this helper existed.
    """
    if current > step:
        return "[DONE]"
    if current == step:
        # Reaching the last step means the workflow itself has finished.
        return "[DONE]" if is_last else "[NOW]"
    return "[ ]"


st.markdown("### Workflow Progress")
_STEP_LABELS = (
    "1. Data Preparation",
    "2. Configuration",
    "3. Run Simulation",
    "4. View Results",
)
_current_step = st.session_state.workflow_step
_progress_columns = st.columns(len(_STEP_LABELS))
for _step_number, (_column, _label) in enumerate(
    zip(_progress_columns, _STEP_LABELS), start=1
):
    with _column:
        _marker = _step_status(
            _step_number,
            _current_step,
            is_last=_step_number == len(_STEP_LABELS),
        )
        st.markdown(f"**{_marker} {_label}**")
st.markdown("---")
# STEP 1: Data Preparation
# Lets the user either generate a synthetic case set or upload a CSV. On
# success the chosen file path is stored in st.session_state.cases_path and the
# workflow advances to step 2.
if st.session_state.workflow_step == 1:
    st.markdown("## Step 1: Data Preparation")
    st.markdown("Choose how to provide case data for simulation")
    data_source = st.radio(
        "Data Source",
        ["Generate Synthetic Cases", "Upload Case CSV"],
        help="Generate synthetic cases based on parameters, or upload your own dataset",
    )
    if data_source == "Generate Synthetic Cases":
        st.markdown("### Generate Synthetic Cases")
        col1, col2 = st.columns(2)
        with col1:
            n_cases = st.number_input(
                "Number of cases",
                min_value=100,
                max_value=100000,
                value=10000,
                step=100,
                help="Number of cases to generate",
            )
            start_date = st.date_input(
                "Filing period start",
                value=date(2022, 1, 1),
                help="Start date for case filings",
            )
            end_date = st.date_input(
                "Filing period end",
                value=date(2023, 12, 31),
                help="End date for case filings",
            )
        with col2:
            seed = st.number_input(
                "Random seed",
                min_value=0,
                max_value=9999,
                value=42,
                help="Seed for reproducibility",
            )
            output_dir = st.text_input(
                "Output directory",
                value="data/generated",
                help="Directory to save generated cases",
            )
            st.info(f"Cases will be saved to: {output_dir}/cases.csv")
        # Advanced: Case Type Distribution
        # custom_pcts stays None unless the user opts into a custom mix; when set
        # it holds the seven percentages in the positional order expected by
        # build_case_type_distribution (RSA, RFA, CRP, CA, CCC, CP, CMP).
        custom_pcts = None
        with st.expander("Advanced: Case Type Distribution", expanded=False):
            st.markdown(
                """Customize the distribution of case types. Leave default for realistic distribution based on historical data."""
            )
            use_custom_dist = st.checkbox("Use custom distribution", value=False)
            if use_custom_dist:
                st.warning("Custom distribution: Percentages must sum to 100%")
                col_a, col_b, col_c = st.columns(3)
                with col_a:
                    rsa_pct = st.number_input(
                        "RSA %", 0, 100, 20, help="Regular Second Appeal"
                    )
                    rfa_pct = st.number_input(
                        "RFA %", 0, 100, 17, help="Regular First Appeal"
                    )
                    crp_pct = st.number_input(
                        "CRP %", 0, 100, 20, help="Civil Revision Petition"
                    )
                with col_b:
                    ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal")
                    ccc_pct = st.number_input(
                        "CCC %", 0, 100, 11, help="Civil Contempt"
                    )
                    cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition")
                with col_c:
                    cmp_pct = st.number_input(
                        "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition"
                    )
                    custom_pcts = (
                        rsa_pct,
                        rfa_pct,
                        crp_pct,
                        ca_pct,
                        ccc_pct,
                        cp_pct,
                        cmp_pct,
                    )
                    total_pct = sum(custom_pcts)
                    if total_pct != 100:
                        st.error(f"Total: {total_pct}% (must be 100%)")
                    else:
                        st.success(f"Total: {total_pct}%")
            else:
                st.info("Using default distribution from historical data")
        from src.dashboard.utils.ui_input_parser import (
            build_case_type_distribution,
            merge_with_default_config,
        )

        # Refuse to generate from inputs the page itself flags as invalid. The
        # reasons are repeated next to the button because the expander holding
        # the percentage total may be collapsed.
        dist_is_valid = custom_pcts is None or sum(custom_pcts) == 100
        dates_are_valid = start_date <= end_date
        if not dist_is_valid:
            st.error(
                "Custom case type percentages must sum to 100% before generating."
            )
        if not dates_are_valid:
            st.error("Filing period start must not be after filing period end.")
        if st.button(
            "Generate Cases",
            type="primary",
            use_container_width=True,
            disabled=not (dist_is_valid and dates_are_valid),
        ):
            with st.spinner(f"Generating {n_cases:,} cases..."):
                try:
                    from cli.config import load_generate_config
                    from src.data.case_generator import CaseGenerator

                    DEFAULT_GENERATE_CFG_PATH = Path("configs/generate.sample.toml")
                    # The sample config is optional; None is handed to
                    # merge_with_default_config when it is absent.
                    config_from_file = None
                    if DEFAULT_GENERATE_CFG_PATH.exists():
                        config_from_file = load_generate_config(
                            DEFAULT_GENERATE_CFG_PATH
                        )
                    cfg = merge_with_default_config(
                        config_from_file,
                        n_cases=n_cases,
                        start_date=start_date,
                        end_date=end_date,
                        output_dir=output_dir,
                        seed=seed,
                    )
                    # Prepare output dir
                    cfg.output.parent.mkdir(parents=True, exist_ok=True)
                    # Built once, inside the try, so a failure in the helper is
                    # reported through the error banner below.
                    case_type_dist_dict = (
                        build_case_type_distribution(*custom_pcts)
                        if custom_pcts is not None
                        else None
                    )
                    gen = CaseGenerator(start=cfg.start, end=cfg.end, seed=cfg.seed)
                    cases = gen.generate(
                        cfg.n_cases,
                        stage_mix_auto=True,
                        case_type_distribution=case_type_dist_dict,
                    )
                    # Save files
                    CaseGenerator.to_csv(cases, cfg.output)
                    hearings_path = cfg.output.parent / "hearings.csv"
                    CaseGenerator.to_hearings_csv(cases, hearings_path)
                    st.success(f"Generated {len(cases):,} cases successfully!")
                    st.session_state.cases_ready = True
                    st.session_state.cases_path = str(cfg.output)
                    st.session_state.workflow_step = 2
                    st.rerun()
                except Exception as e:
                    st.error(f"Error generating cases: {e}")
    else:  # Upload CSV
        st.markdown("### Upload Case CSV")
        st.markdown("""
        Upload a CSV file with case data. Required columns:
        - `case_id`: Unique case identifier
        - `case_type`: Type of case (RSA, RFA, etc.)
        - `filed_date`: Date case was filed (YYYY-MM-DD)
        - `stage`: Current stage (or `current_stage` — will be accepted and mapped to `stage`)
        - Additional columns will be preserved
        """)
        uploaded_file = st.file_uploader(
            "Choose a CSV file", type=["csv"], help="Upload CSV with case data"
        )
        if uploaded_file is not None:
            try:
                # Read and validate
                df = pd.read_csv(uploaded_file)
                # If the uploaded file uses `current_stage`, map it to `stage` for compatibility
                if "stage" not in df.columns and "current_stage" in df.columns:
                    # Preserve original `current_stage` column and add `stage`
                    df["stage"] = df["current_stage"]
                # Check required columns
                required_cols = ["case_id", "case_type", "filed_date", "stage"]
                missing_cols = [col for col in required_cols if col not in df.columns]
                if missing_cols:
                    st.error(f"Missing required columns: {', '.join(missing_cols)}")
                else:
                    st.success(f"Valid CSV uploaded with {len(df):,} cases")
                    # Show preview
                    st.markdown("**Preview:**")
                    st.dataframe(df.head(10), use_container_width=True)
                    if st.button(
                        "Use This Dataset", type="primary", use_container_width=True
                    ):
                        # Persist only after the user confirms, so merely
                        # previewing a file never overwrites the dataset at
                        # this fixed path.
                        temp_path = Path("data/generated")
                        temp_path.mkdir(parents=True, exist_ok=True)
                        cases_file = temp_path / "uploaded_cases.csv"
                        df.to_csv(cases_file, index=False)
                        st.session_state.cases_ready = True
                        st.session_state.cases_path = str(cases_file)
                        st.session_state.workflow_step = 2
                        st.rerun()
            except Exception as e:
                st.error(f"Error reading CSV: {e}")
| # STEP 2: Configuration | |
| elif st.session_state.workflow_step == 2: | |
| st.markdown("## Step 2: Configuration") | |
| st.markdown("Configure simulation parameters and scheduling policy") | |
| st.info(f"Cases loaded from: {st.session_state.cases_path}") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown("### Simulation Parameters") | |
| days = st.number_input( | |
| "Simulation days", | |
| min_value=30, | |
| max_value=1000, | |
| value=384, | |
| help="Number of working days to simulate (384 = ~2 years)", | |
| ) | |
| courtrooms = st.number_input( | |
| "Number of courtrooms", | |
| min_value=1, | |
| max_value=20, | |
| value=5, | |
| help="Number of courtrooms to simulate", | |
| ) | |
| daily_capacity = st.number_input( | |
| "Daily capacity per courtroom", | |
| min_value=10, | |
| max_value=300, | |
| value=151, | |
| help="Maximum hearings per courtroom per day (median from historical data: 151)", | |
| ) | |
| start_date_sim = st.date_input( | |
| "Simulation start date", | |
| value=date.today(), | |
| help="Start date for simulation (leave default to use last filing date)", | |
| ) | |
| seed_sim = st.number_input( | |
| "Random seed", | |
| min_value=0, | |
| max_value=9999, | |
| value=42, | |
| help="Seed for reproducibility", | |
| ) | |
| log_dir = st.text_input( | |
| "Output directory", | |
| value=str(get_runs_base()), | |
| help="Directory to save simulation outputs (override with DASHBOARD_RUNS_BASE env var)", | |
| ) | |
| with col2: | |
| st.markdown("### Scheduling Policy") | |
| policy = st.selectbox( | |
| "Policy", | |
| ["readiness", "fifo", "age"], | |
| index=0, | |
| help="readiness: score-based | fifo: first-in-first-out | age: oldest first", | |
| ) | |
| if policy == "readiness": | |
| st.markdown("**Readiness Policy Parameters:**") | |
| fairness_weight = st.slider( | |
| "Fairness weight", | |
| min_value=0.0, | |
| max_value=1.0, | |
| value=0.4, | |
| step=0.05, | |
| help="Weight for fairness (age-based priority)", | |
| ) | |
| efficiency_weight = st.slider( | |
| "Efficiency weight", | |
| min_value=0.0, | |
| max_value=1.0, | |
| value=0.3, | |
| step=0.05, | |
| help="Weight for efficiency (stage readiness)", | |
| ) | |
| urgency_weight = st.slider( | |
| "Urgency weight", | |
| min_value=0.0, | |
| max_value=1.0, | |
| value=0.3, | |
| step=0.05, | |
| help="Weight for urgency (priority cases)", | |
| ) | |
| total = fairness_weight + efficiency_weight + urgency_weight | |
| if abs(total - 1.0) > 0.01: | |
| st.warning(f"Weights sum to {total:.2f}, should sum to 1.0") | |
| st.markdown("---") | |
| st.markdown("**Advanced Options:**") | |
| duration_percentile = st.selectbox( | |
| "Duration estimation", | |
| ["median", "mean", "p75"], | |
| index=0, | |
| help="How to estimate hearing durations", | |
| ) | |
| # Store configuration | |
| st.session_state.sim_config = { | |
| "cases": st.session_state.cases_path, | |
| "days": days, | |
| "start": start_date_sim.isoformat() if start_date_sim else None, | |
| "policy": policy, | |
| "seed": seed_sim, | |
| "log_dir": log_dir, | |
| "duration_percentile": duration_percentile, | |
| } | |
| if policy == "readiness": | |
| st.session_state.sim_config["fairness_weight"] = fairness_weight | |
| st.session_state.sim_config["efficiency_weight"] = efficiency_weight | |
| st.session_state.sim_config["urgency_weight"] = urgency_weight | |
| st.markdown("---") | |
| col1, col2 = st.columns([1, 3]) | |
| with col1: | |
| if st.button("← Back", use_container_width=True): | |
| st.session_state.workflow_step = 1 | |
| st.rerun() | |
| with col2: | |
| if st.button( | |
| "Next: Run Simulation ->", type="primary", use_container_width=True | |
| ): | |
| st.session_state.workflow_step = 3 | |
| st.rerun() | |
| # STEP 3: Run Simulation | |
| elif st.session_state.workflow_step == 3: | |
| st.markdown("## Step 3: Run Simulation") | |
| config = st.session_state.sim_config | |
| st.markdown("### Configuration Summary") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown(f""" | |
| - **Cases:** {config["cases"]} | |
| - **Simulation days:** {config["days"]} | |
| - **Policy:** {config["policy"]} | |
| """) | |
| with col2: | |
| st.markdown(f""" | |
| - **Random seed:** {config["seed"]} | |
| - **Output:** {config["log_dir"]} | |
| """) | |
| st.markdown("---") | |
| if st.button("Start Simulation", type="primary", use_container_width=True): | |
| with st.spinner("Running simulation... This may take several minutes."): | |
| try: | |
| from cli.config import load_simulate_config | |
| from src.dashboard.utils.simulation_runner import ( | |
| merge_simulation_config, | |
| run_simulation_dashboard, | |
| ) | |
| DEFAULT_SIM_CFG_PATH = Path("configs/simulate.sample.toml") | |
| if DEFAULT_SIM_CFG_PATH.exists(): | |
| default_cfg = load_simulate_config(DEFAULT_SIM_CFG_PATH) | |
| else: | |
| default_cfg = ( | |
| load_simulate_config(Path("parameter_sweep.toml")) | |
| if Path("parameter_sweep.toml").exists() | |
| else None | |
| ) | |
| if default_cfg is None: | |
| st.error("No default simulate config found.") | |
| st.stop() | |
| merged_cfg = merge_simulation_config( | |
| default_cfg, | |
| cases_path=config["cases"], | |
| days=config["days"], | |
| start_date=date.fromisoformat(config["start"]) | |
| if config.get("start") | |
| else None, | |
| policy=config["policy"], | |
| seed=config["seed"], | |
| log_dir=config["log_dir"], | |
| ) | |
| ts = datetime.now().strftime("%Y%m%d_%H%M%S") | |
| base_out_dir = Path(config["log_dir"]) | |
| run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}" | |
| run_dir.mkdir(parents=True, exist_ok=True) | |
| # Update session config | |
| st.session_state.sim_config["log_dir"] = str(run_dir) | |
| result = run_simulation_dashboard(merged_cfg, run_dir) | |
| st.success("Simulation completed successfully!") | |
| st.session_state.sim_results = { | |
| "success": True, | |
| "output": result["summary"], | |
| "insights": result.get("insights"), | |
| "log_dir": str(run_dir), | |
| "completed_at": datetime.now().isoformat(), | |
| } | |
| events_path = result["events_path"] | |
| if events_path.exists(): | |
| generator = CauseListGenerator(events_path) | |
| compiled_path = generator.generate_daily_lists(run_dir) | |
| summary_path = run_dir / "daily_summaries.csv" | |
| st.session_state.sim_results["cause_lists"] = { | |
| "compiled": str(compiled_path), | |
| "summary": str(summary_path), | |
| } | |
| st.session_state.workflow_step = 4 | |
| st.rerun() | |
| except Exception as e: | |
| st.error(f"Error running simulation: {e}") | |
| st.session_state.sim_results = { | |
| "success": False, | |
| "error": str(e), | |
| } | |
| st.markdown("---") | |
| if st.button("← Back to Configuration", use_container_width=True): | |
| st.session_state.workflow_step = 2 | |
| st.rerun() | |
| # STEP 4: Results | |
| elif st.session_state.workflow_step == 4: | |
| st.markdown("## Step 4: Results") | |
| results = st.session_state.sim_results | |
| if not results or not results.get("success"): | |
| st.error("Simulation did not complete successfully") | |
| if results and results.get("error"): | |
| with st.expander("Error details"): | |
| st.code(results["error"], language="text") | |
| if st.button("← Back to Run", use_container_width=True): | |
| st.session_state.workflow_step = 3 | |
| st.rerun() | |
| else: | |
| st.success(f"Simulation completed at {results['completed_at']}") | |
| # Display console output | |
| with st.expander("View simulation output"): | |
| st.code(results["output"], language="text") | |
| # Key Insights from engine (if available) | |
| insights_text = results.get("insights") | |
| if insights_text: | |
| st.markdown("### Key Insights") | |
| with st.expander("Show engine insights", expanded=True): | |
| st.code(insights_text, language="text") | |
| # Check for generated files | |
| log_dir = Path(results["log_dir"]) | |
| if log_dir.exists(): | |
| st.markdown("### Generated Files") | |
| files = list(log_dir.glob("*")) | |
| if files: | |
| st.markdown(f"**{len(files)} files generated in {log_dir}**") | |
| for file in files: | |
| col1, col2 = st.columns([3, 1]) | |
| with col1: | |
| st.markdown( | |
| f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)" | |
| ) | |
| with col2: | |
| if file.suffix in [".csv", ".txt"]: | |
| with open(file, "rb") as f: | |
| st.download_button( | |
| label="Download", | |
| data=f.read(), | |
| file_name=file.name, | |
| mime="text/csv" | |
| if file.suffix == ".csv" | |
| else "text/plain", | |
| key=f"download_{file.name}", | |
| ) | |
| # Try to load and display metrics | |
| metrics_file = log_dir / "metrics.csv" | |
| if metrics_file.exists(): | |
| st.markdown("---") | |
| st.markdown("### Metrics Over Time") | |
| try: | |
| metrics_df = pd.read_csv(metrics_file) | |
| if not metrics_df.empty: | |
| # Plot disposal rate over time | |
| if "disposal_rate" in metrics_df.columns: | |
| fig = px.line( | |
| metrics_df, | |
| x=metrics_df.index, | |
| y="disposal_rate", | |
| title="Disposal Rate Over Time", | |
| labels={ | |
| "x": "Day", | |
| "disposal_rate": "Disposal Rate", | |
| }, | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Plot utilization if available | |
| if "utilization" in metrics_df.columns: | |
| fig = px.line( | |
| metrics_df, | |
| x=metrics_df.index, | |
| y="utilization", | |
| title="Courtroom Utilization Over Time", | |
| labels={"x": "Day", "utilization": "Utilization"}, | |
| ) | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Show summary statistics | |
| st.markdown("### Summary Statistics") | |
| st.dataframe( | |
| metrics_df.describe(), use_container_width=True | |
| ) | |
| except Exception as e: | |
| st.warning(f"Could not load metrics: {e}") | |
| else: | |
| st.info("No output files found") | |
| else: | |
| st.warning(f"Output directory not found: {log_dir}") | |
| st.markdown("---") | |
| # Daily Cause Lists Section | |
| st.markdown("### Daily Cause Lists") | |
| cause_info = (results or {}).get("cause_lists") | |
| def _render_download(label: str, file_path: Path, mime: str = "text/csv"): | |
| try: | |
| with file_path.open("rb") as f: | |
| st.download_button( | |
| label=label, | |
| data=f.read(), | |
| file_name=file_path.name, | |
| mime=mime, | |
| key=f"dl_{file_path.name}", | |
| ) | |
| except Exception as e: | |
| st.warning(f"Unable to read {file_path.name}: {e}") | |
| if cause_info: | |
| compiled_path = Path(cause_info.get("compiled", "")) | |
| summary_path = Path(cause_info.get("summary", "")) | |
| if compiled_path.exists(): | |
| st.success(f"Compiled cause list ready: {compiled_path}") | |
| _render_download("Download compiled_cause_list.csv", compiled_path) | |
| try: | |
| df_preview = pd.read_csv(compiled_path, nrows=200) | |
| st.dataframe(df_preview.head(50), use_container_width=True) | |
| except Exception as e: | |
| st.warning(f"Preview unavailable: {e}") | |
| if summary_path.exists(): | |
| _render_download("Download daily_summaries.csv", summary_path) | |
| else: | |
| # Offer on-demand generation if not already created | |
| events_csv = ( | |
| (Path(results["log_dir"]) / "events.csv") | |
| if results and results.get("log_dir") | |
| else None | |
| ) | |
| if events_csv and events_csv.exists(): | |
| if st.button( | |
| "Generate Daily Cause Lists Now", use_container_width=False | |
| ): | |
| try: | |
| # Save directly alongside events.csv (run directory root) | |
| out_dir = events_csv.parent | |
| generator = CauseListGenerator(events_csv) | |
| compiled_path = generator.generate_daily_lists(out_dir) | |
| summary_path = out_dir / "daily_summaries.csv" | |
| st.session_state.sim_results["cause_lists"] = { | |
| "compiled": str(compiled_path), | |
| "summary": str(summary_path), | |
| } | |
| st.success(f"Daily cause lists generated in {out_dir}") | |
| st.rerun() | |
| except Exception as e: | |
| st.error(f"Failed to generate cause lists: {e}") | |
| else: | |
| st.info( | |
| "events.csv not found; run a simulation first to enable cause list generation." | |
| ) | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| if st.button("Run New Simulation", use_container_width=True): | |
| # Reset workflow | |
| st.session_state.workflow_step = 1 | |
| st.session_state.cases_ready = False | |
| st.session_state.sim_results = None | |
| st.rerun() | |
| with col2: | |
| if st.button("Modify Configuration", use_container_width=True): | |
| st.session_state.workflow_step = 2 | |
| st.session_state.sim_results = None | |
| st.rerun() | |
# Footer: a horizontal rule followed by a one-line caption, drawn after
# whichever workflow step was rendered above.
_footer_caption = "Simulation Workflow - Configure and run scheduling simulations"
st.markdown("---")
st.caption(_footer_caption)