"""Simulation Workflow page - End-to-end scheduling simulation. Multi-step workflow: 1. Data Preparation - Generate or upload cases 2. Configuration - Set simulation parameters and policy 3. Run Simulation - Execute simulation with progress tracking 4. Results - View metrics, charts, and download outputs """ from __future__ import annotations from datetime import date, datetime from pathlib import Path import pandas as pd import plotly.express as px import streamlit as st from src.output.cause_list import CauseListGenerator from src.config.paths import get_runs_base CLI_VERSION = "1.0.0" # Page configuration st.set_page_config( page_title="Simulation Workflow", page_icon="gear", layout="wide", ) st.title("Simulation Workflow") st.markdown("Run scheduling simulations with configurable parameters") # Initialize session state for workflow if "workflow_step" not in st.session_state: st.session_state.workflow_step = 1 if "cases_ready" not in st.session_state: st.session_state.cases_ready = False if "sim_config" not in st.session_state: st.session_state.sim_config = {} if "sim_results" not in st.session_state: st.session_state.sim_results = None if "cases_path" not in st.session_state: st.session_state.cases_path = None # Progress indicator st.markdown("### Workflow Progress") col1, col2, col3, col4 = st.columns(4) with col1: status = ( "[DONE]" if st.session_state.workflow_step > 1 else ("[NOW]" if st.session_state.workflow_step == 1 else "[ ]") ) st.markdown(f"**{status} 1. Data Preparation**") with col2: status = ( "[DONE]" if st.session_state.workflow_step > 2 else ("[NOW]" if st.session_state.workflow_step == 2 else "[ ]") ) st.markdown(f"**{status} 2. Configuration**") with col3: status = ( "[DONE]" if st.session_state.workflow_step > 3 else ("[NOW]" if st.session_state.workflow_step == 3 else "[ ]") ) st.markdown(f"**{status} 3. 
Run Simulation**") with col4: status = ( "[DONE]" if st.session_state.workflow_step == 4 else ("[NOW]" if st.session_state.workflow_step == 4 else "[ ]") ) st.markdown(f"**{status} 4. View Results**") st.markdown("---") # STEP 1: Data Preparation if st.session_state.workflow_step == 1: st.markdown("## Step 1: Data Preparation") st.markdown("Choose how to provide case data for simulation") data_source = st.radio( "Data Source", ["Generate Synthetic Cases", "Upload Case CSV"], help="Generate synthetic cases based on parameters, or upload your own dataset", ) if data_source == "Generate Synthetic Cases": st.markdown("### Generate Synthetic Cases") col1, col2 = st.columns(2) with col1: n_cases = st.number_input( "Number of cases", min_value=100, max_value=100000, value=10000, step=100, help="Number of cases to generate", ) start_date = st.date_input( "Filing period start", value=date(2022, 1, 1), help="Start date for case filings", ) end_date = st.date_input( "Filing period end", value=date(2023, 12, 31), help="End date for case filings", ) with col2: seed = st.number_input( "Random seed", min_value=0, max_value=9999, value=42, help="Seed for reproducibility", ) output_dir = st.text_input( "Output directory", value="data/generated", help="Directory to save generated cases", ) st.info(f"Cases will be saved to: {output_dir}/cases.csv") # Advanced: Case Type Distribution with st.expander("Advanced: Case Type Distribution", expanded=False): st.markdown( """Customize the distribution of case types. 
Leave default for realistic distribution based on historical data.""" ) use_custom_dist = st.checkbox("Use custom distribution", value=False) if use_custom_dist: st.warning("Custom distribution: Percentages must sum to 100%") col_a, col_b, col_c = st.columns(3) with col_a: rsa_pct = st.number_input( "RSA %", 0, 100, 20, help="Regular Second Appeal" ) rfa_pct = st.number_input( "RFA %", 0, 100, 17, help="Regular First Appeal" ) crp_pct = st.number_input( "CRP %", 0, 100, 20, help="Civil Revision Petition" ) with col_b: ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal") ccc_pct = st.number_input( "CCC %", 0, 100, 11, help="Civil Contempt" ) cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition") with col_c: cmp_pct = st.number_input( "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition" ) total_pct = ( rsa_pct + rfa_pct + crp_pct + ca_pct + ccc_pct + cp_pct + cmp_pct ) if total_pct != 100: st.error(f"Total: {total_pct}% (must be 100%)") else: st.success(f"Total: {total_pct}%") else: st.info("Using default distribution from historical data") from src.dashboard.utils.ui_input_parser import ( build_case_type_distribution, merge_with_default_config, ) case_type_dist_dict = None if use_custom_dist: case_type_dist_dict = build_case_type_distribution( rsa_pct, rfa_pct, crp_pct, ca_pct, ccc_pct, cp_pct, cmp_pct, ) if st.button("Generate Cases", type="primary", use_container_width=True): with st.spinner(f"Generating {n_cases:,} cases..."): try: from cli.config import load_generate_config from src.data.case_generator import CaseGenerator DEFAULT_GENERATE_CFG_PATH = Path("configs/generate.sample.toml") config_from_file = None if DEFAULT_GENERATE_CFG_PATH.exists(): config_from_file = load_generate_config( DEFAULT_GENERATE_CFG_PATH ) cfg = merge_with_default_config( config_from_file, n_cases=n_cases, start_date=start_date, end_date=end_date, output_dir=output_dir, seed=seed, ) # Prepare output dir cfg.output.parent.mkdir(parents=True, exist_ok=True) 
case_type_dist_dict = None if use_custom_dist: from src.dashboard.utils.ui_input_parser import ( build_case_type_distribution, ) case_type_dist_dict = build_case_type_distribution( rsa_pct, rfa_pct, crp_pct, ca_pct, ccc_pct, cp_pct, cmp_pct ) gen = CaseGenerator(start=cfg.start, end=cfg.end, seed=cfg.seed) cases = gen.generate( cfg.n_cases, stage_mix_auto=True, case_type_distribution=case_type_dist_dict, ) # Save files CaseGenerator.to_csv(cases, cfg.output) hearings_path = cfg.output.parent / "hearings.csv" CaseGenerator.to_hearings_csv(cases, hearings_path) st.success(f"Generated {len(cases):,} cases successfully!") st.session_state.cases_ready = True st.session_state.cases_path = str(cfg.output) st.session_state.workflow_step = 2 st.rerun() except Exception as e: st.error(f"Error generating cases: {e}") else: # Upload CSV st.markdown("### Upload Case CSV") st.markdown(""" Upload a CSV file with case data. Required columns: - `case_id`: Unique case identifier - `case_type`: Type of case (RSA, RFA, etc.) 
- `filed_date`: Date case was filed (YYYY-MM-DD) - `stage`: Current stage (or `current_stage` — will be accepted and mapped to `stage`) - Additional columns will be preserved """) uploaded_file = st.file_uploader( "Choose a CSV file", type=["csv"], help="Upload CSV with case data" ) if uploaded_file is not None: try: # Read and validate df = pd.read_csv(uploaded_file) # If the uploaded file uses `current_stage`, map it to `stage` for compatibility if "stage" not in df.columns and "current_stage" in df.columns: # Preserve original `current_stage` column and add `stage` df["stage"] = df["current_stage"] # Check required columns required_cols = ["case_id", "case_type", "filed_date", "stage"] missing_cols = [col for col in required_cols if col not in df.columns] if missing_cols: st.error(f"Missing required columns: {', '.join(missing_cols)}") else: st.success(f"Valid CSV uploaded with {len(df):,} cases") # Show preview st.markdown("**Preview:**") st.dataframe(df.head(10), use_container_width=True) # Save to temporary location temp_path = Path("data/generated") temp_path.mkdir(parents=True, exist_ok=True) cases_file = temp_path / "uploaded_cases.csv" df.to_csv(cases_file, index=False) if st.button( "Use This Dataset", type="primary", use_container_width=True ): st.session_state.cases_ready = True st.session_state.cases_path = str(cases_file) st.session_state.workflow_step = 2 st.rerun() except Exception as e: st.error(f"Error reading CSV: {e}") # STEP 2: Configuration elif st.session_state.workflow_step == 2: st.markdown("## Step 2: Configuration") st.markdown("Configure simulation parameters and scheduling policy") st.info(f"Cases loaded from: {st.session_state.cases_path}") col1, col2 = st.columns(2) with col1: st.markdown("### Simulation Parameters") days = st.number_input( "Simulation days", min_value=30, max_value=1000, value=384, help="Number of working days to simulate (384 = ~2 years)", ) courtrooms = st.number_input( "Number of courtrooms", min_value=1, 
max_value=20, value=5, help="Number of courtrooms to simulate", ) daily_capacity = st.number_input( "Daily capacity per courtroom", min_value=10, max_value=300, value=151, help="Maximum hearings per courtroom per day (median from historical data: 151)", ) start_date_sim = st.date_input( "Simulation start date", value=date.today(), help="Start date for simulation (leave default to use last filing date)", ) seed_sim = st.number_input( "Random seed", min_value=0, max_value=9999, value=42, help="Seed for reproducibility", ) log_dir = st.text_input( "Output directory", value=str(get_runs_base()), help="Directory to save simulation outputs (override with DASHBOARD_RUNS_BASE env var)", ) with col2: st.markdown("### Scheduling Policy") policy = st.selectbox( "Policy", ["readiness", "fifo", "age"], index=0, help="readiness: score-based | fifo: first-in-first-out | age: oldest first", ) if policy == "readiness": st.markdown("**Readiness Policy Parameters:**") fairness_weight = st.slider( "Fairness weight", min_value=0.0, max_value=1.0, value=0.4, step=0.05, help="Weight for fairness (age-based priority)", ) efficiency_weight = st.slider( "Efficiency weight", min_value=0.0, max_value=1.0, value=0.3, step=0.05, help="Weight for efficiency (stage readiness)", ) urgency_weight = st.slider( "Urgency weight", min_value=0.0, max_value=1.0, value=0.3, step=0.05, help="Weight for urgency (priority cases)", ) total = fairness_weight + efficiency_weight + urgency_weight if abs(total - 1.0) > 0.01: st.warning(f"Weights sum to {total:.2f}, should sum to 1.0") st.markdown("---") st.markdown("**Advanced Options:**") duration_percentile = st.selectbox( "Duration estimation", ["median", "mean", "p75"], index=0, help="How to estimate hearing durations", ) # Store configuration st.session_state.sim_config = { "cases": st.session_state.cases_path, "days": days, "start": start_date_sim.isoformat() if start_date_sim else None, "policy": policy, "seed": seed_sim, "log_dir": log_dir, 
"duration_percentile": duration_percentile, } if policy == "readiness": st.session_state.sim_config["fairness_weight"] = fairness_weight st.session_state.sim_config["efficiency_weight"] = efficiency_weight st.session_state.sim_config["urgency_weight"] = urgency_weight st.markdown("---") col1, col2 = st.columns([1, 3]) with col1: if st.button("← Back", use_container_width=True): st.session_state.workflow_step = 1 st.rerun() with col2: if st.button( "Next: Run Simulation ->", type="primary", use_container_width=True ): st.session_state.workflow_step = 3 st.rerun() # STEP 3: Run Simulation elif st.session_state.workflow_step == 3: st.markdown("## Step 3: Run Simulation") config = st.session_state.sim_config st.markdown("### Configuration Summary") col1, col2 = st.columns(2) with col1: st.markdown(f""" - **Cases:** {config["cases"]} - **Simulation days:** {config["days"]} - **Policy:** {config["policy"]} """) with col2: st.markdown(f""" - **Random seed:** {config["seed"]} - **Output:** {config["log_dir"]} """) st.markdown("---") if st.button("Start Simulation", type="primary", use_container_width=True): with st.spinner("Running simulation... 
This may take several minutes."): try: from cli.config import load_simulate_config from src.dashboard.utils.simulation_runner import ( merge_simulation_config, run_simulation_dashboard, ) DEFAULT_SIM_CFG_PATH = Path("configs/simulate.sample.toml") if DEFAULT_SIM_CFG_PATH.exists(): default_cfg = load_simulate_config(DEFAULT_SIM_CFG_PATH) else: default_cfg = ( load_simulate_config(Path("parameter_sweep.toml")) if Path("parameter_sweep.toml").exists() else None ) if default_cfg is None: st.error("No default simulate config found.") st.stop() merged_cfg = merge_simulation_config( default_cfg, cases_path=config["cases"], days=config["days"], start_date=date.fromisoformat(config["start"]) if config.get("start") else None, policy=config["policy"], seed=config["seed"], log_dir=config["log_dir"], ) ts = datetime.now().strftime("%Y%m%d_%H%M%S") base_out_dir = Path(config["log_dir"]) run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}" run_dir.mkdir(parents=True, exist_ok=True) # Update session config st.session_state.sim_config["log_dir"] = str(run_dir) result = run_simulation_dashboard(merged_cfg, run_dir) st.success("Simulation completed successfully!") st.session_state.sim_results = { "success": True, "output": result["summary"], "insights": result.get("insights"), "log_dir": str(run_dir), "completed_at": datetime.now().isoformat(), } events_path = result["events_path"] if events_path.exists(): generator = CauseListGenerator(events_path) compiled_path = generator.generate_daily_lists(run_dir) summary_path = run_dir / "daily_summaries.csv" st.session_state.sim_results["cause_lists"] = { "compiled": str(compiled_path), "summary": str(summary_path), } st.session_state.workflow_step = 4 st.rerun() except Exception as e: st.error(f"Error running simulation: {e}") st.session_state.sim_results = { "success": False, "error": str(e), } st.markdown("---") if st.button("← Back to Configuration", use_container_width=True): st.session_state.workflow_step = 2 st.rerun() # STEP 4: Results 
# STEP 4: Results — renders outputs of the completed run stored in
# st.session_state.sim_results, offers file downloads, metric charts, and
# daily cause-list generation.
elif st.session_state.workflow_step == 4:
    st.markdown("## Step 4: Results")
    results = st.session_state.sim_results

    # Failure path: show the recorded error (if any) and a way back to Step 3.
    if not results or not results.get("success"):
        st.error("Simulation did not complete successfully")
        if results and results.get("error"):
            with st.expander("Error details"):
                st.code(results["error"], language="text")
        if st.button("← Back to Run", use_container_width=True):
            st.session_state.workflow_step = 3
            st.rerun()
    else:
        st.success(f"Simulation completed at {results['completed_at']}")

        # Display console output
        with st.expander("View simulation output"):
            st.code(results["output"], language="text")

        # Key Insights from engine (if available)
        insights_text = results.get("insights")
        if insights_text:
            st.markdown("### Key Insights")
            with st.expander("Show engine insights", expanded=True):
                st.code(insights_text, language="text")

        # Check for generated files in the run directory recorded at Step 3.
        log_dir = Path(results["log_dir"])
        if log_dir.exists():
            st.markdown("### Generated Files")
            files = list(log_dir.glob("*"))
            if files:
                st.markdown(f"**{len(files)} files generated in {log_dir}**")
                for file in files:
                    col1, col2 = st.columns([3, 1])
                    with col1:
                        st.markdown(
                            f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)"
                        )
                    with col2:
                        # Only text-like outputs get a download button; the
                        # key is derived from the file name to stay unique.
                        if file.suffix in [".csv", ".txt"]:
                            with open(file, "rb") as f:
                                st.download_button(
                                    label="Download",
                                    data=f.read(),
                                    file_name=file.name,
                                    mime="text/csv"
                                    if file.suffix == ".csv"
                                    else "text/plain",
                                    key=f"download_{file.name}",
                                )

                # Try to load and display metrics
                metrics_file = log_dir / "metrics.csv"
                if metrics_file.exists():
                    st.markdown("---")
                    st.markdown("### Metrics Over Time")
                    try:
                        metrics_df = pd.read_csv(metrics_file)
                        if not metrics_df.empty:
                            # Plot disposal rate over time.
                            # NOTE(review): with x=metrics_df.index the labels
                            # key "x" may not be applied by plotly express
                            # (the axis is named after the index) — verify the
                            # rendered axis title.
                            if "disposal_rate" in metrics_df.columns:
                                fig = px.line(
                                    metrics_df,
                                    x=metrics_df.index,
                                    y="disposal_rate",
                                    title="Disposal Rate Over Time",
                                    labels={
                                        "x": "Day",
                                        "disposal_rate": "Disposal Rate",
                                    },
                                )
                                st.plotly_chart(fig, use_container_width=True)
                            # Plot utilization if available
                            if "utilization" in metrics_df.columns:
                                fig = px.line(
                                    metrics_df,
                                    x=metrics_df.index,
                                    y="utilization",
                                    title="Courtroom Utilization Over Time",
                                    labels={"x": "Day", "utilization": "Utilization"},
                                )
                                st.plotly_chart(fig, use_container_width=True)
                            # Show summary statistics
                            st.markdown("### Summary Statistics")
                            st.dataframe(
                                metrics_df.describe(), use_container_width=True
                            )
                    except Exception as e:
                        st.warning(f"Could not load metrics: {e}")
            else:
                st.info("No output files found")
        else:
            st.warning(f"Output directory not found: {log_dir}")

        st.markdown("---")
        # Daily Cause Lists Section
        st.markdown("### Daily Cause Lists")
        cause_info = (results or {}).get("cause_lists")

        def _render_download(label: str, file_path: Path, mime: str = "text/csv") -> None:
            """Render a download button for *file_path*, warning instead of
            crashing when the file cannot be read."""
            try:
                with file_path.open("rb") as f:
                    st.download_button(
                        label=label,
                        data=f.read(),
                        file_name=file_path.name,
                        mime=mime,
                        key=f"dl_{file_path.name}",
                    )
            except Exception as e:
                st.warning(f"Unable to read {file_path.name}: {e}")

        if cause_info:
            # Cause lists were pre-generated at Step 3: offer downloads and a
            # capped preview (read at most 200 rows, display 50).
            compiled_path = Path(cause_info.get("compiled", ""))
            summary_path = Path(cause_info.get("summary", ""))
            if compiled_path.exists():
                st.success(f"Compiled cause list ready: {compiled_path}")
                _render_download("Download compiled_cause_list.csv", compiled_path)
                try:
                    df_preview = pd.read_csv(compiled_path, nrows=200)
                    st.dataframe(df_preview.head(50), use_container_width=True)
                except Exception as e:
                    st.warning(f"Preview unavailable: {e}")
            if summary_path.exists():
                _render_download("Download daily_summaries.csv", summary_path)
        else:
            # Offer on-demand generation if not already created
            events_csv = (
                (Path(results["log_dir"]) / "events.csv")
                if results and results.get("log_dir")
                else None
            )
            if events_csv and events_csv.exists():
                if st.button(
                    "Generate Daily Cause Lists Now", use_container_width=False
                ):
                    try:
                        # Save directly alongside events.csv (run directory root)
                        out_dir = events_csv.parent
                        generator = CauseListGenerator(events_csv)
                        compiled_path = generator.generate_daily_lists(out_dir)
                        summary_path = out_dir / "daily_summaries.csv"
                        st.session_state.sim_results["cause_lists"] = {
                            "compiled": str(compiled_path),
                            "summary": str(summary_path),
                        }
                        st.success(f"Daily cause lists generated in {out_dir}")
                        st.rerun()
                    except Exception as e:
                        st.error(f"Failed to generate cause lists: {e}")
            else:
                st.info(
                    "events.csv not found; run a simulation first to enable cause list generation."
                )

    # Navigation out of the results page: full reset to Step 1, or return to
    # Step 2 keeping the loaded cases.
    col1, col2 = st.columns(2)
    with col1:
        if st.button("Run New Simulation", use_container_width=True):
            # Reset workflow
            st.session_state.workflow_step = 1
            st.session_state.cases_ready = False
            st.session_state.sim_results = None
            st.rerun()
    with col2:
        if st.button("Modify Configuration", use_container_width=True):
            st.session_state.workflow_step = 2
            st.session_state.sim_results = None
            st.rerun()

# Footer
st.markdown("---")
st.caption("Simulation Workflow - Configure and run scheduling simulations")