hackathon_code4change / src /dashboard /pages /3_Simulation_Workflow.py
RoyAalekh's picture
enhancements, added view for scheduled cases as tickets
9eaac57
"""Simulation Workflow page - End-to-end scheduling simulation.
Multi-step workflow:
1. Data Preparation - Generate or upload cases
2. Configuration - Set simulation parameters and policy
3. Run Simulation - Execute simulation with progress tracking
4. Results - View metrics, charts, and download outputs
"""
from __future__ import annotations
from datetime import date, datetime
from pathlib import Path
import pandas as pd
import plotly.express as px
import streamlit as st
from src.output.cause_list import CauseListGenerator
from src.config.paths import get_runs_base
# Version tag embedded in the name of every run directory created by this page.
CLI_VERSION = "1.0.0"

# Page configuration
st.set_page_config(page_title="Simulation Workflow", page_icon="gear", layout="wide")
st.title("Simulation Workflow")
st.markdown("Run scheduling simulations with configurable parameters")

# Seed the workflow's session-state keys the first time the page runs.
# Keys that already exist (set during an earlier rerun) are left untouched.
_SESSION_DEFAULTS = {
    "workflow_step": 1,
    "cases_ready": False,
    "sim_config": {},
    "sim_results": None,
    "cases_path": None,
}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Progress indicator
def _step_marker(step: int, current: int, final_step: int = 4) -> str:
    """Return the progress tag displayed in front of a workflow step title.

    Args:
        step: 1-based number of the step being labelled.
        current: The step the workflow is currently on.
        final_step: Number of the last step. Reaching it counts as done,
            because there is no later step that could mark it complete.

    Returns:
        "[DONE]" for steps already passed (and for the final step once it is
        reached), "[NOW]" for the active step, and "[ ]" for steps not yet
        reached.
    """
    if current > step or (step == final_step and current >= final_step):
        return "[DONE]"
    if current == step:
        return "[NOW]"
    return "[ ]"


st.markdown("### Workflow Progress")
_STEP_TITLES = (
    "Data Preparation",
    "Configuration",
    "Run Simulation",
    "View Results",
)
# One column per step; each shows "<tag> <n>. <title>" in bold.
for _column, (_step_no, _step_title) in zip(
    st.columns(len(_STEP_TITLES)), enumerate(_STEP_TITLES, start=1)
):
    with _column:
        _tag = _step_marker(_step_no, st.session_state.workflow_step)
        st.markdown(f"**{_tag} {_step_no}. {_step_title}**")
st.markdown("---")
# STEP 1: Data Preparation
# The user either generates synthetic cases or uploads a CSV. On success the
# resulting file path is stored in session state and the workflow advances to
# step 2.
if st.session_state.workflow_step == 1:
    st.markdown("## Step 1: Data Preparation")
    st.markdown("Choose how to provide case data for simulation")
    data_source = st.radio(
        "Data Source",
        ["Generate Synthetic Cases", "Upload Case CSV"],
        help="Generate synthetic cases based on parameters, or upload your own dataset",
    )

    if data_source == "Generate Synthetic Cases":
        st.markdown("### Generate Synthetic Cases")
        col1, col2 = st.columns(2)
        with col1:
            n_cases = st.number_input(
                "Number of cases",
                min_value=100,
                max_value=100000,
                value=10000,
                step=100,
                help="Number of cases to generate",
            )
            start_date = st.date_input(
                "Filing period start",
                value=date(2022, 1, 1),
                help="Start date for case filings",
            )
            end_date = st.date_input(
                "Filing period end",
                value=date(2023, 12, 31),
                help="End date for case filings",
            )
        with col2:
            seed = st.number_input(
                "Random seed",
                min_value=0,
                max_value=9999,
                value=42,
                help="Seed for reproducibility",
            )
            output_dir = st.text_input(
                "Output directory",
                value="data/generated",
                help="Directory to save generated cases",
            )
            st.info(f"Cases will be saved to: {output_dir}/cases.csv")

        # Advanced: Case Type Distribution
        custom_dist_valid = True
        with st.expander("Advanced: Case Type Distribution", expanded=False):
            st.markdown(
                """Customize the distribution of case types. Leave default for realistic distribution based on historical data."""
            )
            use_custom_dist = st.checkbox("Use custom distribution", value=False)
            if use_custom_dist:
                st.warning("Custom distribution: Percentages must sum to 100%")
                col_a, col_b, col_c = st.columns(3)
                with col_a:
                    rsa_pct = st.number_input(
                        "RSA %", 0, 100, 20, help="Regular Second Appeal"
                    )
                    rfa_pct = st.number_input(
                        "RFA %", 0, 100, 17, help="Regular First Appeal"
                    )
                    crp_pct = st.number_input(
                        "CRP %", 0, 100, 20, help="Civil Revision Petition"
                    )
                with col_b:
                    ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal")
                    ccc_pct = st.number_input(
                        "CCC %", 0, 100, 11, help="Civil Contempt"
                    )
                    cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition")
                with col_c:
                    cmp_pct = st.number_input(
                        "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition"
                    )
                total_pct = sum(
                    (rsa_pct, rfa_pct, crp_pct, ca_pct, ccc_pct, cp_pct, cmp_pct)
                )
                custom_dist_valid = total_pct == 100
                if custom_dist_valid:
                    st.success(f"Total: {total_pct}%")
                else:
                    st.error(f"Total: {total_pct}% (must be 100%)")
            else:
                st.info("Using default distribution from historical data")

        from src.dashboard.utils.ui_input_parser import (
            build_case_type_distribution,
            merge_with_default_config,
        )

        # Build the distribution once, and only from percentages that add up;
        # None tells the generator to fall back to its default distribution.
        case_type_dist_dict = None
        if use_custom_dist and custom_dist_valid:
            case_type_dist_dict = build_case_type_distribution(
                rsa_pct,
                rfa_pct,
                crp_pct,
                ca_pct,
                ccc_pct,
                cp_pct,
                cmp_pct,
            )

        # Surface blocking input problems next to the button, which is
        # disabled until they are fixed, so invalid input never reaches the
        # generator.
        dates_valid = start_date <= end_date
        if not dates_valid:
            st.error("Filing period end must not be earlier than filing period start")
        if not custom_dist_valid:
            st.error("Custom case type distribution must sum to 100% before generating")

        if st.button(
            "Generate Cases",
            type="primary",
            use_container_width=True,
            disabled=not (dates_valid and custom_dist_valid),
        ):
            with st.spinner(f"Generating {n_cases:,} cases..."):
                try:
                    from cli.config import load_generate_config
                    from src.data.case_generator import CaseGenerator

                    # Optional sample config supplies defaults; UI values are
                    # merged on top of it.
                    default_generate_cfg_path = Path("configs/generate.sample.toml")
                    config_from_file = (
                        load_generate_config(default_generate_cfg_path)
                        if default_generate_cfg_path.exists()
                        else None
                    )
                    cfg = merge_with_default_config(
                        config_from_file,
                        n_cases=n_cases,
                        start_date=start_date,
                        end_date=end_date,
                        output_dir=output_dir,
                        seed=seed,
                    )

                    # Prepare output dir
                    cfg.output.parent.mkdir(parents=True, exist_ok=True)

                    gen = CaseGenerator(start=cfg.start, end=cfg.end, seed=cfg.seed)
                    cases = gen.generate(
                        cfg.n_cases,
                        stage_mix_auto=True,
                        case_type_distribution=case_type_dist_dict,
                    )

                    # Save files
                    CaseGenerator.to_csv(cases, cfg.output)
                    hearings_path = cfg.output.parent / "hearings.csv"
                    CaseGenerator.to_hearings_csv(cases, hearings_path)

                    st.success(f"Generated {len(cases):,} cases successfully!")
                    st.session_state.cases_ready = True
                    st.session_state.cases_path = str(cfg.output)
                    st.session_state.workflow_step = 2
                    st.rerun()
                except Exception as e:
                    # UI boundary: report the failure instead of crashing the page.
                    st.error(f"Error generating cases: {e}")

    else:  # Upload CSV
        st.markdown("### Upload Case CSV")
        st.markdown("""
        Upload a CSV file with case data. Required columns:
        - `case_id`: Unique case identifier
        - `case_type`: Type of case (RSA, RFA, etc.)
        - `filed_date`: Date case was filed (YYYY-MM-DD)
        - `stage`: Current stage (or `current_stage` — will be accepted and mapped to `stage`)
        - Additional columns will be preserved
        """)
        uploaded_file = st.file_uploader(
            "Choose a CSV file", type=["csv"], help="Upload CSV with case data"
        )
        if uploaded_file is not None:
            try:
                # Read and validate
                df = pd.read_csv(uploaded_file)

                # If the uploaded file uses `current_stage`, map it to `stage`
                # for compatibility; the original column is preserved.
                if "stage" not in df.columns and "current_stage" in df.columns:
                    df["stage"] = df["current_stage"]

                # Check required columns
                required_cols = ["case_id", "case_type", "filed_date", "stage"]
                missing_cols = [col for col in required_cols if col not in df.columns]
                if missing_cols:
                    st.error(f"Missing required columns: {', '.join(missing_cols)}")
                else:
                    st.success(f"Valid CSV uploaded with {len(df):,} cases")

                    # Show preview
                    st.markdown("**Preview:**")
                    st.dataframe(df.head(10), use_container_width=True)

                    if st.button(
                        "Use This Dataset", type="primary", use_container_width=True
                    ):
                        # Persist only once the user confirms, rather than on
                        # every rerun while the preview is displayed.
                        upload_dir = Path("data/generated")
                        upload_dir.mkdir(parents=True, exist_ok=True)
                        cases_file = upload_dir / "uploaded_cases.csv"
                        df.to_csv(cases_file, index=False)

                        st.session_state.cases_ready = True
                        st.session_state.cases_path = str(cases_file)
                        st.session_state.workflow_step = 2
                        st.rerun()
            except Exception as e:
                # UI boundary: parsing and saving errors are shown to the user.
                st.error(f"Error reading CSV: {e}")
# STEP 2: Configuration
elif st.session_state.workflow_step == 2:
st.markdown("## Step 2: Configuration")
st.markdown("Configure simulation parameters and scheduling policy")
st.info(f"Cases loaded from: {st.session_state.cases_path}")
col1, col2 = st.columns(2)
with col1:
st.markdown("### Simulation Parameters")
days = st.number_input(
"Simulation days",
min_value=30,
max_value=1000,
value=384,
help="Number of working days to simulate (384 = ~2 years)",
)
courtrooms = st.number_input(
"Number of courtrooms",
min_value=1,
max_value=20,
value=5,
help="Number of courtrooms to simulate",
)
daily_capacity = st.number_input(
"Daily capacity per courtroom",
min_value=10,
max_value=300,
value=151,
help="Maximum hearings per courtroom per day (median from historical data: 151)",
)
start_date_sim = st.date_input(
"Simulation start date",
value=date.today(),
help="Start date for simulation (leave default to use last filing date)",
)
seed_sim = st.number_input(
"Random seed",
min_value=0,
max_value=9999,
value=42,
help="Seed for reproducibility",
)
log_dir = st.text_input(
"Output directory",
value=str(get_runs_base()),
help="Directory to save simulation outputs (override with DASHBOARD_RUNS_BASE env var)",
)
with col2:
st.markdown("### Scheduling Policy")
policy = st.selectbox(
"Policy",
["readiness", "fifo", "age"],
index=0,
help="readiness: score-based | fifo: first-in-first-out | age: oldest first",
)
if policy == "readiness":
st.markdown("**Readiness Policy Parameters:**")
fairness_weight = st.slider(
"Fairness weight",
min_value=0.0,
max_value=1.0,
value=0.4,
step=0.05,
help="Weight for fairness (age-based priority)",
)
efficiency_weight = st.slider(
"Efficiency weight",
min_value=0.0,
max_value=1.0,
value=0.3,
step=0.05,
help="Weight for efficiency (stage readiness)",
)
urgency_weight = st.slider(
"Urgency weight",
min_value=0.0,
max_value=1.0,
value=0.3,
step=0.05,
help="Weight for urgency (priority cases)",
)
total = fairness_weight + efficiency_weight + urgency_weight
if abs(total - 1.0) > 0.01:
st.warning(f"Weights sum to {total:.2f}, should sum to 1.0")
st.markdown("---")
st.markdown("**Advanced Options:**")
duration_percentile = st.selectbox(
"Duration estimation",
["median", "mean", "p75"],
index=0,
help="How to estimate hearing durations",
)
# Store configuration
st.session_state.sim_config = {
"cases": st.session_state.cases_path,
"days": days,
"start": start_date_sim.isoformat() if start_date_sim else None,
"policy": policy,
"seed": seed_sim,
"log_dir": log_dir,
"duration_percentile": duration_percentile,
}
if policy == "readiness":
st.session_state.sim_config["fairness_weight"] = fairness_weight
st.session_state.sim_config["efficiency_weight"] = efficiency_weight
st.session_state.sim_config["urgency_weight"] = urgency_weight
st.markdown("---")
col1, col2 = st.columns([1, 3])
with col1:
if st.button("← Back", use_container_width=True):
st.session_state.workflow_step = 1
st.rerun()
with col2:
if st.button(
"Next: Run Simulation ->", type="primary", use_container_width=True
):
st.session_state.workflow_step = 3
st.rerun()
# STEP 3: Run Simulation
elif st.session_state.workflow_step == 3:
st.markdown("## Step 3: Run Simulation")
config = st.session_state.sim_config
st.markdown("### Configuration Summary")
col1, col2 = st.columns(2)
with col1:
st.markdown(f"""
- **Cases:** {config["cases"]}
- **Simulation days:** {config["days"]}
- **Policy:** {config["policy"]}
""")
with col2:
st.markdown(f"""
- **Random seed:** {config["seed"]}
- **Output:** {config["log_dir"]}
""")
st.markdown("---")
if st.button("Start Simulation", type="primary", use_container_width=True):
with st.spinner("Running simulation... This may take several minutes."):
try:
from cli.config import load_simulate_config
from src.dashboard.utils.simulation_runner import (
merge_simulation_config,
run_simulation_dashboard,
)
DEFAULT_SIM_CFG_PATH = Path("configs/simulate.sample.toml")
if DEFAULT_SIM_CFG_PATH.exists():
default_cfg = load_simulate_config(DEFAULT_SIM_CFG_PATH)
else:
default_cfg = (
load_simulate_config(Path("parameter_sweep.toml"))
if Path("parameter_sweep.toml").exists()
else None
)
if default_cfg is None:
st.error("No default simulate config found.")
st.stop()
merged_cfg = merge_simulation_config(
default_cfg,
cases_path=config["cases"],
days=config["days"],
start_date=date.fromisoformat(config["start"])
if config.get("start")
else None,
policy=config["policy"],
seed=config["seed"],
log_dir=config["log_dir"],
)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
base_out_dir = Path(config["log_dir"])
run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}"
run_dir.mkdir(parents=True, exist_ok=True)
# Update session config
st.session_state.sim_config["log_dir"] = str(run_dir)
result = run_simulation_dashboard(merged_cfg, run_dir)
st.success("Simulation completed successfully!")
st.session_state.sim_results = {
"success": True,
"output": result["summary"],
"insights": result.get("insights"),
"log_dir": str(run_dir),
"completed_at": datetime.now().isoformat(),
}
events_path = result["events_path"]
if events_path.exists():
generator = CauseListGenerator(events_path)
compiled_path = generator.generate_daily_lists(run_dir)
summary_path = run_dir / "daily_summaries.csv"
st.session_state.sim_results["cause_lists"] = {
"compiled": str(compiled_path),
"summary": str(summary_path),
}
st.session_state.workflow_step = 4
st.rerun()
except Exception as e:
st.error(f"Error running simulation: {e}")
st.session_state.sim_results = {
"success": False,
"error": str(e),
}
st.markdown("---")
if st.button("← Back to Configuration", use_container_width=True):
st.session_state.workflow_step = 2
st.rerun()
# STEP 4: Results
elif st.session_state.workflow_step == 4:
st.markdown("## Step 4: Results")
results = st.session_state.sim_results
if not results or not results.get("success"):
st.error("Simulation did not complete successfully")
if results and results.get("error"):
with st.expander("Error details"):
st.code(results["error"], language="text")
if st.button("← Back to Run", use_container_width=True):
st.session_state.workflow_step = 3
st.rerun()
else:
st.success(f"Simulation completed at {results['completed_at']}")
# Display console output
with st.expander("View simulation output"):
st.code(results["output"], language="text")
# Key Insights from engine (if available)
insights_text = results.get("insights")
if insights_text:
st.markdown("### Key Insights")
with st.expander("Show engine insights", expanded=True):
st.code(insights_text, language="text")
# Check for generated files
log_dir = Path(results["log_dir"])
if log_dir.exists():
st.markdown("### Generated Files")
files = list(log_dir.glob("*"))
if files:
st.markdown(f"**{len(files)} files generated in {log_dir}**")
for file in files:
col1, col2 = st.columns([3, 1])
with col1:
st.markdown(
f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)"
)
with col2:
if file.suffix in [".csv", ".txt"]:
with open(file, "rb") as f:
st.download_button(
label="Download",
data=f.read(),
file_name=file.name,
mime="text/csv"
if file.suffix == ".csv"
else "text/plain",
key=f"download_{file.name}",
)
# Try to load and display metrics
metrics_file = log_dir / "metrics.csv"
if metrics_file.exists():
st.markdown("---")
st.markdown("### Metrics Over Time")
try:
metrics_df = pd.read_csv(metrics_file)
if not metrics_df.empty:
# Plot disposal rate over time
if "disposal_rate" in metrics_df.columns:
fig = px.line(
metrics_df,
x=metrics_df.index,
y="disposal_rate",
title="Disposal Rate Over Time",
labels={
"x": "Day",
"disposal_rate": "Disposal Rate",
},
)
st.plotly_chart(fig, use_container_width=True)
# Plot utilization if available
if "utilization" in metrics_df.columns:
fig = px.line(
metrics_df,
x=metrics_df.index,
y="utilization",
title="Courtroom Utilization Over Time",
labels={"x": "Day", "utilization": "Utilization"},
)
st.plotly_chart(fig, use_container_width=True)
# Show summary statistics
st.markdown("### Summary Statistics")
st.dataframe(
metrics_df.describe(), use_container_width=True
)
except Exception as e:
st.warning(f"Could not load metrics: {e}")
else:
st.info("No output files found")
else:
st.warning(f"Output directory not found: {log_dir}")
st.markdown("---")
# Daily Cause Lists Section
st.markdown("### Daily Cause Lists")
cause_info = (results or {}).get("cause_lists")
def _render_download(label: str, file_path: Path, mime: str = "text/csv"):
try:
with file_path.open("rb") as f:
st.download_button(
label=label,
data=f.read(),
file_name=file_path.name,
mime=mime,
key=f"dl_{file_path.name}",
)
except Exception as e:
st.warning(f"Unable to read {file_path.name}: {e}")
if cause_info:
compiled_path = Path(cause_info.get("compiled", ""))
summary_path = Path(cause_info.get("summary", ""))
if compiled_path.exists():
st.success(f"Compiled cause list ready: {compiled_path}")
_render_download("Download compiled_cause_list.csv", compiled_path)
try:
df_preview = pd.read_csv(compiled_path, nrows=200)
st.dataframe(df_preview.head(50), use_container_width=True)
except Exception as e:
st.warning(f"Preview unavailable: {e}")
if summary_path.exists():
_render_download("Download daily_summaries.csv", summary_path)
else:
# Offer on-demand generation if not already created
events_csv = (
(Path(results["log_dir"]) / "events.csv")
if results and results.get("log_dir")
else None
)
if events_csv and events_csv.exists():
if st.button(
"Generate Daily Cause Lists Now", use_container_width=False
):
try:
# Save directly alongside events.csv (run directory root)
out_dir = events_csv.parent
generator = CauseListGenerator(events_csv)
compiled_path = generator.generate_daily_lists(out_dir)
summary_path = out_dir / "daily_summaries.csv"
st.session_state.sim_results["cause_lists"] = {
"compiled": str(compiled_path),
"summary": str(summary_path),
}
st.success(f"Daily cause lists generated in {out_dir}")
st.rerun()
except Exception as e:
st.error(f"Failed to generate cause lists: {e}")
else:
st.info(
"events.csv not found; run a simulation first to enable cause list generation."
)
col1, col2 = st.columns(2)
with col1:
if st.button("Run New Simulation", use_container_width=True):
# Reset workflow
st.session_state.workflow_step = 1
st.session_state.cases_ready = False
st.session_state.sim_results = None
st.rerun()
with col2:
if st.button("Modify Configuration", use_container_width=True):
st.session_state.workflow_step = 2
st.session_state.sim_results = None
st.rerun()
# Footer: a horizontal rule followed by a one-line caption describing the page.
st.markdown("---")
st.caption("Simulation Workflow - Configure and run scheduling simulations")