Spaces:

RoyAalekh
/

hackathon_code4change

Sleeping

App Files Files Community

hackathon_code4change / src /dashboard /pages /3_Simulation_Workflow.py

RoyAalekh

enhancements, added view for scheduled cases as tickets

9eaac57 about 2 months ago

raw

history blame contribute delete

27.5 kB

	"""Simulation Workflow page - End-to-end scheduling simulation.

	Multi-step workflow:
	1. Data Preparation - Generate or upload cases
	2. Configuration - Set simulation parameters and policy
	3. Run Simulation - Execute simulation with progress tracking
	4. Results - View metrics, charts, and download outputs
	"""

	from __future__ import annotations

	from datetime import date, datetime
	from pathlib import Path

	import pandas as pd
	import plotly.express as px
	import streamlit as st

	from src.output.cause_list import CauseListGenerator
	from src.config.paths import get_runs_base

	# Version tag embedded in the name of each simulation run directory.
	CLI_VERSION = "1.0.0"
	# Page configuration
	# `page_icon` must be an emoji, an emoji shortcode, or an image; the plain
	# word "gear" is none of these, so the shortcode form is used instead.
	st.set_page_config(
	    page_title="Simulation Workflow",
	    page_icon=":gear:",
	    layout="wide",
	)

	st.title("Simulation Workflow")
	st.markdown("Run scheduling simulations with configurable parameters")

	# Initialize session state for workflow
	# Each key is seeded only when absent, so values written by earlier reruns
	# of the script are kept.
	_SESSION_DEFAULTS = {
	    "workflow_step": 1,
	    "cases_ready": False,
	    "sim_config": {},
	    "sim_results": None,
	    "cases_path": None,
	}
	for _state_key, _initial_value in _SESSION_DEFAULTS.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value

	# Progress indicator
	st.markdown("### Workflow Progress")

	# Labels of the workflow steps, in display order (step numbers start at 1).
	_WORKFLOW_STEP_LABELS = (
	    "Data Preparation",
	    "Configuration",
	    "Run Simulation",
	    "View Results",
	)


	def _step_status(step_number: int, current_step: int, total_steps: int) -> str:
	    """Return the progress marker shown next to a workflow step.

	    Args:
	        step_number: 1-based number of the step being rendered.
	        current_step: Step the workflow is currently on.
	        total_steps: Number of steps in the workflow.

	    Returns:
	        "[DONE]" for steps already passed, "[NOW]" for the active step and
	        "[ ]" for steps not reached yet. The final step is reported as
	        "[DONE]" as soon as it is reached, because nothing follows it.
	    """
	    if current_step > step_number:
	        return "[DONE]"
	    if current_step == step_number:
	        return "[DONE]" if step_number == total_steps else "[NOW]"
	    return "[ ]"


	_progress_columns = st.columns(len(_WORKFLOW_STEP_LABELS))
	for _step_number, (_column, _label) in enumerate(
	    zip(_progress_columns, _WORKFLOW_STEP_LABELS), start=1
	):
	    with _column:
	        _status = _step_status(
	            _step_number,
	            st.session_state.workflow_step,
	            len(_WORKFLOW_STEP_LABELS),
	        )
	        st.markdown(f"{_status} {_step_number}. {_label}")

	st.markdown("---")

	# STEP 1: Data Preparation
	# Collects case data either by generating synthetic cases or by uploading a
	# CSV. On success the CSV path is stored in session state and the workflow
	# advances to step 2.
	# NOTE(review): this copy of the file lost its indentation; widget nesting
	# (e.g. the "Cases will be saved to" notice inside the right-hand column) is
	# reconstructed from the statement order - confirm against the original.
	if st.session_state.workflow_step == 1:
	    st.markdown("## Step 1: Data Preparation")
	    st.markdown("Choose how to provide case data for simulation")

	    data_source = st.radio(
	        "Data Source",
	        ["Generate Synthetic Cases", "Upload Case CSV"],
	        help="Generate synthetic cases based on parameters, or upload your own dataset",
	    )

	    if data_source == "Generate Synthetic Cases":
	        st.markdown("### Generate Synthetic Cases")

	        col1, col2 = st.columns(2)

	        with col1:
	            n_cases = st.number_input(
	                "Number of cases",
	                min_value=100,
	                max_value=100000,
	                value=10000,
	                step=100,
	                help="Number of cases to generate",
	            )

	            start_date = st.date_input(
	                "Filing period start",
	                value=date(2022, 1, 1),
	                help="Start date for case filings",
	            )

	            end_date = st.date_input(
	                "Filing period end",
	                value=date(2023, 12, 31),
	                help="End date for case filings",
	            )

	        with col2:
	            seed = st.number_input(
	                "Random seed",
	                min_value=0,
	                max_value=9999,
	                value=42,
	                help="Seed for reproducibility",
	            )

	            output_dir = st.text_input(
	                "Output directory",
	                value="data/generated",
	                help="Directory to save generated cases",
	            )

	            st.info(f"Cases will be saved to: {output_dir}/cases.csv")

	        # Set to True below when the custom percentages do not add up to 100.
	        distribution_invalid = False

	        # Advanced: Case Type Distribution
	        with st.expander("Advanced: Case Type Distribution", expanded=False):
	            st.markdown(
	                """Customize the distribution of case types. Leave default for realistic distribution based on historical data."""
	            )

	            use_custom_dist = st.checkbox("Use custom distribution", value=False)

	            if use_custom_dist:
	                st.warning("Custom distribution: Percentages must sum to 100%")
	                col_a, col_b, col_c = st.columns(3)

	                with col_a:
	                    rsa_pct = st.number_input(
	                        "RSA %", 0, 100, 20, help="Regular Second Appeal"
	                    )
	                    rfa_pct = st.number_input(
	                        "RFA %", 0, 100, 17, help="Regular First Appeal"
	                    )
	                    crp_pct = st.number_input(
	                        "CRP %", 0, 100, 20, help="Civil Revision Petition"
	                    )

	                with col_b:
	                    ca_pct = st.number_input("CA %", 0, 100, 20, help="Civil Appeal")
	                    ccc_pct = st.number_input(
	                        "CCC %", 0, 100, 11, help="Civil Contempt"
	                    )
	                    cp_pct = st.number_input("CP %", 0, 100, 9, help="Civil Petition")

	                with col_c:
	                    cmp_pct = st.number_input(
	                        "CMP %", 0, 100, 3, help="Civil Miscellaneous Petition"
	                    )

	                total_pct = (
	                    rsa_pct
	                    + rfa_pct
	                    + crp_pct
	                    + ca_pct
	                    + ccc_pct
	                    + cp_pct
	                    + cmp_pct
	                )
	                if total_pct != 100:
	                    distribution_invalid = True
	                    st.error(f"Total: {total_pct}% (must be 100%)")
	                else:
	                    st.success(f"Total: {total_pct}%")
	            else:
	                st.info("Using default distribution from historical data")

	        from src.dashboard.utils.ui_input_parser import (
	            build_case_type_distribution,
	            merge_with_default_config,
	        )

	        # The button is disabled while the custom percentages are invalid, so
	        # a distribution the UI has just rejected can never reach the generator.
	        if st.button(
	            "Generate Cases",
	            type="primary",
	            use_container_width=True,
	            disabled=distribution_invalid,
	        ):
	            with st.spinner(f"Generating {n_cases:,} cases..."):
	                try:
	                    from cli.config import load_generate_config
	                    from src.data.case_generator import CaseGenerator

	                    default_generate_cfg_path = Path("configs/generate.sample.toml")
	                    config_from_file = None

	                    if default_generate_cfg_path.exists():
	                        config_from_file = load_generate_config(
	                            default_generate_cfg_path
	                        )
	                    cfg = merge_with_default_config(
	                        config_from_file,
	                        n_cases=n_cases,
	                        start_date=start_date,
	                        end_date=end_date,
	                        output_dir=output_dir,
	                        seed=seed,
	                    )

	                    # Prepare output dir
	                    cfg.output.parent.mkdir(parents=True, exist_ok=True)

	                    # Built only here, inside the try, so any error raised by
	                    # the helper is reported via st.error instead of breaking
	                    # the page while it renders.
	                    case_type_dist_dict = None
	                    if use_custom_dist:
	                        case_type_dist_dict = build_case_type_distribution(
	                            rsa_pct, rfa_pct, crp_pct, ca_pct, ccc_pct, cp_pct, cmp_pct
	                        )

	                    gen = CaseGenerator(start=cfg.start, end=cfg.end, seed=cfg.seed)

	                    cases = gen.generate(
	                        cfg.n_cases,
	                        stage_mix_auto=True,
	                        case_type_distribution=case_type_dist_dict,
	                    )

	                    # Save files
	                    CaseGenerator.to_csv(cases, cfg.output)
	                    hearings_path = cfg.output.parent / "hearings.csv"
	                    CaseGenerator.to_hearings_csv(cases, hearings_path)

	                    st.success(f"Generated {len(cases):,} cases successfully!")
	                    st.session_state.cases_ready = True
	                    st.session_state.cases_path = str(cfg.output)
	                    st.session_state.workflow_step = 2
	                    st.rerun()

	                except Exception as e:
	                    st.error(f"Error generating cases: {e}")

	    else:  # Upload CSV
	        st.markdown("### Upload Case CSV")

	        st.markdown("""
	Upload a CSV file with case data. Required columns:
	- `case_id`: Unique case identifier
	- `case_type`: Type of case (RSA, RFA, etc.)
	- `filed_date`: Date case was filed (YYYY-MM-DD)
	- `stage`: Current stage (or `current_stage` — will be accepted and mapped to `stage`)
	- Additional columns will be preserved
	""")

	        uploaded_file = st.file_uploader(
	            "Choose a CSV file", type=["csv"], help="Upload CSV with case data"
	        )

	        if uploaded_file is not None:
	            try:
	                # Read and validate
	                df = pd.read_csv(uploaded_file)

	                # If the uploaded file uses `current_stage`, map it to `stage` for compatibility
	                if "stage" not in df.columns and "current_stage" in df.columns:
	                    # Preserve original `current_stage` column and add `stage`
	                    df["stage"] = df["current_stage"]

	                # Check required columns
	                required_cols = ["case_id", "case_type", "filed_date", "stage"]
	                missing_cols = [col for col in required_cols if col not in df.columns]

	                if missing_cols:
	                    st.error(f"Missing required columns: {', '.join(missing_cols)}")
	                else:
	                    st.success(f"Valid CSV uploaded with {len(df):,} cases")

	                    # Show preview
	                    st.markdown("Preview:")
	                    st.dataframe(df.head(10), use_container_width=True)

	                    if st.button(
	                        "Use This Dataset", type="primary", use_container_width=True
	                    ):
	                        # Write the normalized copy only once the user confirms,
	                        # rather than on every rerun of the script.
	                        temp_path = Path("data/generated")
	                        temp_path.mkdir(parents=True, exist_ok=True)
	                        cases_file = temp_path / "uploaded_cases.csv"
	                        df.to_csv(cases_file, index=False)

	                        st.session_state.cases_ready = True
	                        st.session_state.cases_path = str(cases_file)
	                        st.session_state.workflow_step = 2
	                        st.rerun()

	            except Exception as e:
	                st.error(f"Error reading CSV: {e}")
	# STEP 2: Configuration
	elif st.session_state.workflow_step == 2:
	st.markdown("## Step 2: Configuration")
	st.markdown("Configure simulation parameters and scheduling policy")

	st.info(f"Cases loaded from: {st.session_state.cases_path}")

	col1, col2 = st.columns(2)

	with col1:
	st.markdown("### Simulation Parameters")

	days = st.number_input(
	"Simulation days",
	min_value=30,
	max_value=1000,
	value=384,
	help="Number of working days to simulate (384 = ~2 years)",
	)

	courtrooms = st.number_input(
	"Number of courtrooms",
	min_value=1,
	max_value=20,
	value=5,
	help="Number of courtrooms to simulate",
	)

	daily_capacity = st.number_input(
	"Daily capacity per courtroom",
	min_value=10,
	max_value=300,
	value=151,
	help="Maximum hearings per courtroom per day (median from historical data: 151)",
	)

	start_date_sim = st.date_input(
	"Simulation start date",
	value=date.today(),
	help="Start date for simulation (leave default to use last filing date)",
	)

	seed_sim = st.number_input(
	"Random seed",
	min_value=0,
	max_value=9999,
	value=42,
	help="Seed for reproducibility",
	)

	log_dir = st.text_input(
	"Output directory",
	value=str(get_runs_base()),
	help="Directory to save simulation outputs (override with DASHBOARD_RUNS_BASE env var)",
	)

	with col2:
	st.markdown("### Scheduling Policy")

	policy = st.selectbox(
	"Policy",
	["readiness", "fifo", "age"],
	index=0,
	help="readiness: score-based \| fifo: first-in-first-out \| age: oldest first",
	)

	if policy == "readiness":
	st.markdown("Readiness Policy Parameters:")

	fairness_weight = st.slider(
	"Fairness weight",
	min_value=0.0,
	max_value=1.0,
	value=0.4,
	step=0.05,
	help="Weight for fairness (age-based priority)",
	)

	efficiency_weight = st.slider(
	"Efficiency weight",
	min_value=0.0,
	max_value=1.0,
	value=0.3,
	step=0.05,
	help="Weight for efficiency (stage readiness)",
	)

	urgency_weight = st.slider(
	"Urgency weight",
	min_value=0.0,
	max_value=1.0,
	value=0.3,
	step=0.05,
	help="Weight for urgency (priority cases)",
	)

	total = fairness_weight + efficiency_weight + urgency_weight
	if abs(total - 1.0) > 0.01:
	st.warning(f"Weights sum to {total:.2f}, should sum to 1.0")

	st.markdown("---")
	st.markdown("Advanced Options:")

	duration_percentile = st.selectbox(
	"Duration estimation",
	["median", "mean", "p75"],
	index=0,
	help="How to estimate hearing durations",
	)

	# Store configuration
	st.session_state.sim_config = {
	"cases": st.session_state.cases_path,
	"days": days,
	"start": start_date_sim.isoformat() if start_date_sim else None,
	"policy": policy,
	"seed": seed_sim,
	"log_dir": log_dir,
	"duration_percentile": duration_percentile,
	}

	if policy == "readiness":
	st.session_state.sim_config["fairness_weight"] = fairness_weight
	st.session_state.sim_config["efficiency_weight"] = efficiency_weight
	st.session_state.sim_config["urgency_weight"] = urgency_weight

	st.markdown("---")

	col1, col2 = st.columns([1, 3])

	with col1:
	if st.button("← Back", use_container_width=True):
	st.session_state.workflow_step = 1
	st.rerun()

	with col2:
	if st.button(
	"Next: Run Simulation ->", type="primary", use_container_width=True
	):
	st.session_state.workflow_step = 3
	st.rerun()

	# STEP 3: Run Simulation
	elif st.session_state.workflow_step == 3:
	st.markdown("## Step 3: Run Simulation")

	config = st.session_state.sim_config

	st.markdown("### Configuration Summary")
	col1, col2 = st.columns(2)

	with col1:
	st.markdown(f"""
	- Cases: {config["cases"]}
	- Simulation days: {config["days"]}
	- Policy: {config["policy"]}
	""")

	with col2:
	st.markdown(f"""
	- Random seed: {config["seed"]}
	- Output: {config["log_dir"]}
	""")

	st.markdown("---")

	if st.button("Start Simulation", type="primary", use_container_width=True):
	with st.spinner("Running simulation... This may take several minutes."):
	try:
	from cli.config import load_simulate_config
	from src.dashboard.utils.simulation_runner import (
	merge_simulation_config,
	run_simulation_dashboard,
	)

	DEFAULT_SIM_CFG_PATH = Path("configs/simulate.sample.toml")
	if DEFAULT_SIM_CFG_PATH.exists():
	default_cfg = load_simulate_config(DEFAULT_SIM_CFG_PATH)
	else:
	default_cfg = (
	load_simulate_config(Path("parameter_sweep.toml"))
	if Path("parameter_sweep.toml").exists()
	else None
	)

	if default_cfg is None:
	st.error("No default simulate config found.")
	st.stop()

	merged_cfg = merge_simulation_config(
	default_cfg,
	cases_path=config["cases"],
	days=config["days"],
	start_date=date.fromisoformat(config["start"])
	if config.get("start")
	else None,
	policy=config["policy"],
	seed=config["seed"],
	log_dir=config["log_dir"],
	)

	ts = datetime.now().strftime("%Y%m%d_%H%M%S")
	base_out_dir = Path(config["log_dir"])
	run_dir = base_out_dir / f"v{CLI_VERSION}_{ts}"
	run_dir.mkdir(parents=True, exist_ok=True)

	# Update session config
	st.session_state.sim_config["log_dir"] = str(run_dir)

	result = run_simulation_dashboard(merged_cfg, run_dir)

	st.success("Simulation completed successfully!")

	st.session_state.sim_results = {
	"success": True,
	"output": result["summary"],
	"insights": result.get("insights"),
	"log_dir": str(run_dir),
	"completed_at": datetime.now().isoformat(),
	}

	events_path = result["events_path"]
	if events_path.exists():
	generator = CauseListGenerator(events_path)
	compiled_path = generator.generate_daily_lists(run_dir)
	summary_path = run_dir / "daily_summaries.csv"

	st.session_state.sim_results["cause_lists"] = {
	"compiled": str(compiled_path),
	"summary": str(summary_path),
	}

	st.session_state.workflow_step = 4
	st.rerun()

	except Exception as e:
	st.error(f"Error running simulation: {e}")
	st.session_state.sim_results = {
	"success": False,
	"error": str(e),
	}

	st.markdown("---")

	if st.button("← Back to Configuration", use_container_width=True):
	st.session_state.workflow_step = 2
	st.rerun()

	# STEP 4: Results
	elif st.session_state.workflow_step == 4:
	st.markdown("## Step 4: Results")

	results = st.session_state.sim_results

	if not results or not results.get("success"):
	st.error("Simulation did not complete successfully")
	if results and results.get("error"):
	with st.expander("Error details"):
	st.code(results["error"], language="text")

	if st.button("← Back to Run", use_container_width=True):
	st.session_state.workflow_step = 3
	st.rerun()
	else:
	st.success(f"Simulation completed at {results['completed_at']}")

	# Display console output
	with st.expander("View simulation output"):
	st.code(results["output"], language="text")

	# Key Insights from engine (if available)
	insights_text = results.get("insights")
	if insights_text:
	st.markdown("### Key Insights")
	with st.expander("Show engine insights", expanded=True):
	st.code(insights_text, language="text")

	# Check for generated files
	log_dir = Path(results["log_dir"])

	if log_dir.exists():
	st.markdown("### Generated Files")

	files = list(log_dir.glob("*"))
	if files:
	st.markdown(f"{len(files)} files generated in {log_dir}")

	for file in files:
	col1, col2 = st.columns([3, 1])
	with col1:
	st.markdown(
	f"- `{file.name}` ({file.stat().st_size / 1024:.1f} KB)"
	)
	with col2:
	if file.suffix in [".csv", ".txt"]:
	with open(file, "rb") as f:
	st.download_button(
	label="Download",
	data=f.read(),
	file_name=file.name,
	mime="text/csv"
	if file.suffix == ".csv"
	else "text/plain",
	key=f"download_{file.name}",
	)

	# Try to load and display metrics
	metrics_file = log_dir / "metrics.csv"
	if metrics_file.exists():
	st.markdown("---")
	st.markdown("### Metrics Over Time")

	try:
	metrics_df = pd.read_csv(metrics_file)

	if not metrics_df.empty:
	# Plot disposal rate over time
	if "disposal_rate" in metrics_df.columns:
	fig = px.line(
	metrics_df,
	x=metrics_df.index,
	y="disposal_rate",
	title="Disposal Rate Over Time",
	labels={
	"x": "Day",
	"disposal_rate": "Disposal Rate",
	},
	)
	st.plotly_chart(fig, use_container_width=True)

	# Plot utilization if available
	if "utilization" in metrics_df.columns:
	fig = px.line(
	metrics_df,
	x=metrics_df.index,
	y="utilization",
	title="Courtroom Utilization Over Time",
	labels={"x": "Day", "utilization": "Utilization"},
	)
	st.plotly_chart(fig, use_container_width=True)

	# Show summary statistics
	st.markdown("### Summary Statistics")
	st.dataframe(
	metrics_df.describe(), use_container_width=True
	)

	except Exception as e:
	st.warning(f"Could not load metrics: {e}")
	else:
	st.info("No output files found")
	else:
	st.warning(f"Output directory not found: {log_dir}")

	st.markdown("---")

	# Daily Cause Lists Section
	st.markdown("### Daily Cause Lists")
	cause_info = (results or {}).get("cause_lists")

	def _render_download(label: str, file_path: Path, mime: str = "text/csv"):
	try:
	with file_path.open("rb") as f:
	st.download_button(
	label=label,
	data=f.read(),
	file_name=file_path.name,
	mime=mime,
	key=f"dl_{file_path.name}",
	)
	except Exception as e:
	st.warning(f"Unable to read {file_path.name}: {e}")

	if cause_info:
	compiled_path = Path(cause_info.get("compiled", ""))
	summary_path = Path(cause_info.get("summary", ""))
	if compiled_path.exists():
	st.success(f"Compiled cause list ready: {compiled_path}")
	_render_download("Download compiled_cause_list.csv", compiled_path)
	try:
	df_preview = pd.read_csv(compiled_path, nrows=200)
	st.dataframe(df_preview.head(50), use_container_width=True)
	except Exception as e:
	st.warning(f"Preview unavailable: {e}")
	if summary_path.exists():
	_render_download("Download daily_summaries.csv", summary_path)
	else:
	# Offer on-demand generation if not already created
	events_csv = (
	(Path(results["log_dir"]) / "events.csv")
	if results and results.get("log_dir")
	else None
	)
	if events_csv and events_csv.exists():
	if st.button(
	"Generate Daily Cause Lists Now", use_container_width=False
	):
	try:
	# Save directly alongside events.csv (run directory root)
	out_dir = events_csv.parent
	generator = CauseListGenerator(events_csv)
	compiled_path = generator.generate_daily_lists(out_dir)
	summary_path = out_dir / "daily_summaries.csv"
	st.session_state.sim_results["cause_lists"] = {
	"compiled": str(compiled_path),
	"summary": str(summary_path),
	}
	st.success(f"Daily cause lists generated in {out_dir}")
	st.rerun()
	except Exception as e:
	st.error(f"Failed to generate cause lists: {e}")
	else:
	st.info(
	"events.csv not found; run a simulation first to enable cause list generation."
	)

	col1, col2 = st.columns(2)

	with col1:
	if st.button("Run New Simulation", use_container_width=True):
	# Reset workflow
	st.session_state.workflow_step = 1
	st.session_state.cases_ready = False
	st.session_state.sim_results = None
	st.rerun()

	with col2:
	if st.button("Modify Configuration", use_container_width=True):
	st.session_state.workflow_step = 2
	st.session_state.sim_results = None
	st.rerun()

	# Footer
	# A horizontal rule followed by the page caption, shown on every step.
	_FOOTER_CAPTION = "Simulation Workflow - Configure and run scheduling simulations"
	st.markdown("---")
	st.caption(_FOOTER_CAPTION)