"""Analytics & Reports page - Compare simulation runs and analyze performance. Features: 1. Simulation Comparison - Compare multiple simulation runs side-by-side 2. Performance Trends - Analyze metrics over time 3. Fairness Analysis - Evaluate equity and distribution 4. Report Generation - Export comprehensive analysis """ from __future__ import annotations from datetime import datetime from pathlib import Path import numpy as np import pandas as pd import plotly.express as px import plotly.graph_objects as go import streamlit as st # Page configuration st.set_page_config( page_title="Analytics & Reports", page_icon="chart", layout="wide", ) st.title("Analytics & Reports") st.markdown("Compare simulation runs and analyze system performance") st.markdown("---") # Main tabs tab1, tab2, tab3, tab4 = st.tabs( [ "Simulation Comparison", "Performance Trends", "Fairness Analysis", "Report Generation", ] ) # TAB 1: Simulation Comparison with tab1: st.markdown("### Simulation Comparison") st.markdown( "Compare multiple simulation runs to evaluate different policies and parameters." ) # Check for available simulation runs (centralized base) from src.config.paths import get_runs_base runs_dir = get_runs_base() if not runs_dir.exists(): st.warning( "No simulation outputs found. Run simulations first to generate data." ) else: # Collect all run directories that actually contain a metrics.csv file. # Some runs may be nested (version folder inside timestamp). We treat every # directory that has metrics.csv as a runnable result. metric_files = list(runs_dir.rglob("metrics.csv")) run_paths = sorted({p.parent for p in metric_files}) # Build label -> path map; label is relative path inside simulation_runs run_map = {str(p.relative_to(runs_dir)): p for p in run_paths} if len(run_map) < 2: st.info( "At least 2 simulation runs needed for comparison. Run more simulations to enable comparison." ) else: st.markdown(f"**{len(run_map)} simulation run(s) available**") # Select runs to compare col1, col2 = st.columns(2) labels = sorted(run_map.keys()) with col1: run1_label = st.selectbox( "First simulation run", options=labels, key="compare_run1" ) with col2: run2_options = [lbl for lbl in labels if lbl != run1_label] run2_label = st.selectbox( "Second simulation run", options=run2_options, key="compare_run2", ) if st.button("Compare Runs", type="primary"): # Load metrics from both runs run1_metrics_path = run_map[run1_label] / "metrics.csv" run2_metrics_path = run_map[run2_label] / "metrics.csv" if not run1_metrics_path.exists() or not run2_metrics_path.exists(): st.error("Metrics files not found for one or both runs.") else: try: df1 = pd.read_csv(run1_metrics_path) df2 = pd.read_csv(run2_metrics_path) st.success("Loaded metrics successfully") # Show Key Insights from report.txt for both runs st.markdown("#### Key Insights (from report.txt)") col_ins_1, col_ins_2 = st.columns(2) report1_path = run_map[run1_label] / "report.txt" report2_path = run_map[run2_label] / "report.txt" with col_ins_1: st.markdown(f"**{run1_label}**") if report1_path.exists(): st.code( report1_path.read_text(encoding="utf-8"), language="text", ) else: st.info("No report.txt found for this run.") with col_ins_2: st.markdown(f"**{run2_label}**") if report2_path.exists(): st.code( report2_path.read_text(encoding="utf-8"), language="text", ) else: st.info("No report.txt found for this run.") # Summary comparison st.markdown("#### Summary Comparison") col1, col2, col3 = st.columns(3) with col1: st.markdown(f"**{run1_label}**") if "disposal_rate" in df1.columns: avg_disposal1 = df1["disposal_rate"].mean() st.metric("Avg. Disposal Rate", f"{avg_disposal1:.2%}") if "utilization" in df1.columns: avg_util1 = df1["utilization"].mean() st.metric("Avg. Utilization", f"{avg_util1:.2%}") with col2: st.markdown(f"**{run2_label}**") if "disposal_rate" in df2.columns: avg_disposal2 = df2["disposal_rate"].mean() st.metric("Avg. Disposal Rate", f"{avg_disposal2:.2%}") if "utilization" in df2.columns: avg_util2 = df2["utilization"].mean() st.metric("Avg. Utilization", f"{avg_util2:.2%}") with col3: st.markdown("**Difference**") if ( "disposal_rate" in df1.columns and "disposal_rate" in df2.columns ): diff_disposal = avg_disposal2 - avg_disposal1 st.metric("Disposal Rate Δ", f"{diff_disposal:+.2%}") if ( "utilization" in df1.columns and "utilization" in df2.columns ): diff_util = avg_util2 - avg_util1 st.metric("Utilization Δ", f"{diff_util:+.2%}") st.markdown("---") # Time series comparison st.markdown("#### Performance Over Time") if ( "disposal_rate" in df1.columns and "disposal_rate" in df2.columns ): fig = go.Figure() fig.add_trace( go.Scatter( x=df1.index, y=df1["disposal_rate"], mode="lines", name=run1_label, line=dict(color="blue"), ) ) fig.add_trace( go.Scatter( x=df2.index, y=df2["disposal_rate"], mode="lines", name=run2_label, line=dict(color="red"), ) ) fig.update_layout( title="Disposal Rate Comparison", xaxis_title="Day", yaxis_title="Disposal Rate", height=400, ) st.plotly_chart(fig, use_container_width=True) if ( "utilization" in df1.columns and "utilization" in df2.columns ): fig = go.Figure() fig.add_trace( go.Scatter( x=df1.index, y=df1["utilization"], mode="lines", name=run1_label, line=dict(color="blue"), ) ) fig.add_trace( go.Scatter( x=df2.index, y=df2["utilization"], mode="lines", name=run2_label, line=dict(color="red"), ) ) fig.update_layout( title="Utilization Comparison", xaxis_title="Day", yaxis_title="Utilization", height=400, ) st.plotly_chart(fig, use_container_width=True) except Exception as e: st.error(f"Error comparing runs: {e}") # TAB 2: Performance Trends with tab2: st.markdown("### Performance Trends") st.markdown("Analyze performance metrics across all simulation runs.") # Use centralized runs directory recursively from src.config.paths import get_runs_base runs_dir = get_runs_base() if not runs_dir.exists(): st.warning("No simulation outputs found.") else: metric_files = list(runs_dir.rglob("metrics.csv")) run_paths = sorted({p.parent for p in metric_files}) if not run_paths: st.info("No simulation runs found.") else: # Aggregate metrics from all runs all_metrics = [] for run_dir in run_paths: metrics_path = run_dir / "metrics.csv" try: df = pd.read_csv(metrics_path) # Use relative label for clarity across nested structures try: df["run"] = str(run_dir.relative_to(runs_dir)) except ValueError: # Fallback to folder name if not under base (shouldn't happen) df["run"] = run_dir.name all_metrics.append(df) except Exception: pass # Skip invalid metrics files if not all_metrics: st.warning("No valid metrics files found.") else: combined_df = pd.concat(all_metrics, ignore_index=True) st.markdown(f"**Loaded metrics from {len(all_metrics)} run(s)**") # Aggregate statistics st.markdown("#### Aggregate Statistics") col1, col2, col3 = st.columns(3) with col1: if "disposal_rate" in combined_df.columns: overall_avg = combined_df["disposal_rate"].mean() st.metric("Overall Avg. Disposal Rate", f"{overall_avg:.2%}") with col2: if "utilization" in combined_df.columns: overall_util = combined_df["utilization"].mean() st.metric("Overall Avg. Utilization", f"{overall_util:.2%}") with col3: st.metric("Total Simulation Days", len(combined_df)) st.markdown("---") # Distribution plots st.markdown("#### Metric Distributions") if "disposal_rate" in combined_df.columns: fig = px.box( combined_df, x="run", y="disposal_rate", title="Disposal Rate Distribution by Run", labels={ "disposal_rate": "Disposal Rate", "run": "Simulation Run", }, ) fig.update_layout(height=400) st.plotly_chart(fig, use_container_width=True) if "utilization" in combined_df.columns: fig = px.box( combined_df, x="run", y="utilization", title="Utilization Distribution by Run", labels={"utilization": "Utilization", "run": "Simulation Run"}, ) fig.update_layout(height=400) st.plotly_chart(fig, use_container_width=True) # TAB 3: Fairness Analysis with tab3: st.markdown("### Fairness Analysis") st.markdown("Evaluate equity and distribution of case handling across the system.") st.markdown(""" Fairness metrics evaluate whether the scheduling system treats all cases equitably: - **Gini Coefficient**: Measures inequality in disposal times (0 = perfect equality, 1 = maximum inequality) - **Age Distribution**: Shows how long cases wait before disposal - **Case Type Balance**: Ensures no case type is systematically disadvantaged """) from src.config.paths import get_runs_base runs_dir = get_runs_base() if not runs_dir.exists(): st.warning("No simulation outputs found.") else: event_files = list(runs_dir.rglob("events.csv")) run_event_paths = sorted({p.parent for p in event_files}) if not run_event_paths: st.info("No simulation runs found.") else: # Select run for fairness analysis labels = [str(p.relative_to(runs_dir)) for p in run_event_paths] label_to_path = {str(p.relative_to(runs_dir)): p for p in run_event_paths} selected_run = st.selectbox( "Select simulation run for fairness analysis", options=labels, key="fairness_run", ) # Look for events file (contains case-level data) events_path = label_to_path[selected_run] / "events.csv" if not events_path.exists(): st.warning( "Events file not found. Fairness analysis requires detailed event logs." ) else: try: events_df = pd.read_csv(events_path) st.success("Loaded event data") # Case age analysis if "case_id" in events_df.columns and "date" in events_df.columns: st.markdown("#### Case Age Distribution") # Calculate case ages (simplified - would need filed_date for accurate calculation) case_dates = events_df.groupby("case_id")["date"].agg( ["min", "max"] ) case_dates["age_days"] = ( pd.to_datetime(case_dates["max"]) - pd.to_datetime(case_dates["min"]) ).dt.days fig = px.histogram( case_dates, x="age_days", nbins=30, title="Distribution of Case Ages", labels={ "age_days": "Age (days)", "count": "Number of Cases", }, ) fig.update_layout(height=400) st.plotly_chart(fig, use_container_width=True) # Summary statistics col1, col2, col3 = st.columns(3) with col1: st.metric( "Median Age", f"{case_dates['age_days'].median():.0f} days", ) with col2: st.metric( "Mean Age", f"{case_dates['age_days'].mean():.0f} days" ) with col3: st.metric( "Max Age", f"{case_dates['age_days'].max():.0f} days" ) # Additional Fairness Metrics: Gini and Lorenz Curve st.markdown("#### Inequality Metrics (Fairness)") def _gini(values: np.ndarray) -> float: v = np.asarray(values, dtype=float) v = v[np.isfinite(v)] v = v[v >= 0] if v.size == 0: return float("nan") if np.all(v == 0): return 0.0 v_sorted = np.sort(v) n = v_sorted.size cumulative = np.cumsum(v_sorted) # Gini based on cumulative shares gini = (n + 1 - 2 * np.sum(cumulative) / cumulative[-1]) / n return float(gini) ages = case_dates["age_days"].to_numpy() gini_age = _gini(ages) col_a, col_b = st.columns(2) with col_a: if np.isfinite(gini_age): st.metric("Gini (Age Inequality)", f"{gini_age:.3f}") else: st.info("Gini (Age) not available") # Lorenz curve for ages with col_b: try: ages_clean = ages[np.isfinite(ages)] ages_clean = ages_clean[ages_clean >= 0] if ages_clean.size > 0: ages_sorted = np.sort(ages_clean) cum_ages = np.cumsum(ages_sorted) cum_ages = np.insert(cum_ages, 0, 0) cum_pop = np.linspace(0, 1, num=cum_ages.size) lorenz = cum_ages / cum_ages[-1] fig_lorenz = go.Figure() fig_lorenz.add_trace( go.Scatter( x=cum_pop, y=lorenz, mode="lines", name="Lorenz", ) ) fig_lorenz.add_trace( go.Scatter( x=[0, 1], y=[0, 1], mode="lines", name="Equality", line=dict(dash="dash"), ) ) fig_lorenz.update_layout( title="Lorenz Curve of Case Ages", xaxis_title="Cumulative share of cases", yaxis_title="Cumulative share of total age", height=350, ) st.plotly_chart( fig_lorenz, use_container_width=True ) else: st.info("Not enough data to plot Lorenz curve") except Exception: st.info( "Unable to compute Lorenz curve for current data" ) # Case type fairness if "case_type" in events_df.columns: st.markdown("---") st.markdown("#### Case Type Balance") case_type_counts = ( events_df["case_type"].value_counts().reset_index() ) case_type_counts.columns = ["case_type", "count"] fig = px.bar( case_type_counts.head(10), x="case_type", y="count", title="Top 10 Case Types by Hearing Count", labels={ "case_type": "Case Type", "count": "Number of Hearings", }, ) fig.update_layout(height=400, xaxis_tickangle=-45) st.plotly_chart(fig, use_container_width=True) # Age distribution by case type (top N by cases) st.markdown("#### Age Distribution by Case Type (Top 8)") try: # Map each case_id to a case_type (take the first occurrence) cid_to_type = ( events_df.sort_values("date") .groupby("case_id")["case_type"] .first() ) age_with_type = ( case_dates[["age_days"]] .join(cid_to_type, how="left") .dropna( subset=["case_type"] ) # keep only cases with type ) top_types = ( age_with_type["case_type"] .value_counts() .head(8) .index.tolist() ) filt = age_with_type["case_type"].isin(top_types) fig_box = px.box( age_with_type[filt], x="case_type", y="age_days", points="outliers", title="Case Age by Case Type (Top 8)", labels={ "case_type": "Case Type", "age_days": "Age (days)", }, ) fig_box.update_layout(height=420, xaxis_tickangle=-45) st.plotly_chart(fig_box, use_container_width=True) # Gini by case type (Top 8) st.markdown("#### Inequality by Case Type (Gini)") gini_rows = [] for ctype in top_types: vals = age_with_type.loc[ age_with_type["case_type"] == ctype, "age_days" ].to_numpy() g = _gini(vals) gini_rows.append({"case_type": ctype, "gini": g}) gini_df = pd.DataFrame(gini_rows).dropna() if not gini_df.empty: fig_gini = px.bar( gini_df, x="case_type", y="gini", title="Gini Coefficient by Case Type (Top 8)", labels={"case_type": "Case Type", "gini": "Gini"}, ) fig_gini.update_layout( height=380, xaxis_tickangle=-45, yaxis_range=[0, 1] ) st.plotly_chart(fig_gini, use_container_width=True) else: st.info("Insufficient data to compute per-type Gini") except Exception as _: st.info( "Unable to compute per-type age distributions for current data" ) except Exception as e: st.error(f"Error loading events data: {e}") # TAB 4: Report Generation with tab4: st.markdown("### Report Generation") st.markdown( "Generate comprehensive reports summarizing system performance and analysis." ) outputs_dir = Path("outputs") runs_dir = outputs_dir / "simulation_runs" if not runs_dir.exists(): st.warning("No simulation outputs found.") else: metric_files = list(runs_dir.rglob("metrics.csv")) run_paths = sorted({p.parent for p in metric_files}) if not run_paths: st.info("No simulation runs found.") else: st.markdown("#### Select Data for Report") # Multi-select runs labels = [str(p.relative_to(runs_dir)) for p in run_paths] label_to_path = {str(p.relative_to(runs_dir)): p for p in run_paths} selected_runs = st.multiselect( "Include simulation runs", options=labels, default=[labels[0]] if labels else [], key="report_runs", ) # Report options include_metrics = st.checkbox("Include performance metrics", value=True) include_fairness = st.checkbox("Include fairness analysis", value=True) include_comparison = st.checkbox( "Include run comparisons", value=len(selected_runs) > 1 ) if st.button("Generate Report", type="primary", use_container_width=True): if not selected_runs: st.error("Select at least one simulation run") else: with st.spinner("Generating report..."): # Create report content report_sections = [] # Header report_sections.append( "# Court Scheduling System - Performance Report" ) report_sections.append( f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" ) report_sections.append( f"Runs included: {', '.join(selected_runs)}" ) report_sections.append("") # Performance metrics if include_metrics: report_sections.append("## Performance Metrics") for run_name in selected_runs: metrics_path = label_to_path[run_name] / "metrics.csv" if metrics_path.exists(): df = pd.read_csv(metrics_path) report_sections.append(f"### {run_name}") if "disposal_rate" in df.columns: avg_disposal = df["disposal_rate"].mean() report_sections.append( f"- Average Disposal Rate: {avg_disposal:.2%}" ) if "utilization" in df.columns: avg_util = df["utilization"].mean() report_sections.append( f"- Average Utilization: {avg_util:.2%}" ) report_sections.append( f"- Simulation Days: {len(df)}" ) report_sections.append("") # Comparison if include_comparison and len(selected_runs) > 1: report_sections.append("## Comparison Analysis") report_sections.append( f"Comparing: {selected_runs[0]} vs {selected_runs[1]}" ) report_sections.append("") # Fairness if include_fairness: report_sections.append("## Fairness Analysis") report_sections.append( "Fairness metrics evaluate equitable treatment of all cases." ) report_sections.append("") # Footer report_sections.append("---") report_sections.append( "Report generated by Court Scheduling System Analytics" ) report_content = "\n".join(report_sections) # Display report st.markdown("#### Report Preview") st.markdown(report_content) # Download button timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") st.download_button( label="Download Report (Markdown)", data=report_content, file_name=f"scheduling_report_{timestamp}.md", mime="text/markdown", ) # Footer st.markdown("---") st.caption("Analytics & Reports - Performance analysis and comparative evaluation")