Spaces:
Sleeping
Sleeping
feat: Add interactive multi-page dashboard with EDA, Ripeness, and RL visualization
Implemented comprehensive Streamlit dashboard with three main pages:
- Page 1 (EDA Analysis): Interactive visualizations with filters, adjournment heatmaps, data export
- Page 2 (Ripeness Classifier): Full explainability with threshold tuning and batch classification
- Page 3 (RL Training): Training configuration, progress visualization, model comparison
Key features:
- Cached data loading for performance
- CLI integration via dashboard command
- Interactive controls and real-time updates
- Component reuse from existing modules
- Comprehensive documentation in docs/DASHBOARD.md
- cli/main.py +45 -0
- docs/DASHBOARD.md +404 -0
- scheduler/dashboard/__init__.py +3 -0
- scheduler/dashboard/app.py +110 -0
- scheduler/dashboard/pages/1_EDA_Analysis.py +273 -0
- scheduler/dashboard/pages/2_Ripeness_Classifier.py +343 -0
- scheduler/dashboard/pages/3_RL_Training.py +335 -0
- scheduler/dashboard/utils/__init__.py +19 -0
- scheduler/dashboard/utils/data_loader.py +149 -0
cli/main.py
CHANGED
|
@@ -370,6 +370,51 @@ def workflow(
|
|
| 370 |
raise typer.Exit(code=1)
|
| 371 |
|
| 372 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
@app.command()
|
| 374 |
def version() -> None:
|
| 375 |
"""Show version information."""
|
|
|
|
| 370 |
raise typer.Exit(code=1)
|
| 371 |
|
| 372 |
|
| 373 |
+
@app.command()
def dashboard(
    port: int = typer.Option(8501, "--port", "-p", help="Port to run dashboard on"),
    host: str = typer.Option("localhost", "--host", help="Host address to bind to"),
) -> None:
    """Launch the interactive Streamlit dashboard.

    Resolves the dashboard entry point relative to this CLI module and
    spawns ``streamlit run`` on it as a subprocess, forwarding the chosen
    host and port. Blocks until the dashboard process exits or the user
    presses Ctrl+C.

    Raises:
        typer.Exit: If the dashboard app file is missing, or if launching
            Streamlit fails for any other reason.
    """
    import subprocess
    import sys

    console.print("[bold blue]Launching Interactive Dashboard[/bold blue]")
    console.print(f"Dashboard will be available at: http://{host}:{port}")
    console.print("Press Ctrl+C to stop the dashboard\n")

    # Get path to dashboard app (relative to this CLI module's package root).
    app_path = Path(__file__).parent.parent / "scheduler" / "dashboard" / "app.py"

    # Validate OUTSIDE the try block. In the original, this typer.Exit was
    # raised inside the try and swallowed by the broad `except Exception`
    # below (typer.Exit subclasses RuntimeError), printing a spurious
    # second "Error:" message before exiting.
    if not app_path.exists():
        console.print(f"[bold red]Error:[/bold red] Dashboard app not found at {app_path}")
        raise typer.Exit(code=1)

    # Run streamlit via the current interpreter so the active venv is used.
    cmd = [
        sys.executable,
        "-m",
        "streamlit",
        "run",
        str(app_path),
        "--server.port",
        str(port),
        "--server.address",
        host,
        "--browser.gatherUsageStats",
        "false",
    ]

    try:
        subprocess.run(cmd)
    except KeyboardInterrupt:
        console.print("\n[yellow]Dashboard stopped[/yellow]")
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {e}")
        raise typer.Exit(code=1) from e
|
| 416 |
+
|
| 417 |
+
|
| 418 |
@app.command()
|
| 419 |
def version() -> None:
|
| 420 |
"""Show version information."""
|
docs/DASHBOARD.md
ADDED
|
@@ -0,0 +1,404 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Interactive Dashboard - Living Documentation
|
| 2 |
+
|
| 3 |
+
**Last Updated**: 2025-11-27
|
| 4 |
+
**Status**: Initial Implementation Complete
|
| 5 |
+
**Version**: 0.1.0
|
| 6 |
+
|
| 7 |
+
## Overview
|
| 8 |
+
|
| 9 |
+
This document tracks the design decisions, architecture, usage patterns, and evolution of the Interactive Multi-Page Dashboard for the Court Scheduling System.
|
| 10 |
+
|
| 11 |
+
## Purpose and Goals
|
| 12 |
+
|
| 13 |
+
The dashboard provides three key functionalities:
|
| 14 |
+
1. **EDA Analysis** - Visualize and explore court case data patterns
|
| 15 |
+
2. **Ripeness Classifier** - Interactive explainability and threshold tuning
|
| 16 |
+
3. **RL Training** - Train and visualize reinforcement learning agents
|
| 17 |
+
|
| 18 |
+
### Design Philosophy
|
| 19 |
+
- Transparency: Every algorithm decision should be explainable
|
| 20 |
+
- Interactivity: Users can adjust parameters and see immediate impact
|
| 21 |
+
- Efficiency: Data caching to minimize load times
|
| 22 |
+
- Integration: Seamless integration with existing CLI and modules
|
| 23 |
+
|
| 24 |
+
## Architecture
|
| 25 |
+
|
| 26 |
+
### Technology Stack
|
| 27 |
+
|
| 28 |
+
**Framework**: Streamlit 1.28+
|
| 29 |
+
- Chosen for rapid prototyping and native multi-page support
|
| 30 |
+
- Built-in state management via `st.session_state`
|
| 31 |
+
- Excellent integration with Plotly and Pandas/Polars
|
| 32 |
+
|
| 33 |
+
**Visualization**: Plotly
|
| 34 |
+
- Interactive charts (zoom, pan, hover)
|
| 35 |
+
- Better aesthetics than Matplotlib for dashboards
|
| 36 |
+
- Native Streamlit support
|
| 37 |
+
|
| 38 |
+
**Data Processing**:
|
| 39 |
+
- Polars for fast CSV loading
|
| 40 |
+
- Pandas for compatibility with existing code
|
| 41 |
+
- Caching with `@st.cache_data` decorator
|
| 42 |
+
|
| 43 |
+
### Directory Structure
|
| 44 |
+
|
| 45 |
+
```
|
| 46 |
+
scheduler/
|
| 47 |
+
dashboard/
|
| 48 |
+
__init__.py # Package initialization
|
| 49 |
+
app.py # Main entry point (home page)
|
| 50 |
+
utils/
|
| 51 |
+
__init__.py
|
| 52 |
+
data_loader.py # Cached data loading functions
|
| 53 |
+
pages/
|
| 54 |
+
1_EDA_Analysis.py # EDA visualizations
|
| 55 |
+
2_Ripeness_Classifier.py # Ripeness explainability
|
| 56 |
+
3_RL_Training.py # RL training interface
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
### Module Reuse Strategy
|
| 60 |
+
|
| 61 |
+
The dashboard reuses existing components without duplication:
|
| 62 |
+
- `scheduler.data.param_loader.ParameterLoader` - Load EDA-derived parameters
|
| 63 |
+
- `scheduler.data.case_generator.CaseGenerator` - Load generated cases
|
| 64 |
+
- `scheduler.core.ripeness.RipenessClassifier` - Classification logic
|
| 65 |
+
- `scheduler.core.case.Case` - Case data structure
|
| 66 |
+
- `rl.training.train_agent()` - RL training (future integration)
|
| 67 |
+
|
| 68 |
+
## Page Implementations
|
| 69 |
+
|
| 70 |
+
### Page 1: EDA Analysis
|
| 71 |
+
|
| 72 |
+
**Features**:
|
| 73 |
+
- Key metrics dashboard (total cases, adjournment rates, stages)
|
| 74 |
+
- Interactive filters (case type, stage)
|
| 75 |
+
- Multiple visualizations:
|
| 76 |
+
- Case distribution by type (bar chart + pie chart)
|
| 77 |
+
- Stage analysis (bar chart + pie chart)
|
| 78 |
+
- Adjournment patterns (bar charts by type and stage)
|
| 79 |
+
- Adjournment probability heatmap (stage × case type)
|
| 80 |
+
- Raw data viewer with download capability
|
| 81 |
+
|
| 82 |
+
**Data Sources**:
|
| 83 |
+
- `Data/processed/cleaned_cases.csv` - Cleaned case data from EDA pipeline
|
| 84 |
+
- `configs/parameters/` - Pre-computed parameters from ParameterLoader
|
| 85 |
+
|
| 86 |
+
**Design Decisions**:
|
| 87 |
+
- Use tabs instead of separate sections for better organization
|
| 88 |
+
- Show top 10/15 items in charts to avoid clutter
|
| 89 |
+
- Provide download button for filtered data
|
| 90 |
+
- Cache data with 1-hour TTL to balance freshness and performance
|
| 91 |
+
|
| 92 |
+
### Page 2: Ripeness Classifier
|
| 93 |
+
|
| 94 |
+
**Features**:
|
| 95 |
+
- **Tab 1: Configuration**
|
| 96 |
+
- Display current thresholds
|
| 97 |
+
- Stage-specific rules table
|
| 98 |
+
- Decision tree logic explanation
|
| 99 |
+
- **Tab 2: Interactive Testing**
|
| 100 |
+
- Synthetic case creation
|
| 101 |
+
- Real-time classification with explanations
|
| 102 |
+
- Feature importance visualization
|
| 103 |
+
- Criteria pass/fail breakdown
|
| 104 |
+
- **Tab 3: Batch Classification**
|
| 105 |
+
- Load generated test cases
|
| 106 |
+
- Classify all with current thresholds
|
| 107 |
+
- Show distribution (RIPE/UNRIPE/UNKNOWN)
|
| 108 |
+
|
| 109 |
+
**State Management**:
|
| 110 |
+
- Thresholds stored in `st.session_state`
|
| 111 |
+
- Sidebar sliders for real-time adjustment
|
| 112 |
+
- Reset button to restore defaults
|
| 113 |
+
- Session-based (not persisted to disk)
|
| 114 |
+
|
| 115 |
+
**Explainability Approach**:
|
| 116 |
+
- Clear criteria breakdown (service hearings, case age, stage days, keywords)
|
| 117 |
+
- Visual indicators (✓/✗) for pass/fail
|
| 118 |
+
- Feature importance bar chart
|
| 119 |
+
- Before/after comparison capability
|
| 120 |
+
|
| 121 |
+
**Design Decisions**:
|
| 122 |
+
- Simplified classification logic for demo (uses basic criteria)
|
| 123 |
+
- Future: Integrate actual RipenessClassifier.classify_case()
|
| 124 |
+
- Stage-specific rules hardcoded for now (future: load from config)
|
| 125 |
+
- Color coding: green (RIPE), orange (UNKNOWN), red (UNRIPE)
|
| 126 |
+
|
| 127 |
+
### Page 3: RL Training
|
| 128 |
+
|
| 129 |
+
**Features**:
|
| 130 |
+
- **Tab 1: Train Agent**
|
| 131 |
+
- Configuration form (episodes, learning rate, epsilon, etc.)
|
| 132 |
+
- Training progress visualization (demo mode)
|
| 133 |
+
- Multiple live charts (disposal rate, rewards, states, epsilon decay)
|
| 134 |
+
- Command generation for CLI training
|
| 135 |
+
- **Tab 2: Training History**
|
| 136 |
+
- Load and display previous training runs
|
| 137 |
+
- Plot historical performance
|
| 138 |
+
- **Tab 3: Model Comparison**
|
| 139 |
+
- Load saved models from models/ directory
|
| 140 |
+
- Compare Q-table sizes and hyperparameters
|
| 141 |
+
- Visualization of model differences
|
| 142 |
+
|
| 143 |
+
**Demo Mode**:
|
| 144 |
+
- Current implementation simulates training results
|
| 145 |
+
- Generates synthetic stats for visualization
|
| 146 |
+
- Shows CLI command for actual training
|
| 147 |
+
- Future: Integrate real-time training with rl.training.train_agent()
|
| 148 |
+
|
| 149 |
+
**Design Decisions**:
|
| 150 |
+
- Demo mode chosen for initial release (no blocking UI during training)
|
| 151 |
+
- Future: Add async training with progress updates
|
| 152 |
+
- Hyperparameter guide in expander for educational value
|
| 153 |
+
- Model persistence via pickle (existing pattern)
|
| 154 |
+
|
| 155 |
+
## CLI Integration
|
| 156 |
+
|
| 157 |
+
### Command
|
| 158 |
+
```bash
|
| 159 |
+
uv run court-scheduler dashboard [--port PORT] [--host HOST]
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
**Default**: `http://localhost:8501`
|
| 163 |
+
|
| 164 |
+
**Implementation**:
|
| 165 |
+
- Added to `cli/main.py` as `@app.command()`
|
| 166 |
+
- Uses subprocess to launch Streamlit
|
| 167 |
+
- Validates dashboard app.py exists before launching
|
| 168 |
+
- Handles KeyboardInterrupt gracefully
|
| 169 |
+
|
| 170 |
+
**Usage Example**:
|
| 171 |
+
```bash
|
| 172 |
+
# Launch on default port
|
| 173 |
+
uv run court-scheduler dashboard
|
| 174 |
+
|
| 175 |
+
# Custom port
|
| 176 |
+
uv run court-scheduler dashboard --port 8080
|
| 177 |
+
|
| 178 |
+
# Bind to all interfaces
|
| 179 |
+
uv run court-scheduler dashboard --host 0.0.0.0 --port 8080
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
## Data Flow
|
| 183 |
+
|
| 184 |
+
### Loading Sequence
|
| 185 |
+
1. User launches dashboard via CLI
|
| 186 |
+
2. `app.py` loads, displays home page and system status
|
| 187 |
+
3. User navigates to a page (e.g., EDA Analysis)
|
| 188 |
+
4. Page imports data_loader utilities
|
| 189 |
+
5. `@st.cache_data` checks cache for data
|
| 190 |
+
6. If not cached, load from disk and cache
|
| 191 |
+
7. Data processed and visualized
|
| 192 |
+
8. User interactions trigger re-renders (cached data reused)
|
| 193 |
+
|
| 194 |
+
### Caching Strategy
|
| 195 |
+
- **TTL**: 3600 seconds (1 hour) for data files
|
| 196 |
+
- **No TTL**: For computed statistics (invalidates on data change)
|
| 197 |
+
- **Session State**: For UI state (thresholds, training configs)
|
| 198 |
+
|
| 199 |
+
### Performance Considerations
|
| 200 |
+
- Polars for fast CSV loading
|
| 201 |
+
- Limit DataFrame display to first 100 rows
|
| 202 |
+
- Top N filtering for visualizations (top 10/15)
|
| 203 |
+
- Lazy loading (pages only load data when accessed)
|
| 204 |
+
|
| 205 |
+
## Usage Patterns
|
| 206 |
+
|
| 207 |
+
### Typical Workflow 1: EDA Exploration
|
| 208 |
+
1. Run EDA pipeline: `uv run court-scheduler eda`
|
| 209 |
+
2. Launch dashboard: `uv run court-scheduler dashboard`
|
| 210 |
+
3. Navigate to EDA Analysis page
|
| 211 |
+
4. Apply filters (case type, stage)
|
| 212 |
+
5. Explore visualizations
|
| 213 |
+
6. Download filtered data if needed
|
| 214 |
+
|
| 215 |
+
### Typical Workflow 2: Threshold Tuning
|
| 216 |
+
1. Generate test cases: `uv run court-scheduler generate`
|
| 217 |
+
2. Launch dashboard: `uv run court-scheduler dashboard`
|
| 218 |
+
3. Navigate to Ripeness Classifier page
|
| 219 |
+
4. Adjust thresholds in sidebar
|
| 220 |
+
5. Test with synthetic case (Tab 2)
|
| 221 |
+
6. Run batch classification (Tab 3)
|
| 222 |
+
7. Analyze impact on RIPE/UNRIPE distribution
|
| 223 |
+
|
| 224 |
+
### Typical Workflow 3: RL Training
|
| 225 |
+
1. Launch dashboard: `uv run court-scheduler dashboard`
|
| 226 |
+
2. Navigate to RL Training page
|
| 227 |
+
3. Configure hyperparameters (Tab 1)
|
| 228 |
+
4. Copy CLI command and run separately (or use demo)
|
| 229 |
+
5. Return to dashboard, view history (Tab 2)
|
| 230 |
+
6. Compare models (Tab 3)
|
| 231 |
+
|
| 232 |
+
## Future Enhancements
|
| 233 |
+
|
| 234 |
+
### Planned Features
|
| 235 |
+
- [ ] Real-time RL training integration (non-blocking)
|
| 236 |
+
- [ ] RipenessCalibrator integration (auto-suggest thresholds)
|
| 237 |
+
- [ ] RipenessMetrics tracking (false positive/negative rates)
|
| 238 |
+
- [ ] Actual RipenessClassifier integration (not simplified logic)
|
| 239 |
+
- [ ] EDA plot regeneration option
|
| 240 |
+
- [ ] Export threshold configurations
|
| 241 |
+
- [ ] Simulation runner from dashboard
|
| 242 |
+
- [ ] Authentication (if deployed externally)
|
| 243 |
+
|
| 244 |
+
### Technical Improvements
|
| 245 |
+
- [ ] Async data loading for large datasets
|
| 246 |
+
- [ ] WebSocket support for real-time training updates
|
| 247 |
+
- [ ] Plotly Dash migration (if more customization needed)
|
| 248 |
+
- [ ] Unit tests for dashboard components
|
| 249 |
+
- [ ] Playwright automated UI tests
|
| 250 |
+
|
| 251 |
+
### UX Improvements
|
| 252 |
+
- [ ] Dark mode support
|
| 253 |
+
- [ ] Custom color themes
|
| 254 |
+
- [ ] Keyboard shortcuts
|
| 255 |
+
- [ ] Save/load dashboard state
|
| 256 |
+
- [ ] Export visualizations as PNG/PDF
|
| 257 |
+
- [ ] Guided tour for new users
|
| 258 |
+
|
| 259 |
+
## Testing Strategy
|
| 260 |
+
|
| 261 |
+
### Manual Testing Checklist
|
| 262 |
+
- [ ] Dashboard launches without errors
|
| 263 |
+
- [ ] All pages load correctly
|
| 264 |
+
- [ ] EDA page: filters work, visualizations render
|
| 265 |
+
- [ ] Ripeness page: sliders adjust thresholds, classification updates
|
| 266 |
+
- [ ] RL page: form submission works, charts render
|
| 267 |
+
- [ ] CLI command generation correct
|
| 268 |
+
- [ ] System status checks work
|
| 269 |
+
|
| 270 |
+
### Integration Testing
|
| 271 |
+
- [ ] Load actual cleaned data
|
| 272 |
+
- [ ] Load generated test cases
|
| 273 |
+
- [ ] Load parameters from configs/
|
| 274 |
+
- [ ] Verify caching behavior
|
| 275 |
+
- [ ] Test with missing data files
|
| 276 |
+
|
| 277 |
+
### Performance Testing
|
| 278 |
+
- [ ] Large dataset loading (100K+ rows)
|
| 279 |
+
- [ ] Batch classification (10K+ cases)
|
| 280 |
+
- [ ] Multiple concurrent users (if deployed)
|
| 281 |
+
|
| 282 |
+
## Troubleshooting
|
| 283 |
+
|
| 284 |
+
### Common Issues
|
| 285 |
+
|
| 286 |
+
**Issue**: Dashboard won't launch
|
| 287 |
+
- **Check**: Is Streamlit installed? `pip list | grep streamlit`
|
| 288 |
+
- **Solution**: Ensure venv is activated, run `uv sync`
|
| 289 |
+
|
| 290 |
+
**Issue**: "Data file not found" warnings
|
| 291 |
+
- **Check**: Has EDA pipeline been run?
|
| 292 |
+
- **Solution**: Run `uv run court-scheduler eda`
|
| 293 |
+
|
| 294 |
+
**Issue**: Empty visualizations
|
| 295 |
+
- **Check**: Is `Data/processed/cleaned_cases.csv` empty?
|
| 296 |
+
- **Solution**: Verify EDA pipeline completed successfully
|
| 297 |
+
|
| 298 |
+
**Issue**: Ripeness batch classification fails
|
| 299 |
+
- **Check**: Are test cases generated?
|
| 300 |
+
- **Solution**: Run `uv run court-scheduler generate`
|
| 301 |
+
|
| 302 |
+
**Issue**: Slow page loads
|
| 303 |
+
- **Check**: Is data being cached?
|
| 304 |
+
- **Solution**: Check Streamlit cache, reduce data size
|
| 305 |
+
|
| 306 |
+
## Design Decisions Log
|
| 307 |
+
|
| 308 |
+
### Decision 1: Streamlit over Dash/Gradio
|
| 309 |
+
**Date**: 2025-11-27
|
| 310 |
+
**Rationale**:
|
| 311 |
+
- Already in dependencies (no new install)
|
| 312 |
+
- Simpler multi-page support
|
| 313 |
+
- Better for data science workflows
|
| 314 |
+
- Faster development time
|
| 315 |
+
|
| 316 |
+
**Alternatives Considered**:
|
| 317 |
+
- Dash: More customizable but more boilerplate
|
| 318 |
+
- Gradio: Better for ML demos, less flexible
|
| 319 |
+
|
| 320 |
+
### Decision 2: Plotly over Matplotlib
|
| 321 |
+
**Date**: 2025-11-27
|
| 322 |
+
**Rationale**:
|
| 323 |
+
- Interactive by default (zoom, pan, hover)
|
| 324 |
+
- Better aesthetics for dashboards
|
| 325 |
+
- Native Streamlit integration
|
| 326 |
+
- Users expect interactivity in modern dashboards
|
| 327 |
+
|
| 328 |
+
**Note**: Matplotlib still used for static EDA plots already generated
|
| 329 |
+
|
| 330 |
+
### Decision 3: Session State for Thresholds
|
| 331 |
+
**Date**: 2025-11-27
|
| 332 |
+
**Rationale**:
|
| 333 |
+
- Ephemeral experimentation (users can reset easily)
|
| 334 |
+
- No need to persist to disk
|
| 335 |
+
- Simpler implementation
|
| 336 |
+
- Users can export configs separately if needed
|
| 337 |
+
|
| 338 |
+
**Future**: May add "save configuration" feature
|
| 339 |
+
|
| 340 |
+
### Decision 4: Demo Mode for RL Training
|
| 341 |
+
**Date**: 2025-11-27
|
| 342 |
+
**Rationale**:
|
| 343 |
+
- Avoid blocking UI during long training runs
|
| 344 |
+
- Show visualization capabilities
|
| 345 |
+
- Guide users to use CLI for actual training
|
| 346 |
+
- Simpler initial implementation
|
| 347 |
+
|
| 348 |
+
**Future**: Add async training with WebSocket updates
|
| 349 |
+
|
| 350 |
+
### Decision 5: Simplified Ripeness Logic
|
| 351 |
+
**Date**: 2025-11-27
|
| 352 |
+
**Rationale**:
|
| 353 |
+
- Demonstrate explainability concept
|
| 354 |
+
- Avoid tight coupling with RipenessClassifier implementation
|
| 355 |
+
- Easier to understand for users
|
| 356 |
+
- Placeholder for full integration
|
| 357 |
+
|
| 358 |
+
**Future**: Integrate actual RipenessClassifier.classify_case()
|
| 359 |
+
|
| 360 |
+
## Maintenance Notes
|
| 361 |
+
|
| 362 |
+
### Dependencies
|
| 363 |
+
- Streamlit: Keep updated for security fixes
|
| 364 |
+
- Plotly: Monitor for breaking changes
|
| 365 |
+
- Polars: Ensure compatibility with Pandas conversion
|
| 366 |
+
|
| 367 |
+
### Code Quality
|
| 368 |
+
- Follow project ruff/black style
|
| 369 |
+
- Add docstrings to new functions
|
| 370 |
+
- Keep pages under 350 lines if possible
|
| 371 |
+
- Extract reusable components to utils/
|
| 372 |
+
|
| 373 |
+
### Performance Monitoring
|
| 374 |
+
- Monitor cache hit rates
|
| 375 |
+
- Track page load times
|
| 376 |
+
- Watch for memory leaks with large datasets
|
| 377 |
+
|
| 378 |
+
## Educational Value
|
| 379 |
+
|
| 380 |
+
The dashboard serves an educational purpose:
|
| 381 |
+
- **Transparency**: Shows how algorithms work (ripeness classifier)
|
| 382 |
+
- **Interactivity**: Lets users experiment (threshold tuning)
|
| 383 |
+
- **Visualization**: Makes complex data accessible (EDA plots)
|
| 384 |
+
- **Learning**: Explains RL concepts (hyperparameter guide)
|
| 385 |
+
|
| 386 |
+
This aligns with the "explainability" goal of the Code4Change project.
|
| 387 |
+
|
| 388 |
+
## Conclusion
|
| 389 |
+
|
| 390 |
+
The dashboard successfully provides:
|
| 391 |
+
1. Comprehensive EDA visualization
|
| 392 |
+
2. Full ripeness classifier explainability
|
| 393 |
+
3. RL training interface (demo mode)
|
| 394 |
+
4. CLI integration
|
| 395 |
+
5. Cached data loading
|
| 396 |
+
6. Interactive threshold tuning
|
| 397 |
+
|
| 398 |
+
Next steps focus on integrating real RL training and enhancing the ripeness classifier with actual implementation.
|
| 399 |
+
|
| 400 |
+
---
|
| 401 |
+
|
| 402 |
+
**Contributors**: Roy Aalekh (Initial Implementation)
|
| 403 |
+
**Project**: Code4Change Court Scheduling System
|
| 404 |
+
**Target**: Karnataka High Court Scheduling Optimization
|
scheduler/dashboard/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Interactive dashboard for Court Scheduling System."""
|
| 2 |
+
|
| 3 |
+
__version__ = "0.1.0"
|
scheduler/dashboard/app.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Main dashboard application for Court Scheduling System.
|
| 2 |
+
|
| 3 |
+
This is the entry point for the Streamlit multi-page dashboard.
|
| 4 |
+
Launch with: uv run court-scheduler dashboard
|
| 5 |
+
Or directly: streamlit run scheduler/dashboard/app.py
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import streamlit as st
|
| 11 |
+
|
| 12 |
+
from scheduler.dashboard.utils import get_data_status
|
| 13 |
+
|
| 14 |
+
# Page configuration
|
| 15 |
+
st.set_page_config(
|
| 16 |
+
page_title="Court Scheduling System Dashboard",
|
| 17 |
+
page_icon="⚖️",
|
| 18 |
+
layout="wide",
|
| 19 |
+
initial_sidebar_state="expanded",
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
# Main page content
|
| 23 |
+
st.title("⚖️ Court Scheduling System Dashboard")
|
| 24 |
+
st.markdown("**Karnataka High Court - Fair & Transparent Scheduling**")
|
| 25 |
+
|
| 26 |
+
st.markdown("---")
|
| 27 |
+
|
| 28 |
+
# Introduction
|
| 29 |
+
st.markdown("""
|
| 30 |
+
### Welcome to the Interactive Dashboard
|
| 31 |
+
|
| 32 |
+
This dashboard provides comprehensive insights and controls for the Court Scheduling System:
|
| 33 |
+
|
| 34 |
+
- **EDA Analysis**: Explore case data, stage transitions, and adjournment patterns
|
| 35 |
+
- **Ripeness Classifier**: Understand and tune the case readiness algorithm with full explainability
|
| 36 |
+
- **RL Training**: Train and visualize reinforcement learning agents for optimal scheduling
|
| 37 |
+
|
| 38 |
+
Navigate using the sidebar to access different sections.
|
| 39 |
+
""")
|
| 40 |
+
|
| 41 |
+
# System status
|
| 42 |
+
st.markdown("### System Status")
|
| 43 |
+
|
| 44 |
+
data_status = get_data_status()
|
| 45 |
+
|
| 46 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 47 |
+
|
| 48 |
+
with col1:
|
| 49 |
+
status = "✓" if data_status["cleaned_data"] else "✗"
|
| 50 |
+
color = "green" if data_status["cleaned_data"] else "red"
|
| 51 |
+
st.markdown(f":{color}[{status}] **Cleaned Data**")
|
| 52 |
+
|
| 53 |
+
with col2:
|
| 54 |
+
status = "✓" if data_status["parameters"] else "✗"
|
| 55 |
+
color = "green" if data_status["parameters"] else "red"
|
| 56 |
+
st.markdown(f":{color}[{status}] **Parameters**")
|
| 57 |
+
|
| 58 |
+
with col3:
|
| 59 |
+
status = "✓" if data_status["generated_cases"] else "✗"
|
| 60 |
+
color = "green" if data_status["generated_cases"] else "red"
|
| 61 |
+
st.markdown(f":{color}[{status}] **Test Cases**")
|
| 62 |
+
|
| 63 |
+
with col4:
|
| 64 |
+
status = "✓" if data_status["eda_figures"] else "✗"
|
| 65 |
+
color = "green" if data_status["eda_figures"] else "red"
|
| 66 |
+
st.markdown(f":{color}[{status}] **EDA Figures**")
|
| 67 |
+
|
| 68 |
+
st.markdown("---")
|
| 69 |
+
|
| 70 |
+
# Quick start guide
|
| 71 |
+
st.markdown("### Quick Start")
|
| 72 |
+
|
| 73 |
+
with st.expander("How to use this dashboard"):
|
| 74 |
+
st.markdown("""
|
| 75 |
+
**1. EDA Analysis**
|
| 76 |
+
- View statistical insights from court case data
|
| 77 |
+
- Explore case distributions, stage transitions, and patterns
|
| 78 |
+
- Filter by case type, stage, and date range
|
| 79 |
+
|
| 80 |
+
**2. Ripeness Classifier**
|
| 81 |
+
- Understand how cases are classified as RIPE/UNRIPE/UNKNOWN
|
| 82 |
+
- Adjust thresholds interactively and see real-time impact
|
| 83 |
+
- View case-level explainability with detailed reasoning
|
| 84 |
+
- Run calibration analysis to optimize thresholds
|
| 85 |
+
|
| 86 |
+
**3. RL Training**
|
| 87 |
+
- Configure and train reinforcement learning agents
|
| 88 |
+
- Monitor training progress in real-time
|
| 89 |
+
- Compare different models and hyperparameters
|
| 90 |
+
- Visualize Q-table and action distributions
|
| 91 |
+
""")
|
| 92 |
+
|
| 93 |
+
with st.expander("Prerequisites"):
|
| 94 |
+
st.markdown("""
|
| 95 |
+
Before using the dashboard, ensure you have:
|
| 96 |
+
|
| 97 |
+
1. **Run EDA pipeline**: `uv run court-scheduler eda`
|
| 98 |
+
2. **Generate test cases** (optional): `uv run court-scheduler generate`
|
| 99 |
+
3. **Parameters extracted**: Check that `configs/parameters/` exists
|
| 100 |
+
|
| 101 |
+
If any system status shows ✗ above, run the corresponding command first.
|
| 102 |
+
""")
|
| 103 |
+
|
| 104 |
+
# Footer
|
| 105 |
+
st.markdown("---")
|
| 106 |
+
st.markdown("""
|
| 107 |
+
<div style='text-align: center'>
|
| 108 |
+
<small>Court Scheduling System | Code4Change Hackathon | Karnataka High Court</small>
|
| 109 |
+
</div>
|
| 110 |
+
""", unsafe_allow_html=True)
|
scheduler/dashboard/pages/1_EDA_Analysis.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""EDA Analysis page - Explore court case data insights.
|
| 2 |
+
|
| 3 |
+
This page displays exploratory data analysis visualizations and statistics
|
| 4 |
+
from the court case dataset.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import plotly.express as px
|
| 13 |
+
import plotly.graph_objects as go
|
| 14 |
+
import streamlit as st
|
| 15 |
+
|
| 16 |
+
from scheduler.dashboard.utils import (
|
| 17 |
+
get_case_statistics,
|
| 18 |
+
load_cleaned_data,
|
| 19 |
+
load_param_loader,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
# Page configuration
|
| 23 |
+
st.set_page_config(
|
| 24 |
+
page_title="EDA Analysis",
|
| 25 |
+
page_icon="📊",
|
| 26 |
+
layout="wide",
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
st.title("📊 Exploratory Data Analysis")
|
| 30 |
+
st.markdown("Statistical insights from court case data")
|
| 31 |
+
|
| 32 |
+
# Load data
|
| 33 |
+
with st.spinner("Loading data..."):
|
| 34 |
+
try:
|
| 35 |
+
df = load_cleaned_data()
|
| 36 |
+
params = load_param_loader()
|
| 37 |
+
stats = get_case_statistics(df)
|
| 38 |
+
except Exception as e:
|
| 39 |
+
st.error(f"Error loading data: {e}")
|
| 40 |
+
st.info("Please run the EDA pipeline first: `uv run court-scheduler eda`")
|
| 41 |
+
st.stop()
|
| 42 |
+
|
| 43 |
+
if df.empty:
|
| 44 |
+
st.warning("No data available. Please run the EDA pipeline first.")
|
| 45 |
+
st.code("uv run court-scheduler eda")
|
| 46 |
+
st.stop()
|
| 47 |
+
|
| 48 |
+
# Sidebar filters
|
| 49 |
+
st.sidebar.header("Filters")
|
| 50 |
+
|
| 51 |
+
# Case type filter
|
| 52 |
+
available_case_types = df["CaseType"].unique().tolist() if "CaseType" in df else []
|
| 53 |
+
selected_case_types = st.sidebar.multiselect(
|
| 54 |
+
"Case Types",
|
| 55 |
+
options=available_case_types,
|
| 56 |
+
default=available_case_types,
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
# Stage filter
|
| 60 |
+
available_stages = df["Remappedstages"].unique().tolist() if "Remappedstages" in df else []
|
| 61 |
+
selected_stages = st.sidebar.multiselect(
|
| 62 |
+
"Stages",
|
| 63 |
+
options=available_stages,
|
| 64 |
+
default=available_stages,
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
# Apply filters
|
| 68 |
+
filtered_df = df.copy()
|
| 69 |
+
if selected_case_types:
|
| 70 |
+
filtered_df = filtered_df[filtered_df["CaseType"].isin(selected_case_types)]
|
| 71 |
+
if selected_stages:
|
| 72 |
+
filtered_df = filtered_df[filtered_df["Remappedstages"].isin(selected_stages)]
|
| 73 |
+
|
| 74 |
+
# Key metrics
|
| 75 |
+
st.markdown("### Key Metrics")
|
| 76 |
+
|
| 77 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 78 |
+
|
| 79 |
+
with col1:
|
| 80 |
+
total_cases = len(filtered_df)
|
| 81 |
+
st.metric("Total Cases", f"{total_cases:,}")
|
| 82 |
+
|
| 83 |
+
with col2:
|
| 84 |
+
n_case_types = len(filtered_df["CaseType"].unique()) if "CaseType" in filtered_df else 0
|
| 85 |
+
st.metric("Case Types", n_case_types)
|
| 86 |
+
|
| 87 |
+
with col3:
|
| 88 |
+
n_stages = len(filtered_df["Remappedstages"].unique()) if "Remappedstages" in filtered_df else 0
|
| 89 |
+
st.metric("Unique Stages", n_stages)
|
| 90 |
+
|
| 91 |
+
with col4:
|
| 92 |
+
if "Outcome" in filtered_df.columns:
|
| 93 |
+
adj_rate = (filtered_df["Outcome"] == "ADJOURNED").sum() / len(filtered_df)
|
| 94 |
+
st.metric("Adjournment Rate", f"{adj_rate:.1%}")
|
| 95 |
+
else:
|
| 96 |
+
st.metric("Adjournment Rate", "N/A")
|
| 97 |
+
|
| 98 |
+
st.markdown("---")
|
| 99 |
+
|
| 100 |
+
# Visualizations
|
| 101 |
+
tab1, tab2, tab3, tab4 = st.tabs(["Case Distribution", "Stage Analysis", "Adjournment Patterns", "Raw Data"])
|
| 102 |
+
|
| 103 |
+
with tab1:
|
| 104 |
+
st.markdown("### Case Distribution by Type")
|
| 105 |
+
|
| 106 |
+
if "CaseType" in filtered_df:
|
| 107 |
+
case_type_counts = filtered_df["CaseType"].value_counts().reset_index()
|
| 108 |
+
case_type_counts.columns = ["CaseType", "Count"]
|
| 109 |
+
|
| 110 |
+
fig = px.bar(
|
| 111 |
+
case_type_counts,
|
| 112 |
+
x="CaseType",
|
| 113 |
+
y="Count",
|
| 114 |
+
title="Number of Cases by Type",
|
| 115 |
+
labels={"CaseType": "Case Type", "Count": "Number of Cases"},
|
| 116 |
+
color="Count",
|
| 117 |
+
color_continuous_scale="Blues",
|
| 118 |
+
)
|
| 119 |
+
fig.update_layout(xaxis_tickangle=-45, height=500)
|
| 120 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 121 |
+
|
| 122 |
+
# Pie chart
|
| 123 |
+
fig_pie = px.pie(
|
| 124 |
+
case_type_counts,
|
| 125 |
+
values="Count",
|
| 126 |
+
names="CaseType",
|
| 127 |
+
title="Case Type Distribution",
|
| 128 |
+
)
|
| 129 |
+
st.plotly_chart(fig_pie, use_container_width=True)
|
| 130 |
+
else:
|
| 131 |
+
st.info("CaseType column not found in data")
|
| 132 |
+
|
| 133 |
+
with tab2:
|
| 134 |
+
st.markdown("### Stage Analysis")
|
| 135 |
+
|
| 136 |
+
if "Remappedstages" in filtered_df:
|
| 137 |
+
col1, col2 = st.columns(2)
|
| 138 |
+
|
| 139 |
+
with col1:
|
| 140 |
+
stage_counts = filtered_df["Remappedstages"].value_counts().reset_index()
|
| 141 |
+
stage_counts.columns = ["Stage", "Count"]
|
| 142 |
+
|
| 143 |
+
fig = px.bar(
|
| 144 |
+
stage_counts.head(10),
|
| 145 |
+
x="Count",
|
| 146 |
+
y="Stage",
|
| 147 |
+
orientation="h",
|
| 148 |
+
title="Top 10 Stages by Case Count",
|
| 149 |
+
labels={"Stage": "Stage", "Count": "Number of Cases"},
|
| 150 |
+
color="Count",
|
| 151 |
+
color_continuous_scale="Greens",
|
| 152 |
+
)
|
| 153 |
+
fig.update_layout(height=500)
|
| 154 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 155 |
+
|
| 156 |
+
with col2:
|
| 157 |
+
# Stage distribution pie chart
|
| 158 |
+
fig_pie = px.pie(
|
| 159 |
+
stage_counts.head(10),
|
| 160 |
+
values="Count",
|
| 161 |
+
names="Stage",
|
| 162 |
+
title="Stage Distribution (Top 10)",
|
| 163 |
+
)
|
| 164 |
+
fig_pie.update_layout(height=500)
|
| 165 |
+
st.plotly_chart(fig_pie, use_container_width=True)
|
| 166 |
+
else:
|
| 167 |
+
st.info("Remappedstages column not found in data")
|
| 168 |
+
|
| 169 |
+
with tab3:
|
| 170 |
+
st.markdown("### Adjournment Patterns")
|
| 171 |
+
|
| 172 |
+
# Adjournment rate by case type
|
| 173 |
+
if "CaseType" in filtered_df and "Outcome" in filtered_df:
|
| 174 |
+
adj_by_type = (
|
| 175 |
+
filtered_df.groupby("CaseType")["Outcome"]
|
| 176 |
+
.apply(lambda x: (x == "ADJOURNED").sum() / len(x) if len(x) > 0 else 0)
|
| 177 |
+
.reset_index()
|
| 178 |
+
)
|
| 179 |
+
adj_by_type.columns = ["CaseType", "Adjournment_Rate"]
|
| 180 |
+
adj_by_type["Adjournment_Rate"] = adj_by_type["Adjournment_Rate"] * 100
|
| 181 |
+
|
| 182 |
+
fig = px.bar(
|
| 183 |
+
adj_by_type.sort_values("Adjournment_Rate", ascending=False),
|
| 184 |
+
x="CaseType",
|
| 185 |
+
y="Adjournment_Rate",
|
| 186 |
+
title="Adjournment Rate by Case Type (%)",
|
| 187 |
+
labels={"CaseType": "Case Type", "Adjournment_Rate": "Adjournment Rate (%)"},
|
| 188 |
+
color="Adjournment_Rate",
|
| 189 |
+
color_continuous_scale="Reds",
|
| 190 |
+
)
|
| 191 |
+
fig.update_layout(xaxis_tickangle=-45, height=500)
|
| 192 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 193 |
+
|
| 194 |
+
# Adjournment rate by stage
|
| 195 |
+
if "Remappedstages" in filtered_df and "Outcome" in filtered_df:
|
| 196 |
+
adj_by_stage = (
|
| 197 |
+
filtered_df.groupby("Remappedstages")["Outcome"]
|
| 198 |
+
.apply(lambda x: (x == "ADJOURNED").sum() / len(x) if len(x) > 0 else 0)
|
| 199 |
+
.reset_index()
|
| 200 |
+
)
|
| 201 |
+
adj_by_stage.columns = ["Stage", "Adjournment_Rate"]
|
| 202 |
+
adj_by_stage["Adjournment_Rate"] = adj_by_stage["Adjournment_Rate"] * 100
|
| 203 |
+
|
| 204 |
+
fig = px.bar(
|
| 205 |
+
adj_by_stage.sort_values("Adjournment_Rate", ascending=False).head(15),
|
| 206 |
+
x="Adjournment_Rate",
|
| 207 |
+
y="Stage",
|
| 208 |
+
orientation="h",
|
| 209 |
+
title="Adjournment Rate by Stage (Top 15, %)",
|
| 210 |
+
labels={"Stage": "Stage", "Adjournment_Rate": "Adjournment Rate (%)"},
|
| 211 |
+
color="Adjournment_Rate",
|
| 212 |
+
color_continuous_scale="Oranges",
|
| 213 |
+
)
|
| 214 |
+
fig.update_layout(height=600)
|
| 215 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 216 |
+
|
| 217 |
+
# Heatmap: Adjournment probability by stage and case type
|
| 218 |
+
if params and "adjournment_stats" in params:
|
| 219 |
+
st.markdown("#### Adjournment Probability Heatmap (Stage × Case Type)")
|
| 220 |
+
|
| 221 |
+
adj_stats = params["adjournment_stats"]
|
| 222 |
+
stages = list(adj_stats.keys())
|
| 223 |
+
case_types = params["case_types"]
|
| 224 |
+
|
| 225 |
+
heatmap_data = []
|
| 226 |
+
for stage in stages:
|
| 227 |
+
row = []
|
| 228 |
+
for ct in case_types:
|
| 229 |
+
prob = adj_stats.get(stage, {}).get(ct, 0)
|
| 230 |
+
row.append(prob * 100) # Convert to percentage
|
| 231 |
+
heatmap_data.append(row)
|
| 232 |
+
|
| 233 |
+
fig = go.Figure(data=go.Heatmap(
|
| 234 |
+
z=heatmap_data,
|
| 235 |
+
x=case_types,
|
| 236 |
+
y=stages,
|
| 237 |
+
colorscale="RdYlGn_r",
|
| 238 |
+
text=[[f"{val:.1f}%" for val in row] for row in heatmap_data],
|
| 239 |
+
texttemplate="%{text}",
|
| 240 |
+
textfont={"size": 8},
|
| 241 |
+
colorbar=dict(title="Adj. Rate (%)"),
|
| 242 |
+
))
|
| 243 |
+
fig.update_layout(
|
| 244 |
+
title="Adjournment Probability Heatmap",
|
| 245 |
+
xaxis_title="Case Type",
|
| 246 |
+
yaxis_title="Stage",
|
| 247 |
+
height=700,
|
| 248 |
+
)
|
| 249 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 250 |
+
|
| 251 |
+
with tab4:
|
| 252 |
+
st.markdown("### Raw Data")
|
| 253 |
+
|
| 254 |
+
st.dataframe(
|
| 255 |
+
filtered_df.head(100),
|
| 256 |
+
use_container_width=True,
|
| 257 |
+
height=600,
|
| 258 |
+
)
|
| 259 |
+
|
| 260 |
+
st.markdown(f"**Showing first 100 of {len(filtered_df):,} filtered rows**")
|
| 261 |
+
|
| 262 |
+
# Download button
|
| 263 |
+
csv = filtered_df.to_csv(index=False).encode('utf-8')
|
| 264 |
+
st.download_button(
|
| 265 |
+
label="Download filtered data as CSV",
|
| 266 |
+
data=csv,
|
| 267 |
+
file_name="filtered_cases.csv",
|
| 268 |
+
mime="text/csv",
|
| 269 |
+
)
|
| 270 |
+
|
| 271 |
+
# Footer
|
| 272 |
+
st.markdown("---")
|
| 273 |
+
st.markdown("*Data loaded from EDA pipeline. Refresh to reload.*")
|
scheduler/dashboard/pages/2_Ripeness_Classifier.py
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Ripeness Classifier page - Interactive explainability and threshold tuning.

This page provides full transparency into how cases are classified as RIPE/UNRIPE/UNKNOWN,
allows interactive threshold tuning, and provides case-level explainability.
"""

from __future__ import annotations

from datetime import date, timedelta

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

from scheduler.core.case import Case, CaseStatus, CaseType
from scheduler.core.ripeness import RipenessClassifier, RipenessStatus
from scheduler.dashboard.utils import load_generated_cases

# Page configuration
st.set_page_config(
    page_title="Ripeness Classifier",
    page_icon="🎯",
    layout="wide",
)

st.title("🎯 Ripeness Classifier - Explainability Dashboard")
st.markdown("Understand and tune the case readiness algorithm")

# Initialize session state for thresholds (defaults: 2 hearings, 30 stage
# days, 90 days case age) so they persist across Streamlit reruns.
if "min_service_hearings" not in st.session_state:
    st.session_state.min_service_hearings = 2
if "min_stage_days" not in st.session_state:
    st.session_state.min_stage_days = 30
if "min_case_age_days" not in st.session_state:
    st.session_state.min_case_age_days = 90

# Sidebar: Threshold controls
st.sidebar.header("Threshold Configuration")

st.sidebar.markdown("### Adjust Ripeness Thresholds")

min_service_hearings = st.sidebar.slider(
    "Min Service Hearings",
    min_value=0,
    max_value=10,
    value=st.session_state.min_service_hearings,
    step=1,
    help="Minimum number of service hearings before a case is considered RIPE",
)

min_stage_days = st.sidebar.slider(
    "Min Stage Days",
    min_value=0,
    max_value=180,
    value=st.session_state.min_stage_days,
    step=5,
    help="Minimum days in current stage",
)

min_case_age_days = st.sidebar.slider(
    "Min Case Age (days)",
    min_value=0,
    max_value=730,
    value=st.session_state.min_case_age_days,
    step=30,
    help="Minimum case age before considered RIPE",
)

# Reset button
# NOTE(review): the sliders above have no `key=` argument, so they keep their
# own widget state across reruns; resetting the session_state values here and
# rerunning presumably does not move the slider handles back to the defaults.
# Binding the sliders to session state via `key=` would fix this — confirm
# against the Streamlit version in use.
if st.sidebar.button("Reset to Defaults"):
    st.session_state.min_service_hearings = 2
    st.session_state.min_stage_days = 30
    st.session_state.min_case_age_days = 90
    st.rerun()

# Update session state with the latest slider values
st.session_state.min_service_hearings = min_service_hearings
st.session_state.min_stage_days = min_stage_days
st.session_state.min_case_age_days = min_case_age_days

# Main content
tab1, tab2, tab3 = st.tabs(["Current Configuration", "Interactive Testing", "Batch Classification"])

with tab1:
    st.markdown("### Current Classifier Configuration")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Min Service Hearings", min_service_hearings)
        st.caption("Cases need at least this many service hearings")

    with col2:
        st.metric("Min Stage Days", min_stage_days)
        st.caption("Days in current stage threshold")

    with col3:
        st.metric("Min Case Age", f"{min_case_age_days} days")
        st.caption("Minimum case age requirement")

    st.markdown("---")

    # Classification logic flowchart
    st.markdown("### Classification Logic")

    with st.expander("View Decision Tree Logic"):
        st.markdown("""
        The ripeness classifier uses the following decision logic:

        **1. Service Hearings Check**
        - If `service_hearings < MIN_SERVICE_HEARINGS` → **UNRIPE**

        **2. Case Age Check**
        - If `case_age < MIN_CASE_AGE_DAYS` → **UNRIPE**

        **3. Stage-Specific Checks**
        - Each stage has minimum days requirement
        - If `days_in_stage < stage_requirement` → **UNRIPE**

        **4. Keyword Analysis**
        - Certain keywords indicate ripeness (e.g., "reply filed", "arguments complete")
        - If keywords found → **RIPE**

        **5. Final Classification**
        - If all criteria met → **RIPE**
        - If some criteria failed but not critical → **UNKNOWN**
        - Otherwise → **UNRIPE**
        """)

    # Show stage-specific rules used by the demo classification in tab2
    # (hard-coded here; the real RipenessClassifier may differ — see the
    # note inside the Classify Case handler below).
    st.markdown("### Stage-Specific Rules")

    stage_rules = {
        "PRE-TRIAL": {"min_days": 60, "keywords": ["affidavit filed", "reply filed"]},
        "TRIAL": {"min_days": 45, "keywords": ["evidence complete", "cross complete"]},
        "POST-TRIAL": {"min_days": 30, "keywords": ["arguments complete", "written note"]},
        "FINAL DISPOSAL": {"min_days": 15, "keywords": ["disposed", "judgment"]},
    }

    df_rules = pd.DataFrame([
        {"Stage": stage, "Min Days": rules["min_days"], "Keywords": ", ".join(rules["keywords"])}
        for stage, rules in stage_rules.items()
    ])

    st.dataframe(df_rules, use_container_width=True, hide_index=True)

with tab2:
    st.markdown("### Interactive Case Classification Testing")

    st.markdown("Create a synthetic case and see how it would be classified with current thresholds")

    col1, col2 = st.columns(2)

    with col1:
        case_id = st.text_input("Case ID", value="TEST-001")
        case_type = st.selectbox("Case Type", ["CIVIL", "CRIMINAL", "WRIT", "PIL"])
        case_stage = st.selectbox("Current Stage", ["PRE-TRIAL", "TRIAL", "POST-TRIAL", "FINAL DISPOSAL"])

    with col2:
        service_hearings_count = st.number_input("Service Hearings", min_value=0, max_value=20, value=3)
        days_in_stage = st.number_input("Days in Stage", min_value=0, max_value=365, value=45)
        case_age = st.number_input("Case Age (days)", min_value=0, max_value=3650, value=120)

    # Keywords
    has_keywords = st.multiselect(
        "Keywords Found",
        options=["reply filed", "affidavit filed", "arguments complete", "evidence complete", "written note"],
        default=[],
    )

    if st.button("Classify Case"):
        # Create synthetic case whose filed date backdates by the chosen age
        today = date.today()
        filed_date = today - timedelta(days=case_age)

        test_case = Case(
            case_id=case_id,
            case_type=CaseType(case_type),
            filed_date=filed_date,
            current_stage=case_stage,
            status=CaseStatus.PENDING,
        )

        # Simulate service hearings spaced 20 days apart
        test_case.hearings_history = [
            {"date": filed_date + timedelta(days=i*20), "type": "SERVICE"}
            for i in range(service_hearings_count)
        ]

        # Classify using current thresholds
        # Note: This is a simplified classification for demo purposes
        # The actual RipenessClassifier has more complex logic

        criteria_passed = []
        criteria_failed = []

        # Check service hearings
        if service_hearings_count >= min_service_hearings:
            criteria_passed.append(f"✓ Service hearings: {service_hearings_count} (threshold: {min_service_hearings})")
        else:
            criteria_failed.append(f"✗ Service hearings: {service_hearings_count} (threshold: {min_service_hearings})")

        # Check case age
        if case_age >= min_case_age_days:
            criteria_passed.append(f"✓ Case age: {case_age} days (threshold: {min_case_age_days})")
        else:
            criteria_failed.append(f"✗ Case age: {case_age} days (threshold: {min_case_age_days})")

        # Check stage days (per-stage minimum from stage_rules, falling back
        # to the global slider threshold for unknown stages)
        stage_threshold = stage_rules.get(case_stage, {}).get("min_days", min_stage_days)
        if days_in_stage >= stage_threshold:
            criteria_passed.append(f"✓ Stage days: {days_in_stage} (threshold: {stage_threshold} for {case_stage})")
        else:
            criteria_failed.append(f"✗ Stage days: {days_in_stage} (threshold: {stage_threshold} for {case_stage})")

        # Check keywords: only keywords relevant to the selected stage count
        expected_keywords = stage_rules.get(case_stage, {}).get("keywords", [])
        keywords_found = [kw for kw in has_keywords if kw in expected_keywords]
        if keywords_found:
            criteria_passed.append(f"✓ Keywords: {', '.join(keywords_found)}")
        else:
            criteria_failed.append(f"✗ No relevant keywords found")

        # Final classification: all pass -> RIPE, exactly one failure ->
        # UNKNOWN, two or more failures -> UNRIPE
        if len(criteria_failed) == 0:
            classification = "RIPE"
            color = "green"
        elif len(criteria_failed) <= 1:
            classification = "UNKNOWN"
            color = "orange"
        else:
            classification = "UNRIPE"
            color = "red"

        # Display results
        st.markdown("### Classification Result")
        st.markdown(f":{color}[**{classification}**]")

        col1, col2 = st.columns(2)

        with col1:
            st.markdown("#### Criteria Passed")
            for criterion in criteria_passed:
                st.markdown(criterion)

        with col2:
            st.markdown("#### Criteria Failed")
            if criteria_failed:
                for criterion in criteria_failed:
                    st.markdown(criterion)
            else:
                st.markdown("*All criteria passed*")

        # Feature importance (binary pass/fail per criterion)
        st.markdown("---")
        st.markdown("### Feature Importance")

        feature_scores = {
            "Service Hearings": 1 if service_hearings_count >= min_service_hearings else 0,
            "Case Age": 1 if case_age >= min_case_age_days else 0,
            "Stage Days": 1 if days_in_stage >= stage_threshold else 0,
            "Keywords": 1 if keywords_found else 0,
        }

        fig = px.bar(
            x=list(feature_scores.keys()),
            y=list(feature_scores.values()),
            labels={"x": "Feature", "y": "Score (0=Fail, 1=Pass)"},
            title="Feature Contribution to Ripeness",
            color=list(feature_scores.values()),
            color_continuous_scale=["red", "green"],
        )
        fig.update_layout(height=400, showlegend=False)
        st.plotly_chart(fig, use_container_width=True)

with tab3:
    st.markdown("### Batch Classification Analysis")

    st.markdown("Load generated test cases and classify them with current thresholds")

    if st.button("Load & Classify Test Cases"):
        with st.spinner("Loading cases..."):
            try:
                cases = load_generated_cases()

                if not cases:
                    st.warning("No test cases found. Generate cases first: `uv run court-scheduler generate`")
                else:
                    st.success(f"Loaded {len(cases)} test cases")

                    # Classify all cases (simplified)
                    classifications = {"RIPE": 0, "UNRIPE": 0, "UNKNOWN": 0}

                    # For demo, use simplified logic: only the service-hearing
                    # count and case-age criteria (not stage days/keywords)
                    for case in cases:
                        service_count = len([h for h in case.hearings_history if h.get("type") == "SERVICE"])
                        case_age_days = (date.today() - case.filed_date).days

                        criteria_met = 0
                        if service_count >= min_service_hearings:
                            criteria_met += 1
                        if case_age_days >= min_case_age_days:
                            criteria_met += 1

                        if criteria_met == 2:
                            classifications["RIPE"] += 1
                        elif criteria_met == 1:
                            classifications["UNKNOWN"] += 1
                        else:
                            classifications["UNRIPE"] += 1

                    # Display results (len(cases) > 0 here, so the percentage
                    # divisions below are safe)
                    col1, col2, col3 = st.columns(3)

                    with col1:
                        pct = classifications["RIPE"] / len(cases) * 100
                        st.metric("RIPE Cases", f"{classifications['RIPE']:,}", f"{pct:.1f}%")

                    with col2:
                        pct = classifications["UNKNOWN"] / len(cases) * 100
                        st.metric("UNKNOWN Cases", f"{classifications['UNKNOWN']:,}", f"{pct:.1f}%")

                    with col3:
                        pct = classifications["UNRIPE"] / len(cases) * 100
                        st.metric("UNRIPE Cases", f"{classifications['UNRIPE']:,}", f"{pct:.1f}%")

                    # Pie chart
                    fig = px.pie(
                        values=list(classifications.values()),
                        names=list(classifications.keys()),
                        title="Classification Distribution",
                        color=list(classifications.keys()),
                        color_discrete_map={"RIPE": "green", "UNKNOWN": "orange", "UNRIPE": "red"},
                    )
                    st.plotly_chart(fig, use_container_width=True)

            except Exception as e:
                st.error(f"Error loading cases: {e}")

# Footer
st.markdown("---")
st.markdown("*Adjust thresholds in the sidebar to see real-time impact on classification*")
|
scheduler/dashboard/pages/3_RL_Training.py
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""RL Training page - Interactive training and visualization.
|
| 2 |
+
|
| 3 |
+
This page allows users to configure and train reinforcement learning agents,
|
| 4 |
+
monitor training progress in real-time, and visualize results.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import pickle
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
import pandas as pd
|
| 13 |
+
import plotly.express as px
|
| 14 |
+
import plotly.graph_objects as go
|
| 15 |
+
import streamlit as st
|
| 16 |
+
|
| 17 |
+
from scheduler.dashboard.utils import load_rl_training_history
|
| 18 |
+
|
| 19 |
+
# Page configuration
|
| 20 |
+
st.set_page_config(
|
| 21 |
+
page_title="RL Training",
|
| 22 |
+
page_icon="🤖",
|
| 23 |
+
layout="wide",
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
st.title("🤖 Reinforcement Learning Training")
|
| 27 |
+
st.markdown("Train and visualize RL agents for optimal case scheduling")
|
| 28 |
+
|
| 29 |
+
# Initialize session state
|
| 30 |
+
if "training_complete" not in st.session_state:
|
| 31 |
+
st.session_state.training_complete = False
|
| 32 |
+
if "training_stats" not in st.session_state:
|
| 33 |
+
st.session_state.training_stats = None
|
| 34 |
+
|
| 35 |
+
# Tabs
|
| 36 |
+
tab1, tab2, tab3 = st.tabs(["Train Agent", "Training History", "Model Comparison"])
|
| 37 |
+
|
| 38 |
+
with tab1:
    st.markdown("### Configure and Train RL Agent")

    # Left column: hyperparameter form; right column: progress charts.
    col1, col2 = st.columns([1, 2])

    with col1:
        st.markdown("#### Training Configuration")

        # Form batches all widget changes into a single rerun on submit.
        with st.form("training_config"):
            episodes = st.slider(
                "Number of Episodes",
                min_value=5,
                max_value=100,
                value=20,
                step=5,
                help="More episodes = better learning but longer training time",
            )

            cases_per_episode = st.slider(
                "Cases per Episode",
                min_value=50,
                max_value=500,
                value=200,
                step=50,
                help="Number of cases to simulate in each episode",
            )

            learning_rate = st.slider(
                "Learning Rate",
                min_value=0.01,
                max_value=0.5,
                value=0.15,
                step=0.01,
                help="How quickly the agent learns from experiences",
            )

            epsilon = st.slider(
                "Initial Epsilon",
                min_value=0.1,
                max_value=1.0,
                value=0.4,
                step=0.05,
                help="Exploration rate (higher = more exploration)",
            )

            discount = st.slider(
                "Discount Factor (gamma)",
                min_value=0.8,
                max_value=0.99,
                value=0.95,
                step=0.01,
                help="Importance of future rewards",
            )

            seed = st.number_input(
                "Random Seed",
                min_value=0,
                max_value=10000,
                value=42,
                help="For reproducibility",
            )

            submitted = st.form_submit_button("Start Training", type="primary")

        if submitted:
            # No real training happens here: this page only echoes the chosen
            # configuration and the equivalent CLI command, then fabricates
            # demo statistics for the charts in the right column.
            st.info("Training functionality requires RL modules to be imported. This is a demo interface.")
            st.markdown(f"""
            **Training Configuration:**
            - Episodes: {episodes}
            - Cases/Episode: {cases_per_episode}
            - Learning Rate: {learning_rate}
            - Epsilon: {epsilon}
            - Discount: {discount}
            - Seed: {seed}

            **Command to run training via CLI:**
            ```bash
            uv run court-scheduler train \\
                --episodes {episodes} \\
                --cases {cases_per_episode} \\
                --lr {learning_rate} \\
                --epsilon {epsilon} \\
                --seed {seed}
            ```
            """)

            # Simulate training stats for demo: linearly improving disposal
            # rate and reward, linearly growing state count, and geometric
            # epsilon decay (factor 0.95 per episode).
            demo_stats = {
                "episodes": list(range(1, episodes + 1)),
                "disposal_rates": [0.3 + (i / episodes) * 0.4 for i in range(episodes)],
                "avg_rewards": [100 + (i / episodes) * 200 for i in range(episodes)],
                "states_explored": [50 * (i + 1) for i in range(episodes)],
                "epsilon_values": [epsilon * (0.95 ** i) for i in range(episodes)],
            }

            # Persist in session state so the charts survive reruns.
            st.session_state.training_stats = demo_stats
            st.session_state.training_complete = True

    with col2:
        st.markdown("#### Training Progress")

        if st.session_state.training_complete and st.session_state.training_stats:
            stats = st.session_state.training_stats

            # Metrics
            # NOTE(review): col1/col2/col3 here shadow the outer layout
            # columns; harmless today (outer names unused afterwards) but
            # consider renaming if the outer columns are needed later.
            col1, col2, col3 = st.columns(3)
            with col1:
                final_disposal = stats["disposal_rates"][-1]
                st.metric("Final Disposal Rate", f"{final_disposal:.1%}")
            with col2:
                total_states = stats["states_explored"][-1]
                st.metric("States Explored", f"{total_states:,}")
            with col3:
                final_reward = stats["avg_rewards"][-1]
                st.metric("Avg Reward", f"{final_reward:.1f}")

            # Disposal rate over episodes
            fig = px.line(
                x=stats["episodes"],
                y=stats["disposal_rates"],
                labels={"x": "Episode", "y": "Disposal Rate"},
                title="Disposal Rate Over Episodes",
            )
            fig.update_traces(line_color="#1f77b4", line_width=3)
            fig.update_layout(height=300)
            st.plotly_chart(fig, use_container_width=True)

            # Average reward
            fig = px.line(
                x=stats["episodes"],
                y=stats["avg_rewards"],
                labels={"x": "Episode", "y": "Average Reward"},
                title="Average Reward Over Episodes",
            )
            fig.update_traces(line_color="#ff7f0e", line_width=3)
            fig.update_layout(height=300)
            st.plotly_chart(fig, use_container_width=True)

            # States explored
            fig = px.line(
                x=stats["episodes"],
                y=stats["states_explored"],
                labels={"x": "Episode", "y": "States Explored"},
                title="Cumulative States Explored",
            )
            fig.update_traces(line_color="#2ca02c", line_width=3)
            fig.update_layout(height=300)
            st.plotly_chart(fig, use_container_width=True)

            # Epsilon decay
            fig = px.line(
                x=stats["episodes"],
                y=stats["epsilon_values"],
                labels={"x": "Episode", "y": "Epsilon"},
                title="Epsilon Decay (Exploration Rate)",
            )
            fig.update_traces(line_color="#d62728", line_width=3)
            fig.update_layout(height=300)
            st.plotly_chart(fig, use_container_width=True)

        else:
            # No training run yet: show an explainer instead of charts.
            st.info("Configure training parameters and click 'Start Training' to begin.")

            st.markdown("""
            **What is RL Training?**

            Reinforcement Learning trains an agent to make optimal scheduling decisions
            by learning from simulated court scheduling scenarios.

            The agent learns to:
            - Prioritize cases effectively
            - Balance workload across courtrooms
            - Maximize disposal rates
            - Minimize adjournments

            **Key Hyperparameters:**
            - **Episodes**: Number of complete training runs
            - **Learning Rate**: How fast the agent updates its knowledge
            - **Epsilon**: Balance between exploration (try new actions) and exploitation (use known good actions)
            - **Discount Factor**: How much to value future rewards vs immediate rewards
            """)
|
| 219 |
+
|
| 220 |
+
with tab2:
    st.markdown("### Training History")

    st.markdown("View results from previous training runs")

    # Try to load training history (cached loader; returns an empty
    # DataFrame when no logs are found).
    history_df = load_rl_training_history()

    if not history_df.empty:
        st.dataframe(history_df, use_container_width=True)

        # Plot disposal rates over time — only when the log file has the
        # expected columns.
        if "episode" in history_df.columns and "disposal_rate" in history_df.columns:
            fig = px.line(
                history_df,
                x="episode",
                y="disposal_rate",
                title="Historical Training Performance",
                labels={"episode": "Episode", "disposal_rate": "Disposal Rate"},
            )
            st.plotly_chart(fig, use_container_width=True)
    else:
        # No logs yet: point the user at the CLI entry point.
        st.info("No training history found. Run training first using the CLI or the Train Agent tab.")

        st.code("uv run court-scheduler train --episodes 20 --cases 200")
|
| 245 |
+
|
| 246 |
+
with tab3:
    st.markdown("### Model Comparison")

    st.markdown("Compare different trained models and their hyperparameters")

    # Check for saved models (pickled agents under ./models)
    models_dir = Path("models")
    if models_dir.exists():
        model_files = list(models_dir.glob("*.pkl"))

        if model_files:
            st.success(f"Found {len(model_files)} saved model(s)")

            # Model selection (defaults to the first discovered file)
            selected_models = st.multiselect(
                "Select models to compare",
                options=[f.name for f in model_files],
                default=[model_files[0].name] if model_files else [],
            )

            if selected_models:
                comparison_data = []

                for model_name in selected_models:
                    try:
                        model_path = models_dir / model_name
                        # SECURITY: pickle.load executes arbitrary code from
                        # the file; only load models produced by trusted
                        # local training runs.
                        with model_path.open("rb") as f:
                            agent = pickle.load(f)

                        # Extract model info; getattr defaults keep this
                        # working for agents missing any of the attributes.
                        model_info = {
                            "Model": model_name,
                            "Q-table Size": len(getattr(agent, "q_table", {})),
                            "Learning Rate": getattr(agent, "learning_rate", "N/A"),
                            "Epsilon": getattr(agent, "epsilon", "N/A"),
                        }
                        comparison_data.append(model_info)
                    except Exception as e:
                        # Best-effort: a corrupt/unreadable model is skipped
                        # with a warning rather than breaking the page.
                        st.warning(f"Could not load {model_name}: {e}")

                if comparison_data:
                    df_comparison = pd.DataFrame(comparison_data)
                    st.dataframe(df_comparison, use_container_width=True, hide_index=True)

                    # Visualize Q-table sizes
                    fig = px.bar(
                        df_comparison,
                        x="Model",
                        y="Q-table Size",
                        title="Q-table Size Comparison",
                        labels={"Model": "Model Name", "Q-table Size": "Number of States"},
                    )
                    st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No trained models found in models/ directory")
    else:
        st.info("models/ directory not found. Train a model first.")

    st.markdown("---")

    # Hyperparameter analysis — static reference text for users tuning runs.
    with st.expander("Hyperparameter Guide"):
        st.markdown("""
        **Learning Rate** (α)
        - Range: 0.01 - 0.5
        - Low (0.01-0.1): Slow, stable learning
        - Medium (0.1-0.2): Balanced
        - High (0.2-0.5): Fast but potentially unstable

        **Epsilon** (ε)
        - Range: 0.1 - 1.0
        - Low (0.1-0.3): More exploitation, less exploration
        - Medium (0.3-0.5): Balanced
        - High (0.5-1.0): More exploration, may take longer to converge

        **Discount Factor** (γ)
        - Range: 0.8 - 0.99
        - Low (0.8-0.9): Prioritize immediate rewards
        - Medium (0.9-0.95): Balanced
        - High (0.95-0.99): Prioritize long-term rewards

        **Episodes**
        - Fewer (5-20): Quick training, may underfit
        - Medium (20-50): Good for most cases
        - Many (50-100+): Better convergence, longer training time
        """)
|
| 332 |
+
|
| 333 |
+
# Footer — rendered after whichever tab is active
st.markdown("---")
st.markdown("*RL training helps optimize scheduling decisions through simulated learning*")
|
scheduler/dashboard/utils/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Dashboard utilities package."""
|
| 2 |
+
|
| 3 |
+
from .data_loader import (
|
| 4 |
+
get_case_statistics,
|
| 5 |
+
get_data_status,
|
| 6 |
+
load_cleaned_data,
|
| 7 |
+
load_generated_cases,
|
| 8 |
+
load_param_loader,
|
| 9 |
+
load_rl_training_history,
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
__all__ = [
|
| 13 |
+
"load_param_loader",
|
| 14 |
+
"load_cleaned_data",
|
| 15 |
+
"load_generated_cases",
|
| 16 |
+
"get_case_statistics",
|
| 17 |
+
"load_rl_training_history",
|
| 18 |
+
"get_data_status",
|
| 19 |
+
]
|
scheduler/dashboard/utils/data_loader.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data loading utilities for dashboard with caching.
|
| 2 |
+
|
| 3 |
+
This module provides cached data loading functions to avoid
|
| 4 |
+
reloading large datasets on every user interaction.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from datetime import date
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Any
|
| 12 |
+
|
| 13 |
+
import pandas as pd
|
| 14 |
+
import polars as pl
|
| 15 |
+
import streamlit as st
|
| 16 |
+
|
| 17 |
+
from scheduler.data.case_generator import CaseGenerator
|
| 18 |
+
from scheduler.data.param_loader import ParameterLoader
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@st.cache_data(ttl=3600)
def load_param_loader(params_dir: str = "configs/parameters") -> dict[str, Any]:
    """Load EDA-derived parameters.

    Results are cached by Streamlit for one hour (ttl=3600) keyed on
    ``params_dir``.

    Args:
        params_dir: Directory containing parameter files

    Returns:
        Dictionary with keys ``case_types``, ``stages``, ``stage_graph`` and
        ``adjournment_stats`` (a stage -> case-type -> probability mapping).
    """
    loader = ParameterLoader(Path(params_dir))

    # Hoist the loader getter calls: the original re-queried the case-type
    # list once per stage inside the nested comprehension, and the stage
    # list twice. (Assumes the getters are pure accessors — TODO confirm.)
    case_types = loader.get_case_types()
    stages = loader.get_stages()

    return {
        "case_types": case_types,
        "stages": stages,
        "stage_graph": loader.get_stage_graph(),
        "adjournment_stats": {
            stage: {ct: loader.get_adjournment_prob(stage, ct) for ct in case_types}
            for stage in stages
        },
    }
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@st.cache_data(ttl=3600)
def load_cleaned_data(data_path: str = "Data/processed/cleaned_cases.csv") -> pd.DataFrame:
    """Load cleaned case data.

    Cached by Streamlit for one hour, keyed on ``data_path``.

    Args:
        data_path: Path to cleaned CSV file

    Returns:
        Pandas DataFrame with case data; empty DataFrame (plus a UI
        warning) when the file does not exist.
    """
    csv_file = Path(data_path)
    if not csv_file.exists():
        st.warning(f"Data file not found: {data_path}")
        return pd.DataFrame()

    # Parse with Polars for speed, then hand callers a pandas frame for
    # compatibility with the rest of the dashboard.
    return pl.read_csv(csv_file).to_pandas()
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
@st.cache_data(ttl=3600)
def load_generated_cases(cases_path: str = "data/generated/cases.csv") -> list:
    """Load generated test cases.

    Cached by Streamlit for one hour, keyed on ``cases_path``.

    Args:
        cases_path: Path to generated cases CSV

    Returns:
        List of Case objects; empty list (plus a UI warning) when the
        file does not exist.
    """
    source = Path(cases_path)
    if source.exists():
        return CaseGenerator.from_csv(source)

    st.warning(f"Cases file not found: {cases_path}")
    return []
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
@st.cache_data
def get_case_statistics(df: pd.DataFrame) -> dict[str, Any]:
    """Compute statistics from case DataFrame.

    Args:
        df: Case data DataFrame

    Returns:
        Dictionary with ``total_cases``, ``case_types`` and ``stages``
        frequency maps, plus ``adjournment_rate`` when an ``Outcome``
        column is present. Empty dict for an empty DataFrame.
    """
    if df.empty:
        return {}

    # Explicit .columns membership (the bare `in df` form also tests
    # columns, but this reads unambiguously).
    stats: dict[str, Any] = {
        "total_cases": len(df),
        "case_types": df["CaseType"].value_counts().to_dict() if "CaseType" in df.columns else {},
        "stages": df["Remappedstages"].value_counts().to_dict() if "Remappedstages" in df.columns else {},
    }

    # Adjournment rate if applicable.
    if "Outcome" in df.columns:
        # Boolean-mask sum avoids materializing a filtered copy of df
        # (the original built df[df["Outcome"] == "ADJOURNED"] just to
        # take its length).
        adjourned = int((df["Outcome"] == "ADJOURNED").sum())
        # len(df) > 0 is guaranteed: empty frames returned {} above.
        stats["adjournment_rate"] = adjourned / len(df)

    return stats
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
@st.cache_data
def load_rl_training_history(log_dir: str = "runs") -> pd.DataFrame:
    """Load RL training history from logs.

    Searches ``log_dir`` recursively for ``training_stats.csv`` files and
    loads the most recently modified one. Result is cached by Streamlit.

    Args:
        log_dir: Directory containing training logs

    Returns:
        DataFrame with training metrics; empty DataFrame when the
        directory or a stats file is missing.
    """
    root = Path(log_dir)

    # Gather candidate stats files ordered oldest -> newest by mtime.
    candidates = (
        sorted(root.glob("**/training_stats.csv"), key=lambda p: p.stat().st_mtime)
        if root.exists()
        else []
    )
    if not candidates:
        return pd.DataFrame()

    # Last entry is the most recent run.
    return pd.read_csv(candidates[-1])
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def get_data_status() -> dict[str, bool]:
    """Check availability of various data sources.

    Returns:
        Dictionary mapping data source name to availability status
        (whether the expected path exists on disk).
    """
    # Table-driven: one entry per data source the dashboard depends on.
    expected_paths = {
        "cleaned_data": "Data/processed/cleaned_cases.csv",
        "parameters": "configs/parameters",
        "generated_cases": "data/generated/cases.csv",
        "eda_figures": "reports/figures",
    }
    return {name: Path(location).exists() for name, location in expected_paths.items()}
|