RoyAalekh committed on
Commit
00a2e2f
·
1 Parent(s): 8d2e8fa

Add default scheduler params and auto-generate fallback

Browse files
.gitignore CHANGED
@@ -30,3 +30,7 @@ Data/test_verification/
30
  !Data/README.md
31
  !pyproject.toml
32
  !Data/court_data.duckdb
 
 
 
 
 
30
  !Data/README.md
31
  !pyproject.toml
32
  !Data/court_data.duckdb
33
+
34
+ # Bundled baseline parameters for scheduler
35
+ !scheduler/data/defaults/*.csv
36
+ !scheduler/data/defaults/*.json
scheduler/data/config.py CHANGED
@@ -4,30 +4,107 @@ This module contains all configuration parameters and constants used throughout
4
  the scheduler implementation.
5
  """
6
 
 
 
 
7
  from pathlib import Path
8
- from typing import Dict, List
9
 
10
  # Project paths
11
  PROJECT_ROOT = Path(__file__).parent.parent.parent
12
  REPORTS_DIR = PROJECT_ROOT / "reports" / "figures"
 
 
13
 
14
- # Find the latest versioned output directory
15
- def get_latest_params_dir() -> Path:
16
- """Get the latest versioned parameters directory from EDA outputs."""
17
  if not REPORTS_DIR.exists():
18
- raise FileNotFoundError(f"Reports directory not found: {REPORTS_DIR}")
19
-
20
  version_dirs = [d for d in REPORTS_DIR.iterdir() if d.is_dir() and d.name.startswith("v")]
21
  if not version_dirs:
22
- raise FileNotFoundError(f"No versioned directories found in {REPORTS_DIR}")
23
-
24
- latest_dir = max(version_dirs, key=lambda d: d.stat().st_mtime)
25
- params_dir = latest_dir / "params"
26
-
27
- if not params_dir.exists():
28
- params_dir = latest_dir # Fallback if params/ subdirectory doesn't exist
29
-
30
- return params_dir
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  # Court operational constants
33
  WORKING_DAYS_PER_YEAR = 192 # From Karnataka High Court calendar
@@ -120,3 +197,37 @@ RANDOM_SEED = 42
120
  # Logging configuration
121
  LOG_LEVEL = "INFO"
122
  LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  the scheduler implementation.
5
  """
6
 
7
+ import argparse
8
+ import subprocess
9
+ import sys
10
  from pathlib import Path
11
+ from typing import Dict, List, Optional
12
 
13
  # Project paths
14
  PROJECT_ROOT = Path(__file__).parent.parent.parent
15
  REPORTS_DIR = PROJECT_ROOT / "reports" / "figures"
16
+ DEFAULT_PARAMS_DIR = Path(__file__).parent / "defaults"
17
+ RUN_EDA_SCRIPT = PROJECT_ROOT / "src" / "run_eda.py"
18
 
19
def _discover_latest_report_dir() -> Optional[Path]:
    """Locate the most recently modified versioned report directory.

    Scans ``REPORTS_DIR`` for sub-directories whose name starts with ``"v"``
    and picks the one with the newest modification time.

    Returns:
        The newest matching directory, or ``None`` when ``REPORTS_DIR`` does
        not exist or holds no matching sub-directory.
    """
    if REPORTS_DIR.exists():
        candidates = [
            entry
            for entry in REPORTS_DIR.iterdir()
            if entry.is_dir() and entry.name.startswith("v")
        ]
        if candidates:
            # "Latest" is decided by filesystem mtime, not by the version name.
            return max(candidates, key=lambda entry: entry.stat().st_mtime)

    return None
29
+
30
+
31
def _try_run_eda() -> None:
    """Invoke the EDA pipeline script in a child process.

    Announces the run on stderr, then executes ``RUN_EDA_SCRIPT`` with the
    interpreter that is running this module.

    Raises:
        FileNotFoundError: If ``RUN_EDA_SCRIPT`` does not exist.
        RuntimeError: If the script exits with a non-zero return code.
    """
    script_is_present = RUN_EDA_SCRIPT.exists()
    if not script_is_present:
        raise FileNotFoundError(
            f"Unable to regenerate parameters because {RUN_EDA_SCRIPT} is missing. "
            "Please ensure the EDA pipeline is available."
        )

    print("No EDA outputs found. Running src/run_eda.py to generate parameters...", file=sys.stderr)

    # check=False: the exit status is inspected manually so the failure can be
    # reported with a project-specific message instead of CalledProcessError.
    command = [sys.executable, str(RUN_EDA_SCRIPT)]
    completed = subprocess.run(command, check=False)
    if completed.returncode == 0:
        return

    raise RuntimeError(
        "Failed to regenerate parameters via src/run_eda.py. "
        "Check the data dependencies and try again."
    )
46
+
47
+
48
+ # Find the latest versioned output directory
49
def get_latest_params_dir(
    regenerate: bool = False,
    allow_generate: bool = True,
    allow_defaults: bool = True,
    prefer_defaults: bool = False,
) -> Path:
    """Get the latest parameters directory from EDA outputs or bundled defaults.

    The lookup strategy is:
    1) Use the latest versioned directory in reports/figures (unless regenerating).
    2) Optionally run the EDA pipeline to create parameters when none exist.
    3) Fallback to bundled defaults when available.

    Args:
        regenerate: When True, always run the EDA pipeline before resolving params.
        allow_generate: If True, run EDA automatically when no outputs exist.
        allow_defaults: If True, fallback to bundled defaults if EDA outputs are missing.
        prefer_defaults: If True, return bundled defaults immediately when available.

    Returns:
        Path to a directory containing parameter files. For a versioned report
        directory this is its ``params`` sub-directory when that exists,
        otherwise the report directory itself.

    Raises:
        FileNotFoundError: When parameters cannot be located or generated and
            bundled defaults are not allowed or not present.
        RuntimeError: When the EDA pipeline fails and either ``regenerate`` was
            requested explicitly or no bundled defaults can be used instead.
    """
    if prefer_defaults and allow_defaults and DEFAULT_PARAMS_DIR.exists():
        print(
            "Using bundled baseline parameters from scheduler/data/defaults (preferred).",
            file=sys.stderr,
        )
        return DEFAULT_PARAMS_DIR

    # Existing EDA outputs are skipped only when the caller forces a rebuild.
    latest_dir = None if regenerate else _discover_latest_report_dir()
    if latest_dir:
        return _params_dir_for_report(latest_dir)

    # Reaching this point means either a rebuild was requested or no outputs
    # exist, so a second directory scan before generating is unnecessary.
    if regenerate or allow_generate:
        try:
            _try_run_eda()
        except (FileNotFoundError, RuntimeError) as exc:
            # An explicit regenerate request must surface its failure. An
            # implicit, best-effort generation may still fall back to the
            # bundled defaults, which is step 3 of the lookup strategy.
            if regenerate or not (allow_defaults and DEFAULT_PARAMS_DIR.exists()):
                raise
            print(
                f"Automatic parameter generation failed ({exc}); "
                "falling back to bundled defaults.",
                file=sys.stderr,
            )
        else:
            latest_dir = _discover_latest_report_dir()
            if latest_dir:
                return _params_dir_for_report(latest_dir)

    if allow_defaults and DEFAULT_PARAMS_DIR.exists():
        print(
            "Using bundled baseline parameters from scheduler/data/defaults (EDA outputs not found).",
            file=sys.stderr,
        )
        return DEFAULT_PARAMS_DIR

    missing_reports_msg = (
        "No parameter directory found. Ensure EDA has been run (python src/run_eda.py) "
        "or use bundled defaults via get_latest_params_dir(allow_defaults=True)."
    )
    raise FileNotFoundError(missing_reports_msg)


def _params_dir_for_report(report_dir: Path) -> Path:
    """Return ``report_dir / "params"`` if it exists, else ``report_dir`` itself."""
    params_dir = report_dir / "params"
    return params_dir if params_dir.exists() else report_dir
108
 
109
  # Court operational constants
110
  WORKING_DAYS_PER_YEAR = 192 # From Karnataka High Court calendar
 
197
  # Logging configuration
198
  LOG_LEVEL = "INFO"
199
  LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
200
+
201
+
202
+ def _parse_args() -> argparse.Namespace:
203
+ parser = argparse.ArgumentParser(
204
+ description=(
205
+ "Resolve the scheduler parameter directory, optionally regenerating via the EDA pipeline."
206
+ )
207
+ )
208
+ parser.add_argument(
209
+ "--regenerate",
210
+ action="store_true",
211
+ help="Run src/run_eda.py before resolving parameters.",
212
+ )
213
+ parser.add_argument(
214
+ "--use-defaults",
215
+ action="store_true",
216
+ help="Force use of bundled defaults instead of scanning reports/figures.",
217
+ )
218
+ return parser.parse_args()
219
+
220
+
221
def _main() -> None:
    """CLI entry point: resolve the parameter directory and print it to stdout."""
    cli = _parse_args()

    # --use-defaults both prefers the bundled defaults and disables the
    # automatic EDA run; bundled defaults are always permitted as a fallback.
    wants_defaults = cli.use_defaults
    resolved_dir = get_latest_params_dir(
        regenerate=cli.regenerate,
        allow_generate=not wants_defaults,
        allow_defaults=True,
        prefer_defaults=wants_defaults,
    )

    print(resolved_dir)
230
+
231
+
232
+ if __name__ == "__main__":
233
+ _main()
scheduler/data/defaults/adjournment_proxies.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Remappedstages,casetype,p_adjourn_proxy,p_not_reached_proxy,n
2
+ PRE-ADMISSION,CRP,0.3,0.05,80
3
+ PRE-ADMISSION,CA,0.28,0.05,80
4
+ ADMISSION,CRP,0.32,0.07,90
5
+ ADMISSION,CA,0.35,0.08,90
6
+ FRAMING OF CHARGES,RSA,0.25,0.05,70
7
+ EVIDENCE,RSA,0.4,0.1,100
8
+ ARGUMENTS,RFA,0.22,0.05,85
9
+ INTERLOCUTORY APPLICATION,CCC,0.45,0.1,40
10
+ ORDERS / JUDGMENT,CP,0.18,0.02,60
11
+ SETTLEMENT,CMP,0.15,0.02,30
scheduler/data/defaults/case_type_summary.csv ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ CASE_TYPE,n_cases,disp_median,disp_p90,hear_median,gap_median
2
+ CRP,1200,180,320,6,25
3
+ CA,1100,200,360,7,28
4
+ RSA,900,240,420,8,32
5
+ RFA,850,210,400,7,30
6
+ CCC,500,160,280,5,20
7
+ CP,450,150,260,5,18
8
+ CMP,300,140,240,4,16
scheduler/data/defaults/court_capacity_global.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "slots_median_global": 151,
3
+ "slots_p90_global": 210
4
+ }
scheduler/data/defaults/stage_duration.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ STAGE,RUN_MEDIAN_DAYS,RUN_P90_DAYS,HEARINGS_PER_RUN_MED,N_RUNS
2
+ PRE-ADMISSION,7,14,1,150
3
+ ADMISSION,14,30,1,150
4
+ FRAMING OF CHARGES,21,45,2,120
5
+ EVIDENCE,60,120,3,110
6
+ ARGUMENTS,30,60,2,130
7
+ INTERLOCUTORY APPLICATION,14,28,1,60
8
+ SETTLEMENT,10,20,1,50
9
+ ORDERS / JUDGMENT,14,28,1,140
10
+ FINAL DISPOSAL,1,2,1,200
11
+ OTHER,7,14,1,40
12
+ NA,1,1,1,10
scheduler/data/defaults/stage_transition_entropy.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ STAGE_FROM,entropy
2
+ PRE-ADMISSION,0.6
3
+ ADMISSION,0.8
4
+ FRAMING OF CHARGES,0.7
5
+ EVIDENCE,0.5
6
+ ARGUMENTS,0.4
7
+ INTERLOCUTORY APPLICATION,0.65
8
+ ORDERS / JUDGMENT,0.3
9
+ SETTLEMENT,0.1
10
+ OTHER,0.2
11
+ NA,0.0
scheduler/data/defaults/stage_transition_probs.csv ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ STAGE_FROM,STAGE_TO,N,row_n,p
2
+ PRE-ADMISSION,ADMISSION,100,1,0.7
3
+ PRE-ADMISSION,FINAL DISPOSAL,100,2,0.3
4
+ ADMISSION,FRAMING OF CHARGES,120,1,0.6
5
+ ADMISSION,ARGUMENTS,120,2,0.25
6
+ ADMISSION,FINAL DISPOSAL,120,3,0.15
7
+ FRAMING OF CHARGES,EVIDENCE,90,1,0.65
8
+ FRAMING OF CHARGES,ARGUMENTS,90,2,0.2
9
+ FRAMING OF CHARGES,FINAL DISPOSAL,90,3,0.15
10
+ EVIDENCE,ARGUMENTS,110,1,0.7
11
+ EVIDENCE,FINAL DISPOSAL,110,2,0.3
12
+ ARGUMENTS,ORDERS / JUDGMENT,130,1,0.75
13
+ ARGUMENTS,FINAL DISPOSAL,130,2,0.25
14
+ INTERLOCUTORY APPLICATION,ARGUMENTS,30,1,0.6
15
+ INTERLOCUTORY APPLICATION,FINAL DISPOSAL,30,2,0.4
16
+ ORDERS / JUDGMENT,FINAL DISPOSAL,140,1,0.9
17
+ ORDERS / JUDGMENT,SETTLEMENT,140,2,0.1
18
+ SETTLEMENT,FINAL DISPOSAL,50,1,1.0
19
+ OTHER,FINAL DISPOSAL,40,1,1.0
20
+ NA,NA,10,1,1.0