#!/usr/bin/env python3
"""Pre-submit local validation script for 911 Dispatch Supervisor RL Environment."""
from __future__ import annotations

import os
import shutil
import subprocess
import sys
from pathlib import Path


def run_command(
    cmd: list[str], description: str, check: bool = True
) -> subprocess.CompletedProcess:
    """Run *cmd*, echo its output, and print a PASSED/FAILED banner.

    Args:
        cmd: Argv-style command (no shell involved).
        description: Human-readable label for the check being run.
        check: Retained for backward compatibility; the banner now always
            reflects the actual return code (previously ``check=False``
            printed PASSED even for failing commands).

    Returns:
        The ``CompletedProcess``; a missing executable yields a synthetic
        result with return code 127 (the shell's "command not found").
    """
    print(f"\n{'=' * 60}")
    print(f"CHECK: {description}")
    print(f"CMD: {' '.join(cmd)}")
    print(f"{'=' * 60}")
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except FileNotFoundError as exc:
        print(f"FAILED: {description}")
        print(f"ERROR: command not found: {cmd[0]}")
        # Synthesize a 127 exit so callers can treat "tool missing" uniformly.
        return subprocess.CompletedProcess(cmd, 127, stdout="", stderr=str(exc))
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)
    # Fix: report based on the real exit status; with check=False the old code
    # printed PASSED even when the command failed.
    if result.returncode != 0:
        print(f"FAILED: {description}")
    else:
        print(f"PASSED: {description}")
    return result


def _tool_path(name: str) -> str | None:
    """Resolve tool path from PATH or current interpreter's Scripts directory."""
    found = shutil.which(name)
    if found:
        return found
    # Fallback: look next to the interpreter (covers Windows venv Scripts/).
    scripts_dir = Path(sys.executable).resolve().parent
    for candidate in (scripts_dir / name, scripts_dir / f"{name}.exe"):
        if candidate.exists():
            return str(candidate)
    return None


def _python_cmd(*args: str) -> list[str]:
    """Build a Python command, preferring uv when available."""
    uv = _tool_path("uv")
    if uv:
        return [uv, "run", "python", *args]
    return [sys.executable, *args]


def check_pytest() -> bool:
    """Return True when the project's test suite passes."""
    result = run_command(_python_cmd("-m", "pytest", "tests/", "-q"), "All tests pass")
    return result.returncode == 0


def check_inference() -> bool:
    """Run inference.py in random-agent mode and verify its output markers.

    Sets dummy API credentials plus USE_RANDOM so no real model is contacted.
    Returns True when both "[START]" and "[END]" appear on stdout.
    """
    env = os.environ.copy()
    env["API_BASE_URL"] = "https://api.openai.com/v1"
    env["MODEL_NAME"] = "gpt-4"
    env["OPENAI_API_KEY"] = "dummy-token-for-local-validation"
    env["USE_RANDOM"] = "true"
    print("\nNOTE: Running inference.py in random-agent mode for local validation")
    try:
        result = subprocess.run(
            _python_cmd("inference.py"),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            env=env,
            timeout=300,
        )
    except subprocess.TimeoutExpired:
        # Fix: a hung inference run previously escaped as an uncaught
        # exception; report it as a clean failure instead.
        print("FAILED: inference.py timed out after 300 seconds")
        return False
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)
    has_start = "[START]" in result.stdout
    has_end = "[END]" in result.stdout
    if has_start and has_end:
        print("PASSED: inference.py produces [START]/[END] output")
        return True
    print("FAILED: inference.py output missing [START] or [END] markers")
    return False


def check_docker_build() -> bool:
    """Return True when the Docker image builds; requires docker on PATH."""
    if not shutil.which("docker"):
        print("FAILED: Docker build succeeds")
        print("ERROR: docker command not found")
        return False
    result = run_command(
        ["docker", "build", "-t", "citywide-dispatch-supervisor", "."],
        "Docker build succeeds",
        check=False,
    )
    return result.returncode == 0


def check_openenv_validate() -> bool:
    """Return True when `openenv validate` succeeds; requires openenv-core."""
    openenv = _tool_path("openenv")
    if not openenv:
        print("FAILED: openenv validate passes")
        print("ERROR: openenv command not found")
        print("HINT: Install with: pip install openenv-core")
        return False
    result = run_command([openenv, "validate"], "openenv validate passes", check=False)
    return result.returncode == 0


def check_benchmark_scores() -> bool:
    """Run every benchmark task and verify each score lies in [0.0, 1.0]."""
    # Imported lazily so the rest of the script works even when src/ is
    # absent; main() reports the ImportError as a failed check.
    from src.benchmark import list_tasks, run_task

    tasks = list_tasks()
    print(f"\nFound {len(tasks)} tasks:")
    all_valid = True
    for task in tasks:
        task_id = task["task_id"]
        print(f" - {task_id}: {task['name']} ({task['difficulty']})")
        result = run_task(task_id, seed=42)
        score = result["score"]
        print(f" Score: {score:.3f}")
        if not (0.0 <= score <= 1.0):
            print(f" FAILED: Score {score} is outside [0.0, 1.0]")
            all_valid = False
        else:
            print(" PASSED: Score is in [0.0, 1.0]")
    return all_valid


def main() -> int:
    """Run all validation checks and return 0 when every check passes."""
    print("911 Dispatch RL Environment - Pre-submit Validation")
    print("=" * 60)
    checks = [
        ("pytest", check_pytest),
        ("inference", check_inference),
        ("docker_build", check_docker_build),
        ("openenv_validate", check_openenv_validate),
        ("benchmark_scores", check_benchmark_scores),
    ]
    results: dict[str, bool] = {}
    for name, check_fn in checks:
        try:
            results[name] = check_fn()
        except Exception as e:  # keep going: one broken check must not abort the rest
            print(f"ERROR in {name}: {e}")
            results[name] = False
    print("\n" + "=" * 60)
    print("VALIDATION SUMMARY")
    print("=" * 60)
    all_passed = all(results.values())
    for name, passed in results.items():
        status = "āœ“ PASSED" if passed else "āœ— FAILED"
        print(f" {name}: {status}")
    print("=" * 60)
    if all_passed:
        print("\nāœ“ ALL CHECKS PASSED - Ready for submission!")
        return 0
    print("\nāœ— SOME CHECKS FAILED - Fix issues before submitting")
    return 1


if __name__ == "__main__":
    sys.exit(main())