911 / validate_local.py
garvitsachdeva's picture
Submission polish: compliance hardening, baseline matrix, dashboard UX, tests, and docs
775befb
#!/usr/bin/env python3
"""Pre-submit local validation script for 911 Dispatch Supervisor RL Environment."""
from __future__ import annotations
import subprocess
import shutil
import sys
from pathlib import Path
def run_command(
    cmd: list[str], description: str, check: bool = True
) -> subprocess.CompletedProcess:
    """Run *cmd*, echo its captured output, and print a verdict line.

    Args:
        cmd: Argument vector handed to ``subprocess.run`` (no shell involved).
        description: Human-readable name of the check; used in the banner
            and in the verdict line.
        check: When true, a non-zero exit status is reported as ``FAILED``.
            When false, a non-zero exit status is reported neutrally as
            ``EXIT CODE <n>`` so the caller can decide what it means.

    Returns:
        The completed process. If the executable cannot be found, a
        synthetic ``CompletedProcess`` with return code 127 and the error
        text in ``stderr`` is returned instead of raising.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"CHECK: {description}")
    print(f"CMD: {' '.join(cmd)}")
    print(banner)

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except FileNotFoundError as exc:
        print(f"FAILED: {description}")
        print(f"ERROR: command not found: {cmd[0]}")
        return subprocess.CompletedProcess(cmd, 127, stdout="", stderr=str(exc))

    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)

    # Only a zero exit status may be announced as PASSED; previously a
    # failing command run with check=False was also printed as PASSED.
    if result.returncode == 0:
        print(f"PASSED: {description}")
    elif check:
        print(f"FAILED: {description}")
    else:
        print(f"EXIT CODE {result.returncode}: {description}")
    return result
def _tool_path(name: str) -> str | None:
    """Locate the executable *name*.

    ``PATH`` is consulted first via ``shutil.which``. If that finds nothing,
    the directory containing the running interpreter is searched for
    ``name`` and then ``name.exe``.

    Returns:
        The path as a string, or ``None`` when no candidate exists.
    """
    on_path = shutil.which(name)
    if on_path:
        return on_path

    # Fall back to the directory that holds the current interpreter.
    interpreter_dir = Path(sys.executable).resolve().parent
    for filename in (name, f"{name}.exe"):
        location = interpreter_dir / filename
        if location.exists():
            return str(location)
    return None
def _python_cmd(*args: str) -> list[str]:
    """Return the argv that runs Python with *args*.

    When a ``uv`` executable can be located the command is
    ``<uv> run python <args>``; otherwise the current interpreter
    (``sys.executable``) is invoked directly.
    """
    uv_executable = _tool_path("uv")
    launcher = [uv_executable, "run", "python"] if uv_executable else [sys.executable]
    return launcher + list(args)
def check_pytest() -> bool:
    """Run pytest quietly over ``tests/``; return True when it exits with 0."""
    pytest_argv = _python_cmd("-m", "pytest", "tests/", "-q")
    outcome = run_command(pytest_argv, "All tests pass")
    return outcome.returncode == 0
def check_inference() -> bool:
    """Run ``inference.py`` with placeholder settings and look for its markers.

    The child process inherits the current environment, with
    ``API_BASE_URL``, ``MODEL_NAME``, ``OPENAI_API_KEY`` and ``USE_RANDOM``
    overridden. Its stdout and stderr are echoed. The check passes when
    stdout contains both ``[START]`` and ``[END]``; the exit status is not
    inspected. Exceptions from ``subprocess.run`` (including the 300 second
    timeout) are not caught here.
    """
    import os

    child_env = {
        **os.environ,
        "API_BASE_URL": "https://api.openai.com/v1",
        "MODEL_NAME": "gpt-4",
        "OPENAI_API_KEY": "dummy-token-for-local-validation",
        "USE_RANDOM": "true",
    }

    print("\nNOTE: Running inference.py in random-agent mode for local validation")
    completed = subprocess.run(
        _python_cmd("inference.py"),
        env=child_env,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        timeout=300,
    )

    captured_out = completed.stdout
    if captured_out:
        print(captured_out)
    if completed.stderr:
        print(completed.stderr, file=sys.stderr)

    # Both markers must appear somewhere in stdout.
    if "[START]" not in captured_out or "[END]" not in captured_out:
        print("FAILED: inference.py output missing [START] or [END] markers")
        return False

    print("PASSED: inference.py produces [START]/[END] output")
    return True
def check_docker_build() -> bool:
    """Build the Docker image from the current directory.

    Returns False immediately, with an explanatory message, when no
    ``docker`` executable is on ``PATH``. Otherwise returns True only if
    ``docker build`` exits with status 0.
    """
    docker_on_path = shutil.which("docker")
    if not docker_on_path:
        print("FAILED: Docker build succeeds")
        print("ERROR: docker command not found")
        return False

    build_argv = ["docker", "build", "-t", "citywide-dispatch-supervisor", "."]
    outcome = run_command(build_argv, "Docker build succeeds", check=False)
    return outcome.returncode == 0
def check_openenv_validate() -> bool:
    """Run ``openenv validate``; return True when it exits with status 0.

    If the ``openenv`` executable cannot be located, a failure message and
    an installation hint are printed and False is returned.
    """
    openenv_executable = _tool_path("openenv")
    if openenv_executable is None or not openenv_executable:
        for line in (
            "FAILED: openenv validate passes",
            "ERROR: openenv command not found",
            "HINT: Install with: pip install openenv-core",
        ):
            print(line)
        return False

    outcome = run_command(
        [openenv_executable, "validate"],
        "openenv validate passes",
        check=False,
    )
    return outcome.returncode == 0
def check_benchmark_scores() -> bool:
    """Run every benchmark task with seed 42 and verify each score's range.

    Tasks come from ``src.benchmark.list_tasks()``; each entry is read for
    its ``task_id``, ``name`` and ``difficulty`` keys. The ``score`` entry
    of ``run_task(task_id, seed=42)`` must lie within ``[0.0, 1.0]``.

    Returns:
        True when every task's score is in range (also when there are no
        tasks), False otherwise.
    """
    from src.benchmark import list_tasks, run_task

    task_specs = list_tasks()
    print(f"\nFound {len(task_specs)} tasks:")

    out_of_range = 0
    for spec in task_specs:
        tid = spec["task_id"]
        print(f" - {tid}: {spec['name']} ({spec['difficulty']})")

        score = run_task(tid, seed=42)["score"]
        print(f" Score: {score:.3f}")

        if 0.0 <= score <= 1.0:
            print(" PASSED: Score is in [0.0, 1.0]")
        else:
            print(f" FAILED: Score {score} is outside [0.0, 1.0]")
            out_of_range += 1

    return out_of_range == 0
def main() -> int:
    """Run every validation check, print a summary, and return an exit code.

    Each check runs in order. An exception raised by a check is printed and
    counted as a failure, so the remaining checks still run.

    Returns:
        0 when all checks passed, 1 otherwise.
    """
    rule = "=" * 60
    print("911 Dispatch RL Environment - Pre-submit Validation")
    print(rule)

    registry = [
        ("pytest", check_pytest),
        ("inference", check_inference),
        ("docker_build", check_docker_build),
        ("openenv_validate", check_openenv_validate),
        ("benchmark_scores", check_benchmark_scores),
    ]

    outcomes: dict[str, bool] = {}
    for label, runner in registry:
        try:
            outcomes[label] = runner()
        except Exception as err:
            # Top-level boundary: one crashing check must not abort the rest.
            print(f"ERROR in {label}: {err}")
            outcomes[label] = False

    print("\n" + rule)
    print("VALIDATION SUMMARY")
    print(rule)
    for label, ok in outcomes.items():
        verdict = "✓ PASSED" if ok else "✗ FAILED"
        print(f" {label}: {verdict}")
    print(rule)

    if not all(outcomes.values()):
        print("\n✗ SOME CHECKS FAILED - Fix issues before submitting")
        return 1

    print("\n✓ ALL CHECKS PASSED - Ready for submission!")
    return 0
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())