#!/usr/bin/env python3
"""Pre-submit local validation script for 911 Dispatch Supervisor RL Environment."""
from __future__ import annotations
import subprocess
import shutil
import sys
from pathlib import Path
def run_command(
    cmd: list[str], description: str, check: bool = True
) -> subprocess.CompletedProcess:
    """Run *cmd* without a shell, echo its output, and print a PASS/FAIL line.

    Args:
        cmd: Command and arguments, executed via ``subprocess.run`` (no shell).
        description: Human-readable label printed in the banner and verdict.
        check: Retained for backward compatibility; the verdict line now
            always reflects the actual return code.

    Returns:
        The ``CompletedProcess`` from the run, or a synthetic one with
        returncode 127 when the executable cannot be found.
    """
    print(f"\n{'=' * 60}")
    print(f"CHECK: {description}")
    print(f"CMD: {' '.join(cmd)}")
    print(f"{'=' * 60}")
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except FileNotFoundError as exc:
        print(f"FAILED: {description}")
        print(f"ERROR: command not found: {cmd[0]}")
        # 127 is the conventional shell exit status for "command not found".
        return subprocess.CompletedProcess(cmd, 127, stdout="", stderr=str(exc))
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)
    # BUG FIX: previously check=False skipped the FAILED branch entirely, so a
    # non-zero exit (e.g. a failed docker build) was announced as PASSED.
    # Report the real outcome regardless of `check`.
    if result.returncode != 0:
        print(f"FAILED: {description}")
    else:
        print(f"PASSED: {description}")
    return result
def _tool_path(name: str) -> str | None:
"""Resolve tool path from PATH or current interpreter's Scripts directory."""
found = shutil.which(name)
if found:
return found
scripts_dir = Path(sys.executable).resolve().parent
candidates = [
scripts_dir / name,
scripts_dir / f"{name}.exe",
]
for candidate in candidates:
if candidate.exists():
return str(candidate)
return None
def _python_cmd(*args: str) -> list[str]:
    """Assemble a command line that runs Python with *args*, via uv if installed."""
    uv_path = _tool_path("uv")
    return [uv_path, "run", "python", *args] if uv_path else [sys.executable, *args]
def check_pytest() -> bool:
    """Run the full test suite quietly; True when pytest exits 0."""
    outcome = run_command(_python_cmd("-m", "pytest", "tests/", "-q"), "All tests pass")
    return outcome.returncode == 0
def check_inference() -> bool:
    """Run inference.py in random-agent mode and check for [START]/[END] markers.

    Dummy endpoint/credentials are supplied because inference.py requires them
    to be set, but USE_RANDOM=true means no real API call is made.

    Returns:
        True when both "[START]" and "[END]" appear in the script's stdout.
    """
    import os  # local import: only this check needs an environment copy

    env = os.environ.copy()
    env["API_BASE_URL"] = "https://api.openai.com/v1"
    env["MODEL_NAME"] = "gpt-4"
    env["OPENAI_API_KEY"] = "dummy-token-for-local-validation"
    env["USE_RANDOM"] = "true"
    print("\nNOTE: Running inference.py in random-agent mode for local validation")
    # BUG FIX: subprocess.run(timeout=...) raises TimeoutExpired, which was
    # previously unhandled here and surfaced as a generic "ERROR in inference".
    try:
        result = subprocess.run(
            _python_cmd("inference.py"),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            env=env,
            timeout=300,
        )
    except subprocess.TimeoutExpired:
        print("FAILED: inference.py timed out after 300 seconds")
        return False
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)
    if "[START]" in result.stdout and "[END]" in result.stdout:
        print("PASSED: inference.py produces [START]/[END] output")
        return True
    # Was an f-string with no placeholders; plain literal is equivalent.
    print("FAILED: inference.py output missing [START] or [END] markers")
    return False
def check_docker_build() -> bool:
    """Build the environment's Docker image; True when the build exits 0."""
    if shutil.which("docker") is None:
        print("FAILED: Docker build succeeds")
        print("ERROR: docker command not found")
        return False
    build = run_command(
        ["docker", "build", "-t", "citywide-dispatch-supervisor", "."],
        "Docker build succeeds",
        check=False,
    )
    return build.returncode == 0
def check_openenv_validate() -> bool:
    """Run `openenv validate`; True when the CLI is present and exits 0."""
    cli = _tool_path("openenv")
    if cli is None:
        print("FAILED: openenv validate passes")
        print("ERROR: openenv command not found")
        print("HINT: Install with: pip install openenv-core")
        return False
    outcome = run_command([cli, "validate"], "openenv validate passes", check=False)
    return outcome.returncode == 0
def check_benchmark_scores() -> bool:
    """Run every benchmark task once and verify each score lies in [0.0, 1.0].

    Returns:
        True only when all task scores fall within the valid range.
    """
    # Local import: the project package may not be importable at module load.
    from src.benchmark import list_tasks, run_task

    tasks = list_tasks()
    print(f"\nFound {len(tasks)} tasks:")
    all_valid = True
    for task in tasks:
        task_id = task["task_id"]
        print(f" - {task_id}: {task['name']} ({task['difficulty']})")
        # Fixed seed keeps the check deterministic across runs.
        result = run_task(task_id, seed=42)
        score = result["score"]
        print(f" Score: {score:.3f}")
        if 0.0 <= score <= 1.0:
            # Was an f-string with no placeholders; plain literal is equivalent.
            print(" PASSED: Score is in [0.0, 1.0]")
        else:
            print(f" FAILED: Score {score} is outside [0.0, 1.0]")
            all_valid = False
    return all_valid
def main() -> int:
    """Run every validation check and print a summary; return 0 when all pass."""
    print("911 Dispatch RL Environment - Pre-submit Validation")
    print("=" * 60)
    checks = [
        ("pytest", check_pytest),
        ("inference", check_inference),
        ("docker_build", check_docker_build),
        ("openenv_validate", check_openenv_validate),
        ("benchmark_scores", check_benchmark_scores),
    ]
    results: dict[str, bool] = {}
    for label, check_fn in checks:
        try:
            results[label] = check_fn()
        except Exception as exc:  # one crashing check must not stop the rest
            print(f"ERROR in {label}: {exc}")
            results[label] = False
    print("\n" + "=" * 60)
    print("VALIDATION SUMMARY")
    print("=" * 60)
    for label, ok in results.items():
        print(f" {label}: {'✓ PASSED' if ok else '✗ FAILED'}")
    print("=" * 60)
    if all(results.values()):
        print("\n✓ ALL CHECKS PASSED - Ready for submission!")
        return 0
    print("\n✗ SOME CHECKS FAILED - Fix issues before submitting")
    return 1
if __name__ == "__main__":
    # SystemExit(int) propagates the same process exit code as sys.exit(int).
    raise SystemExit(main())
|