File size: 5,522 Bytes
4904e85
6172160
4904e85
 
 
 
775befb
4904e85
775befb
4904e85
 
 
 
 
 
 
 
 
775befb
 
 
 
 
 
 
 
 
 
 
 
4904e85
 
 
 
 
 
 
 
 
 
 
775befb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4904e85
775befb
4904e85
 
 
 
 
 
 
 
 
43f2683
14170d7
4904e85
14170d7
4904e85
775befb
4904e85
 
775befb
 
4904e85
14170d7
4904e85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
775befb
 
 
 
 
4904e85
885a0b4
4904e85
 
 
 
 
 
775befb
 
 
 
 
 
 
 
 
 
 
 
4904e85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885a0b4
4904e85
 
 
 
 
 
775befb
4904e85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env python3
"""Pre-submit local validation script for 911 Dispatch Supervisor RL Environment."""

from __future__ import annotations

import subprocess
import shutil
import sys
from pathlib import Path


def run_command(
    cmd: list[str], description: str, check: bool = True
) -> subprocess.CompletedProcess:
    """Run *cmd*, echo its output, and print a PASSED/FAILED verdict.

    A banner with the description and the command line is printed first.
    Captured stdout is echoed to stdout and captured stderr to stderr.

    Args:
        cmd: Command and arguments, passed to ``subprocess.run`` as a list
            (no shell involved).
        description: Human-readable name of the check, used in the banner
            and in the verdict line.
        check: Kept for backward compatibility. It used to be the only case
            in which a non-zero exit printed ``FAILED``; with
            ``check=False`` a failing command was misreported as
            ``PASSED``. A non-zero exit is now always reported as
            ``FAILED``, so the flag no longer changes the output.

    Returns:
        The ``CompletedProcess`` of the command. If the executable cannot be
        found, a synthetic result with return code 127 and the error text in
        ``stderr`` is returned instead of raising.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"CHECK: {description}")
    print(f"CMD: {' '.join(cmd)}")
    print(banner)
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except FileNotFoundError as exc:
        print(f"FAILED: {description}")
        print(f"ERROR: command not found: {cmd[0]}")
        # 127 is the exit status shells conventionally use for "not found".
        return subprocess.CompletedProcess(cmd, 127, stdout="", stderr=str(exc))
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)
    # The verdict follows the exit status alone so the inline message can
    # never contradict what callers derive from ``returncode``.
    if result.returncode != 0:
        print(f"FAILED: {description}")
    else:
        print(f"PASSED: {description}")
    return result


def _tool_path(name: str) -> str | None:
    """Resolve tool path from PATH or current interpreter's Scripts directory."""
    found = shutil.which(name)
    if found:
        return found

    scripts_dir = Path(sys.executable).resolve().parent
    candidates = [
        scripts_dir / name,
        scripts_dir / f"{name}.exe",
    ]
    for candidate in candidates:
        if candidate.exists():
            return str(candidate)
    return None


def _python_cmd(*args: str) -> list[str]:
    """Return a command list that runs Python with *args*.

    When a ``uv`` executable can be resolved the command is
    ``uv run python <args>``; otherwise the current interpreter
    (``sys.executable``) is invoked directly.
    """
    uv_binary = _tool_path("uv")
    launcher = [uv_binary, "run", "python"] if uv_binary else [sys.executable]
    return launcher + list(args)


def check_pytest() -> bool:
    """Run pytest quietly on ``tests/`` and return True when it exits with 0."""
    pytest_cmd = _python_cmd("-m", "pytest", "tests/", "-q")
    outcome = run_command(pytest_cmd, "All tests pass")
    return outcome.returncode == 0


def check_inference() -> bool:
    """Run ``inference.py`` and check that its stdout has both run markers.

    The script is launched in a copy of the current environment with
    ``API_BASE_URL``, ``MODEL_NAME``, ``OPENAI_API_KEY`` (a dummy value) and
    ``USE_RANDOM=true`` set. Its captured stdout and stderr are echoed.

    Returns:
        True when stdout contains both ``[START]`` and ``[END]``. False when
        a marker is missing, the script exceeds the timeout, or the process
        cannot be started. The script's exit status is not inspected.
    """
    import os

    timeout_seconds = 300

    env = os.environ.copy()
    env["API_BASE_URL"] = "https://api.openai.com/v1"
    env["MODEL_NAME"] = "gpt-4"
    env["OPENAI_API_KEY"] = "dummy-token-for-local-validation"
    env["USE_RANDOM"] = "true"

    print("\nNOTE: Running inference.py in random-agent mode for local validation")
    try:
        result = subprocess.run(
            _python_cmd("inference.py"),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            env=env,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        # Report a hang as an ordinary failed check, the same way the other
        # checks report a missing tool, instead of raising to the caller.
        print(f"FAILED: inference.py did not finish within {timeout_seconds} seconds")
        return False
    except OSError as exc:
        print("FAILED: inference.py could not be started")
        print(f"ERROR: {exc}")
        return False

    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)

    has_start = "[START]" in result.stdout
    has_end = "[END]" in result.stdout

    if has_start and has_end:
        print("PASSED: inference.py produces [START]/[END] output")
        return True

    print("FAILED: inference.py output missing [START] or [END] markers")
    return False


def check_docker_build() -> bool:
    """Build the Docker image from the current directory.

    Returns False straight away (after printing an error) when no ``docker``
    executable is on ``PATH``; otherwise returns True only when
    ``docker build`` exits with status 0.
    """
    if shutil.which("docker"):
        build_cmd = ["docker", "build", "-t", "citywide-dispatch-supervisor", "."]
        build = run_command(build_cmd, "Docker build succeeds", check=False)
        return build.returncode == 0

    print("FAILED: Docker build succeeds")
    print("ERROR: docker command not found")
    return False


def check_openenv_validate() -> bool:
    """Run ``openenv validate`` and return True when it exits with 0.

    When the ``openenv`` executable cannot be resolved, an error and an
    install hint are printed and False is returned.
    """
    cli = _tool_path("openenv")
    if cli:
        outcome = run_command(
            [cli, "validate"], "openenv validate passes", check=False
        )
        return outcome.returncode == 0

    print("FAILED: openenv validate passes")
    print("ERROR: openenv command not found")
    print("HINT: Install with: pip install openenv-core")
    return False


def check_benchmark_scores() -> bool:
    """Run every benchmark task with seed 42 and range-check its score.

    Each task returned by ``list_tasks()`` is listed, executed through
    ``run_task`` and its ``"score"`` compared against ``[0.0, 1.0]``.
    Returns True when no score falls outside that interval.
    """
    from src.benchmark import list_tasks, run_task

    available = list_tasks()
    print(f"\nFound {len(available)} tasks:")

    out_of_range = 0
    for entry in available:
        identifier = entry["task_id"]
        print(f"  - {identifier}: {entry['name']} ({entry['difficulty']})")

        score = run_task(identifier, seed=42)["score"]
        print(f"    Score: {score:.3f}")

        if 0.0 <= score <= 1.0:
            print("    PASSED: Score is in [0.0, 1.0]")
        else:
            print(f"    FAILED: Score {score} is outside [0.0, 1.0]")
            out_of_range += 1

    return out_of_range == 0


def main() -> int:
    """Run every validation check in order and print a summary.

    An exception raised by a check is printed and counted as a failure, so
    the remaining checks still run.

    Returns:
        0 when every check passed, 1 otherwise.
    """
    rule = "=" * 60
    print("911 Dispatch RL Environment - Pre-submit Validation")
    print(rule)

    registry = [
        ("pytest", check_pytest),
        ("inference", check_inference),
        ("docker_build", check_docker_build),
        ("openenv_validate", check_openenv_validate),
        ("benchmark_scores", check_benchmark_scores),
    ]

    outcome: dict[str, bool] = {}
    for label, runner in registry:
        try:
            outcome[label] = runner()
        except Exception as err:
            # One crashing check must not prevent the others from running.
            print(f"ERROR in {label}: {err}")
            outcome[label] = False

    print("\n" + rule)
    print("VALIDATION SUMMARY")
    print(rule)

    for label, ok in outcome.items():
        verdict = "✓ PASSED" if ok else "✗ FAILED"
        print(f"  {label}: {verdict}")

    print(rule)

    if not all(outcome.values()):
        print("\n✗ SOME CHECKS FAILED - Fix issues before submitting")
        return 1

    print("\n✓ ALL CHECKS PASSED - Ready for submission!")
    return 0


if __name__ == "__main__":
    # Hand main()'s integer status to the interpreter as the process exit code.
    raise SystemExit(main())