Spaces:

garvitsachdeva
/

911

Sleeping

File size: 5,522 Bytes

#!/usr/bin/env python3
"""Pre-submit local validation script for 911 Dispatch Supervisor RL Environment."""

from __future__ import annotations

import subprocess
import shutil
import sys
from pathlib import Path


def run_command(
    cmd: list[str], description: str, check: bool = True
) -> subprocess.CompletedProcess:
    print(f"\n{'=' * 60}")
    print(f"CHECK: {description}")
    print(f"CMD: {' '.join(cmd)}")
    print(f"{'=' * 60}")
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
    except FileNotFoundError as exc:
        print(f"FAILED: {description}")
        print(f"ERROR: command not found: {cmd[0]}")
        return subprocess.CompletedProcess(cmd, 127, stdout="", stderr=str(exc))
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)
    if check and result.returncode != 0:
        print(f"FAILED: {description}")
        return result
    print(f"PASSED: {description}")
    return result


def _tool_path(name: str) -> str | None:
    """Resolve tool path from PATH or current interpreter's Scripts directory."""
    found = shutil.which(name)
    if found:
        return found

    scripts_dir = Path(sys.executable).resolve().parent
    candidates = [
        scripts_dir / name,
        scripts_dir / f"{name}.exe",
    ]
    for candidate in candidates:
        if candidate.exists():
            return str(candidate)
    return None


def _python_cmd(*args: str) -> list[str]:
    """Build a Python command, preferring uv when available."""
    uv = _tool_path("uv")
    if uv:
        return [uv, "run", "python", *args]
    return [sys.executable, *args]


def check_pytest() -> bool:
    result = run_command(_python_cmd("-m", "pytest", "tests/", "-q"), "All tests pass")
    return result.returncode == 0


def check_inference() -> bool:
    import os

    env = os.environ.copy()
    env["API_BASE_URL"] = "https://api.openai.com/v1"
    env["MODEL_NAME"] = "gpt-4"
    env["OPENAI_API_KEY"] = "dummy-token-for-local-validation"
    env["USE_RANDOM"] = "true"

    print("\nNOTE: Running inference.py in random-agent mode for local validation")
    result = subprocess.run(
        _python_cmd("inference.py"),
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        env=env,
        timeout=300,
    )

    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)

    has_start = "[START]" in result.stdout
    has_end = "[END]" in result.stdout

    if has_start and has_end:
        print("PASSED: inference.py produces [START]/[END] output")
        return True
    else:
        print(f"FAILED: inference.py output missing [START] or [END] markers")
        return False


def check_docker_build() -> bool:
    if not shutil.which("docker"):
        print("FAILED: Docker build succeeds")
        print("ERROR: docker command not found")
        return False

    result = run_command(
        ["docker", "build", "-t", "citywide-dispatch-supervisor", "."],
        "Docker build succeeds",
        check=False,
    )
    return result.returncode == 0


def check_openenv_validate() -> bool:
    openenv = _tool_path("openenv")
    if not openenv:
        print("FAILED: openenv validate passes")
        print("ERROR: openenv command not found")
        print("HINT: Install with: pip install openenv-core")
        return False

    result = run_command([openenv, "validate"], "openenv validate passes", check=False)
    return result.returncode == 0


def check_benchmark_scores() -> bool:
    from src.benchmark import list_tasks, run_task

    tasks = list_tasks()
    print(f"\nFound {len(tasks)} tasks:")

    all_valid = True
    for task in tasks:
        task_id = task["task_id"]
        print(f"  - {task_id}: {task['name']} ({task['difficulty']})")

        result = run_task(task_id, seed=42)
        score = result["score"]

        print(f"    Score: {score:.3f}")

        if not (0.0 <= score <= 1.0):
            print(f"    FAILED: Score {score} is outside [0.0, 1.0]")
            all_valid = False
        else:
            print(f"    PASSED: Score is in [0.0, 1.0]")

    return all_valid


def main() -> int:
    print("911 Dispatch RL Environment - Pre-submit Validation")
    print("=" * 60)

    checks = [
        ("pytest", check_pytest),
        ("inference", check_inference),
        ("docker_build", check_docker_build),
        ("openenv_validate", check_openenv_validate),
        ("benchmark_scores", check_benchmark_scores),
    ]

    results: dict[str, bool] = {}

    for name, check_fn in checks:
        try:
            results[name] = check_fn()
        except Exception as e:
            print(f"ERROR in {name}: {e}")
            results[name] = False

    print("\n" + "=" * 60)
    print("VALIDATION SUMMARY")
    print("=" * 60)

    all_passed = True
    for name, passed in results.items():
        status = "✓ PASSED" if passed else "✗ FAILED"
        print(f"  {name}: {status}")
        if not passed:
            all_passed = False

    print("=" * 60)

    if all_passed:
        print("\n✓ ALL CHECKS PASSED - Ready for submission!")
        return 0
    else:
        print("\n✗ SOME CHECKS FAILED - Fix issues before submitting")
        return 1


if __name__ == "__main__":
    sys.exit(main())