Spaces:

garvitsachdeva
/

911

Sleeping

App Files Files Community

911 / validate_local.py

garvitsachdeva

Submission polish: compliance hardening, baseline matrix, dashboard UX, tests, and docs

775befb about 1 month ago

raw

history blame contribute delete

5.52 kB

	#!/usr/bin/env python3
	"""Pre-submit local validation script for 911 Dispatch Supervisor RL Environment."""

	from __future__ import annotations

	import subprocess
	import shutil
	import sys
	from pathlib import Path


	def run_command(
	    cmd: list[str], description: str, check: bool = True
	) -> subprocess.CompletedProcess:
	    """Run one validation command, echo its output, and print a verdict line.

	    Args:
	        cmd: Argument vector handed to ``subprocess.run`` (no shell involved).
	        description: Human-readable label used in the banner and verdict.
	        check: When true, a non-zero exit status is reported as ``FAILED``.
	            When false, the caller owns the verdict, so a non-zero status is
	            reported neutrally together with its exit code.

	    Returns:
	        The ``CompletedProcess`` of the run. If the executable cannot be
	        found, a synthetic result with return code 127 is returned and the
	        exception text is stored in its ``stderr``.
	    """
	    banner = "=" * 60
	    print(f"\n{banner}")
	    print(f"CHECK: {description}")
	    print(f"CMD: {' '.join(cmd)}")
	    print(banner)
	    try:
	        result = subprocess.run(
	            cmd,
	            capture_output=True,
	            text=True,
	            encoding="utf-8",
	            errors="replace",
	        )
	    except FileNotFoundError as exc:
	        print(f"FAILED: {description}")
	        print(f"ERROR: command not found: {cmd[0]}")
	        return subprocess.CompletedProcess(cmd, 127, stdout="", stderr=str(exc))

	    if result.stdout:
	        print(result.stdout)
	    if result.stderr:
	        print(result.stderr, file=sys.stderr)

	    # Only claim success for exit status 0; previously check=False printed
	    # "PASSED" even when the command had failed.
	    if result.returncode == 0:
	        print(f"PASSED: {description}")
	    elif check:
	        print(f"FAILED: {description}")
	    else:
	        print(f"EXIT CODE {result.returncode}: {description}")
	    return result


	def _tool_path(name: str) -> str \| None:
	"""Resolve tool path from PATH or current interpreter's Scripts directory."""
	found = shutil.which(name)
	if found:
	return found

	scripts_dir = Path(sys.executable).resolve().parent
	candidates = [
	scripts_dir / name,
	scripts_dir / f"{name}.exe",
	]
	for candidate in candidates:
	if candidate.exists():
	return str(candidate)
	return None


	def _python_cmd(*args: str) -> list[str]:
	    """Return an argv that runs Python with ``args``, via ``uv run`` if uv exists.

	    Falls back to the interpreter executing this script when uv is not found.
	    """
	    uv_executable = _tool_path("uv")
	    launcher = [uv_executable, "run", "python"] if uv_executable else [sys.executable]
	    return launcher + list(args)


	def check_pytest() -> bool:
	    """Run the test suite under ``tests/`` quietly; True when pytest exits 0."""
	    pytest_cmd = _python_cmd("-m", "pytest", "tests/", "-q")
	    completed = run_command(pytest_cmd, "All tests pass")
	    return completed.returncode == 0


	def check_inference() -> bool:
	    """Run ``inference.py`` with placeholder credentials and check its markers.

	    The child process gets a copy of the current environment with the API
	    settings overridden by dummy values and ``USE_RANDOM`` set to ``"true"``.
	    The run is limited to 300 seconds.

	    Returns:
	        True when the captured stdout contains both ``[START]`` and ``[END]``.
	    """
	    import os

	    overrides = {
	        "API_BASE_URL": "https://api.openai.com/v1",
	        "MODEL_NAME": "gpt-4",
	        "OPENAI_API_KEY": "dummy-token-for-local-validation",
	        "USE_RANDOM": "true",
	    }
	    child_env = {**os.environ, **overrides}

	    print("\nNOTE: Running inference.py in random-agent mode for local validation")
	    completed = subprocess.run(
	        _python_cmd("inference.py"),
	        capture_output=True,
	        text=True,
	        encoding="utf-8",
	        errors="replace",
	        env=child_env,
	        timeout=300,
	    )

	    captured = completed.stdout
	    if captured:
	        print(captured)
	    if completed.stderr:
	        print(completed.stderr, file=sys.stderr)

	    # Both markers must be present; the exit status itself is not inspected.
	    if not all(marker in captured for marker in ("[START]", "[END]")):
	        print("FAILED: inference.py output missing [START] or [END] markers")
	        return False

	    print("PASSED: inference.py produces [START]/[END] output")
	    return True


	def check_docker_build() -> bool:
	    """Build the project's Docker image and report whether the build succeeded.

	    Returns:
	        True when ``docker build`` exits with status 0; False when the
	        ``docker`` command is not on PATH or the build fails.
	    """
	    if not shutil.which("docker"):
	        print("FAILED: Docker build succeeds")
	        print("ERROR: docker command not found")
	        return False

	    # Use run_command's default check=True so that a failed build is labelled
	    # FAILED in the log, matching the value returned from this function.
	    result = run_command(
	        ["docker", "build", "-t", "citywide-dispatch-supervisor", "."],
	        "Docker build succeeds",
	    )
	    return result.returncode == 0


	def check_openenv_validate() -> bool:
	    """Run ``openenv validate`` and report whether it succeeded.

	    Returns:
	        True when the command exits with status 0; False when the ``openenv``
	        executable cannot be located or validation fails.
	    """
	    openenv = _tool_path("openenv")
	    if not openenv:
	        print("FAILED: openenv validate passes")
	        print("ERROR: openenv command not found")
	        print("HINT: Install with: pip install openenv-core")
	        return False

	    # Use run_command's default check=True so that a failed validation is
	    # labelled FAILED in the log, matching the value returned from here.
	    result = run_command([openenv, "validate"], "openenv validate passes")
	    return result.returncode == 0


	def check_benchmark_scores() -> bool:
	    """Run every benchmark task with seed 42 and verify each score is in [0, 1].

	    Returns:
	        True when every task's ``score`` lies in the closed interval
	        [0.0, 1.0]; False if at least one score falls outside it.
	    """
	    from src.benchmark import list_tasks, run_task

	    available = list_tasks()
	    print(f"\nFound {len(available)} tasks:")

	    every_score_ok = True
	    for entry in available:
	        task_id = entry["task_id"]
	        print(f" - {task_id}: {entry['name']} ({entry['difficulty']})")

	        score = run_task(task_id, seed=42)["score"]
	        print(f" Score: {score:.3f}")

	        if 0.0 <= score <= 1.0:
	            print(" PASSED: Score is in [0.0, 1.0]")
	            continue

	        # Keep going so that every out-of-range task is listed.
	        print(f" FAILED: Score {score} is outside [0.0, 1.0]")
	        every_score_ok = False

	    return every_score_ok


	def main() -> int:
	    """Run every pre-submit check and print a pass/fail summary.

	    Each check runs even if an earlier one failed; an exception raised by a
	    check is reported and counted as a failure of that check.

	    Returns:
	        Process exit code: 0 when every check passed, 1 otherwise.
	    """
	    # The summary glyphs and echoed tool output may not be encodable in the
	    # console's code page. Substitute unencodable characters instead of
	    # letting print() raise UnicodeEncodeError after the checks have run.
	    reconfigure = getattr(sys.stdout, "reconfigure", None)
	    if callable(reconfigure):
	        reconfigure(errors="replace")

	    print("911 Dispatch RL Environment - Pre-submit Validation")
	    print("=" * 60)

	    checks = [
	        ("pytest", check_pytest),
	        ("inference", check_inference),
	        ("docker_build", check_docker_build),
	        ("openenv_validate", check_openenv_validate),
	        ("benchmark_scores", check_benchmark_scores),
	    ]

	    results: dict[str, bool] = {}

	    for name, check_fn in checks:
	        try:
	            results[name] = check_fn()
	        except Exception as e:
	            # Top-level boundary: one crashing check must not prevent the
	            # remaining checks or the summary from running.
	            print(f"ERROR in {name}: {e}")
	            results[name] = False

	    print("\n" + "=" * 60)
	    print("VALIDATION SUMMARY")
	    print("=" * 60)

	    for name, passed in results.items():
	        status = "✓ PASSED" if passed else "✗ FAILED"
	        print(f" {name}: {status}")

	    print("=" * 60)

	    if all(results.values()):
	        print("\n✓ ALL CHECKS PASSED - Ready for submission!")
	        return 0

	    print("\n✗ SOME CHECKS FAILED - Fix issues before submitting")
	    return 1


	# Script entry point: main()'s return value becomes the process exit status.
	if __name__ == "__main__":
	    raise SystemExit(main())