Spaces:

AGIreflex
/

omnibench-env

Running

App Files Files Community

omnibench-env / scripts /smoke_local.py

AGIreflex

Sync from GitHub via hub-sync

9ea9f15 verified 7 days ago

raw

history blame contribute delete

7.49 kB

	"""Local smoke checker for omnibench_aegis_env.

	Usage:
	    python smoke_local.py
	    python smoke_local.py --base-url http://127.0.0.1:8000
	    python -m omnibench_aegis_env.smoke_local --verbose

	What it validates:
	    - GET /health
	    - POST /reset using env_seed.json
	    - POST /step using the first sample action available
	    - GET /state
	    - minimal subset matching against expected_* fixtures
	"""

	# The docstring must be the first statement to become the module's __doc__;
	# a future import is still legal directly after it.
	from __future__ import annotations

	import argparse
	import json
	import os
	import sys
	from pathlib import Path
	from typing import Any, Mapping, Sequence


	# Directory containing this script; load_json() resolves fixture names against it.
	PACKAGE_ROOT = Path(__file__).resolve().parent
	# One level up is put at the front of sys.path (once) so that the
	# `omnibench_aegis_env` import below resolves when this file is run directly.
	PARENT_ROOT = PACKAGE_ROOT.parent
	if str(PARENT_ROOT) not in sys.path:
	    sys.path.insert(0, str(PARENT_ROOT))

	from omnibench_aegis_env.client import OpenEnvClient, OpenEnvClientError # noqa: E402


	# Command-line defaults; each can be overridden through the environment.
	# OPENENV_BASE_URL: server base URL. OPENENV_TIMEOUT: HTTP timeout in seconds.
	DEFAULT_BASE_URL = os.environ.get("OPENENV_BASE_URL", "http://127.0.0.1:8000")
	DEFAULT_TIMEOUT = float(os.environ.get("OPENENV_TIMEOUT", "10"))


	class SmokeFailure(RuntimeError):
	    """Raised by the smoke run when an endpoint response breaks its minimal contract."""


	def load_json(name: str) -> Any:
	    """Parse and return the JSON document stored at ``PACKAGE_ROOT / name``.

	    The file is read as UTF-8 text. Errors from opening the file or from
	    decoding the JSON are not handled here and propagate to the caller.
	    """
	    fixture_path = PACKAGE_ROOT / name
	    raw_text = fixture_path.read_text(encoding="utf-8")
	    return json.loads(raw_text)


	def is_subset(expected: Any, actual: Any, path: str = "$") -> list[str]:
	    """Check that ``actual`` contains everything described by ``expected``.

	    Matching rules:
	    - Mappings: every key of ``expected`` must exist in ``actual`` and its
	      value must match recursively; extra keys in ``actual`` are ignored.
	    - Sequences (other than str/bytes/bytearray): ``actual`` must be at
	      least as long as ``expected`` and items are matched position by
	      position; extra trailing items in ``actual`` are ignored.
	    - Anything else: values must compare equal, and a boolean only matches
	      another boolean.

	    Args:
	        expected: The minimal structure that must be present.
	        actual: The value being checked.
	        path: Location prefix used in error messages (``$`` is the root).

	    Returns:
	        A list of human-readable mismatch descriptions; empty when
	        ``actual`` satisfies ``expected``.
	    """
	    if isinstance(expected, Mapping):
	        if not isinstance(actual, Mapping):
	            return [f"{path}: expected object, got {type(actual).__name__}"]
	        errors: list[str] = []
	        for key, value in expected.items():
	            if key not in actual:
	                errors.append(f"{path}.{key}: missing key")
	                continue
	            errors.extend(is_subset(value, actual[key], f"{path}.{key}"))
	        return errors

	    if isinstance(expected, Sequence) and not isinstance(expected, (str, bytes, bytearray)):
	        if not isinstance(actual, Sequence) or isinstance(actual, (str, bytes, bytearray)):
	            return [f"{path}: expected array, got {type(actual).__name__}"]
	        errors = []
	        if len(expected) > len(actual):
	            errors.append(f"{path}: expected at least {len(expected)} items, got {len(actual)}")
	        # zip stops at the shorter side, so only positions present in both
	        # sequences are compared; the length error above covers the rest.
	        for idx, (want, got) in enumerate(zip(expected, actual)):
	            errors.extend(is_subset(want, got, f"{path}[{idx}]"))
	        return errors

	    # bool is a subclass of int, so True == 1 and False == 0 in Python.
	    # JSON booleans and numbers are different types, so report that as a
	    # mismatch instead of letting `true` satisfy an expected `1`.
	    if isinstance(expected, bool) != isinstance(actual, bool) or expected != actual:
	        return [f"{path}: expected {expected!r}, got {actual!r}"]
	    return []


	def choose_step_payload() -> Mapping[str, Any]:
	    """Pick the request body for the ``/step`` check.

	    The sample-action files are tried in a fixed order and the first usable
	    action wins. A file yields an action when it is:
	    - a non-empty list whose first element is an object (that element), or
	    - an object with a non-empty ``action_plan`` list whose first element
	      is an object (that element), or
	    - an object that itself has an ``action`` or ``name`` key (the object).

	    Candidate files that do not exist are skipped. When no file yields an
	    action, a built-in default action is returned.

	    Returns:
	        A new dict holding the chosen action.

	    Raises:
	        Whatever ``load_json`` raises for a file that exists but cannot be
	        read or parsed; only a missing file is tolerated.
	    """
	    candidates = (
	        "sample_actions_research.json",
	        "sample_actions_web.json",
	        "sample_actions_coding.json",
	        "sample_actions_finance.json",
	        "sample_actions_agent_safety.json",
	    )
	    for candidate in candidates:
	        try:
	            data = load_json(candidate)
	        except FileNotFoundError:
	            # A missing sample file just means this domain has no samples;
	            # keep looking so the later candidates and the fallback stay reachable.
	            continue
	        if isinstance(data, list) and data:
	            first = data[0]
	            if isinstance(first, Mapping):
	                return dict(first)
	        if isinstance(data, Mapping):
	            plan = data.get("action_plan")
	            if isinstance(plan, list) and plan and isinstance(plan[0], Mapping):
	                return dict(plan[0])
	            if "action" in data or "name" in data:
	                return dict(data)
	    return {"action": "advance", "value": 1}


	def summarize_payload(payload: Any, max_chars: int = 180) -> str:
	    """Render ``payload`` as compact JSON, shortened to at most ``max_chars``.

	    Args:
	        payload: Any JSON-serializable value.
	        max_chars: Upper bound on the length of the returned text.

	    Returns:
	        The JSON text (non-ASCII characters kept as-is) when it fits.
	        Otherwise the text is cut and ends in ``...``; limits too small to
	        hold the ellipsis get a plain cut, and a negative limit yields an
	        empty string.

	    Raises:
	        TypeError: If ``payload`` is not JSON-serializable.
	    """
	    text = json.dumps(payload, ensure_ascii=False)
	    if len(text) <= max_chars:
	        return text
	    if max_chars < 3:
	        # No room for the ellipsis: `max_chars - 3` would be negative and
	        # slice from the END of the text, overshooting the limit.
	        return text[: max(max_chars, 0)]
	    return text[: max_chars - 3] + "..."


	def _record_contract_check(
	    report: dict[str, Any],
	    name: str,
	    fixture_name: str,
	    expected: Any,
	    actual: Any,
	    verbose: bool,
	    request: Mapping[str, Any] | None = None,
	) -> None:
	    """Subset-match one endpoint response against its fixture and record the result.

	    Stores ``ok``/``errors``/(optional ``request``)/``summary`` under
	    ``report["checks"][name]``, raises ``SmokeFailure`` when the response
	    does not match, and prints a one-line confirmation when ``verbose``.
	    """
	    errors = is_subset(expected, actual)
	    summary = summarize_payload(actual)
	    section: dict[str, Any] = {"ok": not errors, "errors": errors}
	    if request is not None:
	        section["request"] = dict(request)
	    section["summary"] = summary
	    report["checks"][name] = section
	    if errors:
	        # The report is lost once we raise, so the message itself has to
	        # carry the mismatch details or the user never sees them.
	        raise SmokeFailure(f"/{name} did not match {fixture_name}: " + "; ".join(errors))
	    if verbose:
	        print(f"[ok] {name}", summary)


	def run_smoke(base_url: str, timeout: float, verbose: bool = False) -> dict[str, Any]:
	    """Run the health, reset, step and state checks against one server.

	    The checks run in that order and the first failing check aborts the run.

	    Args:
	        base_url: Base URL handed to ``OpenEnvClient``.
	        timeout: Timeout handed to ``OpenEnvClient``.
	        verbose: Print an ``[ok] ...`` line after each passing check.

	    Returns:
	        A report dict with ``base_url``, ``timeout``, a ``checks`` mapping
	        holding one entry per endpoint, and an overall ``ok`` flag.

	    Raises:
	        SmokeFailure: When a response does not satisfy its contract; the
	            message lists the individual mismatches.
	        Exceptions raised by the client or by ``load_json`` propagate
	        unchanged (``main`` expects ``OpenEnvClientError`` from the client).
	    """
	    client = OpenEnvClient(base_url=base_url, timeout=timeout)

	    # Load every fixture before the first client call below, so a broken
	    # fixture fails the run before the server state is touched.
	    env_seed = load_json("env_seed.json")
	    expected_reset = load_json("expected_reset_min.json")
	    expected_step = load_json("expected_step_min.json")
	    expected_state = load_json("expected_state_min.json")
	    step_payload = choose_step_payload()

	    report: dict[str, Any] = {
	        "base_url": base_url,
	        "timeout": timeout,
	        "checks": {},
	    }

	    health = client.health()
	    # A non-object response (e.g. None or a list) is a contract failure,
	    # not a reason to crash on `in` / `.get`.
	    health_ok = (
	        isinstance(health, Mapping)
	        and all(key in health for key in ("status", "env", "initialized"))
	        and health.get("status") == "ok"
	    )
	    health_summary = summarize_payload(health)
	    report["checks"]["health"] = {
	        "ok": health_ok,
	        "summary": health_summary,
	    }
	    if not health_ok:
	        raise SmokeFailure(f"/health did not satisfy the minimal contract: {health_summary}")
	    if verbose:
	        print("[ok] health", health_summary)

	    # Only an object-shaped seed is forwarded; anything else resets with None.
	    reset = client.reset(env_seed if isinstance(env_seed, Mapping) else None)
	    _record_contract_check(report, "reset", "expected_reset_min.json", expected_reset, reset, verbose)

	    step = client.step(step_payload)
	    _record_contract_check(
	        report,
	        "step",
	        "expected_step_min.json",
	        expected_step,
	        step,
	        verbose,
	        request=step_payload,
	    )

	    state = client.state()
	    _record_contract_check(report, "state", "expected_state_min.json", expected_state, state, verbose)

	    report["ok"] = all(section.get("ok") for section in report["checks"].values())
	    return report


	def main(argv: Sequence[str] | None = None) -> int:
	    """Command-line entry point for the smoke check.

	    Args:
	        argv: Argument list to parse; ``None`` lets argparse read
	            ``sys.argv``.

	    Returns:
	        ``0`` when the smoke run passes, ``1`` when the client reports an
	        error or a response breaks its contract. The outcome is printed
	        either as text or, with ``--json``, as a JSON report.
	    """
	    parser = argparse.ArgumentParser(description="Run a local smoke check against omnibench_aegis_env.")
	    parser.add_argument("--base-url", default=DEFAULT_BASE_URL, help="Environment server base URL")
	    parser.add_argument("--timeout", type=float, default=DEFAULT_TIMEOUT, help="HTTP timeout in seconds")
	    parser.add_argument("--verbose", action="store_true", help="Print live step-by-step output")
	    parser.add_argument("--json", action="store_true", help="Print the final report as JSON")
	    args = parser.parse_args(list(argv) if argv is not None else None)

	    try:
	        report = run_smoke(base_url=args.base_url, timeout=args.timeout, verbose=args.verbose)
	    except (OpenEnvClientError, SmokeFailure) as exc:
	        # Both failure kinds are reported identically; only the "type" tag
	        # tells a transport/client problem from a contract mismatch.
	        report = {
	            "ok": False,
	            "base_url": args.base_url,
	            "error": str(exc),
	            "type": "client_error" if isinstance(exc, OpenEnvClientError) else "contract_error",
	        }
	        if args.json:
	            print(json.dumps(report, indent=2, ensure_ascii=False))
	        else:
	            print(f"[fail] {report['error']}")
	        return 1

	    if args.json:
	        print(json.dumps(report, indent=2, ensure_ascii=False))
	    else:
	        print("[ok] local smoke passed")
	        print(f"- base_url: {report['base_url']}")
	        # run_smoke raises on the first failing check, so every check that
	        # made it into the report has passed.
	        for name in report["checks"]:
	            print(f"- {name}: PASS")
	    return 0


	# Script entry: hand main()'s return value to the interpreter as the exit status.
	if __name__ == "__main__":
	    sys.exit(main())