""" ACRE pre-submission validator. Checks the repository against the submission checklist and, when a server URL is available, probes the HTTP API as well. Run: python validate.py --url http://localhost:7860 """ from __future__ import annotations import argparse import ast import re import sys from typing import Any, Tuple try: import requests except ImportError: print("[ERROR] requests is required. Run: pip install requests") sys.exit(1) PASS = "\033[92m[PASS]\033[0m" FAIL = "\033[91m[FAIL]\033[0m" def check(label: str, ok: bool, detail: str = "") -> bool: status = PASS if ok else FAIL message = f" {status} {label}" if detail: message += f" - {detail}" print(message) return ok def get(url: str, path: str, timeout: int = 15) -> Tuple[bool, Any]: try: response = requests.get(f"{url}{path}", timeout=timeout) response.raise_for_status() return True, response.json() except Exception as exc: return False, str(exc) def post(url: str, path: str, payload: dict, timeout: int = 15) -> Tuple[bool, Any]: try: response = requests.post(f"{url}{path}", json=payload, timeout=timeout) response.raise_for_status() return True, response.json() except Exception as exc: return False, str(exc) def read_text(path: str) -> str: with open(path, encoding="utf-8") as handle: return handle.read() def run_validation(base_url: str) -> int: failures = 0 print("\n" + "=" * 60) print(" ACRE Pre-Submission Validator") print("=" * 60) print(f" Target: {base_url}\n") print("1. Static repository checks") try: interface_src = read_text("openenv_interface.py") tree = ast.parse(interface_src) classes = {node.name: node for node in tree.body if isinstance(node, ast.ClassDef)} env_cls = classes.get("OpenEnvRefactorEnv") failures += 0 if check("openenv_interface.py exists", True) else 1 failures += 0 if check("OpenEnvRefactorEnv is defined", env_cls is not None) else 1 if env_cls is not None: methods = {node.name for node in env_cls.body if isinstance(node, ast.FunctionDef)} for method_name in ["reset", "step", "state"]: failures += 0 if check( f"OpenEnvRefactorEnv implements {method_name}()", method_name in methods, ) else 1 except FileNotFoundError: failures += 1 check("openenv_interface.py exists", False, "file not found") try: models_src = read_text("models.py") for name in ["ObservationModel", "ActionModel", "RewardModel"]: failures += 0 if check( f"{name} is defined in models.py", f"class {name}" in models_src, ) else 1 except FileNotFoundError: failures += 1 check("models.py exists", False, "file not found") print("\n2. Health check (GET /)") ok, data = get(base_url, "/") failures += 0 if check("GET / returns HTTP 200", ok) else 1 if ok: failures += 0 if check( "Response has status field", isinstance(data, dict) and "status" in data, str(data), ) else 1 print("\n3. Tasks (GET /tasks)") ok, data = get(base_url, "/tasks") failures += 0 if check("GET /tasks returns 200", ok) else 1 if ok: tasks = data.get("tasks", []) if isinstance(data, dict) else [] failures += 0 if check("At least 3 tasks defined", len(tasks) >= 3, f"found {len(tasks)}") else 1 difficulties = [t.get("difficulty", "") for t in tasks] for diff in ["easy", "medium", "hard"]: failures += 0 if check(f"Task with difficulty '{diff}' exists", diff in difficulties) else 1 for task in tasks: failures += 0 if check( f"Task '{task.get('id')}' has initial_code", bool(task.get("initial_code")), ) else 1 print("\n4. Reset (POST /reset)") ok, data = post(base_url, "/reset", {}) failures += 0 if check("POST /reset returns 200", ok) else 1 if ok: observation = data.get("observation", {}) failures += 0 if check("Response has observation field", isinstance(observation, dict)) else 1 failures += 0 if check( "Observation is typed with 4 fields", {"code_length", "complexity_score", "runtime_s", "error_flag"}.issubset(observation), str(observation), ) else 1 ok, _ = post(base_url, "/reset", {"task_id": "rename_variables"}) failures += 0 if check("POST /reset with task_id works", ok) else 1 print("\n5. State (GET /state)") ok, data = get(base_url, "/state") failures += 0 if check("GET /state returns 200", ok) else 1 if ok: required_keys = [ "current_code", "episode_steps", "max_steps", "complexity", "observation", "observation_vector", "action_meanings", ] for key in required_keys: failures += 0 if check(f"State has '{key}' field", key in data) else 1 print("\n6. Step (POST /step)") post(base_url, "/reset", {"task_id": "rename_variables"}) for action in range(5): ok, data = post(base_url, "/step", {"action": action}) failures += 0 if check( f"Action {action} executes without error", ok and isinstance(data, dict) and "reward" in data and "done" in data, ) else 1 if ok: reward_payload = data.get("reward", {}) norm = reward_payload.get("normalized", -1) failures += 0 if check( f"Action {action} returns typed reward payload", {"raw", "normalized", "components"}.issubset(reward_payload), str(reward_payload), ) else 1 failures += 0 if check( f"Action {action} normalized_reward in [0,1]", isinstance(norm, (int, float)) and 0.0 <= float(norm) <= 1.0, f"got {norm}", ) else 1 if data.get("done"): break ok, data = post(base_url, "/step", {"action": 99}) check("Invalid action returns error (not crash)", not ok or "detail" in str(data), "(expected 4xx)") print("\n7. Task graders (POST /tasks/{id}/grade)") for task_id in ["rename_variables", "remove_dead_code", "full_refactor"]: ok, data = post(base_url, f"/tasks/{task_id}/grade", {"code": "def f(): pass"}) failures += 0 if check(f"Grade endpoint for '{task_id}' works", ok) else 1 if ok: score = data.get("score", -1) failures += 0 if check( f"Score for '{task_id}' in [0.0, 1.0]", isinstance(score, (int, float)) and 0.0 <= float(score) <= 1.0, f"got {score}", ) else 1 print("\n8. openenv.yaml") try: openenv_yaml = read_text("openenv.yaml") failures += 0 if check("openenv.yaml exists", True) else 1 for field in ["tasks:", "action_space:", "observation_space:", "reward:", "entrypoint:", "validation:"]: failures += 0 if check(f"openenv.yaml has '{field}' section", field in openenv_yaml) else 1 except FileNotFoundError: failures += 1 check("openenv.yaml exists", False, "file not found") print("\n9. inference.py") try: inference_src = read_text("inference.py") failures += 0 if check("inference.py exists", True) else 1 # Accept legacy JSON markers and modern strict bracketed format: # [START] task= # [STEP] action= # [END] task= score= json_markers_ok = all(m in inference_src for m in ['"event": "START"', '"event": "STEP"', '"event": "END"']) bracket_markers_ok = all(m in inference_src for m in ["[START]", "[STEP]", "[END]"]) line_markers_ok = all(m in inference_src for m in ["START ", "STEP ", "END "]) failures += 0 if check("inference.py emits START marker", json_markers_ok or line_markers_ok or bracket_markers_ok) else 1 failures += 0 if check("inference.py emits STEP marker", json_markers_ok or line_markers_ok or bracket_markers_ok) else 1 failures += 0 if check("inference.py emits END marker", json_markers_ok or line_markers_ok or bracket_markers_ok) else 1 failures += 0 if check( "Uses OpenAI client", "from openai import OpenAI" in inference_src, ) else 1 for var in ["API_BASE_URL", "MODEL_NAME", "ENV_URL", "LOCAL_IMAGE_NAME"]: failures += 0 if check(f"inference.py reads {var} from env", var in inference_src) else 1 failures += 0 if check( "inference.py reads API credentials from env (API_KEY or HF_TOKEN)", ("API_KEY" in inference_src) or ("HF_TOKEN" in inference_src), ) else 1 api_base_default_ok = ( 'os.getenv("API_BASE_URL", "https://api.openai.com/v1")' in inference_src or re.search(r'API_BASE_URL\s*=.*os\.getenv\("API_BASE_URL"\)\s*or\s*"https://api\.openai\.com/v1"', inference_src) is not None ) api_base_env_required_ok = ( re.search(r'base_url\s*=\s*os\.getenv\("API_BASE_URL"\)', inference_src) is not None or re.search(r'base_url\s*=\s*os\.environ\["API_BASE_URL"\]', inference_src) is not None ) failures += 0 if check( "API_BASE_URL handling is valid (default or strict env)", api_base_default_ok or api_base_env_required_ok, ) else 1 model_default_ok = ( 'os.getenv("MODEL_NAME", "gpt-4o-mini")' in inference_src or re.search(r'MODEL_NAME\s*=.*os\.getenv\("MODEL_NAME"\)\s*or\s*"gpt-4o-mini"', inference_src) is not None ) failures += 0 if check("MODEL_NAME has a default", model_default_ok) else 1 api_key_no_default_ok = ( re.search(r'API_KEY\s*=.*os\.getenv\("API_KEY"\)', inference_src) is not None and re.search(r'os\.getenv\("API_KEY"\s*,', inference_src) is None ) hf_token_no_default_ok = ( re.search(r'HF_TOKEN\s*=.*os\.getenv\("HF_TOKEN"\)', inference_src) is not None and re.search(r'os\.getenv\("HF_TOKEN"\s*,', inference_src) is None ) failures += 0 if check( "API key variable has no default", api_key_no_default_ok or hf_token_no_default_ok, ) else 1 except FileNotFoundError: failures += 1 check("inference.py exists", False, "file not found") print("\n10. Dockerfile") try: dockerfile = read_text("Dockerfile") failures += 0 if check("Dockerfile exists", True) else 1 failures += 0 if check("Exposes port 7860", "7860" in dockerfile) else 1 failures += 0 if check("Has CMD/ENTRYPOINT", "CMD" in dockerfile or "ENTRYPOINT" in dockerfile) else 1 failures += 0 if check("Does not set a default HF_TOKEN", "ENV HF_TOKEN" not in dockerfile) else 1 except FileNotFoundError: failures += 1 check("Dockerfile exists", False, "file not found") print("\n11. README / Hugging Face metadata") try: readme = read_text("README.md") failures += 0 if check("README has docker SDK front matter", "sdk: docker" in readme) else 1 failures += 0 if check("README includes openenv tag", "openenv" in readme) else 1 for section in [ "Environment Overview and Motivation", "Definitions of Action and Observation Spaces", "Task Descriptions with Expected Difficulty Levels", "Setup and Usage Instructions", "Baseline Performance Scores", ]: failures += 0 if check(f"README includes '{section}'", section in readme) else 1 except FileNotFoundError: failures += 1 check("README.md exists", False, "file not found") print("\n" + "=" * 60) if failures == 0: print(f" {PASS} All checks passed. Repository is submission-ready.") else: print(f" {FAIL} {failures} check(s) failed. Fix before submitting.") print("=" * 60 + "\n") return failures def main() -> None: parser = argparse.ArgumentParser(description="ACRE pre-submission validator") parser.add_argument( "--url", default="http://localhost:7860", help="Base URL of the running ACRE server", ) args = parser.parse_args() sys.exit(run_validation(args.url)) if __name__ == "__main__": main()