#!/usr/bin/env python3 # validator/pre_submit_check.py # Run this BEFORE submitting to catch any disqualifying issues. # # Usage: # python validator/pre_submit_check.py # python validator/pre_submit_check.py --url https://your-space.hf.space import os import sys import json import argparse import requests PASS = "✅" FAIL = "❌" WARN = "⚠️" results = [] def check(name: str, passed: bool, detail: str = ""): status = PASS if passed else FAIL results.append({"check": name, "passed": passed, "detail": detail}) print(f" {status} {name}" + (f": {detail}" if detail else "")) return passed def run_checks(base_url: str): print(f"\n{'='*60}") print(f" Code Debug Environment — Pre-Submission Validator") print(f" Target: {base_url}") print(f"{'='*60}\n") all_passed = True # ── 1. Health check ─────────────────────────────────────────── print("[ CHECK 1 ] Health endpoint") try: r = requests.get(f"{base_url}/health", timeout=10) passed = r.status_code == 200 and r.json().get("status") == "ok" check("GET /health returns 200 with status=ok", passed, f"HTTP {r.status_code}") all_passed &= passed except Exception as e: check("GET /health", False, str(e)) all_passed = False # ── 2. Reset responds ───────────────────────────────────────── print("\n[ CHECK 2 ] POST /reset") obs = None for difficulty in ["easy", "medium", "hard"]: try: r = requests.post(f"{base_url}/reset", json={"difficulty": difficulty}, timeout=15) data = r.json() obs = data.get("observation", {}) has_fields = all(k in obs for k in ["task_id", "difficulty", "buggy_code", "instructions"]) passed = r.status_code == 200 and has_fields check(f"reset(difficulty='{difficulty}') returns valid observation", passed, f"task_id={obs.get('task_id', 'MISSING')}") all_passed &= passed except Exception as e: check(f"reset(difficulty='{difficulty}')", False, str(e)) all_passed = False # ── 3. Step responds ────────────────────────────────────────── print("\n[ CHECK 3 ] POST /step") try: # Reset first to get a fresh task r = requests.post(f"{base_url}/reset", json={"difficulty": "easy"}, timeout=15) buggy_code = r.json()["observation"]["buggy_code"] # Submit the buggy code as-is (reward may be 0, that's fine) r = requests.post(f"{base_url}/step", json={"fixed_code": buggy_code}, timeout=15) data = r.json() has_reward = "reward" in data and isinstance(data["reward"], (int, float)) has_done = "done" in data and isinstance(data["done"], bool) reward_in_range = 0.0 <= data.get("reward", -1) <= 1.0 passed = r.status_code == 200 and has_reward and has_done and reward_in_range check("step() returns reward in [0.0, 1.0] and done flag", passed, f"reward={data.get('reward')}, done={data.get('done')}") all_passed &= passed except Exception as e: check("POST /step", False, str(e)) all_passed = False # ── 4. State responds ───────────────────────────────────────── print("\n[ CHECK 4 ] GET /state") try: r = requests.get(f"{base_url}/state", timeout=10) data = r.json() has_fields = all(k in data for k in ["episode_id", "step_count", "difficulty"]) passed = r.status_code == 200 and has_fields check("GET /state returns episode_id, step_count, difficulty", passed) all_passed &= passed except Exception as e: check("GET /state", False, str(e)) all_passed = False # ── 5. 3 difficulties all work ──────────────────────────────── print("\n[ CHECK 5 ] All 3 task difficulties functional") for difficulty in ["easy", "medium", "hard"]: try: r = requests.post(f"{base_url}/reset", json={"difficulty": difficulty}, timeout=15) obs = r.json()["observation"] passed = obs.get("difficulty") == difficulty check(f"difficulty='{difficulty}' task loads correctly", passed, f"got difficulty={obs.get('difficulty')}") all_passed &= passed except Exception as e: check(f"difficulty='{difficulty}'", False, str(e)) all_passed = False # ── 6. Reward range on perfect answer ───────────────────────── print("\n[ CHECK 6 ] Reward range validation (correct fix)") try: from server.tasks.task_easy import EASY_TASKS task = EASY_TASKS[0] # Reset with the first easy task r = requests.post(f"{base_url}/reset", json={"difficulty": "easy"}, timeout=15) # Submit the known correct fix r = requests.post(f"{base_url}/step", json={"fixed_code": task["fixed_code"]}, timeout=15) data = r.json() reward = data.get("reward", -1) passed = 0.0 <= reward <= 1.0 check(f"Submitting correct fix yields reward in [0.0, 1.0]", passed, f"reward={reward}") all_passed &= passed except Exception as e: check("Reward range check", False, str(e)) all_passed = False # ── 7. openenv.yaml exists ──────────────────────────────────── print("\n[ CHECK 7 ] Project structure") required_files = [ "openenv.yaml", "inference.py", "models.py", "server/app.py", "server/environment.py", "server/Dockerfile", "server/requirements.txt", "pyproject.toml", "README.md", ] for fname in required_files: exists = os.path.exists(fname) check(f"File exists: {fname}", exists) all_passed &= exists # ── 8. inference.py has required log format ─────────────────── print("\n[ CHECK 8 ] inference.py log format") try: with open("inference.py") as f: content = f.read() has_start = "[START] task=" in content has_step = "[STEP] step=" in content has_end = "[END] success=" in content avoids_json_logs = "print(json.dumps(log_entry)" not in content rewards_csv = "rewards=[" not in content check("inference.py emits [START] logs", has_start) check("inference.py emits [STEP] logs", has_step) check("inference.py emits [END] logs", has_end) check("inference.py avoids JSON log dict dumps", avoids_json_logs) check("inference.py emits CSV rewards in [END]", rewards_csv) all_passed &= has_start and has_step and has_end and avoids_json_logs and rewards_csv except Exception as e: check("inference.py log format", False, str(e)) all_passed = False # ── Final summary ───────────────────────────────────────────── total = len(results) passed_count = sum(1 for r in results if r["passed"]) print(f"\n{'='*60}") print(f" Results: {passed_count}/{total} checks passed") if all_passed: print(f" {PASS} ALL CHECKS PASSED — you are safe to submit!") else: failed = [r["check"] for r in results if not r["passed"]] print(f" {FAIL} FAILED CHECKS — fix these before submitting:") for f in failed: print(f" • {f}") print(f"{'='*60}\n") return all_passed if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--url", default="http://localhost:7860", help="Base URL of the running environment") args = parser.parse_args() success = run_checks(args.url.rstrip("/")) sys.exit(0 if success else 1)