#!/usr/bin/env python3
# validator/pre_submit_check.py
# Run this BEFORE submitting to catch any disqualifying issues.
#
# Usage:
# python validator/pre_submit_check.py
# python validator/pre_submit_check.py --url https://your-space.hf.space
import os
import sys
import json
import argparse
import requests
# Status markers printed in front of each check line. Written as escape
# sequences so the source stays pure ASCII and the glyphs cannot be damaged
# by a wrong file encoding.
PASS = "\u2705"  # white heavy check mark
FAIL = "\u274c"  # cross mark
WARN = "\u26a0\ufe0f"  # warning sign (emoji presentation)

# One {"check", "passed", "detail"} record is appended here per check() call.
results = []
def check(name: str, passed: bool, detail: str = ""):
    """Record the outcome of one check, print a status line, and return it.

    Args:
        name: Human-readable description of the check.
        passed: Whether the check succeeded.
        detail: Optional extra text appended to the printed line after ": ".

    Returns:
        The ``passed`` value, unchanged, so callers can fold it into a
        running verdict.
    """
    # Keep a structured record for the final summary.
    results.append({"check": name, "passed": passed, "detail": detail})

    if passed:
        marker = PASS
    else:
        marker = FAIL

    line = f" {marker} {name}"
    if detail:
        line += f": {detail}"
    print(line)
    return passed
def run_checks(base_url: str):
    """Run every pre-submission check and print a pass/fail report.

    Checks 1-6 exercise the HTTP API of the environment at ``base_url``
    (``/health``, ``/reset``, ``/step``, ``/state``). Checks 7-8 inspect
    files relative to the current working directory. Every outcome is
    recorded through ``check()``. An exception raised inside a check is
    reported as a failure of that check instead of being propagated.

    Args:
        base_url: Root URL of the environment without a trailing slash;
            endpoint paths such as ``/health`` are appended directly.

    Returns:
        True if every check passed, False otherwise.
    """
    difficulties = ("easy", "medium", "hard")
    # Only records appended during this call feed the final summary, so a
    # repeated call in the same process does not recount earlier results.
    first_result = len(results)

    print(f"\n{'='*60}")
    print(" Code Debug Environment — Pre-Submission Validator")
    print(f" Target: {base_url}")
    print(f"{'='*60}\n")
    all_passed = True

    # -- 1. Health check ---------------------------------------------------
    print("[ CHECK 1 ] Health endpoint")
    try:
        r = requests.get(f"{base_url}/health", timeout=10)
        passed = r.status_code == 200 and r.json().get("status") == "ok"
        check("GET /health returns 200 with status=ok", passed, f"HTTP {r.status_code}")
        all_passed &= passed
    except Exception as e:
        check("GET /health", False, str(e))
        all_passed = False

    # -- 2. Reset responds -------------------------------------------------
    print("\n[ CHECK 2 ] POST /reset")
    for difficulty in difficulties:
        try:
            r = requests.post(f"{base_url}/reset", json={"difficulty": difficulty}, timeout=15)
            data = r.json()
            # A null or absent observation is treated as empty so the check
            # reports the missing fields instead of raising.
            obs = data.get("observation") or {}
            has_fields = all(k in obs for k in ["task_id", "difficulty", "buggy_code", "instructions"])
            passed = r.status_code == 200 and has_fields
            check(f"reset(difficulty='{difficulty}') returns valid observation", passed,
                  f"task_id={obs.get('task_id', 'MISSING')}")
            all_passed &= passed
        except Exception as e:
            check(f"reset(difficulty='{difficulty}')", False, str(e))
            all_passed = False

    # -- 3. Step responds --------------------------------------------------
    print("\n[ CHECK 3 ] POST /step")
    try:
        # Reset first to get a fresh task
        r = requests.post(f"{base_url}/reset", json={"difficulty": "easy"}, timeout=15)
        buggy_code = r.json()["observation"]["buggy_code"]
        # Submit the buggy code as-is (reward may be 0, that's fine)
        r = requests.post(f"{base_url}/step", json={"fixed_code": buggy_code}, timeout=15)
        data = r.json()
        reward = data.get("reward")
        has_reward = isinstance(reward, (int, float))
        has_done = isinstance(data.get("done"), bool)
        # Compare only a numeric reward; a missing or non-numeric value is
        # then reported by this check rather than surfacing as a TypeError.
        reward_in_range = has_reward and 0.0 <= reward <= 1.0
        passed = r.status_code == 200 and has_reward and has_done and reward_in_range
        check("step() returns reward in [0.0, 1.0] and done flag", passed,
              f"reward={data.get('reward')}, done={data.get('done')}")
        all_passed &= passed
    except Exception as e:
        check("POST /step", False, str(e))
        all_passed = False

    # -- 4. State responds -------------------------------------------------
    print("\n[ CHECK 4 ] GET /state")
    try:
        r = requests.get(f"{base_url}/state", timeout=10)
        data = r.json()
        has_fields = all(k in data for k in ["episode_id", "step_count", "difficulty"])
        passed = r.status_code == 200 and has_fields
        check("GET /state returns episode_id, step_count, difficulty", passed)
        all_passed &= passed
    except Exception as e:
        check("GET /state", False, str(e))
        all_passed = False

    # -- 5. All three difficulties work -------------------------------------
    print("\n[ CHECK 5 ] All 3 task difficulties functional")
    for difficulty in difficulties:
        try:
            r = requests.post(f"{base_url}/reset", json={"difficulty": difficulty}, timeout=15)
            obs = r.json()["observation"]
            passed = obs.get("difficulty") == difficulty
            check(f"difficulty='{difficulty}' task loads correctly",
                  passed, f"got difficulty={obs.get('difficulty')}")
            all_passed &= passed
        except Exception as e:
            check(f"difficulty='{difficulty}'", False, str(e))
            all_passed = False

    # -- 6. Reward range on a known-correct answer --------------------------
    print("\n[ CHECK 6 ] Reward range validation (correct fix)")
    try:
        # Imported lazily so an import failure is reported as a failed check.
        from server.tasks.task_easy import EASY_TASKS
        task = EASY_TASKS[0]
        # NOTE(review): the reset only asks for an "easy" task; nothing here
        # shows that the server serves EASY_TASKS[0] -- confirm. Only the
        # reward range is asserted below.
        r = requests.post(f"{base_url}/reset", json={"difficulty": "easy"}, timeout=15)
        # Submit the known correct fix
        r = requests.post(f"{base_url}/step",
                          json={"fixed_code": task["fixed_code"]}, timeout=15)
        data = r.json()
        reward = data.get("reward", -1)
        # Guard the comparison so a non-numeric reward fails this check with
        # its value shown instead of raising TypeError.
        passed = isinstance(reward, (int, float)) and 0.0 <= reward <= 1.0
        check("Submitting correct fix yields reward in [0.0, 1.0]", passed,
              f"reward={reward}")
        all_passed &= passed
    except Exception as e:
        check("Reward range check", False, str(e))
        all_passed = False

    # -- 7. Required project files exist ------------------------------------
    print("\n[ CHECK 7 ] Project structure")
    required_files = [
        "openenv.yaml",
        "inference.py",
        "models.py",
        "server/app.py",
        "server/environment.py",
        "server/Dockerfile",
        "server/requirements.txt",
        "pyproject.toml",
        "README.md",
    ]
    # Paths are resolved against the current working directory.
    for fname in required_files:
        exists = os.path.exists(fname)
        check(f"File exists: {fname}", exists)
        all_passed &= exists

    # -- 8. inference.py has the required log format ------------------------
    print("\n[ CHECK 8 ] inference.py log format")
    try:
        # Explicit encoding: the platform default may not be UTF-8.
        with open("inference.py", encoding="utf-8") as f:
            content = f.read()
        has_start = "[START] task=" in content
        has_step = "[STEP] step=" in content
        has_end = "[END] success=" in content
        avoids_json_logs = "print(json.dumps(log_entry)" not in content
        # Passes when the bracketed list form "rewards=[" is absent.
        rewards_csv = "rewards=[" not in content
        check("inference.py emits [START] logs", has_start)
        check("inference.py emits [STEP] logs", has_step)
        check("inference.py emits [END] logs", has_end)
        check("inference.py avoids JSON log dict dumps", avoids_json_logs)
        check("inference.py emits CSV rewards in [END]", rewards_csv)
        all_passed &= has_start and has_step and has_end and avoids_json_logs and rewards_csv
    except Exception as e:
        check("inference.py log format", False, str(e))
        all_passed = False

    # -- Final summary -------------------------------------------------------
    run_results = results[first_result:]
    total = len(run_results)
    passed_count = sum(1 for entry in run_results if entry["passed"])
    print(f"\n{'='*60}")
    print(f" Results: {passed_count}/{total} checks passed")
    if all_passed:
        print(f" {PASS} ALL CHECKS PASSED — you are safe to submit!")
    else:
        failed = [entry["check"] for entry in run_results if not entry["passed"]]
        print(f" {FAIL} FAILED CHECKS — fix these before submitting:")
        for failed_name in failed:
            print(f" • {failed_name}")
    print(f"{'='*60}\n")
    return all_passed
if __name__ == "__main__":
    # Command-line entry point: validate the environment at --url and exit
    # with status 0 when every check passed, 1 otherwise.
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", default="http://localhost:7860",
                        help="Base URL of the running environment")
    args = parser.parse_args()
    # Trailing slashes are stripped because run_checks appends paths such as
    # "/health" directly to the base URL.
    success = run_checks(args.url.rstrip("/"))
    sys.exit(0 if success else 1)