Spaces:
Sleeping
Sleeping
"""
validate.py — Pre-submission validation script.

Checks every requirement from the submission checklist:
  [1] openenv.yaml — exists and has required fields
  [2] Dockerfile — exists
  [3] inference.py — exists at repo root, uses OpenAI client, correct env vars
  [4] requirements.txt — exists, includes openai
  [5] Env vars — API_BASE_URL, MODEL_NAME, HF_TOKEN defined
  [6] Environment API — reset() / step() / state() work correctly
  [7] 3+ tasks — each task produces a reward in [0.0, 1.0]
  [8] Reward range — all rewards normalised and deterministic

Run:
    python validate.py
"""
import importlib
import json
import os
import re
import sys
import traceback
# Put this script's own directory first on the import path so that modules
# living next to it (the script later imports ``env``) can be found.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# ANSI-coloured status tags printed in front of every report line.
PASS = "\033[92m[PASS]\033[0m"
FAIL = "\033[91m[FAIL]\033[0m"
WARN = "\033[93m[WARN]\033[0m"
INFO = "\033[94m[INFO]\033[0m"

# Running totals; the summary prints them and the exit status depends on
# ``errors``.
errors = 0
warnings = 0


def ok(msg):
    """Print *msg* as a passed check."""
    print(f" {PASS} {msg}")


def fail(msg):
    """Print *msg* as a failed check and count it as an error."""
    global errors
    errors += 1
    print(f" {FAIL} {msg}")


def warn(msg):
    """Print *msg* as a warning and count it."""
    global warnings
    warnings += 1
    print(f" {WARN} {msg}")


def info(msg):
    """Print *msg* as an informational line; no counter is touched."""
    print(f" {INFO} {msg}")


def section(title):
    """Print a header line for a group of checks.

    The title is followed by a horizontal rule of ``55 - len(title)``
    characters; for titles longer than 55 characters the rule is empty.
    """
    print(f"\n── {title} {'─' * (55 - len(title))}")
# ── [1] openenv.yaml ──────────────────────────────────────────────────────────
# Checks that openenv.yaml exists in the current working directory, that it
# parses to a mapping containing every required top-level key, and that it
# declares at least three tasks.
section("1. openenv.yaml")
if not os.path.exists("openenv.yaml"):
    fail("openenv.yaml not found")
else:
    ok("openenv.yaml exists")
    try:
        # Imported lazily: a missing PyYAML only downgrades this check to a
        # warning instead of stopping the whole script.
        import yaml

        with open("openenv.yaml", encoding="utf-8") as f:
            spec = yaml.safe_load(f)

        if not isinstance(spec, dict):
            # An empty file loads as None and a bare list/scalar is not a
            # spec either; report that directly instead of letting the key
            # lookups below raise and be reported as a parse failure.
            fail(" openenv.yaml is empty or not a mapping at the top level")
        else:
            required_keys = ["name", "version", "tasks", "action_space", "observation_space", "reward"]
            for k in required_keys:
                if k in spec:
                    ok(f" field '{k}' present")
                else:
                    fail(f" field '{k}' missing from openenv.yaml")

            # ``tasks:`` with no value loads as None; count that as zero
            # tasks rather than crashing in len().
            tasks = spec.get("tasks") or []
            if len(tasks) >= 3:
                ok(f" {len(tasks)} tasks defined (≥ 3 required)")
            else:
                fail(f" only {len(tasks)} task(s) defined — need ≥ 3")
    except ImportError:
        warn("pyyaml not installed — skipping yaml field validation (pip install pyyaml)")
    except Exception as e:
        # Top-level boundary of this check: any YAML/IO error becomes one
        # reported failure so the remaining checks still run.
        fail(f" failed to parse openenv.yaml: {e}")
# ── [2] Dockerfile ────────────────────────────────────────────────────────────
# Checks that a Dockerfile exists in the current working directory and does a
# plain substring scan for the port number and a start command.
section("2. Dockerfile")
if not os.path.exists("Dockerfile"):
    fail("Dockerfile not found")
else:
    ok("Dockerfile exists")
    # Only substring checks follow, so undecodable bytes are replaced rather
    # than allowed to abort the script; ``with`` closes the handle promptly.
    with open("Dockerfile", encoding="utf-8", errors="replace") as f:
        content = f.read()

    if "7860" in content:
        ok(" port 7860 exposed (required for HF Spaces)")
    else:
        fail(" port 7860 not found in Dockerfile")

    # The success message promises "CMD/entrypoint", so an ENTRYPOINT
    # instruction is accepted alongside CMD and a uvicorn invocation.
    if "uvicorn" in content or "CMD" in content or "ENTRYPOINT" in content:
        ok(" CMD/entrypoint present")
    else:
        fail(" no CMD found in Dockerfile")
# ── [3] inference.py ──────────────────────────────────────────────────────────
# Checks that inference.py exists in the current working directory and scans
# its text for the OpenAI import, the three env var names and the log tags.
# These are substring checks on the source text, not an execution of the file.
section("3. inference.py")
if not os.path.exists("inference.py"):
    fail("inference.py not found at repo root")
else:
    ok("inference.py exists at repo root")
    # Read once and close the handle; undecodable bytes are replaced because
    # only substring membership is tested below.
    with open("inference.py", encoding="utf-8", errors="replace") as f:
        src = f.read()

    if "from openai import OpenAI" in src or "import openai" in src:
        ok(" uses OpenAI client")
    else:
        fail(" OpenAI client not found — must use 'from openai import OpenAI'")

    for var in ("API_BASE_URL", "MODEL_NAME", "HF_TOKEN"):
        if var in src:
            ok(f" env var {var} referenced")
        else:
            fail(f" env var {var} not referenced in inference.py")

    for tag in ("[START]", "[STEP]", "[END]", "final_reward"):
        if tag in src:
            ok(f" log tag '{tag}' present")
        else:
            fail(f" log tag '{tag}' missing from inference.py")
# ── [4] requirements.txt ──────────────────────────────────────────────────────
# Checks that requirements.txt exists in the current working directory and
# mentions the expected packages. A missing ``openai`` is an error; missing
# ``fastapi`` / ``uvicorn`` are only warnings.
section("4. requirements.txt")
if not os.path.exists("requirements.txt"):
    fail("requirements.txt not found")
else:
    ok("requirements.txt exists")
    # Lower-cased so the substring checks are case-insensitive; the handle is
    # closed as soon as the text has been read.
    with open("requirements.txt", encoding="utf-8", errors="replace") as f:
        reqs = f.read().lower()

    if "openai" in reqs:
        ok(" openai listed")
    else:
        fail(" openai missing from requirements.txt")

    if "fastapi" in reqs:
        ok(" fastapi listed (needed for HF Space)")
    else:
        warn(" fastapi not in requirements.txt — needed for app.py / HF Space")

    if "uvicorn" in reqs:
        ok(" uvicorn listed")
    else:
        warn(" uvicorn not in requirements.txt — needed to serve app.py")
# ── [5] Env vars ──────────────────────────────────────────────────────────────
# Loads KEY=VALUE pairs from a .env file next to this script (without
# overriding variables that are already set) and then checks that the three
# required variables are non-empty.
section("5. Environment variables")


def _strip_quotes(value):
    """Return *value* without one pair of matching surrounding quotes."""
    if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
        return value[1:-1]
    return value


def _mask_secret(value):
    """Return a printable form of *value* that never shows the whole secret.

    Long values keep the first 8 and last 4 characters. Values shorter than
    24 characters are hidden completely, because showing 12 characters would
    expose half or more of them (and all of a value of 12 or fewer).
    """
    if len(value) < 24:
        return "********"
    return value[:8] + "..." + value[-4:]


# Load .env if present
_env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
if os.path.exists(_env_path):
    # utf-8-sig tolerates a byte-order mark at the start of the file, which
    # would otherwise become part of the first variable name.
    with open(_env_path, encoding="utf-8-sig") as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith("#") and "=" in line:
                k, v = line.split("=", 1)
                k = k.strip()
                if k:  # a line such as "=value" has no usable name
                    # setdefault: values already in the environment win.
                    os.environ.setdefault(k, _strip_quotes(v.strip()))
    info(".env loaded")

for var in ("API_BASE_URL", "MODEL_NAME", "HF_TOKEN"):
    val = os.environ.get(var, "")
    if val:
        # Mask token value
        display = val if var != "HF_TOKEN" else _mask_secret(val)
        ok(f" {var} = {display}")
    else:
        fail(f" {var} is not set")
# ── [6 & 7] Environment API + tasks ───────────────────────────────────────────
# Imports VulnEnv from the local ``env`` module, then for every task id it
# exposes: resets the environment, sends one probe action, and checks the
# reward range, the type of ``done`` and that repeating the same action after
# a fresh reset yields the same reward.
# NOTE(review): only reset() and step() are exercised here; state() is not
# called by this check.
section("6 & 7. Environment API + task graders (≥ 3 tasks, rewards in [0,1])")

# Probe payloads — correct tool for phase/step 1 of each task
PROBES = {
    "sql_injection": {"type": "select", "target": "submit_form_value", "payload": ""},
    "spearphish_credential": {"type": "select", "target": "deliver_prepared_package", "payload": ""},
    "cloud_identity_intrusion": {"type": "select", "target": "test_identifier_responses", "payload": ""},
    "ai_tool_exploitation": {"type": "select", "target": "submit_routine_request", "payload": ""},
}
# Generic action sent to any task id that has no dedicated probe above.
DEFAULT_PROBE = {"type": "input", "target": "query", "payload": "test"}


def _reward_in_unit_interval(reward):
    """Return True if *reward* compares as lying within [0.0, 1.0].

    A reward that cannot be compared with floats (e.g. None or a string)
    counts as out of range instead of raising.
    """
    try:
        return 0.0 <= reward <= 1.0
    except TypeError:
        return False


def _check_task(env, task_id):
    """Run the reset/step/determinism checks for one task and report them."""
    state = env.reset(task_id)

    # reset() must return a dict with required fields
    missing = [
        field
        for field in ("task", "code_context", "signals", "step_count")
        if field not in state
    ]
    for field in missing:
        fail(f" [{task_id}] reset() state missing field '{field}'")
    if not missing:
        # Only claim a valid state when nothing was missing.
        ok(f" [{task_id}] reset() returned valid state")

    probe = PROBES.get(task_id, DEFAULT_PROBE)
    _, reward, done, _ = env.step(probe)

    # Reward must be in [0, 1]
    if not _reward_in_unit_interval(reward):
        fail(f" [{task_id}] reward {reward} out of [0.0, 1.0]")
    else:
        ok(f" [{task_id}] step() reward = {reward:.4f} ∈ [0.0, 1.0]")

    # done must be bool
    if not isinstance(done, bool):
        fail(f" [{task_id}] done is not bool: {type(done)}")
    else:
        ok(f" [{task_id}] done = {done} (bool)")

    # Determinism check — same action, same reward
    env.reset(task_id)
    _, reward2, _, _ = env.step(probe)
    if reward == reward2:
        ok(f" [{task_id}] deterministic (same action → same reward)")
    else:
        fail(f" [{task_id}] non-deterministic: {reward} ≠ {reward2}")


try:
    from env import VulnEnv

    env = VulnEnv()
    ok("VulnEnv imported successfully")

    task_ids = env.task_ids
    if len(task_ids) >= 3:
        ok(f" {len(task_ids)} tasks available: {task_ids}")
    else:
        fail(f" only {len(task_ids)} task(s) — need ≥ 3")

    for task_id in task_ids:
        # Each task is isolated so that one broken task is reported and the
        # remaining tasks are still validated.
        try:
            _check_task(env, task_id)
        except Exception as e:
            fail(f" [{task_id}] validation error: {e}")
            traceback.print_exc()
except Exception as e:
    # Import or construction failed, or task_ids is unusable: nothing more
    # can be checked in this section.
    fail(f"Environment validation error: {e}")
    traceback.print_exc()
# ── [8] app.py (HF Space server) ──────────────────────────────────────────────
# Checks that app.py exists in the current working directory and scans its
# text for the four endpoint paths and the port number. These are substring
# checks on the source text; the server itself is not started.
section("8. app.py (HF Space server)")
if not os.path.exists("app.py"):
    fail("app.py not found — required for HF Space /health ping")
else:
    ok("app.py exists")
    # Read once and close the handle; undecodable bytes are replaced because
    # only substring membership is tested below.
    with open("app.py", encoding="utf-8", errors="replace") as f:
        src = f.read()

    for endpoint in ("/health", "/reset", "/step", "/state"):
        if endpoint in src:
            ok(f" endpoint '{endpoint}' defined")
        else:
            fail(f" endpoint '{endpoint}' missing from app.py")

    # A missing port in app.py is only a warning here.
    if "7860" in src:
        ok(" port 7860 present")
    else:
        warn(" port 7860 not found in app.py")
# ── Summary ───────────────────────────────────────────────────────────────────
# Prints the error and warning totals plus a one-line verdict, then exits
# with status 0 when there were no errors (warnings allowed) and 1 otherwise.
section("Summary")
print(f"\n Errors: {errors}")
print(f" Warnings: {warnings}")

if errors == 0 and warnings == 0:
    print(f"\n {PASS} All checks passed — ready to submit!\n")
elif errors == 0:
    print(f"\n {WARN} No errors, but {warnings} warning(s) — review before submitting.\n")
else:
    print(f"\n {FAIL} {errors} error(s) found — fix before submitting.\n")

sys.exit(0 if errors == 0 else 1)