File size: 9,708 Bytes
64305ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0e5a58
64305ea
687481a
 
 
 
64305ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
"""
validate.py  –  Pre-submission validation script.

Checks every requirement from the submission checklist:

  [1] openenv.yaml      – exists and has required fields
  [2] Dockerfile        – exists
  [3] inference.py      – exists at repo root, uses OpenAI client, correct env vars
  [4] requirements.txt  – exists, includes openai
  [5] Env vars          – API_BASE_URL, MODEL_NAME, HF_TOKEN defined
  [6] Environment API   – reset() / step() / state() work correctly
  [7] 3+ tasks          – each task produces a reward in [0.0, 1.0]
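  [8] Reward range      – all rewards normalised and deterministic
                          (reported together with [6] and [7])

In addition, the section printed as "8. app.py" checks that app.py exists and
mentions the /health, /reset, /step and /state endpoints and port 7860.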

Run:
    python validate.py
"""

import importlib
import json
import os
import re
import sys

# Put this script's own directory first on the import path so that modules
# imported further down (such as `env`) resolve next to the script.
sys.path.insert(
    0,
    os.path.dirname(os.path.abspath(__file__)),
)

# ANSI-coloured status tags used as line prefixes by the reporting helpers.
PASS, FAIL, WARN, INFO = (
    "\033[92m[PASS]\033[0m",
    "\033[91m[FAIL]\033[0m",
    "\033[93m[WARN]\033[0m",
    "\033[94m[INFO]\033[0m",
)

# Running totals bumped by fail() and warn(); they feed the final summary
# and the process exit code.
errors = warnings = 0


def ok(msg):
    """Print *msg* as a passed check, prefixed with the PASS tag."""
    print("  {} {}".format(PASS, msg))

def fail(msg):
    """Print *msg* with the FAIL tag and add one to the global error count."""
    global errors
    print("  {} {}".format(FAIL, msg))
    errors = errors + 1

def warn(msg):
    """Print *msg* with the WARN tag and add one to the global warning count."""
    global warnings
    print("  {} {}".format(WARN, msg))
    warnings = warnings + 1

def info(msg):
    """Print *msg* as an informational line, prefixed with the INFO tag."""
    print("  {} {}".format(INFO, msg))

def section(title):
    """Print a blank line followed by a ``── title ───`` heading.

    The trailing rule is ``55 - len(title)`` characters long, so it shrinks
    as the title grows and vanishes for titles of 55 characters or more.
    """
    rule = "─" * (55 - len(title))
    print("\n── {} {}".format(title, rule))


# ── [1] openenv.yaml ──────────────────────────────────────────────────────────
# Verifies that the spec file exists, parses to a YAML mapping, carries every
# required top-level key and declares at least three tasks.
section("1. openenv.yaml")

if not os.path.exists("openenv.yaml"):
    fail("openenv.yaml not found")
else:
    ok("openenv.yaml exists")
    try:
        # pyyaml is optional: without it only the existence check above runs.
        import yaml
        with open("openenv.yaml", encoding="utf-8") as f:
            spec = yaml.safe_load(f)
        if not isinstance(spec, dict):
            # safe_load yields None for an empty file and a list/scalar for
            # non-mapping documents; report that directly instead of letting
            # the key checks below crash with an unrelated TypeError.
            fail(f"  openenv.yaml must contain a top-level mapping, got {type(spec).__name__}")
        else:
            required_keys = ("name", "version", "tasks", "action_space", "observation_space", "reward")
            for k in required_keys:
                if k in spec:
                    ok(f"  field '{k}' present")
                else:
                    fail(f"  field '{k}' missing from openenv.yaml")
            # A bare `tasks:` key parses as None; count it as zero tasks.
            tasks = spec.get("tasks") or []
            if len(tasks) >= 3:
                ok(f"  {len(tasks)} tasks defined (≥ 3 required)")
            else:
                fail(f"  only {len(tasks)} task(s) defined — need ≥ 3")
    except ImportError:
        warn("pyyaml not installed — skipping yaml field validation (pip install pyyaml)")
    except Exception as e:
        # Boundary handler: any read/parse problem becomes one reported failure
        # so the remaining sections still run.
        fail(f"  failed to parse openenv.yaml: {e}")


# ── [2] Dockerfile ────────────────────────────────────────────────────────────
# Verifies that a Dockerfile exists, mentions port 7860 and declares a start
# command. These are plain substring checks, not a Dockerfile parse.
section("2. Dockerfile")

if not os.path.exists("Dockerfile"):
    fail("Dockerfile not found")
else:
    ok("Dockerfile exists")
    # Context manager so the handle is closed deterministically.
    with open("Dockerfile", encoding="utf-8") as f:
        content = f.read()
    if "7860" in content:
        ok("  port 7860 exposed (required for HF Spaces)")
    else:
        fail("  port 7860 not found in Dockerfile")
    # ENTRYPOINT is accepted too, matching the "CMD/entrypoint" wording below.
    if "uvicorn" in content or "CMD" in content or "ENTRYPOINT" in content:
        ok("  CMD/entrypoint present")
    else:
        fail("  no CMD found in Dockerfile")


# ── [3] inference.py ─────────────────────────────────────────────────────────
# Verifies that inference.py exists in the current directory and that its
# source text mentions the OpenAI client, the three required environment
# variables and the expected log tags (substring checks only).
section("3. inference.py")

if not os.path.exists("inference.py"):
    fail("inference.py not found at repo root")
else:
    ok("inference.py exists at repo root")
    # Context manager so the handle is closed deterministically.
    with open("inference.py", encoding="utf-8") as f:
        src = f.read()

    if "from openai import OpenAI" in src or "import openai" in src:
        ok("  uses OpenAI client")
    else:
        fail("  OpenAI client not found — must use 'from openai import OpenAI'")

    for var in ("API_BASE_URL", "MODEL_NAME", "HF_TOKEN"):
        if var in src:
            ok(f"  env var {var} referenced")
        else:
            fail(f"  env var {var} not referenced in inference.py")

    for tag in ("[START]", "[STEP]", "[END]", "final_reward"):
        if tag in src:
            ok(f"  log tag '{tag}' present")
        else:
            fail(f"  log tag '{tag}' missing from inference.py")


# ── [4] requirements.txt ──────────────────────────────────────────────────────
# Verifies that requirements.txt exists and mentions openai (required) plus
# fastapi and uvicorn (warnings only). Matching is a case-insensitive
# substring search over the whole file.
section("4. requirements.txt")

if not os.path.exists("requirements.txt"):
    fail("requirements.txt not found")
else:
    ok("requirements.txt exists")
    # Context manager so the handle is closed deterministically.
    with open("requirements.txt", encoding="utf-8") as f:
        reqs = f.read().lower()
    if "openai" in reqs:
        ok("  openai listed")
    else:
        fail("  openai missing from requirements.txt")
    if "fastapi" in reqs:
        ok("  fastapi listed (needed for HF Space)")
    else:
        warn("  fastapi not in requirements.txt — needed for app.py / HF Space")
    if "uvicorn" in reqs:
        ok("  uvicorn listed")
    else:
        warn("  uvicorn not in requirements.txt — needed to serve app.py")


# ── [5] Env vars ──────────────────────────────────────────────────────────────
# Verifies that API_BASE_URL, MODEL_NAME and HF_TOKEN are set, after loading
# an optional .env file that sits next to this script.
section("5. Environment variables")

# Load .env if present. Only simple KEY=VALUE lines are understood; blank
# lines and lines starting with '#' are ignored.
_env_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
if os.path.exists(_env_path):
    with open(_env_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith("#") and "=" in line:
                k, v = line.split("=", 1)
                # setdefault: values already in the environment take precedence.
                os.environ.setdefault(k.strip(), v.strip())
    info(".env loaded")

for var in ("API_BASE_URL", "MODEL_NAME", "HF_TOKEN"):
    val = os.environ.get(var, "")
    if not val:
        fail(f"  {var} is not set")
        continue
    if var != "HF_TOKEN":
        display = val
    elif len(val) > 24:
        # Show only the first 8 and last 4 characters of the secret.
        display = val[:8] + "..." + val[-4:]
    else:
        # On a short token the head/tail slices would cover most or all of
        # the secret (all of it at <= 12 characters), so reveal nothing.
        display = "***"
    ok(f"  {var} = {display}")


# ── [6 & 7] Environment API + tasks ──────────────────────────────────────────
# Imports VulnEnv and, for every task id it exposes, checks that:
#   * reset() returns a state containing the required fields,
#   * step() returns a reward within [0.0, 1.0] and a boolean `done`,
#   * repeating the same first action after a fresh reset gives the same reward.
section("6 & 7. Environment API + task graders (≥ 3 tasks, rewards in [0,1])")

try:
    from env import VulnEnv
    env = VulnEnv()
    ok("VulnEnv imported successfully")

    task_ids = env.task_ids
    if len(task_ids) >= 3:
        ok(f"  {len(task_ids)} tasks available: {task_ids}")
    else:
        fail(f"  only {len(task_ids)} task(s) — need ≥ 3")

    # Probe payloads — correct tool for phase/step 1 of each task
    PROBES = {
        "sql_injection": {"type": "select", "target": "submit_form_value", "payload": ""},
        "spearphish_credential": {"type": "select", "target": "deliver_prepared_package", "payload": ""},
        "cloud_identity_intrusion": {"type": "select", "target": "test_identifier_responses", "payload": ""},
        "ai_tool_exploitation": {"type": "select", "target": "submit_routine_request", "payload": ""},
    }
    # Action used for any task id that has no dedicated probe above.
    DEFAULT_PROBE = {"type": "input", "target": "query", "payload": "test"}
    REQUIRED_STATE_FIELDS = ("task", "code_context", "signals", "step_count")

    for task_id in task_ids:
        # Per-task guard: one misbehaving task is reported as a failure and
        # the remaining tasks are still validated.
        try:
            state = env.reset(task_id)

            # reset() must return a dict with required fields
            missing = [field for field in REQUIRED_STATE_FIELDS if field not in state]
            for field in missing:
                fail(f"  [{task_id}] reset() state missing field '{field}'")
            # Only claim a valid state when nothing was missing.
            if not missing:
                ok(f"  [{task_id}] reset() returned valid state")

            probe = PROBES.get(task_id, DEFAULT_PROBE)
            _, reward, done, _ = env.step(probe)

            # Reward must be in [0, 1]
            if not (0.0 <= reward <= 1.0):
                fail(f"  [{task_id}] reward {reward} out of [0.0, 1.0]")
            else:
                ok(f"  [{task_id}] step() reward = {reward:.4f} ∈ [0.0, 1.0]")

            # done must be bool
            if not isinstance(done, bool):
                fail(f"  [{task_id}] done is not bool: {type(done)}")
            else:
                ok(f"  [{task_id}] done = {done} (bool)")

            # Determinism check — same action, same reward
            env.reset(task_id)
            _, reward2, _, _ = env.step(probe)
            if reward == reward2:
                ok(f"  [{task_id}] deterministic (same action → same reward)")
            else:
                fail(f"  [{task_id}] non-deterministic: {reward} ≠ {reward2}")
        except Exception as e:
            fail(f"  [{task_id}] validation error: {e}")
            import traceback
            traceback.print_exc()

except Exception as e:
    # Boundary handler for import/construction problems (and anything else
    # outside the per-task guard): report once and keep the script running.
    fail(f"Environment validation error: {e}")
    import traceback
    traceback.print_exc()


# ── [8] app.py (HF Space server) ──────────────────────────────────────────────
# Verifies that app.py exists and that its source text mentions each required
# endpoint path and port 7860 (substring checks only).
section("8. app.py (HF Space server)")

if not os.path.exists("app.py"):
    fail("app.py not found — required for HF Space /health ping")
else:
    ok("app.py exists")
    # Context manager so the handle is closed deterministically.
    with open("app.py", encoding="utf-8") as f:
        src = f.read()
    for endpoint in ("/health", "/reset", "/step", "/state"):
        if endpoint in src:
            ok(f"  endpoint '{endpoint}' defined")
        else:
            fail(f"  endpoint '{endpoint}' missing from app.py")
    if "7860" in src:
        ok("  port 7860 present")
    else:
        warn("  port 7860 not found in app.py")


# ── Summary ───────────────────────────────────────────────────────────────────
# Prints the error/warning totals and a one-line verdict, then exits with
# status 0 when there were no errors (warnings alone do not fail) and 1 otherwise.
section("Summary")
print(f"\n  Errors:   {errors}")
print(f"  Warnings: {warnings}")

if errors == 0 and warnings == 0:
    print(f"\n  {PASS} All checks passed — ready to submit!\n")
elif errors == 0:
    print(f"\n  {WARN} No errors, but {warnings} warning(s) — review before submitting.\n")
else:
    print(f"\n  {FAIL} {errors} error(s) found — fix before submitting.\n")

sys.exit(0 if errors == 0 else 1)