Spaces:
Sleeping
Sleeping
sameerkatte Claude Opus 4.6 (1M context) committed on
Commit ·
6dbb8cf
1
Parent(s): 8d618ab
Add [START]/[STEP]/[END] structured output markers to inference.py
Browse files
The validator parses stdout for these markers to extract per-task scores.
Previously inference.py only printed freeform text, so the Output Parsing
check failed with "No [START]/[STEP]/[END] in stdout".
Changes:
- Import sys/functools and wrap print() with flush=True so all output
is flushed immediately (no buffering inside the validator harness)
- run_episode() emits [START] task=NAME on entry
- Each step emits [STEP] step=N reward=VALUE
- Episode end emits [END] task=NAME score=VALUE steps=N
Verified locally against the live HF Space: all 3 tasks emit the
full marker set even when the LLM call fails (fallback to mark_complete).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
- inference.py +22 -4
inference.py
CHANGED
|
@@ -12,11 +12,17 @@ MANDATORY environment variables:
|
|
| 12 |
|
| 13 |
import os
|
| 14 |
import re
|
|
|
|
| 15 |
import json
|
| 16 |
import time
|
|
|
|
| 17 |
import requests as http_requests
|
| 18 |
from openai import OpenAI
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# === Configuration ===
|
| 21 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 22 |
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
|
|
@@ -221,7 +227,14 @@ def parse_action(text: str) -> dict:
|
|
| 221 |
|
| 222 |
|
| 223 |
def run_episode(task_id: str, scenario_id: str = None) -> float:
|
| 224 |
-
"""Run a single episode and return the final score.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
reset_body = {"task_id": task_id}
|
| 226 |
if scenario_id:
|
| 227 |
reset_body["scenario_id"] = scenario_id
|
|
@@ -266,8 +279,10 @@ def run_episode(task_id: str, scenario_id: str = None) -> float:
|
|
| 266 |
observation = step_data["observation"]
|
| 267 |
done = step_data["done"]
|
| 268 |
step_count += 1
|
|
|
|
|
|
|
|
|
|
| 269 |
if done:
|
| 270 |
-
final_score = step_data["reward"]
|
| 271 |
break
|
| 272 |
|
| 273 |
# Main agent loop
|
|
@@ -310,13 +325,16 @@ def run_episode(task_id: str, scenario_id: str = None) -> float:
|
|
| 310 |
|
| 311 |
observation = step_data["observation"]
|
| 312 |
done = step_data["done"]
|
| 313 |
-
|
|
|
|
| 314 |
step_count += 1
|
|
|
|
| 315 |
|
| 316 |
# Small delay to avoid rate limiting
|
| 317 |
time.sleep(0.3)
|
| 318 |
|
| 319 |
-
|
|
|
|
| 320 |
return final_score
|
| 321 |
|
| 322 |
|
|
|
|
| 12 |
|
| 13 |
import os
|
| 14 |
import re
|
| 15 |
+
import sys
|
| 16 |
import json
|
| 17 |
import time
|
| 18 |
+
import functools
|
| 19 |
import requests as http_requests
|
| 20 |
from openai import OpenAI
|
| 21 |
|
| 22 |
+
# All print calls flush stdout immediately so the validator can parse
|
| 23 |
+
# [START]/[STEP]/[END] markers in real time.
|
| 24 |
+
print = functools.partial(print, flush=True)
|
| 25 |
+
|
| 26 |
# === Configuration ===
|
| 27 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 28 |
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
|
|
|
|
| 227 |
|
| 228 |
|
| 229 |
def run_episode(task_id: str, scenario_id: str = None) -> float:
|
| 230 |
+
"""Run a single episode and return the final score.
|
| 231 |
+
|
| 232 |
+
Emits ``[START]``, ``[STEP]``, and ``[END]`` markers on stdout for
|
| 233 |
+
the validator to parse.
|
| 234 |
+
"""
|
| 235 |
+
# === [START] marker ===
|
| 236 |
+
print(f"[START] task={task_id}")
|
| 237 |
+
|
| 238 |
reset_body = {"task_id": task_id}
|
| 239 |
if scenario_id:
|
| 240 |
reset_body["scenario_id"] = scenario_id
|
|
|
|
| 279 |
observation = step_data["observation"]
|
| 280 |
done = step_data["done"]
|
| 281 |
step_count += 1
|
| 282 |
+
reward_val = step_data.get("reward", 0.0) or 0.0
|
| 283 |
+
final_score = reward_val
|
| 284 |
+
print(f"[STEP] step={step_count} reward={reward_val}")
|
| 285 |
if done:
|
|
|
|
| 286 |
break
|
| 287 |
|
| 288 |
# Main agent loop
|
|
|
|
| 325 |
|
| 326 |
observation = step_data["observation"]
|
| 327 |
done = step_data["done"]
|
| 328 |
+
reward_val = step_data.get("reward", 0.0) or 0.0
|
| 329 |
+
final_score = reward_val
|
| 330 |
step_count += 1
|
| 331 |
+
print(f"[STEP] step={step_count} reward={reward_val}")
|
| 332 |
|
| 333 |
# Small delay to avoid rate limiting
|
| 334 |
time.sleep(0.3)
|
| 335 |
|
| 336 |
+
# === [END] marker ===
|
| 337 |
+
print(f"[END] task={task_id} score={final_score} steps={step_count}")
|
| 338 |
return final_score
|
| 339 |
|
| 340 |
|