Fix Phase 2: Add [START]/[STEP]/[END] structured stdout blocks for validator
Browse files
- inference.py +5 -0
inference.py
CHANGED
|
@@ -187,6 +187,7 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
|
|
| 187 |
env = DevOpsIncidentEnv(task_id=task_id, seed=seed)
|
| 188 |
obs = env.reset()
|
| 189 |
|
|
|
|
| 190 |
print(f"\n{'━'*64}")
|
| 191 |
print(f" Task: {task_id.upper()} | Seed: {seed} | Model: {MODEL_NAME}")
|
| 192 |
print(f"{'━'*64}")
|
|
@@ -262,6 +263,8 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
|
|
| 262 |
resolution_str = f" *** {result.info.get('resolution', '')} ***" if result.done and result.info.get("resolution") else ""
|
| 263 |
print(f" Step {step:02d} reasoning: {reasoning[:100]}...")
|
| 264 |
print(f" Step {step:02d} action: {action_label}{reward_str}{resolution_str}")
|
|
|
|
|
|
|
| 265 |
|
| 266 |
if obs.last_action_error:
|
| 267 |
print(f" ⚠ {obs.last_action_error[:80]}")
|
|
@@ -283,6 +286,8 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
|
|
| 283 |
print(f" Steps taken : {step}")
|
| 284 |
print(f" Rewards : {[e['reward'] for e in state.action_history if e['reward'] != 0]}")
|
| 285 |
print(f" Final score : {final_score:.4f}")
|
|
|
|
|
|
|
| 286 |
|
| 287 |
return {
|
| 288 |
"task_id": task_id,
|
|
|
|
| 187 |
env = DevOpsIncidentEnv(task_id=task_id, seed=seed)
|
| 188 |
obs = env.reset()
|
| 189 |
|
| 190 |
+
print(f"[START] task={task_id} seed={seed} model={MODEL_NAME}", flush=True)
|
| 191 |
print(f"\n{'━'*64}")
|
| 192 |
print(f" Task: {task_id.upper()} | Seed: {seed} | Model: {MODEL_NAME}")
|
| 193 |
print(f"{'━'*64}")
|
|
|
|
| 263 |
resolution_str = f" *** {result.info.get('resolution', '')} ***" if result.done and result.info.get("resolution") else ""
|
| 264 |
print(f" Step {step:02d} reasoning: {reasoning[:100]}...")
|
| 265 |
print(f" Step {step:02d} action: {action_label}{reward_str}{resolution_str}")
|
| 266 |
+
# Structured output required by Phase 2 validator
|
| 267 |
+
print(f"[STEP] task={task_id} step={step} action={action.action_type.value} reward={result.reward:.4f}", flush=True)
|
| 268 |
|
| 269 |
if obs.last_action_error:
|
| 270 |
print(f" ⚠ {obs.last_action_error[:80]}")
|
|
|
|
| 286 |
print(f" Steps taken : {step}")
|
| 287 |
print(f" Rewards : {[e['reward'] for e in state.action_history if e['reward'] != 0]}")
|
| 288 |
print(f" Final score : {final_score:.4f}")
|
| 289 |
+
# Structured output required by Phase 2 validator
|
| 290 |
+
print(f"[END] task={task_id} score={final_score:.4f} steps={step} resolved={state.incident_resolved}", flush=True)
|
| 291 |
|
| 292 |
return {
|
| 293 |
"task_id": task_id,
|