Arijit-07 committed on
Commit
e16d919
·
1 Parent(s): ae87d2d

Fix Phase 2: Add [START]/[STEP]/[END] structured stdout blocks for validator

Browse files
Files changed (1) hide show
  1. inference.py +5 -0
inference.py CHANGED
@@ -187,6 +187,7 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
187
  env = DevOpsIncidentEnv(task_id=task_id, seed=seed)
188
  obs = env.reset()
189
 
 
190
  print(f"\n{'━'*64}")
191
  print(f" Task: {task_id.upper()} | Seed: {seed} | Model: {MODEL_NAME}")
192
  print(f"{'━'*64}")
@@ -262,6 +263,8 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
262
  resolution_str = f" *** {result.info.get('resolution', '')} ***" if result.done and result.info.get("resolution") else ""
263
  print(f" Step {step:02d} reasoning: {reasoning[:100]}...")
264
  print(f" Step {step:02d} action: {action_label}{reward_str}{resolution_str}")
 
 
265
 
266
  if obs.last_action_error:
267
  print(f" ⚠ {obs.last_action_error[:80]}")
@@ -283,6 +286,8 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
283
  print(f" Steps taken : {step}")
284
  print(f" Rewards : {[e['reward'] for e in state.action_history if e['reward'] != 0]}")
285
  print(f" Final score : {final_score:.4f}")
 
 
286
 
287
  return {
288
  "task_id": task_id,
 
187
  env = DevOpsIncidentEnv(task_id=task_id, seed=seed)
188
  obs = env.reset()
189
 
190
+ print(f"[START] task={task_id} seed={seed} model={MODEL_NAME}", flush=True)
191
  print(f"\n{'━'*64}")
192
  print(f" Task: {task_id.upper()} | Seed: {seed} | Model: {MODEL_NAME}")
193
  print(f"{'━'*64}")
 
263
  resolution_str = f" *** {result.info.get('resolution', '')} ***" if result.done and result.info.get("resolution") else ""
264
  print(f" Step {step:02d} reasoning: {reasoning[:100]}...")
265
  print(f" Step {step:02d} action: {action_label}{reward_str}{resolution_str}")
266
+ # Structured output required by Phase 2 validator
267
+ print(f"[STEP] task={task_id} step={step} action={action.action_type.value} reward={result.reward:.4f}", flush=True)
268
 
269
  if obs.last_action_error:
270
  print(f" ⚠ {obs.last_action_error[:80]}")
 
286
  print(f" Steps taken : {step}")
287
  print(f" Rewards : {[e['reward'] for e in state.action_history if e['reward'] != 0]}")
288
  print(f" Final score : {final_score:.4f}")
289
+ # Structured output required by Phase 2 validator
290
+ print(f"[END] task={task_id} score={final_score:.4f} steps={step} resolved={state.incident_resolved}", flush=True)
291
 
292
  return {
293
  "task_id": task_id,