samrat-rm committed on
Commit
43c1c2a
·
1 Parent(s): 61e83f1

fix: remove exception

Browse files
Files changed (1) hide show
  1. inference.py +1 -6
inference.py CHANGED
@@ -111,7 +111,6 @@ SYSTEM_PROMPT = textwrap.dedent("""
111
  - Never inspect the same source twice.
112
  """).strip()
113
 
114
- numbers = []
115
 
116
 
117
  def _user_prompt(step: int, obs_summary: str, history: List[str]) -> str:
@@ -227,7 +226,6 @@ async def run_episode(
227
  rewards.append(reward)
228
  data_seen = json.dumps(obs.visible_data) if obs.visible_data else "{}"
229
  history.append(f"Step {step}: {action.action_type} → reward={reward:.2f} | {obs.feedback}\n Data: {data_seen}")
230
- numbers.append(f"{reward:.2f}")
231
  print(f"[STEP] step={step} action={action.action_type} reward={reward:.2f} done={str(done).lower()} error=null", flush=True)
232
 
233
  if done:
@@ -258,7 +256,6 @@ async def run_episode(
258
  finally:
259
  steps_taken = len(rewards)
260
  rewards_str = ",".join(f"{r:.2f}" for r in rewards) if rewards else "0.10"
261
- numbers.append(f"{rewards_str}")
262
  print(f"[END] success={str(success).lower()} steps={steps_taken} rewards={rewards_str}", flush=True)
263
 
264
  return {"scenario_key": scenario_key, "score": score, "steps": steps_taken, "success": success}, env
@@ -285,7 +282,6 @@ async def run_task(task_name: str, scenario_keys: List[str], env: WhyDidItFailEn
285
 
286
  scores = [r["score"] for r in results]
287
  task_score = round(max(0.10, min(0.90, sum(scores) / len(scores))), 2) if scores else 0.10
288
- numbers.append(f"{task_score:.2f}")
289
  print(f"[END] score={task_score:.2f}", flush=True)
290
  return scores
291
 
@@ -301,10 +297,9 @@ async def main() -> None:
301
  scores += await run_task("task_hard", HARD_SCENARIOS, env, client)
302
  pass # scoring is handled by the yaml grader, not stdout
303
  finally:
304
- raise Exception(numbers)
305
  try:
306
  await env.close()
307
- except Exception as e:
308
  # print(f" [DEBUG] env.close() error: {e}", file=sys.stderr, flush=True)
309
  pass
310
 
 
111
  - Never inspect the same source twice.
112
  """).strip()
113
 
 
114
 
115
 
116
  def _user_prompt(step: int, obs_summary: str, history: List[str]) -> str:
 
226
  rewards.append(reward)
227
  data_seen = json.dumps(obs.visible_data) if obs.visible_data else "{}"
228
  history.append(f"Step {step}: {action.action_type} → reward={reward:.2f} | {obs.feedback}\n Data: {data_seen}")
 
229
  print(f"[STEP] step={step} action={action.action_type} reward={reward:.2f} done={str(done).lower()} error=null", flush=True)
230
 
231
  if done:
 
256
  finally:
257
  steps_taken = len(rewards)
258
  rewards_str = ",".join(f"{r:.2f}" for r in rewards) if rewards else "0.10"
 
259
  print(f"[END] success={str(success).lower()} steps={steps_taken} rewards={rewards_str}", flush=True)
260
 
261
  return {"scenario_key": scenario_key, "score": score, "steps": steps_taken, "success": success}, env
 
282
 
283
  scores = [r["score"] for r in results]
284
  task_score = round(max(0.10, min(0.90, sum(scores) / len(scores))), 2) if scores else 0.10
 
285
  print(f"[END] score={task_score:.2f}", flush=True)
286
  return scores
287
 
 
297
  scores += await run_task("task_hard", HARD_SCENARIOS, env, client)
298
  pass # scoring is handled by the yaml grader, not stdout
299
  finally:
 
300
  try:
301
  await env.close()
302
+ except Exception:
303
  # print(f" [DEBUG] env.close() error: {e}", file=sys.stderr, flush=True)
304
  pass
305