Spaces:
Sleeping
Sleeping
fix: remove exception
Browse files
- inference.py +1 -6
inference.py
CHANGED
|
@@ -111,7 +111,6 @@ SYSTEM_PROMPT = textwrap.dedent("""
|
|
| 111 |
- Never inspect the same source twice.
|
| 112 |
""").strip()
|
| 113 |
|
| 114 |
-
numbers = []
|
| 115 |
|
| 116 |
|
| 117 |
def _user_prompt(step: int, obs_summary: str, history: List[str]) -> str:
|
|
@@ -227,7 +226,6 @@ async def run_episode(
|
|
| 227 |
rewards.append(reward)
|
| 228 |
data_seen = json.dumps(obs.visible_data) if obs.visible_data else "{}"
|
| 229 |
history.append(f"Step {step}: {action.action_type} → reward={reward:.2f} | {obs.feedback}\n Data: {data_seen}")
|
| 230 |
-
numbers.append(f"{reward:.2f}")
|
| 231 |
print(f"[STEP] step={step} action={action.action_type} reward={reward:.2f} done={str(done).lower()} error=null", flush=True)
|
| 232 |
|
| 233 |
if done:
|
|
@@ -258,7 +256,6 @@ async def run_episode(
|
|
| 258 |
finally:
|
| 259 |
steps_taken = len(rewards)
|
| 260 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards) if rewards else "0.10"
|
| 261 |
-
numbers.append(f"{rewards_str}")
|
| 262 |
print(f"[END] success={str(success).lower()} steps={steps_taken} rewards={rewards_str}", flush=True)
|
| 263 |
|
| 264 |
return {"scenario_key": scenario_key, "score": score, "steps": steps_taken, "success": success}, env
|
|
@@ -285,7 +282,6 @@ async def run_task(task_name: str, scenario_keys: List[str], env: WhyDidItFailEn
|
|
| 285 |
|
| 286 |
scores = [r["score"] for r in results]
|
| 287 |
task_score = round(max(0.10, min(0.90, sum(scores) / len(scores))), 2) if scores else 0.10
|
| 288 |
-
numbers.append(f"{task_score:.2f}")
|
| 289 |
print(f"[END] score={task_score:.2f}", flush=True)
|
| 290 |
return scores
|
| 291 |
|
|
@@ -301,10 +297,9 @@ async def main() -> None:
|
|
| 301 |
scores += await run_task("task_hard", HARD_SCENARIOS, env, client)
|
| 302 |
pass # scoring is handled by the yaml grader, not stdout
|
| 303 |
finally:
|
| 304 |
-
raise Exception(numbers)
|
| 305 |
try:
|
| 306 |
await env.close()
|
| 307 |
-
except Exception
|
| 308 |
# print(f" [DEBUG] env.close() error: {e}", file=sys.stderr, flush=True)
|
| 309 |
pass
|
| 310 |
|
|
|
|
| 111 |
- Never inspect the same source twice.
|
| 112 |
""").strip()
|
| 113 |
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
def _user_prompt(step: int, obs_summary: str, history: List[str]) -> str:
|
|
|
|
| 226 |
rewards.append(reward)
|
| 227 |
data_seen = json.dumps(obs.visible_data) if obs.visible_data else "{}"
|
| 228 |
history.append(f"Step {step}: {action.action_type} → reward={reward:.2f} | {obs.feedback}\n Data: {data_seen}")
|
|
|
|
| 229 |
print(f"[STEP] step={step} action={action.action_type} reward={reward:.2f} done={str(done).lower()} error=null", flush=True)
|
| 230 |
|
| 231 |
if done:
|
|
|
|
| 256 |
finally:
|
| 257 |
steps_taken = len(rewards)
|
| 258 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards) if rewards else "0.10"
|
|
|
|
| 259 |
print(f"[END] success={str(success).lower()} steps={steps_taken} rewards={rewards_str}", flush=True)
|
| 260 |
|
| 261 |
return {"scenario_key": scenario_key, "score": score, "steps": steps_taken, "success": success}, env
|
|
|
|
| 282 |
|
| 283 |
scores = [r["score"] for r in results]
|
| 284 |
task_score = round(max(0.10, min(0.90, sum(scores) / len(scores))), 2) if scores else 0.10
|
|
|
|
| 285 |
print(f"[END] score={task_score:.2f}", flush=True)
|
| 286 |
return scores
|
| 287 |
|
|
|
|
| 297 |
scores += await run_task("task_hard", HARD_SCENARIOS, env, client)
|
| 298 |
pass # scoring is handled by the yaml grader, not stdout
|
| 299 |
finally:
|
|
|
|
| 300 |
try:
|
| 301 |
await env.close()
|
| 302 |
+
except Exception:
|
| 303 |
# print(f" [DEBUG] env.close() error: {e}", file=sys.stderr, flush=True)
|
| 304 |
pass
|
| 305 |
|