Spaces:

samrat-rm
/

WhyDidItFail

Sleeping

App Files Files Community

samrat-rm committed 7 days ago

Commit

43c1c2a

1 Parent(s): 61e83f1

fix: remove exception

Browse files

Files changed (1) hide show

inference.py +1 -6

inference.py CHANGED Viewed

@@ -111,7 +111,6 @@ SYSTEM_PROMPT = textwrap.dedent("""
     - Never inspect the same source twice.
 """).strip()
-numbers = []
 def _user_prompt(step: int, obs_summary: str, history: List[str]) -> str:
@@ -227,7 +226,6 @@ async def run_episode(
             rewards.append(reward)
             data_seen = json.dumps(obs.visible_data) if obs.visible_data else "{}"
             history.append(f"Step {step}: {action.action_type} → reward={reward:.2f} | {obs.feedback}\n  Data: {data_seen}")
-            numbers.append(f"{reward:.2f}")
             print(f"[STEP] step={step} action={action.action_type} reward={reward:.2f} done={str(done).lower()} error=null", flush=True)
             if done:
@@ -258,7 +256,6 @@ async def run_episode(
     finally:
         steps_taken = len(rewards)
         rewards_str = ",".join(f"{r:.2f}" for r in rewards) if rewards else "0.10"
-        numbers.append(f"{rewards_str}")
         print(f"[END] success={str(success).lower()} steps={steps_taken} rewards={rewards_str}", flush=True)
     return {"scenario_key": scenario_key, "score": score, "steps": steps_taken, "success": success}, env
@@ -285,7 +282,6 @@ async def run_task(task_name: str, scenario_keys: List[str], env: WhyDidItFailEn
     scores = [r["score"] for r in results]
     task_score = round(max(0.10, min(0.90, sum(scores) / len(scores))), 2) if scores else 0.10
-    numbers.append(f"{task_score:.2f}")
     print(f"[END] score={task_score:.2f}", flush=True)
     return scores
@@ -301,10 +297,9 @@ async def main() -> None:
         scores += await run_task("task_hard",   HARD_SCENARIOS,   env, client)
         pass  # scoring is handled by the yaml grader, not stdout
     finally:
-        raise Exception(numbers)
         try:
             await env.close()
-        except Exception as e:
             # print(f"  [DEBUG]   env.close() error: {e}", file=sys.stderr, flush=True)
             pass

     - Never inspect the same source twice.
 """).strip()
 def _user_prompt(step: int, obs_summary: str, history: List[str]) -> str:
             rewards.append(reward)
             data_seen = json.dumps(obs.visible_data) if obs.visible_data else "{}"
             history.append(f"Step {step}: {action.action_type} → reward={reward:.2f} | {obs.feedback}\n  Data: {data_seen}")
             print(f"[STEP] step={step} action={action.action_type} reward={reward:.2f} done={str(done).lower()} error=null", flush=True)
             if done:
     finally:
         steps_taken = len(rewards)
         rewards_str = ",".join(f"{r:.2f}" for r in rewards) if rewards else "0.10"
         print(f"[END] success={str(success).lower()} steps={steps_taken} rewards={rewards_str}", flush=True)
     return {"scenario_key": scenario_key, "score": score, "steps": steps_taken, "success": success}, env
     scores = [r["score"] for r in results]
     task_score = round(max(0.10, min(0.90, sum(scores) / len(scores))), 2) if scores else 0.10
     print(f"[END] score={task_score:.2f}", flush=True)
     return scores
         scores += await run_task("task_hard",   HARD_SCENARIOS,   env, client)
         pass  # scoring is handled by the yaml grader, not stdout
     finally:
         try:
             await env.close()
+        except Exception:
             # print(f"  [DEBUG]   env.close() error: {e}", file=sys.stderr, flush=True)
             pass