Harden: skip wildcard models, make LLM errors non-fatal per step
Browse files
- inference.py +10 -2
inference.py
CHANGED
|
@@ -270,8 +270,16 @@ async def run_episode(client: Optional[OpenAI], env: Optional[PolicyEvolverEnv],
|
|
| 270 |
elif not isinstance(obs_dict, dict):
|
| 271 |
obs_dict = dict(obs_dict)
|
| 272 |
|
| 273 |
-
# Agent decides action
|
| 274 |
-
action_dict = agent.get_action(client, task_id, obs_dict)
|
| 275 |
agent.action_history.append(action_dict)
|
| 276 |
|
| 277 |
# Validate and step
|
|
|
|
| 270 |
elif not isinstance(obs_dict, dict):
|
| 271 |
obs_dict = dict(obs_dict)
|
| 272 |
|
| 273 |
+
# Agent decides action (graceful failure per step)
|
| 274 |
+
try:
|
| 275 |
+
action_dict = agent.get_action(client, task_id, obs_dict)
|
| 276 |
+
except Exception as e:
|
| 277 |
+
# LLM call failed — log error for this step and move to next task
|
| 278 |
+
print(f"[DEBUG] LLM error on step {step}: {e}", file=sys.stderr)
|
| 279 |
+
log_step(step=step, action="llm_error", reward=0.0, done=True, error=str(e))
|
| 280 |
+
rewards.append(0.0)
|
| 281 |
+
steps_taken = step
|
| 282 |
+
break
|
| 283 |
agent.action_history.append(action_dict)
|
| 284 |
|
| 285 |
# Validate and step
|