Ajayyy00 committed on
Commit
f7f5220
·
1 Parent(s): 533611a

Gracefully handle invalid LLM actions with -0.2 penalty instead of 500 crash

Browse files
Files changed (1) hide show
  1. server/play_environment.py +18 -2
server/play_environment.py CHANGED
@@ -407,8 +407,24 @@ class CyberSOCEnvironment(Environment):
407
  action: SOCActionWrapper,
408
  ) -> SOCObservation:
409
  """Execute one Blue turn."""
410
- # Convert wrapper to typed action
411
- typed_action = action.to_typed_action()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  args = typed_action.model_dump(exclude={"metadata", "type"})
413
 
414
  # Pre-flight validation — penalise without consuming a step
 
407
  action: SOCActionWrapper,
408
  ) -> SOCObservation:
409
  """Execute one Blue turn."""
410
+ # Convert wrapper to typed action — gracefully handle hallucinated
411
+ # action types or wrong parameters from the LLM instead of crashing.
412
+ try:
413
+ typed_action = action.to_typed_action()
414
+ except Exception as exc:
415
+ # Return a negative reward signal so GRPO can learn from the mistake
416
+ penalty = -0.2
417
+ self._state.total_reward += penalty
418
+ self._state.timeline.append({
419
+ "step": self._state.step_count + 1,
420
+ "action_type": getattr(action, "type", "unknown"),
421
+ "target": "N/A",
422
+ "result": f"INVALID_ACTION: {exc}",
423
+ "reward": penalty,
424
+ })
425
+ self._state.step_count += 1
426
+ return self._build_observation(reward=penalty, done=False)
427
+
428
  args = typed_action.model_dump(exclude={"metadata", "type"})
429
 
430
  # Pre-flight validation — penalise without consuming a step