teja944 committed on
Commit
d40922a
·
verified ·
1 Parent(s): 0a2221c

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +6 -2
inference.py CHANGED
@@ -92,8 +92,12 @@ def run_inference():
92
  print(f"[STEP] step={step_idx} action={action_str} reward={reward_val:.2f} done={str(done).lower()} error={error_msg}")
93
 
94
  # [END] FORMAT
95
- # A score > 0.8 typically means success based on our grader logic
96
- final_score = sum(rewards_history) if rewards_history else 0.0
 
 
 
 
97
  success = final_score > 0.8
98
  rewards_str = ",".join([f"{r:.2f}" for r in rewards_history])
99
  print(f"[END] success={str(success).lower()} steps={step_idx} score={final_score:.2f} rewards={rewards_str}")
 
92
  print(f"[STEP] step={step_idx} action={action_str} reward={reward_val:.2f} done={str(done).lower()} error={error_msg}")
93
 
94
  # [END] FORMAT
95
+ # Use the last reward (grader's final score) as the task score
96
+ # Clamp to the inclusive range [0.01, 0.99] to satisfy validator
97
+ if rewards_history:
98
+ final_score = max(0.01, min(0.99, rewards_history[-1]))
99
+ else:
100
+ final_score = 0.01
101
  success = final_score > 0.8
102
  rewards_str = ",".join([f"{r:.2f}" for r in rewards_history])
103
  print(f"[END] success={str(success).lower()} steps={step_idx} score={final_score:.2f} rewards={rewards_str}")