Update inference.py
Browse files- inference.py +6 -2
inference.py
CHANGED
|
@@ -92,8 +92,12 @@ def run_inference():
|
|
| 92 |
print(f"[STEP] step={step_idx} action={action_str} reward={reward_val:.2f} done={str(done).lower()} error={error_msg}")
|
| 93 |
|
| 94 |
# [END] FORMAT
|
| 95 |
-
#
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
success = final_score > 0.8
|
| 98 |
rewards_str = ",".join([f"{r:.2f}" for r in rewards_history])
|
| 99 |
print(f"[END] success={str(success).lower()} steps={step_idx} score={final_score:.2f} rewards={rewards_str}")
|
|
|
|
| 92 |
print(f"[STEP] step={step_idx} action={action_str} reward={reward_val:.2f} done={str(done).lower()} error={error_msg}")
|
| 93 |
|
| 94 |
# [END] FORMAT
|
| 95 |
+
# Use the last reward (grader's final score) as the task score
|
| 96 |
+
# Clamp to the inclusive range [0.01, 0.99] to satisfy validator
|
| 97 |
+
if rewards_history:
|
| 98 |
+
final_score = max(0.01, min(0.99, rewards_history[-1]))
|
| 99 |
+
else:
|
| 100 |
+
final_score = 0.01
|
| 101 |
success = final_score > 0.8
|
| 102 |
rewards_str = ",".join([f"{r:.2f}" for r in rewards_history])
|
| 103 |
print(f"[END] success={str(success).lower()} steps={step_idx} score={final_score:.2f} rewards={rewards_str}")
|