Spaces:
Sleeping
Sleeping
havinashpatil committed on
Commit ·
59fd9d3
1
Parent(s): 82e39c9
fix: clamp reward to [0.01,0.99] so .2f never rounds to 0.00 or 1.00
Browse files
- inference.py +3 -3
inference.py
CHANGED
|
@@ -107,8 +107,8 @@ def run_task(task_id: str, backend: str):
|
|
| 107 |
if error_msg == "null":
|
| 108 |
error_msg = str(e).replace("\n", " ").replace("\r", "")
|
| 109 |
|
| 110 |
-
# 3e. Clamp it
|
| 111 |
-
reward = max(0.
|
| 112 |
rewards.append(reward)
|
| 113 |
|
| 114 |
# 3f. Print [STEP] line immediately
|
|
@@ -136,7 +136,7 @@ def run_task(task_id: str, backend: str):
|
|
| 136 |
success = any(r > 0.5 for r in rewards)
|
| 137 |
success_str = "true" if success else "false"
|
| 138 |
rewards_str = ",".join([f"{r:.2f}" for r in rewards])
|
| 139 |
-
score = max(0.
|
| 140 |
print(f"[END] success={success_str} steps={step} score={score:.2f} rewards={rewards_str}")
|
| 141 |
|
| 142 |
def main():
|
|
|
|
| 107 |
if error_msg == "null":
|
| 108 |
error_msg = str(e).replace("\n", " ").replace("\r", "")
|
| 109 |
|
| 110 |
+
# 3e. Clamp it — bounds chosen so :.2f never rounds to 0.00 or 1.00
|
| 111 |
+
reward = max(0.01, min(0.99, float(raw_reward)))
|
| 112 |
rewards.append(reward)
|
| 113 |
|
| 114 |
# 3f. Print [STEP] line immediately
|
|
|
|
| 136 |
success = any(r > 0.5 for r in rewards)
|
| 137 |
success_str = "true" if success else "false"
|
| 138 |
rewards_str = ",".join([f"{r:.2f}" for r in rewards])
|
| 139 |
+
score = max(0.01, min(0.99, (sum(rewards) / len(rewards)) if rewards else 0.5))
|
| 140 |
print(f"[END] success={success_str} steps={step} score={score:.2f} rewards={rewards_str}")
|
| 141 |
|
| 142 |
def main():
|