Clamp scores to the inclusive range [0.01, 0.99] for Scaler grader
Browse files
- inference.py +10 -6
inference.py
CHANGED
|
@@ -85,7 +85,8 @@ def main() -> None:
|
|
| 85 |
res = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id})
|
| 86 |
if res.status_code != 200:
|
| 87 |
log_step(step=1, action="reset_failed", reward=0.0, done=True, error=f"HTTP {res.status_code}")
|
| 88 |
-
|
|
|
|
| 89 |
continue
|
| 90 |
|
| 91 |
data = res.json()
|
|
@@ -111,15 +112,18 @@ def main() -> None:
|
|
| 111 |
rewards.append(reward)
|
| 112 |
log_step(step=steps_taken, action=action_str, reward=reward, done=done, error=None)
|
| 113 |
|
| 114 |
-
#
|
| 115 |
-
score
|
| 116 |
-
|
|
|
|
|
|
|
| 117 |
|
| 118 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 119 |
|
| 120 |
except Exception as e:
|
| 121 |
log_step(step=steps_taken+1, action="exception", reward=0.0, done=True, error=str(e).replace("\n", " "))
|
| 122 |
-
|
|
|
|
| 123 |
|
| 124 |
if __name__ == "__main__":
|
| 125 |
-
main()
|
|
|
|
| 85 |
res = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id})
|
| 86 |
if res.status_code != 200:
|
| 87 |
log_step(step=1, action="reset_failed", reward=0.0, done=True, error=f"HTTP {res.status_code}")
|
| 88 |
+
# Forced score to 0.01 instead of 0.0
|
| 89 |
+
log_end(success=False, steps=0, score=0.01, rewards=[])
|
| 90 |
continue
|
| 91 |
|
| 92 |
data = res.json()
|
|
|
|
| 112 |
rewards.append(reward)
|
| 113 |
log_step(step=steps_taken, action=action_str, reward=reward, done=done, error=None)
|
| 114 |
|
| 115 |
+
# --- THE FIX IS HERE ---
|
| 116 |
+
# Calculate final score and forcefully clamp it to the inclusive range [0.01, 0.99]
|
| 117 |
+
raw_score = sum(rewards)
|
| 118 |
+
score = min(max(raw_score, 0.01), 0.99)
|
| 119 |
+
success = score > 0.01
|
| 120 |
|
| 121 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 122 |
|
| 123 |
except Exception as e:
|
| 124 |
log_step(step=steps_taken+1, action="exception", reward=0.0, done=True, error=str(e).replace("\n", " "))
|
| 125 |
+
# Forced score to 0.01 instead of 0.0
|
| 126 |
+
log_end(success=False, steps=steps_taken, score=0.01, rewards=rewards)
|
| 127 |
|
| 128 |
if __name__ == "__main__":
|
| 129 |
+
main()
|