Spaces:
Sleeping
Sleeping
Commit ·
7dff36b
1
Parent(s): 11dd1d6
Fix: ensure rewards are never exactly 0.0 or 1.0 by using proper normalization
Browse files
- inference.py +3 -2
inference.py
CHANGED
|
@@ -228,7 +228,8 @@ def run_episode(client: OpenAI, difficulty: str, task_id: str) -> dict:
|
|
| 228 |
# Score strictly between 0 and 1 exclusive
|
| 229 |
# Score strictly between 0 and 1 exclusive — never 0.0 or 1.0
|
| 230 |
if rewards:
|
| 231 |
-
|
|
|
|
| 232 |
else:
|
| 233 |
raw_score = 0.5
|
| 234 |
|
|
@@ -242,7 +243,7 @@ def run_episode(client: OpenAI, difficulty: str, task_id: str) -> dict:
|
|
| 242 |
|
| 243 |
finally:
|
| 244 |
# Ensure rewards list for log_end is never empty
|
| 245 |
-
safe_rewards = rewards if rewards else [0.5]
|
| 246 |
log_end(
|
| 247 |
success = success,
|
| 248 |
steps = steps,
|
|
|
|
| 228 |
# Score strictly between 0 and 1 exclusive
|
| 229 |
# Score strictly between 0 and 1 exclusive — never 0.0 or 1.0
|
| 230 |
if rewards:
|
| 231 |
+
shifted = [max(0.01, min(0.99, (r + 1.0) / 2.0)) for r in rewards]
|
| 232 |
+
raw_score = sum(shifted) / len(shifted)
|
| 233 |
else:
|
| 234 |
raw_score = 0.5
|
| 235 |
|
|
|
|
| 243 |
|
| 244 |
finally:
|
| 245 |
# Ensure rewards list for log_end is never empty
|
| 246 |
+
safe_rewards = [max(0.01, min(0.99, (r + 1.0) / 2.0)) for r in rewards] if rewards else [0.5]
|
| 247 |
log_end(
|
| 248 |
success = success,
|
| 249 |
steps = steps,
|