Update inference.py
Browse files - inference.py +17 -3
inference.py
CHANGED
|
@@ -233,13 +233,27 @@ def run_episode(client: OpenAI, task_name: str) -> None:
|
|
| 233 |
if done:
|
| 234 |
break
|
| 235 |
|
| 236 |
-
score
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
finally:
|
| 240 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 241 |
|
| 242 |
-
|
| 243 |
# ---------------------------------------------------------------------------
|
| 244 |
# Main
|
| 245 |
# ---------------------------------------------------------------------------
|
|
|
|
| 233 |
if done:
|
| 234 |
break
|
| 235 |
|
| 236 |
+
# Get the raw score from the environment (e.g., 0.0 or 1.0)
|
| 237 |
+
raw_score = env_score()
|
| 238 |
+
|
| 239 |
+
# Apply a small epsilon to keep the score strictly between (0, 1)
|
| 240 |
+
# 0.0 becomes 0.0001 and 1.0 becomes 0.9999
|
| 241 |
+
epsilon = 0.0001
|
| 242 |
+
score = max(epsilon, min(1.0 - epsilon, raw_score))
|
| 243 |
+
|
| 244 |
+
# Determine success using the original raw score logic
|
| 245 |
+
success = raw_score >= 0.8
|
| 246 |
+
# --- FIX ENDS HERE ---
|
| 247 |
+
|
| 248 |
+
except Exception as e:
|
| 249 |
+
print(f"[DEBUG] Episode failed: {e}", flush=True)
|
| 250 |
+
# Even on failure, ensure the final score is valid (not 0.0)
|
| 251 |
+
score = 0.0001
|
| 252 |
+
success = False
|
| 253 |
|
| 254 |
finally:
|
| 255 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 256 |
|
|
|
|
| 257 |
# ---------------------------------------------------------------------------
|
| 258 |
# Main
|
| 259 |
# ---------------------------------------------------------------------------
|