Spaces:
Sleeping
Sleeping
Commit ·
a8f21d0
1
Parent(s): 8a428a8
Add LLM frame skip every 10 steps + clamp all scores to strict (0,1)
Browse files
- inference.py +10 -4
inference.py
CHANGED
|
@@ -358,8 +358,13 @@ def run_episode(task_id: int) -> float:
|
|
| 358 |
info = {}
|
| 359 |
|
| 360 |
while not done:
|
| 361 |
-
#
|
| 362 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
step_resp = _session.post(
|
| 365 |
f"{ENV_URL}/step",
|
|
@@ -389,8 +394,9 @@ def run_episode(task_id: int) -> float:
|
|
| 389 |
file=sys.stderr, flush=True,
|
| 390 |
)
|
| 391 |
|
| 392 |
-
grader_score = info.get("grader_score", 0.
|
| 393 |
-
|
|
|
|
| 394 |
|
| 395 |
except Exception as exc:
|
| 396 |
print(f"[DEBUG] Episode error: {type(exc).__name__}: {exc}", file=sys.stderr, flush=True)
|
|
|
|
| 358 |
info = {}
|
| 359 |
|
| 360 |
while not done:
|
| 361 |
+
# Frame skipping: call LLM every 10 steps, heuristic in between.
|
| 362 |
+
# This caps LLM calls at ~150 total across 3 tasks, keeping runtime
|
| 363 |
+
# well under the 20-min judging limit even with 3s/call latency.
|
| 364 |
+
if step_count % 10 == 0:
|
| 365 |
+
action = llm_agent(obs)
|
| 366 |
+
else:
|
| 367 |
+
action = heuristic_agent(obs)
|
| 368 |
|
| 369 |
step_resp = _session.post(
|
| 370 |
f"{ENV_URL}/step",
|
|
|
|
| 394 |
file=sys.stderr, flush=True,
|
| 395 |
)
|
| 396 |
|
| 397 |
+
grader_score = info.get("grader_score", 0.01)
|
| 398 |
+
grader_score = max(0.01, min(0.99, grader_score)) # strict (0, 1)
|
| 399 |
+
success = grader_score > 0.01
|
| 400 |
|
| 401 |
except Exception as exc:
|
| 402 |
print(f"[DEBUG] Episode error: {type(exc).__name__}: {exc}", file=sys.stderr, flush=True)
|