krishuggingface committed on
Commit
a8f21d0
·
1 Parent(s): 8a428a8

Add LLM frame skip every 10 steps + clamp all scores to strict (0,1)

Browse files
Files changed (1) hide show
  1. inference.py +10 -4
inference.py CHANGED
@@ -358,8 +358,13 @@ def run_episode(task_id: int) -> float:
358
  info = {}
359
 
360
  while not done:
361
- # LLM is primary; circuit breaker auto-disables after first failure
362
- action = llm_agent(obs)
 
 
 
 
 
363
 
364
  step_resp = _session.post(
365
  f"{ENV_URL}/step",
@@ -389,8 +394,9 @@ def run_episode(task_id: int) -> float:
389
  file=sys.stderr, flush=True,
390
  )
391
 
392
- grader_score = info.get("grader_score", 0.0)
393
- success = grader_score > 0.0
 
394
 
395
  except Exception as exc:
396
  print(f"[DEBUG] Episode error: {type(exc).__name__}: {exc}", file=sys.stderr, flush=True)
 
358
  info = {}
359
 
360
  while not done:
361
+ # Frame skipping: call LLM every 10 steps, heuristic in between.
362
+ # This caps LLM calls at ~150 total across 3 tasks, keeping runtime
363
+ # well under the 20-min judging limit even with 3s/call latency.
364
+ if step_count % 10 == 0:
365
+ action = llm_agent(obs)
366
+ else:
367
+ action = heuristic_agent(obs)
368
 
369
  step_resp = _session.post(
370
  f"{ENV_URL}/step",
 
394
  file=sys.stderr, flush=True,
395
  )
396
 
397
+ grader_score = info.get("grader_score", 0.01)
398
+ grader_score = max(0.01, min(0.99, grader_score)) # strict (0, 1)
399
+ success = grader_score > 0.01
400
 
401
  except Exception as exc:
402
  print(f"[DEBUG] Episode error: {type(exc).__name__}: {exc}", file=sys.stderr, flush=True)