"""Secondary reward — faster code scores higher, bounded `[0, 1]`. Scales the sandbox's measured `runtime_ms` against a budget: R = max(0, (budget_ms - runtime_ms) / budget_ms) `runtime_ms = 0` → 1.0 (ideal) `runtime_ms = budget_ms` → 0.0 `runtime_ms > budget_ms` → 0.0 (clamped) Program failures (timeout, OOM, runner error) always return 0.0 — a program that didn't complete has no meaningful runtime to reward. Like `lint_reward`, this is a TIEBREAKER. Weight it small in the composite. """ from __future__ import annotations from ..sandbox.runner import RunResult DEFAULT_BUDGET_MS: int = 5000 def runtime_reward(result: RunResult, *, budget_ms: int = DEFAULT_BUDGET_MS) -> float: """Return reward in `[0, 1]` — faster runtime → higher score.""" if result.timed_out or result.oom or result.error is not None: return 0.0 if budget_ms <= 0: return 0.0 return max(0.0, (budget_ms - result.runtime_ms) / budget_ms)