File size: 978 Bytes
0dd7c80 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | """Secondary reward — faster code scores higher, bounded `[0, 1]`.
Scales the sandbox's measured `runtime_ms` against a budget:
R = max(0, (budget_ms - runtime_ms) / budget_ms)
`runtime_ms = 0` → 1.0 (ideal)
`runtime_ms = budget_ms` → 0.0
`runtime_ms > budget_ms` → 0.0 (clamped)
Program failures (timeout, OOM, runner error) always return 0.0 — a program
that didn't complete has no meaningful runtime to reward.
Like `lint_reward`, this is a TIEBREAKER. Weight it small in the composite.
"""
from __future__ import annotations
from ..sandbox.runner import RunResult
# Default runtime budget in milliseconds, used when the caller does not pass
# an explicit `budget_ms` to `runtime_reward`.
DEFAULT_BUDGET_MS: int = 5000


def runtime_reward(result: RunResult, *, budget_ms: int = DEFAULT_BUDGET_MS) -> float:
    """Return a reward in `[0, 1]`; a faster runtime yields a higher score.

    The score is the unused fraction of the budget,
    ``(budget_ms - runtime_ms) / budget_ms``, clamped to `[0, 1]`.

    Args:
        result: Run outcome. Only `timed_out`, `oom`, `error` and
            `runtime_ms` are read.
        budget_ms: Runtime budget in milliseconds (keyword-only). A
            non-positive budget yields 0.0 instead of dividing by zero.

    Returns:
        0.0 if the run timed out, ran out of memory, or reported an error;
        0.0 if `budget_ms <= 0`; otherwise the clamped unused fraction of
        the budget (1.0 for a zero runtime, 0.0 at or beyond the budget).
    """
    # A run that did not complete has no meaningful runtime to reward.
    if result.timed_out or result.oom or result.error is not None:
        return 0.0
    # Guard the division below (and reject nonsensical budgets).
    if budget_ms <= 0:
        return 0.0
    unused_fraction = (budget_ms - result.runtime_ms) / budget_ms
    # Clamp on both sides: over-budget runs floor at 0.0, and a negative
    # `runtime_ms` (bad measurement) must not push the reward above 1.0.
    return min(1.0, max(0.0, unused_fraction))
|