Spaces:
Sleeping
Sleeping
File size: 1,513 Bytes
38ee4ab a038a1e 19b4563 a038a1e 38ee4ab a038a1e 38ee4ab a038a1e 38ee4ab a038a1e 19b4563 a038a1e 38ee4ab 19b4563 38ee4ab 19b4563 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | from typing import List, Optional
def safe_score(raw: float) -> float:
    """Clamp a raw score to the closed interval [0.05, 0.95].

    Args:
        raw: Unclamped score value; anything ``float()`` accepts.

    Returns:
        The score as a float, guaranteed to be in [0.05, 0.95]. Values below
        the range map to 0.05, values above it map to 0.95, and NaN maps to
        the lower bound 0.05.

    Raises:
        TypeError, ValueError: If ``raw`` cannot be converted by ``float()``.
    """
    import math  # function-scope import keeps the block self-contained

    value = float(raw)
    # NaN compares false against everything, so a plain min/max clamp would
    # let it fall through to the upper bound (0.95). A score that is not a
    # number must never be rewarded, so pin it to the floor instead.
    if math.isnan(value):
        return 0.05
    return max(0.05, min(0.95, value))
def log_start(task: str, env: str, model: str) -> None:
    """Print the single ``[START]`` record that opens an episode.

    Args:
        task: Task identifier, written after ``task=``.
        env: Environment identifier, written after ``env=``.
        model: Model identifier, written after ``model=``.

    The line is written to stdout and flushed immediately.
    """
    record = f"[START] task={task} env={env} model={model}"
    print(record, flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Emit one [STEP] line to stdout and flush it.

    Intended to be called right after each environment step.

    Args:
        step: 1-based step number.
        action: Compact single-line action label (e.g. 'execute_code').
        reward: Step reward, formatted to 2 decimal places.
        done: Whether the episode ended after this step; printed as
            lowercase ``true`` / ``false``.
        error: Raw error string from the env, or None. None and the empty
            string are both printed as ``null``.
    """
    if error:
        # Keep the record on one physical line: besides "\n", Windows-style
        # "\r\n" and bare "\r" would otherwise leave a carriage return in the
        # output. "\r\n" is handled first so it collapses to a single space.
        error_val = error.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
    else:
        error_val = "null"
    done_val = str(done).lower()
    print(
        f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
        flush=True,
    )
def log_end(task_id: int, score: float, steps: int) -> None:
    """Print the single ``[END]`` record that closes an episode.

    Args:
        task_id: Number of the task that just ran, written after ``task=``.
        score: Final score, printed with two decimal places. The caller is
            expected to pass a value already clamped to [0.05, 0.95]; no
            clamping happens here.
        steps: Total number of steps taken, written after ``steps=``.

    The line is written to stdout and flushed immediately.
    """
    summary = f"[END] task={task_id} score={score:.2f} steps={steps}"
    print(summary, flush=True)
|