Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Human-readable multi-episode eval (mean reward / grader). Not the hackathon STDOUT protocol.""" | |
| from __future__ import annotations | |
| import argparse | |
| import sys | |
| from pathlib import Path | |
# Put the directory two levels above this file on sys.path (once) so the
# project-local imports that follow can be resolved when run as a script.
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
| from baseline.baseline_agent import BaselineAgent | |
| from env.scam_env import ScamEnv | |
| from tasks.graders import grade_episode | |
| from tasks.task_registry import CANONICAL_TASK_IDS, MAX_STEPS_BY_TASK, TASK_ALIASES | |
def run_episode(env: ScamEnv, agent: BaselineAgent, seed: int | None) -> tuple[float, float, str, list[str]]:
    """Roll out one episode of *agent* in *env* and grade the resulting trace.

    Args:
        env: Environment to play; it is reset with ``seed`` before stepping.
        agent: Policy asked for an action each step; reset after the env.
        seed: Value forwarded unchanged to ``env.reset``.

    Returns:
        ``(total_reward, score, scenario_id, actions)``: the sum of per-step
        rewards, the value returned by ``grade_episode``, the
        ``"scenario_id"`` entry of the reset info, and a list copy of
        ``env.action_trace`` taken after the episode finished.
    """
    observation, reset_info = env.reset(seed=seed)
    scenario_id = reset_info["scenario_id"]
    agent.reset()

    cumulative = 0.0
    # At least one step is always taken; stop as soon as the env reports done.
    while True:
        chosen = agent.act(observation, env.action_trace)
        observation, step_reward, finished, _ = env.step(chosen)
        cumulative += step_reward
        if finished:
            break

    grade = grade_episode(env.task_id, env.action_trace, scenario_id, env.data_path)
    return cumulative, grade, scenario_id, list(env.action_trace)
def main() -> None:
    """Run the baseline agent for several episodes, printing one line each.

    Command-line options:
        --task: Task id, one of the canonical ids or an alias key
            (default ``"easy"``).
        --episodes: Number of episodes to run (default 5).
        --seed: Base seed; episode ``i`` is reset with ``seed + i``
            (default 42).

    Raises:
        KeyError: If ``MAX_STEPS_BY_TASK`` has no entry for the chosen task.
    """
    parser = argparse.ArgumentParser(description="Baseline benchmark — table output")
    # Accept both canonical ids and alias names, de-duplicated and sorted.
    _choices = sorted({*CANONICAL_TASK_IDS, *TASK_ALIASES.keys()})
    parser.add_argument("--task", choices=_choices, default="easy")
    parser.add_argument("--episodes", type=int, default=5)
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    max_steps = MAX_STEPS_BY_TASK[args.task]
    env = ScamEnv(task_id=args.task, max_steps=max_steps)
    # Always release the environment, even when the agent or an episode raises.
    try:
        agent = BaselineAgent()
        for i in range(args.episodes):
            # Offset the seed per episode so each run uses a different seed.
            r, s, sid, trace = run_episode(env, agent, seed=args.seed + i)
            print(f"episode={i} scenario={sid} reward={r:.3f} grader={s:.3f} actions={trace}")
    finally:
        env.close()
# Script entry point: run the benchmark only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()