#!/usr/bin/env python3 """Run a single ReplicaLab episode locally and dump logs. OBS 07 — Quick local smoke-test script. Resets the environment with a given seed/scenario/difficulty, runs a baseline propose→accept sequence, and writes the episode replay JSON + reward CSV/JSONL to the default output directories. Usage: python -m scripts.run_episode python -m scripts.run_episode --seed 42 --scenario ml_benchmark --difficulty hard python -m scripts.run_episode --max-rounds 3 """ from __future__ import annotations import argparse import sys from pathlib import Path # Ensure project root is importable when run as a script _PROJECT_ROOT = Path(__file__).resolve().parents[1] if str(_PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(_PROJECT_ROOT)) from replicalab.config import DEFAULT_DIFFICULTY, DEFAULT_SCENARIO_TEMPLATE from replicalab.env.replicalab_env import ReplicaLabEnv from replicalab.models import ScientistAction from replicalab.scenarios import generate_scenario from replicalab.utils.logging import log_episode_reward, write_episode_log def _build_propose_action(env: ReplicaLabEnv, seed: int, scenario: str, difficulty: str) -> ScientistAction: """Build a baseline propose_protocol action from the scenario pack.""" pack = generate_scenario(seed=seed, template=scenario, difficulty=difficulty) lab = pack.lab_manager_observation spec = pack.hidden_reference_spec return ScientistAction( action_type="propose_protocol", sample_size=10, controls=["baseline", "ablation"], technique=spec.summary[:60] if spec.summary else "replication_plan", duration_days=max(1, min(2, lab.time_limit_days)), required_equipment=list(lab.equipment_available[:1]) if lab.equipment_available else [], required_reagents=list(lab.reagents_in_stock[:1]) if lab.reagents_in_stock else [], questions=[], rationale=( f"Plan addresses: {', '.join(spec.required_elements[:2])}. " f"Target metric: {spec.target_metric}. " f"Target value: {spec.target_value}. " "Stay within budget and schedule." ), ) def _build_accept_action() -> ScientistAction: return ScientistAction( action_type="accept", sample_size=0, controls=[], technique="", duration_days=0, required_equipment=[], required_reagents=[], questions=[], rationale="", ) def run_episode( seed: int = 0, scenario: str = DEFAULT_SCENARIO_TEMPLATE, difficulty: str = DEFAULT_DIFFICULTY, max_rounds: int | None = None, ) -> None: """Run one episode and persist outputs.""" env = ReplicaLabEnv() obs = env.reset(seed=seed, scenario=scenario, difficulty=difficulty) episode_id = env.episode_id() print(f"Episode {episode_id} | seed={seed} scenario={scenario} difficulty={difficulty}") print(f" Paper: {obs.scientist.paper_title}") propose_action = _build_propose_action(env, seed, scenario, difficulty) total_steps = 0 invalid_count = 0 # Step 1: propose result = env.step(propose_action) total_steps += 1 if result.info.error: invalid_count += 1 print(f" Round 1 propose | reward={result.reward:.4f} done={result.done}") if not result.done: # Step 2: accept result = env.step(_build_accept_action()) total_steps += 1 if result.info.error: invalid_count += 1 print(f" Round 2 accept | reward={result.reward:.4f} done={result.done}") state = env.state() info = result.info # Build and persist episode log from replicalab.models import EpisodeLog episode_log = EpisodeLog( episode_id=episode_id, seed=state.seed, scenario_template=state.scenario_template, difficulty=state.difficulty, final_state=state, transcript=list(state.conversation_history), reward_breakdown=info.reward_breakdown, total_reward=state.reward, rounds_used=state.round_number, agreement_reached=info.agreement_reached, judge_notes=info.judge_notes or "", verdict=info.verdict or "", top_failure_reasons=list(info.top_failure_reasons), invalid_action_count=invalid_count, invalid_action_rate=round(invalid_count / total_steps, 6) if total_steps else 0.0, ) replay_path = write_episode_log(episode_log) csv_path, jsonl_path = log_episode_reward( episode_id=episode_id, seed=state.seed, scenario_template=state.scenario_template, difficulty=state.difficulty, total_reward=state.reward, breakdown=info.reward_breakdown, rounds_used=state.round_number, agreement_reached=info.agreement_reached, verdict=info.verdict or "", judge_notes=info.judge_notes or "", ) print(f"\n Verdict: {info.verdict}") print(f" Total reward: {state.reward:.4f}") print(f" Agreement: {info.agreement_reached}") print(f" Invalid actions: {invalid_count}/{total_steps}") print(f"\n Replay JSON: {replay_path}") print(f" Reward CSV: {csv_path}") print(f" Reward JSONL: {jsonl_path}") env.close() def main() -> None: parser = argparse.ArgumentParser(description="Run a single ReplicaLab episode") parser.add_argument("--seed", type=int, default=0) parser.add_argument("--scenario", default=DEFAULT_SCENARIO_TEMPLATE) parser.add_argument("--difficulty", default=DEFAULT_DIFFICULTY) parser.add_argument("--max-rounds", type=int, default=None) args = parser.parse_args() run_episode( seed=args.seed, scenario=args.scenario, difficulty=args.difficulty, max_rounds=args.max_rounds, ) if __name__ == "__main__": main()