#!/usr/bin/env python3
"""Run a single ReplicaLab episode locally and dump logs.
OBS 07 — Quick local smoke-test script. Resets the environment with a
given seed/scenario/difficulty, runs a baseline propose→accept sequence,
and writes the episode replay JSON + reward CSV/JSONL to the default
output directories.
Usage:
    python -m scripts.run_episode
    python -m scripts.run_episode --seed 42 --scenario ml_benchmark --difficulty hard
    python -m scripts.run_episode --max-rounds 3
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
# Put the directory two levels above this file at the front of the import
# path so the ``replicalab`` imports below resolve when the file is run
# directly as a script.
_PROJECT_ROOT = Path(__file__).resolve().parents[1]
if not any(entry == str(_PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(_PROJECT_ROOT))
from replicalab.config import DEFAULT_DIFFICULTY, DEFAULT_SCENARIO_TEMPLATE
from replicalab.env.replicalab_env import ReplicaLabEnv
from replicalab.models import ScientistAction
from replicalab.scenarios import generate_scenario
from replicalab.utils.logging import log_episode_reward, write_episode_log
def _build_propose_action(env: ReplicaLabEnv, seed: int, scenario: str, difficulty: str) -> ScientistAction:
    """Assemble the baseline ``propose_protocol`` action for a scenario.

    The scenario pack is regenerated from ``seed``, ``scenario`` and
    ``difficulty``; the proposal is filled in from the pack's lab-manager
    observation and hidden reference spec. ``env`` is accepted but not read.
    """
    scenario_pack = generate_scenario(seed=seed, template=scenario, difficulty=difficulty)
    lab_view = scenario_pack.lab_manager_observation
    reference = scenario_pack.hidden_reference_spec

    # Use the first 60 characters of the reference summary as the technique,
    # falling back to a fixed label when the summary is empty.
    if reference.summary:
        technique = reference.summary[:60]
    else:
        technique = "replication_plan"

    # Keep the proposed duration between 1 and 2 days.
    duration = max(1, min(2, lab_view.time_limit_days))

    # Request at most the first listed equipment item and the first reagent.
    equipment = []
    if lab_view.equipment_available:
        equipment = list(lab_view.equipment_available[:1])
    reagents = []
    if lab_view.reagents_in_stock:
        reagents = list(lab_view.reagents_in_stock[:1])

    rationale_parts = (
        f"Plan addresses: {', '.join(reference.required_elements[:2])}. ",
        f"Target metric: {reference.target_metric}. ",
        f"Target value: {reference.target_value}. ",
        "Stay within budget and schedule.",
    )

    return ScientistAction(
        action_type="propose_protocol",
        sample_size=10,
        controls=["baseline", "ablation"],
        technique=technique,
        duration_days=duration,
        required_equipment=equipment,
        required_reagents=reagents,
        questions=[],
        rationale="".join(rationale_parts),
    )
def _build_accept_action() -> ScientistAction:
    """Build the ``accept`` action; every protocol field is zero or empty."""
    blank_protocol = {
        "sample_size": 0,
        "controls": [],
        "technique": "",
        "duration_days": 0,
        "required_equipment": [],
        "required_reagents": [],
        "questions": [],
        "rationale": "",
    }
    return ScientistAction(action_type="accept", **blank_protocol)
def run_episode(
    seed: int = 0,
    scenario: str = DEFAULT_SCENARIO_TEMPLATE,
    difficulty: str = DEFAULT_DIFFICULTY,
    max_rounds: int | None = None,
) -> None:
    """Run one baseline episode and persist its replay and reward logs.

    The environment is reset with the given seed/scenario/difficulty, a
    baseline proposal is submitted and, unless that already ends the
    episode, an ``accept`` action follows. The resulting episode log is
    written via ``write_episode_log`` and the reward row via
    ``log_episode_reward``; a short summary is printed to stdout.

    Args:
        seed: Seed passed to ``env.reset`` and to scenario generation.
        scenario: Scenario template name.
        difficulty: Difficulty level name.
        max_rounds: Accepted for interface/CLI compatibility. The fixed
            propose -> accept sequence below does not read it.

    The environment is closed even when a step or a log write raises, so a
    failed smoke test does not leak the environment's resources.
    """
    env = ReplicaLabEnv()
    try:
        obs = env.reset(seed=seed, scenario=scenario, difficulty=difficulty)
        episode_id = env.episode_id()
        print(f"Episode {episode_id} | seed={seed} scenario={scenario} difficulty={difficulty}")
        print(f" Paper: {obs.scientist.paper_title}")

        propose_action = _build_propose_action(env, seed, scenario, difficulty)
        total_steps = 0
        invalid_count = 0

        # Step 1: propose
        result = env.step(propose_action)
        total_steps += 1
        if result.info.error:
            invalid_count += 1
        print(f" Round 1 propose | reward={result.reward:.4f} done={result.done}")

        if not result.done:
            # Step 2: accept
            result = env.step(_build_accept_action())
            total_steps += 1
            if result.info.error:
                invalid_count += 1
            print(f" Round 2 accept | reward={result.reward:.4f} done={result.done}")

        # ``info`` comes from the last step taken (accept, or propose if the
        # proposal already finished the episode).
        state = env.state()
        info = result.info

        # Build and persist episode log
        from replicalab.models import EpisodeLog

        episode_log = EpisodeLog(
            episode_id=episode_id,
            seed=state.seed,
            scenario_template=state.scenario_template,
            difficulty=state.difficulty,
            final_state=state,
            transcript=list(state.conversation_history),
            reward_breakdown=info.reward_breakdown,
            total_reward=state.reward,
            rounds_used=state.round_number,
            agreement_reached=info.agreement_reached,
            judge_notes=info.judge_notes or "",
            verdict=info.verdict or "",
            top_failure_reasons=list(info.top_failure_reasons),
            invalid_action_count=invalid_count,
            invalid_action_rate=round(invalid_count / total_steps, 6) if total_steps else 0.0,
        )
        replay_path = write_episode_log(episode_log)
        csv_path, jsonl_path = log_episode_reward(
            episode_id=episode_id,
            seed=state.seed,
            scenario_template=state.scenario_template,
            difficulty=state.difficulty,
            total_reward=state.reward,
            breakdown=info.reward_breakdown,
            rounds_used=state.round_number,
            agreement_reached=info.agreement_reached,
            verdict=info.verdict or "",
            judge_notes=info.judge_notes or "",
        )

        print(f"\n Verdict: {info.verdict}")
        print(f" Total reward: {state.reward:.4f}")
        print(f" Agreement: {info.agreement_reached}")
        print(f" Invalid actions: {invalid_count}/{total_steps}")
        print(f"\n Replay JSON: {replay_path}")
        print(f" Reward CSV: {csv_path}")
        print(f" Reward JSONL: {jsonl_path}")
    finally:
        # Release the environment on both the success and the failure path.
        env.close()
def main() -> None:
    """Parse the command-line options and run a single episode with them."""
    cli = argparse.ArgumentParser(description="Run a single ReplicaLab episode")

    # Flag name paired with the keyword arguments for ``add_argument``.
    option_table = (
        ("--seed", {"type": int, "default": 0}),
        ("--scenario", {"default": DEFAULT_SCENARIO_TEMPLATE}),
        ("--difficulty", {"default": DEFAULT_DIFFICULTY}),
        ("--max-rounds", {"type": int, "default": None}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    opts = cli.parse_args()
    run_episode(
        seed=opts.seed,
        scenario=opts.scenario,
        difficulty=opts.difficulty,
        max_rounds=opts.max_rounds,
    )
# Run the CLI only when this file is executed as a script or via ``-m``,
# not when it is imported.
if __name__ == "__main__":
    main()
|