from __future__ import annotations import json from pathlib import Path from runway_zero.baselines import FifoPolicy, RandomPolicy, RecoveryPolicy, rollout from runway_zero.qlearning import TrainedRLPolicy def main() -> None: trace_dir = Path("results/traces") trace_dir.mkdir(parents=True, exist_ok=True) summaries = [] for stage in [1, 2, 3]: trained_path = Path(f"results/trained/q_policy_stage{stage}.json") policies = [RandomPolicy(2), FifoPolicy(), RecoveryPolicy()] if trained_path.exists(): policies.append(TrainedRLPolicy.from_file(trained_path)) for policy in policies: result = rollout(policy, stage=stage, seed=7) payload = { "title": "Runway Zero Demo Replay", "policy": result["policy"], "stage": result["stage"], "seed": result["seed"], "summary": result["metrics"], "total_reward": result["total_reward"], "frames": result["history"], } name = f"{result['policy']}_stage{stage}_seed7.json" (trace_dir / name).write_text(json.dumps(payload, indent=2), encoding="utf-8") summaries.append( { "trace": name, "policy": result["policy"], "stage": stage, "total_reward": result["total_reward"], **result["metrics"], } ) (trace_dir / "trace_manifest.json").write_text(json.dumps(summaries, indent=2), encoding="utf-8") print(f"Wrote traces to {trace_dir}") if __name__ == "__main__": main()