"""Golden: the same action sequence produces an identical SessionTrace whether driven through InteractiveSession (HTTP path) or SessionRunner + a scripted HumanAgent (CLI path). This pins the two paths together so they cannot drift.""" from __future__ import annotations import proteus.game.scenarios # noqa: F401 from proteus.game.agents.human import HumanAgent from proteus.game.engine.difficulty import Difficulty from proteus.game.runtime.interactive import InteractiveSession from proteus.game.runtime.session import SessionRunner ACTIONS = ["up", "up", "left", "stay", "right", "up"] def _scripted_human(): feed = iter(ACTIONS) def input_fn(_prompt: str) -> str: return next(feed) def output_fn(_text: str) -> None: return None return HumanAgent(input_fn=input_fn, output_fn=output_fn) def test_interactive_matches_session_runner(): runner = SessionRunner( "template", _scripted_human(), difficulty=Difficulty.EASY, seed=42, play_turns=len(ACTIONS), use_probe=False, ) cli_trace = runner.run() sess = InteractiveSession( "template", difficulty=Difficulty.EASY, seed=42, play_turns=len(ACTIONS), use_probe=False, ) for a in ACTIONS: if sess.state()["outcome"] is not None: break sess.step(a) web_trace = sess.finish() # Both are human; everything must match field-for-field. assert web_trace.model == cli_trace.model == "human" assert web_trace.cut_frames == cli_trace.cut_frames assert web_trace.outcome == cli_trace.outcome assert web_trace.metrics == cli_trace.metrics assert len(web_trace.turns) == len(cli_trace.turns) for wt, ct in zip(web_trace.turns, cli_trace.turns): assert wt.model_dump() == ct.model_dump() # Full-trace equality is the strongest pin. assert web_trace.model_dump() == cli_trace.model_dump()