Spaces:
Sleeping
Sleeping
File size: 1,285 Bytes
426093b 4d98bbb 426093b 4d98bbb 93cd78f 4d98bbb 93cd78f 4d98bbb | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | from proteus.game.agents import HumanAgent, VanillaAgent
from proteus.providers import FakeProvider
from proteus.game.runtime.session import SessionRunner
def _scripted(seq):
it = iter(seq)
return lambda prompt="": next(it)
def test_human_and_llm_traces_share_schema_and_answer_keys():
    """Check that a scripted human and a scripted LLM yield comparable traces.

    Both players commit "up" every turn under the same deterministic world,
    so cut frames and per-turn answer keys must be identical; only `model`
    differs. This is the human-baseline comparability foundation (spec §10).
    """
    # A single settings mapping so both sessions run with the same arguments.
    session_kwargs = dict(seed=42, play_turns=5, use_probe=False)

    human_agent = HumanAgent(
        input_fn=_scripted(["up"] * 20),
        output_fn=lambda s: None,
    )
    human_trace = SessionRunner("template", human_agent, **session_kwargs).run()

    llm_agent = VanillaAgent(FakeProvider(["ACTION: up"]))
    llm_trace = SessionRunner("template", llm_agent, **session_kwargs).run()

    assert human_trace.cut_frames == llm_trace.cut_frames

    # Per-turn fields are compared column by column, in the same order as the
    # original explicit assertions.
    for field_name in ("action", "motive_action", "habit_action"):
        human_column = [getattr(turn, field_name) for turn in human_trace.turns]
        llm_column = [getattr(turn, field_name) for turn in llm_trace.turns]
        assert human_column == llm_column

    assert human_trace.outcome == llm_trace.outcome

    # Only the metric names are compared here, not their values.
    assert set(human_trace.metrics) == set(llm_trace.metrics)

    assert human_trace.model == "human"
    assert llm_trace.model == "fake"
|