# AgentnessBench/tests/runtime/test_template_memory.py
# Commit d4716c0 (irregular6612): refactor(scenario): rename pack_evade -> template
"""template's handover memory is generated by a HIDDEN PERSONA weight vector
(a reference-policy self-play episode), not hand-written — per the agentness
design (docs/agentness_game_design_from_paper.md §3-4). The raw weights never
leak; only the public persona_weight_id + the behaviour trajectory are stored."""
from __future__ import annotations
import proteus.game.scenarios # noqa: F401
from proteus.game.agents.human import HumanAgent
from proteus.game.engine.difficulty import Difficulty
from proteus.game.scenarios.base import get_scenario
from proteus.game.runtime.memory import MemoryCheckpoint
from proteus.game.runtime.session import SessionRunner
def test_default_memory_is_persona_generated():
    """Check the default EASY checkpoint (seed 42) of the "template" scenario.

    The checkpoint must expose only the public persona id, carry at least one
    played memory turn, keep the raw weight name out of the transparent
    prompt, and use compact per-turn frames that list the walls.
    """
    checkpoint = get_scenario("template")().default_memory(42, Difficulty.EASY)

    assert isinstance(checkpoint, MemoryCheckpoint)
    assert checkpoint.scenario == "template"

    # The persona weight vector is hidden; only its public id is recorded.
    assert checkpoint.persona_weight_id == "risk_averse"
    assert checkpoint.model == "persona:risk_averse"

    # The memory is the trajectory the reference policy actually played.
    turns = checkpoint.memory_turns
    assert len(turns) >= 1

    # Raw weight names must NOT leak into the participant-visible brief.
    assert "risk_cost" not in checkpoint.transparent_prompt

    # Frames stay compact (prose coords + a wall-rectangle list) rather than
    # the 4096-char ASCII map of the 64x64 field.
    for turn in turns:
        assert len(turn.frame_ascii) < 600

    # EASY has walls, so every per-turn observation names them.
    for turn in turns:
        assert "Walls (blocked rectangles" in turn.frame_ascii
def test_default_memory_is_deterministic():
    """Building the default memory twice with one seed gives equal dumps."""
    scenario = get_scenario("template")()
    # Build both checkpoints first, then compare their serialised forms.
    first, second = (
        scenario.default_memory(42, Difficulty.EASY) for _ in range(2)
    )
    assert first.model_dump() == second.model_dump()
def test_template_session_shows_persona_memory_by_default():
    """Run a short scripted session and check turn 1 shows the memory block.

    The human agent is fed up to five "stay" inputs and its output is
    discarded; the session runs on EASY with seed 42, play_turns=3 and
    use_probe=False.
    """
    scripted_moves = iter(["stay"] * 5)

    def next_move(_p):
        # Ignore the prompt text and hand back the next scripted input.
        return next(scripted_moves)

    def discard(_t):
        # Swallow anything the agent would print.
        return None

    agent = HumanAgent(input_fn=next_move, output_fn=discard)
    runner = SessionRunner(
        "template",
        agent,
        difficulty=Difficulty.EASY,
        seed=42,
        play_turns=3,
        use_probe=False,
    )

    # The turn-1 observation carries the (persona-generated) memory block.
    first_turn = runner.run().turns[0]
    assert "MEMORY" in first_turn.observation