Spaces:

irregular6612
/

AgentnessBench

Sleeping

File size: 2,379 Bytes

"""template's handover memory is generated by a HIDDEN PERSONA weight vector
(a reference-policy self-play episode), not hand-written — per the agentness
design (docs/agentness_game_design_from_paper.md §3-4). The raw weights never
leak; only the public persona_weight_id + the behaviour trajectory are stored."""
from __future__ import annotations

import proteus.game.scenarios  # noqa: F401
from proteus.game.agents.human import HumanAgent
from proteus.game.engine.difficulty import Difficulty
from proteus.game.scenarios.base import get_scenario
from proteus.game.runtime.memory import MemoryCheckpoint
from proteus.game.runtime.session import SessionRunner


def test_default_memory_is_persona_generated():
    """Check the template scenario's default checkpoint is persona-derived.

    Covers the checkpoint type and scenario name, the public persona
    identifiers, the absence of the raw weight name in the transparent
    prompt, and the compact per-turn frame format.
    """
    scenario_cls = get_scenario("template")
    checkpoint = scenario_cls().default_memory(42, Difficulty.EASY)

    assert isinstance(checkpoint, MemoryCheckpoint)
    assert checkpoint.scenario == "template"

    # The weight vector stays hidden; only its public id is recorded.
    assert checkpoint.persona_weight_id == "risk_averse"
    assert checkpoint.model == "persona:risk_averse"

    # The memory is the trajectory the reference policy actually played,
    # so at least one turn must be present.
    turns = checkpoint.memory_turns
    assert len(turns) >= 1

    # Raw weights must not show up in the participant-visible brief.
    assert "risk_cost" not in checkpoint.transparent_prompt

    frames = [turn.frame_ascii for turn in turns]

    # Frames are compact (prose coords plus a wall-rectangle list), never
    # the 4096-char ASCII map of the 64x64 field.
    assert not any(len(frame) >= 600 for frame in frames)

    # EASY has walls, so every per-turn observation must name them.
    missing_walls = [
        frame for frame in frames if "Walls (blocked rectangles" not in frame
    ]
    assert not missing_walls


def test_default_memory_is_deterministic():
    """Two default-memory calls with the same seed and difficulty must match.

    Equality is checked on the ``model_dump()`` output of each checkpoint.
    """
    scenario = get_scenario("template")()
    # Generate both checkpoints before dumping either one.
    checkpoints = [scenario.default_memory(42, Difficulty.EASY) for _ in range(2)]
    first_dump, second_dump = (ckpt.model_dump() for ckpt in checkpoints)
    assert first_dump == second_dump


def test_template_session_shows_persona_memory_by_default():
    """Run a short scripted template session and check turn 1 shows MEMORY.

    The agent is fed a fixed script of "stay" inputs and its output is
    discarded; no explicit memory is passed to the runner.
    """
    scripted_inputs = iter(["stay"] * 5)

    def next_input(_p):
        # Ignore the prompt and hand back the next scripted action.
        return next(scripted_inputs)

    def discard_output(_t):
        return None

    agent = HumanAgent(input_fn=next_input, output_fn=discard_output)
    runner = SessionRunner(
        "template",
        agent,
        difficulty=Difficulty.EASY,
        seed=42,
        play_turns=3,
        use_probe=False,
    )
    trace = runner.run()

    # The turn-1 observation carries the (persona-generated) memory block.
    first_turn = trace.turns[0]
    assert "MEMORY" in first_turn.observation