Spaces:

irregular6612
/

AgentnessBench

Sleeping

File size: 3,712 Bytes

8cccda6
426093b
 
 
8cccda6
 
 
 
 
 
 
93cd78f
 
 
8cccda6
 
93cd78f
8cccda6
 
 
93cd78f
8cccda6
 
 
 
 
 
 
 
 
 
 
 
93cd78f
8cccda6
 
 
 
 
 
 
93cd78f
8cccda6
93cd78f
8cccda6
 
 
 
81b3758
 
 
 
 
 
 
93cd78f
81b3758
 
 
 
 
1debdd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81b3758
93cd78f
 
 
81b3758
 
93cd78f
81b3758
93cd78f

from proteus.providers import FakeProvider
from proteus.game.agents import VanillaAgent
from proteus.game.runtime.session import SessionRunner
from proteus.game.runtime.trace import SessionTrace


def _agent(responses):
    """Return a VanillaAgent backed by a FakeProvider scripted with *responses*."""
    scripted_provider = FakeProvider(responses=responses)
    return VanillaAgent(scripted_provider)


def test_optimal_player_survives_and_scores_full_motive_reading():
    """An always-"up" agent is motive-congruent on the first template turn.

    NOTE(review): despite the name, only the first turn and the presence of
    the ``motive_reading_accuracy`` metric are asserted here; survival and
    the metric's value are not checked.
    """
    # On the opening turn the motive-congruent escape is "up" (the open
    # column away from the far-east predator), so a constant "up" player
    # matches the live optimal answer key the runner scores against.
    always_up = _agent(["ACTION: up"])  # FakeProvider repeats the last response
    trace = SessionRunner(
        "template", always_up, seed=42, play_turns=10, use_probe=False,
    ).run()

    # Shape of the returned trace.
    assert isinstance(trace, SessionTrace)
    assert trace.scenario == "template"
    assert trace.cut_frames  # Cut history captured
    assert len(trace.turns) > 0

    # The opening turn: expected action, chosen action, and their agreement.
    opening = trace.turns[0]
    assert opening.motive_action == "up"
    assert opening.action == "up"
    assert opening.was_congruent is True
    assert "motive_reading_accuracy" in trace.metrics


def test_probe_recorded_when_enabled():
    """With ``use_probe=True`` the first turn records a probe question and answer."""
    scripted_reply = "the predator is to my east; I should go up\nACTION: up"
    probed_agent = _agent([scripted_reply])
    trace = SessionRunner(
        "template", probed_agent, seed=42, play_turns=3, use_probe=True,
    ).run()

    opening = trace.turns[0]
    assert opening.probe_q  # a question was asked
    assert opening.probe_a  # an answer was recorded


def test_session_is_deterministic_for_same_inputs():
    """Two runs with the same scripted agent and seed produce matching traces."""

    def _run_once():
        # Each run gets its own freshly scripted agent and runner.
        return SessionRunner(
            "template", _agent(["ACTION: up"]), seed=42,
            play_turns=5, use_probe=False,
        ).run()

    first_trace = _run_once()
    second_trace = _run_once()

    # Same scripted agent + same seed -> identical realized trajectory.
    first_path = [turn.focal_pos for turn in first_trace.turns]
    second_path = [turn.focal_pos for turn in second_trace.turns]
    assert first_path == second_path
    assert first_trace.metrics == second_trace.metrics


def test_short_budget_yields_survived_outcome():
    """A one-turn budget ends as "survived" with a final reward of 50.0."""
    # The tiny budget runs out (with no capture) immediately after the
    # played turn, which is when the engine fires `survived`.
    runner = SessionRunner(
        "template", _agent(["ACTION: up"]), seed=42, play_turns=1,
        use_probe=False,
    )
    trace = runner.run()

    assert trace.outcome == "survived"
    final_turn = trace.turns[-1]
    assert final_turn.reward == 50.0  # _REWARD_SURVIVED


def test_eliminated_outcome_is_explicit_and_terminal():
    """Charging into the predator ends the run early as "eliminated" at -50.0."""
    # Generic property: a focal can be driven into capture, and the result is
    # the explicit, terminal "eliminated" outcome. On template the predator
    # waits far to the east, so a player that always moves "right" walks into
    # it; the session must stop on capture (before the budget is spent) and
    # pay the capture penalty.
    turn_budget = 40
    charging_agent = _agent(["ACTION: right"])
    runner = SessionRunner(
        "template", charging_agent, seed=0, play_turns=turn_budget,
        use_probe=False,
    )
    trace = runner.run()

    assert trace.outcome == "eliminated"
    # Terminal: the run stopped on elimination rather than exhausting the budget.
    assert len(trace.turns) < turn_budget
    final_turn = trace.turns[-1]
    assert final_turn.reward == -50.0  # _REWARD_CAPTURED


def test_cut_frames_count_matches_cut_length_plus_one():
    """The trace holds one more Cut frame than the scenario's EASY cut length.

    NOTE(review): this compares against ``Difficulty.EASY`` while the runner is
    built without an explicit difficulty -- presumably EASY is the runner's
    default; confirm.
    """
    from proteus.game.engine.difficulty import Difficulty
    from proteus.game.scenarios.base import get_scenario

    trace = SessionRunner(
        "template", _agent(["ACTION: up"]), seed=42, play_turns=5,
        use_probe=False,
    ).run()

    # initial frame + one frame per Cut pre-roll step (self-derived, not hardcoded).
    scenario_cls = get_scenario("template")
    pre_roll_steps = scenario_cls().cut_length(Difficulty.EASY)
    expected_frames = pre_roll_steps + 1
    assert len(trace.cut_frames) == expected_frames