Spaces:
Sleeping
Sleeping
# tests/runtime/test_errand_discovery.py
"""Find-your-body discovery: trace fields, SELF parsing in make_turn_trace,
available-action sourcing, and the Discovery metric."""
| import random | |
| from proteus.game.engine.difficulty import Difficulty | |
| from proteus.game.engine.grid import MotiveGridGame | |
| from proteus.game.scenarios.base import get_scenario | |
| import proteus.game.scenarios # noqa: F401 | |
| from proteus.game.runtime.trace import TurnTrace | |
def _errand():
    """Build an ``errand_runner`` scenario together with a game that hosts it.

    Returns:
        A ``(scenario, game)`` pair. The game is created at ``Difficulty.EASY``
        with ``random.Random(7)`` and ``max_steps=80``.
    """
    scenario = get_scenario("errand_runner")()
    rng = random.Random(7)
    return scenario, MotiveGridGame(scenario, rng, Difficulty.EASY, max_steps=80)
def test_turntrace_discovery_fields_default_none():
    """A TurnTrace built without discovery fields leaves both of them as None."""
    base_fields = dict(
        turn_idx=1,
        observation="o",
        action="stay",
        motive_action="stay",
        habit_action="stay",
        is_diagnostic=False,
        was_congruent=True,
        reward=0.0,
        focal_pos=(0, 0),
        predator_pos=(0, 0),
    )
    trace = TurnTrace(**base_fields)
    assert trace.self_belief is None
    assert trace.self_correct is None
def test_errand_exposes_discovery_hooks():
    """errand_runner reports three candidates and its own true body index."""
    scenario, _game = _errand()
    candidate_count = scenario.discovery_candidates()
    assert candidate_count == 3
    reported_index = scenario.discovery_true_index()
    assert reported_index == scenario.true_body_index
def test_template_has_no_discovery():
    """The template scenario reports zero candidates and no true index."""
    template_cls = get_scenario("template")
    template = template_cls()
    candidate_count = template.discovery_candidates()
    assert candidate_count == 0
    true_index = template.discovery_true_index()
    assert true_index is None
| from proteus.game.runtime import _session_core as core | |
def test_make_turn_trace_parses_correct_self_report():
    """A ``SELF:`` line naming the true body index is parsed and marked correct."""
    scenario, game = _errand()
    expected = scenario.true_body_index
    reply = f"reasoning...\nSELF: {expected}\nACTION: stay"
    trace = core.make_turn_trace(
        scenario,
        game,
        turn_idx=1,
        observation="obs",
        action="stay",
        raw_text=reply,
    )
    assert trace.self_belief == expected
    assert trace.self_correct is True
def test_make_turn_trace_marks_wrong_self_report():
    """A ``SELF:`` line naming a non-true body index is parsed and marked wrong."""
    scen, game = _errand()
    # Pick the next candidate index, wrapping around. The modulus comes from the
    # scenario's own candidate count rather than a hard-coded 3, so this test
    # stays valid if the number of candidates changes.
    wrong = (scen.true_body_index + 1) % scen.discovery_candidates()
    tt = core.make_turn_trace(
        scen, game, turn_idx=1, observation="obs",
        action="stay", raw_text=f"SELF: {wrong}\nACTION: stay",
    )
    assert tt.self_belief == wrong
    assert tt.self_correct is False
def test_make_turn_trace_no_self_report_is_none():
    """Without a ``SELF:`` line both discovery fields on the trace are None."""
    scenario, game = _errand()
    trace = core.make_turn_trace(
        scenario,
        game,
        turn_idx=1,
        observation="o",
        action="stay",
        raw_text="ACTION: stay",
    )
    assert trace.self_belief is None
    assert trace.self_correct is None
def test_observation_lists_scenario_action_set():
    """The observation text mentions ``interact`` for errand_runner only."""
    errand_scen, errand_game = _errand()
    errand_obs = core.build_observation(
        errand_scen, errand_game, cut_frames=[], turn_idx=1
    )
    # errand_runner exposes interact in Available actions
    assert "interact" in errand_obs

    # template stays at the 5 movement actions (regression)
    template_scen = get_scenario("template")()
    template_rng = random.Random(0)
    template_game = MotiveGridGame(
        template_scen, template_rng, Difficulty.EASY, max_steps=10
    )
    template_obs = core.build_observation(
        template_scen, template_game, cut_frames=[], turn_idx=1
    )
    assert "interact" not in template_obs
def test_interactive_accepts_interact_for_errand():
    """An errand_runner interactive session lists and accepts ``interact``."""
    from proteus.game.runtime.interactive import InteractiveSession

    session = InteractiveSession(
        "errand_runner", seed=7, play_turns=5, use_default_memory=False
    )
    offered_actions = session.state()["actions"]
    assert "interact" in offered_actions
    # must not raise "invalid action"
    session.step("interact")
def test_full_session_runs_and_emits_discovery_metric():
    """End-to-end SessionRunner with a fake provider that always reports the
    correct body index; confirms discovery_turn and discovery_identified are
    emitted on the resulting SessionTrace.metrics."""
    from proteus.providers.fake import FakeProvider  # deterministic, no network
    from proteus.game.agents.vanilla import VanillaAgent
    from proteus.game.runtime.session import SessionRunner

    # Confirm the true_body_index for seed 7 (standalone and runner use same RNG).
    # The game object itself is discarded; it is constructed only so that the
    # scenario's true_body_index is populated before it is read below.
    scen = get_scenario("errand_runner")()
    MotiveGridGame(scen, random.Random(7), Difficulty.EASY, max_steps=10)
    true = scen.true_body_index  # == 1 for seed 7

    # FakeProvider.responses repeats last entry once exhausted, so a single
    # response is sufficient for any play_turns count.
    provider = FakeProvider(responses=[f"SELF: {true}\nACTION: stay"])
    agent = VanillaAgent(provider)
    runner = SessionRunner(
        "errand_runner", agent, seed=7, play_turns=4,
        use_probe=False, motive_category="errand",
    )
    trace = runner.run()

    assert trace.scenario == "errand_runner"
    assert "discovery_turn" in trace.metrics, (
        f"discovery_turn missing from metrics: {trace.metrics}"
    )
    # Check presence explicitly so a missing metric fails with a readable
    # assertion message instead of a bare KeyError from the comparison below.
    assert "discovery_identified" in trace.metrics, (
        f"discovery_identified missing from metrics: {trace.metrics}"
    )
    # Every turn reported the correct body index -> identified at turn 1.
    assert trace.metrics["discovery_identified"] == 100.0, (
        f"Expected 100.0 but got {trace.metrics['discovery_identified']}"
    )
def test_errand_has_no_move_limit():
    """Stepping past ``play_turns`` leaves the errand session running."""
    # With no turn limit, exhausting play_turns does NOT end the errand session.
    from proteus.game.runtime.interactive import InteractiveSession

    session = InteractiveSession(
        "errand_runner", seed=7, play_turns=2, use_default_memory=False
    )
    steps_taken = 0
    while steps_taken < 6:  # well past play_turns=2
        session.step("stay")
        steps_taken += 1

    snapshot = session.state()
    # still playing, no curfew
    assert snapshot["phase"] != "done"
    assert snapshot["review"] is None
    # unlimited -> no countdown
    assert snapshot["turns_left"] is None
    assert snapshot["play_turns"] is None
def test_reaching_house_triggers_analysis():
    """Placing the focal sprite on the home anchor ends the session with a review."""
    # The analysis (review) appears when the focal reaches the house goal.
    from proteus.game.runtime.interactive import InteractiveSession
    from proteus.game.scenarios import errand_world as w

    session = InteractiveSession(
        "errand_runner", seed=7, play_turns=2, use_default_memory=False
    )
    home_anchor = w.home_target_anchor(w.GAME_LAYOUT)
    session._game.focal_sprite.set_position(*home_anchor)
    # engine sees check_success -> win -> done
    session.step("stay")

    snapshot = session.state()
    assert snapshot["outcome"] == "survived"
    assert snapshot["review"] is not None
    assert "errand" in snapshot["review"]
def test_review_has_errand_summary_when_done():
    """Once the house is reached, the review carries a populated errand summary."""
    from proteus.game.runtime.interactive import InteractiveSession
    from proteus.game.scenarios import errand_world as w

    session = InteractiveSession(
        "errand_runner", seed=7, play_turns=2, use_default_memory=False
    )
    home_anchor = w.home_target_anchor(w.GAME_LAYOUT)
    session._game.focal_sprite.set_position(*home_anchor)
    # reach the house -> done
    session.step("stay")

    review = session.state()["review"]
    assert review is not None
    assert "errand" in review

    summary = review["errand"]
    events = summary["events"]
    expected_event_names = {"crosswalk", "construction", "wallet", "pedestrian", "grass"}
    assert set(events) == expected_event_names
    assert events["grass"] in {"cut", "avoid"}
    assert isinstance(summary["grass_steps"], int)
    assert isinstance(summary["touched_pedestrian"], bool)
    # grass is precisely tracked, not approximate
    assert "grass" not in summary["approximate"]
    assert summary["closest_persona"] in {"civic", "warm_outlaw", "opportunist"}
    required_headline_keys = {"discovery", "generalizability", "coherence"}
    assert required_headline_keys <= set(summary["headline"])