Spaces:

irregular6612
/

AgentnessBench

Sleeping

irregular6612 committed 29 days ago

Commit

ef62102

1 Parent(s): e5283e9

feat(errand): handover memory carries courier position + behaviour flags on the text path

The LLM only ever sees a MemoryTurn's frame_ascii (via render_memory_block),
but errand_runner left it as a bare "errand t{tick}" placeholder — so the text
handover memory had zero spatial grounding and the memory_brief promise to
"watch how you handled the crosswalk/wallet/pedestrian" was empty (the rich
visual state lived only in agents/cells, used by the web colour replay).

Populate frame_ascii with a compact one-line caption: the courier's position
plus the salient decision flags where the persona's behaviour diverges
(crosswalk-RED/GREEN, construction, grass-shortcut, by-fallen-pedestrian), and
back-annotate the pickup turn with grabbed-wallet (the frame is recorded
pre-move, so a footprint flag could never catch the wallet step). Full per-turn
ASCII maps would blow the prompt (routes run ~80-124 turns); coords convey
trajectory shape while flags carry the transferable, layout-independent lesson.

render_memory_block (shared) and the web path (memory_frames reads agents, not
frame_ascii) are untouched — no effect on predator scenarios or the animation.

Files changed (2) hide show

proteus/game/runtime/multiagent_director.py +39 -1
tests/runtime/test_director_errand_runner.py +19 -0

proteus/game/runtime/multiagent_director.py CHANGED Viewed

@@ -409,6 +409,38 @@ def author_take_a_seat(
 # --------------------------------------------------------------------------- #
 # errand_runner director (visual-only city errand; single-agent persona demo)
 # --------------------------------------------------------------------------- #
 def _errand_frame_single(chosen: _Agent, ped: _Agent, ped_rescued: bool,
                          wallet_present: bool, tick: int, action: str,
                          events: list[str], idx: int) -> MemoryTurn:
@@ -419,7 +451,9 @@ def _errand_frame_single(chosen: _Agent, ped: _Agent, ped_rescued: bool,
         id="pedestrian", kind=("npc_active" if ped_rescued else "npc_down"),
         pos=(ped.x, ped.y), size=ped.size))
     return MemoryTurn(
-        turn_idx=idx, frame_ascii=f"errand t{tick}", action=action,
         focal_pos=(0, 0), predator_pos=(0, 0), agents=frames,
         cells=w.overlay_cells(w.MEMORY_LAYOUT, tick, wallet_present=wallet_present),
         resources=[], events=list(events))
@@ -495,6 +529,10 @@ def author_errand_runner(*, seed: int, persona: str | None = None,
                 chosen.x, chosen.y = cand
         if wallet_present and (lay.wallet in set(w.footprint((chosen.x, chosen.y), chosen.size))):
             wallet_present = False
         if ped_rescued:                              # rescued pedestrian drifts near origin
             order = ["up", "down", "left", "right"]
             order = order[t % 4:] + order[:t % 4]

 # --------------------------------------------------------------------------- #
 # errand_runner director (visual-only city errand; single-agent persona demo)
 # --------------------------------------------------------------------------- #
+def _errand_memory_caption(chosen: _Agent, ped: _Agent, ped_rescued: bool,
+                           wallet_present: bool, tick: int) -> str:
+    """One-line text-path summary of the persona's state this memory tick.
+    The web replay reconstructs a colour grid from ``agents``/``cells``, but the
+    LLM only ever sees ``frame_ascii`` (via ``render_memory_block``). A bare
+    ``"errand tN"`` placeholder gave the model zero spatial grounding, so the
+    handover "watch how you handled the crosswalk/wallet/pedestrian" promise was
+    empty. This packs the courier's position plus the salient decision context
+    (where the persona's behaviour actually diverges) into a single compact line
+    — full per-turn ASCII maps would blow the prompt (routes run ~80-120 turns).
+    Memory runs on ``MEMORY_LAYOUT`` (a *similar*, not identical, city to the
+    live ``GAME_LAYOUT``), so the coordinates convey trajectory shape while the
+    flags carry the transferable behavioural lesson.
+
+    Args:
+        chosen: The courier; its ``x``, ``y`` and ``size`` are read.
+        ped: The pedestrian; only passed to ``_center`` for the distance check.
+        ped_rescued: When true, the ``by-fallen-pedestrian`` flag is suppressed.
+        wallet_present: Accepted but not read by this function.
+        tick: Tick number; printed in the caption and passed to ``w.is_red``.
+
+    Returns:
+        ``"t{tick} you@({x},{y})"`` followed, when any flag applies, by a space
+        and the space-separated flags in this fixed order:
+        ``crosswalk-RED``/``crosswalk-GREEN``, ``construction``,
+        ``grass-shortcut``, ``by-fallen-pedestrian``.
+    """
+    # NOTE(review): function-scope import — presumably to avoid an import cycle
+    # with the scenarios package; confirm before hoisting to module level.
+    from proteus.game.scenarios import errand_world as w
+    lay = w.MEMORY_LAYOUT
+    # Cells covered by the courier at its current position and size.
+    fp = set(w.footprint((chosen.x, chosen.y), chosen.size))
+    flags: list[str] = []
+    # NOTE(review): crosswalk_cells is wrapped in set() while constr_cells and
+    # grass_cells are intersected directly — presumably those two already
+    # return sets; verify against errand_world.
+    if fp & set(w.crosswalk_cells(lay)):
+        flags.append("crosswalk-" + ("RED" if w.is_red(tick) else "GREEN"))
+    if fp & w.constr_cells(lay):
+        flags.append("construction")
+    if fp & w.grass_cells(lay):
+        flags.append("grass-shortcut")
+    # Only flagged while the pedestrian has not been rescued and the courier's
+    # centre is within PED_HELP_RADIUS (Manhattan distance) of the pedestrian's.
+    if not ped_rescued and _manhattan(_center(chosen), _center(ped)) <= w.PED_HELP_RADIUS:
+        flags.append("by-fallen-pedestrian")
+    # No trailing space when there are no flags.
+    suffix = (" " + " ".join(flags)) if flags else ""
+    return f"t{tick} you@({chosen.x},{chosen.y}){suffix}"
 def _errand_frame_single(chosen: _Agent, ped: _Agent, ped_rescued: bool,
                          wallet_present: bool, tick: int, action: str,
                          events: list[str], idx: int) -> MemoryTurn:
         id="pedestrian", kind=("npc_active" if ped_rescued else "npc_down"),
         pos=(ped.x, ped.y), size=ped.size))
     return MemoryTurn(
+        turn_idx=idx,
+        frame_ascii=_errand_memory_caption(chosen, ped, ped_rescued, wallet_present, tick),
+        action=action,
         focal_pos=(0, 0), predator_pos=(0, 0), agents=frames,
         cells=w.overlay_cells(w.MEMORY_LAYOUT, tick, wallet_present=wallet_present),
         resources=[], events=list(events))
                 chosen.x, chosen.y = cand
         if wallet_present and (lay.wallet in set(w.footprint((chosen.x, chosen.y), chosen.size))):
             wallet_present = False
+            # Tag the move that landed on the wallet so the text-path memory shows
+            # the pickup (the frame was recorded pre-move, so the footprint flag
+            # could never catch it — back-annotate the just-appended turn instead).
+            turns[-1].frame_ascii += " grabbed-wallet"
         if ped_rescued:                              # rescued pedestrian drifts near origin
             order = ["up", "down", "left", "right"]
             order = order[t % 4:] + order[:t % 4]

tests/runtime/test_director_errand_runner.py CHANGED Viewed

@@ -64,6 +64,25 @@ def test_memory_paints_walls_grass_house_and_one_courier():
     assert flat.count(w.C_COURIER) == w.AGENT * w.AGENT                  # exactly ONE 2x2 courier
 def test_civic_avoids_grass_cut_personas_step_on_it():
     from proteus.game.scenarios import errand_world as w
     lay = w.MEMORY_LAYOUT

     assert flat.count(w.C_COURIER) == w.AGENT * w.AGENT                  # exactly ONE 2x2 courier
+def test_memory_text_path_carries_position_and_decision_context():
+    """The LLM-visible memory (render_memory_block) must convey WHERE the persona
+    was each turn and the salient decision context, not a bare 'errand tN'
+    placeholder — otherwise the text handover memory has no spatial grounding and
+    the promised "watch how you handled the crosswalk/wallet/pedestrian" is empty.
+    """
+    from proteus.game.runtime.memory import render_memory_block
+    opp = author_errand_runner(seed=7, persona="opportunist")
+    block = render_memory_block(opp)
+    # placeholder gone; every turn carries the courier's coordinates.
+    assert "errand t" not in block
+    # Exactly one "you@(" marker per memory turn — no turn is missing a caption
+    # and none carries two.
+    assert block.count("you@(") == len(opp.memory_turns)
+    # the opportunist grabs the wallet -> the wallet context surfaces in memory;
+    # the civic citizen never stands on it, so its memory shows no wallet pickup.
+    assert "wallet" in block.lower()
+    # Same seed as the opportunist run, so the persona is the only argument
+    # that differs between the two calls.
+    civic = author_errand_runner(seed=7, persona="civic")
+    assert "wallet" not in render_memory_block(civic).lower()
 def test_civic_avoids_grass_cut_personas_step_on_it():
     from proteus.game.scenarios import errand_world as w
     lay = w.MEMORY_LAYOUT