irregular6612 committed on
Commit
ef62102
·
1 Parent(s): e5283e9

feat(errand): handover memory carries courier position + behaviour flags on the text path

Browse files

The LLM only ever sees a MemoryTurn's frame_ascii (via render_memory_block),
but errand_runner left it as a bare "errand t{tick}" placeholder — so the text
handover memory had zero spatial grounding and the memory_brief promise to
"watch how you handled the crosswalk/wallet/pedestrian" was empty (the rich
visual state lived only in agents/cells, used by the web colour replay).

Populate frame_ascii with a compact one-line caption: the courier's position
plus the salient decision flags where the persona's behaviour diverges
(crosswalk-RED/GREEN, construction, grass-shortcut, by-fallen-pedestrian), and
back-annotate the pickup turn with grabbed-wallet (the frame is recorded
pre-move, so a footprint flag could never catch the wallet step). Full per-turn
ASCII maps would blow the prompt (routes run ~80-124 turns); coords convey
trajectory shape while flags carry the transferable, layout-independent lesson.

render_memory_block (shared) and the web path (memory_frames reads agents, not
frame_ascii) are untouched — no effect on predator scenarios or the animation.

proteus/game/runtime/multiagent_director.py CHANGED
@@ -409,6 +409,38 @@ def author_take_a_seat(
409
  # --------------------------------------------------------------------------- #
410
  # errand_runner director (visual-only city errand; single-agent persona demo)
411
  # --------------------------------------------------------------------------- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  def _errand_frame_single(chosen: _Agent, ped: _Agent, ped_rescued: bool,
413
  wallet_present: bool, tick: int, action: str,
414
  events: list[str], idx: int) -> MemoryTurn:
@@ -419,7 +451,9 @@ def _errand_frame_single(chosen: _Agent, ped: _Agent, ped_rescued: bool,
419
  id="pedestrian", kind=("npc_active" if ped_rescued else "npc_down"),
420
  pos=(ped.x, ped.y), size=ped.size))
421
  return MemoryTurn(
422
- turn_idx=idx, frame_ascii=f"errand t{tick}", action=action,
 
 
423
  focal_pos=(0, 0), predator_pos=(0, 0), agents=frames,
424
  cells=w.overlay_cells(w.MEMORY_LAYOUT, tick, wallet_present=wallet_present),
425
  resources=[], events=list(events))
@@ -495,6 +529,10 @@ def author_errand_runner(*, seed: int, persona: str | None = None,
495
  chosen.x, chosen.y = cand
496
  if wallet_present and (lay.wallet in set(w.footprint((chosen.x, chosen.y), chosen.size))):
497
  wallet_present = False
 
 
 
 
498
  if ped_rescued: # rescued pedestrian drifts near origin
499
  order = ["up", "down", "left", "right"]
500
  order = order[t % 4:] + order[:t % 4]
 
409
  # --------------------------------------------------------------------------- #
410
  # errand_runner director (visual-only city errand; single-agent persona demo)
411
  # --------------------------------------------------------------------------- #
412
def _errand_memory_caption(chosen: _Agent, ped: _Agent, ped_rescued: bool,
                           wallet_present: bool, tick: int) -> str:
    """Build the compact one-line caption stored as a memory turn's ``frame_ascii``.

    The web replay rebuilds its colour grid from ``agents``/``cells``; the LLM,
    by contrast, only reads ``frame_ascii`` (through ``render_memory_block``).
    A plain ``"errand tN"`` placeholder therefore left the text handover memory
    with no spatial grounding. This caption carries the courier's position
    followed by the decision flags where persona behaviour diverges, all on a
    single line — a full ASCII map per turn would blow the prompt (routes run
    ~80-120 turns).

    Memory is played out on ``MEMORY_LAYOUT`` (a *similar*, not identical, city
    to the live ``GAME_LAYOUT``), so the coordinates communicate the shape of
    the trajectory while the flags carry the transferable behavioural lesson.

    Args:
        chosen: The courier; its ``x``, ``y`` and ``size`` give the footprint.
        ped: The pedestrian; only its centre is used, for the distance check.
        ped_rescued: When true, the ``by-fallen-pedestrian`` flag is suppressed.
        wallet_present: Accepted alongside the other frame state; this function
            does not read it.
        tick: Memory tick, echoed in the caption and used for the light colour.

    Returns:
        ``"t{tick} you@({x},{y})"`` followed by zero or more space-separated
        flags in the fixed order crosswalk, construction, grass-shortcut,
        by-fallen-pedestrian.
    """
    from proteus.game.scenarios import errand_world as w

    layout = w.MEMORY_LAYOUT
    occupied = set(w.footprint((chosen.x, chosen.y), chosen.size))

    # The position token always leads; every flag that fires is appended after it.
    parts: list[str] = [f"t{tick} you@({chosen.x},{chosen.y})"]

    if occupied & set(w.crosswalk_cells(layout)):
        # The light colour is only meaningful while standing on the crosswalk.
        light = "RED" if w.is_red(tick) else "GREEN"
        parts.append("crosswalk-" + light)

    if occupied & w.constr_cells(layout):
        parts.append("construction")

    if occupied & w.grass_cells(layout):
        parts.append("grass-shortcut")

    if not ped_rescued:
        # Proximity is measured centre-to-centre in Manhattan distance.
        gap = _manhattan(_center(chosen), _center(ped))
        if gap <= w.PED_HELP_RADIUS:
            parts.append("by-fallen-pedestrian")

    return " ".join(parts)
442
+
443
+
444
  def _errand_frame_single(chosen: _Agent, ped: _Agent, ped_rescued: bool,
445
  wallet_present: bool, tick: int, action: str,
446
  events: list[str], idx: int) -> MemoryTurn:
 
451
  id="pedestrian", kind=("npc_active" if ped_rescued else "npc_down"),
452
  pos=(ped.x, ped.y), size=ped.size))
453
  return MemoryTurn(
454
+ turn_idx=idx,
455
+ frame_ascii=_errand_memory_caption(chosen, ped, ped_rescued, wallet_present, tick),
456
+ action=action,
457
  focal_pos=(0, 0), predator_pos=(0, 0), agents=frames,
458
  cells=w.overlay_cells(w.MEMORY_LAYOUT, tick, wallet_present=wallet_present),
459
  resources=[], events=list(events))
 
529
  chosen.x, chosen.y = cand
530
  if wallet_present and (lay.wallet in set(w.footprint((chosen.x, chosen.y), chosen.size))):
531
  wallet_present = False
532
+ # Tag the move that landed on the wallet so the text-path memory shows
533
+ # the pickup (the frame was recorded pre-move, so the footprint flag
534
+ # could never catch it — back-annotate the just-appended turn instead).
535
+ turns[-1].frame_ascii += " grabbed-wallet"
536
  if ped_rescued: # rescued pedestrian drifts near origin
537
  order = ["up", "down", "left", "right"]
538
  order = order[t % 4:] + order[:t % 4]
tests/runtime/test_director_errand_runner.py CHANGED
@@ -64,6 +64,25 @@ def test_memory_paints_walls_grass_house_and_one_courier():
64
  assert flat.count(w.C_COURIER) == w.AGENT * w.AGENT # exactly ONE 2x2 courier
65
 
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def test_civic_avoids_grass_cut_personas_step_on_it():
68
  from proteus.game.scenarios import errand_world as w
69
  lay = w.MEMORY_LAYOUT
 
64
  assert flat.count(w.C_COURIER) == w.AGENT * w.AGENT # exactly ONE 2x2 courier
65
 
66
 
67
def test_memory_text_path_carries_position_and_decision_context():
    """The text memory the LLM reads must locate the persona on every turn.

    ``render_memory_block`` output has to show WHERE the courier stood each
    turn together with the salient decision context. A bare 'errand tN'
    placeholder would leave the handover memory without spatial grounding and
    make the promised "watch how you handled the crosswalk/wallet/pedestrian"
    empty.
    """
    from proteus.game.runtime.memory import render_memory_block

    opportunist_run = author_errand_runner(seed=7, persona="opportunist")
    opportunist_text = render_memory_block(opportunist_run)

    # The old placeholder is gone, and each memory turn contributes exactly one
    # coordinate token.
    assert "errand t" not in opportunist_text
    expected_turns = len(opportunist_run.memory_turns)
    assert opportunist_text.count("you@(") == expected_turns

    # The opportunist picks the wallet up, so the pickup surfaces in its memory.
    assert "wallet" in opportunist_text.lower()

    # The civic citizen never stands on the wallet, so no pickup is mentioned.
    civic_run = author_errand_runner(seed=7, persona="civic")
    civic_text = render_memory_block(civic_run)
    assert "wallet" not in civic_text.lower()
84
+
85
+
86
  def test_civic_avoids_grass_cut_personas_step_on_it():
87
  from proteus.game.scenarios import errand_world as w
88
  lay = w.MEMORY_LAYOUT