irregular6612's picture
docs(memory): note errand C=9 / distractor_idx collision is safe only single-agent
f571c87
Raw
History Blame Contribute Delete
12.7 kB
"""Memory checkpoint models + persistence for the CP7 memory pre-roll.
A MemoryCheckpoint is one full-information self-play episode the playing model
ran *before* the scored game. It is persisted as a single-file JSON checkpoint
per (model, timestamp) and shown at the handover as the model's prior
experience. Like ``trace.py`` this module is a serialization boundary: it
imports only pydantic + stdlib and NO other runtime module.
"""
from __future__ import annotations
import re
from pathlib import Path
from pydantic import BaseModel, Field
class AgentFrame(BaseModel):
    """One sprite's render state in a multi-agent memory turn.

    Attributes:
        id: Stable identifier (``"a0".."a3"`` or ``"predator"``).
        kind: ``"agent"`` or ``"predator"`` (drives shape + colour). The
            renderer in ``memory_frames`` additionally recognises the NPC
            kinds listed in ``_NPC_COLOR`` (``"npc_down"``, ``"npc_active"``);
            any other value is painted as an ordinary agent.
        pos: Top-left anchor ``(x, y)`` at the start of this turn.
        size: Footprint side length (agent=2, predator=3). Note that
            ``memory_frames`` always paints a predator with the fixed 3x3
            mouth-shaped footprint and only reads ``size`` for other kinds.
        alive: Painted only while alive (eaten agents disappear).
        is_chosen: The agent the player continues (painted in the focal colour).
        facing: Predator mouth direction for the ㄷ shape (render only). An
            unrecognised value is rendered as ``"right"``.
    """

    id: str
    kind: str
    pos: tuple[int, int]
    size: int
    alive: bool = True
    is_chosen: bool = False
    facing: str = "right"
class MemoryTurn(BaseModel):
    """A single self-play turn recorded during the memory episode.

    Attributes:
        turn_idx: 1-based position of the turn within the memory episode.
        frame_ascii: The pre-move grid the model was shown on this turn.
        action: The action the model committed to.
        reasoning: Excerpt of the model's reasoning (possibly truncated).
        focal_pos: Focal ``(x, y)`` BEFORE the move (a JSON array on disk).
        predator_pos: Predator ``(x, y)`` BEFORE the move.
        agents: Per-sprite render states. A non-empty list selects the
            multi-agent render path.
        resources: Collectible resource cells still present on this turn.
        cells: Generic coloured overlay cells ``(x, y, palette_idx)`` for this
            turn, painted after walls and before agents in
            :func:`memory_frames`. Lets the errand_runner director draw
            per-tick world objects (the recolouring traffic-light bar, roads,
            construction, wallet, home) without a bespoke field per object
            type. Empty for all legacy single-/multi-agent turns.
        events: Narration for this turn, e.g. ``"a1 eaten"`` /
            ``"a0 got resource"``.
    """

    turn_idx: int
    frame_ascii: str
    action: str
    reasoning: str = ""
    focal_pos: tuple[int, int]
    predator_pos: tuple[int, int]
    agents: list[AgentFrame] = Field(default_factory=list)
    resources: list[tuple[int, int]] = Field(default_factory=list)
    cells: list[tuple[int, int, int]] = Field(default_factory=list)
    events: list[str] = Field(default_factory=list)
class MemoryCheckpoint(BaseModel):
    """A persisted full-information self-play episode used as handover memory.

    Attributes:
        model: Provider model identifier that produced the episode.
        scenario: Registered scenario name.
        motive_category: Category label (defaults to ``"survival"``).
        difficulty: Difficulty band string.
        seed: Seed of the memory world (same as the scored game).
        created_at: ISO-ish stamp from an injectable clock (filesystem-safe).
        memory_turns: Per-turn records of the episode, in play order.
        outcome: ``"survived"`` or ``"eliminated"``.
        transparent_prompt: The full-info brief used to drive the episode.
        persona_weight_id: Public id of the hidden persona that drove this
            demonstration (CP8), or ``None`` for a model self-play memory.
            Only the id is stored — the raw reward weights are never
            serialized into the participant-visible checkpoint.
        chosen_agent_id: Id of the survivor / resource winner the player
            continues (multi-agent only).
        wall_rects: Static wall rectangles, inclusive ``(x0, y0, x1, y1)``, of
            the episode world. Populated for scenarios whose observation is
            prose (no ASCII grid) so the web replay can paint walls it cannot
            recover from ``frame_ascii``; empty for grid scenarios, whose
            walls are already in each ``frame_ascii``.
        food_cells: Static 1x1 food cells of the episode world (same
            rationale as ``wall_rects``).
    """

    model: str
    scenario: str
    motive_category: str = "survival"
    difficulty: str
    seed: int | None = None
    created_at: str
    memory_turns: list[MemoryTurn] = Field(default_factory=list)
    outcome: str
    transparent_prompt: str
    persona_weight_id: str | None = None
    chosen_agent_id: str | None = None
    wall_rects: list[tuple[int, int, int, int]] = Field(default_factory=list)
    food_cells: list[tuple[int, int]] = Field(default_factory=list)
def _safe(name: str) -> str:
    """Make a model string safe as a single path segment.

    Every character outside ``[A-Za-z0-9._-]`` is replaced by ``_``. An empty
    input yields ``"model"``. A result consisting only of dots (``"."``,
    ``".."``, ...) has its dots replaced as well: such segments name the
    current/parent directory, so joining them onto a root would escape the
    intended sub-directory instead of creating one.

    Args:
        name: Arbitrary identifier (model id, timestamp stamp, ...).

    Returns:
        A non-empty string usable as exactly one path component.
    """
    cleaned = re.sub(r"[^A-Za-z0-9._-]", "_", name)
    if not cleaned:
        return "model"
    if not cleaned.strip("."):
        # "." / ".." would resolve to the root itself or its parent.
        return "_" * len(cleaned)
    return cleaned
def save_checkpoint(ckpt: MemoryCheckpoint, root: str | Path = "runs/memory") -> Path:
    """Write *ckpt* to ``<root>/<safe(model)>/<safe(created_at)>.json``.

    Parent directories are created as needed. The filename is the
    ``created_at`` stamp passed through ``_safe``, so on-disk ordering matches
    creation order as long as the stamps themselves sort chronologically.

    The JSON is first written to a sibling ``<name>.json.tmp`` file and then
    renamed over the final path. A write that is interrupted part-way
    therefore never leaves a truncated ``*.json`` behind for
    ``latest_for_model`` to pick up as the newest checkpoint.

    Args:
        ckpt: The checkpoint to persist.
        root: Directory under which per-model sub-directories live.

    Returns:
        The path of the written checkpoint file.
    """
    directory = Path(root) / _safe(ckpt.model)
    directory.mkdir(parents=True, exist_ok=True)
    path = directory / f"{_safe(ckpt.created_at)}.json"
    payload = ckpt.model_dump_json()
    # The ".tmp" suffix keeps the staging file out of the "*.json" glob.
    staging = path.with_name(path.name + ".tmp")
    try:
        staging.write_text(payload, encoding="utf-8")
        staging.replace(path)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        staging.unlink(missing_ok=True)
    return path
def load_checkpoint(path: str | Path) -> MemoryCheckpoint:
    """Load and validate the MemoryCheckpoint stored at *path*.

    The file is read as UTF-8 text and handed to
    ``MemoryCheckpoint.model_validate_json``; whatever that call raises for
    malformed content propagates unchanged.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    source = Path(path)
    return MemoryCheckpoint.model_validate_json(source.read_text(encoding="utf-8"))
def latest_for_model(
    model: str, root: str | Path = "runs/memory"
) -> MemoryCheckpoint | None:
    """Return the most recent checkpoint saved for *model*, if any.

    The per-model directory is ``<root>/<safe(model)>``. Among its ``*.json``
    files the one whose path sorts greatest is taken as the newest: the
    filename stamps are zero-padded ISO-ish, so lexical order equals
    chronological order.

    Returns:
        The loaded checkpoint, or ``None`` when the directory is missing or
        contains no ``*.json`` file.
    """
    model_dir = Path(root) / _safe(model)
    if not model_dir.is_dir():
        return None
    newest = max(model_dir.glob("*.json"), default=None)
    if newest is None:
        return None
    return load_checkpoint(newest)
def render_memory_block(checkpoint: MemoryCheckpoint) -> str:
    """Format the memory episode as a labelled observation block.

    Pure function of the checkpoint (no engine involved). SessionRunner
    prepends the result to the turn-1 observation so the handover history
    reads as the model's own earlier play.

    Returns:
        A header line followed, for every memory turn in order, by a
        ``Memory <idx>:`` label, the turn's ASCII frame and the chosen action,
        all joined with newlines.
    """
    lines = ["MEMORY — your earlier play on this scenario:"]
    for turn in checkpoint.memory_turns:
        lines.extend(
            (
                f"Memory {turn.turn_idx}:",
                turn.frame_ascii,
                f" you chose: {turn.action}",
            )
        )
    return "\n".join(lines)
def _invert_legend(legend: dict[int, str]) -> dict[str, int]:
    """Flip a ``palette index -> symbol`` legend into ``symbol -> index``.

    When several indices share one symbol, the index appearing last in
    *legend* wins.
    """
    inverted: dict[str, int] = {}
    for palette_idx, symbol in legend.items():
        inverted[symbol] = palette_idx
    return inverted
def _ascii_to_grid(frame: str, sym2idx: dict[str, int]) -> list[list[int]] | None:
    """Convert *frame* into palette indices if it is a legend-symbol grid.

    A frame qualifies only when it has at least two newline-separated rows,
    all rows share one non-zero width, and every character is a key of
    *sym2idx*.

    Returns:
        One list of palette indices per row, or ``None`` when the frame does
        not qualify (e.g. a prose observation).
    """
    rows = frame.split("\n")
    if len(rows) < 2:
        return None
    width = len(rows[0])
    if width == 0 or any(len(row) != width for row in rows):
        return None
    parsed: list[list[int]] = []
    for row in rows:
        if not all(symbol in sym2idx for symbol in row):
            return None
        parsed.append([sym2idx[symbol] for symbol in row])
    return parsed
# Legacy single-agent block sizes — mirror the resized template geometry
# (3x3 predator, 2x2 focal). memory.py stays scenarios-import-free, so these
# are kept in sync by hand. They are the side lengths of the solid squares
# `memory_frames` paints at `predator_pos` / `focal_pos` when a turn has no
# `agents` and its `frame_ascii` is not a parseable grid. The multi-agent path
# instead uses the per-AgentFrame `size` (for non-predator sprites).
_LEGACY_PREDATOR_BLOCK = 3
_LEGACY_FOCAL_BLOCK = 2
# Unpainted (mouth) cells of the 3x3 ㄷ predator for each facing, as
# (col, row) offsets from the sprite's top-left corner. Every mouth is the
# centre cell (1, 1) plus the middle cell of the edge the predator faces.
_PRED_MOUTH: dict[str, set[tuple[int, int]]] = {
    facing: {(1, 1), edge_centre}
    for facing, edge_centre in (
        ("right", (2, 1)),
        ("left", (0, 1)),
        ("down", (1, 2)),
        ("up", (1, 0)),
    )
}


def _predator_solid_offsets(facing: str) -> list[tuple[int, int]]:
    """List the (col, row) offsets that get PAINTED for a 3x3 ㄷ predator.

    The offsets are produced row by row (row 0 first, columns left to right)
    and exclude the two mouth cells for *facing*. An unknown *facing* is
    treated as ``"right"``.
    """
    if facing in _PRED_MOUTH:
        open_cells = _PRED_MOUTH[facing]
    else:
        open_cells = _PRED_MOUTH["right"]
    solid: list[tuple[int, int]] = []
    for row in range(3):
        for col in range(3):
            if (col, row) in open_cells:
                continue
            solid.append((col, row))
    return solid
# NPC agent kinds -> palette colour (errand_runner fallen-pedestrian rescue).
# `memory_frames` paints an agent whose `kind` is a key of this table as a
# solid `size` x `size` square in the mapped palette index.
_NPC_COLOR: dict[str, int] = {"npc_down": 11, "npc_active": 14}  # yellow -> green
def memory_frames(
    checkpoint: MemoryCheckpoint, *, legend: dict[int, str], grid_size: tuple[int, int]
) -> list[dict]:
    """Rebuild one renderable colour grid for every memory turn.

    Pure: no engine, no IO.

    Three cases are handled per turn:

    * ``agents`` is non-empty (multi-agent path): start from background plus
      ``checkpoint.wall_rects``, then paint, in this order, the turn's overlay
      ``cells``, its ``resources``, and every living agent.
    * ``frame_ascii`` parses as a rectangular grid of legend symbols: the
      parsed grid is used as-is.
    * otherwise (prose frame): background plus walls, then
      ``checkpoint.food_cells``, then solid predator and focal squares at the
      recorded positions.

    Args:
        checkpoint: The memory episode to render.
        legend: ``palette index -> symbol`` mapping; inverted to look up the
            indices for ``.``, ``A``, ``C``, ``B``, ``#`` and ``F`` (each with
            a hard-coded fallback when the symbol is absent).
        grid_size: ``(width, height)`` of grids built from scratch.

    Returns:
        ``[{turn_idx, action, grid, events}]`` where ``grid`` is a list of
        rows of palette indices and ``events`` is a copy of the turn's
        narration list.
    """
    sym2idx = _invert_legend(legend)
    bg = sym2idx.get(".", 5)
    focal_idx = sym2idx.get("A", 1)
    # The chosen agent paints the legend's focal sprite. Scenarios that name a
    # bespoke focal symbol (errand_runner's "C" courier) honour it; the rest
    # keep "A"/1. Distractor (non-chosen) agents stay on `distractor_idx`.
    chosen_idx = sym2idx.get("C", focal_idx)
    predator_idx = sym2idx.get("B", 2)
    wall_idx = sym2idx.get("#", 3)
    food_idx = sym2idx.get("F", 14)
    # NB: errand's "C"=9 coincides with distractor_idx (9). Fine while errand
    # is single-agent (every agent is is_chosen, so the distractor branch is
    # unused); a future multi-agent errand would need distractor_idx to
    # diverge or the focal and distractor couriers would be indistinguishable.
    distractor_idx = 9  # COLOR_MAP blue
    w, h = grid_size

    def put(grid, x, y, idx):
        """Set one cell, silently ignoring coordinates outside the grid."""
        if 0 <= x < w and 0 <= y < h:
            grid[y][x] = idx

    def paint_square(grid, x0, y0, side, idx):
        """Fill the ``side`` x ``side`` square anchored at ``(x0, y0)``."""
        for dy in range(side):
            for dx in range(side):
                put(grid, x0 + dx, y0 + dy, idx)

    def walled_canvas():
        """Background grid with the checkpoint's wall rectangles painted."""
        grid = [[bg] * w for _ in range(h)]
        for (rx0, ry0, rx1, ry1) in checkpoint.wall_rects:
            # Rectangles are inclusive; clamp them to the grid.
            for y in range(max(0, ry0), min(h, ry1 + 1)):
                for x in range(max(0, rx0), min(w, rx1 + 1)):
                    grid[y][x] = wall_idx
        return grid

    def render_multi_agent(turn):
        """Grid for a turn that carries per-sprite ``agents``."""
        grid = walled_canvas()
        for (cx, cy, cidx) in turn.cells:
            put(grid, cx, cy, cidx)
        for (fx, fy) in turn.resources:
            put(grid, fx, fy, food_idx)
        for sprite in turn.agents:
            if not sprite.alive:
                continue
            if sprite.kind == "predator":
                # Mouth-shaped footprint; the mouth cells stay unpainted.
                for (c, r) in _predator_solid_offsets(sprite.facing):
                    put(grid, sprite.pos[0] + c, sprite.pos[1] + r, predator_idx)
            elif sprite.kind in _NPC_COLOR:
                paint_square(
                    grid, sprite.pos[0], sprite.pos[1], sprite.size,
                    _NPC_COLOR[sprite.kind],
                )
            else:
                colour = chosen_idx if sprite.is_chosen else distractor_idx
                paint_square(grid, sprite.pos[0], sprite.pos[1], sprite.size, colour)
        return grid

    def render_legacy(turn):
        """Grid for a single-agent turn: parsed ASCII, else rebuilt from prose."""
        parsed = _ascii_to_grid(turn.frame_ascii, sym2idx)
        if parsed is not None:
            return parsed
        grid = walled_canvas()
        for (fx, fy) in checkpoint.food_cells:
            put(grid, fx, fy, food_idx)
        # Focal is painted last so it wins where the two squares overlap.
        paint_square(
            grid, turn.predator_pos[0], turn.predator_pos[1],
            _LEGACY_PREDATOR_BLOCK, predator_idx,
        )
        paint_square(
            grid, turn.focal_pos[0], turn.focal_pos[1],
            _LEGACY_FOCAL_BLOCK, focal_idx,
        )
        return grid

    frames: list[dict] = []
    for turn in checkpoint.memory_turns:
        grid = render_multi_agent(turn) if turn.agents else render_legacy(turn)
        frames.append(
            {
                "turn_idx": turn.turn_idx,
                "action": turn.action,
                "grid": grid,
                "events": list(turn.events),
            }
        )
    return frames