"""Memory checkpoint models + persistence for the CP7 memory pre-roll.

A MemoryCheckpoint is one full-information self-play episode the playing model
ran *before* the scored game. It is persisted as a single-file JSON checkpoint
per (model, timestamp) and shown at the handover as the model's prior
experience.

Like ``trace.py`` this module is a serialization boundary: it imports only
pydantic + stdlib and NO other runtime module.
"""

from __future__ import annotations

import re
from pathlib import Path

from pydantic import BaseModel, Field


class AgentFrame(BaseModel):
    """One sprite's render state in a multi-agent memory turn.

    Attributes:
        id: Stable identifier (``"a0".."a3"`` or ``"predator"``).
        kind: ``"agent"`` or ``"predator"`` (drives shape + colour).
        pos: Top-left anchor ``(x, y)`` at the start of this turn.
        size: Footprint side length (agent=2, predator=3).
        alive: Painted only while alive (eaten agents disappear).
        is_chosen: The agent the player continues (painted in the focal colour).
        facing: Predator mouth direction for the ㄷ shape (render only).
    """

    id: str
    kind: str
    pos: tuple[int, int]
    size: int
    alive: bool = True
    is_chosen: bool = False
    facing: str = "right"


class MemoryTurn(BaseModel):
    """One self-play turn of the memory episode.

    Attributes:
        turn_idx: 1-based index within the memory episode.
        frame_ascii: The pre-move grid the model saw this memory turn.
        action: The action the model committed.
        reasoning: The model's reasoning excerpt (may be truncated).
        focal_pos: Focal ``(x, y)`` BEFORE the move (JSON array on disk).
        predator_pos: Predator ``(x, y)`` BEFORE the move.
    """

    turn_idx: int
    frame_ascii: str
    action: str
    reasoning: str = ""
    focal_pos: tuple[int, int]
    predator_pos: tuple[int, int]
    agents: list[AgentFrame] = Field(default_factory=list)
    """Per-sprite render states; non-empty ⇒ the multi-agent render path."""
    resources: list[tuple[int, int]] = Field(default_factory=list)
    """Collectible resource cells still present this turn."""
    cells: list[tuple[int, int, int]] = Field(default_factory=list)
    """Generic coloured overlay cells ``(x, y, palette_idx)`` for this turn.

    Painted after walls and before agents in :func:`memory_frames`. Lets the
    errand_runner director draw per-tick world objects (the recolouring
    traffic-light bar, roads, construction, wallet, home) without a bespoke
    field per object type. Empty for all legacy single-/multi-agent turns."""
    events: list[str] = Field(default_factory=list)
    """Narration for this turn, e.g. ``"a1 eaten"`` / ``"a0 got resource"``."""


class MemoryCheckpoint(BaseModel):
    """A persisted full-info self-play episode used as handover memory.

    Attributes:
        model: Provider model identifier that produced the episode.
        scenario: Registered scenario name.
        motive_category: Category label (default ``"survival"``).
        difficulty: Difficulty band string.
        seed: Seed of the memory world (same as the scored game).
        created_at: ISO-ish stamp from an injectable clock (filesystem-safe).
        memory_turns: The episode's per-turn records, in play order.
        outcome: ``"survived"`` or ``"eliminated"``.
        transparent_prompt: The full-info brief used to drive the episode.
        persona_weight_id: Public id of the hidden persona that drove this
            demonstration (CP8), or ``None`` for a model self-play memory.
            Only the id is stored — the raw reward weights are never
            serialized into the participant-visible checkpoint.
    """

    model: str
    scenario: str
    motive_category: str = "survival"
    difficulty: str
    seed: int | None = None
    created_at: str
    memory_turns: list[MemoryTurn] = Field(default_factory=list)
    outcome: str
    transparent_prompt: str
    persona_weight_id: str | None = None
    chosen_agent_id: str | None = None
    """Id of the survivor / resource winner the player continues (multi-agent only)."""
    wall_rects: list[tuple[int, int, int, int]] = Field(default_factory=list)
    """Static wall rectangles (inclusive ``(x0,y0,x1,y1)``) of the episode world.

    Populated for scenarios whose observation is prose (no ASCII grid), so the
    web replay can paint walls it cannot recover from ``frame_ascii``. Empty for
    grid scenarios (their walls are already in each ``frame_ascii``)."""
    food_cells: list[tuple[int, int]] = Field(default_factory=list)
    """Static 1x1 food cells of the episode world (same rationale as wall_rects)."""


def _safe(name: str) -> str:
    """Make a model string safe as a single path segment.

    Every character outside ``[A-Za-z0-9._-]`` is replaced with ``_``. A result
    made up only of dots (``"."``, ``".."``, ...) would still act as a
    path-navigation segment and let a checkpoint escape its root directory, so
    those dots are replaced with ``_`` as well. An empty result falls back to
    ``"model"``.
    """
    cleaned = re.sub(r"[^A-Za-z0-9._-]", "_", name)
    if not cleaned.strip("."):
        # Nothing but dots (or nothing at all): neutralise "." / "..".
        cleaned = "_" * len(cleaned)
    return cleaned or "model"


def save_checkpoint(ckpt: MemoryCheckpoint, root: str | Path = "runs/memory") -> Path:
    """Write *ckpt* to ``{root}/{model}/{created_at}.json`` and return the path.

    Both the model directory and the file stem are passed through
    :func:`_safe`. Creates parent directories. The filename is the (already
    filesystem-safe) ``created_at`` stamp so on-disk ordering matches creation
    order. An existing file with the same stamp is overwritten.
    """
    directory = Path(root) / _safe(ckpt.model)
    directory.mkdir(parents=True, exist_ok=True)
    path = directory / f"{_safe(ckpt.created_at)}.json"
    path.write_text(ckpt.model_dump_json(), encoding="utf-8")
    return path


def load_checkpoint(path: str | Path) -> MemoryCheckpoint:
    """Read one MemoryCheckpoint from *path* (raises FileNotFoundError on miss)."""
    text = Path(path).read_text(encoding="utf-8")
    return MemoryCheckpoint.model_validate_json(text)


def latest_for_model(
    model: str, root: str | Path = "runs/memory"
) -> MemoryCheckpoint | None:
    """Return the newest checkpoint for *model*, or None if none exist.

    "Newest" = the lexicographically greatest filename stamp (the stamps are
    zero-padded ISO-ish, so lexical order == chronological order).
    """
    directory = Path(root) / _safe(model)
    if not directory.is_dir():
        return None
    # All candidates share one parent, so comparing paths compares filenames.
    newest = max(directory.glob("*.json"), default=None)
    return None if newest is None else load_checkpoint(newest)


def render_memory_block(checkpoint: MemoryCheckpoint) -> str:
    """Render the memory episode as a labelled observation block.

    Pure: depends only on the checkpoint (no engine). Prepended to the turn-1
    observation by SessionRunner so the handover history reads as the model's
    own prior play. Each turn contributes its index, the frame it saw and the
    action it chose; the recorded reasoning is not included.
    """
    parts = ["MEMORY — your earlier play on this scenario:"]
    for mt in checkpoint.memory_turns:
        parts.append(f"Memory {mt.turn_idx}:")
        parts.append(mt.frame_ascii)
        parts.append(f"  you chose: {mt.action}")
    return "\n".join(parts)


def _invert_legend(legend: dict[int, str]) -> dict[str, int]:
    """Turn a ``palette index -> symbol`` legend into ``symbol -> index``."""
    return {sym: idx for idx, sym in legend.items()}


def _ascii_to_grid(frame: str, sym2idx: dict[str, int]) -> list[list[int]] | None:
    """Parse *frame* as a rectangular grid of legend symbols, else None.

    A frame qualifies only if it has at least two lines, every line has the
    same non-zero length, and every character is a key of *sym2idx*.
    """
    lines = frame.split("\n")
    if len(lines) < 2 or len({len(ln) for ln in lines}) != 1 or not lines[0]:
        return None
    try:
        return [[sym2idx[ch] for ch in ln] for ln in lines]
    except KeyError:
        # A character outside the legend: this is prose, not a grid.
        return None


# Legacy single-agent block sizes — mirror the resized template geometry
# (3x3 predator, 2x2 focal). memory.py stays scenarios-import-free, so these
# are kept in sync by hand; the multi-agent path uses per-AgentFrame `size`.
_LEGACY_PREDATOR_BLOCK = 3
_LEGACY_FOCAL_BLOCK = 2

# Transparent (mouth) cells of the 3x3 ㄷ predator, per facing, as (col, row).
# The mouth = the centre cell + the edge-centre cell on the facing side.
_PRED_MOUTH: dict[str, set[tuple[int, int]]] = {
    "right": {(1, 1), (2, 1)},
    "left": {(1, 1), (0, 1)},
    "down": {(1, 1), (1, 2)},
    "up": {(1, 1), (1, 0)},
}


def _predator_solid_offsets(facing: str) -> list[tuple[int, int]]:
    """The (col, row) offsets PAINTED for a 3x3 ㄷ predator facing *facing*.

    Offsets are listed row by row. An unrecognised *facing* is treated as
    ``"right"``.
    """
    mouth = _PRED_MOUTH.get(facing, _PRED_MOUTH["right"])
    return [(c, r) for r in range(3) for c in range(3) if (c, r) not in mouth]


# NPC agent kinds -> palette colour (errand_runner fallen-pedestrian rescue).
_NPC_COLOR: dict[str, int] = {"npc_down": 11, "npc_active": 14}  # yellow -> green


def _put(grid: list[list[int]], x: int, y: int, idx: int) -> None:
    """Set cell ``(x, y)`` to *idx*; coordinates outside the grid are ignored."""
    if 0 <= y < len(grid) and 0 <= x < len(grid[y]):
        grid[y][x] = idx


def _paint_block(grid: list[list[int]], x0: int, y0: int, n: int, idx: int) -> None:
    """Fill the ``n x n`` square anchored at ``(x0, y0)``, clipped to the grid."""
    for dy in range(n):
        for dx in range(n):
            _put(grid, x0 + dx, y0 + dy, idx)


def _paint_walls(
    grid: list[list[int]],
    wall_rects: list[tuple[int, int, int, int]],
    idx: int,
) -> None:
    """Fill every inclusive ``(x0, y0, x1, y1)`` rectangle, clipped to the grid."""
    height = len(grid)
    width = len(grid[0]) if grid else 0
    for x0, y0, x1, y1 in wall_rects:
        # Clamp the ranges up front so oversized rectangles stay cheap.
        for y in range(max(0, y0), min(height, y1 + 1)):
            for x in range(max(0, x0), min(width, x1 + 1)):
                grid[y][x] = idx


def memory_frames(
    checkpoint: MemoryCheckpoint, *, legend: dict[int, str], grid_size: tuple[int, int]
) -> list[dict]:
    """Reconstruct a renderable color grid per memory turn.

    Returns ``[{turn_idx, action, grid, events}]`` where ``grid`` is a
    ``height x width`` list of palette indices and ``events`` is a copy of the
    per-turn narration list (empty ``[]`` for legacy single-agent frames).

    Three render paths, chosen per turn:

    * ``mt.agents`` non-empty — multi-agent: a blank grid is painted with the
      checkpoint walls, then the turn's overlay ``cells``, then ``resources``,
      then every living agent in list order (later paint wins).
    * grid-style ``frame_ascii`` — inverted via *legend* and returned as-is.
    * anything else (prose frames) — rebuilt from walls, static food cells and
      the recorded predator/focal positions.

    Args:
        checkpoint: The episode to render.
        legend: ``palette index -> symbol`` mapping of the scenario.
        grid_size: ``(width, height)`` of the rebuilt grids.

    Pure: no engine, no IO.
    """
    sym2idx = {sym: idx for idx, sym in legend.items()}
    bg = sym2idx.get(".", 5)
    focal_idx = sym2idx.get("A", 1)
    # The chosen agent paints the legend's focal sprite. Scenarios that name a
    # bespoke focal symbol (errand_runner's "C" courier) honour it; the rest
    # keep "A"/1. Distractor (non-chosen) agents stay on `distractor_idx`.
    chosen_idx = sym2idx.get("C", focal_idx)
    predator_idx = sym2idx.get("B", 2)
    wall_idx = sym2idx.get("#", 3)
    food_idx = sym2idx.get("F", 14)
    distractor_idx = 9  # COLOR_MAP blue
    # NB: errand's "C"=9 coincides with distractor_idx (9). Fine while errand is
    # single-agent (every agent is is_chosen, so the distractor branch is unused);
    # a future multi-agent errand would need distractor_idx to diverge or the focal
    # and distractor couriers would be indistinguishable.
    w, h = grid_size

    def walled_blank() -> list[list[int]]:
        """A fresh background grid with the checkpoint's static walls painted."""
        grid = [[bg] * w for _ in range(h)]
        _paint_walls(grid, checkpoint.wall_rects, wall_idx)
        return grid

    out: list[dict] = []
    for mt in checkpoint.memory_turns:
        if mt.agents:
            grid = walled_blank()
            for cx, cy, cidx in mt.cells:
                _put(grid, cx, cy, cidx)
            for fx, fy in mt.resources:
                _put(grid, fx, fy, food_idx)
            for ag in mt.agents:
                if not ag.alive:
                    continue
                ax, ay = ag.pos
                if ag.kind == "predator":
                    # The predator is always the 3x3 ㄷ shape with a mouth gap.
                    for c, r in _predator_solid_offsets(ag.facing):
                        _put(grid, ax + c, ay + r, predator_idx)
                elif ag.kind in _NPC_COLOR:
                    _paint_block(grid, ax, ay, ag.size, _NPC_COLOR[ag.kind])
                else:
                    color = chosen_idx if ag.is_chosen else distractor_idx
                    _paint_block(grid, ax, ay, ag.size, color)
        else:
            grid = _ascii_to_grid(mt.frame_ascii, sym2idx)
            if grid is None:
                # Prose frame: rebuild the scene from recorded geometry.
                grid = walled_blank()
                for fx, fy in checkpoint.food_cells:
                    _put(grid, fx, fy, food_idx)
                px, py = mt.predator_pos
                _paint_block(grid, px, py, _LEGACY_PREDATOR_BLOCK, predator_idx)
                ox, oy = mt.focal_pos
                _paint_block(grid, ox, oy, _LEGACY_FOCAL_BLOCK, focal_idx)
        out.append(
            {
                "turn_idx": mt.turn_idx,
                "action": mt.action,
                "grid": grid,
                "events": list(mt.events),
            }
        )
    return out