Spaces:
Sleeping
Sleeping
| """Memory checkpoint models + persistence for the CP7 memory pre-roll. | |
| A MemoryCheckpoint is one full-information self-play episode the playing model | |
| ran *before* the scored game. It is persisted as a single-file JSON checkpoint | |
| per (model, timestamp) and shown at the handover as the model's prior | |
| experience. Like ``trace.py`` this module is a serialization boundary: it | |
| imports only pydantic + stdlib and NO other runtime module. | |
| """ | |
| from __future__ import annotations | |
| import re | |
| from pathlib import Path | |
| from pydantic import BaseModel, Field | |
class AgentFrame(BaseModel):
    """Render state of a single sprite during one multi-agent memory turn.

    Attributes:
        id: Stable sprite identifier (``"a0".."a3"`` or ``"predator"``).
        kind: Sprite kind, ``"agent"`` or ``"predator"``; selects the shape
            and colour used when painting.
        pos: ``(x, y)`` of the top-left anchor at the start of the turn.
        size: Side length of the square footprint (agent=2, predator=3).
        alive: Whether the sprite is still alive; only living sprites are
            painted, so eaten agents vanish from the frame.
        is_chosen: Marks the agent the player goes on to control; it is
            painted in the focal colour.
        facing: Direction the predator's mouth opens for the ㄷ shape
            (render only).
    """

    id: str
    kind: str
    pos: tuple[int, int]
    size: int
    alive: bool = True
    is_chosen: bool = False
    facing: str = "right"
class MemoryTurn(BaseModel):
    """Record of one self-play turn inside a memory episode.

    Attributes:
        turn_idx: Position of the turn within the memory episode (1-based).
        frame_ascii: Pre-move grid the model was shown on this memory turn.
        action: Action the model committed.
        reasoning: Excerpt of the model's reasoning (may be truncated).
        focal_pos: Focal ``(x, y)`` BEFORE the move (a JSON array on disk).
        predator_pos: Predator ``(x, y)`` BEFORE the move.
        agents: Per-sprite render states; a non-empty list selects the
            multi-agent render path.
        resources: Collectible resource cells still present this turn.
        cells: Generic coloured overlay cells ``(x, y, palette_idx)`` for this
            turn, painted after walls and before agents in
            :func:`memory_frames`. Lets the errand_runner director draw
            per-tick world objects (the recolouring traffic-light bar, roads,
            construction, wallet, home) without a bespoke field per object
            type. Empty for all legacy single-/multi-agent turns.
        events: Narration for this turn, e.g. ``"a1 eaten"`` /
            ``"a0 got resource"``.
    """

    turn_idx: int
    frame_ascii: str
    action: str
    reasoning: str = ""
    focal_pos: tuple[int, int]
    predator_pos: tuple[int, int]
    agents: list[AgentFrame] = Field(default_factory=list)
    resources: list[tuple[int, int]] = Field(default_factory=list)
    cells: list[tuple[int, int, int]] = Field(default_factory=list)
    events: list[str] = Field(default_factory=list)
class MemoryCheckpoint(BaseModel):
    """Persisted full-information self-play episode, used as handover memory.

    Attributes:
        model: Provider model identifier that produced the episode.
        scenario: Registered scenario name.
        motive_category: Category label (defaults to ``"survival"``).
        difficulty: Difficulty band string.
        seed: Seed of the memory world (same as the scored game).
        created_at: ISO-ish stamp taken from an injectable clock
            (filesystem-safe).
        memory_turns: Per-turn records of the episode, in play order.
        outcome: ``"survived"`` or ``"eliminated"``.
        transparent_prompt: Full-info brief used to drive the episode.
        persona_weight_id: Public id of the hidden persona that drove this
            demonstration (CP8), or ``None`` for a model self-play memory.
            Only the id is stored; the raw reward weights are never
            serialized into the participant-visible checkpoint.
        chosen_agent_id: Id of the survivor / resource winner the player
            continues (multi-agent only).
        wall_rects: Static wall rectangles of the episode world, each an
            inclusive ``(x0, y0, x1, y1)``. Populated for scenarios whose
            observation is prose (no ASCII grid) so the web replay can paint
            walls it cannot recover from ``frame_ascii``. Empty for grid
            scenarios, whose walls are already in each ``frame_ascii``.
        food_cells: Static 1x1 food cells of the episode world (same
            rationale as ``wall_rects``).
    """

    model: str
    scenario: str
    motive_category: str = "survival"
    difficulty: str
    seed: int | None = None
    created_at: str
    memory_turns: list[MemoryTurn] = Field(default_factory=list)
    outcome: str
    transparent_prompt: str
    persona_weight_id: str | None = None
    chosen_agent_id: str | None = None
    wall_rects: list[tuple[int, int, int, int]] = Field(default_factory=list)
    food_cells: list[tuple[int, int]] = Field(default_factory=list)
def _safe(name: str) -> str:
    """Make a model string safe as a single path segment.

    Every character outside ``[A-Za-z0-9._-]`` becomes ``_``. An empty result
    falls back to ``"model"``. A result made only of dots (``"."``, ``".."``,
    ...) is rewritten to underscores of the same length: joined onto a root
    directory such a segment would point at the root itself or its parent
    instead of a child entry.

    Args:
        name: Arbitrary identifier (model name, timestamp stamp, ...).

    Returns:
        A non-empty string that is never ``"."`` or ``".."`` and contains no
        path separators.
    """
    cleaned = re.sub(r"[^A-Za-z0-9._-]", "_", name)
    if not cleaned:
        return "model"
    if not cleaned.strip("."):
        # All-dot names are directory references, not file names.
        return "_" * len(cleaned)
    return cleaned
def save_checkpoint(ckpt: MemoryCheckpoint, root: str | Path = "runs/memory") -> Path:
    """Persist *ckpt* as ``<root>/<safe(model)>/<created_at>.json``.

    Missing parent directories are created. The file is named after the
    checkpoint's ``created_at`` stamp (passed through ``_safe``), so the
    on-disk ordering of files follows creation order.

    Args:
        ckpt: Checkpoint to serialize.
        root: Base directory holding one sub-directory per model.

    Returns:
        Path of the JSON file that was written.
    """
    model_dir = Path(root) / _safe(ckpt.model)
    model_dir.mkdir(parents=True, exist_ok=True)
    destination = model_dir / (_safe(ckpt.created_at) + ".json")
    destination.write_text(ckpt.model_dump_json(), encoding="utf-8")
    return destination
def load_checkpoint(path: str | Path) -> MemoryCheckpoint:
    """Load and validate a single MemoryCheckpoint stored at *path*.

    Args:
        path: Location of a UTF-8 JSON checkpoint file.

    Returns:
        The checkpoint parsed from the file's JSON text.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    return MemoryCheckpoint.model_validate_json(
        Path(path).read_text(encoding="utf-8")
    )
def latest_for_model(
    model: str, root: str | Path = "runs/memory"
) -> MemoryCheckpoint | None:
    """Load the most recent checkpoint saved for *model*.

    "Most recent" means the ``*.json`` file whose path sorts last; the
    filename stamps are zero-padded ISO-ish, so lexical order equals
    chronological order.

    Args:
        model: Model identifier; mapped to its directory name via ``_safe``.
        root: Base directory holding one sub-directory per model.

    Returns:
        The newest checkpoint, or ``None`` when the model directory is
        missing or holds no ``*.json`` file.
    """
    model_dir = Path(root) / _safe(model)
    if not model_dir.is_dir():
        return None
    newest = max(model_dir.glob("*.json"), default=None)
    return None if newest is None else load_checkpoint(newest)
def render_memory_block(checkpoint: MemoryCheckpoint) -> str:
    """Format the memory episode as a labelled, newline-joined text block.

    Pure: reads only the checkpoint (no engine). SessionRunner prepends the
    result to the turn-1 observation so the handover history reads as the
    model's own prior play.

    Args:
        checkpoint: Episode whose ``memory_turns`` are rendered in order.

    Returns:
        A header line followed, per turn, by a ``Memory <n>:`` label, the
        turn's ``frame_ascii`` and the action that was chosen.
    """
    lines = ["MEMORY — your earlier play on this scenario:"]
    for turn in checkpoint.memory_turns:
        lines += [
            f"Memory {turn.turn_idx}:",
            turn.frame_ascii,
            f" you chose: {turn.action}",
        ]
    return "\n".join(lines)
def _invert_legend(legend: dict[int, str]) -> dict[str, int]:
    """Return the symbol -> index mapping that reverses *legend*.

    When several indices share one symbol, the entry that comes last in the
    legend's iteration order wins.
    """
    symbol_to_index: dict[str, int] = {}
    for index, symbol in legend.items():
        symbol_to_index[symbol] = index
    return symbol_to_index
def _ascii_to_grid(frame: str, sym2idx: dict[str, int]) -> list[list[int]] | None:
    """Convert a rectangular block of legend symbols into a grid of indices.

    Args:
        frame: Text whose ``"\\n"``-separated lines are the grid rows.
        sym2idx: Mapping from legend symbol to index.

    Returns:
        One list of indices per row, or ``None`` when *frame* is not a grid:
        fewer than two lines, an empty first line, lines of unequal length,
        or any character missing from *sym2idx*.
    """
    rows = frame.split("\n")
    if len(rows) < 2:
        return None
    width = len(rows[0])
    if width == 0 or any(len(row) != width for row in rows):
        return None

    parsed: list[list[int]] = []
    for row in rows:
        # One unknown symbol rejects the whole frame, not just its row.
        if any(symbol not in sym2idx for symbol in row):
            return None
        parsed.append([sym2idx[symbol] for symbol in row])
    return parsed
# Block side lengths for the legacy single-agent render path. They mirror the
# resized template geometry (3x3 predator, 2x2 focal) and are kept in sync by
# hand because memory.py stays scenarios-import-free. The multi-agent path
# uses the per-AgentFrame `size` instead.
_LEGACY_PREDATOR_BLOCK = 3
_LEGACY_FOCAL_BLOCK = 2

# Transparent (mouth) cells of the 3x3 ㄷ predator, keyed by facing, given as
# (col, row). Each mouth is the centre cell plus the edge-centre cell on the
# side the predator faces.
_PRED_MOUTH: dict[str, set[tuple[int, int]]] = {
    facing: {(1, 1), edge_centre}
    for facing, edge_centre in (
        ("right", (2, 1)),
        ("left", (0, 1)),
        ("down", (1, 2)),
        ("up", (1, 0)),
    )
}
def _predator_solid_offsets(facing: str) -> list[tuple[int, int]]:
    """List the (col, row) offsets that are PAINTED for a 3x3 ㄷ predator.

    The offsets are every cell of the 3x3 block except the mouth cells for
    *facing*, in row-major order. A facing missing from ``_PRED_MOUTH`` is
    treated as ``"right"``.
    """
    open_cells = _PRED_MOUTH.get(facing, _PRED_MOUTH["right"])
    block_cells = ((col, row) for row in range(3) for col in range(3))
    return [cell for cell in block_cells if cell not in open_cells]
# Palette colour for each NPC agent kind (errand_runner fallen-pedestrian
# rescue): yellow -> green.
_NPC_COLOR: dict[str, int] = {
    "npc_down": 11,  # yellow
    "npc_active": 14,  # green
}
def memory_frames(
    checkpoint: MemoryCheckpoint, *, legend: dict[int, str], grid_size: tuple[int, int]
) -> list[dict]:
    """Build one renderable colour grid for every memory turn.

    Three sources are used, chosen per turn:

    * turns with ``agents``: a blank grid with walls, then the turn's overlay
      ``cells``, its ``resources`` and finally each living agent;
    * turns whose ``frame_ascii`` parses as a legend grid: that grid, inverted
      through *legend*;
    * anything else (prose frames): a blank grid with walls, the checkpoint's
      ``food_cells`` and the recorded predator / focal positions.

    Pure: no engine, no IO.

    Args:
        checkpoint: Episode to reconstruct.
        legend: Palette index -> symbol mapping of the scenario.
        grid_size: ``(width, height)`` of rebuilt grids.

    Returns:
        ``[{turn_idx, action, grid, events}]`` in turn order, where ``grid``
        is a list of rows of palette indices and ``events`` is a copy of the
        turn's narration list (``[]`` for legacy single-agent frames).
    """
    sym2idx = _invert_legend(legend)
    width, height = grid_size

    background = sym2idx.get(".", 5)
    focal_idx = sym2idx.get("A", 1)
    # The chosen agent paints the legend's focal sprite. Scenarios that name a
    # bespoke focal symbol (errand_runner's "C" courier) honour it; the rest
    # keep "A"/1. Distractor (non-chosen) agents stay on `distractor_idx`.
    chosen_idx = sym2idx.get("C", focal_idx)
    predator_idx = sym2idx.get("B", 2)
    wall_idx = sym2idx.get("#", 3)
    food_idx = sym2idx.get("F", 14)
    # COLOR_MAP blue.
    # NB: errand's "C"=9 coincides with distractor_idx (9). Fine while errand
    # is single-agent (every agent is is_chosen, so the distractor branch is
    # unused); a future multi-agent errand would need distractor_idx to
    # diverge or the focal and distractor couriers would be indistinguishable.
    distractor_idx = 9

    def set_cell(grid: list[list[int]], x: int, y: int, idx: int) -> None:
        # Off-grid writes are dropped so sprites may overhang the border.
        if 0 <= x < width and 0 <= y < height:
            grid[y][x] = idx

    def fill_square(grid: list[list[int]], x0: int, y0: int, side: int, idx: int) -> None:
        for dy in range(side):
            for dx in range(side):
                set_cell(grid, x0 + dx, y0 + dy, idx)

    def blank_with_walls() -> list[list[int]]:
        grid = [[background] * width for _ in range(height)]
        for left, top, right, bottom in checkpoint.wall_rects:
            # Rectangles are inclusive and clipped to the grid.
            for y in range(max(0, top), min(height, bottom + 1)):
                for x in range(max(0, left), min(width, right + 1)):
                    grid[y][x] = wall_idx
        return grid

    frames: list[dict] = []
    for turn in checkpoint.memory_turns:
        if turn.agents:
            # Multi-agent path. Paint order decides what ends up on top:
            # walls, overlay cells, resources, then agents.
            grid = blank_with_walls()
            for cell_x, cell_y, palette_idx in turn.cells:
                set_cell(grid, cell_x, cell_y, palette_idx)
            for res_x, res_y in turn.resources:
                set_cell(grid, res_x, res_y, food_idx)
            for sprite in turn.agents:
                if not sprite.alive:
                    continue
                anchor_x, anchor_y = sprite.pos[0], sprite.pos[1]
                if sprite.kind == "predator":
                    for dx, dy in _predator_solid_offsets(sprite.facing):
                        set_cell(grid, anchor_x + dx, anchor_y + dy, predator_idx)
                elif sprite.kind in _NPC_COLOR:
                    fill_square(grid, anchor_x, anchor_y, sprite.size,
                                _NPC_COLOR[sprite.kind])
                else:
                    colour = chosen_idx if sprite.is_chosen else distractor_idx
                    fill_square(grid, anchor_x, anchor_y, sprite.size, colour)
        else:
            grid = _ascii_to_grid(turn.frame_ascii, sym2idx)
            if grid is None:
                # Prose frame: rebuild from the static world and the recorded
                # positions. The focal is painted last so it stays on top.
                grid = blank_with_walls()
                for food_x, food_y in checkpoint.food_cells:
                    set_cell(grid, food_x, food_y, food_idx)
                fill_square(grid, turn.predator_pos[0], turn.predator_pos[1],
                            _LEGACY_PREDATOR_BLOCK, predator_idx)
                fill_square(grid, turn.focal_pos[0], turn.focal_pos[1],
                            _LEGACY_FOCAL_BLOCK, focal_idx)
        frames.append({"turn_idx": turn.turn_idx, "action": turn.action,
                       "grid": grid, "events": list(turn.events)})
    return frames