Spaces:

irregular6612
/

AgentnessBench

Sleeping

App Files Files Community

AgentnessBench / proteus /game /runtime /memory.py

irregular6612

docs(memory): note errand C=9 / distractor_idx collision is safe only single-agent

f571c87 28 days ago

Raw

History Blame Contribute Delete

12.7 kB

	"""Memory checkpoint models + persistence for the CP7 memory pre-roll.

	A MemoryCheckpoint is one full-information self-play episode the playing model
	ran before the scored game. It is persisted as a single-file JSON checkpoint
	per (model, timestamp) and shown at the handover as the model's prior
	experience. Like ``trace.py`` this module is a serialization boundary: it
	imports only pydantic + stdlib and NO other runtime module.
	"""

	from __future__ import annotations

	import re
	from pathlib import Path

	from pydantic import BaseModel, Field


	class AgentFrame(BaseModel):
	    """One sprite's render state in a multi-agent memory turn.

	    Instances are stored in ``MemoryTurn.agents`` and consumed by
	    :func:`memory_frames`, which skips sprites that are not ``alive`` and
	    picks the paint colour from ``kind`` / ``is_chosen``.

	    Attributes:
	        id: Stable identifier (``"a0".."a3"`` or ``"predator"``).
	        kind: ``"agent"`` or ``"predator"`` (drives shape + colour). The
	            renderer additionally recognises the keys of ``_NPC_COLOR``.
	        pos: Top-left anchor ``(x, y)`` at the start of this turn.
	        size: Footprint side length (agent=2, predator=3). The renderer
	            always paints predators with the fixed 3x3 mouth shape, so
	            ``size`` only affects non-predator sprites there.
	        alive: Painted only while alive (eaten agents disappear).
	        is_chosen: The agent the player continues (painted in the focal
	            colour).
	        facing: Predator mouth direction for the ㄷ shape (render only).
	    """

	    id: str
	    kind: str
	    pos: tuple[int, int]
	    size: int
	    alive: bool = True
	    is_chosen: bool = False
	    facing: str = "right"


	class MemoryTurn(BaseModel):
	    """One self-play turn of the memory episode.

	    The list-valued fields all default to empty; a non-empty ``agents``
	    list is what switches :func:`memory_frames` to the multi-agent render
	    path for this turn.

	    Attributes:
	        turn_idx: 1-based index within the memory episode.
	        frame_ascii: The pre-move grid the model saw this memory turn.
	        action: The action the model committed.
	        reasoning: The model's reasoning excerpt (may be truncated).
	        focal_pos: Focal ``(x, y)`` BEFORE the move (JSON array on disk).
	        predator_pos: Predator ``(x, y)`` BEFORE the move.
	        agents: Per-sprite render states for the multi-agent render path.
	        resources: Collectible resource cells still present this turn.
	        cells: Generic coloured overlay cells ``(x, y, palette_idx)``.
	        events: Narration strings for this turn.
	    """

	    turn_idx: int
	    frame_ascii: str
	    action: str
	    reasoning: str = ""
	    focal_pos: tuple[int, int]
	    predator_pos: tuple[int, int]
	    agents: list[AgentFrame] = Field(default_factory=list)
	    """Per-sprite render states; non-empty ⇒ the multi-agent render path."""
	    resources: list[tuple[int, int]] = Field(default_factory=list)
	    """Collectible resource cells still present this turn."""
	    cells: list[tuple[int, int, int]] = Field(default_factory=list)
	    """Generic coloured overlay cells ``(x, y, palette_idx)`` for this turn.

	    Painted after walls and before agents in :func:`memory_frames`. Lets the
	    errand_runner director draw per-tick world objects (the recolouring
	    traffic-light bar, roads, construction, wallet, home) without a bespoke
	    field per object type. Empty for all legacy single-/multi-agent turns."""
	    events: list[str] = Field(default_factory=list)
	    """Narration for this turn, e.g. ``"a1 eaten"`` / ``"a0 got resource"``."""


	class MemoryCheckpoint(BaseModel):
	    """A persisted full-info self-play episode used as handover memory.

	    Serialized to / from JSON by :func:`save_checkpoint` and
	    :func:`load_checkpoint`.

	    Attributes:
	        model: Provider model identifier that produced the episode.
	        scenario: Registered scenario name.
	        motive_category: Category label (default ``"survival"``).
	        difficulty: Difficulty band string.
	        seed: Seed of the memory world (same as the scored game).
	        created_at: ISO-ish stamp from an injectable clock (filesystem-safe).
	        memory_turns: The episode's per-turn records, in play order.
	        outcome: ``"survived"`` or ``"eliminated"``.
	        transparent_prompt: The full-info brief used to drive the episode.
	        persona_weight_id: Public id of the hidden persona that drove this
	            demonstration (CP8), or ``None`` for a model self-play memory.
	            Only the id is stored — the raw reward weights are never
	            serialized into the participant-visible checkpoint.
	        chosen_agent_id: Id of the agent the player continues (multi-agent
	            only), else ``None``.
	        wall_rects: Static inclusive wall rectangles ``(x0, y0, x1, y1)``.
	        food_cells: Static 1x1 food cells ``(x, y)``.
	    """

	    model: str
	    scenario: str
	    motive_category: str = "survival"
	    difficulty: str
	    seed: int | None = None
	    created_at: str
	    memory_turns: list[MemoryTurn] = Field(default_factory=list)
	    outcome: str
	    transparent_prompt: str
	    persona_weight_id: str | None = None
	    chosen_agent_id: str | None = None
	    """Id of the survivor / resource winner the player continues (multi-agent only)."""
	    wall_rects: list[tuple[int, int, int, int]] = Field(default_factory=list)
	    """Static wall rectangles (inclusive ``(x0,y0,x1,y1)``) of the episode world.

	    Populated for scenarios whose observation is prose (no ASCII grid), so the
	    web replay can paint walls it cannot recover from ``frame_ascii``. Empty for
	    grid scenarios (their walls are already in each ``frame_ascii``)."""
	    food_cells: list[tuple[int, int]] = Field(default_factory=list)
	    """Static 1x1 food cells of the episode world (same rationale as wall_rects)."""


	def _safe(name: str) -> str:
	    """Make a model string safe as a single path segment.

	    Every character outside ``[A-Za-z0-9._-]`` becomes ``"_"``.

	    Args:
	        name: Arbitrary identifier (model name or timestamp stamp).

	    Returns:
	        A non-empty string usable as one directory or file name. An empty
	        input yields ``"model"``; the special segments ``"."`` and ``".."``
	        yield ``"_"`` and ``"__"`` respectively.
	    """
	    cleaned = re.sub(r"[^A-Za-z0-9._-]", "_", name)
	    if not cleaned:
	        return "model"
	    # "." and ".." consist only of whitelisted characters but are not plain
	    # names: joined onto a root they resolve to the root itself / its parent,
	    # so a checkpoint would be written outside the intended directory.
	    if cleaned in {".", ".."}:
	        return "_" * len(cleaned)
	    return cleaned


	def save_checkpoint(ckpt: MemoryCheckpoint, root: str | Path = "runs/memory") -> Path:
	    """Persist ckpt as ``<root>/<safe(model)>/<safe(created_at)>.json``.

	    Missing parent directories are created. Both path segments go through
	    ``_safe``; because the file name is the ``created_at`` stamp, sorting the
	    directory by name orders checkpoints by creation.

	    Args:
	        ckpt: The checkpoint to serialize (as JSON, UTF-8).
	        root: Base directory holding one sub-directory per model.

	    Returns:
	        The path of the file that was written.
	    """
	    model_dir = Path(root) / _safe(ckpt.model)
	    model_dir.mkdir(parents=True, exist_ok=True)
	    target = model_dir / f"{_safe(ckpt.created_at)}.json"
	    payload = ckpt.model_dump_json()
	    target.write_text(payload, encoding="utf-8")
	    return target


	def load_checkpoint(path: str | Path) -> MemoryCheckpoint:
	    """Load and validate a single MemoryCheckpoint stored as UTF-8 JSON.

	    Args:
	        path: Location of the checkpoint file.

	    Returns:
	        The parsed checkpoint.

	    Raises:
	        FileNotFoundError: If path does not exist.
	    """
	    source = Path(path)
	    return MemoryCheckpoint.model_validate_json(source.read_text(encoding="utf-8"))


	def latest_for_model(
	    model: str, root: str | Path = "runs/memory"
	) -> MemoryCheckpoint | None:
	    """Load the most recent checkpoint saved for model.

	    The most recent checkpoint is the ``*.json`` file that sorts last in the
	    model's directory; the stamps are zero-padded ISO-ish, so that ordering
	    is also chronological.

	    Args:
	        model: Model identifier (mapped to a directory name via ``_safe``).
	        root: Base directory holding one sub-directory per model.

	    Returns:
	        The newest checkpoint, or ``None`` when the model has no directory
	        or the directory holds no ``*.json`` file.
	    """
	    model_dir = Path(root) / _safe(model)
	    if not model_dir.is_dir():
	        return None
	    newest = max(model_dir.glob("*.json"), default=None)
	    if newest is None:
	        return None
	    return load_checkpoint(newest)


	def render_memory_block(checkpoint: MemoryCheckpoint) -> str:
	    """Format the memory episode as one labelled, newline-joined text block.

	    The block opens with a fixed header line and then, for each memory turn
	    in order, lists a ``Memory <turn_idx>:`` label, the turn's ASCII frame
	    and the action that was chosen. Only the checkpoint is read (no engine,
	    no IO). SessionRunner prepends the result to the turn-1 observation so
	    the handover history reads as the model's own prior play.

	    Args:
	        checkpoint: The memory episode to render.

	    Returns:
	        The rendered block; just the header line when there are no turns.
	    """
	    lines = ["MEMORY — your earlier play on this scenario:"]
	    for turn in checkpoint.memory_turns:
	        lines += [
	            f"Memory {turn.turn_idx}:",
	            turn.frame_ascii,
	            f" you chose: {turn.action}",
	        ]
	    return "\n".join(lines)


	def _invert_legend(legend: dict[int, str]) -> dict[str, int]:
	    """Flip a palette-index -> symbol legend into symbol -> palette-index.

	    When several indices share a symbol, the one that comes last in the
	    legend's iteration order is kept.
	    """
	    symbol_to_index: dict[str, int] = {}
	    for palette_idx, symbol in legend.items():
	        symbol_to_index[symbol] = palette_idx
	    return symbol_to_index


	def _ascii_to_grid(frame: str, sym2idx: dict[str, int]) -> list[list[int]] | None:
	    """Convert an ASCII frame into rows of palette indices, if it is a grid.

	    A frame counts as a grid only when it has at least two lines, every line
	    has the same non-zero length, and every character is a key of sym2idx.

	    Args:
	        frame: Newline-separated text of one memory turn.
	        sym2idx: Mapping from legend symbol to palette index.

	    Returns:
	        One list of palette indices per line, or ``None`` when the frame is
	        not a rectangular grid of known symbols.
	    """
	    rows = frame.split("\n")
	    if len(rows) < 2:
	        return None
	    row_len = len(rows[0])
	    if row_len == 0 or any(len(row) != row_len for row in rows):
	        return None
	    # A single unknown character disqualifies the whole frame.
	    if any(ch not in sym2idx for row in rows for ch in row):
	        return None
	    return [[sym2idx[ch] for ch in row] for row in rows]


	# Legacy single-agent block sizes — mirror the resized template geometry
	# (3x3 predator, 2x2 focal). memory.py stays scenarios-import-free, so these
	# are kept in sync by hand; the multi-agent path uses per-AgentFrame `size`.
	# Both are side lengths, in cells, of the squares painted by the prose-frame
	# fallback of `memory_frames`.
	_LEGACY_PREDATOR_BLOCK = 3
	_LEGACY_FOCAL_BLOCK = 2

	# Transparent (mouth) cells of the 3x3 ㄷ predator, per facing, as (col, row).
	# The mouth = the centre cell + the edge-centre cell on the facing side.
	# Offsets are relative to the predator's top-left anchor; cells listed here
	# are the ones left unpainted.
	_PRED_MOUTH: dict[str, set[tuple[int, int]]] = {
	    "right": {(1, 1), (2, 1)},
	    "left": {(1, 1), (0, 1)},
	    "down": {(1, 1), (1, 2)},
	    "up": {(1, 1), (1, 0)},
	}


	def _predator_solid_offsets(facing: str) -> list[tuple[int, int]]:
	    """List the (col, row) offsets that get painted for a 3x3 ㄷ predator.

	    Args:
	        facing: Mouth direction; an unrecognised value is treated as
	            ``"right"``.

	    Returns:
	        The cells of the 3x3 footprint that are not part of the mouth,
	        ordered row by row and, within a row, column by column.
	    """
	    open_cells = _PRED_MOUTH.get(facing, _PRED_MOUTH["right"])
	    solid: list[tuple[int, int]] = []
	    for row in range(3):
	        for col in range(3):
	            if (col, row) in open_cells:
	                continue
	            solid.append((col, row))
	    return solid


	# NPC agent kinds -> palette colour (errand_runner fallen-pedestrian rescue).
	# An AgentFrame whose `kind` is a key here is painted by `memory_frames` as a
	# `size` x `size` square in the mapped palette index.
	_NPC_COLOR: dict[str, int] = {"npc_down": 11, "npc_active": 14}  # yellow -> green


	def memory_frames(
	    checkpoint: MemoryCheckpoint, *, legend: dict[int, str], grid_size: tuple[int, int]
	) -> list[dict]:
	    """Build one renderable palette-index grid per memory turn.

	    Three cases per turn:

	    * the turn has ``agents`` — a blank canvas is painted with walls, overlay
	      ``cells``, ``resources`` and then every living sprite (multi-agent path);
	    * otherwise, ``frame_ascii`` parses as a grid of legend symbols — it is
	      converted symbol by symbol;
	    * otherwise (prose frame) — a blank canvas is painted with walls, the
	      checkpoint's food cells and the recorded predator / focal squares.

	    Pure: no engine, no IO.

	    Args:
	        checkpoint: The memory episode to render.
	        legend: Palette index -> ASCII symbol mapping of the scenario.
	        grid_size: ``(width, height)`` of the canvases built here.

	    Returns:
	        ``[{turn_idx, action, grid, events}]`` in turn order, where ``grid``
	        is a list of rows of palette indices and ``events`` is a copy of the
	        turn's narration list.
	    """
	    symbol_to_index = _invert_legend(legend)
	    # Palette indices come from the legend; the literals are the fallbacks
	    # used when the legend does not define the symbol.
	    background = symbol_to_index.get(".", 5)
	    focal_colour = symbol_to_index.get("A", 1)
	    # The chosen agent paints the legend's focal sprite. Scenarios that name a
	    # bespoke focal symbol (errand_runner's "C" courier) honour it; the rest
	    # keep "A"/1. Distractor (non-chosen) agents stay on `distractor_colour`.
	    chosen_colour = symbol_to_index.get("C", focal_colour)
	    predator_colour = symbol_to_index.get("B", 2)
	    wall_colour = symbol_to_index.get("#", 3)
	    food_colour = symbol_to_index.get("F", 14)
	    width, height = grid_size

	    distractor_colour = 9  # COLOR_MAP blue
	    # NB: errand's "C"=9 coincides with distractor_colour (9). Fine while
	    # errand is single-agent (every agent is is_chosen, so the distractor
	    # branch is unused); a future multi-agent errand would need the distractor
	    # colour to diverge or the focal and distractor couriers would be
	    # indistinguishable.

	    def blank_canvas():
	        """Return a fresh height x width canvas filled with the background."""
	        return [[background] * width for _ in range(height)]

	    def put(canvas, col, row, colour):
	        """Colour one cell, silently ignoring out-of-bounds coordinates."""
	        if 0 <= col < width and 0 <= row < height:
	            canvas[row][col] = colour

	    def fill_square(canvas, left, top, side, colour):
	        """Colour a side x side square anchored at its top-left cell."""
	        for dy in range(side):
	            for dx in range(side):
	                put(canvas, left + dx, top + dy, colour)

	    def draw_walls(canvas):
	        """Paint every inclusive wall rectangle, clipped to the canvas."""
	        for left, top, right, bottom in checkpoint.wall_rects:
	            for row in range(max(0, top), min(height, bottom + 1)):
	                for col in range(max(0, left), min(width, right + 1)):
	                    canvas[row][col] = wall_colour

	    def draw_multi_agent(turn):
	        """Layer walls, overlay cells, resources and living sprites."""
	        canvas = blank_canvas()
	        draw_walls(canvas)
	        for col, row, colour in turn.cells:
	            put(canvas, col, row, colour)
	        for col, row in turn.resources:
	            put(canvas, col, row, food_colour)
	        for sprite in turn.agents:
	            if not sprite.alive:
	                continue
	            left, top = sprite.pos[0], sprite.pos[1]
	            if sprite.kind == "predator":
	                # Predators use the fixed 3x3 shape with an open mouth.
	                for dx, dy in _predator_solid_offsets(sprite.facing):
	                    put(canvas, left + dx, top + dy, predator_colour)
	            elif sprite.kind in _NPC_COLOR:
	                fill_square(canvas, left, top, sprite.size, _NPC_COLOR[sprite.kind])
	            else:
	                if sprite.is_chosen:
	                    colour = chosen_colour
	                else:
	                    colour = distractor_colour
	                fill_square(canvas, left, top, sprite.size, colour)
	        return canvas

	    def draw_legacy(turn):
	        """Use the ASCII frame when it is a grid, else rebuild from positions."""
	        canvas = _ascii_to_grid(turn.frame_ascii, symbol_to_index)
	        if canvas is not None:
	            return canvas
	        canvas = blank_canvas()
	        draw_walls(canvas)
	        for col, row in checkpoint.food_cells:
	            put(canvas, col, row, food_colour)
	        # The focal square is painted last so it wins any overlap.
	        fill_square(canvas, turn.predator_pos[0], turn.predator_pos[1],
	                    _LEGACY_PREDATOR_BLOCK, predator_colour)
	        fill_square(canvas, turn.focal_pos[0], turn.focal_pos[1],
	                    _LEGACY_FOCAL_BLOCK, focal_colour)
	        return canvas

	    frames: list[dict] = []
	    for turn in checkpoint.memory_turns:
	        canvas = draw_multi_agent(turn) if turn.agents else draw_legacy(turn)
	        frames.append({
	            "turn_idx": turn.turn_idx,
	            "action": turn.action,
	            "grid": canvas,
	            "events": list(turn.events),
	        })
	    return frames