"""Memory checkpoint models + persistence for the CP7 memory pre-roll.

A MemoryCheckpoint is one full-information self-play episode the playing model
ran *before* the scored game. It is persisted as a single-file JSON checkpoint
per (model, timestamp) and shown at the handover as the model's prior
experience. Like ``trace.py`` this module is a serialization boundary: it
imports only pydantic + stdlib and NO other runtime module.
"""

from __future__ import annotations

import re
from pathlib import Path

from pydantic import BaseModel, Field


class AgentFrame(BaseModel):
    """One sprite's render state in a multi-agent memory turn.

    Attributes:
        id: Stable identifier (``"a0".."a3"`` or ``"predator"``).
        kind: ``"agent"`` or ``"predator"`` (drives shape + colour). The
            renderer in this module also recognises the NPC kinds
            ``"npc_down"`` / ``"npc_active"``; any other value is painted as
            a plain agent square.
        pos: Top-left anchor ``(x, y)`` at the start of this turn.
        size: Footprint side length (agent=2, predator=3). ``memory_frames``
            always draws a predator as the fixed 3x3 ㄷ shape, so there
            ``size`` only affects non-predator sprites.
        alive: Painted only while alive (eaten agents disappear).
        is_chosen: The agent the player continues (painted in the focal colour).
        facing: Predator mouth direction for the ㄷ shape (render only); one
            of ``"right"``, ``"left"``, ``"down"``, ``"up"``. An unrecognised
            value is rendered as ``"right"``.
    """

    id: str
    kind: str
    pos: tuple[int, int]
    size: int
    alive: bool = True
    is_chosen: bool = False
    facing: str = "right"


class MemoryTurn(BaseModel):
    """One self-play turn of the memory episode.

    Attributes:
        turn_idx: 1-based index within the memory episode.
        frame_ascii: The pre-move grid the model saw this memory turn.
        action: The action the model committed.
        reasoning: The model's reasoning excerpt (may be truncated).
        focal_pos: Focal ``(x, y)`` BEFORE the move (JSON array on disk).
        predator_pos: Predator ``(x, y)`` BEFORE the move.
        agents: Per-sprite render states. When non-empty, ``memory_frames``
            renders the turn from ``agents`` / ``cells`` / ``resources`` and
            does not consult ``frame_ascii``, ``focal_pos`` or
            ``predator_pos``.
        resources: ``(x, y)`` cells of collectible resources still present.
        cells: Generic overlay cells ``(x, y, palette_idx)``.
        events: Narration strings for this turn.
    """

    turn_idx: int
    frame_ascii: str
    action: str
    reasoning: str = ""
    focal_pos: tuple[int, int]
    predator_pos: tuple[int, int]
    agents: list[AgentFrame] = Field(default_factory=list)
    """Per-sprite render states; non-empty ⇒ the multi-agent render path."""
    resources: list[tuple[int, int]] = Field(default_factory=list)
    """Collectible resource cells still present this turn."""
    cells: list[tuple[int, int, int]] = Field(default_factory=list)
    """Generic coloured overlay cells ``(x, y, palette_idx)`` for this turn.

    Painted after walls and before agents in :func:`memory_frames`. Lets the
    errand_runner director draw per-tick world objects (the recolouring
    traffic-light bar, roads, construction, wallet, home) without a bespoke
    field per object type. Empty for all legacy single-/multi-agent turns."""
    events: list[str] = Field(default_factory=list)
    """Narration for this turn, e.g. ``"a1 eaten"`` / ``"a0 got resource"``."""


class MemoryCheckpoint(BaseModel):
    """A persisted full-info self-play episode used as handover memory.

    Attributes:
        model: Provider model identifier that produced the episode. Also
            names the per-model directory the checkpoint is saved under.
        scenario: Registered scenario name.
        motive_category: Category label (default ``"survival"``).
        difficulty: Difficulty band string.
        seed: Seed of the memory world (same as the scored game).
        created_at: ISO-ish stamp from an injectable clock (filesystem-safe).
            Used as the checkpoint's file name by ``save_checkpoint``.
        memory_turns: The episode's per-turn records, in play order.
        outcome: ``"survived"`` or ``"eliminated"``.
        transparent_prompt: The full-info brief used to drive the episode.
        persona_weight_id: Public id of the hidden persona that drove this
            demonstration (CP8), or ``None`` for a model self-play memory. Only
            the id is stored — the raw reward weights are never serialized into
            the participant-visible checkpoint.
        chosen_agent_id: Id of the agent the player continues (multi-agent
            episodes only), else ``None``.
        wall_rects: Static wall rectangles as inclusive ``(x0, y0, x1, y1)``.
            ``memory_frames`` paints them for multi-agent turns and for
            frames whose ``frame_ascii`` is not a parseable grid.
        food_cells: Static 1x1 food cells ``(x, y)``. ``memory_frames`` paints
            them only for non-grid frames of single-agent turns.
    """

    model: str
    scenario: str
    motive_category: str = "survival"
    difficulty: str
    seed: int | None = None
    created_at: str
    memory_turns: list[MemoryTurn] = Field(default_factory=list)
    outcome: str
    transparent_prompt: str
    persona_weight_id: str | None = None
    chosen_agent_id: str | None = None
    """Id of the survivor / resource winner the player continues (multi-agent only)."""
    wall_rects: list[tuple[int, int, int, int]] = Field(default_factory=list)
    """Static wall rectangles (inclusive ``(x0,y0,x1,y1)``) of the episode world.

    Populated for scenarios whose observation is prose (no ASCII grid), so the
    web replay can paint walls it cannot recover from ``frame_ascii``. Empty for
    grid scenarios (their walls are already in each ``frame_ascii``)."""
    food_cells: list[tuple[int, int]] = Field(default_factory=list)
    """Static 1x1 food cells of the episode world (same rationale as wall_rects)."""


def _safe(name: str) -> str:
    """Make a model string safe as a single path segment."""
    return re.sub(r"[^A-Za-z0-9._-]", "_", name) or "model"


def save_checkpoint(ckpt: MemoryCheckpoint, root: str | Path = "runs/memory") -> Path:
    """Persist *ckpt* as ``<root>/<safe(model)>/<safe(created_at)>.json``.

    The per-model directory (and any missing parents) is created on demand.
    Both path components are passed through ``_safe``; the file stem is the
    ``created_at`` stamp, so sorting file names sorts by creation stamp. An
    existing file with the same stamp is overwritten.

    Args:
        ckpt: The checkpoint to serialize (JSON via ``model_dump_json``).
        root: Base directory holding one sub-directory per model.

    Returns:
        The path of the file that was written.
    """
    model_dir = Path(root) / _safe(ckpt.model)
    model_dir.mkdir(parents=True, exist_ok=True)
    target = model_dir / (_safe(ckpt.created_at) + ".json")
    target.write_text(ckpt.model_dump_json(), encoding="utf-8")
    return target


def load_checkpoint(path: str | Path) -> MemoryCheckpoint:
    """Load and validate a single MemoryCheckpoint stored at *path*.

    The file is read as UTF-8 text and parsed with
    ``MemoryCheckpoint.model_validate_json``.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    return MemoryCheckpoint.model_validate_json(
        Path(path).read_text(encoding="utf-8")
    )


def latest_for_model(
    model: str, root: str | Path = "runs/memory"
) -> MemoryCheckpoint | None:
    """Load the most recent checkpoint saved for *model*.

    Recency is decided purely by file name: the ``*.json`` file that sorts
    last in the model's directory wins (stamps are zero-padded ISO-ish, so
    lexical order equals chronological order).

    Returns:
        The newest checkpoint, or ``None`` when the model's directory is
        missing or holds no ``*.json`` file.
    """
    model_dir = Path(root) / _safe(model)
    if not model_dir.is_dir():
        return None
    newest = max(model_dir.glob("*.json"), default=None)
    return None if newest is None else load_checkpoint(newest)


def render_memory_block(checkpoint: MemoryCheckpoint) -> str:
    """Format the memory episode as a labelled, newline-joined text block.

    Output is a fixed header line followed, for each memory turn in order,
    by a ``Memory <turn_idx>:`` label, the turn's ``frame_ascii`` and the
    chosen action. Pure: reads only the checkpoint (no engine, no IO).
    Intended to be prepended to the turn-1 observation by SessionRunner so
    the handover history reads as the model's own prior play.
    """
    lines = ["MEMORY — your earlier play on this scenario:"]
    for turn in checkpoint.memory_turns:
        lines.extend(
            (
                f"Memory {turn.turn_idx}:",
                turn.frame_ascii,
                f"  you chose: {turn.action}",
            )
        )
    return "\n".join(lines)


def _invert_legend(legend: dict[int, str]) -> dict[str, int]:
    return {sym: idx for idx, sym in legend.items()}


def _ascii_to_grid(frame: str, sym2idx: dict[str, int]) -> list[list[int]] | None:
    """Parse *frame* as a rectangular grid of legend symbols, else None."""
    lines = frame.split("\n")
    if len(lines) < 2 or len({len(ln) for ln in lines}) != 1 or not lines[0]:
        return None
    grid: list[list[int]] = []
    for ln in lines:
        row: list[int] = []
        for ch in ln:
            if ch not in sym2idx:
                return None
            row.append(sym2idx[ch])
        grid.append(row)
    return grid


# Legacy single-agent block sizes — mirror the resized template geometry
# (3x3 predator, 2x2 focal). memory.py stays scenarios-import-free, so these
# are kept in sync by hand. They are used only by the prose-frame fallback in
# memory_frames, which paints both sprites as solid squares of these side
# lengths; the multi-agent path uses per-AgentFrame `size` for non-predator
# sprites and the fixed 3x3 ㄷ shape below for predators.
_LEGACY_PREDATOR_BLOCK = 3
_LEGACY_FOCAL_BLOCK = 2

# Transparent (mouth) cells of the 3x3 ㄷ predator, per facing, as (col, row)
# offsets from the sprite's top-left anchor.
# The mouth = the centre cell + the edge-centre cell on the facing side; these
# two cells are skipped when the predator is painted.
_PRED_MOUTH: dict[str, set[tuple[int, int]]] = {
    "right": {(1, 1), (2, 1)},
    "left": {(1, 1), (0, 1)},
    "down": {(1, 1), (1, 2)},
    "up": {(1, 1), (1, 0)},
}


def _predator_solid_offsets(facing: str) -> list[tuple[int, int]]:
    """List the (col, row) offsets that get painted for a 3x3 ㄷ predator.

    These are the nine cells of the 3x3 footprint minus the two mouth cells
    for *facing*, in row-major order. An unknown *facing* is treated as
    ``"right"``.
    """
    mouth_cells = _PRED_MOUTH.get(facing, _PRED_MOUTH["right"])
    solid: list[tuple[int, int]] = []
    for row in range(3):
        for col in range(3):
            if (col, row) in mouth_cells:
                continue
            solid.append((col, row))
    return solid


# NPC agent kinds -> palette colour (errand_runner fallen-pedestrian rescue).
# memory_frames paints any AgentFrame whose `kind` is a key here as a solid
# `size` x `size` square in the mapped palette index.
# Colour progression: "npc_down" yellow (11) -> "npc_active" green (14).
_NPC_COLOR: dict[str, int] = {"npc_down": 11, "npc_active": 14}


def _set_cell(grid: list[list[int]], x: int, y: int, idx: int) -> None:
    """Set ``grid[y][x]`` to *idx*; coordinates outside the grid are ignored."""
    if 0 <= y < len(grid) and 0 <= x < len(grid[y]):
        grid[y][x] = idx


def _paint_square(
    grid: list[list[int]], pos: tuple[int, int], side: int, idx: int
) -> None:
    """Fill the ``side x side`` square anchored top-left at *pos*, clipped to the grid."""
    x0, y0 = pos
    for dy in range(side):
        for dx in range(side):
            _set_cell(grid, x0 + dx, y0 + dy, idx)


def _walled_grid(
    width: int,
    height: int,
    bg: int,
    wall_rects: list[tuple[int, int, int, int]],
    wall_idx: int,
) -> list[list[int]]:
    """Build a ``height x width`` grid of *bg* with *wall_rects* painted in.

    Rectangles are inclusive ``(x0, y0, x1, y1)`` and are clipped to the grid.
    """
    grid = [[bg] * width for _ in range(height)]
    for x0, y0, x1, y1 in wall_rects:
        for y in range(max(0, y0), min(height, y1 + 1)):
            for x in range(max(0, x0), min(width, x1 + 1)):
                grid[y][x] = wall_idx
    return grid


def memory_frames(
    checkpoint: MemoryCheckpoint, *, legend: dict[int, str], grid_size: tuple[int, int]
) -> list[dict]:
    """Reconstruct a renderable color grid per memory turn.

    Each turn is rendered by the first applicable path:

    1. Multi-agent (``mt.agents`` non-empty): background, then walls, then
       ``mt.cells`` overlays, then ``mt.resources``, then every live agent in
       list order (later sprites overpaint earlier ones). Predators use the
       3x3 ㄷ shape, NPC kinds a square in their ``_NPC_COLOR``, everything
       else a square in the chosen/distractor colour.
    2. Grid-style ``frame_ascii``: inverted cell-by-cell via *legend*.
    3. Prose ``frame_ascii``: background, walls, ``checkpoint.food_cells``,
       then solid legacy-sized predator and focal squares at the recorded
       positions (focal painted last).

    Args:
        checkpoint: The episode to render.
        legend: ``palette index -> symbol`` map of the scenario. Symbols
            ``"."``, ``"A"``, ``"C"``, ``"B"``, ``"#"``, ``"F"`` select the
            background, focal, chosen, predator, wall and food colours; each
            has a built-in fallback index when absent.
        grid_size: ``(width, height)`` of grids built for paths 1 and 3.

    Returns:
        ``[{turn_idx, action, grid, events}]`` where ``grid`` is a
        ``height x width`` list of palette indices and ``events`` is a copy
        of the turn's narration list. Pure: no engine, no IO.
    """
    sym2idx = _invert_legend(legend)
    bg = sym2idx.get(".", 5)
    focal_idx = sym2idx.get("A", 1)
    # The chosen agent paints the legend's focal sprite. Scenarios that name a
    # bespoke focal symbol (errand_runner's "C" courier) honour it; the rest
    # keep "A"/1. Distractor (non-chosen) agents stay on `distractor_idx`.
    chosen_idx = sym2idx.get("C", focal_idx)
    predator_idx = sym2idx.get("B", 2)
    wall_idx = sym2idx.get("#", 3)
    food_idx = sym2idx.get("F", 14)
    distractor_idx = 9  # COLOR_MAP blue
    # NB: errand's "C"=9 coincides with distractor_idx (9). Fine while errand is
    # single-agent (every agent is is_chosen, so the distractor branch is unused);
    # a future multi-agent errand would need distractor_idx to diverge or the focal
    # and distractor couriers would be indistinguishable.
    w, h = grid_size

    out: list[dict] = []
    for mt in checkpoint.memory_turns:
        if mt.agents:
            grid = _walled_grid(w, h, bg, checkpoint.wall_rects, wall_idx)
            for cx, cy, cidx in mt.cells:
                _set_cell(grid, cx, cy, cidx)
            for fx, fy in mt.resources:
                _set_cell(grid, fx, fy, food_idx)
            for ag in mt.agents:
                if not ag.alive:
                    continue  # eaten agents disappear from the frame
                if ag.kind == "predator":
                    ax, ay = ag.pos
                    for c, r in _predator_solid_offsets(ag.facing):
                        _set_cell(grid, ax + c, ay + r, predator_idx)
                elif ag.kind in _NPC_COLOR:
                    _paint_square(grid, ag.pos, ag.size, _NPC_COLOR[ag.kind])
                else:
                    color = chosen_idx if ag.is_chosen else distractor_idx
                    _paint_square(grid, ag.pos, ag.size, color)
        else:
            grid = _ascii_to_grid(mt.frame_ascii, sym2idx)
            if grid is None:
                # Prose observation: rebuild the scene from recorded geometry.
                grid = _walled_grid(w, h, bg, checkpoint.wall_rects, wall_idx)
                for fx, fy in checkpoint.food_cells:
                    _set_cell(grid, fx, fy, food_idx)
                _paint_square(
                    grid, mt.predator_pos, _LEGACY_PREDATOR_BLOCK, predator_idx
                )
                _paint_square(grid, mt.focal_pos, _LEGACY_FOCAL_BLOCK, focal_idx)
        out.append(
            {
                "turn_idx": mt.turn_idx,
                "action": mt.action,
                "grid": grid,
                "events": list(mt.events),
            }
        )
    return out