"""Tiny CPU fighter model for real-time NPC move selection.

Architecture: ~79k parameter MLP with LayerNorm (behaves correctly at
batch=1 inference, unlike BatchNorm1d which has degenerate running variance
when there's only a single sample). Fast enough for real-time combat
(< 1ms on CPU) while having enough capacity to learn strategy-conditioned
move selection.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import List, Optional

# Move vocabulary. The list order fixes each move's index in the one-hot
# encodings and in the policy's output logits, so it must stay stable.
MOVES = (
    ["jab", "cross", "hook", "kick", "uppercut"]
    + ["block", "parry", "dodge"]
    + ["advance", "retreat"]
    + ["grapple", "throw"]
    + ["sweep", "feint", "wait"]
)
NUM_MOVES = len(MOVES)
# Reverse lookup: move name -> position in MOVES.
MOVE_TO_IDX = dict(zip(MOVES, range(NUM_MOVES)))

# Move categories; together they cover every entry of MOVES exactly once.
ATTACKS = set(MOVES[:5]) | {"sweep"}
DEFENSES = set(MOVES[5:8])
MOVEMENT = set(MOVES[8:10])
GRAPPLES = set(MOVES[10:12])
UTILITY = set(MOVES[13:])

# Layer widths of the policy MLP (input -> hidden 1 -> hidden 2).
INPUT_DIM, HIDDEN1, HIDDEN2 = 168, 256, 128


class TinyFighter(nn.Module):
    """Strategy-conditioned NPC move policy, sized for per-request CPU inference.

    The network is two ``Linear -> LayerNorm -> ReLU -> Dropout(0.1)`` stages
    followed by a ``Linear`` head that emits one logit per move.
    """

    def __init__(self):
        super().__init__()
        widths = (INPUT_DIM, HIDDEN1, HIDDEN2)
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.extend(
                (
                    nn.Linear(fan_in, fan_out),
                    nn.LayerNorm(fan_out),
                    nn.ReLU(),
                    nn.Dropout(0.1),
                )
            )
        stack.append(nn.Linear(widths[-1], NUM_MOVES))
        self.net = nn.Sequential(*stack)

        # Re-initialise every Linear: He-normal weights (ReLU gain), zero bias.
        linear_layers = [layer for layer in self.net if isinstance(layer, nn.Linear)]
        for layer in linear_layers:
            nn.init.kaiming_normal_(layer.weight, nonlinearity="relu")
            nn.init.zeros_(layer.bias)

    def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Return move logits for ``x``; a 1-D input is treated as a batch of one.

        When ``mask`` is given, every position where it equals 0 has its
        logit replaced by -1e9. A 1-D mask is broadcast over the batch.
        """
        batch = x.unsqueeze(0) if x.dim() == 1 else x
        logits = self.net(batch)
        if mask is None:
            return logits
        allowed = mask.unsqueeze(0) if mask.dim() == 1 else mask
        return logits.masked_fill(allowed == 0, -1e9)

    def predict(self, feats: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Run ``forward`` under ``torch.inference_mode``.

        This is the intended entry point for the real-time path (one state,
        one move per request): inference mode skips autograd tracking.
        Batched evaluation can call ``forward`` directly inside the caller's
        own no-grad context instead.
        """
        with torch.inference_mode():
            return self.forward(feats, mask)


def remap_bn_state_to_ln(state_dict: dict) -> dict:
    """Strip BatchNorm1d entries from a state dict for the LayerNorm model.

    Linear weights and biases are passed through unchanged. Two kinds of
    entries are removed:

    * BatchNorm buffers: any key ending in ``running_mean``, ``running_var``
      or ``num_batches_tracked``.
    * The normalization affine parameters (``.weight`` / ``.bias``) stored
      at positions 1 and 5 of the ``net`` Sequential. Both bare keys
      (``net.1.weight``) and prefixed keys (``module.net.1.weight``) match.

    Because the affine parameters are discarded, the LayerNorm modules keep
    their PyTorch defaults (weight=1, bias=0) after loading. The model still
    produces a well-defined output, though the policy will likely need some
    additional training to recover its previous quality.

    The result has no entries for the normalization layers, so it must be
    loaded with ``load_state_dict(..., strict=False)``.

    Args:
        state_dict: Mapping of parameter/buffer names to values, as produced
            by the BatchNorm-based model's ``state_dict()``.

    Returns:
        A new dict holding only the retained entries; the input is not
        modified.
    """
    bn_buffer_suffixes = ("running_mean", "running_var", "num_batches_tracked")
    affine_suffixes = (".weight", ".bias")
    # Key fragments that identify the two normalization layers, both at the
    # start of a key ("net.1.") and behind a wrapper prefix (".net.1.").
    norm_roots = tuple(f"net.{i}." for i in (1, 5))
    nested_norm_roots = tuple(f".{root}" for root in norm_roots)

    def is_norm_affine(key: str) -> bool:
        if not key.endswith(affine_suffixes):
            return False
        return key.startswith(norm_roots) or any(
            root in key for root in nested_norm_roots
        )

    return {
        key: value
        for key, value in state_dict.items()
        if not key.endswith(bn_buffer_suffixes) and not is_norm_affine(key)
    }


def _one_hot_history(moves: List[str], history_len: int) -> List[float]:
    """Encode the last ``history_len`` moves as concatenated one-hot vectors.

    The most recent move comes first. Slots with no recorded move are
    encoded as "wait"; unrecognised move names fall back to the last move
    index (``NUM_MOVES - 1``).
    """
    encoded: List[float] = []
    for steps_back in range(history_len):
        name = moves[-(steps_back + 1)] if len(moves) > steps_back else "wait"
        one_hot = [0.0] * NUM_MOVES
        one_hot[MOVE_TO_IDX.get(name, NUM_MOVES - 1)] = 1.0
        encoded.extend(one_hot)
    return encoded


def state_to_features(
    last_npc_moves: List[str],
    last_player_moves: List[str],
    player_hp: float,
    npc_hp: float,
    player_stamina: float,
    npc_stamina: float,
    distance: str,
    aggression: float,
    defense: float,
    parry_affinity: float,
    kick_affinity: float,
    grapple_affinity: float,
    round_num: int = 1,
    history_len: int = 5,
) -> torch.Tensor:
    """Convert game state to an ``INPUT_DIM``-long (168) feature tensor.

    Feature layout, in order:

    1. ``history_len`` one-hot vectors for the NPC's latest moves, most
       recent first.
    2. ``history_len`` one-hot vectors for the player's latest moves, most
       recent first.
    3. HP difference and stamina difference (NPC minus player), each
       divided by 100.
    4. A 3-way one-hot of ``distance`` over ("near", "mid", "far"); any
       other value is encoded as "mid".
    5. The five strategy values, appended unchanged: ``aggression``,
       ``defense``, ``parry_affinity``, ``kick_affinity``,
       ``grapple_affinity``.
    6. ``round_num`` capped at 10 and divided by 10.
    7. ``player_hp``, ``npc_hp``, ``player_stamina``, ``npc_stamina``, each
       divided by 100.
    8. Zero padding up to ``INPUT_DIM``.

    Args:
        last_npc_moves: NPC move names, oldest first.
        last_player_moves: Player move names, oldest first.
        player_hp: Player hit points.
        npc_hp: NPC hit points.
        player_stamina: Player stamina.
        npc_stamina: NPC stamina.
        distance: "near", "mid" or "far".
        aggression: Strategy value.
        defense: Strategy value.
        parry_affinity: Strategy value.
        kick_affinity: Strategy value.
        grapple_affinity: Strategy value.
        round_num: Current round number.
        history_len: Number of past moves encoded per fighter.

    Returns:
        A 1-D ``float32`` tensor of length ``INPUT_DIM``.

    Raises:
        ValueError: If ``history_len`` is large enough that the encoded
            state no longer fits in ``INPUT_DIM`` features.
    """
    features: List[float] = []
    features.extend(_one_hot_history(last_npc_moves, history_len))
    features.extend(_one_hot_history(last_player_moves, history_len))

    features.append((npc_hp - player_hp) / 100.0)
    features.append((npc_stamina - player_stamina) / 100.0)

    bands = ("near", "mid", "far")
    dist_oh = [0.0] * len(bands)
    # Unknown distance labels fall back to the "mid" slot.
    dist_oh[bands.index(distance) if distance in bands else 1] = 1.0
    features.extend(dist_oh)

    features.extend(
        (aggression, defense, parry_affinity, kick_affinity, grapple_affinity)
    )

    features.append(min(round_num, 10) / 10.0)
    features.extend(
        (
            player_hp / 100.0,
            npc_hp / 100.0,
            player_stamina / 100.0,
            npc_stamina / 100.0,
        )
    )

    # Fail loudly instead of returning a vector the model cannot consume.
    if len(features) > INPUT_DIM:
        raise ValueError(
            f"history_len={history_len} produces {len(features)} features, "
            f"which exceeds INPUT_DIM={INPUT_DIM}"
        )
    features.extend([0.0] * (INPUT_DIM - len(features)))

    return torch.tensor(features, dtype=torch.float32)


def make_move_mask(distance: str) -> torch.Tensor:
    """Build a per-move validity mask (1.0 = allowed, 0.0 = blocked).

    At "far" distance grapple, throw and sweep are blocked; at "near"
    distance advance is blocked. Any other distance allows every move.
    The result is a 1-D float32 tensor with one entry per move.
    """
    if distance == "far":
        blocked = ("grapple", "throw", "sweep")
    elif distance == "near":
        blocked = ("advance",)
    else:
        blocked = ()
    blocked_idx = {MOVE_TO_IDX[name] for name in blocked}
    flags = [0.0 if i in blocked_idx else 1.0 for i in range(NUM_MOVES)]
    return torch.tensor(flags, dtype=torch.float32)


if __name__ == "__main__":
    # Smoke test / micro-benchmark: report parameter count, per-call
    # latency, and the move chosen for one sample state.
    import time

    model = TinyFighter()
    total = sum(param.numel() for param in model.parameters())
    print(f"Total params: {total:,}")

    model.eval()
    sample_state = dict(
        last_npc_moves=["jab", "block", "kick"],
        last_player_moves=["cross", "retreat", "jab"],
        player_hp=80.0,
        npc_hp=50.0,
        player_stamina=60.0,
        npc_stamina=40.0,
        distance="mid",
        aggression=0.7,
        defense=0.3,
        parry_affinity=0.4,
        kick_affinity=0.6,
        grapple_affinity=0.2,
        round_num=3,
    )
    features = state_to_features(**sample_state)
    mask = make_move_mask("mid")

    # Two untimed calls so one-off dispatch cost stays out of the timing.
    for _ in range(2):
        model.predict(features, mask)

    n_calls = 1000
    with torch.inference_mode():
        start = time.perf_counter()
        for _ in range(n_calls):
            logits = model.predict(features, mask)
        # Mean seconds per call, converted to milliseconds.
        elapsed = (time.perf_counter() - start) / n_calls * 1000
        probs = F.softmax(logits, dim=-1)
        move_idx = probs.argmax().item()

    print(f"Inference: {elapsed:.3f}ms per call")
    print(f"Suggested move: {MOVES[move_idx]} (prob={probs[0][move_idx]:.3f})")