"""Pairwise adversarial evaluation + Elo.

The project goal includes evaluating models "on pairwise adversarial
conditions", not only fixed-scenario absolute scores. True co-located
1v1 (both sides model-driven in one map) is the deeper engine feature
(task #3); this module delivers the tractable, immediately-useful form:
**comparative** pairwise ranking — every model plays the *same*
controlled scenarios, and models are ranked by head-to-head composite
on shared cells via Elo. Pure + deterministic (no engine in
`pairwise_elo`), so it is fully unit-testable; `run_pairwise` wires it
to the live evaluator.
"""

from __future__ import annotations

import itertools
from pathlib import Path
from typing import Callable

from .run_eval import AgentFactory, evaluate

# Per model: {cell -> mean composite over that cell's seeds}.
# Outer key is the model name, inner key is the cell identifier.
ModelCellScores = dict[str, dict[str, float]]

# Elo K-factor: scales how far one pairwise match moves each rating.
ELO_K = 32.0
# Rating every model starts from before any pair is processed.
ELO_START = 1000.0
# Two composites within this absolute difference count as a tie on a cell.
_TIE_EPS = 1e-6


def _cell_means(stats: dict) -> dict[str, float]:
    """Mean composite per cell from a run_eval stats dict (public split)."""
    by: dict[str, list[float]] = {}
    for e in stats.get("episodes", []):
        if e.get("split", "public") != "public":
            continue
        by.setdefault(e["cell"], []).append(e["composite"])
    return {c: sum(v) / len(v) for c, v in by.items() if v}


def pairwise_elo(scores: ModelCellScores) -> dict:
    """Deterministic Elo from comparative per-cell composites.

    For each unordered model pair (A, B), A's match score is the
    fraction of shared cells where A beats B by more than ``_TIE_EPS``,
    with ties counted as 0.5. One Elo update is applied per pair.
    Models and pairs are processed in sorted-name order, so the result
    does not depend on the insertion order of ``scores``. Updates are
    sequential: each pair's expected score uses ratings already
    adjusted by earlier pairs.

    A cell whose two scores cannot be ordered (e.g. one composite is
    NaN) is counted as a tie, so neither model is credited with a win
    because of an undefined score.

    Args:
        scores: per model, a mapping of cell -> composite score.

    Returns:
        A dict with four keys:

        * ``"elo"``: model -> final rating, rounded to 1 decimal.
        * ``"rank"``: model -> 1-based rank by descending (unrounded)
          rating, ties broken by model name.
        * ``"matrix"``: ``matrix[a][b]`` is A's match score vs B,
          rounded to 4 decimals. Pairs without any shared cell have no
          entry.
        * ``"shared_cells"``: ``"a|b"`` -> number of cells present for
          both models, for every pair in sorted order (including 0).
    """
    models = sorted(scores)
    elo = {m: ELO_START for m in models}
    matrix: dict[str, dict[str, float]] = {m: {} for m in models}
    shared_cells: dict[str, int] = {}

    for a, b in itertools.combinations(models, 2):
        scores_a, scores_b = scores[a], scores[b]
        # Computed once per pair and reused for the shared-cell count.
        shared = set(scores_a) & set(scores_b)
        shared_cells[f"{a}|{b}"] = len(shared)
        if not shared:
            # No common ground: leave both ratings and the matrix as-is.
            continue

        # Sums of 1.0 and 0.5 are exact in floating point, so the
        # iteration order over ``shared`` cannot affect the total.
        wins = 0.0
        for c in shared:
            da = scores_a[c] - scores_b[c]
            if da > _TIE_EPS:
                wins += 1.0
            elif da < -_TIE_EPS:
                continue  # B strictly beats A on this cell.
            else:
                # Within tolerance, or not orderable (NaN difference):
                # split the cell instead of handing B a silent win.
                wins += 0.5

        sa = wins / len(shared)  # A's match score in [0,1]
        matrix[a][b] = round(sa, 4)
        matrix[b][a] = round(1.0 - sa, 4)
        # Standard Elo expected score for A given the current ratings.
        ea = 1.0 / (1.0 + 10.0 ** ((elo[b] - elo[a]) / 400.0))
        elo[a] += ELO_K * (sa - ea)
        elo[b] += ELO_K * ((1.0 - sa) - (1.0 - ea))

    ranked = sorted(models, key=lambda m: (-elo[m], m))
    return {
        "elo": {m: round(elo[m], 1) for m in models},
        "rank": {m: i + 1 for i, m in enumerate(ranked)},
        "matrix": matrix,  # matrix[a][b] = A's match score vs B
        "shared_cells": shared_cells,
    }


def run_pairwise(
    packs: list[Path],
    levels: list[str],
    seeds: list[int],
    agents: dict[str, AgentFactory],
) -> dict:
    """Evaluate every named model on identical packs/levels/seeds and
    rank them head-to-head.

    Args:
        packs, levels, seeds: forwarded unchanged to ``evaluate`` for
            each model, so all models see the same inputs.
        agents: model name -> agent factory, passed to ``evaluate`` as
            ``agent_factory``.

    Returns:
        A dict with ``"pairwise"`` (the ``pairwise_elo`` result),
        ``"cell_scores"`` (per-model cell means) and
        ``"per_model_overall"`` (each model's ``"overall"`` stats entry,
        or ``{}`` when absent).

    Raises:
        ValueError: if fewer than two models are supplied.
    """
    if len(agents) < 2:
        raise ValueError("pairwise eval needs >= 2 models")

    # Run the models one after another, in the order given by ``agents``.
    stats_by_model: dict[str, dict] = {
        model: evaluate(packs, levels, seeds, agent_factory=make_agent)
        for model, make_agent in agents.items()
    }
    cell_scores: ModelCellScores = {
        model: _cell_means(model_stats)
        for model, model_stats in stats_by_model.items()
    }
    overall = {
        model: model_stats.get("overall", {})
        for model, model_stats in stats_by_model.items()
    }
    return {
        "pairwise": pairwise_elo(cell_scores),
        "cell_scores": cell_scores,
        "per_model_overall": overall,
    }