from __future__ import annotations

import argparse
import json
import sys
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

import polars as pl

from dota2tuned.recommend import DraftRecommender
from dota2tuned.schemas import DraftInput

# Roles audited as recommendation surfaces. ``None`` requests no specific role
# and is labelled "any" in the report.
ROLES = [None, "carry", "mid", "offlane", "soft support", "hard support"]


def _confidence(sample_size: int, threshold: int) -> str:
    if sample_size >= threshold:
        return "high"
    if sample_size >= 100:
        return "medium"
    return "low"


def _read_parquet(path: Path) -> pl.DataFrame:
    if not path.exists():
        raise FileNotFoundError(f"missing parquet file: {path}")
    return pl.read_parquet(path)


def _hero_samples(parquet_dir: Path, threshold: int) -> tuple[dict[str, Any], pl.DataFrame]:
    """Summarise per-hero sample sizes and confidence tiers.

    Reads ``dim_hero.parquet`` and ``fact_player_match.parquet`` from
    ``parquet_dir``. A hero's sample size is the larger of its row count in
    the player-match table and its ``pro_pick`` value; ``pro_pick`` is treated
    as 0 when the hero table has no such column.

    Args:
        parquet_dir: Directory containing the two parquet files.
        threshold: Minimum sample size for the ``"high"`` confidence tier.

    Returns:
        A ``(summary, base)`` pair. ``base`` has one row per row of the hero
        table with columns ``hero_id``, ``hero_name``, ``pro_pick``,
        ``player_games``, ``sample_size`` and ``confidence``. ``summary`` is a
        dict with the hero count (``heroes``), per-tier counts (``high``,
        ``medium``, ``low``), sample-size statistics (``min``, ``p25``,
        ``median``, ``p75``, ``max``), the ``threshold`` and a
        ``max_confidence`` flag that is True only when there is at least one
        hero and every hero is in the ``"high"`` tier.

    Raises:
        FileNotFoundError: If either parquet file is missing.
    """
    heroes = _read_parquet(parquet_dir / "dim_hero.parquet")
    players = _read_parquet(parquet_dir / "fact_player_match.parquet")
    if "pro_pick" not in heroes.columns:
        heroes = heroes.with_columns(pl.lit(0).alias("pro_pick"))
    counts = players.group_by("hero_id").agg(pl.len().cast(pl.Int64).alias("player_games"))
    base = (
        heroes.select(["hero_id", "hero_name", "pro_pick"])
        .join(counts, on="hero_id", how="left")
        # Heroes absent from the player-match table have no count after the
        # left join; treat them as zero games.
        .with_columns(pl.col("player_games").fill_null(0))
        .with_columns(pl.max_horizontal("player_games", "pro_pick").alias("sample_size"))
        .with_columns(
            pl.col("sample_size")
            .map_elements(lambda value: _confidence(int(value), threshold), return_dtype=pl.String)
            .alias("confidence")
        )
    )
    # Count tiers from the ``confidence`` column rather than re-deriving the
    # cutoffs here, so the summary always agrees with ``_confidence``. The
    # previous hand-written ranges counted a hero as both "high" and "low"
    # whenever ``threshold`` was below the medium cutoff.
    tier = pl.col("confidence")
    summary = base.select(
        [
            pl.len().alias("heroes"),
            (tier == "high").sum().alias("high"),
            (tier == "medium").sum().alias("medium"),
            (tier == "low").sum().alias("low"),
            pl.col("sample_size").min().alias("min"),
            pl.col("sample_size").quantile(0.25).alias("p25"),
            pl.col("sample_size").median().alias("median"),
            pl.col("sample_size").quantile(0.75).alias("p75"),
            pl.col("sample_size").max().alias("max"),
        ]
    ).to_dicts()[0]
    summary["threshold"] = threshold
    # An empty hero table must not pass as "max confidence"; this mirrors the
    # non-empty requirement applied to ``all_high`` for recommendation surfaces.
    summary["max_confidence"] = (
        summary["heroes"] > 0 and summary["low"] == 0 and summary["medium"] == 0
    )
    return summary, base


def _recommendation_surfaces(parquet_dir: Path, threshold: int) -> list[dict[str, Any]]:
    """Audit the sample sizes behind the top recommendations of each surface.

    One surface is audited per entry in ``ROLES`` (the ``None`` role is
    labelled ``"any"``), followed by a fixed demo draft labelled
    ``"demo_pa_wd_mid"``. Up to 8 recommendations are requested per surface.

    Args:
        parquet_dir: Directory handed to ``DraftRecommender``.
        threshold: Sample size a recommendation must reach to count as high.

    Returns:
        One dict per surface with keys ``surface``, ``all_high`` (True only
        when recommendations exist and none is below ``threshold``),
        ``min_sample`` (0 when there are no recommendations) and
        ``recommendations`` (hero name, sample size and confidence of each).
    """
    recommender = DraftRecommender(parquet_dir)

    per_role: list[tuple[str, DraftInput]] = [
        (role or "any", DraftInput(role=role)) for role in ROLES
    ]
    demo: tuple[str, DraftInput] = (
        "demo_pa_wd_mid",
        DraftInput(enemy_heroes=[44, 30], role="mid", scope="pro", patch="current"),
    )

    def audit(label: str, draft: DraftInput) -> dict[str, Any]:
        """Run one draft through the recommender and summarise its samples."""
        recs = recommender.recommend(draft, limit=8)
        sizes = [rec.sample_size for rec in recs]
        any_below = any(size < threshold for size in sizes)
        details = [
            {
                "hero": rec.hero_name,
                "sample_size": rec.sample_size,
                "confidence": rec.confidence,
            }
            for rec in recs
        ]
        return {
            "surface": label,
            "all_high": not any_below and bool(recs),
            "min_sample": min(sizes, default=0),
            "recommendations": details,
        }

    return [audit(label, draft) for label, draft in [*per_role, demo]]


def build_payload(parquet_dir: Path, threshold: int, lowest: int) -> dict[str, Any]:
    """Assemble the full audit payload.

    Args:
        parquet_dir: Directory containing the parquet inputs.
        threshold: Sample size required for the high confidence tier.
        lowest: Number of below-threshold heroes to list, smallest samples first.

    Returns:
        A dict with keys ``generated_at`` (UTC ISO-8601 timestamp),
        ``parquet_dir``, ``hero_samples``, ``lowest_sample_heroes`` and
        ``recommendation_surfaces``.
    """
    summary, base = _hero_samples(parquet_dir, threshold)

    report_columns = [
        "hero_id",
        "hero_name",
        "player_games",
        "pro_pick",
        "sample_size",
        "confidence",
    ]
    below_threshold = base.filter(pl.col("sample_size") < threshold).sort("sample_size")
    low_heroes = below_threshold.select(report_columns).head(lowest).to_dicts()

    surfaces = _recommendation_surfaces(parquet_dir, threshold)

    # Stamp the payload after all the work above has finished.
    generated_at = datetime.now(UTC).isoformat()
    return {
        "generated_at": generated_at,
        "parquet_dir": str(parquet_dir),
        "hero_samples": summary,
        "lowest_sample_heroes": low_heroes,
        "recommendation_surfaces": surfaces,
    }


def print_report(payload: dict[str, Any]) -> None:
    """Print a human-readable summary of an audit payload to stdout.

    Args:
        payload: Dict with ``hero_samples``, ``lowest_sample_heroes`` and
            ``recommendation_surfaces`` entries, as built by ``build_payload``.
    """
    stats = payload["hero_samples"]
    tier_counts = f"high={stats['high']} medium={stats['medium']} low={stats['low']}"
    spread = f"min={stats['min']} median={stats['median']} max={stats['max']}"
    print(f"hero confidence: {tier_counts} {spread} threshold={stats['threshold']}")
    print(f"max_confidence={stats['max_confidence']}")

    print("lowest sample heroes:")
    for hero in payload["lowest_sample_heroes"]:
        name = hero["hero_name"]
        fields = (
            f"sample={hero['sample_size']}",
            f"players={hero['player_games']}",
            f"pro_pick={hero['pro_pick']}",
            f"confidence={hero['confidence']}",
        )
        print(f"- {name}: " + " ".join(fields))

    print("recommendation surfaces:")
    for entry in payload["recommendation_surfaces"]:
        label = entry["surface"]
        all_high = entry["all_high"]
        min_sample = entry["min_sample"]
        # Only the first three recommended heroes are shown per surface.
        top_three = [item["hero"] for item in entry["recommendations"][:3]]
        print(f"- {label}: all_high={all_high} min_sample={min_sample} top={top_three}")


def _non_negative_int(text: str) -> int:
    """Parse ``text`` as an int for argparse, rejecting negative values."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 0:
        raise argparse.ArgumentTypeError(f"must be a non-negative integer, got {value}")
    return value


def main(argv: list[str] | None = None) -> None:
    """Run the confidence audit from the command line.

    Builds the audit payload, optionally writes it as JSON to ``--output``
    (creating parent directories as needed), and prints the text report.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: With code 2 on invalid arguments, or code 1 when
            ``--fail-under-max`` is set and the payload's ``max_confidence``
            flag is false.
    """
    parser = argparse.ArgumentParser(description="Audit DOTA2Tuned recommendation confidence.")
    parser.add_argument("--parquet-dir", type=Path, default=Path("data/parquet"))
    # Negative values are rejected up front: a negative ``--lowest`` would be
    # passed to ``DataFrame.head`` and select "all but the last n" rows
    # instead of the n lowest.
    parser.add_argument("--threshold", type=_non_negative_int, default=500)
    parser.add_argument("--lowest", type=_non_negative_int, default=25)
    parser.add_argument("--output", type=Path)
    parser.add_argument(
        "--fail-under-max",
        action="store_true",
        help="Exit 1 unless every hero sample is at or above the threshold.",
    )
    args = parser.parse_args(argv)

    payload = build_payload(args.parquet_dir, args.threshold, args.lowest)
    if args.output:
        args.output.parent.mkdir(parents=True, exist_ok=True)
        args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    print_report(payload)
    if args.fail_under_max and not payload["hero_samples"]["max_confidence"]:
        sys.exit(1)


# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()