"""src/training/manifests.py — Build and validate manifest CSV files."""
from __future__ import annotations

import csv
import random
from pathlib import Path
from typing import List, Tuple

# Dot-prefixed, lowercase file suffixes that build_manifest accepts as images
# (each candidate path's suffix is lowercased before the membership test).
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp"}


def build_manifest(
    image_dirs: List[Tuple[Path, int, int]],  # (dir, label, generator_idx)
    output_path: Path,
    seed: int = 42,
    train_ratio: float = 0.80,
    val_ratio:   float = 0.10,
) -> dict:
    """
    Walk image directories and write shuffled train/val/test manifest CSVs.

    Every regular file found recursively under each directory whose
    lowercased suffix is in ``IMAGE_EXTS`` becomes one row with the columns
    ``filepath``, ``label`` and ``generator``. Rows are shuffled with a
    ``random.Random(seed)`` instance and cut into three contiguous slices;
    the test split receives whatever remains after train and val.

    Args:
        image_dirs: ``(directory, label, generator_idx)`` triples to scan.
        output_path: Base path for the manifests. Only its parent directory
            and stem are used: one file per split is written to
            ``<parent>/<stem>_<split>.csv`` (the parent is created if needed).
        seed: Seed for the shuffle, so identical inputs give identical splits.
        train_ratio: Fraction of records assigned to the train split.
        val_ratio: Fraction of records assigned to the val split.

    Returns:
        Dict mapping ``"train"``, ``"val"`` and ``"test"`` to the path of the
        CSV written for that split.

    Raises:
        ValueError: If either ratio is negative or the two ratios sum to
            more than 1.
    """
    # Reject ratios that would corrupt the slicing below: a negative ratio
    # becomes a negative slice index (train and test would then overlap),
    # and a sum above 1 silently shrinks or empties the val/test splits.
    if train_ratio < 0 or val_ratio < 0:
        raise ValueError(
            f"train_ratio and val_ratio must be non-negative, "
            f"got {train_ratio!r} and {val_ratio!r}"
        )
    # Small tolerance so pairs such as 0.7 + 0.3 are not rejected because of
    # floating-point rounding.
    if train_ratio + val_ratio > 1.0 + 1e-9:
        raise ValueError(
            f"train_ratio + val_ratio must not exceed 1, "
            f"got {train_ratio!r} + {val_ratio!r}"
        )

    rng     = random.Random(seed)
    records = []

    for img_dir, label, generator in image_dirs:
        # sorted() fixes the pre-shuffle order, so the split depends only on
        # the set of files and the seed, not on directory enumeration order.
        for p in sorted(Path(img_dir).rglob("*")):
            # rglob("*") also yields directories; is_file() keeps a directory
            # that happens to be named like an image out of the manifest.
            if p.suffix.lower() in IMAGE_EXTS and p.is_file():
                records.append({
                    "filepath":  str(p),
                    "label":     label,
                    "generator": generator,
                })

    rng.shuffle(records)
    n       = len(records)
    n_train = int(n * train_ratio)
    n_val   = int(n * val_ratio)

    # Contiguous, non-overlapping slices of the shuffled list; int() rounds
    # the train/val sizes down, so any remainder lands in the test split.
    splits = {
        "train": records[:n_train],
        "val":   records[n_train:n_train + n_val],
        "test":  records[n_train + n_val:],
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    manifest_paths = {}

    for split, rows in splits.items():
        out = output_path.parent / f"{output_path.stem}_{split}.csv"
        # newline="" lets the csv module control line endings itself.
        with open(out, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=["filepath", "label", "generator"])
            writer.writeheader()
            writer.writerows(rows)
        manifest_paths[split] = out

    return manifest_paths


def validate_manifest(manifest_path: Path) -> dict:
    """Count the rows of a manifest CSV by label and list files that are missing.

    Args:
        manifest_path: CSV file with a header row that includes at least the
            ``label`` and ``filepath`` columns.

    Returns:
        Dict with three keys:
          * ``counts``: ``total`` rows, ``real`` rows (label equal to 0) and
            ``fake`` rows (any other integer label).
          * ``missing``: ``filepath`` values that do not exist on disk, in
            file order.
          * ``ok``: ``True`` when ``missing`` is empty.

    Malformed rows are not collected in the result; they surface as
    exceptions (``KeyError`` when a required column is absent from the
    header, ``ValueError`` when a label is not an integer).
    """
    missing = []
    counts  = {"total": 0, "real": 0, "fake": 0}

    # newline="" is required by the csv module: without it, universal-newline
    # translation rewrites "\r" / "\r\n" inside quoted fields, so the path
    # checked here would differ from the one stored in the manifest.
    with open(manifest_path, newline="") as f:
        for row in csv.DictReader(f):
            counts["total"] += 1
            if int(row["label"]) == 0:
                counts["real"] += 1
            else:
                counts["fake"] += 1
            if not Path(row["filepath"]).exists():
                missing.append(row["filepath"])

    return {"counts": counts, "missing": missing, "ok": len(missing) == 0}