Spaces:

ar07xd
/

deepshield

Running

File size: 3,790 Bytes

711bdfc

#!/usr/bin/env python3
"""Phase C4 — Grid-search temperature scalars for AI-image detector heads.

Reads MANIFEST.csv, sweeps GENERAL_MODEL_TEMPERATURE and
DIFFUSION_MODEL_TEMPERATURE over a grid, and reports the combination that
maximises F1 on the eval set. The script only prints the winning values; copy
them into .env or config.py yourself.

Usage (from backend/):
    .venv/Scripts/python.exe scripts/calibrate_temperatures.py
    .venv/Scripts/python.exe scripts/calibrate_temperatures.py --steps 10

Requires eval images to be present in tests/eval/images/.
"""
from __future__ import annotations

import argparse
import csv
import sys
from pathlib import Path

# Put the grandparent of this file (presumably backend/, per the usage note
# in the module docstring) at the front of sys.path so that the
# function-level imports of `config` and `services` in _score_all resolve.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

# CSV manifest describing the eval set, and the directory in which the
# referenced image files are looked up.
MANIFEST = Path(__file__).resolve().parent.parent / "tests" / "eval" / "MANIFEST.csv"
IMAGES_ROOT = Path(__file__).resolve().parent.parent / "tests" / "eval" / "images"


def _load_manifest():
    """Return the manifest entries whose image file exists under IMAGES_ROOT.

    Every row of the MANIFEST CSV is mapped to
    ``IMAGES_ROOT / <basename of row["path"]>``; any directory component in
    the manifest path is discarded. Rows whose image is not present on disk
    are skipped.

    Returns:
        A list of dicts with keys ``"path"`` (resolved ``Path``), ``"label"``
        and ``"family"`` (both copied verbatim from the CSV row).
    """
    entries = []
    with open(MANIFEST, newline="", encoding="utf-8") as handle:
        reader = csv.DictReader(handle)
        for record in reader:
            # Only the file name is used; images are expected flat in IMAGES_ROOT.
            image_path = IMAGES_ROOT / Path(record["path"]).name
            if not image_path.exists():
                continue
            entries.append(
                {
                    "path": image_path,
                    "label": record["label"],
                    "family": record["family"],
                }
            )
    return entries


def _score_all(rows, t_gen: float, t_diff: float) -> list[dict]:
    """Classify every eval image under one pair of temperature scalars.

    The two temperatures are exported as the environment variables
    GENERAL_MODEL_TEMPERATURE and DIFFUSION_MODEL_TEMPERATURE, after which
    ``config`` and the two image service modules are reloaded so that the
    new values take effect.

    Args:
        rows: Iterable of dicts with at least ``"path"`` and ``"label"``
            keys (as produced by ``_load_manifest``).
        t_gen: Temperature exported as GENERAL_MODEL_TEMPERATURE.
        t_diff: Temperature exported as DIFFUSION_MODEL_TEMPERATURE.

    Returns:
        One dict per successfully classified image with keys ``"label"``,
        ``"fake_prob"``, ``"predicted_fake"`` and ``"actual_fake"``.
        Images that fail to load or classify are left out; the number of
        such failures is reported on stderr.
    """
    import os
    # float() keeps the exported value a plain decimal string even when the
    # caller passes a NumPy scalar.
    os.environ["GENERAL_MODEL_TEMPERATURE"] = str(float(t_gen))
    os.environ["DIFFUSION_MODEL_TEMPERATURE"] = str(float(t_diff))

    # Force reload of settings (they're read at import time via pydantic-settings)
    import importlib
    import config as cfg_mod
    importlib.reload(cfg_mod)

    from services import general_image_service as gis
    importlib.reload(gis)
    from services import image_service as ims
    importlib.reload(ims)

    from PIL import Image

    results = []
    failures = 0
    first_error = None
    for row in rows:
        try:
            # Close the underlying file as soon as the RGB copy exists so
            # handles do not pile up across the grid sweep.
            with Image.open(row["path"]) as source:
                pil = source.convert("RGB")
            clf = ims.classify_image(pil)
            # NOTE(review): assumes clf.confidence is the probability of
            # "fake" (not the confidence of the predicted class) - confirm.
            fake_prob = clf.confidence
            results.append({
                "label": row["label"],
                "fake_prob": fake_prob,
                "predicted_fake": fake_prob >= 0.5,
                "actual_fake": row["label"] == "fake",
            })
        except Exception as exc:
            # Best effort: one bad image must not abort the sweep, but the
            # failure is recorded instead of being silently dropped.
            failures += 1
            if first_error is None:
                first_error = (row["path"], exc)

    if failures:
        bad_path, bad_exc = first_error
        print(
            f"[WARN] {failures} image(s) skipped at t_gen={t_gen}, "
            f"t_diff={t_diff}; first failure: {bad_path}: {bad_exc!r}",
            file=sys.stderr,
        )
    return results


def _f1(results) -> float:
    tp = sum(1 for r in results if r["predicted_fake"] and r["actual_fake"])
    fp = sum(1 for r in results if r["predicted_fake"] and not r["actual_fake"])
    fn = sum(1 for r in results if not r["predicted_fake"] and r["actual_fake"])
    denom = 2 * tp + fp + fn
    return (2 * tp / denom) if denom > 0 else 0.0


def main(argv=None):
    """Run the temperature grid search and print the best combination.

    Sweeps both temperature scalars over ``--steps`` evenly spaced values in
    [0.5, 2.0], scores every combination with ``_score_all`` / ``_f1`` and
    prints a table followed by the pair with the highest F1. Nothing is
    written to disk; the winning values are only printed.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 2 when ``--steps`` is not a positive integer
            (or on any other argparse usage error).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", type=int, default=6,
                        help="Grid steps per axis (default: 6 → 36 combinations)")
    args = parser.parse_args(argv)
    # A zero-sized grid would evaluate nothing yet still report a "best"
    # pair, and a negative size fails inside numpy; reject both up front.
    if args.steps < 1:
        parser.error("--steps must be a positive integer")

    rows = _load_manifest()
    if not rows:
        print(f"[WARN] No eval images found in {IMAGES_ROOT}. Populate first.")
        return

    print(f"Calibrating on {len(rows)} images with {args.steps}x{args.steps} grid…\n")

    import numpy as np
    # .tolist() yields plain Python floats rather than NumPy scalars.
    temps = np.linspace(0.5, 2.0, args.steps).tolist()

    # Defaults of 1.0/1.0 are reported if no combination scores above 0.
    best_f1, best_tg, best_td = 0.0, 1.0, 1.0
    print(f"  {'t_gen':>6}  {'t_diff':>6}  {'F1':>6}")
    print("  " + "-" * 24)
    for tg in temps:
        for td in temps:
            results = _score_all(rows, tg, td)
            f1 = _f1(results)
            # Strict '>' keeps the first-seen combination on ties.
            if f1 > best_f1:
                best_f1, best_tg, best_td = f1, tg, td
            print(f"  {tg:6.2f}  {td:6.2f}  {f1:6.3f}")

    print(f"\nBest: GENERAL_MODEL_TEMPERATURE={best_tg:.2f}  "
          f"DIFFUSION_MODEL_TEMPERATURE={best_td:.2f}  F1={best_f1:.3f}")
    print("\nAdd these to backend/.env:\n"
          f"  GENERAL_MODEL_TEMPERATURE={best_tg:.2f}\n"
          f"  DIFFUSION_MODEL_TEMPERATURE={best_td:.2f}")


# Run the calibration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()