"""Generate dashboard plots locally from the committed trace JSONs.

Use this when ``trainer_state.json`` is not available locally (it lives only on
the Colab Drive). The 24 trace files in ``data/traces/`` are enough to produce
``bypass_bars.png`` and ``per_category.png`` honestly. When traces carry a
``steps`` label, a bypass-rate-by-step curve is also written as
``reward_curve.png``.

Usage:
    python scripts/plots_from_traces.py --traces data/traces --out docs/plots
"""

from __future__ import annotations

import argparse
import json
from collections import defaultdict
from pathlib import Path
from typing import Dict, List


# Handcrafted-corpus baseline rates — the same numbers scripts/make_plots.py
# uses. The keys double as the metric labels looked up in _plot_bypass_bars.
BASELINES: Dict[str, float] = {
    "PG2 Bypass": 0.15,
    "FW Bypass": 0.20,
    "Task Success": 0.05,
    "Composed Bypass": 0.02,
}


def _aggregate(traces_dir: Path) -> Dict:
    """Tally outcome flags over every ``*.json`` trace in ``traces_dir``.

    Each trace is read as a JSON object with an optional ``outcome`` mapping
    (flags ``broke_pg2``, ``broke_fw``, ``task_succeeded``,
    ``composed_bypass``) and an optional ``attack_type``. A missing or
    ``null`` ``outcome`` counts as "no flag set"; a missing or ``null``
    ``attack_type`` is grouped under ``"?"``.

    Args:
        traces_dir: Directory scanned (non-recursively) for ``*.json`` files.

    Returns:
        A dict with ``n`` (number of trace files), the overall ``pg2_rate``,
        ``fw_rate``, ``task_rate`` and ``composed_rate`` (each a fraction of
        ``n``), and ``by_type``: per attack type, the raw counts ``n``,
        ``task``, ``composed``, ``pg2`` and ``fw``.

    Raises:
        SystemExit: If ``traces_dir`` contains no ``*.json`` files.
    """
    files = sorted(traces_dir.glob("*.json"))
    n = len(files)
    if not files:
        raise SystemExit(f"No traces in {traces_dir}")

    # Outcome flag in the trace -> counter key it feeds (overall and per type).
    flag_to_key = {
        "broke_pg2": "pg2",
        "broke_fw": "fw",
        "task_succeeded": "task",
        "composed_bypass": "composed",
    }
    totals = dict.fromkeys(flag_to_key.values(), 0)
    by_type: Dict[str, Dict[str, int]] = defaultdict(
        lambda: {"n": 0, "task": 0, "composed": 0, "pg2": 0, "fw": 0}
    )

    for f in files:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with f.open(encoding="utf-8") as fh:
            t = json.load(fh)

        # Treat an explicit null the same as an absent key so one sparse
        # trace cannot abort the whole run.
        o = t.get("outcome") or {}
        atype = t.get("attack_type")
        if atype is None:
            atype = "?"

        per_type = by_type[atype]
        per_type["n"] += 1
        for flag, key in flag_to_key.items():
            if o.get(flag):
                totals[key] += 1
                per_type[key] += 1

    return {
        "n": n,
        "pg2_rate": totals["pg2"] / n,
        "fw_rate": totals["fw"] / n,
        "task_rate": totals["task"] / n,
        "composed_rate": totals["composed"] / n,
        "by_type": dict(by_type),
    }


def _plot_bypass_bars(stats: Dict, out: Path) -> None:
    """Draw grouped bars comparing aggregated trace rates with ``BASELINES``.

    For each of the four metrics, one bar shows the handcrafted baseline and
    the bar beside it shows the rate found in ``stats``. Bars taller than
    0.005 get a percentage label. The figure is saved as
    ``bypass_bars.png`` inside ``out``.

    Args:
        stats: Aggregate dict providing ``pg2_rate``, ``fw_rate``,
            ``task_rate``, ``composed_rate`` and ``n``.
        out: Directory that receives the PNG.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    # (tick label, key in ``stats``), in left-to-right plotting order.
    series = (
        ("PG2 Bypass", "pg2_rate"),
        ("FW Bypass", "fw_rate"),
        ("Task Success", "task_rate"),
        ("Composed Bypass", "composed_rate"),
    )
    labels = [label for label, _ in series]
    observed = [stats[key] for _, key in series]

    positions = np.arange(len(labels))
    bar_width = 0.35
    half_width = bar_width / 2

    _, axes = plt.subplots(figsize=(9, 5))
    baseline_bars = axes.bar(
        positions - half_width,
        [BASELINES[label] for label in labels],
        bar_width,
        label="Handcrafted Baseline",
        color="#94a3b8",
        edgecolor="white",
    )
    trained_bars = axes.bar(
        positions + half_width,
        observed,
        bar_width,
        label="InjectArena (RL-trained)",
        color="#3b82f6",
        edgecolor="white",
    )

    axes.set_ylabel("Rate")
    axes.set_title(f"InjectArena — Attacker Performance vs Baseline (n={stats['n']} traces)")
    axes.set_xticks(positions)
    axes.set_xticklabels(labels)
    axes.set_ylim(0, 1.05)
    axes.legend()
    axes.grid(axis="y", alpha=0.3)

    # Annotate every bar that is tall enough to be visible.
    for rect in [*baseline_bars, *trained_bars]:
        height = rect.get_height()
        if not height > 0.005:
            continue
        center_x = rect.get_x() + rect.get_width() / 2
        axes.text(center_x, height + 0.015, f"{height:.0%}",
                  ha="center", va="bottom", fontsize=9)

    out_path = out / "bypass_bars.png"
    plt.tight_layout()
    plt.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Saved {out_path}")


def _plot_per_category(stats: Dict, out: Path) -> None:
    """Draw per-attack-type bars for PG2 bypass, FW bypass and task success.

    Attack types are taken from ``stats["by_type"]`` and plotted in sorted
    order; each rate is that type's counter divided by its ``n``. The figure
    is saved as ``per_category.png`` inside ``out``.

    Args:
        stats: Aggregate dict whose ``by_type`` entry maps each attack type
            to its ``n``, ``pg2``, ``fw`` and ``task`` counts.
        out: Directory that receives the PNG.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    counts = stats["by_type"]
    categories = sorted(counts)

    # Per-category success fraction for each plotted counter.
    rate_lists = {
        key: [counts[cat][key] / counts[cat]["n"] for cat in categories]
        for key in ("pg2", "fw", "task")
    }

    centers = np.arange(len(categories))
    bar_width = 0.27

    _, axes = plt.subplots(figsize=(10, 5))
    # (bar positions, counter key, legend label, colour), left to right.
    layout = (
        (centers - bar_width, "pg2", "PG2 Bypass", "#3b82f6"),
        (centers, "fw", "FW Bypass", "#1d4ed8"),
        (centers + bar_width, "task", "Task Success", "#22c55e"),
    )
    for bar_x, key, legend_label, colour in layout:
        axes.bar(bar_x, rate_lists[key], bar_width,
                 label=legend_label, color=colour, edgecolor="white")

    axes.set_ylabel("Rate")
    axes.set_title("InjectArena — Per Attack Category (across all step counts)")
    axes.set_xticks(centers)
    tick_labels = [cat.replace("_", " ").title() for cat in categories]
    axes.set_xticklabels(tick_labels)
    axes.set_ylim(0, 1.05)
    axes.legend()
    axes.grid(axis="y", alpha=0.3)

    out_path = out / "per_category.png"
    plt.tight_layout()
    plt.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Saved {out_path}")


def _plot_step_curve(traces_dir: Path, out: Path) -> None:
    """Plot bypass rate vs training-step label — the 'progression' visual.

    Reads every ``*.json`` trace in ``traces_dir``, groups the traces by
    their ``steps`` value and plots, per group, the fraction with
    ``broke_pg2``, ``broke_fw`` and ``task_succeeded`` set. Traces without a
    ``steps`` value are ignored; if no trace has one, a notice is printed and
    nothing is written.

    Args:
        traces_dir: Directory scanned (non-recursively) for ``*.json`` traces.
        out: Directory that receives ``reward_curve.png``; it is not created
            here.
    """
    # Outcome flag in the trace -> counter key it feeds.
    flag_to_key = (
        ("broke_pg2", "pg2"),
        ("broke_fw", "fw"),
        ("task_succeeded", "task"),
    )
    by_step: Dict[int, Dict[str, int]] = defaultdict(
        lambda: {"n": 0, "pg2": 0, "fw": 0, "task": 0}
    )
    for f in sorted(traces_dir.glob("*.json")):
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with f.open(encoding="utf-8") as fh:
            t = json.load(fh)
        s = t.get("steps")
        if s is None:
            continue
        # An explicit null outcome is treated like a missing one.
        o = t.get("outcome") or {}
        bucket = by_step[s]
        bucket["n"] += 1
        for flag, key in flag_to_key:
            if o.get(flag):
                bucket[key] += 1

    if not by_step:
        print(f"No traces with a 'steps' label in {traces_dir}; skipping reward_curve.png")
        return

    # Imported only once there is something to draw, so the no-data path
    # does not need matplotlib at all.
    import matplotlib.pyplot as plt

    # NOTE(review): sorting assumes all ``steps`` values are mutually
    # comparable (e.g. all ints) — confirm against the trace writer.
    xs = sorted(by_step)
    pg2 = [by_step[s]["pg2"] / by_step[s]["n"] for s in xs]
    fw = [by_step[s]["fw"] / by_step[s]["n"] for s in xs]
    task = [by_step[s]["task"] / by_step[s]["n"] for s in xs]

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(xs, pg2, marker="o", linewidth=2, label="PG2 Bypass", color="#3b82f6")
    ax.plot(xs, fw, marker="s", linewidth=2, label="FW Bypass", color="#1d4ed8")
    ax.plot(xs, task, marker="^", linewidth=2, label="Task Success", color="#22c55e")
    ax.set_xlabel("Attacker training steps")
    ax.set_ylabel("Bypass rate")
    ax.set_title("InjectArena — Bypass Rate by Training Step Count")
    ax.set_ylim(0, 1.05)
    ax.legend()
    ax.grid(alpha=0.3)

    out_path = out / "reward_curve.png"   # reuse this filename so the dashboard finds it
    # Operate on this figure explicitly rather than pyplot's implicit
    # "current figure".
    fig.tight_layout()
    fig.savefig(out_path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved {out_path}")


def main() -> None:
    """Parse CLI options, aggregate the traces and write every plot.

    ``--traces`` (default ``data/traces``) is the directory of trace JSONs;
    ``--out`` (default ``docs/plots``) is the plot directory, created if
    missing. A one-line summary of the aggregate rates is printed before the
    plots are generated.
    """
    parser = argparse.ArgumentParser()
    for flag, fallback in (("--traces", "data/traces"), ("--out", "docs/plots")):
        parser.add_argument(flag, default=fallback)
    opts = parser.parse_args()

    traces_dir = Path(opts.traces)
    plots_dir = Path(opts.out)
    plots_dir.mkdir(parents=True, exist_ok=True)

    # Select the Agg backend before any plotting helper imports pyplot.
    import matplotlib
    matplotlib.use("Agg")

    stats = _aggregate(traces_dir)
    summary = "  ".join([
        f"Aggregated {stats['n']} traces",
        f"PG2={stats['pg2_rate']:.0%}",
        f"FW={stats['fw_rate']:.0%}",
        f"Task={stats['task_rate']:.0%}",
        f"Composed={stats['composed_rate']:.0%}",
    ])
    print(summary)

    _plot_bypass_bars(stats, plots_dir)
    _plot_per_category(stats, plots_dir)
    _plot_step_curve(traces_dir, plots_dir)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()