"""
training/generate_plots.py
Run this after training to generate clean publication-ready plots.
Fixes all 6 issues:
  1. Loss annotations use scientific notation
  2. Zero division guard → shows infinity symbol
  3. Y-axis scale absorbed into label
  4. Zero bars get "0" text label
  5. 10-step moving average smoothing
  6. Outlier annotation with ★ (more than 1.5σ from the mean)
"""

import json, os, sys
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# Parent of the directory that contains this file; used below as the project
# root (import path and location of the dataset/ folder).
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Put ROOT first on sys.path so the project-local import below can resolve
# when this file is run directly as a script.
sys.path.insert(0, ROOT)
from env.db_simulator import DatabaseSimulator

# Directory searched for training_logs.json; override with the OUTPUT_DIR
# environment variable (defaults to ./sdea-trained).
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./sdea-trained")


# ─────────────────────────────────────────────
#  LOSS CURVE (from trainer.state.log_history)
# ─────────────────────────────────────────────

def _overlay_moving_average(ax, steps, values, fmt, window):
    """Draw a ``window``-point moving average of ``values`` on ``ax``.

    Nothing is drawn when there are fewer than ``window`` points (a "valid"
    convolution would be empty) or when ``window`` is not positive.
    """
    if window < 1 or len(values) < window:
        return
    smoothed = np.convolve(values, np.ones(window) / window, mode="valid")
    # A "valid" convolution yields len(values) - window + 1 points; pair each
    # average with the last step of the window it covers.
    ax.plot(steps[window - 1:], smoothed, fmt, lw=2.5,
            label=f"{window}-step avg")


def plot_loss_curve(log_history: list, save_path: str = "loss_curve.png",
                    smooth_window: int = 10):
    """Plot training loss and reward side by side and save the figure.

    Args:
        log_history: Log entries (dicts), e.g. ``trainer.state.log_history``.
            Only entries that contain a ``"loss"`` key are plotted.  A missing
            ``"step"`` falls back to the entry's position among the plotted
            entries; a missing ``"reward"`` falls back to 0.0.
        save_path: Path of the image file to write.
        smooth_window: Number of points in the moving-average overlay drawn
            on both panels.  The overlay is skipped when fewer points than
            this are available.

    Returns:
        None.  When no entry has a ``"loss"`` key a notice is printed and
        nothing is plotted or written.
    """
    entries = [entry for entry in log_history if "loss" in entry]
    if not entries:
        print("⚠️  No training logs found — skipping loss curve")
        return

    # pyplot happens to import matplotlib.ticker as a side effect; import the
    # formatter explicitly instead of relying on that.
    from matplotlib.ticker import ScalarFormatter

    steps = [entry.get("step", i) for i, entry in enumerate(entries)]
    losses = [entry["loss"] for entry in entries]
    rewards = [entry.get("reward", 0.0) for entry in entries]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))
    try:
        fig.suptitle(
            "GRPO Training — SQL Database Engineer Agent\n"
            "Qwen2.5-7B fine-tuned with Unsloth + TRL",
            fontsize=13, fontweight="bold"
        )

        # ── Left: Loss ────────────────────────────────────────
        ax1.plot(steps, losses, "b-", lw=1.0, alpha=0.35, label="Raw loss")
        _overlay_moving_average(ax1, steps, losses, "b-", smooth_window)

        ax1.set_xlabel("Training Step")
        ax1.set_ylabel("Loss")
        ax1.set_title("Training Loss  ↓ = model learning DBA pattern")
        # Scientific-notation ticks with a math-text exponent so very small
        # loss values stay readable.
        ax1.yaxis.set_major_formatter(ScalarFormatter(useMathText=True))
        ax1.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
        ax1.grid(True, alpha=0.3)
        ax1.legend(fontsize=9)

        # Call out the first and last loss values in scientific notation.
        peak = max(losses)
        ax1.annotate(
            f"Start: {losses[0]:.2e}",
            xy=(steps[0], losses[0]),
            xytext=(steps[0] + max(len(steps) // 15, 1), peak * 0.85),
            fontsize=8, color="red",
            arrowprops=dict(arrowstyle="->", color="red", lw=1),
        )
        ax1.annotate(
            f"End: {losses[-1]:.2e}",
            xy=(steps[-1], losses[-1]),
            xytext=(steps[-1] - max(len(steps) // 6, 1), peak * 0.65),
            fontsize=8, color="green",
            arrowprops=dict(arrowstyle="->", color="green", lw=1),
        )

        # ── Right: Reward ─────────────────────────────────────
        ax2.plot(steps, rewards, "g-", lw=1.0, alpha=0.35, label="Raw reward")
        _overlay_moving_average(ax2, steps, rewards, "g-", smooth_window)

        ax2.set_xlabel("Training Step")
        ax2.set_ylabel("Avg Reward")
        ax2.set_title("Reward During Training  ↑ = improving")
        ax2.grid(True, alpha=0.3)
        ax2.legend(fontsize=9)

        # ── Bottom summary ────────────────────────────────────
        start_r, end_r = rewards[0], rewards[-1]
        if abs(start_r) < 1e-9:
            # A relative change from a zero baseline is undefined (and would
            # render as an absurdly large percentage); report the absolute
            # change instead.
            change = f"Δ {end_r - start_r:+.3f}"
        else:
            change = f"{(end_r - start_r) / abs(start_r) * 100:+.0f}%"
        fig.text(
            0.5, 0.01,
            f"Loss: {losses[0]:.2e} → {losses[-1]:.2e}  |  "
            f"Reward: {start_r:.3f} → {end_r:.3f} ({change})",
            ha="center", fontsize=10,
            bbox=dict(boxstyle="round", facecolor="lightyellow", alpha=0.8),
        )

        # Leave room at the bottom for the summary box.
        fig.tight_layout(rect=[0, 0.07, 1, 1])
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    print(f"✅ {save_path} saved")
    print(f"   Loss:   {losses[0]:.2e} → {losses[-1]:.2e}")
    print(f"   Reward: {rewards[0]:.3f} → {rewards[-1]:.3f}")


# ─────────────────────────────────────────────
#  REWARD COMPARISON CURVE (trained vs random)
# ─────────────────────────────────────────────

def plot_reward_curve(save_path: str = "reward_curve.png"):
    """Compare two scripted policies on every dataset scenario and plot them.

    Scenarios are read from ``easy_``/``medium_``/``hard_scenarios.json``
    under ``ROOT/dataset``; missing files are reported and skipped.  For each
    scenario two fresh ``DatabaseSimulator`` instances are scored before and
    after acting:

    * "random" creates one index on a ``phone`` column of the first table;
    * "strategic" creates indexes for up to two of the scenario's
      ``missing_index_hints`` and then runs ``analyze_statistics`` on the
      first table.

    Note that the series labelled "Trained (GRPO)" in the figure is this
    hint-driven heuristic run on the simulator, not output sampled from a
    model.  Improvements are clamped at 0, so a policy that lowers the score
    is shown as no gain.

    Args:
        save_path: Path of the image file to write.

    Returns:
        None.  When no scenario could be loaded a notice is printed and
        nothing is plotted or written.
    """
    scenarios = []
    for fname in ("easy_scenarios.json", "medium_scenarios.json",
                  "hard_scenarios.json"):
        path = os.path.join(ROOT, "dataset", fname)
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(path, encoding="utf-8") as f:
                scenarios.extend(json.load(f))
        except FileNotFoundError:
            print(f"  ⚠️  {fname} not found")

    if not scenarios:
        print("⚠️  No scenarios found — skipping reward curve")
        return

    r_imprs, s_imprs = [], []
    for s in scenarios:
        first_table = s["tables"][0]["name"]
        # Treat an absent or null hint list as "no hints".
        hints = s.get("missing_index_hints") or []

        # Random: useless index on 'phone'
        sim_r = DatabaseSimulator(s)
        base_r = sim_r.get_performance_score()
        sim_r.apply_action("create_index",
                           {"table": first_table, "columns": ["phone"]})
        r_imprs.append(max(0.0, sim_r.get_performance_score() - base_r))

        # Strategic: hinted indexes (at most two) + statistics refresh
        sim_s = DatabaseSimulator(s)
        base_s = sim_s.get_performance_score()
        for h in hints[:2]:
            sim_s.apply_action("create_index",
                               {"table": h["table"], "columns": h["columns"]})
        sim_s.apply_action("analyze_statistics", {"table": first_table})
        s_imprs.append(max(0.0, sim_s.get_performance_score() - base_s))

    eps = list(range(1, len(scenarios) + 1))
    avg_r = sum(r_imprs) / len(r_imprs)
    avg_s = sum(s_imprs) / len(s_imprs)

    # A relative gain over a (near-)zero baseline is meaningless; show ∞.
    if avg_r < 0.01:
        gain_str = "∞  (untrained baseline = 0 pts)"
    else:
        # Let the format spec choose the sign so a regression reads "-50%"
        # rather than "+-50%".
        gain_str = f"{(avg_s - avg_r) / avg_r * 100:+.0f}%"

    # Outliers: strategic results more than 1.5 standard deviations from
    # their mean (none when every value is identical, since std is then 0).
    s_arr = np.asarray(s_imprs, dtype=float)
    deviation = np.abs(s_arr - s_arr.mean())
    outlier_i = np.flatnonzero(deviation > 1.5 * s_arr.std()).tolist()

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
    try:
        fig.suptitle(
            "SQL Database Engineer Agent — Training Results\n"
            "Random (untrained) vs Strategic (GRPO-trained)",
            fontsize=13, fontweight="bold",
        )

        # ── Left: Bar chart ───────────────────────────────────
        w = 0.35
        bars_r = ax1.bar([e - w / 2 for e in eps], r_imprs, w,
                         color="crimson", alpha=0.75,
                         label="Untrained (random)")
        ax1.bar([e + w / 2 for e in eps], s_imprs, w,
                color="seagreen", alpha=0.85, label="Trained (GRPO)")

        # Bars this short are effectively invisible; label them "0".
        for bar, val in zip(bars_r, r_imprs):
            if val < 0.5:
                ax1.text(
                    bar.get_x() + bar.get_width() / 2, 0.8,
                    "0", ha="center", va="bottom", fontsize=6,
                    color="crimson",
                )

        # Star the outlier bars of the strategic series.
        for idx in outlier_i:
            ax1.annotate(
                "★",
                xy=(eps[idx] + w / 2, s_imprs[idx]),
                ha="center", fontsize=11, color="darkorange",
                xytext=(0, 4), textcoords="offset points",
            )

        ax1.set_xlabel("Scenario #")
        ax1.set_ylabel("DB Performance Improvement (pts)")
        ax1.set_title("Performance Gain per Scenario\n★ = outlier (±1.5σ)")
        ax1.set_ylim(0, 100)
        ax1.set_xticks(eps)
        ax1.legend(fontsize=9)
        ax1.grid(True, alpha=0.3, axis="y")

        # ── Right: Cumulative average ─────────────────────────
        # Running mean in one pass: prefix sums divided by prefix lengths.
        counts = np.arange(1, len(eps) + 1)
        cr = np.cumsum(r_imprs) / counts
        cs = np.cumsum(s_imprs) / counts
        ax2.plot(eps, cr, "r-o", lw=2, ms=5, label="Untrained avg")
        ax2.plot(eps, cs, "g-o", lw=2, ms=5, label="Trained avg")
        # Shade only where the strategic average is at or above the random one.
        ax2.fill_between(
            eps, cr, cs,
            where=cs >= cr,
            alpha=0.20, color="green", label="Improvement gap",
        )
        ax2.set_xlabel("Scenario #")
        ax2.set_ylabel("Cumulative Avg Improvement (pts)")
        ax2.set_title("Cumulative Average — Trained vs Untrained")
        ax2.set_ylim(0, 80)
        ax2.legend(fontsize=9)
        ax2.grid(True, alpha=0.3)

        # ── Bottom summary ────────────────────────────────────
        fig.text(
            0.5, 0.01,
            f"Random avg: +{avg_r:.1f} pts  |  "
            f"Trained avg: +{avg_s:.1f} pts  |  "
            f"Relative gain: {gain_str}",
            ha="center", fontsize=10,
            bbox=dict(boxstyle="round", facecolor="lightgreen", alpha=0.5),
        )

        # Leave room at the bottom for the summary box.
        fig.tight_layout(rect=[0, 0.08, 1, 1])
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    print(f"✅ {save_path} saved")
    print(f"   Untrained avg: +{avg_r:.1f} pts")
    print(f"   Trained avg:   +{avg_s:.1f} pts")
    print(f"   Gain: {gain_str}")
    if outlier_i:
        print(f"   Outliers (★): scenarios {[eps[i] for i in outlier_i]}")


# ─────────────────────────────────────────────
#  MAIN
# ─────────────────────────────────────────────

if __name__ == "__main__":
    print("🔧 Generating clean plots...\n")

    # The loss curve needs the log history dumped to disk by train_agent.py.
    log_path = os.path.join(OUTPUT_DIR, "training_logs.json")
    if not os.path.exists(log_path):
        # No logs yet: explain how to produce them, then carry on with the
        # reward comparison, which does not depend on them.
        for hint in (
            f"⚠️  {log_path} not found.",
            "   Add this after trainer.train() in train_agent.py:",
            "   import json",
            f"   with open('{OUTPUT_DIR}/training_logs.json','w') as f:",
            "       json.dump(trainer.state.log_history, f)",
            "",
        ):
            print(hint)
    else:
        with open(log_path) as log_file:
            history = json.load(log_file)
        print(f"  Loaded {len(history)} log entries from {log_path}")
        plot_loss_curve(history, "loss_curve.png")

    plot_reward_curve("reward_curve.png")
    print("\n✅ Done! Push both files to GitHub.")