import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import re
import os
import json


# Reference scores drawn as horizontal guide lines on the validation panel.
# Each entry maps a display name to its scores plus the matplotlib colour and
# line style used for its line.  Only "step_acc" is plotted by this file (and
# labelled with a "%" suffix); "f1" is stored alongside it, presumably on the
# same percentage scale — confirm against the source of these numbers.
REFERENCE_BASELINES = {
    label: {"f1": f1, "step_acc": step_acc, "color": color, "ls": line_style}
    for label, f1, step_acc, color, line_style in (
        ("Random",         48.5, 8.7,  "gray",   ":"),
        ("Open-src avg",   45.8, 10.9, "orange", "-."),
        ("Gemini-2.5-Pro", 64.6, 41.1, "purple", "--"),
    )
}


def parse_log(log_path):
    """Parse a training log into parallel per-epoch metric lists.

    The log text is split on epoch header lines of the form
    ``── Epoch N/M ──``.  Within the text following each header, the first
    ``Train loss: <x>``, ``F1: <x>%`` and ``step-acc: <x>%`` entries are read
    (the step-acc label is matched case-insensitively).  An epoch is recorded
    only when all three values are present; incomplete blocks are skipped.

    Args:
        log_path: Path to the training log, read as UTF-8.

    Returns:
        A tuple ``(epochs, train_losses, val_f1s, val_accs)`` of four lists of
        equal length, where index ``i`` of every list refers to the same
        epoch.  ``epochs`` holds ints, the other lists hold floats.  All four
        lists are empty when no complete epoch block is found.

    Raises:
        OSError: If the log file cannot be opened.
        UnicodeDecodeError: If the log file is not valid UTF-8.
    """
    # The epoch header uses non-ASCII box-drawing characters, so decode
    # explicitly instead of relying on the platform's locale encoding.
    with open(log_path, encoding="utf-8") as f:
        content = f.read()

    # One well-formed decimal number with an optional exponent, so values
    # such as "5e-05" are captured whole rather than truncated to "5".
    number = r'((?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)'
    loss_re = re.compile(r'Train loss\s*:\s*' + number)
    f1_re = re.compile(r'F1\s*:\s*' + number + r'%')
    acc_re = re.compile(r'step-acc\s*:\s*' + number + r'%', re.IGNORECASE)

    epochs = []
    train_losses = []
    val_f1s = []
    val_accs = []

    blocks = re.split(r'── Epoch (\d+)/\d+ ──+', content)
    # Because the pattern has one capturing group, re.split yields
    # [preamble, epoch_num, block_text, epoch_num, block_text, ...].
    for ep_text, blk in zip(blocks[1::2], blocks[2::2]):
        loss_m = loss_re.search(blk)
        f1_m = f1_re.search(blk)
        acc_m = acc_re.search(blk)

        # Skip epochs that did not log every metric (e.g. a truncated run)
        # so the returned lists always stay aligned.
        if loss_m and f1_m and acc_m:
            epochs.append(int(ep_text))
            train_losses.append(float(loss_m.group(1)))
            val_f1s.append(float(f1_m.group(1)))
            val_accs.append(float(acc_m.group(1)))

    return epochs, train_losses, val_f1s, val_accs


def plot_learning_curves(log_path, output_path, meta_path=None):
    """Plot training loss and validation metrics from a training log.

    Produces a two-panel figure sharing the epoch axis: the top panel shows
    the training loss, the bottom panel shows validation step accuracy and
    F1 together with one horizontal line per entry in
    ``REFERENCE_BASELINES`` (its ``step_acc`` value).  When a best epoch is
    available from ``meta_path`` it is marked with a vertical line on both
    panels.  The figure is written to ``output_path`` at 300 dpi.

    If the log file is missing, or no complete epoch can be parsed from it,
    a message is printed and the function returns without plotting.

    Args:
        log_path: Path to the training log consumed by ``parse_log``.
        output_path: Destination image path; its parent directory is created
            if it does not exist.
        meta_path: Optional path to a JSON file whose ``"epoch"`` key gives
            the best epoch.  Ignored when ``None`` or when the file does not
            exist.

    Returns:
        None.
    """
    if not os.path.exists(log_path):
        print(f"Log file not found: {log_path}")
        return

    epochs, train_losses, val_f1s, val_accs = parse_log(log_path)
    if not epochs:
        print("Could not parse any epoch data from the log.")
        return

    best_epoch = None
    if meta_path and os.path.exists(meta_path):
        with open(meta_path, encoding="utf-8") as f:
            meta = json.load(f)
        best_epoch = meta.get("epoch")
    # Test against None rather than truthiness so that a best epoch of 0
    # is still marked on the plot.
    has_best_epoch = best_epoch is not None

    plt.style.use("seaborn-v0_8-whitegrid")
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(11, 8), sharex=True)
    fig.suptitle("AgentSight — Training Curves", fontsize=14, fontweight="bold")

    # ── Top panel: training loss ──────────────────────────────────────────────
    ax1.plot(epochs, train_losses, color="tab:red", marker="o", ms=3,
             label="Train loss (BCE)")
    ax1.set_ylabel("Train Loss", fontweight="bold")
    ax1.yaxis.set_major_formatter(mticker.FormatStrFormatter("%.3f"))
    if has_best_epoch:
        ax1.axvline(best_epoch, color="green", lw=1.5, ls="--",
                    label=f"Best epoch ({best_epoch})")
    ax1.legend(fontsize=9)

    # ── Bottom panel: val metrics + baselines ─────────────────────────────────
    ax2.plot(epochs, val_accs, color="tab:blue", marker="s", ms=3,
             label="Val Step-Acc (%)")
    ax2.plot(epochs, val_f1s,  color="tab:green", marker="^", ms=3, ls="--",
             label="Val Macro-F1 (%)")

    for name, ref in REFERENCE_BASELINES.items():
        ax2.axhline(ref["step_acc"], color=ref["color"], ls=ref["ls"], lw=1.5,
                    label=f"{name} Step-Acc ({ref['step_acc']}%)")

    if has_best_epoch:
        # No label here: the top panel's legend already names this marker.
        ax2.axvline(best_epoch, color="green", lw=1.5, ls="--")

    ax2.set_xlabel("Epoch", fontweight="bold")
    ax2.set_ylabel("Metric (%)", fontweight="bold")
    ax2.set_ylim(0, 100)
    ax2.legend(fontsize=8, loc="lower right")

    plt.tight_layout()
    # dirname is "" for a bare filename, and os.makedirs("") raises, so only
    # create the directory when the path actually has one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    plt.savefig(output_path, dpi=300)
    print(f"Plot saved → {output_path}")


if __name__ == "__main__":
    # Resolve every path relative to the directory two levels above this
    # script, so the script can be launched from any working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.join(script_dir, "..", "..")

    plot_learning_curves(
        os.path.join(project_root, "full_training_log.txt"),
        os.path.join(project_root, "notebooks", "training_curve.png"),
        meta_path=os.path.join(
            project_root, "src", "models", "best_agentsight_meta.json"
        ),
    )