# Spaces: testingaccc/conflict-arbitration-env
# Status: Sleeping
# File size: 5,717 Bytes
# ce00c50 (presumably the short revision hash of this file)

"""
Plot training curves from a real metrics.json produced by training/train.py.

Usage:
    python3 scripts/plot_from_metrics.py [metrics.json] [output.png]

Defaults: ./metrics.json -> ./training_curves.png

The metrics.json schema (see training/metrics.py) is:
{
    "step": [...],
    "arbitration_accuracy": [...],
    "merge_success_rate": [...],
    "avg_reward": [...],
    "curriculum_phase": [...],
    "conflict_detection_rate": [...],
    "false_alarm_rate": [...],
    "wrong_agent_rate": [...]
}
"""
import json
import sys
from pathlib import Path

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np


def rolling_mean(arr, window):
    """Return the trailing rolling mean of a 1-D sequence.

    Element ``i`` of the result is the mean of
    ``arr[max(0, i - window + 1) : i + 1]``, so the first ``window - 1``
    outputs average over however many samples are available so far.

    Args:
        arr: Sequence of numbers; converted to a float NumPy array.
        window: Maximum number of trailing samples to average (>= 1).

    Returns:
        A float array with the same length as ``arr``. Inputs shorter than
        ``window`` (including empty input) are returned unsmoothed.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        # A zero window would divide 0 by 0 (NaNs) and a negative one would
        # index past the cumulative sum; fail loudly instead.
        raise ValueError(f"window must be >= 1, got {window}")

    arr = np.asarray(arr, dtype=float)
    if len(arr) < window:
        return arr

    # cumsum[k] holds the sum of the first k samples, so any window sum is a
    # difference of two entries.
    cumsum = np.cumsum(np.insert(arr, 0, 0))
    idx = np.arange(len(arr))
    lo = np.maximum(0, idx - window + 1)
    return (cumsum[idx + 1] - cumsum[lo]) / (idx - lo + 1)


# Number of logged steps averaged by every rolling-average overlay.
_ROLLING_WINDOW = 20
# Accuracy (%) drawn as the "random baseline" reference line and used for the
# above-chance count in the summary.
_RANDOM_BASELINE_PCT = 33.3


def _plot_series(ax, steps, values, *, point_color, line_color, point_label):
    """Scatter ``values`` over ``steps`` and overlay a rolling average if long enough."""
    ax.scatter(steps, values, alpha=0.3, c=point_color, s=15, label=point_label)
    if len(values) >= _ROLLING_WINDOW:
        ax.plot(steps, rolling_mean(values, _ROLLING_WINDOW), color=line_color,
                linewidth=2.5, label=f"rolling avg (window={_ROLLING_WINDOW})")


def _finish_axes(ax, title, ylabel, *, percent_scale=False):
    """Apply the title, axis labels, legend and grid shared by all metric panels."""
    ax.set_title(title, color="#e6e6f0")
    ax.set_xlabel("Training step")
    ax.set_ylabel(ylabel)
    if percent_scale:
        # Small margin around 0-100 so points on the extremes stay visible.
        ax.set_ylim(-5, 105)
    ax.legend(loc="lower right", framealpha=0.3)
    ax.grid(True, alpha=0.15)


def main(metrics_path: str = "metrics.json", out_path: str = "training_curves.png"):
    """Render a 2x2 training-progress figure from a metrics JSON file.

    The figure contains the reward curve, the arbitration-accuracy curve, the
    merge-success-rate curve (left blank when that series is absent) and a
    text panel with summary statistics. A short summary is also printed.

    Args:
        metrics_path: Path of the JSON file holding the per-step metric lists.
        out_path: Path the PNG figure is written to.

    Raises:
        SystemExit: If the metrics file is missing, has no ``step`` entries,
            or a plotted series does not have one value per logged step.
    """
    if not Path(metrics_path).exists():
        sys.exit(f"metrics.json not found at {metrics_path}")
    with open(metrics_path, encoding="utf-8") as f:
        h = json.load(f)

    steps = np.asarray(h.get("step", []))
    if len(steps) == 0:
        sys.exit("metrics.json is empty (no step entries yet)")
    n = len(steps)

    accs = np.asarray(h.get("arbitration_accuracy", [])) * 100
    rewards = np.asarray(h.get("avg_reward", []))
    merge = np.asarray(h.get("merge_success_rate", [])) * 100
    phases = np.asarray(h.get("curriculum_phase", []))

    # Every plotted series must line up with the step axis; otherwise the
    # failure would surface later as an unrelated matplotlib/numpy error.
    for key, series in (("avg_reward", rewards), ("arbitration_accuracy", accs)):
        if len(series) != n:
            sys.exit(f"metrics.json: '{key}' has {len(series)} entries but 'step' has {n}")
    # merge_success_rate is optional: empty means "skip the panel".
    if len(merge) not in (0, n):
        sys.exit(f"metrics.json: 'merge_success_rate' has {len(merge)} entries but 'step' has {n}")

    plt.style.use("dark_background")
    fig, axes = plt.subplots(2, 2, figsize=(14, 9))
    fig.suptitle(
        f"Conflict Arbitration Agent - Training Progress  ({n} steps logged)",
        fontsize=14, fontweight="bold", color="#e6e6f0",
    )

    # 1. Reward
    ax = axes[0, 0]
    _plot_series(ax, steps, rewards, point_color="#8be9d6", line_color="#ff79c6",
                 point_label="per-step reward")
    ax.axhline(0, color="#444", linestyle="--", linewidth=1, alpha=0.6)
    _finish_axes(ax, "Average reward over time", "Reward")

    # 2. Accuracy
    ax = axes[0, 1]
    _plot_series(ax, steps, accs, point_color="#50fa7b", line_color="#f1fa8c",
                 point_label="per-step accuracy")
    ax.axhline(_RANDOM_BASELINE_PCT, color="#ff5555", linestyle="--", linewidth=1.5,
               alpha=0.7, label=f"random baseline ({_RANDOM_BASELINE_PCT}%)")
    _finish_axes(ax, "Arbitration accuracy over time", "Accuracy (%)", percent_scale=True)

    # 3. Merge success rate
    ax = axes[1, 0]
    if len(merge) > 0:
        _plot_series(ax, steps, merge, point_color="#bd93f9", line_color="#ffb86c",
                     point_label="per-step")
        _finish_axes(ax, "Merge success rate", "Success (%)", percent_scale=True)
    else:
        ax.axis("off")

    # 4. Summary stats
    ax = axes[1, 1]
    ax.axis("off")
    # Compare the first and last quarter of the run, capped at 100 steps each.
    head = max(1, min(100, n // 4))
    tail = head

    head_r = float(np.mean(rewards[:head]))
    tail_r = float(np.mean(rewards[-tail:]))
    head_a = float(np.mean(accs[:head]))
    tail_a = float(np.mean(accs[-tail:]))
    pos = int((rewards > 0).sum())
    above = int((accs > _RANDOM_BASELINE_PCT).sum())

    phase_summary = ""
    if len(phases) > 0:
        unique, counts = np.unique(phases, return_counts=True)
        # Percentages are relative to the number of phase entries so they
        # always add up to 100% even if that differs from the step count.
        phase_total = len(phases)
        phase_summary = "\nPHASE TIME\n" + "\n".join(
            f"  Phase {int(p)}: {int(c)} steps  ({100*c/phase_total:.0f}%)"
            for p, c in zip(unique, counts)
        )

    text = f"""TRAINING SUMMARY
{'='*40}
Steps logged:       {n}
First step / Last:  {int(steps[0])} / {int(steps[-1])}

REWARD
  First {head} mean:  {head_r:+.2f}
  Last {tail} mean:   {tail_r:+.2f}
  Improvement:        {tail_r - head_r:+.2f}
  Best:               {float(rewards.max()):+.2f} (step {int(steps[int(np.argmax(rewards))])})
  Positive steps:     {pos} / {n}  ({100*pos/n:.0f}%)

ACCURACY
  First {head} mean:  {head_a:.1f}%
  Last {tail} mean:   {tail_a:.1f}%
  Best:               {float(accs.max()):.1f}%
  Above-chance:       {above} / {n}  ({100*above/n:.0f}%)
  Random baseline:    {_RANDOM_BASELINE_PCT}%
{phase_summary}
"""
    ax.text(0.02, 0.98, text, transform=ax.transAxes, fontsize=10,
            verticalalignment="top", fontfamily="monospace", color="#c8c8e8")

    plt.tight_layout()
    plt.savefig(out_path, dpi=150, bbox_inches="tight", facecolor="#0a0a14")
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"Saved {out_path}")
    print(f"\nFirst-{head} reward: {head_r:+.3f}   Last-{tail} reward: {tail_r:+.3f}   delta: {tail_r-head_r:+.3f}")
    print(f"First-{head} acc:    {head_a:.2f}%   Last-{tail} acc:    {tail_a:.2f}%   delta: {tail_a-head_a:+.2f}pp")


if __name__ == "__main__":
    # Both positional CLI arguments are optional: [metrics.json] [output.png].
    cli_args = sys.argv[1:]
    metrics = cli_args[0] if cli_args else "metrics.json"
    out = cli_args[1] if len(cli_args) > 1 else "training_curves.png"
    main(metrics, out)