| """ |
| Plot training curves from a real metrics.json produced by training/train.py. |
| |
| Usage: |
| python3 scripts/plot_from_metrics.py [metrics.json] [output.png] |
| |
| Defaults: ./metrics.json -> ./training_curves.png |
| |
| The metrics.json schema (see training/metrics.py) is: |
| { |
| "step": [...], |
| "arbitration_accuracy": [...], |
| "merge_success_rate": [...], |
| "avg_reward": [...], |
| "curriculum_phase": [...], |
| "conflict_detection_rate": [...], |
| "false_alarm_rate": [...], |
| "wrong_agent_rate": [...] |
| } |
| """ |
| import json |
| import sys |
| from pathlib import Path |
|
|
| import matplotlib |
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
| import numpy as np |
|
|
|
|
| def rolling_mean(arr, window): |
| arr = np.asarray(arr, dtype=float) |
| if len(arr) == 0: |
| return arr |
| if len(arr) < window: |
| return arr |
| out = np.empty_like(arr) |
| cumsum = np.cumsum(np.insert(arr, 0, 0)) |
| for i in range(len(arr)): |
| lo = max(0, i - window + 1) |
| out[i] = (cumsum[i + 1] - cumsum[lo]) / (i - lo + 1) |
| return out |
|
|
|
|
| def main(metrics_path: str = "metrics.json", out_path: str = "training_curves.png"): |
| if not Path(metrics_path).exists(): |
| sys.exit(f"metrics.json not found at {metrics_path}") |
| with open(metrics_path) as f: |
| h = json.load(f) |
|
|
| steps = np.asarray(h.get("step", [])) |
| if len(steps) == 0: |
| sys.exit("metrics.json is empty (no step entries yet)") |
|
|
| accs = np.asarray(h.get("arbitration_accuracy", [])) * 100 |
| rewards = np.asarray(h.get("avg_reward", [])) |
| merge = np.asarray(h.get("merge_success_rate", [])) * 100 |
| phases = np.asarray(h.get("curriculum_phase", [])) |
|
|
| plt.style.use("dark_background") |
| fig, axes = plt.subplots(2, 2, figsize=(14, 9)) |
| fig.suptitle( |
| f"Conflict Arbitration Agent - Training Progress ({len(steps)} steps logged)", |
| fontsize=14, fontweight="bold", color="#e6e6f0", |
| ) |
|
|
| |
| ax = axes[0, 0] |
| ax.scatter(steps, rewards, alpha=0.3, c="#8be9d6", s=15, label="per-step reward") |
| if len(rewards) >= 20: |
| ax.plot(steps, rolling_mean(rewards, 20), color="#ff79c6", linewidth=2.5, |
| label="rolling avg (window=20)") |
| ax.axhline(0, color="#444", linestyle="--", linewidth=1, alpha=0.6) |
| ax.set_title("Average reward over time", color="#e6e6f0") |
| ax.set_xlabel("Training step") |
| ax.set_ylabel("Reward") |
| ax.legend(loc="lower right", framealpha=0.3) |
| ax.grid(True, alpha=0.15) |
|
|
| |
| ax = axes[0, 1] |
| ax.scatter(steps, accs, alpha=0.3, c="#50fa7b", s=15, label="per-step accuracy") |
| if len(accs) >= 20: |
| ax.plot(steps, rolling_mean(accs, 20), color="#f1fa8c", linewidth=2.5, |
| label="rolling avg (window=20)") |
| ax.axhline(33.3, color="#ff5555", linestyle="--", linewidth=1.5, alpha=0.7, |
| label="random baseline (33.3%)") |
| ax.set_title("Arbitration accuracy over time", color="#e6e6f0") |
| ax.set_xlabel("Training step") |
| ax.set_ylabel("Accuracy (%)") |
| ax.set_ylim(-5, 105) |
| ax.legend(loc="lower right", framealpha=0.3) |
| ax.grid(True, alpha=0.15) |
|
|
| |
| ax = axes[1, 0] |
| if len(merge) > 0: |
| ax.scatter(steps, merge, alpha=0.3, c="#bd93f9", s=15, label="per-step") |
| if len(merge) >= 20: |
| ax.plot(steps, rolling_mean(merge, 20), color="#ffb86c", linewidth=2.5, |
| label="rolling avg (window=20)") |
| ax.set_title("Merge success rate", color="#e6e6f0") |
| ax.set_xlabel("Training step") |
| ax.set_ylabel("Success (%)") |
| ax.set_ylim(-5, 105) |
| ax.legend(loc="lower right", framealpha=0.3) |
| ax.grid(True, alpha=0.15) |
| else: |
| ax.axis("off") |
|
|
| |
| ax = axes[1, 1] |
| ax.axis("off") |
| n = len(steps) |
| head = max(1, min(100, n // 4)) |
| tail = max(1, min(100, n // 4)) |
|
|
| head_r = float(np.mean(rewards[:head])) |
| tail_r = float(np.mean(rewards[-tail:])) |
| head_a = float(np.mean(accs[:head])) |
| tail_a = float(np.mean(accs[-tail:])) |
| pos = int((rewards > 0).sum()) |
| above = int((accs > 33.3).sum()) |
|
|
| phase_summary = "" |
| if len(phases) > 0: |
| unique, counts = np.unique(phases, return_counts=True) |
| phase_summary = "\nPHASE TIME\n" + "\n".join( |
| f" Phase {int(p)}: {int(c)} steps ({100*c/n:.0f}%)" |
| for p, c in zip(unique, counts) |
| ) |
|
|
| text = f"""TRAINING SUMMARY |
| {'='*40} |
| Steps logged: {n} |
| First step / Last: {int(steps[0])} / {int(steps[-1])} |
| |
| REWARD |
| First {head} mean: {head_r:+.2f} |
| Last {tail} mean: {tail_r:+.2f} |
| Improvement: {tail_r - head_r:+.2f} |
| Best: {float(rewards.max()):+.2f} (step {int(steps[int(np.argmax(rewards))])}) |
| Positive steps: {pos} / {n} ({100*pos/n:.0f}%) |
| |
| ACCURACY |
| First {head} mean: {head_a:.1f}% |
| Last {tail} mean: {tail_a:.1f}% |
| Best: {float(accs.max()):.1f}% |
| Above-chance: {above} / {n} ({100*above/n:.0f}%) |
| Random baseline: 33.3% |
| {phase_summary} |
| """ |
| ax.text(0.02, 0.98, text, transform=ax.transAxes, fontsize=10, |
| verticalalignment="top", fontfamily="monospace", color="#c8c8e8") |
|
|
| plt.tight_layout() |
| plt.savefig(out_path, dpi=150, bbox_inches="tight", facecolor="#0a0a14") |
| print(f"Saved {out_path}") |
| print(f"\nFirst-{head} reward: {head_r:+.3f} Last-{tail} reward: {tail_r:+.3f} delta: {tail_r-head_r:+.3f}") |
| print(f"First-{head} acc: {head_a:.2f}% Last-{tail} acc: {tail_a:.2f}% delta: {tail_a-head_a:+.2f}pp") |
|
|
|
|
| if __name__ == "__main__": |
| metrics = sys.argv[1] if len(sys.argv) > 1 else "metrics.json" |
| out = sys.argv[2] if len(sys.argv) > 2 else "training_curves.png" |
| main(metrics, out) |
|
|