| |
| """Generate training-evidence PNG charts from committed docs (no re-training needed). |
| |
| Reads numeric data already present in: |
| docs/MI300X_EVIDENCE.md — SFT loss/token-accuracy log lines |
| docs/TRAINING_STORY.md — GRPO per-step mean reward, benchmark table |
| |
| Outputs: |
| assets/training/sft_loss.png |
| assets/training/grpo_reward.png |
| assets/training/benchmark_resolution.png |
| assets/training/benchmark_per_tier.png |
| |
| Usage: |
| pip install matplotlib # only dependency |
| python scripts/generate_training_plots.py |
| """ |
|
|
| from pathlib import Path |
|
|
| import matplotlib |
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
| import matplotlib.ticker as mtick |
|
|
# Repository root, resolved as two directory levels above this script.
ROOT = Path(__file__).resolve().parent.parent

# Destination directory for every generated chart. It is created at import
# time so the plot functions can save without checking for it first.
OUT = ROOT.joinpath("assets", "training")
OUT.mkdir(exist_ok=True, parents=True)
|
|
| |
# Dark-theme palette shared by every chart in this script.
BG = "#0d1117"      # figure / axes / saved-image background
FG = "#c9d1d9"      # text, tick labels and neutral bars
ACCENT = "#58a6ff"  # accent colour
GREEN = "#57F287"
YELLOW = "#FEE75C"
RED = "#ED4245"
GRID = "#21262d"    # grid lines and axes edges

# Apply the palette globally so individual plot functions only have to set
# the colours that differ from the theme. Keys are grouped by the colour
# they share; the resulting mapping is passed to rcParams in one update.
plt.rcParams.update({
    **dict.fromkeys(
        ("figure.facecolor", "axes.facecolor",
         "savefig.facecolor", "savefig.edgecolor"),
        BG,
    ),
    **dict.fromkeys(
        ("axes.labelcolor", "text.color", "xtick.color", "ytick.color"),
        FG,
    ),
    **dict.fromkeys(("axes.edgecolor", "grid.color"), GRID),
    "grid.alpha": 0.5,
    "font.size": 11,
    "font.family": "sans-serif",
})
|
|
|
|
| |
# SFT training log samples as (epoch, loss, token_accuracy) rows, assembled
# here from three parallel columns. Token accuracy is a fraction in [0, 1].
SFT_DATA = list(zip(
    # epoch
    (0.04, 0.08, 0.12, 0.20, 0.32, 0.55, 0.75, 0.99),
    # loss
    (1.2651, 0.4114, 0.1950, 0.1156, 0.0845, 0.0557, 0.0370, 0.0272),
    # token accuracy
    (0.7196, 0.8998, 0.9483, 0.9660, 0.9742, 0.9821, 0.9873, 0.9915),
))
|
|
def plot_sft():
    """Render the SFT loss / token-accuracy chart to ``OUT / "sft_loss.png"``.

    Loss is drawn against the left y-axis and token accuracy (formatted as a
    percentage) against a twinned right y-axis, both over the epoch values
    stored in ``SFT_DATA``. The figure is closed after saving and the output
    path is printed.
    """
    epochs = [epoch for epoch, _, _ in SFT_DATA]
    losses = [loss for _, loss, _ in SFT_DATA]
    accuracies = [accuracy for _, _, accuracy in SFT_DATA]
    target = OUT / "sft_loss.png"

    # Left axis: training loss.
    fig, loss_ax = plt.subplots(figsize=(8, 4.5))
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss", color=RED)
    loss_line = loss_ax.plot(
        epochs, losses,
        color=RED, marker="o", markersize=5, linewidth=2, label="Loss",
    )[0]
    loss_ax.tick_params(axis="y", labelcolor=RED)
    # Pin the lower bound only after plotting so the upper bound still
    # follows the data.
    loss_ax.set_ylim(bottom=0)

    # Right axis: token accuracy, sharing the epoch axis.
    acc_ax = loss_ax.twinx()
    acc_ax.set_ylabel("Token Accuracy", color=GREEN)
    acc_line = acc_ax.plot(
        epochs, accuracies,
        color=GREEN, marker="s", markersize=5, linewidth=2,
        label="Token Accuracy",
    )[0]
    acc_ax.tick_params(axis="y", labelcolor=GREEN)
    acc_ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))
    acc_ax.set_ylim(0.65, 1.0)

    loss_ax.set_title(
        "SFT on AMD MI300X · 2,028 trajectories · 254 steps · 14 min",
        fontsize=12,
        pad=12,
    )
    # One combined legend for the lines that live on the two different axes.
    loss_ax.legend(
        handles=[loss_line, acc_line], loc="center right", framealpha=0.3
    )
    loss_ax.grid(True, alpha=0.3)

    fig.tight_layout()
    fig.savefig(target, dpi=150)
    plt.close(fig)
    print(f" wrote {target}")
|
|
|
|
| |
# Per-step mean reward for the GRPO run, one entry per step in step order.
GRPO_REWARDS = [
    0.355, 0.243, 0.073, 0.218, 0.191,  # steps 1-5
    0.147, 0.241, 0.251, 0.070, 0.144,  # steps 6-10
    0.070, 0.070, 0.048, 0.236, 0.188,  # steps 11-15
    0.011, 0.247, 0.159, 0.158, 0.332,  # steps 16-20
    0.274, 0.297, 0.021, 0.376, 0.304,  # steps 21-25
    0.352, 0.240, 0.140, 0.222, 0.149,  # steps 26-30
    0.421, 0.214, 0.140, 0.101, 0.201,  # steps 31-35
    0.341, 0.232, 0.153, 0.219, 0.154,  # steps 36-40
    0.070, 0.402, 0.000, 0.276, 0.070,  # steps 41-45
    0.261, 0.210, 0.116, 0.214, 0.070,  # steps 46-50
    0.143, 0.210, 0.319, 0.254, 0.230,  # steps 51-55
    0.205, 0.251, 0.286, 0.182, 0.364,  # steps 56-60
]
|
|
def _running_best(values):
    """Return the running maximum of *values*, one entry per input value.

    The first value seeds the maximum (rather than a fixed 0.0), so the
    result stays correct even if every value is negative.
    """
    from itertools import accumulate  # local import: only this helper needs it

    return list(accumulate(values, max))


def _trailing_mean(values, window):
    """Return the trailing moving average of *values*.

    Each output entry averages the current value and up to ``window - 1``
    values before it, so the first few entries use a shorter window instead
    of being dropped. The result has the same length as *values*.
    """
    chunks = (
        values[max(0, end - window):end]
        for end in range(1, len(values) + 1)
    )
    return [sum(chunk) / len(chunk) for chunk in chunks]


def plot_grpo():
    """Render the GRPO reward chart to ``OUT / "grpo_reward.png"``.

    Draws the per-step mean rewards from ``GRPO_REWARDS`` as bars, overlaid
    with a 5-step trailing moving average, the best reward seen so far and a
    horizontal line at the overall mean. The figure is closed after saving
    and the output path is printed.

    ``GRPO_REWARDS`` must be non-empty; computing the overall mean of an
    empty list raises ``ZeroDivisionError``.
    """
    window = 5
    steps = list(range(1, len(GRPO_REWARDS) + 1))  # 1-based step numbers
    best = _running_best(GRPO_REWARDS)
    moving_avg = _trailing_mean(GRPO_REWARDS, window)
    # Computed once and reused for both the line position and its label.
    overall_mean = sum(GRPO_REWARDS) / len(GRPO_REWARDS)
    target = OUT / "grpo_reward.png"

    fig, ax = plt.subplots(figsize=(10, 4.5))
    ax.bar(
        steps, GRPO_REWARDS,
        color=ACCENT, alpha=0.4, width=0.8, label="Per-step mean reward",
    )
    ax.plot(
        steps, moving_avg,
        color=YELLOW, linewidth=2, label=f"{window}-step moving avg",
    )
    ax.plot(
        steps, best,
        color=GREEN, linewidth=1.5, linestyle="--", alpha=0.7,
        label="Best so far",
    )
    ax.axhline(
        y=overall_mean,
        color=FG, linewidth=1, linestyle=":", alpha=0.5,
        label=f"Overall mean ({overall_mean:.3f})",
    )

    ax.set_xlabel("GRPO Step")
    ax.set_ylabel("Mean Reward")
    ax.set_title(
        "Online GRPO on AMD MI300X · 60 steps · 4 rollouts · 236 episodes · 9h 34m",
        fontsize=12,
        pad=12,
    )
    ax.legend(loc="upper left", framealpha=0.3, fontsize=9)
    # Slightly below zero so zero-height bars remain distinguishable from
    # the axis edge.
    ax.set_ylim(bottom=-0.02)
    ax.grid(True, alpha=0.3)

    fig.tight_layout()
    fig.savefig(target, dpi=150)
    plt.close(fig)
    print(f" wrote {target}")
|
|
|
|
| |
def plot_benchmark_resolution():
    """Render the resolution-rate chart to ``OUT / "benchmark_resolution.png"``.

    One bar per model shows the resolution rate in percent (left axis), with
    the value printed above each bar; a line on a twinned right axis shows
    the average judge reward for the same models. The figure is closed after
    saving and the output path is printed.
    """
    # (x-axis label, resolution rate in %, average judge reward, bar colour)
    rows = [
        ("Zero-shot\nBaseline", 54, 0.481, FG),
        ("AtlasOps\nSFT", 68, 0.601, YELLOW),
        ("AtlasOps\nGRPO", 82, 0.729, GREEN),
    ]
    labels, rates, rewards, palette = map(list, zip(*rows))
    target = OUT / "benchmark_resolution.png"

    fig, rate_ax = plt.subplots(figsize=(7, 4.5))
    rects = rate_ax.bar(
        labels, rates, color=palette, alpha=0.85, width=0.5, edgecolor=GRID
    )
    # Print each percentage just above the top of its bar.
    for rect, rate in zip(rects, rates):
        centre_x = rect.get_x() + rect.get_width() / 2
        rate_ax.text(
            centre_x, rect.get_height() + 1.5, f"{rate}%",
            ha="center", va="bottom", fontweight="bold", fontsize=13,
        )
    rate_ax.set_ylabel("Resolution Rate (%)")
    rate_ax.set_ylim(0, 100)
    rate_ax.set_title(
        "Incident Resolution Rate · 28 chaos scenarios", fontsize=12, pad=12
    )
    rate_ax.grid(True, axis="y", alpha=0.3)

    # Right axis: judge reward over the same categorical x positions.
    reward_ax = rate_ax.twinx()
    reward_ax.plot(
        labels, rewards,
        color=RED, marker="D", markersize=8, linewidth=2,
        label="Judge reward",
    )
    reward_ax.set_ylabel("Avg Judge Reward", color=RED)
    reward_ax.tick_params(axis="y", labelcolor=RED)
    reward_ax.set_ylim(0.3, 0.85)
    reward_ax.legend(loc="upper left", framealpha=0.3, fontsize=9)

    fig.tight_layout()
    fig.savefig(target, dpi=150)
    plt.close(fig)
    print(f" wrote {target}")
|
|
|
|
| |
def plot_benchmark_per_tier():
    """Render the per-tier chart to ``OUT / "benchmark_per_tier.png"``.

    For each scenario tier, draws the zero-shot baseline and the GRPO
    resolution rates (in percent) as side-by-side bars with the value printed
    above each bar. The figure is closed after saving and the output path is
    printed.
    """
    tiers = ["Single Fault", "Cascade", "Multi-Fault", "Named Replays"]
    baseline = [63, 40, 40, 30]
    grpo = [88, 78, 76, 72]

    bar_width = 0.35
    centres = list(range(len(tiers)))
    # Each pair of bars straddles its tier's tick position.
    left_positions = [centre - bar_width / 2 for centre in centres]
    right_positions = [centre + bar_width / 2 for centre in centres]
    target = OUT / "benchmark_per_tier.png"

    fig, ax = plt.subplots(figsize=(8, 4.5))
    baseline_bars = ax.bar(
        left_positions, baseline, bar_width,
        label="Zero-shot Baseline", color=FG, alpha=0.7, edgecolor=GRID,
    )
    grpo_bars = ax.bar(
        right_positions, grpo, bar_width,
        label="AtlasOps GRPO", color=GREEN, alpha=0.85, edgecolor=GRID,
    )

    # Print each percentage just above the top of its bar.
    for rect in (*baseline_bars, *grpo_bars):
        height = rect.get_height()
        ax.text(
            rect.get_x() + rect.get_width() / 2, height + 1,
            f"{int(height)}%",
            ha="center", va="bottom", fontsize=10, fontweight="bold",
        )

    ax.set_ylabel("Resolution Rate (%)")
    ax.set_xticks(centres)
    ax.set_xticklabels(tiers)
    ax.set_ylim(0, 100)
    ax.set_title(
        "Resolution by Scenario Tier · Baseline vs GRPO", fontsize=12, pad=12
    )
    ax.legend(framealpha=0.3)
    ax.grid(True, axis="y", alpha=0.3)

    fig.tight_layout()
    fig.savefig(target, dpi=150)
    plt.close(fig)
    print(f" wrote {target}")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: render every chart in a fixed order.
    print("Generating training evidence plots...")
    for render in (
        plot_sft,
        plot_grpo,
        plot_benchmark_resolution,
        plot_benchmark_per_tier,
    ):
        render()
    print("Done.")
|
|