"""Generate the 4 new PNG figures embedded in blog.md. Outputs (idempotent): docs/figures/blog_hero.png docs/figures/tier_pyramid.png docs/figures/dataset_composition.png docs/figures/reward_components.png Run from repo root: .venv/bin/python scripts/generate_blog_figures.py """ from __future__ import annotations from pathlib import Path import matplotlib.pyplot as plt import matplotlib.patches as mpatches from matplotlib.patches import FancyBboxPatch import numpy as np REPO_ROOT = Path(__file__).resolve().parents[1] FIG_DIR = REPO_ROOT / "docs" / "figures" FIG_DIR.mkdir(parents=True, exist_ok=True) PINK = "#ff4f8b" PINK_DARK = "#c81b5a" INK = "#1a1a1a" SLATE = "#525a66" PAPER = "#fff7fa" GRID = "#e8d6df" PALETTE = ["#3a86ff", "#8338ec", "#ff006e", "#fb5607", "#ffbe0b"] def _save(fig: plt.Figure, name: str) -> None: out = FIG_DIR / name fig.savefig(out, dpi=160, bbox_inches="tight", facecolor=fig.get_facecolor()) plt.close(fig) print(f"wrote {out.relative_to(REPO_ROOT)}") def hero() -> None: fig, ax = plt.subplots(figsize=(12, 5.2)) fig.patch.set_facecolor(PAPER) ax.set_facecolor(PAPER) ax.set_xlim(0, 12) ax.set_ylim(0, 5.2) ax.axis("off") ax.text(0.45, 4.55, "AWS Cloud Operations RL", fontsize=15, color=PINK_DARK, fontweight="bold", family="DejaVu Sans") ax.text(0.45, 3.85, "From Cloud Chaos to Capable Agents", fontsize=30, color=INK, fontweight="bold", family="DejaVu Sans") ax.text(0.45, 3.25, "Training an LLM SRE on 120+ AWS Tasks with SFT \u2192 GRPO", fontsize=15, color=SLATE, family="DejaVu Sans", style="italic") stats = [ ("120+", "AWS tasks\n5 tiers + drift"), ("8\u00d7", "parallel rollouts\n1 GPU"), ("8", "anti-hacking\nlayers"), ("39\u219289%", "exact-match\npost-SFT"), ] box_w = 2.55 gap = 0.2 start_x = 0.45 y = 0.55 h = 2.1 for i, (big, small) in enumerate(stats): x = start_x + i * (box_w + gap) box = FancyBboxPatch( (x, y), box_w, h, boxstyle="round,pad=0.04,rounding_size=0.18", linewidth=1.5, edgecolor=PINK, facecolor="white", ) ax.add_patch(box) ax.text(x + box_w / 2, y + h * 0.62, big, fontsize=26, color=PINK_DARK, fontweight="bold", ha="center", va="center") ax.text(x + box_w / 2, y + h * 0.22, small, fontsize=10.5, color=SLATE, ha="center", va="center") ax.text(11.55, 4.55, "OpenEnv Hackathon \u2022 Apr 2026", fontsize=10, color=SLATE, ha="right", style="italic") _save(fig, "blog_hero.png") def tier_pyramid() -> None: # Top of pyramid (apex, narrow, hardest) \u2192 bottom (base, widest, easiest). tiers_top_down = [ ("Expert", 24, "30%", "state_checks", PALETTE[2]), ("Advanced", 25, "30%", "multi_step+services", PALETTE[1]), ("Intermediate", 25, "20%", "multi_step", PALETTE[0]), ("Beginner", 25, "10%", "resource_creation", "#06b6d4"), ("Warmup", 25, "10%", "command_match", "#22c55e"), ] fig, (ax, ax2) = plt.subplots(1, 2, figsize=(14, 6), gridspec_kw={"width_ratios": [3.2, 1]}) fig.patch.set_facecolor("white") n = len(tiers_top_down) ax.set_xlim(-1.15, 1.15) ax.set_ylim(-0.2, n + 0.4) ax.axis("off") ax.set_title("Curriculum: 124 tasks across 5 tiers", fontsize=15, fontweight="bold", color=INK, pad=12) for i, (name, count, chaos, strat, color) in enumerate(tiers_top_down): # i=0 \u2192 apex (top, narrowest); i=n-1 \u2192 base (bottom, widest) y_top = n - i y_bot = n - i - 1 half_top = 0.45 + 0.55 * (i / (n - 1)) # narrow at apex half_bot = 0.45 + 0.55 * ((i + 1) / (n - 1)) # wider at base ax.add_patch( mpatches.Polygon( [(-half_bot, y_bot), (half_bot, y_bot), (half_top, y_top), (-half_top, y_top)], closed=True, facecolor=color, edgecolor="white", linewidth=2, alpha=0.95, ) ) y_mid = (y_top + y_bot) / 2 ax.text(0, y_mid + 0.18, name, fontsize=14, fontweight="bold", color="white", ha="center", va="center") ax.text(0, y_mid - 0.18, f"{count} tasks \u00b7 chaos {chaos} \u00b7 {strat}", fontsize=9.5, color="white", ha="center", va="center", alpha=0.97) # Drift sidebar (right panel) ax2.set_xlim(0, 1) ax2.set_ylim(0, n + 0.4) ax2.axis("off") ax2.set_title("Adversarial track", fontsize=13, fontweight="bold", color=INK, pad=12) box = FancyBboxPatch( (0.08, 1.7), 0.84, 1.7, boxstyle="round,pad=0.04,rounding_size=0.10", facecolor=PINK, edgecolor=PINK_DARK, linewidth=2, alpha=0.92, ) ax2.add_patch(box) ax2.text(0.5, 3.0, "Drift", fontsize=20, fontweight="bold", color="white", ha="center") ax2.text(0.5, 2.6, "9 tasks", fontsize=12, color="white", ha="center") ax2.text(0.5, 2.05, "2\u20133 mutations\nrandomized\nper episode", fontsize=9.5, color="white", ha="center", va="center") ax2.text(0.5, 0.85, "Promotion paths\n\u2014\nstandard: min episodes + rate\nfast-track: 3 consecutive \u22650.9", fontsize=9, color=SLATE, ha="center", va="center") _save(fig, "tier_pyramid.png") def dataset_composition() -> None: traj_labels = ["success", "continuation", "failure recovery", "verification", "hint usage"] traj_sizes = [55, 20, 15, 5, 5] # Expert excluded entirely \u2014 0% is meaningless on a donut. tier_labels = ["warmup", "beginner", "intermediate", "advanced"] tier_sizes = [50, 30, 15, 5] fig, axes = plt.subplots(1, 2, figsize=(15.5, 6)) fig.patch.set_facecolor("white") fig.suptitle("SFT dataset composition \u2022 1,500 rows", fontsize=16, fontweight="bold", color=INK, y=1.02) fig.subplots_adjust(wspace=0.7, left=0.04, right=0.96) def donut(ax, sizes, labels, title, colors, center_label): wedges, _ = ax.pie( sizes, labels=None, colors=colors, wedgeprops={"width": 0.42, "edgecolor": "white", "linewidth": 2}, startangle=90, ) ax.set_title(title, fontsize=13, fontweight="bold", color=INK, pad=10) legend_labels = [f"{l} \u2014 {s}%" for l, s in zip(labels, sizes)] ax.legend(wedges, legend_labels, loc="center left", bbox_to_anchor=(1.05, 0.5), frameon=False, fontsize=11) ax.text(0, 0, center_label, fontsize=14, fontweight="bold", color=INK, ha="center", va="center") donut(axes[0], traj_sizes, traj_labels, "Trajectory types", ["#22c55e", "#3a86ff", "#fb5607", "#8338ec", "#ffbe0b"], "5 types") donut(axes[1], tier_sizes, tier_labels, "Tier weights", ["#22c55e", "#06b6d4", PALETTE[0], PALETTE[1]], "4 tiers\n+ expert*") fig.text( 0.5, -0.04, "* expert tasks excluded from SFT (randomized state checks \u2192 no canonical script). " "GRPO handles them via live reward signal.", fontsize=10, color=SLATE, ha="center", style="italic", ) _save(fig, "dataset_composition.png") def reward_components() -> None: components = [ ("task achieved", 1.00, "+", "achieve"), ("chaos survival", 0.05, "+", "achieve"), ("partial progress", 0.80, "+", "shape"), ("progress delta", 0.10, "+", "shape"), ("idempotent retry", 0.02, "+", "shape"), ("rollback (per pair)", 0.10, "-", "penalty"), ("command failed", 0.50, "-", "penalty"), ("hint decay (n=3)", 0.39, "-", "penalty"), ] color_map = { "achieve": "#22c55e", "shape": PALETTE[0], "penalty": PINK, } labels = [c[0] for c in components] values = [c[1] if c[2] == "+" else -c[1] for c in components] colors = [color_map[c[3]] for c in components] signed = [f"{c[2]}{c[1]:.2f}" for c in components] fig, ax = plt.subplots(figsize=(11.5, 5.8)) fig.patch.set_facecolor("white") y_pos = np.arange(len(labels))[::-1] ax.barh(y_pos, values, color=colors, edgecolor="white", linewidth=1.5, height=0.72, alpha=0.92) for y, v, txt in zip(y_pos, values, signed): offset = 0.025 if v >= 0 else -0.025 ha = "left" if v >= 0 else "right" ax.text(v + offset, y, txt, va="center", ha=ha, fontsize=11, color=INK, fontweight="bold") ax.set_yticks(y_pos) ax.set_yticklabels(labels, fontsize=11.5, color=INK) ax.axvline(0, color=INK, linewidth=1) ax.set_xlim(-0.65, 1.18) ax.set_xlabel("contribution to reward", fontsize=10.5, color=SLATE) ax.set_title("Reward shaping: every modifier the agent can earn or lose", fontsize=14, fontweight="bold", color=INK, pad=12) ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) ax.spines["left"].set_color(GRID) ax.spines["bottom"].set_color(GRID) ax.tick_params(axis="x", colors=SLATE) ax.grid(axis="x", color=GRID, linewidth=0.8, alpha=0.6, zorder=0) ax.set_axisbelow(True) legend_handles = [ mpatches.Patch(color="#22c55e", label="achievement (full reward)"), mpatches.Patch(color=PALETTE[0], label="dense shaping signal"), mpatches.Patch(color=PINK, label="penalty / decay"), ] ax.legend(handles=legend_handles, loc="lower right", frameon=False, fontsize=10) fig.text( 0.5, -0.04, "Final reward is clamped to [0.0, 0.99] before completion (1.0 reserved for " "verified achievement). Hint decay applied last as a multiplier (0.85^n).", fontsize=9.5, color=SLATE, ha="center", style="italic", ) _save(fig, "reward_components.png") def main() -> None: hero() tier_pyramid() dataset_composition() reward_components() if __name__ == "__main__": main()