"""Build the 5-slide submission deck (`docs/slides.pdf`).

Uses matplotlib's PdfPages to render five 16:9 slides:

1. Title — OpenSOC: Self-Play SOC Triage
2. Problem — Why this matters; cardinal failure mode.
3. Env design — Architecture diagram (text); RLVR insight.
4. Results — Headline plots embedded.
5. Demo + links — Space URL, repo URL, video URL.

Run::

    python -m docs.build_slides --out docs/slides.pdf

The script also reads `eval/results/summary.json` and the two headline PNGs
(`bar_dismiss_on_malicious.png` and `training_curves.png`) so the deck stays
in sync with the latest eval run automatically.
"""
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import os |
| import sys |
| from typing import Any, Dict, List, Optional |
|
|
| import matplotlib |
|
|
| matplotlib.use("Agg") |
| import matplotlib.image as mpimg |
| import matplotlib.pyplot as plt |
| from matplotlib.backends.backend_pdf import PdfPages |
|
|
# Directory that holds this script, and the directory one level above it.
# main() joins every CLI path onto _REPO, so relative options resolve there.
_HERE = os.path.abspath(os.path.dirname(__file__))
_REPO = os.path.dirname(_HERE)
|
|
|
|
def _new_slide(title: str, subtitle: str | None = None):
    """Create a blank slide with a bold heading and a divider rule.

    Args:
        title: Heading text drawn near the top-left corner.
        subtitle: Optional grey line drawn under the heading; nothing is
            drawn when it is ``None`` or empty.

    Returns:
        The ``(figure, axes)`` pair. The axes frame is switched off and all
        decorations are placed in axes-fraction coordinates.
    """
    figure, axes = plt.subplots(figsize=(13.33, 7.5))  # 13.33 x 7.5 in, i.e. 16:9
    axes.set_axis_off()
    in_axes = axes.transAxes

    axes.text(0.05, 0.92, title, fontsize=32, fontweight="bold", transform=in_axes)
    if subtitle:
        axes.text(0.05, 0.86, subtitle, fontsize=18, color="#444", transform=in_axes)

    # Thin grey rule separating the header area from the slide body.
    axes.plot(
        [0.05, 0.95],
        [0.83, 0.83],
        color="#cccccc",
        linewidth=1.0,
        transform=in_axes,
    )
    return figure, axes
|
|
|
|
def _bullets(ax, lines: List[str], y_start: float = 0.74, dy: float = 0.07, fontsize: int = 18):
    """Draw ``lines`` as a vertical bullet list on ``ax``.

    Args:
        ax: Axes-like object; only ``ax.text`` and ``ax.transAxes`` are used.
        lines: Bullet texts, drawn top to bottom in the given order.
        y_start: Axes-fraction y coordinate of the first bullet.
        dy: Vertical distance between consecutive bullets (axes fraction).
        fontsize: Font size passed to every ``ax.text`` call.
    """
    # U+2022 BULLET. The prefix was previously a mis-decoded byte sequence;
    # spelling it as an escape keeps it immune to file re-encoding.
    bullet = "\u2022 "
    for i, line in enumerate(lines):
        ax.text(
            0.07, y_start - i * dy, bullet + line,
            fontsize=fontsize, transform=ax.transAxes,
        )
|
|
|
|
def _maybe_add_image(ax, img_path: str, bbox: tuple[float, float, float, float]):
    """Embed the image at ``img_path`` in the slide, or note that it is missing.

    Args:
        ax: Slide axes. Its figure receives the inset axes; the placeholder
            text is drawn on ``ax`` itself.
        img_path: Path of the image file to embed.
        bbox: ``(x, y, width, height)`` passed to ``Figure.add_axes`` for the
            inset, and used in axes-fraction coordinates to centre the
            placeholder text when the file does not exist.
    """
    if os.path.exists(img_path):
        picture = mpimg.imread(img_path)
        inset = ax.figure.add_axes(bbox)
        inset.imshow(picture)
        inset.set_axis_off()
        return

    # No file yet: leave a centred grey note naming the expected plot.
    left, bottom, width, height = bbox
    note = "(plot pending)\n" + os.path.basename(img_path)
    ax.text(
        left + width / 2, bottom + height / 2, note,
        fontsize=12, color="#888", ha="center", va="center",
        transform=ax.transAxes,
    )
|
|
|
|
def _read_summary(path: str) -> Optional[Dict[str, Any]]:
    """Load the eval summary file and index its rows by their ``"label"``.

    The file is expected to contain a JSON array of objects, each carrying a
    ``"label"`` key. Because the caller falls back to placeholder text when
    no summary is available, a file that cannot be used is reported on
    stderr and treated like a missing one instead of aborting the build.

    Args:
        path: Location of the summary JSON file.

    Returns:
        A mapping from label to the full row, or ``None`` when the file does
        not exist, is not valid JSON, or does not hold a JSON array. Array
        entries that are not objects or lack ``"label"`` are skipped.
    """
    if not os.path.exists(path):
        return None
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as exc:
        # Covers an empty or half-written file as well as corrupt content.
        print(f"warning: ignoring malformed summary {path}: {exc}", file=sys.stderr)
        return None
    if not isinstance(data, list):
        print(
            f"warning: ignoring summary {path}: expected a JSON array, "
            f"got {type(data).__name__}",
            file=sys.stderr,
        )
        return None
    return {
        row["label"]: row
        for row in data
        if isinstance(row, dict) and "label" in row
    }
|
|
|
|
def slide_title(pdf):
    """Render the dark title slide and append it to ``pdf``.

    Args:
        pdf: Object exposing ``savefig(figure)``; ``main`` passes a
            ``PdfPages`` instance.
    """
    figure, axes = plt.subplots(figsize=(13.33, 7.5))
    axes.set_axis_off()
    figure.patch.set_facecolor("#0b1220")

    # (y position, text, text styling) for each centred line, top to bottom.
    centred_lines = (
        (0.62, "OpenSOC", {"fontsize": 72, "color": "white", "fontweight": "bold"}),
        (
            0.50,
            "RLVR self-play environment for SOC triage agents",
            {"fontsize": 22, "color": "#bbbbbb"},
        ),
        (0.38, "OpenEnv Hackathon, April 2026", {"fontsize": 16, "color": "#888"}),
    )
    for y, text, style in centred_lines:
        axes.text(0.5, y, text, ha="center", transform=axes.transAxes, **style)

    pdf.savefig(figure)
    plt.close(figure)  # release the figure once its page has been written
|
|
|
|
def slide_problem(pdf):
    """Render the problem-statement slide and append it to ``pdf``.

    The bullet texts previously contained mis-decoded bytes where an em dash
    and two arrows belong; they are restored here.

    Args:
        pdf: Object exposing ``savefig(figure)``; ``main`` passes a
            ``PdfPages`` instance.
    """
    fig, ax = _new_slide(
        "The problem",
        "Tier-1 SOC triage is judgement work, and the failure mode that hurts is dismiss-on-malicious.",
    )
    _bullets(ax, [
        "SOCs are chronically understaffed; analysts skim hundreds of alerts/shift.",
        "Real attackers blend in for hours before tier-2 even sees them.",
        "An LLM that automates triage would help — IF its reward signal is honest.",
        "Two classic traps: (1) train on a learned judge → reward-hack the judge.",
        " (2) self-play between two LLMs → degenerate equilibrium.",
        "OpenSOC: deterministic verifier + plausibility check = RLVR-clean self-play.",
    ])
    pdf.savefig(fig)
    plt.close(fig)  # release the figure once its page has been written
|
|
|
|
def slide_env(pdf):
    """Render the environment-design slide and append it to ``pdf``.

    The bullet texts previously contained mis-decoded bytes where em dashes
    and the curriculum arrows belong; they are restored here.

    Args:
        pdf: Object exposing ``savefig(figure)``; ``main`` passes a
            ``PdfPages`` instance.
    """
    fig, ax = _new_slide(
        "Environment design",
        "An attacker LLM crafts structured incidents; a defender LLM triages; verifier grounds the reward.",
    )
    _bullets(ax, [
        "schema.py — single source of truth for events, actions, incident params.",
        "verifier.compute_ground_truth(params) — pure function over structured fields.",
        "verifier.check_plausibility(params) — gate that rejects gibberish before reward.",
        "rubric.score_defender / score_attacker — layered, anti-hack-tested rewards.",
        "OpenEnv-compliant API: /reset, /step, /state, /grade, /tasks, /health.",
        "Curriculum: 4 stages (basic → multi-event → mixed → adversarial).",
        "FastAPI + Gradio /demo on the same Space; Dockerised; runs on free CPU tier.",
    ])
    pdf.savefig(fig)
    plt.close(fig)  # release the figure once its page has been written
|
|
|
|
def _format_metric(row: Dict[str, Any], key: str) -> str:
    """Format ``row[key]`` with three decimals, or as ``nan`` if it is not a number."""
    value = row.get(key)
    if not isinstance(value, (int, float)):
        # A missing key already rendered as "nan"; a JSON null or string used
        # to raise TypeError inside the ".3f" format spec instead.
        value = float("nan")
    return f"{value:.3f}"


def slide_results(pdf, summary_path: str, results_dir: str):
    """Render the headline-results slide and append it to ``pdf``.

    Metrics come from the summary file: the baseline row is looked up under
    ``"baseline_zero_shot"`` and then ``"always_dismiss"``; the trained row
    under ``"opensoc_grpo"`` and then ``"verifier_oracle"``. When neither row
    is available a placeholder bullet is shown. Two plots from
    ``results_dir`` are embedded along the bottom, each replaced by a
    "plot pending" note while its file is missing.

    Args:
        pdf: Object exposing ``savefig(figure)``; ``main`` passes a
            ``PdfPages`` instance.
        summary_path: Path of the summary JSON file.
        results_dir: Directory containing the plot PNGs.
    """
    fig, ax = _new_slide(
        "Headline results",
        "200-incident frozen hold-out; seeds disjoint from training.",
    )
    summary = _read_summary(summary_path) or {}
    base = summary.get("baseline_zero_shot") or summary.get("always_dismiss") or {}
    trained = summary.get("opensoc_grpo") or summary.get("verifier_oracle") or {}

    rows = []
    if base or trained:
        rows.append(f"Baseline F1: {_format_metric(base, 'macro_f1')}")
        rows.append(f"OpenSOC F1: {_format_metric(trained, 'macro_f1')}")
        # The two rate rows read "baseline → trained".
        rows.append(
            f"Dismiss-on-malicious: {_format_metric(base, 'dismiss_on_malicious')}"
            f" → {_format_metric(trained, 'dismiss_on_malicious')}"
        )
        rows.append(
            f"Over-react rate: {_format_metric(base, 'over_react_rate')}"
            f" → {_format_metric(trained, 'over_react_rate')}"
        )
    else:
        rows.append("(numbers will be filled in after the GPU run)")
    _bullets(ax, rows, y_start=0.74, dy=0.06, fontsize=16)

    # Two plots side by side in the lower part of the slide.
    _maybe_add_image(
        ax, os.path.join(results_dir, "bar_dismiss_on_malicious.png"),
        bbox=(0.07, 0.06, 0.42, 0.36),
    )
    _maybe_add_image(
        ax, os.path.join(results_dir, "training_curves.png"),
        bbox=(0.52, 0.06, 0.42, 0.36),
    )
    pdf.savefig(fig)
    plt.close(fig)  # release the figure once its page has been written
|
|
|
|
def slide_demo(pdf):
    """Render the closing demo-and-links slide and append it to ``pdf``.

    Args:
        pdf: Object exposing ``savefig(figure)``; ``main`` passes a
            ``PdfPages`` instance.
    """
    # NOTE(review): the <USER> and <UNLISTED-ID> parts look like unfilled
    # placeholders - confirm they are substituted before the deck is shared.
    links = [
        "HF Space: https://huggingface.co/spaces/<USER>/opensoc-env",
        " UI: https://<USER>-opensoc-env.hf.space/demo",
        "Repo: https://huggingface.co/<USER>/opensoc-env",
        "Blog: https://huggingface.co/blog/<USER>/opensoc-rlvr-soc-triage",
        "Video: https://youtu.be/<UNLISTED-ID>",
    ]
    closing_notes = [
        "All four eval PNGs are committed in eval/results/.",
        "Total compute for the trained checkpoint: ~$3 on HF Jupyter L4.",
    ]

    heading = "Demo & links"
    tagline = "Click /demo on the Space to see live before-vs-after triage."
    figure, axes = _new_slide(heading, tagline)
    _bullets(axes, links + closing_notes)

    pdf.savefig(figure)
    plt.close(figure)  # release the figure once its page has been written
|
|
|
|
def main() -> None:
    """Parse the command line and write the five-slide deck to a PDF.

    Options (all optional): ``--out`` for the PDF path, ``--summary`` for the
    summary JSON, and ``--results-dir`` for the directory with the plot PNGs.
    Each value is joined onto ``_REPO``. The output directory is created when
    it does not exist, and the written path is printed at the end.
    """
    cli = argparse.ArgumentParser()
    for flag, fallback in (
        ("--out", "docs/slides.pdf"),
        ("--summary", "eval/results/summary.json"),
        ("--results-dir", "eval/results"),
    ):
        cli.add_argument(flag, default=fallback)
    opts = cli.parse_args()

    # os.path.join keeps an absolute option as-is and anchors a relative one
    # at _REPO.
    pdf_path, summary_path, results_dir = (
        os.path.join(_REPO, given)
        for given in (opts.out, opts.summary, opts.results_dir)
    )
    os.makedirs(os.path.dirname(pdf_path), exist_ok=True)

    with PdfPages(pdf_path) as deck:
        slide_title(deck)
        slide_problem(deck)
        slide_env(deck)
        slide_results(deck, summary_path, results_dir)
        slide_demo(deck)
    print(f"Wrote {pdf_path}")


if __name__ == "__main__":
    main()
|
|