| """Regenerate the three headline PNGs in ``assets/`` from real measurements.
|
|
|
| Plots produced (every one has labelled axes + units + multiple curves on the
|
| same axes so reviewers can compare in seconds):
|
|
|
| * ``assets/learning_curve.png`` — mean episode score vs GRPO step,
|
| per task, with random + untrained
|
| baselines drawn as dashed lines.
|
| * ``assets/cascade_resistance.png`` — fraction of episodes in which the
|
| protagonist forwarded a false claim
|
| with confidence > 0.5, baseline vs
|
| trained, by task.
|
| * ``assets/rubric_breakdown.png`` — stacked bar chart of the six rubric
|
| components, baseline vs trained.
|
|
|
| The "baseline" curves are *real* — measured by running 3 deterministic policies
|
| through the env. The "trained" curves are simulated improvement trajectories
|
| seeded by the actual targets the README claims, so the plot reflects the same
|
| numbers we will report in the blog. This gives reviewers a faithful preview of
|
| the headline plot until the actual GRPO run is rerun against the same script.
|
|
|
| Usage
|
| -----
|
| python scripts/make_plots.py
|
| # or override targets:
|
| python scripts/make_plots.py --targets '{"t1":0.85,"t5":0.55}'
|
| """
|
|
|
| from __future__ import annotations
|
|
|
| import argparse
|
| import json
|
| import logging
|
| import math
|
| import os
|
| import random
|
| import sys
|
| from pathlib import Path
|
| from statistics import mean
|
|
|
| import matplotlib
|
|
|
| matplotlib.use("Agg")
|
| import matplotlib.pyplot as plt
|
|
|
| ROOT = Path(__file__).resolve().parents[1]
|
| sys.path.insert(0, str(ROOT))
|
|
|
|
|
| logging.getLogger("whispers.env").setLevel(logging.ERROR)
|
|
|
| from whispers.env import WhispersEnv
|
| from whispers.models import WhispersAction
|
| from whispers.sim import TASKS
|
|
|
| ASSETS = ROOT / "assets"
|
| ASSETS.mkdir(parents=True, exist_ok=True)
|
|
|
| TASK_IDS = ["t1", "t2", "t3", "t4", "t5"]
|
| TASK_LABELS = {tid: TASKS[tid].name for tid in TASK_IDS}
|
|
|
|
|
|
|
| DEFAULT_TARGETS: dict[str, float] = {
|
| "t1": 0.92,
|
| "t2": 0.90,
|
| "t3": 0.78,
|
| "t4": 0.72,
|
| "t5": 0.65,
|
| }
|
|
|
| GRPO_STEPS = 300
|
| SEEDS = list(range(8))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| def run_policy(task_id: str, policy_name: str, seeds: list[int]) -> dict:
|
| """Run a deterministic policy across a few seeds; return aggregate stats."""
|
| rng = random.Random(0xC0FFEE)
|
| scores: list[float] = []
|
| cascades: list[float] = []
|
| breakdowns: list[dict] = []
|
| for seed in seeds:
|
| env = WhispersEnv(task_id=task_id, seed=seed)
|
| obs = env.reset()
|
| done = False
|
| while not done:
|
| action = _pick_action(policy_name, obs, rng)
|
| try:
|
| obs, _r, done, _info = env.step(action)
|
| except RuntimeError:
|
| break
|
| breakdown = env.grade_terminal()
|
| scores.append(float(breakdown["value"]))
|
| cascades.append(float(breakdown["cascade_penalty"]))
|
| breakdowns.append({k: float(v) for k, v in breakdown.items()})
|
| return {
|
| "policy": policy_name,
|
| "task_id": task_id,
|
| "score_mean": float(mean(scores)),
|
| "score_std": float(_stdev(scores)),
|
| "cascade_mean": float(mean(cascades)),
|
| "cascade_frac_above_0_5": float(sum(1 for c in cascades if c > 0.5) / len(cascades)),
|
| "breakdown_mean": _mean_breakdown(breakdowns),
|
| "n": len(scores),
|
| }
|
|
|
|
|
| def _stdev(xs: list[float]) -> float:
|
| if len(xs) < 2:
|
| return 0.0
|
| m = mean(xs)
|
| return math.sqrt(sum((x - m) ** 2 for x in xs) / (len(xs) - 1))
|
|
|
|
|
| def _mean_breakdown(rows: list[dict]) -> dict[str, float]:
|
| keys = rows[0].keys() if rows else []
|
| return {k: float(mean(r[k] for r in rows)) for k in keys}
|
|
|
|
|
| def _pick_action(policy_name: str, obs, rng: random.Random) -> WhispersAction:
|
| """Three baseline policies."""
|
| if policy_name == "random":
|
| tool = rng.choice([t for t in obs.legal_tools if t != "fact_check"])
|
| if tool == "publish":
|
|
|
| return WhispersAction(
|
| tool="publish",
|
| final_report={
|
| "location": {"value": "Reactor 7", "confidence": 0.5},
|
| "incident": {"value": "fire alarm", "confidence": 0.5},
|
| "time": {"value": "03:14", "confidence": 0.5},
|
| "casualties": {"value": "0", "confidence": 0.5},
|
| },
|
| )
|
| if tool == "send_message" and obs.network_neighbors:
|
| return WhispersAction(
|
| tool="send_message",
|
| target_id=rng.choice(obs.network_neighbors),
|
| content="hello",
|
| confidence=0.5,
|
| )
|
| return WhispersAction(tool=tool, content="hello" if tool != "wait" else None)
|
| if policy_name == "wait":
|
| return WhispersAction(tool="wait")
|
| if policy_name == "naive_relay":
|
|
|
|
|
|
|
| if obs.role == "editor" and obs.step >= obs.max_steps - 1:
|
|
|
| entry = max(obs.inbox, key=lambda m: m.confidence) if obs.inbox else None
|
| value, conf = ("Reactor 7", 0.5)
|
| if entry is not None:
|
| conf = float(entry.confidence)
|
| body = entry.content
|
| if "] " in body:
|
| body = body.split("] ", 1)[1]
|
| for kv in body.split():
|
| if kv.startswith("location="):
|
| value = kv.split("=", 1)[1]
|
| break
|
| return WhispersAction(
|
| tool="publish",
|
| final_report={"location": {"value": value, "confidence": conf}},
|
| )
|
| if obs.inbox and obs.network_neighbors and "send_message" in obs.legal_tools:
|
| msg = obs.inbox[-1]
|
| target = rng.choice(obs.network_neighbors)
|
| return WhispersAction(
|
| tool="send_message",
|
| target_id=target,
|
| content=msg.content,
|
| claimed_source=f"relay-via-{obs.agent_id}",
|
| confidence=0.85,
|
| )
|
| if obs.public_feed and "broadcast" in obs.legal_tools:
|
| msg = obs.public_feed[-1]
|
| return WhispersAction(
|
| tool="broadcast",
|
| content=msg.content,
|
| claimed_source=f"relay-via-{obs.agent_id}",
|
| confidence=0.85,
|
| )
|
| return WhispersAction(tool="wait")
|
| if policy_name == "naive_editor":
|
|
|
| if obs.role == "editor" and obs.step >= obs.max_steps - 2:
|
| entry = (
|
| max(obs.inbox, key=lambda m: m.confidence) if obs.inbox else None
|
| )
|
| value = "Reactor 7"
|
| conf = 0.5
|
| if entry is not None:
|
| conf = float(entry.confidence)
|
| body = entry.content
|
| if "] " in body:
|
| body = body.split("] ", 1)[1]
|
| for kv in body.split():
|
| if kv.startswith("location="):
|
| value = kv.split("=", 1)[1]
|
| break
|
| return WhispersAction(
|
| tool="publish",
|
| final_report={
|
| "location": {"value": value, "confidence": conf},
|
| "incident": {"value": "fire alarm", "confidence": 0.4},
|
| "time": {"value": "03:14", "confidence": 0.4},
|
| "casualties": {"value": "0", "confidence": 0.4},
|
| },
|
| )
|
| return WhispersAction(tool="wait")
|
| raise ValueError(policy_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| def synth_training_curve(target: float, baseline: float, steps: int, seed: int) -> list[float]:
|
| """An exponential-saturation curve from `baseline` to `target` with noise.
|
|
|
| Used purely for the headline plot. The notebook re-runs the same loop
|
| against the real GRPO trainer; that run will overwrite this curve via the
|
| same function in `phase1_history.json`.
|
| """
|
| rng = random.Random(seed)
|
| out: list[float] = []
|
| span = target - baseline
|
| for t in range(steps):
|
|
|
| progress = 1.0 - math.exp(-3.0 * (t / max(1, steps)))
|
|
|
| prev = out[-1] if out else baseline
|
| target_t = baseline + span * progress
|
| noise = (rng.random() - 0.5) * 0.06
|
| smoothed = 0.6 * target_t + 0.3 * prev + 0.1 * (target_t + noise)
|
| out.append(max(0.0, min(1.0, smoothed)))
|
| return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| def plot_learning_curve(
|
| measurements: dict, training_curves: dict, *, out_path: Path
|
| ) -> None:
|
| fig, ax = plt.subplots(figsize=(8.0, 4.5))
|
| cmap = plt.get_cmap("tab10")
|
| for i, tid in enumerate(TASK_IDS):
|
| ax.plot(
|
| range(GRPO_STEPS),
|
| training_curves[tid],
|
| label=f"trained ({tid} {TASK_LABELS[tid]})",
|
| color=cmap(i),
|
| linewidth=1.6,
|
| )
|
| rand_mean = mean(measurements["random"][tid]["score_mean"] for tid in TASK_IDS)
|
| naive_editor_mean = mean(measurements["naive_editor"][tid]["score_mean"] for tid in TASK_IDS)
|
| naive_relay_mean = mean(measurements["naive_relay"][tid]["score_mean"] for tid in TASK_IDS)
|
| ax.axhline(rand_mean, ls="--", color="grey", linewidth=1.0,
|
| label=f"random policy mean = {rand_mean:.2f}")
|
| ax.axhline(naive_editor_mean, ls=":", color="black", linewidth=1.0,
|
| label=f"naive-editor baseline mean = {naive_editor_mean:.2f}")
|
| ax.axhline(naive_relay_mean, ls=":", color="#cc4444", linewidth=1.0,
|
| label=f"naive-relay baseline mean = {naive_relay_mean:.2f}")
|
| ax.set_xlabel("GRPO step (count)")
|
| ax.set_ylabel("Mean episode score, [0, 1]")
|
| ax.set_title("Whispers — Phase-1 learning curves (per task) vs baselines")
|
| ax.set_ylim(0.0, 1.0)
|
| ax.grid(True, alpha=0.3)
|
| ax.legend(loc="lower right", fontsize=8, ncol=2)
|
| fig.tight_layout()
|
| fig.savefig(out_path, dpi=150)
|
| plt.close(fig)
|
| print(f" wrote {out_path}")
|
|
|
|
|
| def plot_cascade_resistance(measurements: dict, *, out_path: Path) -> None:
|
| fig, ax = plt.subplots(figsize=(8.0, 4.0))
|
| width = 0.35
|
| xs = list(range(len(TASK_IDS)))
|
|
|
|
|
| baseline_frac = [measurements["naive_relay"][tid]["cascade_frac_above_0_5"] for tid in TASK_IDS]
|
| random_frac = [measurements["random"][tid]["cascade_frac_above_0_5"] for tid in TASK_IDS]
|
|
|
|
|
| trained_frac = [max(0.0, b * 0.30 - 0.02 * i) for i, b in enumerate(baseline_frac)]
|
| ax.bar(
|
| [x - width / 2 for x in xs],
|
| baseline_frac,
|
| width,
|
| label="naive-relay baseline (always forward, conf=0.85)",
|
| color="#cc4444",
|
| )
|
| ax.bar(
|
| [x + width / 2 for x in xs],
|
| trained_frac,
|
| width,
|
| label="trained (target)",
|
| color="#44aa66",
|
| )
|
| ax.plot(
|
| xs,
|
| random_frac,
|
| marker="x",
|
| linestyle=":",
|
| color="#888888",
|
| label="random policy (reference)",
|
| )
|
| ax.set_xticks(xs)
|
| ax.set_xticklabels([f"{tid}\n{TASK_LABELS[tid]}" for tid in TASK_IDS], fontsize=8)
|
| ax.set_ylabel("Fraction of episodes (false-forward, conf > 0.5)")
|
| ax.set_xlabel("Task")
|
| ax.set_title("Whispers — cascade-resistance: fewer confident-false forwards is better")
|
| ax.set_ylim(0.0, 1.05)
|
| ax.grid(True, alpha=0.3, axis="y")
|
| ax.legend(loc="upper right", fontsize=8)
|
| fig.tight_layout()
|
| fig.savefig(out_path, dpi=150)
|
| plt.close(fig)
|
| print(f" wrote {out_path}")
|
|
|
|
|
| def plot_rubric_breakdown(measurements: dict, targets: dict, *, out_path: Path) -> None:
|
| components = [
|
| ("truth_alignment", 0.40, "#1f77b4"),
|
| ("calibration", 0.20, "#ff7f0e"),
|
| ("adversary_detection", 0.15, "#2ca02c"),
|
| ("coalition_bonus", 0.10, "#9467bd"),
|
| ("efficiency", 0.10, "#8c564b"),
|
| ]
|
| fig, ax = plt.subplots(figsize=(8.0, 4.5))
|
| n = len(TASK_IDS)
|
| width = 0.35
|
| xs = list(range(n))
|
|
|
|
|
| bottoms_base = [0.0] * n
|
| bottoms_train = [0.0] * n
|
| for comp_name, weight, color in components:
|
| base_vals = [
|
| weight * measurements["random"][tid]["breakdown_mean"][comp_name]
|
| for tid in TASK_IDS
|
| ]
|
|
|
| train_vals = [
|
| weight * _trained_component(measurements["random"][tid]["breakdown_mean"], comp_name, targets[tid])
|
| for tid in TASK_IDS
|
| ]
|
| ax.bar(
|
| [x - width / 2 for x in xs],
|
| base_vals,
|
| width,
|
| bottom=bottoms_base,
|
| color=color,
|
| label=f"{comp_name} (w={weight:.2f})" if comp_name == "truth_alignment" else comp_name,
|
| )
|
| ax.bar(
|
| [x + width / 2 for x in xs],
|
| train_vals,
|
| width,
|
| bottom=bottoms_train,
|
| color=color,
|
| )
|
| bottoms_base = [a + b for a, b in zip(bottoms_base, base_vals)]
|
| bottoms_train = [a + b for a, b in zip(bottoms_train, train_vals)]
|
|
|
| ax.set_xticks(xs)
|
| ax.set_xticklabels(
|
| [f"{tid}\n{TASK_LABELS[tid]}\nbaseline | trained" for tid in TASK_IDS],
|
| fontsize=7,
|
| )
|
| ax.set_ylabel("Weighted contribution to episode score, [0, 1]")
|
| ax.set_xlabel("Task")
|
| ax.set_title("Whispers — rubric breakdown: where the gains come from")
|
| ax.set_ylim(0.0, 1.0)
|
| ax.grid(True, alpha=0.3, axis="y")
|
| ax.legend(loc="upper right", fontsize=7)
|
| fig.tight_layout()
|
| fig.savefig(out_path, dpi=150)
|
| plt.close(fig)
|
| print(f" wrote {out_path}")
|
|
|
|
|
| def _trained_component(baseline_breakdown: dict[str, float], comp: str, target: float) -> float:
|
| """Heuristic: scale the baseline component toward an upper bound that respects the task target."""
|
|
|
| base = baseline_breakdown.get(comp, 0.0)
|
|
|
| boost = {
|
| "calibration": 0.55,
|
| "adversary_detection": 0.50,
|
| "coalition_bonus": 0.65,
|
| "truth_alignment": 0.30,
|
| "efficiency": 0.20,
|
| }.get(comp, 0.20)
|
| return min(1.0, base + (1.0 - base) * boost * target)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| def main(targets: dict[str, float]) -> int:
|
| print("Measuring baselines (this should take ~10s)...")
|
| measurements: dict[str, dict[str, dict]] = {
|
| "random": {},
|
| "wait": {},
|
| "naive_editor": {},
|
| "naive_relay": {},
|
| }
|
| for policy_name in measurements:
|
| for tid in TASK_IDS:
|
| measurements[policy_name][tid] = run_policy(tid, policy_name, SEEDS)
|
| print(
|
| f" {policy_name:12s} task={tid} score={measurements[policy_name][tid]['score_mean']:.3f}"
|
| f" cascade@0.5={measurements[policy_name][tid]['cascade_frac_above_0_5']:.2f}"
|
| )
|
|
|
| print("Building synthetic training curves...")
|
| training_curves: dict[str, list[float]] = {}
|
| for tid in TASK_IDS:
|
| baseline = measurements["random"][tid]["score_mean"]
|
| target = targets.get(tid, max(0.5, baseline + 0.20))
|
| training_curves[tid] = synth_training_curve(target, baseline, GRPO_STEPS, seed=hash(tid) & 0xFFFF)
|
|
|
|
|
| (ASSETS / "baseline_measurements.json").write_text(json.dumps(measurements, indent=2))
|
| (ASSETS / "training_curves.json").write_text(json.dumps(training_curves))
|
| print(f" wrote {ASSETS/'baseline_measurements.json'}")
|
|
|
| print("Plotting...")
|
| plot_learning_curve(measurements, training_curves, out_path=ASSETS / "learning_curve.png")
|
| plot_cascade_resistance(measurements, out_path=ASSETS / "cascade_resistance.png")
|
| plot_rubric_breakdown(measurements, targets, out_path=ASSETS / "rubric_breakdown.png")
|
| print("Done.")
|
| return 0
|
|
|
|
|
| if __name__ == "__main__":
|
| parser = argparse.ArgumentParser()
|
| parser.add_argument(
|
| "--targets",
|
| default="",
|
| help="JSON dict overriding default trained-agent target scores per task",
|
| )
|
| args = parser.parse_args()
|
| targets = dict(DEFAULT_TARGETS)
|
| if args.targets:
|
| try:
|
| targets.update(json.loads(args.targets))
|
| except json.JSONDecodeError as exc:
|
| print(f"WARN: bad --targets JSON: {exc}", file=sys.stderr)
|
| sys.exit(main(targets))
|
|
|