#!/usr/bin/env python3
"""Generate paper figures from the locked gate-report numbers.

Self-contained (no bucket): every plotted value is hard-coded below.
Output: publication TIFFs at 650 dpi (LZW-compressed), written to a
``figures`` directory next to this script and then flattened to opaque RGB.
"""
import matplotlib

# The backend must be selected before pyplot is imported.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

from PIL import Image

OUT = Path(__file__).resolve().parent / "figures"
OUT.mkdir(exist_ok=True)
DPI = 650

plt.rcParams.update({
    "figure.dpi": 150,
    "savefig.dpi": DPI,
    "font.size": 11,
    "axes.grid": True,
    "grid.alpha": 0.3,
    "axes.axisbelow": True,
})


def _save_tiff(fig, name):
    """Tighten the layout of *fig*, save it as OUT/name (LZW TIFF), and close it."""
    fig.tight_layout()
    fig.savefig(OUT / name, dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
    plt.close(fig)


def _flatten_to_rgb(path):
    """Rewrite the TIFF at *path* as RGB composited on white.

    Only images whose mode is RGBA, LA or P are rewritten; any other mode is
    left untouched. Returns True when the file was rewritten, else False.
    """
    # Convert while the file is open, then close it before overwriting the
    # same path so no handle to the old file is still held during the save.
    with Image.open(path) as im:
        if im.mode not in ("RGBA", "LA", "P"):
            return False
        rgba = im.convert("RGBA")
    bg = Image.new("RGB", rgba.size, "white")
    bg.paste(rgba, mask=rgba.split()[-1])  # last band of RGBA is alpha
    bg.save(path, format="TIFF", dpi=(DPI, DPI), compression="tiff_lzw")
    return True


def fig1_layer():
    """Plot per-block AUROC with error bars and write fig1_layer_ablation.tiff."""
    # full 12-block depth curve with multi-seed (n=3) error bars [research_v3/rigor_results.json]
    blocks = np.arange(1, 13)
    mean = [0.8612, 0.8641, 0.8657, 0.8375, 0.8127, 0.7818,
            0.7366, 0.7059, 0.6816, 0.6802, 0.6607, 0.6365]
    std = [0.0098, 0.0089, 0.0098, 0.0169, 0.0182, 0.0163,
           0.0253, 0.0268, 0.0302, 0.0379, 0.0373, 0.0264]

    fig, ax = plt.subplots(figsize=(5.4, 3.4))
    ax.errorbar(blocks, mean, yerr=std, fmt="o-", color="#1f77b4", lw=2, ms=6,
                capsize=3, label="density-A (mean ± std, 3 seeds)")
    ax.axhline(0.767, ls="--", color="#d62728", label="attention saliency (0.767)")

    # Label the block with the highest mean value.
    pk = int(np.argmax(mean))
    ax.annotate(f"peak block {blocks[pk]}\n{mean[pk]:.3f}±{std[pk]:.3f}",
                (blocks[pk], mean[pk]), textcoords="offset points",
                xytext=(14, 6), ha="left", fontsize=8.5, color="#1f77b4")

    ax.set_xticks(blocks)
    ax.set_xlabel("transformer block (MedDINOv3)")
    ax.set_ylabel("token-level lesion AUROC (LIDC)")
    ax.set_ylim(0.55, 0.92)
    ax.set_title("Finding 1: lesion signal peaks mid-layer, erodes with depth")
    ax.legend(loc="lower left", fontsize=8.5)
    _save_tiff(fig, "fig1_layer_ablation.tiff")


def fig2_crossmodality():
    """Plot paired density/attention bars per dataset and write fig2_cross_modality.tiff."""
    # Each row: (tick label, density value, attention value).
    data = [
        ("LIDC lung\nCT", 0.871, 0.767),
        ("pancreas\nCT", 0.876, 0.920),
        ("KiTS23 kidney\nCT", 0.823, 0.823),
        ("MSD liver\nCT", 0.670, 0.756),
        ("BUSI breast\nUS (DINOv2)", 0.733, 0.492),
    ]
    labels = [row[0] for row in data]
    dens = [row[1] for row in data]
    attn = [row[2] for row in data]
    x = np.arange(len(labels))
    w = 0.38

    fig, ax = plt.subplots(figsize=(7.2, 3.6))
    ax.bar(x - w/2, dens, w, label="density-A (ours, label-free)", color="#1f77b4")
    ax.bar(x + w/2, attn, w, label="attention saliency", color="#ff7f0e")
    ax.axhline(0.70, ls=":", color="gray")
    ax.axhline(0.50, ls="--", color="k", alpha=0.4, label="chance")
    ax.set_xticks(x)
    ax.set_xticklabels(labels, fontsize=8.5)
    ax.set_ylabel("token-level lesion AUROC")
    ax.set_ylim(0.4, 1.0)
    ax.set_title("Finding 2: label-free localizer across anatomy / modality / backbone")
    ax.legend(loc="upper right", fontsize=8.5)

    # annotate liver failure + US attention-collapse
    ax.annotate("density fails\n(low-contrast)", (3 - w/2, 0.670),
                textcoords="offset points", xytext=(-2, -34), ha="center",
                fontsize=7.5, color="#1f77b4")
    ax.annotate("attention\n~ chance", (4 + w/2, 0.492),
                textcoords="offset points", xytext=(2, 4), ha="center",
                fontsize=7.5, color="#ff7f0e")
    _save_tiff(fig, "fig2_cross_modality.tiff")


def fig3_pruning_gain():
    """Plot recall gain at two budgets per dataset and write fig3_pruning_gain.tiff."""
    # Each row: (tick label, gain at budget 0.25, gain at budget 0.50).
    data = [
        ("LIDC\nlung CT", 27.6, 15.8),
        ("KiTS23\nkidney CT", 7.4, 1.6),
        ("BUSI\nbreast US", 13.8, 19.0),
    ]
    labels = [row[0] for row in data]
    b25 = [row[1] for row in data]
    b50 = [row[2] for row in data]
    x = np.arange(len(labels))
    w = 0.38

    fig, ax = plt.subplots(figsize=(6, 3.6))
    ax.bar(x - w/2, b25, w, label="budget 0.25", color="#2ca02c")
    ax.bar(x + w/2, b50, w, label="budget 0.50", color="#98df8a")
    ax.axhline(5, ls=":", color="gray", label="effect floor (5 pts)")

    # Print the signed value just above each bar.
    for i, (g25, g50) in enumerate(zip(b25, b50)):
        ax.annotate(f"+{g25:.1f}", (i - w/2, g25), textcoords="offset points",
                    xytext=(0, 3), ha="center", fontsize=8)
        ax.annotate(f"+{g50:.1f}", (i + w/2, g50), textcoords="offset points",
                    xytext=(0, 3), ha="center", fontsize=8)

    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.set_ylabel("small-lesion recall gain (pts)\nmembership vs saliency pruning")
    ax.set_title("Finding 3: membership pruning > saliency pruning")
    ax.legend(loc="upper right", fontsize=9)
    _save_tiff(fig, "fig3_pruning_gain.tiff")


def fig4_ablation():
    """Plot three pruning variants at two budgets and write fig4_floor_ablation.tiff."""
    budgets = ["0.25", "0.50"]
    sal = [0.521, 0.827]
    sub = [0.817, 0.981]
    floor = [0.219, 0.460]
    x = np.arange(len(budgets))
    w = 0.26

    fig, ax = plt.subplots(figsize=(5.6, 3.6))
    ax.bar(x - w, sal, w, label="saliency pruning", color="#ff7f0e")
    ax.bar(x, sub, w, label="subspace membership (ours)", color="#1f77b4")
    ax.bar(x + w, floor, w, label="subspace + coverage FLOOR", color="#d62728",
           hatch="//")
    ax.set_xticks(x)
    ax.set_xticklabels([f"budget {b}" for b in budgets])
    ax.set_ylabel("small-lesion recall (matched budget)")
    ax.set_ylim(0, 1.05)
    ax.set_title("Finding 4 (negative): the coverage floor HURTS")
    ax.legend(loc="upper left", fontsize=8.5)
    ax.annotate("rank coverage rewards spanning,\nnot lesion concentration",
                (1 + w, 0.460), textcoords="offset points", xytext=(-6, 18),
                ha="center", fontsize=7.5, color="#d62728")
    _save_tiff(fig, "fig4_floor_ablation.tiff")


def fig5_conformal():
    """Plot guaranteed vs empirical fractions per budget and write fig5_conformal.tiff."""
    # budget-guarantee tradeoff + validity
    fig, ax = plt.subplots(figsize=(5.2, 3.4))
    budgets = [0.25, 0.5]
    guar = [0.0, 1.0]
    emp = [0.978, 0.971]
    ax.plot(budgets, guar, "s-", color="#1f77b4", label="guaranteed lesion retention")
    ax.plot(budgets, emp, "o--", color="#2ca02c", label="empirical coverage (valid)")
    ax.axhline(0.90, ls=":", color="gray", label="nominal 1-α = 0.90")
    ax.set_xlabel("token budget")
    ax.set_ylabel("fraction")
    ax.set_ylim(-0.05, 1.05)
    ax.set_xticks(budgets)
    ax.set_title("Conformal retention certificate\n(valid; honest budget tradeoff)")
    ax.legend(loc="center right", fontsize=8)
    _save_tiff(fig, "fig5_conformal.tiff")


def fig6_cross_objective():
    """Plot per-block AUROC for three backbones and write fig6_cross_objective.tiff."""
    # F3 decisive: depth-erosion across objectives [research_v3/f3_cross_objective.json]
    blocks = np.arange(1, 13)
    dino = [0.862, 0.880, 0.868, 0.807, 0.729, 0.682,
            0.701, 0.676, 0.682, 0.653, 0.621, 0.617]
    sup = [0.842, 0.840, 0.831, 0.825, 0.816, 0.797,
           0.785, 0.791, 0.785, 0.732, 0.681, 0.658]
    mae = [0.611, 0.600, 0.596, 0.600, 0.589, 0.590,
           0.587, 0.582, 0.577, 0.570, 0.577, 0.568]

    fig, ax = plt.subplots(figsize=(5.8, 3.6))
    ax.plot(blocks, dino, "o-", color="#1f77b4", lw=2, ms=5,
            label="DINOv2 (self-distill) ρ=−0.93")
    ax.plot(blocks, sup, "s-", color="#2ca02c", lw=2, ms=5,
            label="ViT (supervised) ρ=−0.73")
    ax.plot(blocks, mae, "^--", color="#d62728", lw=2, ms=5,
            label="MAE (reconstruction) ρ=+0.06")
    ax.axhline(0.50, ls=":", color="k", alpha=0.4, label="chance")
    ax.set_xticks(blocks)
    ax.set_xlabel("transformer block (natural-trained; eval on CT)")
    ax.set_ylabel("token-level lesion AUROC")
    ax.set_ylim(0.45, 0.92)
    ax.set_title("Mechanism: localizers erode with depth; MAE never localizes")
    ax.legend(loc="upper right", fontsize=8)
    ax.annotate("MAE flat & low\n(not density-separable)", (9, 0.578),
                textcoords="offset points", xytext=(0, -28), ha="center",
                fontsize=7.5, color="#d62728")
    _save_tiff(fig, "fig6_cross_objective.tiff")


def fig7_law():
    """Plot min(r, m)/m against a constant 1.0 curve and write fig7_rank_law.tiff."""
    # S1 closed-form law: spanning vs concentration retention vs signal rank [research_v2/s1_crossover.json]
    m = 8
    ranks = np.array([1, 2, 3, 4, 5, 6, 7, 8, 10, 12])
    spanning = np.minimum(ranks, m) / m  # = min(r,m)/m
    concentration = np.ones_like(spanning, float)

    fig, ax = plt.subplots(figsize=(5.4, 3.6))
    ax.plot(ranks, concentration, "o-", color="#1f77b4", lw=2, ms=6,
            label="concentration (energy/membership)")
    ax.plot(ranks, spanning, "s-", color="#d62728", lw=2, ms=6,
            label="spanning (effective-rank/RankMe)")
    # Shade the area between the two curves.
    ax.fill_between(ranks, spanning, concentration, color="#d62728", alpha=0.12)
    ax.axvline(m, ls=":", color="gray")
    ax.annotate("crossover r*=m", (m, 0.55), textcoords="offset points",
                xytext=(6, 0), ha="left", fontsize=8.5, color="gray")
    ax.annotate("retention gap\n(m−r)/m", (2.0, 0.62), fontsize=8.5,
                color="#d62728", ha="center")
    ax.set_xlabel("signal effective rank r (lesion concentration → diversity)")
    ax.set_ylabel("fraction of rare signal retained")
    ax.set_ylim(0, 1.08)
    ax.set_title("The law: rank objectives lose rare/low-rank signal")
    ax.legend(loc="lower right", fontsize=8.5)
    _save_tiff(fig, "fig7_rank_law.tiff")


for f in (fig1_layer, fig2_crossmodality, fig3_pruning_gain, fig4_ablation,
          fig5_conformal, fig6_cross_objective, fig7_law):
    f()

# flatten RGBA -> RGB on white (journals reject alpha/transparency in TIFFs); keep 650 dpi + LZW
for p in sorted(OUT.glob("*.tiff")):
    _flatten_to_rgb(p)

print(f"figures written at {DPI} dpi (TIFF/LZW, RGB):",
      sorted(p.name for p in OUT.glob("*.tiff")))