covtoken / paper /make_figures.py
Chucks90's picture
regenerate figures as 650-dpi LZW TIFFs, flattened RGB (publication-ready)
b30c499 verified
Raw
History Blame Contribute Delete
9.76 kB
#!/usr/bin/env python3
"""Generate paper figures from the locked gate-report numbers. Self-contained (no bucket).
Output: publication TIFFs at 650 dpi (LZW-compressed)."""
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
# Resolution used for every saved figure (also passed explicitly to savefig below).
DPI = 650

# Figures are written to a "figures" directory that sits next to this script.
OUT = Path(__file__).resolve().parent / "figures"
OUT.mkdir(exist_ok=True)

# Shared matplotlib defaults: on-screen dpi, save dpi, base font size, and a light grid
# drawn beneath the plotted artists.
plt.rcParams.update(
    {
        "figure.dpi": 150,
        "savefig.dpi": DPI,
        "font.size": 11,
        "axes.grid": True,
        "grid.alpha": 0.3,
        "axes.axisbelow": True,
    }
)
def fig1_layer():
    """Plot the 12-block depth curve with error bars and save fig1_layer_ablation.tiff.

    The per-block mean/std values are hard-coded below (multi-seed, n=3)
    [research_v3/rigor_results.json]. The block with the highest mean is annotated.
    """
    block_ids = np.arange(1, 13)
    auroc_mean = [0.8612, 0.8641, 0.8657, 0.8375, 0.8127, 0.7818,
                  0.7366, 0.7059, 0.6816, 0.6802, 0.6607, 0.6365]
    auroc_std = [0.0098, 0.0089, 0.0098, 0.0169, 0.0182, 0.0163,
                 0.0253, 0.0268, 0.0302, 0.0379, 0.0373, 0.0264]

    fig, ax = plt.subplots(figsize=(5.4, 3.4))
    ax.errorbar(
        block_ids,
        auroc_mean,
        yerr=auroc_std,
        fmt="o-",
        color="#1f77b4",
        lw=2,
        ms=6,
        capsize=3,
        label="density-A (mean ± std, 3 seeds)",
    )
    # Horizontal reference line for the attention-saliency baseline.
    ax.axhline(0.767, ls="--", color="#d62728", label="attention saliency (0.767)")

    # Label the block whose mean AUROC is largest.
    peak = int(np.argmax(auroc_mean))
    peak_text = f"peak block {block_ids[peak]}\n{auroc_mean[peak]:.3f}±{auroc_std[peak]:.3f}"
    ax.annotate(
        peak_text,
        (block_ids[peak], auroc_mean[peak]),
        textcoords="offset points",
        xytext=(14, 6),
        ha="left",
        fontsize=8.5,
        color="#1f77b4",
    )

    ax.set_xticks(block_ids)
    ax.set_xlabel("transformer block (MedDINOv3)")
    ax.set_ylabel("token-level lesion AUROC (LIDC)")
    ax.set_ylim(0.55, 0.92)
    ax.set_title("Finding 1: lesion signal peaks mid-layer, erodes with depth")
    ax.legend(loc="lower left", fontsize=8.5)

    fig.tight_layout()
    fig.savefig(OUT / "fig1_layer_ablation.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
    plt.close(fig)
def fig2_crossmodality():
    """Plot grouped AUROC bars (density-A vs attention saliency) per dataset.

    Values are hard-coded below; the result is saved as fig2_cross_modality.tiff.
    """
    # Each row: (tick label, density-A AUROC, attention-saliency AUROC).
    rows = [
        ("LIDC lung\nCT", 0.871, 0.767),
        ("pancreas\nCT", 0.876, 0.920),
        ("KiTS23 kidney\nCT", 0.823, 0.823),
        ("MSD liver\nCT", 0.670, 0.756),
        ("BUSI breast\nUS (DINOv2)", 0.733, 0.492),
    ]
    tick_labels, density_auc, attention_auc = map(list, zip(*rows))
    pos = np.arange(len(tick_labels))
    bar_w = 0.38
    half = bar_w / 2

    fig, ax = plt.subplots(figsize=(7.2, 3.6))
    ax.bar(pos - half, density_auc, bar_w, label="density-A (ours, label-free)", color="#1f77b4")
    ax.bar(pos + half, attention_auc, bar_w, label="attention saliency", color="#ff7f0e")
    # Unlabelled dotted guide at 0.70, then the labelled chance line at 0.50.
    ax.axhline(0.70, ls=":", color="gray")
    ax.axhline(0.50, ls="--", color="k", alpha=0.4, label="chance")

    ax.set_xticks(pos)
    ax.set_xticklabels(tick_labels, fontsize=8.5)
    ax.set_ylabel("token-level lesion AUROC")
    ax.set_ylim(0.4, 1.0)
    ax.set_title("Finding 2: label-free localizer across anatomy / modality / backbone")
    ax.legend(loc="upper right", fontsize=8.5)

    # annotate liver failure + US attention-collapse
    # (bar positions are the fixed row indices 3 and 4 of the table above)
    ax.annotate(
        "density fails\n(low-contrast)",
        (3 - half, 0.670),
        textcoords="offset points",
        xytext=(-2, -34),
        ha="center",
        fontsize=7.5,
        color="#1f77b4",
    )
    ax.annotate(
        "attention\n~ chance",
        (4 + half, 0.492),
        textcoords="offset points",
        xytext=(2, 4),
        ha="center",
        fontsize=7.5,
        color="#ff7f0e",
    )

    fig.tight_layout()
    fig.savefig(OUT / "fig2_cross_modality.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
    plt.close(fig)
def fig3_pruning_gain():
    """Plot small-lesion recall gain at budgets 0.25 and 0.50 per dataset.

    Values are hard-coded below; each bar is labelled with its signed gain and the
    figure is saved as fig3_pruning_gain.tiff.
    """
    # Each row: (tick label, gain at budget 0.25, gain at budget 0.50), in points.
    data = [
        ("LIDC\nlung CT", 27.6, 15.8),
        ("KiTS23\nkidney CT", 7.4, 1.6),
        ("BUSI\nbreast US", 13.8, 19.0),
    ]
    labels = [d[0] for d in data]
    b25 = [d[1] for d in data]
    b50 = [d[2] for d in data]
    x = np.arange(len(labels))
    w = 0.38

    fig, ax = plt.subplots(figsize=(6, 3.6))
    ax.bar(x - w/2, b25, w, label="budget 0.25", color="#2ca02c")
    ax.bar(x + w/2, b50, w, label="budget 0.50", color="#98df8a")
    ax.axhline(5, ls=":", color="gray", label="effect floor (5 pts)")

    # Value labels above each bar. The "+" format flag emits the sign itself, so a
    # negative gain renders as "-1.2" rather than "+-1.2".
    for i, (gain25, gain50) in enumerate(zip(b25, b50)):
        ax.annotate(f"{gain25:+.1f}", (i - w/2, gain25), textcoords="offset points",
                    xytext=(0, 3), ha="center", fontsize=8)
        ax.annotate(f"{gain50:+.1f}", (i + w/2, gain50), textcoords="offset points",
                    xytext=(0, 3), ha="center", fontsize=8)

    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.set_ylabel("small-lesion recall gain (pts)\nmembership vs saliency pruning")
    ax.set_title("Finding 3: membership pruning > saliency pruning")
    ax.legend(loc="upper right", fontsize=9)

    fig.tight_layout()
    fig.savefig(OUT / "fig3_pruning_gain.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
    plt.close(fig)
def fig4_ablation():
    """Plot small-lesion recall for three pruning variants at two budgets.

    Values are hard-coded below; the result is saved as fig4_floor_ablation.tiff.
    """
    budget_names = ["0.25", "0.50"]
    recall_saliency = [0.521, 0.827]
    recall_subspace = [0.817, 0.981]
    recall_floor = [0.219, 0.460]
    centers = np.arange(len(budget_names))
    bar_w = 0.26

    fig, ax = plt.subplots(figsize=(5.6, 3.6))
    # Three bars per budget: left, centre, right of each tick.
    ax.bar(centers - bar_w, recall_saliency, bar_w, label="saliency pruning", color="#ff7f0e")
    ax.bar(centers, recall_subspace, bar_w, label="subspace membership (ours)", color="#1f77b4")
    ax.bar(
        centers + bar_w,
        recall_floor,
        bar_w,
        label="subspace + coverage FLOOR",
        color="#d62728",
        hatch="//",
    )

    ax.set_xticks(centers)
    ax.set_xticklabels([f"budget {name}" for name in budget_names])
    ax.set_ylabel("small-lesion recall (matched budget)")
    ax.set_ylim(0, 1.05)
    ax.set_title("Finding 4 (negative): the coverage floor HURTS")
    ax.legend(loc="upper left", fontsize=8.5)

    # Note placed relative to the right-hand (floor) bar of the second budget.
    ax.annotate(
        "rank coverage rewards spanning,\nnot lesion concentration",
        (1 + bar_w, 0.460),
        textcoords="offset points",
        xytext=(-6, 18),
        ha="center",
        fontsize=7.5,
        color="#d62728",
    )

    fig.tight_layout()
    fig.savefig(OUT / "fig4_floor_ablation.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
    plt.close(fig)
def fig5_conformal():
    """Plot guaranteed retention and empirical coverage against token budget.

    Values are hard-coded below; the result is saved as fig5_conformal.tiff.
    """
    # budget-guarantee tradeoff + validity
    token_budgets = [0.25, 0.5]
    guaranteed = [0.0, 1.0]
    empirical = [0.978, 0.971]

    fig, ax = plt.subplots(figsize=(5.2, 3.4))
    ax.plot(token_budgets, guaranteed, "s-", color="#1f77b4", label="guaranteed lesion retention")
    ax.plot(token_budgets, empirical, "o--", color="#2ca02c", label="empirical coverage (valid)")
    # Dotted reference at the nominal coverage level.
    ax.axhline(0.90, ls=":", color="gray", label="nominal 1-α = 0.90")

    ax.set_xlabel("token budget")
    ax.set_ylabel("fraction")
    ax.set_ylim(-0.05, 1.05)
    ax.set_xticks(token_budgets)
    ax.set_title("Conformal retention certificate\n(valid; honest budget tradeoff)")
    ax.legend(loc="center right", fontsize=8)

    fig.tight_layout()
    fig.savefig(OUT / "fig5_conformal.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
    plt.close(fig)
def fig6_cross_objective():
    """Plot per-block lesion AUROC for three pretraining objectives.

    Values are hard-coded below [research_v3/f3_cross_objective.json]; the result is
    saved as fig6_cross_objective.tiff.
    """
    # F3 decisive: depth-erosion across objectives
    block_ids = np.arange(1, 13)
    # Each entry: (per-block AUROC, line/marker format, colour, legend label).
    series = [
        (
            [0.862, 0.880, 0.868, 0.807, 0.729, 0.682, 0.701, 0.676, 0.682, 0.653, 0.621, 0.617],
            "o-",
            "#1f77b4",
            "DINOv2 (self-distill) ρ=−0.93",
        ),
        (
            [0.842, 0.840, 0.831, 0.825, 0.816, 0.797, 0.785, 0.791, 0.785, 0.732, 0.681, 0.658],
            "s-",
            "#2ca02c",
            "ViT (supervised) ρ=−0.73",
        ),
        (
            [0.611, 0.600, 0.596, 0.600, 0.589, 0.590, 0.587, 0.582, 0.577, 0.570, 0.577, 0.568],
            "^--",
            "#d62728",
            "MAE (reconstruction) ρ=+0.06",
        ),
    ]

    fig, ax = plt.subplots(figsize=(5.8, 3.6))
    for auroc, fmt, colour, legend_label in series:
        ax.plot(block_ids, auroc, fmt, color=colour, lw=2, ms=5, label=legend_label)
    ax.axhline(0.50, ls=":", color="k", alpha=0.4, label="chance")

    ax.set_xticks(block_ids)
    ax.set_xlabel("transformer block (natural-trained; eval on CT)")
    ax.set_ylabel("token-level lesion AUROC")
    ax.set_ylim(0.45, 0.92)
    ax.set_title("Mechanism: localizers erode with depth; MAE never localizes")
    ax.legend(loc="upper right", fontsize=8)

    # Note placed below the MAE curve near block 9.
    ax.annotate(
        "MAE flat & low\n(not density-separable)",
        (9, 0.578),
        textcoords="offset points",
        xytext=(0, -28),
        ha="center",
        fontsize=7.5,
        color="#d62728",
    )

    fig.tight_layout()
    fig.savefig(OUT / "fig6_cross_objective.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
    plt.close(fig)
def fig7_law():
    """Plot the closed-form spanning vs concentration retention curves.

    Spanning retention is min(r, m)/m with m = 8; concentration retention is a
    constant 1 [research_v2/s1_crossover.json]. The result is saved as
    fig7_rank_law.tiff.
    """
    # S1 closed-form law: spanning vs concentration retention vs signal rank
    m = 8
    signal_ranks = np.array([1, 2, 3, 4, 5, 6, 7, 8, 10, 12])
    retained_spanning = np.minimum(signal_ranks, m) / m  # = min(r,m)/m
    retained_concentration = np.ones_like(retained_spanning, float)

    fig, ax = plt.subplots(figsize=(5.4, 3.6))
    ax.plot(
        signal_ranks,
        retained_concentration,
        "o-",
        color="#1f77b4",
        lw=2,
        ms=6,
        label="concentration (energy/membership)",
    )
    ax.plot(
        signal_ranks,
        retained_spanning,
        "s-",
        color="#d62728",
        lw=2,
        ms=6,
        label="spanning (effective-rank/RankMe)",
    )
    # Shade the region between the two curves.
    ax.fill_between(signal_ranks, retained_spanning, retained_concentration,
                    color="#d62728", alpha=0.12)

    # Vertical marker at r = m, with its caption offset slightly to the right.
    ax.axvline(m, ls=":", color="gray")
    ax.annotate(
        "crossover r*=m",
        (m, 0.55),
        textcoords="offset points",
        xytext=(6, 0),
        ha="left",
        fontsize=8.5,
        color="gray",
    )
    ax.annotate("retention gap\n(m−r)/m", (2.0, 0.62), fontsize=8.5, color="#d62728", ha="center")

    ax.set_xlabel("signal effective rank r (lesion concentration → diversity)")
    ax.set_ylabel("fraction of rare signal retained")
    ax.set_ylim(0, 1.08)
    ax.set_title("The law: rank objectives lose rare/low-rank signal")
    ax.legend(loc="lower right", fontsize=8.5)

    fig.tight_layout()
    fig.savefig(OUT / "fig7_rank_law.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
    plt.close(fig)
# Render every figure; each function writes its own TIFF into OUT.
for f in (fig1_layer, fig2_crossmodality, fig3_pruning_gain, fig4_ablation, fig5_conformal,
          fig6_cross_objective, fig7_law):
    f()

# flatten RGBA -> RGB on white (journals reject alpha/transparency in TIFFs); keep 650 dpi + LZW
from PIL import Image

for p in sorted(OUT.glob("*.tiff")):
    # Open inside a context manager so the source file handle is closed before the
    # same path is overwritten below. convert() is done once and reused for both the
    # pasted pixels and the alpha mask.
    with Image.open(p) as im:
        if im.mode not in ("RGBA", "LA", "P"):
            continue  # no alpha/palette to flatten; leave the file untouched
        rgba = im.convert("RGBA")
    bg = Image.new("RGB", rgba.size, "white")
    # The last band of an RGBA image is its alpha channel; use it as the paste mask.
    bg.paste(rgba, mask=rgba.split()[-1])
    bg.save(p, format="TIFF", dpi=(DPI, DPI), compression="tiff_lzw")
print(f"figures written at {DPI} dpi (TIFF/LZW, RGB):", sorted(p.name for p in OUT.glob("*.tiff")))