covtoken / paper /make_figures.py

regenerate figures as 650-dpi LZW TIFFs, flattened RGB (publication-ready)

b30c499 verified 7 days ago

9.76 kB

	#!/usr/bin/env python3
	"""Generate paper figures from the locked gate-report numbers. Self-contained (no bucket).
	Output: publication TIFFs at 650 dpi (LZW-compressed)."""
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import numpy as np
	from pathlib import Path

	# Figures are written to a "figures" directory next to this script; it is
	# created on first run and reused afterwards.
	OUT = Path(__file__).resolve().parent.joinpath("figures")
	OUT.mkdir(exist_ok=True)
	# Resolution used for every saved figure (also passed explicitly to savefig).
	DPI = 650
	# Global matplotlib styling shared by all figures in this script.
	plt.rcParams.update({
	    "figure.dpi": 150,
	    "savefig.dpi": DPI,
	    "font.size": 11,
	    "axes.grid": True,
	    "grid.alpha": 0.3,
	    "axes.axisbelow": True,
	})


	def fig1_layer():
	    """Draw the per-block lesion-AUROC curve with error bars and save it.

	    The numbers are hard-coded (source noted as research_v3/rigor_results.json:
	    full 12-block depth curve, multi-seed n=3). The peak block is annotated and
	    the result is written to OUT / "fig1_layer_ablation.tiff".
	    """
	    block_ids = np.arange(1, 13)
	    auroc_mean = [0.8612,0.8641,0.8657,0.8375,0.8127,0.7818,0.7366,0.7059,0.6816,0.6802,0.6607,0.6365]
	    auroc_std = [0.0098,0.0089,0.0098,0.0169,0.0182,0.0163,0.0253,0.0268,0.0302,0.0379,0.0373,0.0264]

	    figure, axes = plt.subplots(figsize=(5.4, 3.4))
	    axes.errorbar(
	        block_ids, auroc_mean, yerr=auroc_std, fmt="o-", color="#1f77b4", lw=2, ms=6,
	        capsize=3, label="density-A (mean ± std, 3 seeds)",
	    )
	    axes.axhline(0.767, ls="--", color="#d62728", label="attention saliency (0.767)")

	    # Label the block with the highest mean AUROC.
	    peak = int(np.argmax(auroc_mean))
	    peak_block = block_ids[peak]
	    peak_mean = auroc_mean[peak]
	    peak_std = auroc_std[peak]
	    axes.annotate(
	        f"peak block {peak_block}\n{peak_mean:.3f}±{peak_std:.3f}",
	        (peak_block, peak_mean),
	        textcoords="offset points", xytext=(14, 6), ha="left", fontsize=8.5, color="#1f77b4",
	    )

	    axes.set_xticks(block_ids)
	    axes.set_xlabel("transformer block (MedDINOv3)")
	    axes.set_ylabel("token-level lesion AUROC (LIDC)")
	    axes.set_ylim(0.55, 0.92)
	    axes.set_title("Finding 1: lesion signal peaks mid-layer, erodes with depth")
	    axes.legend(loc="lower left", fontsize=8.5)

	    figure.tight_layout()
	    figure.savefig(OUT / "fig1_layer_ablation.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
	    plt.close(figure)


	def fig2_crossmodality():
	    """Draw grouped bars of density-A vs attention-saliency AUROC per dataset.

	    Values are hard-coded as (label, density, attention) rows. Two bars are
	    annotated (liver density, ultrasound attention) and the figure is written
	    to OUT / "fig2_cross_modality.tiff".
	    """
	    rows = [
	        ("LIDC lung\nCT", 0.871, 0.767),
	        ("pancreas\nCT", 0.876, 0.920),
	        ("KiTS23 kidney\nCT", 0.823, 0.823),
	        ("MSD liver\nCT", 0.670, 0.756),
	        ("BUSI breast\nUS (DINOv2)", 0.733, 0.492),
	    ]
	    # Transpose the rows into three parallel lists.
	    names, density, attention = (list(column) for column in zip(*rows))
	    centers = np.arange(len(names))
	    bar_w = 0.38

	    figure, axes = plt.subplots(figsize=(7.2, 3.6))
	    axes.bar(centers - bar_w/2, density, bar_w, label="density-A (ours, label-free)", color="#1f77b4")
	    axes.bar(centers + bar_w/2, attention, bar_w, label="attention saliency", color="#ff7f0e")
	    axes.axhline(0.70, ls=":", color="gray")
	    axes.axhline(0.50, ls="--", color="k", alpha=0.4, label="chance")

	    axes.set_xticks(centers)
	    axes.set_xticklabels(names, fontsize=8.5)
	    axes.set_ylabel("token-level lesion AUROC")
	    axes.set_ylim(0.4, 1.0)
	    axes.set_title("Finding 2: label-free localizer across anatomy / modality / backbone")
	    axes.legend(loc="upper right", fontsize=8.5)

	    # Call out the liver density bar (index 3) and the ultrasound attention bar (index 4).
	    axes.annotate(
	        "density fails\n(low-contrast)", (3 - bar_w/2, 0.670),
	        textcoords="offset points", xytext=(-2, -34), ha="center", fontsize=7.5, color="#1f77b4",
	    )
	    axes.annotate(
	        "attention\n~ chance", (4 + bar_w/2, 0.492),
	        textcoords="offset points", xytext=(2, 4), ha="center", fontsize=7.5, color="#ff7f0e",
	    )

	    figure.tight_layout()
	    figure.savefig(OUT / "fig2_cross_modality.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
	    plt.close(figure)


	def fig3_pruning_gain():
	    """Draw grouped bars of small-lesion recall gain at budgets 0.25 and 0.50.

	    Values are hard-coded as (label, gain at 0.25, gain at 0.50) rows. Each bar
	    gets a "+value" text label just above it, and the figure is written to
	    OUT / "fig3_pruning_gain.tiff".
	    """
	    data = [("LIDC\nlung CT", 27.6, 15.8), ("KiTS23\nkidney CT", 7.4, 1.6),
	            ("BUSI\nbreast US", 13.8, 19.0)]
	    labels = [d[0] for d in data]
	    b25 = [d[1] for d in data]
	    b50 = [d[2] for d in data]
	    x = np.arange(len(labels))
	    w = 0.38

	    fig, ax = plt.subplots(figsize=(6, 3.6))
	    ax.bar(x - w/2, b25, w, label="budget 0.25", color="#2ca02c")
	    ax.bar(x + w/2, b50, w, label="budget 0.50", color="#98df8a")
	    ax.axhline(5, ls=":", color="gray", label="effect floor (5 pts)")

	    # Walk both series in lockstep instead of indexing by range(len(...));
	    # `i` is the group position on the x axis.
	    for i, (gain25, gain50) in enumerate(zip(b25, b50)):
	        ax.annotate(f"+{gain25:.1f}", (i - w/2, gain25), textcoords="offset points",
	                    xytext=(0, 3), ha="center", fontsize=8)
	        ax.annotate(f"+{gain50:.1f}", (i + w/2, gain50), textcoords="offset points",
	                    xytext=(0, 3), ha="center", fontsize=8)

	    ax.set_xticks(x)
	    ax.set_xticklabels(labels)
	    ax.set_ylabel("small-lesion recall gain (pts)\nmembership vs saliency pruning")
	    ax.set_title("Finding 3: membership pruning > saliency pruning")
	    ax.legend(loc="upper right", fontsize=9)

	    fig.tight_layout()
	    fig.savefig(OUT / "fig3_pruning_gain.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
	    plt.close(fig)


	def fig4_ablation():
	    """Draw small-lesion recall for three pruning variants at two budgets.

	    Values are hard-coded. The third series (coverage floor) is hatched and
	    annotated at the 0.50 budget; the figure is written to
	    OUT / "fig4_floor_ablation.tiff".
	    """
	    budget_names = ["0.25", "0.50"]
	    saliency = [0.521, 0.827]
	    membership = [0.817, 0.981]
	    with_floor = [0.219, 0.460]
	    centers = np.arange(len(budget_names))
	    bar_w = 0.26

	    figure, axes = plt.subplots(figsize=(5.6, 3.6))
	    # (bar positions, heights, styling) for each series, drawn left to right.
	    series = [
	        (centers - bar_w, saliency, {"label": "saliency pruning", "color": "#ff7f0e"}),
	        (centers, membership, {"label": "subspace membership (ours)", "color": "#1f77b4"}),
	        (centers + bar_w, with_floor,
	         {"label": "subspace + coverage FLOOR", "color": "#d62728", "hatch": "//"}),
	    ]
	    for positions, heights, style in series:
	        axes.bar(positions, heights, bar_w, **style)

	    axes.set_xticks(centers)
	    axes.set_xticklabels([f"budget {name}" for name in budget_names])
	    axes.set_ylabel("small-lesion recall (matched budget)")
	    axes.set_ylim(0, 1.05)
	    axes.set_title("Finding 4 (negative): the coverage floor HURTS")
	    axes.legend(loc="upper left", fontsize=8.5)
	    axes.annotate(
	        "rank coverage rewards spanning,\nnot lesion concentration",
	        (1 + bar_w, 0.460),
	        textcoords="offset points", xytext=(-6, 18), ha="center",
	        fontsize=7.5, color="#d62728",
	    )

	    figure.tight_layout()
	    figure.savefig(OUT / "fig4_floor_ablation.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
	    plt.close(figure)


	def fig5_conformal():
	    """Draw guaranteed retention and empirical coverage against token budget.

	    Budget-guarantee tradeoff plus validity: two hard-coded curves and a
	    horizontal line at 0.90. The figure is written to
	    OUT / "fig5_conformal.tiff".
	    """
	    token_budgets = [0.25, 0.5]
	    guaranteed = [0.0, 1.0]
	    empirical = [0.978, 0.971]

	    figure, axes = plt.subplots(figsize=(5.2, 3.4))
	    axes.plot(token_budgets, guaranteed, "s-", color="#1f77b4", label="guaranteed lesion retention")
	    axes.plot(token_budgets, empirical, "o--", color="#2ca02c", label="empirical coverage (valid)")
	    axes.axhline(0.90, ls=":", color="gray", label="nominal 1-α = 0.90")

	    axes.set_xlabel("token budget")
	    axes.set_ylabel("fraction")
	    axes.set_ylim(-0.05, 1.05)
	    axes.set_xticks(token_budgets)
	    axes.set_title("Conformal retention certificate\n(valid; honest budget tradeoff)")
	    axes.legend(loc="center right", fontsize=8)

	    figure.tight_layout()
	    figure.savefig(OUT / "fig5_conformal.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
	    plt.close(figure)


	def fig6_cross_objective():
	    """Draw per-block lesion AUROC for three pretraining objectives.

	    Depth-erosion across objectives; the hard-coded values are noted as coming
	    from research_v3/f3_cross_objective.json. The MAE curve is annotated and
	    the figure is written to OUT / "fig6_cross_objective.tiff".
	    """
	    block_ids = np.arange(1, 13)
	    dino_auroc = [0.862,0.880,0.868,0.807,0.729,0.682,0.701,0.676,0.682,0.653,0.621,0.617]
	    supervised_auroc = [0.842,0.840,0.831,0.825,0.816,0.797,0.785,0.791,0.785,0.732,0.681,0.658]
	    mae_auroc = [0.611,0.600,0.596,0.600,0.589,0.590,0.587,0.582,0.577,0.570,0.577,0.568]

	    figure, axes = plt.subplots(figsize=(5.8, 3.6))
	    # (values, marker/line format, color, legend label), plotted in this order.
	    curves = [
	        (dino_auroc, "o-", "#1f77b4", "DINOv2 (self-distill) ρ=−0.93"),
	        (supervised_auroc, "s-", "#2ca02c", "ViT (supervised) ρ=−0.73"),
	        (mae_auroc, "^--", "#d62728", "MAE (reconstruction) ρ=+0.06"),
	    ]
	    for values, fmt, color, label in curves:
	        axes.plot(block_ids, values, fmt, color=color, lw=2, ms=5, label=label)
	    axes.axhline(0.50, ls=":", color="k", alpha=0.4, label="chance")

	    axes.set_xticks(block_ids)
	    axes.set_xlabel("transformer block (natural-trained; eval on CT)")
	    axes.set_ylabel("token-level lesion AUROC")
	    axes.set_ylim(0.45, 0.92)
	    axes.set_title("Mechanism: localizers erode with depth; MAE never localizes")
	    axes.legend(loc="upper right", fontsize=8)
	    axes.annotate(
	        "MAE flat & low\n(not density-separable)", (9, 0.578),
	        textcoords="offset points", xytext=(0, -28), ha="center", fontsize=7.5, color="#d62728",
	    )

	    figure.tight_layout()
	    figure.savefig(OUT / "fig6_cross_objective.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
	    plt.close(figure)


	def fig7_law():
	    """Draw spanning vs concentration retention as a function of signal rank.

	    Closed-form curves (noted as S1, research_v2/s1_crossover.json): spanning
	    is min(r, m)/m with m = 8, concentration is constant 1. The gap between
	    them is shaded and the figure is written to OUT / "fig7_rank_law.tiff".
	    """
	    m = 8
	    signal_ranks = np.array([1, 2, 3, 4, 5, 6, 7, 8, 10, 12])
	    spanning_kept = np.minimum(signal_ranks, m) / m  # = min(r,m)/m
	    concentration_kept = np.ones_like(spanning_kept, float)

	    figure, axes = plt.subplots(figsize=(5.4, 3.6))
	    axes.plot(signal_ranks, concentration_kept, "o-", color="#1f77b4", lw=2, ms=6,
	              label="concentration (energy/membership)")
	    axes.plot(signal_ranks, spanning_kept, "s-", color="#d62728", lw=2, ms=6,
	              label="spanning (effective-rank/RankMe)")
	    axes.fill_between(signal_ranks, spanning_kept, concentration_kept, color="#d62728", alpha=0.12)

	    # Vertical marker at r = m, where the two curves meet.
	    axes.axvline(m, ls=":", color="gray")
	    axes.annotate(
	        "crossover r*=m", (m, 0.55),
	        textcoords="offset points", xytext=(6, 0), ha="left", fontsize=8.5, color="gray",
	    )
	    axes.annotate("retention gap\n(m−r)/m", (2.0, 0.62), fontsize=8.5, color="#d62728", ha="center")

	    axes.set_xlabel("signal effective rank r (lesion concentration → diversity)")
	    axes.set_ylabel("fraction of rare signal retained")
	    axes.set_ylim(0, 1.08)
	    axes.set_title("The law: rank objectives lose rare/low-rank signal")
	    axes.legend(loc="lower right", fontsize=8.5)

	    figure.tight_layout()
	    figure.savefig(OUT / "fig7_rank_law.tiff", dpi=DPI, pil_kwargs={"compression": "tiff_lzw"})
	    plt.close(figure)


	# Render every figure; each function writes its own TIFF into OUT.
	for make_figure in (fig1_layer, fig2_crossmodality, fig3_pruning_gain, fig4_ablation,
	                    fig5_conformal, fig6_cross_objective, fig7_law):
	    make_figure()

	# flatten RGBA -> RGB on white (journals reject alpha/transparency in TIFFs); keep 650 dpi + LZW
	from PIL import Image

	for tiff_path in sorted(OUT.glob("*.tiff")):
	    # Read inside a context manager so the source file handle is closed
	    # before the same path is overwritten below.
	    with Image.open(tiff_path) as source:
	        if source.mode not in ("RGBA", "LA", "P"):
	            continue  # no alpha/palette to flatten; leave the file untouched
	        # Convert once; the result is reused as both the pasted image and its mask.
	        rgba = source.convert("RGBA")
	    flattened = Image.new("RGB", rgba.size, "white")
	    # The last band of an RGBA image is alpha; using it as the mask composites onto white.
	    flattened.paste(rgba, mask=rgba.split()[-1])
	    flattened.save(tiff_path, format="TIFF", dpi=(DPI, DPI), compression="tiff_lzw")
	print(f"figures written at {DPI} dpi (TIFF/LZW, RGB):", sorted(p.name for p in OUT.glob("*.tiff")))