| |
| """ENGRAM Research Paper β Figure Generation. |
| |
| Generates all 15 figures for the ENGRAM paper from results/ data files. |
| Output: results/figures/*.pdf (LaTeX-compatible, 300 DPI) |
| |
| Usage: |
| cd ENGRAM && python scripts/paper_figures.py |
| python scripts/paper_figures.py --only fig02 # Single figure |
| python scripts/paper_figures.py --list # List all figures |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import sys |
| from pathlib import Path |
| from typing import Any |
|
|
| import matplotlib |
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
| import matplotlib.ticker as ticker |
| import numpy as np |
|
|
| |
|
|
| RESULTS_DIR = Path(__file__).parent.parent / "results" |
| FIGURES_DIR = RESULTS_DIR / "figures" |
| ABSOLUTE_DIR = RESULTS_DIR / "absolute" |
| STRESS_DIR = RESULTS_DIR / "stress" |
|
|
| |
| plt.rcParams.update({ |
| "font.family": "serif", |
| "font.size": 11, |
| "axes.labelsize": 12, |
| "axes.titlesize": 13, |
| "xtick.labelsize": 10, |
| "ytick.labelsize": 10, |
| "legend.fontsize": 10, |
| "figure.dpi": 300, |
| "savefig.dpi": 300, |
| "savefig.bbox": "tight", |
| "savefig.pad_inches": 0.1, |
| "axes.grid": True, |
| "grid.alpha": 0.3, |
| "axes.spines.top": False, |
| "axes.spines.right": False, |
| }) |
|
|
| |
| COLORS = { |
| "blue": "#4477AA", |
| "orange": "#EE6677", |
| "green": "#228833", |
| "purple": "#AA3377", |
| "cyan": "#66CCEE", |
| "grey": "#BBBBBB", |
| "red": "#CC3311", |
| "teal": "#009988", |
| "yellow": "#CCBB44", |
| "indigo": "#332288", |
| } |
|
|
| PASS_COLOR = COLORS["green"] |
| FAIL_COLOR = COLORS["red"] |
|
|
|
|
| |
|
|
| def load_json(path: Path) -> dict[str, Any]: |
| """Load JSON file and return parsed dict.""" |
| return json.loads(path.read_text()) |
|
|
|
|
| def save_figure(fig: plt.Figure, name: str) -> None: |
| """Save figure as PDF and PNG.""" |
| FIGURES_DIR.mkdir(parents=True, exist_ok=True) |
| fig.savefig(FIGURES_DIR / f"{name}.pdf", format="pdf") |
| fig.savefig(FIGURES_DIR / f"{name}.png", format="png") |
| plt.close(fig) |
| print(f" Saved: {name}.pdf + .png") |
|
|
|
|
| |
|
|
| def fig02_frequency_comparison() -> None: |
| """Bar chart: 6 frequency combos Γ recall and margin.""" |
| print("Fig 02: Frequency combination comparison...") |
| data = load_json(ABSOLUTE_DIR / "multifreq_comparison.json") |
| results = data["results"] |
|
|
| combos = list(results.keys()) |
| recalls = [results[c]["recall"] * 100 for c in combos] |
| margins = [results[c]["margin_mean"] * 1000 for c in combos] |
| failures = [results[c]["n_failures"] for c in combos] |
|
|
| fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4.5)) |
|
|
| |
| x = np.arange(len(combos)) |
| bar_colors = [COLORS["green"] if c == "f0+f1" else COLORS["blue"] for c in combos] |
| bars = ax1.bar(x, recalls, color=bar_colors, edgecolor="white", linewidth=0.5) |
| ax1.set_xticks(x) |
| ax1.set_xticklabels(combos, rotation=30, ha="right") |
| ax1.set_ylabel("Recall@1 (%)") |
| ax1.set_title("(a) Recall by Frequency Combination") |
| ax1.set_ylim(60, 102) |
| for bar, val, nf in zip(bars, recalls, failures): |
| ax1.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, |
| f"{val:.0f}%\n({nf} fail)", ha="center", va="bottom", fontsize=8) |
|
|
| |
| bars2 = ax2.bar(x, margins, color=bar_colors, edgecolor="white", linewidth=0.5) |
| ax2.set_xticks(x) |
| ax2.set_xticklabels(combos, rotation=30, ha="right") |
| ax2.set_ylabel("Mean Margin (Γ10Β³)") |
| ax2.set_title("(b) Mean Discrimination Margin") |
| for bar, val in zip(bars2, margins): |
| ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.05, |
| f"{val:.1f}", ha="center", va="bottom", fontsize=8) |
|
|
| fig.suptitle("Multi-Frequency Fingerprint Ablation (N=200)", fontsize=14, y=1.02) |
| fig.tight_layout() |
| save_figure(fig, "fig02_frequency_comparison") |
|
|
|
|
| |
|
|
| def fig03_margin_power_law() -> None: |
| """Log-log plot: margin vs N for f1 and f0+f1 with fitted power laws.""" |
| print("Fig 03: Margin power law...") |
| f1_data = load_json(ABSOLUTE_DIR / "margin_compression_law.json") |
| f0f1_data = load_json(ABSOLUTE_DIR / "multifreq_law.json") |
|
|
| |
| f1_n = [int(n) for n in f1_data["results"].keys()] |
| f1_margins = [f1_data["results"][str(n)]["mean_margin"] for n in f1_n] |
| f1_alpha = f1_data["alpha"] |
| f1_A = f1_data["A"] |
|
|
| |
| f0f1_n = [int(n) for n in f0f1_data["results"].keys()] |
| f0f1_margins = [f0f1_data["results"][str(n)]["mean_margin"] for n in f0f1_n] |
| f0f1_alpha = f0f1_data["alpha"] |
| f0f1_A = f0f1_data["A"] |
|
|
| fig, ax = plt.subplots(figsize=(7, 5)) |
|
|
| |
| ax.scatter(f1_n, f1_margins, color=COLORS["orange"], s=60, zorder=5, label="f1 (data)") |
| ax.scatter(f0f1_n, f0f1_margins, color=COLORS["blue"], s=60, zorder=5, label="f0+f1 (data)") |
|
|
| |
| n_fit = np.linspace(3, 250, 200) |
| f1_fit = f1_A * n_fit ** f1_alpha |
| f0f1_fit = f0f1_A * n_fit ** f0f1_alpha |
|
|
| ax.plot(n_fit, f1_fit, color=COLORS["orange"], linestyle="--", alpha=0.7, |
| label=f"f1 fit: {f1_A:.4f}Β·N^{{{f1_alpha:.3f}}}") |
| ax.plot(n_fit, f0f1_fit, color=COLORS["blue"], linestyle="--", alpha=0.7, |
| label=f"f0+f1 fit: {f0f1_A:.4f}Β·N^{{{f0f1_alpha:.3f}}}") |
|
|
| ax.set_xscale("log") |
| ax.set_yscale("log") |
| ax.set_xlabel("Corpus Size N") |
| ax.set_ylabel("Mean Discrimination Margin") |
| ax.set_title("Margin Power Law: Graceful Degradation") |
| ax.legend(loc="upper right") |
| ax.xaxis.set_major_formatter(ticker.ScalarFormatter()) |
| ax.set_xticks([5, 10, 20, 50, 100, 200]) |
|
|
| |
| ax.annotate( |
| f"f0+f1: Ξ±={f0f1_alpha:.3f} (shallower)\nf1: Ξ±={f1_alpha:.3f}", |
| xy=(100, f0f1_A * 100 ** f0f1_alpha), xytext=(30, 0.003), |
| arrowprops={"arrowstyle": "->", "color": COLORS["grey"]}, |
| fontsize=9, bbox={"boxstyle": "round,pad=0.3", "facecolor": "wheat", "alpha": 0.5} |
| ) |
|
|
| fig.tight_layout() |
| save_figure(fig, "fig03_margin_power_law") |
|
|
|
|
| |
|
|
| def fig04_recall_vs_n() -> None: |
| """Fourier f0+f1 recall vs FCDB recall across corpus sizes.""" |
| print("Fig 04: Recall vs N (Fourier vs FCDB)...") |
| f0f1_data = load_json(ABSOLUTE_DIR / "multifreq_law.json") |
| stress_data = load_json(STRESS_DIR / "STRESS_SUMMARY.json") |
|
|
| |
| fourier_n = [int(n) for n in f0f1_data["results"].keys()] |
| fourier_recall = [f0f1_data["results"][str(n)]["recall"] * 100 for n in fourier_n] |
|
|
| |
| fcdb_map = stress_data["recall_at_1_vs_n_fcdb"] |
| fcdb_n = [int(n) for n in fcdb_map.keys()] |
| fcdb_recall = [v * 100 for v in fcdb_map.values()] |
|
|
| fig, ax = plt.subplots(figsize=(7, 5)) |
|
|
| ax.plot(fourier_n, fourier_recall, "o-", color=COLORS["blue"], linewidth=2, |
| markersize=7, label="Fourier f0+f1 (same-model)", zorder=5) |
| ax.plot(fcdb_n, fcdb_recall, "s--", color=COLORS["orange"], linewidth=2, |
| markersize=7, label="FCDB (cross-model)", zorder=5) |
|
|
| |
| ax.axvline(x=100, color=COLORS["red"], linestyle=":", alpha=0.5) |
| ax.annotate("FCDB collapse\n(N=100)", xy=(100, 30), xytext=(140, 50), |
| arrowprops={"arrowstyle": "->", "color": COLORS["red"]}, |
| fontsize=9, color=COLORS["red"]) |
|
|
| ax.set_xlabel("Corpus Size N") |
| ax.set_ylabel("Recall@1 (%)") |
| ax.set_title("Retrieval Recall vs Corpus Size") |
| ax.legend(loc="lower left") |
| ax.set_ylim(-5, 105) |
| ax.set_xlim(0, 210) |
|
|
| fig.tight_layout() |
| save_figure(fig, "fig04_recall_vs_n") |
|
|
|
|
| |
|
|
| def fig05_cross_model_strategies() -> None: |
| """Horizontal bar chart: 9 cross-model methods Γ margin.""" |
| print("Fig 05: Cross-model strategy comparison...") |
|
|
| strategies = [ |
| ("CCA", -0.420, False), |
| ("Residual FCB", -0.382, False), |
| ("Procrustes", -0.104, False), |
| ("RR (K=20)", -0.066, False), |
| ("FCB+ridge", -0.017, False), |
| ("Contrastive", 0.001, True), |
| ("JCB", 0.011, True), |
| ("JCB+delta", 0.037, True), |
| ("FCDB", 0.124, True), |
| ] |
|
|
| names = [s[0] for s in strategies] |
| margins = [s[1] for s in strategies] |
| colors = [PASS_COLOR if s[2] else FAIL_COLOR for s in strategies] |
|
|
| fig, ax = plt.subplots(figsize=(8, 5)) |
| y_pos = np.arange(len(names)) |
|
|
| bars = ax.barh(y_pos, margins, color=colors, edgecolor="white", linewidth=0.5, height=0.7) |
| ax.set_yticks(y_pos) |
| ax.set_yticklabels(names) |
| ax.set_xlabel("Retrieval Margin") |
| ax.set_title("Cross-Model Transfer Strategies (Llama 3B β 8B)") |
| ax.axvline(x=0, color="black", linewidth=0.8) |
|
|
| |
| for bar, val in zip(bars, margins): |
| x_offset = 0.005 if val >= 0 else -0.005 |
| ha = "left" if val >= 0 else "right" |
| ax.text(val + x_offset, bar.get_y() + bar.get_height() / 2, |
| f"{val:+.3f}", ha=ha, va="center", fontsize=9, fontweight="bold") |
|
|
| |
| from matplotlib.patches import Patch |
| legend_elements = [Patch(facecolor=PASS_COLOR, label="PASS (margin > 0)"), |
| Patch(facecolor=FAIL_COLOR, label="FAIL (margin β€ 0)")] |
| ax.legend(handles=legend_elements, loc="lower right") |
|
|
| fig.tight_layout() |
| save_figure(fig, "fig05_cross_model_strategies") |
|
|
|
|
| |
|
|
| def fig06_cka_layers() -> None: |
| """CKA similarity per layer: within-family vs cross-family.""" |
| print("Fig 06: CKA layer similarity...") |
| within = load_json(ABSOLUTE_DIR / "FAMILY_CKA.json") |
| cross = load_json(ABSOLUTE_DIR / "FAMILY_CKA_CROSS.json") |
|
|
| within_cka = within["layer_ckas"] |
| cross_cka = cross["layer_ckas"] |
| layers = list(range(len(within_cka))) |
|
|
| fig, ax = plt.subplots(figsize=(8, 4.5)) |
|
|
| ax.plot(layers, within_cka, "o-", color=COLORS["blue"], markersize=5, linewidth=1.5, |
| label=f"Within-family (Llama 3Bβ8B), ΞΌ={within['mean_cka']:.3f}") |
| ax.plot(layers, cross_cka, "s--", color=COLORS["orange"], markersize=5, linewidth=1.5, |
| label=f"Cross-family (LlamaβQwen), ΞΌ={cross['mean_cka']:.3f}") |
|
|
| ax.axhline(y=0.95, color=COLORS["grey"], linestyle=":", alpha=0.5, label="0.95 threshold") |
| ax.set_xlabel("Layer Index") |
| ax.set_ylabel("CKA Similarity") |
| ax.set_title("Centered Kernel Alignment Across Layers") |
| ax.legend(loc="lower left", fontsize=9) |
| ax.set_ylim(0.85, 1.0) |
|
|
| |
| min_idx_w = int(np.argmin(within_cka)) |
| min_idx_c = int(np.argmin(cross_cka)) |
| ax.annotate(f"min={within_cka[min_idx_w]:.3f}", xy=(min_idx_w, within_cka[min_idx_w]), |
| xytext=(min_idx_w + 2, within_cka[min_idx_w] - 0.01), |
| fontsize=8, color=COLORS["blue"]) |
| ax.annotate(f"min={cross_cka[min_idx_c]:.3f}", xy=(min_idx_c, cross_cka[min_idx_c]), |
| xytext=(min_idx_c + 2, cross_cka[min_idx_c] - 0.01), |
| fontsize=8, color=COLORS["orange"]) |
|
|
| fig.tight_layout() |
| save_figure(fig, "fig06_cka_layers") |
|
|
|
|
| |
|
|
| def fig07_confusion_matrix() -> None: |
| """Heatmaps: f1 confusion vs f0+f1 confusion across domains.""" |
| print("Fig 07: Domain confusion matrix...") |
| data = load_json(ABSOLUTE_DIR / "confusion_analysis.json") |
|
|
| domains = sorted({ |
| k.split(" -> ")[0] for k in data["f1_confusion"].keys() |
| } | { |
| k.split(" -> ")[1] for k in data["f1_confusion"].keys() |
| }) |
|
|
| def build_matrix(confusion_dict: dict[str, int]) -> np.ndarray: |
| n = len(domains) |
| mat = np.zeros((n, n)) |
| for key, count in confusion_dict.items(): |
| src, dst = key.split(" -> ") |
| if src in domains and dst in domains: |
| i = domains.index(src) |
| j = domains.index(dst) |
| mat[i, j] = count |
| return mat |
|
|
| f1_mat = build_matrix(data["f1_confusion"]) |
| best_mat = build_matrix(data["best_confusion"]) |
|
|
| |
| short_labels = [d[:6] for d in domains] |
|
|
| fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)) |
|
|
| im1 = ax1.imshow(f1_mat, cmap="Reds", aspect="auto", interpolation="nearest") |
| ax1.set_xticks(range(len(domains))) |
| ax1.set_yticks(range(len(domains))) |
| ax1.set_xticklabels(short_labels, rotation=45, ha="right", fontsize=8) |
| ax1.set_yticklabels(short_labels, fontsize=8) |
| ax1.set_title("(a) f1 Only β 28 Failures") |
| ax1.set_xlabel("Confused With") |
| ax1.set_ylabel("True Domain") |
| fig.colorbar(im1, ax=ax1, shrink=0.8) |
|
|
| im2 = ax2.imshow(best_mat, cmap="Blues", aspect="auto", interpolation="nearest") |
| ax2.set_xticks(range(len(domains))) |
| ax2.set_yticks(range(len(domains))) |
| ax2.set_xticklabels(short_labels, rotation=45, ha="right", fontsize=8) |
| ax2.set_yticklabels(short_labels, fontsize=8) |
| ax2.set_title("(b) f0+f1 β 4 Failures") |
| ax2.set_xlabel("Confused With") |
| ax2.set_ylabel("True Domain") |
| fig.colorbar(im2, ax=ax2, shrink=0.8) |
|
|
| fig.suptitle("Domain Confusion Analysis (N=200)", fontsize=14, y=1.02) |
| fig.tight_layout() |
| save_figure(fig, "fig07_confusion_matrix") |
|
|
|
|
| |
|
|
| def fig08_domain_recall_radar() -> None: |
| """Radar chart: per-domain recall with f0+f1.""" |
| print("Fig 08: Domain recall radar...") |
| data = load_json(ABSOLUTE_DIR / "confusion_analysis.json") |
| domain_recall = data["domain_recall"] |
|
|
| categories = list(domain_recall.keys()) |
| values = [domain_recall[c] * 100 for c in categories] |
|
|
| |
| values_closed = values + [values[0]] |
| n = len(categories) |
| angles = [i / n * 2 * np.pi for i in range(n)] |
| angles_closed = angles + [angles[0]] |
|
|
| fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"projection": "polar"}) |
|
|
| ax.plot(angles_closed, values_closed, "o-", color=COLORS["blue"], linewidth=2, markersize=6) |
| ax.fill(angles_closed, values_closed, color=COLORS["blue"], alpha=0.15) |
|
|
| ax.set_xticks(angles) |
| ax.set_xticklabels([c.replace("_", "\n") for c in categories], fontsize=9) |
| ax.set_ylim(80, 102) |
| ax.set_yticks([85, 90, 95, 100]) |
| ax.set_yticklabels(["85%", "90%", "95%", "100%"], fontsize=8) |
| ax.set_title("Per-Domain Recall@1 (f0+f1, N=200)", pad=20) |
|
|
| |
| min_idx = int(np.argmin(values)) |
| ax.annotate(f"{values[min_idx]:.0f}%", |
| xy=(angles[min_idx], values[min_idx]), |
| xytext=(angles[min_idx] + 0.2, values[min_idx] - 3), |
| fontsize=9, fontweight="bold", color=COLORS["red"]) |
|
|
| fig.tight_layout() |
| save_figure(fig, "fig08_domain_recall_radar") |
|
|
|
|
| |
|
|
| def fig09_hnsw_benchmark() -> None: |
| """Bar chart: HNSW vs brute-force latency.""" |
| print("Fig 09: HNSW benchmark...") |
| data = load_json(ABSOLUTE_DIR / "HNSW_BENCH.json") |
|
|
| fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4)) |
|
|
| |
| methods = ["Brute-Force", "HNSW"] |
| latencies = [data["bf_latency_us"], data["hnsw_latency_us"]] |
| colors = [COLORS["orange"], COLORS["blue"]] |
| bars = ax1.bar(methods, latencies, color=colors, edgecolor="white", width=0.5) |
| ax1.set_ylabel("Latency (ΞΌs)") |
| ax1.set_title(f"(a) Search Latency β {data['speedup']:.1f}Γ Speedup") |
| for bar, val in zip(bars, latencies): |
| ax1.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 3, |
| f"{val:.1f} ΞΌs", ha="center", va="bottom", fontsize=10) |
|
|
| |
| recalls = [data["bruteforce_recall"] * 100, data["hnsw_recall"] * 100] |
| bars2 = ax2.bar(methods, recalls, color=colors, edgecolor="white", width=0.5) |
| ax2.set_ylabel("Recall@1 (%)") |
| ax2.set_title("(b) Recall Preserved") |
| ax2.set_ylim(98, 100.5) |
| for bar, val in zip(bars2, recalls): |
| ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.05, |
| f"{val:.1f}%", ha="center", va="bottom", fontsize=10) |
|
|
| fig.suptitle("HNSW Index Benchmark (N=200)", fontsize=14, y=1.02) |
| fig.tight_layout() |
| save_figure(fig, "fig09_hnsw_benchmark") |
|
|
|
|
| |
|
|
| def fig10_int8_compression() -> None: |
| """Bar chart: FP16 vs INT8 comparison.""" |
| print("Fig 10: INT8 compression...") |
|
|
| fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4)) |
|
|
| |
| configs = ["591 tok", "6,403 tok"] |
| fp16_sizes = [73.9, 800.4] |
| int8_sizes = [37.5, 406.5] |
| x = np.arange(len(configs)) |
| w = 0.35 |
| ax1.bar(x - w / 2, fp16_sizes, w, label="FP16", color=COLORS["orange"], edgecolor="white") |
| ax1.bar(x + w / 2, int8_sizes, w, label="INT8", color=COLORS["blue"], edgecolor="white") |
| ax1.set_xticks(x) |
| ax1.set_xticklabels(configs) |
| ax1.set_ylabel("File Size (MB)") |
| ax1.set_title("(a) .eng File Size β 1.97Γ Compression") |
| ax1.legend() |
|
|
| |
| metrics = ["Cosine\nSimilarity", "Margin\n(FP16)", "Margin\n(INT8)"] |
| values = [0.99998, 0.381, 0.262] |
| bar_colors = [COLORS["green"], COLORS["blue"], COLORS["cyan"]] |
| bars = ax2.bar(metrics, values, color=bar_colors, edgecolor="white", width=0.5) |
| ax2.set_ylabel("Value") |
| ax2.set_title("(b) Quality Preservation") |
| for bar, val in zip(bars, values): |
| ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01, |
| f"{val:.5f}" if val > 0.9 else f"{val:.3f}", |
| ha="center", va="bottom", fontsize=9) |
|
|
| fig.suptitle("INT8 Quantization Impact", fontsize=14, y=1.02) |
| fig.tight_layout() |
| save_figure(fig, "fig10_int8_compression") |
|
|
|
|
| |
|
|
| def fig12_margin_distribution() -> None: |
| """Distribution comparison: f1 vs f0+f1 summary statistics.""" |
| print("Fig 12: Margin distribution...") |
| data = load_json(ABSOLUTE_DIR / "multifreq_comparison.json") |
| results = data["results"] |
|
|
| fig, ax = plt.subplots(figsize=(7, 4.5)) |
|
|
| |
| combos = ["f1", "f0+f1"] |
| means = [results[c]["margin_mean"] * 1000 for c in combos] |
| medians = [results[c]["margin_median"] * 1000 for c in combos] |
| mins = [results[c]["margin_min"] * 1000 for c in combos] |
|
|
| x = np.arange(len(combos)) |
| w = 0.25 |
| ax.bar(x - w, means, w, label="Mean", color=COLORS["blue"], edgecolor="white") |
| ax.bar(x, medians, w, label="Median", color=COLORS["green"], edgecolor="white") |
| ax.bar(x + w, mins, w, label="Min", color=COLORS["red"], edgecolor="white") |
|
|
| ax.set_xticks(x) |
| ax.set_xticklabels(combos, fontsize=12) |
| ax.set_ylabel("Margin (Γ10Β³)") |
| ax.set_title("Margin Statistics: f1 vs f0+f1 (N=200)") |
| ax.legend() |
| ax.axhline(y=0, color="black", linewidth=0.5) |
|
|
| |
| ax.annotate( |
| f"+76% mean margin\n25/28 failures fixed", |
| xy=(1, means[1]), xytext=(1.3, means[1] + 1), |
| arrowprops={"arrowstyle": "->", "color": COLORS["green"]}, |
| fontsize=9, bbox={"boxstyle": "round,pad=0.3", "facecolor": "#e6ffe6", "alpha": 0.8} |
| ) |
|
|
| fig.tight_layout() |
| save_figure(fig, "fig12_margin_distribution") |
|
|
|
|
| |
|
|
| def fig13_fcdb_tradeoff() -> None: |
| """Dual-axis: basis stability vs retrieval margin vs corpus size.""" |
| print("Fig 13: FCDB stability-discrimination tradeoff...") |
|
|
| |
| n_vals = [50, 100, 125, 200] |
| stability = [0.82, 0.906, 0.983, 0.999] |
| margin = [0.124, None, None, 0.013] |
| margin_n = [50, 200] |
| margin_v = [0.124, 0.013] |
|
|
| fig, ax1 = plt.subplots(figsize=(7, 5)) |
| ax2 = ax1.twinx() |
|
|
| |
| line1 = ax1.plot(n_vals, stability, "o-", color=COLORS["blue"], linewidth=2, |
| markersize=8, label="Basis Stability", zorder=5) |
| ax1.set_xlabel("Corpus Size N") |
| ax1.set_ylabel("Subspace Agreement", color=COLORS["blue"]) |
| ax1.tick_params(axis="y", labelcolor=COLORS["blue"]) |
| ax1.set_ylim(0.7, 1.05) |
|
|
| |
| line2 = ax2.plot(margin_n, margin_v, "s--", color=COLORS["orange"], linewidth=2, |
| markersize=8, label="Retrieval Margin", zorder=5) |
| ax2.set_ylabel("Cross-Model Margin", color=COLORS["orange"]) |
| ax2.tick_params(axis="y", labelcolor=COLORS["orange"]) |
| ax2.set_ylim(-0.01, 0.15) |
|
|
| |
| ax1.axhline(y=0.99, color=COLORS["grey"], linestyle=":", alpha=0.5) |
| ax1.annotate("Stable (β₯0.99)", xy=(125, 0.99), fontsize=8, color=COLORS["grey"]) |
|
|
| |
| lines = line1 + line2 |
| labels = [l.get_label() for l in lines] |
| ax1.legend(lines, labels, loc="center left") |
|
|
| ax1.set_title("FCDB StabilityβDiscrimination Tradeoff") |
| fig.tight_layout() |
| save_figure(fig, "fig13_fcdb_tradeoff") |
|
|
|
|
| |
|
|
| def fig14_ttft_speedup() -> None: |
| """Grouped bar chart: cold vs warm TTFT.""" |
| print("Fig 14: TTFT speedup...") |
|
|
| configs = ["3B / 4K tok", "3B / 16K tok", "8B / 591 tok"] |
| cold_ttft = [11439, 94592, 3508] |
| warm_ttft = [170, 1777, 116] |
| speedups = [67.2, 53.2, 30.8] |
|
|
| fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4.5)) |
|
|
| x = np.arange(len(configs)) |
| w = 0.35 |
| ax1.bar(x - w / 2, cold_ttft, w, label="Cold TTFT", color=COLORS["orange"], edgecolor="white") |
| ax1.bar(x + w / 2, warm_ttft, w, label="Warm TTFT", color=COLORS["blue"], edgecolor="white") |
| ax1.set_xticks(x) |
| ax1.set_xticklabels(configs, fontsize=9) |
| ax1.set_ylabel("TTFT (ms)") |
| ax1.set_title("(a) Time to First Token") |
| ax1.set_yscale("log") |
| ax1.legend() |
|
|
| |
| bars = ax2.bar(configs, speedups, color=COLORS["green"], edgecolor="white", width=0.5) |
| ax2.set_ylabel("Speedup (Γ)") |
| ax2.set_title("(b) KV Cache Restoration Speedup") |
| ax2.set_xticklabels(configs, fontsize=9) |
| for bar, val in zip(bars, speedups): |
| ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, |
| f"{val:.1f}Γ", ha="center", va="bottom", fontsize=10, fontweight="bold") |
|
|
| fig.suptitle("KV Cache Warm Start Performance", fontsize=14, y=1.02) |
| fig.tight_layout() |
| save_figure(fig, "fig14_ttft_speedup") |
|
|
|
|
| |
|
|
| def fig15_egr_overhead() -> None: |
| """Scatter/line: EGR overhead vs token count.""" |
| print("Fig 15: EGR overhead scaling...") |
|
|
| tokens = [600, 6403, 600] |
| overhead_ms = [30.6, 48.8, 84.0] |
| labels = ["16 layers\n(8-24)", "16 layers\n(8-24)", "32 layers\n(all)"] |
| colors_pts = [COLORS["blue"], COLORS["blue"], COLORS["orange"]] |
|
|
| fig, ax = plt.subplots(figsize=(6, 4.5)) |
|
|
| for t, o, l, c in zip(tokens, overhead_ms, labels, colors_pts): |
| ax.scatter(t, o, s=100, color=c, zorder=5, edgecolor="white", linewidth=1.5) |
| ax.annotate(l, xy=(t, o), xytext=(t + 200, o + 2), fontsize=9) |
|
|
| ax.set_xlabel("Context Length (tokens)") |
| ax.set_ylabel("EGR Overhead (ms)") |
| ax.set_title("Fingerprint Extraction Overhead") |
| ax.set_xlim(0, 7000) |
| ax.set_ylim(20, 95) |
|
|
| |
| ax.axhline(y=50, color=COLORS["grey"], linestyle=":", alpha=0.3) |
| ax.text(100, 51, "50ms threshold", fontsize=8, color=COLORS["grey"]) |
|
|
| fig.tight_layout() |
| save_figure(fig, "fig15_egr_overhead") |
|
|
|
|
| |
|
|
| def fig01_architecture_mermaid() -> None: |
| """Generate Mermaid flowchart for system architecture.""" |
| print("Fig 01: Architecture diagram (Mermaid)...") |
| mermaid = """\ |
| %%{init: {'theme': 'base', 'themeVariables': {'primaryColor': '#4477AA', 'primaryTextColor': '#fff', 'primaryBorderColor': '#335588', 'lineColor': '#666', 'secondaryColor': '#EE6677', 'tertiaryColor': '#228833'}}}%% |
| flowchart TD |
| A[LLM Runtime<br/>llama.cpp] -->|KV cache blob| B[Blob Parser] |
| B -->|Layer keys K| C[Fourier Fingerprint<br/>f0+f1 DFT] |
| C -->|2048-dim vector| D{Storage} |
| D -->|.eng binary| E[EIGENGRAM File<br/>v1.2 format] |
| D -->|HNSW index| F[FAISS IndexHNSW<br/>M=32] |
| |
| G[Query Session] -->|New KV cache| C |
| C -->|Query fingerprint| H[Geodesic Retrieval] |
| F -->|Top-k candidates| H |
| |
| H --> I{Stage 0<br/>Prior Check} |
| I -->|chronic failure| J[Skip / LOW] |
| I -->|ok| K{Stage 1<br/>HNSW Search} |
| K -->|HIGH / MEDIUM| L[Result] |
| K -->|below threshold| M{Stage 2<br/>Trajectory} |
| M -->|interpolation| N{Stage 3<br/>Constraints} |
| N --> O{Stage 4<br/>Metadata} |
| O --> L |
| |
| subgraph Confidence Tracking |
| P[IndexC<br/>SQLite] ---|update| I |
| L ---|record| P |
| end |
| |
| style A fill:#4477AA,stroke:#335588,color:#fff |
| style C fill:#228833,stroke:#1a6625,color:#fff |
| style E fill:#EE6677,stroke:#cc5566,color:#fff |
| style F fill:#66CCEE,stroke:#55aabb,color:#000 |
| style H fill:#AA3377,stroke:#882266,color:#fff |
| """ |
| mermaid_path = FIGURES_DIR / "fig01_architecture.mmd" |
| mermaid_path.write_text(mermaid) |
| print(f" Saved: fig01_architecture.mmd") |
|
|
|
|
| |
|
|
| def fig11_retrieval_pipeline_mermaid() -> None: |
| """Generate Mermaid diagram for 4-stage geodesic retrieval.""" |
| print("Fig 11: Retrieval pipeline (Mermaid)...") |
| mermaid = """\ |
| %%{init: {'theme': 'base'}}%% |
| flowchart LR |
| Q[Query<br/>Fingerprint] --> S0 |
| |
| S0[Stage 0<br/>Prior Preemption<br/><i>IndexC chronic<br/>failure check</i>] |
| S0 -->|"pass"| S1 |
| S0 -->|"preempt"| SKIP[SKIP<br/>confidence=LOW] |
| |
| S1[Stage 1<br/>HNSW Search<br/><i>cosine top-k</i>] |
| S1 -->|"margin > 0.005"| HIGH[HIGH<br/>199/200 docs] |
| S1 -->|"margin 0.001-0.005"| MED[MEDIUM] |
| S1 -->|"margin < 0.001"| S2 |
| |
| S2[Stage 2<br/>Trajectory<br/><i>interpolation<br/>w=0.3</i>] |
| S2 --> S3 |
| |
| S3[Stage 3<br/>Negative<br/>Constraints<br/><i>apophatic layer</i>] |
| S3 --> S4 |
| |
| S4[Stage 4<br/>Metadata<br/>Disambig<br/><i>domain + keywords<br/>+ norms</i>] |
| S4 --> LOW[LOW<br/>1/200 docs<br/><i>doc_146</i>] |
| |
| style S0 fill:#66CCEE,stroke:#55aabb |
| style S1 fill:#4477AA,stroke:#335588,color:#fff |
| style S2 fill:#CCBB44,stroke:#aa9933 |
| style S3 fill:#EE6677,stroke:#cc5566,color:#fff |
| style S4 fill:#AA3377,stroke:#882266,color:#fff |
| style HIGH fill:#228833,stroke:#1a6625,color:#fff |
| style MED fill:#CCBB44,stroke:#aa9933 |
| style LOW fill:#EE6677,stroke:#cc5566,color:#fff |
| style SKIP fill:#BBBBBB,stroke:#999999 |
| """ |
| mermaid_path = FIGURES_DIR / "fig11_retrieval_pipeline.mmd" |
| mermaid_path.write_text(mermaid) |
| print(f" Saved: fig11_retrieval_pipeline.mmd") |
|
|
|
|
| |
|
|
| def generate_findings() -> None: |
| """Consolidate all key metrics into a single findings.json.""" |
| print("Generating consolidated findings...") |
|
|
| findings = { |
| "title": "ENGRAM Protocol β Consolidated Research Findings", |
| "date": "2026-04-03", |
| "hardware": { |
| "platform": "Apple M3, 24GB RAM", |
| "gpu": "Metal (n_gpu_layers=-1)", |
| "os": "macOS Darwin 25.4.0", |
| "llama_cpp": "0.3.19", |
| "faiss": "1.13.2", |
| "torch": "2.11.0", |
| }, |
| "same_model_retrieval": { |
| "method": "Fourier f0+f1 fingerprint", |
| "corpus_size": 200, |
| "n_domains": 10, |
| "recall_at_1": 0.98, |
| "n_failures": 4, |
| "mean_margin": 0.007201, |
| "margin_power_law": {"A": 0.021342, "alpha": -0.2065}, |
| "f1_only_recall": 0.86, |
| "f1_only_failures": 28, |
| "improvement_over_f1": "25/28 failures fixed (+76% mean margin)", |
| "ml_math_confusion_reduction": "81.5%", |
| }, |
| "frequency_ablation": { |
| "combos_tested": 6, |
| "best": "f0+f1", |
| "results": { |
| "f1": {"recall": 0.86, "margin": 0.004087}, |
| "f2": {"recall": 0.715, "margin": 0.002196}, |
| "f1+f2": {"recall": 0.95, "margin": 0.004744}, |
| "f1+f2+f3": {"recall": 0.95, "margin": 0.004129}, |
| "f0+f1": {"recall": 0.98, "margin": 0.007201}, |
| "f1+f3": {"recall": 0.89, "margin": 0.003477}, |
| }, |
| }, |
| "hnsw_index": { |
| "speedup": 5.65, |
| "recall": 0.995, |
| "latency_us": 51.83, |
| "bruteforce_latency_us": 293.07, |
| }, |
| "geodesic_retrieval": { |
| "stages": 4, |
| "final_recall": 1.0, |
| "n_high": 0, |
| "n_medium": 199, |
| "n_low": 1, |
| "hard_failure": "doc_146 (resolved by Stage 4 metadata)", |
| }, |
| "int8_compression": { |
| "ratio": 1.97, |
| "cosine_similarity": 0.99998, |
| "margin_fp16": 0.381, |
| "margin_int8": 0.262, |
| "margin_preserved": True, |
| }, |
| "ttft_speedup": { |
| "3b_4k": {"cold_ms": 11439, "warm_ms": 170, "speedup": 67.2}, |
| "3b_16k": {"cold_ms": 94592, "warm_ms": 1777, "speedup": 53.2}, |
| "8b_591": {"cold_ms": 3508, "warm_ms": 116, "speedup": 30.8}, |
| }, |
| "cross_model_transfer": { |
| "n_strategies": 9, |
| "best_method": "FCDB", |
| "best_margin": 0.124, |
| "results": { |
| "CCA": {"margin": -0.420, "correct": False}, |
| "Residual_FCB": {"margin": -0.382, "correct": False}, |
| "Procrustes": {"margin": -0.104, "correct": False}, |
| "RR": {"margin": -0.066, "correct": False}, |
| "FCB_ridge": {"margin": -0.017, "correct": False}, |
| "Contrastive": {"margin": 0.001, "correct": True}, |
| "JCB": {"margin": 0.011, "correct": True}, |
| "JCB_delta": {"margin": 0.037, "correct": True}, |
| "FCDB": {"margin": 0.124, "correct": True}, |
| }, |
| "key_insight": "Cross-model transfer requires representing documents as directions from a shared reference point (Frechet mean), not positions in space", |
| }, |
| "fcdb_scaling": { |
| "v1_n50": {"stability": 0.82, "margin": 0.124}, |
| "v2_n200": {"stability": 0.999, "margin": 0.013}, |
| "collapse_n": 100, |
| "tradeoff": "Larger corpus stabilizes basis but dilutes per-document signal", |
| }, |
| "cka_analysis": { |
| "within_family": {"models": "Llama 3B β 8B", "mean_cka": 0.975, "f0f1_sim": 0.875}, |
| "cross_family": {"models": "Llama β Qwen", "mean_cka": 0.927, "f0f1_sim": 0.259}, |
| "verdict": "Manifolds topologically isomorphic (CKA>0.92 all pairs)", |
| }, |
| "domain_recall": { |
| "computer_science": 1.0, "general_world": 0.95, "history": 1.0, |
| "language_arts": 1.0, "ml_systems": 0.90, "mathematics": 1.0, |
| "philosophy": 1.0, "medicine": 0.95, "biology": 1.0, "physics": 1.0, |
| }, |
| "eigengram_format": { |
| "version": "1.2", |
| "architectures": ["llama", "gemma", "gemma4/ISWA", "phi", "qwen", "mistral"], |
| "iswa_support": "Gemma 4 26B dual-cache (5+25 layers, 6144-dim fingerprint)", |
| }, |
| } |
|
|
| paper_dir = RESULTS_DIR / "paper" |
| paper_dir.mkdir(parents=True, exist_ok=True) |
| findings_path = paper_dir / "findings.json" |
| findings_path.write_text(json.dumps(findings, indent=2)) |
| print(f" Saved: paper/findings.json") |
|
|
|
|
| |
|
|
| def generate_latex_tables() -> None: |
| """Generate LaTeX table source for the paper.""" |
| print("Generating LaTeX tables...") |
|
|
| tables = r"""\ |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| % Table 1: Multi-Frequency Ablation |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| \begin{table}[t] |
| \centering |
| \caption{Multi-frequency fingerprint ablation at $N=200$. The f0+f1 combination |
| achieves the highest recall and mean margin, fixing 25 of 28 single-frequency failures.} |
| \label{tab:frequency-ablation} |
| \begin{tabular}{lcccc} |
| \toprule |
| Frequencies & Recall@1 & Mean Margin & Min Margin & Failures \\ |
| \midrule |
| $f_1$ & 86.0\% & 4.09$\times 10^{-3}$ & $-4.71\times 10^{-3}$ & 28 \\ |
| $f_2$ & 71.5\% & 2.20$\times 10^{-3}$ & $-5.85\times 10^{-3}$ & 57 \\ |
| $f_1 + f_2$ & 95.0\% & 4.74$\times 10^{-3}$ & $-2.68\times 10^{-3}$ & 10 \\ |
| $f_1 + f_2 + f_3$ & 95.0\% & 4.13$\times 10^{-3}$ & $-2.71\times 10^{-3}$ & 10 \\ |
| \rowcolor{green!10} |
| $f_0 + f_1$ & \textbf{98.0\%} & \textbf{7.20}$\times 10^{-3}$ & $-4.09\times 10^{-3}$ & \textbf{4} \\ |
| $f_1 + f_3$ & 89.0\% & 3.48$\times 10^{-3}$ & $-4.08\times 10^{-3}$ & 22 \\ |
| \bottomrule |
| \end{tabular} |
| \end{table} |
| |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| % Table 2: Cross-Model Transfer Strategies |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| \begin{table}[t] |
| \centering |
| \caption{Cross-model transfer strategies (Llama 3B $\to$ 8B). Nine methods tested; |
| FCDB achieves the only reliable positive margin without requiring an adapter.} |
| \label{tab:cross-model} |
| \begin{tabular}{lccc} |
| \toprule |
| Method & Margin & Correct & Adapter \\ |
| \midrule |
| CCA & $-0.420$ & \xmark & symmetric \\ |
| Residual FCB & $-0.382$ & \xmark & none \\ |
| Procrustes & $-0.104$ & \xmark & orthogonal \\ |
| Relative Repr. & $-0.066$ & \xmark & none \\ |
| FCB + ridge & $-0.017$ & \xmark & ridge \\ |
| \midrule |
| Contrastive $\delta$ & $+0.001$ & \cmark & ridge \\ |
| JCB & $+0.011$ & \cmark & none \\ |
| JCB + $\delta$ & $+0.037$ & \cmark & none \\ |
| \rowcolor{green!10} |
| \textbf{FCDB} & $\mathbf{+0.124}$ & \cmark & \textbf{none} \\ |
| \bottomrule |
| \end{tabular} |
| \end{table} |
| |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| % Table 3: TTFT Speedup |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| \begin{table}[t] |
| \centering |
| \caption{KV cache warm-start performance. TTFT speedup ranges from 27--67$\times$ |
| depending on model size and context length.} |
| \label{tab:ttft} |
| \begin{tabular}{lccccc} |
| \toprule |
| Model & Tokens & Cold TTFT & Warm TTFT & Speedup & EGR (ms) \\ |
| \midrule |
| Llama 3.2 3B & 4,002 & 11,439\,ms & 170\,ms & 67.2$\times$ & 9.5 \\ |
| Llama 3.2 3B & 16,382 & 94,592\,ms & 1,777\,ms & 53.2$\times$ & 9.5 \\ |
| Llama 3.1 8B & 591 & 3,508\,ms & 116\,ms & 30.8$\times$ & 30.6 \\ |
| \bottomrule |
| \end{tabular} |
| \end{table} |
| |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| % Table 4: INT8 Compression |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| \begin{table}[t] |
| \centering |
| \caption{INT8 quantization results. Per-row symmetric quantization achieves |
| 1.97$\times$ compression with negligible quality loss (cos\_sim = 0.99998).} |
| \label{tab:int8} |
| \begin{tabular}{lcccc} |
| \toprule |
| Tokens & FP16 Size & INT8 Size & Ratio & $\cos(s_\text{fp16}, s_\text{int8})$ \\ |
| \midrule |
| 591 & 73.9\,MB & 37.5\,MB & 1.97$\times$ & 0.99998 \\ |
| 6,403 & 800.4\,MB & 406.5\,MB & 1.97$\times$ & 0.99998 \\ |
| \bottomrule |
| \end{tabular} |
| \end{table} |
| |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| % Table 5: CKA Analysis |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| \begin{table}[t] |
| \centering |
| \caption{Centered Kernel Alignment (CKA) between model families. High CKA values |
| ($>0.92$) confirm topological isomorphism of key manifolds across architectures.} |
| \label{tab:cka} |
| \begin{tabular}{lccc} |
| \toprule |
| Comparison & Mean CKA & f0+f1 Sim & Verdict \\ |
| \midrule |
| Within-family (Llama 3B $\leftrightarrow$ 8B) & 0.975 & 0.875 & Isomorphic \\ |
| Cross-family (Llama $\leftrightarrow$ Qwen) & 0.927 & 0.259 & Isomorphic \\ |
| \bottomrule |
| \end{tabular} |
| \end{table} |
| |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| % Table 6: HNSW Benchmark |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| \begin{table}[t] |
| \centering |
| \caption{HNSW index performance at $N=200$. The index provides 5.65$\times$ |
| speedup over brute-force with no recall loss.} |
| \label{tab:hnsw} |
| \begin{tabular}{lcc} |
| \toprule |
| Method & Latency ($\mu$s) & Recall@1 \\ |
| \midrule |
| Brute-force & 293.1 & 99.5\% \\ |
| HNSW ($M=32$) & 51.8 & 99.5\% \\ |
| \midrule |
| \textbf{Speedup} & \textbf{5.65$\times$} & --- \\ |
| \bottomrule |
| \end{tabular} |
| \end{table} |
| |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| % Table 7: Domain Recall |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| \begin{table}[t] |
| \centering |
| \caption{Per-domain recall@1 with f0+f1 fingerprint at $N=200$. |
| All domains achieve $\geq 90\%$ recall.} |
| \label{tab:domain-recall} |
| \begin{tabular}{lc} |
| \toprule |
| Domain & Recall@1 \\ |
| \midrule |
| Biology & 100.0\% \\ |
| Computer Science & 100.0\% \\ |
| History & 100.0\% \\ |
| Language Arts & 100.0\% \\ |
| Mathematics & 100.0\% \\ |
| Philosophy & 100.0\% \\ |
| Physics & 100.0\% \\ |
| General World & 95.0\% \\ |
| Medicine & 95.0\% \\ |
| ML/Systems & 90.0\% \\ |
| \bottomrule |
| \end{tabular} |
| \end{table} |
| |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| % Table 8: Margin Power Law |
| % ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
| \begin{table}[t] |
| \centering |
| \caption{Margin scaling law parameters. Both fingerprint methods follow |
| power-law decay $\bar{m} = A \cdot N^\alpha$ with no hard collapse point.} |
| \label{tab:power-law} |
| \begin{tabular}{lccc} |
| \toprule |
| Fingerprint & $A$ & $\alpha$ & Recall@200 \\ |
| \midrule |
| $f_1$ & 0.0181 & $-0.277$ & 86.0\% \\ |
| $f_0 + f_1$ & 0.0213 & $-0.207$ & 98.0\% \\ |
| \bottomrule |
| \end{tabular} |
| \end{table} |
| """ |
|
|
| paper_dir = RESULTS_DIR / "paper" |
| paper_dir.mkdir(parents=True, exist_ok=True) |
| tables_path = paper_dir / "tables.tex" |
| tables_path.write_text(tables) |
| print(f" Saved: paper/tables.tex") |
|
|
|
|
| |
|
|
| FIGURE_REGISTRY: dict[str, tuple[str, object]] = { |
| "fig01": ("System Architecture (Mermaid)", fig01_architecture_mermaid), |
| "fig02": ("Frequency Combination Comparison", fig02_frequency_comparison), |
| "fig03": ("Margin Power Law", fig03_margin_power_law), |
| "fig04": ("Recall vs N (Fourier vs FCDB)", fig04_recall_vs_n), |
| "fig05": ("Cross-Model Strategy Comparison", fig05_cross_model_strategies), |
| "fig06": ("CKA Layer Similarity", fig06_cka_layers), |
| "fig07": ("Domain Confusion Matrix", fig07_confusion_matrix), |
| "fig08": ("Domain Recall Radar", fig08_domain_recall_radar), |
| "fig09": ("HNSW Benchmark", fig09_hnsw_benchmark), |
| "fig10": ("INT8 Compression", fig10_int8_compression), |
| "fig11": ("Retrieval Pipeline (Mermaid)", fig11_retrieval_pipeline_mermaid), |
| "fig12": ("Margin Distribution", fig12_margin_distribution), |
| "fig13": ("FCDB Tradeoff", fig13_fcdb_tradeoff), |
| "fig14": ("TTFT Speedup", fig14_ttft_speedup), |
| "fig15": ("EGR Overhead Scaling", fig15_egr_overhead), |
| "findings": ("Consolidated Findings JSON", generate_findings), |
| "tables": ("LaTeX Tables", generate_latex_tables), |
| } |
|
|
|
|
| def main() -> None: |
| parser = argparse.ArgumentParser(description="Generate ENGRAM paper figures") |
| parser.add_argument("--only", help="Generate only this figure (e.g., fig02)") |
| parser.add_argument("--list", action="store_true", help="List all figures") |
| args = parser.parse_args() |
|
|
| if args.list: |
| print("\nAvailable figures:") |
| for key, (desc, _) in FIGURE_REGISTRY.items(): |
| print(f" {key:10s} {desc}") |
| return |
|
|
| FIGURES_DIR.mkdir(parents=True, exist_ok=True) |
| print(f"\nOutput directory: {FIGURES_DIR}\n") |
|
|
| if args.only: |
| if args.only not in FIGURE_REGISTRY: |
| print(f"Unknown figure: {args.only}") |
| print(f"Available: {', '.join(FIGURE_REGISTRY.keys())}") |
| sys.exit(1) |
| desc, func = FIGURE_REGISTRY[args.only] |
| func() |
| else: |
| for key, (desc, func) in FIGURE_REGISTRY.items(): |
| try: |
| func() |
| except Exception as e: |
| print(f" ERROR generating {key}: {e}") |
|
|
| print(f"\nDone. Figures saved to: {FIGURES_DIR}") |
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|