eigengram committed on
Commit
954cf8a
·
verified ·
1 Parent(s): 36566c3

feat: upload scripts

Browse files
scripts/compute_corpus_basis.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Compute a Fixed Corpus Basis (FCB) for cross-document and
3
+ cross-model stable state vector extraction.
4
+
5
+ The FCB is the principal subspace of the key manifold computed
6
+ from a diverse reference corpus. Unlike per-document SVD,
7
+ the FCB is document-independent — all documents projected
8
+ with the same FCB exist in the same coordinate system.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import gc
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ import torch
19
+ from llama_cpp import Llama
20
+
21
+ from kvcos.core.blob_parser import parse_state_blob
22
+ from kvcos.core.state_extractor import MARStateExtractor
23
+ from scripts.generate_alignment_dataset import DOCUMENTS
24
+
25
+
26
def main() -> int:
    """Build and persist a Fixed Corpus Basis from the reference corpus.

    Prefills every reference document through the model, harvests the
    per-document key tensors from the llama.cpp state blob, and computes
    the shared principal subspace via
    ``MARStateExtractor.compute_corpus_basis``. The basis, its settings,
    and the raw key tensors are saved together with ``torch.save``.

    Returns:
        Process exit code (0 on success).
    """
    ap = argparse.ArgumentParser(description="Compute Fixed Corpus Basis")
    ap.add_argument("--model", required=True)
    ap.add_argument("--layer-range", type=int, nargs=2, default=[8, 24])
    ap.add_argument("--gate-start", type=int, default=6)
    ap.add_argument("--rank", type=int, default=122)
    ap.add_argument("--output", required=True)
    args = ap.parse_args()

    llm = Llama(model_path=args.model, n_ctx=2048, n_gpu_layers=-1, verbose=False)
    md = llm.metadata
    # GGUF metadata values are strings; defaults match Llama 3.1 8B.
    n_kv = int(md.get("llama.attention.head_count_kv", "8"))
    n_heads = int(md.get("llama.attention.head_count", "32"))
    head_dim = int(md.get("llama.embedding_length", "4096")) // n_heads
    model_name = md.get("general.name", "unknown")

    print(f"Model: {model_name} ({n_kv} KV heads, {head_dim} head_dim)")
    print(f"Layer range: {args.layer_range}, gate_start: {args.gate_start}")
    print(f"Collecting key tensors from {len(DOCUMENTS)} documents...")

    key_tensors: list[torch.Tensor] = []
    for doc_idx, document in enumerate(DOCUMENTS, start=1):
        llm.reset()  # fresh KV cache for every document
        llm(document.strip(), max_tokens=1, temperature=0.0)
        state = llm.save_state()
        parsed = parse_state_blob(
            bytes(state.llama_state), n_kv_heads=n_kv, head_dim=head_dim
        )
        key_tensors.append(parsed.keys)
        if doc_idx % 10 == 0:
            print(f" {doc_idx}/{len(DOCUMENTS)}")
    # The model is no longer needed; release it before the SVD.
    del llm
    gc.collect()

    print("Computing corpus SVD...")
    basis = MARStateExtractor.compute_corpus_basis(
        key_tensors=key_tensors,
        layer_range=tuple(args.layer_range),
        gate_start=args.gate_start,
        rank=args.rank,
    )

    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "basis": basis,
        "model_name": model_name,
        "layer_range": args.layer_range,
        "gate_start": args.gate_start,
        "rank": args.rank,
        "n_corpus_docs": len(DOCUMENTS),
        "key_tensors": key_tensors,
    }
    torch.save(payload, str(out))

    print(f"Basis shape: {basis.shape}")
    print(f"Saved: {out}")
    return 0
87
+
88
+
89
# Script entry point: exit status is main()'s return value.
if __name__ == "__main__":
    sys.exit(main())
scripts/demo_agent_session.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ENGRAM Protocol — Demo Agent Session
3
+
4
+
5
+ End-to-end demonstration:
6
+ 1. Load model via llama-cpp-python (D1)
7
+ 2. Generate with a prompt β†’ measure cold TTFT
8
+ 3. Extract KV cache β†’ compress β†’ serialize to .eng
9
+ 4. Index in EGR manifold index
10
+ 5. Reset model β†’ restore from .eng β†’ measure cached TTFT
11
+ 6. Print speedup ratio
12
+
13
+ D6: Target >10x TTFT reduction at 16K context on Llama 3.1 8B.
14
+ Cold baseline: ~1,500-5,000ms. Cached target: <500ms.
15
+ Anything below 4x at 16K is a failure.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import sys
22
+ import time
23
+ from pathlib import Path
24
+
25
+
26
def _run_dry_run(args: argparse.Namespace) -> int:
    """Run the full pipeline with synthetic tensors — no model file needed.

    Builds random K/V tensors shaped like Llama 3.1 8B, round-trips them
    through the .eng serializer, indexes and retrieves via the EGR
    manifold index, and prints per-phase timing breakdowns.

    Returns:
        0 when the simulated cold/cached speedup exceeds 10x, else 1.
    """
    import os
    import tempfile

    import torch

    from kvcos.core.cache_spec import LLAMA_3_1_8B
    from kvcos.core.serializer import EngramSerializer
    from kvcos.core.types import CompressionMethod, StateExtractionMode
    from kvcos.core.manifold_index import IndexEntry, ManifoldIndex
    from kvcos.core.state_extractor import MARStateExtractor
    from kvcos.storage.local import LocalStorageBackend

    spec = LLAMA_3_1_8B
    ctx_len = args.context
    model_name = spec["model_id"]

    # ── Synthetic KV tensors ──────────────────────────────
    # Fixed seed so repeated runs produce identical tensors.
    torch.manual_seed(42)
    shape = (spec["n_layers"], spec["n_kv_heads"], ctx_len, spec["head_dim"])
    keys = torch.randn(shape, dtype=torch.float16)
    values = torch.randn(shape, dtype=torch.float16)

    tensor_mb = keys.numel() * keys.element_size() / 1024 / 1024

    with tempfile.TemporaryDirectory() as tmp:
        tmp_dir = Path(tmp)

        # ── Serialize to .eng ────────────────────────────────
        serializer = EngramSerializer()
        eng_path = tmp_dir / "dry_run.eng"

        t0 = time.perf_counter()
        # NOTE(review): `result` and `serialize_ms` are computed here but
        # never reported below.
        result = serializer.serialize(
            keys=keys, values=values,
            agent_id="dry-run-agent",
            task_description="dry run benchmark",
            model_id=model_name,
            output_path=eng_path,
            compression=CompressionMethod.Q8_0,
        )
        serialize_ms = (time.perf_counter() - t0) * 1000

        # ── Load back ────────────────────────────────────────
        t0 = time.perf_counter()
        k_out, v_out, meta = serializer.deserialize(eng_path)
        deserialize_ms = (time.perf_counter() - t0) * 1000

        assert k_out.shape == keys.shape, f"Shape mismatch: {k_out.shape} vs {keys.shape}"

        # ── EGR granular timing ──────────────────────────────
        extractor = MARStateExtractor(
            mode=StateExtractionMode.SVD_PROJECT,
            rank=min(160, spec["head_dim"]),
        )
        dim = extractor.output_dim(spec)
        index = ManifoldIndex(dim=dim)
        storage = LocalStorageBackend(data_dir=tmp_dir)

        # Index: extract + serialize + store + add
        t0 = time.perf_counter()
        extraction = extractor.extract(keys, spec)
        t_extract = time.perf_counter()

        eng2 = tmp_dir / "indexed.eng"
        serializer.serialize(
            keys=keys, values=values,
            agent_id="dry-run-agent",
            task_description="dry run benchmark",
            model_id=model_name,
            output_path=eng2,
            compression=CompressionMethod.Q8_0,
            cache_id="dry-run-001",
        )
        t_serialize = time.perf_counter()

        idx_meta = serializer.read_metadata_only(eng2)
        storage.store_file("dry-run-001", eng2, idx_meta)
        t_store = time.perf_counter()

        from datetime import datetime, timezone
        entry = IndexEntry(
            cache_id="dry-run-001",
            task_description="dry run benchmark",
            model_id=model_name,
            created_at=datetime.now(timezone.utc).isoformat(),
            context_len=ctx_len,
            l2_norm=extraction.l2_norm,
        )
        index.add(extraction.state_vec, entry)
        t_add = time.perf_counter()

        # Per-phase wall-clock deltas in milliseconds.
        extract_ms = (t_extract - t0) * 1000
        ser_ms = (t_serialize - t_extract) * 1000
        store_ms = (t_store - t_serialize) * 1000
        add_ms = (t_add - t_store) * 1000
        index_ms = (t_add - t0) * 1000

        # Retrieve: extract query + search + load
        # Different seed: the query tensor must differ from the indexed one.
        torch.manual_seed(99)
        query_keys = torch.randn(shape, dtype=torch.float16)

        t0 = time.perf_counter()
        q_ext = extractor.extract(query_keys, spec)
        t_qext = time.perf_counter()

        results = index.search(q_ext.state_vec, top_k=1)
        t_search = time.perf_counter()

        # Load matched engram
        # NOTE(review): k_loaded/v_loaded are only timed, never inspected.
        stored_path = storage.get_path("dry-run-001")
        k_loaded, v_loaded, _ = serializer.deserialize(stored_path)
        t_load = time.perf_counter()

        q_extract_ms = (t_qext - t0) * 1000
        search_ms = (t_search - t_qext) * 1000
        load_ms = (t_load - t_search) * 1000
        retrieve_ms = (t_load - t0) * 1000

        # ── Simulate TTFT estimates ──────────────────────────
        cold_ms = ctx_len * 0.1  # simulated
        cached_ms = deserialize_ms
        egr_overhead = extract_ms + search_ms  # overhead added to warm path
        speedup = cold_ms / cached_ms if cached_ms > 0 else float("inf")
        eng_size_mb = os.path.getsize(eng_path) / 1024 / 1024

        # ── Output ───────────────────────────────────────────
        sep = "=" * 35
        print(sep)
        print("ENGRAM Protocol \u2014 EGR Demo")
        print(f"Model: {model_name}")
        print(f"Context: {ctx_len} tokens")
        print(sep)
        print(f"Cold TTFT: {cold_ms:.1f}ms (simulated)")
        print(f"Cached TTFT: {cached_ms:.1f}ms (deserialize)")
        print(f"Speedup: {speedup:.1f}x")
        print(f"D6 target: >10x at 16K tokens")
        status = "PASS" if speedup > 10 else "FAIL"
        print(f"Status: {status}")
        print(f"EGR overhead: {egr_overhead:.1f}ms (extract+search)")
        print(f".eng file: {eng_path.name} ({eng_size_mb:.1f}MB)")
        print(f"Tensor shape: {list(shape)} ({tensor_mb:.0f}MB per K/V)")
        print(sep)
        print()
        print("Index breakdown:")
        print(f" SVD extract: {extract_ms:8.1f}ms")
        print(f" Serialize .eng: {ser_ms:8.1f}ms")
        print(f" Store backend: {store_ms:8.1f}ms")
        print(f" FAISS add(): {add_ms:8.1f}ms")
        print(f" TOTAL: {index_ms:8.1f}ms")
        print()
        print("Retrieve breakdown:")
        print(f" SVD extract: {q_extract_ms:8.1f}ms")
        print(f" FAISS search(): {search_ms:8.1f}ms")
        print(f" Load+deser: {load_ms:8.1f}ms")
        print(f" TOTAL: {retrieve_ms:8.1f}ms")
        print()
        print("Verification:")
        print(f" Round-trip shape: {'OK' if k_out.shape == keys.shape else 'FAIL'}")
        print(f" Retrieval result: {'OK' if len(results) >= 1 else 'FAIL'}")
        print(f" .eng valid: {'OK' if eng_path.exists() else 'FAIL'}")

        return 0 if speedup > 10 else 1
190
+
191
+
192
def main() -> int:
    """Run the end-to-end ENGRAM demo session.

    Parses CLI arguments, then either runs the synthetic dry-run pipeline
    (--dry-run) or the real model flow: cold prefill, KV extraction,
    .eng serialization, EGR indexing, cached restore, and a final
    cold-vs-cached TTFT comparison.

    Returns:
        Process exit code — 0 when the real-model speedup is >= 4x
        (dry-run: > 10x), 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="ENGRAM Protocol — Demo Agent Session",
        epilog="D6: >10x TTFT reduction at 16K context on Llama 3.1 8B",
    )
    parser.add_argument(
        "--model", "-m", default=None,
        help="Path to GGUF model file (required unless --dry-run)",
    )
    parser.add_argument(
        "--context", "-c", type=int, default=4096,
        help="Context length to fill (tokens). Default: 4096",
    )
    parser.add_argument(
        "--n-ctx", type=int, default=16384,
        help="Max context window for model. Default: 16384",
    )
    parser.add_argument(
        "--data-dir", type=str, default=None,
        help="ENGRAM data directory. Default: ~/.engram/data",
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Run full pipeline with synthetic tensors (no model needed)",
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true",
        help="Enable verbose output",
    )
    args = parser.parse_args()

    if args.dry_run:
        return _run_dry_run(args)

    if not args.model:
        parser.error("--model is required unless --dry-run is specified")

    print("=" * 70)
    print("ENGRAM Protocol — Demo Agent Session")
    print("KV cache fingerprinting for persistent semantic retrieval")
    print("=" * 70)
    print()

    # ── Setup ─────────────────────────────────────────────────
    # Imports are deferred so `--help` and argument errors stay fast.
    from kvcos.core.config import get_config
    from kvcos.core.serializer import EngramSerializer
    from kvcos.core.types import CompressionMethod, StateExtractionMode
    from kvcos.core.manifold_index import ManifoldIndex
    from kvcos.core.retriever import EGRRetriever
    from kvcos.core.state_extractor import MARStateExtractor
    from kvcos.storage.local import LocalStorageBackend
    from integrations.llama_cpp_bridge import LlamaCppBridge

    config = get_config()
    data_dir = Path(args.data_dir) if args.data_dir else config.data_dir

    # ── Step 1: Load Model ────────────────────────────────────
    print(f"[1/6] Loading model: {args.model}")
    bridge = LlamaCppBridge(
        model_path=args.model,
        n_ctx=args.n_ctx,
        n_gpu_layers=0,  # D1
        verbose=args.verbose,
    )
    spec = bridge.load_model()
    print(f" Model: {spec['model_id']}")
    print(f" Architecture: {spec['n_layers']}L / {spec['n_heads']}H / {spec['n_kv_heads']}KV / {spec['head_dim']}D")
    print(f" Context window: {args.n_ctx}")
    print()

    # ── Step 2: Generate + Cold TTFT ──────────────────────────
    base = "The quick brown fox jumps over the lazy dog. "
    target_tokens = args.context
    needed_chars = target_tokens * 4  # ~4 chars/token heuristic
    # Fix: repeat enough to actually cover the requested context. A fixed
    # 100x repeat capped the prompt at ~4,600 chars (~1K tokens), so larger
    # --context values were silently under-filled.
    filler = base * (needed_chars // len(base) + 1)
    prompt = filler[:needed_chars]

    print(f"[2/6] Cold prefill ({target_tokens} target tokens)...")
    cold = bridge.measure_cold_ttft(prompt)
    print(f" Cold TTFT: {cold.ttft_ms:.1f}ms ({cold.context_len} tokens)")
    print()

    # ── Step 3: Extract + Serialize ───────────────────────────
    print("[3/6] Extracting KV cache...")
    try:
        parsed = bridge.extract_kv_cache()
        print(f" Keys shape: {list(parsed.keys.shape)}")
        print(f" Values shape: {list(parsed.values.shape)}")
        print(f" Cells: {parsed.n_cells}")
    except Exception as e:
        # Best-effort: fall back to the raw state blob below.
        print(f" KV extraction failed: {e}")
        print(" This is expected if the blob format doesn't match.")
        print(" Falling back to save_state/load_state raw blob path.")
        parsed = None
    print()

    print("[3b/6] Saving raw state blob...")
    raw_state = bridge.llm.save_state()
    raw_blob = bytes(raw_state.llama_state)
    print(f" Raw state size: {len(raw_blob) / 1024 / 1024:.1f} MB")

    if parsed is not None:
        print("[3c/6] Serializing to .eng format...")
        serializer = EngramSerializer()
        eng_path = data_dir / "demo" / "session_001.eng"
        result = serializer.serialize(
            keys=parsed.keys,
            values=parsed.values,
            agent_id="demo-agent",
            task_description="demo session - cold prefill benchmark",
            model_id=spec["model_id"],
            output_path=eng_path,
            compression=CompressionMethod.Q8_0,
        )
        print(f" .eng file: {result['path']}")
        print(f" Size: {result['size_bytes'] / 1024 / 1024:.1f} MB")
        print(f" Compression ratio: {result['compression_ratio']:.2f}x")
    print()

    # ── Step 4: Index in EGR ──────────────────────────────────
    if parsed is not None:
        print("[4/6] Indexing in EGR manifold index...")
        storage = LocalStorageBackend(data_dir=data_dir)
        extractor = MARStateExtractor(
            mode=StateExtractionMode.SVD_PROJECT,
            rank=min(160, spec["head_dim"]),
        )
        dim = extractor.output_dim(spec)
        index = ManifoldIndex(dim=dim)
        retriever = EGRRetriever(extractor, index, storage)

        cache_id = retriever.index_engram(
            keys=parsed.keys,
            values=parsed.values,
            spec=spec,
            agent_id="demo-agent",
            task_description="demo session - cold prefill benchmark",
            model_id=spec["model_id"],
        )
        print(f" Indexed: {cache_id}")
        print(f" State vector dim: {dim}")
        print(f" Index entries: {index.n_entries}")
    else:
        print("[4/6] Skipped (KV extraction failed)")
    print()

    # ── Step 5: Restore + Cached TTFT ─────────────────────────
    print("[5/6] Restoring from cached state...")
    cached = bridge.measure_cached_ttft(raw_blob)
    print(f" Cached TTFT: {cached.ttft_ms:.1f}ms")
    print()

    # ── Step 6: Results ───────────────────────────────────────
    cold_ms = cold.ttft_ms
    cached_ms = cached.ttft_ms
    speedup = cold_ms / cached_ms if cached_ms > 0 else float("inf")

    # `result` only exists when serialization ran (parsed is not None);
    # use the same guard here instead of relying on object truthiness.
    eng_path_str = result["path"] if parsed is not None else "N/A"
    eng_size_kb = result["size_bytes"] / 1024 if parsed is not None else 0

    sep = "=" * 35
    print(sep)
    print("ENGRAM Protocol — EGR Demo")
    print(f"Model: {spec['model_id']}")
    print(f"Context: {cold.context_len} tokens")
    print(sep)
    print(f"Cold TTFT: {cold_ms:.1f}ms")
    print(f"Cached TTFT: {cached_ms:.1f}ms")
    print(f"Speedup: {speedup:.1f}x")
    print("D6 target: >10x at 16K tokens")
    status = "PASS" if speedup > 10 else "FAIL"
    print(f"Status: {status}")
    print(f".eng file: {eng_path_str} ({eng_size_kb:.1f}KB)")
    print(sep)

    return 0 if speedup >= 4 else 1
368
+
369
+
370
# Script entry point: exit status is main()'s return value.
if __name__ == "__main__":
    sys.exit(main())
scripts/diagnose_gemma4.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Diagnostic script for Gemma 4 26B-A4B GGUF compatibility with ENGRAM.
3
+
4
+ Tests:
5
+ 1. Model loading + metadata extraction
6
+ 2. Basic generation (does it produce coherent output?)
7
+ 3. State blob extraction + structure analysis
8
+ 4. ENGRAM blob parser compatibility
9
+ 5. Full fingerprint pipeline (if blob parsing works)
10
+
11
+ Usage:
12
+ PYTHONPATH=. .venv/bin/python scripts/diagnose_gemma4.py /path/to/gemma4.gguf
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import struct
18
+ import sys
19
+ import time
20
+ from pathlib import Path
21
+
22
+
23
def read_u32(data: bytes, offset: int) -> tuple[int, int]:
    """Decode a little-endian unsigned 32-bit int at *offset*.

    Returns (value, offset advanced past the field).
    """
    (value,) = struct.unpack_from("<I", data, offset)
    return value, offset + 4
25
+
26
+
27
def read_i32(data: bytes, offset: int) -> tuple[int, int]:
    """Decode a little-endian signed 32-bit int at *offset*.

    Returns (value, offset advanced past the field).
    """
    (value,) = struct.unpack_from("<i", data, offset)
    return value, offset + 4
29
+
30
+
31
def read_u64(data: bytes, offset: int) -> tuple[int, int]:
    """Decode a little-endian unsigned 64-bit int at *offset*.

    Returns (value, offset advanced past the field).
    """
    (value,) = struct.unpack_from("<Q", data, offset)
    return value, offset + 8
33
+
34
+
35
def inspect_blob_header(blob: bytes) -> dict:
    """Parse just the header/structure of a state blob without assuming F16.

    Walks the llama.cpp state-blob layout (arch string, stream count,
    cell metadata, then per-layer K dtype/row-size records) and returns
    the fields as a dict. When the blob has more than one KV stream the
    dict carries an "error" key and parsing stops early.
    """
    info: dict = {}
    offset = 0

    # Architecture string: u32 length followed by ASCII bytes.
    str_len, offset = read_u32(blob, offset)
    info["arch"] = blob[offset:offset + str_len].decode("ascii", errors="replace")
    offset += str_len

    # KV stream count — only single-stream blobs are supported here.
    n_stream, offset = read_u32(blob, offset)
    info["n_stream"] = n_stream
    if n_stream != 1:
        info["error"] = f"Expected 1 stream, got {n_stream}"
        return info

    cell_count, offset = read_u32(blob, offset)
    info["cell_count"] = cell_count

    # Skip per-cell metadata: position plus a list of sequence ids.
    for _ in range(cell_count):
        _pos, offset = read_i32(blob, offset)
        n_seq, offset = read_u32(blob, offset)
        for _ in range(n_seq):
            _sid, offset = read_i32(blob, offset)

    # Data header.
    v_trans, offset = read_u32(blob, offset)
    info["v_trans"] = bool(v_trans)

    n_layers, offset = read_u32(blob, offset)
    info["n_layers"] = n_layers

    # Sample the first few K layers: dtype tag + row size, skipping payload.
    k_types: list = []
    k_rows: list = []
    info["k_layer_types"] = k_types
    info["k_layer_row_sizes"] = k_rows
    for _ in range(min(n_layers, 5)):
        type_k, offset = read_i32(blob, offset)
        row_size_k, offset = read_u64(blob, offset)
        k_types.append(type_k)
        k_rows.append(row_size_k)
        offset += row_size_k * cell_count  # jump over the tensor data

    info["data_offset_after_k_sample"] = offset
    info["blob_total_size"] = len(blob)

    # Map GGML dtype tags to readable names.
    type_names = {0: "F32", 1: "F16", 2: "Q4_0", 8: "Q8_0"}
    info["k_type_names"] = [type_names.get(t, f"unknown({t})") for t in k_types]

    return info
89
+
90
+
91
def main():
    """Run the Gemma GGUF diagnostic steps end-to-end.

    Exits with status 1 on unusable input or hard failures; later steps
    degrade gracefully and report instead of aborting.
    """
    # Basic CLI validation: a single positional GGUF path is required.
    if len(sys.argv) < 2:
        print("Usage: python scripts/diagnose_gemma4.py <path-to-gguf>")
        sys.exit(1)

    model_path = sys.argv[1]
    if not Path(model_path).exists():
        print(f"Model not found: {model_path}")
        sys.exit(1)

    print(f"{'='*60}")
    print(f"ENGRAM × Gemma 4 Diagnostic")
    print(f"Model: {model_path}")
    print(f"{'='*60}\n")

    # ── Step 1: Load model ──────────────────────────────────────
    print("STEP 1: Loading model...")
    try:
        from llama_cpp import Llama

        t0 = time.perf_counter()
        llm = Llama(
            model_path=model_path,
            n_ctx=512,  # minimal context for diagnostics
            n_gpu_layers=0,  # CPU for safety
            verbose=False,
        )
        load_s = time.perf_counter() - t0
        print(f" Loaded in {load_s:.1f}s")
    except Exception as e:
        print(f" FAILED: {type(e).__name__}: {e}")
        sys.exit(1)

    # ── Step 2: Read metadata ───────────────────────────────────
    print("\nSTEP 2: Model metadata")
    metadata = llm.metadata
    # Keys worth printing for both llama- and gemma-namespaced GGUFs.
    interesting_keys = [
        "general.name", "general.architecture",
        "llama.block_count", "general.block_count",
        "llama.attention.head_count", "llama.attention.head_count_kv",
        "llama.embedding_length", "llama.context_length",
        "llama.expert_count", "llama.expert_used_count",
        "gemma.block_count", "gemma.attention.head_count",
        "gemma.attention.head_count_kv", "gemma.embedding_length",
    ]
    for key in interesting_keys:
        val = metadata.get(key)
        if val is not None:
            print(f" {key}: {val}")

    # Also dump any keys containing "expert" or "moe"
    for key, val in sorted(metadata.items()):
        if "expert" in key.lower() or "moe" in key.lower():
            print(f" {key}: {val}")

    # Derive spec parameters
    # Fall back through llama.* then gemma.* then general.* namespaces;
    # "0" when absent so the int() conversion still succeeds.
    n_layers = int(metadata.get("llama.block_count", metadata.get("gemma.block_count", metadata.get("general.block_count", "0"))))
    n_heads = int(metadata.get("llama.attention.head_count", metadata.get("gemma.attention.head_count", "0")))
    n_kv_heads = int(metadata.get("llama.attention.head_count_kv", metadata.get("gemma.attention.head_count_kv", str(n_heads))))
    embed_dim = int(metadata.get("llama.embedding_length", metadata.get("gemma.embedding_length", "0")))
    head_dim = embed_dim // n_heads if n_heads > 0 else 0

    print(f"\n Derived spec:")
    print(f" n_layers={n_layers}, n_heads={n_heads}, n_kv_heads={n_kv_heads}")
    print(f" embed_dim={embed_dim}, head_dim={head_dim}")
    print(f" n_embd_kv = {n_kv_heads * head_dim}")

    # ── Step 3: Generate ────────────────────────────────────────
    print("\nSTEP 3: Basic generation")
    try:
        t0 = time.perf_counter()
        output = llm("Hello, my name is", max_tokens=20, temperature=0.0)
        gen_ms = (time.perf_counter() - t0) * 1000
        text = output["choices"][0]["text"]
        print(f" Generated in {gen_ms:.0f}ms")
        print(f" Output: {text[:200]}")
    except Exception as e:
        # Generation failure is non-fatal; the state-blob checks still run.
        print(f" FAILED: {type(e).__name__}: {e}")
        print(" Continuing anyway (bartowski warned about conversion issues)...")

    # ── Step 4: State blob extraction ───────────────────────────
    print("\nSTEP 4: State blob extraction")
    try:
        state_data = llm.save_state()
        blob = bytes(state_data.llama_state)
        print(f" Blob size: {len(blob):,} bytes ({len(blob)/1024/1024:.1f} MB)")

        # Inspect structure without assuming F16
        info = inspect_blob_header(blob)
        print(f" Architecture: {info.get('arch', '?')}")
        print(f" Cell count: {info.get('cell_count', '?')}")
        print(f" V transposed: {info.get('v_trans', '?')}")
        print(f" N layers: {info.get('n_layers', '?')}")
        print(f" K dtype (first 5 layers): {info.get('k_type_names', [])}")
        print(f" K row sizes (first 5): {info.get('k_layer_row_sizes', [])}")

        if info.get("k_layer_row_sizes"):
            row = info["k_layer_row_sizes"][0]
            cells = info["cell_count"]
            elements_per_row = row // 2  # assuming F16
            expected_embd_kv = n_kv_heads * head_dim
            print(f"\n Row analysis:")
            print(f" row_size={row}, cells={cells}")
            print(f" elements_per_cell (if F16) = {row // 2}")
            print(f" expected n_embd_kv = {expected_embd_kv}")
            if elements_per_row == expected_embd_kv:
                print(f" MATCH: row elements == n_kv_heads * head_dim")
            else:
                print(f" MISMATCH: {elements_per_row} != {expected_embd_kv}")
                # Check if it matches with different assumptions
                # (bytes-per-element for each candidate GGML dtype).
                for dtype_name, dtype_size in [("F32", 4.0), ("F16", 2.0), ("Q8_0", 34/32), ("Q4_0", 18/32)]:
                    if row / dtype_size == expected_embd_kv:
                        print(f" → Would match with dtype {dtype_name}")
    except Exception as e:
        # Without a state blob the remaining steps cannot run.
        print(f" FAILED: {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    # ── Step 5: ENGRAM blob parser ──────────────────────────────
    print("\nSTEP 5: ENGRAM blob parser")
    if n_kv_heads == 0 or head_dim == 0:
        print(" SKIPPED: could not derive n_kv_heads/head_dim from metadata")
    else:
        try:
            from kvcos.core.blob_parser import parse_state_blob
            parsed = parse_state_blob(blob, n_kv_heads=n_kv_heads, head_dim=head_dim)
            print(f" SUCCESS!")
            print(f" Keys shape: {parsed.keys.shape}")
            print(f" Values shape: {parsed.values.shape}")
            print(f" N cells: {parsed.n_cells}")
            print(f" N layers: {parsed.n_layers}")
            print(f" Arch: {parsed.arch}")
        except Exception as e:
            print(f" FAILED: {type(e).__name__}: {e}")
            print(" This is where we need to fix compatibility.")

    # ── Step 6: Fourier fingerprint ─────────────────────────────
    print("\nSTEP 6: Fourier fingerprint (if blob parsed)")
    try:
        # Deliberate NameError probe: `parsed` only exists if step 5 succeeded.
        parsed  # check it exists
        from kvcos.core.fingerprint import compute_fourier_fingerprint_v2
        layer_keys = parsed.keys.float().mean(dim=2)  # [layers, heads, dim]
        fp = compute_fourier_fingerprint_v2(layer_keys, freqs=[0, 1])
        print(f" Fingerprint shape: {fp.shape}")
        print(f" Norm: {fp.norm():.4f}")
        print(f" First 5 values: {fp[:5].tolist()}")
    except NameError:
        print(" SKIPPED: blob parsing failed")
    except Exception as e:
        print(f" FAILED: {type(e).__name__}: {e}")

    print(f"\n{'='*60}")
    print("Diagnostic complete.")
245
+
246
+
247
+ if __name__ == "__main__":
248
+ main()
scripts/egr_semantic_proof.py ADDED
@@ -0,0 +1,437 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ENGRAM Protocol β€” EGR Semantic Proof Script
3
+ Definitive K→K retrieval validation with diverse, non-repeated documents.
4
+
5
+ Usage:
6
+ KMP_DUPLICATE_LIB_OK=TRUE OMP_NUM_THREADS=1 PYTHONPATH=. \
7
+ .venv/bin/python scripts/egr_semantic_proof.py \
8
+ --model /path/to/model.gguf \
9
+ --ctx 16384 --n-trials 3 --layer-range 8 24 \
10
+ --output results/egr_semantic_proof_8B_14K.json --verbose
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import gc
17
+ import json
18
+ import math
19
+ import os
20
+ import sys
21
+ import tempfile
22
+ import time
23
+ from datetime import datetime, timezone
24
+ from pathlib import Path
25
+
26
+ import torch
27
+
28
+ # ── Documents ─────────────────────────────────────────────────────────────────
29
+
30
+ DOC_A = """
31
+ The transformer architecture introduced in "Attention Is All You Need"
32
+ replaced recurrent networks with self-attention as the core computational
33
+ primitive. Self-attention computes a weighted sum of value vectors, where
34
+ weights derive from the compatibility between query and key vectors.
35
+ For a sequence of length n, the attention matrix has shape nΓ—n,
36
+ making vanilla attention quadratic in both time and memory.
37
+
38
+ Multi-head attention partitions the embedding dimension into h parallel
39
+ subspaces. Each head independently computes attention using its own
40
+ learned projections W_Q, W_K, W_V of dimension d_model/h. The outputs
41
+ are concatenated and projected back to d_model via W_O. This allows
42
+ different heads to specialize in different relational patterns:
43
+ some heads track syntactic dependencies, others semantic similarity,
44
+ others coreference chains across longer distances.
45
+
46
+ Grouped-query attention generalizes multi-head and multi-query attention.
47
+ Rather than one KV pair per query head (MHA) or one KV pair for all
48
+ heads (MQA), GQA assigns one KV pair per group of g query heads.
49
+ Llama 3 uses GQA with 8 KV heads for 32 query heads, reducing
50
+ KV cache memory by 4Γ— with minimal quality degradation.
51
+
52
+ Rotary position embeddings encode absolute position by rotating
53
+ query and key vectors in 2D subspaces of the head dimension.
54
+ Unlike learned absolute embeddings or sinusoidal encodings,
55
+ RoPE naturally extrapolates to sequences longer than those seen
56
+ during training by preserving the inner product between positions
57
+ i and j as a function only of their relative offset i-j.
58
+
59
+ The KV cache enables efficient autoregressive generation by storing
60
+ computed key and value matrices from all previous positions.
61
+ Without caching, generating a sequence of length L requires O(LΒ²)
62
+ attention operations. With caching, each new token requires only
63
+ O(L) operations β€” one attention pass over the cached KV pairs.
64
+
65
+ Flash attention avoids materializing the full nΓ—n attention matrix
66
+ by tiling the computation into blocks that fit in SRAM. The forward
67
+ pass fuses the softmax and matrix multiply into a single kernel,
68
+ achieving O(n) memory complexity while maintaining exact numerical
69
+ equivalence to standard attention.
70
+
71
+ Mixture-of-experts transformer variants route each token to a sparse
72
+ subset of feed-forward experts using a learned routing function.
73
+ Mistral's Mixtral 8Γ—7B activates 2 of 8 experts per token,
74
+ achieving 7B-parameter inference cost with 47B total parameters.
75
+ Expert specialization emerges: some experts process syntactic
76
+ patterns, others domain-specific content, without explicit supervision.
77
+
78
+ Layer normalization applied before the attention sublayer (Pre-LN)
79
+ stabilizes training compared to Post-LN by ensuring gradients flow
80
+ through the residual stream without vanishing through normalized paths.
81
+ Modern architectures including Llama, Mistral, and GPT-NeoX all
82
+ adopt Pre-LN with RMSNorm, dropping the learned bias parameters.
83
+ """
84
+
85
# Control document B: molecular-biology content, deliberately disjoint from
# DOC_A's transformer/ML topic, so the attention QUERY below should retrieve
# session A with a clear margin over session B.
DOC_B = """
DNA replication in eukaryotic cells initiates at multiple origins
of replication simultaneously, enabling the duplication of genomes
containing billions of base pairs within hours. The origin recognition
complex marks these sites, recruiting CDC6 and CDT1 to load the
MCM helicase onto double-stranded DNA during G1 phase.

The MCM complex unwinds the double helix at replication forks,
separating the complementary strands to serve as templates.
DNA polymerase delta and epsilon synthesize the lagging and leading
strands respectively, both requiring a short RNA primer synthesized
by primase to provide a free 3'-OH group for extension.

Topoisomerase II resolves the positive supercoils that accumulate
ahead of the replication fork as the helix is unwound. Without
topoisomerase activity, the torsional stress would stall replication.
Type II topoisomerases cleave both strands simultaneously, pass
a second duplex through the break, and religate — changing
the linking number by two per catalytic cycle.

Protein synthesis begins with mRNA recognition by the 43S
pre-initiation complex, comprising the 40S ribosomal subunit,
eIF2-GTP-Met-tRNA, and accessory factors. The complex scans
5' to 3' until it encounters the AUG start codon in a favorable
Kozak context. The 60S subunit then joins to form the 80S ribosome.

Elongation proceeds by aminoacyl-tRNA accommodation at the A-site,
peptide bond formation catalyzed by the peptidyl transferase center
of the 23S rRNA, and translocation driven by EF-G and GTP hydrolysis.
Each elongation cycle advances the ribosome by exactly one codon,
consuming one GTP equivalent and incorporating one amino acid.

Cell signaling cascades amplify extracellular signals through
phosphorylation networks. The MAPK/ERK pathway converts growth
factor receptor activation into nuclear transcription factor
phosphorylation through RAF, MEK, and ERK kinases. Signal amplitude
and duration encode distinct transcriptional outcomes — transient
ERK activation drives proliferation while sustained activation
drives differentiation in PC12 cells.

CRISPR-Cas9 genome editing exploits the bacterial adaptive immunity
system in which Cas9 endonuclease is guided by a 20-nucleotide
spacer sequence in the sgRNA to cleave complementary genomic DNA.
The PAM sequence NGG immediately 3' of the target site is required
for Cas9 binding and R-loop formation. Double-strand breaks are
repaired by NHEJ (causing indels) or HDR (enabling precise edits).
"""

# Retrieval probe: semantically matches DOC_A (attention mechanisms),
# not DOC_B (molecular biology).
QUERY = "How does the attention mechanism use keys and queries to compute weighted context representations in transformer models?"
134
+
135
+
136
def run_trial(
    llm,
    n_kv_heads: int,
    head_dim: int,
    spec: dict,
    extractor,
    doc_a: str,
    doc_b: str,
    query: str,
    trial_id: int,
    verbose: bool,
) -> dict:
    """Run a single EGR semantic proof trial.

    Ingests two topically disjoint documents into separate KV-cache
    sessions, indexes their extracted state vectors, then checks that the
    attention-related query retrieves session A (the matching topic) first.
    Also measures cold vs. warm TTFT via ``save_state``/``load_state``.

    Args:
        llm: A ``llama_cpp.Llama`` instance; reset between sessions.
        n_kv_heads: KV head count, needed to parse the serialized state blob.
        head_dim: Per-head dimension, needed to parse the state blob.
        spec: Cache spec dict; must contain ``"model_id"``.
        extractor: ``MARStateExtractor`` mapping parsed keys -> state vector.
        doc_a: Document whose topic matches the query.
        doc_b: Distractor document.
        query: Retrieval probe text.
        trial_id: Index of this trial, recorded in the result.
        verbose: If True, print a one-line per-trial summary.

    Returns:
        dict of per-trial metrics (scores, margin, timings, blob size,
        extractor configuration).
    """
    # Imported lazily so importing this module does not require kvcos.
    from kvcos.core.blob_parser import parse_state_blob
    from kvcos.core.manifold_index import IndexEntry, ManifoldIndex

    dim = extractor.output_dim(spec)
    index = ManifoldIndex(dim=dim)

    # ── Session A: cold ingest + state snapshot ───────────
    llm.reset()
    t0 = time.perf_counter()
    llm(doc_a, max_tokens=1, temperature=0.0)
    cold_ms = (time.perf_counter() - t0) * 1000

    state_a = llm.save_state()
    blob_a = bytes(state_a.llama_state)
    blob_mb = len(blob_a) / 1024 / 1024

    # Warm TTFT: restore the snapshot and time a 1-token continuation.
    llm.reset()
    gc.collect()
    t0 = time.perf_counter()
    llm.load_state(state_a)
    llm(" ", max_tokens=1, temperature=0.0)
    warm_ms = (time.perf_counter() - t0) * 1000
    speedup = cold_ms / warm_ms if warm_ms > 0 else float("inf")

    # Parse + extract A
    t0 = time.perf_counter()
    parsed_a = parse_state_blob(blob_a, n_kv_heads=n_kv_heads, head_dim=head_dim)
    parse_ms = (time.perf_counter() - t0) * 1000

    t0 = time.perf_counter()
    ext_a = extractor.extract(parsed_a.keys, spec)
    extract_ms = (time.perf_counter() - t0) * 1000

    entry_a = IndexEntry(
        cache_id="session-a",
        task_description="Transformer attention mechanisms",
        model_id=spec["model_id"],
        created_at=datetime.now(timezone.utc).isoformat(),
        context_len=parsed_a.n_cells,
        l2_norm=ext_a.l2_norm,
    )
    index.add(ext_a.state_vec, entry_a)

    # ── Session B ─────────────────────────────────────────
    llm.reset()
    llm(doc_b, max_tokens=1, temperature=0.0)
    state_b = llm.save_state()
    blob_b = bytes(state_b.llama_state)
    parsed_b = parse_state_blob(blob_b, n_kv_heads=n_kv_heads, head_dim=head_dim)
    ext_b = extractor.extract(parsed_b.keys, spec)

    entry_b = IndexEntry(
        cache_id="session-b",
        task_description="DNA replication and molecular biology",
        model_id=spec["model_id"],
        created_at=datetime.now(timezone.utc).isoformat(),
        context_len=parsed_b.n_cells,
        l2_norm=ext_b.l2_norm,
    )
    index.add(ext_b.state_vec, entry_b)

    # ── Query ─────────────────────────────────────────────
    llm.reset()
    llm(query, max_tokens=1, temperature=0.0)
    state_q = llm.save_state()
    blob_q = bytes(state_q.llama_state)
    parsed_q = parse_state_blob(blob_q, n_kv_heads=n_kv_heads, head_dim=head_dim)

    t0 = time.perf_counter()
    ext_q = extractor.extract(parsed_q.keys, spec)
    t1 = time.perf_counter()
    results = index.search(ext_q.state_vec, top_k=2)
    t2 = time.perf_counter()

    search_ms = (t2 - t1) * 1000
    egr_total_ms = (t2 - t0) * 1000 + extract_ms  # query extract + search + index extract

    # Score extraction: match results back to sessions by description keywords.
    score_a = next((r["similarity"] for r in results if "attention" in r["task_description"].lower() or "transformer" in r["task_description"].lower()), None)
    score_b = next((r["similarity"] for r in results if "dna" in r["task_description"].lower() or "molecular" in r["task_description"].lower()), None)

    if score_a is None or score_b is None:
        # Fallback: use rank position when keyword matching fails.
        score_a = results[0]["similarity"] if results else 0
        score_b = results[1]["similarity"] if len(results) > 1 else 0

    margin = score_a - score_b
    # Correct = the top-ranked result is the attention/transformer session.
    correct = len(results) > 0 and (
        "attention" in results[0]["task_description"].lower()
        or "transformer" in results[0]["task_description"].lower()
    )

    layer_range_used = list(extractor.layer_range) if extractor.layer_range else "spec_default"

    trial = {
        "trial_id": trial_id,
        "n_cells_a": parsed_a.n_cells,
        "n_cells_b": parsed_b.n_cells,
        "n_cells_q": parsed_q.n_cells,
        "score_a": round(score_a, 6),
        "score_b": round(score_b, 6),
        "margin": round(margin, 6),
        "correct": correct,
        "cold_ms": round(cold_ms, 1),
        "warm_ms": round(warm_ms, 1),
        "speedup": round(speedup, 1),
        "parse_ms": round(parse_ms, 1),
        "extract_ms": round(extract_ms, 1),
        "search_ms": round(search_ms, 1),
        "egr_total_ms": round(egr_total_ms, 1),
        "blob_size_mb": round(blob_mb, 1),
        "layer_range_used": layer_range_used,
        "n_layers_used": extractor.layer_range[1] - extractor.layer_range[0] if extractor.layer_range else len(spec.get("extraction_layers", ())),
        "svd_rank": extractor.rank,
        "output_dim": dim,
    }

    if verbose:
        print(f" Trial {trial_id}: margin={margin:.4f} correct={correct} "
              f"cold={cold_ms:.0f}ms warm={warm_ms:.0f}ms "
              f"egr={egr_total_ms:.1f}ms cells_a={parsed_a.n_cells}")

    return trial
276
+
277
+
278
def main() -> int:
    """CLI entry point for the ENGRAM EGR semantic proof benchmark.

    Runs ``--n-trials`` independent trials (a fresh ``Llama`` instance per
    trial), aggregates retrieval margins, TTFT speedups, and EGR latency,
    writes a JSON report to ``--output``, and prints a PASS/FAIL verdict.

    Returns:
        0 on PASS, 1 on FAIL (usable directly as a process exit code).
    """
    parser = argparse.ArgumentParser(description="ENGRAM EGR Semantic Proof")
    parser.add_argument("--model", "-m", required=True, help="Path to GGUF model")
    parser.add_argument("--ctx", type=int, default=16384, help="Context window")
    parser.add_argument("--n-trials", type=int, default=3, help="Number of trials")
    parser.add_argument("--layer-range", type=int, nargs=2, default=[8, 24], help="Layer range start end")
    parser.add_argument("--gate-start", type=int, default=0, help="Skip top N singular values (0=none)")
    parser.add_argument("--compression", default="FP16", help="Compression method: FP16, INT8, Q8_0")
    parser.add_argument("--output", "-o", default="results/egr_semantic_proof.json", help="Output JSON path")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()

    # Guard: the summary statistics below divide by len(trials) and reference
    # loop-scoped names (model_name), so at least one trial is required.
    if args.n_trials < 1:
        parser.error("--n-trials must be >= 1")

    # Heavy deps imported lazily so --help works without them installed.
    from llama_cpp import Llama
    import llama_cpp as lc

    from kvcos.core.cache_spec import make_spec_from_metadata
    from kvcos.core.types import StateExtractionMode
    from kvcos.core.state_extractor import MARStateExtractor

    layer_range = tuple(args.layer_range)

    print(f"ENGRAM EGR Semantic Proof — {args.n_trials} trials")
    print(f"Model: {args.model}")
    print(f"Context: {args.ctx}, Layer range: {layer_range}")
    print()

    trials: list[dict] = []
    for trial_id in range(args.n_trials):
        print(f"Trial {trial_id + 1}/{args.n_trials}...")

        # A fresh model instance per trial keeps trials independent.
        llm = Llama(model_path=args.model, n_ctx=args.ctx, n_gpu_layers=-1, verbose=False)
        meta = llm.metadata
        n_layers = int(meta.get("llama.block_count", "32"))
        n_heads = int(meta.get("llama.attention.head_count", "32"))
        n_kv_heads = int(meta.get("llama.attention.head_count_kv", "8"))
        head_dim = int(meta.get("llama.embedding_length", "4096")) // n_heads
        model_name = meta.get("general.name", Path(args.model).stem)

        spec = make_spec_from_metadata(
            model_id=model_name, n_layers=n_layers, n_heads=n_heads,
            n_kv_heads=n_kv_heads, head_dim=head_dim,
        )

        extractor = MARStateExtractor(
            mode=StateExtractionMode.SVD_PROJECT,
            rank=min(160, head_dim),  # rank cannot exceed head_dim
            layer_range=layer_range,
            gate_start=args.gate_start,
        )

        trial = run_trial(
            llm=llm, n_kv_heads=n_kv_heads, head_dim=head_dim,
            spec=spec, extractor=extractor,
            doc_a=DOC_A.strip(), doc_b=DOC_B.strip(), query=QUERY.strip(),
            trial_id=trial_id, verbose=args.verbose,
        )
        trials.append(trial)

        # Release model memory before the next trial.
        del llm
        gc.collect()

    # ── Summary statistics ────────────────────────────────
    margins = [t["margin"] for t in trials]
    speedups = [t["speedup"] for t in trials]
    egr_times = [t["egr_total_ms"] for t in trials]
    n_correct = sum(1 for t in trials if t["correct"])

    # Sample standard deviations (n-1 denominator), 0.0 for a single trial.
    mean_margin = sum(margins) / len(margins)
    std_margin = math.sqrt(sum((m - mean_margin) ** 2 for m in margins) / max(len(margins) - 1, 1)) if len(margins) > 1 else 0.0
    mean_speedup = sum(speedups) / len(speedups)
    std_speedup = math.sqrt(sum((s - mean_speedup) ** 2 for s in speedups) / max(len(speedups) - 1, 1)) if len(speedups) > 1 else 0.0
    mean_egr = sum(egr_times) / len(egr_times)
    std_egr = math.sqrt(sum((e - mean_egr) ** 2 for e in egr_times) / max(len(egr_times) - 1, 1)) if len(egr_times) > 1 else 0.0

    # Pass criteria: clear margin, perfect retrieval, fast EGR, big speedup.
    passed = (
        mean_margin > 0.05
        and n_correct == args.n_trials
        and mean_egr < 200
        and mean_speedup > 10
    )

    summary = {
        "mean_margin": round(mean_margin, 4),
        "std_margin": round(std_margin, 4),
        "mean_speedup": round(mean_speedup, 1),
        "std_speedup": round(std_speedup, 1),
        "mean_egr_ms": round(mean_egr, 1),
        "std_egr_ms": round(std_egr, 1),
        "n_correct": n_correct,
        "n_trials": args.n_trials,
        "min_margin": round(min(margins), 4),
        "max_margin": round(max(margins), 4),
        "pass": passed,
    }

    # ── Build output JSON ─────────────────────────────────
    doc_a_tokens = trials[0]["n_cells_a"] if trials else 0
    doc_b_tokens = trials[0]["n_cells_b"] if trials else 0
    query_tokens = trials[0]["n_cells_q"] if trials else 0

    output = {
        "metadata": {
            "model": model_name,
            "ctx": args.ctx,
            "layer_range": list(layer_range),
            "n_trials": args.n_trials,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "platform": "Apple M3 / macOS",
            "llama_cpp_version": lc.__version__,
        },
        "documents": {
            "doc_a": {"description": "Transformer attention mechanisms (ML)", "n_tokens": doc_a_tokens},
            "doc_b": {"description": "DNA replication and molecular biology", "n_tokens": doc_b_tokens},
            "query": {"text": QUERY, "n_tokens": query_tokens},
        },
        "trials": trials,
        "summary": summary,
    }

    # ── Write JSON ────────────────────────────────────────
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(output, indent=2))
    print(f"\nResults written to {output_path}")

    # ── Print summary ─────────────────────────────────────
    print()
    sep = "=" * 55
    print(sep)
    print("ENGRAM EGR Semantic Proof — Summary")
    print(sep)
    print(f"Model: {model_name}")
    print(f"Context: {args.ctx}")
    print(f"Layer range: {layer_range}")
    print(f"Trials: {args.n_trials}")
    print()
    print(f"K→K margin: {mean_margin:.4f} ± {std_margin:.4f} (min={min(margins):.4f}, max={max(margins):.4f})")
    print(f"Correct: {n_correct}/{args.n_trials}")
    print(f"Speedup: {mean_speedup:.1f}x ± {std_speedup:.1f}x")
    print(f"EGR ms: {mean_egr:.1f}ms ± {std_egr:.1f}ms")
    print()
    verdict = "PASS" if passed else "FAIL"
    # Collect the specific criteria that failed for the verdict line.
    reasons = []
    if mean_margin <= 0.05:
        reasons.append(f"margin {mean_margin:.4f} <= 0.05")
    if n_correct < args.n_trials:
        reasons.append(f"correct {n_correct}/{args.n_trials}")
    if mean_egr >= 200:
        reasons.append(f"egr {mean_egr:.1f}ms >= 200ms")
    if mean_speedup <= 10:
        reasons.append(f"speedup {mean_speedup:.1f}x <= 10x")
    reason_str = " | ".join(reasons) if reasons else "all criteria met"
    print(f"Verdict: {verdict} ({reason_str})")
    print(sep)

    return 0 if passed else 1
434
+
435
+
436
if __name__ == "__main__":
    # Propagate PASS (0) / FAIL (1) as the process exit code.
    sys.exit(main())
scripts/generate_alignment_dataset.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Generate alignment dataset: SVD state vectors for same docs on two models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import gc
7
+ import sys
8
+ import time
9
+ from pathlib import Path
10
+
11
+ import torch
12
+ from llama_cpp import Llama
13
+
14
+ from kvcos.core.blob_parser import parse_state_blob
15
+ from kvcos.core.cache_spec import make_spec_from_metadata
16
+ from kvcos.core.types import StateExtractionMode
17
+ from kvcos.core.state_extractor import MARStateExtractor
18
+
19
# 50 diverse documents: 5 per domain × 10 domains.
# Index ranges per domain are noted inline; compute_corpus_basis.py and
# generate_alignment_dataset.py both iterate this list in order, so the
# ordering is part of the dataset contract — do not reorder.
DOCUMENTS = [
    # ML/AI (0-4)
    "Gradient descent optimizes neural network parameters by computing partial derivatives of the loss function with respect to each weight and updating weights in the direction that reduces loss.",
    "Convolutional neural networks apply learned filters across spatial dimensions of input images, producing feature maps that detect edges, textures, and higher-level visual patterns.",
    "Recurrent neural networks process sequences by maintaining hidden state that carries information across time steps, enabling the model to capture temporal dependencies in data.",
    "Batch normalization normalizes layer inputs during training by subtracting the mini-batch mean and dividing by the mini-batch standard deviation, accelerating convergence.",
    "Dropout regularization randomly sets neuron activations to zero during training with probability p, preventing co-adaptation and reducing overfitting in deep networks.",
    # Biology (5-9)
    "Mitochondria generate ATP through oxidative phosphorylation, where electrons pass through complexes I through IV of the electron transport chain embedded in the inner membrane.",
    "Photosynthesis in chloroplasts converts carbon dioxide and water into glucose using light energy captured by chlorophyll molecules in the thylakoid membrane.",
    "The immune system distinguishes self from non-self through major histocompatibility complex proteins that present intracellular peptide fragments to T lymphocytes.",
    "Synaptic transmission involves calcium-dependent exocytosis of neurotransmitter vesicles at the presynaptic terminal followed by receptor binding at the postsynaptic membrane.",
    "Enzyme kinetics follow Michaelis-Menten dynamics where reaction velocity approaches Vmax asymptotically as substrate concentration increases relative to the Km constant.",
    # History (10-14)
    "The French Revolution of 1789 abolished feudal privileges and established principles of popular sovereignty that fundamentally altered European political structures.",
    "The Silk Road connected Chinese Han dynasty merchants with Roman traders across Central Asia, facilitating exchange of silk, spices, and metallurgical techniques.",
    "The Industrial Revolution began in eighteenth-century Britain with mechanized textile production, steam power, and factory organization transforming agrarian economies.",
    "Ancient Egyptian civilization developed hieroglyphic writing, monumental architecture, and sophisticated irrigation systems along the Nile River floodplain.",
    "The Renaissance in fifteenth-century Florence produced breakthroughs in perspective painting, humanist philosophy, and anatomical studies by artists like Leonardo.",
    # Cooking (15-19)
    "Maillard reactions between amino acids and reducing sugars at temperatures above 140 degrees Celsius produce the brown color and complex flavors of seared meat.",
    "Emulsification in mayonnaise relies on lecithin from egg yolks to stabilize the dispersion of oil droplets in the aqueous vinegar and lemon juice phase.",
    "Bread leavening occurs when Saccharomyces cerevisiae ferments sugars in dough, producing carbon dioxide gas that becomes trapped in the gluten network.",
    "Caramelization of sucrose begins at 160 degrees Celsius as the disaccharide breaks down into glucose and fructose which then undergo further dehydration.",
    "Brining meat in a salt solution denatures surface proteins and increases water retention through osmotic effects, producing juicier cooked results.",
    # Mathematics (20-24)
    "The fundamental theorem of calculus establishes that differentiation and integration are inverse operations, connecting the derivative of an integral to the original function.",
    "Eigenvalues of a square matrix A satisfy the characteristic equation det(A - lambda I) = 0, with corresponding eigenvectors spanning invariant subspaces.",
    "The central limit theorem states that the sampling distribution of the mean approaches a normal distribution as sample size increases regardless of population shape.",
    "Group theory studies algebraic structures with a binary operation satisfying closure, associativity, identity, and invertibility axioms.",
    "Fourier transforms decompose signals into constituent sinusoidal frequencies, enabling spectral analysis and convolution operations in the frequency domain.",
    # Literature (25-29)
    "Shakespeare's tragedies explore fatal character flaws: Hamlet's indecision, Macbeth's ambition, Othello's jealousy, and King Lear's prideful blindness.",
    "Stream of consciousness narration in Joyce's Ulysses follows Leopold Bloom's interior monologue through Dublin in a single day paralleling Homer's Odyssey.",
    "Magical realism in Garcia Marquez's fiction blends supernatural events with mundane Latin American reality, challenging Western rationalist literary conventions.",
    "The bildungsroman genre traces protagonist maturation from youth to adulthood, exemplified by Dickens's Great Expectations and Bronte's Jane Eyre.",
    "Haiku poetry constrains expression to seventeen syllables across three lines, using seasonal reference words to evoke natural imagery and transient emotion.",
    # Economics (30-34)
    "Supply and demand curves intersect at equilibrium price where quantity supplied equals quantity demanded, with shifts caused by external factors like income changes.",
    "Monetary policy adjusts interest rates and money supply to influence inflation, employment, and economic growth through central bank open market operations.",
    "Game theory models strategic interactions where each player's optimal decision depends on expectations about other players' choices and resulting payoff matrices.",
    "Comparative advantage explains why countries benefit from trade even when one nation produces all goods more efficiently than its trading partner.",
    "Behavioral economics incorporates psychological biases like loss aversion and anchoring into economic models, departing from purely rational agent assumptions.",
    # Physics (35-39)
    "Quantum entanglement creates correlations between particles such that measuring one instantaneously determines the state of the other regardless of separation distance.",
    "General relativity describes gravity as spacetime curvature caused by mass-energy, predicting phenomena like gravitational time dilation and black hole event horizons.",
    "Thermodynamic entropy measures disorder in a system, with the second law stating that total entropy of an isolated system can only increase over time.",
    "Superconductivity occurs below critical temperature when electron pairs form Cooper pairs that flow without resistance through the crystal lattice.",
    "The Heisenberg uncertainty principle establishes a fundamental limit on simultaneously knowing both position and momentum of a quantum particle.",
    # Geography (40-44)
    "Tectonic plate boundaries produce earthquakes at transform faults, volcanic activity at subduction zones, and new oceanic crust at mid-ocean spreading ridges.",
    "The Amazon River basin contains the largest tropical rainforest ecosystem, supporting approximately ten percent of all known species on Earth.",
    "Glacial erosion carved U-shaped valleys, cirques, and fjords during Pleistocene ice ages when ice sheets covered much of northern Europe and North America.",
    "Mediterranean climate zones occur on western continental coasts between latitudes 30 and 45 degrees, characterized by dry summers and mild wet winters.",
    "The Sahara Desert receives less than 25 millimeters of annual rainfall, with extreme diurnal temperature variation exceeding 30 degrees Celsius.",
    # Programming (45-49)
    "Hash tables provide average O(1) lookup time by mapping keys through a hash function to array indices, with collision resolution via chaining or open addressing.",
    "Garbage collection in managed runtimes automatically reclaims memory by tracing reachable objects from root references and freeing unreachable allocations.",
    "TCP ensures reliable data delivery through sequence numbers, acknowledgments, retransmission timers, and flow control using sliding window protocol.",
    "Database normalization eliminates redundancy by decomposing relations into smaller tables satisfying normal forms while preserving functional dependencies.",
    "Version control with git tracks content changes using a directed acyclic graph of commit objects, each containing a tree hash, parent references, and metadata.",
]
82
+
83
+
84
def main() -> int:
    """Generate paired state vectors for the same documents on two models.

    For each model, every document in DOCUMENTS[:n_docs] is ingested into a
    fresh KV cache, the serialized state is parsed, and a rank-128 SVD state
    vector is extracted (gate_start=6 skips the top singular directions).
    The two stacked tensors are saved together via torch.save so downstream
    alignment code can learn a cross-model mapping.

    Returns:
        0 on success (process exit code).
    """
    parser = argparse.ArgumentParser(description="Generate cross-model alignment dataset")
    parser.add_argument("--model-a", required=True, help="Path to model A GGUF")
    parser.add_argument("--model-b", required=True, help="Path to model B GGUF")
    parser.add_argument("--n-docs", type=int, default=50)
    parser.add_argument("--layer-range-a", type=int, nargs=2, default=[8, 24])
    parser.add_argument("--layer-range-b", type=int, nargs=2, default=[8, 24])
    parser.add_argument("--output", "-o", required=True)
    args = parser.parse_args()

    docs = DOCUMENTS[: args.n_docs]

    def extract_all(model_path: str, layer_range: tuple[int, int]) -> torch.Tensor:
        # One state vector per document; returns a (len(docs), dim) tensor.
        llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=-1, verbose=False)
        meta = llm.metadata
        # Metadata defaults match Llama-3-8B-class models (32 layers, 8 KV heads).
        n_layers = int(meta.get("llama.block_count", "32"))
        n_heads = int(meta.get("llama.attention.head_count", "32"))
        n_kv_heads = int(meta.get("llama.attention.head_count_kv", "8"))
        head_dim = int(meta.get("llama.embedding_length", "4096")) // n_heads
        model_name = meta.get("general.name", Path(model_path).stem)

        spec = make_spec_from_metadata(
            model_id=model_name, n_layers=n_layers, n_heads=n_heads,
            n_kv_heads=n_kv_heads, head_dim=head_dim,
        )
        ext = MARStateExtractor(
            mode=StateExtractionMode.SVD_PROJECT,
            rank=128, layer_range=layer_range, gate_start=6,
        )

        print(f"Extracting from {model_name} ({n_layers}L/{n_kv_heads}KV/{head_dim}D)...")
        vecs = []
        for i, doc in enumerate(docs):
            # Fresh cache per document: reset, ingest, snapshot, parse, extract.
            llm.reset()
            llm(doc.strip(), max_tokens=1, temperature=0.0)
            s = llm.save_state()
            p = parse_state_blob(bytes(s.llama_state), n_kv_heads=n_kv_heads, head_dim=head_dim)
            r = ext.extract(p.keys, spec)
            vecs.append(r.state_vec)
            if (i + 1) % 10 == 0:
                print(f" {i + 1}/{len(docs)}")

        # Free the model before loading the next one.
        del llm
        gc.collect()
        return torch.stack(vecs)

    vecs_a = extract_all(args.model_a, tuple(args.layer_range_a))
    vecs_b = extract_all(args.model_b, tuple(args.layer_range_b))

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    torch.save({"vecs_a": vecs_a, "vecs_b": vecs_b, "n_docs": len(docs)}, str(output_path))
    print(f"\nSaved: {output_path} ({vecs_a.shape[0]} docs, dim_a={vecs_a.shape[1]}, dim_b={vecs_b.shape[1]})")
    return 0
138
+
139
+
140
if __name__ == "__main__":
    # Exit with main()'s return code (0 on success).
    sys.exit(main())
scripts/index_knowledge.py ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ scripts/index_knowledge.py β€” Batch index markdown files into .eng binaries.
4
+
5
+ Processes markdown files from a directory (or single file), chunks them,
6
+ fingerprints each chunk, and writes .eng files to the knowledge index.
7
+
8
+ Usage:
9
+ # Index a single file
10
+ python scripts/index_knowledge.py --source path/to/file.md --project engram
11
+
12
+ # Index a directory recursively
13
+ python scripts/index_knowledge.py --source path/to/docs/ --project engram
14
+
15
+ # Re-index changed files only (incremental)
16
+ python scripts/index_knowledge.py --source path/to/docs/ --project engram --incremental
17
+
18
+ # Dry run β€” show what would be indexed
19
+ python scripts/index_knowledge.py --source path/to/docs/ --project engram --dry-run
20
+
21
+ # Force re-index everything
22
+ python scripts/index_knowledge.py --source path/to/docs/ --project engram --force
23
+
24
+ Environment:
25
+ ENGRAM_SESSIONS_DIR Base sessions dir (default: ~/.engram/sessions)
26
+ ENGRAM_KNOWLEDGE_DIR Knowledge index dir (default: ~/.engram/knowledge)
27
+ ENGRAM_MODEL_PATH Path to GGUF model for real fingerprints (optional)
28
+ PYTHONPATH=. Must include project root for kvcos imports
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import argparse
34
+ import hashlib
35
+ import json
36
+ import os
37
+ import sys
38
+ import time
39
+ from datetime import datetime, timezone
40
+ from pathlib import Path
41
+
42
+ # Ensure project root is importable
43
+ sys.path.insert(0, str(Path(__file__).parent.parent))
44
+
45
+ import torch
46
+
47
+ from kvcos.engram.chunker import Chunk, chunk_markdown, eng_filename, slug_from_path
48
+ from kvcos.engram.format import EigramEncoder
49
+ from kvcos.engram.manifest import ChunkRecord, Manifest, _content_hash, _file_hash
50
+
51
+
52
# ── Configuration ────────────────────────────────────────────────────

# Root directory for .eng knowledge binaries; overridable via the
# ENGRAM_KNOWLEDGE_DIR environment variable (tilde is expanded).
KNOWLEDGE_DIR = Path(
    os.environ.get("ENGRAM_KNOWLEDGE_DIR", "~/.engram/knowledge")
).expanduser()

# Directory-name components excluded from markdown discovery (matched
# against each path component, not substrings of the full path).
SKIP_PATTERNS = {
    "node_modules",
    ".venv",
    "__pycache__",
    ".git",
    ".eng",
    "site-packages",
}

# Exact filenames that are never indexed (repository boilerplate).
SKIP_FILES = {
    "LICENSE.md",
    "CHANGELOG.md",
    "SECURITY.md",
}
72
+
73
+
74
+ # ── Fingerprinting ──────────────────────────────────────────────────
75
+
76
+ from kvcos.engram.embedder import get_fingerprint as _get_fingerprint
77
+
78
+
79
+ # ── .eng Writer ──────────────────────────────────────────────────────
80
+
81
# Module-level encoder shared by all chunk writes; presumably EigramEncoder
# keeps no state between encode() calls — confirm in kvcos.engram.format
# before relying on concurrent use.
_encoder = EigramEncoder()
82
+
83
+
84
def _write_knowledge_eng(
    fp_tensor: torch.Tensor,
    chunk: Chunk,
    eng_path: Path,
    session_id: str,
    fp_source: str,
    source_path: str,
    project: str,
    chunk_index: int,
    chunk_total: int,
) -> Path:
    """Write a .eng binary for a knowledge chunk.

    Encodes the chunk fingerprint into the .eng binary format and writes it
    to *eng_path*, plus a ``<eng_path>.meta.json`` sidecar carrying the full
    chunk metadata that does not fit in the binary.

    Args:
        fp_tensor: 1-D fingerprint vector for the chunk (assumes Fourier-v2
            layout; stored as vec_fourier too only when its length is 2048
            — TODO confirm against EigramEncoder's format).
        chunk: Parsed markdown chunk (text, char offsets, headers).
        eng_path: Destination path for the binary; parents are created.
        session_id: Cache/session identifier recorded in binary and sidecar.
        fp_source: Fingerprint provenance label (truncated to 16 chars for
            the binary's model_id field).
        source_path: Original markdown file path (hashed into corpus_hash).
        project: Project name, recorded in the sidecar only.
        chunk_index: Position of this chunk within the source file.
        chunk_total: Total number of chunks in the source file.

    Returns:
        The *eng_path* that was written.
    """
    dim = fp_tensor.shape[0]
    # Knowledge chunks have no per-document/FCB basis vectors: the basis
    # fields are zero-filled placeholders sized to the format's expectations.
    basis_rank = 116
    vec_perdoc = torch.zeros(basis_rank)
    vec_fcdb = torch.zeros(basis_rank)
    joint_center = torch.zeros(128)

    # Truncate description to 256 chars for binary
    description = chunk.text[:256]

    blob = _encoder.encode(
        vec_perdoc=vec_perdoc,
        vec_fcdb=vec_fcdb,
        joint_center=joint_center,
        # Hash of the source path (not contents) — identifies origin file.
        corpus_hash=hashlib.sha256(source_path.encode()).hexdigest()[:32],
        model_id=fp_source[:16],
        basis_rank=basis_rank,
        n_corpus=0,
        layer_range=(0, 0),  # no KV layers involved for knowledge chunks
        context_len=len(chunk.text),
        l2_norm=float(torch.norm(fp_tensor).item()),
        scs=0.0,
        margin_proof=0.0,
        task_description=description,
        cache_id=session_id,
        # vec_fourier slot only accepts the legacy 2048-dim layout.
        vec_fourier=fp_tensor if dim == 2048 else None,
        vec_fourier_v2=fp_tensor,
        confusion_flag=False,
    )

    eng_path.parent.mkdir(parents=True, exist_ok=True)
    with open(eng_path, "wb") as f:
        f.write(blob)

    # Write extended sidecar with full metadata (the binary truncates text).
    meta = {
        "cache_id": session_id,
        "task_description": chunk.text[:500],
        "source_path": source_path,
        "project": project,
        "fp_source": fp_source,
        "chunk_index": chunk_index,
        "chunk_total": chunk_total,
        "char_start": chunk.char_start,
        "char_end": chunk.char_end,
        "headers": list(chunk.headers),
        "ts": time.time(),
        "type": "knowledge",
    }
    meta_path = Path(str(eng_path) + ".meta.json")
    with open(meta_path, "w") as f:
        json.dump(meta, f, indent=2)

    return eng_path
149
+
150
+
151
+ # ── Discovery ────────────────────────────────────────────────────────
152
+
153
def discover_markdown_files(source: Path) -> list[Path]:
    """Find all indexable .md files under source path."""
    # A single-file source is indexable iff it is itself markdown.
    if source.is_file():
        return [source] if source.suffix == ".md" else []

    def _indexable(candidate: Path) -> bool:
        # Reject anything living inside an excluded directory.
        if any(part in SKIP_PATTERNS for part in candidate.parts):
            return False
        # Reject boilerplate filenames.
        if candidate.name in SKIP_FILES:
            return False
        # Reject empty files.
        return candidate.stat().st_size > 0

    return [p for p in sorted(source.rglob("*.md")) if _indexable(p)]
172
+
173
+
174
+ # ── Main Pipeline ────────────────────────────────────────────────────
175
+
176
def index_file(
    source_path: Path,
    project: str,
    manifest: Manifest,
    date_str: str,
    dry_run: bool = False,
    force: bool = False,
) -> tuple[Manifest, int]:
    """
    Index a single markdown file into .eng chunks.

    The file is skipped (0 chunks) when its content hash is already
    registered in the manifest, unless ``force`` is set. In ``dry_run``
    mode the file is chunked and counted but nothing is written and the
    manifest is left untouched.

    Returns:
        (updated_manifest, chunks_written)
    """
    # errors="replace" keeps indexing alive on files with bad encoding.
    content = source_path.read_text(encoding="utf-8", errors="replace")
    content_hash = _content_hash(content)

    # Incremental: skip if unchanged (hash already recorded in manifest)
    if not force and not manifest.needs_reindex(str(source_path), content_hash):
        return manifest, 0

    slug = slug_from_path(str(source_path))
    # Context prefix is prepended to every chunk before fingerprinting.
    context = f"Source: {source_path.name} | Project: {project}"

    # Chunk the content
    chunks = chunk_markdown(
        content,
        max_chars=2000,
        min_chars=100,
        context_prefix=context,
    )

    if dry_run:
        print(f"  [DRY RUN] {source_path.name}: {len(chunks)} chunks, "
              f"{len(content)} chars")
        return manifest, len(chunks)

    # Write .eng for each chunk
    chunk_records: list[ChunkRecord] = []
    project_dir = KNOWLEDGE_DIR / project
    project_dir.mkdir(parents=True, exist_ok=True)

    for chunk in chunks:
        filename = eng_filename(
            project=project,
            slug=slug,
            date=date_str,
            chunk_index=chunk.index,
            chunk_total=len(chunks),
        )
        eng_path = project_dir / filename

        # Fingerprint the chunk text (with context)
        fp_tensor, fp_source = _get_fingerprint(chunk.text)

        # Multi-chunk files get a _cNNN suffix so cache ids stay unique.
        session_id = f"{project}/{slug}"
        if len(chunks) > 1:
            session_id += f"_c{chunk.index + 1:03d}"

        _write_knowledge_eng(
            fp_tensor=fp_tensor,
            chunk=chunk,
            eng_path=eng_path,
            session_id=session_id,
            fp_source=fp_source,
            source_path=str(source_path),
            project=project,
            chunk_index=chunk.index,
            chunk_total=len(chunks),
        )

        chunk_records.append(ChunkRecord(
            eng_path=str(eng_path),
            chunk_index=chunk.index,
            chunk_total=len(chunks),
            char_start=chunk.char_start,
            char_end=chunk.char_end,
            indexed_at=time.time(),
        ))

    # Register in manifest — register() returns a new manifest, which the
    # caller must keep (the instance is treated as immutable here).
    manifest = manifest.register(
        source_path=str(source_path),
        content_hash=content_hash,
        project=project,
        file_size=len(content.encode("utf-8")),
        chunks=chunk_records,
    )

    return manifest, len(chunks)
266
+
267
+
268
def index_batch(
    source: Path,
    project: str,
    incremental: bool = True,
    dry_run: bool = False,
    force: bool = False,
) -> dict:
    """
    Index all markdown files under source path.

    Args:
        source: File or directory to scan for .md files.
        project: Project namespace under KNOWLEDGE_DIR.
        incremental: Recorded in the summary for reporting only — the
            per-file skip decision is driven by the manifest content
            hash (and overridden by ``force``), not by this flag.
        dry_run: Chunk and count without writing anything.
        force: Re-index files even when their content hash is unchanged.

    Returns:
        Summary dict with stats, or ``{"error": ...}`` when no .md files
        are found.
    """
    manifest = Manifest.load()
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    files = discover_markdown_files(source)
    if not files:
        return {"error": f"No .md files found under {source}"}

    stats = {
        "source": str(source),
        "project": project,
        "files_found": len(files),
        "files_indexed": 0,
        "files_skipped": 0,
        "chunks_written": 0,
        "dry_run": dry_run,
        "incremental": incremental,
        "date": date_str,
    }

    # FIX: dropped needless f-prefix on the constant banner string (F541).
    print("\nENGRAM Knowledge Indexer")
    print(f"{'=' * 50}")
    print(f"Source: {source}")
    print(f"Project: {project}")
    print(f"Files found: {len(files)}")
    print(f"Mode: {'DRY RUN' if dry_run else 'LIVE'}")
    print(f"{'=' * 50}\n")

    for i, fpath in enumerate(files, 1):
        # FIX: removed dead local `prev_chunks = manifest.total_chunks`
        # (assigned but never read).
        manifest, n_chunks = index_file(
            source_path=fpath,
            project=project,
            manifest=manifest,
            date_str=date_str,
            dry_run=dry_run,
            force=force,
        )

        if n_chunks > 0:
            stats["files_indexed"] += 1
            stats["chunks_written"] += n_chunks
            status = "INDEXED" if not dry_run else "DRY RUN"
            print(f"  [{i}/{len(files)}] {status}: {fpath.name} "
                  f"β†’ {n_chunks} chunks")
        else:
            stats["files_skipped"] += 1
            print(f"  [{i}/{len(files)}] SKIP (unchanged): {fpath.name}")

    print(f"\n{'=' * 50}")
    print(f"Done. {stats['files_indexed']} files β†’ "
          f"{stats['chunks_written']} chunks")
    if stats["files_skipped"]:
        print(f"Skipped {stats['files_skipped']} unchanged files")
    print(f"Manifest: {manifest.summary()}")
    print(f"{'=' * 50}\n")

    return stats
337
+
338
+
339
+ # ── CLI ──────────────────────────────────────────────────────────────
340
+
341
def main() -> None:
    """CLI entry point: parse arguments and run the batch indexer.

    Exits with status 1 when the source path does not exist or the
    indexer reports an error.
    """
    parser = argparse.ArgumentParser(
        description="Index markdown files into ENGRAM .eng knowledge files"
    )
    parser.add_argument(
        "--source", "-s",
        required=True,
        help="Path to file or directory to index",
    )
    parser.add_argument(
        "--project", "-p",
        default="engram",
        help="Project namespace (default: engram)",
    )
    parser.add_argument(
        "--dry-run", "-n",
        action="store_true",
        help="Show what would be indexed without writing",
    )
    parser.add_argument(
        "--force", "-f",
        action="store_true",
        help="Re-index all files regardless of content hash",
    )
    # BUGFIX: the old `action="store_true", default=True` made this flag a
    # no-op — it could never be set to False. BooleanOptionalAction keeps
    # `--incremental`/`-i` accepted and adds `--no-incremental` to disable.
    parser.add_argument(
        "--incremental", "-i",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Skip unchanged files (default: true)",
    )

    args = parser.parse_args()
    source = Path(args.source).resolve()

    if not source.exists():
        print(f"Error: {source} does not exist", file=sys.stderr)
        sys.exit(1)

    stats = index_batch(
        source=source,
        project=args.project,
        incremental=args.incremental,
        dry_run=args.dry_run,
        force=args.force,
    )

    if "error" in stats:
        print(f"Error: {stats['error']}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
scripts/paper_figures.py ADDED
@@ -0,0 +1,1084 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """ENGRAM Research Paper β€” Figure Generation.
3
+
4
+ Generates all 15 figures for the ENGRAM paper from results/ data files.
5
+ Output: results/figures/*.pdf (LaTeX-compatible, 300 DPI)
6
+
7
+ Usage:
8
+ cd ENGRAM && python scripts/paper_figures.py
9
+ python scripts/paper_figures.py --only fig02 # Single figure
10
+ python scripts/paper_figures.py --list # List all figures
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import json
17
+ import sys
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ import matplotlib
22
+ matplotlib.use("Agg") # Non-interactive backend
23
+ import matplotlib.pyplot as plt
24
+ import matplotlib.ticker as ticker
25
+ import numpy as np
26
+
27
# ── Configuration ────────────────────────────────────────────────────────

# All inputs live under results/; figures are written to results/figures/.
RESULTS_DIR = Path(__file__).parent.parent / "results"
FIGURES_DIR = RESULTS_DIR / "figures"
ABSOLUTE_DIR = RESULTS_DIR / "absolute"
STRESS_DIR = RESULTS_DIR / "stress"

# LaTeX-compatible style: serif fonts, 300 DPI, tight bbox, light grid.
plt.rcParams.update({
    "font.family": "serif",
    "font.size": 11,
    "axes.labelsize": 12,
    "axes.titlesize": 13,
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
    "legend.fontsize": 10,
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "savefig.bbox": "tight",
    "savefig.pad_inches": 0.1,
    "axes.grid": True,
    "grid.alpha": 0.3,
    "axes.spines.top": False,
    "axes.spines.right": False,
})

# Colorblind-safe palette (Paul Tol-style hex values).
COLORS = {
    "blue": "#4477AA",
    "orange": "#EE6677",
    "green": "#228833",
    "purple": "#AA3377",
    "cyan": "#66CCEE",
    "grey": "#BBBBBB",
    "red": "#CC3311",
    "teal": "#009988",
    "yellow": "#CCBB44",
    "indigo": "#332288",
}

# Shared semantic colors for pass/fail encodings across figures.
PASS_COLOR = COLORS["green"]
FAIL_COLOR = COLORS["red"]
69
+
70
+
71
+ # ── Data Loading ─────────────────────────────────────────────────────────
72
+
73
def load_json(path: Path) -> dict[str, Any]:
    """Load JSON file and return parsed dict."""
    with path.open() as fh:
        return json.load(fh)
76
+
77
+
78
def save_figure(fig: plt.Figure, name: str) -> None:
    """Save figure as PDF and PNG."""
    # Create the output directory lazily on first save.
    FIGURES_DIR.mkdir(parents=True, exist_ok=True)
    for ext in ("pdf", "png"):
        fig.savefig(FIGURES_DIR / f"{name}.{ext}", format=ext)
    # Close to release memory when generating many figures in one run.
    plt.close(fig)
    print(f"  Saved: {name}.pdf + .png")
85
+
86
+
87
+ # ── Figure 2: Frequency Combination Comparison ──────────────────────────
88
+
89
def fig02_frequency_comparison() -> None:
    """Bar chart: 6 frequency combos Γ— recall and margin.

    Reads ``absolute/multifreq_comparison.json`` and renders two panels:
    (a) Recall@1 per combination, (b) mean margin scaled by 10Β³. The
    "f0+f1" combination is highlighted in green; others are blue.
    """
    print("Fig 02: Frequency combination comparison...")
    data = load_json(ABSOLUTE_DIR / "multifreq_comparison.json")
    results = data["results"]

    combos = list(results.keys())
    recalls = [results[c]["recall"] * 100 for c in combos]
    margins = [results[c]["margin_mean"] * 1000 for c in combos]  # Γ—1000
    failures = [results[c]["n_failures"] for c in combos]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4.5))

    # Left: Recall
    x = np.arange(len(combos))
    bar_colors = [COLORS["green"] if c == "f0+f1" else COLORS["blue"] for c in combos]
    bars = ax1.bar(x, recalls, color=bar_colors, edgecolor="white", linewidth=0.5)
    ax1.set_xticks(x)
    ax1.set_xticklabels(combos, rotation=30, ha="right")
    ax1.set_ylabel("Recall@1 (%)")
    ax1.set_title("(a) Recall by Frequency Combination")
    ax1.set_ylim(60, 102)
    # Per-bar labels: recall percentage plus failure count.
    for bar, val, nf in zip(bars, recalls, failures):
        ax1.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5,
                 f"{val:.0f}%\n({nf} fail)", ha="center", va="bottom", fontsize=8)

    # Right: Mean margin
    bars2 = ax2.bar(x, margins, color=bar_colors, edgecolor="white", linewidth=0.5)
    ax2.set_xticks(x)
    ax2.set_xticklabels(combos, rotation=30, ha="right")
    ax2.set_ylabel("Mean Margin (Γ—10Β³)")
    ax2.set_title("(b) Mean Discrimination Margin")
    for bar, val in zip(bars2, margins):
        ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.05,
                 f"{val:.1f}", ha="center", va="bottom", fontsize=8)

    fig.suptitle("Multi-Frequency Fingerprint Ablation (N=200)", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig02_frequency_comparison")
128
+
129
+
130
+ # ── Figure 3: Margin Power Law ──────────────────────────────────────────
131
+
132
def fig03_margin_power_law() -> None:
    """Log-log plot: margin vs N for f1 and f0+f1 with fitted power laws.

    Loads the pre-fitted power-law parameters (A, alpha) from the two
    results files and overlays the fitted curves on the measured points.
    """
    print("Fig 03: Margin power law...")
    f1_data = load_json(ABSOLUTE_DIR / "margin_compression_law.json")
    f0f1_data = load_json(ABSOLUTE_DIR / "multifreq_law.json")

    # f1 data — JSON keys are corpus sizes as strings.
    f1_n = [int(n) for n in f1_data["results"].keys()]
    f1_margins = [f1_data["results"][str(n)]["mean_margin"] for n in f1_n]
    f1_alpha = f1_data["alpha"]
    f1_A = f1_data["A"]

    # f0+f1 data
    f0f1_n = [int(n) for n in f0f1_data["results"].keys()]
    f0f1_margins = [f0f1_data["results"][str(n)]["mean_margin"] for n in f0f1_n]
    f0f1_alpha = f0f1_data["alpha"]
    f0f1_A = f0f1_data["A"]

    fig, ax = plt.subplots(figsize=(7, 5))

    # Data points
    ax.scatter(f1_n, f1_margins, color=COLORS["orange"], s=60, zorder=5, label="f1 (data)")
    ax.scatter(f0f1_n, f0f1_margins, color=COLORS["blue"], s=60, zorder=5, label="f0+f1 (data)")

    # Fitted curves: margin = A * N^alpha
    n_fit = np.linspace(3, 250, 200)
    f1_fit = f1_A * n_fit ** f1_alpha
    f0f1_fit = f0f1_A * n_fit ** f0f1_alpha

    ax.plot(n_fit, f1_fit, color=COLORS["orange"], linestyle="--", alpha=0.7,
            label=f"f1 fit: {f1_A:.4f}Β·N^{{{f1_alpha:.3f}}}")
    ax.plot(n_fit, f0f1_fit, color=COLORS["blue"], linestyle="--", alpha=0.7,
            label=f"f0+f1 fit: {f0f1_A:.4f}Β·N^{{{f0f1_alpha:.3f}}}")

    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel("Corpus Size N")
    ax.set_ylabel("Mean Discrimination Margin")
    ax.set_title("Margin Power Law: Graceful Degradation")
    ax.legend(loc="upper right")
    # Plain (non-scientific) tick labels on the log x-axis.
    ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
    ax.set_xticks([5, 10, 20, 50, 100, 200])

    # Annotation
    ax.annotate(
        f"f0+f1: Ξ±={f0f1_alpha:.3f} (shallower)\nf1: Ξ±={f1_alpha:.3f}",
        xy=(100, f0f1_A * 100 ** f0f1_alpha), xytext=(30, 0.003),
        arrowprops={"arrowstyle": "->", "color": COLORS["grey"]},
        fontsize=9, bbox={"boxstyle": "round,pad=0.3", "facecolor": "wheat", "alpha": 0.5}
    )

    fig.tight_layout()
    save_figure(fig, "fig03_margin_power_law")
185
+
186
+
187
+ # ── Figure 4: Recall vs N β€” Fourier vs FCDB ─────────────────────────────
188
+
189
def fig04_recall_vs_n() -> None:
    """Fourier f0+f1 recall vs FCDB recall across corpus sizes.

    Same-model Fourier results come from ``absolute/multifreq_law.json``;
    cross-model FCDB results from ``stress/STRESS_SUMMARY.json``. A
    vertical marker annotates the FCDB collapse observed at N=100.
    """
    print("Fig 04: Recall vs N (Fourier vs FCDB)...")
    f0f1_data = load_json(ABSOLUTE_DIR / "multifreq_law.json")
    stress_data = load_json(STRESS_DIR / "STRESS_SUMMARY.json")

    # Fourier f0+f1
    fourier_n = [int(n) for n in f0f1_data["results"].keys()]
    fourier_recall = [f0f1_data["results"][str(n)]["recall"] * 100 for n in fourier_n]

    # FCDB cross-model
    fcdb_map = stress_data["recall_at_1_vs_n_fcdb"]
    fcdb_n = [int(n) for n in fcdb_map.keys()]
    fcdb_recall = [v * 100 for v in fcdb_map.values()]

    fig, ax = plt.subplots(figsize=(7, 5))

    ax.plot(fourier_n, fourier_recall, "o-", color=COLORS["blue"], linewidth=2,
            markersize=7, label="Fourier f0+f1 (same-model)", zorder=5)
    ax.plot(fcdb_n, fcdb_recall, "s--", color=COLORS["orange"], linewidth=2,
            markersize=7, label="FCDB (cross-model)", zorder=5)

    # Collapse annotation
    ax.axvline(x=100, color=COLORS["red"], linestyle=":", alpha=0.5)
    ax.annotate("FCDB collapse\n(N=100)", xy=(100, 30), xytext=(140, 50),
                arrowprops={"arrowstyle": "->", "color": COLORS["red"]},
                fontsize=9, color=COLORS["red"])

    ax.set_xlabel("Corpus Size N")
    ax.set_ylabel("Recall@1 (%)")
    ax.set_title("Retrieval Recall vs Corpus Size")
    ax.legend(loc="lower left")
    ax.set_ylim(-5, 105)
    ax.set_xlim(0, 210)

    fig.tight_layout()
    save_figure(fig, "fig04_recall_vs_n")
226
+
227
+
228
+ # ── Figure 5: Cross-Model Strategy Comparison ───────────────────────────
229
+
230
def fig05_cross_model_strategies() -> None:
    """Horizontal bar chart: 9 cross-model methods Γ— margin.

    The data is hard-coded (name, margin, pass-flag) rather than loaded
    from results/ — NOTE(review): keep in sync with the source experiment.
    """
    print("Fig 05: Cross-model strategy comparison...")

    # (label, retrieval margin, passed?) — sorted worst to best.
    strategies = [
        ("CCA", -0.420, False),
        ("Residual FCB", -0.382, False),
        ("Procrustes", -0.104, False),
        ("RR (K=20)", -0.066, False),
        ("FCB+ridge", -0.017, False),
        ("Contrastive", 0.001, True),
        ("JCB", 0.011, True),
        ("JCB+delta", 0.037, True),
        ("FCDB", 0.124, True),
    ]

    names = [s[0] for s in strategies]
    margins = [s[1] for s in strategies]
    colors = [PASS_COLOR if s[2] else FAIL_COLOR for s in strategies]

    fig, ax = plt.subplots(figsize=(8, 5))
    y_pos = np.arange(len(names))

    bars = ax.barh(y_pos, margins, color=colors, edgecolor="white", linewidth=0.5, height=0.7)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(names)
    ax.set_xlabel("Retrieval Margin")
    ax.set_title("Cross-Model Transfer Strategies (Llama 3B β†’ 8B)")
    ax.axvline(x=0, color="black", linewidth=0.8)

    # Value labels — placed just outside each bar, on the signed side.
    for bar, val in zip(bars, margins):
        x_offset = 0.005 if val >= 0 else -0.005
        ha = "left" if val >= 0 else "right"
        ax.text(val + x_offset, bar.get_y() + bar.get_height() / 2,
                f"{val:+.3f}", ha=ha, va="center", fontsize=9, fontweight="bold")

    # Legend
    from matplotlib.patches import Patch
    legend_elements = [Patch(facecolor=PASS_COLOR, label="PASS (margin > 0)"),
                       Patch(facecolor=FAIL_COLOR, label="FAIL (margin ≀ 0)")]
    ax.legend(handles=legend_elements, loc="lower right")

    fig.tight_layout()
    save_figure(fig, "fig05_cross_model_strategies")
275
+
276
+
277
+ # ── Figure 6: CKA Layer Similarity ──────────────────────────────────────
278
+
279
def fig06_cka_layers() -> None:
    """CKA similarity per layer: within-family vs cross-family.

    Plots per-layer CKA curves from ``FAMILY_CKA.json`` (Llama 3B↔8B)
    and ``FAMILY_CKA_CROSS.json`` (Llama↔Qwen), with a 0.95 reference
    line and the minimum of each curve annotated.
    """
    print("Fig 06: CKA layer similarity...")
    within = load_json(ABSOLUTE_DIR / "FAMILY_CKA.json")
    cross = load_json(ABSOLUTE_DIR / "FAMILY_CKA_CROSS.json")

    within_cka = within["layer_ckas"]
    cross_cka = cross["layer_ckas"]
    # Assumes both files cover the same number of layers — TODO confirm.
    layers = list(range(len(within_cka)))

    fig, ax = plt.subplots(figsize=(8, 4.5))

    ax.plot(layers, within_cka, "o-", color=COLORS["blue"], markersize=5, linewidth=1.5,
            label=f"Within-family (Llama 3B↔8B), ΞΌ={within['mean_cka']:.3f}")
    ax.plot(layers, cross_cka, "s--", color=COLORS["orange"], markersize=5, linewidth=1.5,
            label=f"Cross-family (Llama↔Qwen), ΞΌ={cross['mean_cka']:.3f}")

    ax.axhline(y=0.95, color=COLORS["grey"], linestyle=":", alpha=0.5, label="0.95 threshold")
    ax.set_xlabel("Layer Index")
    ax.set_ylabel("CKA Similarity")
    ax.set_title("Centered Kernel Alignment Across Layers")
    ax.legend(loc="lower left", fontsize=9)
    ax.set_ylim(0.85, 1.0)

    # Annotate min of each curve.
    min_idx_w = int(np.argmin(within_cka))
    min_idx_c = int(np.argmin(cross_cka))
    ax.annotate(f"min={within_cka[min_idx_w]:.3f}", xy=(min_idx_w, within_cka[min_idx_w]),
                xytext=(min_idx_w + 2, within_cka[min_idx_w] - 0.01),
                fontsize=8, color=COLORS["blue"])
    ax.annotate(f"min={cross_cka[min_idx_c]:.3f}", xy=(min_idx_c, cross_cka[min_idx_c]),
                xytext=(min_idx_c + 2, cross_cka[min_idx_c] - 0.01),
                fontsize=8, color=COLORS["orange"])

    fig.tight_layout()
    save_figure(fig, "fig06_cka_layers")
315
+
316
+
317
+ # ── Figure 7: Domain Confusion Before/After ──────────────────────────────
318
+
319
def fig07_confusion_matrix() -> None:
    """Heatmaps: f1 confusion vs f0+f1 confusion across domains.

    Keys in the confusion dicts are "src -> dst" strings; the domain list
    is the sorted union of all sources and destinations so both panels
    share the same axes.
    """
    print("Fig 07: Domain confusion matrix...")
    data = load_json(ABSOLUTE_DIR / "confusion_analysis.json")

    domains = sorted({
        k.split(" -> ")[0] for k in data["f1_confusion"].keys()
    } | {
        k.split(" -> ")[1] for k in data["f1_confusion"].keys()
    })

    def build_matrix(confusion_dict: dict[str, int]) -> np.ndarray:
        """Turn "src -> dst" counts into a dense (n_domains, n_domains) matrix."""
        n = len(domains)
        mat = np.zeros((n, n))
        for key, count in confusion_dict.items():
            src, dst = key.split(" -> ")
            # list.index is O(n) but the domain list is tiny.
            if src in domains and dst in domains:
                i = domains.index(src)
                j = domains.index(dst)
                mat[i, j] = count
        return mat

    f1_mat = build_matrix(data["f1_confusion"])
    best_mat = build_matrix(data["best_confusion"])

    # Short domain labels
    short_labels = [d[:6] for d in domains]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    im1 = ax1.imshow(f1_mat, cmap="Reds", aspect="auto", interpolation="nearest")
    ax1.set_xticks(range(len(domains)))
    ax1.set_yticks(range(len(domains)))
    ax1.set_xticklabels(short_labels, rotation=45, ha="right", fontsize=8)
    ax1.set_yticklabels(short_labels, fontsize=8)
    ax1.set_title("(a) f1 Only β€” 28 Failures")
    ax1.set_xlabel("Confused With")
    ax1.set_ylabel("True Domain")
    fig.colorbar(im1, ax=ax1, shrink=0.8)

    im2 = ax2.imshow(best_mat, cmap="Blues", aspect="auto", interpolation="nearest")
    ax2.set_xticks(range(len(domains)))
    ax2.set_yticks(range(len(domains)))
    ax2.set_xticklabels(short_labels, rotation=45, ha="right", fontsize=8)
    ax2.set_yticklabels(short_labels, fontsize=8)
    ax2.set_title("(b) f0+f1 β€” 4 Failures")
    ax2.set_xlabel("Confused With")
    ax2.set_ylabel("True Domain")
    fig.colorbar(im2, ax=ax2, shrink=0.8)

    fig.suptitle("Domain Confusion Analysis (N=200)", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig07_confusion_matrix")
372
+
373
+
374
+ # ── Figure 8: Domain Recall Radar ────────────────────────────────────────
375
+
376
def fig08_domain_recall_radar() -> None:
    """Radar chart: per-domain recall with f0+f1.

    Reads ``domain_recall`` from the confusion analysis and draws a
    closed polygon on a polar axis, annotating the weakest domain.
    """
    print("Fig 08: Domain recall radar...")
    data = load_json(ABSOLUTE_DIR / "confusion_analysis.json")
    domain_recall = data["domain_recall"]

    categories = list(domain_recall.keys())
    values = [domain_recall[c] * 100 for c in categories]

    # Close the polygon by repeating the first vertex at the end.
    values_closed = values + [values[0]]
    n = len(categories)
    angles = [i / n * 2 * np.pi for i in range(n)]
    angles_closed = angles + [angles[0]]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"projection": "polar"})

    ax.plot(angles_closed, values_closed, "o-", color=COLORS["blue"], linewidth=2, markersize=6)
    ax.fill(angles_closed, values_closed, color=COLORS["blue"], alpha=0.15)

    ax.set_xticks(angles)
    ax.set_xticklabels([c.replace("_", "\n") for c in categories], fontsize=9)
    ax.set_ylim(80, 102)
    ax.set_yticks([85, 90, 95, 100])
    ax.set_yticklabels(["85%", "90%", "95%", "100%"], fontsize=8)
    ax.set_title("Per-Domain Recall@1 (f0+f1, N=200)", pad=20)

    # Annotate minimum recall domain in red.
    min_idx = int(np.argmin(values))
    ax.annotate(f"{values[min_idx]:.0f}%",
                xy=(angles[min_idx], values[min_idx]),
                xytext=(angles[min_idx] + 0.2, values[min_idx] - 3),
                fontsize=9, fontweight="bold", color=COLORS["red"])

    fig.tight_layout()
    save_figure(fig, "fig08_domain_recall_radar")
412
+
413
+
414
+ # ── Figure 9: HNSW Benchmark ────────────────────────────────────────────
415
+
416
def fig09_hnsw_benchmark() -> None:
    """Bar chart: HNSW vs brute-force latency."""
    print("Fig 09: HNSW benchmark...")
    data = load_json(ABSOLUTE_DIR / "HNSW_BENCH.json")

    fig, (ax_lat, ax_rec) = plt.subplots(1, 2, figsize=(9, 4))

    method_names = ["Brute-Force", "HNSW"]
    method_colors = [COLORS["orange"], COLORS["blue"]]

    # (a) Query latency in microseconds, value label above each bar.
    lat_values = [data["bf_latency_us"], data["hnsw_latency_us"]]
    lat_bars = ax_lat.bar(method_names, lat_values, color=method_colors,
                          edgecolor="white", width=0.5)
    ax_lat.set_ylabel("Latency (ΞΌs)")
    ax_lat.set_title(f"(a) Search Latency β€” {data['speedup']:.1f}Γ— Speedup")
    for rect, us in zip(lat_bars, lat_values):
        ax_lat.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 3,
                    f"{us:.1f} ΞΌs", ha="center", va="bottom", fontsize=10)

    # (b) Recall@1 as a percentage — y-range zoomed to show the tiny gap.
    rec_values = [data["bruteforce_recall"] * 100, data["hnsw_recall"] * 100]
    rec_bars = ax_rec.bar(method_names, rec_values, color=method_colors,
                          edgecolor="white", width=0.5)
    ax_rec.set_ylabel("Recall@1 (%)")
    ax_rec.set_title("(b) Recall Preserved")
    ax_rec.set_ylim(98, 100.5)
    for rect, pct in zip(rec_bars, rec_values):
        ax_rec.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 0.05,
                    f"{pct:.1f}%", ha="center", va="bottom", fontsize=10)

    fig.suptitle("HNSW Index Benchmark (N=200)", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig09_hnsw_benchmark")
447
+
448
+
449
+ # ── Figure 10: INT8 Compression ──────────────────────────────────────────
450
+
451
def fig10_int8_compression() -> None:
    """Bar chart: FP16 vs INT8 comparison.

    All values are hard-coded from the INT8 quantization experiment —
    NOTE(review): keep in sync with the source measurements.
    """
    print("Fig 10: INT8 compression...")

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4))

    # Size comparison (MB) for two context lengths.
    configs = ["591 tok", "6,403 tok"]
    fp16_sizes = [73.9, 800.4]
    int8_sizes = [37.5, 406.5]
    x = np.arange(len(configs))
    w = 0.35
    ax1.bar(x - w / 2, fp16_sizes, w, label="FP16", color=COLORS["orange"], edgecolor="white")
    ax1.bar(x + w / 2, int8_sizes, w, label="INT8", color=COLORS["blue"], edgecolor="white")
    ax1.set_xticks(x)
    ax1.set_xticklabels(configs)
    ax1.set_ylabel("File Size (MB)")
    ax1.set_title("(a) .eng File Size β€” 1.97Γ— Compression")
    ax1.legend()

    # Quality metrics: cosine similarity plus margins before/after INT8.
    metrics = ["Cosine\nSimilarity", "Margin\n(FP16)", "Margin\n(INT8)"]
    values = [0.99998, 0.381, 0.262]
    bar_colors = [COLORS["green"], COLORS["blue"], COLORS["cyan"]]
    bars = ax2.bar(metrics, values, color=bar_colors, edgecolor="white", width=0.5)
    ax2.set_ylabel("Value")
    ax2.set_title("(b) Quality Preservation")
    # Cosine similarity needs 5 decimals to be distinguishable from 1.0.
    for bar, val in zip(bars, values):
        ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
                 f"{val:.5f}" if val > 0.9 else f"{val:.3f}",
                 ha="center", va="bottom", fontsize=9)

    fig.suptitle("INT8 Quantization Impact", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig10_int8_compression")
486
+
487
+
488
+ # ── Figure 12: Margin Distribution ───────────────────────────────────────
489
+
490
def fig12_margin_distribution() -> None:
    """Distribution comparison: f1 vs f0+f1 summary statistics.

    Plots mean / median / min margin (Γ—10Β³) for the two key frequency
    combinations from ``multifreq_comparison.json``.
    """
    print("Fig 12: Margin distribution...")
    data = load_json(ABSOLUTE_DIR / "multifreq_comparison.json")
    results = data["results"]

    fig, ax = plt.subplots(figsize=(7, 4.5))

    # We'll show key statistics as a visualization
    combos = ["f1", "f0+f1"]
    means = [results[c]["margin_mean"] * 1000 for c in combos]
    medians = [results[c]["margin_median"] * 1000 for c in combos]
    mins = [results[c]["margin_min"] * 1000 for c in combos]

    x = np.arange(len(combos))
    w = 0.25
    ax.bar(x - w, means, w, label="Mean", color=COLORS["blue"], edgecolor="white")
    ax.bar(x, medians, w, label="Median", color=COLORS["green"], edgecolor="white")
    ax.bar(x + w, mins, w, label="Min", color=COLORS["red"], edgecolor="white")

    ax.set_xticks(x)
    ax.set_xticklabels(combos, fontsize=12)
    ax.set_ylabel("Margin (Γ—10Β³)")
    ax.set_title("Margin Statistics: f1 vs f0+f1 (N=200)")
    ax.legend()
    ax.axhline(y=0, color="black", linewidth=0.5)

    # Annotate improvement.
    # FIX: dropped the needless f-prefix on this constant string (F541).
    ax.annotate(
        "+76% mean margin\n25/28 failures fixed",
        xy=(1, means[1]), xytext=(1.3, means[1] + 1),
        arrowprops={"arrowstyle": "->", "color": COLORS["green"]},
        fontsize=9, bbox={"boxstyle": "round,pad=0.3", "facecolor": "#e6ffe6", "alpha": 0.8}
    )

    fig.tight_layout()
    save_figure(fig, "fig12_margin_distribution")
527
+
528
+
529
+ # ── Figure 13: FCDB Stability-Discrimination Tradeoff ────────────────────
530
+
531
def fig13_fcdb_tradeoff() -> None:
    """Dual-axis: basis stability vs retrieval margin vs corpus size.

    Hard-coded values taken from PAPER_TABLE.md. Stability is available
    at all four corpus sizes; the cross-model margin was only measured
    at N=50 and N=200.
    """
    print("Fig 13: FCDB stability-discrimination tradeoff...")

    # Data from PAPER_TABLE.md
    n_vals = [50, 100, 125, 200]
    stability = [0.82, 0.906, 0.983, 0.999]  # subspace agreement
    # FIX: removed dead local `margin = [0.124, None, None, 0.013]`
    # (never used; the measured points are margin_n / margin_v below).
    margin_n = [50, 200]
    margin_v = [0.124, 0.013]

    fig, ax1 = plt.subplots(figsize=(7, 5))
    ax2 = ax1.twinx()

    # Stability (left axis)
    line1 = ax1.plot(n_vals, stability, "o-", color=COLORS["blue"], linewidth=2,
                     markersize=8, label="Basis Stability", zorder=5)
    ax1.set_xlabel("Corpus Size N")
    ax1.set_ylabel("Subspace Agreement", color=COLORS["blue"])
    ax1.tick_params(axis="y", labelcolor=COLORS["blue"])
    ax1.set_ylim(0.7, 1.05)

    # Margin (right axis)
    line2 = ax2.plot(margin_n, margin_v, "s--", color=COLORS["orange"], linewidth=2,
                     markersize=8, label="Retrieval Margin", zorder=5)
    ax2.set_ylabel("Cross-Model Margin", color=COLORS["orange"])
    ax2.tick_params(axis="y", labelcolor=COLORS["orange"])
    ax2.set_ylim(-0.01, 0.15)

    # Threshold line
    ax1.axhline(y=0.99, color=COLORS["grey"], linestyle=":", alpha=0.5)
    ax1.annotate("Stable (β‰₯0.99)", xy=(125, 0.99), fontsize=8, color=COLORS["grey"])

    # Combined legend (artists come from two different axes).
    lines = line1 + line2
    labels = [ln.get_label() for ln in lines]
    ax1.legend(lines, labels, loc="center left")

    ax1.set_title("FCDB Stability–Discrimination Tradeoff")
    fig.tight_layout()
    save_figure(fig, "fig13_fcdb_tradeoff")
572
+
573
+
574
+ # ── Figure 14: TTFT Speedup ─────────────────────────────────────────────
575
+
576
def fig14_ttft_speedup() -> None:
    """Grouped bar chart: cold vs warm TTFT.

    Panel (a): cold vs warm TTFT per configuration on a log scale.
    Panel (b): restoration speedup factors with bold value labels.
    Saves fig14_ttft_speedup via save_figure().
    """
    print("Fig 14: TTFT speedup...")

    configs = ["3B / 4K tok", "3B / 16K tok", "8B / 591 tok"]
    cold_ttft = [11439, 94592, 3508]  # ms
    warm_ttft = [170, 1777, 116]  # ms
    speedups = [67.2, 53.2, 30.8]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4.5))

    x = np.arange(len(configs))
    w = 0.35
    ax1.bar(x - w / 2, cold_ttft, w, label="Cold TTFT", color=COLORS["orange"], edgecolor="white")
    ax1.bar(x + w / 2, warm_ttft, w, label="Warm TTFT", color=COLORS["blue"], edgecolor="white")
    ax1.set_xticks(x)
    ax1.set_xticklabels(configs, fontsize=9)
    ax1.set_ylabel("TTFT (ms)")
    ax1.set_title("(a) Time to First Token")
    ax1.set_yscale("log")  # cold and warm differ by ~2 orders of magnitude
    ax1.legend()

    # Speedup bars (categorical x, so ticks/labels already come from `configs`)
    bars = ax2.bar(configs, speedups, color=COLORS["green"], edgecolor="white", width=0.5)
    ax2.set_ylabel("Speedup (Γ—)")
    ax2.set_title("(b) KV Cache Restoration Speedup")
    # Bug fix: calling set_xticklabels() without a matching set_xticks() emits
    # matplotlib's "FixedFormatter without FixedLocator" warning and can
    # mislabel ticks; tick_params just resizes the existing categorical labels.
    ax2.tick_params(axis="x", labelsize=9)
    for bar, val in zip(bars, speedups):
        ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5,
                 f"{val:.1f}Γ—", ha="center", va="bottom", fontsize=10, fontweight="bold")

    fig.suptitle("KV Cache Warm Start Performance", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig14_ttft_speedup")
610
+
611
+
612
+ # ── Figure 15: EGR Overhead Scaling ──────────────────────────────────────
613
+
614
def fig15_egr_overhead() -> None:
    """Scatter plot: EGR fingerprint-extraction overhead vs context length.

    Each point is one measured configuration; a dotted reference line marks
    the 50 ms budget.  Saves fig15_egr_overhead via save_figure().
    """
    print("Fig 15: EGR overhead scaling...")

    # (tokens, overhead in ms, annotation label, point color)
    measurements = [
        (600, 30.6, "16 layers\n(8-24)", COLORS["blue"]),
        (6403, 48.8, "16 layers\n(8-24)", COLORS["blue"]),
        (600, 84.0, "32 layers\n(all)", COLORS["orange"]),
    ]

    fig, ax = plt.subplots(figsize=(6, 4.5))

    for n_tok, cost_ms, label, point_color in measurements:
        ax.scatter(n_tok, cost_ms, s=100, color=point_color, zorder=5,
                   edgecolor="white", linewidth=1.5)
        ax.annotate(label, xy=(n_tok, cost_ms),
                    xytext=(n_tok + 200, cost_ms + 2), fontsize=9)

    ax.set_xlabel("Context Length (tokens)")
    ax.set_ylabel("EGR Overhead (ms)")
    ax.set_title("Fingerprint Extraction Overhead")
    ax.set_xlim(0, 7000)
    ax.set_ylim(20, 95)

    # Reference line: the 50 ms overhead budget
    ax.axhline(y=50, color=COLORS["grey"], linestyle=":", alpha=0.3)
    ax.text(100, 51, "50ms threshold", fontsize=8, color=COLORS["grey"])

    fig.tight_layout()
    save_figure(fig, "fig15_egr_overhead")
641
+
642
+
643
+ # ── Figure 1: Architecture Diagram (Mermaid) ────────────────────────────
644
+
645
def fig01_architecture_mermaid() -> None:
    """Generate a Mermaid flowchart source file for the system architecture.

    Writes fig01_architecture.mmd into FIGURES_DIR; no matplotlib involved.
    The trailing backslash after the opening quotes is a line continuation
    (non-raw string), so the diagram starts directly with the init directive.
    """
    print("Fig 01: Architecture diagram (Mermaid)...")
    mermaid = """\
%%{init: {'theme': 'base', 'themeVariables': {'primaryColor': '#4477AA', 'primaryTextColor': '#fff', 'primaryBorderColor': '#335588', 'lineColor': '#666', 'secondaryColor': '#EE6677', 'tertiaryColor': '#228833'}}}%%
flowchart TD
    A[LLM Runtime<br/>llama.cpp] -->|KV cache blob| B[Blob Parser]
    B -->|Layer keys K| C[Fourier Fingerprint<br/>f0+f1 DFT]
    C -->|2048-dim vector| D{Storage}
    D -->|.eng binary| E[EIGENGRAM File<br/>v1.2 format]
    D -->|HNSW index| F[FAISS IndexHNSW<br/>M=32]

    G[Query Session] -->|New KV cache| C
    C -->|Query fingerprint| H[Geodesic Retrieval]
    F -->|Top-k candidates| H

    H --> I{Stage 0<br/>Prior Check}
    I -->|chronic failure| J[Skip / LOW]
    I -->|ok| K{Stage 1<br/>HNSW Search}
    K -->|HIGH / MEDIUM| L[Result]
    K -->|below threshold| M{Stage 2<br/>Trajectory}
    M -->|interpolation| N{Stage 3<br/>Constraints}
    N --> O{Stage 4<br/>Metadata}
    O --> L

    subgraph Confidence Tracking
        P[IndexC<br/>SQLite] ---|update| I
        L ---|record| P
    end

    style A fill:#4477AA,stroke:#335588,color:#fff
    style C fill:#228833,stroke:#1a6625,color:#fff
    style E fill:#EE6677,stroke:#cc5566,color:#fff
    style F fill:#66CCEE,stroke:#55aabb,color:#000
    style H fill:#AA3377,stroke:#882266,color:#fff
"""
    mermaid_path = FIGURES_DIR / "fig01_architecture.mmd"
    mermaid_path.write_text(mermaid)
    # Plain string: the old f-string had no placeholders (ruff F541).
    print("  Saved: fig01_architecture.mmd")
684
+
685
+
686
+ # ── Figure 11: Retrieval Pipeline (Mermaid) ──────────────────────────────
687
+
688
def fig11_retrieval_pipeline_mermaid() -> None:
    """Generate a Mermaid diagram source for the 4-stage geodesic retrieval.

    Writes fig11_retrieval_pipeline.mmd into FIGURES_DIR; no matplotlib
    involved.  Stages 0-4 and the confidence outcomes (HIGH/MEDIUM/LOW/SKIP)
    mirror the pipeline described in the paper.
    """
    print("Fig 11: Retrieval pipeline (Mermaid)...")
    mermaid = """\
%%{init: {'theme': 'base'}}%%
flowchart LR
    Q[Query<br/>Fingerprint] --> S0

    S0[Stage 0<br/>Prior Preemption<br/><i>IndexC chronic<br/>failure check</i>]
    S0 -->|"pass"| S1
    S0 -->|"preempt"| SKIP[SKIP<br/>confidence=LOW]

    S1[Stage 1<br/>HNSW Search<br/><i>cosine top-k</i>]
    S1 -->|"margin > 0.005"| HIGH[HIGH<br/>199/200 docs]
    S1 -->|"margin 0.001-0.005"| MED[MEDIUM]
    S1 -->|"margin < 0.001"| S2

    S2[Stage 2<br/>Trajectory<br/><i>interpolation<br/>w=0.3</i>]
    S2 --> S3

    S3[Stage 3<br/>Negative<br/>Constraints<br/><i>apophatic layer</i>]
    S3 --> S4

    S4[Stage 4<br/>Metadata<br/>Disambig<br/><i>domain + keywords<br/>+ norms</i>]
    S4 --> LOW[LOW<br/>1/200 docs<br/><i>doc_146</i>]

    style S0 fill:#66CCEE,stroke:#55aabb
    style S1 fill:#4477AA,stroke:#335588,color:#fff
    style S2 fill:#CCBB44,stroke:#aa9933
    style S3 fill:#EE6677,stroke:#cc5566,color:#fff
    style S4 fill:#AA3377,stroke:#882266,color:#fff
    style HIGH fill:#228833,stroke:#1a6625,color:#fff
    style MED fill:#CCBB44,stroke:#aa9933
    style LOW fill:#EE6677,stroke:#cc5566,color:#fff
    style SKIP fill:#BBBBBB,stroke:#999999
"""
    mermaid_path = FIGURES_DIR / "fig11_retrieval_pipeline.mmd"
    mermaid_path.write_text(mermaid)
    # Plain string: the old f-string had no placeholders (ruff F541).
    print("  Saved: fig11_retrieval_pipeline.mmd")
727
+
728
+
729
+ # ── Consolidated Findings JSON ───────────────────────────────────────────
730
+
731
def generate_findings() -> None:
    """Consolidate all key metrics into a single findings.json.

    The dict below is a hand-curated snapshot of every headline result
    (retrieval, ablations, HNSW, compression, TTFT, cross-model transfer,
    CKA, per-domain recall, file format).  Written pretty-printed to
    RESULTS_DIR/paper/findings.json.
    """
    print("Generating consolidated findings...")

    findings = {
        "title": "ENGRAM Protocol β€” Consolidated Research Findings",
        "date": "2026-04-03",
        "hardware": {
            "platform": "Apple M3, 24GB RAM",
            "gpu": "Metal (n_gpu_layers=-1)",
            "os": "macOS Darwin 25.4.0",
            "llama_cpp": "0.3.19",
            "faiss": "1.13.2",
            "torch": "2.11.0",
        },
        "same_model_retrieval": {
            "method": "Fourier f0+f1 fingerprint",
            "corpus_size": 200,
            "n_domains": 10,
            "recall_at_1": 0.98,
            "n_failures": 4,
            "mean_margin": 0.007201,
            "margin_power_law": {"A": 0.021342, "alpha": -0.2065},
            "f1_only_recall": 0.86,
            "f1_only_failures": 28,
            "improvement_over_f1": "25/28 failures fixed (+76% mean margin)",
            "ml_math_confusion_reduction": "81.5%",
        },
        "frequency_ablation": {
            "combos_tested": 6,
            "best": "f0+f1",
            "results": {
                "f1": {"recall": 0.86, "margin": 0.004087},
                "f2": {"recall": 0.715, "margin": 0.002196},
                "f1+f2": {"recall": 0.95, "margin": 0.004744},
                "f1+f2+f3": {"recall": 0.95, "margin": 0.004129},
                "f0+f1": {"recall": 0.98, "margin": 0.007201},
                "f1+f3": {"recall": 0.89, "margin": 0.003477},
            },
        },
        "hnsw_index": {
            "speedup": 5.65,
            "recall": 0.995,
            "latency_us": 51.83,
            "bruteforce_latency_us": 293.07,
        },
        "geodesic_retrieval": {
            "stages": 4,
            "final_recall": 1.0,
            "n_high": 0,
            "n_medium": 199,
            "n_low": 1,
            "hard_failure": "doc_146 (resolved by Stage 4 metadata)",
        },
        "int8_compression": {
            "ratio": 1.97,
            "cosine_similarity": 0.99998,
            "margin_fp16": 0.381,
            "margin_int8": 0.262,
            "margin_preserved": True,
        },
        "ttft_speedup": {
            "3b_4k": {"cold_ms": 11439, "warm_ms": 170, "speedup": 67.2},
            "3b_16k": {"cold_ms": 94592, "warm_ms": 1777, "speedup": 53.2},
            "8b_591": {"cold_ms": 3508, "warm_ms": 116, "speedup": 30.8},
        },
        "cross_model_transfer": {
            "n_strategies": 9,
            "best_method": "FCDB",
            "best_margin": 0.124,
            "results": {
                "CCA": {"margin": -0.420, "correct": False},
                "Residual_FCB": {"margin": -0.382, "correct": False},
                "Procrustes": {"margin": -0.104, "correct": False},
                "RR": {"margin": -0.066, "correct": False},
                "FCB_ridge": {"margin": -0.017, "correct": False},
                "Contrastive": {"margin": 0.001, "correct": True},
                "JCB": {"margin": 0.011, "correct": True},
                "JCB_delta": {"margin": 0.037, "correct": True},
                "FCDB": {"margin": 0.124, "correct": True},
            },
            "key_insight": "Cross-model transfer requires representing documents as directions from a shared reference point (Frechet mean), not positions in space",
        },
        "fcdb_scaling": {
            "v1_n50": {"stability": 0.82, "margin": 0.124},
            "v2_n200": {"stability": 0.999, "margin": 0.013},
            "collapse_n": 100,
            "tradeoff": "Larger corpus stabilizes basis but dilutes per-document signal",
        },
        "cka_analysis": {
            "within_family": {"models": "Llama 3B ↔ 8B", "mean_cka": 0.975, "f0f1_sim": 0.875},
            "cross_family": {"models": "Llama ↔ Qwen", "mean_cka": 0.927, "f0f1_sim": 0.259},
            "verdict": "Manifolds topologically isomorphic (CKA>0.92 all pairs)",
        },
        "domain_recall": {
            "computer_science": 1.0, "general_world": 0.95, "history": 1.0,
            "language_arts": 1.0, "ml_systems": 0.90, "mathematics": 1.0,
            "philosophy": 1.0, "medicine": 0.95, "biology": 1.0, "physics": 1.0,
        },
        "eigengram_format": {
            "version": "1.2",
            "architectures": ["llama", "gemma", "gemma4/ISWA", "phi", "qwen", "mistral"],
            "iswa_support": "Gemma 4 26B dual-cache (5+25 layers, 6144-dim fingerprint)",
        },
    }

    paper_dir = RESULTS_DIR / "paper"
    paper_dir.mkdir(parents=True, exist_ok=True)
    findings_path = paper_dir / "findings.json"
    findings_path.write_text(json.dumps(findings, indent=2))
    # Plain string: the old f-string had no placeholders (ruff F541).
    print("  Saved: paper/findings.json")
842
+
843
+
844
+ # ── LaTeX Tables ─────────────────────────────────────────────────────────
845
+
846
def generate_latex_tables() -> None:
    """Generate LaTeX table source for the paper.

    Writes RESULTS_DIR/paper/tables.tex containing Tables 1-8.

    Bug fix: the string previously opened with ``r\"\"\"\\`` β€” but inside a RAW
    string a trailing backslash is NOT a line continuation, so the emitted
    .tex file began with a stray literal ``\\`` (a LaTeX error).  The
    backslash is removed and the resulting leading newline stripped at write
    time instead.
    """
    print("Generating LaTeX tables...")

    tables = r"""
% ──────────────────────────────────────────────────────────────────────
% Table 1: Multi-Frequency Ablation
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Multi-frequency fingerprint ablation at $N=200$. The f0+f1 combination
achieves the highest recall and mean margin, fixing 25 of 28 single-frequency failures.}
\label{tab:frequency-ablation}
\begin{tabular}{lcccc}
\toprule
Frequencies & Recall@1 & Mean Margin & Min Margin & Failures \\
\midrule
$f_1$ & 86.0\% & 4.09$\times 10^{-3}$ & $-4.71\times 10^{-3}$ & 28 \\
$f_2$ & 71.5\% & 2.20$\times 10^{-3}$ & $-5.85\times 10^{-3}$ & 57 \\
$f_1 + f_2$ & 95.0\% & 4.74$\times 10^{-3}$ & $-2.68\times 10^{-3}$ & 10 \\
$f_1 + f_2 + f_3$ & 95.0\% & 4.13$\times 10^{-3}$ & $-2.71\times 10^{-3}$ & 10 \\
\rowcolor{green!10}
$f_0 + f_1$ & \textbf{98.0\%} & \textbf{7.20}$\times 10^{-3}$ & $-4.09\times 10^{-3}$ & \textbf{4} \\
$f_1 + f_3$ & 89.0\% & 3.48$\times 10^{-3}$ & $-4.08\times 10^{-3}$ & 22 \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 2: Cross-Model Transfer Strategies
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Cross-model transfer strategies (Llama 3B $\to$ 8B). Nine methods tested;
FCDB achieves the only reliable positive margin without requiring an adapter.}
\label{tab:cross-model}
\begin{tabular}{lccc}
\toprule
Method & Margin & Correct & Adapter \\
\midrule
CCA & $-0.420$ & \xmark & symmetric \\
Residual FCB & $-0.382$ & \xmark & none \\
Procrustes & $-0.104$ & \xmark & orthogonal \\
Relative Repr. & $-0.066$ & \xmark & none \\
FCB + ridge & $-0.017$ & \xmark & ridge \\
\midrule
Contrastive $\delta$ & $+0.001$ & \cmark & ridge \\
JCB & $+0.011$ & \cmark & none \\
JCB + $\delta$ & $+0.037$ & \cmark & none \\
\rowcolor{green!10}
\textbf{FCDB} & $\mathbf{+0.124}$ & \cmark & \textbf{none} \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 3: TTFT Speedup
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{KV cache warm-start performance. TTFT speedup ranges from 27--67$\times$
depending on model size and context length.}
\label{tab:ttft}
\begin{tabular}{lccccc}
\toprule
Model & Tokens & Cold TTFT & Warm TTFT & Speedup & EGR (ms) \\
\midrule
Llama 3.2 3B & 4,002 & 11,439\,ms & 170\,ms & 67.2$\times$ & 9.5 \\
Llama 3.2 3B & 16,382 & 94,592\,ms & 1,777\,ms & 53.2$\times$ & 9.5 \\
Llama 3.1 8B & 591 & 3,508\,ms & 116\,ms & 30.8$\times$ & 30.6 \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 4: INT8 Compression
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{INT8 quantization results. Per-row symmetric quantization achieves
1.97$\times$ compression with negligible quality loss (cos\_sim = 0.99998).}
\label{tab:int8}
\begin{tabular}{lcccc}
\toprule
Tokens & FP16 Size & INT8 Size & Ratio & $\cos(s_\text{fp16}, s_\text{int8})$ \\
\midrule
591 & 73.9\,MB & 37.5\,MB & 1.97$\times$ & 0.99998 \\
6,403 & 800.4\,MB & 406.5\,MB & 1.97$\times$ & 0.99998 \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 5: CKA Analysis
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Centered Kernel Alignment (CKA) between model families. High CKA values
($>0.92$) confirm topological isomorphism of key manifolds across architectures.}
\label{tab:cka}
\begin{tabular}{lccc}
\toprule
Comparison & Mean CKA & f0+f1 Sim & Verdict \\
\midrule
Within-family (Llama 3B $\leftrightarrow$ 8B) & 0.975 & 0.875 & Isomorphic \\
Cross-family (Llama $\leftrightarrow$ Qwen) & 0.927 & 0.259 & Isomorphic \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 6: HNSW Benchmark
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{HNSW index performance at $N=200$. The index provides 5.65$\times$
speedup over brute-force with no recall loss.}
\label{tab:hnsw}
\begin{tabular}{lcc}
\toprule
Method & Latency ($\mu$s) & Recall@1 \\
\midrule
Brute-force & 293.1 & 99.5\% \\
HNSW ($M=32$) & 51.8 & 99.5\% \\
\midrule
\textbf{Speedup} & \textbf{5.65$\times$} & --- \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 7: Domain Recall
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Per-domain recall@1 with f0+f1 fingerprint at $N=200$.
All domains achieve $\geq 90\%$ recall.}
\label{tab:domain-recall}
\begin{tabular}{lc}
\toprule
Domain & Recall@1 \\
\midrule
Biology & 100.0\% \\
Computer Science & 100.0\% \\
History & 100.0\% \\
Language Arts & 100.0\% \\
Mathematics & 100.0\% \\
Philosophy & 100.0\% \\
Physics & 100.0\% \\
General World & 95.0\% \\
Medicine & 95.0\% \\
ML/Systems & 90.0\% \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 8: Margin Power Law
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Margin scaling law parameters. Both fingerprint methods follow
power-law decay $\bar{m} = A \cdot N^\alpha$ with no hard collapse point.}
\label{tab:power-law}
\begin{tabular}{lccc}
\toprule
Fingerprint & $A$ & $\alpha$ & Recall@200 \\
\midrule
$f_1$ & 0.0181 & $-0.277$ & 86.0\% \\
$f_0 + f_1$ & 0.0213 & $-0.207$ & 98.0\% \\
\bottomrule
\end{tabular}
\end{table}
"""

    paper_dir = RESULTS_DIR / "paper"
    paper_dir.mkdir(parents=True, exist_ok=True)
    tables_path = paper_dir / "tables.tex"
    # lstrip removes the leading newline left by the opening triple quote.
    tables_path.write_text(tables.lstrip("\n"))
    # Plain string: the old f-string had no placeholders (ruff F541).
    print("  Saved: paper/tables.tex")
1026
+
1027
+
1028
+ # ── Registry ─────────────────────────────────────────────────────────────
1029
+
1030
# Registry of all paper artifacts: key -> (human-readable description,
# zero-argument generator callable).  main() iterates this in insertion
# order, so entries are listed in figure order with the consolidated
# findings JSON and LaTeX tables last.  Keys double as the accepted values
# for the --only CLI flag.
FIGURE_REGISTRY: dict[str, tuple[str, object]] = {
    "fig01": ("System Architecture (Mermaid)", fig01_architecture_mermaid),
    "fig02": ("Frequency Combination Comparison", fig02_frequency_comparison),
    "fig03": ("Margin Power Law", fig03_margin_power_law),
    "fig04": ("Recall vs N (Fourier vs FCDB)", fig04_recall_vs_n),
    "fig05": ("Cross-Model Strategy Comparison", fig05_cross_model_strategies),
    "fig06": ("CKA Layer Similarity", fig06_cka_layers),
    "fig07": ("Domain Confusion Matrix", fig07_confusion_matrix),
    "fig08": ("Domain Recall Radar", fig08_domain_recall_radar),
    "fig09": ("HNSW Benchmark", fig09_hnsw_benchmark),
    "fig10": ("INT8 Compression", fig10_int8_compression),
    "fig11": ("Retrieval Pipeline (Mermaid)", fig11_retrieval_pipeline_mermaid),
    "fig12": ("Margin Distribution", fig12_margin_distribution),
    "fig13": ("FCDB Tradeoff", fig13_fcdb_tradeoff),
    "fig14": ("TTFT Speedup", fig14_ttft_speedup),
    "fig15": ("EGR Overhead Scaling", fig15_egr_overhead),
    "findings": ("Consolidated Findings JSON", generate_findings),
    "tables": ("LaTeX Tables", generate_latex_tables),
}
1049
+
1050
+
1051
def main() -> None:
    """CLI entry point: generate all paper figures, or one via --only.

    --list prints the registry and exits; --only with an unknown key exits
    with status 1.  In the generate-all path, a failure in one figure is
    reported but does not stop the remaining figures.
    """
    parser = argparse.ArgumentParser(description="Generate ENGRAM paper figures")
    parser.add_argument("--only", help="Generate only this figure (e.g., fig02)")
    parser.add_argument("--list", action="store_true", help="List all figures")
    args = parser.parse_args()

    if args.list:
        print("\nAvailable figures:")
        for key, (desc, _) in FIGURE_REGISTRY.items():
            print(f" {key:10s} {desc}")
        return

    FIGURES_DIR.mkdir(parents=True, exist_ok=True)
    print(f"\nOutput directory: {FIGURES_DIR}\n")

    if args.only:
        if args.only not in FIGURE_REGISTRY:
            print(f"Unknown figure: {args.only}")
            print(f"Available: {', '.join(FIGURE_REGISTRY.keys())}")
            sys.exit(1)
        # Description is unused here; bind only the callable.
        _, func = FIGURE_REGISTRY[args.only]
        func()
    else:
        for key, (_, func) in FIGURE_REGISTRY.items():
            try:
                func()
            except Exception as e:
                # Keep going: one broken figure should not abort the batch.
                print(f" ERROR generating {key}: {e}")

    print(f"\nDone. Figures saved to: {FIGURES_DIR}")
1081
+
1082
+
1083
# Script entry point: run the figure generator when executed directly.
if __name__ == "__main__":
    main()
scripts/setup.sh ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# ENGRAM Protocol β€” One-command setup
#
# Usage:
#   ./scripts/setup.sh            # Full setup with sbert embedder
#   ./scripts/setup.sh --minimal  # Core only (no sbert, no MCP)
#   ./scripts/setup.sh --dev      # Full setup + dev tools
#
# Requirements:
#   - Python >= 3.11
#   - pip (comes with Python)
#   - git (for cloning)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Colors (if terminal supports them)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

info() { echo -e "${GREEN}[ENGRAM]${NC} $*"; }
warn() { echo -e "${YELLOW}[ENGRAM]${NC} $*"; }
error() { echo -e "${RED}[ENGRAM]${NC} $*" >&2; }

# Parse arguments
MINIMAL=false
DEV=false
for arg in "$@"; do
    case "$arg" in
        --minimal) MINIMAL=true ;;
        --dev) DEV=true ;;
        --help|-h)
            echo "Usage: ./scripts/setup.sh [--minimal] [--dev]"
            echo ""
            echo "  --minimal   Core dependencies only (no sbert, no MCP)"
            echo "  --dev       Include development tools (pytest, ruff, mypy)"
            exit 0
            ;;
        *) error "Unknown argument: $arg"; exit 1 ;;
    esac
done

cd "$PROJECT_DIR"

# ── 1. Check Python version ──────────────────────────────────────────
info "Checking Python version..."
PYTHON=""
for cmd in python3.14 python3.13 python3.12 python3.11 python3; do
    if command -v "$cmd" &>/dev/null; then
        major=$("$cmd" -c "import sys; print(sys.version_info.major)")
        minor=$("$cmd" -c "import sys; print(sys.version_info.minor)")
        # Bug fix: the previous test required BOTH major>=3 AND minor>=11,
        # which would wrongly reject any future major version (e.g. 4.0).
        # Accept major > 3, or exactly 3 with minor >= 11.
        if [ "$major" -gt 3 ] || { [ "$major" -eq 3 ] && [ "$minor" -ge 11 ]; }; then
            PYTHON="$cmd"
            break
        fi
    fi
done

if [ -z "$PYTHON" ]; then
    error "Python >= 3.11 required but not found."
    error "Install from https://python.org or via your package manager."
    exit 1
fi
info "Using $PYTHON ($(${PYTHON} --version 2>&1))"

# ── 2. Create virtual environment ────────────────────────────────────
if [ ! -d ".venv" ]; then
    info "Creating virtual environment..."
    "$PYTHON" -m venv .venv
else
    info "Virtual environment already exists."
fi

# Activate
source .venv/bin/activate
info "Activated .venv"

# ── 3. Upgrade pip ───────────────────────────────────────────────────
info "Upgrading pip..."
pip install --upgrade pip --quiet

# ── 4. Install core package ──────────────────────────────────────────
info "Installing ENGRAM core dependencies..."
pip install -e . --quiet

if [ "$MINIMAL" = false ]; then
    # ── 5. Install sbert embedder ────────────────────────────────────
    info "Installing sentence-transformers embedder..."
    pip install -e ".[sbert]" --quiet

    # ── 6. Install MCP server ────────────────────────────────────────
    info "Installing MCP server dependencies..."
    pip install -e ".[mcp]" --quiet 2>/dev/null || \
        warn "MCP package not available (optional β€” needed for Claude Code integration)"
fi

if [ "$DEV" = true ]; then
    # ── 7. Install dev tools ─────────────────────────────────────────
    info "Installing development tools..."
    pip install -e ".[dev]" --quiet
fi

# ── 8. Create config from template ───────────────────────────────────
if [ ! -f ".env" ]; then
    cp .env.template .env
    info "Created .env from template. Edit it to set ENGRAM_MODEL_PATH."
else
    info ".env already exists."
fi

# ── 9. Create ENGRAM directories ─────────────────────────────────────
mkdir -p ~/.engram/sessions
mkdir -p ~/.engram/knowledge
mkdir -p ~/.engram/index
info "Created ~/.engram/ directories."

# ── 10. Verify installation ──────────────────────────────────────────
info "Verifying installation..."
if python -c "import kvcos; print(f' kvcos OK (v{kvcos.core.types.ENGRAM_VERSION})')"; then
    info "Core library loaded successfully."
else
    error "Failed to import kvcos. Check error messages above."
    exit 1
fi

# ── 11. Run tests (if dev mode) ──────────────────────────────────────
if [ "$DEV" = true ]; then
    info "Running test suite..."
    KMP_DUPLICATE_LIB_OK=TRUE OMP_NUM_THREADS=1 PYTHONPATH=. \
        pytest tests/ -x -q --tb=short 2>&1 | tail -5
fi

# ── Done ─────────────────────────────────────────────────────────────
echo ""
info "Setup complete."
echo ""
echo " Activate: source .venv/bin/activate"
echo " Tests:    KMP_DUPLICATE_LIB_OK=TRUE PYTHONPATH=. pytest tests/ -x -q"
echo " Server:   engram-server"
echo " Config:   Edit .env to set ENGRAM_MODEL_PATH"
echo ""