eigengram committed on
Commit
954cf8a
·
verified ·
1 Parent(s): 36566c3

feat: upload scripts

Browse files
scripts/compute_corpus_basis.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Compute a Fixed Corpus Basis (FCB) for cross-document and
3
+ cross-model stable state vector extraction.
4
+
5
+ The FCB is the principal subspace of the key manifold computed
6
+ from a diverse reference corpus. Unlike per-document SVD,
7
+ the FCB is document-independent — all documents projected
8
+ with the same FCB exist in the same coordinate system.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import gc
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ import torch
19
+ from llama_cpp import Llama
20
+
21
+ from kvcos.core.blob_parser import parse_state_blob
22
+ from kvcos.core.state_extractor import MARStateExtractor
23
+ from scripts.generate_alignment_dataset import DOCUMENTS
24
+
25
+
26
def main() -> int:
    """Build and persist a Fixed Corpus Basis from the reference corpus.

    Prefills every reference document through the model, harvests the
    per-document key tensors from the llama.cpp state blob, and computes
    the shared principal subspace via
    ``MARStateExtractor.compute_corpus_basis``. The basis, its settings,
    and the raw key tensors are saved together with ``torch.save``.

    Returns:
        Process exit code (0 on success).
    """
    ap = argparse.ArgumentParser(description="Compute Fixed Corpus Basis")
    ap.add_argument("--model", required=True)
    ap.add_argument("--layer-range", type=int, nargs=2, default=[8, 24])
    ap.add_argument("--gate-start", type=int, default=6)
    ap.add_argument("--rank", type=int, default=122)
    ap.add_argument("--output", required=True)
    args = ap.parse_args()

    llm = Llama(model_path=args.model, n_ctx=2048, n_gpu_layers=-1, verbose=False)
    md = llm.metadata
    # GGUF metadata values are strings; defaults match Llama 3.1 8B.
    n_kv = int(md.get("llama.attention.head_count_kv", "8"))
    n_heads = int(md.get("llama.attention.head_count", "32"))
    head_dim = int(md.get("llama.embedding_length", "4096")) // n_heads
    model_name = md.get("general.name", "unknown")

    print(f"Model: {model_name} ({n_kv} KV heads, {head_dim} head_dim)")
    print(f"Layer range: {args.layer_range}, gate_start: {args.gate_start}")
    print(f"Collecting key tensors from {len(DOCUMENTS)} documents...")

    key_tensors: list[torch.Tensor] = []
    for doc_idx, document in enumerate(DOCUMENTS, start=1):
        llm.reset()  # fresh KV cache for every document
        llm(document.strip(), max_tokens=1, temperature=0.0)
        state = llm.save_state()
        parsed = parse_state_blob(
            bytes(state.llama_state), n_kv_heads=n_kv, head_dim=head_dim
        )
        key_tensors.append(parsed.keys)
        if doc_idx % 10 == 0:
            print(f" {doc_idx}/{len(DOCUMENTS)}")
    # The model is no longer needed; release it before the SVD.
    del llm
    gc.collect()

    print("Computing corpus SVD...")
    basis = MARStateExtractor.compute_corpus_basis(
        key_tensors=key_tensors,
        layer_range=tuple(args.layer_range),
        gate_start=args.gate_start,
        rank=args.rank,
    )

    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "basis": basis,
        "model_name": model_name,
        "layer_range": args.layer_range,
        "gate_start": args.gate_start,
        "rank": args.rank,
        "n_corpus_docs": len(DOCUMENTS),
        "key_tensors": key_tensors,
    }
    torch.save(payload, str(out))

    print(f"Basis shape: {basis.shape}")
    print(f"Saved: {out}")
    return 0
87
+
88
+
89
# Script entry point: exit status is main()'s return value.
if __name__ == "__main__":
    sys.exit(main())
scripts/demo_agent_session.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ENGRAM Protocol — Demo Agent Session
3
+
4
+
5
+ End-to-end demonstration:
6
+ 1. Load model via llama-cpp-python (D1)
7
+ 2. Generate with a prompt β†’ measure cold TTFT
8
+ 3. Extract KV cache β†’ compress β†’ serialize to .eng
9
+ 4. Index in EGR manifold index
10
+ 5. Reset model β†’ restore from .eng β†’ measure cached TTFT
11
+ 6. Print speedup ratio
12
+
13
+ D6: Target >10x TTFT reduction at 16K context on Llama 3.1 8B.
14
+ Cold baseline: ~1,500-5,000ms. Cached target: <500ms.
15
+ Anything below 4x at 16K is a failure.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import sys
22
+ import time
23
+ from pathlib import Path
24
+
25
+
26
def _run_dry_run(args: argparse.Namespace) -> int:
    """Run the full pipeline with synthetic tensors — no model file needed.

    Builds random K/V tensors shaped like Llama 3.1 8B, round-trips them
    through the .eng serializer, indexes and retrieves via the EGR
    manifold index, and prints per-phase timing breakdowns.

    Returns:
        0 when the simulated cold/cached speedup exceeds 10x, else 1.
    """
    import os
    import tempfile

    import torch

    from kvcos.core.cache_spec import LLAMA_3_1_8B
    from kvcos.core.serializer import EngramSerializer
    from kvcos.core.types import CompressionMethod, StateExtractionMode
    from kvcos.core.manifold_index import IndexEntry, ManifoldIndex
    from kvcos.core.state_extractor import MARStateExtractor
    from kvcos.storage.local import LocalStorageBackend

    spec = LLAMA_3_1_8B
    ctx_len = args.context
    model_name = spec["model_id"]

    # ── Synthetic KV tensors ──────────────────────────────
    # Fixed seed so repeated runs produce identical tensors.
    torch.manual_seed(42)
    shape = (spec["n_layers"], spec["n_kv_heads"], ctx_len, spec["head_dim"])
    keys = torch.randn(shape, dtype=torch.float16)
    values = torch.randn(shape, dtype=torch.float16)

    tensor_mb = keys.numel() * keys.element_size() / 1024 / 1024

    with tempfile.TemporaryDirectory() as tmp:
        tmp_dir = Path(tmp)

        # ── Serialize to .eng ────────────────────────────────
        serializer = EngramSerializer()
        eng_path = tmp_dir / "dry_run.eng"

        t0 = time.perf_counter()
        # NOTE(review): `result` and `serialize_ms` are computed here but
        # never reported below.
        result = serializer.serialize(
            keys=keys, values=values,
            agent_id="dry-run-agent",
            task_description="dry run benchmark",
            model_id=model_name,
            output_path=eng_path,
            compression=CompressionMethod.Q8_0,
        )
        serialize_ms = (time.perf_counter() - t0) * 1000

        # ── Load back ────────────────────────────────────────
        t0 = time.perf_counter()
        k_out, v_out, meta = serializer.deserialize(eng_path)
        deserialize_ms = (time.perf_counter() - t0) * 1000

        assert k_out.shape == keys.shape, f"Shape mismatch: {k_out.shape} vs {keys.shape}"

        # ── EGR granular timing ──────────────────────────────
        extractor = MARStateExtractor(
            mode=StateExtractionMode.SVD_PROJECT,
            rank=min(160, spec["head_dim"]),
        )
        dim = extractor.output_dim(spec)
        index = ManifoldIndex(dim=dim)
        storage = LocalStorageBackend(data_dir=tmp_dir)

        # Index: extract + serialize + store + add
        t0 = time.perf_counter()
        extraction = extractor.extract(keys, spec)
        t_extract = time.perf_counter()

        eng2 = tmp_dir / "indexed.eng"
        serializer.serialize(
            keys=keys, values=values,
            agent_id="dry-run-agent",
            task_description="dry run benchmark",
            model_id=model_name,
            output_path=eng2,
            compression=CompressionMethod.Q8_0,
            cache_id="dry-run-001",
        )
        t_serialize = time.perf_counter()

        idx_meta = serializer.read_metadata_only(eng2)
        storage.store_file("dry-run-001", eng2, idx_meta)
        t_store = time.perf_counter()

        from datetime import datetime, timezone
        entry = IndexEntry(
            cache_id="dry-run-001",
            task_description="dry run benchmark",
            model_id=model_name,
            created_at=datetime.now(timezone.utc).isoformat(),
            context_len=ctx_len,
            l2_norm=extraction.l2_norm,
        )
        index.add(extraction.state_vec, entry)
        t_add = time.perf_counter()

        # Per-phase wall-clock deltas in milliseconds.
        extract_ms = (t_extract - t0) * 1000
        ser_ms = (t_serialize - t_extract) * 1000
        store_ms = (t_store - t_serialize) * 1000
        add_ms = (t_add - t_store) * 1000
        index_ms = (t_add - t0) * 1000

        # Retrieve: extract query + search + load
        # Different seed: the query tensor must differ from the indexed one.
        torch.manual_seed(99)
        query_keys = torch.randn(shape, dtype=torch.float16)

        t0 = time.perf_counter()
        q_ext = extractor.extract(query_keys, spec)
        t_qext = time.perf_counter()

        results = index.search(q_ext.state_vec, top_k=1)
        t_search = time.perf_counter()

        # Load matched engram
        # NOTE(review): k_loaded/v_loaded are only timed, never inspected.
        stored_path = storage.get_path("dry-run-001")
        k_loaded, v_loaded, _ = serializer.deserialize(stored_path)
        t_load = time.perf_counter()

        q_extract_ms = (t_qext - t0) * 1000
        search_ms = (t_search - t_qext) * 1000
        load_ms = (t_load - t_search) * 1000
        retrieve_ms = (t_load - t0) * 1000

        # ── Simulate TTFT estimates ──────────────────────────
        cold_ms = ctx_len * 0.1  # simulated
        cached_ms = deserialize_ms
        egr_overhead = extract_ms + search_ms  # overhead added to warm path
        speedup = cold_ms / cached_ms if cached_ms > 0 else float("inf")
        eng_size_mb = os.path.getsize(eng_path) / 1024 / 1024

        # ── Output ───────────────────────────────────────────
        sep = "=" * 35
        print(sep)
        print("ENGRAM Protocol \u2014 EGR Demo")
        print(f"Model: {model_name}")
        print(f"Context: {ctx_len} tokens")
        print(sep)
        print(f"Cold TTFT: {cold_ms:.1f}ms (simulated)")
        print(f"Cached TTFT: {cached_ms:.1f}ms (deserialize)")
        print(f"Speedup: {speedup:.1f}x")
        print(f"D6 target: >10x at 16K tokens")
        status = "PASS" if speedup > 10 else "FAIL"
        print(f"Status: {status}")
        print(f"EGR overhead: {egr_overhead:.1f}ms (extract+search)")
        print(f".eng file: {eng_path.name} ({eng_size_mb:.1f}MB)")
        print(f"Tensor shape: {list(shape)} ({tensor_mb:.0f}MB per K/V)")
        print(sep)
        print()
        print("Index breakdown:")
        print(f" SVD extract: {extract_ms:8.1f}ms")
        print(f" Serialize .eng: {ser_ms:8.1f}ms")
        print(f" Store backend: {store_ms:8.1f}ms")
        print(f" FAISS add(): {add_ms:8.1f}ms")
        print(f" TOTAL: {index_ms:8.1f}ms")
        print()
        print("Retrieve breakdown:")
        print(f" SVD extract: {q_extract_ms:8.1f}ms")
        print(f" FAISS search(): {search_ms:8.1f}ms")
        print(f" Load+deser: {load_ms:8.1f}ms")
        print(f" TOTAL: {retrieve_ms:8.1f}ms")
        print()
        print("Verification:")
        print(f" Round-trip shape: {'OK' if k_out.shape == keys.shape else 'FAIL'}")
        print(f" Retrieval result: {'OK' if len(results) >= 1 else 'FAIL'}")
        print(f" .eng valid: {'OK' if eng_path.exists() else 'FAIL'}")

        return 0 if speedup > 10 else 1
190
+
191
+
192
def main() -> int:
    """Run the end-to-end ENGRAM demo session.

    Parses CLI arguments, then either runs the synthetic dry-run pipeline
    (--dry-run) or the real model flow: cold prefill, KV extraction,
    .eng serialization, EGR indexing, cached restore, and a final
    cold-vs-cached TTFT comparison.

    Returns:
        Process exit code — 0 when the real-model speedup is >= 4x
        (dry-run: > 10x), 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="ENGRAM Protocol — Demo Agent Session",
        epilog="D6: >10x TTFT reduction at 16K context on Llama 3.1 8B",
    )
    parser.add_argument(
        "--model", "-m", default=None,
        help="Path to GGUF model file (required unless --dry-run)",
    )
    parser.add_argument(
        "--context", "-c", type=int, default=4096,
        help="Context length to fill (tokens). Default: 4096",
    )
    parser.add_argument(
        "--n-ctx", type=int, default=16384,
        help="Max context window for model. Default: 16384",
    )
    parser.add_argument(
        "--data-dir", type=str, default=None,
        help="ENGRAM data directory. Default: ~/.engram/data",
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Run full pipeline with synthetic tensors (no model needed)",
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true",
        help="Enable verbose output",
    )
    args = parser.parse_args()

    if args.dry_run:
        return _run_dry_run(args)

    if not args.model:
        parser.error("--model is required unless --dry-run is specified")

    print("=" * 70)
    print("ENGRAM Protocol — Demo Agent Session")
    print("KV cache fingerprinting for persistent semantic retrieval")
    print("=" * 70)
    print()

    # ── Setup ─────────────────────────────────────────────────
    # Imports are deferred so `--help` and argument errors stay fast.
    from kvcos.core.config import get_config
    from kvcos.core.serializer import EngramSerializer
    from kvcos.core.types import CompressionMethod, StateExtractionMode
    from kvcos.core.manifold_index import ManifoldIndex
    from kvcos.core.retriever import EGRRetriever
    from kvcos.core.state_extractor import MARStateExtractor
    from kvcos.storage.local import LocalStorageBackend
    from integrations.llama_cpp_bridge import LlamaCppBridge

    config = get_config()
    data_dir = Path(args.data_dir) if args.data_dir else config.data_dir

    # ── Step 1: Load Model ────────────────────────────────────
    print(f"[1/6] Loading model: {args.model}")
    bridge = LlamaCppBridge(
        model_path=args.model,
        n_ctx=args.n_ctx,
        n_gpu_layers=0,  # D1
        verbose=args.verbose,
    )
    spec = bridge.load_model()
    print(f" Model: {spec['model_id']}")
    print(f" Architecture: {spec['n_layers']}L / {spec['n_heads']}H / {spec['n_kv_heads']}KV / {spec['head_dim']}D")
    print(f" Context window: {args.n_ctx}")
    print()

    # ── Step 2: Generate + Cold TTFT ──────────────────────────
    base = "The quick brown fox jumps over the lazy dog. "
    target_tokens = args.context
    needed_chars = target_tokens * 4  # ~4 chars/token heuristic
    # Fix: repeat enough to actually cover the requested context. A fixed
    # 100x repeat capped the prompt at ~4,600 chars (~1K tokens), so larger
    # --context values were silently under-filled.
    filler = base * (needed_chars // len(base) + 1)
    prompt = filler[:needed_chars]

    print(f"[2/6] Cold prefill ({target_tokens} target tokens)...")
    cold = bridge.measure_cold_ttft(prompt)
    print(f" Cold TTFT: {cold.ttft_ms:.1f}ms ({cold.context_len} tokens)")
    print()

    # ── Step 3: Extract + Serialize ───────────────────────────
    print("[3/6] Extracting KV cache...")
    try:
        parsed = bridge.extract_kv_cache()
        print(f" Keys shape: {list(parsed.keys.shape)}")
        print(f" Values shape: {list(parsed.values.shape)}")
        print(f" Cells: {parsed.n_cells}")
    except Exception as e:
        # Best-effort: fall back to the raw state blob below.
        print(f" KV extraction failed: {e}")
        print(" This is expected if the blob format doesn't match.")
        print(" Falling back to save_state/load_state raw blob path.")
        parsed = None
    print()

    print("[3b/6] Saving raw state blob...")
    raw_state = bridge.llm.save_state()
    raw_blob = bytes(raw_state.llama_state)
    print(f" Raw state size: {len(raw_blob) / 1024 / 1024:.1f} MB")

    if parsed is not None:
        print("[3c/6] Serializing to .eng format...")
        serializer = EngramSerializer()
        eng_path = data_dir / "demo" / "session_001.eng"
        result = serializer.serialize(
            keys=parsed.keys,
            values=parsed.values,
            agent_id="demo-agent",
            task_description="demo session - cold prefill benchmark",
            model_id=spec["model_id"],
            output_path=eng_path,
            compression=CompressionMethod.Q8_0,
        )
        print(f" .eng file: {result['path']}")
        print(f" Size: {result['size_bytes'] / 1024 / 1024:.1f} MB")
        print(f" Compression ratio: {result['compression_ratio']:.2f}x")
    print()

    # ── Step 4: Index in EGR ──────────────────────────────────
    if parsed is not None:
        print("[4/6] Indexing in EGR manifold index...")
        storage = LocalStorageBackend(data_dir=data_dir)
        extractor = MARStateExtractor(
            mode=StateExtractionMode.SVD_PROJECT,
            rank=min(160, spec["head_dim"]),
        )
        dim = extractor.output_dim(spec)
        index = ManifoldIndex(dim=dim)
        retriever = EGRRetriever(extractor, index, storage)

        cache_id = retriever.index_engram(
            keys=parsed.keys,
            values=parsed.values,
            spec=spec,
            agent_id="demo-agent",
            task_description="demo session - cold prefill benchmark",
            model_id=spec["model_id"],
        )
        print(f" Indexed: {cache_id}")
        print(f" State vector dim: {dim}")
        print(f" Index entries: {index.n_entries}")
    else:
        print("[4/6] Skipped (KV extraction failed)")
    print()

    # ── Step 5: Restore + Cached TTFT ─────────────────────────
    print("[5/6] Restoring from cached state...")
    cached = bridge.measure_cached_ttft(raw_blob)
    print(f" Cached TTFT: {cached.ttft_ms:.1f}ms")
    print()

    # ── Step 6: Results ───────────────────────────────────────
    cold_ms = cold.ttft_ms
    cached_ms = cached.ttft_ms
    speedup = cold_ms / cached_ms if cached_ms > 0 else float("inf")

    # `result` only exists when serialization ran (parsed is not None);
    # use the same guard here instead of relying on object truthiness.
    eng_path_str = result["path"] if parsed is not None else "N/A"
    eng_size_kb = result["size_bytes"] / 1024 if parsed is not None else 0

    sep = "=" * 35
    print(sep)
    print("ENGRAM Protocol — EGR Demo")
    print(f"Model: {spec['model_id']}")
    print(f"Context: {cold.context_len} tokens")
    print(sep)
    print(f"Cold TTFT: {cold_ms:.1f}ms")
    print(f"Cached TTFT: {cached_ms:.1f}ms")
    print(f"Speedup: {speedup:.1f}x")
    print("D6 target: >10x at 16K tokens")
    status = "PASS" if speedup > 10 else "FAIL"
    print(f"Status: {status}")
    print(f".eng file: {eng_path_str} ({eng_size_kb:.1f}KB)")
    print(sep)

    return 0 if speedup >= 4 else 1
368
+
369
+
370
# Script entry point: exit status is main()'s return value.
if __name__ == "__main__":
    sys.exit(main())
scripts/diagnose_gemma4.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Diagnostic script for Gemma 4 26B-A4B GGUF compatibility with ENGRAM.
3
+
4
+ Tests:
5
+ 1. Model loading + metadata extraction
6
+ 2. Basic generation (does it produce coherent output?)
7
+ 3. State blob extraction + structure analysis
8
+ 4. ENGRAM blob parser compatibility
9
+ 5. Full fingerprint pipeline (if blob parsing works)
10
+
11
+ Usage:
12
+ PYTHONPATH=. .venv/bin/python scripts/diagnose_gemma4.py /path/to/gemma4.gguf
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import struct
18
+ import sys
19
+ import time
20
+ from pathlib import Path
21
+
22
+
23
def read_u32(data: bytes, offset: int) -> tuple[int, int]:
    """Decode a little-endian unsigned 32-bit int at *offset*.

    Returns (value, offset advanced past the field).
    """
    (value,) = struct.unpack_from("<I", data, offset)
    return value, offset + 4
25
+
26
+
27
def read_i32(data: bytes, offset: int) -> tuple[int, int]:
    """Decode a little-endian signed 32-bit int at *offset*.

    Returns (value, offset advanced past the field).
    """
    (value,) = struct.unpack_from("<i", data, offset)
    return value, offset + 4
29
+
30
+
31
def read_u64(data: bytes, offset: int) -> tuple[int, int]:
    """Decode a little-endian unsigned 64-bit int at *offset*.

    Returns (value, offset advanced past the field).
    """
    (value,) = struct.unpack_from("<Q", data, offset)
    return value, offset + 8
33
+
34
+
35
def inspect_blob_header(blob: bytes) -> dict:
    """Parse just the header/structure of a state blob without assuming F16.

    Walks the llama.cpp state-blob layout (arch string, stream count,
    cell metadata, then per-layer K dtype/row-size records) and returns
    the fields as a dict. When the blob has more than one KV stream the
    dict carries an "error" key and parsing stops early.
    """
    info: dict = {}
    offset = 0

    # Architecture string: u32 length followed by ASCII bytes.
    str_len, offset = read_u32(blob, offset)
    info["arch"] = blob[offset:offset + str_len].decode("ascii", errors="replace")
    offset += str_len

    # KV stream count — only single-stream blobs are supported here.
    n_stream, offset = read_u32(blob, offset)
    info["n_stream"] = n_stream
    if n_stream != 1:
        info["error"] = f"Expected 1 stream, got {n_stream}"
        return info

    cell_count, offset = read_u32(blob, offset)
    info["cell_count"] = cell_count

    # Skip per-cell metadata: position plus a list of sequence ids.
    for _ in range(cell_count):
        _pos, offset = read_i32(blob, offset)
        n_seq, offset = read_u32(blob, offset)
        for _ in range(n_seq):
            _sid, offset = read_i32(blob, offset)

    # Data header.
    v_trans, offset = read_u32(blob, offset)
    info["v_trans"] = bool(v_trans)

    n_layers, offset = read_u32(blob, offset)
    info["n_layers"] = n_layers

    # Sample the first few K layers: dtype tag + row size, skipping payload.
    k_types: list = []
    k_rows: list = []
    info["k_layer_types"] = k_types
    info["k_layer_row_sizes"] = k_rows
    for _ in range(min(n_layers, 5)):
        type_k, offset = read_i32(blob, offset)
        row_size_k, offset = read_u64(blob, offset)
        k_types.append(type_k)
        k_rows.append(row_size_k)
        offset += row_size_k * cell_count  # jump over the tensor data

    info["data_offset_after_k_sample"] = offset
    info["blob_total_size"] = len(blob)

    # Map GGML dtype tags to readable names.
    type_names = {0: "F32", 1: "F16", 2: "Q4_0", 8: "Q8_0"}
    info["k_type_names"] = [type_names.get(t, f"unknown({t})") for t in k_types]

    return info
89
+
90
+
91
def main():
    """Run the Gemma GGUF diagnostic steps end-to-end.

    Exits with status 1 on unusable input or hard failures; later steps
    degrade gracefully and report instead of aborting.
    """
    # Basic CLI validation: a single positional GGUF path is required.
    if len(sys.argv) < 2:
        print("Usage: python scripts/diagnose_gemma4.py <path-to-gguf>")
        sys.exit(1)

    model_path = sys.argv[1]
    if not Path(model_path).exists():
        print(f"Model not found: {model_path}")
        sys.exit(1)

    print(f"{'='*60}")
    print(f"ENGRAM × Gemma 4 Diagnostic")
    print(f"Model: {model_path}")
    print(f"{'='*60}\n")

    # ── Step 1: Load model ──────────────────────────────────────
    print("STEP 1: Loading model...")
    try:
        from llama_cpp import Llama

        t0 = time.perf_counter()
        llm = Llama(
            model_path=model_path,
            n_ctx=512,  # minimal context for diagnostics
            n_gpu_layers=0,  # CPU for safety
            verbose=False,
        )
        load_s = time.perf_counter() - t0
        print(f" Loaded in {load_s:.1f}s")
    except Exception as e:
        print(f" FAILED: {type(e).__name__}: {e}")
        sys.exit(1)

    # ── Step 2: Read metadata ───────────────────────────────────
    print("\nSTEP 2: Model metadata")
    metadata = llm.metadata
    # Keys worth printing for both llama- and gemma-namespaced GGUFs.
    interesting_keys = [
        "general.name", "general.architecture",
        "llama.block_count", "general.block_count",
        "llama.attention.head_count", "llama.attention.head_count_kv",
        "llama.embedding_length", "llama.context_length",
        "llama.expert_count", "llama.expert_used_count",
        "gemma.block_count", "gemma.attention.head_count",
        "gemma.attention.head_count_kv", "gemma.embedding_length",
    ]
    for key in interesting_keys:
        val = metadata.get(key)
        if val is not None:
            print(f" {key}: {val}")

    # Also dump any keys containing "expert" or "moe"
    for key, val in sorted(metadata.items()):
        if "expert" in key.lower() or "moe" in key.lower():
            print(f" {key}: {val}")

    # Derive spec parameters
    # Fall back through llama.* then gemma.* then general.* namespaces;
    # "0" when absent so the int() conversion still succeeds.
    n_layers = int(metadata.get("llama.block_count", metadata.get("gemma.block_count", metadata.get("general.block_count", "0"))))
    n_heads = int(metadata.get("llama.attention.head_count", metadata.get("gemma.attention.head_count", "0")))
    n_kv_heads = int(metadata.get("llama.attention.head_count_kv", metadata.get("gemma.attention.head_count_kv", str(n_heads))))
    embed_dim = int(metadata.get("llama.embedding_length", metadata.get("gemma.embedding_length", "0")))
    head_dim = embed_dim // n_heads if n_heads > 0 else 0

    print(f"\n Derived spec:")
    print(f" n_layers={n_layers}, n_heads={n_heads}, n_kv_heads={n_kv_heads}")
    print(f" embed_dim={embed_dim}, head_dim={head_dim}")
    print(f" n_embd_kv = {n_kv_heads * head_dim}")

    # ── Step 3: Generate ────────────────────────────────────────
    print("\nSTEP 3: Basic generation")
    try:
        t0 = time.perf_counter()
        output = llm("Hello, my name is", max_tokens=20, temperature=0.0)
        gen_ms = (time.perf_counter() - t0) * 1000
        text = output["choices"][0]["text"]
        print(f" Generated in {gen_ms:.0f}ms")
        print(f" Output: {text[:200]}")
    except Exception as e:
        # Generation failure is non-fatal; the state-blob checks still run.
        print(f" FAILED: {type(e).__name__}: {e}")
        print(" Continuing anyway (bartowski warned about conversion issues)...")

    # ── Step 4: State blob extraction ───────────────────────────
    print("\nSTEP 4: State blob extraction")
    try:
        state_data = llm.save_state()
        blob = bytes(state_data.llama_state)
        print(f" Blob size: {len(blob):,} bytes ({len(blob)/1024/1024:.1f} MB)")

        # Inspect structure without assuming F16
        info = inspect_blob_header(blob)
        print(f" Architecture: {info.get('arch', '?')}")
        print(f" Cell count: {info.get('cell_count', '?')}")
        print(f" V transposed: {info.get('v_trans', '?')}")
        print(f" N layers: {info.get('n_layers', '?')}")
        print(f" K dtype (first 5 layers): {info.get('k_type_names', [])}")
        print(f" K row sizes (first 5): {info.get('k_layer_row_sizes', [])}")

        if info.get("k_layer_row_sizes"):
            row = info["k_layer_row_sizes"][0]
            cells = info["cell_count"]
            elements_per_row = row // 2  # assuming F16
            expected_embd_kv = n_kv_heads * head_dim
            print(f"\n Row analysis:")
            print(f" row_size={row}, cells={cells}")
            print(f" elements_per_cell (if F16) = {row // 2}")
            print(f" expected n_embd_kv = {expected_embd_kv}")
            if elements_per_row == expected_embd_kv:
                print(f" MATCH: row elements == n_kv_heads * head_dim")
            else:
                print(f" MISMATCH: {elements_per_row} != {expected_embd_kv}")
                # Check if it matches with different assumptions
                # (bytes-per-element for each candidate GGML dtype).
                for dtype_name, dtype_size in [("F32", 4.0), ("F16", 2.0), ("Q8_0", 34/32), ("Q4_0", 18/32)]:
                    if row / dtype_size == expected_embd_kv:
                        print(f" → Would match with dtype {dtype_name}")
    except Exception as e:
        # Without a state blob the remaining steps cannot run.
        print(f" FAILED: {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    # ── Step 5: ENGRAM blob parser ──────────────────────────────
    print("\nSTEP 5: ENGRAM blob parser")
    if n_kv_heads == 0 or head_dim == 0:
        print(" SKIPPED: could not derive n_kv_heads/head_dim from metadata")
    else:
        try:
            from kvcos.core.blob_parser import parse_state_blob
            parsed = parse_state_blob(blob, n_kv_heads=n_kv_heads, head_dim=head_dim)
            print(f" SUCCESS!")
            print(f" Keys shape: {parsed.keys.shape}")
            print(f" Values shape: {parsed.values.shape}")
            print(f" N cells: {parsed.n_cells}")
            print(f" N layers: {parsed.n_layers}")
            print(f" Arch: {parsed.arch}")
        except Exception as e:
            print(f" FAILED: {type(e).__name__}: {e}")
            print(" This is where we need to fix compatibility.")

    # ── Step 6: Fourier fingerprint ─────────────────────────────
    print("\nSTEP 6: Fourier fingerprint (if blob parsed)")
    try:
        # Deliberate NameError probe: `parsed` only exists if step 5 succeeded.
        parsed  # check it exists
        from kvcos.core.fingerprint import compute_fourier_fingerprint_v2
        layer_keys = parsed.keys.float().mean(dim=2)  # [layers, heads, dim]
        fp = compute_fourier_fingerprint_v2(layer_keys, freqs=[0, 1])
        print(f" Fingerprint shape: {fp.shape}")
        print(f" Norm: {fp.norm():.4f}")
        print(f" First 5 values: {fp[:5].tolist()}")
    except NameError:
        print(" SKIPPED: blob parsing failed")
    except Exception as e:
        print(f" FAILED: {type(e).__name__}: {e}")

    print(f"\n{'='*60}")
    print("Diagnostic complete.")
245
+
246
+
247
+ if __name__ == "__main__":
248
+ main()
scripts/egr_semantic_proof.py ADDED
@@ -0,0 +1,437 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ENGRAM Protocol β€” EGR Semantic Proof Script
3
+ Definitive K→K retrieval validation with diverse, non-repeated documents.
4
+
5
+ Usage:
6
+ KMP_DUPLICATE_LIB_OK=TRUE OMP_NUM_THREADS=1 PYTHONPATH=. \
7
+ .venv/bin/python scripts/egr_semantic_proof.py \
8
+ --model /path/to/model.gguf \
9
+ --ctx 16384 --n-trials 3 --layer-range 8 24 \
10
+ --output results/egr_semantic_proof_8B_14K.json --verbose
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import gc
17
+ import json
18
+ import math
19
+ import os
20
+ import sys
21
+ import tempfile
22
+ import time
23
+ from datetime import datetime, timezone
24
+ from pathlib import Path
25
+
26
+ import torch
27
+
28
+ # ── Documents ─────────────────────────────────────────────────────────────────
29
+
30
+ DOC_A = """
31
+ The transformer architecture introduced in "Attention Is All You Need"
32
+ replaced recurrent networks with self-attention as the core computational
33
+ primitive. Self-attention computes a weighted sum of value vectors, where
34
+ weights derive from the compatibility between query and key vectors.
35
+ For a sequence of length n, the attention matrix has shape nΓ—n,
36
+ making vanilla attention quadratic in both time and memory.
37
+
38
+ Multi-head attention partitions the embedding dimension into h parallel
39
+ subspaces. Each head independently computes attention using its own
40
+ learned projections W_Q, W_K, W_V of dimension d_model/h. The outputs
41
+ are concatenated and projected back to d_model via W_O. This allows
42
+ different heads to specialize in different relational patterns:
43
+ some heads track syntactic dependencies, others semantic similarity,
44
+ others coreference chains across longer distances.
45
+
46
+ Grouped-query attention generalizes multi-head and multi-query attention.
47
+ Rather than one KV pair per query head (MHA) or one KV pair for all
48
+ heads (MQA), GQA assigns one KV pair per group of g query heads.
49
+ Llama 3 uses GQA with 8 KV heads for 32 query heads, reducing
50
+ KV cache memory by 4Γ— with minimal quality degradation.
51
+
52
+ Rotary position embeddings encode absolute position by rotating
53
+ query and key vectors in 2D subspaces of the head dimension.
54
+ Unlike learned absolute embeddings or sinusoidal encodings,
55
+ RoPE naturally extrapolates to sequences longer than those seen
56
+ during training by preserving the inner product between positions
57
+ i and j as a function only of their relative offset i-j.
58
+
59
+ The KV cache enables efficient autoregressive generation by storing
60
+ computed key and value matrices from all previous positions.
61
+ Without caching, generating a sequence of length L requires O(LΒ²)
62
+ attention operations. With caching, each new token requires only
63
+ O(L) operations β€” one attention pass over the cached KV pairs.
64
+
65
+ Flash attention avoids materializing the full nΓ—n attention matrix
66
+ by tiling the computation into blocks that fit in SRAM. The forward
67
+ pass fuses the softmax and matrix multiply into a single kernel,
68
+ achieving O(n) memory complexity while maintaining exact numerical
69
+ equivalence to standard attention.
70
+
71
+ Mixture-of-experts transformer variants route each token to a sparse
72
+ subset of feed-forward experts using a learned routing function.
73
+ Mistral's Mixtral 8Γ—7B activates 2 of 8 experts per token,
74
+ achieving 7B-parameter inference cost with 47B total parameters.
75
+ Expert specialization emerges: some experts process syntactic
76
+ patterns, others domain-specific content, without explicit supervision.
77
+
78
+ Layer normalization applied before the attention sublayer (Pre-LN)
79
+ stabilizes training compared to Post-LN by ensuring gradients flow
80
+ through the residual stream without vanishing through normalized paths.
81
+ Modern architectures including Llama, Mistral, and GPT-NeoX all
82
+ adopt Pre-LN with RMSNorm, dropping the learned bias parameters.
83
+ """
84
+
85
# Control document B: molecular-biology content, deliberately disjoint from
# DOC_A's transformer/ML topic, so the attention QUERY below should retrieve
# session A with a clear margin over session B.
DOC_B = """
DNA replication in eukaryotic cells initiates at multiple origins
of replication simultaneously, enabling the duplication of genomes
containing billions of base pairs within hours. The origin recognition
complex marks these sites, recruiting CDC6 and CDT1 to load the
MCM helicase onto double-stranded DNA during G1 phase.

The MCM complex unwinds the double helix at replication forks,
separating the complementary strands to serve as templates.
DNA polymerase delta and epsilon synthesize the lagging and leading
strands respectively, both requiring a short RNA primer synthesized
by primase to provide a free 3'-OH group for extension.

Topoisomerase II resolves the positive supercoils that accumulate
ahead of the replication fork as the helix is unwound. Without
topoisomerase activity, the torsional stress would stall replication.
Type II topoisomerases cleave both strands simultaneously, pass
a second duplex through the break, and religate — changing
the linking number by two per catalytic cycle.

Protein synthesis begins with mRNA recognition by the 43S
pre-initiation complex, comprising the 40S ribosomal subunit,
eIF2-GTP-Met-tRNA, and accessory factors. The complex scans
5' to 3' until it encounters the AUG start codon in a favorable
Kozak context. The 60S subunit then joins to form the 80S ribosome.

Elongation proceeds by aminoacyl-tRNA accommodation at the A-site,
peptide bond formation catalyzed by the peptidyl transferase center
of the 23S rRNA, and translocation driven by EF-G and GTP hydrolysis.
Each elongation cycle advances the ribosome by exactly one codon,
consuming one GTP equivalent and incorporating one amino acid.

Cell signaling cascades amplify extracellular signals through
phosphorylation networks. The MAPK/ERK pathway converts growth
factor receptor activation into nuclear transcription factor
phosphorylation through RAF, MEK, and ERK kinases. Signal amplitude
and duration encode distinct transcriptional outcomes — transient
ERK activation drives proliferation while sustained activation
drives differentiation in PC12 cells.

CRISPR-Cas9 genome editing exploits the bacterial adaptive immunity
system in which Cas9 endonuclease is guided by a 20-nucleotide
spacer sequence in the sgRNA to cleave complementary genomic DNA.
The PAM sequence NGG immediately 3' of the target site is required
for Cas9 binding and R-loop formation. Double-strand breaks are
repaired by NHEJ (causing indels) or HDR (enabling precise edits).
"""

# Retrieval probe: semantically matches DOC_A (attention mechanisms),
# not DOC_B (molecular biology).
QUERY = "How does the attention mechanism use keys and queries to compute weighted context representations in transformer models?"
134
+
135
+
136
def run_trial(
    llm,
    n_kv_heads: int,
    head_dim: int,
    spec: dict,
    extractor,
    doc_a: str,
    doc_b: str,
    query: str,
    trial_id: int,
    verbose: bool,
) -> dict:
    """Run a single EGR semantic proof trial.

    Ingests two topically disjoint documents into separate KV-cache
    sessions, indexes their extracted state vectors, then checks that the
    attention-related query retrieves session A (the matching topic) first.
    Also measures cold vs. warm TTFT via ``save_state``/``load_state``.

    Args:
        llm: A ``llama_cpp.Llama`` instance; reset between sessions.
        n_kv_heads: KV head count, needed to parse the serialized state blob.
        head_dim: Per-head dimension, needed to parse the state blob.
        spec: Cache spec dict; must contain ``"model_id"``.
        extractor: ``MARStateExtractor`` mapping parsed keys -> state vector.
        doc_a: Document whose topic matches the query.
        doc_b: Distractor document.
        query: Retrieval probe text.
        trial_id: Index of this trial, recorded in the result.
        verbose: If True, print a one-line per-trial summary.

    Returns:
        dict of per-trial metrics (scores, margin, timings, blob size,
        extractor configuration).
    """
    # Imported lazily so importing this module does not require kvcos.
    from kvcos.core.blob_parser import parse_state_blob
    from kvcos.core.manifold_index import IndexEntry, ManifoldIndex

    dim = extractor.output_dim(spec)
    index = ManifoldIndex(dim=dim)

    # ── Session A: cold ingest + state snapshot ───────────
    llm.reset()
    t0 = time.perf_counter()
    llm(doc_a, max_tokens=1, temperature=0.0)
    cold_ms = (time.perf_counter() - t0) * 1000

    state_a = llm.save_state()
    blob_a = bytes(state_a.llama_state)
    blob_mb = len(blob_a) / 1024 / 1024

    # Warm TTFT: restore the snapshot and time a 1-token continuation.
    llm.reset()
    gc.collect()
    t0 = time.perf_counter()
    llm.load_state(state_a)
    llm(" ", max_tokens=1, temperature=0.0)
    warm_ms = (time.perf_counter() - t0) * 1000
    speedup = cold_ms / warm_ms if warm_ms > 0 else float("inf")

    # Parse + extract A
    t0 = time.perf_counter()
    parsed_a = parse_state_blob(blob_a, n_kv_heads=n_kv_heads, head_dim=head_dim)
    parse_ms = (time.perf_counter() - t0) * 1000

    t0 = time.perf_counter()
    ext_a = extractor.extract(parsed_a.keys, spec)
    extract_ms = (time.perf_counter() - t0) * 1000

    entry_a = IndexEntry(
        cache_id="session-a",
        task_description="Transformer attention mechanisms",
        model_id=spec["model_id"],
        created_at=datetime.now(timezone.utc).isoformat(),
        context_len=parsed_a.n_cells,
        l2_norm=ext_a.l2_norm,
    )
    index.add(ext_a.state_vec, entry_a)

    # ── Session B ─────────────────────────────────────────
    llm.reset()
    llm(doc_b, max_tokens=1, temperature=0.0)
    state_b = llm.save_state()
    blob_b = bytes(state_b.llama_state)
    parsed_b = parse_state_blob(blob_b, n_kv_heads=n_kv_heads, head_dim=head_dim)
    ext_b = extractor.extract(parsed_b.keys, spec)

    entry_b = IndexEntry(
        cache_id="session-b",
        task_description="DNA replication and molecular biology",
        model_id=spec["model_id"],
        created_at=datetime.now(timezone.utc).isoformat(),
        context_len=parsed_b.n_cells,
        l2_norm=ext_b.l2_norm,
    )
    index.add(ext_b.state_vec, entry_b)

    # ── Query ─────────────────────────────────────────────
    llm.reset()
    llm(query, max_tokens=1, temperature=0.0)
    state_q = llm.save_state()
    blob_q = bytes(state_q.llama_state)
    parsed_q = parse_state_blob(blob_q, n_kv_heads=n_kv_heads, head_dim=head_dim)

    t0 = time.perf_counter()
    ext_q = extractor.extract(parsed_q.keys, spec)
    t1 = time.perf_counter()
    results = index.search(ext_q.state_vec, top_k=2)
    t2 = time.perf_counter()

    search_ms = (t2 - t1) * 1000
    egr_total_ms = (t2 - t0) * 1000 + extract_ms  # query extract + search + index extract

    # Score extraction: match results back to sessions by description keywords.
    score_a = next((r["similarity"] for r in results if "attention" in r["task_description"].lower() or "transformer" in r["task_description"].lower()), None)
    score_b = next((r["similarity"] for r in results if "dna" in r["task_description"].lower() or "molecular" in r["task_description"].lower()), None)

    if score_a is None or score_b is None:
        # Fallback: use rank position when keyword matching fails.
        score_a = results[0]["similarity"] if results else 0
        score_b = results[1]["similarity"] if len(results) > 1 else 0

    margin = score_a - score_b
    # Correct = the top-ranked result is the attention/transformer session.
    correct = len(results) > 0 and (
        "attention" in results[0]["task_description"].lower()
        or "transformer" in results[0]["task_description"].lower()
    )

    layer_range_used = list(extractor.layer_range) if extractor.layer_range else "spec_default"

    trial = {
        "trial_id": trial_id,
        "n_cells_a": parsed_a.n_cells,
        "n_cells_b": parsed_b.n_cells,
        "n_cells_q": parsed_q.n_cells,
        "score_a": round(score_a, 6),
        "score_b": round(score_b, 6),
        "margin": round(margin, 6),
        "correct": correct,
        "cold_ms": round(cold_ms, 1),
        "warm_ms": round(warm_ms, 1),
        "speedup": round(speedup, 1),
        "parse_ms": round(parse_ms, 1),
        "extract_ms": round(extract_ms, 1),
        "search_ms": round(search_ms, 1),
        "egr_total_ms": round(egr_total_ms, 1),
        "blob_size_mb": round(blob_mb, 1),
        "layer_range_used": layer_range_used,
        "n_layers_used": extractor.layer_range[1] - extractor.layer_range[0] if extractor.layer_range else len(spec.get("extraction_layers", ())),
        "svd_rank": extractor.rank,
        "output_dim": dim,
    }

    if verbose:
        print(f" Trial {trial_id}: margin={margin:.4f} correct={correct} "
              f"cold={cold_ms:.0f}ms warm={warm_ms:.0f}ms "
              f"egr={egr_total_ms:.1f}ms cells_a={parsed_a.n_cells}")

    return trial
276
+
277
+
278
def main() -> int:
    """CLI entry point for the ENGRAM EGR semantic proof benchmark.

    Runs ``--n-trials`` independent trials (a fresh ``Llama`` instance per
    trial), aggregates retrieval margins, TTFT speedups, and EGR latency,
    writes a JSON report to ``--output``, and prints a PASS/FAIL verdict.

    Returns:
        0 on PASS, 1 on FAIL (usable directly as a process exit code).
    """
    parser = argparse.ArgumentParser(description="ENGRAM EGR Semantic Proof")
    parser.add_argument("--model", "-m", required=True, help="Path to GGUF model")
    parser.add_argument("--ctx", type=int, default=16384, help="Context window")
    parser.add_argument("--n-trials", type=int, default=3, help="Number of trials")
    parser.add_argument("--layer-range", type=int, nargs=2, default=[8, 24], help="Layer range start end")
    parser.add_argument("--gate-start", type=int, default=0, help="Skip top N singular values (0=none)")
    parser.add_argument("--compression", default="FP16", help="Compression method: FP16, INT8, Q8_0")
    parser.add_argument("--output", "-o", default="results/egr_semantic_proof.json", help="Output JSON path")
    parser.add_argument("--verbose", "-v", action="store_true")
    args = parser.parse_args()

    # Guard: the summary statistics below divide by len(trials) and reference
    # loop-scoped names (model_name), so at least one trial is required.
    if args.n_trials < 1:
        parser.error("--n-trials must be >= 1")

    # Heavy deps imported lazily so --help works without them installed.
    from llama_cpp import Llama
    import llama_cpp as lc

    from kvcos.core.cache_spec import make_spec_from_metadata
    from kvcos.core.types import StateExtractionMode
    from kvcos.core.state_extractor import MARStateExtractor

    layer_range = tuple(args.layer_range)

    print(f"ENGRAM EGR Semantic Proof — {args.n_trials} trials")
    print(f"Model: {args.model}")
    print(f"Context: {args.ctx}, Layer range: {layer_range}")
    print()

    trials: list[dict] = []
    for trial_id in range(args.n_trials):
        print(f"Trial {trial_id + 1}/{args.n_trials}...")

        # A fresh model instance per trial keeps trials independent.
        llm = Llama(model_path=args.model, n_ctx=args.ctx, n_gpu_layers=-1, verbose=False)
        meta = llm.metadata
        n_layers = int(meta.get("llama.block_count", "32"))
        n_heads = int(meta.get("llama.attention.head_count", "32"))
        n_kv_heads = int(meta.get("llama.attention.head_count_kv", "8"))
        head_dim = int(meta.get("llama.embedding_length", "4096")) // n_heads
        model_name = meta.get("general.name", Path(args.model).stem)

        spec = make_spec_from_metadata(
            model_id=model_name, n_layers=n_layers, n_heads=n_heads,
            n_kv_heads=n_kv_heads, head_dim=head_dim,
        )

        extractor = MARStateExtractor(
            mode=StateExtractionMode.SVD_PROJECT,
            rank=min(160, head_dim),  # rank cannot exceed head_dim
            layer_range=layer_range,
            gate_start=args.gate_start,
        )

        trial = run_trial(
            llm=llm, n_kv_heads=n_kv_heads, head_dim=head_dim,
            spec=spec, extractor=extractor,
            doc_a=DOC_A.strip(), doc_b=DOC_B.strip(), query=QUERY.strip(),
            trial_id=trial_id, verbose=args.verbose,
        )
        trials.append(trial)

        # Release model memory before the next trial.
        del llm
        gc.collect()

    # ── Summary statistics ────────────────────────────────
    margins = [t["margin"] for t in trials]
    speedups = [t["speedup"] for t in trials]
    egr_times = [t["egr_total_ms"] for t in trials]
    n_correct = sum(1 for t in trials if t["correct"])

    # Sample standard deviations (n-1 denominator), 0.0 for a single trial.
    mean_margin = sum(margins) / len(margins)
    std_margin = math.sqrt(sum((m - mean_margin) ** 2 for m in margins) / max(len(margins) - 1, 1)) if len(margins) > 1 else 0.0
    mean_speedup = sum(speedups) / len(speedups)
    std_speedup = math.sqrt(sum((s - mean_speedup) ** 2 for s in speedups) / max(len(speedups) - 1, 1)) if len(speedups) > 1 else 0.0
    mean_egr = sum(egr_times) / len(egr_times)
    std_egr = math.sqrt(sum((e - mean_egr) ** 2 for e in egr_times) / max(len(egr_times) - 1, 1)) if len(egr_times) > 1 else 0.0

    # Pass criteria: clear margin, perfect retrieval, fast EGR, big speedup.
    passed = (
        mean_margin > 0.05
        and n_correct == args.n_trials
        and mean_egr < 200
        and mean_speedup > 10
    )

    summary = {
        "mean_margin": round(mean_margin, 4),
        "std_margin": round(std_margin, 4),
        "mean_speedup": round(mean_speedup, 1),
        "std_speedup": round(std_speedup, 1),
        "mean_egr_ms": round(mean_egr, 1),
        "std_egr_ms": round(std_egr, 1),
        "n_correct": n_correct,
        "n_trials": args.n_trials,
        "min_margin": round(min(margins), 4),
        "max_margin": round(max(margins), 4),
        "pass": passed,
    }

    # ── Build output JSON ─────────────────────────────────
    doc_a_tokens = trials[0]["n_cells_a"] if trials else 0
    doc_b_tokens = trials[0]["n_cells_b"] if trials else 0
    query_tokens = trials[0]["n_cells_q"] if trials else 0

    output = {
        "metadata": {
            "model": model_name,
            "ctx": args.ctx,
            "layer_range": list(layer_range),
            "n_trials": args.n_trials,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "platform": "Apple M3 / macOS",
            "llama_cpp_version": lc.__version__,
        },
        "documents": {
            "doc_a": {"description": "Transformer attention mechanisms (ML)", "n_tokens": doc_a_tokens},
            "doc_b": {"description": "DNA replication and molecular biology", "n_tokens": doc_b_tokens},
            "query": {"text": QUERY, "n_tokens": query_tokens},
        },
        "trials": trials,
        "summary": summary,
    }

    # ── Write JSON ────────────────────────────────────────
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(output, indent=2))
    print(f"\nResults written to {output_path}")

    # ── Print summary ─────────────────────────────────────
    print()
    sep = "=" * 55
    print(sep)
    print("ENGRAM EGR Semantic Proof — Summary")
    print(sep)
    print(f"Model: {model_name}")
    print(f"Context: {args.ctx}")
    print(f"Layer range: {layer_range}")
    print(f"Trials: {args.n_trials}")
    print()
    print(f"K→K margin: {mean_margin:.4f} ± {std_margin:.4f} (min={min(margins):.4f}, max={max(margins):.4f})")
    print(f"Correct: {n_correct}/{args.n_trials}")
    print(f"Speedup: {mean_speedup:.1f}x ± {std_speedup:.1f}x")
    print(f"EGR ms: {mean_egr:.1f}ms ± {std_egr:.1f}ms")
    print()
    verdict = "PASS" if passed else "FAIL"
    # Collect the specific criteria that failed for the verdict line.
    reasons = []
    if mean_margin <= 0.05:
        reasons.append(f"margin {mean_margin:.4f} <= 0.05")
    if n_correct < args.n_trials:
        reasons.append(f"correct {n_correct}/{args.n_trials}")
    if mean_egr >= 200:
        reasons.append(f"egr {mean_egr:.1f}ms >= 200ms")
    if mean_speedup <= 10:
        reasons.append(f"speedup {mean_speedup:.1f}x <= 10x")
    reason_str = " | ".join(reasons) if reasons else "all criteria met"
    print(f"Verdict: {verdict} ({reason_str})")
    print(sep)

    return 0 if passed else 1
434
+
435
+
436
if __name__ == "__main__":
    # Propagate PASS (0) / FAIL (1) as the process exit code.
    sys.exit(main())
scripts/generate_alignment_dataset.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Generate alignment dataset: SVD state vectors for same docs on two models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import gc
7
+ import sys
8
+ import time
9
+ from pathlib import Path
10
+
11
+ import torch
12
+ from llama_cpp import Llama
13
+
14
+ from kvcos.core.blob_parser import parse_state_blob
15
+ from kvcos.core.cache_spec import make_spec_from_metadata
16
+ from kvcos.core.types import StateExtractionMode
17
+ from kvcos.core.state_extractor import MARStateExtractor
18
+
19
# 50 diverse documents: 5 per domain × 10 domains.
# Index ranges per domain are noted inline; compute_corpus_basis.py and
# generate_alignment_dataset.py both iterate this list in order, so the
# ordering is part of the dataset contract — do not reorder.
DOCUMENTS = [
    # ML/AI (0-4)
    "Gradient descent optimizes neural network parameters by computing partial derivatives of the loss function with respect to each weight and updating weights in the direction that reduces loss.",
    "Convolutional neural networks apply learned filters across spatial dimensions of input images, producing feature maps that detect edges, textures, and higher-level visual patterns.",
    "Recurrent neural networks process sequences by maintaining hidden state that carries information across time steps, enabling the model to capture temporal dependencies in data.",
    "Batch normalization normalizes layer inputs during training by subtracting the mini-batch mean and dividing by the mini-batch standard deviation, accelerating convergence.",
    "Dropout regularization randomly sets neuron activations to zero during training with probability p, preventing co-adaptation and reducing overfitting in deep networks.",
    # Biology (5-9)
    "Mitochondria generate ATP through oxidative phosphorylation, where electrons pass through complexes I through IV of the electron transport chain embedded in the inner membrane.",
    "Photosynthesis in chloroplasts converts carbon dioxide and water into glucose using light energy captured by chlorophyll molecules in the thylakoid membrane.",
    "The immune system distinguishes self from non-self through major histocompatibility complex proteins that present intracellular peptide fragments to T lymphocytes.",
    "Synaptic transmission involves calcium-dependent exocytosis of neurotransmitter vesicles at the presynaptic terminal followed by receptor binding at the postsynaptic membrane.",
    "Enzyme kinetics follow Michaelis-Menten dynamics where reaction velocity approaches Vmax asymptotically as substrate concentration increases relative to the Km constant.",
    # History (10-14)
    "The French Revolution of 1789 abolished feudal privileges and established principles of popular sovereignty that fundamentally altered European political structures.",
    "The Silk Road connected Chinese Han dynasty merchants with Roman traders across Central Asia, facilitating exchange of silk, spices, and metallurgical techniques.",
    "The Industrial Revolution began in eighteenth-century Britain with mechanized textile production, steam power, and factory organization transforming agrarian economies.",
    "Ancient Egyptian civilization developed hieroglyphic writing, monumental architecture, and sophisticated irrigation systems along the Nile River floodplain.",
    "The Renaissance in fifteenth-century Florence produced breakthroughs in perspective painting, humanist philosophy, and anatomical studies by artists like Leonardo.",
    # Cooking (15-19)
    "Maillard reactions between amino acids and reducing sugars at temperatures above 140 degrees Celsius produce the brown color and complex flavors of seared meat.",
    "Emulsification in mayonnaise relies on lecithin from egg yolks to stabilize the dispersion of oil droplets in the aqueous vinegar and lemon juice phase.",
    "Bread leavening occurs when Saccharomyces cerevisiae ferments sugars in dough, producing carbon dioxide gas that becomes trapped in the gluten network.",
    "Caramelization of sucrose begins at 160 degrees Celsius as the disaccharide breaks down into glucose and fructose which then undergo further dehydration.",
    "Brining meat in a salt solution denatures surface proteins and increases water retention through osmotic effects, producing juicier cooked results.",
    # Mathematics (20-24)
    "The fundamental theorem of calculus establishes that differentiation and integration are inverse operations, connecting the derivative of an integral to the original function.",
    "Eigenvalues of a square matrix A satisfy the characteristic equation det(A - lambda I) = 0, with corresponding eigenvectors spanning invariant subspaces.",
    "The central limit theorem states that the sampling distribution of the mean approaches a normal distribution as sample size increases regardless of population shape.",
    "Group theory studies algebraic structures with a binary operation satisfying closure, associativity, identity, and invertibility axioms.",
    "Fourier transforms decompose signals into constituent sinusoidal frequencies, enabling spectral analysis and convolution operations in the frequency domain.",
    # Literature (25-29)
    "Shakespeare's tragedies explore fatal character flaws: Hamlet's indecision, Macbeth's ambition, Othello's jealousy, and King Lear's prideful blindness.",
    "Stream of consciousness narration in Joyce's Ulysses follows Leopold Bloom's interior monologue through Dublin in a single day paralleling Homer's Odyssey.",
    "Magical realism in Garcia Marquez's fiction blends supernatural events with mundane Latin American reality, challenging Western rationalist literary conventions.",
    "The bildungsroman genre traces protagonist maturation from youth to adulthood, exemplified by Dickens's Great Expectations and Bronte's Jane Eyre.",
    "Haiku poetry constrains expression to seventeen syllables across three lines, using seasonal reference words to evoke natural imagery and transient emotion.",
    # Economics (30-34)
    "Supply and demand curves intersect at equilibrium price where quantity supplied equals quantity demanded, with shifts caused by external factors like income changes.",
    "Monetary policy adjusts interest rates and money supply to influence inflation, employment, and economic growth through central bank open market operations.",
    "Game theory models strategic interactions where each player's optimal decision depends on expectations about other players' choices and resulting payoff matrices.",
    "Comparative advantage explains why countries benefit from trade even when one nation produces all goods more efficiently than its trading partner.",
    "Behavioral economics incorporates psychological biases like loss aversion and anchoring into economic models, departing from purely rational agent assumptions.",
    # Physics (35-39)
    "Quantum entanglement creates correlations between particles such that measuring one instantaneously determines the state of the other regardless of separation distance.",
    "General relativity describes gravity as spacetime curvature caused by mass-energy, predicting phenomena like gravitational time dilation and black hole event horizons.",
    "Thermodynamic entropy measures disorder in a system, with the second law stating that total entropy of an isolated system can only increase over time.",
    "Superconductivity occurs below critical temperature when electron pairs form Cooper pairs that flow without resistance through the crystal lattice.",
    "The Heisenberg uncertainty principle establishes a fundamental limit on simultaneously knowing both position and momentum of a quantum particle.",
    # Geography (40-44)
    "Tectonic plate boundaries produce earthquakes at transform faults, volcanic activity at subduction zones, and new oceanic crust at mid-ocean spreading ridges.",
    "The Amazon River basin contains the largest tropical rainforest ecosystem, supporting approximately ten percent of all known species on Earth.",
    "Glacial erosion carved U-shaped valleys, cirques, and fjords during Pleistocene ice ages when ice sheets covered much of northern Europe and North America.",
    "Mediterranean climate zones occur on western continental coasts between latitudes 30 and 45 degrees, characterized by dry summers and mild wet winters.",
    "The Sahara Desert receives less than 25 millimeters of annual rainfall, with extreme diurnal temperature variation exceeding 30 degrees Celsius.",
    # Programming (45-49)
    "Hash tables provide average O(1) lookup time by mapping keys through a hash function to array indices, with collision resolution via chaining or open addressing.",
    "Garbage collection in managed runtimes automatically reclaims memory by tracing reachable objects from root references and freeing unreachable allocations.",
    "TCP ensures reliable data delivery through sequence numbers, acknowledgments, retransmission timers, and flow control using sliding window protocol.",
    "Database normalization eliminates redundancy by decomposing relations into smaller tables satisfying normal forms while preserving functional dependencies.",
    "Version control with git tracks content changes using a directed acyclic graph of commit objects, each containing a tree hash, parent references, and metadata.",
]
82
+
83
+
84
def main() -> int:
    """Generate paired state vectors for the same documents on two models.

    For each model, every document in DOCUMENTS[:n_docs] is ingested into a
    fresh KV cache, the serialized state is parsed, and a rank-128 SVD state
    vector is extracted (gate_start=6 skips the top singular directions).
    The two stacked tensors are saved together via torch.save so downstream
    alignment code can learn a cross-model mapping.

    Returns:
        0 on success (process exit code).
    """
    parser = argparse.ArgumentParser(description="Generate cross-model alignment dataset")
    parser.add_argument("--model-a", required=True, help="Path to model A GGUF")
    parser.add_argument("--model-b", required=True, help="Path to model B GGUF")
    parser.add_argument("--n-docs", type=int, default=50)
    parser.add_argument("--layer-range-a", type=int, nargs=2, default=[8, 24])
    parser.add_argument("--layer-range-b", type=int, nargs=2, default=[8, 24])
    parser.add_argument("--output", "-o", required=True)
    args = parser.parse_args()

    docs = DOCUMENTS[: args.n_docs]

    def extract_all(model_path: str, layer_range: tuple[int, int]) -> torch.Tensor:
        # One state vector per document; returns a (len(docs), dim) tensor.
        llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=-1, verbose=False)
        meta = llm.metadata
        # Metadata defaults match Llama-3-8B-class models (32 layers, 8 KV heads).
        n_layers = int(meta.get("llama.block_count", "32"))
        n_heads = int(meta.get("llama.attention.head_count", "32"))
        n_kv_heads = int(meta.get("llama.attention.head_count_kv", "8"))
        head_dim = int(meta.get("llama.embedding_length", "4096")) // n_heads
        model_name = meta.get("general.name", Path(model_path).stem)

        spec = make_spec_from_metadata(
            model_id=model_name, n_layers=n_layers, n_heads=n_heads,
            n_kv_heads=n_kv_heads, head_dim=head_dim,
        )
        ext = MARStateExtractor(
            mode=StateExtractionMode.SVD_PROJECT,
            rank=128, layer_range=layer_range, gate_start=6,
        )

        print(f"Extracting from {model_name} ({n_layers}L/{n_kv_heads}KV/{head_dim}D)...")
        vecs = []
        for i, doc in enumerate(docs):
            # Fresh cache per document: reset, ingest, snapshot, parse, extract.
            llm.reset()
            llm(doc.strip(), max_tokens=1, temperature=0.0)
            s = llm.save_state()
            p = parse_state_blob(bytes(s.llama_state), n_kv_heads=n_kv_heads, head_dim=head_dim)
            r = ext.extract(p.keys, spec)
            vecs.append(r.state_vec)
            if (i + 1) % 10 == 0:
                print(f" {i + 1}/{len(docs)}")

        # Free the model before loading the next one.
        del llm
        gc.collect()
        return torch.stack(vecs)

    vecs_a = extract_all(args.model_a, tuple(args.layer_range_a))
    vecs_b = extract_all(args.model_b, tuple(args.layer_range_b))

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    torch.save({"vecs_a": vecs_a, "vecs_b": vecs_b, "n_docs": len(docs)}, str(output_path))
    print(f"\nSaved: {output_path} ({vecs_a.shape[0]} docs, dim_a={vecs_a.shape[1]}, dim_b={vecs_b.shape[1]})")
    return 0
138
+
139
+
140
if __name__ == "__main__":
    # Exit with main()'s return code (0 on success).
    sys.exit(main())
scripts/index_knowledge.py ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ scripts/index_knowledge.py β€” Batch index markdown files into .eng binaries.
4
+
5
+ Processes markdown files from a directory (or single file), chunks them,
6
+ fingerprints each chunk, and writes .eng files to the knowledge index.
7
+
8
+ Usage:
9
+ # Index a single file
10
+ python scripts/index_knowledge.py --source path/to/file.md --project engram
11
+
12
+ # Index a directory recursively
13
+ python scripts/index_knowledge.py --source path/to/docs/ --project engram
14
+
15
+ # Re-index changed files only (incremental)
16
+ python scripts/index_knowledge.py --source path/to/docs/ --project engram --incremental
17
+
18
+ # Dry run β€” show what would be indexed
19
+ python scripts/index_knowledge.py --source path/to/docs/ --project engram --dry-run
20
+
21
+ # Force re-index everything
22
+ python scripts/index_knowledge.py --source path/to/docs/ --project engram --force
23
+
24
+ Environment:
25
+ ENGRAM_SESSIONS_DIR Base sessions dir (default: ~/.engram/sessions)
26
+ ENGRAM_KNOWLEDGE_DIR Knowledge index dir (default: ~/.engram/knowledge)
27
+ ENGRAM_MODEL_PATH Path to GGUF model for real fingerprints (optional)
28
+ PYTHONPATH=. Must include project root for kvcos imports
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import argparse
34
+ import hashlib
35
+ import json
36
+ import os
37
+ import sys
38
+ import time
39
+ from datetime import datetime, timezone
40
+ from pathlib import Path
41
+
42
+ # Ensure project root is importable
43
+ sys.path.insert(0, str(Path(__file__).parent.parent))
44
+
45
+ import torch
46
+
47
+ from kvcos.engram.chunker import Chunk, chunk_markdown, eng_filename, slug_from_path
48
+ from kvcos.engram.format import EigramEncoder
49
+ from kvcos.engram.manifest import ChunkRecord, Manifest, _content_hash, _file_hash
50
+
51
+
52
# ── Configuration ────────────────────────────────────────────────────

# Root directory for .eng knowledge binaries; overridable via the
# ENGRAM_KNOWLEDGE_DIR environment variable (tilde is expanded).
KNOWLEDGE_DIR = Path(
    os.environ.get("ENGRAM_KNOWLEDGE_DIR", "~/.engram/knowledge")
).expanduser()

# Directory-name components excluded from markdown discovery (matched
# against each path component, not substrings of the full path).
SKIP_PATTERNS = {
    "node_modules",
    ".venv",
    "__pycache__",
    ".git",
    ".eng",
    "site-packages",
}

# Exact filenames that are never indexed (repository boilerplate).
SKIP_FILES = {
    "LICENSE.md",
    "CHANGELOG.md",
    "SECURITY.md",
}
72
+
73
+
74
+ # ── Fingerprinting ──────────────────────────────────────────────────
75
+
76
+ from kvcos.engram.embedder import get_fingerprint as _get_fingerprint
77
+
78
+
79
+ # ── .eng Writer ──────────────────────────────────────────────────────
80
+
81
# Module-level encoder shared by all chunk writes; presumably EigramEncoder
# keeps no state between encode() calls — confirm in kvcos.engram.format
# before relying on concurrent use.
_encoder = EigramEncoder()
82
+
83
+
84
def _write_knowledge_eng(
    fp_tensor: torch.Tensor,
    chunk: Chunk,
    eng_path: Path,
    session_id: str,
    fp_source: str,
    source_path: str,
    project: str,
    chunk_index: int,
    chunk_total: int,
) -> Path:
    """Write a .eng binary for a knowledge chunk.

    Encodes the chunk fingerprint into the .eng binary format and writes it
    to *eng_path*, plus a ``<eng_path>.meta.json`` sidecar carrying the full
    chunk metadata that does not fit in the binary.

    Args:
        fp_tensor: 1-D fingerprint vector for the chunk (assumes Fourier-v2
            layout; stored as vec_fourier too only when its length is 2048
            — TODO confirm against EigramEncoder's format).
        chunk: Parsed markdown chunk (text, char offsets, headers).
        eng_path: Destination path for the binary; parents are created.
        session_id: Cache/session identifier recorded in binary and sidecar.
        fp_source: Fingerprint provenance label (truncated to 16 chars for
            the binary's model_id field).
        source_path: Original markdown file path (hashed into corpus_hash).
        project: Project name, recorded in the sidecar only.
        chunk_index: Position of this chunk within the source file.
        chunk_total: Total number of chunks in the source file.

    Returns:
        The *eng_path* that was written.
    """
    dim = fp_tensor.shape[0]
    # Knowledge chunks have no per-document/FCB basis vectors: the basis
    # fields are zero-filled placeholders sized to the format's expectations.
    basis_rank = 116
    vec_perdoc = torch.zeros(basis_rank)
    vec_fcdb = torch.zeros(basis_rank)
    joint_center = torch.zeros(128)

    # Truncate description to 256 chars for binary
    description = chunk.text[:256]

    blob = _encoder.encode(
        vec_perdoc=vec_perdoc,
        vec_fcdb=vec_fcdb,
        joint_center=joint_center,
        # Hash of the source path (not contents) — identifies origin file.
        corpus_hash=hashlib.sha256(source_path.encode()).hexdigest()[:32],
        model_id=fp_source[:16],
        basis_rank=basis_rank,
        n_corpus=0,
        layer_range=(0, 0),  # no KV layers involved for knowledge chunks
        context_len=len(chunk.text),
        l2_norm=float(torch.norm(fp_tensor).item()),
        scs=0.0,
        margin_proof=0.0,
        task_description=description,
        cache_id=session_id,
        # vec_fourier slot only accepts the legacy 2048-dim layout.
        vec_fourier=fp_tensor if dim == 2048 else None,
        vec_fourier_v2=fp_tensor,
        confusion_flag=False,
    )

    eng_path.parent.mkdir(parents=True, exist_ok=True)
    with open(eng_path, "wb") as f:
        f.write(blob)

    # Write extended sidecar with full metadata (the binary truncates text).
    meta = {
        "cache_id": session_id,
        "task_description": chunk.text[:500],
        "source_path": source_path,
        "project": project,
        "fp_source": fp_source,
        "chunk_index": chunk_index,
        "chunk_total": chunk_total,
        "char_start": chunk.char_start,
        "char_end": chunk.char_end,
        "headers": list(chunk.headers),
        "ts": time.time(),
        "type": "knowledge",
    }
    meta_path = Path(str(eng_path) + ".meta.json")
    with open(meta_path, "w") as f:
        json.dump(meta, f, indent=2)

    return eng_path
149
+
150
+
151
+ # ── Discovery ────────────────────────────────────────────────────────
152
+
153
def discover_markdown_files(source: Path) -> list[Path]:
    """Find all indexable .md files under source path."""
    # A single-file source is indexable iff it is itself markdown.
    if source.is_file():
        return [source] if source.suffix == ".md" else []

    def _indexable(candidate: Path) -> bool:
        # Reject anything living inside an excluded directory.
        if any(part in SKIP_PATTERNS for part in candidate.parts):
            return False
        # Reject boilerplate filenames.
        if candidate.name in SKIP_FILES:
            return False
        # Reject empty files.
        return candidate.stat().st_size > 0

    return [p for p in sorted(source.rglob("*.md")) if _indexable(p)]
172
+
173
+
174
+ # ── Main Pipeline ────────────────────────────────────────────────────
175
+
176
def index_file(
    source_path: Path,
    project: str,
    manifest: Manifest,
    date_str: str,
    dry_run: bool = False,
    force: bool = False,
) -> tuple[Manifest, int]:
    """
    Index a single markdown file into .eng chunks.

    The file is skipped (0 chunks) when its content hash is already
    registered in the manifest, unless ``force`` is set. In ``dry_run``
    mode the file is chunked and counted but nothing is written and the
    manifest is left untouched.

    Returns:
        (updated_manifest, chunks_written)
    """
    # errors="replace" keeps indexing alive on files with bad encoding.
    content = source_path.read_text(encoding="utf-8", errors="replace")
    content_hash = _content_hash(content)

    # Incremental: skip if unchanged (hash already recorded in manifest)
    if not force and not manifest.needs_reindex(str(source_path), content_hash):
        return manifest, 0

    slug = slug_from_path(str(source_path))
    # Context prefix is prepended to every chunk before fingerprinting.
    context = f"Source: {source_path.name} | Project: {project}"

    # Chunk the content
    chunks = chunk_markdown(
        content,
        max_chars=2000,
        min_chars=100,
        context_prefix=context,
    )

    if dry_run:
        print(f"  [DRY RUN] {source_path.name}: {len(chunks)} chunks, "
              f"{len(content)} chars")
        return manifest, len(chunks)

    # Write .eng for each chunk
    chunk_records: list[ChunkRecord] = []
    project_dir = KNOWLEDGE_DIR / project
    project_dir.mkdir(parents=True, exist_ok=True)

    for chunk in chunks:
        filename = eng_filename(
            project=project,
            slug=slug,
            date=date_str,
            chunk_index=chunk.index,
            chunk_total=len(chunks),
        )
        eng_path = project_dir / filename

        # Fingerprint the chunk text (with context)
        fp_tensor, fp_source = _get_fingerprint(chunk.text)

        # Multi-chunk files get a _cNNN suffix so cache ids stay unique.
        session_id = f"{project}/{slug}"
        if len(chunks) > 1:
            session_id += f"_c{chunk.index + 1:03d}"

        _write_knowledge_eng(
            fp_tensor=fp_tensor,
            chunk=chunk,
            eng_path=eng_path,
            session_id=session_id,
            fp_source=fp_source,
            source_path=str(source_path),
            project=project,
            chunk_index=chunk.index,
            chunk_total=len(chunks),
        )

        chunk_records.append(ChunkRecord(
            eng_path=str(eng_path),
            chunk_index=chunk.index,
            chunk_total=len(chunks),
            char_start=chunk.char_start,
            char_end=chunk.char_end,
            indexed_at=time.time(),
        ))

    # Register in manifest — register() returns a new manifest, which the
    # caller must keep (the instance is treated as immutable here).
    manifest = manifest.register(
        source_path=str(source_path),
        content_hash=content_hash,
        project=project,
        file_size=len(content.encode("utf-8")),
        chunks=chunk_records,
    )

    return manifest, len(chunks)
266
+
267
+
268
def index_batch(
    source: Path,
    project: str,
    incremental: bool = True,
    dry_run: bool = False,
    force: bool = False,
) -> dict:
    """
    Index all markdown files under source path.

    Args:
        source: File or directory to scan for .md files.
        project: Project namespace under KNOWLEDGE_DIR.
        incremental: Recorded in the summary for reporting only — the
            per-file skip decision is driven by the manifest content
            hash (and overridden by ``force``), not by this flag.
        dry_run: Chunk and count without writing anything.
        force: Re-index files even when their content hash is unchanged.

    Returns:
        Summary dict with stats, or ``{"error": ...}`` when no .md files
        are found.
    """
    manifest = Manifest.load()
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    files = discover_markdown_files(source)
    if not files:
        return {"error": f"No .md files found under {source}"}

    stats = {
        "source": str(source),
        "project": project,
        "files_found": len(files),
        "files_indexed": 0,
        "files_skipped": 0,
        "chunks_written": 0,
        "dry_run": dry_run,
        "incremental": incremental,
        "date": date_str,
    }

    # FIX: dropped needless f-prefix on the constant banner string (F541).
    print("\nENGRAM Knowledge Indexer")
    print(f"{'=' * 50}")
    print(f"Source: {source}")
    print(f"Project: {project}")
    print(f"Files found: {len(files)}")
    print(f"Mode: {'DRY RUN' if dry_run else 'LIVE'}")
    print(f"{'=' * 50}\n")

    for i, fpath in enumerate(files, 1):
        # FIX: removed dead local `prev_chunks = manifest.total_chunks`
        # (assigned but never read).
        manifest, n_chunks = index_file(
            source_path=fpath,
            project=project,
            manifest=manifest,
            date_str=date_str,
            dry_run=dry_run,
            force=force,
        )

        if n_chunks > 0:
            stats["files_indexed"] += 1
            stats["chunks_written"] += n_chunks
            status = "INDEXED" if not dry_run else "DRY RUN"
            print(f"  [{i}/{len(files)}] {status}: {fpath.name} "
                  f"β†’ {n_chunks} chunks")
        else:
            stats["files_skipped"] += 1
            print(f"  [{i}/{len(files)}] SKIP (unchanged): {fpath.name}")

    print(f"\n{'=' * 50}")
    print(f"Done. {stats['files_indexed']} files β†’ "
          f"{stats['chunks_written']} chunks")
    if stats["files_skipped"]:
        print(f"Skipped {stats['files_skipped']} unchanged files")
    print(f"Manifest: {manifest.summary()}")
    print(f"{'=' * 50}\n")

    return stats
337
+
338
+
339
+ # ── CLI ──────────────────────────────────────────────────────────────
340
+
341
def main() -> None:
    """CLI entry point: parse arguments and run the batch indexer.

    Exits with status 1 when the source path does not exist or the
    indexer reports an error.
    """
    parser = argparse.ArgumentParser(
        description="Index markdown files into ENGRAM .eng knowledge files"
    )
    parser.add_argument(
        "--source", "-s",
        required=True,
        help="Path to file or directory to index",
    )
    parser.add_argument(
        "--project", "-p",
        default="engram",
        help="Project namespace (default: engram)",
    )
    parser.add_argument(
        "--dry-run", "-n",
        action="store_true",
        help="Show what would be indexed without writing",
    )
    parser.add_argument(
        "--force", "-f",
        action="store_true",
        help="Re-index all files regardless of content hash",
    )
    # BUGFIX: the old `action="store_true", default=True` made this flag a
    # no-op — it could never be set to False. BooleanOptionalAction keeps
    # `--incremental`/`-i` accepted and adds `--no-incremental` to disable.
    parser.add_argument(
        "--incremental", "-i",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Skip unchanged files (default: true)",
    )

    args = parser.parse_args()
    source = Path(args.source).resolve()

    if not source.exists():
        print(f"Error: {source} does not exist", file=sys.stderr)
        sys.exit(1)

    stats = index_batch(
        source=source,
        project=args.project,
        incremental=args.incremental,
        dry_run=args.dry_run,
        force=args.force,
    )

    if "error" in stats:
        print(f"Error: {stats['error']}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
scripts/paper_figures.py ADDED
@@ -0,0 +1,1084 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """ENGRAM Research Paper β€” Figure Generation.
3
+
4
+ Generates all 15 figures for the ENGRAM paper from results/ data files.
5
+ Output: results/figures/*.pdf (LaTeX-compatible, 300 DPI)
6
+
7
+ Usage:
8
+ cd ENGRAM && python scripts/paper_figures.py
9
+ python scripts/paper_figures.py --only fig02 # Single figure
10
+ python scripts/paper_figures.py --list # List all figures
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import json
17
+ import sys
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ import matplotlib
22
+ matplotlib.use("Agg") # Non-interactive backend
23
+ import matplotlib.pyplot as plt
24
+ import matplotlib.ticker as ticker
25
+ import numpy as np
26
+
27
# ── Configuration ────────────────────────────────────────────────────────

# All inputs live under results/; figures are written to results/figures/.
RESULTS_DIR = Path(__file__).parent.parent / "results"
FIGURES_DIR = RESULTS_DIR / "figures"
ABSOLUTE_DIR = RESULTS_DIR / "absolute"
STRESS_DIR = RESULTS_DIR / "stress"

# LaTeX-compatible style: serif fonts, 300 DPI, tight bbox, light grid.
plt.rcParams.update({
    "font.family": "serif",
    "font.size": 11,
    "axes.labelsize": 12,
    "axes.titlesize": 13,
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
    "legend.fontsize": 10,
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "savefig.bbox": "tight",
    "savefig.pad_inches": 0.1,
    "axes.grid": True,
    "grid.alpha": 0.3,
    "axes.spines.top": False,
    "axes.spines.right": False,
})

# Colorblind-safe palette (Paul Tol-style hex values).
COLORS = {
    "blue": "#4477AA",
    "orange": "#EE6677",
    "green": "#228833",
    "purple": "#AA3377",
    "cyan": "#66CCEE",
    "grey": "#BBBBBB",
    "red": "#CC3311",
    "teal": "#009988",
    "yellow": "#CCBB44",
    "indigo": "#332288",
}

# Shared semantic colors for pass/fail encodings across figures.
PASS_COLOR = COLORS["green"]
FAIL_COLOR = COLORS["red"]
69
+
70
+
71
+ # ── Data Loading ─────────────────────────────────────────────────────────
72
+
73
def load_json(path: Path) -> dict[str, Any]:
    """Load JSON file and return parsed dict."""
    with path.open() as fh:
        return json.load(fh)
76
+
77
+
78
def save_figure(fig: plt.Figure, name: str) -> None:
    """Save figure as PDF and PNG."""
    # Create the output directory lazily on first save.
    FIGURES_DIR.mkdir(parents=True, exist_ok=True)
    for ext in ("pdf", "png"):
        fig.savefig(FIGURES_DIR / f"{name}.{ext}", format=ext)
    # Close to release memory when generating many figures in one run.
    plt.close(fig)
    print(f"  Saved: {name}.pdf + .png")
85
+
86
+
87
+ # ── Figure 2: Frequency Combination Comparison ──────────────────────────
88
+
89
def fig02_frequency_comparison() -> None:
    """Bar chart: 6 frequency combos Γ— recall and margin.

    Reads ``absolute/multifreq_comparison.json`` and renders two panels:
    (a) Recall@1 per combination, (b) mean margin scaled by 10Β³. The
    "f0+f1" combination is highlighted in green; others are blue.
    """
    print("Fig 02: Frequency combination comparison...")
    data = load_json(ABSOLUTE_DIR / "multifreq_comparison.json")
    results = data["results"]

    combos = list(results.keys())
    recalls = [results[c]["recall"] * 100 for c in combos]
    margins = [results[c]["margin_mean"] * 1000 for c in combos]  # Γ—1000
    failures = [results[c]["n_failures"] for c in combos]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4.5))

    # Left: Recall
    x = np.arange(len(combos))
    bar_colors = [COLORS["green"] if c == "f0+f1" else COLORS["blue"] for c in combos]
    bars = ax1.bar(x, recalls, color=bar_colors, edgecolor="white", linewidth=0.5)
    ax1.set_xticks(x)
    ax1.set_xticklabels(combos, rotation=30, ha="right")
    ax1.set_ylabel("Recall@1 (%)")
    ax1.set_title("(a) Recall by Frequency Combination")
    ax1.set_ylim(60, 102)
    # Per-bar labels: recall percentage plus failure count.
    for bar, val, nf in zip(bars, recalls, failures):
        ax1.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5,
                 f"{val:.0f}%\n({nf} fail)", ha="center", va="bottom", fontsize=8)

    # Right: Mean margin
    bars2 = ax2.bar(x, margins, color=bar_colors, edgecolor="white", linewidth=0.5)
    ax2.set_xticks(x)
    ax2.set_xticklabels(combos, rotation=30, ha="right")
    ax2.set_ylabel("Mean Margin (Γ—10Β³)")
    ax2.set_title("(b) Mean Discrimination Margin")
    for bar, val in zip(bars2, margins):
        ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.05,
                 f"{val:.1f}", ha="center", va="bottom", fontsize=8)

    fig.suptitle("Multi-Frequency Fingerprint Ablation (N=200)", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig02_frequency_comparison")
128
+
129
+
130
+ # ── Figure 3: Margin Power Law ──────────────────────────────────────────
131
+
132
def fig03_margin_power_law() -> None:
    """Log-log plot: margin vs N for f1 and f0+f1 with fitted power laws.

    Loads the pre-fitted power-law parameters (A, alpha) from the two
    results files and overlays the fitted curves on the measured points.
    """
    print("Fig 03: Margin power law...")
    f1_data = load_json(ABSOLUTE_DIR / "margin_compression_law.json")
    f0f1_data = load_json(ABSOLUTE_DIR / "multifreq_law.json")

    # f1 data — JSON keys are corpus sizes as strings.
    f1_n = [int(n) for n in f1_data["results"].keys()]
    f1_margins = [f1_data["results"][str(n)]["mean_margin"] for n in f1_n]
    f1_alpha = f1_data["alpha"]
    f1_A = f1_data["A"]

    # f0+f1 data
    f0f1_n = [int(n) for n in f0f1_data["results"].keys()]
    f0f1_margins = [f0f1_data["results"][str(n)]["mean_margin"] for n in f0f1_n]
    f0f1_alpha = f0f1_data["alpha"]
    f0f1_A = f0f1_data["A"]

    fig, ax = plt.subplots(figsize=(7, 5))

    # Data points
    ax.scatter(f1_n, f1_margins, color=COLORS["orange"], s=60, zorder=5, label="f1 (data)")
    ax.scatter(f0f1_n, f0f1_margins, color=COLORS["blue"], s=60, zorder=5, label="f0+f1 (data)")

    # Fitted curves: margin = A * N^alpha
    n_fit = np.linspace(3, 250, 200)
    f1_fit = f1_A * n_fit ** f1_alpha
    f0f1_fit = f0f1_A * n_fit ** f0f1_alpha

    ax.plot(n_fit, f1_fit, color=COLORS["orange"], linestyle="--", alpha=0.7,
            label=f"f1 fit: {f1_A:.4f}Β·N^{{{f1_alpha:.3f}}}")
    ax.plot(n_fit, f0f1_fit, color=COLORS["blue"], linestyle="--", alpha=0.7,
            label=f"f0+f1 fit: {f0f1_A:.4f}Β·N^{{{f0f1_alpha:.3f}}}")

    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel("Corpus Size N")
    ax.set_ylabel("Mean Discrimination Margin")
    ax.set_title("Margin Power Law: Graceful Degradation")
    ax.legend(loc="upper right")
    # Plain (non-scientific) tick labels on the log x-axis.
    ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
    ax.set_xticks([5, 10, 20, 50, 100, 200])

    # Annotation
    ax.annotate(
        f"f0+f1: Ξ±={f0f1_alpha:.3f} (shallower)\nf1: Ξ±={f1_alpha:.3f}",
        xy=(100, f0f1_A * 100 ** f0f1_alpha), xytext=(30, 0.003),
        arrowprops={"arrowstyle": "->", "color": COLORS["grey"]},
        fontsize=9, bbox={"boxstyle": "round,pad=0.3", "facecolor": "wheat", "alpha": 0.5}
    )

    fig.tight_layout()
    save_figure(fig, "fig03_margin_power_law")
185
+
186
+
187
+ # ── Figure 4: Recall vs N β€” Fourier vs FCDB ─────────────────────────────
188
+
189
def fig04_recall_vs_n() -> None:
    """Fourier f0+f1 recall vs FCDB recall across corpus sizes.

    Same-model Fourier results come from ``absolute/multifreq_law.json``;
    cross-model FCDB results from ``stress/STRESS_SUMMARY.json``. A
    vertical marker annotates the FCDB collapse observed at N=100.
    """
    print("Fig 04: Recall vs N (Fourier vs FCDB)...")
    f0f1_data = load_json(ABSOLUTE_DIR / "multifreq_law.json")
    stress_data = load_json(STRESS_DIR / "STRESS_SUMMARY.json")

    # Fourier f0+f1
    fourier_n = [int(n) for n in f0f1_data["results"].keys()]
    fourier_recall = [f0f1_data["results"][str(n)]["recall"] * 100 for n in fourier_n]

    # FCDB cross-model
    fcdb_map = stress_data["recall_at_1_vs_n_fcdb"]
    fcdb_n = [int(n) for n in fcdb_map.keys()]
    fcdb_recall = [v * 100 for v in fcdb_map.values()]

    fig, ax = plt.subplots(figsize=(7, 5))

    ax.plot(fourier_n, fourier_recall, "o-", color=COLORS["blue"], linewidth=2,
            markersize=7, label="Fourier f0+f1 (same-model)", zorder=5)
    ax.plot(fcdb_n, fcdb_recall, "s--", color=COLORS["orange"], linewidth=2,
            markersize=7, label="FCDB (cross-model)", zorder=5)

    # Collapse annotation
    ax.axvline(x=100, color=COLORS["red"], linestyle=":", alpha=0.5)
    ax.annotate("FCDB collapse\n(N=100)", xy=(100, 30), xytext=(140, 50),
                arrowprops={"arrowstyle": "->", "color": COLORS["red"]},
                fontsize=9, color=COLORS["red"])

    ax.set_xlabel("Corpus Size N")
    ax.set_ylabel("Recall@1 (%)")
    ax.set_title("Retrieval Recall vs Corpus Size")
    ax.legend(loc="lower left")
    ax.set_ylim(-5, 105)
    ax.set_xlim(0, 210)

    fig.tight_layout()
    save_figure(fig, "fig04_recall_vs_n")
226
+
227
+
228
+ # ── Figure 5: Cross-Model Strategy Comparison ───────────────────────────
229
+
230
def fig05_cross_model_strategies() -> None:
    """Horizontal bar chart: 9 cross-model methods Γ— margin.

    The data is hard-coded (name, margin, pass-flag) rather than loaded
    from results/ — NOTE(review): keep in sync with the source experiment.
    """
    print("Fig 05: Cross-model strategy comparison...")

    # (label, retrieval margin, passed?) — sorted worst to best.
    strategies = [
        ("CCA", -0.420, False),
        ("Residual FCB", -0.382, False),
        ("Procrustes", -0.104, False),
        ("RR (K=20)", -0.066, False),
        ("FCB+ridge", -0.017, False),
        ("Contrastive", 0.001, True),
        ("JCB", 0.011, True),
        ("JCB+delta", 0.037, True),
        ("FCDB", 0.124, True),
    ]

    names = [s[0] for s in strategies]
    margins = [s[1] for s in strategies]
    colors = [PASS_COLOR if s[2] else FAIL_COLOR for s in strategies]

    fig, ax = plt.subplots(figsize=(8, 5))
    y_pos = np.arange(len(names))

    bars = ax.barh(y_pos, margins, color=colors, edgecolor="white", linewidth=0.5, height=0.7)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(names)
    ax.set_xlabel("Retrieval Margin")
    ax.set_title("Cross-Model Transfer Strategies (Llama 3B β†’ 8B)")
    ax.axvline(x=0, color="black", linewidth=0.8)

    # Value labels — placed just outside each bar, on the signed side.
    for bar, val in zip(bars, margins):
        x_offset = 0.005 if val >= 0 else -0.005
        ha = "left" if val >= 0 else "right"
        ax.text(val + x_offset, bar.get_y() + bar.get_height() / 2,
                f"{val:+.3f}", ha=ha, va="center", fontsize=9, fontweight="bold")

    # Legend
    from matplotlib.patches import Patch
    legend_elements = [Patch(facecolor=PASS_COLOR, label="PASS (margin > 0)"),
                       Patch(facecolor=FAIL_COLOR, label="FAIL (margin ≀ 0)")]
    ax.legend(handles=legend_elements, loc="lower right")

    fig.tight_layout()
    save_figure(fig, "fig05_cross_model_strategies")
275
+
276
+
277
+ # ── Figure 6: CKA Layer Similarity ──────────────────────────────────────
278
+
279
def fig06_cka_layers() -> None:
    """CKA similarity per layer: within-family vs cross-family.

    Plots per-layer CKA curves from ``FAMILY_CKA.json`` (Llama 3B↔8B)
    and ``FAMILY_CKA_CROSS.json`` (Llama↔Qwen), with a 0.95 reference
    line and the minimum of each curve annotated.
    """
    print("Fig 06: CKA layer similarity...")
    within = load_json(ABSOLUTE_DIR / "FAMILY_CKA.json")
    cross = load_json(ABSOLUTE_DIR / "FAMILY_CKA_CROSS.json")

    within_cka = within["layer_ckas"]
    cross_cka = cross["layer_ckas"]
    # Assumes both files cover the same number of layers — TODO confirm.
    layers = list(range(len(within_cka)))

    fig, ax = plt.subplots(figsize=(8, 4.5))

    ax.plot(layers, within_cka, "o-", color=COLORS["blue"], markersize=5, linewidth=1.5,
            label=f"Within-family (Llama 3B↔8B), ΞΌ={within['mean_cka']:.3f}")
    ax.plot(layers, cross_cka, "s--", color=COLORS["orange"], markersize=5, linewidth=1.5,
            label=f"Cross-family (Llama↔Qwen), ΞΌ={cross['mean_cka']:.3f}")

    ax.axhline(y=0.95, color=COLORS["grey"], linestyle=":", alpha=0.5, label="0.95 threshold")
    ax.set_xlabel("Layer Index")
    ax.set_ylabel("CKA Similarity")
    ax.set_title("Centered Kernel Alignment Across Layers")
    ax.legend(loc="lower left", fontsize=9)
    ax.set_ylim(0.85, 1.0)

    # Annotate min of each curve.
    min_idx_w = int(np.argmin(within_cka))
    min_idx_c = int(np.argmin(cross_cka))
    ax.annotate(f"min={within_cka[min_idx_w]:.3f}", xy=(min_idx_w, within_cka[min_idx_w]),
                xytext=(min_idx_w + 2, within_cka[min_idx_w] - 0.01),
                fontsize=8, color=COLORS["blue"])
    ax.annotate(f"min={cross_cka[min_idx_c]:.3f}", xy=(min_idx_c, cross_cka[min_idx_c]),
                xytext=(min_idx_c + 2, cross_cka[min_idx_c] - 0.01),
                fontsize=8, color=COLORS["orange"])

    fig.tight_layout()
    save_figure(fig, "fig06_cka_layers")
315
+
316
+
317
+ # ── Figure 7: Domain Confusion Before/After ──────────────────────────────
318
+
319
def fig07_confusion_matrix() -> None:
    """Heatmaps: f1 confusion vs f0+f1 confusion across domains.

    Keys in the confusion dicts are "src -> dst" strings; the domain list
    is the sorted union of all sources and destinations so both panels
    share the same axes.
    """
    print("Fig 07: Domain confusion matrix...")
    data = load_json(ABSOLUTE_DIR / "confusion_analysis.json")

    domains = sorted({
        k.split(" -> ")[0] for k in data["f1_confusion"].keys()
    } | {
        k.split(" -> ")[1] for k in data["f1_confusion"].keys()
    })

    def build_matrix(confusion_dict: dict[str, int]) -> np.ndarray:
        """Turn "src -> dst" counts into a dense (n_domains, n_domains) matrix."""
        n = len(domains)
        mat = np.zeros((n, n))
        for key, count in confusion_dict.items():
            src, dst = key.split(" -> ")
            # list.index is O(n) but the domain list is tiny.
            if src in domains and dst in domains:
                i = domains.index(src)
                j = domains.index(dst)
                mat[i, j] = count
        return mat

    f1_mat = build_matrix(data["f1_confusion"])
    best_mat = build_matrix(data["best_confusion"])

    # Short domain labels
    short_labels = [d[:6] for d in domains]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    im1 = ax1.imshow(f1_mat, cmap="Reds", aspect="auto", interpolation="nearest")
    ax1.set_xticks(range(len(domains)))
    ax1.set_yticks(range(len(domains)))
    ax1.set_xticklabels(short_labels, rotation=45, ha="right", fontsize=8)
    ax1.set_yticklabels(short_labels, fontsize=8)
    ax1.set_title("(a) f1 Only β€” 28 Failures")
    ax1.set_xlabel("Confused With")
    ax1.set_ylabel("True Domain")
    fig.colorbar(im1, ax=ax1, shrink=0.8)

    im2 = ax2.imshow(best_mat, cmap="Blues", aspect="auto", interpolation="nearest")
    ax2.set_xticks(range(len(domains)))
    ax2.set_yticks(range(len(domains)))
    ax2.set_xticklabels(short_labels, rotation=45, ha="right", fontsize=8)
    ax2.set_yticklabels(short_labels, fontsize=8)
    ax2.set_title("(b) f0+f1 β€” 4 Failures")
    ax2.set_xlabel("Confused With")
    ax2.set_ylabel("True Domain")
    fig.colorbar(im2, ax=ax2, shrink=0.8)

    fig.suptitle("Domain Confusion Analysis (N=200)", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig07_confusion_matrix")
372
+
373
+
374
+ # ── Figure 8: Domain Recall Radar ────────────────────────────────────────
375
+
376
def fig08_domain_recall_radar() -> None:
    """Radar chart: per-domain recall with f0+f1.

    Reads ``domain_recall`` from the confusion analysis and draws a
    closed polygon on a polar axis, annotating the weakest domain.
    """
    print("Fig 08: Domain recall radar...")
    data = load_json(ABSOLUTE_DIR / "confusion_analysis.json")
    domain_recall = data["domain_recall"]

    categories = list(domain_recall.keys())
    values = [domain_recall[c] * 100 for c in categories]

    # Close the polygon by repeating the first vertex at the end.
    values_closed = values + [values[0]]
    n = len(categories)
    angles = [i / n * 2 * np.pi for i in range(n)]
    angles_closed = angles + [angles[0]]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"projection": "polar"})

    ax.plot(angles_closed, values_closed, "o-", color=COLORS["blue"], linewidth=2, markersize=6)
    ax.fill(angles_closed, values_closed, color=COLORS["blue"], alpha=0.15)

    ax.set_xticks(angles)
    ax.set_xticklabels([c.replace("_", "\n") for c in categories], fontsize=9)
    ax.set_ylim(80, 102)
    ax.set_yticks([85, 90, 95, 100])
    ax.set_yticklabels(["85%", "90%", "95%", "100%"], fontsize=8)
    ax.set_title("Per-Domain Recall@1 (f0+f1, N=200)", pad=20)

    # Annotate minimum recall domain in red.
    min_idx = int(np.argmin(values))
    ax.annotate(f"{values[min_idx]:.0f}%",
                xy=(angles[min_idx], values[min_idx]),
                xytext=(angles[min_idx] + 0.2, values[min_idx] - 3),
                fontsize=9, fontweight="bold", color=COLORS["red"])

    fig.tight_layout()
    save_figure(fig, "fig08_domain_recall_radar")
412
+
413
+
414
+ # ── Figure 9: HNSW Benchmark ────────────────────────────────────────────
415
+
416
def fig09_hnsw_benchmark() -> None:
    """Bar chart: HNSW vs brute-force latency."""
    print("Fig 09: HNSW benchmark...")
    data = load_json(ABSOLUTE_DIR / "HNSW_BENCH.json")

    fig, (ax_lat, ax_rec) = plt.subplots(1, 2, figsize=(9, 4))

    method_names = ["Brute-Force", "HNSW"]
    method_colors = [COLORS["orange"], COLORS["blue"]]

    # (a) Query latency in microseconds, value label above each bar.
    lat_values = [data["bf_latency_us"], data["hnsw_latency_us"]]
    lat_bars = ax_lat.bar(method_names, lat_values, color=method_colors,
                          edgecolor="white", width=0.5)
    ax_lat.set_ylabel("Latency (ΞΌs)")
    ax_lat.set_title(f"(a) Search Latency β€” {data['speedup']:.1f}Γ— Speedup")
    for rect, us in zip(lat_bars, lat_values):
        ax_lat.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 3,
                    f"{us:.1f} ΞΌs", ha="center", va="bottom", fontsize=10)

    # (b) Recall@1 as a percentage — y-range zoomed to show the tiny gap.
    rec_values = [data["bruteforce_recall"] * 100, data["hnsw_recall"] * 100]
    rec_bars = ax_rec.bar(method_names, rec_values, color=method_colors,
                          edgecolor="white", width=0.5)
    ax_rec.set_ylabel("Recall@1 (%)")
    ax_rec.set_title("(b) Recall Preserved")
    ax_rec.set_ylim(98, 100.5)
    for rect, pct in zip(rec_bars, rec_values):
        ax_rec.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 0.05,
                    f"{pct:.1f}%", ha="center", va="bottom", fontsize=10)

    fig.suptitle("HNSW Index Benchmark (N=200)", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig09_hnsw_benchmark")
447
+
448
+
449
+ # ── Figure 10: INT8 Compression ──────────────────────────────────────────
450
+
451
def fig10_int8_compression() -> None:
    """Bar chart: FP16 vs INT8 comparison.

    All values are hard-coded from the INT8 quantization experiment —
    NOTE(review): keep in sync with the source measurements.
    """
    print("Fig 10: INT8 compression...")

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4))

    # Size comparison (MB) for two context lengths.
    configs = ["591 tok", "6,403 tok"]
    fp16_sizes = [73.9, 800.4]
    int8_sizes = [37.5, 406.5]
    x = np.arange(len(configs))
    w = 0.35
    ax1.bar(x - w / 2, fp16_sizes, w, label="FP16", color=COLORS["orange"], edgecolor="white")
    ax1.bar(x + w / 2, int8_sizes, w, label="INT8", color=COLORS["blue"], edgecolor="white")
    ax1.set_xticks(x)
    ax1.set_xticklabels(configs)
    ax1.set_ylabel("File Size (MB)")
    ax1.set_title("(a) .eng File Size β€” 1.97Γ— Compression")
    ax1.legend()

    # Quality metrics: cosine similarity plus margins before/after INT8.
    metrics = ["Cosine\nSimilarity", "Margin\n(FP16)", "Margin\n(INT8)"]
    values = [0.99998, 0.381, 0.262]
    bar_colors = [COLORS["green"], COLORS["blue"], COLORS["cyan"]]
    bars = ax2.bar(metrics, values, color=bar_colors, edgecolor="white", width=0.5)
    ax2.set_ylabel("Value")
    ax2.set_title("(b) Quality Preservation")
    # Cosine similarity needs 5 decimals to be distinguishable from 1.0.
    for bar, val in zip(bars, values):
        ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
                 f"{val:.5f}" if val > 0.9 else f"{val:.3f}",
                 ha="center", va="bottom", fontsize=9)

    fig.suptitle("INT8 Quantization Impact", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig10_int8_compression")
486
+
487
+
488
+ # ── Figure 12: Margin Distribution ───────────────────────────────────────
489
+
490
def fig12_margin_distribution() -> None:
    """Distribution comparison: f1 vs f0+f1 summary statistics.

    Plots mean / median / min margin (Γ—10Β³) for the two key frequency
    combinations from ``multifreq_comparison.json``.
    """
    print("Fig 12: Margin distribution...")
    data = load_json(ABSOLUTE_DIR / "multifreq_comparison.json")
    results = data["results"]

    fig, ax = plt.subplots(figsize=(7, 4.5))

    # We'll show key statistics as a visualization
    combos = ["f1", "f0+f1"]
    means = [results[c]["margin_mean"] * 1000 for c in combos]
    medians = [results[c]["margin_median"] * 1000 for c in combos]
    mins = [results[c]["margin_min"] * 1000 for c in combos]

    x = np.arange(len(combos))
    w = 0.25
    ax.bar(x - w, means, w, label="Mean", color=COLORS["blue"], edgecolor="white")
    ax.bar(x, medians, w, label="Median", color=COLORS["green"], edgecolor="white")
    ax.bar(x + w, mins, w, label="Min", color=COLORS["red"], edgecolor="white")

    ax.set_xticks(x)
    ax.set_xticklabels(combos, fontsize=12)
    ax.set_ylabel("Margin (Γ—10Β³)")
    ax.set_title("Margin Statistics: f1 vs f0+f1 (N=200)")
    ax.legend()
    ax.axhline(y=0, color="black", linewidth=0.5)

    # Annotate improvement.
    # FIX: dropped the needless f-prefix on this constant string (F541).
    ax.annotate(
        "+76% mean margin\n25/28 failures fixed",
        xy=(1, means[1]), xytext=(1.3, means[1] + 1),
        arrowprops={"arrowstyle": "->", "color": COLORS["green"]},
        fontsize=9, bbox={"boxstyle": "round,pad=0.3", "facecolor": "#e6ffe6", "alpha": 0.8}
    )

    fig.tight_layout()
    save_figure(fig, "fig12_margin_distribution")
527
+
528
+
529
+ # ── Figure 13: FCDB Stability-Discrimination Tradeoff ────────────────────
530
+
531
def fig13_fcdb_tradeoff() -> None:
    """Dual-axis: basis stability vs retrieval margin vs corpus size.

    Hard-coded values taken from PAPER_TABLE.md. Stability is available
    at all four corpus sizes; the cross-model margin was only measured
    at N=50 and N=200.
    """
    print("Fig 13: FCDB stability-discrimination tradeoff...")

    # Data from PAPER_TABLE.md
    n_vals = [50, 100, 125, 200]
    stability = [0.82, 0.906, 0.983, 0.999]  # subspace agreement
    # FIX: removed dead local `margin = [0.124, None, None, 0.013]`
    # (never used; the measured points are margin_n / margin_v below).
    margin_n = [50, 200]
    margin_v = [0.124, 0.013]

    fig, ax1 = plt.subplots(figsize=(7, 5))
    ax2 = ax1.twinx()

    # Stability (left axis)
    line1 = ax1.plot(n_vals, stability, "o-", color=COLORS["blue"], linewidth=2,
                     markersize=8, label="Basis Stability", zorder=5)
    ax1.set_xlabel("Corpus Size N")
    ax1.set_ylabel("Subspace Agreement", color=COLORS["blue"])
    ax1.tick_params(axis="y", labelcolor=COLORS["blue"])
    ax1.set_ylim(0.7, 1.05)

    # Margin (right axis)
    line2 = ax2.plot(margin_n, margin_v, "s--", color=COLORS["orange"], linewidth=2,
                     markersize=8, label="Retrieval Margin", zorder=5)
    ax2.set_ylabel("Cross-Model Margin", color=COLORS["orange"])
    ax2.tick_params(axis="y", labelcolor=COLORS["orange"])
    ax2.set_ylim(-0.01, 0.15)

    # Threshold line
    ax1.axhline(y=0.99, color=COLORS["grey"], linestyle=":", alpha=0.5)
    ax1.annotate("Stable (β‰₯0.99)", xy=(125, 0.99), fontsize=8, color=COLORS["grey"])

    # Combined legend (artists come from two different axes).
    lines = line1 + line2
    labels = [ln.get_label() for ln in lines]
    ax1.legend(lines, labels, loc="center left")

    ax1.set_title("FCDB Stability–Discrimination Tradeoff")
    fig.tight_layout()
    save_figure(fig, "fig13_fcdb_tradeoff")
572
+
573
+
574
+ # ── Figure 14: TTFT Speedup ─────────────────────────────────────────────
575
+
576
def fig14_ttft_speedup() -> None:
    """Grouped bar chart: cold vs warm TTFT.

    Panel (a): cold vs warm TTFT per configuration on a log scale.
    Panel (b): restoration speedup factors with bold value labels.
    Saves fig14_ttft_speedup via save_figure().
    """
    print("Fig 14: TTFT speedup...")

    configs = ["3B / 4K tok", "3B / 16K tok", "8B / 591 tok"]
    cold_ttft = [11439, 94592, 3508]  # ms
    warm_ttft = [170, 1777, 116]  # ms
    speedups = [67.2, 53.2, 30.8]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4.5))

    x = np.arange(len(configs))
    w = 0.35
    ax1.bar(x - w / 2, cold_ttft, w, label="Cold TTFT", color=COLORS["orange"], edgecolor="white")
    ax1.bar(x + w / 2, warm_ttft, w, label="Warm TTFT", color=COLORS["blue"], edgecolor="white")
    ax1.set_xticks(x)
    ax1.set_xticklabels(configs, fontsize=9)
    ax1.set_ylabel("TTFT (ms)")
    ax1.set_title("(a) Time to First Token")
    ax1.set_yscale("log")  # cold and warm differ by ~2 orders of magnitude
    ax1.legend()

    # Speedup bars (categorical x, so ticks/labels already come from `configs`)
    bars = ax2.bar(configs, speedups, color=COLORS["green"], edgecolor="white", width=0.5)
    ax2.set_ylabel("Speedup (Γ—)")
    ax2.set_title("(b) KV Cache Restoration Speedup")
    # Bug fix: calling set_xticklabels() without a matching set_xticks() emits
    # matplotlib's "FixedFormatter without FixedLocator" warning and can
    # mislabel ticks; tick_params just resizes the existing categorical labels.
    ax2.tick_params(axis="x", labelsize=9)
    for bar, val in zip(bars, speedups):
        ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5,
                 f"{val:.1f}Γ—", ha="center", va="bottom", fontsize=10, fontweight="bold")

    fig.suptitle("KV Cache Warm Start Performance", fontsize=14, y=1.02)
    fig.tight_layout()
    save_figure(fig, "fig14_ttft_speedup")
610
+
611
+
612
+ # ── Figure 15: EGR Overhead Scaling ──────────────────────────────────────
613
+
614
def fig15_egr_overhead() -> None:
    """Scatter plot: EGR fingerprint-extraction overhead vs context length.

    Each point is one measured configuration; a dotted reference line marks
    the 50 ms budget.  Saves fig15_egr_overhead via save_figure().
    """
    print("Fig 15: EGR overhead scaling...")

    # (tokens, overhead in ms, annotation label, point color)
    measurements = [
        (600, 30.6, "16 layers\n(8-24)", COLORS["blue"]),
        (6403, 48.8, "16 layers\n(8-24)", COLORS["blue"]),
        (600, 84.0, "32 layers\n(all)", COLORS["orange"]),
    ]

    fig, ax = plt.subplots(figsize=(6, 4.5))

    for n_tok, cost_ms, label, point_color in measurements:
        ax.scatter(n_tok, cost_ms, s=100, color=point_color, zorder=5,
                   edgecolor="white", linewidth=1.5)
        ax.annotate(label, xy=(n_tok, cost_ms),
                    xytext=(n_tok + 200, cost_ms + 2), fontsize=9)

    ax.set_xlabel("Context Length (tokens)")
    ax.set_ylabel("EGR Overhead (ms)")
    ax.set_title("Fingerprint Extraction Overhead")
    ax.set_xlim(0, 7000)
    ax.set_ylim(20, 95)

    # Reference line: the 50 ms overhead budget
    ax.axhline(y=50, color=COLORS["grey"], linestyle=":", alpha=0.3)
    ax.text(100, 51, "50ms threshold", fontsize=8, color=COLORS["grey"])

    fig.tight_layout()
    save_figure(fig, "fig15_egr_overhead")
641
+
642
+
643
+ # ── Figure 1: Architecture Diagram (Mermaid) ────────────────────────────
644
+
645
def fig01_architecture_mermaid() -> None:
    """Generate a Mermaid flowchart source file for the system architecture.

    Writes fig01_architecture.mmd into FIGURES_DIR; no matplotlib involved.
    The trailing backslash after the opening quotes is a line continuation
    (non-raw string), so the diagram starts directly with the init directive.
    """
    print("Fig 01: Architecture diagram (Mermaid)...")
    mermaid = """\
%%{init: {'theme': 'base', 'themeVariables': {'primaryColor': '#4477AA', 'primaryTextColor': '#fff', 'primaryBorderColor': '#335588', 'lineColor': '#666', 'secondaryColor': '#EE6677', 'tertiaryColor': '#228833'}}}%%
flowchart TD
    A[LLM Runtime<br/>llama.cpp] -->|KV cache blob| B[Blob Parser]
    B -->|Layer keys K| C[Fourier Fingerprint<br/>f0+f1 DFT]
    C -->|2048-dim vector| D{Storage}
    D -->|.eng binary| E[EIGENGRAM File<br/>v1.2 format]
    D -->|HNSW index| F[FAISS IndexHNSW<br/>M=32]

    G[Query Session] -->|New KV cache| C
    C -->|Query fingerprint| H[Geodesic Retrieval]
    F -->|Top-k candidates| H

    H --> I{Stage 0<br/>Prior Check}
    I -->|chronic failure| J[Skip / LOW]
    I -->|ok| K{Stage 1<br/>HNSW Search}
    K -->|HIGH / MEDIUM| L[Result]
    K -->|below threshold| M{Stage 2<br/>Trajectory}
    M -->|interpolation| N{Stage 3<br/>Constraints}
    N --> O{Stage 4<br/>Metadata}
    O --> L

    subgraph Confidence Tracking
        P[IndexC<br/>SQLite] ---|update| I
        L ---|record| P
    end

    style A fill:#4477AA,stroke:#335588,color:#fff
    style C fill:#228833,stroke:#1a6625,color:#fff
    style E fill:#EE6677,stroke:#cc5566,color:#fff
    style F fill:#66CCEE,stroke:#55aabb,color:#000
    style H fill:#AA3377,stroke:#882266,color:#fff
"""
    mermaid_path = FIGURES_DIR / "fig01_architecture.mmd"
    mermaid_path.write_text(mermaid)
    # Plain string: the old f-string had no placeholders (ruff F541).
    print("  Saved: fig01_architecture.mmd")
684
+
685
+
686
+ # ── Figure 11: Retrieval Pipeline (Mermaid) ──────────────────────────────
687
+
688
def fig11_retrieval_pipeline_mermaid() -> None:
    """Generate a Mermaid diagram source for the 4-stage geodesic retrieval.

    Writes fig11_retrieval_pipeline.mmd into FIGURES_DIR; no matplotlib
    involved.  Stages 0-4 and the confidence outcomes (HIGH/MEDIUM/LOW/SKIP)
    mirror the pipeline described in the paper.
    """
    print("Fig 11: Retrieval pipeline (Mermaid)...")
    mermaid = """\
%%{init: {'theme': 'base'}}%%
flowchart LR
    Q[Query<br/>Fingerprint] --> S0

    S0[Stage 0<br/>Prior Preemption<br/><i>IndexC chronic<br/>failure check</i>]
    S0 -->|"pass"| S1
    S0 -->|"preempt"| SKIP[SKIP<br/>confidence=LOW]

    S1[Stage 1<br/>HNSW Search<br/><i>cosine top-k</i>]
    S1 -->|"margin > 0.005"| HIGH[HIGH<br/>199/200 docs]
    S1 -->|"margin 0.001-0.005"| MED[MEDIUM]
    S1 -->|"margin < 0.001"| S2

    S2[Stage 2<br/>Trajectory<br/><i>interpolation<br/>w=0.3</i>]
    S2 --> S3

    S3[Stage 3<br/>Negative<br/>Constraints<br/><i>apophatic layer</i>]
    S3 --> S4

    S4[Stage 4<br/>Metadata<br/>Disambig<br/><i>domain + keywords<br/>+ norms</i>]
    S4 --> LOW[LOW<br/>1/200 docs<br/><i>doc_146</i>]

    style S0 fill:#66CCEE,stroke:#55aabb
    style S1 fill:#4477AA,stroke:#335588,color:#fff
    style S2 fill:#CCBB44,stroke:#aa9933
    style S3 fill:#EE6677,stroke:#cc5566,color:#fff
    style S4 fill:#AA3377,stroke:#882266,color:#fff
    style HIGH fill:#228833,stroke:#1a6625,color:#fff
    style MED fill:#CCBB44,stroke:#aa9933
    style LOW fill:#EE6677,stroke:#cc5566,color:#fff
    style SKIP fill:#BBBBBB,stroke:#999999
"""
    mermaid_path = FIGURES_DIR / "fig11_retrieval_pipeline.mmd"
    mermaid_path.write_text(mermaid)
    # Plain string: the old f-string had no placeholders (ruff F541).
    print("  Saved: fig11_retrieval_pipeline.mmd")
727
+
728
+
729
+ # ── Consolidated Findings JSON ───────────────────────────────────────────
730
+
731
def generate_findings() -> None:
    """Consolidate all key metrics into a single findings.json.

    The dict below is a hand-curated snapshot of every headline result
    (retrieval, ablations, HNSW, compression, TTFT, cross-model transfer,
    CKA, per-domain recall, file format).  Written pretty-printed to
    RESULTS_DIR/paper/findings.json.
    """
    print("Generating consolidated findings...")

    findings = {
        "title": "ENGRAM Protocol β€” Consolidated Research Findings",
        "date": "2026-04-03",
        "hardware": {
            "platform": "Apple M3, 24GB RAM",
            "gpu": "Metal (n_gpu_layers=-1)",
            "os": "macOS Darwin 25.4.0",
            "llama_cpp": "0.3.19",
            "faiss": "1.13.2",
            "torch": "2.11.0",
        },
        "same_model_retrieval": {
            "method": "Fourier f0+f1 fingerprint",
            "corpus_size": 200,
            "n_domains": 10,
            "recall_at_1": 0.98,
            "n_failures": 4,
            "mean_margin": 0.007201,
            "margin_power_law": {"A": 0.021342, "alpha": -0.2065},
            "f1_only_recall": 0.86,
            "f1_only_failures": 28,
            "improvement_over_f1": "25/28 failures fixed (+76% mean margin)",
            "ml_math_confusion_reduction": "81.5%",
        },
        "frequency_ablation": {
            "combos_tested": 6,
            "best": "f0+f1",
            "results": {
                "f1": {"recall": 0.86, "margin": 0.004087},
                "f2": {"recall": 0.715, "margin": 0.002196},
                "f1+f2": {"recall": 0.95, "margin": 0.004744},
                "f1+f2+f3": {"recall": 0.95, "margin": 0.004129},
                "f0+f1": {"recall": 0.98, "margin": 0.007201},
                "f1+f3": {"recall": 0.89, "margin": 0.003477},
            },
        },
        "hnsw_index": {
            "speedup": 5.65,
            "recall": 0.995,
            "latency_us": 51.83,
            "bruteforce_latency_us": 293.07,
        },
        "geodesic_retrieval": {
            "stages": 4,
            "final_recall": 1.0,
            "n_high": 0,
            "n_medium": 199,
            "n_low": 1,
            "hard_failure": "doc_146 (resolved by Stage 4 metadata)",
        },
        "int8_compression": {
            "ratio": 1.97,
            "cosine_similarity": 0.99998,
            "margin_fp16": 0.381,
            "margin_int8": 0.262,
            "margin_preserved": True,
        },
        "ttft_speedup": {
            "3b_4k": {"cold_ms": 11439, "warm_ms": 170, "speedup": 67.2},
            "3b_16k": {"cold_ms": 94592, "warm_ms": 1777, "speedup": 53.2},
            "8b_591": {"cold_ms": 3508, "warm_ms": 116, "speedup": 30.8},
        },
        "cross_model_transfer": {
            "n_strategies": 9,
            "best_method": "FCDB",
            "best_margin": 0.124,
            "results": {
                "CCA": {"margin": -0.420, "correct": False},
                "Residual_FCB": {"margin": -0.382, "correct": False},
                "Procrustes": {"margin": -0.104, "correct": False},
                "RR": {"margin": -0.066, "correct": False},
                "FCB_ridge": {"margin": -0.017, "correct": False},
                "Contrastive": {"margin": 0.001, "correct": True},
                "JCB": {"margin": 0.011, "correct": True},
                "JCB_delta": {"margin": 0.037, "correct": True},
                "FCDB": {"margin": 0.124, "correct": True},
            },
            "key_insight": "Cross-model transfer requires representing documents as directions from a shared reference point (Frechet mean), not positions in space",
        },
        "fcdb_scaling": {
            "v1_n50": {"stability": 0.82, "margin": 0.124},
            "v2_n200": {"stability": 0.999, "margin": 0.013},
            "collapse_n": 100,
            "tradeoff": "Larger corpus stabilizes basis but dilutes per-document signal",
        },
        "cka_analysis": {
            "within_family": {"models": "Llama 3B ↔ 8B", "mean_cka": 0.975, "f0f1_sim": 0.875},
            "cross_family": {"models": "Llama ↔ Qwen", "mean_cka": 0.927, "f0f1_sim": 0.259},
            "verdict": "Manifolds topologically isomorphic (CKA>0.92 all pairs)",
        },
        "domain_recall": {
            "computer_science": 1.0, "general_world": 0.95, "history": 1.0,
            "language_arts": 1.0, "ml_systems": 0.90, "mathematics": 1.0,
            "philosophy": 1.0, "medicine": 0.95, "biology": 1.0, "physics": 1.0,
        },
        "eigengram_format": {
            "version": "1.2",
            "architectures": ["llama", "gemma", "gemma4/ISWA", "phi", "qwen", "mistral"],
            "iswa_support": "Gemma 4 26B dual-cache (5+25 layers, 6144-dim fingerprint)",
        },
    }

    paper_dir = RESULTS_DIR / "paper"
    paper_dir.mkdir(parents=True, exist_ok=True)
    findings_path = paper_dir / "findings.json"
    findings_path.write_text(json.dumps(findings, indent=2))
    # Plain string: the old f-string had no placeholders (ruff F541).
    print("  Saved: paper/findings.json")
842
+
843
+
844
+ # ── LaTeX Tables ─────────────────────────────────────────────────────────
845
+
846
def generate_latex_tables() -> None:
    """Generate LaTeX table source for the paper.

    Writes RESULTS_DIR/paper/tables.tex containing Tables 1-8.

    Bug fix: the string previously opened with ``r\"\"\"\\`` β€” but inside a RAW
    string a trailing backslash is NOT a line continuation, so the emitted
    .tex file began with a stray literal ``\\`` (a LaTeX error).  The
    backslash is removed and the resulting leading newline stripped at write
    time instead.
    """
    print("Generating LaTeX tables...")

    tables = r"""
% ──────────────────────────────────────────────────────────────────────
% Table 1: Multi-Frequency Ablation
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Multi-frequency fingerprint ablation at $N=200$. The f0+f1 combination
achieves the highest recall and mean margin, fixing 25 of 28 single-frequency failures.}
\label{tab:frequency-ablation}
\begin{tabular}{lcccc}
\toprule
Frequencies & Recall@1 & Mean Margin & Min Margin & Failures \\
\midrule
$f_1$ & 86.0\% & 4.09$\times 10^{-3}$ & $-4.71\times 10^{-3}$ & 28 \\
$f_2$ & 71.5\% & 2.20$\times 10^{-3}$ & $-5.85\times 10^{-3}$ & 57 \\
$f_1 + f_2$ & 95.0\% & 4.74$\times 10^{-3}$ & $-2.68\times 10^{-3}$ & 10 \\
$f_1 + f_2 + f_3$ & 95.0\% & 4.13$\times 10^{-3}$ & $-2.71\times 10^{-3}$ & 10 \\
\rowcolor{green!10}
$f_0 + f_1$ & \textbf{98.0\%} & \textbf{7.20}$\times 10^{-3}$ & $-4.09\times 10^{-3}$ & \textbf{4} \\
$f_1 + f_3$ & 89.0\% & 3.48$\times 10^{-3}$ & $-4.08\times 10^{-3}$ & 22 \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 2: Cross-Model Transfer Strategies
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Cross-model transfer strategies (Llama 3B $\to$ 8B). Nine methods tested;
FCDB achieves the only reliable positive margin without requiring an adapter.}
\label{tab:cross-model}
\begin{tabular}{lccc}
\toprule
Method & Margin & Correct & Adapter \\
\midrule
CCA & $-0.420$ & \xmark & symmetric \\
Residual FCB & $-0.382$ & \xmark & none \\
Procrustes & $-0.104$ & \xmark & orthogonal \\
Relative Repr. & $-0.066$ & \xmark & none \\
FCB + ridge & $-0.017$ & \xmark & ridge \\
\midrule
Contrastive $\delta$ & $+0.001$ & \cmark & ridge \\
JCB & $+0.011$ & \cmark & none \\
JCB + $\delta$ & $+0.037$ & \cmark & none \\
\rowcolor{green!10}
\textbf{FCDB} & $\mathbf{+0.124}$ & \cmark & \textbf{none} \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 3: TTFT Speedup
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{KV cache warm-start performance. TTFT speedup ranges from 27--67$\times$
depending on model size and context length.}
\label{tab:ttft}
\begin{tabular}{lccccc}
\toprule
Model & Tokens & Cold TTFT & Warm TTFT & Speedup & EGR (ms) \\
\midrule
Llama 3.2 3B & 4,002 & 11,439\,ms & 170\,ms & 67.2$\times$ & 9.5 \\
Llama 3.2 3B & 16,382 & 94,592\,ms & 1,777\,ms & 53.2$\times$ & 9.5 \\
Llama 3.1 8B & 591 & 3,508\,ms & 116\,ms & 30.8$\times$ & 30.6 \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 4: INT8 Compression
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{INT8 quantization results. Per-row symmetric quantization achieves
1.97$\times$ compression with negligible quality loss (cos\_sim = 0.99998).}
\label{tab:int8}
\begin{tabular}{lcccc}
\toprule
Tokens & FP16 Size & INT8 Size & Ratio & $\cos(s_\text{fp16}, s_\text{int8})$ \\
\midrule
591 & 73.9\,MB & 37.5\,MB & 1.97$\times$ & 0.99998 \\
6,403 & 800.4\,MB & 406.5\,MB & 1.97$\times$ & 0.99998 \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 5: CKA Analysis
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Centered Kernel Alignment (CKA) between model families. High CKA values
($>0.92$) confirm topological isomorphism of key manifolds across architectures.}
\label{tab:cka}
\begin{tabular}{lccc}
\toprule
Comparison & Mean CKA & f0+f1 Sim & Verdict \\
\midrule
Within-family (Llama 3B $\leftrightarrow$ 8B) & 0.975 & 0.875 & Isomorphic \\
Cross-family (Llama $\leftrightarrow$ Qwen) & 0.927 & 0.259 & Isomorphic \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 6: HNSW Benchmark
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{HNSW index performance at $N=200$. The index provides 5.65$\times$
speedup over brute-force with no recall loss.}
\label{tab:hnsw}
\begin{tabular}{lcc}
\toprule
Method & Latency ($\mu$s) & Recall@1 \\
\midrule
Brute-force & 293.1 & 99.5\% \\
HNSW ($M=32$) & 51.8 & 99.5\% \\
\midrule
\textbf{Speedup} & \textbf{5.65$\times$} & --- \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 7: Domain Recall
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Per-domain recall@1 with f0+f1 fingerprint at $N=200$.
All domains achieve $\geq 90\%$ recall.}
\label{tab:domain-recall}
\begin{tabular}{lc}
\toprule
Domain & Recall@1 \\
\midrule
Biology & 100.0\% \\
Computer Science & 100.0\% \\
History & 100.0\% \\
Language Arts & 100.0\% \\
Mathematics & 100.0\% \\
Philosophy & 100.0\% \\
Physics & 100.0\% \\
General World & 95.0\% \\
Medicine & 95.0\% \\
ML/Systems & 90.0\% \\
\bottomrule
\end{tabular}
\end{table}

% ──────────────────────────────────────────────────────────────────────
% Table 8: Margin Power Law
% ──────────────────────────────────────────────────────────────────────
\begin{table}[t]
\centering
\caption{Margin scaling law parameters. Both fingerprint methods follow
power-law decay $\bar{m} = A \cdot N^\alpha$ with no hard collapse point.}
\label{tab:power-law}
\begin{tabular}{lccc}
\toprule
Fingerprint & $A$ & $\alpha$ & Recall@200 \\
\midrule
$f_1$ & 0.0181 & $-0.277$ & 86.0\% \\
$f_0 + f_1$ & 0.0213 & $-0.207$ & 98.0\% \\
\bottomrule
\end{tabular}
\end{table}
"""

    paper_dir = RESULTS_DIR / "paper"
    paper_dir.mkdir(parents=True, exist_ok=True)
    tables_path = paper_dir / "tables.tex"
    # lstrip removes the leading newline left by the opening triple quote.
    tables_path.write_text(tables.lstrip("\n"))
    # Plain string: the old f-string had no placeholders (ruff F541).
    print("  Saved: paper/tables.tex")
1026
+
1027
+
1028
+ # ── Registry ─────────────────────────────────────────────────────────────
1029
+
1030
# Registry of all paper artifacts: key -> (human-readable description,
# zero-argument generator callable).  main() iterates this in insertion
# order, so entries are listed in figure order with the consolidated
# findings JSON and LaTeX tables last.  Keys double as the accepted values
# for the --only CLI flag.
FIGURE_REGISTRY: dict[str, tuple[str, object]] = {
    "fig01": ("System Architecture (Mermaid)", fig01_architecture_mermaid),
    "fig02": ("Frequency Combination Comparison", fig02_frequency_comparison),
    "fig03": ("Margin Power Law", fig03_margin_power_law),
    "fig04": ("Recall vs N (Fourier vs FCDB)", fig04_recall_vs_n),
    "fig05": ("Cross-Model Strategy Comparison", fig05_cross_model_strategies),
    "fig06": ("CKA Layer Similarity", fig06_cka_layers),
    "fig07": ("Domain Confusion Matrix", fig07_confusion_matrix),
    "fig08": ("Domain Recall Radar", fig08_domain_recall_radar),
    "fig09": ("HNSW Benchmark", fig09_hnsw_benchmark),
    "fig10": ("INT8 Compression", fig10_int8_compression),
    "fig11": ("Retrieval Pipeline (Mermaid)", fig11_retrieval_pipeline_mermaid),
    "fig12": ("Margin Distribution", fig12_margin_distribution),
    "fig13": ("FCDB Tradeoff", fig13_fcdb_tradeoff),
    "fig14": ("TTFT Speedup", fig14_ttft_speedup),
    "fig15": ("EGR Overhead Scaling", fig15_egr_overhead),
    "findings": ("Consolidated Findings JSON", generate_findings),
    "tables": ("LaTeX Tables", generate_latex_tables),
}
1049
+
1050
+
1051
def main() -> None:
    """CLI entry point: generate all paper figures, or one via --only.

    --list prints the registry and exits; --only with an unknown key exits
    with status 1.  In the generate-all path, a failure in one figure is
    reported but does not stop the remaining figures.
    """
    parser = argparse.ArgumentParser(description="Generate ENGRAM paper figures")
    parser.add_argument("--only", help="Generate only this figure (e.g., fig02)")
    parser.add_argument("--list", action="store_true", help="List all figures")
    args = parser.parse_args()

    if args.list:
        print("\nAvailable figures:")
        for key, (desc, _) in FIGURE_REGISTRY.items():
            print(f" {key:10s} {desc}")
        return

    FIGURES_DIR.mkdir(parents=True, exist_ok=True)
    print(f"\nOutput directory: {FIGURES_DIR}\n")

    if args.only:
        if args.only not in FIGURE_REGISTRY:
            print(f"Unknown figure: {args.only}")
            print(f"Available: {', '.join(FIGURE_REGISTRY.keys())}")
            sys.exit(1)
        # Description is unused here; bind only the callable.
        _, func = FIGURE_REGISTRY[args.only]
        func()
    else:
        for key, (_, func) in FIGURE_REGISTRY.items():
            try:
                func()
            except Exception as e:
                # Keep going: one broken figure should not abort the batch.
                print(f" ERROR generating {key}: {e}")

    print(f"\nDone. Figures saved to: {FIGURES_DIR}")
1081
+
1082
+
1083
# Script entry point: run the figure generator when executed directly.
if __name__ == "__main__":
    main()
scripts/setup.sh ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# ENGRAM Protocol β€” One-command setup
#
# Usage:
#   ./scripts/setup.sh            # Full setup with sbert embedder
#   ./scripts/setup.sh --minimal  # Core only (no sbert, no MCP)
#   ./scripts/setup.sh --dev      # Full setup + dev tools
#
# Requirements:
#   - Python >= 3.11
#   - pip (comes with Python)
#   - git (for cloning)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Colors (if terminal supports them)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

info() { echo -e "${GREEN}[ENGRAM]${NC} $*"; }
warn() { echo -e "${YELLOW}[ENGRAM]${NC} $*"; }
error() { echo -e "${RED}[ENGRAM]${NC} $*" >&2; }

# Parse arguments
MINIMAL=false
DEV=false
for arg in "$@"; do
    case "$arg" in
        --minimal) MINIMAL=true ;;
        --dev) DEV=true ;;
        --help|-h)
            echo "Usage: ./scripts/setup.sh [--minimal] [--dev]"
            echo ""
            echo "  --minimal   Core dependencies only (no sbert, no MCP)"
            echo "  --dev       Include development tools (pytest, ruff, mypy)"
            exit 0
            ;;
        *) error "Unknown argument: $arg"; exit 1 ;;
    esac
done

cd "$PROJECT_DIR"

# ── 1. Check Python version ──────────────────────────────────────────
info "Checking Python version..."
PYTHON=""
for cmd in python3.14 python3.13 python3.12 python3.11 python3; do
    if command -v "$cmd" &>/dev/null; then
        major=$("$cmd" -c "import sys; print(sys.version_info.major)")
        minor=$("$cmd" -c "import sys; print(sys.version_info.minor)")
        # Bug fix: the previous test required BOTH major>=3 AND minor>=11,
        # which would wrongly reject any future major version (e.g. 4.0).
        # Accept major > 3, or exactly 3 with minor >= 11.
        if [ "$major" -gt 3 ] || { [ "$major" -eq 3 ] && [ "$minor" -ge 11 ]; }; then
            PYTHON="$cmd"
            break
        fi
    fi
done

if [ -z "$PYTHON" ]; then
    error "Python >= 3.11 required but not found."
    error "Install from https://python.org or via your package manager."
    exit 1
fi
info "Using $PYTHON ($(${PYTHON} --version 2>&1))"

# ── 2. Create virtual environment ────────────────────────────────────
if [ ! -d ".venv" ]; then
    info "Creating virtual environment..."
    "$PYTHON" -m venv .venv
else
    info "Virtual environment already exists."
fi

# Activate
source .venv/bin/activate
info "Activated .venv"

# ── 3. Upgrade pip ───────────────────────────────────────────────────
info "Upgrading pip..."
pip install --upgrade pip --quiet

# ── 4. Install core package ──────────────────────────────────────────
info "Installing ENGRAM core dependencies..."
pip install -e . --quiet

if [ "$MINIMAL" = false ]; then
    # ── 5. Install sbert embedder ────────────────────────────────────
    info "Installing sentence-transformers embedder..."
    pip install -e ".[sbert]" --quiet

    # ── 6. Install MCP server ────────────────────────────────────────
    info "Installing MCP server dependencies..."
    pip install -e ".[mcp]" --quiet 2>/dev/null || \
        warn "MCP package not available (optional β€” needed for Claude Code integration)"
fi

if [ "$DEV" = true ]; then
    # ── 7. Install dev tools ─────────────────────────────────────────
    info "Installing development tools..."
    pip install -e ".[dev]" --quiet
fi

# ── 8. Create config from template ───────────────────────────────────
if [ ! -f ".env" ]; then
    cp .env.template .env
    info "Created .env from template. Edit it to set ENGRAM_MODEL_PATH."
else
    info ".env already exists."
fi

# ── 9. Create ENGRAM directories ─────────────────────────────────────
mkdir -p ~/.engram/sessions
mkdir -p ~/.engram/knowledge
mkdir -p ~/.engram/index
info "Created ~/.engram/ directories."

# ── 10. Verify installation ──────────────────────────────────────────
info "Verifying installation..."
if python -c "import kvcos; print(f' kvcos OK (v{kvcos.core.types.ENGRAM_VERSION})')"; then
    info "Core library loaded successfully."
else
    error "Failed to import kvcos. Check error messages above."
    exit 1
fi

# ── 11. Run tests (if dev mode) ──────────────────────────────────────
if [ "$DEV" = true ]; then
    info "Running test suite..."
    KMP_DUPLICATE_LIB_OK=TRUE OMP_NUM_THREADS=1 PYTHONPATH=. \
        pytest tests/ -x -q --tb=short 2>&1 | tail -5
fi

# ── Done ─────────────────────────────────────────────────────────────
echo ""
info "Setup complete."
echo ""
echo " Activate: source .venv/bin/activate"
echo " Tests:    KMP_DUPLICATE_LIB_OK=TRUE PYTHONPATH=. pytest tests/ -x -q"
echo " Server:   engram-server"
echo " Config:   Edit .env to set ENGRAM_MODEL_PATH"
echo ""