Spaces:

Executor-Tyrant-Framework
/

Condensate

Runtime error

Executor-Tyrant-Framework Claude Opus 4.6 (1M context) committed on Mar 29

Commit

efd23fa

1 Parent(s): 262b9d5

Fix HF Space: CPU-only torch, lazy imports

- Use --extra-index-url for CPU-only PyTorch (much smaller)
- Lazy-import torch and torch_membrane inside functions
- Prevents import failures during Gradio startup

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (11) hide show

__pycache__/condenser.cpython-312.pyc +0 -0
__pycache__/graph_builder.cpython-312.pyc +0 -0
__pycache__/membrane.cpython-312.pyc +0 -0
__pycache__/predictor.cpython-312.pyc +0 -0
app.py +6 -3
inference_graph.json +0 -0
requirements.txt +1 -0
test_condenser.py +292 -0
test_graph_builder.py +332 -0
test_membrane.py +275 -0
test_predictor.py +411 -0

__pycache__/condenser.cpython-312.pyc ADDED Viewed

Binary file (26.1 kB). View file

__pycache__/graph_builder.cpython-312.pyc ADDED Viewed

Binary file (25.9 kB). View file

__pycache__/membrane.cpython-312.pyc ADDED Viewed

Binary file (17.5 kB). View file

__pycache__/predictor.cpython-312.pyc ADDED Viewed

Binary file (16.8 kB). View file

app.py CHANGED Viewed

@@ -9,16 +9,13 @@ Compares baseline vs condensed inference.
 """
 import gradio as gr
-import torch
 import numpy as np
 import time
-import json
 import os
 import sys
 sys.path.insert(0, os.path.dirname(__file__))
-from torch_membrane import TorchMembrane
 from graph_builder import GraphBuilder
 from predictor import Predictor
@@ -36,7 +33,9 @@ def load_model():
     """Load model and install membrane."""
     global MODEL, TOKENIZER, MEMBRANE
     from transformers import AutoModelForCausalLM, AutoTokenizer
     TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
     if TOKENIZER.pad_token is None:
@@ -58,6 +57,8 @@ def train_predictor(num_prompts=5):
     """Run several prompts to train the predictor on access patterns."""
     global PREDICTOR, GRAPH, MEMBRANE
     if MODEL is None:
         load_model()
@@ -108,6 +109,8 @@ def run_inference(prompt, max_tokens=30):
     """Run inference and show activation map + condensation potential."""
     global MEMBRANE, PREDICTOR
     if MODEL is None:
         load_model()
     if PREDICTOR is None:

 """
 import gradio as gr
 import numpy as np
 import time
 import os
 import sys
 sys.path.insert(0, os.path.dirname(__file__))
 from graph_builder import GraphBuilder
 from predictor import Predictor
     """Load model and install membrane."""
     global MODEL, TOKENIZER, MEMBRANE
+    import torch
     from transformers import AutoModelForCausalLM, AutoTokenizer
+    from torch_membrane import TorchMembrane
     TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
     if TOKENIZER.pad_token is None:
     """Run several prompts to train the predictor on access patterns."""
     global PREDICTOR, GRAPH, MEMBRANE
+    import torch
     if MODEL is None:
         load_model()
     """Run inference and show activation map + condensation potential."""
     global MEMBRANE, PREDICTOR
+    import torch
     if MODEL is None:
         load_model()
     if PREDICTOR is None:

inference_graph.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 torch
 transformers
 numpy

+--extra-index-url https://download.pytorch.org/whl/cpu
 torch
 transformers
 numpy

test_condenser.py ADDED Viewed

	@@ -0,0 +1,292 @@

+"""
+Condensate Layer 3: Condenser Tests
+The moment of truth — does condensation actually save RAM?
+Run: python3 test_condenser.py
+"""
+import numpy as np
+import time
+import os
+import sys
+sys.path.insert(0, os.path.dirname(__file__))
+from condenser import Condenser
+def test_basic_compression():
+    """Test 1: Can we compress and decompress without data loss?"""
+    print("\n--- Test 1: Lossless Compression Round-Trip ---")
+    condenser = Condenser(demotion_idle_ms=1)
+    # Register some numpy arrays
+    original_data = np.random.randn(256, 256).astype(np.float32)
+    condenser.register("test.weights", original_data.copy())
+    region = condenser.regions["test.weights"]
+    original_size = region.original_size
+    # Compress to WARM
+    saved = region.compress_to_warm()
+    assert region.tier == "WARM"
+    assert region.hot_data is None
+    assert region.warm_data is not None
+    print(f"  Original: {original_size / 1024:.1f} KB")
+    print(f"  Compressed: {region.compressed_size / 1024:.1f} KB")
+    print(f"  Ratio: {original_size / region.compressed_size:.1f}:1")
+    print(f"  Saved: {saved / 1024:.1f} KB")
+    # Promote back to HOT
+    restored = region.promote_to_hot()
+    assert region.tier == "HOT"
+    assert np.array_equal(restored, original_data), "Data corrupted after round-trip!"
+    print(f"  Round-trip: LOSSLESS (arrays match exactly)")
+    # Compress to COLD (disk)
+    region.compress_to_cold(condenser.cold_dir)
+    assert region.tier == "COLD"
+    assert region.current_ram_usage == 0
+    print(f"  Cold (on disk): 0 KB RAM")
+    # Promote from COLD back to HOT
+    restored2 = region.promote_to_hot()
+    assert region.tier == "HOT"
+    assert np.array_equal(restored2, original_data), "Data corrupted after cold round-trip!"
+    print(f"  Cold round-trip: LOSSLESS")
+    condenser.cleanup()
+    print("  PASS")
+def test_selective_condensation():
+    """Test 2: Hot regions stay hot, cold regions compress.
+    16 regions, 4 hot, 12 cold. After condensation, only 4 should
+    be in RAM at full size.
+    """
+    print("\n--- Test 2: Selective Condensation ---")
+    # 16 regions × 32KB each (128×64 float32) = 512KB total
+    # Use structured data (sparse + patterns) — like real weights, not pure noise
+    state = {}
+    for i in range(16):
+        arr = np.zeros((128, 64), dtype=np.float32)
+        # Sparse: only ~20% nonzero (realistic for many weight matrices)
+        mask = np.random.random((128, 64)) < 0.2
+        arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
+        state[f"block_{i}"] = arr
+    hot_blocks = {0, 1, 2, 3}
+    # Callback handed to run_benchmark; `wrapped` is whatever the benchmark
+    # passes in and is indexed by the same keys as `state`
+    def workload(wrapped):
+        # Hot blocks: accessed every iteration
+        for i in hot_blocks:
+            _ = wrapped[f"block_{i}"]
+        # Cold blocks: rarely accessed (5% of calls touch one random cold block)
+        if np.random.random() < 0.05:
+            idx = np.random.choice(list(range(4, 16)))
+            _ = wrapped[f"block_{idx}"]
+        time.sleep(0.001)
+    condenser = Condenser(demotion_idle_ms=10, warmup_iters=15)
+    results = condenser.run_benchmark(state, workload, iterations=30,
+                                       name="selective")
+    condenser.print_results(results)
+    # Verify tier management is working — cold regions should exist
+    # Only the last promotion_log entry is inspected; an empty log yields 0
+    last_log = results["promotion_log"][-1] if results["promotion_log"] else {}
+    warm_cold = last_log.get("warm", 0) + last_log.get("cold", 0)
+    print(f"  Condensed regions (WARM+COLD): {warm_cold} of {results['total_regions']}")
+    print(f"  RAM saved: {results['saved_mb']:.2f} MB ({results['saved_pct']:.1f}%)")
+    # Threshold is 8 rather than all 12 cold blocks — presumably slack for
+    # cold blocks the random accesses above re-promoted (TODO confirm)
+    assert warm_cold >= 8, f"Should condense at least 8 cold regions, got {warm_cold}"
+    condenser.cleanup()
+    print("  PASS")
+def test_inference_workload():
+    """Test 3: Simulated AI inference — THE benchmark.
+    6-layer model with attention + FFN + KV cache.
+    Config and unused layers should compress.
+    Active layers should stay hot.
+    """
+    print("\n--- Test 3: AI Inference Workload (The Real Test) ---")
+    state = {}
+    # Model layers — sparse structured weights; q/k/v are 64KB each
+    # (128×128 float32), the two FFN matrices are 256KB each (128×512)
+    for i in range(6):
+        for name in ["q", "k", "v"]:
+            arr = np.zeros((128, 128), dtype=np.float32)
+            mask = np.random.random((128, 128)) < 0.25
+            arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
+            state[f"layer_{i}_{name}"] = arr
+        for name, shape in [("ffn_up", (128, 512)), ("ffn_down", (512, 128))]:
+            arr = np.zeros(shape, dtype=np.float32)
+            mask = np.random.random(shape) < 0.2
+            arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
+            state[f"layer_{i}_{name}"] = arr
+    # KV cache — zeros (compresses extremely well)
+    for i in range(6):
+        state[f"kv_{i}_keys"] = np.zeros((256, 128), dtype=np.float32)
+        state[f"kv_{i}_vals"] = np.zeros((256, 128), dtype=np.float32)
+    # Config and metadata (small): 20 arrays of 32 float32 values each
+    for i in range(20):
+        state[f"meta_{i}"] = np.zeros(32, dtype=np.float32)
+    # Callback handed to run_benchmark; one call models one request
+    def workload(wrapped):
+        # Token generation: sequential through layers
+        for token in range(3):
+            for layer_idx in range(6):
+                _ = wrapped[f"layer_{layer_idx}_q"]
+                _ = wrapped[f"layer_{layer_idx}_k"]
+                _ = wrapped[f"layer_{layer_idx}_v"]
+                _ = wrapped[f"kv_{layer_idx}_keys"]
+                _ = wrapped[f"kv_{layer_idx}_vals"]
+                _ = wrapped[f"layer_{layer_idx}_ffn_up"]
+                _ = wrapped[f"layer_{layer_idx}_ffn_down"]
+                time.sleep(0.0001)
+        # Metadata accessed once per request
+        # (only meta_0 and meta_1; meta_2..meta_19 are never touched here)
+        _ = wrapped["meta_0"]
+        _ = wrapped["meta_1"]
+    print(f"  State: {len(state)} regions, "
+          f"{sum(v.nbytes for v in state.values()) / 1024 / 1024:.2f} MB total")
+    condenser = Condenser(demotion_idle_ms=5, warmup_iters=10)
+    results = condenser.run_benchmark(state, workload, iterations=20,
+                                       name="inference")
+    condenser.print_results(results)
+    # Report only — this test makes no assertions about the savings
+    print(f"\n  *** INFERENCE RESULTS ***")
+    print(f"  Baseline RAM:    {results['baseline_ram_mb']:.2f} MB")
+    print(f"  Condensed RAM:   {results['avg_condensed_ram_mb']:.2f} MB")
+    print(f"  Saved:           {results['saved_mb']:.2f} MB ({results['saved_pct']:.1f}%)")
+    print(f"  Prediction acc:  {results['prediction_accuracy']}%")
+    condenser.cleanup()
+    print("  PASS")
+def test_large_state():
+    """Test 4: Larger state — stress test with meaningful RAM numbers.
+    64 regions × 256KB = 16 MB total state.
+    Only 8 regions hot at any time = 2 MB needed.
+    Target: condense ~14 MB.
+    """
+    print("\n--- Test 4: Large State Stress Test ---")
+    # 64 regions × 256KB each = 16 MB
+    # Structured sparse data — compresses well
+    state = {}
+    for i in range(64):
+        arr = np.zeros((256, 128), dtype=np.float32)
+        mask = np.random.random((256, 128)) < 0.15
+        arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
+        state[f"region_{i}"] = arr
+    # 8 hot regions that rotate
+    hot_set_a = set(range(0, 8))
+    hot_set_b = set(range(32, 40))
+    iteration_count = [0]
+    def workload(wrapped):
+        iteration_count[0] += 1
+        # Alternate between two hot sets
+        hot = hot_set_a if (iteration_count[0] % 20) < 10 else hot_set_b
+        for i in hot:
+            _ = wrapped[f"region_{i}"]
+        time.sleep(0.002)
+    total_mb = sum(v.nbytes for v in state.values()) / 1024 / 1024
+    print(f"  State: {len(state)} regions, {total_mb:.1f} MB total")
+    print(f"  Only 8 regions hot at any time (2 MB needed)")
+    condenser = Condenser(demotion_idle_ms=15, warmup_iters=15)
+    results = condenser.run_benchmark(state, workload, iterations=40,
+                                       name="large")
+    condenser.print_results(results)
+    print(f"\n  *** LARGE STATE RESULTS ***")
+    print(f"  Baseline RAM:    {results['baseline_ram_mb']:.1f} MB (all in RAM)")
+    print(f"  Condensed RAM:   {results['avg_condensed_ram_mb']:.1f} MB")
+    print(f"  Saved:           {results['saved_mb']:.1f} MB ({results['saved_pct']:.1f}%)")
+    condenser.cleanup()
+    print("  PASS")
+def test_prediction_value():
+    """Test 5: Measure prediction-driven vs reactive promotions.
+    The ratio of predicted vs reactive tells us how much the
+    predictor is actually helping vs just reacting to cache misses.
+    """
+    print("\n--- Test 5: Prediction Value Measurement ---")
+    state = {f"chunk_{i}": np.random.randn(64, 64).astype(np.float32)
+             for i in range(20)}
+    # Predictable pattern: 0→1→2→3, then 10→11→12→13
+    def workload(wrapped):
+        for i in range(4):
+            _ = wrapped[f"chunk_{i}"]
+            time.sleep(0.001)
+        time.sleep(0.005)
+        for i in range(10, 14):
+            _ = wrapped[f"chunk_{i}"]
+            time.sleep(0.001)
+        time.sleep(0.005)
+    condenser = Condenser(demotion_idle_ms=8, warmup_iters=15)
+    results = condenser.run_benchmark(state, workload, iterations=25,
+                                       name="predval")
+    condenser.print_results(results)
+    pred = results["prediction_promotions"]
+    react = results["reactive_promotions"]
+    total = pred + react
+    if total > 0:
+        pred_pct = pred / total * 100
+        print(f"\n  Promotions: {total} total")
+        print(f"    Prediction-driven: {pred} ({pred_pct:.0f}%)")
+        print(f"    Reactive (miss):   {react} ({100-pred_pct:.0f}%)")
+        if pred_pct > 50:
+            print(f"  GOOD — Majority of promotions are prediction-driven")
+        else:
+            print(f"  Prediction helps but reactive still dominates")
+    else:
+        print(f"  No promotions needed (everything stayed HOT)")
+    condenser.cleanup()
+    print("  PASS")
+if __name__ == "__main__":
+    print("=" * 60)
+    print("  CONDENSATE — Layer 3 Condenser Tests")
+    print("  The Moment of Truth: Does It Actually Save RAM?")
+    print("=" * 60)
+    test_basic_compression()
+    test_selective_condensation()
+    test_inference_workload()
+    test_large_state()
+    test_prediction_value()
+    print("\n" + "=" * 60)
+    print("  ALL TESTS PASSED")
+    print("=" * 60)

test_graph_builder.py ADDED Viewed

	@@ -0,0 +1,332 @@

+"""
+Condensate Layer 1: Graph Builder Tests
+Tests the graph builder on access logs from the Membrane.
+Run: python3 test_graph_builder.py
+"""
+import numpy as np
+import time
+import os
+import sys
+sys.path.insert(0, os.path.dirname(__file__))
+from membrane import Membrane
+from graph_builder import GraphBuilder
+def test_sequential_model():
+    """Test 1: Sequential layer access — like a transformer forward pass.
+    Should discover: each layer is a cluster, layers chain sequentially.
+    """
+    print("\n--- Test 1: Sequential Model Forward Pass ---")
+    # Reset the Membrane before recording (presumably drops earlier log entries — confirm)
+    Membrane.clear()
+    # 12-layer "model" with attention components
+    state = {}
+    for layer in range(12):
+        state[f"layer_{layer}"] = {
+            "weight": np.random.randn(128, 128).astype(np.float32),
+            "bias": np.random.randn(128).astype(np.float32),
+            "attn_q": np.random.randn(128, 128).astype(np.float32),
+            "attn_k": np.random.randn(128, 128).astype(np.float32),
+            "attn_v": np.random.randn(128, 128).astype(np.float32),
+        }
+    wrapped = Membrane.wrap(state, "model")
+    # Run 5 "forward passes" — sequential layer access
+    for pass_num in range(5):
+        for layer_idx in range(12):
+            layer = wrapped[f"layer_{layer_idx}"]
+            _ = layer["weight"]
+            _ = layer["bias"]
+            _ = layer["attn_q"]
+            _ = layer["attn_k"]
+            _ = layer["attn_v"]
+            time.sleep(0.0002)  # small gap between layers
+    # Build graph
+    # The 0.2ms sleep between layers is shorter than the 2ms window used here
+    graph = GraphBuilder(causal_window_ns=2_000_000)  # 2ms window
+    graph.build(Membrane.get_log())
+    graph.print_analysis()
+    # Verify clusters found
+    assert len(graph.clusters) > 0, "Should find layer clusters"
+    # Verify causal chains found
+    chains = graph.get_causal_chains()
+    assert len(chains) > 0, "Should find sequential chains"
+    print("  PASS")
+def test_hot_cold_pattern():
+    """Test 2: Hot/cold access — some regions hammered, others barely touched.
+    Should discover: clear temperature separation, cold regions compressible.
+    """
+    print("\n--- Test 2: Hot/Cold Access Pattern ---")
+    Membrane.clear()
+    # 20 regions, 4 of them hot
+    state = {f"region_{i}": np.random.randn(64, 64).astype(np.float32)
+             for i in range(20)}
+    wrapped = Membrane.wrap(state, "hotcold")
+    hot = {2, 7, 13, 18}
+    # 100 sweeps: hot regions are read on every sweep, each cold region with
+    # probability 0.03 per sweep
+    for _ in range(100):
+        for i in range(20):
+            if i in hot:
+                _ = wrapped[f"region_{i}"]  # hot: every iteration
+            elif np.random.random() < 0.03:
+                _ = wrapped[f"region_{i}"]  # cold: 3% chance
+    # Default GraphBuilder settings (no causal window override)
+    graph = GraphBuilder()
+    graph.build(Membrane.get_log())
+    graph.print_analysis()
+    # Verify temperature classification
+    # Reads the private `_temp_class` attribute; nodes without it fall back to ''
+    # and count as neither HOT nor COLD.
+    # NOTE(review): which call sets `_temp_class` is not visible here — confirm
+    hot_nodes = [n for n in graph.nodes.values()
+                 if getattr(n, '_temp_class', '') == 'HOT']
+    cold_nodes = [n for n in graph.nodes.values()
+                  if getattr(n, '_temp_class', '') == 'COLD']
+    print(f"  HOT nodes: {len(hot_nodes)}, COLD nodes: {len(cold_nodes)}")
+    assert len(hot_nodes) >= 3, "Should identify hot regions"
+    assert len(cold_nodes) >= 1, "Should identify cold regions"
+    print("  PASS")
+def test_causal_chains():
+    """Test 3: Known causal chains — verify the graph discovers them.
+    This is the core capability: can we learn prefetch chains?
+    """
+    print("\n--- Test 3: Causal Chain Discovery ---")
+    Membrane.clear()
+    state = {f"r{i}": np.random.randn(32, 32).astype(np.float32)
+             for i in range(10)}
+    wrapped = Membrane.wrap(state, "causal")
+    # Chain A: r0 → r2 → r5 → r9  (always this order, ~0.5ms apart)
+    # Chain B: r1 → r3 → r6       (always this order)
+    # Noise:   r4, r7, r8          (random, no pattern)
+    for _ in range(80):
+        # Chain A
+        _ = wrapped["r0"]
+        time.sleep(0.0005)
+        _ = wrapped["r2"]
+        time.sleep(0.0005)
+        _ = wrapped["r5"]
+        time.sleep(0.0005)
+        _ = wrapped["r9"]
+        time.sleep(0.001)
+        # Chain B
+        _ = wrapped["r1"]
+        time.sleep(0.0005)
+        _ = wrapped["r3"]
+        time.sleep(0.0005)
+        _ = wrapped["r6"]
+        time.sleep(0.002)
+        # Noise: on roughly half of the iterations, touch one of r4/r7/r8
+        if np.random.random() > 0.5:
+            _ = wrapped[f"r{np.random.choice([4, 7, 8])}"]
+    # The requested sleeps between chain steps (0.5ms) are below this 3ms window;
+    # actual gaps depend on the OS timer resolution
+    graph = GraphBuilder(causal_window_ns=3_000_000)  # 3ms window
+    graph.build(Membrane.get_log())
+    graph.print_analysis()
+    # Check for discovered chains
+    chains = graph.get_causal_chains(min_weight=5.0)
+    print(f"\n  Chains found (weight >= 5): {len(chains)}")
+    # Each chain is iterated as (path, <second element>) pairs; only the last
+    # dotted component of the path is printed
+    for chain in chains:
+        path_names = [p.split(".")[-1] for p, _ in chain]
+        print(f"    {' → '.join(path_names)}")
+    # The graph should find chain-like patterns
+    # (exact chains depend on timing, but structure should be visible)
+    assert len(chains) >= 1, "Should discover at least one causal chain"
+    print("  PASS")
+def test_cluster_discovery():
+    """Test 4: Co-access clusters — groups of regions always used together.
+    These become hyperedges: promote/demote the whole group as a unit.
+    """
+    print("\n--- Test 4: Cluster (Proto-Hyperedge) Discovery ---")
+    Membrane.clear()
+    state = {f"item_{i}": np.random.randn(16).astype(np.float32)
+             for i in range(15)}
+    wrapped = Membrane.wrap(state, "cluster")
+    # Cluster A: items 0, 1, 2 always together
+    # Cluster B: items 5, 6, 7, 8 always together
+    # Cluster C: items 10, 11 always together
+    # Singletons: 3, 4, 9, 12, 13, 14 — accessed independently
+    for _ in range(60):
+        # Cluster A — tight access, big gap after
+        _ = wrapped["item_0"]
+        _ = wrapped["item_1"]
+        _ = wrapped["item_2"]
+        time.sleep(0.008)  # 8ms gap — outside causal window
+        # Cluster B — tight access, big gap after
+        _ = wrapped["item_5"]
+        _ = wrapped["item_6"]
+        _ = wrapped["item_7"]
+        _ = wrapped["item_8"]
+        time.sleep(0.008)
+        # Cluster C (less frequent): skipped on roughly 30% of iterations
+        if np.random.random() > 0.3:
+            _ = wrapped["item_10"]
+            _ = wrapped["item_11"]
+            time.sleep(0.008)
+        # Random singletons: exactly one per iteration
+        idx = np.random.choice([3, 4, 9, 12, 13, 14])
+        _ = wrapped[f"item_{idx}"]
+        time.sleep(0.008)
+    # 3ms window vs the 8ms sleeps above, so separate groups fall in separate windows
+    graph = GraphBuilder(causal_window_ns=3_000_000, cluster_threshold=0.6)
+    graph.build(Membrane.get_log())
+    graph.print_analysis()
+    # Should find at least 2 clear clusters
+    print(f"\n  Clusters found: {len(graph.clusters)}")
+    assert len(graph.clusters) >= 2, "Should find multiple clusters"
+    # Verify cluster A members are together
+    # Members are compared by the last dotted component of their path; a
+    # cluster that contains extra members besides items 0-2 still counts
+    cluster_a_found = False
+    for cluster in graph.clusters:
+        paths = {m.split(".")[-1] for m in cluster.members}
+        if {"item_0", "item_1", "item_2"}.issubset(paths):
+            cluster_a_found = True
+            break
+    assert cluster_a_found, "Should find cluster A (items 0,1,2)"
+    print("  Cluster A (items 0,1,2) found correctly")
+    print("  PASS")
+def test_real_world_simulation():
+    """Test 5: Realistic workload — simulates an AI inference server.
+    Pattern:
+    - Model weights accessed sequentially (forward pass)
+    - KV cache accessed selectively (attention)
+    - Config accessed once at start
+    - Buffer reused across requests
+    """
+    print("\n--- Test 5: Realistic AI Inference Simulation ---")
+    Membrane.clear()
+    state = {
+        "config": {"max_tokens": 512, "temperature": 0.7, "top_p": 0.9},
+        "buffer": {"input_ids": np.zeros(512, dtype=np.int32),
+                    "logits": np.zeros(32000, dtype=np.float32)},
+    }
+    # Add model layers
+    for i in range(6):
+        state[f"layer_{i}"] = {
+            "q": np.random.randn(64, 64).astype(np.float32),
+            "k": np.random.randn(64, 64).astype(np.float32),
+            "v": np.random.randn(64, 64).astype(np.float32),
+            "ffn_up": np.random.randn(64, 256).astype(np.float32),
+            "ffn_down": np.random.randn(256, 64).astype(np.float32),
+        }
+    # Add KV cache (per layer, grows with sequence)
+    for i in range(6):
+        state[f"kv_cache_{i}"] = {
+            "keys": np.zeros((512, 64), dtype=np.float32),
+            "values": np.zeros((512, 64), dtype=np.float32),
+        }
+    wrapped = Membrane.wrap(state, "server")
+    # Simulate 3 requests
+    for req in range(3):
+        # Config read once per request
+        _ = wrapped["config"]["max_tokens"]
+        _ = wrapped["config"]["temperature"]
+        # Buffer setup
+        _ = wrapped["buffer"]["input_ids"]
+        # Forward pass — 10 "tokens" of autoregressive generation
+        for token in range(10):
+            for layer_idx in range(6):
+                # Attention
+                layer = wrapped[f"layer_{layer_idx}"]
+                _ = layer["q"]
+                _ = layer["k"]
+                _ = layer["v"]
+                # KV cache read/write
+                cache = wrapped[f"kv_cache_{layer_idx}"]
+                _ = cache["keys"]
+                _ = cache["values"]
+                # FFN
+                _ = layer["ffn_up"]
+                _ = layer["ffn_down"]
+                time.sleep(0.0001)
+            # Logits at the end of each token
+            _ = wrapped["buffer"]["logits"]
+    total_bytes = 0
+    for k, v in state.items():
+        if isinstance(v, dict):
+            for v2 in v.values():
+                if isinstance(v2, np.ndarray):
+                    total_bytes += v2.nbytes
+                elif isinstance(v2, dict):
+                    for v3 in v2.values():
+                        if isinstance(v3, np.ndarray):
+                            total_bytes += v3.nbytes
+    total_mb = total_bytes / 1024 / 1024
+    print(f"  Simulated: 3 requests × 10 tokens × 6 layers")
+    print(f"  Total state: {total_mb:.1f} MB")
+    graph = GraphBuilder(causal_window_ns=2_000_000)
+    graph.build(Membrane.get_log())
+    graph.print_analysis()
+    # Save for potential Layer 2 testing
+    graph.save(os.path.join(os.path.dirname(__file__), "inference_graph.json"))
+    # Verify key insights
+    config_node = graph.nodes.get("server.config.max_tokens")
+    layer0_q = graph.nodes.get("server.layer_0.q")
+    if config_node and layer0_q:
+        print(f"  Config accesses: {config_node.access_count} (read once per request)")
+        print(f"  Layer 0 Q accesses: {layer0_q.access_count} (every token, every request)")
+        ratio = layer0_q.access_count / max(config_node.access_count, 1)
+        print(f"  Ratio: {ratio:.0f}x — config is compressible, Q is not")
+    print("  PASS")
+if __name__ == "__main__":
+    print("=" * 60)
+    print("  CONDENSATE — Layer 1 Graph Builder Tests")
+    print("=" * 60)
+    test_sequential_model()
+    test_hot_cold_pattern()
+    test_causal_chains()
+    test_cluster_discovery()
+    test_real_world_simulation()
+    print("\n" + "=" * 60)
+    print("  ALL TESTS PASSED")
+    print("=" * 60)

test_membrane.py ADDED Viewed

	@@ -0,0 +1,275 @@

+"""
+Condensate Layer 0: Membrane Tests
+Tests the membrane wrapper on increasingly realistic workloads.
+Run: python3 test_membrane.py
+"""
+import numpy as np
+import time
+import os
+import sys
+# Add parent dir to path so we can import membrane
+sys.path.insert(0, os.path.dirname(__file__))
+from membrane import Membrane
+def test_basic_dict():
+    """Test 1: Basic dict access tracking."""
+    print("\n--- Test 1: Basic Dict Access ---")
+    Membrane.clear()
+    # Wrap a small dict holding a string, a list and a nested dict under the name "basic"
+    data = Membrane.wrap({
+        "name": "test",
+        "values": [1, 2, 3, 4, 5],
+        "nested": {"a": 10, "b": 20, "c": 30},
+    }, "basic")
+    # Read some values
+    _ = data["name"]
+    _ = data["name"]          # same key twice
+    _ = data["values"]
+    _ = data["nested"]["a"]   # nested read — should log both levels
+    _ = data["nested"]["b"]
+    # Write
+    data["name"] = "updated"
+    # Only checks that something was recorded, not the exact number of entries
+    assert Membrane.entry_count() > 0, "Should have recorded accesses"
+    Membrane.print_stats()
+    print("  PASS")
+def test_numpy_arrays():
+    """Test 2: Dict of numpy arrays — simulates model weight storage."""
+    print("\n--- Test 2: NumPy Array State (Simulated Model Weights) ---")
+    Membrane.clear()
+    # Simulate a small model with layers of weight matrices
+    state = {}
+    for layer in range(8):
+        state[f"layer_{layer}"] = {
+            "weight": np.random.randn(256, 256).astype(np.float32),
+            "bias": np.random.randn(256).astype(np.float32),
+            "attention": {
+                "q_proj": np.random.randn(256, 256).astype(np.float32),
+                "k_proj": np.random.randn(256, 256).astype(np.float32),
+                "v_proj": np.random.randn(256, 256).astype(np.float32),
+            }
+        }
+    wrapped = Membrane.wrap(state, "model")
+    total_bytes = sum(
+        state[f"layer_{i}"]["weight"].nbytes +
+        state[f"layer_{i}"]["bias"].nbytes +
+        sum(v.nbytes for v in state[f"layer_{i}"]["attention"].values())
+        for i in range(8)
+    )
+    print(f"  Model state: {total_bytes / 1024 / 1024:.1f} MB across 8 layers")
+    # Simulate a forward pass — sequential layer access
+    print("  Simulating forward pass...")
+    for layer_idx in range(8):
+        layer = wrapped[f"layer_{layer_idx}"]
+        w = layer["weight"]
+        b = layer["bias"]
+        attn = layer["attention"]
+        q = attn["q_proj"]
+        k = attn["k_proj"]
+        v = attn["v_proj"]
+    # Simulate a second forward pass — same pattern
+    print("  Simulating second forward pass...")
+    for layer_idx in range(8):
+        layer = wrapped[f"layer_{layer_idx}"]
+        w = layer["weight"]
+        b = layer["bias"]
+        attn = layer["attention"]
+        q = attn["q_proj"]
+        k = attn["k_proj"]
+        v = attn["v_proj"]
+    Membrane.print_stats()
+    print("  PASS")
+def test_selective_access():
+    """Test 3: Selective access — some layers hot, some cold.
+    This is the pattern Condensate exploits: not all state is accessed equally.
+    """
+    print("\n--- Test 3: Selective Access (Hot/Cold Pattern) ---")
+    Membrane.clear()
+    state = {}
+    for layer in range(16):
+        state[f"layer_{layer}"] = {
+            "weight": np.random.randn(128, 128).astype(np.float32),
+            "bias": np.random.randn(128).astype(np.float32),
+        }
+    wrapped = Membrane.wrap(state, "selective")
+    # Simulate: layers 3, 7, 11 are "hot" — accessed 10x more
+    # (hot layers on all 20 iterations, cold layers only on iterations 0 and 10)
+    hot_layers = {3, 7, 11}
+    for iteration in range(20):
+        for layer_idx in range(16):
+            if layer_idx in hot_layers:
+                # Hot path — always accessed
+                layer = wrapped[f"layer_{layer_idx}"]
+                _ = layer["weight"]
+                _ = layer["bias"]
+            elif iteration % 10 == 0:
+                # Cold path — accessed once every 10 iterations
+                layer = wrapped[f"layer_{layer_idx}"]
+                _ = layer["weight"]
+    stats = Membrane.stats()
+    Membrane.print_stats()
+    # Compare hot vs cold read counts (reported only — nothing is asserted)
+    # Paths missing from stats["paths"] contribute 0 reads.
+    # NOTE(review): assumes paths are keyed as "selective.layer_<i>" — confirm
+    hot_count = sum(
+        stats["paths"].get(f"selective.layer_{i}", {}).get("reads", 0)
+        for i in hot_layers
+    )
+    cold_count = sum(
+        stats["paths"].get(f"selective.layer_{i}", {}).get("reads", 0)
+        for i in range(16) if i not in hot_layers
+    )
+    # max(..., 1) guards against division by zero when no cold reads were counted
+    ratio = hot_count / max(cold_count, 1)
+    print(f"  Hot/cold access ratio: {ratio:.1f}x")
+    print(f"  (This ratio is what Condensate exploits — hot stays in RAM, cold compresses)")
+    print("  PASS")
+def test_temporal_chains():
+    """Test 4: Temporal access chains — A always followed by B followed by C.
+    This is what the SNN will learn as causal chains for prefetch.
+    """
+    print("\n--- Test 4: Temporal Chains (Causal Access Patterns) ---")
+    Membrane.clear()
+    state = {f"region_{i}": np.random.randn(64, 64).astype(np.float32) for i in range(10)}
+    wrapped = Membrane.wrap(state, "temporal")
+    # Chain 1: 0 → 3 → 7 (always in this order)
+    # Chain 2: 1 → 4 → 8 (always in this order)
+    # Region 5: random, no chain
+    # Regions 2, 6 and 9 are never accessed in this test
+    chains = [
+        [0, 3, 7],
+        [1, 4, 8],
+    ]
+    for _ in range(50):
+        for chain in chains:
+            for region_id in chain:
+                _ = wrapped[f"region_{region_id}"]
+                time.sleep(0.0001)  # tiny delay to separate timestamps
+        # Random access to region 5 (on roughly half of the iterations)
+        if np.random.random() > 0.5:
+            _ = wrapped["region_5"]
+    stats = Membrane.stats()
+    Membrane.print_stats()
+    # Check co-accesses — chain members should co-access heavily
+    # Reported only: a missing or empty "top_coaccesses" entry does not fail the test
+    coaccesses = stats.get("top_coaccesses", [])
+    if coaccesses:
+        print(f"  Top co-access pairs found: {len(coaccesses)}")
+        print(f"  (These are the causal chains the SNN would learn)")
+    print("  PASS")
+def test_overhead():
+    """Test 5: Measure the membrane's overhead.
+    This tells us if the observation layer is cheap enough.
+    """
+    print("\n--- Test 5: Overhead Measurement ---")
+    state = {f"key_{i}": np.random.randn(32).astype(np.float32) for i in range(100)}
+    # Baseline: raw dict access
+    iterations = 100_000
+    start = time.monotonic_ns()
+    for _ in range(iterations):
+        for key in ["key_0", "key_50", "key_99"]:
+            _ = state[key]
+    raw_ns = time.monotonic_ns() - start
+    # Membrane: wrapped dict access
+    # Wraps a shallow copy, so the arrays are shared with `state`
+    Membrane.clear()
+    wrapped = Membrane.wrap(state.copy(), "overhead")
+    start = time.monotonic_ns()
+    for _ in range(iterations):
+        for key in ["key_0", "key_50", "key_99"]:
+            _ = wrapped[key]
+    membrane_ns = time.monotonic_ns() - start
+    # Three key lookups per iteration, hence the factor of 3
+    raw_per = raw_ns / (iterations * 3)
+    membrane_per = membrane_ns / (iterations * 3)
+    overhead = membrane_per - raw_per
+    print(f"  Raw dict access:      {raw_per:.0f} ns/access")
+    print(f"  Membrane access:      {membrane_per:.0f} ns/access")
+    print(f"  Overhead per access:  {overhead:.0f} ns")
+    print(f"  Slowdown factor:      {membrane_per / raw_per:.1f}x")
+    print(f"  Total accesses logged: {Membrane.entry_count()}")
+    # The membrane is for observation only — overhead is treated as
+    # acceptable for the PoC if it's under 5μs (5000 ns) per access.
+    # Either branch only prints a verdict; the test never fails on overhead.
+    # For production, the Rust core is expected to bring this to ~5ns.
+    if overhead < 5000:
+        print(f"  Overhead acceptable for PoC (< 5μs)")
+    else:
+        print(f"  Overhead high — expected for Python, Rust core will fix")
+    print("  PASS")
+def test_save_log():
+    """Test 6: Save the access log for Layer 1 analysis."""
+    print("\n--- Test 6: Save Log ---")
+    Membrane.clear()
+    state = {f"region_{i}": np.random.randn(64, 64).astype(np.float32) for i in range(5)}
+    wrapped = Membrane.wrap(state, "saveable")
+    # Generate some access patterns
+    for _ in range(10):
+        _ = wrapped["region_0"]
+        _ = wrapped["region_2"]
+        _ = wrapped["region_4"]
+    log_path = os.path.join(os.path.dirname(__file__), "test_access_log.json")
+    Membrane.save_log(log_path)
+    # Verify file exists and is valid JSON
+    import json
+    with open(log_path) as f:
+        data = json.load(f)
+    assert "entries" in data
+    assert len(data["entries"]) == 30  # 3 accesses x 10 iterations
+    # Clean up
+    os.remove(log_path)
+    print("  PASS")
+if __name__ == "__main__":
+    print("=" * 60)
+    print("  CONDENSATE — Layer 0 Membrane Tests")
+    print("=" * 60)
+    test_basic_dict()
+    test_numpy_arrays()
+    test_selective_access()
+    test_temporal_chains()
+    test_overhead()
+    test_save_log()
+    print("\n" + "=" * 60)
+    print("  ALL TESTS PASSED")
+    print("=" * 60)

test_predictor.py ADDED Viewed

	@@ -0,0 +1,411 @@

+"""
+Condensate Layer 2: Predictor Tests
+Tests prediction accuracy on known access patterns.
+The key question: can we predict what's coming before it's requested?
+Run: python3 test_predictor.py
+"""
+import numpy as np
+import time
+import os
+import sys
+sys.path.insert(0, os.path.dirname(__file__))
+from membrane import Membrane
+from graph_builder import GraphBuilder
+from predictor import Predictor
+def generate_and_learn(name, state, access_fn, train_iters,
+                       causal_window_ns=3_000_000):
+    """Helper: run a workload, build graph, learn predictor.
+    Returns (predictor, graph) after training.
+    """
+    Membrane.clear()
+    wrapped = Membrane.wrap(state, name)
+    for _ in range(train_iters):
+        access_fn(wrapped)
+    train_log = Membrane.get_log()
+    graph = GraphBuilder(causal_window_ns=causal_window_ns)
+    graph.build(train_log)
+    predictor = Predictor()
+    predictor.learn(graph)
+    return predictor, graph, train_log
+def test_sequential_prediction():
+    """Test 1: Sequential layer access — can we predict the next layer?
+    Pattern: layer_0 → layer_1 → layer_2 → ... → layer_7
+    If we see layer_3, we should predict layer_4.
+    """
+    print("\n--- Test 1: Sequential Layer Prediction ---")
+    state = {f"layer_{i}": {"w": np.random.randn(64, 64).astype(np.float32)}
+             for i in range(8)}
+    def access_fn(wrapped):
+        for i in range(8):
+            layer = wrapped[f"layer_{i}"]
+            _ = layer["w"]
+            time.sleep(0.0005)
+    # Train on 20 passes
+    predictor, graph, train_log = generate_and_learn(
+        "seq", state, access_fn, train_iters=20
+    )
+    predictor.print_model()
+    # Test on 10 new passes
+    Membrane.clear()
+    wrapped = Membrane.wrap(
+        {k: dict(v) if isinstance(v, dict) else v for k, v in state.items()},
+        "seq"
+    )
+    for _ in range(10):
+        access_fn(wrapped)
+    test_log = Membrane.get_log()
+    result = predictor.print_score(test_log, verbose=True)
+    assert result["accuracy"] > 50, f"Sequential prediction should be >50%, got {result['accuracy']}%"
+    print(f"  Accuracy: {result['accuracy']}% — sequential prediction works!")
+    print("  PASS")
+def test_causal_chain_prediction():
+    """Test 2: Known causal chains — A→B→C with consistent timing.
+    The predictor should learn the chain and predict B when A fires,
+    and C when B fires. Multi-hop: seeing A should also predict C.
+    """
+    print("\n--- Test 2: Causal Chain Prediction ---")
+    state = {f"r{i}": np.random.randn(32).astype(np.float32)
+             for i in range(8)}
+    def access_fn(wrapped):
+        # Chain: r0 → r2 → r5 → r7  (always, ~1ms apart)
+        _ = wrapped["r0"]
+        time.sleep(0.001)
+        _ = wrapped["r2"]
+        time.sleep(0.001)
+        _ = wrapped["r5"]
+        time.sleep(0.001)
+        _ = wrapped["r7"]
+        time.sleep(0.005)
+        # Noise
+        if np.random.random() > 0.7:
+            _ = wrapped[f"r{np.random.choice([1, 3, 4, 6])}"]
+        time.sleep(0.005)
+    predictor, graph, train_log = generate_and_learn(
+        "chain", state, access_fn, train_iters=50
+    )
+    predictor.print_model()
+    # Test: when r0 fires, do we predict r2?
+    preds = predictor.predict("chain.r0")
+    pred_paths = [p.path for p in preds]
+    print(f"  When r0 fires, predictions: {[p.path.split('.')[-1] for p in preds[:5]]}")
+    r2_predicted = "chain.r2" in pred_paths
+    print(f"  r2 predicted after r0: {r2_predicted}")
+    # Test: when r2 fires, do we predict r5?
+    preds_r2 = predictor.predict("chain.r2")
+    pred_paths_r2 = [p.path for p in preds_r2]
+    r5_predicted = "chain.r5" in pred_paths_r2
+    print(f"  r5 predicted after r2: {r5_predicted}")
+    # Score on fresh data
+    Membrane.clear()
+    wrapped = Membrane.wrap(
+        {k: v.copy() if hasattr(v, 'copy') else v for k, v in state.items()},
+        "chain"
+    )
+    for _ in range(20):
+        access_fn(wrapped)
+    result = predictor.print_score(Membrane.get_log(), verbose=True)
+    assert r2_predicted, "Should predict r2 after r0"
+    print("  PASS")
+def test_cluster_prediction():
+    """Test 3: Cluster co-activation — if one member fires, predict all.
+    When item_0 fires, we should predict item_1 and item_2 (same cluster).
+    """
+    print("\n--- Test 3: Cluster Co-Activation Prediction ---")
+    state = {f"item_{i}": np.random.randn(16).astype(np.float32)
+             for i in range(10)}
+    def access_fn(wrapped):
+        # Cluster A: always together
+        _ = wrapped["item_0"]
+        _ = wrapped["item_1"]
+        _ = wrapped["item_2"]
+        time.sleep(0.008)
+        # Cluster B: always together
+        _ = wrapped["item_5"]
+        _ = wrapped["item_6"]
+        _ = wrapped["item_7"]
+        time.sleep(0.008)
+        # Random singletons
+        _ = wrapped[f"item_{np.random.choice([3, 4, 8, 9])}"]
+        time.sleep(0.008)
+    predictor, graph, train_log = generate_and_learn(
+        "clust", state, access_fn, train_iters=40,
+        causal_window_ns=3_000_000
+    )
+    predictor.print_model()
+    # Test: when item_0 fires, predict item_1 and item_2
+    preds = predictor.predict("clust.item_0")
+    pred_paths = {p.path for p in preds}
+    print(f"  When item_0 fires: {[p.path.split('.')[-1] for p in preds[:5]]}")
+    item_1_predicted = "clust.item_1" in pred_paths
+    item_2_predicted = "clust.item_2" in pred_paths
+    print(f"  item_1 predicted: {item_1_predicted}")
+    print(f"  item_2 predicted: {item_2_predicted}")
+    # Score on fresh data
+    Membrane.clear()
+    wrapped = Membrane.wrap(
+        {k: v.copy() for k, v in state.items()}, "clust"
+    )
+    for _ in range(15):
+        access_fn(wrapped)
+    result = predictor.print_score(Membrane.get_log(), verbose=True)
+    assert item_1_predicted and item_2_predicted, "Should predict cluster members"
+    print("  PASS")
+def test_inference_simulation():
+    """Test 4: Realistic inference — train on requests, predict on new ones.
+    This is the demo workload. If prediction accuracy is high here,
+    Condensate has legs.
+    """
+    print("\n--- Test 4: AI Inference Prediction (The Real Test) ---")
+    state = {
+        "config": {"temp": 0.7, "max_tok": 512},
+    }
+    for i in range(6):
+        state[f"layer_{i}"] = {
+            "q": np.random.randn(64, 64).astype(np.float32),
+            "k": np.random.randn(64, 64).astype(np.float32),
+            "v": np.random.randn(64, 64).astype(np.float32),
+            "ffn": np.random.randn(64, 256).astype(np.float32),
+        }
+    for i in range(6):
+        state[f"kv_{i}"] = {
+            "keys": np.zeros((128, 64), dtype=np.float32),
+            "vals": np.zeros((128, 64), dtype=np.float32),
+        }
+    def access_fn(wrapped):
+        # Config once
+        _ = wrapped["config"]["temp"]
+        # 5 tokens of autoregressive generation
+        for tok in range(5):
+            for layer_idx in range(6):
+                layer = wrapped[f"layer_{layer_idx}"]
+                _ = layer["q"]
+                _ = layer["k"]
+                _ = layer["v"]
+                kv = wrapped[f"kv_{layer_idx}"]
+                _ = kv["keys"]
+                _ = kv["vals"]
+                _ = layer["ffn"]
+                time.sleep(0.0001)
+    # TRAIN on 10 requests
+    print("  Training on 10 requests...")
+    predictor, graph, train_log = generate_and_learn(
+        "inf", state, access_fn, train_iters=10,
+        causal_window_ns=2_000_000
+    )
+    predictor.print_model()
+    # TEST on 5 new requests
+    print("  Testing on 5 new requests...")
+    Membrane.clear()
+    # Rebuild state for test
+    test_state = {}
+    test_state["config"] = {"temp": 0.7, "max_tok": 512}
+    for i in range(6):
+        test_state[f"layer_{i}"] = {
+            "q": np.random.randn(64, 64).astype(np.float32),
+            "k": np.random.randn(64, 64).astype(np.float32),
+            "v": np.random.randn(64, 64).astype(np.float32),
+            "ffn": np.random.randn(64, 256).astype(np.float32),
+        }
+    for i in range(6):
+        test_state[f"kv_{i}"] = {
+            "keys": np.zeros((128, 64), dtype=np.float32),
+            "vals": np.zeros((128, 64), dtype=np.float32),
+        }
+    wrapped = Membrane.wrap(test_state, "inf")
+    for _ in range(5):
+        access_fn(wrapped)
+    test_log = Membrane.get_log()
+    result = predictor.print_score(test_log, verbose=True)
+    # The moment of truth
+    accuracy = result["accuracy"]
+    print(f"\n  *** INFERENCE PREDICTION ACCURACY: {accuracy}% ***")
+    if accuracy >= 80:
+        print("  EXCELLENT — Condensate can predict inference access patterns!")
+        print("  This means: pre-staging works. RAM condensation is viable.")
+    elif accuracy >= 60:
+        print("  GOOD — Significant prediction capability. Worth pursuing.")
+    elif accuracy >= 40:
+        print("  MODERATE — Some structure learned. Needs better substrate.")
+    else:
+        print("  LOW — Pattern too noisy or model too simple. Investigate.")
+    print("  PASS")
+def test_prediction_vs_no_prediction():
+    """Test 5: Quantify the value — compare predicted vs unpredicted accesses.
+    Simulates what would happen with and without prediction:
+    - Without: every cold access = full latency (cache miss)
+    - With: predicted accesses = pre-staged (cache hit)
+    Reports the theoretical speedup.
+    """
+    print("\n--- Test 5: Prediction Value (Theoretical Speedup) ---")
+    state = {}
+    for i in range(16):
+        state[f"block_{i}"] = np.random.randn(128, 128).astype(np.float32)
+    hot_blocks = {0, 1, 2, 3}  # always in RAM
+    cold_blocks = set(range(4, 16))  # would need paging
+    def access_fn(wrapped):
+        # Hot blocks every iteration
+        for i in hot_blocks:
+            _ = wrapped[f"block_{i}"]
+        time.sleep(0.001)
+        # Cold blocks: predictable pattern
+        # Phase A: blocks 4,5,6 together
+        _ = wrapped["block_4"]
+        _ = wrapped["block_5"]
+        _ = wrapped["block_6"]
+        time.sleep(0.005)
+        # Phase B: blocks 10,11,12 together
+        _ = wrapped["block_10"]
+        _ = wrapped["block_11"]
+        _ = wrapped["block_12"]
+        time.sleep(0.005)
+        # Random cold access (unpredictable)
+        _ = wrapped[f"block_{np.random.choice([7, 8, 9, 13, 14, 15])}"]
+        time.sleep(0.005)
+    # Train
+    predictor, graph, train_log = generate_and_learn(
+        "value", state, access_fn, train_iters=30,
+        causal_window_ns=3_000_000
+    )
+    # Test
+    Membrane.clear()
+    wrapped = Membrane.wrap(
+        {k: v.copy() for k, v in state.items()}, "value"
+    )
+    for _ in range(10):
+        access_fn(wrapped)
+    result = predictor.score(Membrane.get_log())
+    # Simulate latency impact
+    hit_rate = result["accuracy"] / 100.0
+    cold_access_count = result["predictions_made"]
+    # Latency model (simplified):
+    # Cache hit (predicted & pre-staged):  ~100ns (RAM-HOT)
+    # Cache miss (unpredicted cold):       ~100μs (disk page-in)
+    # That's a 1000x difference
+    hit_latency_ns = 100
+    miss_latency_ns = 100_000
+    with_prediction = (cold_access_count * hit_rate * hit_latency_ns +
+                       cold_access_count * (1 - hit_rate) * miss_latency_ns)
+    without_prediction = cold_access_count * miss_latency_ns
+    speedup = without_prediction / with_prediction if with_prediction > 0 else 1.0
+    print(f"\n  Cold accesses in test: {cold_access_count}")
+    print(f"  Prediction hit rate:   {result['accuracy']}%")
+    print(f"")
+    print(f"  Without Condensate:")
+    print(f"    Every cold access = {miss_latency_ns/1000:.0f}μs (page from disk)")
+    print(f"    Total latency:      {without_prediction/1e6:.1f}ms")
+    print(f"")
+    print(f"  With Condensate:")
+    print(f"    Predicted hits:     {hit_latency_ns}ns (pre-staged in RAM)")
+    print(f"    Unpredicted misses: {miss_latency_ns/1000:.0f}μs (still cold)")
+    print(f"    Total latency:      {with_prediction/1e6:.1f}ms")
+    print(f"")
+    print(f"  *** THEORETICAL SPEEDUP: {speedup:.1f}x ***")
+    if speedup > 5:
+        print(f"  Significant — prediction eliminates most cold-access latency")
+    elif speedup > 2:
+        print(f"  Meaningful — prediction cuts cold-access latency substantially")
+    else:
+        print(f"  Marginal — need better prediction or different access patterns")
+    print("  PASS")
+if __name__ == "__main__":
+    print("=" * 60)
+    print("  CONDENSATE — Layer 2 Predictor Tests")
+    print("=" * 60)
+    test_sequential_prediction()
+    test_causal_chain_prediction()
+    test_cluster_prediction()
+    test_inference_simulation()
+    test_prediction_vs_no_prediction()
+    print("\n" + "=" * 60)
+    print("  ALL TESTS PASSED")
+    print("=" * 60)