| """ |
| Condensate Layer 3: Condenser Tests |
| |
| The moment of truth — does condensation actually save RAM? |
| |
| Run: python3 test_condenser.py |
| """ |
|
|
| import numpy as np |
| import time |
| import os |
| import sys |
|
|
| sys.path.insert(0, os.path.dirname(__file__)) |
| from condenser import Condenser |
|
|
|
|
def test_basic_compression():
    """Test 1: Can we compress and decompress without data loss?

    Registers one random float32 array with a ``Condenser`` and drives its
    region through HOT -> WARM -> HOT -> COLD -> HOT, asserting the tier
    after every transition and that each promoted array equals the original
    element-for-element.

    Raises:
        AssertionError: if a tier check or a round-trip comparison fails.
    """
    print("\n--- Test 1: Lossless Compression Round-Trip ---")

    condenser = Condenser(demotion_idle_ms=1)
    try:
        # Register a copy so `original_data` stays an untouched reference
        # for the equality checks below.
        original_data = np.random.randn(256, 256).astype(np.float32)
        condenser.register("test.weights", original_data.copy())

        region = condenser.regions["test.weights"]
        original_size = region.original_size

        # HOT -> WARM: the hot copy must be dropped, the warm copy present.
        # `saved` is reported in KB below — presumably bytes saved; confirm
        # against Condenser.
        saved = region.compress_to_warm()
        assert region.tier == "WARM"
        assert region.hot_data is None
        assert region.warm_data is not None
        print(f" Original: {original_size / 1024:.1f} KB")
        print(f" Compressed: {region.compressed_size / 1024:.1f} KB")
        print(f" Ratio: {original_size / region.compressed_size:.1f}:1")
        print(f" Saved: {saved / 1024:.1f} KB")

        # WARM -> HOT: must be bit-for-bit identical to what was registered.
        restored = region.promote_to_hot()
        assert region.tier == "HOT"
        assert np.array_equal(restored, original_data), "Data corrupted after round-trip!"
        print(" Round-trip: LOSSLESS (arrays match exactly)")

        # HOT -> COLD: the region must report zero RAM usage.
        region.compress_to_cold(condenser.cold_dir)
        assert region.tier == "COLD"
        assert region.current_ram_usage == 0
        print(" Cold (on disk): 0 KB RAM")

        # COLD -> HOT: same losslessness requirement as the warm round-trip.
        restored2 = region.promote_to_hot()
        assert region.tier == "HOT"
        assert np.array_equal(restored2, original_data), "Data corrupted after cold round-trip!"
        print(" Cold round-trip: LOSSLESS")
    finally:
        # Run cleanup even when an assertion above fails, so a failing test
        # does not leave the condenser's resources behind.
        condenser.cleanup()

    print(" PASS")
|
|
|
|
def test_selective_condensation():
    """Test 2: Hot regions stay hot, cold regions compress.

    Builds 16 sparse float32 regions. The workload reads blocks 0-3 on every
    call and, with 5% probability per call, one randomly chosen block from
    4-15. After the benchmark, the last promotion-log entry must report at
    least 8 regions in the WARM or COLD tier.

    NOTE(review): the workload's statement nesting was reconstructed from a
    flattened source — confirm the rare access and the sleep sit at workload
    level rather than inside the hot-block loop.

    Raises:
        AssertionError: if fewer than 8 regions end up condensed.
    """
    print("\n--- Test 2: Selective Condensation ---")

    # Each block is zero-filled with ~20% of entries replaced by random
    # values (each entry is selected independently with probability 0.2).
    state = {}
    for i in range(16):
        arr = np.zeros((128, 64), dtype=np.float32)
        mask = np.random.random((128, 64)) < 0.2
        arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
        state[f"block_{i}"] = arr

    hot_blocks = (0, 1, 2, 3)
    # Built once here instead of on every rare access inside the workload.
    cold_indices = list(range(4, 16))

    def workload(wrapped):
        # Touch every hot block on every call.
        for i in hot_blocks:
            _ = wrapped[f"block_{i}"]

        # Occasionally touch one of the otherwise idle blocks.
        if np.random.random() < 0.05:
            idx = np.random.choice(cold_indices)
            _ = wrapped[f"block_{idx}"]

        time.sleep(0.001)

    condenser = Condenser(demotion_idle_ms=10, warmup_iters=15)
    try:
        results = condenser.run_benchmark(state, workload, iterations=30,
                                          name="selective")
        condenser.print_results(results)

        # An empty promotion log counts as zero condensed regions.
        promotion_log = results["promotion_log"]
        last_log = promotion_log[-1] if promotion_log else {}
        warm_cold = last_log.get("warm", 0) + last_log.get("cold", 0)
        print(f" Condensed regions (WARM+COLD): {warm_cold} of {results['total_regions']}")
        print(f" RAM saved: {results['saved_mb']:.2f} MB ({results['saved_pct']:.1f}%)")
        assert warm_cold >= 8, f"Should condense at least 8 cold regions, got {warm_cold}"
    finally:
        # Previously skipped whenever the assertion above failed.
        condenser.cleanup()

    print(" PASS")
|
|
|
|
def test_inference_workload():
    """Test 3: Simulated AI inference — THE benchmark.

    State layout built here:
      * 6 layers, each with q/k/v (128x128) and ffn_up/ffn_down
        (128x512 / 512x128) sparse float32 matrices;
      * 6 pairs of all-zero KV-cache arrays (256x128);
      * 20 small all-zero ``meta_*`` arrays (32 elements each).

    The workload reads every layer and KV region three times per call but
    only ``meta_0`` and ``meta_1``, so ``meta_2``..``meta_19`` are never
    touched by it.

    This test makes no assertions on the results; it only prints them.

    NOTE(review): the workload's loop nesting was reconstructed from a
    flattened source — confirm the sleep belongs inside the per-layer loop
    and the meta reads after the token loop.
    """
    print("\n--- Test 3: AI Inference Workload (The Real Test) ---")

    def sparse(shape, density):
        # Zero array where each entry is independently replaced by a random
        # value with probability `density`.
        arr = np.zeros(shape, dtype=np.float32)
        mask = np.random.random(shape) < density
        arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
        return arr

    state = {}

    # Per-layer weights: attention projections, then the two FFN matrices.
    for i in range(6):
        for name in ["q", "k", "v"]:
            state[f"layer_{i}_{name}"] = sparse((128, 128), 0.25)
        for name, shape in [("ffn_up", (128, 512)), ("ffn_down", (512, 128))]:
            state[f"layer_{i}_{name}"] = sparse(shape, 0.2)

    # KV cache: all zeros.
    for i in range(6):
        state[f"kv_{i}_keys"] = np.zeros((256, 128), dtype=np.float32)
        state[f"kv_{i}_vals"] = np.zeros((256, 128), dtype=np.float32)

    # Small metadata regions; the workload only ever reads the first two.
    for i in range(20):
        state[f"meta_{i}"] = np.zeros(32, dtype=np.float32)

    def workload(wrapped):
        # Three passes over all six layers per call.
        for token in range(3):
            for layer_idx in range(6):
                _ = wrapped[f"layer_{layer_idx}_q"]
                _ = wrapped[f"layer_{layer_idx}_k"]
                _ = wrapped[f"layer_{layer_idx}_v"]
                _ = wrapped[f"kv_{layer_idx}_keys"]
                _ = wrapped[f"kv_{layer_idx}_vals"]
                _ = wrapped[f"layer_{layer_idx}_ffn_up"]
                _ = wrapped[f"layer_{layer_idx}_ffn_down"]
                time.sleep(0.0001)

        _ = wrapped["meta_0"]
        _ = wrapped["meta_1"]

    print(f" State: {len(state)} regions, "
          f"{sum(v.nbytes for v in state.values()) / 1024 / 1024:.2f} MB total")

    condenser = Condenser(demotion_idle_ms=5, warmup_iters=10)
    try:
        results = condenser.run_benchmark(state, workload, iterations=20,
                                          name="inference")
        condenser.print_results(results)

        print("\n *** INFERENCE RESULTS ***")
        print(f" Baseline RAM: {results['baseline_ram_mb']:.2f} MB")
        print(f" Condensed RAM: {results['avg_condensed_ram_mb']:.2f} MB")
        print(f" Saved: {results['saved_mb']:.2f} MB ({results['saved_pct']:.1f}%)")
        print(f" Prediction acc: {results['prediction_accuracy']}%")
    finally:
        # Run cleanup even if the benchmark or the reporting raises.
        condenser.cleanup()

    print(" PASS")
|
|
|
|
def test_large_state():
    """Test 4: Larger state — stress test with meaningful RAM numbers.

    64 regions × 128 KB (256x128 float32) = 8 MB total state.
    Only 8 regions are hot at any time = 1 MB needed.
    Target: condense ~7 MB.

    The hot set alternates between regions 0-7 and regions 32-39 depending
    on ``calls % 20`` (values 0-9 select the first set, 10-19 the second),
    where ``calls`` counts workload invocations.

    This test makes no assertions on the results; it only prints them.
    """
    print("\n--- Test 4: Large State Stress Test ---")

    # Zero-filled regions where each entry is independently replaced by a
    # random value with probability 0.15.
    state = {}
    for i in range(64):
        arr = np.zeros((256, 128), dtype=np.float32)
        mask = np.random.random((256, 128)) < 0.15
        arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
        state[f"region_{i}"] = arr

    # Two disjoint working sets that the workload switches between.
    hot_set_a = range(0, 8)
    hot_set_b = range(32, 40)

    calls = 0

    def workload(wrapped):
        nonlocal calls
        calls += 1
        hot = hot_set_a if (calls % 20) < 10 else hot_set_b

        for i in hot:
            _ = wrapped[f"region_{i}"]

        time.sleep(0.002)

    total_mb = sum(v.nbytes for v in state.values()) / 1024 / 1024
    # Derive the hot-set footprint from the arrays themselves so the message
    # cannot drift from the real sizes (it previously hard-coded "2 MB").
    hot_mb = sum(state[f"region_{i}"].nbytes for i in hot_set_a) / 1024 / 1024
    print(f" State: {len(state)} regions, {total_mb:.1f} MB total")
    print(f" Only {len(hot_set_a)} regions hot at any time ({hot_mb:.1f} MB needed)")

    condenser = Condenser(demotion_idle_ms=15, warmup_iters=15)
    try:
        results = condenser.run_benchmark(state, workload, iterations=40,
                                          name="large")
        condenser.print_results(results)

        print("\n *** LARGE STATE RESULTS ***")
        print(f" Baseline RAM: {results['baseline_ram_mb']:.1f} MB (all in RAM)")
        print(f" Condensed RAM: {results['avg_condensed_ram_mb']:.1f} MB")
        print(f" Saved: {results['saved_mb']:.1f} MB ({results['saved_pct']:.1f}%)")
    finally:
        # Run cleanup even if the benchmark or the reporting raises.
        condenser.cleanup()

    print(" PASS")
|
|
|
|
def test_prediction_value():
    """Test 5: Measure prediction-driven vs reactive promotions.

    Runs a fixed two-phase access pattern (chunks 0-3, pause, chunks 10-13,
    pause) over 20 random 64x64 float32 chunks, then reports what share of
    the benchmark's promotions were counted as prediction-driven versus
    reactive.

    This test makes no assertions on the results; it only prints them.

    NOTE(review): the workload's loop nesting was reconstructed from a
    flattened source — confirm the 1 ms sleep is per access and the 5 ms
    sleep follows each phase.
    """
    print("\n--- Test 5: Prediction Value Measurement ---")

    state = {f"chunk_{i}": np.random.randn(64, 64).astype(np.float32)
             for i in range(20)}

    def workload(wrapped):
        # Two identical phases that differ only in which four chunks they
        # read: 0-3 first, then 10-13.
        for first in (0, 10):
            for i in range(first, first + 4):
                _ = wrapped[f"chunk_{i}"]
                time.sleep(0.001)
            time.sleep(0.005)

    condenser = Condenser(demotion_idle_ms=8, warmup_iters=15)
    try:
        results = condenser.run_benchmark(state, workload, iterations=25,
                                          name="predval")
        condenser.print_results(results)

        pred = results["prediction_promotions"]
        react = results["reactive_promotions"]
        total = pred + react

        if total > 0:
            pred_pct = pred / total * 100
            print(f"\n Promotions: {total} total")
            print(f" Prediction-driven: {pred} ({pred_pct:.0f}%)")
            print(f" Reactive (miss): {react} ({100-pred_pct:.0f}%)")

            if pred_pct > 50:
                print(" GOOD — Majority of promotions are prediction-driven")
            else:
                print(" Prediction helps but reactive still dominates")
        else:
            # Guard against dividing by zero when nothing was promoted.
            print(" No promotions needed (everything stayed HOT)")
    finally:
        # Run cleanup even if the benchmark or the reporting raises.
        condenser.cleanup()

    print(" PASS")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run every test in order. A failing assertion
    # propagates and stops the run before the final banner is printed.
    print("=" * 60)
    print(" CONDENSATE — Layer 3 Condenser Tests")
    print(" The Moment of Truth: Does It Actually Save RAM?")
    print("=" * 60)

    test_basic_compression()
    test_selective_condensation()
    test_inference_workload()
    test_large_state()
    test_prediction_value()

    print("\n" + "=" * 60)
    print(" ALL TESTS PASSED")
    print("=" * 60)
|
|