Spaces:

Executor-Tyrant-Framework
/

Condensate

Sleeping

App Files Files Community

Condensate / test_condenser.py

Executor-Tyrant-Framework

Fix HF Space: CPU-only torch, lazy imports

efd23fa about 2 months ago

raw

history blame contribute delete

10.2 kB

	"""
	Condensate Layer 3: Condenser Tests

	The moment of truth — does condensation actually save RAM?

	Run: python3 test_condenser.py
	"""

	import numpy as np
	import time
	import os
	import sys

	sys.path.insert(0, os.path.dirname(__file__))
	from condenser import Condenser


	def test_basic_compression() -> None:
	    """Test 1: Can we compress and decompress without data loss?

	    Registers one float32 array and walks it through
	    HOT -> WARM -> HOT -> COLD -> HOT, asserting the tier after every
	    transition and that each promotion returns an array exactly equal to
	    the original.
	    """
	    print("\n--- Test 1: Lossless Compression Round-Trip ---")

	    condenser = Condenser(demotion_idle_ms=1)

	    # Run cleanup() even when an assertion below fails, so a failing run
	    # does not skip the condenser's teardown.
	    try:
	        # Register a copy so the reference array used for comparison is
	        # never the object the condenser holds.
	        original_data = np.random.randn(256, 256).astype(np.float32)
	        condenser.register("test.weights", original_data.copy())

	        region = condenser.regions["test.weights"]
	        original_size = region.original_size

	        # Compress to WARM
	        saved = region.compress_to_warm()
	        assert region.tier == "WARM"
	        assert region.hot_data is None
	        assert region.warm_data is not None
	        print(f" Original: {original_size / 1024:.1f} KB")
	        print(f" Compressed: {region.compressed_size / 1024:.1f} KB")
	        print(f" Ratio: {original_size / region.compressed_size:.1f}:1")
	        print(f" Saved: {saved / 1024:.1f} KB")

	        # Promote back to HOT
	        restored = region.promote_to_hot()
	        assert region.tier == "HOT"
	        assert np.array_equal(restored, original_data), "Data corrupted after round-trip!"
	        print(" Round-trip: LOSSLESS (arrays match exactly)")

	        # Compress to COLD (disk)
	        region.compress_to_cold(condenser.cold_dir)
	        assert region.tier == "COLD"
	        assert region.current_ram_usage == 0
	        print(" Cold (on disk): 0 KB RAM")

	        # Promote from COLD back to HOT
	        restored2 = region.promote_to_hot()
	        assert region.tier == "HOT"
	        assert np.array_equal(restored2, original_data), "Data corrupted after cold round-trip!"
	        print(" Cold round-trip: LOSSLESS")
	    finally:
	        condenser.cleanup()

	    print(" PASS")


	def test_selective_condensation() -> None:
	    """Test 2: Hot regions stay hot, cold regions compress.

	    16 regions, 4 hot, 12 cold. After condensation, only the 4 hot ones
	    should be in RAM at full size. The assertion is looser than that: it
	    requires at least 8 regions to be WARM or COLD in the last
	    promotion-log entry (presumably to leave slack for the random
	    accesses to cold blocks).
	    """
	    print("\n--- Test 2: Selective Condensation ---")

	    # 16 regions x 32 KB each (128 * 64 float32) = 512 KB total
	    # Use structured data (sparse + patterns) — like real weights, not pure noise
	    state = {}
	    for i in range(16):
	        arr = np.zeros((128, 64), dtype=np.float32)
	        # Sparse: only ~20% nonzero (realistic for many weight matrices)
	        mask = np.random.random((128, 64)) < 0.2
	        arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
	        state[f"block_{i}"] = arr

	    hot_blocks = {0, 1, 2, 3}

	    def workload(wrapped):
	        """One benchmark iteration: read every hot block, rarely a cold one."""
	        # Hot blocks: accessed every iteration
	        for i in hot_blocks:
	            _ = wrapped[f"block_{i}"]

	        # Cold blocks: rarely accessed (5% of iterations touch one of 4..15)
	        if np.random.random() < 0.05:
	            idx = np.random.randint(4, 16)  # upper bound is exclusive
	            _ = wrapped[f"block_{idx}"]

	        time.sleep(0.001)

	    condenser = Condenser(demotion_idle_ms=10, warmup_iters=15)

	    # Run cleanup() even if the benchmark raises or the assertion fails.
	    try:
	        results = condenser.run_benchmark(state, workload, iterations=30,
	                                          name="selective")
	        condenser.print_results(results)

	        # Verify tier management is working — cold regions should exist.
	        # An empty promotion log yields 0 condensed regions and fails below.
	        last_log = results["promotion_log"][-1] if results["promotion_log"] else {}
	        warm_cold = last_log.get("warm", 0) + last_log.get("cold", 0)
	        print(f" Condensed regions (WARM+COLD): {warm_cold} of {results['total_regions']}")
	        print(f" RAM saved: {results['saved_mb']:.2f} MB ({results['saved_pct']:.1f}%)")
	        assert warm_cold >= 8, f"Should condense at least 8 cold regions, got {warm_cold}"
	    finally:
	        condenser.cleanup()

	    print(" PASS")


	def test_inference_workload() -> None:
	    """Test 3: Simulated AI inference — THE benchmark.

	    6-layer model with attention + FFN + KV cache.
	    The workload reads every layer matrix and KV entry three times per
	    iteration, plus meta_0 and meta_1 once; meta_2..meta_19 are never
	    read. This test only reports the benchmark numbers — it makes no
	    assertions about them.
	    """
	    print("\n--- Test 3: AI Inference Workload (The Real Test) ---")

	    state = {}

	    # Model layers — sparse structured weights.
	    # Per layer: q/k/v are 64 KB each (128 * 128 float32) and the two FFN
	    # matrices are 256 KB each (128 * 512 float32), i.e. 704 KB per layer.
	    for i in range(6):
	        for name in ["q", "k", "v"]:
	            arr = np.zeros((128, 128), dtype=np.float32)
	            mask = np.random.random((128, 128)) < 0.25
	            arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
	            state[f"layer_{i}_{name}"] = arr
	        for name, shape in [("ffn_up", (128, 512)), ("ffn_down", (512, 128))]:
	            arr = np.zeros(shape, dtype=np.float32)
	            mask = np.random.random(shape) < 0.2
	            arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
	            state[f"layer_{i}_{name}"] = arr

	    # KV cache — zeros (compresses extremely well)
	    for i in range(6):
	        state[f"kv_{i}_keys"] = np.zeros((256, 128), dtype=np.float32)
	        state[f"kv_{i}_vals"] = np.zeros((256, 128), dtype=np.float32)

	    # Config and metadata (small)
	    for i in range(20):
	        state[f"meta_{i}"] = np.zeros(32, dtype=np.float32)

	    def workload(wrapped):
	        """One request: three tokens, each walking the six layers in order."""
	        # Token generation: sequential through layers
	        for _ in range(3):
	            for layer_idx in range(6):
	                _ = wrapped[f"layer_{layer_idx}_q"]
	                _ = wrapped[f"layer_{layer_idx}_k"]
	                _ = wrapped[f"layer_{layer_idx}_v"]
	                _ = wrapped[f"kv_{layer_idx}_keys"]
	                _ = wrapped[f"kv_{layer_idx}_vals"]
	                _ = wrapped[f"layer_{layer_idx}_ffn_up"]
	                _ = wrapped[f"layer_{layer_idx}_ffn_down"]
	                time.sleep(0.0001)

	        # Metadata accessed once per request
	        _ = wrapped["meta_0"]
	        _ = wrapped["meta_1"]

	    print(f" State: {len(state)} regions, "
	          f"{sum(v.nbytes for v in state.values()) / 1024 / 1024:.2f} MB total")

	    condenser = Condenser(demotion_idle_ms=5, warmup_iters=10)

	    # Run cleanup() even if the benchmark or the reporting raises.
	    try:
	        results = condenser.run_benchmark(state, workload, iterations=20,
	                                          name="inference")
	        condenser.print_results(results)

	        print("\n * INFERENCE RESULTS *")
	        print(f" Baseline RAM: {results['baseline_ram_mb']:.2f} MB")
	        print(f" Condensed RAM: {results['avg_condensed_ram_mb']:.2f} MB")
	        print(f" Saved: {results['saved_mb']:.2f} MB ({results['saved_pct']:.1f}%)")
	        print(f" Prediction acc: {results['prediction_accuracy']}%")
	    finally:
	        condenser.cleanup()

	    print(" PASS")


	def test_large_state() -> None:
	    """Test 4: Larger state — stress test with meaningful RAM numbers.

	    64 regions x 128 KB (256 * 128 float32) = 8 MB total state.
	    Only 8 regions hot at any time = 1 MB needed.
	    Target: condense ~7 MB.

	    The hot set alternates between regions 0-7 and regions 32-39 in
	    blocks of roughly ten iterations. This test only reports the
	    benchmark numbers — it makes no assertions about them.
	    """
	    print("\n--- Test 4: Large State Stress Test ---")

	    # 64 regions x 128 KB each = 8 MB
	    # Structured sparse data — compresses well
	    state = {}
	    for i in range(64):
	        arr = np.zeros((256, 128), dtype=np.float32)
	        mask = np.random.random((256, 128)) < 0.15
	        arr[mask] = np.random.randn(mask.sum()).astype(np.float32)
	        state[f"region_{i}"] = arr

	    # 8 hot regions that rotate
	    hot_set_a = set(range(0, 8))
	    hot_set_b = set(range(32, 40))

	    calls_made = 0

	    def workload(wrapped):
	        """One benchmark iteration: read every region of the current hot set."""
	        nonlocal calls_made
	        calls_made += 1
	        # Alternate between two hot sets. The counter is incremented first,
	        # so the very first call sees 1, not 0.
	        hot = hot_set_a if (calls_made % 20) < 10 else hot_set_b

	        for i in hot:
	            _ = wrapped[f"region_{i}"]

	        time.sleep(0.002)

	    total_mb = sum(v.nbytes for v in state.values()) / 1024 / 1024
	    # Derive the hot-set footprint from the arrays themselves so the
	    # printed figure cannot drift from the shapes used above.
	    hot_mb = sum(state[f"region_{i}"].nbytes for i in hot_set_a) / 1024 / 1024
	    print(f" State: {len(state)} regions, {total_mb:.1f} MB total")
	    print(f" Only {len(hot_set_a)} regions hot at any time ({hot_mb:.1f} MB needed)")

	    condenser = Condenser(demotion_idle_ms=15, warmup_iters=15)

	    # Run cleanup() even if the benchmark or the reporting raises.
	    try:
	        results = condenser.run_benchmark(state, workload, iterations=40,
	                                          name="large")
	        condenser.print_results(results)

	        print("\n * LARGE STATE RESULTS *")
	        print(f" Baseline RAM: {results['baseline_ram_mb']:.1f} MB (all in RAM)")
	        print(f" Condensed RAM: {results['avg_condensed_ram_mb']:.1f} MB")
	        print(f" Saved: {results['saved_mb']:.1f} MB ({results['saved_pct']:.1f}%)")
	    finally:
	        condenser.cleanup()

	    print(" PASS")


	def test_prediction_value() -> None:
	    """Test 5: Measure prediction-driven vs reactive promotions.

	    The ratio of predicted vs reactive tells us how much the
	    predictor is actually helping vs just reacting to cache misses.

	    This test only reports the split read from the benchmark results
	    ("prediction_promotions" and "reactive_promotions"); it makes no
	    assertions about it.
	    """
	    print("\n--- Test 5: Prediction Value Measurement ---")

	    state = {f"chunk_{i}": np.random.randn(64, 64).astype(np.float32)
	             for i in range(20)}

	    # Predictable pattern: 0→1→2→3, then 10→11→12→13
	    def workload(wrapped):
	        """One iteration: two fixed four-chunk runs with a pause after each."""
	        for i in range(4):
	            _ = wrapped[f"chunk_{i}"]
	            time.sleep(0.001)
	        time.sleep(0.005)
	        for i in range(10, 14):
	            _ = wrapped[f"chunk_{i}"]
	            time.sleep(0.001)
	        time.sleep(0.005)

	    condenser = Condenser(demotion_idle_ms=8, warmup_iters=15)

	    # Run cleanup() even if the benchmark or the reporting raises.
	    try:
	        results = condenser.run_benchmark(state, workload, iterations=25,
	                                          name="predval")
	        condenser.print_results(results)

	        pred = results["prediction_promotions"]
	        react = results["reactive_promotions"]
	        total = pred + react

	        if total > 0:
	            pred_pct = pred / total * 100
	            print(f"\n Promotions: {total} total")
	            print(f" Prediction-driven: {pred} ({pred_pct:.0f}%)")
	            print(f" Reactive (miss): {react} ({100 - pred_pct:.0f}%)")

	            if pred_pct > 50:
	                print(" GOOD — Majority of promotions are prediction-driven")
	            else:
	                print(" Prediction helps but reactive still dominates")
	        else:
	            # Zero promotions of either kind were recorded.
	            print(" No promotions needed (everything stayed HOT)")
	    finally:
	        condenser.cleanup()

	    print(" PASS")


	if __name__ == "__main__":
	    # Script entry point: print a banner, run the five tests in their fixed
	    # order, then print the closing banner. Any exception raised by a test
	    # propagates, so the closing banner is reached only when all of them
	    # return normally.
	    rule = "=" * 60

	    for banner_line in (
	        rule,
	        " CONDENSATE — Layer 3 Condenser Tests",
	        " The Moment of Truth: Does It Actually Save RAM?",
	        rule,
	    ):
	        print(banner_line)

	    for run_test in (
	        test_basic_compression,
	        test_selective_condensation,
	        test_inference_workload,
	        test_large_state,
	        test_prediction_value,
	    ):
	        run_test()

	    print("\n" + rule)
	    print(" ALL TESTS PASSED")
	    print(rule)