Spaces:
Runtime error
Condensate v2: Full Rust conversion — 12 modules, 105 tests, zero Python inflation
Browse files
Phase 0: Fixed PyO3 bindings (18 errors), Cargo edition 2021
Phase 1: 12 parallel Lego blocks built and tested:
A: Membrane hardening (observe-only, canary, blacklist, confidence gating)
B: Condenser real memory ops (page protection, cold tier disk, compression guards)
C: Lenia cross-process field (process tags, adaptive growth, priority, serialize)
D: Pipeline process awareness (per-process state, graduated engagement, crash correlation)
E: Python thin wrappers (1,772 lines of inflation eliminated)
F: Keyframe/delta encoding (video codec model for memory)
G: Sparse extract (partial decompression, serve exactly what's needed)
H: Manufactured spatial locality (arena allocator, CPU prefetch instructions)
I: Sleep consolidation (biological sleep cycle, replay/reorganize/prune)
J: Prediction gate (KISS overhead reduction, cost decreases over time)
K: Gaussian splat field (covariance influence, split/merge, tiled scan)
L: Erasure coding + holographic boundaries (K-of-N fault tolerance)
Phase 2: Integration — LD_PRELOAD hooks gated behind preload feature,
O(n²) cluster discovery replaced with O(E), holographic node boundaries added
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
- condenser.py +11 -515
- graph_builder.py +18 -485
- membrane.py +19 -317
- predictor.py +13 -392
- rust_core/Cargo.toml +2 -1
- rust_core/src/condenser.rs +315 -15
- rust_core/src/erasure.rs +829 -0
- rust_core/src/gate.rs +655 -0
- rust_core/src/graph.rs +105 -36
- rust_core/src/keyframe.rs +552 -0
- rust_core/src/lenia.rs +383 -15
- rust_core/src/lib.rs +36 -6
- rust_core/src/locality.rs +707 -0
- rust_core/src/membrane.rs +278 -5
- rust_core/src/pipeline.rs +460 -56
- rust_core/src/predictor.rs +33 -18
- rust_core/src/sleep.rs +677 -0
- rust_core/src/sparse.rs +488 -0
- rust_core/src/splat.rs +839 -0
- torch_membrane.py +30 -152
|
@@ -1,521 +1,17 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Condensate Layer 3: The Condenser
|
| 3 |
-
|
| 4 |
-
The actual RAM reduction engine. Takes predictions from Layer 2
|
| 5 |
-
and manages memory tiers:
|
| 6 |
-
|
| 7 |
-
HOT: Full Python objects in RAM (actively accessed)
|
| 8 |
-
WARM: LZ4-compressed binary in RAM (predicted-soon or recently cold)
|
| 9 |
-
COLD: Serialized to disk (not predicted, not recent)
|
| 10 |
-
|
| 11 |
-
When the predictor says "region B is coming," the condenser
|
| 12 |
-
pre-promotes B from WARM→HOT before the access arrives.
|
| 13 |
-
When a region goes quiet, the condenser demotes it HOT→WARM→COLD.
|
| 14 |
-
|
| 15 |
-
This is the layer that proves RAM savings are real and measurable.
|
| 16 |
-
|
| 17 |
-
Usage:
|
| 18 |
-
from condenser import Condenser
|
| 19 |
-
|
| 20 |
-
condenser = Condenser(ram_budget_mb=50)
|
| 21 |
-
condenser.learn_and_manage(state_dict, workload_fn)
|
| 22 |
-
condenser.print_results()
|
| 23 |
-
"""
|
| 24 |
-
|
| 25 |
-
import numpy as np
|
| 26 |
-
import pickle
|
| 27 |
-
import lz4.frame
|
| 28 |
-
import time
|
| 29 |
-
import sys
|
| 30 |
-
import os
|
| 31 |
-
import tempfile
|
| 32 |
-
from collections import defaultdict
|
| 33 |
-
|
| 34 |
-
sys.path.insert(0, os.path.dirname(__file__))
|
| 35 |
-
from membrane import Membrane
|
| 36 |
-
from graph_builder import GraphBuilder
|
| 37 |
-
from predictor import Predictor
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
class MemoryRegion:
    """A managed memory region that lives in exactly one tier at a time.

    The tier is stored as one of the strings ``"HOT"``, ``"WARM"`` or
    ``"COLD"``:

    * HOT  -- the live Python object is held in ``hot_data``.
    * WARM -- the pickled, LZ4-compressed bytes are held in ``warm_data``.
    * COLD -- the compressed bytes were written to the file at ``cold_path``.
    """

    __slots__ = ['path', 'tier', 'hot_data', 'warm_data', 'cold_path',
                 'original_size', 'compressed_size', 'access_count',
                 'last_access_ns', 'promotions', 'demotions',
                 'prediction_hits']

    # Injective (prefix-free) escaping used to turn a region path into a file
    # name. Mapping both "." and "/" to a bare "_" made distinct paths such as
    # "m.a.b" and "m.a_b" share one cold file and overwrite each other.
    _COLD_NAME_ESCAPES = str.maketrans({"_": "__", ".": "_d", "/": "_s"})

    def __init__(self, path, data):
        """Create a HOT region named *path* that holds *data*."""
        self.path = path
        self.tier = "HOT"
        self.hot_data = data
        self.warm_data = None   # LZ4 compressed bytes
        self.cold_path = None   # disk file path
        self.original_size = self._measure(data)
        self.compressed_size = 0
        self.access_count = 0
        self.last_access_ns = time.monotonic_ns()
        self.promotions = 0
        self.demotions = 0
        self.prediction_hits = 0

    def _measure(self, data):
        """Return the size of *data* in bytes.

        NumPy arrays report ``nbytes`` and bytes-like objects their length.
        Anything else falls back to ``sys.getsizeof``, which is shallow: it
        does not follow references into contained objects.
        """
        if isinstance(data, np.ndarray):
            return data.nbytes
        elif isinstance(data, (bytes, bytearray)):
            return len(data)
        else:
            try:
                return sys.getsizeof(data)
            except TypeError:
                return 64  # fallback estimate

    def _cold_file_name(self):
        """Return a file name for this region that is unique per ``path``."""
        return f"{self.path.translate(self._COLD_NAME_ESCAPES)}.cold"

    def compress_to_warm(self):
        """HOT -> WARM: pickle and LZ4-compress the data, drop the original.

        Returns:
            ``original_size - compressed_size`` clamped at zero, or 0 when the
            region is not HOT or holds no data.
        """
        if self.tier != "HOT" or self.hot_data is None:
            return 0

        serialized = pickle.dumps(self.hot_data, protocol=pickle.HIGHEST_PROTOCOL)
        self.warm_data = lz4.frame.compress(serialized)
        self.compressed_size = len(self.warm_data)

        saved = self.original_size - self.compressed_size
        self.hot_data = None
        self.tier = "WARM"
        self.demotions += 1
        # Incompressible data can grow; never report negative savings.
        return max(saved, 0)

    def compress_to_cold(self, cold_dir):
        """WARM -> COLD: write the compressed bytes to disk and drop them.

        A HOT region is compressed to WARM first (which counts as its own
        demotion).

        Args:
            cold_dir: Directory that receives the ``.cold`` file. It is
                created if it does not exist yet.

        Returns:
            The number of compressed bytes released from RAM, or 0 when the
            region is already COLD or has nothing to write.
        """
        if self.tier == "COLD":
            return 0

        # If still HOT, compress first
        if self.tier == "HOT":
            self.compress_to_warm()

        if self.warm_data is None:
            return 0

        # A caller-supplied directory may not exist yet.
        os.makedirs(cold_dir, exist_ok=True)

        # Write compressed data to disk
        self.cold_path = os.path.join(cold_dir, self._cold_file_name())
        with open(self.cold_path, 'wb') as f:
            f.write(self.warm_data)

        saved = self.compressed_size
        self.warm_data = None
        self.compressed_size = 0
        self.tier = "COLD"
        self.demotions += 1
        return saved

    def promote_to_hot(self):
        """WARM/COLD -> HOT: load (if on disk), decompress and unpickle.

        The cold file is left on disk and ``cold_path`` is kept; a later
        demotion overwrites the same file.

        Returns:
            The restored object (``hot_data``).
        """
        if self.tier == "HOT":
            return self.hot_data

        if self.tier == "COLD" and self.cold_path:
            # Load from disk first
            with open(self.cold_path, 'rb') as f:
                self.warm_data = f.read()
            self.compressed_size = len(self.warm_data)
            self.tier = "WARM"

        if self.tier == "WARM" and self.warm_data:
            decompressed = lz4.frame.decompress(self.warm_data)
            # NOTE(security): pickle.loads executes code embedded in the
            # payload. The cold directory must only contain files written by
            # this process / trusted users.
            self.hot_data = pickle.loads(decompressed)
            self.warm_data = None
            self.compressed_size = 0
            self.tier = "HOT"
            self.promotions += 1

        return self.hot_data

    @property
    def current_ram_usage(self):
        """Bytes of RAM attributed to this region in its current tier."""
        if self.tier == "HOT":
            return self.original_size
        elif self.tier == "WARM":
            return self.compressed_size
        else:
            return 0  # on disk

    def touch(self):
        """Record an access: bump the counter and refresh the timestamp."""
        self.access_count += 1
        self.last_access_ns = time.monotonic_ns()
|
| 150 |
|
| 151 |
|
| 152 |
class Condenser:
|
| 153 |
-
"""
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
predictions from the Layer 2 predictor to pre-stage data.
|
| 157 |
-
"""
|
| 158 |
-
|
| 159 |
-
def __init__(self, ram_budget_mb=None, cold_dir=None,
|
| 160 |
-
demotion_idle_ms=50, warmup_iters=10):
|
| 161 |
-
"""
|
| 162 |
-
Args:
|
| 163 |
-
ram_budget_mb: Max RAM budget in MB. None = no limit (measure only).
|
| 164 |
-
cold_dir: Directory for cold storage. None = auto temp dir.
|
| 165 |
-
demotion_idle_ms: Demote to WARM after this many ms idle.
|
| 166 |
-
warmup_iters: Number of iterations to observe before condensing.
|
| 167 |
-
"""
|
| 168 |
-
self.ram_budget_bytes = int(ram_budget_mb * 1024 * 1024) if ram_budget_mb else None
|
| 169 |
-
self.cold_dir = cold_dir or tempfile.mkdtemp(prefix="condensate_cold_")
|
| 170 |
-
self.demotion_idle_ms = demotion_idle_ms
|
| 171 |
-
self.warmup_iters = warmup_iters
|
| 172 |
-
|
| 173 |
-
self.regions = {} # path → MemoryRegion
|
| 174 |
-
self.predictor = None
|
| 175 |
-
self.graph = None
|
| 176 |
-
|
| 177 |
-
# Metrics
|
| 178 |
-
self.metrics = {
|
| 179 |
-
"peak_ram_no_condensate": 0,
|
| 180 |
-
"peak_ram_with_condensate": 0,
|
| 181 |
-
"total_promotions": 0,
|
| 182 |
-
"total_demotions": 0,
|
| 183 |
-
"prediction_driven_promotions": 0,
|
| 184 |
-
"reactive_promotions": 0,
|
| 185 |
-
"total_ram_saved_bytes": 0,
|
| 186 |
-
"access_latencies_ns": [],
|
| 187 |
-
"cold_accesses_avoided": 0,
|
| 188 |
-
"cold_accesses_hit": 0,
|
| 189 |
-
}
|
| 190 |
-
|
| 191 |
-
def register(self, path, data):
|
| 192 |
-
"""Register a memory region for management."""
|
| 193 |
-
self.regions[path] = MemoryRegion(path, data)
|
| 194 |
-
|
| 195 |
-
def _current_ram(self):
|
| 196 |
-
"""Total current RAM usage across all regions."""
|
| 197 |
-
return sum(r.current_ram_usage for r in self.regions.values())
|
| 198 |
-
|
| 199 |
-
def _demote_coldest(self, target_savings):
|
| 200 |
-
"""Demote regions to meet RAM budget. Coldest first."""
|
| 201 |
-
now = time.monotonic_ns()
|
| 202 |
-
saved = 0
|
| 203 |
-
|
| 204 |
-
# Sort by last access time (oldest first)
|
| 205 |
-
candidates = sorted(
|
| 206 |
-
[r for r in self.regions.values() if r.tier == "HOT"],
|
| 207 |
-
key=lambda r: r.last_access_ns
|
| 208 |
-
)
|
| 209 |
-
|
| 210 |
-
for region in candidates:
|
| 211 |
-
if saved >= target_savings:
|
| 212 |
-
break
|
| 213 |
-
|
| 214 |
-
idle_ms = (now - region.last_access_ns) / 1_000_000
|
| 215 |
-
if idle_ms < self.demotion_idle_ms * 0.5:
|
| 216 |
-
continue # too recently accessed
|
| 217 |
-
|
| 218 |
-
saved += region.compress_to_warm()
|
| 219 |
-
self.metrics["total_demotions"] += 1
|
| 220 |
-
|
| 221 |
-
# If still over budget, push WARM to COLD
|
| 222 |
-
if saved < target_savings:
|
| 223 |
-
warm_candidates = sorted(
|
| 224 |
-
[r for r in self.regions.values() if r.tier == "WARM"],
|
| 225 |
-
key=lambda r: r.last_access_ns
|
| 226 |
-
)
|
| 227 |
-
for region in warm_candidates:
|
| 228 |
-
if saved >= target_savings:
|
| 229 |
-
break
|
| 230 |
-
saved += region.compress_to_cold(self.cold_dir)
|
| 231 |
-
self.metrics["total_demotions"] += 1
|
| 232 |
-
|
| 233 |
-
return saved
|
| 234 |
-
|
| 235 |
-
def _enforce_budget(self):
|
| 236 |
-
"""Enforce RAM budget by demoting as needed."""
|
| 237 |
-
if self.ram_budget_bytes is None:
|
| 238 |
-
return
|
| 239 |
-
|
| 240 |
-
current = self._current_ram()
|
| 241 |
-
if current > self.ram_budget_bytes:
|
| 242 |
-
overage = current - self.ram_budget_bytes
|
| 243 |
-
self._demote_coldest(overage)
|
| 244 |
-
|
| 245 |
-
def _periodic_demotion(self):
|
| 246 |
-
"""Demote idle regions even without budget pressure."""
|
| 247 |
-
now = time.monotonic_ns()
|
| 248 |
-
|
| 249 |
-
for region in self.regions.values():
|
| 250 |
-
if region.tier == "HOT":
|
| 251 |
-
idle_ms = (now - region.last_access_ns) / 1_000_000
|
| 252 |
-
if idle_ms > self.demotion_idle_ms:
|
| 253 |
-
region.compress_to_warm()
|
| 254 |
-
self.metrics["total_demotions"] += 1
|
| 255 |
-
elif region.tier == "WARM":
|
| 256 |
-
# Push long-idle WARM to COLD (disk) for real RAM savings
|
| 257 |
-
idle_ms = (now - region.last_access_ns) / 1_000_000
|
| 258 |
-
if idle_ms > self.demotion_idle_ms * 3:
|
| 259 |
-
region.compress_to_cold(self.cold_dir)
|
| 260 |
-
self.metrics["total_demotions"] += 1
|
| 261 |
-
|
| 262 |
-
def access(self, path):
|
| 263 |
-
"""Access a region — promote if needed, record latency.
|
| 264 |
-
|
| 265 |
-
Returns the data.
|
| 266 |
-
"""
|
| 267 |
-
region = self.regions.get(path)
|
| 268 |
-
if region is None:
|
| 269 |
-
return None
|
| 270 |
-
|
| 271 |
-
start = time.monotonic_ns()
|
| 272 |
-
|
| 273 |
-
if region.tier != "HOT":
|
| 274 |
-
# Need to promote — was this predicted?
|
| 275 |
-
region.promote_to_hot()
|
| 276 |
-
self.metrics["total_promotions"] += 1
|
| 277 |
-
self.metrics["reactive_promotions"] += 1
|
| 278 |
-
|
| 279 |
-
if region.tier != "HOT":
|
| 280 |
-
# Still not hot — disk failure?
|
| 281 |
-
return None
|
| 282 |
-
|
| 283 |
-
elapsed_ns = time.monotonic_ns() - start
|
| 284 |
-
self.metrics["access_latencies_ns"].append(elapsed_ns)
|
| 285 |
-
region.touch()
|
| 286 |
-
|
| 287 |
-
return region.hot_data
|
| 288 |
-
|
| 289 |
-
def pre_promote(self, path):
|
| 290 |
-
"""Prediction-driven promotion — pre-stage before access.
|
| 291 |
-
|
| 292 |
-
Called by the predictor when it predicts this path will be accessed.
|
| 293 |
-
"""
|
| 294 |
-
region = self.regions.get(path)
|
| 295 |
-
if region is None:
|
| 296 |
-
return
|
| 297 |
-
|
| 298 |
-
if region.tier != "HOT":
|
| 299 |
-
region.promote_to_hot()
|
| 300 |
-
self.metrics["total_promotions"] += 1
|
| 301 |
-
self.metrics["prediction_driven_promotions"] += 1
|
| 302 |
-
self.metrics["cold_accesses_avoided"] += 1
|
| 303 |
-
region.prediction_hits += 1
|
| 304 |
-
|
| 305 |
-
def run_benchmark(self, state, workload_fn, iterations=20,
|
| 306 |
-
name="benchmark"):
|
| 307 |
-
"""Full benchmark: measure RAM with and without condensation.
|
| 308 |
-
|
| 309 |
-
Runs the workload twice:
|
| 310 |
-
1. Baseline: no condensation, measure peak RAM
|
| 311 |
-
2. Condensed: with prediction and tier management
|
| 312 |
-
|
| 313 |
-
Args:
|
| 314 |
-
state: dict of name → data (numpy arrays, dicts, etc.)
|
| 315 |
-
workload_fn: function(wrapped_state) that accesses state
|
| 316 |
-
iterations: how many times to run the workload
|
| 317 |
-
name: label for the wrapped state
|
| 318 |
-
|
| 319 |
-
Returns:
|
| 320 |
-
dict with benchmark results
|
| 321 |
-
"""
|
| 322 |
-
print(f"\n Phase 1: Baseline measurement ({self.warmup_iters} iters)...")
|
| 323 |
-
|
| 324 |
-
# --- BASELINE: No condensation ---
|
| 325 |
-
total_state_size = 0
|
| 326 |
-
for key, value in state.items():
|
| 327 |
-
if isinstance(value, np.ndarray):
|
| 328 |
-
total_state_size += value.nbytes
|
| 329 |
-
elif isinstance(value, dict):
|
| 330 |
-
for v in value.values():
|
| 331 |
-
if isinstance(v, np.ndarray):
|
| 332 |
-
total_state_size += v.nbytes
|
| 333 |
-
|
| 334 |
-
baseline_ram = total_state_size
|
| 335 |
-
self.metrics["peak_ram_no_condensate"] = baseline_ram
|
| 336 |
-
|
| 337 |
-
# --- LEARN: Run workload with membrane to learn patterns ---
|
| 338 |
-
Membrane.clear()
|
| 339 |
-
wrapped = Membrane.wrap(
|
| 340 |
-
{k: v.copy() if isinstance(v, np.ndarray) else
|
| 341 |
-
{k2: v2.copy() if isinstance(v2, np.ndarray) else v2
|
| 342 |
-
for k2, v2 in v.items()} if isinstance(v, dict) else v
|
| 343 |
-
for k, v in state.items()},
|
| 344 |
-
name
|
| 345 |
-
)
|
| 346 |
-
|
| 347 |
-
for _ in range(self.warmup_iters):
|
| 348 |
-
workload_fn(wrapped)
|
| 349 |
-
|
| 350 |
-
train_log = Membrane.get_log()
|
| 351 |
-
|
| 352 |
-
# Build graph and predictor
|
| 353 |
-
self.graph = GraphBuilder(causal_window_ns=3_000_000)
|
| 354 |
-
self.graph.build(train_log)
|
| 355 |
-
|
| 356 |
-
self.predictor = Predictor()
|
| 357 |
-
self.predictor.learn(self.graph)
|
| 358 |
-
|
| 359 |
-
# Score prediction accuracy on training data
|
| 360 |
-
pred_result = self.predictor.score(train_log)
|
| 361 |
-
pred_accuracy = pred_result["accuracy"]
|
| 362 |
-
|
| 363 |
-
print(f" Prediction accuracy on training data: {pred_accuracy}%")
|
| 364 |
-
|
| 365 |
-
# --- CONDENSE: Register all regions, run with tier management ---
|
| 366 |
-
print(f"\n Phase 2: Condensed run ({iterations} iters)...")
|
| 367 |
-
|
| 368 |
-
# Register all leaf data as regions
|
| 369 |
-
for key, value in state.items():
|
| 370 |
-
if isinstance(value, np.ndarray):
|
| 371 |
-
self.register(f"{name}.{key}", value.copy())
|
| 372 |
-
elif isinstance(value, dict):
|
| 373 |
-
for k2, v2 in value.items():
|
| 374 |
-
path = f"{name}.{key}.{k2}"
|
| 375 |
-
if isinstance(v2, np.ndarray):
|
| 376 |
-
self.register(path, v2.copy())
|
| 377 |
-
else:
|
| 378 |
-
self.register(path, v2)
|
| 379 |
-
|
| 380 |
-
ram_snapshots = []
|
| 381 |
-
promotion_log = []
|
| 382 |
-
|
| 383 |
-
for iteration in range(iterations):
|
| 384 |
-
# Periodic demotion of idle regions
|
| 385 |
-
self._periodic_demotion()
|
| 386 |
-
self._enforce_budget()
|
| 387 |
-
|
| 388 |
-
# Run workload with condensation
|
| 389 |
-
Membrane.clear()
|
| 390 |
-
|
| 391 |
-
# We simulate the workload by tracking which paths get accessed
|
| 392 |
-
# and using the predictor to pre-promote
|
| 393 |
-
wrapped_sim = Membrane.wrap(
|
| 394 |
-
{k: v.copy() if isinstance(v, np.ndarray) else
|
| 395 |
-
{k2: v2.copy() if isinstance(v2, np.ndarray) else v2
|
| 396 |
-
for k2, v2 in v.items()} if isinstance(v, dict) else v
|
| 397 |
-
for k, v in state.items()},
|
| 398 |
-
name
|
| 399 |
-
)
|
| 400 |
-
|
| 401 |
-
workload_fn(wrapped_sim)
|
| 402 |
-
iter_log = Membrane.get_log()
|
| 403 |
-
|
| 404 |
-
# Process each access: predict → pre-promote → access
|
| 405 |
-
for ts, event_type, path, size_bytes in sorted(iter_log, key=lambda e: e[0]):
|
| 406 |
-
# Get predictions from this access
|
| 407 |
-
predictions = self.predictor.predict(path, top_k=5)
|
| 408 |
-
|
| 409 |
-
# Pre-promote predicted regions
|
| 410 |
-
for pred in predictions:
|
| 411 |
-
if pred.confidence >= 0.5:
|
| 412 |
-
self.pre_promote(pred.path)
|
| 413 |
-
|
| 414 |
-
# Access the region (may already be HOT from prediction)
|
| 415 |
-
region = self.regions.get(path)
|
| 416 |
-
if region:
|
| 417 |
-
if region.tier == "HOT":
|
| 418 |
-
region.touch()
|
| 419 |
-
else:
|
| 420 |
-
self.access(path)
|
| 421 |
-
self.metrics["cold_accesses_hit"] += 1
|
| 422 |
-
|
| 423 |
-
# Snapshot RAM usage
|
| 424 |
-
current_ram = self._current_ram()
|
| 425 |
-
ram_snapshots.append(current_ram)
|
| 426 |
-
|
| 427 |
-
hot_count = sum(1 for r in self.regions.values() if r.tier == "HOT")
|
| 428 |
-
warm_count = sum(1 for r in self.regions.values() if r.tier == "WARM")
|
| 429 |
-
cold_count = sum(1 for r in self.regions.values() if r.tier == "COLD")
|
| 430 |
-
|
| 431 |
-
promotion_log.append({
|
| 432 |
-
"iter": iteration,
|
| 433 |
-
"ram_bytes": current_ram,
|
| 434 |
-
"hot": hot_count,
|
| 435 |
-
"warm": warm_count,
|
| 436 |
-
"cold": cold_count,
|
| 437 |
-
})
|
| 438 |
-
|
| 439 |
-
# Final metrics
|
| 440 |
-
min_ram = min(ram_snapshots) if ram_snapshots else baseline_ram
|
| 441 |
-
avg_ram = np.mean(ram_snapshots) if ram_snapshots else baseline_ram
|
| 442 |
-
self.metrics["peak_ram_with_condensate"] = max(ram_snapshots) if ram_snapshots else baseline_ram
|
| 443 |
-
|
| 444 |
-
saved_bytes = baseline_ram - avg_ram
|
| 445 |
-
saved_pct = (saved_bytes / baseline_ram * 100) if baseline_ram > 0 else 0
|
| 446 |
-
self.metrics["total_ram_saved_bytes"] = int(saved_bytes)
|
| 447 |
-
|
| 448 |
-
return {
|
| 449 |
-
"baseline_ram_mb": baseline_ram / (1024 * 1024),
|
| 450 |
-
"avg_condensed_ram_mb": avg_ram / (1024 * 1024),
|
| 451 |
-
"min_condensed_ram_mb": min_ram / (1024 * 1024),
|
| 452 |
-
"peak_condensed_ram_mb": self.metrics["peak_ram_with_condensate"] / (1024 * 1024),
|
| 453 |
-
"saved_mb": saved_bytes / (1024 * 1024),
|
| 454 |
-
"saved_pct": saved_pct,
|
| 455 |
-
"prediction_accuracy": pred_accuracy,
|
| 456 |
-
"prediction_promotions": self.metrics["prediction_driven_promotions"],
|
| 457 |
-
"reactive_promotions": self.metrics["reactive_promotions"],
|
| 458 |
-
"cold_accesses_avoided": self.metrics["cold_accesses_avoided"],
|
| 459 |
-
"total_regions": len(self.regions),
|
| 460 |
-
"ram_snapshots": ram_snapshots,
|
| 461 |
-
"promotion_log": promotion_log,
|
| 462 |
-
}
|
| 463 |
-
|
| 464 |
-
def print_results(self, results):
|
| 465 |
-
"""Print benchmark results."""
|
| 466 |
-
print(f"\n{'='*60}")
|
| 467 |
-
print(f" CONDENSATE — Layer 3 Benchmark Results")
|
| 468 |
-
print(f"{'='*60}")
|
| 469 |
-
|
| 470 |
-
print(f"\n RAM Usage:")
|
| 471 |
-
print(f" Baseline (no condensation): {results['baseline_ram_mb']:>8.2f} MB")
|
| 472 |
-
print(f" Average condensed: {results['avg_condensed_ram_mb']:>8.2f} MB")
|
| 473 |
-
print(f" Minimum condensed: {results['min_condensed_ram_mb']:>8.2f} MB")
|
| 474 |
-
print(f" Peak condensed: {results['peak_condensed_ram_mb']:>8.2f} MB")
|
| 475 |
-
print(f"")
|
| 476 |
-
print(f" *** RAM SAVED: {results['saved_mb']:.2f} MB ({results['saved_pct']:.1f}%) ***")
|
| 477 |
-
|
| 478 |
-
print(f"\n Prediction Performance:")
|
| 479 |
-
print(f" Accuracy: {results['prediction_accuracy']}%")
|
| 480 |
-
print(f" Pre-staged (predicted): {results['prediction_promotions']}")
|
| 481 |
-
print(f" Reactive (cache miss): {results['reactive_promotions']}")
|
| 482 |
-
print(f" Cold accesses avoided: {results['cold_accesses_avoided']}")
|
| 483 |
-
|
| 484 |
-
print(f"\n Region Management:")
|
| 485 |
-
print(f" Total regions: {results['total_regions']}")
|
| 486 |
-
|
| 487 |
-
if results.get("promotion_log"):
|
| 488 |
-
last = results["promotion_log"][-1]
|
| 489 |
-
print(f" Final state: HOT={last['hot']} WARM={last['warm']} COLD={last['cold']}")
|
| 490 |
-
|
| 491 |
-
# Per-region breakdown
|
| 492 |
-
print(f"\n Per-Region Breakdown:")
|
| 493 |
-
print(f" {'Region':<35} {'Tier':>5} {'Size':>8} {'Accesses':>8} {'Promos':>6}")
|
| 494 |
-
print(f" {'-'*35} {'-'*5} {'-'*8} {'-'*8} {'-'*6}")
|
| 495 |
-
|
| 496 |
-
sorted_regions = sorted(self.regions.values(),
|
| 497 |
-
key=lambda r: -r.access_count)
|
| 498 |
-
for region in sorted_regions[:20]:
|
| 499 |
-
short = region.path if len(region.path) <= 35 else "..." + region.path[-32:]
|
| 500 |
-
size_kb = region.original_size / 1024
|
| 501 |
-
print(f" {short:<35} {region.tier:>5} {size_kb:>7.1f}K "
|
| 502 |
-
f"{region.access_count:>8} {region.promotions:>6}")
|
| 503 |
-
|
| 504 |
-
if len(sorted_regions) > 20:
|
| 505 |
-
print(f" ... and {len(sorted_regions) - 20} more regions")
|
| 506 |
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
if warm_regions:
|
| 510 |
-
ratios = [r.original_size / max(r.compressed_size, 1) for r in warm_regions]
|
| 511 |
-
avg_ratio = np.mean(ratios)
|
| 512 |
-
print(f"\n Compression: {len(warm_regions)} WARM regions, "
|
| 513 |
-
f"avg ratio {avg_ratio:.1f}:1")
|
| 514 |
|
| 515 |
-
|
|
|
|
|
|
|
| 516 |
|
| 517 |
-
def
|
| 518 |
-
""
|
| 519 |
-
import shutil
|
| 520 |
-
if os.path.exists(self.cold_dir) and self.cold_dir.startswith(tempfile.gettempdir()):
|
| 521 |
-
shutil.rmtree(self.cold_dir, ignore_errors=True)
|
|
|
|
| 1 |
+
"""Condensate Condenser — placeholder for Rust Condenser integration."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
class Condenser:
    """Tier management wrapper. Will delegate to Rust when PyO3 bindings are wired.

    Until then it only keeps track of which regions are registered, keyed by
    address, so that ``status()`` reports the number of distinct regions
    currently managed.
    """

    def __init__(self):
        # address -> size of every currently registered region.
        # NOTE(review): assumes addresses are hashable (presumably ints) --
        # confirm against callers.
        self._regions = {}

    @property
    def _managed_count(self):
        """Number of registered regions (kept for code that read the old counter)."""
        return len(self._regions)

    def register(self, address, size):
        """Start managing the region at *address*.

        Registering an address that is already managed updates its recorded
        size instead of counting the region twice.
        """
        self._regions[address] = size

    def unregister(self, address):
        """Stop managing the region at *address*.

        Unknown addresses are ignored, so a stray call can no longer lower
        the count on behalf of a different region.
        """
        self._regions.pop(address, None)

    def status(self):
        """Return ``{"managed_regions": <count>}``."""
        return {"managed_regions": len(self._regions)}
|
|
|
|
|
|
|
|
|
|
@@ -1,495 +1,28 @@
|
|
| 1 |
-
"""
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
Takes access logs from the Membrane (Layer 0) and builds a weighted
|
| 5 |
-
graph of memory access patterns. Discovers:
|
| 6 |
-
|
| 7 |
-
- Temporal edges: A accessed near B → weighted edge
|
| 8 |
-
- Causal chains: A always before B → directed edge with timing
|
| 9 |
-
- Clusters: groups of regions always accessed together (proto-hyperedges)
|
| 10 |
-
- Hot/cold classification: access frequency distribution
|
| 11 |
-
|
| 12 |
-
This is the substrate's raw material. Layer 2 (predictor) will use
|
| 13 |
-
this graph to predict future accesses.
|
| 14 |
-
|
| 15 |
-
Usage:
|
| 16 |
-
from membrane import Membrane
|
| 17 |
-
from graph_builder import GraphBuilder
|
| 18 |
-
|
| 19 |
-
# ... run workload with Membrane wrapping ...
|
| 20 |
-
log = Membrane.get_log()
|
| 21 |
-
|
| 22 |
-
graph = GraphBuilder()
|
| 23 |
-
graph.build(log)
|
| 24 |
-
graph.print_analysis()
|
| 25 |
-
graph.save("access_graph.json")
|
| 26 |
-
"""
|
| 27 |
-
|
| 28 |
-
import numpy as np
|
| 29 |
-
from collections import defaultdict
|
| 30 |
-
import json
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
class AccessNode:
    """Per-path access statistics for one node of the access graph."""

    __slots__ = ['path', 'access_count', 'read_count', 'write_count',
                 'total_bytes', 'first_access_ns', 'last_access_ns',
                 'access_times_ns', '_temp_class']

    def __init__(self, path):
        self.path = path
        # Counters start at zero; timestamps start at sentinels that the
        # first recorded access replaces.
        self.access_count = self.read_count = self.write_count = 0
        self.total_bytes = 0
        self.first_access_ns = float('inf')
        self.last_access_ns = 0
        self.access_times_ns = []
        self._temp_class = "WARM"  # default

    def record(self, ts_ns, event_type, size_bytes):
        """Fold one log entry into the statistics.

        Any event type other than ``"READ"`` is counted as a write.
        """
        is_read = event_type == "READ"
        self.access_count += 1
        self.read_count += 1 if is_read else 0
        self.write_count += 0 if is_read else 1
        self.total_bytes += size_bytes

        # Track the earliest and latest timestamps seen so far.
        if ts_ns < self.first_access_ns:
            self.first_access_ns = ts_ns
        if ts_ns > self.last_access_ns:
            self.last_access_ns = ts_ns
        self.access_times_ns.append(ts_ns)

    @property
    def temperature(self):
        """Heat of the node: its raw access count. Higher = hotter."""
        return self.access_count

    def to_dict(self):
        """Return the JSON-friendly summary of this node."""
        summary = {"path": self.path}
        summary["access_count"] = self.access_count
        summary["reads"] = self.read_count
        summary["writes"] = self.write_count
        summary["total_bytes"] = self.total_bytes
        return summary
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
class CausalEdge:
    """Directed edge meaning "source was accessed before target"."""

    __slots__ = ['source', 'target', 'count', 'timing_deltas_ns',
                 'mean_delta_ns', 'std_delta_ns', 'weight']

    def __init__(self, source, target):
        self.source, self.target = source, target
        self.count = 0
        self.timing_deltas_ns = []
        self.mean_delta_ns = self.std_delta_ns = 0.0
        self.weight = 0.0  # filled in by finalize()

    def add_observation(self, delta_ns):
        """Record one source -> target occurrence with its time gap."""
        self.timing_deltas_ns.append(delta_ns)
        self.count += 1

    def finalize(self):
        """Derive mean, standard deviation and weight from the observations.

        Does nothing when no observation was recorded.
        """
        if not self.timing_deltas_ns:
            return

        deltas = np.array(self.timing_deltas_ns, dtype=np.float64)
        self.mean_delta_ns = float(deltas.mean())
        self.std_delta_ns = float(deltas.std())

        # Weight = frequency x timing consistency: many observations with a
        # small spread relative to the mean give a strong edge.
        relative_spread = self.std_delta_ns / max(self.mean_delta_ns, 1.0)
        consistency = 1.0 / (1.0 + relative_spread)
        self.weight = self.count * consistency

    def to_dict(self):
        """Return the JSON-friendly summary (timings in milliseconds)."""
        ns_per_ms = 1_000_000
        return {
            "source": self.source,
            "target": self.target,
            "count": self.count,
            "mean_delta_ms": round(self.mean_delta_ns / ns_per_ms, 3),
            "std_delta_ms": round(self.std_delta_ns / ns_per_ms, 3),
            "weight": round(self.weight, 2),
        }
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
class Cluster:
    """Set of paths that are accessed together (a proto-hyperedge)."""

    def __init__(self, cluster_id, members):
        self.cluster_id = cluster_id
        self.total_coaccesses = 0
        # Stored as a set, so duplicate members collapse.
        self.members = set(members)

    def to_dict(self):
        """Return the JSON-friendly summary with members in sorted order."""
        ordered_members = sorted(self.members)
        summary = {"id": self.cluster_id, "members": ordered_members}
        summary["size"] = len(self.members)
        summary["total_coaccesses"] = self.total_coaccesses
        return summary
|
| 133 |
|
| 134 |
|
| 135 |
class GraphBuilder:
|
| 136 |
-
"""Builds a weighted access pattern graph from Membrane logs.
|
| 137 |
-
|
| 138 |
-
The graph has:
|
| 139 |
-
- Nodes: memory regions (paths) with access statistics
|
| 140 |
-
- Causal edges: directed, weighted, with timing information
|
| 141 |
-
- Clusters: groups of paths that always co-access (proto-hyperedges)
|
| 142 |
-
"""
|
| 143 |
-
|
| 144 |
def __init__(self, causal_window_ns=5_000_000, cluster_threshold=0.7):
    """Store the tuning knobs and start with an empty, unbuilt graph.

    Args:
        causal_window_ns: Max time gap (ns) to consider causal.
            Default 5ms — wide enough for Python overhead.
        cluster_threshold: Co-access ratio to form a cluster.
            0.7 = paths must co-access 70%+ of the time.
    """
    # Graph state; populated by build().
    self.nodes = dict()       # path → AccessNode
    self.edges = dict()       # (source, target) → CausalEdge
    self.clusters = list()    # list of Cluster
    self._built = False

    self.causal_window_ns = causal_window_ns
    self.cluster_threshold = cluster_threshold
|
| 159 |
-
|
| 160 |
-
def build(self, log_entries):
    """Build the graph from Membrane log entries.

    Runs four phases in order: per-path nodes, directed causal edges
    between accesses that fall within ``causal_window_ns`` of each other,
    cluster discovery, and temperature classification.  An empty log
    prints a warning and leaves the graph unbuilt.

    Args:
        log_entries: list of (timestamp_ns, event_type, path, size_bytes)
    """
    if not log_entries:
        print(" Warning: empty log, nothing to build")
        return

    # Phase 1: one node per distinct path, fed every event for that path.
    for stamp, kind, path, nbytes in log_entries:
        node = self.nodes.get(path)
        if node is None:
            node = self.nodes[path] = AccessNode(path)
        node.record(stamp, kind, nbytes)

    # Phase 2: scan forward from each event through a time-ordered copy.
    timeline = list(log_entries)
    timeline.sort(key=lambda entry: entry[0])
    total = len(timeline)

    for first, (t_first, _, src, _) in enumerate(timeline):
        for second in range(first + 1, total):
            t_second, _, dst, _ = timeline[second]
            gap = t_second - t_first

            if gap > self.causal_window_ns:
                break  # everything later is even further away
            if src == dst:
                continue  # no self-loops

            # Directed: the earlier access points at the later one.
            pair = (src, dst)
            edge = self.edges.get(pair)
            if edge is None:
                edge = self.edges[pair] = CausalEdge(src, dst)
            edge.add_observation(gap)

    for edge in self.edges.values():
        edge.finalize()

    # Phase 3: clusters (proto-hyperedges).
    self._discover_clusters()

    # Phase 4: hot / warm / cold labels.
    self._classify_temperature()

    self._built = True
|
| 209 |
|
| 210 |
-
def
|
| 211 |
-
"""
|
|
|
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
2. Build adjacency from pairs above threshold
|
| 216 |
-
3. Connected components = clusters
|
| 217 |
-
"""
|
| 218 |
-
if len(self.nodes) < 2:
|
| 219 |
-
return
|
| 220 |
|
| 221 |
-
|
| 222 |
-
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
path_to_idx = {p: i for i, p in enumerate(paths)}
|
| 227 |
|
| 228 |
-
|
|
|
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
cocount[j][i] += edge.count
|
| 235 |
-
|
| 236 |
-
# Normalize to co-access ratio
|
| 237 |
-
counts = np.array([self.nodes[p].access_count for p in paths], dtype=np.float64)
|
| 238 |
-
min_counts = np.minimum.outer(counts, counts)
|
| 239 |
-
min_counts = np.maximum(min_counts, 1.0) # avoid div by zero
|
| 240 |
-
coratio = cocount / min_counts
|
| 241 |
-
|
| 242 |
-
# Build adjacency and find connected components
|
| 243 |
-
adjacency = defaultdict(set)
|
| 244 |
-
for i in range(n):
|
| 245 |
-
for j in range(i + 1, n):
|
| 246 |
-
if coratio[i][j] >= self.cluster_threshold:
|
| 247 |
-
adjacency[i].add(j)
|
| 248 |
-
adjacency[j].add(i)
|
| 249 |
-
|
| 250 |
-
# BFS to find connected components
|
| 251 |
-
visited = set()
|
| 252 |
-
cluster_id = 0
|
| 253 |
-
|
| 254 |
-
for start in range(n):
|
| 255 |
-
if start in visited:
|
| 256 |
-
continue
|
| 257 |
-
if start not in adjacency:
|
| 258 |
-
continue
|
| 259 |
-
|
| 260 |
-
# BFS
|
| 261 |
-
component = set()
|
| 262 |
-
queue = [start]
|
| 263 |
-
while queue:
|
| 264 |
-
node = queue.pop(0)
|
| 265 |
-
if node in visited:
|
| 266 |
-
continue
|
| 267 |
-
visited.add(node)
|
| 268 |
-
component.add(node)
|
| 269 |
-
for neighbor in adjacency.get(node, []):
|
| 270 |
-
if neighbor not in visited:
|
| 271 |
-
queue.append(neighbor)
|
| 272 |
-
|
| 273 |
-
if len(component) >= 2:
|
| 274 |
-
members = [paths[i] for i in component]
|
| 275 |
-
cluster = Cluster(cluster_id, members)
|
| 276 |
-
|
| 277 |
-
# Sum co-access counts within cluster
|
| 278 |
-
for i in component:
|
| 279 |
-
for j in component:
|
| 280 |
-
if i != j:
|
| 281 |
-
cluster.total_coaccesses += cocount[i][j]
|
| 282 |
-
|
| 283 |
-
self.clusters.append(cluster)
|
| 284 |
-
cluster_id += 1
|
| 285 |
-
|
| 286 |
-
def _classify_temperature(self):
    """Label every node HOT, WARM or COLD from its access count.

    A node at or above the 75th percentile of all access counts is HOT,
    one at or above the 25th percentile is WARM, anything else is COLD.
    The label is stored on the node as ``_temp_class``.
    """
    population = list(self.nodes.values())
    if not population:
        return

    counts = [node.access_count for node in population]
    hot_floor = np.percentile(counts, 75)
    warm_floor = np.percentile(counts, 25)

    for node in population:
        hits = node.access_count
        if hits >= hot_floor:
            label = "HOT"
        elif hits >= warm_floor:
            label = "WARM"
        else:
            label = "COLD"
        node._temp_class = label
|
| 306 |
-
|
| 307 |
-
def get_causal_chains(self, min_weight=2.0, max_depth=10):
    """Extract causal chains — sequences of A→B→C with strong edges.

    Only edges whose weight is at least ``min_weight`` take part.  Start
    points are tried in order of (outgoing weight − incoming weight), and
    each chain greedily follows the heaviest edge to a path not already in
    that chain, for at most ``max_depth`` hops.  Paths that already appear
    in an emitted chain are not used as later start points.

    Returns list of chains, each chain is [(path, mean_delta_ms), ...];
    the first element of a chain always carries 0.0.  Returns [] if the
    graph has not been built.
    """
    if not self._built:
        return []

    # One pass over the edges collects the ranked successor lists and the
    # per-path weight totals.
    successors = defaultdict(list)
    inbound = defaultdict(float)
    outbound = defaultdict(float)
    for (src, tgt), edge in self.edges.items():
        if not edge.weight >= min_weight:
            continue
        successors[src].append((tgt, edge))
        outbound[src] += edge.weight
        inbound[tgt] += edge.weight

    for ranked in successors.values():
        ranked.sort(key=lambda hop: hop[1].weight, reverse=True)

    # Best starts: strong outgoing weight, weaker incoming weight.
    starts = [
        (path, outbound.get(path, 0) - inbound.get(path, 0))
        for path in successors
        if outbound.get(path, 0) > 0
    ]
    starts.sort(key=lambda item: item[1], reverse=True)

    chains = []
    claimed = set()
    for start, _ in starts:
        if start in claimed:
            continue

        chain = [(start, 0.0)]
        seen = {start}
        cursor = start

        for _ in range(max_depth):
            if cursor not in successors:
                break
            for nxt, edge in successors[cursor]:
                if nxt in seen:
                    continue
                chain.append((nxt, edge.mean_delta_ns / 1_000_000))
                seen.add(nxt)
                cursor = nxt
                break
            else:
                break  # every successor is already in this chain

        if len(chain) >= 2:
            chains.append(chain)
            claimed.update(path for path, _ in chain)

    return chains
|
| 376 |
-
|
| 377 |
-
def print_analysis(self):
    """Print a comprehensive analysis of the access graph.

    Sections, in order: node temperature summary, strongest causal edges,
    clusters, causal chains (from ``get_causal_chains()`` with its default
    arguments) and a closing "condensation potential" summary.  Prints a
    hint and returns early if the graph has not been built.
    """
    if not self._built:
        print(" Graph not built yet. Call build() first.")
        return

    def labelled(label):
        # Nodes whose stored temperature label matches, in dict order.
        return [n for n in self.nodes.values()
                if getattr(n, '_temp_class', '') == label]

    def clip(text, width):
        # Keep the tail of over-long paths, prefixed with an ellipsis.
        return text if len(text) <= width else "..." + text[-(width - 3):]

    print(f"\n{'='*60}")
    print(f" CONDENSATE — Layer 1 Graph Analysis")
    print(f"{'='*60}")

    # --- Nodes ---------------------------------------------------------
    hot = labelled('HOT')
    warm = labelled('WARM')
    cold = labelled('COLD')

    print(f"\n Nodes: {len(self.nodes)} total")
    print(f" HOT: {len(hot)} (top 25% access frequency)")
    print(f" WARM: {len(warm)} (middle 50%)")
    print(f" COLD: {len(cold)} (bottom 25%)")

    if hot:
        print(f"\n Hottest nodes:")
        busiest = sorted(hot, key=lambda n: n.access_count, reverse=True)
        for node in busiest[:10]:
            print(f" {node.path:<42} {node.access_count:>5} accesses")

    if cold:
        print(f"\n Coldest nodes:")
        quietest = sorted(cold, key=lambda n: n.access_count)
        for node in quietest[:5]:
            print(f" {node.path:<42} {node.access_count:>5} accesses")

    # --- Edges ---------------------------------------------------------
    strong_edges = [item for item in self.edges.items() if item[1].weight >= 2.0]
    print(f"\n Edges: {len(self.edges)} total, {len(strong_edges)} strong (weight >= 2.0)")

    if strong_edges:
        print(f"\n Strongest causal edges (A → B):")
        print(f" {'Source':<25} {'→ Target':<25} {'Count':>5} {'Δt(ms)':>7} {'Wt':>6}")
        print(f" {'-'*25} {'-'*25} {'-'*5} {'-'*7} {'-'*6}")

        strong_edges.sort(key=lambda item: item[1].weight, reverse=True)
        for (src, tgt), edge in strong_edges[:15]:
            src_short = clip(src, 25)
            tgt_short = clip(tgt, 25)
            print(f" {src_short:<25} {tgt_short:<25} "
                  f"{edge.count:>5} {edge.mean_delta_ns/1e6:>7.3f} {edge.weight:>6.1f}")

    # --- Clusters ------------------------------------------------------
    if not self.clusters:
        print(f"\n Clusters: none found (threshold: {self.cluster_threshold})")
    else:
        print(f"\n Clusters (proto-hyperedges): {len(self.clusters)}")
        largest_first = sorted(self.clusters, key=lambda c: len(c.members), reverse=True)
        for cluster in largest_first:
            print(f"\n Cluster {cluster.cluster_id} "
                  f"({len(cluster.members)} members, "
                  f"{cluster.total_coaccesses} co-accesses):")
            for member in sorted(cluster.members):
                node = self.nodes.get(member)
                if node:
                    temp, count = getattr(node, '_temp_class', '?'), node.access_count
                else:
                    temp, count = '?', 0
                print(f" [{temp:>4}] {member:<40} {count:>4}x")

    # --- Causal chains -------------------------------------------------
    chains = self.get_causal_chains()
    if chains:
        print(f"\n Causal chains discovered: {len(chains)}")
        for i, chain in enumerate(chains[:5]):
            parts = []
            for path, delta_ms in chain:
                # Show only the last dotted component of each path.
                short = path.rpartition(".")[2]
                parts.append(f"--({delta_ms:.2f}ms)--> {short}" if delta_ms > 0 else short)
            print(f" Chain {i}: {' '.join(parts)}")
        if len(chains) > 5:
            print(f" ... and {len(chains) - 5} more chains")

    # --- Condensation potential ---------------------------------------
    if hot and cold:
        total_accesses = sum(n.access_count for n in self.nodes.values())
        hot_accesses = sum(n.access_count for n in hot)
        hot_pct = hot_accesses / total_accesses * 100
        print(f"\n Condensation potential:")
        print(f" {len(hot)} hot nodes handle {hot_pct:.0f}% of all accesses")
        print(f" {len(cold)} cold nodes could be compressed/paged")
        if self.clusters:
            print(f" {len(self.clusters)} clusters enable batch promote/demote")
        if chains:
            print(f" {len(chains)} causal chains enable predictive prefetch")

    print(f"\n{'='*60}\n")
|
| 468 |
-
|
| 469 |
-
def save(self, filepath):
    """Save the graph to JSON for later analysis.

    The file holds every node, the edges whose weight is at least 1.0,
    all clusters, the causal chains (``get_causal_chains()`` with default
    arguments) and a small summary block.

    Args:
        filepath: Destination path; an existing file is overwritten.
    """
    # Walk the graph for chains once and reuse the result for both the
    # "chains" payload and the summary count.
    chains = self.get_causal_chains()

    data = {
        "nodes": {p: n.to_dict() for p, n in self.nodes.items()},
        "edges": [e.to_dict() for e in self.edges.values() if e.weight >= 1.0],
        "clusters": [c.to_dict() for c in self.clusters],
        "chains": chains,
        "summary": {
            "total_nodes": len(self.nodes),
            "total_edges": len(self.edges),
            "strong_edges": sum(1 for e in self.edges.values() if e.weight >= 2.0),
            "clusters": len(self.clusters),
            "chains": len(chains),
        }
    }

    class NumpyEncoder(json.JSONEncoder):
        """Convert NumPy scalar types that the stock encoder rejects."""

        def default(self, obj):
            if isinstance(obj, np.integer):
                return int(obj)
            if isinstance(obj, np.floating):
                return float(obj)
            return super().default(obj)

    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, cls=NumpyEncoder)
    print(f" Saved graph ({len(self.nodes)} nodes, "
          f"{len(self.edges)} edges) to {filepath}")
|
|
|
|
| 1 |
+
"""Condensate Graph Builder — delegates to Rust AccessGraph."""
|
| 2 |
+
import condensate_core
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
class GraphBuilder:
    """Thin Python facade over ``condensate_core.AccessGraph``.

    Every method forwards to the wrapped object; no graph logic lives
    in this class.
    """

    def __init__(self, causal_window_ns=5_000_000, cluster_threshold=0.7):
        """Create the wrapped graph, passing both settings through positionally."""
        backend = condensate_core.AccessGraph(causal_window_ns, cluster_threshold)
        self._graph = backend

    def build(self, events):
        """Build graph from (timestamp_ns, path, size_bytes) events."""
        backend = self._graph
        backend.build(events)

    def node_count(self):
        """Return the wrapped graph's ``node_count()``."""
        backend = self._graph
        return backend.node_count()

    def edge_count(self):
        """Return the wrapped graph's ``edge_count()``."""
        backend = self._graph
        return backend.edge_count()

    def cluster_count(self):
        """Return the wrapped graph's ``cluster_count()``."""
        backend = self._graph
        return backend.cluster_count()

    def get_node_stats(self):
        """Return the wrapped graph's ``get_node_stats()`` result unchanged."""
        backend = self._graph
        return backend.get_node_stats()

    @property
    def inner(self):
        """Access the Rust AccessGraph directly."""
        return self._graph
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,326 +1,28 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Condensate Layer 0: The Membrane
|
| 3 |
-
|
| 4 |
-
Intercepts and records memory access patterns on wrapped objects.
|
| 5 |
-
No intelligence — pure observation. Produces an access log that
|
| 6 |
-
Layer 1 (the graph builder) will analyze.
|
| 7 |
-
|
| 8 |
-
Usage:
|
| 9 |
-
from membrane import Membrane
|
| 10 |
-
|
| 11 |
-
data = {"weights": big_array, "config": {...}, "cache": {...}}
|
| 12 |
-
wrapped = Membrane.wrap(data, name="model_state")
|
| 13 |
-
|
| 14 |
-
# Use wrapped exactly like data — reads, writes, iteration all work
|
| 15 |
-
x = wrapped["weights"] # recorded: READ model_state.weights
|
| 16 |
-
wrapped["cache"]["key"] = v # recorded: READ model_state.cache, WRITE model_state.cache.key
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
# Get stats
|
| 22 |
-
Membrane.print_stats() # Summary of access patterns
|
| 23 |
"""
|
| 24 |
-
|
| 25 |
-
import time
|
| 26 |
-
import sys
|
| 27 |
-
from collections import defaultdict
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
class AccessLog:
    """Central access log. All Membrane instances write here."""

    def __init__(self):
        self.entries = []  # (ts_ns, event_type, path, size_bytes) tuples
        self.start_time = time.monotonic_ns()
        self._counts = defaultdict(int)  # path → number of recorded events

    def record(self, event_type, path, size_bytes=0):
        """Record an access event.

        The stored timestamp is nanoseconds elapsed since the log was
        created or last cleared.

        Args:
            event_type: 'READ' or 'WRITE'
            path: dotted path like 'model_state.weights.layer_0'
            size_bytes: approximate size of the accessed object
        """
        ts = time.monotonic_ns() - self.start_time
        self.entries.append((ts, event_type, path, size_bytes))
        self._counts[path] += 1

    def clear(self):
        """Drop all entries and counts and restart the clock."""
        self.entries.clear()
        self._counts.clear()
        self.start_time = time.monotonic_ns()

    def stats(self):
        """Return access statistics.

        For an empty log the result is just ``{"total_accesses": 0}``.
        Otherwise it holds ``total_accesses``, ``unique_paths``,
        ``duration_ms``, a per-path ``paths`` mapping (reads, writes,
        total_bytes, first_ns, last_ns) and ``top_coaccesses``: up to 20
        ``((path_a, path_b), count)`` pairs, most frequent first.
        """
        if not self.entries:
            return {"total_accesses": 0}

        paths = defaultdict(lambda: {"reads": 0, "writes": 0, "total_bytes": 0,
                                     "first_ns": float('inf'), "last_ns": 0})

        for ts, event_type, path, size_bytes in self.entries:
            p = paths[path]
            # Anything that is not a READ is counted as a write.
            if event_type == "READ":
                p["reads"] += 1
            else:
                p["writes"] += 1
            p["total_bytes"] += size_bytes
            p["first_ns"] = min(p["first_ns"], ts)
            p["last_ns"] = max(p["last_ns"], ts)

        # Find temporal co-access: paths accessed within window of each other
        window_ns = 1_000_000  # 1ms window
        coaccesses = defaultdict(int)
        sorted_entries = sorted(self.entries, key=lambda e: e[0])

        for i, (ts_i, _, path_i, _) in enumerate(sorted_entries):
            for j in range(i + 1, len(sorted_entries)):
                ts_j, _, path_j, _ = sorted_entries[j]
                if ts_j - ts_i > window_ns:
                    break  # entries are time-ordered, nothing later can match
                if path_i != path_j:
                    # Unordered pair: (a, b) and (b, a) share one counter.
                    pair = tuple(sorted([path_i, path_j]))
                    coaccesses[pair] += 1

        duration_ms = (self.entries[-1][0] - self.entries[0][0]) / 1_000_000

        return {
            "total_accesses": len(self.entries),
            "unique_paths": len(paths),
            "duration_ms": round(duration_ms, 2),
            "paths": dict(paths),
            "top_coaccesses": sorted(coaccesses.items(),
                                     key=lambda x: -x[1])[:20],
        }

    def print_stats(self):
        """Print a readable summary.

        Safe to call on an empty log: the header is printed with zero
        counts and the per-path and co-access tables are omitted.
        """
        s = self.stats()
        print(f"\n{'='*60}")
        print(f" CONDENSATE MEMBRANE — Access Log Summary")
        print(f"{'='*60}")
        print(f" Total accesses: {s['total_accesses']}")
        # stats() omits these two keys for an empty log, so fall back to
        # zero instead of raising KeyError.
        print(f" Unique paths: {s.get('unique_paths', 0)}")
        print(f" Duration: {s.get('duration_ms', 0.0)} ms")

        if s.get("paths"):
            print(f"\n {'Path':<40} {'Reads':>6} {'Writes':>6}")
            print(f" {'-'*40} {'-'*6} {'-'*6}")

            # Sort by total access count
            sorted_paths = sorted(s["paths"].items(),
                                  key=lambda x: -(x[1]["reads"] + x[1]["writes"]))

            for path, info in sorted_paths[:25]:
                # Truncate long paths
                display = path if len(path) <= 40 else "..." + path[-37:]
                print(f" {display:<40} {info['reads']:>6} {info['writes']:>6}")

            if len(sorted_paths) > 25:
                print(f" ... and {len(sorted_paths) - 25} more paths")

        if s.get("top_coaccesses"):
            print(f"\n Top co-accesses (within 1ms window):")
            print(f" {'-'*54}")
            for (a, b), count in s["top_coaccesses"][:10]:
                a_short = a if len(a) <= 22 else "..." + a[-19:]
                b_short = b if len(b) <= 22 else "..." + b[-19:]
                print(f" {a_short:<22} <-> {b_short:<22} {count:>4}x")

        print(f"{'='*60}\n")
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
# Global singleton log
|
| 136 |
-
_log = AccessLog()
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
def _obj_size(obj):
    """Return ``sys.getsizeof(obj)``, or 0 if that raises.

    The figure is shallow: objects referenced by ``obj`` are not followed.
    Only ``TypeError`` and ``AttributeError`` are swallowed.
    """
    try:
        shallow = sys.getsizeof(obj)
    except (TypeError, AttributeError):
        shallow = 0
    return shallow
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
class MembraneDict(dict):
    """Dict subclass that reports every keyed read and write to a log."""

    def __init__(self, data, path, log):
        """Copy ``data`` into this dict and remember where to log.

        Args:
            data: Initial contents, passed to ``dict.__init__``.
            path: Prefix used when building the logged path of each key.
            log: Object whose ``record(event_type, path, size)`` is called.
        """
        super().__init__(data)
        self._membrane_log = log
        self._membrane_path = path

    def __getitem__(self, key):
        """Log a READ for ``key`` and return its value.

        A plain nested dict or list is replaced in place by a tracking
        wrapper, so later reads through it are logged too.
        """
        full_path = f"{self._membrane_path}.{key}"
        value = super().__getitem__(key)
        log = self._membrane_log
        log.record("READ", full_path, _obj_size(value))

        # Pick the wrapper type, if the value is a container that is not
        # wrapped yet.
        wrapper_cls = None
        if isinstance(value, dict):
            if not isinstance(value, MembraneDict):
                wrapper_cls = MembraneDict
        elif isinstance(value, list):
            if not isinstance(value, MembraneList):
                wrapper_cls = MembraneList

        if wrapper_cls is None:
            return value

        wrapped = wrapper_cls(value, full_path, log)
        # Store via the base class so the swap itself is not logged as a WRITE.
        super().__setitem__(key, wrapped)
        return wrapped

    def __setitem__(self, key, value):
        """Log a WRITE for ``key``, then store the value."""
        full_path = f"{self._membrane_path}.{key}"
        self._membrane_log.record("WRITE", full_path, _obj_size(value))
        super().__setitem__(key, value)

    def get(self, key, default=None):
        """Like ``dict.get`` but routed through the logging ``__getitem__``."""
        try:
            found = self.__getitem__(key)
        except KeyError:
            return default
        return found

    def __repr__(self):
        return f"MembraneDict({self._membrane_path}, {len(self)} keys)"
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
class MembraneList(list):
    """List subclass that reports every indexed read and write to a log."""

    def __init__(self, data, path, log):
        """Copy ``data`` into this list and remember where to log.

        Args:
            data: Initial contents, passed to ``list.__init__``.
            path: Prefix used when building the logged path of each index.
            log: Object whose ``record(event_type, path, size)`` is called.
        """
        super().__init__(data)
        self._membrane_log = log
        self._membrane_path = path

    def __getitem__(self, index):
        """Log a READ for ``index`` and return the element.

        A plain nested dict is replaced in place by a ``MembraneDict``.
        NOTE(review): nested lists are returned unwrapped here, whereas
        ``MembraneDict.__getitem__`` wraps both dicts and lists — confirm
        whether that difference is intended.
        """
        full_path = f"{self._membrane_path}[{index}]"
        value = super().__getitem__(index)
        log = self._membrane_log
        log.record("READ", full_path, _obj_size(value))

        if not isinstance(value, dict) or isinstance(value, MembraneDict):
            return value

        wrapped = MembraneDict(value, full_path, log)
        # Store via the base class so the swap itself is not logged as a WRITE.
        super().__setitem__(index, wrapped)
        return wrapped

    def __setitem__(self, index, value):
        """Log a WRITE for ``index``, then store the value."""
        full_path = f"{self._membrane_path}[{index}]"
        self._membrane_log.record("WRITE", full_path, _obj_size(value))
        super().__setitem__(index, value)

    def __repr__(self):
        return f"MembraneList({self._membrane_path}, {len(self)} items)"
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
class MembraneObject:
    """Proxy that logs attribute reads and writes on an arbitrary object.

    The proxy's own state lives in three ``_membrane_*`` attributes, which
    are always handled via ``object.__setattr__`` / ``__getattribute__``
    so they never reach the wrapped object or the log.
    """

    def __init__(self, obj, path, log):
        own_state = (
            ('_membrane_obj', obj),
            ('_membrane_path', path),
            ('_membrane_log', log),
        )
        for attr, val in own_state:
            object.__setattr__(self, attr, val)

    def __getattr__(self, name):
        """Log a READ of ``name`` on the wrapped object and return it.

        Plain dict or list values come back wrapped in a tracking
        container; the wrapped object's attribute itself is not replaced.
        """
        if name.startswith('_membrane_'):
            return object.__getattribute__(self, name)

        target = object.__getattribute__(self, '_membrane_obj')
        prefix = object.__getattribute__(self, '_membrane_path')
        log = object.__getattribute__(self, '_membrane_log')

        full_path = f"{prefix}.{name}"
        value = getattr(target, name)
        log.record("READ", full_path, _obj_size(value))

        # Wrap nested containers that are not tracked yet.
        if isinstance(value, dict):
            if not isinstance(value, MembraneDict):
                return MembraneDict(value, full_path, log)
        elif isinstance(value, list):
            if not isinstance(value, MembraneList):
                return MembraneList(value, full_path, log)

        return value

    def __setattr__(self, name, value):
        """Log a WRITE of ``name`` and set it on the wrapped object."""
        if name.startswith('_membrane_'):
            object.__setattr__(self, name, value)
            return

        target = object.__getattribute__(self, '_membrane_obj')
        prefix = object.__getattribute__(self, '_membrane_path')
        log = object.__getattribute__(self, '_membrane_log')

        log.record("WRITE", f"{prefix}.{name}", _obj_size(value))
        setattr(target, name, value)

    def __repr__(self):
        target = object.__getattribute__(self, '_membrane_obj')
        prefix = object.__getattribute__(self, '_membrane_path')
        return f"MembraneObject({prefix}, {type(target).__name__})"
|
| 261 |
|
| 262 |
|
| 263 |
class Membrane:
|
| 264 |
-
"""
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
data = {"a": [1, 2, 3], "b": {"nested": True}}
|
| 268 |
-
wrapped = Membrane.wrap(data, "my_data")
|
| 269 |
-
x = wrapped["a"] # logged
|
| 270 |
-
y = wrapped["b"]["nested"] # both accesses logged
|
| 271 |
-
Membrane.print_stats()
|
| 272 |
-
"""
|
| 273 |
-
|
| 274 |
-
@staticmethod
|
| 275 |
-
def wrap(obj, name="root"):
|
| 276 |
-
"""Wrap an object for access tracking.
|
| 277 |
-
|
| 278 |
-
Args:
|
| 279 |
-
obj: Any Python object (dict, list, or arbitrary object)
|
| 280 |
-
name: Human-readable name for this object in the log
|
| 281 |
-
"""
|
| 282 |
-
if isinstance(obj, dict):
|
| 283 |
-
return MembraneDict(obj, name, _log)
|
| 284 |
-
elif isinstance(obj, list):
|
| 285 |
-
return MembraneList(obj, name, _log)
|
| 286 |
-
else:
|
| 287 |
-
return MembraneObject(obj, name, _log)
|
| 288 |
-
|
| 289 |
-
@staticmethod
|
| 290 |
-
def get_log():
|
| 291 |
-
"""Get the raw access log entries."""
|
| 292 |
-
return _log.entries
|
| 293 |
-
|
| 294 |
-
@staticmethod
|
| 295 |
-
def stats():
|
| 296 |
-
"""Get access statistics as a dict."""
|
| 297 |
-
return _log.stats()
|
| 298 |
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
_log.print_stats()
|
| 303 |
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
_log.clear()
|
| 308 |
|
| 309 |
-
@
|
| 310 |
-
def
|
| 311 |
-
|
| 312 |
-
return len(_log.entries)
|
| 313 |
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
""
|
| 317 |
-
import json
|
| 318 |
-
with open(filepath, 'w') as f:
|
| 319 |
-
json.dump({
|
| 320 |
-
"entries": _log.entries,
|
| 321 |
-
"stats": {
|
| 322 |
-
"total": len(_log.entries),
|
| 323 |
-
"unique_paths": len(set(e[2] for e in _log.entries)),
|
| 324 |
-
}
|
| 325 |
-
}, f, indent=2)
|
| 326 |
-
print(f" Saved {len(_log.entries)} entries to {filepath}")
|
|
|
|
| 1 |
+
"""Condensate Membrane — thin orchestration wrapper.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
The data path is Rust. This module provides the Python API
|
| 4 |
+
for starting, stopping, and monitoring Condensate.
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
+
import condensate_core
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class Membrane:
    """Orchestration wrapper. Data path is Rust.

    This class only tracks an on/off flag; it starts switched off.
    """

    def __init__(self):
        self._active = False

    @property
    def active(self):
        """Whether the membrane is currently switched on."""
        return self._active

    def start(self):
        """Enable membrane observation."""
        self._active = True

    def stop(self):
        """Disable membrane."""
        self._active = False

    def status(self):
        """Return current membrane status."""
        return dict(active=self._active)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,400 +1,21 @@
|
|
| 1 |
-
"""
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
Takes the graph from Layer 1 and predicts future memory accesses
|
| 5 |
-
based on what was just accessed. This is the proto-SNN — causal
|
| 6 |
-
spike propagation through learned topology.
|
| 7 |
-
|
| 8 |
-
No real SNN yet — this is a weighted graph walk that proves the
|
| 9 |
-
PRINCIPLE of causal prediction. The Rust/NeuroGraph SNN replaces
|
| 10 |
-
this with real spike dynamics later.
|
| 11 |
-
|
| 12 |
-
Usage:
|
| 13 |
-
from predictor import Predictor
|
| 14 |
-
|
| 15 |
-
predictor = Predictor()
|
| 16 |
-
predictor.learn(graph) # from GraphBuilder
|
| 17 |
-
|
| 18 |
-
# Live prediction
|
| 19 |
-
predictions = predictor.predict("model.layer_0.q")
|
| 20 |
-
# Returns: [("model.layer_0.k", 0.95, 0.02), ...]
|
| 21 |
-
# (path, confidence, expected_delta_ms)
|
| 22 |
-
|
| 23 |
-
# Score against actual access log
|
| 24 |
-
predictor.score(log_entries)
|
| 25 |
-
"""
|
| 26 |
-
|
| 27 |
-
import numpy as np
|
| 28 |
-
from collections import defaultdict
|
| 29 |
-
import time
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
class PredictionEntry:
    """One predicted access: which path, how confident, and how soon.

    Attributes:
        path: the path expected to be accessed.
        confidence: score attached to the prediction by its producer.
        expected_delta_ms: expected delay, in milliseconds, after the trigger.
        source_path: the access that triggered this prediction.
        chain_depth: number of hops from the trigger (defaults to 1).
    """

    __slots__ = [
        'path',
        'confidence',
        'expected_delta_ms',
        'source_path',
        'chain_depth',
    ]

    def __init__(self, path, confidence, expected_delta_ms, source_path,
                 chain_depth=1):
        self.path, self.confidence = path, confidence
        self.expected_delta_ms = expected_delta_ms
        self.source_path = source_path
        self.chain_depth = chain_depth

    def __repr__(self):
        # Same text as a single f-string would give; split up for readability.
        conf_part = f"conf={self.confidence:.2f}"
        delta_part = f"Δt={self.expected_delta_ms:.2f}ms"
        return f"Predict({self.path}, {conf_part}, {delta_part}, depth={self.chain_depth})"
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
class SpikeChain:
    """An ordered causal chain of accesses with timing between links.

    Proto-SNN model: a spike enters at some link and propagates to every
    link after it.
    """

    def __init__(self, chain_id, links):
        """Store the chain and start with empty hit/miss counters.

        Args:
            chain_id: unique identifier
            links: list of (path, delta_ms) tuples; the first entry is the
                chain head and has delta_ms=0.
        """
        # NOTE(review): an earlier inline comment called the second tuple
        # element "cumulative", but predictions_from() adds the values up as
        # per-link deltas. Confirm against whatever produces `links`.
        self.chain_id = chain_id
        self.links = links
        self.hit_count = 0
        self.miss_count = 0

    @property
    def accuracy(self):
        """Hit ratio of this chain; 0.5 while nothing has been recorded."""
        observed = self.hit_count + self.miss_count
        if observed > 0:
            return self.hit_count / observed
        return 0.5

    @property
    def head(self):
        """Path of the first link, or None for an empty chain."""
        if not self.links:
            return None
        first_path, *_ = self.links[0]
        return first_path

    def predictions_from(self, trigger_path):
        """Predict every link that follows the first occurrence of trigger_path.

        Returns an empty list when trigger_path is not in the chain or is its
        last link. Each prediction carries the summed delta since the trigger,
        and a confidence of ``accuracy * 0.9 ** depth``.
        """
        trigger_idx = next(
            (idx for idx, (path, _) in enumerate(self.links)
             if path == trigger_path),
            None,
        )
        if trigger_idx is None:
            return []

        base_confidence = self.accuracy
        elapsed_ms = 0.0
        upcoming = []
        for depth, (path, delta_ms) in enumerate(
                self.links[trigger_idx + 1:], start=1):
            elapsed_ms += delta_ms
            upcoming.append(PredictionEntry(
                path=path,
                # Confidence decays with distance from the trigger.
                confidence=base_confidence * (0.9 ** depth),
                expected_delta_ms=elapsed_ms,
                source_path=trigger_path,
                chain_depth=depth,
            ))
        return upcoming
|
| 102 |
|
| 103 |
|
| 104 |
class Predictor:
|
| 105 |
-
"""Predicts future memory accesses from learned access topology.
|
| 106 |
-
|
| 107 |
-
This is the proto-SNN. It learns:
|
| 108 |
-
1. Direct successors: A is usually followed by B (with timing)
|
| 109 |
-
2. Causal chains: A → B → C (multi-hop prediction)
|
| 110 |
-
3. Cluster co-activation: if any member of cluster X fires, all will
|
| 111 |
-
|
| 112 |
-
The real SNN (NeuroGraph) replaces this with spike propagation
|
| 113 |
-
through learned synapses. This proves the principle.
|
| 114 |
-
"""
|
| 115 |
-
|
| 116 |
def __init__(self):
|
| 117 |
-
|
| 118 |
-
self.successors = defaultdict(list)
|
| 119 |
-
|
| 120 |
-
# Learned chains
|
| 121 |
-
self.chains = []
|
| 122 |
-
|
| 123 |
-
# Cluster membership: path → cluster_id
|
| 124 |
-
self.cluster_map = {}
|
| 125 |
-
|
| 126 |
-
# Cluster members: cluster_id → set of paths
|
| 127 |
-
self.cluster_members = {}
|
| 128 |
-
|
| 129 |
-
# Statistics
|
| 130 |
-
self._total_predictions = 0
|
| 131 |
-
self._hits = 0
|
| 132 |
-
self._misses = 0
|
| 133 |
-
self._false_positives = 0
|
| 134 |
-
|
| 135 |
-
# Prediction window for scoring (ms)
|
| 136 |
-
self.score_window_ms = 10.0
|
| 137 |
-
|
| 138 |
-
self._learned = False
|
| 139 |
-
|
| 140 |
-
def learn(self, graph):
    """Build the prediction model from a built GraphBuilder.

    Three things are absorbed from ``graph``: direct successors (from its
    edges), causal chains (from ``get_causal_chains``) and cluster
    membership (from ``clusters``). Results are added to whatever this
    predictor already holds.

    Args:
        graph: a built GraphBuilder instance

    Raises:
        ValueError: if ``graph._built`` is falsy.
    """
    if not graph._built:
        raise ValueError("Graph must be built first")

    # --- 1. Direct successors, weights normalised by the heaviest edge ---
    heaviest = max((edge.weight for edge in graph.edges.values()), default=1.0)
    for (source, target), edge in graph.edges.items():
        if edge.weight < 1.0:
            continue  # too weak to be worth predicting from
        delta_ms = edge.mean_delta_ns / 1_000_000  # ns → ms
        self.successors[source].append((target, edge.weight / heaviest, delta_ms))

    # Strongest first; keep only the top 10 per source to avoid noise.
    for source, candidates in self.successors.items():
        ranked = sorted(candidates, key=lambda item: -item[1])
        self.successors[source] = ranked[:10]

    # --- 2. Causal chains ---
    self.chains.extend(
        SpikeChain(chain_id=index, links=links)
        for index, links in enumerate(graph.get_causal_chains(min_weight=2.0))
    )

    # --- 3. Cluster membership, indexed in both directions ---
    for cluster in graph.clusters:
        self.cluster_members[cluster.cluster_id] = set(cluster.members)
        for member in cluster.members:
            self.cluster_map[member] = cluster.cluster_id

    self._learned = True
|
| 182 |
-
|
| 183 |
-
def predict(self, accessed_path, top_k=10):
    """Predict the accesses likely to follow ``accessed_path``.

    Candidates come from three sources, in this order: direct successors,
    chain propagation, and cluster co-activation. For each predicted path
    only the highest-confidence candidate is kept (the earliest one wins a
    tie).

    Returns:
        Up to ``top_k`` PredictionEntry objects, highest confidence first;
        an empty list if nothing has been learned yet.
    """
    if not self._learned:
        return []

    def candidates():
        # Source 1: direct successors
        for target, weight, delta_ms in self.successors.get(accessed_path, []):
            yield PredictionEntry(
                path=target,
                confidence=weight,
                expected_delta_ms=delta_ms,
                source_path=accessed_path,
                chain_depth=1,
            )

        # Source 2: chain propagation
        for chain in self.chains:
            yield from chain.predictions_from(accessed_path)

        # Source 3: cluster co-activation
        cluster_id = self.cluster_map.get(accessed_path)
        if cluster_id is None:
            return
        for member in self.cluster_members[cluster_id]:
            if member == accessed_path:
                continue
            yield PredictionEntry(
                path=member,
                confidence=0.85,  # high confidence for cluster members
                expected_delta_ms=0.1,  # near-immediate
                source_path=accessed_path,
                chain_depth=1,
            )

    best_by_path = {}
    for candidate in candidates():
        incumbent = best_by_path.get(candidate.path)
        if incumbent is None or candidate.confidence > incumbent.confidence:
            best_by_path[candidate.path] = candidate

    ranked = sorted(best_by_path.values(), key=lambda entry: -entry.confidence)
    return ranked[:top_k]
|
| 231 |
-
|
| 232 |
-
def score(self, log_entries, verbose=False):
    """Measure prediction accuracy against a recorded access log.

    Entries are 4-tuples whose first element is a timestamp in nanoseconds
    and whose third element is the accessed path. The log is sorted by
    timestamp; for every access except the last, predictions are generated
    and the following accesses inside ``self.score_window_ms`` are searched
    for the first one that was predicted. Accesses that yield no predictions
    are not counted at all.

    Side effect: adds this run's totals to ``_total_predictions``,
    ``_hits`` and ``_misses``.

    Returns:
        dict of accuracy metrics, or ``{"error": "Not learned yet"}`` when
        the model has not been learned. ``hit_details`` holds at most 20
        entries and is only populated when ``verbose`` is true.
    """
    if not self._learned:
        return {"error": "Not learned yet"}

    timeline = sorted(log_entries, key=lambda entry: entry[0])
    total = len(timeline)
    window_ns = self.score_window_ms * 1_000_000

    def first_predicted(start, origin_ts, lookup):
        """First predicted access within the window, as (pred, path, delta_ns)."""
        for j in range(start, total):
            ts_j, _, path_j, _ = timeline[j]
            delta_ns = ts_j - origin_ts
            if delta_ns > window_ns:
                return None  # log is time-sorted, so nothing later qualifies
            if path_j in lookup:
                return lookup[path_j], path_j, delta_ns
        return None

    hits = misses = predictions_made = 0
    source_hits = {"direct": 0, "chain": 0, "cluster": 0}
    timing_errors_ms = []
    hit_details = []

    for i in range(total - 1):
        ts_i, _, path_i, _ = timeline[i]

        preds = self.predict(path_i)
        if not preds:
            continue
        predictions_made += 1

        match = first_predicted(i + 1, ts_i, {p.path: p for p in preds})
        if match is None:
            misses += 1
            continue

        hits += 1
        pred, path_j, delta_ns = match

        # Timing accuracy of the hit
        actual_delta_ms = delta_ns / 1_000_000
        timing_errors_ms.append(abs(actual_delta_ms - pred.expected_delta_ms))

        # Attribute the hit to a prediction source
        if pred.chain_depth > 1:
            source_hits["chain"] += 1
        elif pred.path in self.cluster_map:
            source_hits["cluster"] += 1
        else:
            source_hits["direct"] += 1

        if verbose and len(hit_details) < 20:
            hit_details.append({
                "trigger": path_i,
                "predicted": path_j,
                "confidence": pred.confidence,
                "expected_ms": pred.expected_delta_ms,
                "actual_ms": actual_delta_ms,
                "depth": pred.chain_depth,
            })

    # Fold this run into the running statistics
    self._total_predictions += predictions_made
    self._hits += hits
    self._misses += misses

    if predictions_made > 0:
        accuracy = hits / predictions_made
    else:
        accuracy = 0.0
    if timing_errors_ms:
        mean_timing_error = np.mean(timing_errors_ms)
    else:
        mean_timing_error = float('nan')

    return {
        "predictions_made": predictions_made,
        "hits": hits,
        "misses": misses,
        "accuracy": round(accuracy * 100, 1),
        "direct_hits": source_hits["direct"],
        "chain_hits": source_hits["chain"],
        "cluster_hits": source_hits["cluster"],
        "mean_timing_error_ms": round(mean_timing_error, 3),
        "hit_details": hit_details if verbose else [],
    }
|
| 334 |
-
|
| 335 |
-
def print_score(self, log_entries, verbose=False):
    """Run score() on the log, print a report to stdout, return the result.

    Up to 10 sample hits are listed when the result has any ``hit_details``.
    """
    result = self.score(log_entries, verbose=verbose)
    rule = "=" * 60

    # NOTE(review): the literals below are copied exactly as they appear in
    # this listing. The listing has lost indentation elsewhere, so runs of
    # spaces used for column alignment may have been collapsed — confirm
    # against the original file.
    def report_lines():
        # Lazy on purpose: each line is formatted only when it is about to
        # be printed, so a missing key fails at the same point as a plain
        # sequence of print() calls would.
        yield f"\n{rule}"
        yield " CONDENSATE — Layer 2 Prediction Score"
        yield rule
        yield f" Predictions made: {result['predictions_made']}"
        yield f" Hits: {result['hits']}"
        yield f" Misses: {result['misses']}"
        yield f" Accuracy: {result['accuracy']}%"
        yield ""
        yield " Hit breakdown:"
        yield f" Direct successor: {result['direct_hits']}"
        yield f" Chain propagation: {result['chain_hits']}"
        yield f" Cluster co-access: {result['cluster_hits']}"
        yield ""
        yield " Timing precision:"
        yield f" Mean error: {result['mean_timing_error_ms']:.3f} ms"

        if result.get("hit_details"):
            yield "\n Sample hits:"
            for sample in result["hit_details"][:10]:
                # Show only the last dotted component of each path.
                trig = sample['trigger'].split('.')[-1]
                pred = sample['predicted'].split('.')[-1]
                yield (f" {trig:<15} → {pred:<15} "
                       f"conf={sample['confidence']:.2f} "
                       f"Δt={sample['actual_ms']:.2f}ms "
                       f"(predicted {sample['expected_ms']:.2f}ms)")

        yield f"{rule}\n"

    for line in report_lines():
        print(line)

    return result
|
| 368 |
-
|
| 369 |
-
def print_model(self):
|
| 370 |
-
"""Print what the predictor learned."""
|
| 371 |
-
print(f"\n{'='*60}")
|
| 372 |
-
print(f" CONDENSATE — Layer 2 Learned Model")
|
| 373 |
-
print(f"{'='*60}")
|
| 374 |
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
short = path if len(path) <= 30 else "..." + path[-27:]
|
| 380 |
-
print(f" {short:<30} → {len(succs)} targets")
|
| 381 |
-
for target, weight, delta in succs[:3]:
|
| 382 |
-
t_short = target.split(".")[-1]
|
| 383 |
-
print(f" → {t_short:<20} w={weight:.2f} Δt={delta:.2f}ms")
|
| 384 |
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
parts = [p.split(".")[-1] for p, _ in chain.links]
|
| 388 |
-
print(f" Chain {chain.chain_id}: {' → '.join(parts[:6])}"
|
| 389 |
-
+ (" → ..." if len(parts) > 6 else ""))
|
| 390 |
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
short_members = [m.split(".")[-1] for m in sorted(members)]
|
| 394 |
-
if len(short_members) > 6:
|
| 395 |
-
display = ", ".join(short_members[:6]) + f" +{len(short_members)-6}"
|
| 396 |
-
else:
|
| 397 |
-
display = ", ".join(short_members)
|
| 398 |
-
print(f" Cluster {cid}: {{{display}}}")
|
| 399 |
|
| 400 |
-
|
|
|
|
|
|
| 1 |
+
"""Condensate Predictor — delegates to Rust RustPredictor."""
|
| 2 |
+
import condensate_core
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
class Predictor:
    """Thin Python facade; every call is forwarded to ``condensate_core.RustPredictor``."""

    def __init__(self):
        self._predictor = condensate_core.RustPredictor()

    def learn(self, graph_builder):
        """Learn from a GraphBuilder's inner AccessGraph.

        When ``graph_builder`` has an ``inner`` attribute, that object is
        passed to the Rust predictor; otherwise ``graph_builder`` itself is
        passed through unchanged.
        """
        access_graph = getattr(graph_builder, 'inner', graph_builder)
        self._predictor.learn(access_graph)

    def predict(self, path, top_k=10):
        """Forward to the Rust predictor's ``predict(path, top_k)``."""
        backend = self._predictor
        return backend.predict(path, top_k)

    def score(self, events):
        """Forward to the Rust predictor's ``score(events)``."""
        backend = self._predictor
        return backend.score(events)

    def is_learned(self):
        """Forward to the Rust predictor's ``is_learned()``."""
        backend = self._predictor
        return backend.is_learned()
|
|
@@ -1,7 +1,7 @@
|
|
| 1 |
[package]
|
| 2 |
name = "condensate_core"
|
| 3 |
version = "0.1.0"
|
| 4 |
-
edition = "
|
| 5 |
description = "Living memory manager — Rust core with PyO3 bindings + LD_PRELOAD membrane"
|
| 6 |
license = "AGPL-3.0"
|
| 7 |
|
|
@@ -21,6 +21,7 @@ libc = "0.2"
|
|
| 21 |
[features]
|
| 22 |
default = ["python"]
|
| 23 |
python = ["pyo3"]
|
|
|
|
| 24 |
|
| 25 |
[profile.release]
|
| 26 |
opt-level = 3
|
|
|
|
| 1 |
[package]
|
| 2 |
name = "condensate_core"
|
| 3 |
version = "0.1.0"
|
| 4 |
+
edition = "2021"
|
| 5 |
description = "Living memory manager — Rust core with PyO3 bindings + LD_PRELOAD membrane"
|
| 6 |
license = "AGPL-3.0"
|
| 7 |
|
|
|
|
| 21 |
[features]
|
| 22 |
default = ["python"]
|
| 23 |
python = ["pyo3"]
|
| 24 |
+
preload = []
|
| 25 |
|
| 26 |
[profile.release]
|
| 27 |
opt-level = 3
|
|
@@ -7,7 +7,7 @@
|
|
| 7 |
//! Three tiers:
|
| 8 |
//! HOT: Untouched, full speed access
|
| 9 |
//! WARM: LZ4 compressed in-place, fast decompress on access
|
| 10 |
-
//! COLD: Backed by
|
| 11 |
//!
|
| 12 |
//! The condenser runs as a background thread, periodically scanning
|
| 13 |
//! the membrane's tracked allocations and demoting idle ones.
|
|
@@ -15,11 +15,16 @@
|
|
| 15 |
//! accessed"), the condenser pre-promotes it.
|
| 16 |
|
| 17 |
use std::collections::HashMap;
|
| 18 |
-
use std::
|
|
|
|
|
|
|
| 19 |
use std::time::Instant;
|
| 20 |
|
| 21 |
use crate::membrane::{MembraneState, MembraneSummary};
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
/// Tier state for a managed memory region
|
| 24 |
#[derive(Clone, Debug, PartialEq)]
|
| 25 |
pub enum Tier {
|
|
@@ -30,9 +35,9 @@ pub enum Tier {
|
|
| 30 |
compressed: Vec<u8>,
|
| 31 |
original_size: usize,
|
| 32 |
},
|
| 33 |
-
///
|
| 34 |
Cold {
|
| 35 |
-
|
| 36 |
original_size: usize,
|
| 37 |
},
|
| 38 |
}
|
|
@@ -48,6 +53,10 @@ pub struct ManagedRegion {
|
|
| 48 |
pub promotions: u32,
|
| 49 |
pub demotions: u32,
|
| 50 |
pub prediction_hits: u32,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
}
|
| 52 |
|
| 53 |
impl ManagedRegion {
|
|
@@ -61,6 +70,7 @@ impl ManagedRegion {
|
|
| 61 |
promotions: 0,
|
| 62 |
demotions: 0,
|
| 63 |
prediction_hits: 0,
|
|
|
|
| 64 |
}
|
| 65 |
}
|
| 66 |
|
|
@@ -130,6 +140,10 @@ pub struct CondenserConfig {
|
|
| 130 |
pub max_tracked: usize,
|
| 131 |
/// How often the scan loop runs (ns)
|
| 132 |
pub scan_interval_ns: u64,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
}
|
| 134 |
|
| 135 |
impl Default for CondenserConfig {
|
|
@@ -139,6 +153,7 @@ impl Default for CondenserConfig {
|
|
| 139 |
min_manage_size: 65_536, // 64KB minimum
|
| 140 |
max_tracked: 10_000,
|
| 141 |
scan_interval_ns: 1_000_000_000, // 1 second
|
|
|
|
| 142 |
}
|
| 143 |
}
|
| 144 |
}
|
|
@@ -156,10 +171,13 @@ pub struct Condenser {
|
|
| 156 |
total_bytes_saved: u64,
|
| 157 |
peak_bytes_saved: u64,
|
| 158 |
scan_count: u64,
|
|
|
|
|
|
|
| 159 |
}
|
| 160 |
|
| 161 |
impl Condenser {
|
| 162 |
pub fn new(config: CondenserConfig) -> Self {
|
|
|
|
| 163 |
Self {
|
| 164 |
config,
|
| 165 |
regions: HashMap::with_capacity(1000),
|
|
@@ -169,6 +187,7 @@ impl Condenser {
|
|
| 169 |
total_bytes_saved: 0,
|
| 170 |
peak_bytes_saved: 0,
|
| 171 |
scan_count: 0,
|
|
|
|
| 172 |
}
|
| 173 |
}
|
| 174 |
|
|
@@ -210,22 +229,126 @@ impl Condenser {
|
|
| 210 |
}
|
| 211 |
}
|
| 212 |
|
| 213 |
-
/// Pre-promote a region (prediction-driven)
|
|
|
|
|
|
|
| 214 |
pub fn pre_promote(&mut self, address: usize) {
|
| 215 |
if let Some(region) = self.regions.get_mut(&address) {
|
| 216 |
if !region.is_hot() {
|
| 217 |
-
// In a real implementation, this would decompress
|
| 218 |
-
// and write back to the original address.
|
| 219 |
-
// For the PoC, we track that the prediction fired.
|
| 220 |
region.prediction_hits += 1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
region.tier = Tier::Hot;
|
| 222 |
region.promotions += 1;
|
| 223 |
self.total_decompressed += 1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
}
|
| 225 |
}
|
| 226 |
}
|
| 227 |
|
| 228 |
-
/// Scan for idle regions and compress them
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
/// Returns (regions_compressed, bytes_saved)
|
| 230 |
pub fn scan_and_compress(&mut self) -> (u32, u64) {
|
| 231 |
let now = self.elapsed_ns();
|
|
@@ -240,18 +363,29 @@ impl Condenser {
|
|
| 240 |
.filter(|(_, r)| {
|
| 241 |
r.is_hot() &&
|
| 242 |
r.size >= self.config.min_manage_size &&
|
|
|
|
| 243 |
now - r.last_access_ns > threshold
|
| 244 |
})
|
| 245 |
.map(|(&addr, _)| addr)
|
| 246 |
.collect();
|
| 247 |
|
| 248 |
for addr in to_compress {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
if let Some(region) = self.regions.get_mut(&addr) {
|
| 250 |
-
|
| 251 |
-
// the actual memory address. For now, simulate with
|
| 252 |
-
// a zero-filled buffer (shows compression mechanics).
|
| 253 |
-
let fake_data = vec![0u8; region.size];
|
| 254 |
-
let saved = region.compress(&fake_data);
|
| 255 |
|
| 256 |
if saved > 0 {
|
| 257 |
compressed_count += 1;
|
|
@@ -369,9 +503,22 @@ impl CondenserSummary {
|
|
| 369 |
mod tests {
|
| 370 |
use super::*;
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
#[test]
|
| 373 |
fn test_register_and_touch() {
|
| 374 |
-
let mut c = Condenser::new(CondenserConfig
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
c.register(0x10000, 100_000);
|
| 377 |
c.register(0x20000, 200_000);
|
|
@@ -404,6 +551,7 @@ mod tests {
|
|
| 404 |
let mut c = Condenser::new(CondenserConfig {
|
| 405 |
idle_threshold_ns: 0, // compress immediately
|
| 406 |
min_manage_size: 1024,
|
|
|
|
| 407 |
..Default::default()
|
| 408 |
});
|
| 409 |
|
|
@@ -425,6 +573,7 @@ mod tests {
|
|
| 425 |
let mut c = Condenser::new(CondenserConfig {
|
| 426 |
idle_threshold_ns: 0,
|
| 427 |
min_manage_size: 1024,
|
|
|
|
| 428 |
..Default::default()
|
| 429 |
});
|
| 430 |
|
|
@@ -443,6 +592,7 @@ mod tests {
|
|
| 443 |
let mut c = Condenser::new(CondenserConfig {
|
| 444 |
idle_threshold_ns: 0,
|
| 445 |
min_manage_size: 1024,
|
|
|
|
| 446 |
..Default::default()
|
| 447 |
});
|
| 448 |
|
|
@@ -465,4 +615,154 @@ mod tests {
|
|
| 465 |
assert_eq!(summary.total_regions, 3);
|
| 466 |
assert!(summary.total_compressions >= 2);
|
| 467 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
}
|
|
|
|
| 7 |
//! Three tiers:
|
| 8 |
//! HOT: Untouched, full speed access
|
| 9 |
//! WARM: LZ4 compressed in-place, fast decompress on access
|
| 10 |
+
//! COLD: Backed by disk file, zero RSS until touched
|
| 11 |
//!
|
| 12 |
//! The condenser runs as a background thread, periodically scanning
|
| 13 |
//! the membrane's tracked allocations and demoting idle ones.
|
|
|
|
| 15 |
//! accessed"), the condenser pre-promotes it.
|
| 16 |
|
| 17 |
use std::collections::HashMap;
|
| 18 |
+
use std::fs;
|
| 19 |
+
use std::io::{Read as IoRead, Write as IoWrite};
|
| 20 |
+
use std::path::Path;
|
| 21 |
use std::time::Instant;
|
| 22 |
|
| 23 |
use crate::membrane::{MembraneState, MembraneSummary};
|
| 24 |
|
| 25 |
+
const PAGE_SIZE: usize = 4096;
|
| 26 |
+
const COLD_DIR: &str = "/tmp/condensate_cold";
|
| 27 |
+
|
| 28 |
/// Tier state for a managed memory region
|
| 29 |
#[derive(Clone, Debug, PartialEq)]
|
| 30 |
pub enum Tier {
|
|
|
|
| 35 |
compressed: Vec<u8>,
|
| 36 |
original_size: usize,
|
| 37 |
},
|
| 38 |
+
/// Compressed bytes written to disk, in-memory buffer freed
|
| 39 |
Cold {
|
| 40 |
+
file_path: String,
|
| 41 |
original_size: usize,
|
| 42 |
},
|
| 43 |
}
|
|
|
|
| 53 |
pub promotions: u32,
|
| 54 |
pub demotions: u32,
|
| 55 |
pub prediction_hits: u32,
|
| 56 |
+
/// Optional data override used in tests to inject specific byte patterns
|
| 57 |
+
/// without needing a real allocation. Only consulted by read_region_data
|
| 58 |
+
/// when present; ignored in production.
|
| 59 |
+
pub test_data: Option<Vec<u8>>,
|
| 60 |
}
|
| 61 |
|
| 62 |
impl ManagedRegion {
|
|
|
|
| 70 |
promotions: 0,
|
| 71 |
demotions: 0,
|
| 72 |
prediction_hits: 0,
|
| 73 |
+
test_data: None,
|
| 74 |
}
|
| 75 |
}
|
| 76 |
|
|
|
|
| 140 |
pub max_tracked: usize,
|
| 141 |
/// How often the scan loop runs (ns)
|
| 142 |
pub scan_interval_ns: u64,
|
| 143 |
+
/// When true, compress/decompress uses data stored in the Warm tier
|
| 144 |
+
/// directly rather than reading from raw memory addresses. Enables
|
| 145 |
+
/// testing without real allocations.
|
| 146 |
+
pub test_mode: bool,
|
| 147 |
}
|
| 148 |
|
| 149 |
impl Default for CondenserConfig {
|
|
|
|
| 153 |
min_manage_size: 65_536, // 64KB minimum
|
| 154 |
max_tracked: 10_000,
|
| 155 |
scan_interval_ns: 1_000_000_000, // 1 second
|
| 156 |
+
test_mode: false,
|
| 157 |
}
|
| 158 |
}
|
| 159 |
}
|
|
|
|
| 171 |
total_bytes_saved: u64,
|
| 172 |
peak_bytes_saved: u64,
|
| 173 |
scan_count: u64,
|
| 174 |
+
/// When true, use test-safe data paths (no raw pointer reads/writes)
|
| 175 |
+
test_mode: bool,
|
| 176 |
}
|
| 177 |
|
| 178 |
impl Condenser {
|
| 179 |
pub fn new(config: CondenserConfig) -> Self {
|
| 180 |
+
let test_mode = config.test_mode;
|
| 181 |
Self {
|
| 182 |
config,
|
| 183 |
regions: HashMap::with_capacity(1000),
|
|
|
|
| 187 |
total_bytes_saved: 0,
|
| 188 |
peak_bytes_saved: 0,
|
| 189 |
scan_count: 0,
|
| 190 |
+
test_mode,
|
| 191 |
}
|
| 192 |
}
|
| 193 |
|
|
|
|
| 229 |
}
|
| 230 |
}
|
| 231 |
|
| 232 |
+
/// Pre-promote a region (prediction-driven).
|
| 233 |
+
/// Decompresses the region and, when not in test_mode, writes the
|
| 234 |
+
/// decompressed bytes back to the original address.
|
| 235 |
pub fn pre_promote(&mut self, address: usize) {
|
| 236 |
if let Some(region) = self.regions.get_mut(&address) {
|
| 237 |
if !region.is_hot() {
|
|
|
|
|
|
|
|
|
|
| 238 |
region.prediction_hits += 1;
|
| 239 |
+
|
| 240 |
+
if let Some(decompressed) = region.decompress() {
|
| 241 |
+
// decompress() already set tier → Hot and bumped promotions.
|
| 242 |
+
if !self.test_mode {
|
| 243 |
+
// SAFETY: The caller guarantees `address` points to a live
|
| 244 |
+
// allocation of at least `decompressed.len()` bytes that we
|
| 245 |
+
// originally registered and compressed. We are restoring the
|
| 246 |
+
// original contents before the application touches it again.
|
| 247 |
+
unsafe {
|
| 248 |
+
std::ptr::copy_nonoverlapping(
|
| 249 |
+
decompressed.as_ptr(),
|
| 250 |
+
address as *mut u8,
|
| 251 |
+
decompressed.len(),
|
| 252 |
+
);
|
| 253 |
+
}
|
| 254 |
+
}
|
| 255 |
+
} else {
|
| 256 |
+
// Fallback: force to Hot even if decompress failed
|
| 257 |
+
region.tier = Tier::Hot;
|
| 258 |
+
region.promotions += 1;
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
self.total_decompressed += 1;
|
| 262 |
+
}
|
| 263 |
+
}
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
/// Demote a WARM region to COLD by writing its compressed bytes to disk.
|
| 267 |
+
/// Creates `/tmp/condensate_cold/` if it does not exist.
|
| 268 |
+
pub fn demote_to_cold(&mut self, address: usize) {
|
| 269 |
+
if let Some(region) = self.regions.get_mut(&address) {
|
| 270 |
+
if let Tier::Warm { ref compressed, original_size } = region.tier.clone() {
|
| 271 |
+
// Ensure the cold directory exists
|
| 272 |
+
fs::create_dir_all(COLD_DIR)
|
| 273 |
+
.expect("condensate: failed to create cold storage directory");
|
| 274 |
+
|
| 275 |
+
let file_path = format!("{}/{}.bin", COLD_DIR, address);
|
| 276 |
+
|
| 277 |
+
fs::write(&file_path, compressed)
|
| 278 |
+
.expect("condensate: failed to write cold file");
|
| 279 |
+
|
| 280 |
+
region.tier = Tier::Cold { file_path, original_size };
|
| 281 |
+
region.demotions += 1;
|
| 282 |
+
}
|
| 283 |
+
}
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
/// Promote a COLD region back to HOT.
|
| 287 |
+
/// Reads compressed bytes from disk, LZ4-decompresses them, deletes the
|
| 288 |
+
/// file, and sets the tier back to Hot.
|
| 289 |
+
/// Returns the decompressed data, or None if the region is not Cold.
|
| 290 |
+
pub fn promote_from_cold(&mut self, address: usize) -> Option<Vec<u8>> {
|
| 291 |
+
if let Some(region) = self.regions.get_mut(&address) {
|
| 292 |
+
if let Tier::Cold { ref file_path, .. } = region.tier.clone() {
|
| 293 |
+
let compressed = fs::read(&file_path)
|
| 294 |
+
.expect("condensate: failed to read cold file");
|
| 295 |
+
|
| 296 |
+
let decompressed = lz4_flex::decompress_size_prepended(&compressed)
|
| 297 |
+
.expect("condensate: failed to decompress cold data");
|
| 298 |
+
|
| 299 |
+
// Delete the backing file
|
| 300 |
+
let _ = fs::remove_file(&file_path);
|
| 301 |
+
|
| 302 |
region.tier = Tier::Hot;
|
| 303 |
region.promotions += 1;
|
| 304 |
self.total_decompressed += 1;
|
| 305 |
+
|
| 306 |
+
return Some(decompressed);
|
| 307 |
+
}
|
| 308 |
+
}
|
| 309 |
+
None
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
/// Build the data buffer used during scan compression.
|
| 313 |
+
///
|
| 314 |
+
/// Priority order:
|
| 315 |
+
/// 1. If the region has a `test_data` override, use that.
|
| 316 |
+
/// 2. If in `test_mode`, generate a deterministic repeating pattern from
|
| 317 |
+
/// the address bytes — compressible, safe, no real allocation needed.
|
| 318 |
+
/// 3. In production: read directly from the live allocation.
|
| 319 |
+
fn read_region_data(&self, address: usize, size: usize) -> Vec<u8> {
|
| 320 |
+
// Test-data override takes precedence (injected by tests for specific patterns)
|
| 321 |
+
if let Some(region) = self.regions.get(&address) {
|
| 322 |
+
if let Some(ref data) = region.test_data {
|
| 323 |
+
return data.clone();
|
| 324 |
+
}
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
if self.test_mode {
|
| 328 |
+
// Deterministic repeating pattern from the address bytes — compressible
|
| 329 |
+
let addr_bytes = address.to_le_bytes();
|
| 330 |
+
let mut buf = Vec::with_capacity(size);
|
| 331 |
+
for i in 0..size {
|
| 332 |
+
buf.push(addr_bytes[i % addr_bytes.len()]);
|
| 333 |
+
}
|
| 334 |
+
buf
|
| 335 |
+
} else {
|
| 336 |
+
// SAFETY: The caller (register) has verified that `address` is a live
|
| 337 |
+
// allocation of exactly `size` bytes tracked by this condenser. We hold
|
| 338 |
+
// a shared reference to this data only for the duration of this call and
|
| 339 |
+
// do not alias the slice with any mutable reference.
|
| 340 |
+
unsafe {
|
| 341 |
+
std::slice::from_raw_parts(address as *const u8, size).to_vec()
|
| 342 |
}
|
| 343 |
}
|
| 344 |
}
|
| 345 |
|
| 346 |
+
/// Scan for idle regions and compress them.
|
| 347 |
+
///
|
| 348 |
+
/// Guards applied per region before compression:
|
| 349 |
+
/// 1. Skip regions smaller than PAGE_SIZE (4096 bytes) — not worth it.
|
| 350 |
+
/// 2. Skip if compressed_size > original_size * 0.9 — less than 10% savings.
|
| 351 |
+
///
|
| 352 |
/// Returns (regions_compressed, bytes_saved)
|
| 353 |
pub fn scan_and_compress(&mut self) -> (u32, u64) {
|
| 354 |
let now = self.elapsed_ns();
|
|
|
|
| 363 |
.filter(|(_, r)| {
|
| 364 |
r.is_hot() &&
|
| 365 |
r.size >= self.config.min_manage_size &&
|
| 366 |
+
r.size >= PAGE_SIZE && // minimum page size guard
|
| 367 |
now - r.last_access_ns > threshold
|
| 368 |
})
|
| 369 |
.map(|(&addr, _)| addr)
|
| 370 |
.collect();
|
| 371 |
|
| 372 |
for addr in to_compress {
|
| 373 |
+
let size = match self.regions.get(&addr) {
|
| 374 |
+
Some(r) => r.size,
|
| 375 |
+
None => continue,
|
| 376 |
+
};
|
| 377 |
+
|
| 378 |
+
let data = self.read_region_data(addr, size);
|
| 379 |
+
|
| 380 |
+
// Compression ratio guard: pre-check before promoting to Warm
|
| 381 |
+
let candidate = lz4_flex::compress_prepend_size(&data);
|
| 382 |
+
if candidate.len() > (data.len() as f64 * 0.9) as usize {
|
| 383 |
+
// Less than 10% savings — skip this region
|
| 384 |
+
continue;
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
if let Some(region) = self.regions.get_mut(&addr) {
|
| 388 |
+
let saved = region.compress(&data);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
|
| 390 |
if saved > 0 {
|
| 391 |
compressed_count += 1;
|
|
|
|
| 503 |
mod tests {
|
| 504 |
use super::*;
|
| 505 |
|
| 506 |
+
/// Helper: Condenser in test_mode with immediate idle threshold
|
| 507 |
+
fn test_condenser() -> Condenser {
|
| 508 |
+
Condenser::new(CondenserConfig {
|
| 509 |
+
idle_threshold_ns: 0,
|
| 510 |
+
min_manage_size: 1024,
|
| 511 |
+
test_mode: true,
|
| 512 |
+
..Default::default()
|
| 513 |
+
})
|
| 514 |
+
}
|
| 515 |
+
|
| 516 |
#[test]
|
| 517 |
fn test_register_and_touch() {
|
| 518 |
+
let mut c = Condenser::new(CondenserConfig {
|
| 519 |
+
test_mode: true,
|
| 520 |
+
..Default::default()
|
| 521 |
+
});
|
| 522 |
|
| 523 |
c.register(0x10000, 100_000);
|
| 524 |
c.register(0x20000, 200_000);
|
|
|
|
| 551 |
let mut c = Condenser::new(CondenserConfig {
|
| 552 |
idle_threshold_ns: 0, // compress immediately
|
| 553 |
min_manage_size: 1024,
|
| 554 |
+
test_mode: true,
|
| 555 |
..Default::default()
|
| 556 |
});
|
| 557 |
|
|
|
|
| 573 |
let mut c = Condenser::new(CondenserConfig {
|
| 574 |
idle_threshold_ns: 0,
|
| 575 |
min_manage_size: 1024,
|
| 576 |
+
test_mode: true,
|
| 577 |
..Default::default()
|
| 578 |
});
|
| 579 |
|
|
|
|
| 592 |
let mut c = Condenser::new(CondenserConfig {
|
| 593 |
idle_threshold_ns: 0,
|
| 594 |
min_manage_size: 1024,
|
| 595 |
+
test_mode: true,
|
| 596 |
..Default::default()
|
| 597 |
});
|
| 598 |
|
|
|
|
| 615 |
assert_eq!(summary.total_regions, 3);
|
| 616 |
assert!(summary.total_compressions >= 2);
|
| 617 |
}
|
| 618 |
+
|
| 619 |
+
// -----------------------------------------------------------------
|
| 620 |
+
// New tests for Block B
|
| 621 |
+
// -----------------------------------------------------------------
|
| 622 |
+
|
| 623 |
+
#[test]
fn test_minimum_page_size_guard() {
    // A 100-byte region is smaller than PAGE_SIZE, so the scan must leave it
    // alone. `min_manage_size` is set to 64 so that this size is not what
    // excludes the region; the scan-time page-size guard is what is being
    // exercised here.
    let tiny_addr: usize = 0xABCD0;
    let config = CondenserConfig {
        idle_threshold_ns: 0,
        min_manage_size: 64, // low enough to register the 100-byte region
        test_mode: true,
        ..Default::default()
    };
    let mut c = Condenser::new(config);

    c.register(tiny_addr, 100);
    assert_eq!(c.regions.len(), 1, "Region should be registered");

    let (count, _saved) = c.scan_and_compress();
    assert_eq!(count, 0, "Scan should skip the sub-page-size region");
    assert!(c.regions[&tiny_addr].is_hot(), "Region should remain Hot");
}
|
| 642 |
+
|
| 643 |
+
#[test]
fn test_compression_ratio_guard() {
    // The ratio guard in scan_and_compress skips a region when
    // compressed_size > original_size * 0.9 (less than 10% savings).
    //
    // Both sides are covered:
    //   1. Compressible data passes the guard → region becomes Warm.
    //   2. Incompressible data is skipped     → region stays Hot.
    //
    // `ManagedRegion::test_data` injection controls exactly which bytes each
    // region presents to the scan, so no real addresses are dereferenced.

    // --- Happy path: a zero-filled buffer compresses extremely well ---
    let mut c = test_condenser();
    c.register(0xC0000usize, 65_536);
    c.regions.get_mut(&0xC0000usize).unwrap().test_data = Some(vec![0u8; 65_536]);
    let (count, _) = c.scan_and_compress();
    assert_eq!(count, 1, "Compressible region should pass the ratio guard");
    assert!(matches!(c.regions[&0xC0000usize].tier, Tier::Warm { .. }));

    // --- Blocked path: pseudo-random bytes that LZ4 cannot shrink ---
    // Each byte mixes the position with two 64-bit multiplicative hashes.
    // The arithmetic is done in u64 so the multiplier literals are valid on
    // every target; on a 32-bit `usize` they would not fit.
    let buf_size = 65_536usize;
    let incompressible: Vec<u8> = (0..buf_size as u64)
        .map(|i| {
            let a = (i.wrapping_mul(6364136223846793005) >> 33) as u8;
            let b = (i.wrapping_mul(1442695040888963407) >> 25) as u8;
            a ^ b ^ (i as u8)
        })
        .collect();

    // Verify the data really fails the 90% ratio guard before running the
    // scan, so a weak pattern shows up as a test-data problem rather than a
    // guard failure.
    let candidate = lz4_flex::compress_prepend_size(&incompressible);
    let threshold = (buf_size as f64 * 0.9) as usize;
    assert!(
        candidate.len() > threshold,
        "Test data must be incompressible enough to trigger the guard \
         (candidate_len={} threshold={}). Regenerate with a harder pattern.",
        candidate.len(), threshold
    );

    // Register and inject the incompressible data — the scan should skip it.
    let mut c2 = test_condenser();
    c2.register(0xD0000usize, buf_size);
    c2.regions.get_mut(&0xD0000usize).unwrap().test_data = Some(incompressible);
    let (count2, _) = c2.scan_and_compress();
    assert_eq!(count2, 0, "Incompressible region should be skipped by the ratio guard");
    assert!(c2.regions[&0xD0000usize].is_hot(), "Region should remain Hot");
}
|
| 702 |
+
|
| 703 |
+
#[test]
fn test_cold_tier_disk_roundtrip() {
    // Arbitrary address; in test_mode it only seeds the synthetic pattern
    // and names the cold file.
    let addr = 0xDEAD_0000usize;
    let mut c = test_condenser();
    c.register(addr, 65_536);

    // HOT → WARM
    let (count, _) = c.scan_and_compress();
    assert_eq!(count, 1, "Region should compress to WARM");
    assert!(matches!(c.regions[&addr].tier, Tier::Warm { .. }));

    // Decompress the WARM payload now so there is a reference copy to
    // compare against after the disk roundtrip.
    let original_data = if let Tier::Warm { compressed, .. } = &c.regions[&addr].tier {
        lz4_flex::decompress_size_prepended(compressed).unwrap()
    } else {
        panic!("Expected Warm tier")
    };

    // WARM → COLD (payload goes to disk)
    c.demote_to_cold(addr);
    assert!(matches!(c.regions[&addr].tier, Tier::Cold { .. }));

    // The backing file must be present while the region is COLD.
    let file_path = if let Tier::Cold { file_path, .. } = &c.regions[&addr].tier {
        file_path.clone()
    } else {
        panic!("Expected Cold tier")
    };
    assert!(Path::new(&file_path).exists(), "Cold file should exist on disk");

    // COLD → HOT (read, decompress, delete)
    let restored = c.promote_from_cold(addr).expect("promote_from_cold should return data");

    assert_eq!(restored, original_data, "Restored data should match original");
    assert!(matches!(c.regions[&addr].tier, Tier::Hot), "Tier should be Hot after promotion");
}
|
| 742 |
+
|
| 743 |
+
#[test]
fn test_cold_tier_file_cleanup() {
    let addr = 0xBEEF_0000usize;
    let mut c = test_condenser();

    // Walk the region HOT → WARM → COLD.
    c.register(addr, 65_536);
    c.scan_and_compress();
    c.demote_to_cold(addr);

    // Remember where the payload was written.
    let file_path = if let Tier::Cold { file_path, .. } = &c.regions[&addr].tier {
        file_path.clone()
    } else {
        panic!("Expected Cold tier")
    };
    let cold_file = Path::new(&file_path);
    assert!(cold_file.exists(), "File should exist before promote");

    // Promotion must remove the backing file.
    c.promote_from_cold(addr);
    assert!(
        !cold_file.exists(),
        "Cold file should be deleted after promote_from_cold"
    );
}
|
| 768 |
}
|
|
@@ -0,0 +1,829 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Erasure Coding + Holographic Boundaries — Block L
|
| 2 |
+
//!
|
| 3 |
+
//! Replaces fragile keyframe+delta chains with fault-tolerant erasure-coded
|
| 4 |
+
//! fragments for the COLD memory tier. COLD regions exist in RAM as pure
|
| 5 |
+
//! metadata (`HolographicBoundary`): zero data bytes in RAM, just the
|
| 6 |
+
//! reconstruction recipe and enough metadata to answer management queries
|
| 7 |
+
//! without waking the data.
|
| 8 |
+
//!
|
| 9 |
+
//! ## Erasure scheme (XOR-based, no external deps)
|
| 10 |
+
//!
|
| 11 |
+
//! A *systematic* code where the first K fragments ARE the data chunks
|
| 12 |
+
//! (split evenly, last padded with zeros if needed) and (N-K) parity
|
| 13 |
+
//! fragments are XOR combinations:
|
| 14 |
+
//!
|
| 15 |
+
//! - parity[0] = XOR of all K data chunks
|
| 16 |
+
//! - parity[1] = XOR of chunks 0 .. K/2
|
| 17 |
+
//! - parity[2] = XOR of chunks K/2 .. K
|
| 18 |
+
//! - additional parity fragments alternate between the same low and high halves
|
| 19 |
+
//!
|
| 20 |
+
//! A missing data chunk is recoverable only when an available parity covers it and no other unresolved chunk. Full Reed-Solomon can be
|
| 21 |
+
//! plugged in later via a proper crate without changing the public API.
|
| 22 |
+
|
| 23 |
+
// ---------------------------------------------------------------------------
|
| 24 |
+
// Hash helper (FNV-1a — no external dep required)
|
| 25 |
+
// ---------------------------------------------------------------------------
|
| 26 |
+
|
| 27 |
+
/// 64-bit FNV-1a hash of `data`.
///
/// Starts from the FNV offset basis and, for each byte, XORs the byte in
/// and then multiplies by the FNV prime (wrapping on overflow). An empty
/// slice hashes to the offset basis itself.
fn simple_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const PRIME: u64 = 0x100000001b3;

    data.iter().fold(OFFSET_BASIS, |acc, &byte| {
        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
|
| 35 |
+
|
| 36 |
+
// ---------------------------------------------------------------------------
|
| 37 |
+
// Fragment
|
| 38 |
+
// ---------------------------------------------------------------------------
|
| 39 |
+
|
| 40 |
+
/// A single shard produced by the erasure encoder.
///
/// Shards with `index < required_k` carry a slice of the original data;
/// shards with `index >= required_k` carry XOR parity. Every shard also
/// repeats the block-level metadata (`total_n`, `required_k`,
/// `original_size`, `original_hash`) that the decoder reads back.
pub struct Fragment {
    /// Position of this shard within the full set, in `[0, total_n)`.
    pub index: u8,
    /// Shard payload: a data chunk or a parity chunk.
    pub data: Vec<u8>,
    /// How many shards the encoder produced in total.
    pub total_n: u8,
    /// Number of data shards; also the minimum shard count the decoder accepts.
    pub required_k: u8,
    /// Length in bytes of the original data, before chunking and padding.
    pub original_size: usize,
    /// FNV-1a hash of the original data, used as an integrity check.
    pub original_hash: u64,
}
|
| 58 |
+
|
| 59 |
+
// ---------------------------------------------------------------------------
|
| 60 |
+
// FragmentLocation
|
| 61 |
+
// ---------------------------------------------------------------------------
|
| 62 |
+
|
| 63 |
+
/// Storage location of a fragment's payload.
pub enum FragmentLocation {
    /// The payload is held in process memory.
    Memory(Vec<u8>),
    /// The payload is on disk, addressed as `(file_path, byte_offset)`.
    Disk(String, u64),
}
|
| 70 |
+
|
| 71 |
+
// ---------------------------------------------------------------------------
|
| 72 |
+
// DecodeError
|
| 73 |
+
// ---------------------------------------------------------------------------
|
| 74 |
+
|
| 75 |
+
/// Ways in which reconstructing a block from fragments can fail.
#[derive(Debug, PartialEq)]
pub enum DecodeError {
    /// Fewer distinct fragments were supplied (`have`) than required (`need`).
    InsufficientFragments { have: usize, need: usize },
    /// The same fragment `index` appeared more than once in the input.
    DuplicateFragment { index: u8 },
    /// The reconstructed bytes hash to `got` instead of the recorded `expected`.
    HashMismatch { expected: u64, got: u64 },
    /// A data chunk is missing and no supplied parity fragment can restore it.
    MissingParity,
}
|
| 87 |
+
|
| 88 |
+
impl std::fmt::Display for DecodeError {
|
| 89 |
+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
| 90 |
+
match self {
|
| 91 |
+
DecodeError::InsufficientFragments { have, need } => {
|
| 92 |
+
write!(f, "insufficient fragments: have {have}, need {need}")
|
| 93 |
+
}
|
| 94 |
+
DecodeError::DuplicateFragment { index } => {
|
| 95 |
+
write!(f, "duplicate fragment index {index}")
|
| 96 |
+
}
|
| 97 |
+
DecodeError::HashMismatch { expected, got } => {
|
| 98 |
+
write!(f, "hash mismatch: expected {expected:#x}, got {got:#x}")
|
| 99 |
+
}
|
| 100 |
+
DecodeError::MissingParity => {
|
| 101 |
+
write!(f, "missing parity fragment needed for reconstruction")
|
| 102 |
+
}
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
// ---------------------------------------------------------------------------
|
| 108 |
+
// ErasureCoder
|
| 109 |
+
// ---------------------------------------------------------------------------
|
| 110 |
+
|
| 111 |
+
/// XOR-parity erasure coder: splits a block into `default_k` data shards
/// and adds `default_n - default_k` parity shards.
pub struct ErasureCoder {
    /// Total number of fragments emitted by each `encode` call.
    pub default_n: u8,
    /// Number of data fragments; the minimum needed to reconstruct.
    pub default_k: u8,
}
|
| 118 |
+
|
| 119 |
+
impl ErasureCoder {
|
| 120 |
+
/// Create a new coder. Panics if `default_k > default_n` or either is zero.
|
| 121 |
+
pub fn new(default_n: u8, default_k: u8) -> Self {
|
| 122 |
+
assert!(default_k > 0, "required_k must be >= 1");
|
| 123 |
+
assert!(default_n >= default_k, "total_n must be >= required_k");
|
| 124 |
+
Self { default_n, default_k }
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
// -----------------------------------------------------------------------
|
| 128 |
+
// Encode
|
| 129 |
+
// -----------------------------------------------------------------------
|
| 130 |
+
|
| 131 |
+
/// Split `data` into `default_n` fragments: `default_k` data shards plus
|
| 132 |
+
/// `(default_n - default_k)` XOR parity shards.
|
| 133 |
+
///
|
| 134 |
+
/// Empty input produces fragments that each carry zero bytes.
|
| 135 |
+
pub fn encode(&self, data: &[u8]) -> Vec<Fragment> {
|
| 136 |
+
let k = self.default_k as usize;
|
| 137 |
+
let n = self.default_n as usize;
|
| 138 |
+
let original_size = data.len();
|
| 139 |
+
let original_hash = simple_hash(data);
|
| 140 |
+
|
| 141 |
+
// Compute chunk size: ceil(original_size / k), minimum 1 when non-empty
|
| 142 |
+
let chunk_size = if original_size == 0 {
|
| 143 |
+
0
|
| 144 |
+
} else {
|
| 145 |
+
(original_size + k - 1) / k
|
| 146 |
+
};
|
| 147 |
+
|
| 148 |
+
// Build K data chunks (last chunk zero-padded if necessary)
|
| 149 |
+
let mut data_chunks: Vec<Vec<u8>> = Vec::with_capacity(k);
|
| 150 |
+
for i in 0..k {
|
| 151 |
+
let start = i * chunk_size;
|
| 152 |
+
let end = ((i + 1) * chunk_size).min(original_size);
|
| 153 |
+
let mut chunk = if start < original_size {
|
| 154 |
+
data[start..end].to_vec()
|
| 155 |
+
} else {
|
| 156 |
+
Vec::new()
|
| 157 |
+
};
|
| 158 |
+
// Pad to uniform chunk_size
|
| 159 |
+
chunk.resize(chunk_size, 0u8);
|
| 160 |
+
data_chunks.push(chunk);
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
// Build parity chunks
|
| 164 |
+
let parity_count = n - k;
|
| 165 |
+
let mut parity_chunks: Vec<Vec<u8>> = Vec::with_capacity(parity_count);
|
| 166 |
+
for p in 0..parity_count {
|
| 167 |
+
let chunk = self.build_parity(p, &data_chunks, chunk_size);
|
| 168 |
+
parity_chunks.push(chunk);
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
// Assemble Fragment list: data frags first, then parity
|
| 172 |
+
let mut fragments = Vec::with_capacity(n);
|
| 173 |
+
for i in 0..k {
|
| 174 |
+
fragments.push(Fragment {
|
| 175 |
+
index: i as u8,
|
| 176 |
+
data: data_chunks[i].clone(),
|
| 177 |
+
total_n: n as u8,
|
| 178 |
+
required_k: k as u8,
|
| 179 |
+
original_size,
|
| 180 |
+
original_hash,
|
| 181 |
+
});
|
| 182 |
+
}
|
| 183 |
+
for p in 0..parity_count {
|
| 184 |
+
fragments.push(Fragment {
|
| 185 |
+
index: (k + p) as u8,
|
| 186 |
+
data: parity_chunks[p].clone(),
|
| 187 |
+
total_n: n as u8,
|
| 188 |
+
required_k: k as u8,
|
| 189 |
+
original_size,
|
| 190 |
+
original_hash,
|
| 191 |
+
});
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
fragments
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
/// Compute parity fragment `p` from the data chunks.
|
| 198 |
+
///
|
| 199 |
+
/// Parity layout:
|
| 200 |
+
/// p=0 → XOR of all K chunks ("full" parity)
|
| 201 |
+
/// p=1 → XOR of chunks [0 .. k/2) (low half)
|
| 202 |
+
/// p=2 → XOR of chunks [k/2 .. k) (high half)
|
| 203 |
+
/// p=3 → XOR of chunks [0 .. k/4) (quarter)
|
| 204 |
+
/// … and so on (halving, wrapping around)
|
| 205 |
+
fn build_parity(&self, p: usize, chunks: &[Vec<u8>], chunk_size: usize) -> Vec<u8> {
|
| 206 |
+
let k = chunks.len();
|
| 207 |
+
let mut result = vec![0u8; chunk_size];
|
| 208 |
+
|
| 209 |
+
let indices: Vec<usize> = if p == 0 {
|
| 210 |
+
// Full parity: all chunks
|
| 211 |
+
(0..k).collect()
|
| 212 |
+
} else {
|
| 213 |
+
// Halving pattern
|
| 214 |
+
let half = k / 2;
|
| 215 |
+
let half = half.max(1); // guard against k==1
|
| 216 |
+
let step = p - 1;
|
| 217 |
+
// Alternate between low and high halves across steps
|
| 218 |
+
if step % 2 == 0 {
|
| 219 |
+
// low half
|
| 220 |
+
(0..half).collect()
|
| 221 |
+
} else {
|
| 222 |
+
// high half
|
| 223 |
+
(half..k).collect()
|
| 224 |
+
}
|
| 225 |
+
};
|
| 226 |
+
|
| 227 |
+
for &ci in &indices {
|
| 228 |
+
xor_into(&mut result, &chunks[ci]);
|
| 229 |
+
}
|
| 230 |
+
result
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
// -----------------------------------------------------------------------
|
| 234 |
+
// Decode
|
| 235 |
+
// -----------------------------------------------------------------------
|
| 236 |
+
|
| 237 |
+
/// Reconstruct the original data from any sufficient subset of fragments.
|
| 238 |
+
///
|
| 239 |
+
/// If all `required_k` **data** fragments (indices 0 .. k-1) are present,
|
| 240 |
+
/// reconstruction is trivial concatenation. If any data fragment is
|
| 241 |
+
/// missing, the decoder attempts XOR recovery using parity fragments.
|
| 242 |
+
pub fn decode(&self, fragments: &[Fragment]) -> Result<Vec<u8>, DecodeError> {
|
| 243 |
+
if fragments.is_empty() {
|
| 244 |
+
return Err(DecodeError::InsufficientFragments { have: 0, need: self.default_k as usize });
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
// Use metadata from the first fragment (all must agree)
|
| 248 |
+
let original_size = fragments[0].original_size;
|
| 249 |
+
let original_hash = fragments[0].original_hash;
|
| 250 |
+
let k = fragments[0].required_k as usize;
|
| 251 |
+
|
| 252 |
+
// Check for duplicate indices
|
| 253 |
+
let mut seen = [false; 256];
|
| 254 |
+
for f in fragments {
|
| 255 |
+
if seen[f.index as usize] {
|
| 256 |
+
return Err(DecodeError::DuplicateFragment { index: f.index });
|
| 257 |
+
}
|
| 258 |
+
seen[f.index as usize] = true;
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
// Collect into indexed map
|
| 262 |
+
let mut by_index: std::collections::HashMap<u8, &Fragment> =
|
| 263 |
+
std::collections::HashMap::new();
|
| 264 |
+
for f in fragments {
|
| 265 |
+
by_index.insert(f.index, f);
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
let total_available = by_index.len();
|
| 269 |
+
if total_available < k {
|
| 270 |
+
return Err(DecodeError::InsufficientFragments {
|
| 271 |
+
have: total_available,
|
| 272 |
+
need: k,
|
| 273 |
+
});
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
// Check which data fragments are present
|
| 277 |
+
let mut data_present = vec![false; k];
|
| 278 |
+
for i in 0..k {
|
| 279 |
+
data_present[i] = by_index.contains_key(&(i as u8));
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
let missing_data: Vec<usize> = data_present.iter().enumerate()
|
| 283 |
+
.filter(|(_, &p)| !p)
|
| 284 |
+
.map(|(i, _)| i)
|
| 285 |
+
.collect();
|
| 286 |
+
|
| 287 |
+
// Figure out chunk size from any available data fragment
|
| 288 |
+
let chunk_size = if original_size == 0 {
|
| 289 |
+
0
|
| 290 |
+
} else {
|
| 291 |
+
(original_size + k - 1) / k
|
| 292 |
+
};
|
| 293 |
+
|
| 294 |
+
// Reconstruct data chunks
|
| 295 |
+
let mut chunks: Vec<Vec<u8>> = vec![vec![0u8; chunk_size]; k];
|
| 296 |
+
|
| 297 |
+
// Fill in present data chunks
|
| 298 |
+
for i in 0..k {
|
| 299 |
+
if data_present[i] {
|
| 300 |
+
chunks[i] = by_index[&(i as u8)].data.clone();
|
| 301 |
+
chunks[i].resize(chunk_size, 0u8);
|
| 302 |
+
}
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
// Recover missing data chunks using parity
|
| 306 |
+
if !missing_data.is_empty() {
|
| 307 |
+
self.recover_missing(&mut chunks, &missing_data, &by_index, chunk_size)?;
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
// Reconstruct original bytes: concatenate chunks, trim to original_size
|
| 311 |
+
let mut result: Vec<u8> = chunks.into_iter().flatten().collect();
|
| 312 |
+
result.truncate(original_size);
|
| 313 |
+
|
| 314 |
+
// Integrity check
|
| 315 |
+
let got_hash = simple_hash(&result);
|
| 316 |
+
if got_hash != original_hash {
|
| 317 |
+
return Err(DecodeError::HashMismatch {
|
| 318 |
+
expected: original_hash,
|
| 319 |
+
got: got_hash,
|
| 320 |
+
});
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
Ok(result)
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
/// Attempt to recover missing data chunks using available parity fragments.
|
| 327 |
+
///
|
| 328 |
+
/// This works for the simple XOR parity scheme as long as each missing
|
| 329 |
+
/// chunk can be isolated by XOR-ing the parity fragment whose range covers
|
| 330 |
+
/// that chunk with all other known chunks in that range.
|
| 331 |
+
fn recover_missing(
|
| 332 |
+
&self,
|
| 333 |
+
chunks: &mut Vec<Vec<u8>>,
|
| 334 |
+
missing: &[usize],
|
| 335 |
+
by_index: &std::collections::HashMap<u8, &Fragment>,
|
| 336 |
+
chunk_size: usize,
|
| 337 |
+
) -> Result<(), DecodeError> {
|
| 338 |
+
let k = chunks.len();
|
| 339 |
+
|
| 340 |
+
for &mi in missing {
|
| 341 |
+
// Try each available parity fragment in order
|
| 342 |
+
let mut recovered = false;
|
| 343 |
+
|
| 344 |
+
// Collect parity fragments (indices k..N)
|
| 345 |
+
let mut parity_frags: Vec<(usize, &Fragment)> = by_index
|
| 346 |
+
.iter()
|
| 347 |
+
.filter(|(&idx, _)| idx as usize >= k)
|
| 348 |
+
.map(|(&idx, &f)| (idx as usize - k, f))
|
| 349 |
+
.collect();
|
| 350 |
+
parity_frags.sort_by_key(|(p, _)| *p);
|
| 351 |
+
|
| 352 |
+
for (p_idx, parity_frag) in &parity_frags {
|
| 353 |
+
// Determine which data chunk indices this parity covers
|
| 354 |
+
let covered = self.parity_coverage(*p_idx, k);
|
| 355 |
+
|
| 356 |
+
if !covered.contains(&mi) {
|
| 357 |
+
continue;
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
// All other covered indices must NOT be in missing (or already recovered)
|
| 361 |
+
let others_not_missing = covered.iter()
|
| 362 |
+
.filter(|&&ci| ci != mi)
|
| 363 |
+
.all(|&ci| !missing.contains(&ci) || chunks[ci].iter().any(|&b| b != 0) /* already recovered */);
|
| 364 |
+
|
| 365 |
+
if !others_not_missing {
|
| 366 |
+
continue; // can't use this parity yet
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
// Recover: missing_chunk = parity XOR all_other_covered_chunks
|
| 370 |
+
let mut recovered_chunk = parity_frag.data.clone();
|
| 371 |
+
recovered_chunk.resize(chunk_size, 0u8);
|
| 372 |
+
|
| 373 |
+
for &ci in covered.iter().filter(|&&ci| ci != mi) {
|
| 374 |
+
xor_into(&mut recovered_chunk, &chunks[ci]);
|
| 375 |
+
}
|
| 376 |
+
|
| 377 |
+
chunks[mi] = recovered_chunk;
|
| 378 |
+
recovered = true;
|
| 379 |
+
break;
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
if !recovered {
|
| 383 |
+
return Err(DecodeError::MissingParity);
|
| 384 |
+
}
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
Ok(())
|
| 388 |
+
}
|
| 389 |
+
|
| 390 |
+
/// Return the data chunk indices covered by parity fragment `p_idx`.
|
| 391 |
+
fn parity_coverage(&self, p_idx: usize, k: usize) -> Vec<usize> {
|
| 392 |
+
if p_idx == 0 {
|
| 393 |
+
// Full parity covers all k chunks
|
| 394 |
+
(0..k).collect()
|
| 395 |
+
} else {
|
| 396 |
+
let half = (k / 2).max(1);
|
| 397 |
+
let step = p_idx - 1;
|
| 398 |
+
if step % 2 == 0 {
|
| 399 |
+
(0..half).collect()
|
| 400 |
+
} else {
|
| 401 |
+
(half..k).collect()
|
| 402 |
+
}
|
| 403 |
+
}
|
| 404 |
+
}
|
| 405 |
+
|
| 406 |
+
// -----------------------------------------------------------------------
|
| 407 |
+
// Integrity
|
| 408 |
+
// -----------------------------------------------------------------------
|
| 409 |
+
|
| 410 |
+
/// Verify that `data` matches `expected_hash`.
|
| 411 |
+
pub fn verify_hash(data: &[u8], expected_hash: u64) -> bool {
|
| 412 |
+
simple_hash(data) == expected_hash
|
| 413 |
+
}
|
| 414 |
+
}
|
| 415 |
+
|
| 416 |
+
// ---------------------------------------------------------------------------
|
| 417 |
+
// XOR helper
|
| 418 |
+
// ---------------------------------------------------------------------------
|
| 419 |
+
|
| 420 |
+
/// XOR `src` into `dst` byte by byte, in place.
///
/// Only the overlapping prefix is touched: when `src` is shorter than `dst`
/// the tail of `dst` is left unchanged, and when `src` is longer its extra
/// bytes are ignored.
fn xor_into(dst: &mut [u8], src: &[u8]) {
    let overlap = dst.len().min(src.len());
    for i in 0..overlap {
        dst[i] ^= src[i];
    }
}
|
| 427 |
+
|
| 428 |
+
// ---------------------------------------------------------------------------
|
| 429 |
+
// BoundaryQuery
|
| 430 |
+
// ---------------------------------------------------------------------------
|
| 431 |
+
|
| 432 |
+
/// A management question that can be answered from the boundary metadata
/// alone, without loading or reconstructing any data.
pub enum BoundaryQuery {
    /// Should this region be promoted to a warmer tier?
    ShouldPromote,
    /// How many bytes of RAM does keeping this region cold save?
    CompressionSavings,
    /// Is this region connected to the peer region with the given ID?
    IsRelatedTo(u32),
    /// What is the coarse data type (derived from the first-64-byte fingerprint)?
    DataType,
    /// Has the content changed since the given hash was recorded?
    HasChanged(u64),
}
|
| 446 |
+
|
| 447 |
+
// ---------------------------------------------------------------------------
|
| 448 |
+
// HolographicBoundary
|
| 449 |
+
// ---------------------------------------------------------------------------
|
| 450 |
+
|
| 451 |
+
/// Zero-data COLD region descriptor.
///
/// Pure metadata kept in RAM: the reconstruction recipe for the
/// erasure-coded fragments plus enough context to answer the common
/// management questions (`BoundaryQuery`) without touching the data itself.
pub struct HolographicBoundary {
    /// Unique ID of the memory region this boundary represents.
    pub region_id: u32,
    /// Size of the original data, in bytes.
    pub original_size: usize,
    /// Hash of the original content, as produced by `simple_hash`.
    pub content_hash: u64,
    /// Hash of the first 64 bytes (or fewer, for short data): a coarse type
    /// fingerprint.
    pub type_signature: u64,
    /// `original_size / storage_size`; values above 1 mean compression saves space.
    pub compression_ratio: f32,
    /// Graph edges to peer regions as `(peer_region_id, edge_weight)`.
    pub graph_connections: Vec<(u32, f64)>,
    /// Total number of erasure fragments produced.
    pub fragment_count: u8,
    /// Minimum number of fragments needed to reconstruct.
    pub fragments_required: u8,
    /// Estimated microseconds to reconstruct (I/O + XOR cost).
    pub reconstruction_cost_us: u64,
    /// Nanosecond timestamp of the last access (0 = never accessed).
    pub last_access_ns: u64,
    /// Exponentially-smoothed access rate (approximately accesses per second).
    pub access_frequency: f32,
}
|
| 480 |
+
|
| 481 |
+
impl HolographicBoundary {
|
| 482 |
+
/// Build a boundary from raw data.
|
| 483 |
+
///
|
| 484 |
+
/// `data` is the original bytes being cold-stored. After this call the
|
| 485 |
+
/// caller should hand `data` off to the erasure coder and drop it.
|
| 486 |
+
/// `connections` is the set of graph edges to neighbouring regions.
|
| 487 |
+
pub fn new(region_id: u32, data: &[u8], connections: Vec<(u32, f64)>) -> Self {
|
| 488 |
+
let content_hash = simple_hash(data);
|
| 489 |
+
|
| 490 |
+
// Type signature: hash of first 64 bytes (or all bytes if shorter)
|
| 491 |
+
let prefix = &data[..data.len().min(64)];
|
| 492 |
+
let type_signature = simple_hash(prefix);
|
| 493 |
+
|
| 494 |
+
// Rough compression ratio estimate: XOR entropy proxy
|
| 495 |
+
// We use a simple byte-frequency model: unique bytes / 256 * 2
|
| 496 |
+
let storage_estimate = estimate_compressed_size(data);
|
| 497 |
+
let compression_ratio = if storage_estimate == 0 {
|
| 498 |
+
1.0
|
| 499 |
+
} else {
|
| 500 |
+
data.len() as f32 / storage_estimate as f32
|
| 501 |
+
};
|
| 502 |
+
|
| 503 |
+
// Reconstruction cost: assume ~10µs base + 1µs per KB of data
|
| 504 |
+
let reconstruction_cost_us = 10 + (data.len() as u64 / 1024);
|
| 505 |
+
|
| 506 |
+
Self {
|
| 507 |
+
region_id,
|
| 508 |
+
original_size: data.len(),
|
| 509 |
+
content_hash,
|
| 510 |
+
type_signature,
|
| 511 |
+
compression_ratio,
|
| 512 |
+
graph_connections: connections,
|
| 513 |
+
fragment_count: 0, // caller sets after encoding
|
| 514 |
+
fragments_required: 0,
|
| 515 |
+
reconstruction_cost_us,
|
| 516 |
+
last_access_ns: 0,
|
| 517 |
+
access_frequency: 0.0,
|
| 518 |
+
}
|
| 519 |
+
}
|
| 520 |
+
|
| 521 |
+
/// Return true if the boundary metadata alone can answer `query`.
|
| 522 |
+
///
|
| 523 |
+
/// All variants always return true — that is the invariant of the
|
| 524 |
+
/// holographic boundary design. This method exists to make that contract
|
| 525 |
+
/// explicit and testable.
|
| 526 |
+
pub fn can_answer_query(&self, query: &BoundaryQuery) -> bool {
|
| 527 |
+
match query {
|
| 528 |
+
BoundaryQuery::ShouldPromote => {
|
| 529 |
+
// Needs access_frequency and graph_connections — both present
|
| 530 |
+
true
|
| 531 |
+
}
|
| 532 |
+
BoundaryQuery::CompressionSavings => {
|
| 533 |
+
// Needs compression_ratio and original_size — both present
|
| 534 |
+
true
|
| 535 |
+
}
|
| 536 |
+
BoundaryQuery::IsRelatedTo(peer_id) => {
|
| 537 |
+
// Just check the connections list
|
| 538 |
+
let _ = self.graph_connections.iter().any(|(id, _)| id == peer_id);
|
| 539 |
+
true
|
| 540 |
+
}
|
| 541 |
+
BoundaryQuery::DataType => {
|
| 542 |
+
// Needs type_signature — present
|
| 543 |
+
true
|
| 544 |
+
}
|
| 545 |
+
BoundaryQuery::HasChanged(hash) => {
|
| 546 |
+
// Compare against content_hash — no data needed
|
| 547 |
+
let _ = self.content_hash == *hash;
|
| 548 |
+
true
|
| 549 |
+
}
|
| 550 |
+
}
|
| 551 |
+
}
|
| 552 |
+
|
| 553 |
+
/// Actually evaluate `query` and return the answer as a `QueryAnswer`.
|
| 554 |
+
pub fn answer_query(&self, query: &BoundaryQuery) -> QueryAnswer {
|
| 555 |
+
match query {
|
| 556 |
+
BoundaryQuery::ShouldPromote => {
|
| 557 |
+
// Promote when access_frequency > 0.01 Hz or highly connected
|
| 558 |
+
let promote = self.access_frequency > 0.01
|
| 559 |
+
|| self.graph_connections.len() > 5;
|
| 560 |
+
QueryAnswer::Bool(promote)
|
| 561 |
+
}
|
| 562 |
+
BoundaryQuery::CompressionSavings => {
|
| 563 |
+
let savings = if self.compression_ratio > 1.0 {
|
| 564 |
+
let stored = self.original_size as f32 / self.compression_ratio;
|
| 565 |
+
(self.original_size as f32 - stored) as usize
|
| 566 |
+
} else {
|
| 567 |
+
0
|
| 568 |
+
};
|
| 569 |
+
QueryAnswer::Bytes(savings)
|
| 570 |
+
}
|
| 571 |
+
BoundaryQuery::IsRelatedTo(peer_id) => {
|
| 572 |
+
let related = self.graph_connections.iter().any(|(id, _)| id == peer_id);
|
| 573 |
+
QueryAnswer::Bool(related)
|
| 574 |
+
}
|
| 575 |
+
BoundaryQuery::DataType => {
|
| 576 |
+
QueryAnswer::Hash(self.type_signature)
|
| 577 |
+
}
|
| 578 |
+
BoundaryQuery::HasChanged(hash) => {
|
| 579 |
+
QueryAnswer::Bool(self.content_hash != *hash)
|
| 580 |
+
}
|
| 581 |
+
}
|
| 582 |
+
}
|
| 583 |
+
|
| 584 |
+
/// Record an access event at `now_ns` nanoseconds and update frequency.
|
| 585 |
+
///
|
| 586 |
+
/// Uses a simple exponential moving average so frequency decays over time
|
| 587 |
+
/// without storing a full access history.
|
| 588 |
+
pub fn update_access(&mut self, now_ns: u64) {
|
| 589 |
+
if self.last_access_ns > 0 && now_ns > self.last_access_ns {
|
| 590 |
+
let dt_s = (now_ns - self.last_access_ns) as f64 / 1_000_000_000.0;
|
| 591 |
+
let instant_rate = if dt_s > 0.0 { 1.0 / dt_s } else { 0.0 };
|
| 592 |
+
// EMA with alpha = 0.2
|
| 593 |
+
self.access_frequency = 0.8 * self.access_frequency + 0.2 * instant_rate as f32;
|
| 594 |
+
}
|
| 595 |
+
self.last_access_ns = now_ns;
|
| 596 |
+
}
|
| 597 |
+
}
|
| 598 |
+
|
| 599 |
+
/// Typed return value from `HolographicBoundary::answer_query`.
pub enum QueryAnswer {
    /// Yes/no answer (`ShouldPromote`, `IsRelatedTo`, `HasChanged`).
    Bool(bool),
    /// A byte count (`CompressionSavings`).
    Bytes(usize),
    /// A 64-bit hash value (`DataType`).
    Hash(u64),
}
|
| 605 |
+
|
| 606 |
+
// ---------------------------------------------------------------------------
|
| 607 |
+
// Internal: compressed size estimator (no external dep)
|
| 608 |
+
// ---------------------------------------------------------------------------
|
| 609 |
+
|
| 610 |
+
/// Rough estimate of how many bytes `data` would compress to.
///
/// Uses the Shannon entropy of the byte-frequency histogram as a proxy:
/// high entropy means near-incompressible. This is intentionally cheap; it
/// only needs to produce a plausible ratio for the boundary metadata, not
/// match a real compressor.
///
/// Returns 0 for empty input. Otherwise returns
/// `floor(len * max(entropy_bits / 8, 0.125)) + 1`, i.e. the estimate never
/// claims better than 8:1 compression and is always at least 1 byte.
fn estimate_compressed_size(data: &[u8]) -> usize {
    if data.is_empty() {
        return 0;
    }

    // usize counters: a single byte value can occur up to `data.len()` times,
    // which does not fit in u32 for inputs larger than 4 GiB.
    let mut freq = [0usize; 256];
    for &b in data {
        freq[b as usize] += 1;
    }

    let n = data.len() as f64;
    // Shannon entropy (bits per byte)
    let entropy: f64 = freq
        .iter()
        .filter(|&&c| c > 0)
        .map(|&c| {
            let p = c as f64 / n;
            -p * p.log2()
        })
        .sum();

    // Estimated bits / 8 = output bytes per byte of original.
    let ratio = (entropy / 8.0).max(0.125); // floor at 8:1 compression
    (n * ratio) as usize + 1
}
|
| 636 |
+
|
| 637 |
+
// ---------------------------------------------------------------------------
|
| 638 |
+
// Tests
|
| 639 |
+
// ---------------------------------------------------------------------------
|
| 640 |
+
|
| 641 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding then decoding with every fragment present is lossless.
    #[test]
    fn test_erasure_encode_decode_roundtrip() {
        let original: Vec<u8> = (0u8..200).collect();
        let coder = ErasureCoder::new(6, 4);

        let fragments = coder.encode(&original);
        assert_eq!(fragments.len(), 6);

        // All 6 fragments are available to the decoder.
        let recovered = coder.decode(&fragments).expect("decode from all fragments");
        assert_eq!(recovered, original, "roundtrip must be byte-identical");
    }

    /// The K data fragments alone (no parity) are enough to decode.
    #[test]
    fn test_erasure_decode_with_minimum() {
        let original: Vec<u8> = (0u8..=255).cycle().take(512).collect();
        let coder = ErasureCoder::new(6, 4);

        // Keep only the K=4 data fragments (indices 0..3).
        let mut data_only: Vec<Fragment> = coder.encode(&original);
        data_only.retain(|f| (f.index as usize) < 4);
        assert_eq!(data_only.len(), 4);

        let recovered = coder.decode(&data_only).expect("decode from minimum data frags");
        assert_eq!(recovered, original);
    }

    /// A single lost data fragment is rebuilt from the full XOR parity.
    #[test]
    fn test_erasure_decode_with_parity() {
        // N=4, K=3: indices 0,1,2 are data; index 3 is parity (XOR of all).
        let coder = ErasureCoder::new(4, 3);
        let original = b"Hello, erasure coding world! This is a test.".to_vec();

        let mut subset: Vec<Fragment> = coder.encode(&original);
        assert_eq!(subset.len(), 4);

        // Drop data fragment 0; keep 1, 2, and parity 3.
        subset.retain(|f| f.index != 0);
        assert_eq!(subset.len(), 3);

        let recovered = coder.decode(&subset).expect("should recover with parity");
        assert_eq!(recovered, original, "parity recovery must produce original data");
    }

    /// Fewer than K fragments must be rejected with `InsufficientFragments`.
    #[test]
    fn test_erasure_decode_insufficient() {
        let original: Vec<u8> = (0u8..100).collect();
        let coder = ErasureCoder::new(6, 4);

        // Keep only K-1 = 3 data fragments, no parity.
        let mut tiny: Vec<Fragment> = coder.encode(&original);
        tiny.retain(|f| f.index < 3);

        let outcome = coder.decode(&tiny);
        let is_insufficient =
            matches!(outcome, Err(DecodeError::InsufficientFragments { .. }));
        assert!(
            is_insufficient,
            "should error with insufficient fragments, got: {:?}",
            outcome.err()
        );
    }

    /// `HolographicBoundary::new` fills every metadata field as documented.
    #[test]
    fn test_holographic_boundary_creation() {
        let data: Vec<u8> = (0u8..=127).cycle().take(4096).collect();
        let connections = vec![(42u32, 0.8f64), (99u32, 0.3f64)];

        let boundary = HolographicBoundary::new(7, &data, connections);

        // Identity and content fingerprints.
        assert_eq!(boundary.region_id, 7);
        assert_eq!(boundary.original_size, 4096);
        assert_eq!(boundary.content_hash, simple_hash(&data));
        assert_eq!(boundary.type_signature, simple_hash(&data[..64]));

        // Derived estimates.
        assert_eq!(boundary.graph_connections.len(), 2);
        assert!(boundary.compression_ratio > 0.0);
        assert!(boundary.reconstruction_cost_us >= 10);

        // Access statistics start at zero.
        assert_eq!(boundary.last_access_ns, 0);
        assert_eq!(boundary.access_frequency, 0.0);
    }

    /// Every query kind is answerable from metadata, and answers are correct.
    #[test]
    fn test_boundary_queries_no_data() {
        let data = b"Holographic boundary test payload. ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789.";
        let connections = vec![(10u32, 1.0f64), (20u32, 0.5f64)];
        let mut boundary = HolographicBoundary::new(1, data, connections);
        boundary.access_frequency = 0.05; // above promote threshold

        let queries = [
            BoundaryQuery::ShouldPromote,
            BoundaryQuery::CompressionSavings,
            BoundaryQuery::IsRelatedTo(10),
            BoundaryQuery::IsRelatedTo(999), // not connected
            BoundaryQuery::DataType,
            BoundaryQuery::HasChanged(simple_hash(data)),
            BoundaryQuery::HasChanged(0xdeadbeef),
        ];

        queries.iter().for_each(|q| {
            assert!(
                boundary.can_answer_query(q),
                "every BoundaryQuery must be answerable from metadata alone"
            );
        });

        // Spot-check actual answers.
        let promote = boundary.answer_query(&BoundaryQuery::ShouldPromote);
        assert!(matches!(promote, QueryAnswer::Bool(true)));

        let related = boundary.answer_query(&BoundaryQuery::IsRelatedTo(10));
        assert!(matches!(related, QueryAnswer::Bool(true)));

        let unrelated = boundary.answer_query(&BoundaryQuery::IsRelatedTo(999));
        assert!(matches!(unrelated, QueryAnswer::Bool(false)));

        let unchanged = boundary.answer_query(&BoundaryQuery::HasChanged(simple_hash(data)));
        assert!(matches!(unchanged, QueryAnswer::Bool(false)));

        let changed = boundary.answer_query(&BoundaryQuery::HasChanged(0xdeadbeef));
        assert!(matches!(changed, QueryAnswer::Bool(true)));

        let data_type = boundary.answer_query(&BoundaryQuery::DataType);
        assert!(matches!(data_type, QueryAnswer::Hash(_)));
    }

    /// `verify_hash` accepts the right hash and rejects corrupted data.
    #[test]
    fn test_hash_integrity() {
        let data = b"integrity check payload";
        let h = simple_hash(data);

        assert!(ErasureCoder::verify_hash(data, h), "correct hash must verify");

        // Flip all bits in one byte.
        let mut corrupted = data.to_vec();
        corrupted[5] ^= 0xFF;
        let still_verifies = ErasureCoder::verify_hash(&corrupted, h);
        assert!(!still_verifies, "corrupted data must fail hash check");
    }

    /// Empty input still yields N fragments and decodes back to empty.
    #[test]
    fn test_encode_empty_data() {
        let coder = ErasureCoder::new(4, 3);
        let fragments = coder.encode(&[]);

        assert_eq!(fragments.len(), 4);
        fragments
            .iter()
            .for_each(|f| assert_eq!(f.original_size, 0));

        // Decoding all fragments of empty data should return an empty vec.
        let recovered = coder.decode(&fragments).expect("empty encode/decode roundtrip");
        assert!(recovered.is_empty(), "empty input should decode to empty vec");
    }
}
|
|
@@ -0,0 +1,655 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Prediction Gate — KISS overhead reduction for Condensate.
|
| 2 |
+
//!
|
| 3 |
+
//! Confirmed predictions don't get logged. Only surprises teach the substrate.
|
| 4 |
+
//! The cost of running Condensate decreases over time as the substrate learns.
|
| 5 |
+
//! Tighter timing tolerances mean better cache tier targeting.
|
| 6 |
+
//!
|
| 7 |
+
//! Mechanics:
|
| 8 |
+
//! - Each path gets a PathGate that tracks confirmed/surprise/miss counts.
|
| 9 |
+
//! - Timing tolerance starts at 50ms and tightens (×0.95) on each confirmation,
|
| 10 |
+
//! loosens (×1.2) on each surprise, clamped to [2ms, 100ms].
|
| 11 |
+
//! - A ring buffer of recent outcomes drives a burst detector: if the surprise
|
| 12 |
+
//! ratio exceeds `surprise_burst_threshold`, gating is disabled globally until
|
| 13 |
+
//! the ratio drops below threshold × 0.5.
|
| 14 |
+
|
| 15 |
+
use std::collections::HashMap;
|
| 16 |
+
|
| 17 |
+
// ─── Public types ────────────────────────────────────────────────────────────
|
| 18 |
+
|
| 19 |
+
/// A raw memory-access event observed from the system.
pub struct AccessEvent {
    /// When the access happened, in nanoseconds.
    pub timestamp_ns: u64,
    /// Path that was accessed; matched against `Prediction::path`.
    pub path: String,
    /// Size of the access, in bytes.
    pub size_bytes: u64,
}
|
| 25 |
+
|
| 26 |
+
/// A live prediction issued by the predictor for an upcoming access.
pub struct Prediction {
    /// Identifier reported back in `GateOutcome::Confirmed` / `GateOutcome::Miss`.
    pub id: u32,
    /// Path the predictor expects to be accessed.
    pub path: String,
    /// Predictor confidence for this prediction.
    pub confidence: f64,
    /// When the prediction was issued, in nanoseconds.
    pub predicted_at_ns: u64,
    /// Expected delay between `predicted_at_ns` and the access, in milliseconds.
    pub expected_delta_ms: f64,
}
|
| 34 |
+
|
| 35 |
+
/// The outcome of running an AccessEvent through the gate.
|
| 36 |
+
pub enum GateOutcome {
|
| 37 |
+
/// The event matched a prediction within timing tolerance.
|
| 38 |
+
Confirmed {
|
| 39 |
+
prediction_id: u32,
|
| 40 |
+
timing_error_ms: f64,
|
| 41 |
+
},
|
| 42 |
+
/// The event was not predicted — teach the substrate.
|
| 43 |
+
Surprise {
|
| 44 |
+
event: AccessEvent,
|
| 45 |
+
},
|
| 46 |
+
/// A prediction window expired without a matching event.
|
| 47 |
+
Miss {
|
| 48 |
+
prediction_id: u32,
|
| 49 |
+
expected_path: String,
|
| 50 |
+
},
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
// ─── Per-path gate ────────────────────────────────────────────────────────────
|
| 54 |
+
|
| 55 |
+
/// Timing tolerance a freshly created `PathGate` starts with, in ms.
const TOLERANCE_START_MS: f64 = 50.0;
/// Lower clamp for the adaptive tolerance, in ms.
const TOLERANCE_MIN_MS: f64 = 2.0;
/// Upper clamp for the adaptive tolerance, in ms.
const TOLERANCE_MAX_MS: f64 = 100.0;
/// Multiplier applied to the tolerance on each confirmation (tightens).
const TIGHTEN_FACTOR: f64 = 0.95;
/// Multiplier applied to the tolerance on each surprise or miss (loosens).
const LOOSEN_FACTOR: f64 = 1.2;
|
| 60 |
+
|
| 61 |
+
/// Per-path state: outcome counters and the adaptive timing tolerance.
pub struct PathGate {
    /// Numeric ID assigned to this path.
    pub path_id: u32,
    /// Number of confirmed predictions recorded for this path.
    confirmed_count: u64,
    /// Number of surprises recorded for this path.
    surprise_count: u64,
    /// Number of misses recorded for this path.
    miss_count: u64,
    /// Current timing tolerance, in milliseconds.
    timing_tolerance_ms: f64,
    /// When false, `PredictionGate::check` reports every event on this path
    /// as a surprise.
    gating_enabled: bool,
}
|
| 70 |
+
|
| 71 |
+
impl PathGate {
|
| 72 |
+
fn new(path_id: u32) -> Self {
|
| 73 |
+
Self {
|
| 74 |
+
path_id,
|
| 75 |
+
confirmed_count: 0,
|
| 76 |
+
surprise_count: 0,
|
| 77 |
+
miss_count: 0,
|
| 78 |
+
timing_tolerance_ms: TOLERANCE_START_MS,
|
| 79 |
+
gating_enabled: true,
|
| 80 |
+
}
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
fn on_confirmed(&mut self) {
|
| 84 |
+
self.confirmed_count += 1;
|
| 85 |
+
self.timing_tolerance_ms =
|
| 86 |
+
(self.timing_tolerance_ms * TIGHTEN_FACTOR).max(TOLERANCE_MIN_MS);
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
fn on_surprise(&mut self) {
|
| 90 |
+
self.surprise_count += 1;
|
| 91 |
+
self.timing_tolerance_ms =
|
| 92 |
+
(self.timing_tolerance_ms * LOOSEN_FACTOR).min(TOLERANCE_MAX_MS);
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
fn on_miss(&mut self) {
|
| 96 |
+
self.miss_count += 1;
|
| 97 |
+
// Decay: treat miss like a mild surprise for tolerance purposes.
|
| 98 |
+
self.timing_tolerance_ms =
|
| 99 |
+
(self.timing_tolerance_ms * LOOSEN_FACTOR).min(TOLERANCE_MAX_MS);
|
| 100 |
+
}
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
// ─── Global prediction gate ───────────────────────────────────────────────────
|
| 104 |
+
|
| 105 |
+
/// Global gate that routes events through per-path prediction windows.
|
| 106 |
+
pub struct PredictionGate {
|
| 107 |
+
gates: HashMap<String, PathGate>,
|
| 108 |
+
global_confirmed: u64,
|
| 109 |
+
global_total: u64,
|
| 110 |
+
surprise_burst_threshold: f64,
|
| 111 |
+
window: Vec<bool>, // ring buffer; true = surprise
|
| 112 |
+
window_pos: usize,
|
| 113 |
+
window_size: usize,
|
| 114 |
+
next_path_id: u32,
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
impl PredictionGate {
|
| 118 |
+
// ── Construction ─────────────────────────────────────────────────────────
|
| 119 |
+
|
| 120 |
+
pub fn new(window_size: usize, surprise_burst_threshold: f64) -> Self {
|
| 121 |
+
let window_size = window_size.max(1);
|
| 122 |
+
Self {
|
| 123 |
+
gates: HashMap::new(),
|
| 124 |
+
global_confirmed: 0,
|
| 125 |
+
global_total: 0,
|
| 126 |
+
surprise_burst_threshold,
|
| 127 |
+
window: vec![false; window_size],
|
| 128 |
+
window_pos: 0,
|
| 129 |
+
window_size,
|
| 130 |
+
next_path_id: 0,
|
| 131 |
+
}
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
// ── Core gate check ───────────────────────────────────────────────────────
|
| 135 |
+
|
| 136 |
+
/// Route an event through the active prediction set.
|
| 137 |
+
///
|
| 138 |
+
/// 1. Walk `active_predictions` looking for a path match within timing tolerance.
|
| 139 |
+
/// The first match with the smallest timing error wins → Confirmed.
|
| 140 |
+
/// 2. If no match → Surprise.
|
| 141 |
+
/// 3. Predictions whose window has expired and haven't fired → Miss (returned
|
| 142 |
+
/// separately; callers should call `record_outcome` for each Miss too, but
|
| 143 |
+
/// this function returns the first actionable outcome for the current event).
|
| 144 |
+
///
|
| 145 |
+
/// Note: Miss detection for *stale* predictions is done inside this function
|
| 146 |
+
/// and the returned outcome may be a Miss when `event`'s timestamp reveals that
|
| 147 |
+
/// an earlier prediction has expired. The caller should check the return type.
|
| 148 |
+
pub fn check(&mut self, event: &AccessEvent, active_predictions: &[Prediction]) -> GateOutcome {
|
| 149 |
+
// Look for any predictions that fired (path match + timing window).
|
| 150 |
+
let event_time_ms = event.timestamp_ns as f64 / 1_000_000.0;
|
| 151 |
+
|
| 152 |
+
// Find the best matching prediction for this event's path.
|
| 153 |
+
let gate = self.get_or_create_gate(&event.path);
|
| 154 |
+
let tolerance = gate.timing_tolerance_ms;
|
| 155 |
+
let gating_ok = gate.gating_enabled;
|
| 156 |
+
|
| 157 |
+
// If gating is disabled for this path, treat as surprise.
|
| 158 |
+
if !gating_ok {
|
| 159 |
+
return GateOutcome::Surprise {
|
| 160 |
+
event: AccessEvent {
|
| 161 |
+
timestamp_ns: event.timestamp_ns,
|
| 162 |
+
path: event.path.clone(),
|
| 163 |
+
size_bytes: event.size_bytes,
|
| 164 |
+
},
|
| 165 |
+
};
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
// Scan predictions for a match on this path.
|
| 169 |
+
let mut best_match: Option<(u32, f64)> = None; // (id, timing_error_ms)
|
| 170 |
+
|
| 171 |
+
for pred in active_predictions {
|
| 172 |
+
if pred.path != event.path {
|
| 173 |
+
continue;
|
| 174 |
+
}
|
| 175 |
+
let predicted_fire_ns = pred.predicted_at_ns
|
| 176 |
+
+ (pred.expected_delta_ms * 1_000_000.0) as u64;
|
| 177 |
+
let predicted_fire_ms = predicted_fire_ns as f64 / 1_000_000.0;
|
| 178 |
+
let timing_error_ms = (event_time_ms - predicted_fire_ms).abs();
|
| 179 |
+
|
| 180 |
+
if timing_error_ms <= tolerance {
|
| 181 |
+
match best_match {
|
| 182 |
+
None => best_match = Some((pred.id, timing_error_ms)),
|
| 183 |
+
Some((_, best_err)) if timing_error_ms < best_err => {
|
| 184 |
+
best_match = Some((pred.id, timing_error_ms));
|
| 185 |
+
}
|
| 186 |
+
_ => {}
|
| 187 |
+
}
|
| 188 |
+
}
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
if let Some((pred_id, timing_error_ms)) = best_match {
|
| 192 |
+
return GateOutcome::Confirmed {
|
| 193 |
+
prediction_id: pred_id,
|
| 194 |
+
timing_error_ms,
|
| 195 |
+
};
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
// Check for stale predictions (overdue misses) before declaring Surprise.
|
| 199 |
+
// Return the first expired prediction as a Miss; the event becomes a
|
| 200 |
+
// subsequent call. If none are stale, return Surprise for this event.
|
| 201 |
+
for pred in active_predictions {
|
| 202 |
+
let predicted_fire_ns = pred.predicted_at_ns
|
| 203 |
+
+ (pred.expected_delta_ms * 1_000_000.0) as u64;
|
| 204 |
+
// Allow generous 2× tolerance window before calling a miss.
|
| 205 |
+
let deadline_ns = predicted_fire_ns
|
| 206 |
+
+ (tolerance * 2.0 * 1_000_000.0) as u64;
|
| 207 |
+
if event.timestamp_ns > deadline_ns {
|
| 208 |
+
return GateOutcome::Miss {
|
| 209 |
+
prediction_id: pred.id,
|
| 210 |
+
expected_path: pred.path.clone(),
|
| 211 |
+
};
|
| 212 |
+
}
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
// Nothing matched — genuine surprise.
|
| 216 |
+
GateOutcome::Surprise {
|
| 217 |
+
event: AccessEvent {
|
| 218 |
+
timestamp_ns: event.timestamp_ns,
|
| 219 |
+
path: event.path.clone(),
|
| 220 |
+
size_bytes: event.size_bytes,
|
| 221 |
+
},
|
| 222 |
+
}
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
// ── Outcome recording ─────────────────────────────────────────────────────
|
| 226 |
+
|
| 227 |
+
/// Update internal state based on a gate outcome.
|
| 228 |
+
///
|
| 229 |
+
/// - Confirmed → tighten timing tolerance for the path.
|
| 230 |
+
/// - Surprise → loosen tolerance, mark window slot.
|
| 231 |
+
/// - Miss → decay (loosen) tolerance for the expected path.
|
| 232 |
+
pub fn record_outcome(&mut self, outcome: &GateOutcome) {
|
| 233 |
+
match outcome {
|
| 234 |
+
GateOutcome::Confirmed { prediction_id: _, timing_error_ms: _ } => {
|
| 235 |
+
// We need the path for confirmed — look it up by scanning gates.
|
| 236 |
+
// Since we can't get the path from the outcome alone, the caller
|
| 237 |
+
// must ensure they call check() then record_outcome() in sequence
|
| 238 |
+
// so the path gate was already touched. We update global counters
|
| 239 |
+
// and the ring buffer here; per-path update is done in
|
| 240 |
+
// record_outcome_for_path().
|
| 241 |
+
self.push_window(false);
|
| 242 |
+
self.global_confirmed += 1;
|
| 243 |
+
self.global_total += 1;
|
| 244 |
+
}
|
| 245 |
+
GateOutcome::Surprise { event } => {
|
| 246 |
+
let gate = self.get_or_create_gate(&event.path);
|
| 247 |
+
gate.on_surprise();
|
| 248 |
+
self.push_window(true);
|
| 249 |
+
self.global_total += 1;
|
| 250 |
+
self.check_surprise_burst();
|
| 251 |
+
}
|
| 252 |
+
GateOutcome::Miss { prediction_id: _, expected_path } => {
|
| 253 |
+
// Loosen the gate for the path that missed.
|
| 254 |
+
let path = expected_path.clone();
|
| 255 |
+
let gate = self.get_or_create_gate(&path);
|
| 256 |
+
gate.on_miss();
|
| 257 |
+
// Misses don't go into the surprise window (they're a different
|
| 258 |
+
// signal), but they don't count as confirmations either.
|
| 259 |
+
}
|
| 260 |
+
}
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
/// Per-path confirmed update — call after record_outcome for Confirmed outcomes.
|
| 264 |
+
///
|
| 265 |
+
/// Because GateOutcome::Confirmed doesn't carry the path, the caller must
|
| 266 |
+
/// supply it. This is a deliberate design: the gate is checked per-event and
|
| 267 |
+
/// the path is known at the call site.
|
| 268 |
+
pub fn record_confirmed_for_path(&mut self, path: &str) {
|
| 269 |
+
let gate = self.get_or_create_gate(path);
|
| 270 |
+
gate.on_confirmed();
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
// ── Ratio & burst ─────────────────────────────────────────────────────────
|
| 274 |
+
|
| 275 |
+
/// Fraction of recent window events that were confirmed (1 − surprise_ratio).
|
| 276 |
+
///
|
| 277 |
+
/// Returns 0.0 at cold start (all slots are false = confirmed, but
|
| 278 |
+
/// global_total == 0 means nothing has happened yet).
|
| 279 |
+
pub fn gate_ratio(&self) -> f64 {
|
| 280 |
+
if self.global_total == 0 {
|
| 281 |
+
return 0.0;
|
| 282 |
+
}
|
| 283 |
+
// Count surprises in the window.
|
| 284 |
+
let surprises = self.window.iter().filter(|&&s| s).count();
|
| 285 |
+
let filled = self.global_total.min(self.window_size as u64) as usize;
|
| 286 |
+
if filled == 0 {
|
| 287 |
+
return 0.0;
|
| 288 |
+
}
|
| 289 |
+
let surprise_ratio = surprises as f64 / filled as f64;
|
| 290 |
+
1.0 - surprise_ratio
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
/// Is gating active for a specific path?
|
| 294 |
+
pub fn is_gating_enabled(&self, path: &str) -> bool {
|
| 295 |
+
match self.gates.get(path) {
|
| 296 |
+
Some(g) => g.gating_enabled,
|
| 297 |
+
None => true, // default: enabled (new paths start gated)
|
| 298 |
+
}
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
/// Check the surprise window; disable gating if burst threshold is exceeded,
|
| 302 |
+
/// re-enable if ratio drops below threshold × 0.5.
|
| 303 |
+
///
|
| 304 |
+
/// Returns `true` if gating is currently in burst-disable mode.
|
| 305 |
+
pub fn check_surprise_burst(&mut self) -> bool {
|
| 306 |
+
let filled = self.global_total.min(self.window_size as u64) as usize;
|
| 307 |
+
if filled == 0 {
|
| 308 |
+
return false;
|
| 309 |
+
}
|
| 310 |
+
let surprises = self.window.iter().filter(|&&s| s).count();
|
| 311 |
+
let ratio = surprises as f64 / filled as f64;
|
| 312 |
+
|
| 313 |
+
let in_burst = ratio > self.surprise_burst_threshold;
|
| 314 |
+
let recovered = ratio < self.surprise_burst_threshold * 0.5;
|
| 315 |
+
|
| 316 |
+
for gate in self.gates.values_mut() {
|
| 317 |
+
if in_burst {
|
| 318 |
+
gate.gating_enabled = false;
|
| 319 |
+
} else if recovered {
|
| 320 |
+
gate.gating_enabled = true;
|
| 321 |
+
}
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
in_burst
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
// ── Maintenance ───────────────────────────────────────────────────────────
|
| 328 |
+
|
| 329 |
+
/// Reset a specific path's gate — pattern changed, need to relearn.
|
| 330 |
+
pub fn reset_gate(&mut self, path: &str) {
|
| 331 |
+
if let Some(gate) = self.gates.get_mut(path) {
|
| 332 |
+
gate.confirmed_count = 0;
|
| 333 |
+
gate.surprise_count = 0;
|
| 334 |
+
gate.miss_count = 0;
|
| 335 |
+
gate.timing_tolerance_ms = TOLERANCE_START_MS;
|
| 336 |
+
gate.gating_enabled = true;
|
| 337 |
+
}
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
/// Return `(confirmed, surprise, miss, timing_tolerance_ms)` for a path.
|
| 341 |
+
pub fn get_path_stats(&self, path: &str) -> Option<(u64, u64, u64, f64)> {
|
| 342 |
+
self.gates.get(path).map(|g| {
|
| 343 |
+
(g.confirmed_count, g.surprise_count, g.miss_count, g.timing_tolerance_ms)
|
| 344 |
+
})
|
| 345 |
+
}
|
| 346 |
+
|
| 347 |
+
// ── Internals ─────────────────────────────────────────────────────────────
|
| 348 |
+
|
| 349 |
+
fn get_or_create_gate(&mut self, path: &str) -> &mut PathGate {
|
| 350 |
+
if !self.gates.contains_key(path) {
|
| 351 |
+
let id = self.next_path_id;
|
| 352 |
+
self.next_path_id += 1;
|
| 353 |
+
self.gates.insert(path.to_string(), PathGate::new(id));
|
| 354 |
+
}
|
| 355 |
+
self.gates.get_mut(path).unwrap()
|
| 356 |
+
}
|
| 357 |
+
|
| 358 |
+
fn push_window(&mut self, is_surprise: bool) {
|
| 359 |
+
self.window[self.window_pos] = is_surprise;
|
| 360 |
+
self.window_pos = (self.window_pos + 1) % self.window_size;
|
| 361 |
+
}
|
| 362 |
+
}
|
| 363 |
+
|
| 364 |
+
// ─── Tests ────────────────────────────────────────────────────────────────────
|
| 365 |
+
|
| 366 |
+
#[cfg(test)]
mod tests {
    use super::*;

    // ── Fixtures ──────────────────────────────────────────────────────────────

    /// Prediction for `path` issued at `issued_at_ns`, expected `delta_ms` later.
    fn make_prediction_delta(
        id: u32,
        path: &str,
        issued_at_ns: u64,
        delta_ms: f64,
    ) -> Prediction {
        Prediction {
            id,
            path: path.to_string(),
            confidence: 0.9,
            predicted_at_ns: issued_at_ns,
            expected_delta_ms: delta_ms,
        }
    }

    /// Prediction that fires exactly at `fire_at_ns` (zero expected delta).
    fn make_prediction(id: u32, path: &str, fire_at_ns: u64) -> Prediction {
        make_prediction_delta(id, path, fire_at_ns, 0.0)
    }

    /// 4 KiB access event on `path` at `timestamp_ns`.
    fn make_event(path: &str, timestamp_ns: u64) -> AccessEvent {
        AccessEvent {
            timestamp_ns,
            path: path.to_string(),
            size_bytes: 4096,
        }
    }

    /// Record `count` surprise outcomes for `path`, spaced 1 ms apart from t=0.
    fn inject_surprises(gate: &mut PredictionGate, path: &str, count: u64) {
        for step in 0..count {
            let event = make_event(path, step * 1_000_000);
            gate.record_outcome(&GateOutcome::Surprise { event });
        }
    }

    /// Variant name of an outcome, for panic messages.
    fn discriminant_name(outcome: &GateOutcome) -> &'static str {
        match outcome {
            GateOutcome::Confirmed { .. } => "Confirmed",
            GateOutcome::Surprise { .. } => "Surprise",
            GateOutcome::Miss { .. } => "Miss",
        }
    }

    // ── 1. Confirmed prediction is gated ─────────────────────────────────────

    #[test]
    fn test_gate_confirmed_prediction_gated() {
        let mut gate = PredictionGate::new(64, 0.3);
        // Prediction and event both sit at t = 1 ms, so the timing error is 0.
        let preds = vec![make_prediction(1, "/data/foo", 1_000_000)];
        let event = make_event("/data/foo", 1_000_000);

        let outcome = gate.check(&event, &preds);
        if let GateOutcome::Confirmed { prediction_id, timing_error_ms } = outcome {
            assert_eq!(prediction_id, 1);
            assert!(timing_error_ms < 1.0, "Expected ~0 ms error, got {}", timing_error_ms);
        } else {
            panic!("Expected Confirmed, got {:?}", discriminant_name(&outcome));
        }
    }

    // ── 2. Unpredicted event is a Surprise ────────────────────────────────────

    #[test]
    fn test_gate_surprise_event() {
        let mut gate = PredictionGate::new(64, 0.3);
        let no_predictions: Vec<Prediction> = Vec::new();
        let event = make_event("/unexpected/path", 5_000_000);

        let outcome = gate.check(&event, &no_predictions);
        if let GateOutcome::Surprise { event: e } = &outcome {
            assert_eq!(e.path, "/unexpected/path");
        } else {
            panic!("Expected Surprise, got {:?}", discriminant_name(&outcome));
        }
    }

    // ── 3. Miss detection ────────────────────────────────────────────────────

    #[test]
    fn test_gate_miss_detection() {
        let mut gate = PredictionGate::new(64, 0.3);

        // Issued at t=0 and expected 10 ms later; the next event (on another
        // path) only shows up at t=200 ms, far past the deadline.
        let preds = vec![make_prediction_delta(42, "/stale/path", 0, 10.0)];
        let late_event = make_event("/other/path", 200_000_000);

        let outcome = gate.check(&late_event, &preds);
        if let GateOutcome::Miss { prediction_id, expected_path } = &outcome {
            assert_eq!(*prediction_id, 42);
            assert_eq!(expected_path, "/stale/path");
        } else {
            panic!("Expected Miss, got {:?}", discriminant_name(&outcome));
        }
    }

    // ── 4. Gate ratio climbs toward 0.9 over stable events ───────────────────

    #[test]
    fn test_gate_gate_ratio_increases() {
        let window = 200;
        let mut gate = PredictionGate::new(window, 0.3);

        // 1000 perfectly predicted events, 1 ms apart.
        for step in 0u64..1000 {
            let at_ns = step * 1_000_000;
            let preds = vec![make_prediction(step as u32, "/stable/path", at_ns)];
            let event = make_event("/stable/path", at_ns);

            let outcome = gate.check(&event, &preds);
            gate.record_outcome(&outcome);
            gate.record_confirmed_for_path("/stable/path");
        }

        let ratio = gate.gate_ratio();
        assert!(
            ratio >= 0.85,
            "Expected gate ratio ≥ 0.85 after 1000 stable events, got {:.3}",
            ratio
        );
    }

    // ── 5. Timing tolerance tightens on repeated confirmations ───────────────

    #[test]
    fn test_gate_timing_tolerance_tightens() {
        let mut gate = PredictionGate::new(64, 0.3);
        let path = "/tight/path";

        // 40 confirmations applied straight to the path gate.
        (0..40).for_each(|_| gate.record_confirmed_for_path(path));

        let (_, _, _, tol) = gate.get_path_stats(path).expect("gate should exist");
        // 50 × 0.95^40 ≈ 6.5 ms, still above the 2 ms floor.
        assert!(tol < 25.0, "Tolerance should have tightened, got {:.2} ms", tol);
        assert!(tol >= TOLERANCE_MIN_MS, "Tolerance must not go below {} ms", TOLERANCE_MIN_MS);
    }

    // ── 6. Timing tolerance loosens on surprises ──────────────────────────────

    #[test]
    fn test_gate_timing_tolerance_loosens() {
        let mut gate = PredictionGate::new(64, 0.3);
        let path = "/loose/path";

        // Tighten first so there is room to loosen.
        (0..30).for_each(|_| gate.record_confirmed_for_path(path));
        let (_, _, _, tol_before) = gate.get_path_stats(path).unwrap();

        // Then feed surprises through record_outcome.
        inject_surprises(&mut gate, path, 10);

        let (_, _, _, tol_after) = gate.get_path_stats(path).unwrap();
        assert!(
            tol_after > tol_before,
            "Tolerance should have loosened: before={:.2} after={:.2}",
            tol_before, tol_after
        );
    }

    // ── 7. Surprise burst disables gating ────────────────────────────────────

    #[test]
    fn test_gate_surprise_burst_disables_gating() {
        let window = 20;
        let threshold = 0.3;
        let mut gate = PredictionGate::new(window, threshold);
        let path = "/burst/path";

        // Make sure the path gate exists before the burst.
        gate.record_confirmed_for_path(path);

        // 15 surprises: well above the 30% threshold.
        inject_surprises(&mut gate, path, 15);

        let burst = gate.check_surprise_burst();
        assert!(burst, "Burst should be detected");
        assert!(
            !gate.is_gating_enabled(path),
            "Gating should be disabled during burst"
        );
    }

    // ── 8. Gating re-enables after burst subsides ─────────────────────────────

    #[test]
    fn test_gate_recovery_re_enables_gating() {
        let window = 20;
        let threshold = 0.3;
        let mut gate = PredictionGate::new(window, threshold);
        let path = "/recovery/path";

        gate.record_confirmed_for_path(path);

        // Trigger the burst.
        inject_surprises(&mut gate, path, 8);
        gate.check_surprise_burst();

        // Overwrite every ring-buffer slot with a confirmation so the surprise
        // ratio falls below threshold × 0.5.
        for step in 0u64..(window as u64) {
            let outcome = GateOutcome::Confirmed {
                prediction_id: step as u32,
                timing_error_ms: 0.5,
            };
            gate.record_outcome(&outcome);
        }

        let burst = gate.check_surprise_burst();
        assert!(!burst, "Burst should have subsided");
        assert!(
            gate.is_gating_enabled(path),
            "Gating should be re-enabled after recovery"
        );
    }

    // ── 9. Reset clears path stats ────────────────────────────────────────────

    #[test]
    fn test_gate_reset_gate() {
        let mut gate = PredictionGate::new(64, 0.3);
        let path = "/reset/path";

        // Accumulate some state first.
        (0..20).for_each(|_| gate.record_confirmed_for_path(path));
        inject_surprises(&mut gate, path, 5);

        let (conf, surp, miss, tol) = gate.get_path_stats(path).unwrap();
        assert!(conf > 0 || surp > 0, "Should have accumulated counts");
        assert!(tol != TOLERANCE_START_MS || conf > 0, "Tolerance should have changed");
        let _ = (miss, tol); // suppress warnings

        gate.reset_gate(path);

        let (conf2, surp2, miss2, tol2) = gate.get_path_stats(path).unwrap();
        assert_eq!(conf2, 0);
        assert_eq!(surp2, 0);
        assert_eq!(miss2, 0);
        assert!(
            (tol2 - TOLERANCE_START_MS).abs() < 0.001,
            "Tolerance should reset to {}ms, got {}ms",
            TOLERANCE_START_MS, tol2
        );
    }
}
|
|
@@ -82,6 +82,19 @@ pub struct NodeInfo {
|
|
| 82 |
pub last_access_ns: u64,
|
| 83 |
}
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
/// The access graph — learns memory access topology.
|
| 86 |
///
|
| 87 |
/// Exposed to Python via PyO3.
|
|
@@ -107,10 +120,7 @@ pub struct AccessGraph {
|
|
| 107 |
cluster_map: Vec<Option<u32>>,
|
| 108 |
}
|
| 109 |
|
| 110 |
-
#[cfg_attr(feature = "python", pymethods)]
|
| 111 |
impl AccessGraph {
|
| 112 |
-
#[cfg_attr(feature = "python", new)]
|
| 113 |
-
#[cfg_attr(feature = "python", pyo3(signature = (causal_window_ns=5_000_000, cluster_threshold=0.7)))]
|
| 114 |
pub fn new(causal_window_ns: u64, cluster_threshold: f64) -> Self {
|
| 115 |
Self {
|
| 116 |
path_to_id: HashMap::new(),
|
|
@@ -197,11 +207,6 @@ impl AccessGraph {
|
|
| 197 |
self.edges.len()
|
| 198 |
}
|
| 199 |
|
| 200 |
-
/// Get strong edge count (weight >= threshold).
|
| 201 |
-
fn strong_edge_count(&self, min_weight: f64) -> usize {
|
| 202 |
-
self.edges.values().filter(|e| e.weight >= min_weight).count()
|
| 203 |
-
}
|
| 204 |
-
|
| 205 |
/// Get cluster count.
|
| 206 |
pub fn cluster_count(&self) -> usize {
|
| 207 |
self.clusters.len()
|
|
@@ -214,8 +219,51 @@ impl AccessGraph {
|
|
| 214 |
.collect()
|
| 215 |
}
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
/// Get top edges by weight as (source_path, target_path, count, mean_delta_ms, weight).
|
| 218 |
-
fn get_top_edges(&self, limit: usize) -> Vec<(String, String, u32, f64, f64)> {
|
| 219 |
let mut edges: Vec<_> = self.edges.values().collect();
|
| 220 |
edges.sort_by(|a, b| b.weight.partial_cmp(&a.weight).unwrap());
|
| 221 |
edges.iter()
|
|
@@ -228,14 +276,6 @@ impl AccessGraph {
|
|
| 228 |
.collect()
|
| 229 |
}
|
| 230 |
|
| 231 |
-
/// Check if graph has been built.
|
| 232 |
-
fn is_built(&self) -> bool {
|
| 233 |
-
self.built
|
| 234 |
-
}
|
| 235 |
-
}
|
| 236 |
-
|
| 237 |
-
// Non-PyO3 internal methods
|
| 238 |
-
impl AccessGraph {
|
| 239 |
fn get_or_create_node(&mut self, path: &str) -> u32 {
|
| 240 |
if let Some(&id) = self.path_to_id.get(path) {
|
| 241 |
return id;
|
|
@@ -259,26 +299,19 @@ impl AccessGraph {
|
|
| 259 |
return;
|
| 260 |
}
|
| 261 |
|
| 262 |
-
// Build
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
*cocount.entry((*tgt, *src)).or_default() += edge.count;
|
| 267 |
-
}
|
| 268 |
-
|
| 269 |
-
// Build adjacency from pairs above threshold
|
| 270 |
let mut adjacency: Vec<Vec<u32>> = vec![Vec::new(); n];
|
| 271 |
-
for
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
adjacency[i].push(j as u32);
|
| 280 |
-
adjacency[j].push(i as u32);
|
| 281 |
-
}
|
| 282 |
}
|
| 283 |
}
|
| 284 |
|
|
@@ -374,6 +407,42 @@ impl AccessGraph {
|
|
| 374 |
self.nodes.get(id as usize).map(|n| n.path.as_str())
|
| 375 |
}
|
| 376 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
/// Get node ID for a path.
|
| 378 |
pub fn get_id(&self, path: &str) -> Option<u32> {
|
| 379 |
self.path_to_id.get(path).copied()
|
|
|
|
| 82 |
pub last_access_ns: u64,
|
| 83 |
}
|
| 84 |
|
| 85 |
+
/// Holographic node boundary — lightweight representation for cold nodes.
|
| 86 |
+
/// Fixed size, no heap allocation. Enough for Lenia temperature management,
|
| 87 |
+
/// cluster membership checks, and promotion decisions.
|
| 88 |
+
/// Full NodeInfo is reconstructed from the path_to_id map only when needed.
|
| 89 |
+
#[derive(Clone, Copy, Debug)]
pub struct NodeBoundary {
    /// Node id, copied from the node this boundary summarises.
    pub id: u32,
    /// Access count copied from the node.
    pub access_count: u32,
    /// Last-access timestamp (nanoseconds) copied from the node.
    pub last_access_ns: u64,
    /// Cluster the node is assigned to, or `None` when it has no cluster entry.
    pub cluster_id: Option<u32>,
    /// Number of edges that touch this node.
    pub edge_count: u16,
}
|
| 97 |
+
|
| 98 |
/// The access graph — learns memory access topology.
|
| 99 |
///
|
| 100 |
/// Exposed to Python via PyO3.
|
|
|
|
| 120 |
cluster_map: Vec<Option<u32>>,
|
| 121 |
}
|
| 122 |
|
|
|
|
| 123 |
impl AccessGraph {
|
|
|
|
|
|
|
| 124 |
pub fn new(causal_window_ns: u64, cluster_threshold: f64) -> Self {
|
| 125 |
Self {
|
| 126 |
path_to_id: HashMap::new(),
|
|
|
|
| 207 |
self.edges.len()
|
| 208 |
}
|
| 209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
/// Get cluster count.
|
| 211 |
pub fn cluster_count(&self) -> usize {
|
| 212 |
self.clusters.len()
|
|
|
|
| 219 |
.collect()
|
| 220 |
}
|
| 221 |
|
| 222 |
+
/// Check if graph has been built.
|
| 223 |
+
pub fn is_built(&self) -> bool {
|
| 224 |
+
self.built
|
| 225 |
+
}
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
// Python-facing surface of `AccessGraph`, compiled only with the `python`
// feature. Each method is a thin forwarder to the plain-Rust method of the
// same name (minus the `py_` prefix), exposed to Python under that name.
#[cfg(feature = "python")]
#[pymethods]
impl AccessGraph {
    /// Python constructor; both arguments are optional on the Python side.
    #[new]
    #[pyo3(signature = (causal_window_ns=5_000_000, cluster_threshold=0.7))]
    fn py_new(causal_window_ns: u64, cluster_threshold: f64) -> Self {
        AccessGraph::new(causal_window_ns, cluster_threshold)
    }

    /// Forward to `build` with the given event tuples.
    #[pyo3(name = "build")]
    fn py_build(&mut self, events: Vec<(u64, String, u64)>) {
        AccessGraph::build(self, events);
    }

    /// Forward to `node_count`.
    #[pyo3(name = "node_count")]
    fn py_node_count(&self) -> usize {
        AccessGraph::node_count(self)
    }

    /// Forward to `edge_count`.
    #[pyo3(name = "edge_count")]
    fn py_edge_count(&self) -> usize {
        AccessGraph::edge_count(self)
    }

    /// Forward to `cluster_count`.
    #[pyo3(name = "cluster_count")]
    fn py_cluster_count(&self) -> usize {
        AccessGraph::cluster_count(self)
    }

    /// Forward to `get_node_stats`.
    #[pyo3(name = "get_node_stats")]
    fn py_get_node_stats(&self) -> Vec<(String, u32)> {
        AccessGraph::get_node_stats(self)
    }
}
|
| 262 |
+
|
| 263 |
+
// Non-PyO3 internal methods
|
| 264 |
+
impl AccessGraph {
|
| 265 |
/// Get top edges by weight as (source_path, target_path, count, mean_delta_ms, weight).
|
| 266 |
+
pub fn get_top_edges(&self, limit: usize) -> Vec<(String, String, u32, f64, f64)> {
|
| 267 |
let mut edges: Vec<_> = self.edges.values().collect();
|
| 268 |
edges.sort_by(|a, b| b.weight.partial_cmp(&a.weight).unwrap());
|
| 269 |
edges.iter()
|
|
|
|
| 276 |
.collect()
|
| 277 |
}
|
| 278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
fn get_or_create_node(&mut self, path: &str) -> u32 {
|
| 280 |
if let Some(&id) = self.path_to_id.get(path) {
|
| 281 |
return id;
|
|
|
|
| 299 |
return;
|
| 300 |
}
|
| 301 |
|
| 302 |
+
// Build adjacency directly from edges — O(E), not O(N²).
|
| 303 |
+
// Only node pairs that actually have causal edges get compared.
|
| 304 |
+
// The edges are the evidence; pairs without edges have no
|
| 305 |
+
// co-access relationship and can't be in the same cluster.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
let mut adjacency: Vec<Vec<u32>> = vec![Vec::new(); n];
|
| 307 |
+
for ((src, tgt), edge) in &self.edges {
|
| 308 |
+
let min_count = self.nodes[*src as usize].access_count
|
| 309 |
+
.min(self.nodes[*tgt as usize].access_count)
|
| 310 |
+
.max(1);
|
| 311 |
+
let ratio = edge.count as f64 / min_count as f64;
|
| 312 |
+
if ratio >= self.cluster_threshold {
|
| 313 |
+
adjacency[*src as usize].push(*tgt);
|
| 314 |
+
adjacency[*tgt as usize].push(*src);
|
|
|
|
|
|
|
|
|
|
| 315 |
}
|
| 316 |
}
|
| 317 |
|
|
|
|
| 407 |
self.nodes.get(id as usize).map(|n| n.path.as_str())
|
| 408 |
}
|
| 409 |
|
| 410 |
+
/// Get holographic boundary for a node — lightweight, no heap allocation.
|
| 411 |
+
/// Enough for temperature management and promotion decisions.
|
| 412 |
+
pub fn get_boundary(&self, id: u32) -> Option<NodeBoundary> {
|
| 413 |
+
let node = self.nodes.get(id as usize)?;
|
| 414 |
+
let edge_count = self.edges.iter()
|
| 415 |
+
.filter(|((s, t), _)| *s == id || *t == id)
|
| 416 |
+
.count() as u16;
|
| 417 |
+
Some(NodeBoundary {
|
| 418 |
+
id: node.id,
|
| 419 |
+
access_count: node.access_count,
|
| 420 |
+
last_access_ns: node.last_access_ns,
|
| 421 |
+
cluster_id: self.cluster_map.get(id as usize).and_then(|c| *c),
|
| 422 |
+
edge_count,
|
| 423 |
+
})
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
/// Get boundaries for all nodes — bulk operation for Lenia field seeding.
|
| 427 |
+
/// O(N + E) — scans edges once to count per-node.
|
| 428 |
+
pub fn get_all_boundaries(&self) -> Vec<NodeBoundary> {
|
| 429 |
+
let n = self.nodes.len();
|
| 430 |
+
let mut edge_counts = vec![0u16; n];
|
| 431 |
+
for ((s, t), _) in &self.edges {
|
| 432 |
+
if (*s as usize) < n { edge_counts[*s as usize] = edge_counts[*s as usize].saturating_add(1); }
|
| 433 |
+
if (*t as usize) < n { edge_counts[*t as usize] = edge_counts[*t as usize].saturating_add(1); }
|
| 434 |
+
}
|
| 435 |
+
self.nodes.iter().enumerate().map(|(i, node)| {
|
| 436 |
+
NodeBoundary {
|
| 437 |
+
id: node.id,
|
| 438 |
+
access_count: node.access_count,
|
| 439 |
+
last_access_ns: node.last_access_ns,
|
| 440 |
+
cluster_id: self.cluster_map.get(i).and_then(|c| *c),
|
| 441 |
+
edge_count: edge_counts[i],
|
| 442 |
+
}
|
| 443 |
+
}).collect()
|
| 444 |
+
}
|
| 445 |
+
|
| 446 |
/// Get node ID for a path.
|
| 447 |
pub fn get_id(&self, path: &str) -> Option<u32> {
|
| 448 |
self.path_to_id.get(path).copied()
|
|
@@ -0,0 +1,552 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Keyframe/Delta Encoding — video codec model applied to memory.
|
| 2 |
+
//!
|
| 3 |
+
//! Instead of storing full snapshots repeatedly, store one compressed
|
| 4 |
+
//! keyframe + tiny sparse diffs (deltas). A 64KB region where only
|
| 5 |
+
//! 200 bytes changed produces a ~200-byte delta, not another 64KB copy.
|
| 6 |
+
//!
|
| 7 |
+
//! Design:
|
| 8 |
+
//! - Keyframes are LZ4-compressed full snapshots.
|
| 9 |
+
//! - Deltas are sparse: (offset, changed_bytes) pairs produced by
|
| 10 |
+
//! XOR-walking the current data against the keyframe baseline.
|
| 11 |
+
//! - Reconstruction applies all deltas in sequence.
|
| 12 |
+
//! - After enough deltas (or enough idle observation cycles), the
|
| 13 |
+
//! store can consolidate or mark a frame read-only.
|
| 14 |
+
|
| 15 |
+
use std::collections::HashMap;
|
| 16 |
+
|
| 17 |
+
// ---------------------------------------------------------------------------
|
| 18 |
+
// Simple FNV-1a-style hash — no external dep required
|
| 19 |
+
// ---------------------------------------------------------------------------
|
| 20 |
+
|
| 21 |
+
/// Hash `data` with 64-bit FNV-1a.
///
/// Starts from the FNV offset basis and, for each byte, XORs the byte into
/// the state and then multiplies by the FNV prime (wrapping on overflow).
/// An empty slice hashes to the offset basis itself.
fn hash_bytes(data: &[u8]) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;

    data.iter().fold(FNV_OFFSET_BASIS, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
|
| 29 |
+
|
| 30 |
+
// ---------------------------------------------------------------------------
|
| 31 |
+
// Delta
|
| 32 |
+
// ---------------------------------------------------------------------------
|
| 33 |
+
|
| 34 |
+
/// A sparse record of bytes that changed relative to the keyframe baseline.
///
/// `changed_ranges` is a list of `(offset, changed_bytes)` pairs.
/// Only the bytes that differ are stored, so a 64KB region with 10
/// changed bytes results in roughly 10 bytes of delta payload.
pub struct Delta {
    /// Identifier assigned by whoever recorded the delta
    pub id: u32,
    /// Timestamp supplied when the delta was built, in nanoseconds
    pub timestamp_ns: u64,
    /// Sparse changed ranges: (byte offset into original, changed bytes)
    pub changed_ranges: Vec<(usize, Vec<u8>)>,
    /// Total payload bytes across all ranges (useful for budgeting)
    pub cumulative_change_bytes: usize,
}

impl Delta {
    /// Overwrite `buf` with every changed range that fits inside it.
    ///
    /// A range is skipped (never partially applied) when it extends past the
    /// end of `buf` or when `offset + len` would overflow `usize`; the
    /// buffer is never resized here. Callers that want every range applied
    /// must pass a buffer large enough to hold them all.
    fn apply(&self, buf: &mut [u8]) {
        for (offset, bytes) in &self.changed_ranges {
            // `checked_add` guards hand-built deltas with absurd offsets;
            // `get_mut` performs the bounds check without panicking.
            let target = offset
                .checked_add(bytes.len())
                .and_then(|end| buf.get_mut(*offset..end));
            if let Some(dst) = target {
                dst.copy_from_slice(bytes);
            }
        }
    }

    /// Does this delta touch the half-open byte range `[range_start, range_end)`?
    fn touches_range(&self, range_start: usize, range_end: usize) -> bool {
        self.changed_ranges.iter().any(|(offset, bytes)| {
            // Saturate rather than overflow: a range that "ends" at
            // usize::MAX still compares correctly below.
            let end = offset.saturating_add(bytes.len());
            // Ranges overlap when start < other_end && end > other_start
            *offset < range_end && end > range_start
        })
    }
}
|
| 72 |
+
|
| 73 |
+
// ---------------------------------------------------------------------------
|
| 74 |
+
// Keyframe
|
| 75 |
+
// ---------------------------------------------------------------------------
|
| 76 |
+
|
| 77 |
+
/// A compressed full snapshot with an attached chain of sparse deltas.
|
| 78 |
+
pub struct Keyframe {
|
| 79 |
+
pub id: u32,
|
| 80 |
+
/// LZ4-compressed bytes of the original snapshot
|
| 81 |
+
compressed_data: Vec<u8>,
|
| 82 |
+
/// Byte length before compression (needed for decompression)
|
| 83 |
+
original_size: usize,
|
| 84 |
+
/// Integrity hash over the original uncompressed bytes
|
| 85 |
+
original_hash: u64,
|
| 86 |
+
/// Ordered chain of deltas recorded after this keyframe was taken
|
| 87 |
+
deltas: Vec<Delta>,
|
| 88 |
+
/// When true, no further deltas are expected (memory went cold)
|
| 89 |
+
pub is_read_only: bool,
|
| 90 |
+
/// How many `mark_observation_cycle` calls have fired with no new delta
|
| 91 |
+
observation_cycles: u32,
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
impl Keyframe {
|
| 95 |
+
fn new(id: u32, data: &[u8]) -> Self {
|
| 96 |
+
let original_hash = hash_bytes(data);
|
| 97 |
+
let compressed_data = lz4_flex::compress_prepend_size(data);
|
| 98 |
+
Self {
|
| 99 |
+
id,
|
| 100 |
+
compressed_data,
|
| 101 |
+
original_size: data.len(),
|
| 102 |
+
original_hash,
|
| 103 |
+
deltas: Vec::new(),
|
| 104 |
+
is_read_only: false,
|
| 105 |
+
observation_cycles: 0,
|
| 106 |
+
}
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
/// Decompress the keyframe back to its original bytes.
|
| 110 |
+
fn decompress(&self) -> Option<Vec<u8>> {
|
| 111 |
+
lz4_flex::decompress_size_prepended(&self.compressed_data).ok()
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
/// Reconstruct the full data by decompressing then replaying all deltas.
|
| 115 |
+
fn reconstruct(&self) -> Option<Vec<u8>> {
|
| 116 |
+
let mut buf = self.decompress()?;
|
| 117 |
+
for delta in &self.deltas {
|
| 118 |
+
delta.apply(&mut buf);
|
| 119 |
+
}
|
| 120 |
+
Some(buf)
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
/// Reconstruct only the slice `[offset, offset+length)`.
|
| 124 |
+
///
|
| 125 |
+
/// We still have to decompress the whole keyframe because LZ4 is not
|
| 126 |
+
/// randomly-accessible, but we only apply deltas that actually touch
|
| 127 |
+
/// the requested range, which is cheaper for large delta chains.
|
| 128 |
+
fn reconstruct_range(&self, offset: usize, length: usize) -> Option<Vec<u8>> {
|
| 129 |
+
let range_end = offset.checked_add(length)?;
|
| 130 |
+
if range_end > self.original_size {
|
| 131 |
+
return None;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
let mut buf = self.decompress()?;
|
| 135 |
+
|
| 136 |
+
// Only replay deltas that overlap the requested range
|
| 137 |
+
for delta in &self.deltas {
|
| 138 |
+
if delta.touches_range(offset, range_end) {
|
| 139 |
+
delta.apply(&mut buf);
|
| 140 |
+
}
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
Some(buf[offset..range_end].to_vec())
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
/// Build a sparse delta from `current_data` vs the keyframe baseline.
|
| 147 |
+
///
|
| 148 |
+
/// XOR walk: collect contiguous runs where XOR != 0 into
|
| 149 |
+
/// (offset, actual_bytes_from_current) pairs.
|
| 150 |
+
/// Returns `None` when there are no changes at all.
|
| 151 |
+
fn build_delta(&self, id: u32, timestamp_ns: u64, current_data: &[u8]) -> Option<Delta> {
|
| 152 |
+
let baseline = self.decompress()?;
|
| 153 |
+
// Apply existing deltas so we diff against the *current* logical state,
|
| 154 |
+
// not just the raw keyframe bytes.
|
| 155 |
+
let mut logical = baseline;
|
| 156 |
+
for d in &self.deltas {
|
| 157 |
+
d.apply(&mut logical);
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
let cmp_len = logical.len().min(current_data.len());
|
| 161 |
+
let mut changed_ranges: Vec<(usize, Vec<u8>)> = Vec::new();
|
| 162 |
+
|
| 163 |
+
let mut i = 0;
|
| 164 |
+
while i < cmp_len {
|
| 165 |
+
if logical[i] != current_data[i] {
|
| 166 |
+
// Start of a changed run
|
| 167 |
+
let run_start = i;
|
| 168 |
+
let mut run: Vec<u8> = Vec::new();
|
| 169 |
+
while i < cmp_len && logical[i] != current_data[i] {
|
| 170 |
+
run.push(current_data[i]);
|
| 171 |
+
i += 1;
|
| 172 |
+
}
|
| 173 |
+
changed_ranges.push((run_start, run));
|
| 174 |
+
} else {
|
| 175 |
+
i += 1;
|
| 176 |
+
}
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
// Handle the case where current_data is longer than logical
|
| 180 |
+
if current_data.len() > logical.len() {
|
| 181 |
+
let tail = current_data[logical.len()..].to_vec();
|
| 182 |
+
changed_ranges.push((logical.len(), tail));
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
if changed_ranges.is_empty() {
|
| 186 |
+
return None;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
let cumulative_change_bytes = changed_ranges.iter().map(|(_, v)| v.len()).sum();
|
| 190 |
+
Some(Delta {
|
| 191 |
+
id,
|
| 192 |
+
timestamp_ns,
|
| 193 |
+
changed_ranges,
|
| 194 |
+
cumulative_change_bytes,
|
| 195 |
+
})
|
| 196 |
+
}
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
// ---------------------------------------------------------------------------
|
| 200 |
+
// KeyframeStore
|
| 201 |
+
// ---------------------------------------------------------------------------
|
| 202 |
+
|
| 203 |
+
/// Central store for all keyframes and their delta chains.
|
| 204 |
+
pub struct KeyframeStore {
|
| 205 |
+
frames: HashMap<u32, Keyframe>,
|
| 206 |
+
next_id: u32,
|
| 207 |
+
/// Maximum number of deltas before `record_delta` auto-consolidates
|
| 208 |
+
pub consolidation_threshold: usize,
|
| 209 |
+
/// Number of observation cycles with no deltas before marking read-only
|
| 210 |
+
pub read_only_threshold: u32,
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
impl KeyframeStore {
|
| 214 |
+
pub fn new(consolidation_threshold: usize, read_only_threshold: u32) -> Self {
|
| 215 |
+
Self {
|
| 216 |
+
frames: HashMap::new(),
|
| 217 |
+
next_id: 0,
|
| 218 |
+
consolidation_threshold,
|
| 219 |
+
read_only_threshold,
|
| 220 |
+
}
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
// -----------------------------------------------------------------------
|
| 224 |
+
// Core API
|
| 225 |
+
// -----------------------------------------------------------------------
|
| 226 |
+
|
| 227 |
+
/// Compress `data` as a new keyframe and return its ID.
|
| 228 |
+
pub fn take_keyframe(&mut self, data: &[u8]) -> u32 {
|
| 229 |
+
let id = self.next_id;
|
| 230 |
+
self.next_id += 1;
|
| 231 |
+
self.frames.insert(id, Keyframe::new(id, data));
|
| 232 |
+
id
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
/// Record a delta for keyframe `id` vs `current_data`.
|
| 236 |
+
///
|
| 237 |
+
/// Only the changed bytes are stored (sparse). If nothing changed,
|
| 238 |
+
/// `None` is returned and nothing is stored. When the delta chain
|
| 239 |
+
/// reaches `consolidation_threshold`, the frame is automatically
|
| 240 |
+
/// consolidated before the new delta is appended.
|
| 241 |
+
///
|
| 242 |
+
/// Returns the delta ID on success.
|
| 243 |
+
pub fn record_delta(&mut self, id: u32, current_data: &[u8]) -> Option<u32> {
|
| 244 |
+
// Build the delta first (immutable borrow ends before we mutate)
|
| 245 |
+
let (delta_id, delta) = {
|
| 246 |
+
let frame = self.frames.get(&id)?;
|
| 247 |
+
if frame.is_read_only {
|
| 248 |
+
return None;
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
let delta_id = frame.deltas.len() as u32;
|
| 252 |
+
let ts = std::time::SystemTime::now()
|
| 253 |
+
.duration_since(std::time::UNIX_EPOCH)
|
| 254 |
+
.map(|d| d.as_nanos() as u64)
|
| 255 |
+
.unwrap_or(0);
|
| 256 |
+
|
| 257 |
+
let delta = frame.build_delta(delta_id, ts, current_data)?;
|
| 258 |
+
(delta_id, delta)
|
| 259 |
+
};
|
| 260 |
+
|
| 261 |
+
// Auto-consolidate if we hit the threshold
|
| 262 |
+
{
|
| 263 |
+
let frame = self.frames.get(&id)?;
|
| 264 |
+
if frame.deltas.len() >= self.consolidation_threshold {
|
| 265 |
+
// We need to consolidate; do it before appending
|
| 266 |
+
let _ = frame; // end borrow (drop reference, not value)
|
| 267 |
+
self.consolidate(id);
|
| 268 |
+
}
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
let frame = self.frames.get_mut(&id)?;
|
| 272 |
+
frame.observation_cycles = 0; // activity resets the counter
|
| 273 |
+
frame.deltas.push(delta);
|
| 274 |
+
Some(delta_id)
|
| 275 |
+
}
|
| 276 |
+
|
| 277 |
+
/// Reconstruct the full logical data for keyframe `id`.
|
| 278 |
+
pub fn reconstruct(&self, id: u32) -> Option<Vec<u8>> {
|
| 279 |
+
self.frames.get(&id)?.reconstruct()
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
/// Reconstruct only `length` bytes starting at `offset` for keyframe `id`.
|
| 283 |
+
pub fn reconstruct_range(&self, id: u32, offset: usize, length: usize) -> Option<Vec<u8>> {
|
| 284 |
+
self.frames.get(&id)?.reconstruct_range(offset, length)
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
/// Fold the full delta chain back into a fresh compressed keyframe,
|
| 288 |
+
/// resetting the delta chain to empty.
|
| 289 |
+
pub fn consolidate(&mut self, id: u32) {
|
| 290 |
+
let reconstructed = match self.frames.get(&id).and_then(|f| f.reconstruct()) {
|
| 291 |
+
Some(data) => data,
|
| 292 |
+
None => return,
|
| 293 |
+
};
|
| 294 |
+
|
| 295 |
+
if let Some(frame) = self.frames.get_mut(&id) {
|
| 296 |
+
let hash_before = frame.original_hash;
|
| 297 |
+
// Rebuild from scratch: fresh LZ4 + empty delta chain
|
| 298 |
+
let new_compressed = lz4_flex::compress_prepend_size(&reconstructed);
|
| 299 |
+
frame.compressed_data = new_compressed;
|
| 300 |
+
frame.original_size = reconstructed.len();
|
| 301 |
+
frame.original_hash = hash_bytes(&reconstructed);
|
| 302 |
+
frame.deltas.clear();
|
| 303 |
+
let _ = hash_before; // hash of original keyframe no longer relevant
|
| 304 |
+
}
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
/// Check (and apply) the read-only transition for keyframe `id`.
|
| 308 |
+
///
|
| 309 |
+
/// Returns `true` if the frame is now (or was already) read-only.
|
| 310 |
+
pub fn check_read_only(&mut self, id: u32) -> bool {
|
| 311 |
+
if let Some(frame) = self.frames.get_mut(&id) {
|
| 312 |
+
if !frame.is_read_only
|
| 313 |
+
&& frame.deltas.is_empty()
|
| 314 |
+
&& frame.observation_cycles >= self.read_only_threshold
|
| 315 |
+
{
|
| 316 |
+
frame.is_read_only = true;
|
| 317 |
+
}
|
| 318 |
+
frame.is_read_only
|
| 319 |
+
} else {
|
| 320 |
+
false
|
| 321 |
+
}
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
/// Increment the observation counter for keyframe `id`.
|
| 325 |
+
///
|
| 326 |
+
/// Call this on every "tick" or scan cycle. The counter only advances
|
| 327 |
+
/// when there are no new deltas (activity resets it to zero in
|
| 328 |
+
/// `record_delta`). After `read_only_threshold` idle cycles the frame
|
| 329 |
+
/// transitions to read-only via `check_read_only`.
|
| 330 |
+
pub fn mark_observation_cycle(&mut self, id: u32) {
|
| 331 |
+
if let Some(frame) = self.frames.get_mut(&id) {
|
| 332 |
+
if !frame.is_read_only {
|
| 333 |
+
frame.observation_cycles += 1;
|
| 334 |
+
// Automatically apply the transition check each cycle
|
| 335 |
+
if frame.deltas.is_empty()
|
| 336 |
+
&& frame.observation_cycles >= self.read_only_threshold
|
| 337 |
+
{
|
| 338 |
+
frame.is_read_only = true;
|
| 339 |
+
}
|
| 340 |
+
}
|
| 341 |
+
}
|
| 342 |
+
}
|
| 343 |
+
|
| 344 |
+
// -----------------------------------------------------------------------
|
| 345 |
+
// Accessors / diagnostics
|
| 346 |
+
// -----------------------------------------------------------------------
|
| 347 |
+
|
| 348 |
+
pub fn delta_count(&self, id: u32) -> usize {
|
| 349 |
+
self.frames.get(&id).map(|f| f.deltas.len()).unwrap_or(0)
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
pub fn is_read_only(&self, id: u32) -> bool {
|
| 353 |
+
self.frames.get(&id).map(|f| f.is_read_only).unwrap_or(false)
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
pub fn original_hash(&self, id: u32) -> Option<u64> {
|
| 357 |
+
self.frames.get(&id).map(|f| f.original_hash)
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
pub fn frame_count(&self) -> usize {
|
| 361 |
+
self.frames.len()
|
| 362 |
+
}
|
| 363 |
+
}
|
| 364 |
+
|
| 365 |
+
// ---------------------------------------------------------------------------
|
| 366 |
+
// Tests
|
| 367 |
+
// ---------------------------------------------------------------------------
|
| 368 |
+
|
| 369 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Store shared by most tests: consolidate after 10 deltas, go
    /// read-only after 3 idle observation cycles.
    fn make_store() -> KeyframeStore {
        KeyframeStore::new(10, 3)
    }

    /// A keyframe with no deltas must reconstruct to the exact input bytes.
    #[test]
    fn test_keyframe_roundtrip() {
        let original: Vec<u8> = (0..4096usize).map(|i| (i % 256) as u8).collect();

        let mut store = make_store();
        let id = store.take_keyframe(&original);

        let restored = store.reconstruct(id).expect("reconstruct should succeed");
        assert_eq!(restored, original, "Roundtrip must be byte-identical");
    }

    /// Changing 10 bytes of a 64KB region must store a 10-byte delta.
    #[test]
    fn test_delta_captures_changes() {
        let mut store = make_store();

        // 64KB baseline of 0xAA bytes
        let baseline = vec![0xAAu8; 65_536];
        let id = store.take_keyframe(&baseline);

        // Modify exactly 10 bytes starting at offset 1000
        let mut modified = baseline.clone();
        for byte in &mut modified[1000..1010] {
            *byte = 0xFF;
        }

        let delta_id = store
            .record_delta(id, &modified)
            .expect("Should store a non-empty delta");
        assert_eq!(delta_id, 0);

        // The stored payload must be about 10 bytes, not another 64KB copy
        let payload_len = store.frames[&id].deltas[0].cumulative_change_bytes;
        assert_eq!(payload_len, 10,
            "Delta payload must be sparse (~10 bytes), got {}",
            payload_len);

        // Reconstruction must match the modified data
        let restored = store.reconstruct(id).expect("reconstruct");
        assert_eq!(restored, modified);
    }

    /// Five chained deltas must replay to the final mutated state.
    #[test]
    fn test_multi_delta_reconstruction() {
        let mut store = make_store();

        let mut data: Vec<u8> = vec![0u8; 8192];
        let id = store.take_keyframe(&data);

        // Apply 5 successive mutations, recording a delta after each
        for value in 1u8..=5 {
            let offset = usize::from(value - 1) * 100;
            data[offset] = value;
            store
                .record_delta(id, &data)
                .expect("non-empty delta expected");
        }

        assert_eq!(store.delta_count(id), 5);

        let restored = store.reconstruct(id).expect("reconstruct");
        assert_eq!(restored, data, "Multi-delta reconstruction must match final state");
    }

    /// Consolidation empties the chain without changing the logical data.
    #[test]
    fn test_consolidation_resets_deltas() {
        let mut store = make_store();

        let mut data = vec![0u8; 4096];
        let id = store.take_keyframe(&data);

        // Record a few deltas
        for (slot, value) in (0usize..3).zip(10u8..) {
            data[slot * 50] = value;
            store.record_delta(id, &data).unwrap();
        }
        assert_eq!(store.delta_count(id), 3);

        store.consolidate(id);

        assert_eq!(store.delta_count(id), 0, "Consolidation must clear the delta chain");

        // Reconstruction after consolidation must still produce the correct data
        let restored = store.reconstruct(id).expect("reconstruct after consolidate");
        assert_eq!(restored, data, "Data must survive consolidation");
    }

    /// A frame with no deltas turns read-only after the threshold of cycles.
    #[test]
    fn test_read_only_detection() {
        // read_only_threshold = 3 cycles
        let mut store = KeyframeStore::new(10, 3);
        let id = store.take_keyframe(&[42u8; 1024]);

        assert!(!store.is_read_only(id));

        // Fewer than threshold cycles — not yet read-only
        for _ in 0..2 {
            store.mark_observation_cycle(id);
        }
        assert!(!store.is_read_only(id));

        // Third cycle crosses the threshold
        store.mark_observation_cycle(id);
        assert!(store.is_read_only(id), "Should be read-only after threshold cycles with no deltas");

        // check_read_only should also return true
        assert!(store.check_read_only(id));
    }

    /// Range reconstruction must agree with the full logical data.
    #[test]
    fn test_selective_reconstruction() {
        let mut store = make_store();

        // 64KB baseline — every byte equals its index mod 256
        let original: Vec<u8> = (0..65_536usize).map(|i| (i % 256) as u8).collect();
        let id = store.take_keyframe(&original);

        // Modify bytes far outside our target range
        let mut modified = original.clone();
        for &(position, value) in &[(40_000usize, 0xFFu8), (50_000, 0xEE)] {
            modified[position] = value;
        }
        store.record_delta(id, &modified).unwrap();

        // Reconstruct a 100-byte slice at offset 0 (unaffected by the deltas)
        let slice = store
            .reconstruct_range(id, 0, 100)
            .expect("selective reconstruct");

        assert_eq!(slice.len(), 100);
        assert_eq!(&slice[..], &modified[0..100],
            "Selective range must match full reconstruction for same slice");

        // Also verify a range that DOES include a changed byte
        let changed_slice = store
            .reconstruct_range(id, 39_999, 3)
            .expect("reconstruct around changed byte");
        assert_eq!(changed_slice[1], 0xFF, "Changed byte must be visible in range reconstruct");
    }

    /// Recording identical data must store nothing.
    #[test]
    fn test_empty_delta() {
        let mut store = make_store();
        let data = vec![7u8; 2048];
        let id = store.take_keyframe(&data);

        // Record the identical data — nothing changed
        let outcome = store.record_delta(id, &data);

        assert!(outcome.is_none(), "Identical data must produce no delta");
        assert_eq!(store.delta_count(id), 0);
    }
}
|
|
@@ -26,6 +26,8 @@ use std::collections::HashMap;
|
|
| 26 |
pub struct FieldRegion {
|
| 27 |
/// Unique identifier (size-class path from pipeline)
|
| 28 |
pub id: u32,
|
|
|
|
|
|
|
| 29 |
/// Current temperature: 0.0 (frozen/cold) to 1.0 (fully hot)
|
| 30 |
pub temperature: f64,
|
| 31 |
/// Temperature at last step (for delta computation)
|
|
@@ -38,18 +40,22 @@ pub struct FieldRegion {
|
|
| 38 |
pub size_bytes: u64,
|
| 39 |
/// Number of times accessed
|
| 40 |
pub access_count: u64,
|
|
|
|
|
|
|
| 41 |
}
|
| 42 |
|
| 43 |
impl FieldRegion {
|
| 44 |
pub fn new(id: u32, size_bytes: u64) -> Self {
|
| 45 |
Self {
|
| 46 |
id,
|
|
|
|
| 47 |
temperature: 1.0, // start hot (just allocated)
|
| 48 |
prev_temperature: 1.0,
|
| 49 |
access_weight: 1.0,
|
| 50 |
decay_rate: 0.05, // 5% decay per step
|
| 51 |
size_bytes,
|
| 52 |
access_count: 1,
|
|
|
|
| 53 |
}
|
| 54 |
}
|
| 55 |
|
|
@@ -114,6 +120,9 @@ pub struct LeniaField {
|
|
| 114 |
/// (RAM budget expressed as field energy)
|
| 115 |
max_total_energy: f64,
|
| 116 |
|
|
|
|
|
|
|
|
|
|
| 117 |
/// Current total energy
|
| 118 |
total_energy: f64,
|
| 119 |
|
|
@@ -128,6 +137,15 @@ pub struct LeniaField {
|
|
| 128 |
|
| 129 |
/// Time step size (controls how fast the field evolves)
|
| 130 |
dt: f64,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
}
|
| 132 |
|
| 133 |
impl LeniaField {
|
|
@@ -145,17 +163,22 @@ impl LeniaField {
|
|
| 145 |
},
|
| 146 |
decay_rate: 0.02, // 2% cooling per step
|
| 147 |
max_total_energy: max_energy,
|
|
|
|
| 148 |
total_energy: 0.0,
|
| 149 |
cold_threshold: 0.2, // below 20% = compress
|
| 150 |
hot_threshold: 0.7, // above 70% = fully materialized
|
| 151 |
steps: 0,
|
| 152 |
dt: 0.1, // time step
|
|
|
|
|
|
|
|
|
|
| 153 |
}
|
| 154 |
}
|
| 155 |
|
| 156 |
-
/// Add a region to the field
|
| 157 |
-
pub fn add_region(&mut self, id: u32, size_bytes:
|
| 158 |
-
let region = FieldRegion::new(id, size_bytes);
|
|
|
|
| 159 |
let energy = region.temperature * (size_bytes as f64 / (1024.0 * 1024.0));
|
| 160 |
self.total_energy += energy;
|
| 161 |
self.regions.insert(id, region);
|
|
@@ -166,6 +189,46 @@ impl LeniaField {
|
|
| 166 |
self.neighbors.insert(id, neighbors);
|
| 167 |
}
|
| 168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
/// Record an access — heats up the region
|
| 170 |
pub fn access(&mut self, id: u32) {
|
| 171 |
if let Some(region) = self.regions.get_mut(&id) {
|
|
@@ -185,8 +248,11 @@ impl LeniaField {
|
|
| 185 |
/// 2. Apply growth function (determines if region heats or cools)
|
| 186 |
/// 3. Apply natural decay (everything cools)
|
| 187 |
/// 4. Enforce mass conservation (total energy bounded)
|
|
|
|
|
|
|
| 188 |
pub fn step(&mut self) {
|
| 189 |
self.steps += 1;
|
|
|
|
| 190 |
|
| 191 |
// Phase 1: Compute new temperatures
|
| 192 |
let mut new_temps: HashMap<u32, f64> = HashMap::new();
|
|
@@ -210,13 +276,19 @@ impl LeniaField {
|
|
| 210 |
new_temps.insert(id, new_temp);
|
| 211 |
}
|
| 212 |
|
| 213 |
-
// Phase 2: Apply new temperatures
|
| 214 |
self.total_energy = 0.0;
|
| 215 |
for (&id, region) in self.regions.iter_mut() {
|
| 216 |
region.prev_temperature = region.temperature;
|
| 217 |
if let Some(&new_temp) = new_temps.get(&id) {
|
| 218 |
region.temperature = new_temp;
|
| 219 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
// Accumulate energy (temperature * size in MB)
|
| 221 |
self.total_energy += region.temperature
|
| 222 |
* (region.size_bytes as f64 / (1024.0 * 1024.0));
|
|
@@ -230,9 +302,45 @@ impl LeniaField {
|
|
| 230 |
let scale = self.max_total_energy / self.total_energy;
|
| 231 |
for region in self.regions.values_mut() {
|
| 232 |
region.temperature *= scale;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
}
|
| 234 |
self.total_energy = self.max_total_energy;
|
| 235 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
}
|
| 237 |
|
| 238 |
/// Compute neighborhood activation for a region
|
|
@@ -316,6 +424,75 @@ impl LeniaField {
|
|
| 316 |
hot_threshold: self.hot_threshold,
|
| 317 |
}
|
| 318 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
}
|
| 320 |
|
| 321 |
/// Field summary
|
|
@@ -361,13 +538,15 @@ impl LeniaSummary {
|
|
| 361 |
mod tests {
|
| 362 |
use super::*;
|
| 363 |
|
|
|
|
|
|
|
| 364 |
#[test]
|
| 365 |
fn test_field_creation() {
|
| 366 |
let mut field = LeniaField::new(100.0); // 100MB budget
|
| 367 |
|
| 368 |
-
field.add_region(0, 1_048_576);
|
| 369 |
-
field.add_region(1, 1_048_576);
|
| 370 |
-
field.add_region(2, 1_048_576);
|
| 371 |
|
| 372 |
assert_eq!(field.regions.len(), 3);
|
| 373 |
|
|
@@ -379,7 +558,7 @@ mod tests {
|
|
| 379 |
fn test_decay_makes_cold() {
|
| 380 |
let mut field = LeniaField::new(100.0);
|
| 381 |
|
| 382 |
-
field.add_region(0, 1_048_576);
|
| 383 |
|
| 384 |
// Step many times without access — should cool down
|
| 385 |
for _ in 0..100 {
|
|
@@ -394,8 +573,8 @@ mod tests {
|
|
| 394 |
fn test_access_keeps_hot() {
|
| 395 |
let mut field = LeniaField::new(100.0);
|
| 396 |
|
| 397 |
-
field.add_region(0, 1_048_576);
|
| 398 |
-
field.add_region(1, 1_048_576);
|
| 399 |
|
| 400 |
// Step and access region 0, ignore region 1
|
| 401 |
for _ in 0..50 {
|
|
@@ -419,7 +598,7 @@ mod tests {
|
|
| 419 |
|
| 420 |
// Add 5 x 1MB regions — 5MB total, budget is 2MB
|
| 421 |
for i in 0..5 {
|
| 422 |
-
field.add_region(i, 1_048_576);
|
| 423 |
field.access(i);
|
| 424 |
}
|
| 425 |
|
|
@@ -435,9 +614,9 @@ mod tests {
|
|
| 435 |
fn test_neighborhood_spreading() {
|
| 436 |
let mut field = LeniaField::new(100.0);
|
| 437 |
|
| 438 |
-
field.add_region(0, 1_048_576);
|
| 439 |
-
field.add_region(1, 1_048_576);
|
| 440 |
-
field.add_region(2, 1_048_576);
|
| 441 |
|
| 442 |
// Region 0 neighbors region 1 and 2
|
| 443 |
field.set_neighbors(0, vec![(1, 1.0), (2, 1.0)]);
|
|
@@ -474,7 +653,7 @@ mod tests {
|
|
| 474 |
|
| 475 |
// 10 regions, access only 3
|
| 476 |
for i in 0..10 {
|
| 477 |
-
field.add_region(i, 5_242_880); // 5MB each = 50MB total = at budget
|
| 478 |
}
|
| 479 |
|
| 480 |
// Hot set: regions 0, 1, 2
|
|
@@ -498,4 +677,193 @@ mod tests {
|
|
| 498 |
// energy should be at or below budget
|
| 499 |
assert!(summary.total_energy <= 50.1);
|
| 500 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
}
|
|
|
|
| 26 |
pub struct FieldRegion {
|
| 27 |
/// Unique identifier (size-class path from pipeline)
|
| 28 |
pub id: u32,
|
| 29 |
+
/// Process that owns this region
|
| 30 |
+
pub process_id: u32,
|
| 31 |
/// Current temperature: 0.0 (frozen/cold) to 1.0 (fully hot)
|
| 32 |
pub temperature: f64,
|
| 33 |
/// Temperature at last step (for delta computation)
|
|
|
|
| 40 |
pub size_bytes: u64,
|
| 41 |
/// Number of times accessed
|
| 42 |
pub access_count: u64,
|
| 43 |
+
/// Whether this region is priority (temperature floor at 0.5)
|
| 44 |
+
pub priority: bool,
|
| 45 |
}
|
| 46 |
|
| 47 |
impl FieldRegion {
|
| 48 |
pub fn new(id: u32, size_bytes: u64) -> Self {
|
| 49 |
Self {
|
| 50 |
id,
|
| 51 |
+
process_id: 0,
|
| 52 |
temperature: 1.0, // start hot (just allocated)
|
| 53 |
prev_temperature: 1.0,
|
| 54 |
access_weight: 1.0,
|
| 55 |
decay_rate: 0.05, // 5% decay per step
|
| 56 |
size_bytes,
|
| 57 |
access_count: 1,
|
| 58 |
+
priority: false,
|
| 59 |
}
|
| 60 |
}
|
| 61 |
|
|
|
|
| 120 |
/// (RAM budget expressed as field energy)
|
| 121 |
max_total_energy: f64,
|
| 122 |
|
| 123 |
+
/// RAM budget in MB (kept in sync with max_total_energy)
|
| 124 |
+
ram_budget_mb: usize,
|
| 125 |
+
|
| 126 |
/// Current total energy
|
| 127 |
total_energy: f64,
|
| 128 |
|
|
|
|
| 137 |
|
| 138 |
/// Time step size (controls how fast the field evolves)
|
| 139 |
dt: f64,
|
| 140 |
+
|
| 141 |
+
/// Accumulated page fault count since last tune
|
| 142 |
+
page_fault_count: u64,
|
| 143 |
+
|
| 144 |
+
/// Steps since last adaptive tune
|
| 145 |
+
steps_since_tune: u64,
|
| 146 |
+
|
| 147 |
+
/// How many steps between adaptive tuning checks
|
| 148 |
+
tune_interval: u64,
|
| 149 |
}
|
| 150 |
|
| 151 |
impl LeniaField {
|
|
|
|
| 163 |
},
|
| 164 |
decay_rate: 0.02, // 2% cooling per step
|
| 165 |
max_total_energy: max_energy,
|
| 166 |
+
ram_budget_mb: ram_budget_mb as usize,
|
| 167 |
total_energy: 0.0,
|
| 168 |
cold_threshold: 0.2, // below 20% = compress
|
| 169 |
hot_threshold: 0.7, // above 70% = fully materialized
|
| 170 |
steps: 0,
|
| 171 |
dt: 0.1, // time step
|
| 172 |
+
page_fault_count: 0,
|
| 173 |
+
steps_since_tune: 0,
|
| 174 |
+
tune_interval: 100,
|
| 175 |
}
|
| 176 |
}
|
| 177 |
|
| 178 |
+
/// Add a region to the field with explicit process ownership
|
| 179 |
+
pub fn add_region(&mut self, id: u32, size_bytes: usize, process_id: u32) {
|
| 180 |
+
let mut region = FieldRegion::new(id, size_bytes as u64);
|
| 181 |
+
region.process_id = process_id;
|
| 182 |
let energy = region.temperature * (size_bytes as f64 / (1024.0 * 1024.0));
|
| 183 |
self.total_energy += energy;
|
| 184 |
self.regions.insert(id, region);
|
|
|
|
| 189 |
self.neighbors.insert(id, neighbors);
|
| 190 |
}
|
| 191 |
|
| 192 |
+
/// Update the RAM budget directly (in MB)
|
| 193 |
+
pub fn set_budget(&mut self, budget_mb: usize) {
|
| 194 |
+
self.ram_budget_mb = budget_mb;
|
| 195 |
+
self.max_total_energy = budget_mb as f64;
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
/// Read /proc/meminfo and update budget from MemAvailable
|
| 199 |
+
/// Silently no-ops if the file cannot be read or parsed
|
| 200 |
+
pub fn update_budget_from_system(&mut self) {
|
| 201 |
+
let contents = match std::fs::read_to_string("/proc/meminfo") {
|
| 202 |
+
Ok(c) => c,
|
| 203 |
+
Err(_) => return,
|
| 204 |
+
};
|
| 205 |
+
for line in contents.lines() {
|
| 206 |
+
if line.starts_with("MemAvailable:") {
|
| 207 |
+
// Format: "MemAvailable: 12345678 kB"
|
| 208 |
+
let parts: Vec<&str> = line.split_whitespace().collect();
|
| 209 |
+
if parts.len() >= 2 {
|
| 210 |
+
if let Ok(kb) = parts[1].parse::<usize>() {
|
| 211 |
+
let mb = kb / 1024;
|
| 212 |
+
self.set_budget(mb);
|
| 213 |
+
}
|
| 214 |
+
}
|
| 215 |
+
break;
|
| 216 |
+
}
|
| 217 |
+
}
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
/// Record a page fault event for adaptive growth tuning
|
| 221 |
+
pub fn record_page_fault(&mut self) {
|
| 222 |
+
self.page_fault_count += 1;
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
/// Set whether a region is priority (temperature clamped to >= 0.5)
|
| 226 |
+
pub fn set_priority(&mut self, id: u32, priority: bool) {
|
| 227 |
+
if let Some(region) = self.regions.get_mut(&id) {
|
| 228 |
+
region.priority = priority;
|
| 229 |
+
}
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
/// Record an access — heats up the region
|
| 233 |
pub fn access(&mut self, id: u32) {
|
| 234 |
if let Some(region) = self.regions.get_mut(&id) {
|
|
|
|
| 248 |
/// 2. Apply growth function (determines if region heats or cools)
|
| 249 |
/// 3. Apply natural decay (everything cools)
|
| 250 |
/// 4. Enforce mass conservation (total energy bounded)
|
| 251 |
+
/// 5. Clamp priority regions to >= 0.5
|
| 252 |
+
/// 6. Adaptive growth tuning every tune_interval steps
|
| 253 |
pub fn step(&mut self) {
|
| 254 |
self.steps += 1;
|
| 255 |
+
self.steps_since_tune += 1;
|
| 256 |
|
| 257 |
// Phase 1: Compute new temperatures
|
| 258 |
let mut new_temps: HashMap<u32, f64> = HashMap::new();
|
|
|
|
| 276 |
new_temps.insert(id, new_temp);
|
| 277 |
}
|
| 278 |
|
| 279 |
+
// Phase 2: Apply new temperatures and clamp priority regions
|
| 280 |
self.total_energy = 0.0;
|
| 281 |
for (&id, region) in self.regions.iter_mut() {
|
| 282 |
region.prev_temperature = region.temperature;
|
| 283 |
if let Some(&new_temp) = new_temps.get(&id) {
|
| 284 |
region.temperature = new_temp;
|
| 285 |
}
|
| 286 |
+
|
| 287 |
+
// Priority floor: if priority and dropped below 0.5, clamp up
|
| 288 |
+
if region.priority && region.temperature < 0.5 {
|
| 289 |
+
region.temperature = 0.5;
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
// Accumulate energy (temperature * size in MB)
|
| 293 |
self.total_energy += region.temperature
|
| 294 |
* (region.size_bytes as f64 / (1024.0 * 1024.0));
|
|
|
|
| 302 |
let scale = self.max_total_energy / self.total_energy;
|
| 303 |
for region in self.regions.values_mut() {
|
| 304 |
region.temperature *= scale;
|
| 305 |
+
// Re-apply priority floor after scaling
|
| 306 |
+
if region.priority && region.temperature < 0.5 {
|
| 307 |
+
region.temperature = 0.5;
|
| 308 |
+
}
|
| 309 |
}
|
| 310 |
self.total_energy = self.max_total_energy;
|
| 311 |
}
|
| 312 |
+
|
| 313 |
+
// Phase 4: Adaptive growth tuning (Gaussian only)
|
| 314 |
+
if self.steps_since_tune >= self.tune_interval {
|
| 315 |
+
let fault_rate = if self.steps_since_tune > 0 {
|
| 316 |
+
self.page_fault_count as f64 / self.steps_since_tune as f64
|
| 317 |
+
} else {
|
| 318 |
+
0.0
|
| 319 |
+
};
|
| 320 |
+
|
| 321 |
+
if let GrowthFunction::Gaussian { ref mut center, ref mut sigma } = self.growth {
|
| 322 |
+
if fault_rate > 0.01 {
|
| 323 |
+
// Over-cooling: too many faults — widen sigma, raise center
|
| 324 |
+
*sigma = (*sigma * 1.05).min(0.5);
|
| 325 |
+
*center = (*center * 1.02).min(0.8);
|
| 326 |
+
} else if fault_rate < 0.001 {
|
| 327 |
+
// Under-cooling: check if usage > 80% budget
|
| 328 |
+
let usage_pct = if self.max_total_energy > 0.0 {
|
| 329 |
+
self.total_energy / self.max_total_energy
|
| 330 |
+
} else {
|
| 331 |
+
0.0
|
| 332 |
+
};
|
| 333 |
+
if usage_pct > 0.80 {
|
| 334 |
+
*sigma = (*sigma * 0.95).max(0.05);
|
| 335 |
+
*center = (*center * 0.98).max(0.2);
|
| 336 |
+
}
|
| 337 |
+
}
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
// Reset counters
|
| 341 |
+
self.page_fault_count = 0;
|
| 342 |
+
self.steps_since_tune = 0;
|
| 343 |
+
}
|
| 344 |
}
|
| 345 |
|
| 346 |
/// Compute neighborhood activation for a region
|
|
|
|
| 424 |
hot_threshold: self.hot_threshold,
|
| 425 |
}
|
| 426 |
}
|
| 427 |
+
|
| 428 |
+
/// Serialize the field state to bytes.
|
| 429 |
+
///
|
| 430 |
+
/// Format: 4-byte region count (u32 LE), then per region:
|
| 431 |
+
/// u32 id, u32 process_id, f32 temperature, u64 size_bytes,
|
| 432 |
+
/// f32 decay_rate, u8 priority
|
| 433 |
+
/// = 25 bytes per region + 4 header
|
| 434 |
+
pub fn serialize(&self) -> Vec<u8> {
|
| 435 |
+
let count = self.regions.len() as u32;
|
| 436 |
+
let mut buf = Vec::with_capacity(4 + count as usize * 25);
|
| 437 |
+
|
| 438 |
+
buf.extend_from_slice(&count.to_le_bytes());
|
| 439 |
+
|
| 440 |
+
// Sort by id for deterministic output
|
| 441 |
+
let mut ids: Vec<u32> = self.regions.keys().copied().collect();
|
| 442 |
+
ids.sort_unstable();
|
| 443 |
+
|
| 444 |
+
for id in ids {
|
| 445 |
+
let r = &self.regions[&id];
|
| 446 |
+
buf.extend_from_slice(&r.id.to_le_bytes());
|
| 447 |
+
buf.extend_from_slice(&r.process_id.to_le_bytes());
|
| 448 |
+
buf.extend_from_slice(&(r.temperature as f32).to_le_bytes());
|
| 449 |
+
buf.extend_from_slice(&r.size_bytes.to_le_bytes());
|
| 450 |
+
buf.extend_from_slice(&(r.decay_rate as f32).to_le_bytes());
|
| 451 |
+
buf.push(if r.priority { 1u8 } else { 0u8 });
|
| 452 |
+
}
|
| 453 |
+
|
| 454 |
+
buf
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
/// Deserialize a field from bytes produced by `serialize`.
|
| 458 |
+
/// Returns None if the data is malformed or truncated.
|
| 459 |
+
pub fn deserialize(data: &[u8], ram_budget_mb: usize) -> Option<Self> {
|
| 460 |
+
if data.len() < 4 {
|
| 461 |
+
return None;
|
| 462 |
+
}
|
| 463 |
+
|
| 464 |
+
let count = u32::from_le_bytes(data[0..4].try_into().ok()?) as usize;
|
| 465 |
+
let expected_len = 4 + count * 25;
|
| 466 |
+
if data.len() < expected_len {
|
| 467 |
+
return None;
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
let mut field = LeniaField::new(ram_budget_mb as f64);
|
| 471 |
+
|
| 472 |
+
let mut offset = 4usize;
|
| 473 |
+
for _ in 0..count {
|
| 474 |
+
let id = u32::from_le_bytes(data[offset..offset+4].try_into().ok()?);
|
| 475 |
+
let process_id = u32::from_le_bytes(data[offset+4..offset+8].try_into().ok()?);
|
| 476 |
+
let temperature = f32::from_le_bytes(data[offset+8..offset+12].try_into().ok()?) as f64;
|
| 477 |
+
let size_bytes = u64::from_le_bytes(data[offset+12..offset+20].try_into().ok()?);
|
| 478 |
+
let decay_rate = f32::from_le_bytes(data[offset+20..offset+24].try_into().ok()?) as f64;
|
| 479 |
+
let priority = data[offset+24] != 0;
|
| 480 |
+
offset += 25;
|
| 481 |
+
|
| 482 |
+
let mut region = FieldRegion::new(id, size_bytes);
|
| 483 |
+
region.process_id = process_id;
|
| 484 |
+
region.temperature = temperature;
|
| 485 |
+
region.prev_temperature = temperature;
|
| 486 |
+
region.decay_rate = decay_rate;
|
| 487 |
+
region.priority = priority;
|
| 488 |
+
|
| 489 |
+
let energy = temperature * (size_bytes as f64 / (1024.0 * 1024.0));
|
| 490 |
+
field.total_energy += energy;
|
| 491 |
+
field.regions.insert(id, region);
|
| 492 |
+
}
|
| 493 |
+
|
| 494 |
+
Some(field)
|
| 495 |
+
}
|
| 496 |
}
|
| 497 |
|
| 498 |
/// Field summary
|
|
|
|
| 538 |
mod tests {
|
| 539 |
use super::*;
|
| 540 |
|
| 541 |
+
// ── existing tests (unchanged behaviour) ─────────────────────────────────
|
| 542 |
+
|
| 543 |
#[test]
|
| 544 |
fn test_field_creation() {
|
| 545 |
let mut field = LeniaField::new(100.0); // 100MB budget
|
| 546 |
|
| 547 |
+
field.add_region(0, 1_048_576, 0);
|
| 548 |
+
field.add_region(1, 1_048_576, 0);
|
| 549 |
+
field.add_region(2, 1_048_576, 0);
|
| 550 |
|
| 551 |
assert_eq!(field.regions.len(), 3);
|
| 552 |
|
|
|
|
| 558 |
fn test_decay_makes_cold() {
|
| 559 |
let mut field = LeniaField::new(100.0);
|
| 560 |
|
| 561 |
+
field.add_region(0, 1_048_576, 0);
|
| 562 |
|
| 563 |
// Step many times without access — should cool down
|
| 564 |
for _ in 0..100 {
|
|
|
|
| 573 |
fn test_access_keeps_hot() {
|
| 574 |
let mut field = LeniaField::new(100.0);
|
| 575 |
|
| 576 |
+
field.add_region(0, 1_048_576, 0);
|
| 577 |
+
field.add_region(1, 1_048_576, 0);
|
| 578 |
|
| 579 |
// Step and access region 0, ignore region 1
|
| 580 |
for _ in 0..50 {
|
|
|
|
| 598 |
|
| 599 |
// Add 5 x 1MB regions — 5MB total, budget is 2MB
|
| 600 |
for i in 0..5 {
|
| 601 |
+
field.add_region(i, 1_048_576, 0);
|
| 602 |
field.access(i);
|
| 603 |
}
|
| 604 |
|
|
|
|
| 614 |
fn test_neighborhood_spreading() {
|
| 615 |
let mut field = LeniaField::new(100.0);
|
| 616 |
|
| 617 |
+
field.add_region(0, 1_048_576, 0);
|
| 618 |
+
field.add_region(1, 1_048_576, 0);
|
| 619 |
+
field.add_region(2, 1_048_576, 0);
|
| 620 |
|
| 621 |
// Region 0 neighbors region 1 and 2
|
| 622 |
field.set_neighbors(0, vec![(1, 1.0), (2, 1.0)]);
|
|
|
|
| 653 |
|
| 654 |
// 10 regions, access only 3
|
| 655 |
for i in 0..10 {
|
| 656 |
+
field.add_region(i, 5_242_880, 0); // 5MB each = 50MB total = at budget
|
| 657 |
}
|
| 658 |
|
| 659 |
// Hot set: regions 0, 1, 2
|
|
|
|
| 677 |
// energy should be at or below budget
|
| 678 |
assert!(summary.total_energy <= 50.1);
|
| 679 |
}
|
| 680 |
+
|
| 681 |
+
// ── new tests ─────────────────────────────────────────────────────────────
|
| 682 |
+
|
| 683 |
+
/// Regions keep the process id they were registered with.
#[test]
fn test_lenia_process_tagged() {
    let mut field = LeniaField::new(100.0);

    // (region id, owning process id)
    let tagged: [(u32, u32); 3] = [(10, 42), (11, 42), (12, 99)];

    for &(region_id, owner) in tagged.iter() {
        field.add_region(region_id, 1_048_576, owner);
    }
    for &(region_id, owner) in tagged.iter() {
        assert_eq!(field.regions[&region_id].process_id, owner);
    }

    // A region registered with process id 0 reports process id 0.
    field.add_region(13, 1_048_576, 0);
    let untagged = &field.regions[&13];
    assert_eq!(untagged.process_id, 0);
}
|
| 699 |
+
|
| 700 |
+
/// `set_budget` updates both budget fields, and the energy total stays
/// within whichever budget is current.
#[test]
fn test_lenia_set_budget() {
    // Start with a 10MB budget.
    let mut field = LeniaField::new(10.0);

    // Five 2MB regions (10MB in total), each touched once after insertion.
    for region_id in 0..5 {
        field.add_region(region_id, 2_097_152, 0);
        field.access(region_id);
    }
    field.step();

    let energy_at_10mb = field.summary().total_energy;
    assert!(energy_at_10mb <= 10.1, "Energy should be at most 10MB: {}", energy_at_10mb);

    // Double the budget and check both fields moved together.
    field.set_budget(20);
    assert_eq!(field.ram_budget_mb, 20);
    let ceiling_error = (field.max_total_energy - 20.0).abs();
    assert!(ceiling_error < 0.001,
        "max_total_energy should be 20.0 after set_budget(20)");

    // Touch every region again and advance one step under the 20MB limit.
    (0..5).for_each(|region_id| field.access(region_id));
    field.step();

    let energy_at_20mb = field.summary().total_energy;
    assert!(energy_at_20mb <= 20.1, "Energy should be within new 20MB budget: {}", energy_at_20mb);
}
|
| 729 |
+
|
| 730 |
+
/// A high page-fault rate over one tuning interval widens the Gaussian sigma.
///
/// The tuning interval is 100 steps and the fault rate is
/// faults / steps_since_tune, so 50 faults over 100 steps gives 0.5,
/// well above the 0.01 over-cooling threshold.
#[test]
fn test_lenia_adaptive_overcooling() {
    // Reads the current sigma; the field is expected to use Gaussian growth.
    let gaussian_sigma = |probe: &LeniaField| match &probe.growth {
        GrowthFunction::Gaussian { sigma, .. } => *sigma,
        _ => panic!("Expected Gaussian growth function"),
    };

    let mut field = LeniaField::new(100.0);
    field.add_region(0, 1_048_576, 0);

    let initial_sigma = gaussian_sigma(&field);

    // Queue up the faults before any step runs.
    (0..50).for_each(|_| field.record_page_fault());

    // Exactly one tuning interval, so tuning fires once on the last step.
    (0..100).for_each(|_| field.step());

    let new_sigma = gaussian_sigma(&field);

    assert!(new_sigma > initial_sigma,
        "Sigma should have widened due to over-cooling (fault_rate=0.5): initial={}, new={}",
        initial_sigma, new_sigma);
}
|
| 763 |
+
|
| 764 |
+
/// A priority region is held at >= 0.5 while an ordinary one cools below it.
#[test]
fn test_lenia_priority_exempt() {
    let mut field = LeniaField::new(100.0);

    // Region 0 is made priority; region 1 is left as an ordinary region.
    for region_id in 0..2 {
        field.add_region(region_id, 1_048_576, 0);
    }
    field.set_priority(0, true);

    // 200 steps with no accesses at all.
    (0..200).for_each(|_| field.step());

    let priority_temp = field.regions[&0].temperature;
    assert!(priority_temp >= 0.5,
        "Priority region must not drop below 0.5: {}", priority_temp);

    let normal_temp = field.regions[&1].temperature;
    assert!(normal_temp < 0.5,
        "Normal region should cool below 0.5: {}", normal_temp);
}
|
| 786 |
+
|
| 787 |
+
/// serialize followed by deserialize reproduces every stored region field.
#[test]
fn test_lenia_serialize_roundtrip() {
    let mut field = LeniaField::new(64.0);

    // (region id, size in bytes, process id)
    let layout: [(u32, usize, u32); 3] = [
        (1, 1_048_576, 7),
        (2, 2_097_152, 13),
        (3, 4_194_304, 0),
    ];
    for &(region_id, size, owner) in layout.iter() {
        field.add_region(region_id, size, owner);
    }

    field.set_priority(1, true);
    field.access(2);
    field.step();

    let bytes = field.serialize();

    // 4-byte header plus 25 bytes for each of the 3 regions = 79 bytes.
    assert_eq!(bytes.len(), 4 + 3 * 25);

    let restored = LeniaField::deserialize(&bytes, 64)
        .expect("deserialize should succeed");

    assert_eq!(restored.regions.len(), field.regions.len());

    for id in 1u32..=3 {
        let orig = &field.regions[&id];
        let rest = &restored.regions[&id];

        // Integer and boolean fields must match exactly.
        assert_eq!(rest.id, orig.id, "id mismatch for region {}", id);
        assert_eq!(rest.process_id, orig.process_id, "process_id mismatch for {}", id);
        assert_eq!(rest.size_bytes, orig.size_bytes, "size_bytes mismatch for {}", id);
        assert_eq!(rest.priority, orig.priority, "priority mismatch for {}", id);

        // Floats are stored as f32, so allow a small rounding error.
        let temp_diff = (rest.temperature - orig.temperature).abs();
        let decay_diff = (rest.decay_rate - orig.decay_rate).abs();

        assert!(temp_diff < 1e-5,
            "temperature mismatch for region {}: {} vs {}", id, orig.temperature, rest.temperature);
        assert!(decay_diff < 1e-5,
            "decay_rate mismatch for region {}: {} vs {}", id, orig.decay_rate, rest.decay_rate);
    }
}
|
| 828 |
+
|
| 829 |
+
/// With two process groups sharing one field, the accessed group ends up
/// hotter and the total energy still respects the shared budget.
#[test]
fn test_lenia_cross_process_energy() {
    // Budget of exactly 6MB, shared by process 1 and process 2.
    let mut field = LeniaField::new(6.0);

    let process_one: [u32; 3] = [10, 11, 12];
    let process_two: [u32; 3] = [20, 21, 22];

    // Three 1MB regions per process, process 1 first.
    for &region_id in process_one.iter() {
        field.add_region(region_id, 1_048_576, 1);
    }
    for &region_id in process_two.iter() {
        field.add_region(region_id, 1_048_576, 2);
    }

    // 50 rounds: touch only process 1's regions, then advance one step.
    for _ in 0..50 {
        for &region_id in process_one.iter() {
            field.access(region_id);
        }
        field.step();
    }

    // Mean temperature per process group.
    let p1_avg = process_one.iter()
        .map(|id| field.regions[id].temperature)
        .sum::<f64>() / 3.0;
    let p2_avg = process_two.iter()
        .map(|id| field.regions[id].temperature)
        .sum::<f64>() / 3.0;

    assert!(p1_avg > p2_avg,
        "Process 1 (accessed) should be hotter than process 2: {:.3} vs {:.3}",
        p1_avg, p2_avg);

    // The energy bound applies to the field as a whole.
    let summary = field.summary();
    assert!(summary.total_energy <= 6.1,
        "Total energy must stay within 6MB budget: {}", summary.total_energy);
}
|
| 869 |
}
|
|
@@ -1,13 +1,32 @@
|
|
| 1 |
//! Condensate Core — Rust implementation
|
| 2 |
//!
|
| 3 |
//! Living memory manager: learns access patterns through causal topology,
|
| 4 |
-
//! predicts future accesses, manages memory tiers
|
|
|
|
| 5 |
//!
|
| 6 |
-
//!
|
| 7 |
-
//!
|
| 8 |
-
//!
|
| 9 |
-
//! -
|
| 10 |
-
//! -
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
pub mod graph;
|
| 13 |
pub mod predictor;
|
|
@@ -15,15 +34,26 @@ pub mod membrane;
|
|
| 15 |
pub mod condenser;
|
| 16 |
pub mod pipeline;
|
| 17 |
pub mod lenia;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
mod bench;
|
| 19 |
|
| 20 |
#[cfg(feature = "python")]
|
| 21 |
use pyo3::prelude::*;
|
| 22 |
|
| 23 |
/// Python module: condensate_core
|
|
|
|
|
|
|
|
|
|
| 24 |
#[cfg(feature = "python")]
|
| 25 |
#[pymodule]
|
| 26 |
fn condensate_core(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|
|
|
| 27 |
m.add_class::<graph::AccessGraph>()?;
|
| 28 |
m.add_class::<predictor::RustPredictor>()?;
|
| 29 |
m.add_class::<predictor::Prediction>()?;
|
|
|
|
| 1 |
//! Condensate Core — Rust implementation
|
| 2 |
//!
|
| 3 |
//! Living memory manager: learns access patterns through causal topology,
|
| 4 |
+
//! predicts future accesses, manages memory tiers via continuous thermal
|
| 5 |
+
//! field dynamics.
|
| 6 |
//!
|
| 7 |
+
//! # Modules
|
| 8 |
+
//!
|
| 9 |
+
//! ## Core pipeline (original)
|
| 10 |
+
//! - `graph` — AccessGraph: learns memory access topology
|
| 11 |
+
//! - `predictor` — RustPredictor: causal spike propagation predictions
|
| 12 |
+
//! - `membrane` — LD_PRELOAD malloc/free interception
|
| 13 |
+
//! - `condenser` — HOT/WARM/COLD tier management with real memory ops
|
| 14 |
+
//! - `pipeline` — Living loop connecting all components
|
| 15 |
+
//! - `lenia` — Continuous thermal field dynamics
|
| 16 |
+
//!
|
| 17 |
+
//! ## Condensing strategies (Phase 1 blocks F-L)
|
| 18 |
+
//! - `keyframe` — Keyframe/delta encoding (video codec model)
|
| 19 |
+
//! - `sparse` — Partial decompression (serve exactly what's needed)
|
| 20 |
+
//! - `locality` — Manufactured spatial locality + software prefetch
|
| 21 |
+
//! - `sleep` — Biological sleep consolidation cycle
|
| 22 |
+
//! - `gate` — Prediction gate (KISS overhead reduction)
|
| 23 |
+
//! - `splat` — Gaussian splat field geometry
|
| 24 |
+
//! - `erasure` — Erasure coding + holographic boundaries
|
| 25 |
+
//!
|
| 26 |
+
//! # Build targets
|
| 27 |
+
//!
|
| 28 |
+
//! - `cargo build --features python` → Python module (.so)
|
| 29 |
+
//! - `cargo build --no-default-features --features preload` → LD_PRELOAD .so
|
| 30 |
|
| 31 |
pub mod graph;
|
| 32 |
pub mod predictor;
|
|
|
|
| 34 |
pub mod condenser;
|
| 35 |
pub mod pipeline;
|
| 36 |
pub mod lenia;
|
| 37 |
+
pub mod keyframe;
|
| 38 |
+
pub mod sparse;
|
| 39 |
+
pub mod gate;
|
| 40 |
+
pub mod locality;
|
| 41 |
+
pub mod sleep;
|
| 42 |
+
pub mod splat;
|
| 43 |
+
pub mod erasure;
|
| 44 |
mod bench;
|
| 45 |
|
| 46 |
#[cfg(feature = "python")]
|
| 47 |
use pyo3::prelude::*;
|
| 48 |
|
| 49 |
/// Python module: condensate_core
|
| 50 |
+
///
|
| 51 |
+
/// Exposes the core pipeline types and condensing strategies to Python.
|
| 52 |
+
/// Python is orchestration only — the data path is Rust.
|
| 53 |
#[cfg(feature = "python")]
|
| 54 |
#[pymodule]
|
| 55 |
fn condensate_core(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
| 56 |
+
// Core pipeline
|
| 57 |
m.add_class::<graph::AccessGraph>()?;
|
| 58 |
m.add_class::<predictor::RustPredictor>()?;
|
| 59 |
m.add_class::<predictor::Prediction>()?;
|
|
@@ -0,0 +1,707 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Block H — Manufactured Spatial Locality + Software Prefetch
|
| 2 |
+
//!
|
| 3 |
+
//! The SNN knows causal chains A→B→C. This module places those nodes in
|
| 4 |
+
//! adjacent cache lines so the hardware prefetcher succeeds by construction,
|
| 5 |
+
//! then emits software prefetch instructions timed to spike propagation.
|
| 6 |
+
|
| 7 |
+
use std::collections::HashMap;
|
| 8 |
+
use libc;
|
| 9 |
+
|
| 10 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 11 |
+
// Types
|
| 12 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 13 |
+
|
| 14 |
+
/// Memory regions listed in causal order, together with the predicted time
/// between consecutive accesses. Produced by the SNN's spike propagation
/// layer.
pub struct CausalChain {
    /// Region IDs in causal order.
    pub nodes: Vec<u32>,
    /// Predicted inter-access times (len == nodes.len() - 1).
    pub timings_ms: Vec<f64>,
    /// Confidence attached to the chain as a whole.
    pub total_confidence: f64,
}
|
| 21 |
+
|
| 22 |
+
/// A spatial layout plan: arena offsets picked so that causally related
/// regions end up in adjacent cache lines.
pub struct LayoutPlan {
    /// region_id → arena byte offset.
    placements: HashMap<u32, usize>,
    /// Groups of co-located region IDs.
    chain_groups: Vec<Vec<u32>>,
}
|
| 28 |
+
|
| 29 |
+
/// The cache level a software prefetch instruction should target.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum PrefetchHint {
    /// Predicted access is less than 1 ms away.
    L1,
    /// Predicted access is 1 – 5 ms away.
    L2,
    /// Predicted access is 5 – 20 ms away.
    L3,
    /// Predicted access is more than 20 ms away — not worth prefetching.
    None,
}
|
| 37 |
+
|
| 38 |
+
/// A single prefetch instruction to be issued.
|
| 39 |
+
pub struct PrefetchInstruction {
|
| 40 |
+
pub address: usize,
|
| 41 |
+
pub hint: PrefetchHint,
|
| 42 |
+
pub predicted_ms: f64,
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
/// A contiguous mmap-backed arena whose allocations are aligned to a
/// 64-byte cache line. Sleep consolidation can reorganise it via `relocate`.
pub struct CondensateArena {
    /// Start of the arena's memory.
    base: *mut u8,
    /// Arena size in bytes.
    size: usize,
    /// Free extents as (offset, size), sorted by offset.
    free_list: Vec<(usize, usize)>,
    /// region_id → (offset, size).
    allocations: HashMap<u32, (usize, usize)>,
    /// Always 64.
    cache_line_size: usize,
}
|
| 54 |
+
|
| 55 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 56 |
+
// CausalChain
|
| 57 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 58 |
+
|
| 59 |
+
impl CausalChain {
|
| 60 |
+
pub fn new(nodes: Vec<u32>, timings_ms: Vec<f64>, total_confidence: f64) -> Self {
|
| 61 |
+
// timings_ms should have (nodes.len() - 1) entries, but we don't panic
|
| 62 |
+
// on bad input — callers might build chains incrementally.
|
| 63 |
+
Self { nodes, timings_ms, total_confidence }
|
| 64 |
+
}
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 68 |
+
// LayoutPlan
|
| 69 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 70 |
+
|
| 71 |
+
impl LayoutPlan {
|
| 72 |
+
pub fn new() -> Self {
|
| 73 |
+
Self {
|
| 74 |
+
placements: HashMap::new(),
|
| 75 |
+
chain_groups: Vec::new(),
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
/// Assign contiguous arena offsets to regions so that members of the same
|
| 80 |
+
/// causal chain are spatially adjacent.
|
| 81 |
+
///
|
| 82 |
+
/// Strategy:
|
| 83 |
+
/// 1. Sort chains by descending `total_confidence` so the most trusted
|
| 84 |
+
/// chains claim their preferred layout first.
|
| 85 |
+
/// 2. For each chain, walk its nodes in order. If a node has already been
|
| 86 |
+
/// placed (because it appeared in a higher-confidence chain), keep that
|
| 87 |
+
/// placement; otherwise assign the next available slot.
|
| 88 |
+
/// 3. Slots are one cache line (64 bytes) wide for the purposes of the
|
| 89 |
+
/// plan. Actual allocation sizes are determined by `CondensateArena`.
|
| 90 |
+
pub fn compute(chains: &[CausalChain]) -> Self {
|
| 91 |
+
const CACHE_LINE: usize = 64;
|
| 92 |
+
|
| 93 |
+
let mut plan = Self::new();
|
| 94 |
+
|
| 95 |
+
// Work on a sorted copy (by descending confidence).
|
| 96 |
+
let mut order: Vec<usize> = (0..chains.len()).collect();
|
| 97 |
+
order.sort_by(|&a, &b| {
|
| 98 |
+
chains[b]
|
| 99 |
+
.total_confidence
|
| 100 |
+
.partial_cmp(&chains[a].total_confidence)
|
| 101 |
+
.unwrap_or(std::cmp::Ordering::Equal)
|
| 102 |
+
});
|
| 103 |
+
|
| 104 |
+
let mut next_offset: usize = 0;
|
| 105 |
+
|
| 106 |
+
for chain_idx in order {
|
| 107 |
+
let chain = &chains[chain_idx];
|
| 108 |
+
let mut group: Vec<u32> = Vec::new();
|
| 109 |
+
|
| 110 |
+
for &node in &chain.nodes {
|
| 111 |
+
if !plan.placements.contains_key(&node) {
|
| 112 |
+
plan.placements.insert(node, next_offset);
|
| 113 |
+
next_offset += CACHE_LINE;
|
| 114 |
+
}
|
| 115 |
+
group.push(node);
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
if !group.is_empty() {
|
| 119 |
+
plan.chain_groups.push(group);
|
| 120 |
+
}
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
plan
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
/// Get the planned arena offset for a region.
|
| 127 |
+
pub fn get_placement(&self, region_id: u32) -> Option<usize> {
|
| 128 |
+
self.placements.get(®ion_id).copied()
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
/// Get the chain group that contains a region (first match wins).
|
| 132 |
+
pub fn get_chain_group(&self, region_id: u32) -> Option<&Vec<u32>> {
|
| 133 |
+
self.chain_groups
|
| 134 |
+
.iter()
|
| 135 |
+
.find(|group| group.contains(®ion_id))
|
| 136 |
+
}
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
impl Default for LayoutPlan {
|
| 140 |
+
fn default() -> Self {
|
| 141 |
+
Self::new()
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 146 |
+
// PrefetchHint
|
| 147 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 148 |
+
|
| 149 |
+
impl PrefetchHint {
|
| 150 |
+
/// Map a predicted inter-access time to the appropriate cache level.
|
| 151 |
+
pub fn from_timing(predicted_ms: f64) -> Self {
|
| 152 |
+
if predicted_ms < 1.0 {
|
| 153 |
+
PrefetchHint::L1
|
| 154 |
+
} else if predicted_ms < 5.0 {
|
| 155 |
+
PrefetchHint::L2
|
| 156 |
+
} else if predicted_ms <= 20.0 {
|
| 157 |
+
PrefetchHint::L3
|
| 158 |
+
} else {
|
| 159 |
+
PrefetchHint::None
|
| 160 |
+
}
|
| 161 |
+
}
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 165 |
+
// CondensateArena
|
| 166 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 167 |
+
|
| 168 |
+
// Mark as Send so it can cross thread boundaries in the pipeline.
|
| 169 |
+
// SAFETY: The arena owns its memory exclusively; access must be serialised by
|
| 170 |
+
// the caller (the pipeline uses a Mutex<CondensateArena>).
|
| 171 |
+
// The arena stores a raw `*mut u8` base pointer, and raw pointers are not
// `Send` automatically, so the marker has to be asserted by hand here.
unsafe impl Send for CondensateArena {}
|
| 172 |
+
|
| 173 |
+
impl CondensateArena {
|
| 174 |
+
/// Allocate a contiguous anonymous private mapping of `size` bytes.
|
| 175 |
+
pub fn new(size: usize) -> Self {
|
| 176 |
+
// SAFETY: mmap with MAP_ANON | MAP_PRIVATE creates a fresh zero-filled
|
| 177 |
+
// mapping. We check for MAP_FAILED before using the pointer.
|
| 178 |
+
let base = unsafe {
|
| 179 |
+
libc::mmap(
|
| 180 |
+
std::ptr::null_mut(),
|
| 181 |
+
size,
|
| 182 |
+
libc::PROT_READ | libc::PROT_WRITE,
|
| 183 |
+
libc::MAP_ANON | libc::MAP_PRIVATE,
|
| 184 |
+
-1,
|
| 185 |
+
0,
|
| 186 |
+
)
|
| 187 |
+
};
|
| 188 |
+
|
| 189 |
+
assert_ne!(
|
| 190 |
+
base,
|
| 191 |
+
libc::MAP_FAILED,
|
| 192 |
+
"CondensateArena: mmap({size}) failed"
|
| 193 |
+
);
|
| 194 |
+
|
| 195 |
+
Self {
|
| 196 |
+
base: base as *mut u8,
|
| 197 |
+
size,
|
| 198 |
+
free_list: vec![(0, size)],
|
| 199 |
+
allocations: HashMap::new(),
|
| 200 |
+
cache_line_size: 64,
|
| 201 |
+
}
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
/// Round `offset` up to the next multiple of `align`.
///
/// Implemented with a bit mask, so the result is only a multiple of `align`
/// when `align` is a non-zero power of two.
#[inline]
fn align_up(offset: usize, align: usize) -> usize {
    let mask = align - 1;
    (offset + mask) & !mask
}
|
| 209 |
+
|
| 210 |
+
/// Allocate `size` bytes for `region_id`, aligned to `cache_line_size`.
|
| 211 |
+
/// Returns a raw pointer into the arena on success.
|
| 212 |
+
pub fn allocate(&mut self, region_id: u32, size: usize) -> Option<*mut u8> {
|
| 213 |
+
if self.allocations.contains_key(®ion_id) {
|
| 214 |
+
return None; // already allocated
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
let align = self.cache_line_size;
|
| 218 |
+
let aligned_size = Self::align_up(size, align);
|
| 219 |
+
|
| 220 |
+
// Find the first free block that fits after alignment.
|
| 221 |
+
let mut chosen: Option<usize> = None;
|
| 222 |
+
for (i, &(blk_off, blk_size)) in self.free_list.iter().enumerate() {
|
| 223 |
+
let aligned_start = Self::align_up(blk_off, align);
|
| 224 |
+
let padding = aligned_start - blk_off;
|
| 225 |
+
if blk_size >= aligned_size + padding {
|
| 226 |
+
chosen = Some(i);
|
| 227 |
+
break;
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
let idx = chosen?;
|
| 232 |
+
let (blk_off, blk_size) = self.free_list[idx];
|
| 233 |
+
let start = Self::align_up(blk_off, align);
|
| 234 |
+
let padding = start - blk_off;
|
| 235 |
+
let consumed = aligned_size + padding;
|
| 236 |
+
|
| 237 |
+
self.free_list.remove(idx);
|
| 238 |
+
|
| 239 |
+
// Return any leading padding as a free fragment.
|
| 240 |
+
if padding > 0 {
|
| 241 |
+
self.free_list.push((blk_off, padding));
|
| 242 |
+
}
|
| 243 |
+
// Return any trailing space.
|
| 244 |
+
let trailing_off = start + aligned_size;
|
| 245 |
+
let trailing_size = blk_size - consumed;
|
| 246 |
+
if trailing_size > 0 {
|
| 247 |
+
self.free_list.push((trailing_off, trailing_size));
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
self.free_list.sort_by_key(|&(off, _)| off);
|
| 251 |
+
self.allocations.insert(region_id, (start, aligned_size));
|
| 252 |
+
|
| 253 |
+
// SAFETY: `start` is within [0, self.size) because we checked blk_size
|
| 254 |
+
// above. base is a valid mmap pointer for at least `self.size` bytes.
|
| 255 |
+
Some(unsafe { self.base.add(start) })
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
/// Attempt to allocate at a specific byte offset (used by LayoutPlan).
|
| 259 |
+
/// The requested range must lie entirely within a single free block.
|
| 260 |
+
pub fn allocate_at(
|
| 261 |
+
&mut self,
|
| 262 |
+
region_id: u32,
|
| 263 |
+
offset: usize,
|
| 264 |
+
size: usize,
|
| 265 |
+
) -> Option<*mut u8> {
|
| 266 |
+
if self.allocations.contains_key(®ion_id) {
|
| 267 |
+
return None;
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
let align = self.cache_line_size;
|
| 271 |
+
let aligned_start = Self::align_up(offset, align);
|
| 272 |
+
let aligned_size = Self::align_up(size, align);
|
| 273 |
+
|
| 274 |
+
if aligned_start + aligned_size > self.size {
|
| 275 |
+
return None;
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
// Find a free block that fully contains [aligned_start, aligned_start + aligned_size).
|
| 279 |
+
let found = self.free_list.iter().enumerate().find(|(_, &(blk_off, blk_size))| {
|
| 280 |
+
blk_off <= aligned_start && aligned_start + aligned_size <= blk_off + blk_size
|
| 281 |
+
});
|
| 282 |
+
|
| 283 |
+
let (idx, &(blk_off, blk_size)) = found?;
|
| 284 |
+
self.free_list.remove(idx);
|
| 285 |
+
|
| 286 |
+
// Return leading fragment.
|
| 287 |
+
if aligned_start > blk_off {
|
| 288 |
+
self.free_list.push((blk_off, aligned_start - blk_off));
|
| 289 |
+
}
|
| 290 |
+
// Return trailing fragment.
|
| 291 |
+
let end = aligned_start + aligned_size;
|
| 292 |
+
let blk_end = blk_off + blk_size;
|
| 293 |
+
if end < blk_end {
|
| 294 |
+
self.free_list.push((end, blk_end - end));
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
self.free_list.sort_by_key(|&(off, _)| off);
|
| 298 |
+
self.allocations.insert(region_id, (aligned_start, aligned_size));
|
| 299 |
+
|
| 300 |
+
// SAFETY: aligned_start is within the mmap'd region (checked above).
|
| 301 |
+
Some(unsafe { self.base.add(aligned_start) })
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
/// Return a region's allocation to the free list, then coalesce adjacent
|
| 305 |
+
/// free blocks so fragmentation doesn't grow unboundedly.
|
| 306 |
+
pub fn free(&mut self, region_id: u32) {
|
| 307 |
+
if let Some((offset, size)) = self.allocations.remove(®ion_id) {
|
| 308 |
+
self.free_list.push((offset, size));
|
| 309 |
+
self.free_list.sort_by_key(|&(off, _)| off);
|
| 310 |
+
self.coalesce();
|
| 311 |
+
}
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
/// Merge adjacent free blocks. Called after every `free`.
|
| 315 |
+
fn coalesce(&mut self) {
|
| 316 |
+
if self.free_list.len() < 2 {
|
| 317 |
+
return;
|
| 318 |
+
}
|
| 319 |
+
|
| 320 |
+
let mut merged: Vec<(usize, usize)> = Vec::with_capacity(self.free_list.len());
|
| 321 |
+
let mut iter = self.free_list.drain(..);
|
| 322 |
+
let (mut cur_off, mut cur_size) = iter.next().unwrap();
|
| 323 |
+
|
| 324 |
+
for (off, sz) in iter {
|
| 325 |
+
if off == cur_off + cur_size {
|
| 326 |
+
// Adjacent — extend current block.
|
| 327 |
+
cur_size += sz;
|
| 328 |
+
} else {
|
| 329 |
+
merged.push((cur_off, cur_size));
|
| 330 |
+
cur_off = off;
|
| 331 |
+
cur_size = sz;
|
| 332 |
+
}
|
| 333 |
+
}
|
| 334 |
+
merged.push((cur_off, cur_size));
|
| 335 |
+
self.free_list = merged;
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
/// Move a region's data to `new_offset` within the arena (memcpy).
|
| 339 |
+
/// Used by the sleep consolidation pass to tighten the layout.
|
| 340 |
+
/// Returns `true` on success, `false` if the move isn't possible.
|
| 341 |
+
pub fn relocate(&mut self, region_id: u32, new_offset: usize) -> bool {
|
| 342 |
+
let (old_offset, size) = match self.allocations.get(®ion_id).copied() {
|
| 343 |
+
Some(v) => v,
|
| 344 |
+
None => return false,
|
| 345 |
+
};
|
| 346 |
+
|
| 347 |
+
let aligned_new = Self::align_up(new_offset, self.cache_line_size);
|
| 348 |
+
|
| 349 |
+
if aligned_new == old_offset {
|
| 350 |
+
return true; // already there
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
if aligned_new + size > self.size {
|
| 354 |
+
return false;
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
// The destination range must be free (or be the source itself).
|
| 358 |
+
// We check by temporarily freeing the source and trying allocate_at.
|
| 359 |
+
// To avoid double-borrow, we do it manually.
|
| 360 |
+
|
| 361 |
+
// Check destination is free.
|
| 362 |
+
let dest_free = self.free_list.iter().any(|&(blk_off, blk_size)| {
|
| 363 |
+
blk_off <= aligned_new && aligned_new + size <= blk_off + blk_size
|
| 364 |
+
});
|
| 365 |
+
if !dest_free {
|
| 366 |
+
return false;
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
// SAFETY: Both source and destination are within [base, base+size).
|
| 370 |
+
// We checked all offsets above. src and dst may not overlap — if they
|
| 371 |
+
// do, memmove semantics are required; we use copy_nonoverlapping only
|
| 372 |
+
// when the ranges are disjoint, which is guaranteed because aligned_new
|
| 373 |
+
// comes from the free list (i.e., it does not overlap old_offset..old_offset+size).
|
| 374 |
+
unsafe {
|
| 375 |
+
let src = self.base.add(old_offset);
|
| 376 |
+
let dst = self.base.add(aligned_new);
|
| 377 |
+
std::ptr::copy(src, dst, size); // copy handles overlap correctly
|
| 378 |
+
}
|
| 379 |
+
|
| 380 |
+
// Update the free list: old range becomes free, new range consumed.
|
| 381 |
+
// We already verified new range is free, so remove it from free list.
|
| 382 |
+
let dest_idx = self
|
| 383 |
+
.free_list
|
| 384 |
+
.iter()
|
| 385 |
+
.position(|&(blk_off, blk_size)| {
|
| 386 |
+
blk_off <= aligned_new && aligned_new + size <= blk_off + blk_size
|
| 387 |
+
})
|
| 388 |
+
.unwrap();
|
| 389 |
+
let (blk_off, blk_size) = self.free_list.remove(dest_idx);
|
| 390 |
+
|
| 391 |
+
if blk_off < aligned_new {
|
| 392 |
+
self.free_list.push((blk_off, aligned_new - blk_off));
|
| 393 |
+
}
|
| 394 |
+
let blk_end = blk_off + blk_size;
|
| 395 |
+
let dest_end = aligned_new + size;
|
| 396 |
+
if dest_end < blk_end {
|
| 397 |
+
self.free_list.push((dest_end, blk_end - dest_end));
|
| 398 |
+
}
|
| 399 |
+
|
| 400 |
+
// Old range is now free.
|
| 401 |
+
self.free_list.push((old_offset, size));
|
| 402 |
+
self.free_list.sort_by_key(|&(off, _)| off);
|
| 403 |
+
self.coalesce();
|
| 404 |
+
|
| 405 |
+
self.allocations.insert(region_id, (aligned_new, size));
|
| 406 |
+
true
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
/// Get the current pointer for a region.
|
| 410 |
+
pub fn get_ptr(&self, region_id: u32) -> Option<*mut u8> {
|
| 411 |
+
self.allocations.get(®ion_id).map(|&(off, _)| {
|
| 412 |
+
// SAFETY: offset was validated at allocation time and is within
|
| 413 |
+
// the mmap'd region.
|
| 414 |
+
unsafe { self.base.add(off) }
|
| 415 |
+
})
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
/// Returns `(total_size, allocated_bytes, free_bytes)`.
|
| 419 |
+
pub fn get_stats(&self) -> (usize, usize, usize) {
|
| 420 |
+
let allocated: usize = self.allocations.values().map(|&(_, sz)| sz).sum();
|
| 421 |
+
let free: usize = self.free_list.iter().map(|&(_, sz)| sz).sum();
|
| 422 |
+
(self.size, allocated, free)
|
| 423 |
+
}
|
| 424 |
+
|
| 425 |
+
/// For each node that follows `current_node` in `chain`, emit a
|
| 426 |
+
/// `PrefetchInstruction` based on cumulative timing from the current node.
|
| 427 |
+
///
|
| 428 |
+
/// The prefetch addresses come from the arena's allocation map so they
|
| 429 |
+
/// point at actual data — regions not yet allocated are skipped.
|
| 430 |
+
pub fn prefetch_chain(
|
| 431 |
+
&self,
|
| 432 |
+
chain: &CausalChain,
|
| 433 |
+
current_node: u32,
|
| 434 |
+
) -> Vec<PrefetchInstruction> {
|
| 435 |
+
let mut instructions = Vec::new();
|
| 436 |
+
|
| 437 |
+
// Find the position of current_node in the chain.
|
| 438 |
+
let pos = match chain.nodes.iter().position(|&n| n == current_node) {
|
| 439 |
+
Some(p) => p,
|
| 440 |
+
None => return instructions,
|
| 441 |
+
};
|
| 442 |
+
|
| 443 |
+
// Accumulate timing from current_node outward.
|
| 444 |
+
let mut cumulative_ms = 0.0_f64;
|
| 445 |
+
|
| 446 |
+
for i in (pos + 1)..chain.nodes.len() {
|
| 447 |
+
// timing[i-1] is the gap between node[i-1] and node[i].
|
| 448 |
+
if let Some(&gap) = chain.timings_ms.get(i - 1) {
|
| 449 |
+
cumulative_ms += gap;
|
| 450 |
+
} else {
|
| 451 |
+
break;
|
| 452 |
+
}
|
| 453 |
+
|
| 454 |
+
let next_node = chain.nodes[i];
|
| 455 |
+
|
| 456 |
+
if let Some(&(offset, _)) = self.allocations.get(&next_node) {
|
| 457 |
+
let address = offset; // offset into arena; caller adds base if needed
|
| 458 |
+
let hint = PrefetchHint::from_timing(cumulative_ms);
|
| 459 |
+
|
| 460 |
+
// Emit the actual x86_64 prefetch instruction when possible.
|
| 461 |
+
#[cfg(target_arch = "x86_64")]
|
| 462 |
+
{
|
| 463 |
+
use core::arch::x86_64::{_mm_prefetch, _MM_HINT_T0, _MM_HINT_T1, _MM_HINT_T2};
|
| 464 |
+
// SAFETY: The pointer is within the mmap'd arena and the
|
| 465 |
+
// data is valid memory. Prefetch faults are suppressed by
|
| 466 |
+
// the CPU; worst case it's a no-op.
|
| 467 |
+
unsafe {
|
| 468 |
+
let ptr = self.base.add(offset) as *const i8;
|
| 469 |
+
match hint {
|
| 470 |
+
PrefetchHint::L1 => _mm_prefetch(ptr, _MM_HINT_T0),
|
| 471 |
+
PrefetchHint::L2 => _mm_prefetch(ptr, _MM_HINT_T1),
|
| 472 |
+
PrefetchHint::L3 => _mm_prefetch(ptr, _MM_HINT_T2),
|
| 473 |
+
PrefetchHint::None => {} // not worth it
|
| 474 |
+
}
|
| 475 |
+
}
|
| 476 |
+
}
|
| 477 |
+
|
| 478 |
+
instructions.push(PrefetchInstruction {
|
| 479 |
+
address,
|
| 480 |
+
hint,
|
| 481 |
+
predicted_ms: cumulative_ms,
|
| 482 |
+
});
|
| 483 |
+
}
|
| 484 |
+
}
|
| 485 |
+
|
| 486 |
+
instructions
|
| 487 |
+
}
|
| 488 |
+
}
|
| 489 |
+
|
| 490 |
+
impl Drop for CondensateArena {
|
| 491 |
+
fn drop(&mut self) {
|
| 492 |
+
if !self.base.is_null() {
|
| 493 |
+
// SAFETY: `self.base` was obtained from `libc::mmap` with
|
| 494 |
+
// `self.size` bytes. We own this mapping exclusively and are now
|
| 495 |
+
// releasing it. No references into the arena can outlive `self`
|
| 496 |
+
// because the raw pointers returned by `allocate`/`get_ptr` are
|
| 497 |
+
// not lifetime-tracked — callers must ensure they don't outlive
|
| 498 |
+
// the arena.
|
| 499 |
+
unsafe {
|
| 500 |
+
libc::munmap(self.base as *mut libc::c_void, self.size);
|
| 501 |
+
}
|
| 502 |
+
}
|
| 503 |
+
}
|
| 504 |
+
}
|
| 505 |
+
|
| 506 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 507 |
+
// Tests
|
| 508 |
+
// ────────────────────────────────────────────────────────────────────────────
|
| 509 |
+
|
| 510 |
+
#[cfg(test)]
mod tests {
    use super::*;

    // ── PrefetchHint ─────────────────────────────────────────────────────────

    /// Representative timings and the exact band boundaries map to the
    /// expected cache level.
    #[test]
    fn locality_test_prefetch_hint_mapping() {
        let expectations = [
            // Mid-band values.
            (0.5, PrefetchHint::L1),
            (3.0, PrefetchHint::L2),
            (10.0, PrefetchHint::L3),
            (50.0, PrefetchHint::None),
            // Boundary checks
            (0.999, PrefetchHint::L1),
            (1.0, PrefetchHint::L2),
            (5.0, PrefetchHint::L3),
            (20.0, PrefetchHint::L3),
            (20.001, PrefetchHint::None),
        ];

        for (predicted_ms, expected) in expectations {
            assert_eq!(PrefetchHint::from_timing(predicted_ms), expected);
        }
    }

    // ── LayoutPlan ───────────────────────────────────────────────────────────

    /// A single chain A→B→C is laid out in consecutive 64-byte slots and
    /// reported as one group.
    #[test]
    fn locality_test_layout_chain_adjacency() {
        let plan = LayoutPlan::compute(&[CausalChain::new(vec![1, 2, 3], vec![0.5, 0.5], 0.9)]);

        let a = plan.get_placement(1).expect("A not placed");
        let b = plan.get_placement(2).expect("B not placed");
        let c = plan.get_placement(3).expect("C not placed");

        // Each slot is one cache line (64 bytes).
        assert_eq!(b, a + 64, "B should be one cache line after A");
        assert_eq!(c, a + 128, "C should be two cache lines after A");

        // All three should be in the same group.
        let group = plan.get_chain_group(1).expect("no group for A");
        for member in [1u32, 2, 3] {
            assert!(group.contains(&member));
        }
    }

    /// A node shared by two chains is still placed, as is every other node.
    #[test]
    fn locality_test_layout_shared_node() {
        let chains = [
            CausalChain::new(vec![1, 2, 3], vec![1.0, 1.0], 0.9),
            CausalChain::new(vec![4, 2, 5], vec![1.0, 1.0], 0.5),
        ];
        let plan = LayoutPlan::compute(&chains);

        // All five nodes should have placements.
        for id in 1u32..=5 {
            assert!(plan.get_placement(id).is_some(), "node {id} not placed");
        }
        // Node 2 should be in a group.
        assert!(plan.get_chain_group(2).is_some());
    }

    // ── CondensateArena ──────────────────────────────────────────────────────

    /// Every pointer returned by `allocate` is 64-byte aligned.
    #[test]
    fn locality_test_arena_allocate_aligned() {
        let mut arena = CondensateArena::new(4096);

        for id in 0u32..8 {
            let addr = arena.allocate(id, 100).expect("allocation failed") as usize;
            assert_eq!(
                addr % 64,
                0,
                "allocation for region {id} is not 64-byte aligned"
            );
        }
    }

    /// Space released by `free` is handed out again, and the stats add up.
    #[test]
    fn locality_test_arena_allocate_free_reuse() {
        let mut arena = CondensateArena::new(4096);

        let off1 = arena.allocate(1, 64).expect("first alloc") as usize;
        arena.free(1);
        let off2 = arena.allocate(2, 64).expect("second alloc after free") as usize;

        // After a free + coalesce, the same offset should be reused.
        assert_eq!(off1, off2, "freed space should be reused");

        let (total, allocated, free) = arena.get_stats();
        assert_eq!(total, 4096);
        assert!(allocated > 0);
        assert_eq!(total, allocated + free);
    }

    /// `relocate` moves a region's bytes intact to the new offset.
    #[test]
    fn locality_test_arena_relocate() {
        let mut arena = CondensateArena::new(4096);

        // Allocate region 1 and write a known pattern.
        let ptr = arena.allocate(1, 64).expect("alloc");
        // SAFETY: ptr is valid for 64 bytes — we just allocated it.
        unsafe {
            for i in 0..64usize {
                ptr.add(i).write(i as u8);
            }
        }

        // Allocate and free region 2 to open a gap at a higher offset.
        let gap_ptr = arena.allocate(2, 64).expect("alloc 2");
        let new_offset = gap_ptr as usize - arena.base as usize;
        arena.free(2);

        // Relocate region 1 into that gap.
        assert!(arena.relocate(1, new_offset), "relocate failed");

        // Verify data integrity.
        let moved_ptr = arena.get_ptr(1).expect("ptr after relocate");
        for i in 0..64usize {
            // SAFETY: moved_ptr is valid for 64 bytes after a successful relocate.
            let byte = unsafe { moved_ptr.add(i).read() };
            assert_eq!(byte, i as u8, "data corruption at byte {i} after relocate");
        }
    }

    /// Freeing neighbouring regions leaves enough contiguous space for a
    /// larger allocation.
    #[test]
    fn locality_test_arena_coalesce() {
        let mut arena = CondensateArena::new(4096);

        // Fill arena with three adjacent regions.
        for id in 1u32..=3 {
            arena.allocate(id, 64).unwrap();
        }

        // Free all three — they should coalesce into one big block.
        for id in 1u32..=3 {
            arena.free(id);
        }

        // After coalescing we should be able to allocate a region larger than
        // one slot (e.g., 192 bytes spanning the three former slots).
        assert!(
            arena.allocate(99, 192).is_some(),
            "coalesced free space should satisfy 192-byte alloc"
        );
    }

    // ── Prefetch chain ───────────────────────────────────────────────────────

    /// Chain: A(10) →0.5ms→ B(11) →3ms→ C(12).
    /// From A we expect B (L1, 0.5ms) and C (L2, 3.5ms cumulative).
    #[test]
    fn locality_test_prefetch_chain_generation() {
        let chain = CausalChain::new(vec![10, 11, 12], vec![0.5, 3.0], 0.95);

        let mut arena = CondensateArena::new(4096);
        // Allocate all nodes so addresses are available.
        for id in [10u32, 11, 12] {
            arena.allocate(id, 64).unwrap();
        }

        let from_a = arena.prefetch_chain(&chain, 10);
        assert_eq!(from_a.len(), 2, "should emit prefetch for B and C");

        // First instruction: B, 0.5ms → L1
        assert_eq!(from_a[0].hint, PrefetchHint::L1);
        assert!((from_a[0].predicted_ms - 0.5).abs() < 1e-9);

        // Second instruction: C, 3.5ms cumulative → L2
        assert_eq!(from_a[1].hint, PrefetchHint::L2);
        assert!((from_a[1].predicted_ms - 3.5).abs() < 1e-9);

        // From B: only C should be prefetched.
        let from_b = arena.prefetch_chain(&chain, 11);
        assert_eq!(from_b.len(), 1);
        // 3.0ms is in [1.0, 5.0) → L2
        assert_eq!(from_b[0].hint, PrefetchHint::L2);

        // From C (tail): no prefetch.
        assert!(arena.prefetch_chain(&chain, 12).is_empty());

        // From a node not in chain: no prefetch.
        assert!(arena.prefetch_chain(&chain, 99).is_empty());
    }
}
|
|
@@ -19,9 +19,20 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
|
| 19 |
use std::sync::Mutex;
|
| 20 |
use std::collections::HashMap;
|
| 21 |
use std::time::Instant;
|
|
|
|
|
|
|
| 22 |
|
| 23 |
use crate::pipeline::{Pipeline, PipelineConfig};
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
/// Global state for the membrane
|
| 26 |
static INITIALIZED: AtomicBool = AtomicBool::new(false);
|
| 27 |
|
|
@@ -73,10 +84,51 @@ pub struct MembraneState {
|
|
| 73 |
sample_counter: u32,
|
| 74 |
/// Minimum allocation size to track (skip tiny allocs)
|
| 75 |
min_track_size: usize,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
impl MembraneState {
|
| 79 |
pub fn new() -> Self {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
Self {
|
| 81 |
start: Instant::now(),
|
| 82 |
active: HashMap::with_capacity(10_000),
|
|
@@ -95,10 +147,107 @@ impl MembraneState {
|
|
| 95 |
sample_rate: 100, // Track 1 in 100 allocs by default
|
| 96 |
sample_counter: 0,
|
| 97 |
min_track_size: 4096, // Skip allocs under 4KB
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
}
|
| 99 |
}
|
| 100 |
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
self.start.elapsed().as_nanos() as u64
|
| 103 |
}
|
| 104 |
|
|
@@ -248,6 +397,13 @@ impl MembraneSummary {
|
|
| 248 |
}
|
| 249 |
}
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
/// Global membrane state behind a mutex
|
| 252 |
static MEMBRANE: std::sync::LazyLock<Mutex<MembraneState>> =
|
| 253 |
std::sync::LazyLock::new(|| Mutex::new(MembraneState::new()));
|
|
@@ -260,8 +416,6 @@ static PIPELINE: std::sync::LazyLock<Mutex<Pipeline>> =
|
|
| 260 |
static SCAN_COUNTER: AtomicU64 = AtomicU64::new(0);
|
| 261 |
const SCAN_INTERVAL: u64 = 1_000; // scan every 1,000 allocs
|
| 262 |
|
| 263 |
-
// --- LD_PRELOAD hook functions ---
|
| 264 |
-
|
| 265 |
/// Get the original malloc function
|
| 266 |
unsafe fn real_malloc(size: size_t) -> *mut c_void {
|
| 267 |
type MallocFn = unsafe extern "C" fn(size_t) -> *mut c_void;
|
|
@@ -344,9 +498,24 @@ pub unsafe extern "C" fn free(ptr: *mut c_void) {
|
|
| 344 |
unsafe { real_free(ptr) }
|
| 345 |
}
|
| 346 |
|
| 347 |
-
/// Print full pipeline summary on process exit
|
| 348 |
#[unsafe(no_mangle)]
|
| 349 |
pub extern "C" fn condensate_summary() {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
// Membrane stats
|
| 351 |
if let Ok(state) = MEMBRANE.lock() {
|
| 352 |
state.summary().print();
|
|
@@ -363,13 +532,16 @@ pub extern "C" fn condensate_summary() {
|
|
| 363 |
static INIT: extern "C" fn() = {
|
| 364 |
extern "C" fn init() {
|
| 365 |
INITIALIZED.store(true, Ordering::SeqCst);
|
| 366 |
-
|
|
|
|
| 367 |
|
| 368 |
unsafe { libc::atexit(condensate_summary) };
|
| 369 |
}
|
| 370 |
init
|
| 371 |
};
|
| 372 |
|
|
|
|
|
|
|
| 373 |
#[cfg(test)]
|
| 374 |
mod tests {
|
| 375 |
use super::*;
|
|
@@ -421,4 +593,105 @@ mod tests {
|
|
| 421 |
let total_bucket_count: u64 = summary.buckets.iter().map(|b| b.count).sum();
|
| 422 |
assert_eq!(total_bucket_count, 5);
|
| 423 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
}
|
|
|
|
| 19 |
use std::sync::Mutex;
|
| 20 |
use std::collections::HashMap;
|
| 21 |
use std::time::Instant;
|
| 22 |
+
use std::fs;
|
| 23 |
+
use std::io::Write;
|
| 24 |
|
| 25 |
use crate::pipeline::{Pipeline, PipelineConfig};
|
| 26 |
|
| 27 |
+
/// Operating mode for the membrane.
///
/// Derives `Eq` alongside `PartialEq`: the enum has only unit variants, so
/// equality is total and the type can be used wherever `Eq` is required.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum MembraneMode {
    /// Record observations but don't feed the condenser
    ObserveOnly,
    /// Full condensation — observation + active pipeline feeding
    Active,
}
|
| 35 |
+
|
| 36 |
/// Global state for the membrane
|
| 37 |
static INITIALIZED: AtomicBool = AtomicBool::new(false);
|
| 38 |
|
|
|
|
| 84 |
sample_counter: u32,
|
| 85 |
/// Minimum allocation size to track (skip tiny allocs)
|
| 86 |
min_track_size: usize,
|
| 87 |
+
|
| 88 |
+
// --- Observe-only mode ---
|
| 89 |
+
/// Current operating mode (starts ObserveOnly)
|
| 90 |
+
pub mode: MembraneMode,
|
| 91 |
+
|
| 92 |
+
// --- Process identification ---
|
| 93 |
+
/// Name of this process (from /proc/self/exe)
|
| 94 |
+
pub process_name: String,
|
| 95 |
+
/// PID of this process
|
| 96 |
+
pub process_id: u32,
|
| 97 |
+
|
| 98 |
+
// --- Confidence gating ---
|
| 99 |
+
/// Number of observation cycles recorded
|
| 100 |
+
pub observation_cycles: u64,
|
| 101 |
+
/// Minimum cycles before mode can become Active
|
| 102 |
+
pub min_observation_cycles: u64,
|
| 103 |
+
|
| 104 |
+
// --- Self-interference detection ---
|
| 105 |
+
/// Timestamp (ns) when we transitioned from ObserveOnly → Active
|
| 106 |
+
pub engagement_timestamp_ns: Option<u64>,
|
| 107 |
+
|
| 108 |
+
// --- Canary system ---
|
| 109 |
+
/// Path to the active canary file (if armed)
|
| 110 |
+
pub canary_file: Option<String>,
|
| 111 |
+
/// How long (seconds) before a canary is considered expired
|
| 112 |
+
pub canary_timeout_s: u64,
|
| 113 |
+
|
| 114 |
+
// --- Quiet mode ---
|
| 115 |
+
/// Suppress all stdout/stderr output when true
|
| 116 |
+
pub quiet: bool,
|
| 117 |
}
|
| 118 |
|
| 119 |
impl MembraneState {
|
| 120 |
pub fn new() -> Self {
|
| 121 |
+
// Resolve process name from /proc/self/exe; fallback to "unknown"
|
| 122 |
+
let process_name = std::fs::read_link("/proc/self/exe")
|
| 123 |
+
.ok()
|
| 124 |
+
.and_then(|p| p.file_name().map(|n| n.to_string_lossy().into_owned()))
|
| 125 |
+
.unwrap_or_else(|| "unknown".to_string());
|
| 126 |
+
|
| 127 |
+
let process_id = std::process::id();
|
| 128 |
+
|
| 129 |
+
// Quiet mode: suppress output when CONDENSATE_QUIET is set
|
| 130 |
+
let quiet = std::env::var("CONDENSATE_QUIET").is_ok();
|
| 131 |
+
|
| 132 |
Self {
|
| 133 |
start: Instant::now(),
|
| 134 |
active: HashMap::with_capacity(10_000),
|
|
|
|
| 147 |
sample_rate: 100, // Track 1 in 100 allocs by default
|
| 148 |
sample_counter: 0,
|
| 149 |
min_track_size: 4096, // Skip allocs under 4KB
|
| 150 |
+
mode: MembraneMode::ObserveOnly,
|
| 151 |
+
process_name,
|
| 152 |
+
process_id,
|
| 153 |
+
observation_cycles: 0,
|
| 154 |
+
min_observation_cycles: 1000,
|
| 155 |
+
engagement_timestamp_ns: None,
|
| 156 |
+
canary_file: None,
|
| 157 |
+
canary_timeout_s: 60,
|
| 158 |
+
quiet,
|
| 159 |
+
}
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
// --- Observe-only mode ---
|
| 163 |
+
|
| 164 |
+
/// Return the current operating mode
|
| 165 |
+
pub fn mode(&self) -> MembraneMode {
|
| 166 |
+
self.mode
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
/// Set the operating mode directly
|
| 170 |
+
pub fn set_mode(&mut self, mode: MembraneMode) {
|
| 171 |
+
self.mode = mode;
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
// --- Confidence gating ---
|
| 175 |
+
|
| 176 |
+
/// Increment the observation cycle counter
|
| 177 |
+
pub fn record_cycle(&mut self) {
|
| 178 |
+
self.observation_cycles += 1;
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
/// True once enough cycles have been observed to trust the data
|
| 182 |
+
pub fn is_confident(&self) -> bool {
|
| 183 |
+
self.observation_cycles >= self.min_observation_cycles
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
// --- Self-interference detection ---
|
| 187 |
+
|
| 188 |
+
/// Report this process as potentially dangerous; append to the blacklist file
|
| 189 |
+
pub fn report_crash(&self) {
|
| 190 |
+
if let Ok(mut f) = std::fs::OpenOptions::new()
|
| 191 |
+
.create(true)
|
| 192 |
+
.append(true)
|
| 193 |
+
.open("/tmp/condensate_blacklist")
|
| 194 |
+
{
|
| 195 |
+
let _ = writeln!(f, "{}", self.process_name);
|
| 196 |
}
|
| 197 |
}
|
| 198 |
|
| 199 |
+
/// True if this process's name appears in the blacklist file
|
| 200 |
+
pub fn is_blacklisted(&self) -> bool {
|
| 201 |
+
fs::read_to_string("/tmp/condensate_blacklist")
|
| 202 |
+
.map(|contents| {
|
| 203 |
+
contents.lines().any(|line| line == self.process_name)
|
| 204 |
+
})
|
| 205 |
+
.unwrap_or(false)
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
// --- Canary system ---
|
| 209 |
+
|
| 210 |
+
/// Arm the canary: write a file with the engagement timestamp and timeout.
|
| 211 |
+
/// Also records engagement_timestamp_ns on the state and transitions to Active.
|
| 212 |
+
pub fn arm_canary(&mut self) {
|
| 213 |
+
let now_ns = self.elapsed_ns();
|
| 214 |
+
self.engagement_timestamp_ns = Some(now_ns);
|
| 215 |
+
self.mode = MembraneMode::Active;
|
| 216 |
+
|
| 217 |
+
let path = format!("/tmp/condensate_canary_{}", self.process_id);
|
| 218 |
+
if let Ok(mut f) = fs::File::create(&path) {
|
| 219 |
+
let _ = writeln!(f, "engagement_ns={}", now_ns);
|
| 220 |
+
let _ = writeln!(f, "timeout_s={}", self.canary_timeout_s);
|
| 221 |
+
}
|
| 222 |
+
self.canary_file = Some(path);
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
/// Confirm health: delete the canary file
|
| 226 |
+
pub fn confirm_canary(&mut self) {
|
| 227 |
+
if let Some(ref path) = self.canary_file {
|
| 228 |
+
let _ = fs::remove_file(path);
|
| 229 |
+
}
|
| 230 |
+
self.canary_file = None;
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
/// True if the canary was armed and has now exceeded its timeout
|
| 234 |
+
pub fn check_canary_expired(&self, now_ns: u64) -> bool {
|
| 235 |
+
match self.engagement_timestamp_ns {
|
| 236 |
+
Some(ts) => {
|
| 237 |
+
let elapsed_s = now_ns.saturating_sub(ts) / 1_000_000_000;
|
| 238 |
+
elapsed_s >= self.canary_timeout_s
|
| 239 |
+
}
|
| 240 |
+
None => false,
|
| 241 |
+
}
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
/// Rollback: revert to ObserveOnly and clean up the canary file
|
| 245 |
+
pub fn rollback(&mut self) {
|
| 246 |
+
self.mode = MembraneMode::ObserveOnly;
|
| 247 |
+
self.confirm_canary(); // deletes the canary file if present
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
pub fn elapsed_ns(&self) -> u64 {
|
| 251 |
self.start.elapsed().as_nanos() as u64
|
| 252 |
}
|
| 253 |
|
|
|
|
| 397 |
}
|
| 398 |
}
|
| 399 |
|
| 400 |
+
// --- LD_PRELOAD hook functions ---
|
| 401 |
+
// Only compiled when building the standalone preload .so.
|
| 402 |
+
// NOT active during tests or when used as a Python module.
|
| 403 |
+
#[cfg(feature = "preload")]
|
| 404 |
+
mod preload_hooks {
|
| 405 |
+
use super::*;
|
| 406 |
+
|
| 407 |
/// Global membrane state behind a mutex
|
| 408 |
static MEMBRANE: std::sync::LazyLock<Mutex<MembraneState>> =
|
| 409 |
std::sync::LazyLock::new(|| Mutex::new(MembraneState::new()));
|
|
|
|
| 416 |
static SCAN_COUNTER: AtomicU64 = AtomicU64::new(0);
|
| 417 |
const SCAN_INTERVAL: u64 = 1_000; // scan every 1,000 allocs
|
| 418 |
|
|
|
|
|
|
|
| 419 |
/// Get the original malloc function
|
| 420 |
unsafe fn real_malloc(size: size_t) -> *mut c_void {
|
| 421 |
type MallocFn = unsafe extern "C" fn(size_t) -> *mut c_void;
|
|
|
|
| 498 |
unsafe { real_free(ptr) }
|
| 499 |
}
|
| 500 |
|
| 501 |
+
/// Print full pipeline summary on process exit — only if process ran long enough
|
| 502 |
#[unsafe(no_mangle)]
|
| 503 |
pub extern "C" fn condensate_summary() {
|
| 504 |
+
// Only print for long-lived processes (>5 seconds)
|
| 505 |
+
// Short-lived commands (ls, grep, cat) shouldn't flood stderr
|
| 506 |
+
let (elapsed, quiet) = MEMBRANE.try_lock()
|
| 507 |
+
.map(|s| (s.elapsed_ns(), s.quiet))
|
| 508 |
+
.unwrap_or((0, false));
|
| 509 |
+
|
| 510 |
+
if elapsed < 5_000_000_000 {
|
| 511 |
+
return; // process ran < 5 seconds, skip summary
|
| 512 |
+
}
|
| 513 |
+
|
| 514 |
+
// Honour quiet mode — suppress all output
|
| 515 |
+
if quiet {
|
| 516 |
+
return;
|
| 517 |
+
}
|
| 518 |
+
|
| 519 |
// Membrane stats
|
| 520 |
if let Ok(state) = MEMBRANE.lock() {
|
| 521 |
state.summary().print();
|
|
|
|
| 532 |
static INIT: extern "C" fn() = {
|
| 533 |
extern "C" fn init() {
|
| 534 |
INITIALIZED.store(true, Ordering::SeqCst);
|
| 535 |
+
// Silent startup — don't spam every short-lived command
|
| 536 |
+
// Long-lived processes get their summary on exit
|
| 537 |
|
| 538 |
unsafe { libc::atexit(condensate_summary) };
|
| 539 |
}
|
| 540 |
init
|
| 541 |
};
|
| 542 |
|
| 543 |
+
} // mod preload_hooks
|
| 544 |
+
|
| 545 |
#[cfg(test)]
|
| 546 |
mod tests {
|
| 547 |
use super::*;
|
|
|
|
| 593 |
let total_bucket_count: u64 = summary.buckets.iter().map(|b| b.count).sum();
|
| 594 |
assert_eq!(total_bucket_count, 5);
|
| 595 |
}
|
| 596 |
+
|
| 597 |
+
/// A freshly constructed membrane must start in observe-only mode.
#[test]
fn test_observe_only_mode() {
    let fresh = MembraneState::new();
    let initial_mode = fresh.mode();
    assert_eq!(initial_mode, MembraneMode::ObserveOnly);
}
|
| 602 |
+
|
| 603 |
+
/// Confidence flips to true exactly when the cycle count reaches the minimum.
#[test]
fn test_confidence_gating() {
    let mut membrane = MembraneState::new();
    membrane.min_observation_cycles = 5;

    // No cycles recorded yet.
    assert!(!membrane.is_confident());

    // One short of the minimum: still not confident.
    (0..4).for_each(|_| membrane.record_cycle());
    assert!(!membrane.is_confident());

    // The fifth cycle reaches the minimum.
    membrane.record_cycle();
    assert!(membrane.is_confident());
}
|
| 620 |
+
|
| 621 |
+
/// Once confident, the membrane can be switched from ObserveOnly to Active.
#[test]
fn test_mode_transition() {
    let mut membrane = MembraneState::new();
    membrane.min_observation_cycles = 3;

    // Starts out observing.
    assert_eq!(membrane.mode(), MembraneMode::ObserveOnly);

    // Record exactly the minimum number of cycles.
    let mut recorded = 0;
    while recorded < 3 {
        membrane.record_cycle();
        recorded += 1;
    }
    assert!(membrane.is_confident());

    // An explicit mode change takes effect immediately.
    membrane.set_mode(MembraneMode::Active);
    assert_eq!(membrane.mode(), MembraneMode::Active);
}
|
| 636 |
+
|
| 637 |
+
/// `quiet` mirrors whether `CONDENSATE_QUIET` is present in the environment.
///
/// The environment is process-global, so the value that was set before the
/// test ran is saved and restored afterwards instead of being removed
/// unconditionally.
#[test]
fn test_quiet_mode() {
    let previous = std::env::var_os("CONDENSATE_QUIET");

    // Without the env var set, quiet should be false
    std::env::remove_var("CONDENSATE_QUIET");
    let state = MembraneState::new();
    assert!(!state.quiet);

    // With the env var set, quiet should be true
    std::env::set_var("CONDENSATE_QUIET", "1");
    let state_quiet = MembraneState::new();
    assert!(state_quiet.quiet);

    // Put the environment back the way it was found
    match previous {
        Some(value) => std::env::set_var("CONDENSATE_QUIET", value),
        None => std::env::remove_var("CONDENSATE_QUIET"),
    }
}
|
| 652 |
+
|
| 653 |
+
/// Arming creates the canary file and activates the membrane; confirming
/// removes the file and clears the stored path.
#[test]
fn test_canary_arm_and_confirm() {
    let mut membrane = MembraneState::new();

    // Nothing is armed on a fresh state.
    assert!(membrane.canary_file.is_none());

    membrane.arm_canary();

    // Armed: path recorded, file on disk, mode Active, timestamp stored.
    let armed_path = membrane
        .canary_file
        .clone()
        .expect("canary_file should be set after arm_canary");
    let on_disk = std::path::Path::new(&armed_path);
    assert!(on_disk.exists(), "canary file should exist after arm_canary");
    assert_eq!(membrane.mode(), MembraneMode::Active);
    assert!(membrane.engagement_timestamp_ns.is_some());

    membrane.confirm_canary();

    // Confirmed: path cleared and file gone.
    assert!(membrane.canary_file.is_none());
    assert!(!on_disk.exists(), "canary file should be removed after confirm_canary");
}
|
| 676 |
+
|
| 677 |
+
/// Expiry is decided purely from the engagement timestamp and the timeout.
///
/// The armed state is set up directly on the struct instead of calling
/// `arm_canary`. `arm_canary` writes `/tmp/condensate_canary_<pid>`, and every
/// test in this binary shares one PID, so arming here would create and remove
/// the same file that `test_canary_arm_and_confirm` asserts on and the two
/// tests could race when run on parallel test threads.
#[test]
fn test_canary_expiry() {
    let mut state = MembraneState::new();
    state.canary_timeout_s = 2; // 2-second timeout

    // Never armed: cannot be expired, no matter how late it is
    assert!(!state.check_canary_expired(u64::MAX));

    // Simulate an armed canary without touching the filesystem
    let armed_ns = state.elapsed_ns();
    state.engagement_timestamp_ns = Some(armed_ns);
    state.canary_file = Some(format!(
        "/tmp/condensate_canary_expiry_test_{}",
        state.process_id
    ));

    // A timestamp just before expiry should not be expired
    let before_expiry_ns = armed_ns + 1_000_000_000; // 1 second later
    assert!(!state.check_canary_expired(before_expiry_ns));

    // Reaching the timeout exactly counts as expired
    let at_expiry_ns = armed_ns + 2_000_000_000; // 2 seconds later
    assert!(state.check_canary_expired(at_expiry_ns));

    // A timestamp past the timeout should report expired
    let after_expiry_ns = armed_ns + 3_000_000_000; // 3 seconds later
    assert!(state.check_canary_expired(after_expiry_ns));
}
|
| 697 |
}
|
|
@@ -9,7 +9,7 @@
|
|
| 9 |
//! LD_PRELOAD hooks. Every allocation event flows through the graph,
|
| 10 |
//! triggers predictions, and the condenser acts on them.
|
| 11 |
|
| 12 |
-
use std::
|
| 13 |
use std::time::Instant;
|
| 14 |
|
| 15 |
use crate::graph::AccessGraph;
|
|
@@ -17,6 +17,21 @@ use crate::predictor::RustPredictor;
|
|
| 17 |
use crate::condenser::{Condenser, CondenserConfig};
|
| 18 |
use crate::lenia::LeniaField;
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
/// Pipeline configuration
|
| 21 |
pub struct PipelineConfig {
|
| 22 |
/// Graph causal window (ns)
|
|
@@ -31,6 +46,9 @@ pub struct PipelineConfig {
|
|
| 31 |
pub graph_rebuild_interval: usize,
|
| 32 |
/// Minimum prediction confidence to act on
|
| 33 |
pub prediction_threshold: f64,
|
|
|
|
|
|
|
|
|
|
| 34 |
}
|
| 35 |
|
| 36 |
impl Default for PipelineConfig {
|
|
@@ -42,6 +60,7 @@ impl Default for PipelineConfig {
|
|
| 42 |
min_manage_size: 4_096, // 4KB
|
| 43 |
graph_rebuild_interval: 500, // rebuild graph every 500 events
|
| 44 |
prediction_threshold: 0.3, // act on predictions with >30% confidence
|
|
|
|
| 45 |
}
|
| 46 |
}
|
| 47 |
}
|
|
@@ -99,7 +118,27 @@ pub struct Pipeline {
|
|
| 99 |
/// Lenia step counter (step every N events)
|
| 100 |
field_step_counter: u64,
|
| 101 |
|
| 102 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
pub events_processed: u64,
|
| 104 |
pub predictions_fired: u64,
|
| 105 |
pub predictions_acted: u64,
|
|
@@ -109,10 +148,23 @@ pub struct Pipeline {
|
|
| 109 |
}
|
| 110 |
|
| 111 |
impl Pipeline {
|
|
|
|
| 112 |
pub fn new(config: PipelineConfig) -> Self {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
let condenser_config = CondenserConfig {
|
| 114 |
idle_threshold_ns: config.idle_threshold_ns,
|
| 115 |
min_manage_size: config.min_manage_size,
|
|
|
|
| 116 |
..Default::default()
|
| 117 |
};
|
| 118 |
|
|
@@ -131,6 +183,11 @@ impl Pipeline {
|
|
| 131 |
path_counter: 0,
|
| 132 |
start: Instant::now(),
|
| 133 |
field_step_counter: 0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
events_processed: 0,
|
| 135 |
predictions_fired: 0,
|
| 136 |
predictions_acted: 0,
|
|
@@ -181,13 +238,9 @@ impl Pipeline {
|
|
| 181 |
|
| 182 |
/// Process a single allocation event through the full pipeline.
|
| 183 |
///
|
| 184 |
-
///
|
| 185 |
-
///
|
| 186 |
-
///
|
| 187 |
-
/// 3. Record in event buffer (for graph learning)
|
| 188 |
-
/// 4. If graph is learned, predict what's next
|
| 189 |
-
/// 5. Pre-promote predicted regions
|
| 190 |
-
/// 6. Periodically step the Lenia field (continuous dynamics)
|
| 191 |
pub fn process_alloc(&mut self, address: usize, size: usize) {
|
| 192 |
self.events_processed += 1;
|
| 193 |
let ts = self.elapsed_ns();
|
|
@@ -197,60 +250,75 @@ impl Pipeline {
|
|
| 197 |
return;
|
| 198 |
}
|
| 199 |
|
| 200 |
-
//
|
| 201 |
-
self.
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
self.
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
}
|
| 225 |
-
self.predictions_acted += 1;
|
| 226 |
-
break;
|
| 227 |
}
|
| 228 |
}
|
| 229 |
}
|
| 230 |
}
|
| 231 |
-
}
|
| 232 |
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
|
|
|
| 248 |
}
|
| 249 |
}
|
| 250 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
}
|
| 252 |
|
| 253 |
-
// 6. Periodically rebuild graph and retrain predictor
|
| 254 |
if self.event_buffer.len() >= self.config.graph_rebuild_interval {
|
| 255 |
self.rebuild_graph();
|
| 256 |
}
|
|
@@ -263,7 +331,7 @@ impl Pipeline {
|
|
| 263 |
}
|
| 264 |
let id = self.next_field_id;
|
| 265 |
self.next_field_id += 1;
|
| 266 |
-
self.field.add_region(id, size_bytes);
|
| 267 |
self.address_to_field_id.insert(address, id);
|
| 268 |
id
|
| 269 |
}
|
|
@@ -275,7 +343,8 @@ impl Pipeline {
|
|
| 275 |
self.address_to_field_id.remove(&address);
|
| 276 |
}
|
| 277 |
|
| 278 |
-
/// Rebuild the graph from accumulated events and retrain the predictor
|
|
|
|
| 279 |
fn rebuild_graph(&mut self) {
|
| 280 |
// Build fresh graph from accumulated events
|
| 281 |
let mut new_graph = AccessGraph::new(
|
|
@@ -288,21 +357,87 @@ impl Pipeline {
|
|
| 288 |
let mut new_predictor = RustPredictor::new();
|
| 289 |
new_predictor.learn(&new_graph);
|
| 290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
self.graph = new_graph;
|
| 292 |
self.predictor = new_predictor;
|
| 293 |
self.graph_rebuilds += 1;
|
|
|
|
| 294 |
|
| 295 |
// Keep last 20% of events for continuity
|
| 296 |
let keep = self.event_buffer.len() / 5;
|
| 297 |
let drain_to = self.event_buffer.len() - keep;
|
| 298 |
self.event_buffer.drain(..drain_to);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
}
|
| 300 |
|
| 301 |
-
/// Run the condenser's compression scan
|
| 302 |
-
/// Call this periodically (e.g., every second)
|
|
|
|
|
|
|
| 303 |
pub fn scan(&mut self) -> (u32, u64) {
|
| 304 |
let (count, saved) = self.condenser.scan_and_compress();
|
| 305 |
self.compressions += count as u64;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
(count, saved)
|
| 307 |
}
|
| 308 |
|
|
@@ -311,6 +446,26 @@ impl Pipeline {
|
|
| 311 |
self.condenser.touch(address);
|
| 312 |
}
|
| 313 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
/// Get pipeline summary
|
| 315 |
pub fn summary(&self) -> PipelineSummary {
|
| 316 |
let condenser_summary = self.condenser.summary();
|
|
@@ -331,6 +486,58 @@ impl Pipeline {
|
|
| 331 |
}
|
| 332 |
}
|
| 333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
/// Full pipeline summary
|
| 335 |
#[derive(Clone, Debug)]
|
| 336 |
pub struct PipelineSummary {
|
|
@@ -409,6 +616,8 @@ impl PipelineSummary {
|
|
| 409 |
mod tests {
|
| 410 |
use super::*;
|
| 411 |
|
|
|
|
|
|
|
| 412 |
#[test]
|
| 413 |
fn test_pipeline_basic_flow() {
|
| 414 |
let mut pipeline = Pipeline::new(PipelineConfig {
|
|
@@ -441,6 +650,7 @@ mod tests {
|
|
| 441 |
min_manage_size: 1024,
|
| 442 |
idle_threshold_ns: 0, // compress immediately
|
| 443 |
prediction_threshold: 0.1, // low threshold to see predictions act
|
|
|
|
| 444 |
..Default::default()
|
| 445 |
});
|
| 446 |
|
|
@@ -473,6 +683,7 @@ mod tests {
|
|
| 473 |
min_manage_size: 1024,
|
| 474 |
idle_threshold_ns: 0, // compress immediately
|
| 475 |
graph_rebuild_interval: 1000, // don't rebuild during this test
|
|
|
|
| 476 |
..Default::default()
|
| 477 |
});
|
| 478 |
|
|
@@ -517,6 +728,7 @@ mod tests {
|
|
| 517 |
min_manage_size: 4096,
|
| 518 |
idle_threshold_ns: 0,
|
| 519 |
prediction_threshold: 0.3,
|
|
|
|
| 520 |
..Default::default()
|
| 521 |
});
|
| 522 |
|
|
@@ -550,4 +762,196 @@ mod tests {
|
|
| 550 |
assert!(summary.graph_rebuilds >= 1,
|
| 551 |
"Graph should have rebuilt at least once");
|
| 552 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
}
|
|
|
|
| 9 |
//! LD_PRELOAD hooks. Every allocation event flows through the graph,
|
| 10 |
//! triggers predictions, and the condenser acts on them.
|
| 11 |
|
| 12 |
+
use std::collections::HashMap;
|
| 13 |
use std::time::Instant;
|
| 14 |
|
| 15 |
use crate::graph::AccessGraph;
|
|
|
|
| 17 |
use crate::condenser::{Condenser, CondenserConfig};
|
| 18 |
use crate::lenia::LeniaField;
|
| 19 |
|
| 20 |
+
/// Operating mode of a pipeline: decides whether predictions are acted on.
///
/// Learning happens in every mode; the mode only controls whether the
/// pipeline compresses.
///
/// * `Observing` is promoted to `Active` once the confidence threshold is met.
/// * `Blacklisted` is permanent: it never acts and never transitions.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PipelineMode {
    /// Learning phase: the graph and predictor train while the condenser
    /// stays silent.
    Observing,
    /// Fully operational: the condenser compresses and pre-promotes.
    Active,
    /// Permanently silenced: no transitions and no compression.
    Blacklisted,
}
|
| 34 |
+
|
| 35 |
/// Pipeline configuration
|
| 36 |
pub struct PipelineConfig {
|
| 37 |
/// Graph causal window (ns)
|
|
|
|
| 46 |
pub graph_rebuild_interval: usize,
|
| 47 |
/// Minimum prediction confidence to act on
|
| 48 |
pub prediction_threshold: f64,
|
| 49 |
+
/// Enable test mode — condenser generates synthetic data instead of reading
|
| 50 |
+
/// from raw memory pointers. Required when using fake addresses in tests.
|
| 51 |
+
pub test_mode: bool,
|
| 52 |
}
|
| 53 |
|
| 54 |
impl Default for PipelineConfig {
|
|
|
|
| 60 |
min_manage_size: 4_096, // 4KB
|
| 61 |
graph_rebuild_interval: 500, // rebuild graph every 500 events
|
| 62 |
prediction_threshold: 0.3, // act on predictions with >30% confidence
|
| 63 |
+
test_mode: false,
|
| 64 |
}
|
| 65 |
}
|
| 66 |
}
|
|
|
|
| 118 |
/// Lenia step counter (step every N events)
|
| 119 |
field_step_counter: u64,
|
| 120 |
|
| 121 |
+
// ── Mode & safety model ───────────────────────────────────────────────
|
| 122 |
+
|
| 123 |
+
/// Current operating mode
|
| 124 |
+
pub mode: PipelineMode,
|
| 125 |
+
|
| 126 |
+
/// How many graph rebuilds have occurred since creation
|
| 127 |
+
/// (used for transition gate — separate from the public stats counter)
|
| 128 |
+
mode_rebuilds: u32,
|
| 129 |
+
|
| 130 |
+
/// Last measured prediction accuracy (0.0–100.0, from ScoreResult.accuracy)
|
| 131 |
+
pub last_prediction_accuracy: f64,
|
| 132 |
+
|
| 133 |
+
/// How many process_alloc calls have occurred while in Active mode
|
| 134 |
+
pub active_cycles: u64,
|
| 135 |
+
|
| 136 |
+
/// Timestamps (ns) of recent scan_and_compress calls that compressed something.
|
| 137 |
+
/// Ring-buffered: keeps last 100 entries.
|
| 138 |
+
pub condensation_timestamps: Vec<u64>,
|
| 139 |
+
|
| 140 |
+
// ── Stats ─────────────────────────────────────────────────────────────
|
| 141 |
+
|
| 142 |
pub events_processed: u64,
|
| 143 |
pub predictions_fired: u64,
|
| 144 |
pub predictions_acted: u64,
|
|
|
|
| 148 |
}
|
| 149 |
|
| 150 |
impl Pipeline {
|
| 151 |
+
/// Create a new pipeline in **Active** mode (backward-compatible default).
|
| 152 |
pub fn new(config: PipelineConfig) -> Self {
|
| 153 |
+
Self::new_with_mode(config, PipelineMode::Active)
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
/// Create a new pipeline in **Observing** mode.
|
| 157 |
+
/// The substrate learns immediately; compression is gated until
|
| 158 |
+
/// `check_transition()` promotes it to Active.
|
| 159 |
+
pub fn new_observing(config: PipelineConfig) -> Self {
|
| 160 |
+
Self::new_with_mode(config, PipelineMode::Observing)
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
fn new_with_mode(config: PipelineConfig, mode: PipelineMode) -> Self {
|
| 164 |
let condenser_config = CondenserConfig {
|
| 165 |
idle_threshold_ns: config.idle_threshold_ns,
|
| 166 |
min_manage_size: config.min_manage_size,
|
| 167 |
+
test_mode: config.test_mode,
|
| 168 |
..Default::default()
|
| 169 |
};
|
| 170 |
|
|
|
|
| 183 |
path_counter: 0,
|
| 184 |
start: Instant::now(),
|
| 185 |
field_step_counter: 0,
|
| 186 |
+
mode,
|
| 187 |
+
mode_rebuilds: 0,
|
| 188 |
+
last_prediction_accuracy: 0.0,
|
| 189 |
+
active_cycles: 0,
|
| 190 |
+
condensation_timestamps: Vec::with_capacity(100),
|
| 191 |
events_processed: 0,
|
| 192 |
predictions_fired: 0,
|
| 193 |
predictions_acted: 0,
|
|
|
|
| 238 |
|
| 239 |
/// Process a single allocation event through the full pipeline.
|
| 240 |
///
|
| 241 |
+
/// Graph building and predictor learning happen in ALL modes.
|
| 242 |
+
/// Condenser registration, pre-promote, and scan are gated to Active mode.
|
| 243 |
+
/// The substrate always learns — it just doesn't act until Active.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
pub fn process_alloc(&mut self, address: usize, size: usize) {
|
| 245 |
self.events_processed += 1;
|
| 246 |
let ts = self.elapsed_ns();
|
|
|
|
| 250 |
return;
|
| 251 |
}
|
| 252 |
|
| 253 |
+
// Track active_cycles — graduated engagement ramp
|
| 254 |
+
if self.mode == PipelineMode::Active {
|
| 255 |
+
self.active_cycles += 1;
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
let threshold = self.effective_threshold();
|
| 259 |
+
|
| 260 |
+
if self.mode == PipelineMode::Active {
|
| 261 |
+
// 1. Register with condenser AND Lenia field
|
| 262 |
+
self.condenser.register(address, size);
|
| 263 |
+
let field_id = self.get_or_create_field_id(address, size as u64);
|
| 264 |
+
|
| 265 |
+
// 2. Heat the field — this access injects energy
|
| 266 |
+
self.field.access(field_id);
|
| 267 |
+
|
| 268 |
+
// 3. Record for graph learning
|
| 269 |
+
let path = self.get_path(address, size);
|
| 270 |
+
self.event_buffer.push((ts, path.clone(), size as u64));
|
| 271 |
+
|
| 272 |
+
// 4. If predictor is learned, fire predictions
|
| 273 |
+
if self.predictor.is_learned() {
|
| 274 |
+
let predictions = self.predictor.predict(&path, 5);
|
| 275 |
+
self.predictions_fired += predictions.len() as u64;
|
| 276 |
+
|
| 277 |
+
for pred in &predictions {
|
| 278 |
+
if pred.confidence >= threshold {
|
| 279 |
+
for (&addr, p) in &self.address_to_path {
|
| 280 |
+
if *p == pred.path {
|
| 281 |
+
self.condenser.pre_promote(addr);
|
| 282 |
+
// Also heat the predicted region in the field
|
| 283 |
+
if let Some(&fid) = self.address_to_field_id.get(&addr) {
|
| 284 |
+
self.field.access(fid);
|
| 285 |
+
}
|
| 286 |
+
self.predictions_acted += 1;
|
| 287 |
+
break;
|
| 288 |
}
|
|
|
|
|
|
|
| 289 |
}
|
| 290 |
}
|
| 291 |
}
|
| 292 |
}
|
|
|
|
| 293 |
|
| 294 |
+
// 5. Periodically step the Lenia field
|
| 295 |
+
self.field_step_counter += 1;
|
| 296 |
+
if self.field_step_counter % 100 == 0 {
|
| 297 |
+
self.field.step();
|
| 298 |
+
self.lenia_steps += 1;
|
| 299 |
+
|
| 300 |
+
// Use Lenia's cold regions to drive condenser compression
|
| 301 |
+
let cold = self.field.get_cold_regions();
|
| 302 |
+
for (cold_id, _temp) in &cold {
|
| 303 |
+
// Find the address for this cold field region
|
| 304 |
+
for (&addr, &fid) in &self.address_to_field_id {
|
| 305 |
+
if fid == *cold_id {
|
| 306 |
+
// Tell condenser this region is cold
|
| 307 |
+
self.condenser.touch(addr); // mark for idle detection
|
| 308 |
+
break;
|
| 309 |
+
}
|
| 310 |
}
|
| 311 |
}
|
| 312 |
}
|
| 313 |
+
} else {
|
| 314 |
+
// Observing or Blacklisted — substrate still learns, condenser is silent
|
| 315 |
+
|
| 316 |
+
// Record for graph learning (no condenser registration)
|
| 317 |
+
let path = self.get_path(address, size);
|
| 318 |
+
self.event_buffer.push((ts, path, size as u64));
|
| 319 |
}
|
| 320 |
|
| 321 |
+
// 6. Periodically rebuild graph and retrain predictor (all modes)
|
| 322 |
if self.event_buffer.len() >= self.config.graph_rebuild_interval {
|
| 323 |
self.rebuild_graph();
|
| 324 |
}
|
|
|
|
| 331 |
}
|
| 332 |
let id = self.next_field_id;
|
| 333 |
self.next_field_id += 1;
|
| 334 |
+
self.field.add_region(id, size_bytes as usize, 0);
|
| 335 |
self.address_to_field_id.insert(address, id);
|
| 336 |
id
|
| 337 |
}
|
|
|
|
| 343 |
self.address_to_field_id.remove(&address);
|
| 344 |
}
|
| 345 |
|
| 346 |
+
/// Rebuild the graph from accumulated events and retrain the predictor.
|
| 347 |
+
/// Called automatically from process_alloc when the event buffer fills.
|
| 348 |
fn rebuild_graph(&mut self) {
|
| 349 |
// Build fresh graph from accumulated events
|
| 350 |
let mut new_graph = AccessGraph::new(
|
|
|
|
| 357 |
let mut new_predictor = RustPredictor::new();
|
| 358 |
new_predictor.learn(&new_graph);
|
| 359 |
|
| 360 |
+
// Score the new predictor against the buffer we just trained on
|
| 361 |
+
if new_predictor.is_learned() && !self.event_buffer.is_empty() {
|
| 362 |
+
let score = new_predictor.score(self.event_buffer.clone());
|
| 363 |
+
self.last_prediction_accuracy = score.accuracy;
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
self.graph = new_graph;
|
| 367 |
self.predictor = new_predictor;
|
| 368 |
self.graph_rebuilds += 1;
|
| 369 |
+
self.mode_rebuilds += 1;
|
| 370 |
|
| 371 |
// Keep last 20% of events for continuity
|
| 372 |
let keep = self.event_buffer.len() / 5;
|
| 373 |
let drain_to = self.event_buffer.len() - keep;
|
| 374 |
self.event_buffer.drain(..drain_to);
|
| 375 |
+
|
| 376 |
+
// Check mode transition after each rebuild
|
| 377 |
+
self.check_transition();
|
| 378 |
+
}
|
| 379 |
+
|
| 380 |
+
/// Check whether the pipeline should transition from Observing → Active.
|
| 381 |
+
///
|
| 382 |
+
/// Transition gates:
|
| 383 |
+
/// - mode must be Observing
|
| 384 |
+
/// - at least 3 graph rebuilds since creation
|
| 385 |
+
/// - last_prediction_accuracy >= 40.0
|
| 386 |
+
///
|
| 387 |
+
/// Blacklisted pipelines never transition.
|
| 388 |
+
///
|
| 389 |
+
/// Returns true if a transition occurred.
|
| 390 |
+
pub fn check_transition(&mut self) -> bool {
|
| 391 |
+
match self.mode {
|
| 392 |
+
PipelineMode::Blacklisted => false,
|
| 393 |
+
PipelineMode::Active => false,
|
| 394 |
+
PipelineMode::Observing => {
|
| 395 |
+
if self.mode_rebuilds >= 3
|
| 396 |
+
&& self.last_prediction_accuracy >= 40.0
|
| 397 |
+
{
|
| 398 |
+
self.mode = PipelineMode::Active;
|
| 399 |
+
true
|
| 400 |
+
} else {
|
| 401 |
+
false
|
| 402 |
+
}
|
| 403 |
+
}
|
| 404 |
+
}
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
/// Effective compression threshold — graduated engagement ramp.
|
| 408 |
+
///
|
| 409 |
+
/// New pipelines start conservative (0.8) and relax over time.
|
| 410 |
+
/// Non-Active pipelines return 1.0 so nothing ever compresses.
|
| 411 |
+
pub fn effective_threshold(&self) -> f64 {
|
| 412 |
+
match self.mode {
|
| 413 |
+
PipelineMode::Active => {
|
| 414 |
+
if self.active_cycles < 100 {
|
| 415 |
+
0.8
|
| 416 |
+
} else if self.active_cycles < 1100 {
|
| 417 |
+
0.5
|
| 418 |
+
} else {
|
| 419 |
+
self.config.prediction_threshold
|
| 420 |
+
}
|
| 421 |
+
}
|
| 422 |
+
_ => 1.0, // Never compress when not Active
|
| 423 |
+
}
|
| 424 |
}
|
| 425 |
|
| 426 |
+
/// Run the condenser's compression scan.
|
| 427 |
+
/// Call this periodically (e.g., every second).
|
| 428 |
+
///
|
| 429 |
+
/// Records condensation timestamps for crash correlation when compression occurs.
|
| 430 |
pub fn scan(&mut self) -> (u32, u64) {
|
| 431 |
let (count, saved) = self.condenser.scan_and_compress();
|
| 432 |
self.compressions += count as u64;
|
| 433 |
+
if count > 0 {
|
| 434 |
+
// Record timestamp for crash correlation (ring buffer, last 100)
|
| 435 |
+
let ts = self.elapsed_ns();
|
| 436 |
+
if self.condensation_timestamps.len() >= 100 {
|
| 437 |
+
self.condensation_timestamps.remove(0);
|
| 438 |
+
}
|
| 439 |
+
self.condensation_timestamps.push(ts);
|
| 440 |
+
}
|
| 441 |
(count, saved)
|
| 442 |
}
|
| 443 |
|
|
|
|
| 446 |
self.condenser.touch(address);
|
| 447 |
}
|
| 448 |
|
| 449 |
+
/// Report that the monitored process died at `death_ns` (nanoseconds,
|
| 450 |
+
/// same epoch as `elapsed_ns`).
|
| 451 |
+
///
|
| 452 |
+
/// Returns true if any recorded condensation event occurred within 5 seconds
|
| 453 |
+
/// of the death — suggesting the condenser may have interfered.
|
| 454 |
+
pub fn report_process_death(&mut self, death_ns: u64) -> bool {
|
| 455 |
+
const WINDOW_NS: u64 = 5_000_000_000;
|
| 456 |
+
for &ts in &self.condensation_timestamps {
|
| 457 |
+
let delta = if death_ns >= ts {
|
| 458 |
+
death_ns - ts
|
| 459 |
+
} else {
|
| 460 |
+
ts - death_ns
|
| 461 |
+
};
|
| 462 |
+
if delta <= WINDOW_NS {
|
| 463 |
+
return true;
|
| 464 |
+
}
|
| 465 |
+
}
|
| 466 |
+
false
|
| 467 |
+
}
|
| 468 |
+
|
| 469 |
/// Get pipeline summary
|
| 470 |
pub fn summary(&self) -> PipelineSummary {
|
| 471 |
let condenser_summary = self.condenser.summary();
|
|
|
|
| 486 |
}
|
| 487 |
}
|
| 488 |
|
| 489 |
+
/// Per-process pipeline map — routes allocation events to the correct pipeline
|
| 490 |
+
/// based on PID. Each process gets its own isolated pipeline starting in
|
| 491 |
+
/// Observing mode.
|
| 492 |
+
pub struct ProcessPipelineMap {
|
| 493 |
+
pipelines: HashMap<u32, Pipeline>,
|
| 494 |
+
config: PipelineConfig,
|
| 495 |
+
}
|
| 496 |
+
|
| 497 |
+
impl ProcessPipelineMap {
|
| 498 |
+
pub fn new(config: PipelineConfig) -> Self {
|
| 499 |
+
Self {
|
| 500 |
+
pipelines: HashMap::new(),
|
| 501 |
+
config,
|
| 502 |
+
}
|
| 503 |
+
}
|
| 504 |
+
|
| 505 |
+
/// Get or create the pipeline for a given PID.
|
| 506 |
+
/// New pipelines start in Observing mode.
|
| 507 |
+
pub fn get_or_create(&mut self, pid: u32) -> &mut Pipeline {
|
| 508 |
+
if !self.pipelines.contains_key(&pid) {
|
| 509 |
+
let pipeline = Pipeline::new_observing(PipelineConfig {
|
| 510 |
+
causal_window_ns: self.config.causal_window_ns,
|
| 511 |
+
cluster_threshold: self.config.cluster_threshold,
|
| 512 |
+
idle_threshold_ns: self.config.idle_threshold_ns,
|
| 513 |
+
min_manage_size: self.config.min_manage_size,
|
| 514 |
+
graph_rebuild_interval: self.config.graph_rebuild_interval,
|
| 515 |
+
prediction_threshold: self.config.prediction_threshold,
|
| 516 |
+
test_mode: self.config.test_mode,
|
| 517 |
+
});
|
| 518 |
+
self.pipelines.insert(pid, pipeline);
|
| 519 |
+
}
|
| 520 |
+
self.pipelines.get_mut(&pid).unwrap()
|
| 521 |
+
}
|
| 522 |
+
|
| 523 |
+
/// Route an allocation event to the correct process pipeline.
|
| 524 |
+
pub fn process_alloc_global(&mut self, pid: u32, address: usize, size: usize) {
|
| 525 |
+
self.get_or_create(pid).process_alloc(address, size);
|
| 526 |
+
}
|
| 527 |
+
|
| 528 |
+
/// Route a free event to the correct process pipeline.
|
| 529 |
+
pub fn process_free_global(&mut self, pid: u32, address: usize) {
|
| 530 |
+
if let Some(pipeline) = self.pipelines.get_mut(&pid) {
|
| 531 |
+
pipeline.process_free(address);
|
| 532 |
+
}
|
| 533 |
+
}
|
| 534 |
+
|
| 535 |
+
/// Number of tracked processes.
|
| 536 |
+
pub fn process_count(&self) -> usize {
|
| 537 |
+
self.pipelines.len()
|
| 538 |
+
}
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
/// Full pipeline summary
|
| 542 |
#[derive(Clone, Debug)]
|
| 543 |
pub struct PipelineSummary {
|
|
|
|
| 616 |
mod tests {
|
| 617 |
use super::*;
|
| 618 |
|
| 619 |
+
// ── Existing tests (must continue to pass) ────────────────────────────
|
| 620 |
+
|
| 621 |
#[test]
|
| 622 |
fn test_pipeline_basic_flow() {
|
| 623 |
let mut pipeline = Pipeline::new(PipelineConfig {
|
|
|
|
| 650 |
min_manage_size: 1024,
|
| 651 |
idle_threshold_ns: 0, // compress immediately
|
| 652 |
prediction_threshold: 0.1, // low threshold to see predictions act
|
| 653 |
+
test_mode: true, // fake addresses — use synthetic data
|
| 654 |
..Default::default()
|
| 655 |
});
|
| 656 |
|
|
|
|
| 683 |
min_manage_size: 1024,
|
| 684 |
idle_threshold_ns: 0, // compress immediately
|
| 685 |
graph_rebuild_interval: 1000, // don't rebuild during this test
|
| 686 |
+
test_mode: true, // fake addresses — use synthetic data
|
| 687 |
..Default::default()
|
| 688 |
});
|
| 689 |
|
|
|
|
| 728 |
min_manage_size: 4096,
|
| 729 |
idle_threshold_ns: 0,
|
| 730 |
prediction_threshold: 0.3,
|
| 731 |
+
test_mode: true, // fake addresses — use synthetic data
|
| 732 |
..Default::default()
|
| 733 |
});
|
| 734 |
|
|
|
|
| 762 |
assert!(summary.graph_rebuilds >= 1,
|
| 763 |
"Graph should have rebuilt at least once");
|
| 764 |
}
|
| 765 |
+
|
| 766 |
+
// ── Block D: new tests ────────────────────────────────────────────────
|
| 767 |
+
|
| 768 |
+
/// An Observing pipeline accepts allocation events but keeps the condenser idle.
#[test]
fn test_pipeline_mode_observing() {
    let config = PipelineConfig {
        test_mode: true,
        graph_rebuild_interval: 1000,
        idle_threshold_ns: 0, // an Active pipeline would compress right away
        min_manage_size: 1024,
        ..Default::default()
    };
    let mut observer = Pipeline::new_observing(config);

    // Three 64 KiB allocations at distinct addresses.
    for &address in &[0x10000usize, 0x20000, 0x30000] {
        observer.process_alloc(address, 65_536);
    }

    // Not enough rebuilds / accuracy yet, so the mode cannot have changed.
    assert_eq!(observer.mode, PipelineMode::Observing);

    // The condenser stays silent: a scan compresses nothing and saves nothing.
    let (compressed, bytes_saved) = observer.scan();
    assert_eq!(compressed, 0, "Observing pipeline must not compress");
    assert_eq!(bytes_saved, 0);

    // Nothing was handed to the condenser in the first place.
    let snapshot = observer.summary();
    assert_eq!(snapshot.condenser.total_regions, 0,
        "Observing pipeline must not register regions with condenser");
}
|
| 797 |
+
|
| 798 |
+
/// After 3 rebuilds with good accuracy, Observing transitions to Active.
#[test]
fn test_pipeline_transition() {
    // Use a small rebuild interval so we can force rebuilds quickly.
    // We need mode_rebuilds >= 3 AND last_prediction_accuracy >= 40.
    let mut pipeline = Pipeline::new_observing(PipelineConfig {
        min_manage_size: 1024,
        graph_rebuild_interval: 10,
        idle_threshold_ns: 1_000_000_000,
        prediction_threshold: 0.1,
        // The addresses fed below are fabricated. The transition check also
        // runs internally, so the pipeline may already be Active while events
        // are still arriving; as in the other fake-address tests, test_mode
        // makes the pipeline use synthetic data instead of real memory.
        test_mode: true,
        ..Default::default()
    });

    // Drive a strong repeating pattern so the predictor scores well.
    // Each batch of 10+ events triggers a rebuild.
    for _round in 0..5 {
        for i in 0..12usize {
            let size = if i % 2 == 0 { 65_536 } else { 131_072 };
            pipeline.process_alloc(0x10000 + i * 0x1000, size);
        }
    }

    assert!(pipeline.graph_rebuilds >= 3,
        "Expected at least 3 rebuilds, got {}", pipeline.graph_rebuilds);

    // Patch accuracy to guarantee the transition gate passes,
    // then call check_transition (also called internally — idempotent).
    pipeline.last_prediction_accuracy = 50.0;
    let transitioned = pipeline.check_transition();

    assert!(transitioned, "Should have transitioned to Active");
    assert_eq!(pipeline.mode, PipelineMode::Active);
}
|
| 831 |
+
|
| 832 |
+
/// `effective_threshold` steps down as an Active pipeline matures:
/// 0.8 when fresh, 0.5 mid-ramp, the configured value at maturity —
/// and is pinned at 1.0 for an Observing pipeline.
#[test]
fn test_pipeline_graduated_threshold() {
    let config = PipelineConfig {
        prediction_threshold: 0.3,
        ..Default::default()
    };
    let mut active = Pipeline::new(config);

    // Stage 1: brand-new Active pipeline with zero cycles.
    assert_eq!(active.active_cycles, 0);
    assert_eq!(active.effective_threshold(), 0.8,
        "Fresh active pipeline should use conservative 0.8 threshold");

    // Stage 2: part-way through the ramp.
    active.active_cycles = 500;
    assert_eq!(active.effective_threshold(), 0.5,
        "Mid-ramp should use 0.5 threshold");

    // Stage 3: ramp complete, the configured threshold applies.
    active.active_cycles = 1100;
    assert_eq!(active.effective_threshold(), 0.3,
        "Mature pipeline should use config threshold");

    // A pipeline that is not Active never compresses.
    let passive = Pipeline::new_observing(PipelineConfig::default());
    assert_eq!(passive.effective_threshold(), 1.0,
        "Observing pipeline threshold must be 1.0 (never compress)");
}
|
| 860 |
+
|
| 861 |
+
/// A process death within 5 seconds of a condensation is flagged;
/// one well outside that window is not.
#[test]
fn test_pipeline_crash_correlation() {
    const ONE_SECOND_NS: u64 = 1_000_000_000;

    let config = PipelineConfig {
        test_mode: true, // fake addresses — use synthetic data
        graph_rebuild_interval: 1000,
        idle_threshold_ns: 0,
        min_manage_size: 1024,
        ..Default::default()
    };
    let mut pipeline = Pipeline::new(config);

    // One allocation plus one scan yields exactly one recorded condensation.
    pipeline.process_alloc(0x10000, 65_536);
    let compressed = pipeline.scan().0;
    assert_eq!(compressed, 1, "Expected one compression");
    assert_eq!(pipeline.condensation_timestamps.len(), 1);

    let compressed_at = pipeline.condensation_timestamps[0];

    // 1 s after the condensation: inside the 5 s window.
    assert!(
        pipeline.report_process_death(compressed_at + ONE_SECOND_NS),
        "Death 1s after condensation should be flagged as likely interference"
    );

    // 10 s after the condensation: outside the window.
    assert!(
        !pipeline.report_process_death(compressed_at + 10 * ONE_SECOND_NS),
        "Death 10s after condensation should not be flagged"
    );
}
|
| 893 |
+
|
| 894 |
+
/// A Blacklisted pipeline stays Blacklisted no matter how good its
/// rebuild count and prediction accuracy look.
#[test]
fn test_pipeline_blacklisted() {
    let config = PipelineConfig {
        graph_rebuild_interval: 1000,
        min_manage_size: 1024,
        ..Default::default()
    };
    let mut banned = Pipeline::new_observing(config);

    // Put the pipeline on the blacklist, then fake ideal transition inputs.
    banned.mode = PipelineMode::Blacklisted;
    banned.last_prediction_accuracy = 99.0;
    banned.mode_rebuilds = 10;

    let moved = banned.check_transition();
    assert!(!moved, "Blacklisted pipeline must never transition");
    assert_eq!(banned.mode, PipelineMode::Blacklisted);
}
|
| 914 |
+
|
| 915 |
+
/// Events for two different PIDs land in two fully independent pipelines.
#[test]
fn test_process_pipeline_map() {
    let config = PipelineConfig {
        test_mode: true, // fake addresses — use synthetic data
        graph_rebuild_interval: 1000,
        idle_threshold_ns: 0,
        min_manage_size: 1024,
        ..Default::default()
    };
    let mut registry = ProcessPipelineMap::new(config);

    // PID 100 allocates twice, PID 200 once (same address as PID 100's first).
    registry.process_alloc_global(100, 0x10000, 65_536);
    registry.process_alloc_global(100, 0x20000, 65_536);
    registry.process_alloc_global(200, 0x10000, 65_536);

    assert_eq!(registry.process_count(), 2, "Should track exactly 2 processes");

    // Freshly created pipelines are Observing and count only their own events.
    let first = registry.get_or_create(100);
    assert_eq!(first.mode, PipelineMode::Observing,
        "New pipelines must start in Observing mode");
    assert_eq!(first.events_processed, 2);

    assert_eq!(registry.get_or_create(200).events_processed, 1);

    // A free routed to PID 100 leaves PID 200's counters alone.
    registry.process_free_global(100, 0x10000);
    assert_eq!(registry.get_or_create(200).events_processed, 1,
        "PID 200 should be unaffected by PID 100 free");

    // A free for a PID that was never seen is silently ignored (no panic).
    registry.process_free_global(999, 0xDEAD);
}
|
| 957 |
}
|
|
@@ -9,22 +9,18 @@ use pyo3::prelude::*;
|
|
| 9 |
use crate::graph::AccessGraph;
|
| 10 |
|
| 11 |
/// A single prediction: what will be accessed, when, how confident.
|
| 12 |
-
#[cfg_attr(feature = "python", pyclass)]
|
| 13 |
#[derive(Clone, Debug)]
|
|
|
|
| 14 |
pub struct Prediction {
|
| 15 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 16 |
pub path: String,
|
| 17 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 18 |
pub confidence: f64,
|
| 19 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 20 |
pub expected_delta_ms: f64,
|
| 21 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 22 |
pub source_path: String,
|
| 23 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 24 |
pub chain_depth: u32,
|
| 25 |
}
|
| 26 |
|
| 27 |
-
#[
|
|
|
|
| 28 |
impl Prediction {
|
| 29 |
fn __repr__(&self) -> String {
|
| 30 |
format!(
|
|
@@ -35,22 +31,15 @@ impl Prediction {
|
|
| 35 |
}
|
| 36 |
|
| 37 |
/// Scoring results from prediction evaluation.
|
| 38 |
-
#[cfg_attr(feature = "python", pyclass)]
|
| 39 |
#[derive(Clone, Debug)]
|
|
|
|
| 40 |
pub struct ScoreResult {
|
| 41 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 42 |
pub predictions_made: u32,
|
| 43 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 44 |
pub hits: u32,
|
| 45 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 46 |
pub misses: u32,
|
| 47 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 48 |
pub accuracy: f64,
|
| 49 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 50 |
pub direct_hits: u32,
|
| 51 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 52 |
pub chain_hits: u32,
|
| 53 |
-
#[cfg_attr(feature = "python", pyo3(get))]
|
| 54 |
pub cluster_hits: u32,
|
| 55 |
}
|
| 56 |
|
|
@@ -81,9 +70,7 @@ pub struct RustPredictor {
|
|
| 81 |
score_window_ns: u64,
|
| 82 |
}
|
| 83 |
|
| 84 |
-
#[cfg_attr(feature = "python", pymethods)]
|
| 85 |
impl RustPredictor {
|
| 86 |
-
#[cfg_attr(feature = "python", new)]
|
| 87 |
pub fn new() -> Self {
|
| 88 |
Self {
|
| 89 |
learned: false,
|
|
@@ -147,7 +134,6 @@ impl RustPredictor {
|
|
| 147 |
/// Predict what will be accessed next after `path`.
|
| 148 |
///
|
| 149 |
/// Returns top-K predictions sorted by confidence.
|
| 150 |
-
#[cfg_attr(feature = "python", pyo3(signature = (path, top_k=10)))]
|
| 151 |
pub fn predict(&self, path: &str, top_k: usize) -> Vec<Prediction> {
|
| 152 |
if !self.learned {
|
| 153 |
return Vec::new();
|
|
@@ -300,6 +286,35 @@ impl RustPredictor {
|
|
| 300 |
}
|
| 301 |
}
|
| 302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
#[cfg(test)]
|
| 304 |
mod tests {
|
| 305 |
use super::*;
|
|
|
|
| 9 |
use crate::graph::AccessGraph;
|
| 10 |
|
| 11 |
/// A single prediction: what will be accessed, when, how confident.
///
/// With the `python` feature enabled this is a PyO3 class, and `get_all`
/// exposes every field below as a read-only Python attribute.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "python", pyclass(get_all))]
pub struct Prediction {
    /// Path predicted to be accessed.
    pub path: String,
    /// Confidence attached to this prediction.
    /// NOTE(review): scale (0–1 vs. percent) is not visible here — confirm.
    pub confidence: f64,
    /// Expected delay until the access, in milliseconds (per the field name).
    pub expected_delta_ms: f64,
    /// Presumably the path whose access triggered this prediction — verify
    /// against `RustPredictor::predict`.
    pub source_path: String,
    /// Presumably the number of hops followed to reach `path` — TODO confirm
    /// whether a direct prediction is depth 0 or 1.
    pub chain_depth: u32,
}
|
| 21 |
|
| 22 |
+
#[cfg(feature = "python")]
|
| 23 |
+
#[pymethods]
|
| 24 |
impl Prediction {
|
| 25 |
fn __repr__(&self) -> String {
|
| 26 |
format!(
|
|
|
|
| 31 |
}
|
| 32 |
|
| 33 |
/// Scoring results from prediction evaluation.
///
/// With the `python` feature enabled this is a PyO3 class, and `get_all`
/// exposes every field below as a read-only Python attribute.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "python", pyclass(get_all))]
pub struct ScoreResult {
    /// Total number of predictions that were evaluated.
    pub predictions_made: u32,
    /// Predictions counted as correct.
    pub hits: u32,
    /// Predictions counted as incorrect.
    pub misses: u32,
    /// Overall accuracy figure.
    /// NOTE(review): unit (fraction vs. percent) is not visible here — confirm.
    pub accuracy: f64,
    /// Presumably hits attributed to direct predictions — verify against `score`.
    pub direct_hits: u32,
    /// Presumably hits attributed to chained predictions — verify against `score`.
    pub chain_hits: u32,
    /// Presumably hits attributed to cluster membership — verify against `score`.
    pub cluster_hits: u32,
}
|
| 45 |
|
|
|
|
| 70 |
score_window_ns: u64,
|
| 71 |
}
|
| 72 |
|
|
|
|
| 73 |
impl RustPredictor {
|
|
|
|
| 74 |
pub fn new() -> Self {
|
| 75 |
Self {
|
| 76 |
learned: false,
|
|
|
|
| 134 |
/// Predict what will be accessed next after `path`.
|
| 135 |
///
|
| 136 |
/// Returns top-K predictions sorted by confidence.
|
|
|
|
| 137 |
pub fn predict(&self, path: &str, top_k: usize) -> Vec<Prediction> {
|
| 138 |
if !self.learned {
|
| 139 |
return Vec::new();
|
|
|
|
| 286 |
}
|
| 287 |
}
|
| 288 |
|
| 289 |
+
// Python-facing method table for `RustPredictor`, compiled only when the
// `python` feature is enabled. Every method is a thin wrapper that forwards
// to the inherent Rust method; the `py_` prefix keeps the wrapper names from
// clashing with those inherent methods, and `#[pyo3(name = ...)]` restores
// the un-prefixed name on the Python side.
#[cfg(feature = "python")]
#[pymethods]
impl RustPredictor {
    /// Python constructor; forwards to `RustPredictor::new`.
    #[new]
    fn py_new() -> Self {
        Self::new()
    }

    /// Exposed to Python as `learn`; forwards to `RustPredictor::learn`.
    #[pyo3(name = "learn")]
    fn py_learn(&mut self, graph: &AccessGraph) {
        self.learn(graph);
    }

    /// Exposed to Python as `predict`; `top_k` defaults to 10 when omitted.
    /// Forwards to `RustPredictor::predict`.
    #[pyo3(name = "predict", signature = (path, top_k=10))]
    fn py_predict(&self, path: &str, top_k: usize) -> Vec<Prediction> {
        self.predict(path, top_k)
    }

    /// Exposed to Python as `score`; forwards to `RustPredictor::score`.
    /// NOTE(review): the meaning of each element of the `(u64, String, u64)`
    /// event tuples is defined by `score`, which is not visible here.
    #[pyo3(name = "score")]
    fn py_score(&self, events: Vec<(u64, String, u64)>) -> ScoreResult {
        self.score(events)
    }

    /// Exposed to Python as `is_learned`; forwards to `RustPredictor::is_learned`.
    #[pyo3(name = "is_learned")]
    fn py_is_learned(&self) -> bool {
        self.is_learned()
    }
}
|
| 317 |
+
|
| 318 |
#[cfg(test)]
|
| 319 |
mod tests {
|
| 320 |
use super::*;
|
|
@@ -0,0 +1,677 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Sleep Consolidation — Block I of the Condensate living-memory lifecycle.
|
| 2 |
+
//!
|
| 3 |
+
//! During idle periods the system enters a biological sleep cycle:
|
| 4 |
+
//! Phase 1 (Replay) — replay recent access patterns at high speed
|
| 5 |
+
//! Phase 2 (Reorganize) — compute layout improvements
|
| 6 |
+
//! Phase 3 (Prune) — remove weak edges, compact
|
| 7 |
+
//!
|
| 8 |
+
//! The caller drives each phase with tick_* methods and is responsible for
|
| 9 |
+
//! applying the returned hints to the actual graph/layout structures.
|
| 10 |
+
|
| 11 |
+
// ─── ReplayEvent ────────────────────────────────────────────────────────────
|
| 12 |
+
|
| 13 |
+
/// A single recorded memory-access event stored in the replay buffer.
#[derive(Clone, Debug)]
pub struct ReplayEvent {
    /// Event time in nanoseconds. `SleepController::record_event` uses the
    /// difference between consecutive timestamps as an idle-gap sample.
    pub timestamp_ns: u64,
    /// Numeric identifier of the accessed path.
    /// NOTE(review): how ids are assigned is not visible in this module.
    pub path_id: u32,
    /// Size associated with the event — presumably bytes; confirm with callers.
    pub size: u64,
    /// true = allocation, false = free
    pub is_alloc: bool,
}
|
| 22 |
+
|
| 23 |
+
// ─── ReplayBuffer ───────────────────────────────────────────────────────────
|
| 24 |
+
|
| 25 |
+
/// Fixed-capacity ring buffer of ReplayEvents. Oldest events are silently
|
| 26 |
+
/// overwritten once the buffer is full.
|
| 27 |
+
pub struct ReplayBuffer {
|
| 28 |
+
events: Vec<ReplayEvent>,
|
| 29 |
+
capacity: usize,
|
| 30 |
+
write_pos: usize,
|
| 31 |
+
wrapped: bool,
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
impl ReplayBuffer {
|
| 35 |
+
/// Allocate a ring buffer with `capacity` slots.
|
| 36 |
+
pub fn new(capacity: usize) -> Self {
|
| 37 |
+
assert!(capacity > 0, "ReplayBuffer capacity must be > 0");
|
| 38 |
+
Self {
|
| 39 |
+
events: Vec::with_capacity(capacity),
|
| 40 |
+
capacity,
|
| 41 |
+
write_pos: 0,
|
| 42 |
+
wrapped: false,
|
| 43 |
+
}
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
/// Push one event. If the buffer is full the oldest event is overwritten.
|
| 47 |
+
pub fn push(&mut self, event: ReplayEvent) {
|
| 48 |
+
if self.events.len() < self.capacity {
|
| 49 |
+
// Still filling up — just append.
|
| 50 |
+
self.events.push(event);
|
| 51 |
+
} else {
|
| 52 |
+
// Ring is full: overwrite at write_pos.
|
| 53 |
+
self.events[self.write_pos] = event;
|
| 54 |
+
self.wrapped = true;
|
| 55 |
+
}
|
| 56 |
+
self.write_pos = (self.write_pos + 1) % self.capacity;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
/// Return all stored events in chronological order (oldest → newest).
|
| 60 |
+
pub fn drain(&self) -> Vec<&ReplayEvent> {
|
| 61 |
+
let len = self.events.len();
|
| 62 |
+
if len == 0 {
|
| 63 |
+
return Vec::new();
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
let mut out = Vec::with_capacity(len);
|
| 67 |
+
|
| 68 |
+
if !self.wrapped {
|
| 69 |
+
// Buffer never overflowed — elements are already in order.
|
| 70 |
+
for e in &self.events {
|
| 71 |
+
out.push(e);
|
| 72 |
+
}
|
| 73 |
+
} else {
|
| 74 |
+
// write_pos points to the *oldest* slot.
|
| 75 |
+
for i in 0..len {
|
| 76 |
+
let idx = (self.write_pos + i) % self.capacity;
|
| 77 |
+
out.push(&self.events[idx]);
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
out
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
/// Number of events currently stored.
|
| 85 |
+
pub fn len(&self) -> usize {
|
| 86 |
+
self.events.len()
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
/// Remove all stored events and reset internal state.
|
| 90 |
+
pub fn clear(&mut self) {
|
| 91 |
+
self.events.clear();
|
| 92 |
+
self.write_pos = 0;
|
| 93 |
+
self.wrapped = false;
|
| 94 |
+
}
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
// ─── SleepPhase ─────────────────────────────────────────────────────────────
|
| 98 |
+
|
| 99 |
+
/// State of the sleep cycle driven by `SleepController`.
///
/// `Awake` is the non-sleeping state; the remaining variants are the three
/// sleep phases in the order the controller's lifecycle walks through them.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum SleepPhase {
    /// Not sleeping; the state a new `SleepController` starts in.
    Awake,
    /// Phase 1: replay recent patterns at high speed.
    Replay,
    /// Phase 2: compute layout improvements.
    Reorganize,
    /// Phase 3: remove weak edges, compact.
    Prune,
}
|
| 109 |
+
|
| 110 |
+
// ─── SleepReport ────────────────────────────────────────────────────────────
|
| 111 |
+
|
| 112 |
+
/// Summary produced at the end of a sleep cycle.
///
/// `SleepController::enter_sleep` creates a report with every counter at
/// zero, `interrupted` false and `phase_reached` set to `Replay`; the tick
/// methods fill it in as the cycle progresses.
pub struct SleepReport {
    /// Length of the sleep cycle in milliseconds (per the field name).
    pub duration_ms: u64,
    /// Number of events replayed; `tick_replay` sets this to the size of the
    /// replay snapshot once replay has nothing more to do.
    pub events_replayed: usize,
    /// Count of edges strengthened during replay.
    pub edges_strengthened: usize,
    /// Count of edges removed.
    /// NOTE(review): updated outside the code visible here.
    pub edges_pruned: usize,
    /// Count of regions relocated.
    /// NOTE(review): updated outside the code visible here.
    pub regions_relocated: usize,
    /// Count of keyframes consolidated.
    /// NOTE(review): updated outside the code visible here.
    pub keyframes_consolidated: usize,
    /// Bytes freed — NOTE(review): updated outside the code visible here.
    pub bytes_freed: usize,
    /// Presumably true when the cycle was cut short by a wake request —
    /// confirm against `wake`.
    pub interrupted: bool,
    /// Furthest phase the cycle got to; starts at `Replay`.
    pub phase_reached: SleepPhase,
}
|
| 124 |
+
|
| 125 |
+
// ─── SleepController ────────────────────────────────────────────────────────
|
| 126 |
+
|
| 127 |
+
/// Drives the three-phase sleep cycle for Condensate.
///
/// # Lifecycle
/// ```text
/// (idle detected)
/// → enter_sleep() [Awake → Replay]
/// → tick_replay() [repeat until done]
/// → advance_phase() [Replay → Reorganize]
/// → tick_reorganize() [repeat until done]
/// → advance_phase() [Reorganize → Prune]
/// → tick_prune() [repeat until done]
/// → advance_phase() / wake() [Prune → Awake]
/// ```
pub struct SleepController {
    /// Current phase; starts as `Awake` and becomes `Replay` in `enter_sleep`.
    state: SleepPhase,
    /// Initialised to 0 by `new`.
    /// NOTE(review): presumably the time the last sleep began or ended — it
    /// is not written by any code visible here; confirm.
    last_sleep_ns: u64,
    /// Incremented once per `record_event` call.
    /// NOTE(review): where it is reset is not visible here.
    events_since_sleep: u64,
    /// Caller-supplied baseline; the adaptive threshold never drops below it.
    idle_threshold_ns: u64,
    /// Adaptive threshold — updated from idle_gap_samples.
    learned_idle_gap_ns: u64,
    /// Rolling window of inter-event gaps (max 100).
    idle_gap_samples: Vec<u64>,
    /// Ring buffer of recently recorded events; snapshotted by `enter_sleep`.
    replay_buffer: ReplayBuffer,
    /// Set to true to request an immediate wake.
    wake_interrupt: bool,
    /// Report for the cycle in progress; `None` until the first `enter_sleep`.
    current_report: Option<SleepReport>,
    /// Timestamp (ns) when the current sleep phase started.
    sleep_start_ns: u64,
    /// Snapshot of events replayed — used by tick_replay.
    replay_events_snapshot: Vec<ReplayEvent>,
    /// Replay cursor — how many events we have processed so far.
    replay_cursor: usize,
    /// Edge-strengthening counters: maps (src, dst) → count.
    edge_counts: std::collections::HashMap<(u32, u32), u64>,
}
|
| 162 |
+
|
| 163 |
+
const IDLE_GAP_WINDOW: usize = 100;
|
| 164 |
+
|
| 165 |
+
impl SleepController {
|
| 166 |
+
/// Create a new controller.
|
| 167 |
+
///
|
| 168 |
+
/// * `idle_threshold_ns` — baseline idle gap before the adaptive learner
|
| 169 |
+
/// kicks in.
|
| 170 |
+
/// * `replay_capacity` — maximum events held in the ring buffer.
|
| 171 |
+
pub fn new(idle_threshold_ns: u64, replay_capacity: usize) -> Self {
|
| 172 |
+
Self {
|
| 173 |
+
state: SleepPhase::Awake,
|
| 174 |
+
last_sleep_ns: 0,
|
| 175 |
+
events_since_sleep: 0,
|
| 176 |
+
idle_threshold_ns,
|
| 177 |
+
learned_idle_gap_ns: idle_threshold_ns,
|
| 178 |
+
idle_gap_samples: Vec::with_capacity(IDLE_GAP_WINDOW),
|
| 179 |
+
replay_buffer: ReplayBuffer::new(replay_capacity),
|
| 180 |
+
wake_interrupt: false,
|
| 181 |
+
current_report: None,
|
| 182 |
+
sleep_start_ns: 0,
|
| 183 |
+
replay_events_snapshot: Vec::new(),
|
| 184 |
+
replay_cursor: 0,
|
| 185 |
+
edge_counts: std::collections::HashMap::new(),
|
| 186 |
+
}
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
// ── Recording ───────────────────────────────────────────────────────────
|
| 190 |
+
|
| 191 |
+
/// Record an access event: store it in the replay buffer and update
|
| 192 |
+
/// the adaptive idle-gap learner.
|
| 193 |
+
pub fn record_event(&mut self, event: ReplayEvent) {
|
| 194 |
+
// Learn from the gap to the previous event (if any).
|
| 195 |
+
if self.events_since_sleep > 0 {
|
| 196 |
+
let last_ts = self
|
| 197 |
+
.replay_buffer
|
| 198 |
+
.drain()
|
| 199 |
+
.last()
|
| 200 |
+
.map(|e| e.timestamp_ns)
|
| 201 |
+
.unwrap_or(0);
|
| 202 |
+
if event.timestamp_ns > last_ts {
|
| 203 |
+
let gap = event.timestamp_ns - last_ts;
|
| 204 |
+
self.observe_gap(gap);
|
| 205 |
+
}
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
self.events_since_sleep += 1;
|
| 209 |
+
self.replay_buffer.push(event);
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
/// Feed one inter-event gap into the rolling window and recompute the
|
| 213 |
+
/// adaptive threshold.
|
| 214 |
+
fn observe_gap(&mut self, gap_ns: u64) {
|
| 215 |
+
if self.idle_gap_samples.len() == IDLE_GAP_WINDOW {
|
| 216 |
+
self.idle_gap_samples.remove(0);
|
| 217 |
+
}
|
| 218 |
+
self.idle_gap_samples.push(gap_ns);
|
| 219 |
+
self.update_adaptive_threshold();
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
/// Recompute `learned_idle_gap_ns` = mean + 2 * stddev of the sample
|
| 223 |
+
/// window. Falls back to `idle_threshold_ns` when no samples exist.
|
| 224 |
+
fn update_adaptive_threshold(&mut self) {
|
| 225 |
+
let n = self.idle_gap_samples.len();
|
| 226 |
+
if n == 0 {
|
| 227 |
+
self.learned_idle_gap_ns = self.idle_threshold_ns;
|
| 228 |
+
return;
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
let sum: u64 = self.idle_gap_samples.iter().sum();
|
| 232 |
+
let mean = sum / n as u64;
|
| 233 |
+
|
| 234 |
+
// Variance (integer arithmetic — sufficient precision for ns gaps).
|
| 235 |
+
let variance: u64 = self
|
| 236 |
+
.idle_gap_samples
|
| 237 |
+
.iter()
|
| 238 |
+
.map(|&g| {
|
| 239 |
+
let d = if g > mean { g - mean } else { mean - g };
|
| 240 |
+
d * d
|
| 241 |
+
})
|
| 242 |
+
.sum::<u64>()
|
| 243 |
+
/ n as u64;
|
| 244 |
+
|
| 245 |
+
let stddev = integer_sqrt(variance);
|
| 246 |
+
|
| 247 |
+
// threshold = mean + max(2 * stddev, 10 % of mean).
|
| 248 |
+
//
|
| 249 |
+
// The 10 % floor prevents the degenerate case where all gaps are
|
| 250 |
+
// identical (stddev = 0) from producing a threshold exactly equal to
|
| 251 |
+
// the mean. A server with perfectly regular 2-second gaps must NOT
|
| 252 |
+
// trigger sleep on those 2-second pauses, so the threshold must be
|
| 253 |
+
// strictly above 2 s.
|
| 254 |
+
let margin = (2 * stddev).max(mean / 10);
|
| 255 |
+
let adaptive = mean.saturating_add(margin);
|
| 256 |
+
self.learned_idle_gap_ns = adaptive.max(self.idle_threshold_ns);
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
// ── Idle detection ──────────────────────────────────────────────────────
|
| 260 |
+
|
| 261 |
+
/// Returns true when the gap between `last_event_ns` and `now_ns` exceeds
|
| 262 |
+
/// the adaptive idle threshold.
|
| 263 |
+
pub fn is_idle(&self, now_ns: u64, last_event_ns: u64) -> bool {
|
| 264 |
+
if now_ns <= last_event_ns {
|
| 265 |
+
return false;
|
| 266 |
+
}
|
| 267 |
+
now_ns - last_event_ns >= self.learned_idle_gap_ns
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
// ── Phase management ────────────────────────────────────────────────────
|
| 271 |
+
|
| 272 |
+
/// Transition from Awake into Replay, initialising a fresh report.
|
| 273 |
+
/// Returns `SleepPhase::Replay`.
|
| 274 |
+
pub fn enter_sleep(&mut self, now_ns: u64) -> SleepPhase {
|
| 275 |
+
self.state = SleepPhase::Replay;
|
| 276 |
+
self.sleep_start_ns = now_ns;
|
| 277 |
+
self.wake_interrupt = false;
|
| 278 |
+
self.edge_counts.clear();
|
| 279 |
+
|
| 280 |
+
// Snapshot the replay buffer so that tick_replay can iterate it
|
| 281 |
+
// without borrowing issues.
|
| 282 |
+
self.replay_events_snapshot = self
|
| 283 |
+
.replay_buffer
|
| 284 |
+
.drain()
|
| 285 |
+
.into_iter()
|
| 286 |
+
.cloned()
|
| 287 |
+
.collect();
|
| 288 |
+
self.replay_cursor = 0;
|
| 289 |
+
|
| 290 |
+
self.current_report = Some(SleepReport {
|
| 291 |
+
duration_ms: 0,
|
| 292 |
+
events_replayed: 0,
|
| 293 |
+
edges_strengthened: 0,
|
| 294 |
+
edges_pruned: 0,
|
| 295 |
+
regions_relocated: 0,
|
| 296 |
+
keyframes_consolidated: 0,
|
| 297 |
+
bytes_freed: 0,
|
| 298 |
+
interrupted: false,
|
| 299 |
+
phase_reached: SleepPhase::Replay,
|
| 300 |
+
});
|
| 301 |
+
|
| 302 |
+
SleepPhase::Replay
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
/// Process a batch of replay events.
|
| 306 |
+
///
|
| 307 |
+
/// Returns `(edges_strengthened, edges_weakened)`.
|
| 308 |
+
///
|
| 309 |
+
/// For every sequential pair (A, B) in the replay stream, the A→B edge
|
| 310 |
+
/// counter is incremented. The caller is responsible for applying the
|
| 311 |
+
/// returned counts to the actual graph.
|
| 312 |
+
pub fn tick_replay(&mut self) -> (usize, usize) {
|
| 313 |
+
let events = &self.replay_events_snapshot;
|
| 314 |
+
let total = events.len();
|
| 315 |
+
|
| 316 |
+
if self.replay_cursor >= total.saturating_sub(1) {
|
| 317 |
+
// Nothing (more) to do.
|
| 318 |
+
if let Some(ref mut r) = self.current_report {
|
| 319 |
+
r.events_replayed = total;
|
| 320 |
+
}
|
| 321 |
+
return (0, 0);
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
// Process all remaining sequential pairs in one tick (callers can
|
| 325 |
+
// chunk however they like by calling multiple times, but we keep it
|
| 326 |
+
// simple here: process everything remaining).
|
| 327 |
+
let mut strengthened = 0usize;
|
| 328 |
+
|
| 329 |
+
while self.replay_cursor + 1 < total {
|
| 330 |
+
let src = events[self.replay_cursor].path_id;
|
| 331 |
+
let dst = events[self.replay_cursor + 1].path_id;
|
| 332 |
+
let counter = self.edge_counts.entry((src, dst)).or_insert(0);
|
| 333 |
+
*counter += 1;
|
| 334 |
+
strengthened += 1;
|
| 335 |
+
self.replay_cursor += 1;
|
| 336 |
+
}
|
| 337 |
+
// Advance past the last event.
|
| 338 |
+
self.replay_cursor = total;
|
| 339 |
+
|
| 340 |
+
if let Some(ref mut r) = self.current_report {
|
| 341 |
+
r.events_replayed = total;
|
| 342 |
+
r.edges_strengthened += strengthened;
|
| 343 |
+
}
|
| 344 |
+
|
| 345 |
+
(strengthened, 0)
|
| 346 |
+
}
|
| 347 |
+
|
| 348 |
+
/// Identify regions whose replay pattern suggests adjacency.
|
| 349 |
+
///
|
| 350 |
+
/// Returns the count of regions that should be relocated. The caller
|
| 351 |
+
/// performs the actual relocation.
|
| 352 |
+
///
|
| 353 |
+
/// Heuristic: any path_id pair that co-occurs in the replay stream with a
|
| 354 |
+
/// count ≥ 2 is considered a relocation candidate; the number of *unique*
|
| 355 |
+
/// such path_ids is reported.
|
| 356 |
+
pub fn tick_reorganize(&mut self) -> usize {
|
| 357 |
+
let hot_nodes: std::collections::HashSet<u32> = self
|
| 358 |
+
.edge_counts
|
| 359 |
+
.iter()
|
| 360 |
+
.filter(|(_, &count)| count >= 2)
|
| 361 |
+
.flat_map(|((src, dst), _)| [*src, *dst])
|
| 362 |
+
.collect();
|
| 363 |
+
|
| 364 |
+
let relocated = hot_nodes.len();
|
| 365 |
+
|
| 366 |
+
if let Some(ref mut r) = self.current_report {
|
| 367 |
+
r.regions_relocated = relocated;
|
| 368 |
+
r.phase_reached = SleepPhase::Reorganize;
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
relocated
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
/// Given current edge weights, return edges whose weight is below
|
| 375 |
+
/// `threshold`. The caller removes them from the graph.
|
| 376 |
+
pub fn tick_prune(
|
| 377 |
+
&mut self,
|
| 378 |
+
edge_weights: &[(u32, u32, f64)],
|
| 379 |
+
threshold: f64,
|
| 380 |
+
) -> Vec<(u32, u32)> {
|
| 381 |
+
let pruned: Vec<(u32, u32)> = edge_weights
|
| 382 |
+
.iter()
|
| 383 |
+
.filter(|&&(_, _, w)| w < threshold)
|
| 384 |
+
.map(|&(src, dst, _)| (src, dst))
|
| 385 |
+
.collect();
|
| 386 |
+
|
| 387 |
+
if let Some(ref mut r) = self.current_report {
|
| 388 |
+
r.edges_pruned = pruned.len();
|
| 389 |
+
r.phase_reached = SleepPhase::Prune;
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
pruned
|
| 393 |
+
}
|
| 394 |
+
|
| 395 |
+
/// Advance to the next phase in the cycle.
|
| 396 |
+
///
|
| 397 |
+
/// ```text
|
| 398 |
+
/// Replay → Reorganize → Prune → Awake
|
| 399 |
+
/// ```
|
| 400 |
+
pub fn advance_phase(&mut self) -> SleepPhase {
|
| 401 |
+
self.state = match self.state {
|
| 402 |
+
SleepPhase::Awake => SleepPhase::Replay,
|
| 403 |
+
SleepPhase::Replay => SleepPhase::Reorganize,
|
| 404 |
+
SleepPhase::Reorganize => SleepPhase::Prune,
|
| 405 |
+
SleepPhase::Prune => SleepPhase::Awake,
|
| 406 |
+
};
|
| 407 |
+
self.state
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
// ── Wake ────────────────────────────────────────────────────────────────
|
| 411 |
+
|
| 412 |
+
/// Interrupt sleep immediately and return a finalised report.
|
| 413 |
+
pub fn wake(&mut self) -> SleepReport {
|
| 414 |
+
// We need a current timestamp — we do not have wall-clock access here,
|
| 415 |
+
// so duration is computed as 0 when entered without a wall-clock tick.
|
| 416 |
+
// Callers that want accurate duration should store the entry time and
|
| 417 |
+
// subtract. We store sleep_start_ns so the caller can do so.
|
| 418 |
+
let now_ns = self.sleep_start_ns; // conservative — will be 0 if no real clock
|
| 419 |
+
let duration_ms = now_ns.saturating_sub(self.sleep_start_ns) / 1_000_000;
|
| 420 |
+
|
| 421 |
+
let interrupted = self.wake_interrupt || self.state != SleepPhase::Awake;
|
| 422 |
+
let phase_reached = self.state;
|
| 423 |
+
|
| 424 |
+
self.state = SleepPhase::Awake;
|
| 425 |
+
self.wake_interrupt = false;
|
| 426 |
+
self.events_since_sleep = 0;
|
| 427 |
+
self.replay_buffer.clear();
|
| 428 |
+
self.replay_events_snapshot.clear();
|
| 429 |
+
self.replay_cursor = 0;
|
| 430 |
+
|
| 431 |
+
let mut report = self
|
| 432 |
+
.current_report
|
| 433 |
+
.take()
|
| 434 |
+
.unwrap_or_else(|| SleepReport {
|
| 435 |
+
duration_ms: 0,
|
| 436 |
+
events_replayed: 0,
|
| 437 |
+
edges_strengthened: 0,
|
| 438 |
+
edges_pruned: 0,
|
| 439 |
+
regions_relocated: 0,
|
| 440 |
+
keyframes_consolidated: 0,
|
| 441 |
+
bytes_freed: 0,
|
| 442 |
+
interrupted: false,
|
| 443 |
+
phase_reached: SleepPhase::Awake,
|
| 444 |
+
});
|
| 445 |
+
|
| 446 |
+
report.duration_ms = duration_ms;
|
| 447 |
+
report.interrupted = interrupted;
|
| 448 |
+
report.phase_reached = phase_reached;
|
| 449 |
+
|
| 450 |
+
report
|
| 451 |
+
}
|
| 452 |
+
|
| 453 |
+
// ── Queries ─────────────────────────────────────────────────────────────
|
| 454 |
+
|
| 455 |
+
/// True if `wake_interrupt` has been set.
|
| 456 |
+
pub fn should_wake(&self) -> bool {
|
| 457 |
+
self.wake_interrupt
|
| 458 |
+
}
|
| 459 |
+
|
| 460 |
+
/// Signal that an external event arrived and sleep should end.
|
| 461 |
+
pub fn set_wake_interrupt(&mut self) {
|
| 462 |
+
self.wake_interrupt = true;
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
/// The phase the controller is currently in.
pub fn get_phase(&self) -> SleepPhase {
    self.state
}
|
| 468 |
+
|
| 469 |
+
pub fn events_since_sleep(&self) -> u64 {
|
| 470 |
+
self.events_since_sleep
|
| 471 |
+
}
|
| 472 |
+
}
|
| 473 |
+
|
| 474 |
+
// ─── Utilities ──────────────────────────────────────────────────────────────
|
| 475 |
+
|
| 476 |
+
/// Integer square root (floor) — avoids pulling in floating-point for the
/// adaptive-threshold computation.
///
/// Uses Newton's method starting from `n` itself, so the iterates decrease
/// monotonically until they reach `floor(sqrt(n))`. Valid for the whole
/// `u64` range, including `u64::MAX`.
fn integer_sqrt(n: u64) -> u64 {
    if n == 0 {
        return 0;
    }
    let mut x = n;
    // ceil(n / 2), written so that it cannot overflow when n == u64::MAX
    // (the naive `(n + 1) / 2` does).
    let mut y = n / 2 + n % 2;
    while y < x {
        x = y;
        y = (x + n / x) / 2;
    }
    x
}
|
| 490 |
+
|
| 491 |
+
// ─── Tests ──────────────────────────────────────────────────────────────────
|
| 492 |
+
|
| 493 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a 64-byte allocation event for `path_id` stamped at `ts`.
    fn make_event(ts: u64, path_id: u32) -> ReplayEvent {
        ReplayEvent {
            timestamp_ns: ts,
            path_id,
            size: 64,
            is_alloc: true,
        }
    }

    // ── ReplayBuffer ────────────────────────────────────────────────────────

    /// Pushing past capacity keeps only the newest `capacity` events.
    #[test]
    fn test_sleep_replay_buffer_ring() {
        let mut ring = ReplayBuffer::new(3);
        // Twice the capacity, so the first three events are overwritten.
        for id in 0..6u32 {
            ring.push(make_event(id as u64 * 100, id));
        }
        assert_eq!(ring.len(), 3);

        let survivors = ring.drain();
        let ids: Vec<u32> = survivors.iter().map(|e| e.path_id).collect();
        let has_newest = ids.contains(&3) && ids.contains(&4) && ids.contains(&5);
        assert!(has_newest, "expected ids 3,4,5 but got {:?}", ids);
    }

    /// `drain` yields events oldest-first, both before and after a wrap.
    #[test]
    fn test_sleep_replay_buffer_drain_order() {
        // Exactly full, no wrap.
        let mut full = ReplayBuffer::new(5);
        for i in 0..5u64 {
            full.push(make_event(i * 10, i as u32));
        }
        let drained = full.drain();
        let timestamps: Vec<u64> = drained.iter().map(|e| e.timestamp_ns).collect();
        for (earlier, later) in timestamps.iter().zip(timestamps.iter().skip(1)) {
            assert!(
                earlier <= later,
                "drain order violated: {:?} > {:?}",
                earlier,
                later
            );
        }

        // Capacity 3 with 5 pushes forces a wrap.
        let mut wrapped = ReplayBuffer::new(3);
        for i in 0..5u64 {
            wrapped.push(make_event(i * 10, i as u32));
        }
        let drained_wrapped = wrapped.drain();
        let ts_wrapped: Vec<u64> = drained_wrapped.iter().map(|e| e.timestamp_ns).collect();
        for (earlier, later) in ts_wrapped.iter().zip(ts_wrapped.iter().skip(1)) {
            assert!(earlier <= later, "wrapped drain order violated");
        }
    }

    // ── Idle detection ──────────────────────────────────────────────────────

    /// With no learned samples, the configured baseline decides idleness.
    #[test]
    fn test_sleep_idle_detection() {
        let five_seconds = 5_000_000_000u64;
        let ctrl = SleepController::new(five_seconds, 64);
        let last_event = 1_000_000_000u64; // 1 s

        // 4 s of silence is below the threshold.
        let after_4s = last_event + 4_000_000_000;
        assert!(!ctrl.is_idle(after_4s, last_event));

        // 6 s of silence is above it.
        let after_6s = last_event + 6_000_000_000;
        assert!(ctrl.is_idle(after_6s, last_event));
    }

    /// Regular 2-second gaps must push the learned threshold above 2 s.
    #[test]
    fn test_sleep_adaptive_idle_threshold() {
        let baseline_ns = 500_000_000u64; // 0.5 s
        let gap_2s = 2_000_000_000u64;
        let mut ctrl = SleepController::new(baseline_ns, 64);

        // A server whose events arrive every ~2 seconds.
        (0..50).for_each(|_| ctrl.observe_gap(gap_2s));

        // Otherwise every ordinary 2-s pause would trigger sleep.
        assert!(
            ctrl.learned_idle_gap_ns > gap_2s,
            "adaptive threshold ({}) should be above 2 s gap ({})",
            ctrl.learned_idle_gap_ns,
            gap_2s
        );

        // A pause of exactly 2 s is normal, not idle.
        let last_event = 0u64;
        assert!(!ctrl.is_idle(gap_2s, last_event));
    }

    // ── Phase progression ───────────────────────────────────────────────────

    /// Replay → Reorganize → Prune → Awake, one step per `advance_phase`.
    #[test]
    fn test_sleep_phases_advance() {
        let mut ctrl = SleepController::new(1_000_000_000, 16);

        assert_eq!(ctrl.enter_sleep(0), SleepPhase::Replay);
        assert_eq!(ctrl.advance_phase(), SleepPhase::Reorganize);
        assert_eq!(ctrl.advance_phase(), SleepPhase::Prune);
        assert_eq!(ctrl.advance_phase(), SleepPhase::Awake);
    }

    // ── Wake interrupt ──────────────────────────────────────────────────────

    /// A wake interrupt during Replay yields an interrupted report.
    #[test]
    fn test_sleep_wake_interrupts() {
        let mut ctrl = SleepController::new(1_000_000_000, 16);

        ctrl.enter_sleep(0);
        assert_eq!(ctrl.get_phase(), SleepPhase::Replay);
        assert!(!ctrl.should_wake());

        ctrl.set_wake_interrupt();
        assert!(ctrl.should_wake());

        let report = ctrl.wake();
        assert!(report.interrupted, "report should be marked as interrupted");
        assert_eq!(ctrl.get_phase(), SleepPhase::Awake);
    }

    // ── Replay strengthening ────────────────────────────────────────────────

    /// Replaying A→B→A→B counts three pairs and sees 1→2 twice.
    #[test]
    fn test_sleep_replay_strengthening() {
        let mut ctrl = SleepController::new(1_000_000_000, 64);

        // Paths 1, 2, 1, 2 at 100 ns intervals.
        for &(ts, path) in [(100u64, 1u32), (200, 2), (300, 1), (400, 2)].iter() {
            ctrl.record_event(make_event(ts, path));
        }

        ctrl.enter_sleep(500);
        let (strengthened, weakened) = ctrl.tick_replay();

        // Pairs (1,2), (2,1), (1,2) → 3 edge increments.
        assert_eq!(strengthened, 3, "expected 3 strengthened edges");
        assert_eq!(weakened, 0);

        // 1→2 occurs twice in that stream.
        assert_eq!(*ctrl.edge_counts.get(&(1, 2)).unwrap_or(&0), 2);
    }

    // ── Prune weak edges ────────────────────────────────────────────────────

    /// Only edges strictly below the threshold are reported for pruning.
    #[test]
    fn test_sleep_prune_weak_edges() {
        let mut ctrl = SleepController::new(1_000_000_000, 16);
        ctrl.enter_sleep(0);

        let threshold = 0.2;
        let edge_weights = vec![
            (1u32, 2u32, 0.9f64),  // strong — keep
            (2u32, 3u32, 0.1f64),  // weak — prune
            (3u32, 4u32, 0.05f64), // weak — prune
            (4u32, 5u32, 0.8f64),  // strong — keep
        ];

        let pruned = ctrl.tick_prune(&edge_weights, threshold);

        assert_eq!(pruned.len(), 2, "expected 2 edges pruned");
        assert!(pruned.contains(&(2, 3)));
        assert!(pruned.contains(&(3, 4)));
    }
}
|
|
@@ -0,0 +1,488 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Sparse Extract — sub-region decompression for compressed memory.
|
| 2 |
+
//!
|
| 3 |
+
//! When a compressed region is accessed, don't decompress the whole thing.
|
| 4 |
+
//! Decompress ONLY the accessed byte range. Serve EXACTLY what's needed,
|
| 5 |
+
//! no more, no less.
|
| 6 |
+
//!
|
| 7 |
+
//! Key insight: a 50 KB object where only 3 fields (200 bytes) are ever
|
| 8 |
+
//! accessed keeps ~200 bytes decompressed + the full 50 KB compressed.
|
| 9 |
+
//! That's 99.6% savings on the warm portion.
|
| 10 |
+
//!
|
| 11 |
+
//! Flow:
|
| 12 |
+
//! 1. Region registered with its LZ4 compressed backing.
|
| 13 |
+
//! 2. Every access is recorded in the ByteHeatMap.
|
| 14 |
+
//! 3. `extract()` checks existing hot ranges first; on a miss it
|
| 15 |
+
//! decompresses the backing, slices the requested range, and
|
| 16 |
+
//! promotes it to a hot range.
|
| 17 |
+
//! 4. `compact()` demotes hot ranges that have not been re-accessed
|
| 18 |
+
//! since the last compaction pass.
|
| 19 |
+
|
| 20 |
+
use std::collections::HashMap;
|
| 21 |
+
use lz4_flex::decompress_size_prepended;
|
| 22 |
+
|
| 23 |
+
// ---------------------------------------------------------------------------
|
| 24 |
+
// ByteHeatMap
|
| 25 |
+
// ---------------------------------------------------------------------------
|
| 26 |
+
|
| 27 |
+
/// Per-region access heat tracker, bucketed at cache-line granularity (64 B).
pub struct ByteHeatMap {
    buckets: Vec<u32>,  // access count per 64-byte bucket
    bucket_size: usize, // always 64 (cache line)
    region_size: usize,
}

impl ByteHeatMap {
    /// Create a new heat map for a region of `region_size` bytes.
    /// Number of buckets = ceil(region_size / 64); all counts start at 0.
    pub fn new(region_size: usize) -> Self {
        let bucket_size = 64;
        let num_buckets = (region_size + bucket_size - 1) / bucket_size;
        Self {
            buckets: vec![0u32; num_buckets],
            bucket_size,
            region_size,
        }
    }

    /// Record an access covering [offset, offset + length).
    ///
    /// Every bucket that overlaps the range is incremented by 1 (saturating
    /// at `u32::MAX`). The range is clamped to the region; an empty range or
    /// one that starts at or beyond the end of the region is ignored.
    pub fn record_access(&mut self, offset: usize, length: usize) {
        if length == 0 || offset >= self.region_size {
            return;
        }
        // Saturate rather than overflow: the end is clamped to the region
        // size anyway, so an oversized `length` just means "to the end".
        let end = offset.saturating_add(length).min(self.region_size);
        let first_bucket = offset / self.bucket_size;
        let last_bucket = (end - 1) / self.bucket_size;
        for b in first_bucket..=last_bucket {
            if b < self.buckets.len() {
                self.buckets[b] = self.buckets[b].saturating_add(1);
            }
        }
    }

    /// Return (offset, length) pairs of contiguous bucket runs whose count
    /// is strictly above `threshold`. Adjacent hot buckets are merged into
    /// a single span, and a span that reaches the last bucket is clipped to
    /// the region size.
    pub fn get_hot_buckets(&self, threshold: u32) -> Vec<(usize, usize)> {
        let mut result = Vec::new();
        let mut run_start: Option<usize> = None;

        for (i, &count) in self.buckets.iter().enumerate() {
            if count > threshold {
                if run_start.is_none() {
                    run_start = Some(i);
                }
            } else if let Some(start) = run_start.take() {
                // A cold bucket closes the run that ended just before it.
                let offset = start * self.bucket_size;
                let end = (i * self.bucket_size).min(self.region_size);
                result.push((offset, end - offset));
            }
        }
        // Flush a run that extends through the final bucket.
        if let Some(start) = run_start {
            let offset = start * self.bucket_size;
            let end = self.region_size;
            result.push((offset, end - offset));
        }
        result
    }

    /// Reset all bucket counts to zero.
    pub fn reset(&mut self) {
        for b in self.buckets.iter_mut() {
            *b = 0;
        }
    }
}
|
| 97 |
+
|
| 98 |
+
// ---------------------------------------------------------------------------
|
| 99 |
+
// HotRange
|
| 100 |
+
// ---------------------------------------------------------------------------
|
| 101 |
+
|
| 102 |
+
/// A decompressed slice that is currently held in RAM ("hot").
pub struct HotRange {
    pub offset: usize,
    pub length: usize,
    pub data: Vec<u8>, // decompressed bytes for exactly this range
    pub access_count: u32,
    /// Epoch of the most recent access to this range; compared against the
    /// extractor's epoch by `compact()` to detect stale ranges.
    last_access_epoch: u64,
}

impl HotRange {
    /// Wrap `data` as the hot copy of the bytes starting at `offset`.
    /// `length` is taken from `data.len()`, and the range starts with one
    /// recorded access at `epoch`.
    fn new(offset: usize, data: Vec<u8>, epoch: u64) -> Self {
        let length = data.len();
        Self {
            offset,
            length,
            data,
            access_count: 1,
            last_access_epoch: epoch,
        }
    }

    /// True when [offset, offset+length) fully contains [query_off, query_off+query_len).
    ///
    /// Written with subtractions only, so a huge `query_len` cannot overflow
    /// the end-of-range computation.
    fn covers(&self, query_off: usize, query_len: usize) -> bool {
        query_off >= self.offset
            && query_len <= self.length
            && query_off - self.offset <= self.length - query_len
    }

    /// Copy the bytes for [query_off, query_off+query_len) out of this hot
    /// range. The caller must have checked `covers()` first; an uncovered
    /// query panics.
    fn slice(&self, query_off: usize, query_len: usize) -> Vec<u8> {
        let rel = query_off - self.offset;
        self.data[rel..rel + query_len].to_vec()
    }
}
|
| 136 |
+
|
| 137 |
+
// ---------------------------------------------------------------------------
|
| 138 |
+
// SplitRegion
|
| 139 |
+
// ---------------------------------------------------------------------------
|
| 140 |
+
|
| 141 |
+
/// A compressed memory region that may have multiple decompressed hot slices.
|
| 142 |
+
pub struct SplitRegion {
|
| 143 |
+
pub region_id: u32,
|
| 144 |
+
pub total_size: usize,
|
| 145 |
+
compressed_backing: Vec<u8>, // full LZ4 compressed data (size-prepended)
|
| 146 |
+
hot_ranges: Vec<HotRange>, // decompressed hot slices
|
| 147 |
+
heat_map: ByteHeatMap,
|
| 148 |
+
last_compaction_ns: u64,
|
| 149 |
+
/// Epoch counter — incremented on every access to this region.
|
| 150 |
+
access_epoch: u64,
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
impl SplitRegion {
|
| 154 |
+
fn new(region_id: u32, compressed_data: Vec<u8>, original_size: usize) -> Self {
|
| 155 |
+
Self {
|
| 156 |
+
region_id,
|
| 157 |
+
total_size: original_size,
|
| 158 |
+
compressed_backing: compressed_data,
|
| 159 |
+
hot_ranges: Vec::new(),
|
| 160 |
+
heat_map: ByteHeatMap::new(original_size),
|
| 161 |
+
last_compaction_ns: 0,
|
| 162 |
+
access_epoch: 0,
|
| 163 |
+
}
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
/// Fully decompress the backing store and return it.
|
| 167 |
+
fn decompress_full(&self) -> Result<Vec<u8>, String> {
|
| 168 |
+
decompress_size_prepended(&self.compressed_backing)
|
| 169 |
+
.map_err(|e| format!("LZ4 decompression error on region {}: {}", self.region_id, e))
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
/// Hot bytes currently held in RAM (may overlap, counted simply).
|
| 173 |
+
fn hot_bytes(&self) -> usize {
|
| 174 |
+
self.hot_ranges.iter().map(|r| r.length).sum()
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
/// Return bytes at [offset, offset+length) from the fully-decompressed
|
| 178 |
+
/// data, and add a new HotRange for that span.
|
| 179 |
+
fn decompress_and_promote(
|
| 180 |
+
&mut self,
|
| 181 |
+
offset: usize,
|
| 182 |
+
length: usize,
|
| 183 |
+
epoch: u64,
|
| 184 |
+
) -> Option<Vec<u8>> {
|
| 185 |
+
let full = self.decompress_full().ok()?;
|
| 186 |
+
if offset + length > full.len() {
|
| 187 |
+
return None;
|
| 188 |
+
}
|
| 189 |
+
let slice = full[offset..offset + length].to_vec();
|
| 190 |
+
self.hot_ranges.push(HotRange::new(offset, slice.clone(), epoch));
|
| 191 |
+
Some(slice)
|
| 192 |
+
}
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
// ---------------------------------------------------------------------------
|
| 196 |
+
// SparseExtractor
|
| 197 |
+
// ---------------------------------------------------------------------------
|
| 198 |
+
|
| 199 |
+
/// Manages many compressed regions, serving byte-range queries with minimal
|
| 200 |
+
/// decompression and tracking hot slices per region.
|
| 201 |
+
pub struct SparseExtractor {
|
| 202 |
+
regions: HashMap<u32, SplitRegion>,
|
| 203 |
+
compaction_interval_ns: u64, // how often to demote stale hot ranges
|
| 204 |
+
/// Global access epoch — incremented on every extract() call.
|
| 205 |
+
epoch: u64,
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
impl SparseExtractor {
|
| 209 |
+
pub fn new(compaction_interval_ns: u64) -> Self {
|
| 210 |
+
Self {
|
| 211 |
+
regions: HashMap::new(),
|
| 212 |
+
compaction_interval_ns,
|
| 213 |
+
epoch: 0,
|
| 214 |
+
}
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
/// Register a compressed region. `compressed_data` must be an LZ4
|
| 218 |
+
/// frame created with `compress_prepend_size` (so the original length
|
| 219 |
+
/// is embedded in the first 4 bytes).
|
| 220 |
+
pub fn register(&mut self, region_id: u32, compressed_data: Vec<u8>, original_size: usize) {
|
| 221 |
+
self.regions.insert(
|
| 222 |
+
region_id,
|
| 223 |
+
SplitRegion::new(region_id, compressed_data, original_size),
|
| 224 |
+
);
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
/// Record that bytes [offset, offset+length) of `region_id` were accessed.
|
| 228 |
+
/// Updates the heat map. Does NOT decompress anything.
|
| 229 |
+
pub fn record_access(&mut self, region_id: u32, offset: usize, length: usize) {
|
| 230 |
+
if let Some(region) = self.regions.get_mut(®ion_id) {
|
| 231 |
+
region.heat_map.record_access(offset, length);
|
| 232 |
+
}
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
/// Return bytes [offset, offset+length) from `region_id`.
|
| 236 |
+
///
|
| 237 |
+
/// 1. Record the access in the heat map.
|
| 238 |
+
/// 2. Search existing hot ranges for a hit — if found, return directly.
|
| 239 |
+
/// 3. On a miss: decompress the full backing, slice the range, promote
|
| 240 |
+
/// it to a new hot range, return the slice.
|
| 241 |
+
///
|
| 242 |
+
/// Returns `None` if the region does not exist or the range is out of
|
| 243 |
+
/// bounds.
|
| 244 |
+
pub fn extract(&mut self, region_id: u32, offset: usize, length: usize) -> Option<Vec<u8>> {
|
| 245 |
+
self.epoch += 1;
|
| 246 |
+
let epoch = self.epoch;
|
| 247 |
+
|
| 248 |
+
let region = self.regions.get_mut(®ion_id)?;
|
| 249 |
+
region.access_epoch = epoch;
|
| 250 |
+
|
| 251 |
+
// Record heat.
|
| 252 |
+
region.heat_map.record_access(offset, length);
|
| 253 |
+
|
| 254 |
+
// Bounds check.
|
| 255 |
+
if offset + length > region.total_size {
|
| 256 |
+
return None;
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
// Fast path: already hot.
|
| 260 |
+
for hr in region.hot_ranges.iter_mut() {
|
| 261 |
+
if hr.covers(offset, length) {
|
| 262 |
+
hr.access_count += 1;
|
| 263 |
+
hr.last_access_epoch = epoch;
|
| 264 |
+
return Some(hr.slice(offset, length));
|
| 265 |
+
}
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
// Slow path: decompress and promote.
|
| 269 |
+
region.decompress_and_promote(offset, length, epoch)
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
/// Demote hot ranges that have not been accessed since the previous
|
| 273 |
+
/// compaction pass. Only runs if `now_ns - last_compaction_ns >=
|
| 274 |
+
/// compaction_interval_ns`.
|
| 275 |
+
///
|
| 276 |
+
/// A hot range is considered stale if its `last_access_epoch` is equal
|
| 277 |
+
/// to the epoch that was current at the start of the last compaction —
|
| 278 |
+
/// meaning no access has been recorded since then.
|
| 279 |
+
pub fn compact(&mut self, region_id: u32, now_ns: u64) {
|
| 280 |
+
let interval = self.compaction_interval_ns;
|
| 281 |
+
let current_epoch = self.epoch;
|
| 282 |
+
|
| 283 |
+
if let Some(region) = self.regions.get_mut(®ion_id) {
|
| 284 |
+
if now_ns.saturating_sub(region.last_compaction_ns) < interval {
|
| 285 |
+
return;
|
| 286 |
+
}
|
| 287 |
+
// The epoch watermark we saved at last compaction time is stored
|
| 288 |
+
// implicitly: any hot range whose last_access_epoch < current_epoch
|
| 289 |
+
// at the START of this compaction has not been touched since the
|
| 290 |
+
// last compact call. We demote those.
|
| 291 |
+
//
|
| 292 |
+
// "Not accessed since last compaction" == last_access_epoch was set
|
| 293 |
+
// before this compaction started (i.e. < current_epoch, because
|
| 294 |
+
// every access bumps the global epoch).
|
| 295 |
+
region.hot_ranges.retain(|hr| hr.last_access_epoch >= current_epoch);
|
| 296 |
+
region.last_compaction_ns = now_ns;
|
| 297 |
+
region.heat_map.reset();
|
| 298 |
+
}
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
/// Return `(total_size, hot_bytes, compressed_bytes)` for a region.
|
| 302 |
+
pub fn get_stats(&self, region_id: u32) -> Option<(usize, usize, usize)> {
|
| 303 |
+
let region = self.regions.get(®ion_id)?;
|
| 304 |
+
Some((
|
| 305 |
+
region.total_size,
|
| 306 |
+
region.hot_bytes(),
|
| 307 |
+
region.compressed_backing.len(),
|
| 308 |
+
))
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
/// Remove a region entirely, freeing both compressed backing and hot slices.
|
| 312 |
+
pub fn unregister(&mut self, region_id: u32) {
|
| 313 |
+
self.regions.remove(®ion_id);
|
| 314 |
+
}
|
| 315 |
+
}
|
| 316 |
+
|
| 317 |
+
// ---------------------------------------------------------------------------
|
| 318 |
+
// Tests
|
| 319 |
+
// ---------------------------------------------------------------------------
|
| 320 |
+
|
| 321 |
+
#[cfg(test)]
mod tests {
    use super::*;
    use lz4_flex::compress_prepend_size;

    /// Build a deterministic `size`-byte payload (byte `i` is `i % 251`) and
    /// return `(payload, compressed)` where `compressed` is the output of
    /// `compress_prepend_size` on that payload.
    fn make_compressed(size: usize) -> (Vec<u8>, Vec<u8>) {
        let data: Vec<u8> = (0..size).map(|i| (i % 251) as u8).collect();
        let compressed = compress_prepend_size(&data);
        (data, compressed)
    }

    // -----------------------------------------------------------------------

    /// Accesses must heat exactly the buckets they touch.
    /// (The bucket indices below assume 64-byte buckets.)
    #[test]
    fn test_sparse_heat_map_tracking() {
        let mut hm = ByteHeatMap::new(1024);

        // Access three non-overlapping ranges.
        hm.record_access(0, 64); // bucket 0
        hm.record_access(128, 64); // bucket 2
        hm.record_access(512, 128); // buckets 8 & 9

        // Bucket 0 was hit.
        assert!(hm.buckets[0] > 0, "bucket 0 should be hot");
        // Bucket 1 was NOT hit.
        assert_eq!(hm.buckets[1], 0, "bucket 1 should be cold");
        // Bucket 2 was hit.
        assert!(hm.buckets[2] > 0, "bucket 2 should be hot");
        // Buckets 8 & 9 were hit.
        assert!(hm.buckets[8] > 0, "bucket 8 should be hot");
        assert!(hm.buckets[9] > 0, "bucket 9 should be hot");
        // Bucket 10 was NOT hit.
        assert_eq!(hm.buckets[10], 0, "bucket 10 should be cold");
    }

    /// Only buckets at or above the hit threshold are reported as hot.
    #[test]
    fn test_sparse_hot_range_identification() {
        let mut hm = ByteHeatMap::new(512);

        // Hit bucket 0 five times — above threshold 3.
        for _ in 0..5 {
            hm.record_access(0, 64);
        }
        // Hit bucket 4 once — below threshold 3.
        hm.record_access(256, 64);

        let hot = hm.get_hot_buckets(3);
        // Only bucket 0 (offset 0, len 64) qualifies.
        assert_eq!(hot.len(), 1);
        assert_eq!(hot[0], (0, 64));
    }

    /// The first extract from a cold region returns the right bytes and
    /// promotes the requested range to hot.
    #[test]
    fn test_sparse_extract_cold_promotes() {
        let (original, compressed) = make_compressed(1024);
        let mut sx = SparseExtractor::new(u64::MAX); // never auto-compact

        sx.register(1, compressed, 1024);

        // Region is cold — no hot ranges yet.
        let stats_before = sx.get_stats(1).unwrap();
        assert_eq!(stats_before.1, 0, "no hot bytes before first access");

        // Extract 64 bytes from offset 128.
        let result = sx.extract(1, 128, 64).expect("extract should succeed");
        assert_eq!(result, &original[128..192], "extracted bytes must match original");

        // Now there should be a hot range.
        let stats_after = sx.get_stats(1).unwrap();
        assert_eq!(stats_after.1, 64, "64 hot bytes after promotion");
    }

    /// A repeated extract of the same range returns identical bytes and does
    /// not add to the hot byte count.
    #[test]
    fn test_sparse_extract_hot_direct() {
        let (original, compressed) = make_compressed(1024);
        let mut sx = SparseExtractor::new(u64::MAX);

        sx.register(2, compressed, 1024);

        // First access — promotes the range.
        let first = sx.extract(2, 256, 128).expect("first extract");
        assert_eq!(first, &original[256..384]);

        // Capture hot_bytes count — should stay the same after the second call.
        let stats_mid = sx.get_stats(2).unwrap();

        // Second access to the SAME range.
        let second = sx.extract(2, 256, 128).expect("second extract");
        assert_eq!(second, first, "hot path must return identical bytes");

        let stats_after = sx.get_stats(2).unwrap();
        // No new ranges should have been added.
        assert_eq!(stats_mid.1, stats_after.1, "hot bytes must not grow on hot hit");
    }

    /// Compaction drops hot ranges that were not accessed at the current epoch.
    #[test]
    fn test_sparse_compaction_demotes_stale() {
        let (_original, compressed) = make_compressed(1024);
        // Use a very short compaction interval so we can trigger it.
        let mut sx = SparseExtractor::new(1); // 1 ns interval

        sx.register(3, compressed, 1024);

        // Promote a first range.
        sx.extract(3, 0, 64).expect("first extract");
        let stats = sx.get_stats(3).unwrap();
        assert_eq!(stats.1, 64, "64 hot bytes before compaction");

        // `compact` keeps only ranges with `last_access_epoch >= epoch`.
        // Compacting right now would keep the range we just promoted, so
        // first touch a DIFFERENT offset: that promotes a second range and
        // leaves the first one with an older epoch.
        sx.extract(3, 512, 64).expect("second extract — advances epoch");

        // The first range is now stale; the second is fresh.
        sx.compact(3, 1_000_000_000);

        let stats_after = sx.get_stats(3).unwrap();
        assert_eq!(stats_after.1, 64, "only the recently-accessed range should remain");

        // Both ranges are 64 B, so the byte count alone cannot tell which one
        // survived. Check that every survivor is fresh.
        let region = sx.regions.get(&3).expect("region 3 is registered");
        assert!(
            region
                .hot_ranges
                .iter()
                .all(|hr| hr.last_access_epoch >= sx.epoch),
            "every surviving hot range must have been accessed at the current epoch"
        );
    }

    /// `get_stats` reports total size, hot bytes and compressed length.
    #[test]
    fn test_sparse_stats_reporting() {
        let (_original, compressed) = make_compressed(2048);
        let compressed_len = compressed.len();
        let mut sx = SparseExtractor::new(u64::MAX);

        sx.register(4, compressed, 2048);

        // No hot ranges yet.
        let (total, hot, comp) = sx.get_stats(4).unwrap();
        assert_eq!(total, 2048);
        assert_eq!(hot, 0);
        assert_eq!(comp, compressed_len);

        // Promote 128 bytes.
        sx.extract(4, 0, 128).unwrap();
        let (total2, hot2, comp2) = sx.get_stats(4).unwrap();
        assert_eq!(total2, 2048);
        assert_eq!(hot2, 128);
        assert_eq!(comp2, compressed_len, "compressed backing must not change");
    }

    /// After `unregister`, the region is unknown to both stats and extract.
    #[test]
    fn test_sparse_unregister() {
        let (_original, compressed) = make_compressed(512);
        let mut sx = SparseExtractor::new(u64::MAX);

        sx.register(5, compressed, 512);
        assert!(sx.get_stats(5).is_some(), "region should exist before unregister");

        sx.unregister(5);
        assert!(sx.get_stats(5).is_none(), "region should be gone after unregister");
        assert!(sx.extract(5, 0, 16).is_none(), "extract on removed region returns None");
    }
}
|
|
@@ -0,0 +1,839 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! Gaussian Splat Field Geometry — Block K
|
| 2 |
+
//!
|
| 3 |
+
//! Regions in the thermal field are not points — they are overlapping
|
| 4 |
+
//! Gaussian influence zones. Each splat has a position (size-class
|
| 5 |
+
//! centroid), opacity (temperature), and covariance (how far its
|
| 6 |
+
//! influence radiates). Splats adaptively split when internally diverse
|
| 7 |
+
//! and merge when redundantly similar. A tiled scan prioritises hot
|
| 8 |
+
//! regions so the field evolves efficiently at scale.
|
| 9 |
+
|
| 10 |
+
use std::collections::HashMap;
|
| 11 |
+
|
| 12 |
+
// ---------------------------------------------------------------------------
|
| 13 |
+
// Types
|
| 14 |
+
// ---------------------------------------------------------------------------
|
| 15 |
+
|
| 16 |
+
/// A single Gaussian splat — one managed memory region.
#[derive(Clone, Debug)]
pub struct Splat {
    /// Identifier assigned by the owning field.
    pub id: u32,
    /// Size-class centroid (log-space address / size class index).
    pub position: f64,
    /// Temperature / opacity: 0.0 (cold) → 1.0 (hot).
    pub opacity: f64,
    /// Correlation spread — how far this splat's influence reaches.
    /// Used as the Gaussian width when computing influence on other splats.
    pub covariance: f64,
    /// Total bytes managed by this splat.
    pub mass: usize,
    /// Process this splat belongs to.
    pub process_id: u32,
    /// Number of recorded accesses.
    pub access_count: u64,
    /// IDs of the children created when this splat was split; empty otherwise.
    pub child_ids: Vec<u32>,
    /// ID of the splat this one was split from. `None` for splats that were
    /// added directly or produced by a merge.
    pub parent_id: Option<u32>,
}
|
| 35 |
+
|
| 36 |
+
/// One bucket of the position axis: the splats whose positions fall into the
/// same sub-range are grouped here and scanned together.
#[derive(Clone, Debug)]
pub struct Tile {
    /// Index of this tile within the field's tile list.
    pub id: u32,
    /// IDs of the splats assigned to this tile.
    pub splat_ids: Vec<u32>,
    /// Mean opacity of the member splats (0.0 for an empty tile).
    pub heat: f64,
    /// Ranking value used when choosing the next tile to scan; higher wins.
    /// Initialised to `heat`, so hot tiles are preferred.
    pub scan_priority: f64,
    /// Timestamp (ns) of the most recent scan; 0 until the tile is scanned.
    pub last_scan_ns: u64,
}
|
| 47 |
+
|
| 48 |
+
/// The field: a collection of splats partitioned into tiles.
|
| 49 |
+
pub struct SplatField {
|
| 50 |
+
splats: HashMap<u32, Splat>,
|
| 51 |
+
tiles: Vec<Tile>,
|
| 52 |
+
next_splat_id: u32,
|
| 53 |
+
tile_scan_cursor: usize,
|
| 54 |
+
/// Coefficient-of-variation threshold above which a splat is split.
|
| 55 |
+
split_threshold: f64,
|
| 56 |
+
/// Similarity threshold above which two splats are merged.
|
| 57 |
+
merge_threshold: f64,
|
| 58 |
+
/// Maximum total (opacity × mass) in bytes.
|
| 59 |
+
ram_budget_bytes: usize,
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
/// Snapshot of the field for one cycle, as produced by [`SplatField::summary`].
#[derive(Clone, Debug)]
pub struct SplatSummary {
    /// Number of splats in the field.
    pub total_splats: usize,
    /// Splits performed during the cycle.
    pub splits_this_cycle: usize,
    /// Merges performed during the cycle.
    pub merges_this_cycle: usize,
    /// Tiles scanned during the cycle.
    pub tiles_scanned: usize,
    /// Sum of all splat opacities.
    pub total_opacity: f64,
    /// `(id, opacity)` of the hottest splat, if any.
    pub hottest_splat: Option<(u32, f64)>,
    /// `(id, opacity)` of the coldest splat, if any.
    pub coldest_splat: Option<(u32, f64)>,
}
|
| 73 |
+
|
| 74 |
+
// ---------------------------------------------------------------------------
|
| 75 |
+
// SplatField implementation
|
| 76 |
+
// ---------------------------------------------------------------------------
|
| 77 |
+
|
| 78 |
+
impl SplatField {
|
| 79 |
+
// -----------------------------------------------------------------------
|
| 80 |
+
// Construction
|
| 81 |
+
// -----------------------------------------------------------------------
|
| 82 |
+
|
| 83 |
+
/// Create a new `SplatField`.
|
| 84 |
+
///
|
| 85 |
+
/// * `ram_budget_bytes` — maximum total weighted energy (opacity × mass).
|
| 86 |
+
/// * `split_threshold` — coefficient of variation above which a splat splits.
|
| 87 |
+
/// * `merge_threshold` — similarity above which two splats merge.
|
| 88 |
+
pub fn new(
|
| 89 |
+
ram_budget_bytes: usize,
|
| 90 |
+
split_threshold: f64,
|
| 91 |
+
merge_threshold: f64,
|
| 92 |
+
) -> Self {
|
| 93 |
+
Self {
|
| 94 |
+
splats: HashMap::new(),
|
| 95 |
+
tiles: Vec::new(),
|
| 96 |
+
next_splat_id: 0,
|
| 97 |
+
tile_scan_cursor: 0,
|
| 98 |
+
split_threshold,
|
| 99 |
+
merge_threshold,
|
| 100 |
+
ram_budget_bytes,
|
| 101 |
+
}
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
// -----------------------------------------------------------------------
|
| 105 |
+
// Splat lifecycle
|
| 106 |
+
// -----------------------------------------------------------------------
|
| 107 |
+
|
| 108 |
+
/// Add a splat to the field and return its assigned ID.
|
| 109 |
+
pub fn add_splat(
|
| 110 |
+
&mut self,
|
| 111 |
+
position: f64,
|
| 112 |
+
opacity: f64,
|
| 113 |
+
covariance: f64,
|
| 114 |
+
mass: usize,
|
| 115 |
+
process_id: u32,
|
| 116 |
+
) -> u32 {
|
| 117 |
+
let id = self.next_splat_id;
|
| 118 |
+
self.next_splat_id += 1;
|
| 119 |
+
self.splats.insert(
|
| 120 |
+
id,
|
| 121 |
+
Splat {
|
| 122 |
+
id,
|
| 123 |
+
position,
|
| 124 |
+
opacity: opacity.clamp(0.0, 1.0),
|
| 125 |
+
covariance,
|
| 126 |
+
mass,
|
| 127 |
+
process_id,
|
| 128 |
+
access_count: 0,
|
| 129 |
+
child_ids: Vec::new(),
|
| 130 |
+
parent_id: None,
|
| 131 |
+
},
|
| 132 |
+
);
|
| 133 |
+
id
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
/// Remove a splat from the field.
|
| 137 |
+
pub fn remove_splat(&mut self, id: u32) {
|
| 138 |
+
self.splats.remove(&id);
|
| 139 |
+
// Purge the id from any tile that still references it.
|
| 140 |
+
for tile in self.tiles.iter_mut() {
|
| 141 |
+
tile.splat_ids.retain(|&s| s != id);
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
// -----------------------------------------------------------------------
|
| 146 |
+
// Access
|
| 147 |
+
// -----------------------------------------------------------------------
|
| 148 |
+
|
| 149 |
+
/// Mark a splat as accessed: push opacity toward 1.0 and increment counter.
|
| 150 |
+
pub fn access(&mut self, id: u32) {
|
| 151 |
+
if let Some(splat) = self.splats.get_mut(&id) {
|
| 152 |
+
// Heat injection: strong enough to overcome per-step decay.
|
| 153 |
+
let heat = 0.5 * (1.0 - splat.opacity) + 0.1;
|
| 154 |
+
splat.opacity = (splat.opacity + heat).min(1.0);
|
| 155 |
+
splat.access_count += 1;
|
| 156 |
+
}
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
// -----------------------------------------------------------------------
|
| 160 |
+
// Gaussian influence
|
| 161 |
+
// -----------------------------------------------------------------------
|
| 162 |
+
|
| 163 |
+
/// Compute the Gaussian influence the source splat exerts on the target.
|
| 164 |
+
///
|
| 165 |
+
/// `influence = opacity_source × exp(-0.5 × ((Δpos / covariance_source)²))`
|
| 166 |
+
///
|
| 167 |
+
/// Returns 0.0 if either splat does not exist or if covariance is zero.
|
| 168 |
+
pub fn compute_influence(&self, source_id: u32, target_id: u32) -> f64 {
|
| 169 |
+
let source = match self.splats.get(&source_id) {
|
| 170 |
+
Some(s) => s,
|
| 171 |
+
None => return 0.0,
|
| 172 |
+
};
|
| 173 |
+
let target = match self.splats.get(&target_id) {
|
| 174 |
+
Some(t) => t,
|
| 175 |
+
None => return 0.0,
|
| 176 |
+
};
|
| 177 |
+
if source.covariance == 0.0 {
|
| 178 |
+
return 0.0;
|
| 179 |
+
}
|
| 180 |
+
let delta = (source.position - target.position) / source.covariance;
|
| 181 |
+
source.opacity * (-0.5 * delta * delta).exp()
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
// -----------------------------------------------------------------------
|
| 185 |
+
// Field evolution
|
| 186 |
+
// -----------------------------------------------------------------------
|
| 187 |
+
|
| 188 |
+
/// Advance the field by one step.
|
| 189 |
+
///
|
| 190 |
+
/// 1. For each splat, accumulate Gaussian-weighted influence from every
|
| 191 |
+
/// other splat (activation = weighted sum).
|
| 192 |
+
/// 2. Apply the Lenia-style Gaussian growth function to that activation.
|
| 193 |
+
/// 3. Apply natural decay (opacity × 0.98).
|
| 194 |
+
/// 4. Enforce mass conservation: if total (opacity × mass) exceeds the RAM
|
| 195 |
+
/// budget, scale all opacities down proportionally.
|
| 196 |
+
pub fn step(&mut self, _dt: f64) {
|
| 197 |
+
// Collect all current splat IDs to avoid borrow issues.
|
| 198 |
+
let ids: Vec<u32> = self.splats.keys().copied().collect();
|
| 199 |
+
|
| 200 |
+
// Phase 1: compute new opacities.
|
| 201 |
+
let mut new_opacities: HashMap<u32, f64> = HashMap::new();
|
| 202 |
+
|
| 203 |
+
for &id in &ids {
|
| 204 |
+
let old_opacity = match self.splats.get(&id) {
|
| 205 |
+
Some(s) => s.opacity,
|
| 206 |
+
None => continue,
|
| 207 |
+
};
|
| 208 |
+
|
| 209 |
+
// Accumulate influence from all other splats.
|
| 210 |
+
let mut activation = 0.0f64;
|
| 211 |
+
for &other_id in &ids {
|
| 212 |
+
if other_id == id {
|
| 213 |
+
continue;
|
| 214 |
+
}
|
| 215 |
+
activation += self.compute_influence(other_id, id);
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
// Growth function: Gaussian bump centred at 0.5, sigma = 0.15.
|
| 219 |
+
// Returns a value in [0, 1]. We treat it as a growth delta.
|
| 220 |
+
let growth = growth_fn(activation);
|
| 221 |
+
|
| 222 |
+
// New opacity: apply growth bump then decay.
|
| 223 |
+
let new_opacity = ((old_opacity + growth * 0.1) * 0.98).clamp(0.0, 1.0);
|
| 224 |
+
new_opacities.insert(id, new_opacity);
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
// Phase 2: write back new opacities.
|
| 228 |
+
for (&id, &new_op) in &new_opacities {
|
| 229 |
+
if let Some(splat) = self.splats.get_mut(&id) {
|
| 230 |
+
splat.opacity = new_op;
|
| 231 |
+
}
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
// Phase 3: mass conservation.
|
| 235 |
+
let total_energy: f64 = self
|
| 236 |
+
.splats
|
| 237 |
+
.values()
|
| 238 |
+
.map(|s| s.opacity * s.mass as f64)
|
| 239 |
+
.sum();
|
| 240 |
+
|
| 241 |
+
if total_energy > self.ram_budget_bytes as f64 && total_energy > 0.0 {
|
| 242 |
+
let scale = self.ram_budget_bytes as f64 / total_energy;
|
| 243 |
+
for splat in self.splats.values_mut() {
|
| 244 |
+
splat.opacity = (splat.opacity * scale).clamp(0.0, 1.0);
|
| 245 |
+
}
|
| 246 |
+
}
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
// -----------------------------------------------------------------------
|
| 250 |
+
// Adaptive split / merge
|
| 251 |
+
// -----------------------------------------------------------------------
|
| 252 |
+
|
| 253 |
+
/// Attempt to split a splat into children.
|
| 254 |
+
///
|
| 255 |
+
/// `sub_opacities` is a slice of per-sub-region opacity samples inside the
|
| 256 |
+
/// splat. If the coefficient of variation of those samples exceeds
|
| 257 |
+
/// `split_threshold`, the splat is split into `sub_opacities.len()`
|
| 258 |
+
/// children and their IDs are returned. The parent's `child_ids` are
|
| 259 |
+
/// updated; each child's `parent_id` is set to `None` (they are new roots).
|
| 260 |
+
/// Returns `None` if the splat does not exist, has fewer than two
|
| 261 |
+
/// sub-opacities, or the internal diversity is below the threshold.
|
| 262 |
+
pub fn try_split(&mut self, id: u32, sub_opacities: &[f64]) -> Option<Vec<u32>> {
|
| 263 |
+
if sub_opacities.len() < 2 {
|
| 264 |
+
return None;
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
// Read parent data first (immutable borrow).
|
| 268 |
+
let (parent_pos, parent_cov, parent_mass, parent_pid) = {
|
| 269 |
+
let parent = self.splats.get(&id)?;
|
| 270 |
+
(
|
| 271 |
+
parent.position,
|
| 272 |
+
parent.covariance,
|
| 273 |
+
parent.mass,
|
| 274 |
+
parent.process_id,
|
| 275 |
+
)
|
| 276 |
+
};
|
| 277 |
+
|
| 278 |
+
// Compute coefficient of variation.
|
| 279 |
+
let n = sub_opacities.len() as f64;
|
| 280 |
+
let mean: f64 = sub_opacities.iter().sum::<f64>() / n;
|
| 281 |
+
if mean == 0.0 {
|
| 282 |
+
return None;
|
| 283 |
+
}
|
| 284 |
+
let variance: f64 =
|
| 285 |
+
sub_opacities.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
|
| 286 |
+
let cv = variance.sqrt() / mean;
|
| 287 |
+
|
| 288 |
+
if cv <= self.split_threshold {
|
| 289 |
+
return None;
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
// Create one child per sub-region, spread evenly around parent position.
|
| 293 |
+
let spread = parent_cov;
|
| 294 |
+
let n_children = sub_opacities.len();
|
| 295 |
+
let child_mass = parent_mass / n_children.max(1);
|
| 296 |
+
let child_cov = parent_cov / 2.0;
|
| 297 |
+
|
| 298 |
+
let mut child_ids = Vec::with_capacity(n_children);
|
| 299 |
+
for (i, &sub_op) in sub_opacities.iter().enumerate() {
|
| 300 |
+
// Spread children symmetrically around parent position.
|
| 301 |
+
let offset = (i as f64 - (n_children as f64 - 1.0) / 2.0)
|
| 302 |
+
* spread
|
| 303 |
+
/ n_children as f64;
|
| 304 |
+
let child_id = self.next_splat_id;
|
| 305 |
+
self.next_splat_id += 1;
|
| 306 |
+
self.splats.insert(
|
| 307 |
+
child_id,
|
| 308 |
+
Splat {
|
| 309 |
+
id: child_id,
|
| 310 |
+
position: parent_pos + offset,
|
| 311 |
+
opacity: sub_op.clamp(0.0, 1.0),
|
| 312 |
+
covariance: child_cov,
|
| 313 |
+
mass: child_mass,
|
| 314 |
+
process_id: parent_pid,
|
| 315 |
+
access_count: 0,
|
| 316 |
+
child_ids: Vec::new(),
|
| 317 |
+
parent_id: Some(id),
|
| 318 |
+
},
|
| 319 |
+
);
|
| 320 |
+
child_ids.push(child_id);
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
// Update parent's child list.
|
| 324 |
+
if let Some(parent) = self.splats.get_mut(&id) {
|
| 325 |
+
parent.child_ids = child_ids.clone();
|
| 326 |
+
}
|
| 327 |
+
|
| 328 |
+
Some(child_ids)
|
| 329 |
+
}
|
| 330 |
+
|
| 331 |
+
/// Attempt to merge a set of splats into one.
|
| 332 |
+
///
|
| 333 |
+
/// Merges if every pair in `ids` has opacity within 10% of each other
|
| 334 |
+
/// AND the Gaussian influence between all pairs exceeds `merge_threshold`.
|
| 335 |
+
/// Returns the ID of the new merged splat, or `None` if the conditions are
|
| 336 |
+
/// not met or fewer than two IDs are provided.
|
| 337 |
+
pub fn try_merge(&mut self, ids: &[u32]) -> Option<u32> {
|
| 338 |
+
if ids.len() < 2 {
|
| 339 |
+
return None;
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
// Gather splat snapshots.
|
| 343 |
+
let splats: Vec<Splat> = ids
|
| 344 |
+
.iter()
|
| 345 |
+
.filter_map(|&id| self.splats.get(&id).cloned())
|
| 346 |
+
.collect();
|
| 347 |
+
|
| 348 |
+
if splats.len() < 2 {
|
| 349 |
+
return None;
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
// Check temperature similarity: all opacities within 10% of the mean.
|
| 353 |
+
let mean_opacity: f64 = splats.iter().map(|s| s.opacity).sum::<f64>()
|
| 354 |
+
/ splats.len() as f64;
|
| 355 |
+
let all_similar = splats
|
| 356 |
+
.iter()
|
| 357 |
+
.all(|s| (s.opacity - mean_opacity).abs() <= 0.1);
|
| 358 |
+
if !all_similar {
|
| 359 |
+
return None;
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
// Check pairwise Gaussian correlation (use compute_influence proxy):
|
| 363 |
+
// influence between two splats must exceed merge_threshold.
|
| 364 |
+
for i in 0..splats.len() {
|
| 365 |
+
for j in (i + 1)..splats.len() {
|
| 366 |
+
let influence =
|
| 367 |
+
self.compute_influence(splats[i].id, splats[j].id);
|
| 368 |
+
if influence < self.merge_threshold {
|
| 369 |
+
return None;
|
| 370 |
+
}
|
| 371 |
+
}
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
// Build the merged splat.
|
| 375 |
+
let merged_position =
|
| 376 |
+
splats.iter().map(|s| s.position).sum::<f64>() / splats.len() as f64;
|
| 377 |
+
let merged_opacity = mean_opacity;
|
| 378 |
+
let merged_covariance =
|
| 379 |
+
splats.iter().map(|s| s.covariance).sum::<f64>() / splats.len() as f64;
|
| 380 |
+
let merged_mass: usize = splats.iter().map(|s| s.mass).sum();
|
| 381 |
+
let merged_pid = splats[0].process_id;
|
| 382 |
+
let merged_access: u64 = splats.iter().map(|s| s.access_count).sum();
|
| 383 |
+
|
| 384 |
+
let merged_id = self.next_splat_id;
|
| 385 |
+
self.next_splat_id += 1;
|
| 386 |
+
self.splats.insert(
|
| 387 |
+
merged_id,
|
| 388 |
+
Splat {
|
| 389 |
+
id: merged_id,
|
| 390 |
+
position: merged_position,
|
| 391 |
+
opacity: merged_opacity.clamp(0.0, 1.0),
|
| 392 |
+
covariance: merged_covariance,
|
| 393 |
+
mass: merged_mass,
|
| 394 |
+
process_id: merged_pid,
|
| 395 |
+
access_count: merged_access,
|
| 396 |
+
child_ids: Vec::new(),
|
| 397 |
+
parent_id: None,
|
| 398 |
+
},
|
| 399 |
+
);
|
| 400 |
+
|
| 401 |
+
// Remove the source splats.
|
| 402 |
+
for id in ids {
|
| 403 |
+
self.remove_splat(*id);
|
| 404 |
+
}
|
| 405 |
+
|
| 406 |
+
Some(merged_id)
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
// -----------------------------------------------------------------------
|
| 410 |
+
// Tiled scanning
|
| 411 |
+
// -----------------------------------------------------------------------
|
| 412 |
+
|
| 413 |
+
/// Partition all current splats into `num_tiles` tiles by position range.
|
| 414 |
+
///
|
| 415 |
+
/// Tiles are rebuilt from scratch each call. After partitioning, each
|
| 416 |
+
/// tile's `heat` and `scan_priority` are recomputed.
|
| 417 |
+
pub fn partition_tiles(&mut self, num_tiles: usize) {
|
| 418 |
+
if num_tiles == 0 || self.splats.is_empty() {
|
| 419 |
+
self.tiles.clear();
|
| 420 |
+
return;
|
| 421 |
+
}
|
| 422 |
+
|
| 423 |
+
// Find position range.
|
| 424 |
+
let min_pos = self
|
| 425 |
+
.splats
|
| 426 |
+
.values()
|
| 427 |
+
.map(|s| s.position)
|
| 428 |
+
.fold(f64::INFINITY, f64::min);
|
| 429 |
+
let max_pos = self
|
| 430 |
+
.splats
|
| 431 |
+
.values()
|
| 432 |
+
.map(|s| s.position)
|
| 433 |
+
.fold(f64::NEG_INFINITY, f64::max);
|
| 434 |
+
|
| 435 |
+
let range = (max_pos - min_pos).max(1e-12);
|
| 436 |
+
let tile_width = range / num_tiles as f64;
|
| 437 |
+
|
| 438 |
+
// Build tiles.
|
| 439 |
+
let mut tiles: Vec<Tile> = (0..num_tiles)
|
| 440 |
+
.map(|i| Tile {
|
| 441 |
+
id: i as u32,
|
| 442 |
+
splat_ids: Vec::new(),
|
| 443 |
+
heat: 0.0,
|
| 444 |
+
scan_priority: 0.0,
|
| 445 |
+
last_scan_ns: 0,
|
| 446 |
+
})
|
| 447 |
+
.collect();
|
| 448 |
+
|
| 449 |
+
for splat in self.splats.values() {
|
| 450 |
+
let idx = ((splat.position - min_pos) / tile_width) as usize;
|
| 451 |
+
let idx = idx.min(num_tiles - 1);
|
| 452 |
+
tiles[idx].splat_ids.push(splat.id);
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
// Compute per-tile heat and scan priority.
|
| 456 |
+
for tile in tiles.iter_mut() {
|
| 457 |
+
if tile.splat_ids.is_empty() {
|
| 458 |
+
tile.heat = 0.0;
|
| 459 |
+
tile.scan_priority = 0.0;
|
| 460 |
+
continue;
|
| 461 |
+
}
|
| 462 |
+
let total_opacity: f64 = tile
|
| 463 |
+
.splat_ids
|
| 464 |
+
.iter()
|
| 465 |
+
.filter_map(|&id| self.splats.get(&id))
|
| 466 |
+
.map(|s| s.opacity)
|
| 467 |
+
.sum();
|
| 468 |
+
tile.heat = total_opacity / tile.splat_ids.len() as f64;
|
| 469 |
+
tile.scan_priority = tile.heat; // hot tiles scan more
|
| 470 |
+
}
|
| 471 |
+
|
| 472 |
+
self.tiles = tiles;
|
| 473 |
+
// Reset cursor so iteration starts from a fresh position.
|
| 474 |
+
self.tile_scan_cursor = 0;
|
| 475 |
+
}
|
| 476 |
+
|
| 477 |
+
/// Advance the round-robin tile cursor and return the next tile to scan.
|
| 478 |
+
///
|
| 479 |
+
/// The cursor is biased toward hot tiles: after returning a tile it bumps
|
| 480 |
+
/// `scan_priority` by 1.0 for hot tiles so they rise to the top of
|
| 481 |
+
/// future natural ordering, but the cursor itself is a simple modular
|
| 482 |
+
/// advance for predictability. `last_scan_ns` is updated on the returned
|
| 483 |
+
/// tile.
|
| 484 |
+
///
|
| 485 |
+
/// Returns `None` if there are no tiles.
|
| 486 |
+
pub fn scan_next_tile(&mut self, now_ns: u64) -> Option<&Tile> {
|
| 487 |
+
if self.tiles.is_empty() {
|
| 488 |
+
return None;
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
// Find the tile with the highest scan_priority, using the cursor as a
|
| 492 |
+
// tiebreaker (prefer tiles that haven't been scanned recently in order).
|
| 493 |
+
// This gives hot tiles more frequent visits while still cycling through all.
|
| 494 |
+
let n = self.tiles.len();
|
| 495 |
+
|
| 496 |
+
// Pick the tile with maximum scan_priority; ties broken by cursor order.
|
| 497 |
+
let mut best_idx = self.tile_scan_cursor % n;
|
| 498 |
+
let mut best_priority = self.tiles[best_idx].scan_priority;
|
| 499 |
+
for i in 1..n {
|
| 500 |
+
let idx = (self.tile_scan_cursor + i) % n;
|
| 501 |
+
if self.tiles[idx].scan_priority > best_priority {
|
| 502 |
+
best_priority = self.tiles[idx].scan_priority;
|
| 503 |
+
best_idx = idx;
|
| 504 |
+
}
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
// Update the chosen tile.
|
| 508 |
+
self.tiles[best_idx].last_scan_ns = now_ns;
|
| 509 |
+
// Reduce its scan_priority so it won't monopolise — decay toward heat baseline.
|
| 510 |
+
self.tiles[best_idx].scan_priority =
|
| 511 |
+
self.tiles[best_idx].heat; // reset; will grow again next partition
|
| 512 |
+
|
| 513 |
+
// Advance cursor.
|
| 514 |
+
self.tile_scan_cursor = (best_idx + 1) % n;
|
| 515 |
+
|
| 516 |
+
Some(&self.tiles[best_idx])
|
| 517 |
+
}
|
| 518 |
+
|
| 519 |
+
// -----------------------------------------------------------------------
|
| 520 |
+
// Queries
|
| 521 |
+
// -----------------------------------------------------------------------
|
| 522 |
+
|
| 523 |
+
/// Return IDs of all splats whose opacity is below `threshold`.
|
| 524 |
+
pub fn get_cold_splats(&self, threshold: f64) -> Vec<u32> {
|
| 525 |
+
self.splats
|
| 526 |
+
.values()
|
| 527 |
+
.filter(|s| s.opacity < threshold)
|
| 528 |
+
.map(|s| s.id)
|
| 529 |
+
.collect()
|
| 530 |
+
}
|
| 531 |
+
|
| 532 |
+
/// Return IDs of all splats whose opacity is above `threshold`.
|
| 533 |
+
pub fn get_hot_splats(&self, threshold: f64) -> Vec<u32> {
|
| 534 |
+
self.splats
|
| 535 |
+
.values()
|
| 536 |
+
.filter(|s| s.opacity > threshold)
|
| 537 |
+
.map(|s| s.id)
|
| 538 |
+
.collect()
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
/// Summarise the current field state.
|
| 542 |
+
pub fn summary(&self) -> SplatSummary {
|
| 543 |
+
let total_opacity: f64 = self.splats.values().map(|s| s.opacity).sum();
|
| 544 |
+
|
| 545 |
+
let hottest = self
|
| 546 |
+
.splats
|
| 547 |
+
.values()
|
| 548 |
+
.max_by(|a, b| a.opacity.partial_cmp(&b.opacity).unwrap())
|
| 549 |
+
.map(|s| (s.id, s.opacity));
|
| 550 |
+
|
| 551 |
+
let coldest = self
|
| 552 |
+
.splats
|
| 553 |
+
.values()
|
| 554 |
+
.min_by(|a, b| a.opacity.partial_cmp(&b.opacity).unwrap())
|
| 555 |
+
.map(|s| (s.id, s.opacity));
|
| 556 |
+
|
| 557 |
+
SplatSummary {
|
| 558 |
+
total_splats: self.splats.len(),
|
| 559 |
+
splits_this_cycle: 0, // caller tracks across calls
|
| 560 |
+
merges_this_cycle: 0,
|
| 561 |
+
tiles_scanned: 0,
|
| 562 |
+
total_opacity,
|
| 563 |
+
hottest_splat: hottest,
|
| 564 |
+
coldest_splat: coldest,
|
| 565 |
+
}
|
| 566 |
+
}
|
| 567 |
+
}
|
| 568 |
+
|
| 569 |
+
// ---------------------------------------------------------------------------
|
| 570 |
+
// Internal helpers
|
| 571 |
+
// ---------------------------------------------------------------------------
|
| 572 |
+
|
| 573 |
+
/// Lenia-style bell-shaped growth response.
///
/// Evaluates `exp(-0.5 * ((activation - 0.5) / 0.15)^2)`: exactly 1.0 at
/// `activation == 0.5`, decreasing symmetrically toward 0 as the activation
/// moves away from 0.5 in either direction.
#[inline]
fn growth_fn(activation: f64) -> f64 {
    // Centre and width of the bell.
    const CENTER: f64 = 0.5;
    const WIDTH: f64 = 0.15;

    let z = (activation - CENTER) / WIDTH;
    f64::exp(-0.5 * z * z)
}
|
| 582 |
+
|
| 583 |
+
// ---------------------------------------------------------------------------
|
| 584 |
+
// Tests
|
| 585 |
+
// ---------------------------------------------------------------------------
|
| 586 |
+
|
| 587 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a field whose budget is far larger than anything the tests
    /// allocate.
    fn make_field() -> SplatField {
        let budget = 1_000_000_000; // 1 GB budget — generous for tests
        let split_threshold = 0.3; // CV > 0.3 → split
        let merge_threshold = 0.05; // influence > 0.05 → eligible for merge
        SplatField::new(budget, split_threshold, merge_threshold)
    }

    /// Influence from one source must be larger on a nearby target than on
    /// a distant one, and never negative.
    #[test]
    fn test_gaussian_influence_falloff() {
        let mut field = make_field();

        // Source sits at position 0.0 with covariance 1.0 and full opacity;
        // the two targets sit at positions 0.5 and 5.0.
        let source = field.add_splat(0.0, 1.0, 1.0, 1024, 1);
        let close_target = field.add_splat(0.5, 0.5, 1.0, 1024, 1);
        let distant_target = field.add_splat(5.0, 0.5, 1.0, 1024, 1);

        let near_inf = field.compute_influence(source, close_target);
        let far_inf = field.compute_influence(source, distant_target);

        assert!(
            near_inf > far_inf,
            "Closer target must receive more influence: near={near_inf:.4} far={far_inf:.4}"
        );
        assert!(near_inf > 0.0, "Near influence must be positive");
        assert!(far_inf >= 0.0, "Far influence must be non-negative");
    }

    /// After one step, the sum of opacity × mass must fit the budget.
    #[test]
    fn test_mass_conservation() {
        // Budget of 100 000 bytes against five splats of 50 000-byte mass at
        // opacity 1.0 (250 000 in total), so the step has to scale them down.
        let mut field = SplatField::new(100_000, 0.5, 0.05);
        for slot in 0..5 {
            let position = slot as f64;
            field.add_splat(position, 1.0, 1.0, 50_000, 1);
        }

        field.step(0.1);

        let mut total_energy = 0.0_f64;
        for splat in field.splats.values() {
            total_energy += splat.opacity * splat.mass as f64;
        }

        // Tiny float tolerance on top of the budget.
        let ceiling = 100_000.0 * 1.001;
        assert!(
            total_energy <= ceiling,
            "Energy must be within budget after step(): {total_energy:.1}"
        );
    }

    /// A single access must raise opacity and count as one access.
    #[test]
    fn test_access_heats_splat() {
        let mut field = make_field();
        let id = field.add_splat(0.0, 0.1, 1.0, 1024, 1);

        let before = field.splats[&id].opacity;
        field.access(id);
        let touched = &field.splats[&id];
        let after = touched.opacity;

        assert!(
            after > before,
            "Access must raise opacity: {before:.4} → {after:.4}"
        );
        assert_eq!(touched.access_count, 1);
    }

    /// A hot, isolated, never-accessed splat must lose opacity over time.
    #[test]
    fn test_decay_cools_splat() {
        let mut field = make_field();
        // Starts at opacity 1.0, with no neighbours and no accesses.
        let id = field.add_splat(0.0, 1.0, 1.0, 1024, 1);

        (0..50).for_each(|_| {
            field.step(0.1);
        });

        let final_opacity = field.splats[&id].opacity;
        assert!(
            final_opacity < 1.0,
            "Splat must cool down over 50 steps without access: opacity={final_opacity:.4}"
        );
    }

    /// Splitting must create one child per sub-opacity and link parent and
    /// children in both directions.
    #[test]
    fn test_split_creates_children() {
        let mut field = make_field();
        let parent_id = field.add_splat(5.0, 0.5, 2.0, 8192, 42);

        // Widely spread sub-opacities give a high coefficient of variation,
        // which is what forces the split.
        let sub_ops = [0.05, 0.95, 0.1, 0.9];
        let children = field
            .try_split(parent_id, &sub_ops)
            .expect("Split should succeed with high CV");

        assert_eq!(children.len(), 4, "Should create one child per sub-opacity");

        // Child → parent links.
        children.iter().for_each(|child_id| {
            assert_eq!(
                field.splats[child_id].parent_id,
                Some(parent_id),
                "Child {child_id} must reference parent {parent_id}"
            );
        });

        // Parent → children links.
        assert_eq!(
            field.splats[&parent_id].child_ids, children,
            "Parent child_ids must match returned IDs"
        );
    }

    /// Merging two close, similar splats must remove both sources and
    /// produce one splat with summed mass and roughly the mean opacity.
    #[test]
    fn test_merge_combines_splats() {
        let mut field = make_field();

        // Nearly identical splats only 0.1 apart, so mutual influence is high.
        let a = field.add_splat(0.0, 0.5, 10.0, 512, 1);
        let b = field.add_splat(0.1, 0.5, 10.0, 512, 1);

        let merged = field
            .try_merge(&[a, b])
            .expect("Merge should succeed for similar, close splats");

        // Both sources must have disappeared.
        assert!(
            !field.splats.contains_key(&a),
            "Source splat A must be removed after merge"
        );
        assert!(
            !field.splats.contains_key(&b),
            "Source splat B must be removed after merge"
        );

        // The merged splat carries the combined mass.
        let m = &field.splats[&merged];
        assert_eq!(m.mass, 1024, "Merged mass must be sum of sources");
        assert!(
            (m.opacity - 0.5).abs() < 0.05,
            "Merged opacity must be approximately the mean"
        );
    }

    /// With a cold and a hot cluster split into two tiles, the hot tile must
    /// get the higher priority and be scanned first.
    #[test]
    fn test_tiled_scan_priority() {
        let mut field = make_field();

        // Cold cluster at positions 0-2 (opacity 0.05), then a hot cluster
        // at positions 10-12 (opacity 0.95).
        for &(base, opacity) in &[(0.0, 0.05), (10.0, 0.95)] {
            for offset in 0..3 {
                field.add_splat(base + offset as f64, opacity, 1.0, 512, 1);
            }
        }

        field.partition_tiles(2);

        assert_eq!(field.tiles.len(), 2, "Should have exactly 2 tiles");

        // Spread of scan priorities across the two tiles.
        let priorities: Vec<f64> = field.tiles.iter().map(|t| t.scan_priority).collect();
        let max_priority = priorities
            .iter()
            .copied()
            .fold(f64::NEG_INFINITY, f64::max);
        let min_priority = priorities.iter().copied().fold(f64::INFINITY, f64::min);

        assert!(
            max_priority > min_priority,
            "Hot tile must have higher priority than cold tile: max={max_priority:.3} min={min_priority:.3}"
        );

        // The first scan after partitioning must land on the hot tile,
        // because it starts with the higher priority.
        let first = field.scan_next_tile(1_000).unwrap().clone();
        assert!(
            first.heat > 0.5,
            "First scanned tile should be the hot one: heat={:.3}",
            first.heat
        );
    }

    /// Cold and hot clusters that are far apart must keep their
    /// classification after a few simulation steps.
    #[test]
    fn test_cold_hot_identification() {
        let mut field = make_field();

        // Cold cluster at positions 0-2 and hot cluster at positions 100-102.
        // With covariance 1.0 the 100-unit gap makes cross-cluster Gaussian
        // influence vanishingly small (≈ exp(-0.5 × 100²) ≈ 0), so the hot
        // splats cannot warm the cold ones within a handful of steps.
        let c0 = field.add_splat(0.0, 0.05, 1.0, 512, 1);
        let c1 = field.add_splat(1.0, 0.08, 1.0, 512, 1);
        let c2 = field.add_splat(2.0, 0.12, 1.0, 512, 1);
        let h0 = field.add_splat(100.0, 0.85, 1.0, 512, 1);
        let h1 = field.add_splat(101.0, 0.90, 1.0, 512, 1);
        let h2 = field.add_splat(102.0, 0.95, 1.0, 512, 1);

        // Run a few steps so the whole pipeline is exercised.
        (0..5).for_each(|_| {
            field.step(0.1);
        });

        let cold = field.get_cold_splats(0.2);
        let hot = field.get_hot_splats(0.7);

        // The original cold set must still be reported as cold.
        [c0, c1, c2].iter().for_each(|id| {
            assert!(
                cold.contains(id),
                "Splat {id} should be in the cold list"
            );
        });
        // The original hot set must still be reported as hot.
        [h0, h1, h2].iter().for_each(|id| {
            assert!(
                hot.contains(id),
                "Splat {id} should be in the hot list"
            );
        });
    }
}
|
|
@@ -1,28 +1,9 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Condensate: PyTorch Membrane (v2 — Head-Level Granularity)
|
| 3 |
-
|
| 4 |
-
Hooks into nn.Module forward passes to track activation at TWO levels:
|
| 5 |
-
- Layer level: which modules fire, how strongly
|
| 6 |
-
- Head level: within attention layers, which individual heads contribute
|
| 7 |
-
|
| 8 |
-
This is the key upgrade. Layer-level tracking found a 16.6% floor.
|
| 9 |
-
Head-level tracking sees inside that floor — different inputs activate
|
| 10 |
-
different heads within the same layer. That's where 50%+ savings live.
|
| 11 |
-
|
| 12 |
-
Usage:
|
| 13 |
-
from torch_membrane import TorchMembrane
|
| 14 |
-
|
| 15 |
-
model = AutoModelForCausalLM.from_pretrained("gpt2-large")
|
| 16 |
-
membrane = TorchMembrane(model)
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
membrane.print_activation_map() # layer-level summary
|
| 21 |
-
membrane.print_head_map() # head-level detail
|
| 22 |
-
membrane.get_condensation_potential() # layer-level savings
|
| 23 |
-
membrane.get_head_condensation_potential() # head-level savings
|
| 24 |
"""
|
| 25 |
-
|
| 26 |
import time
|
| 27 |
import numpy as np
|
| 28 |
from collections import defaultdict
|
|
@@ -77,7 +58,6 @@ class LayerActivation:
|
|
| 77 |
self.param_bytes = param_bytes
|
| 78 |
self.is_attention = is_attention
|
| 79 |
self.num_heads = num_heads
|
| 80 |
-
# For attention layers, divide params evenly across heads
|
| 81 |
self.per_head_param_bytes = (param_bytes // num_heads) if num_heads > 0 else 0
|
| 82 |
|
| 83 |
def reset(self):
|
|
@@ -91,22 +71,20 @@ class LayerActivation:
|
|
| 91 |
class TorchMembrane:
|
| 92 |
"""Hooks into a PyTorch model to track layer AND head activations.
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
"""
|
| 99 |
|
| 100 |
def __init__(self, model, activation_threshold=0.01):
|
| 101 |
-
self.
|
| 102 |
self.activation_threshold = activation_threshold
|
| 103 |
-
self.layers = {}
|
| 104 |
-
self.heads = {}
|
| 105 |
self._hooks = []
|
| 106 |
-
self._start_time = time.monotonic_ns()
|
| 107 |
self._access_log = []
|
| 108 |
|
| 109 |
-
# Detect model config for head count
|
| 110 |
config = getattr(model, 'config', None)
|
| 111 |
self._default_num_heads = getattr(config, 'n_head',
|
| 112 |
getattr(config, 'num_attention_heads', 0))
|
|
@@ -120,34 +98,22 @@ class TorchMembrane:
|
|
| 120 |
self._install_hooks()
|
| 121 |
|
| 122 |
def _install_hooks(self):
|
| 123 |
-
|
| 124 |
-
import torch
|
| 125 |
-
|
| 126 |
-
for name, module in self.model.named_modules():
|
| 127 |
if name == '':
|
| 128 |
continue
|
| 129 |
|
| 130 |
param_bytes = sum(p.numel() * p.element_size()
|
| 131 |
for p in module.parameters(recurse=False))
|
| 132 |
|
| 133 |
-
# Detect attention layers
|
| 134 |
is_attention = any(kw in name.lower()
|
| 135 |
for kw in ['attn', 'attention', 'self_attn'])
|
| 136 |
|
| 137 |
-
# Detect attention OUTPUT projection specifically — this is where
|
| 138 |
-
# we can decompose by head from the pre-projection tensor
|
| 139 |
-
is_attn_output = is_attention and any(
|
| 140 |
-
kw in name.lower()
|
| 141 |
-
for kw in ['c_proj', 'out_proj', 'o_proj', 'dense']
|
| 142 |
-
)
|
| 143 |
-
|
| 144 |
num_heads = 0
|
| 145 |
if is_attention:
|
| 146 |
num_heads = getattr(module, 'num_heads',
|
| 147 |
getattr(module, 'num_attention_heads',
|
| 148 |
self._default_num_heads))
|
| 149 |
|
| 150 |
-
# Register per-head trackers
|
| 151 |
if num_heads > 0:
|
| 152 |
for h in range(num_heads):
|
| 153 |
head_key = f"{name}.head_{h}"
|
|
@@ -167,15 +133,11 @@ class TorchMembrane:
|
|
| 167 |
self._hooks.append(hook)
|
| 168 |
|
| 169 |
def _make_hook(self, name, layer_info):
|
| 170 |
-
"""Create a forward hook that tracks both layer and head activation."""
|
| 171 |
-
import torch
|
| 172 |
-
|
| 173 |
def hook_fn(module, input, output):
|
| 174 |
-
ts = time.
|
| 175 |
layer_info.forward_count += 1
|
| 176 |
layer_info.timestamps_ns.append(ts)
|
| 177 |
|
| 178 |
-
# Compute layer-level output norm
|
| 179 |
out_tensor = None
|
| 180 |
if isinstance(output, torch.Tensor):
|
| 181 |
out_tensor = output
|
|
@@ -193,23 +155,15 @@ class TorchMembrane:
|
|
| 193 |
layer_info.total_activation += norm
|
| 194 |
layer_info.max_activation = max(layer_info.max_activation, norm)
|
| 195 |
|
| 196 |
-
|
| 197 |
-
self._access_log.append((ts,
|
| 198 |
|
| 199 |
-
# Head-level decomposition for attention layers
|
| 200 |
if layer_info.is_attention and layer_info.num_heads > 0 and out_tensor is not None:
|
| 201 |
self._decompose_heads(name, layer_info, out_tensor, ts)
|
| 202 |
|
| 203 |
return hook_fn
|
| 204 |
|
| 205 |
def _decompose_heads(self, name, layer_info, output_tensor, ts):
|
| 206 |
-
"""Decompose attention output into per-head activation norms.
|
| 207 |
-
|
| 208 |
-
For GPT-2 style models, the attention output is (batch, seq, hidden).
|
| 209 |
-
hidden = num_heads * head_dim. We reshape and compute per-head norms.
|
| 210 |
-
"""
|
| 211 |
-
import torch
|
| 212 |
-
|
| 213 |
num_heads = layer_info.num_heads
|
| 214 |
if num_heads <= 0:
|
| 215 |
return
|
|
@@ -217,59 +171,51 @@ class TorchMembrane:
|
|
| 217 |
try:
|
| 218 |
with torch.no_grad():
|
| 219 |
shape = output_tensor.shape
|
| 220 |
-
# Expected: (batch, seq_len, hidden_size) or (batch, seq_len, num_heads * head_dim)
|
| 221 |
if len(shape) < 2:
|
| 222 |
return
|
| 223 |
|
| 224 |
hidden = shape[-1]
|
| 225 |
-
|
| 226 |
-
# Only decompose if hidden is divisible by num_heads
|
| 227 |
if hidden % num_heads != 0:
|
| 228 |
return
|
| 229 |
|
| 230 |
head_dim = hidden // num_heads
|
| 231 |
-
|
| 232 |
-
# Reshape to (batch, seq_len, num_heads, head_dim)
|
| 233 |
reshaped = output_tensor.view(*shape[:-1], num_heads, head_dim)
|
| 234 |
|
| 235 |
-
# Compute per-head norm: norm across (batch, seq_len, head_dim)
|
| 236 |
for h in range(num_heads):
|
| 237 |
head_key = f"{name}.head_{h}"
|
| 238 |
head_tracker = self.heads.get(head_key)
|
| 239 |
if head_tracker:
|
| 240 |
head_norm = reshaped[..., h, :].float().norm().item()
|
| 241 |
head_tracker.record(head_norm)
|
| 242 |
-
|
| 243 |
-
# Record head-level access
|
| 244 |
self._access_log.append((
|
| 245 |
-
ts,
|
| 246 |
layer_info.per_head_param_bytes
|
| 247 |
))
|
| 248 |
|
| 249 |
except (RuntimeError, ValueError):
|
| 250 |
-
# Shape mismatch — skip head decomposition for this layer
|
| 251 |
pass
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
def reset(self):
    """Discard everything recorded so far and restart the reference clock.

    Stores a fresh ``time.monotonic_ns()`` reading in ``_start_time``,
    empties the access log in place, then calls ``reset()`` on every layer
    tracker followed by every head tracker.
    """
    self._start_time = time.monotonic_ns()
    del self._access_log[:]
    for group in (self.layers, self.heads):
        for tracker in group.values():
            tracker.reset()
|
| 261 |
|
| 262 |
-
|
| 263 |
-
"""Remove all forward hooks."""
|
| 264 |
-
for hook in self._hooks:
|
| 265 |
-
hook.remove()
|
| 266 |
-
self._hooks.clear()
|
| 267 |
-
|
| 268 |
-
def to_access_log(self):
    """Hand back the recorded access log in Membrane-compatible format.

    The object returned is the internal log list itself, not a copy.
    """
    access_log = self._access_log
    return access_log
|
| 271 |
-
|
| 272 |
-
# --- Layer-level analysis (same as v1) ---
|
| 273 |
|
| 274 |
def get_activation_map(self):
|
| 275 |
"""Return layer activation summary."""
|
|
@@ -316,7 +262,7 @@ class TorchMembrane:
|
|
| 316 |
"hot_layers": len(activation_map) - len(cold_layers),
|
| 317 |
}
|
| 318 |
|
| 319 |
-
# --- Head-level analysis
|
| 320 |
|
| 321 |
def get_head_map(self):
|
| 322 |
"""Return per-head activation summary for all attention layers."""
|
|
@@ -325,7 +271,6 @@ class TorchMembrane:
|
|
| 325 |
if head.forward_count == 0:
|
| 326 |
continue
|
| 327 |
|
| 328 |
-
# Find the parent layer to get per-head param size
|
| 329 |
parent = self.layers.get(head.layer_name)
|
| 330 |
per_head_bytes = parent.per_head_param_bytes if parent else 0
|
| 331 |
|
|
@@ -362,7 +307,6 @@ class TorchMembrane:
|
|
| 362 |
cold_heads = self.get_cold_heads()
|
| 363 |
cold_bytes = sum(h["param_bytes"] for h in cold_heads)
|
| 364 |
|
| 365 |
-
# Also get non-attention layer data for the full picture
|
| 366 |
non_attn_layers = [l for l in self.get_activation_map()
|
| 367 |
if not l["is_attention"]]
|
| 368 |
cold_non_attn = [l for l in non_attn_layers
|
|
@@ -388,69 +332,3 @@ class TorchMembrane:
|
|
| 388 |
"hot_heads": len(head_map) - len(cold_heads),
|
| 389 |
"cold_non_attn_layers": len(cold_non_attn),
|
| 390 |
}
|
| 391 |
-
|
| 392 |
-
def print_activation_map(self, top_n=30):
    """Write the layer-level activation report to stdout.

    Prints a banner, the totals taken from ``get_condensation_potential()``,
    and then one table row for each of the first ``top_n`` entries of
    ``get_activation_map()``. Layer names longer than 40 characters are
    shortened to ``"..."`` plus their last 37 characters, and attention
    layers get an ``[A]`` suffix.
    """
    layer_rows = self.get_activation_map()
    potential = self.get_condensation_potential()

    # Banner and totals.
    print(f"\n{'='*70}")
    print(f" CONDENSATE — Layer Activation Map")
    print(f"{'='*70}")
    print(f" Total layers: {potential['total_layers']}")
    print(f" HOT: {potential['hot_layers']} ({potential['hot_mb']:.2f} MB)")
    print(f" COLD: {potential['cold_layers']} ({potential['cold_mb']:.2f} MB)")
    print(f" Layer-level savings: {potential['savings_pct']:.1f}%")

    # Table header.
    print(f"\n {'Layer':<40} {'Fwd':>4} {'AvgAct':>8} {'MB':>6} {'Tier':>5}")
    print(f" {'-'*40} {'-'*4} {'-'*8} {'-'*6} {'-'*5}")

    for layer in layer_rows[:top_n]:
        name = layer['name']
        if len(name) > 40:
            # Keep the tail: it is the most specific part of a dotted path.
            name = "..." + name[-37:]
        attn = ""
        if layer['is_attention']:
            attn = " [A]"
        print(f" {name:<40} {layer['forward_count']:>4} "
              f"{layer['avg_activation']:>8.3f} "
              f"{layer['param_mb']:>6.3f} {layer['temperature']:>5}{attn}")

    print(f"\n{'='*70}\n")
|
| 416 |
-
|
| 417 |
-
def print_head_map(self, top_n=40):
    """Write the head-level activation report to stdout.

    Prints a banner and the totals from
    ``get_head_condensation_potential()``. If ``get_cold_heads()`` returns
    anything, its first ``top_n`` entries are listed; if ``get_head_map()``
    contains entries whose temperature is ``'HOT'``, the first 10 of those
    are listed as well. Head keys longer than 40 characters are shortened to
    ``"..."`` plus their last 37 characters.
    """
    def show_table(heading, heads):
        # Shared renderer for the cold and hot tables.
        print(heading)
        print(f" {'Head':<40} {'Fwd':>4} {'AvgAct':>10} {'MB':>6}")
        print(f" {'-'*40} {'-'*4} {'-'*10} {'-'*6}")
        for h in heads:
            name = h['key']
            if len(name) > 40:
                name = "..." + name[-37:]
            print(f" {name:<40} {h['forward_count']:>4} "
                  f"{h['avg_activation']:>10.4f} {h['param_mb']:>6.4f}")

    head_map = self.get_head_map()
    head_potential = self.get_head_condensation_potential()

    # Banner and totals.
    print(f"\n{'='*70}")
    print(f" CONDENSATE — Head-Level Activation Map")
    print(f"{'='*70}")
    print(f" Total attention heads: {head_potential['total_heads']}")
    print(f" HOT heads: {head_potential['hot_heads']}")
    print(f" COLD heads: {head_potential['cold_heads']}")
    print(f" Attention params: {head_potential['attn_total_mb']:.2f} MB "
          f"(cold: {head_potential['attn_cold_mb']:.2f} MB)")
    print(f" Non-attention cold: {head_potential['non_attn_cold_mb']:.2f} MB")
    print(f" *** HEAD-LEVEL SAVINGS: {head_potential['savings_pct']:.1f}% "
          f"({head_potential['cold_mb']:.2f} MB) ***")

    # Coldest heads first.
    cold_heads = self.get_cold_heads()
    if cold_heads:
        show_table(f"\n Coldest heads (bottom 25%):", cold_heads[:top_n])

    # A sample of hot heads for comparison.
    hot_heads = [entry for entry in head_map if entry['temperature'] == 'HOT']
    if hot_heads:
        show_table(f"\n Hottest heads (sample):", hot_heads[:10])

    print(f"\n{'='*70}\n")
|
|
|
|
| 1 |
+
"""Condensate Torch Membrane — PyTorch hook-based access tracking.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
Hooks must be Python (PyTorch API). Output is a simple event list
|
| 4 |
+
ready for direct consumption by the Rust pipeline.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
+
import torch
|
| 7 |
import time
|
| 8 |
import numpy as np
|
| 9 |
from collections import defaultdict
|
|
|
|
| 58 |
self.param_bytes = param_bytes
|
| 59 |
self.is_attention = is_attention
|
| 60 |
self.num_heads = num_heads
|
|
|
|
| 61 |
self.per_head_param_bytes = (param_bytes // num_heads) if num_heads > 0 else 0
|
| 62 |
|
| 63 |
def reset(self):
|
|
|
|
| 71 |
class TorchMembrane:
|
| 72 |
"""Hooks into a PyTorch model to track layer AND head activations.
|
| 73 |
|
| 74 |
+
Hooks must be Python (PyTorch API). Output is a simple event list
|
| 75 |
+
ready for direct consumption by the Rust pipeline.
|
| 76 |
+
|
| 77 |
+
get_events() returns (timestamp_ns, path, size_bytes) tuples.
|
| 78 |
"""
|
| 79 |
|
| 80 |
def __init__(self, model, activation_threshold=0.01):
|
| 81 |
+
self._model = model
|
| 82 |
self.activation_threshold = activation_threshold
|
| 83 |
+
self.layers = {}
|
| 84 |
+
self.heads = {}
|
| 85 |
self._hooks = []
|
|
|
|
| 86 |
self._access_log = []
|
| 87 |
|
|
|
|
| 88 |
config = getattr(model, 'config', None)
|
| 89 |
self._default_num_heads = getattr(config, 'n_head',
|
| 90 |
getattr(config, 'num_attention_heads', 0))
|
|
|
|
| 98 |
self._install_hooks()
|
| 99 |
|
| 100 |
def _install_hooks(self):
|
| 101 |
+
for name, module in self._model.named_modules():
|
|
|
|
|
|
|
|
|
|
| 102 |
if name == '':
|
| 103 |
continue
|
| 104 |
|
| 105 |
param_bytes = sum(p.numel() * p.element_size()
|
| 106 |
for p in module.parameters(recurse=False))
|
| 107 |
|
|
|
|
| 108 |
is_attention = any(kw in name.lower()
|
| 109 |
for kw in ['attn', 'attention', 'self_attn'])
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
num_heads = 0
|
| 112 |
if is_attention:
|
| 113 |
num_heads = getattr(module, 'num_heads',
|
| 114 |
getattr(module, 'num_attention_heads',
|
| 115 |
self._default_num_heads))
|
| 116 |
|
|
|
|
| 117 |
if num_heads > 0:
|
| 118 |
for h in range(num_heads):
|
| 119 |
head_key = f"{name}.head_{h}"
|
|
|
|
| 133 |
self._hooks.append(hook)
|
| 134 |
|
| 135 |
def _make_hook(self, name, layer_info):
|
|
|
|
|
|
|
|
|
|
| 136 |
def hook_fn(module, input, output):
|
| 137 |
+
ts = time.time_ns()
|
| 138 |
layer_info.forward_count += 1
|
| 139 |
layer_info.timestamps_ns.append(ts)
|
| 140 |
|
|
|
|
| 141 |
out_tensor = None
|
| 142 |
if isinstance(output, torch.Tensor):
|
| 143 |
out_tensor = output
|
|
|
|
| 155 |
layer_info.total_activation += norm
|
| 156 |
layer_info.max_activation = max(layer_info.max_activation, norm)
|
| 157 |
|
| 158 |
+
size = out_tensor.nelement() * out_tensor.element_size() if out_tensor is not None else layer_info.param_bytes
|
| 159 |
+
self._access_log.append((ts, name, size))
|
| 160 |
|
|
|
|
| 161 |
if layer_info.is_attention and layer_info.num_heads > 0 and out_tensor is not None:
|
| 162 |
self._decompose_heads(name, layer_info, out_tensor, ts)
|
| 163 |
|
| 164 |
return hook_fn
|
| 165 |
|
| 166 |
def _decompose_heads(self, name, layer_info, output_tensor, ts):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
num_heads = layer_info.num_heads
|
| 168 |
if num_heads <= 0:
|
| 169 |
return
|
|
|
|
| 171 |
try:
|
| 172 |
with torch.no_grad():
|
| 173 |
shape = output_tensor.shape
|
|
|
|
| 174 |
if len(shape) < 2:
|
| 175 |
return
|
| 176 |
|
| 177 |
hidden = shape[-1]
|
|
|
|
|
|
|
| 178 |
if hidden % num_heads != 0:
|
| 179 |
return
|
| 180 |
|
| 181 |
head_dim = hidden // num_heads
|
|
|
|
|
|
|
| 182 |
reshaped = output_tensor.view(*shape[:-1], num_heads, head_dim)
|
| 183 |
|
|
|
|
| 184 |
for h in range(num_heads):
|
| 185 |
head_key = f"{name}.head_{h}"
|
| 186 |
head_tracker = self.heads.get(head_key)
|
| 187 |
if head_tracker:
|
| 188 |
head_norm = reshaped[..., h, :].float().norm().item()
|
| 189 |
head_tracker.record(head_norm)
|
|
|
|
|
|
|
| 190 |
self._access_log.append((
|
| 191 |
+
ts, head_key,
|
| 192 |
layer_info.per_head_param_bytes
|
| 193 |
))
|
| 194 |
|
| 195 |
except (RuntimeError, ValueError):
|
|
|
|
| 196 |
pass
|
| 197 |
|
| 198 |
+
def get_events(self):
    """Return the recorded events for consumption by the Rust pipeline.

    Each entry is a ``(timestamp_ns, path, size_bytes)`` tuple. The list
    returned is the membrane's own log, not a copy, so events recorded or
    cleared afterwards are visible through it.
    """
    events = self._access_log
    return events
|
| 201 |
+
|
| 202 |
+
def clear(self):
    """Empty the event log in place.

    Only the log is touched; layer and head trackers are not reset here.
    """
    del self._access_log[:]
|
| 204 |
+
|
| 205 |
+
def remove_hooks(self):
    """Detach every stored hook handle, then forget the handles.

    Calls ``remove()`` on each entry of ``_hooks`` in order and empties the
    list afterwards.
    """
    handles = self._hooks
    for handle in handles:
        handle.remove()
    handles.clear()
|
| 209 |
+
|
| 210 |
def reset(self):
    """Discard all recorded activations.

    Empties the event log in place, then calls ``reset()`` on every layer
    tracker followed by every head tracker. Installed hooks are left
    untouched.
    """
    del self._access_log[:]
    for group in (self.layers, self.heads):
        for tracker in group.values():
            tracker.reset()
|
| 217 |
|
| 218 |
+
# --- Layer-level analysis ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
def get_activation_map(self):
|
| 221 |
"""Return layer activation summary."""
|
|
|
|
| 262 |
"hot_layers": len(activation_map) - len(cold_layers),
|
| 263 |
}
|
| 264 |
|
| 265 |
+
# --- Head-level analysis ---
|
| 266 |
|
| 267 |
def get_head_map(self):
|
| 268 |
"""Return per-head activation summary for all attention layers."""
|
|
|
|
| 271 |
if head.forward_count == 0:
|
| 272 |
continue
|
| 273 |
|
|
|
|
| 274 |
parent = self.layers.get(head.layer_name)
|
| 275 |
per_head_bytes = parent.per_head_param_bytes if parent else 0
|
| 276 |
|
|
|
|
| 307 |
cold_heads = self.get_cold_heads()
|
| 308 |
cold_bytes = sum(h["param_bytes"] for h in cold_heads)
|
| 309 |
|
|
|
|
| 310 |
non_attn_layers = [l for l in self.get_activation_map()
|
| 311 |
if not l["is_attention"]]
|
| 312 |
cold_non_attn = [l for l in non_attn_layers
|
|
|
|
| 332 |
"hot_heads": len(head_map) - len(cold_heads),
|
| 333 |
"cold_non_attn_layers": len(cold_non_attn),
|
| 334 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|