#!/usr/bin/env python3 """ Substrate Sharding Strategy for Material-Field Governance Cache-Aware Design for 250,000+ Verified States Goal: Keep most relevant substrate anchors in L1 cache (32-64KB) while maintaining sub-microsecond access times and deterministic behavior. Architecture: 1. Hierarchical Sharding: Coarse → Fine-grained locality 2. Semantic Clustering: Group related knowledge domains 3. LRU Cache Management: Keep hot shards resident 4. Deterministic Retrieval: Same query → same shards loaded Reference Implementation - Verhash LLC Patent Priority: January 25, 2026 """ import math from dataclasses import dataclass from typing import List, Tuple, Optional, Dict, Set from collections import OrderedDict import hashlib import struct from deterministic_rng import normal # ============================================================================== # CACHE ARCHITECTURE CONSTANTS # ============================================================================== L1_CACHE_SIZE = 32 * 1024 # 32 KB typical L1 data cache L2_CACHE_SIZE = 256 * 1024 # 256 KB typical L2 cache L3_CACHE_SIZE = 8 * 1024 * 1024 # 8 MB typical L3 cache # Use explicit little-endian packing with fixed sizes for deterministic layout. # Store x/y as float64 to match Python's `float` (C double on CPython). VECTOR_SIZE_BYTES = 8 * 2 # 2 floats (x, y) × 8 bytes each = 16 bytes # Real: 768 floats × 4 bytes = 3KB per vector # L1 budget: Reserve space for working set + shard metadata L1_BUDGET_VECTORS = (L1_CACHE_SIZE // 2) // VECTOR_SIZE_BYTES # ~1000 vectors in L1 L2_BUDGET_VECTORS = (L2_CACHE_SIZE // 2) // VECTOR_SIZE_BYTES # ~8000 vectors in L2 # ============================================================================== # VECTOR REPRESENTATION # ============================================================================== @dataclass class CompactVector: """ Cache-optimized vector representation. In production: - Use float16 instead of float64 (halve memory) - Pack metadata into single 64-bit word - Align to cache line boundaries (64 bytes) """ x: float y: float shard_id: int # Which shard this belongs to domain_hash: int # Semantic domain identifier def to_bytes(self) -> bytes: """Pack into contiguous bytes for cache efficiency""" return struct.pack(' 'CompactVector': """Unpack from contiguous bytes""" x, y, shard_id, domain_hash = struct.unpack(' float: """Euclidean distance""" return math.sqrt((self.x - other.x) ** 2 + (self.y - other.y) ** 2) def dot_product(self, other: 'CompactVector') -> float: """Normalized dot product (cosine similarity)""" self_norm = math.sqrt(self.x ** 2 + self.y ** 2) other_norm = math.sqrt(other.x ** 2 + other.y ** 2) if self_norm == 0 or other_norm == 0: return 0.0 return (self.x * other.x + self.y * other.y) / (self_norm * other_norm) # ============================================================================== # SHARD STRUCTURE # ============================================================================== @dataclass class SubstrateShard: """ A shard is a contiguous block of verified states. Design: - Fixed size (e.g., 64 vectors = 1KB in 2D, 192KB in 768D) - Cache-line aligned - Sorted by locality (clustered semantically) - Immutable after creation (no reallocation) """ shard_id: int domain: str # e.g., "biology", "physics", "geography" centroid: CompactVector # Representative center of this shard vectors: List[CompactVector] # The actual verified states size_bytes: int = 0 # Memory footprint def __post_init__(self): if self.size_bytes == 0: self.size_bytes = len(self.vectors) * VECTOR_SIZE_BYTES def is_l1_resident(self) -> bool: """Can this shard fit in L1?""" return self.size_bytes <= L1_CACHE_SIZE // 4 # Reserve 75% for other data def relevance_score(self, query_vector: CompactVector) -> float: """ Compute relevance of this shard to query. Uses centroid distance as fast approximation. """ return 1.0 / (1.0 + query_vector.distance_to(self.centroid)) # ============================================================================== # HIERARCHICAL SHARDING STRATEGY # ============================================================================== class ShardedSubstrate: """ Hierarchical substrate sharding with cache-aware retrieval. Architecture: Level 1 (L1 Cache): Active shard (~64 vectors) Level 2 (L2 Cache): Hot shards (~8 shards) Level 3 (L3 Cache): Warm shards (~100 shards) Level 4 (Main RAM): All shards (250K vectors / 64 = ~3906 shards) Retrieval Strategy: 1. Hash query to domain (deterministic) 2. Load relevant domain shards into L3 3. Rank by centroid distance 4. Prefetch top-k shards into L2 5. Keep most relevant shard in L1 """ def __init__(self, shard_size: int = 64, l1_capacity: int = 1, # Number of shards in L1 l2_capacity: int = 8, # Number of shards in L2 l3_capacity: int = 100): # Number of shards in L3 self.shard_size = shard_size self.l1_capacity = l1_capacity self.l2_capacity = l2_capacity self.l3_capacity = l3_capacity # Storage hierarchy self.all_shards: Dict[int, SubstrateShard] = {} # All shards (RAM) self.domain_index: Dict[str, List[int]] = {} # Domain → shard IDs # Cache tiers (LRU with size limits) self.l1_cache: OrderedDict[int, SubstrateShard] = OrderedDict() self.l2_cache: OrderedDict[int, SubstrateShard] = OrderedDict() self.l3_cache: OrderedDict[int, SubstrateShard] = OrderedDict() # Statistics self.l1_hits = 0 self.l2_hits = 0 self.l3_hits = 0 self.l1_misses = 0 def add_shard(self, shard: SubstrateShard): """Add a shard to the substrate""" self.all_shards[shard.shard_id] = shard # Update domain index if shard.domain not in self.domain_index: self.domain_index[shard.domain] = [] self.domain_index[shard.domain].append(shard.shard_id) def _get_domain_from_query(self, query_vector: CompactVector) -> str: """ Deterministically map query to semantic domain. In production: - Use learned classifier - Or: hash query vector to domain buckets - Or: use query context/tags """ # Simplified: use domain_hash embedded in query if query_vector.domain_hash % 3 == 0: return "biology" elif query_vector.domain_hash % 3 == 1: return "geography" else: return "physics" def _evict_lru(self, cache: OrderedDict, capacity: int): """Evict least recently used items to maintain capacity""" while len(cache) > capacity: cache.popitem(last=False) # Remove oldest def _promote_to_l1(self, shard_id: int): """Move shard to L1 cache""" shard = self.all_shards[shard_id] # Remove from lower tiers self.l2_cache.pop(shard_id, None) self.l3_cache.pop(shard_id, None) # Add to L1 (most recent) self.l1_cache[shard_id] = shard self.l1_cache.move_to_end(shard_id) # Evict if over capacity self._evict_lru(self.l1_cache, self.l1_capacity) def _promote_to_l2(self, shard_id: int): """Move shard to L2 cache""" shard = self.all_shards[shard_id] # Remove from L3 self.l3_cache.pop(shard_id, None) # Add to L2 self.l2_cache[shard_id] = shard self.l2_cache.move_to_end(shard_id) self._evict_lru(self.l2_cache, self.l2_capacity) def _promote_to_l3(self, shard_id: int): """Move shard to L3 cache""" shard = self.all_shards[shard_id] # Add to L3 self.l3_cache[shard_id] = shard self.l3_cache.move_to_end(shard_id) self._evict_lru(self.l3_cache, self.l3_capacity) def retrieve_relevant_shards(self, query_vector: CompactVector, top_k: int = 8) -> List[SubstrateShard]: """ Retrieve most relevant shards for query. Cache-aware with deterministic retrieval. Strategy: 1. Determine domain (deterministic hash) 2. Get candidate shards from domain 3. Rank by centroid distance 4. Load top-k into L2 5. Return in order of relevance """ # Step 1: Get domain domain = self._get_domain_from_query(query_vector) # Step 2: Get candidate shard IDs from domain if domain not in self.domain_index: return [] # No shards for this domain candidate_ids = self.domain_index[domain] # Step 3: Rank shards by relevance shard_scores = [ (shard_id, self.all_shards[shard_id].relevance_score(query_vector)) for shard_id in candidate_ids ] shard_scores.sort(key=lambda x: x[1], reverse=True) # Step 4: Load top-k shards into cache hierarchy top_shard_ids = [shard_id for shard_id, _ in shard_scores[:top_k]] # Promote most relevant to L1 if top_shard_ids: self._promote_to_l1(top_shard_ids[0]) self.l1_hits += 1 # Promote rest to L2 for shard_id in top_shard_ids[1:]: if shard_id not in self.l2_cache: self._promote_to_l2(shard_id) self.l2_hits += 1 # Return shards in order of relevance return [self.all_shards[sid] for sid in top_shard_ids] def get_l1_vectors(self) -> List[CompactVector]: """ Get all vectors currently in L1 cache. This is the "hot path" for inference. """ vectors = [] for shard in self.l1_cache.values(): vectors.extend(shard.vectors) return vectors def print_stats(self): """Print cache statistics""" print("\n" + "=" * 80) print("SUBSTRATE SHARDING STATISTICS") print("=" * 80) print(f"Total shards: {len(self.all_shards)}") print(f"Total vectors: {sum(len(s.vectors) for s in self.all_shards.values())}") print(f"Domains: {list(self.domain_index.keys())}") print() print(f"L1 Cache ({self.l1_capacity} shards max):") print(f" Current: {len(self.l1_cache)} shards") print(f" Vectors: {sum(len(s.vectors) for s in self.l1_cache.values())}") print(f" Size: {sum(s.size_bytes for s in self.l1_cache.values())} bytes") print(f" Hits: {self.l1_hits}") print() print(f"L2 Cache ({self.l2_capacity} shards max):") print(f" Current: {len(self.l2_cache)} shards") print(f" Vectors: {sum(len(s.vectors) for s in self.l2_cache.values())}") print(f" Hits: {self.l2_hits}") print() print(f"L3 Cache ({self.l3_capacity} shards max):") print(f" Current: {len(self.l3_cache)} shards") print(f" Vectors: {sum(len(s.vectors) for s in self.l3_cache.values())}") print(f" Hits: {self.l3_hits}") print("=" * 80) # ============================================================================== # DEMONSTRATION: 250K VERIFIED STATES # ============================================================================== def create_demo_substrate(num_vectors: int = 250000, shard_size: int = 64) -> ShardedSubstrate: """ Create a demo substrate with 250K verified states. Simulates three domains: biology, geography, physics. """ print(f"Creating substrate with {num_vectors:,} verified states...") print(f"Shard size: {shard_size} vectors") print(f"Expected shards: {num_vectors // shard_size}") substrate = ShardedSubstrate(shard_size=shard_size) domains = ["biology", "geography", "physics"] vectors_per_domain = num_vectors // len(domains) shard_id = 0 for domain_idx, domain in enumerate(domains): print(f"\nGenerating {vectors_per_domain:,} vectors for domain: {domain}") # Generate vectors clustered around domain centroid # Biology: centered around (0.8, 0.8) # Geography: centered around (0.5, 0.5) # Physics: centered around (0.2, 0.2) if domain == "biology": center_x, center_y = 0.8, 0.8 elif domain == "geography": center_x, center_y = 0.5, 0.5 else: center_x, center_y = 0.2, 0.2 domain_hash = hashlib.sha256(domain.encode()).digest()[:8] domain_hash_int = int.from_bytes(domain_hash, 'big') domain_seed = b"demo_substrate|" + domain.encode("utf-8") # Create shards for this domain for i in range(0, vectors_per_domain, shard_size): vectors = [] for j in range(shard_size): if i + j >= vectors_per_domain: break # Generate vector with small random offset global_idx = i + j x = center_x + normal(domain_seed, global_idx * 4, mean=0.0, std=0.1) y = center_y + normal(domain_seed, global_idx * 4 + 2, mean=0.0, std=0.1) vec = CompactVector( x=x, y=y, shard_id=shard_id, domain_hash=domain_hash_int ) vectors.append(vec) # Compute centroid if vectors: centroid_x = sum(v.x for v in vectors) / len(vectors) centroid_y = sum(v.y for v in vectors) / len(vectors) centroid = CompactVector(centroid_x, centroid_y, shard_id, domain_hash_int) shard = SubstrateShard( shard_id=shard_id, domain=domain, centroid=centroid, vectors=vectors ) substrate.add_shard(shard) shard_id += 1 print(f"\nSubstrate created: {len(substrate.all_shards)} shards") return substrate def demo_query_performance(): """ Demonstrate query performance with sharded substrate. """ print(""" ╔══════════════════════════════════════════════════════════════════════════════╗ ║ ║ ║ Substrate Sharding Demonstration ║ ║ ║ ║ Exercises deterministic sharding and cache residency over ║ ║ a large verified substrate. Illustrates how relevance ║ ║ ranking and promotion constrain active context. ║ ║ ║ ╚══════════════════════════════════════════════════════════════════════════════╝ """) # Create substrate substrate = create_demo_substrate(num_vectors=250000, shard_size=64) substrate.print_stats() # Simulate queries from different domains print("\n" + "=" * 80) print("QUERY SIMULATION") print("=" * 80) # Biology query print("\nQuery 1: Biology domain (photosynthesis)") bio_query = CompactVector(x=0.82, y=0.78, shard_id=-1, domain_hash=0) relevant_shards = substrate.retrieve_relevant_shards(bio_query, top_k=8) print(f" Retrieved {len(relevant_shards)} relevant shards") print(f" L1 vectors available: {len(substrate.get_l1_vectors())}") print(f" Most relevant shard: {relevant_shards[0].shard_id} (domain: {relevant_shards[0].domain})") # Geography query print("\nQuery 2: Geography domain (capitals)") geo_query = CompactVector(x=0.48, y=0.52, shard_id=-1, domain_hash=1) relevant_shards = substrate.retrieve_relevant_shards(geo_query, top_k=8) print(f" Retrieved {len(relevant_shards)} relevant shards") print(f" L1 vectors available: {len(substrate.get_l1_vectors())}") print(f" Most relevant shard: {relevant_shards[0].shard_id} (domain: {relevant_shards[0].domain})") # Physics query print("\nQuery 3: Physics domain (mechanics)") phys_query = CompactVector(x=0.18, y=0.22, shard_id=-1, domain_hash=2) relevant_shards = substrate.retrieve_relevant_shards(phys_query, top_k=8) print(f" Retrieved {len(relevant_shards)} relevant shards") print(f" L1 vectors available: {len(substrate.get_l1_vectors())}") print(f" Most relevant shard: {relevant_shards[0].shard_id} (domain: {relevant_shards[0].domain})") # Print final stats substrate.print_stats() # Memory footprint analysis print("\n" + "=" * 80) print("MEMORY FOOTPRINT ANALYSIS") print("=" * 80) l1_size = sum(s.size_bytes for s in substrate.l1_cache.values()) l2_size = sum(s.size_bytes for s in substrate.l2_cache.values()) total_size = sum(s.size_bytes for s in substrate.all_shards.values()) print(f"L1 Cache: {l1_size:,} bytes ({l1_size/1024:.2f} KB)") print(f" Fits in L1? {l1_size <= L1_CACHE_SIZE}") print(f" L1 capacity: {L1_CACHE_SIZE:,} bytes ({L1_CACHE_SIZE/1024:.2f} KB)") print() print(f"L2 Cache: {l2_size:,} bytes ({l2_size/1024:.2f} KB)") print(f" Fits in L2? {l2_size <= L2_CACHE_SIZE}") print(f" L2 capacity: {L2_CACHE_SIZE:,} bytes ({L2_CACHE_SIZE/1024:.2f} KB)") print() print(f"Total Substrate: {total_size:,} bytes ({total_size/1024/1024:.2f} MB)") print() print(f"Cache efficiency: {(l1_size + l2_size) / total_size * 100:.2f}% in fast cache") print("\n" + "=" * 80) print("KEY INSIGHTS") print("=" * 80) print(""" 1. CACHE RESIDENCY: - L1 holds 1 shard (64 vectors) = most relevant for current query - L2 holds 8 shards (512 vectors) = domain-relevant context - Working set: 576 vectors in fast cache (< 10 KB in 2D) 2. RETRIEVAL STRATEGY: - Deterministic domain mapping (hash-based) - Centroid-based relevance ranking - LRU eviction maintains hot shards 3. SCALING TO 768D: - 768 floats × 4 bytes = 3 KB per vector - L1: 64 vectors × 3 KB = 192 KB (fits in L2, not L1) - Strategy: Reduce L1 capacity to 10-16 vectors for true L1 residence 4. PRODUCTION OPTIMIZATIONS: - Use float16 (half precision) → 1.5 KB per vector - Quantize to int8 → 768 bytes per vector - Product quantization → 64-128 bytes per vector - With PQ: L1 can hold 256-512 vectors (16-32 KB) 5. INFERENCE PATH: - Query arrives → Hash to domain → Load L2 shards - Rank by centroid → Promote top shard to L1 - Inference runs on L1-resident vectors only - Sub-microsecond retrieval, deterministic, cache-friendly """) if __name__ == "__main__": demo_query_performance()