1990two committed on
Commit
233f515
Β·
verified Β·
1 Parent(s): 0dd6cef

Upload 2 files

Browse files
Files changed (2) hide show
  1. memory_forest.py +382 -0
  2. memory_forest_docs.py +1020 -0
memory_forest.py ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #############################################################################################################################################
2
+ #||||- - - |6.25.2025| - - - || MEMORY FOREST || - - - |memory_forest.py| - - -||||#
3
+ #############################################################################################################################################
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ import numpy as np
8
+ import math
9
+ from collections import defaultdict, deque
10
+ from typing import List, Dict, Tuple, Optional
11
+
12
SAFE_MIN = -1e6
SAFE_MAX = 1e6
EPS = 1e-8

#||||- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - π“…Έ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -||||#

def make_safe(tensor, min_val=SAFE_MIN, max_val=SAFE_MAX):
    """Sanitize *tensor* into the closed range [min_val, max_val].

    Mapping: NaN -> 0.0, +inf -> max_val, -inf -> min_val, then clamp.

    BUGFIX: the previous version replaced *every* infinity (including
    -inf) with max_val before clamping, so -inf came out as +max_val
    with its sign flipped; torch.nan_to_num handles each case directly.

    Args:
        tensor: input tensor of any floating dtype.
        min_val: lower clamp bound (default SAFE_MIN).
        max_val: upper clamp bound (default SAFE_MAX).

    Returns:
        Tensor of the same shape with all values finite and clamped.
    """
    tensor = torch.nan_to_num(tensor, nan=0.0, posinf=max_val, neginf=min_val)
    return torch.clamp(tensor, min_val, max_val)
22
+
23
+ def safe_cosine_similarity(a, b, dim=-1, eps=EPS):
24
+ if a.dtype != torch.float32:
25
+ a = a.float()
26
+ if b.dtype != torch.float32:
27
+ b = b.float()
28
+ a_norm = torch.norm(a, dim=dim, keepdim=True).clamp(min=eps)
29
+ b_norm = torch.norm(b, dim=dim, keepdim=True).clamp(min=eps)
30
+ return torch.sum(a * b, dim=dim, keepdim=True) / (a_norm * b_norm)
31
+
32
+ #############################################################################################################################################
33
+ ###################################################- - - ASSOCIATIVE HASH BUCKET - - -###################################################
34
+
35
class AssociativeHashBucket(nn.Module):
    """Associative memory bucket with learnable hash functions.

    Stores up to `bucket_size` embeddings. A new item whose cosine
    similarity to an already-stored item exceeds the (clamped)
    learnable threshold is merged into that item with a 0.9/0.1 moving
    average; otherwise it takes the next free slot, or evicts the
    least-accessed slot once the bucket is full.
    """

    def __init__(self, bucket_size=64, embedding_dim=128, num_hash_functions=4):
        super().__init__()
        self.bucket_size = bucket_size
        self.embedding_dim = embedding_dim
        self.num_hash_functions = num_hash_functions

        # Learnable scalar hash projections: tanh(W_k x + b_k) per function.
        self.hash_projections = nn.ModuleList([
            nn.Linear(embedding_dim, 1, bias=True) for _ in range(num_hash_functions)
        ])

        # Slot storage and per-slot metadata (non-trainable buffers).
        self.register_buffer('stored_items', torch.zeros(bucket_size, embedding_dim))
        self.register_buffer('item_hashes', torch.zeros(bucket_size, num_hash_functions))
        self.register_buffer('occupancy', torch.zeros(bucket_size, dtype=torch.bool))
        self.register_buffer('access_counts', torch.zeros(bucket_size))

        self.similarity_threshold = nn.Parameter(torch.tensor(0.7))
        # NOTE(review): decay_rate is declared but never read in this class.
        self.decay_rate = nn.Parameter(torch.tensor(0.99))

        # Next free slot index during the initial sequential fill.
        self.storage_pointer = 0

    def compute_hash_signature(self, item_embedding):
        """Return tanh hash signature(s) for `item_embedding`.

        Shape: (num_hash_functions,) for a 1-D input,
        (batch, num_hash_functions) for a 2-D input.
        """
        x = item_embedding
        if x.dim() == 1:
            x = x.unsqueeze(0)
        signatures = []
        for hash_proj in self.hash_projections:
            sig = torch.tanh(hash_proj(x)).squeeze(-1)  # (B,)
            signatures.append(sig)
        sigs = torch.stack(signatures, dim=-1)  # (B, num_hash)
        # BUGFIX: only drop the batch dim we added ourselves for a 1-D
        # input; a genuine (1, embedding_dim) batch must keep its batch
        # dimension (previously it was squeezed away unconditionally).
        return sigs.squeeze(0) if item_embedding.dim() == 1 else sigs

    def store_item(self, item_embedding, item_id=None):
        """Store (or merge) each embedding; return the slot index used per item.

        Strategy: if any stored item exceeds the similarity threshold,
        merge into it (weighted average) and bump its access count;
        otherwise write into the next free slot, evicting the
        least-accessed slot when the bucket is full.

        Args:
            item_embedding: (embedding_dim,) or (batch, embedding_dim).
            item_id: unused; kept for interface compatibility.

        Returns:
            list[int] of slot indices, one per input item.
        """
        if item_embedding.dim() == 1:
            item_embedding = item_embedding.unsqueeze(0)

        batch_size = item_embedding.shape[0]
        stored_items = []

        for i in range(batch_size):
            embedding = item_embedding[i]
            hash_sig = self.compute_hash_signature(embedding)

            # Similarity-based clustering: merge into a close-enough item.
            if self.occupancy.any():
                similarities = safe_cosine_similarity(
                    embedding.unsqueeze(0),
                    self.stored_items[self.occupancy],
                    dim=-1
                ).squeeze()

                threshold = torch.clamp(self.similarity_threshold, 0.1, 0.95)
                if similarities.numel() > 0 and similarities.max() > threshold:
                    best_idx = self.occupancy.nonzero(as_tuple=False)[similarities.argmax()]
                    # Exponential moving average keeps the cluster centroid stable.
                    self.stored_items[best_idx] = 0.9 * self.stored_items[best_idx] + 0.1 * embedding
                    self.access_counts[best_idx] += 1
                    stored_items.append(int(best_idx.item()))
                    continue

            # Fresh slot: sequential fill first, then least-accessed eviction.
            if self.storage_pointer >= self.bucket_size:
                if self.occupancy.any():
                    rel_idx = self.access_counts[self.occupancy].argmin()
                    evict_idx = self.occupancy.nonzero(as_tuple=False)[rel_idx]
                else:
                    evict_idx = torch.tensor(0)
            else:
                evict_idx = torch.tensor(self.storage_pointer)
                self.storage_pointer = min(self.storage_pointer + 1, self.bucket_size)

            self.stored_items[evict_idx] = embedding
            self.item_hashes[evict_idx] = hash_sig.squeeze()
            self.occupancy[evict_idx] = True
            self.access_counts[evict_idx] = 1
            stored_items.append(int(evict_idx.item()))

        return stored_items

    def retrieve_similar(self, query_embedding, top_k=5):
        """Return the top-k stored items most similar to the query.

        Access counts of the returned slots are incremented so that
        frequently retrieved items survive eviction longer.

        Args:
            query_embedding: (embedding_dim,) or (1, embedding_dim) query.
            top_k: maximum number of items to return.

        Returns:
            (items, similarities) tensors, or ([], []) when the bucket
            is empty.  NOTE(review): the empty case returns Python lists
            rather than empty tensors -- callers must handle both.
        """
        if query_embedding.dim() == 1:
            query_embedding = query_embedding.unsqueeze(0)

        if not self.occupancy.any():
            return [], []

        valid_items = self.stored_items[self.occupancy]
        valid_indices = self.occupancy.nonzero(as_tuple=False).squeeze(-1)

        if valid_items.numel() == 0:
            return [], []

        similarities = safe_cosine_similarity(
            query_embedding.expand(valid_items.shape[0], -1),
            valid_items,
            dim=-1
        ).squeeze(-1)  # (N,)

        if similarities.numel() == 0:
            return [], []

        k = min(top_k, similarities.size(0))
        top_sims, top_indices = torch.topk(similarities, k)

        retrieved_items = valid_items[top_indices]
        retrieved_indices = valid_indices[top_indices]

        # Retrieval refreshes LRU standing.
        for idx in retrieved_indices:
            self.access_counts[idx] += 1

        return retrieved_items, top_sims

    def get_bucket_stats(self):
        """Return occupancy/access diagnostics for monitoring."""
        return {
            'occupancy_rate': self.occupancy.float().mean().item(),
            'total_accesses': self.access_counts.sum().item(),
            'avg_similarity': self.similarity_threshold.item(),
            'storage_pointer': self.storage_pointer
        }
151
+
152
+ ###########################################################################################################################################
153
+ ################################################- - - MEMORY DECISION TREE - - -#######################################################
154
+
155
class MemoryDecisionTree(nn.Module):
    """Soft binary decision tree that routes inputs to leaf-level buckets.

    Each internal node owns a learnable linear split
    s(x) = sigmoid((w·x + b) / τ).  Routing walks max_depth - 1 levels
    from the root using heap-style child indexing (left = 2i + 1,
    right = 2i + 2), so reached leaves lie in
    [2**(max_depth-1) - 1, 2**max_depth - 2].  Leaves map to external
    bucket ids via `assign_leaf_to_bucket`, and split biases are nudged
    directly (bypassing autograd) by feedback in `update_node_statistics`.
    """

    def __init__(self, input_dim, max_depth=6, min_samples_split=2):
        super().__init__()
        self.input_dim = input_dim
        self.max_depth = max_depth
        # NOTE(review): min_samples_split is stored but never read in this
        # class -- presumably reserved for future split-growing logic.
        self.min_samples_split = min_samples_split

        # One split slot for every node of a full binary tree of this depth.
        max_nodes = 2**max_depth - 1

        self.split_weights = nn.Parameter(torch.randn(max_nodes, input_dim) * 0.1)
        self.split_biases = nn.Parameter(torch.zeros(max_nodes))
        self.split_temperatures = nn.Parameter(torch.ones(max_nodes))
        with torch.no_grad():
            # Lower temperature -> sharper sigmoid splits; tiny bias noise
            # breaks the initial left/right symmetry.
            self.split_temperatures.data.mul_(0.6)
            self.split_biases.data.add_(0.01 * torch.randn_like(self.split_biases))

        # Per-node bookkeeping (non-trainable buffers).
        self.register_buffer('node_active', torch.zeros(max_nodes, dtype=torch.bool))
        self.register_buffer('node_samples', torch.zeros(max_nodes))

        # leaf id -> bucket id and its inverse.  Plain Python state: not
        # part of state_dict, so it is lost on save/load.
        self.leaf_to_bucket = {}
        self.bucket_to_leaves = defaultdict(list)

        # Only the root starts active; nodes activate as samples reach them.
        self.node_active[0] = True

    def get_node_split(self, node_idx, x):
        """Return routing probability sigmoid((w·x + b)/τ) for one node.

        Args:
            node_idx: index into the flat node parameter arrays.
            x: input batch of shape (batch, input_dim).

        Returns:
            (batch,) tensor of P(go right); zeros when node_idx is out
            of range.
        """
        if node_idx >= len(self.split_weights):
            return torch.zeros(x.shape[0], device=x.device)

        weights = self.split_weights[node_idx]
        bias = self.split_biases[node_idx]
        # Clamp keeps the effective sigmoid slope in a sane range.
        temp = torch.clamp(self.split_temperatures[node_idx], 0.1, 10.0)

        split_score = torch.matmul(x, weights) + bias
        split_prob = torch.sigmoid(split_score / temp)

        return split_prob

    def route_to_leaf(self, x, deterministic=False):
        """Route each row of `x` from the root down to a leaf node.

        Args:
            x: (batch, input_dim) inputs.
            deterministic: threshold at 0.5 if True, otherwise sample
                each direction from Bernoulli(split probability).

        Returns:
            (leaf_indices, paths): leaf node ids of shape (batch,) and
            per-depth go-right decisions of shape (batch, max_depth).
            NOTE(review): only the first max_depth - 1 path columns are
            ever written; the last column always stays 0.
        """
        batch_size = x.shape[0]
        device = x.device

        current_nodes = torch.zeros(batch_size, dtype=torch.long, device=device)
        paths = torch.zeros(batch_size, self.max_depth, dtype=torch.long, device=device)

        for depth in range(self.max_depth - 1):
            split_probs = torch.zeros(batch_size, device=device)

            # Inactive nodes keep probability 0, i.e. always route left.
            for i in range(batch_size):
                node_idx = int(current_nodes[i].item())
                if self.node_active[node_idx]:
                    split_probs[i] = self.get_node_split(node_idx, x[i:i+1]).squeeze()

            if deterministic:
                go_right = (split_probs > 0.5).long()
            else:
                go_right = torch.bernoulli(split_probs).long()

            paths[:, depth] = go_right

            # Heap indexing: left child 2i+1, right child 2i+2.
            current_nodes = current_nodes * 2 + 1 + go_right

        return current_nodes, paths

    def assign_leaf_to_bucket(self, leaf_idx, bucket_idx):
        """Record a (leaf -> bucket) mapping and its inverse."""
        self.leaf_to_bucket[int(leaf_idx.item())] = int(bucket_idx)
        self.bucket_to_leaves[int(bucket_idx)].append(int(leaf_idx.item()))

    def get_bucket_for_input(self, x, deterministic=True):
        """Return the bucket id each input row routes to.

        Unmapped leaves silently fall back to bucket 0.
        """
        leaf_nodes, _ = self.route_to_leaf(x, deterministic=deterministic)

        bucket_assignments = []
        for leaf in leaf_nodes:
            bucket_idx = self.leaf_to_bucket.get(int(leaf.item()), 0)
            bucket_assignments.append(bucket_idx)

        return torch.tensor(bucket_assignments, device=x.device)

    def update_node_statistics(self, x, rewards):
        """Update visit counts and nudge split biases from feedback.

        Re-routes `x` deterministically, then for every node along each
        sample's path: increments its sample count, marks it active,
        and -- when that sample's reward exceeds 0.5 -- shifts the node
        bias by ±0.01 to reinforce the direction actually taken.  Bias
        edits go through `.data`, so they bypass autograd.

        Args:
            x: (batch, input_dim) inputs.
            rewards: per-sample scalars (tensor elements or floats);
                values > 0.5 count as success.
        """
        leaf_nodes, paths = self.route_to_leaf(x, deterministic=True)

        for i in range(x.shape[0]):
            current_node = 0
            reward = rewards[i].item() if torch.is_tensor(rewards[i]) else rewards[i]

            for depth in range(self.max_depth - 1):
                if current_node < len(self.node_samples):
                    self.node_samples[current_node] += 1
                    self.node_active[current_node] = True

                    if reward > 0.5:
                        # Reinforce the branch that led to a good retrieval.
                        direction = paths[i, depth]
                        if direction == 1:
                            self.split_biases.data[current_node] += 0.01
                        else:
                            self.split_biases.data[current_node] -= 0.01

                # depth < paths.shape[1] always holds (the loop stops at
                # max_depth - 1), so the fallback 0 is never taken here.
                direction = paths[i, depth] if depth < paths.shape[1] else 0
                current_node = current_node * 2 + 1 + int(direction.item())

                if current_node >= 2**self.max_depth - 1:
                    break
256
+
257
+ ###########################################################################################################################################
258
+ ##################################################- - - MEMORY FOREST - - -############################################################
259
+
260
class MemoryForest(nn.Module):
    """Ensemble of routing trees over a shared pool of hash buckets.

    Features are encoded once by a small MLP; every tree then routes
    them independently to one of its leaf-assigned buckets.  Storage
    writes into each routed bucket, retrieval pools candidates from all
    routed buckets and re-ranks them by cosine similarity.
    """

    def __init__(self, input_dim, num_trees=5, max_depth=6, bucket_size=64, embedding_dim=128):
        super().__init__()
        self.input_dim = input_dim
        self.num_trees = num_trees
        self.embedding_dim = embedding_dim

        self.trees = nn.ModuleList(
            MemoryDecisionTree(input_dim, max_depth) for _ in range(num_trees)
        )

        # NOTE(review): 2**max_depth buckets are allocated per tree while
        # each tree only reaches 2**(max_depth-1) leaves, so roughly half
        # of the buckets never receive a leaf assignment.
        self.num_buckets = num_trees * (2**max_depth)
        self.buckets = nn.ModuleList(
            AssociativeHashBucket(bucket_size, embedding_dim) for _ in range(self.num_buckets)
        )

        self.feature_encoder = nn.Sequential(
            nn.Linear(input_dim, embedding_dim),
            nn.LayerNorm(embedding_dim),
            nn.ReLU(),
            nn.Linear(embedding_dim, embedding_dim)
        )

        self._initialize_bucket_assignments()

    def _initialize_bucket_assignments(self):
        """Map each tree's leaf nodes onto distinct global bucket indices."""
        next_bucket = 0
        for tree in self.trees:
            first_leaf = 2**(tree.max_depth - 1) - 1
            last_leaf = 2**tree.max_depth - 2
            for leaf in range(first_leaf, last_leaf + 1):
                if next_bucket < self.num_buckets:
                    tree.assign_leaf_to_bucket(torch.tensor(leaf), next_bucket)
                    next_bucket += 1

    def store(self, features, items=None):
        """Encode `features` and write them into each tree's routed bucket.

        Returns a list of (bucket_index, slot_indices) pairs, one per
        (tree, sample) combination.
        """
        if items is None:
            items = features

        embeddings = self.feature_encoder(features)
        results = []

        for tree in self.trees:
            # Stochastic routing at store time spreads items across leaves.
            routed = tree.get_bucket_for_input(features, deterministic=False)
            for sample_idx, bucket_idx in enumerate(routed.tolist()):
                if bucket_idx < len(self.buckets):
                    slot = self.buckets[bucket_idx].store_item(embeddings[sample_idx])
                    results.append((bucket_idx, slot))

        return results

    def retrieve(self, query_features, top_k=5):
        """Pool per-tree bucket lookups and return top-k items per query.

        Returns one (items, similarities) tensor pair per query row;
        empty tensors when nothing was retrieved for that row.
        """
        query_embeddings = self.feature_encoder(query_features)
        pooled = defaultdict(list)

        for tree in self.trees:
            routed = tree.get_bucket_for_input(query_features, deterministic=True)
            for sample_idx, bucket_idx in enumerate(routed.tolist()):
                if bucket_idx < len(self.buckets):
                    hits, sims = self.buckets[bucket_idx].retrieve_similar(
                        query_embeddings[sample_idx], top_k=top_k
                    )
                    if len(hits) > 0:
                        sims_as_floats = sims.detach().cpu().tolist()
                        # Keep (item, float score for sorting, tensor score for output).
                        for item, sim_tensor, sim_float in zip(hits, sims, sims_as_floats):
                            pooled[sample_idx].append((item, sim_float, sim_tensor))

        final_results = []
        for q in range(query_features.shape[0]):
            candidates = pooled.get(q, [])
            if candidates:
                candidates.sort(key=lambda entry: entry[1], reverse=True)
                best = candidates[:top_k]
                final_results.append((
                    torch.stack([entry[0] for entry in best]),
                    torch.stack([entry[2] for entry in best]),
                ))
            else:
                final_results.append((torch.tensor([]), torch.tensor([])))

        return final_results

    def update_routing(self, features, retrieval_success):
        """Propagate retrieval-success feedback into every tree."""
        for tree in self.trees:
            tree.update_node_statistics(features, retrieval_success)

    def get_forest_stats(self):
        """Collect per-bucket and per-tree diagnostics."""
        stats = {
            'num_trees': self.num_trees,
            'num_buckets': self.num_buckets,
            'bucket_stats': [],
            'tree_stats': []
        }

        for bucket_id, bucket in enumerate(self.buckets):
            entry = bucket.get_bucket_stats()
            entry['bucket_id'] = bucket_id
            stats['bucket_stats'].append(entry)

        for tree_id, tree in enumerate(self.trees):
            stats['tree_stats'].append({
                'tree_id': tree_id,
                'active_nodes': tree.node_active.sum().item(),
                'total_samples': tree.node_samples.sum().item(),
                'max_depth': tree.max_depth
            })

        return stats

    def forward(self, features, items=None, mode='store'):
        """Dispatch to `store` or `retrieve` depending on `mode`."""
        if mode == 'store':
            return self.store(features, items)
        if mode == 'retrieve':
            return self.retrieve(features)
        raise ValueError("Mode must be 'store' or 'retrieve'")
382
+
memory_forest_docs.py ADDED
@@ -0,0 +1,1020 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##############################################################################################################################################
2
+ #||||- - - |6.25.2025| - - - || MEMORY FOREST || - - - |1990two| - - -|||| #
3
+ ##############################################################################################################################################
4
+ """
5
+ Mathematical Foundation & Conceptual Documentation
6
+ -------------------------------------------------
7
+
8
+ CORE PRINCIPLE:
9
+ Combines decision tree routing with associative hash buckets to create scalable
10
+ memory systems that learn optimal organization patterns. Instead of searching
11
+ all memory linearly, learned decision trees route queries to relevant memory
12
+ buckets, creating hierarchical, adaptive memory organization.
13
+
14
+ MATHEMATICAL FOUNDATION:
15
+ =======================
16
+
17
+ 1. DECISION TREE ROUTING:
18
+ Split Function: s(x, ΞΈ) = Οƒ((wΒ·x + b)/Ο„)
19
+
20
+ Where:
21
+ - x: input feature vector
22
+ - w, b: learnable split parameters
23
+ - Ο„: temperature parameter (controls split sharpness)
24
+ - Οƒ: sigmoid function
25
+ - s(x,θ) ∈ [0,1]: routing probability (left vs right)
26
+
27
+ 2. HIERARCHICAL ROUTING:
28
+ Path to leaf: p = [s₁, sβ‚‚, ..., s_{d-1}] for depth d
29
+ Leaf index: reached via heap indexing, node ← 2·node + 1 + sᡒ at each depth
30
+ Bucket assignment: B(x) = TreeToBucket[L(x)]
31
+
32
+ 3. ASSOCIATIVE MEMORY OPERATIONS:
33
+ Hash Functions: h_k(x) = tanh(W_kΒ·x + b_k) for k = 1..K
34
+ Hash Signature: H(x) = [h₁(x), hβ‚‚(x), ..., h_K(x)]
35
+ Similarity: sim(x,y) = cosine(H(x), H(y))
36
+
37
+ 4. MEMORY STORAGE:
38
+ Storage Condition: sim(x, stored) < ΞΈ_similarity
39
+ Eviction Policy: LRU based on access_count[i]
40
+ Update Rule: x_stored ← Ξ±Β·x_stored + (1-Ξ±)Β·x_new for similar items
41
+
42
+ 5. ENSEMBLE RETRIEVAL:
43
+ Tree Votes: V_t(x) = {items from bucket B_t(x)}
44
+ Similarity Scores: S(q,i) = cosine_similarity(q, i)
45
+ Final Ranking: rank = argmax_i Ξ£_t w_t Γ— S(q,i) Γ— I(i ∈ V_t)
46
+
47
+ Where w_t are tree importance weights.
48
+
49
+ 6. ADAPTIVE LEARNING:
50
+ Success Feedback: R(query, retrieval) ∈ [0,1]
51
+ Tree Update: ΞΈ_t ← ΞΈ_t + Ξ·Β·βˆ‡ΞΈ log P(correct_path|R)
52
+ Split Reinforcement: bias_node ← bias_node + Ξ±Β·sign(R - 0.5)
53
+
54
+ CONCEPTUAL REASONING:
55
+ ====================
56
+
57
+ WHY DECISION TREES + HASH BUCKETS?
58
+ - Linear search over large memories is O(n) - doesn't scale
59
+ - Fixed hash functions don't adapt to data distribution
60
+ - Decision trees provide hierarchical, learned routing (O(log n))
61
+ - Hash buckets enable efficient similarity-based storage/retrieval
62
+ - Combination creates adaptive, scalable associative memory
63
+
64
+ KEY INNOVATIONS:
65
+ 1. **Learned Routing**: Decision trees adapt splits based on retrieval success
66
+ 2. **Hierarchical Organization**: Multi-level memory structure (trees β†’ buckets β†’ items)
67
+ 3. **Ensemble Retrieval**: Multiple trees vote on best memories
68
+ 4. **Adaptive Hash Functions**: Learnable hash functions with Hebbian updates
69
+ 5. **Success-Based Learning**: Trees optimize for retrieval performance
70
+
71
+ APPLICATIONS:
72
+ - Large-scale information retrieval systems
73
+ - Adaptive caching and content distribution
74
+ - Knowledge base organization and query
75
+ - Recommender systems with hierarchical user models
76
+ - Scientific literature search and organization
77
+
78
+ COMPLEXITY ANALYSIS:
79
+ - Storage: O(log T + B) where T=trees, B=bucket_size
80
+ - Retrieval: O(T Γ— log T + k Γ— B) where k=top_k results
81
+ - Tree Update: O(log T) per feedback sample
82
+ - Memory: O(T Γ— 2^D + N Γ— E) where D=depth, N=items, E=embedding_dim
83
+ - Scalability: Sub-linear in number of stored items
84
+
85
+ BIOLOGICAL INSPIRATION:
86
+ - Hippocampal place cell organization for spatial memory
87
+ - Cortical hierarchical feature extraction and routing
88
+ - Cerebellar learned motor program selection
89
+ - Associative memory formation in neural circuits
90
+ - Synaptic plasticity for adaptive connection strengths
91
+ """
92
+
93
+ from __future__ import annotations
94
+ import torch
95
+ import torch.nn as nn
96
+ import torch.nn.functional as F
97
+ import numpy as np
98
+ import math
99
+ from collections import defaultdict, deque
100
+ from typing import List, Dict, Tuple, Optional
101
+
102
SAFE_MIN = -1e6
SAFE_MAX = 1e6
EPS = 1e-8

#||||- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 𝔦 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -||||#

def make_safe(tensor, min_val=SAFE_MIN, max_val=SAFE_MAX):
    """Sanitize *tensor* into the closed range [min_val, max_val].

    Mapping: NaN -> 0.0, +inf -> max_val, -inf -> min_val, then clamp.

    BUGFIX: the previous version replaced *every* infinity (including
    -inf) with max_val before clamping, so -inf came out as +max_val
    with its sign flipped; torch.nan_to_num handles each case directly.

    Args:
        tensor: input tensor of any floating dtype.
        min_val: lower clamp bound (default SAFE_MIN).
        max_val: upper clamp bound (default SAFE_MAX).

    Returns:
        Tensor of the same shape with all values finite and clamped.
    """
    tensor = torch.nan_to_num(tensor, nan=0.0, posinf=max_val, neginf=min_val)
    return torch.clamp(tensor, min_val, max_val)
112
+
113
def safe_cosine_similarity(a, b, dim=-1, eps=EPS):
    """Division-safe cosine similarity.

    Promotes both operands to float32, clamps each L2 norm to at least
    `eps`, and returns (a·b)/(||a||·||b||) reduced over `dim` with
    keepdim=True.  Values lie in [-1, 1]; zero vectors map to 0 instead
    of producing NaN.

    Args:
        a, b: input tensors of broadcast-compatible shapes.
        dim: dimension along which similarity is computed.
        eps: minimum allowed norm, for numerical stability.

    Returns:
        Cosine similarity tensor with the reduced dimension kept.
    """
    a, b = (t.float() if t.dtype != torch.float32 else t for t in (a, b))
    norms = (
        torch.norm(a, dim=dim, keepdim=True).clamp(min=eps)
        * torch.norm(b, dim=dim, keepdim=True).clamp(min=eps)
    )
    return torch.sum(a * b, dim=dim, keepdim=True) / norms
139
+
140
+ ###########################################################################################################################################
141
+ #################################################- - - ASSOCIATIVE HASH BUCKET - - -###################################################
142
+
143
class AssociativeHashBucket(nn.Module):
    """Similarity-clustered memory slot array with learned hash projections.

    Holds at most `bucket_size` embeddings.  An incoming embedding that
    is close enough (cosine similarity above the clamped learnable
    threshold) to an already-stored one is merged into that slot by an
    exponential moving average; otherwise it takes a fresh slot,
    evicting the least-accessed entry once the bucket is full.
    Retrieval ranks stored entries by cosine similarity and refreshes
    their access counts.
    """

    def __init__(self, bucket_size=64, embedding_dim=128, num_hash_functions=4):
        super().__init__()
        self.bucket_size = bucket_size
        self.embedding_dim = embedding_dim
        self.num_hash_functions = num_hash_functions

        # K independent scalar projections; tanh of each forms the signature.
        self.hash_projections = nn.ModuleList(
            nn.Linear(embedding_dim, 1, bias=True) for _ in range(num_hash_functions)
        )

        # Slot contents and per-slot metadata (non-trainable buffers).
        self.register_buffer('stored_items', torch.zeros(bucket_size, embedding_dim))
        self.register_buffer('item_hashes', torch.zeros(bucket_size, num_hash_functions))
        self.register_buffer('occupancy', torch.zeros(bucket_size, dtype=torch.bool))
        self.register_buffer('access_counts', torch.zeros(bucket_size))

        # Merge threshold (clamped to [0.1, 0.95] at use sites) and an
        # unused-but-declared decay factor.
        self.similarity_threshold = nn.Parameter(torch.tensor(0.7))
        self.decay_rate = nn.Parameter(torch.tensor(0.99))

        # Next free slot during the initial sequential fill.
        self.storage_pointer = 0

    def compute_hash_signature(self, item_embedding):
        """Return the tanh hash signature of `item_embedding`.

        A 1-D input of shape (embedding_dim,) yields
        (num_hash_functions,); a 2-D batch yields
        (batch, num_hash_functions).
        """
        was_vector = item_embedding.dim() == 1
        batch = item_embedding.unsqueeze(0) if was_vector else item_embedding

        columns = [torch.tanh(proj(batch)).squeeze(-1) for proj in self.hash_projections]
        signature = torch.stack(columns, dim=-1)  # (batch, num_hash_functions)

        # Remove the batch dim only if we added it ourselves.
        return signature.squeeze(0) if was_vector else signature

    def store_item(self, item_embedding, item_id=None):
        """Insert (or merge) each embedding; return the slot index per item.

        Merge path: when a stored item's cosine similarity exceeds the
        clamped threshold, fold the new embedding into it with a 0.9/0.1
        weighted average.  Fresh-slot path: fill sequentially, then
        evict the least-accessed occupied slot.

        Args:
            item_embedding: (embedding_dim,) or (batch, embedding_dim).
            item_id: unused; kept for interface compatibility.

        Returns:
            list[int] of slot indices, one per input item.
        """
        if item_embedding.dim() == 1:
            item_embedding = item_embedding.unsqueeze(0)

        slots_used = []
        for embedding in item_embedding:
            hash_sig = self.compute_hash_signature(embedding)

            # Merge path: fold into the closest stored item when similar enough.
            if self.occupancy.any():
                sims = safe_cosine_similarity(
                    embedding.unsqueeze(0),
                    self.stored_items[self.occupancy],
                    dim=-1
                ).squeeze()

                threshold = torch.clamp(self.similarity_threshold, 0.1, 0.95)
                if sims.numel() > 0 and sims.max() > threshold:
                    target = self.occupancy.nonzero(as_tuple=False)[sims.argmax()]
                    self.stored_items[target] = 0.9 * self.stored_items[target] + 0.1 * embedding
                    self.access_counts[target] += 1
                    slots_used.append(int(target.item()))
                    continue

            # Fresh-slot path: sequential fill, then least-accessed eviction.
            if self.storage_pointer >= self.bucket_size:
                if self.occupancy.any():
                    rel = self.access_counts[self.occupancy].argmin()
                    slot = self.occupancy.nonzero(as_tuple=False)[rel]
                else:
                    slot = torch.tensor(0)
            else:
                slot = torch.tensor(self.storage_pointer)
                self.storage_pointer = min(self.storage_pointer + 1, self.bucket_size)

            self.stored_items[slot] = embedding
            self.item_hashes[slot] = hash_sig.squeeze()
            self.occupancy[slot] = True
            self.access_counts[slot] = 1
            slots_used.append(int(slot.item()))

        return slots_used

    def retrieve_similar(self, query_embedding, top_k=5):
        """Return (items, similarities) for the top-k closest stored entries.

        Access counts of the returned slots are incremented so that
        frequently retrieved items survive eviction longer.  An empty
        bucket yields ([], []) -- plain lists, not tensors.
        """
        if query_embedding.dim() == 1:
            query_embedding = query_embedding.unsqueeze(0)

        if not self.occupancy.any():
            return [], []

        candidates = self.stored_items[self.occupancy]
        candidate_slots = self.occupancy.nonzero(as_tuple=False).squeeze(-1)

        if candidates.numel() == 0:
            return [], []

        sims = safe_cosine_similarity(
            query_embedding.expand(candidates.shape[0], -1),
            candidates,
            dim=-1
        ).squeeze(-1)  # (num_candidates,)

        if sims.numel() == 0:
            return [], []

        top_sims, order = torch.topk(sims, min(top_k, sims.size(0)))

        hits = candidates[order]
        hit_slots = candidate_slots[order]

        # Retrieval refreshes LRU standing.
        for slot in hit_slots:
            self.access_counts[slot] += 1

        return hits, top_sims

    def get_bucket_stats(self):
        """Summarize occupancy and access activity for monitoring."""
        return {
            'occupancy_rate': self.occupancy.float().mean().item(),
            'total_accesses': self.access_counts.sum().item(),
            'avg_similarity': self.similarity_threshold.item(),
            'storage_pointer': self.storage_pointer
        }
350
+
351
+ ###########################################################################################################################################
352
+ ################################################- - - MEMORY DECISION TREE - - -#######################################################
353
+
354
class MemoryDecisionTree(nn.Module):
    """Learned binary decision tree for adaptive memory routing.

    Each internal node holds a learnable linear split function whose bias is
    nudged by retrieval-success feedback, so over time the tree routes queries
    toward memory buckets where similar items were previously found.

    Mathematical framework:
        - Split probability: p = sigmoid((w . x + b) / tau)
        - Heap indexing: children of node i are 2i+1 (left) and 2i+2 (right)
        - Feedback: reward > 0.5 reinforces the bias toward the taken branch
          with a fixed step of 0.01 (gradient-free reinforcement)

    Args:
        input_dim: Dimensionality of routed feature vectors.
        max_depth: Tree depth; after max_depth-1 routing steps the reachable
            leaves lie at heap indices [2**(max_depth-1)-1, 2**max_depth-2].
        min_samples_split: Kept for interface compatibility; currently unused.
    """
    def __init__(self, input_dim, max_depth=6, min_samples_split=2):
        super().__init__()
        self.input_dim = input_dim
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split  # NOTE: not used by any method yet

        # One split function per node of a complete binary tree.
        max_nodes = 2**max_depth - 1

        # Learnable split functions for each internal node.
        self.split_weights = nn.Parameter(torch.randn(max_nodes, input_dim) * 0.1)
        self.split_biases = nn.Parameter(torch.zeros(max_nodes))
        self.split_temperatures = nn.Parameter(torch.ones(max_nodes))

        # Sharpen splits slightly and break bias symmetry at initialization.
        with torch.no_grad():
            self.split_temperatures.data.mul_(0.6)  # lower temp = sharper splits
            self.split_biases.data.add_(0.01 * torch.randn_like(self.split_biases))

        # Per-node bookkeeping (buffers so they follow .to(device)/state_dict).
        self.register_buffer('node_active', torch.zeros(max_nodes, dtype=torch.bool))
        self.register_buffer('node_samples', torch.zeros(max_nodes))

        # Bidirectional leaf <-> bucket mappings (plain Python state).
        self.leaf_to_bucket = {}
        self.bucket_to_leaves = defaultdict(list)

        # Only the root starts active; nodes activate as samples reach them.
        self.node_active[0] = True

    def get_node_split(self, node_idx, x):
        """Compute the probability of routing right at ``node_idx``.

        Args:
            node_idx: Heap index of the tree node.
            x: Input features [batch_size, input_dim].

        Returns:
            Tensor [batch_size] of right-branch probabilities; all zeros when
            ``node_idx`` is out of range (forces a left descent).
        """
        if node_idx >= len(self.split_weights):
            return torch.zeros(x.shape[0], device=x.device)

        weights = self.split_weights[node_idx]
        bias = self.split_biases[node_idx]
        # Clamp keeps the temperature away from zero (division blow-up).
        temp = torch.clamp(self.split_temperatures[node_idx], 0.1, 10.0)

        split_score = torch.matmul(x, weights) + bias
        split_prob = torch.sigmoid(split_score / temp)

        return split_prob

    def route_to_leaf(self, x, deterministic=False):
        """Route each input from the root down to a leaf node.

        Args:
            x: Input features [batch_size, input_dim].
            deterministic: If True, take the argmax branch (p > 0.5);
                otherwise sample the branch from Bernoulli(p).

        Returns:
            Tuple of (leaf_nodes [batch_size], paths [batch_size, max_depth])
            where paths[:, d] is 0/1 for left/right at depth d. Only
            max_depth-1 decisions are made, so the last column stays 0.
        """
        batch_size = x.shape[0]
        device = x.device

        # Start every sample at the root (heap index 0).
        current_nodes = torch.zeros(batch_size, dtype=torch.long, device=device)
        paths = torch.zeros(batch_size, self.max_depth, dtype=torch.long, device=device)

        for depth in range(self.max_depth - 1):
            split_probs = torch.zeros(batch_size, device=device)

            # Inactive nodes keep probability 0, i.e. default to the left child.
            for i in range(batch_size):
                node_idx = int(current_nodes[i].item())
                if self.node_active[node_idx]:
                    split_probs[i] = self.get_node_split(node_idx, x[i:i + 1]).squeeze()

            # Make routing decisions.
            if deterministic:
                go_right = (split_probs > 0.5).long()
            else:
                go_right = torch.bernoulli(split_probs).long()

            paths[:, depth] = go_right

            # Heap step: left child 2i+1, right child 2i+2.
            current_nodes = current_nodes * 2 + 1 + go_right

        return current_nodes, paths

    def assign_leaf_to_bucket(self, leaf_idx, bucket_idx):
        """Record a bidirectional leaf <-> bucket mapping for routing.

        Args:
            leaf_idx: Leaf heap index; accepts a 0-d tensor (historical
                calling convention) or a plain int.
            bucket_idx: Memory bucket index.
        """
        # Previously this unconditionally called .item() and crashed on ints.
        leaf = int(leaf_idx.item()) if torch.is_tensor(leaf_idx) else int(leaf_idx)
        self.leaf_to_bucket[leaf] = int(bucket_idx)
        self.bucket_to_leaves[int(bucket_idx)].append(leaf)

    def get_bucket_for_input(self, x, deterministic=True):
        """Route inputs to their assigned memory buckets.

        Leaves without an explicit assignment fall back to bucket 0.

        Args:
            x: Input features [batch_size, input_dim].
            deterministic: Whether routing takes argmax branches.

        Returns:
            Long tensor [batch_size] of bucket indices on ``x``'s device.
        """
        leaf_nodes, _ = self.route_to_leaf(x, deterministic=deterministic)

        bucket_assignments = [
            self.leaf_to_bucket.get(int(leaf.item()), 0) for leaf in leaf_nodes
        ]

        return torch.tensor(bucket_assignments, device=x.device)

    def update_node_statistics(self, x, rewards):
        """Update tree parameters from retrieval-success feedback.

        Re-routes each input deterministically, then for every node on the
        path: increments its sample count, marks it active, and — when the
        reward exceeds 0.5 — nudges its bias 0.01 toward the branch taken.

        Args:
            x: Input features [batch_size, input_dim].
            rewards: Per-sample success scores in [0, 1] (tensor or sequence).
        """
        leaf_nodes, paths = self.route_to_leaf(x, deterministic=True)
        num_nodes_bound = 2**self.max_depth - 1

        for i in range(x.shape[0]):
            current_node = 0
            reward = rewards[i].item() if torch.is_tensor(rewards[i]) else rewards[i]

            # Traverse the recorded path and update every node on it.
            for depth in range(self.max_depth - 1):
                if current_node < len(self.node_samples):
                    # Update statistics.
                    self.node_samples[current_node] += 1
                    self.node_active[current_node] = True

                    # Reinforce successful splits toward the branch taken.
                    if reward > 0.5:  # successful retrieval
                        if int(paths[i, depth].item()) == 1:  # went right
                            self.split_biases.data[current_node] += 0.01
                        else:  # went left
                            self.split_biases.data[current_node] -= 0.01

                # Advance along the recorded path. The fallback branch is now
                # a plain int (the original called .item() on the int 0 and
                # would have raised AttributeError if ever reached).
                if depth < paths.shape[1]:
                    direction = int(paths[i, depth].item())
                else:
                    direction = 0
                current_node = current_node * 2 + 1 + direction

                if current_node >= num_nodes_bound:
                    break
564
+
565
+ ###########################################################################################################################################
566
+ ##################################################- - - MEMORY FOREST - - -############################################################
567
+
568
class MemoryForest(nn.Module):
    """Complete memory forest system with ensemble routing and associative storage.

    Combines multiple MemoryDecisionTree routers with shared
    AssociativeHashBucket storage. Trees vote on where to store/retrieve,
    and adapt their routing from retrieval-success feedback.

    System architecture:
        1. Multiple decision trees learn different routing strategies
        2. Shared memory buckets store items with associative clustering
        3. A feature encoder maps inputs to embedding space
        4. Ensemble retrieval combines candidates from all trees
        5. Success feedback adapts tree routing over time
    """
    def __init__(self, input_dim, num_trees=5, max_depth=6, bucket_size=64, embedding_dim=128):
        super().__init__()
        self.input_dim = input_dim
        self.num_trees = num_trees
        self.embedding_dim = embedding_dim

        # Multiple decision trees for ensemble routing.
        self.trees = nn.ModuleList([
            MemoryDecisionTree(input_dim, max_depth) for _ in range(num_trees)
        ])

        # Shared memory buckets across all trees.
        # NOTE(review): each tree exposes only 2**(max_depth-1) leaves, so this
        # allocates roughly twice as many buckets as ever get leaf assignments
        # in _initialize_bucket_assignments — confirm the surplus is intended.
        self.num_buckets = num_trees * (2**max_depth)
        self.buckets = nn.ModuleList([
            AssociativeHashBucket(bucket_size, embedding_dim) for _ in range(self.num_buckets)
        ])

        # Feature encoder: maps raw inputs to the bucket embedding space.
        self.feature_encoder = nn.Sequential(
            nn.Linear(input_dim, embedding_dim),
            nn.LayerNorm(embedding_dim),
            nn.ReLU(),
            nn.Linear(embedding_dim, embedding_dim)
        )

        # Initialize bucket assignments for tree leaves.
        self._initialize_bucket_assignments()

    def _initialize_bucket_assignments(self):
        """Initialize the mapping from tree leaves to memory buckets.

        Each tree gets its own consecutive range of buckets, with leaves
        mapped in heap-index order, so no bucket is shared between trees.
        """
        bucket_idx = 0
        for tree_idx, tree in enumerate(self.trees):
            # Leaf nodes reachable by route_to_leaf lie at heap indices
            # [2**(D-1)-1, 2**D-2] for tree depth D.
            start_leaf = 2**(tree.max_depth - 1) - 1
            end_leaf = 2**tree.max_depth - 2

            for leaf in range(start_leaf, end_leaf + 1):
                if bucket_idx < self.num_buckets:
                    tree.assign_leaf_to_bucket(torch.tensor(leaf), bucket_idx)
                    bucket_idx += 1

    def store(self, features, items=None):
        """Store items in the forest using learned (stochastic) routing.

        Each tree routes every item independently with deterministic=False,
        so the same item may land in several buckets — intentional redundancy
        that improves retrieval robustness.

        Args:
            features: Input features [batch_size, input_dim].
            items: Items to store; defaults to ``features``.
                NOTE(review): ``items`` is currently ignored beyond the
                default — the encoded ``features`` are what gets stored.

        Returns:
            List of (bucket_id, storage_index) tuples, one per (tree, item).
        """
        if items is None:
            items = features

        # Encode features into the bucket embedding space.
        embeddings = self.feature_encoder(features)

        storage_results = []

        # Route through each tree and store in its assigned bucket.
        for tree in self.trees:
            bucket_assignments = tree.get_bucket_for_input(features, deterministic=False)

            for i, b_idx in enumerate(bucket_assignments.tolist()):
                if b_idx < len(self.buckets):
                    stored_idx = self.buckets[b_idx].store_item(embeddings[i])
                    storage_results.append((b_idx, stored_idx))

        return storage_results

    def retrieve(self, query_features, top_k=5):
        """Retrieve similar items by pooling candidates from all trees.

        Each tree deterministically routes the query to a bucket; candidates
        from every visited bucket are pooled per query, sorted by similarity,
        and the top-k returned.

        NOTE(review): if two trees route a query to the same bucket, that
        bucket's candidates are added twice — verify duplicates are acceptable.

        Args:
            query_features: Query feature vectors [batch_size, input_dim].
            top_k: Number of most similar items to return per query.

        Returns:
            List of (retrieved_items, similarity_scores) per query; empty
            queries yield empty CPU tensors.
        """
        query_embeddings = self.feature_encoder(query_features)

        # Candidate pool per query index: (item, float_sim, tensor_sim).
        bucket_votes = defaultdict(list)

        for tree in self.trees:
            bucket_assignments = tree.get_bucket_for_input(query_features, deterministic=True)

            for i, b_idx in enumerate(bucket_assignments.tolist()):
                if b_idx < len(self.buckets):
                    retrieved_items, similarities = self.buckets[b_idx].retrieve_similar(
                        query_embeddings[i], top_k=top_k
                    )

                    if len(retrieved_items) > 0:
                        # Keep floats for sorting and tensors for the output.
                        float_sims = similarities.detach().cpu().tolist()
                        for itm, sim_t, sim_f in zip(retrieved_items, similarities, float_sims):
                            bucket_votes[i].append((itm, sim_f, sim_t))

        # Aggregate ensemble results per query.
        final_results = []
        for query_idx in range(query_features.shape[0]):
            if query_idx in bucket_votes and len(bucket_votes[query_idx]) > 0:
                # Sort candidates by (float) similarity score, descending.
                candidates = bucket_votes[query_idx]
                candidates.sort(key=lambda x: x[1], reverse=True)

                # Extract top-k results.
                top_candidates = candidates[:top_k]
                items = [c[0] for c in top_candidates]
                sims_t = [c[2] for c in top_candidates]
                final_results.append((torch.stack(items), torch.stack(sims_t)))
            else:
                # No results found.
                # NOTE(review): these empties are created on the CPU — may
                # mismatch the query's device; confirm callers handle this.
                final_results.append((torch.tensor([]), torch.tensor([])))

        return final_results

    def update_routing(self, features, retrieval_success):
        """Update every tree's routing from retrieval-success feedback.

        Successful routes are reinforced and unsuccessful ones weakened via
        each tree's gradient-free update rule.

        Args:
            features: Input features that were queried [batch_size, input_dim].
            retrieval_success: Success scores [batch_size] in [0, 1].
        """
        for tree in self.trees:
            tree.update_node_statistics(features, retrieval_success)

    def get_forest_stats(self):
        """Collect per-bucket and per-tree statistics for monitoring.

        Returns:
            Dict with 'num_trees', 'num_buckets', 'bucket_stats' (one entry
            per bucket, tagged with 'bucket_id'), and 'tree_stats' (one entry
            per tree with active-node and sample counts).
        """
        stats = {
            'num_trees': self.num_trees,
            'num_buckets': self.num_buckets,
            'bucket_stats': [],
            'tree_stats': []
        }

        # Collect bucket statistics.
        for i, bucket in enumerate(self.buckets):
            bucket_stat = bucket.get_bucket_stats()
            bucket_stat['bucket_id'] = i
            stats['bucket_stats'].append(bucket_stat)

        # Collect tree statistics.
        for i, tree in enumerate(self.trees):
            tree_stat = {
                'tree_id': i,
                'active_nodes': tree.node_active.sum().item(),
                'total_samples': tree.node_samples.sum().item(),
                'max_depth': tree.max_depth
            }
            stats['tree_stats'].append(tree_stat)

        return stats

    def forward(self, features, items=None, mode='store'):
        """Unified forward interface for storage and retrieval operations.

        Args:
            features: Input feature vectors.
            items: Items to store (only used in 'store' mode).
            mode: 'store' or 'retrieve'.

        Returns:
            ``store(...)`` results or ``retrieve(...)`` results.

        Raises:
            ValueError: If ``mode`` is neither 'store' nor 'retrieve'.
        """
        if mode == 'store':
            return self.store(features, items)
        elif mode == 'retrieve':
            return self.retrieve(features)
        else:
            raise ValueError("Mode must be 'store' or 'retrieve'")
805
+
806
+ ###########################################################################################################################################
807
+ ####################################################- - - DEMO AND TESTING - - -#######################################################
808
+
809
def test_memory_forest():
    """Comprehensive smoke-test of Memory Forest functionality.

    Exercises construction, storage, retrieval before/after adaptation,
    statistics collection, and query discrimination, printing a report as
    it goes. Uses unseeded RNG, so printed numbers vary between runs.

    Returns:
        True on completion (no assertions; failures surface as exceptions).
    """
    print(" Testing Memory Forest - Associative Memory with Learned Routing")
    print("=" * 70)

    # Create memory forest system
    input_dim = 64
    embedding_dim = 128
    forest = MemoryForest(
        input_dim=input_dim,
        num_trees=3,
        max_depth=4,
        bucket_size=32,
        embedding_dim=embedding_dim
    )

    print(f"Created Memory Forest:")
    print(f" - Input dimension: {input_dim}")
    print(f" - Embedding dimension: {embedding_dim}")
    print(f" - Number of trees: {forest.num_trees}")
    print(f" - Tree depth: 4")
    print(f" - Total buckets: {forest.num_buckets}")
    print(f" - Bucket capacity: 32 items each")

    # Generate test data with some structure for meaningful clustering
    print(f"\n Generating structured test data...")
    num_items = 100

    # Create clustered data (3 clusters): each item is a random cluster
    # center plus Gaussian noise, so nearby items should co-locate.
    cluster_centers = torch.randn(3, input_dim) * 2
    test_features = []

    for _ in range(num_items):
        cluster_id = torch.randint(0, 3, (1,)).item()
        noise = torch.randn(input_dim) * 0.5
        item = cluster_centers[cluster_id] + noise
        test_features.append(item)

    test_features = torch.stack(test_features)
    print(f" - Generated {num_items} items in 3 clusters")
    print(f" - Feature dimension: {input_dim}")

    # Test storage
    print(f"\n Testing storage operations...")
    storage_results = forest.store(test_features)

    unique_buckets = len(set(r[0] for r in storage_results))
    print(f" - Stored {num_items} items")
    print(f" - Used {unique_buckets} different buckets")
    print(f" - Average items per bucket: {len(storage_results) / unique_buckets:.1f}")

    # Test retrieval without learning
    print(f"\n Testing retrieval (before learning)...")
    query_features = test_features[:5]  # Use first 5 items as queries

    retrieval_results = forest.retrieve(query_features, top_k=3)

    initial_success_count = 0
    print("Initial retrieval results:")
    for i, (items, similarities) in enumerate(retrieval_results):
        if len(items) > 0:
            best_sim = similarities[0].item()
            success = best_sim > 0.8  # Threshold for "good" retrieval
            print(f" Query {i}: {len(items)} items, best similarity: {best_sim:.3f} {'βœ“' if success else 'βœ—'}")
            if success:
                initial_success_count += 1
        else:
            print(f" Query {i}: No items retrieved βœ—")

    initial_success_rate = initial_success_count / len(query_features)
    print(f" Initial success rate: {initial_success_rate:.1%}")

    # Test adaptive learning
    print(f"\n Testing adaptive learning...")
    print("Simulating retrieval feedback and tree adaptation...")

    # Simulate multiple rounds of feedback
    for round_num in range(3):
        # Generate random retrieval success scores (biased toward improvement)
        retrieval_success = torch.rand(len(query_features)) * 0.6 + 0.3

        # Update tree routing based on feedback
        forest.update_routing(query_features, retrieval_success)

        print(f" Round {round_num + 1}: Updated trees with feedback")

    # Test retrieval after learning
    print(f"\n Testing retrieval (after learning)...")
    learned_results = forest.retrieve(query_features, top_k=3)

    learned_success_count = 0
    print("Post-learning retrieval results:")
    for i, (items, similarities) in enumerate(learned_results):
        if len(items) > 0:
            best_sim = similarities[0].item()
            success = best_sim > 0.8
            print(f" Query {i}: {len(items)} items, best similarity: {best_sim:.3f} {'βœ“' if success else 'βœ—'}")
            if success:
                learned_success_count += 1
        else:
            print(f" Query {i}: No items retrieved βœ—")

    learned_success_rate = learned_success_count / len(query_features)
    improvement = learned_success_rate - initial_success_rate
    print(f" Post-learning success rate: {learned_success_rate:.1%}")
    print(f" Improvement: {improvement:+.1%}")

    # Analyze forest statistics
    print(f"\n Forest analysis:")
    stats = forest.get_forest_stats()

    avg_bucket_occupancy = np.mean([b['occupancy_rate'] for b in stats['bucket_stats']])
    total_accesses = sum(b['total_accesses'] for b in stats['bucket_stats'])
    active_nodes = sum(t['active_nodes'] for t in stats['tree_stats'])

    print(f" - Average bucket occupancy: {avg_bucket_occupancy:.1%}")
    print(f" - Total bucket accesses: {total_accesses}")
    print(f" - Active tree nodes: {active_nodes}")

    # Test different query types: a stored item should score much higher
    # than an unrelated random vector.
    print(f"\n Testing query diversity...")

    # Similar query (from stored data)
    similar_query = test_features[10:11]  # Known stored item
    similar_results = forest.retrieve(similar_query, top_k=3)
    similar_best = similar_results[0][1][0].item() if len(similar_results[0][1]) > 0 else 0

    # Random query (not from stored data)
    random_query = torch.randn(1, input_dim)
    random_results = forest.retrieve(random_query, top_k=3)
    random_best = random_results[0][1][0].item() if len(random_results[0][1]) > 0 else 0

    print(f" - Known item query similarity: {similar_best:.3f}")
    print(f" - Random query similarity: {random_best:.3f}")
    # max(..., 0.01) guards against division by zero when nothing is found.
    print(f" - Discrimination ratio: {similar_best / max(random_best, 0.01):.1f}x")

    print(f"\n Memory Forest test completed!")
    print("βœ“ Hierarchical memory organization with learned routing")
    print("βœ“ Associative storage with similarity clustering")
    print("βœ“ Ensemble retrieval across multiple trees")
    print("βœ“ Adaptive routing based on retrieval success")
    print("βœ“ Efficient O(log n) routing instead of O(n) search")
    print("βœ“ Scalable architecture for large memory systems")

    return True
954
+
955
def simple_demo():
    """Small illustrative demo: store six binary patterns and retrieve them.

    Builds a tiny forest, stores hand-crafted alternating/pair patterns,
    then queries with exact and noise-perturbed copies, printing similarity
    results and bucket-usage statistics.
    """
    print("\n" + "="*50)
    print(" MEMORY FOREST SIMPLE DEMO")
    print("="*50)

    # Create small forest for clear demonstration
    forest = MemoryForest(input_dim=8, num_trees=2, max_depth=3, bucket_size=16, embedding_dim=32)

    # Create simple patterns that should cluster together
    patterns = torch.tensor([
        [1, 0, 1, 0, 1, 0, 1, 0],  # Pattern A (alternating)
        [0, 1, 0, 1, 0, 1, 0, 1],  # Pattern B (inverse alternating)
        [1, 1, 0, 0, 1, 1, 0, 0],  # Pattern C (pairs)
        [0, 0, 1, 1, 0, 0, 1, 1],  # Pattern D (inverse pairs)
        [1, 0, 1, 0, 1, 0, 1, 1],  # Pattern A variant
        [0, 1, 0, 1, 0, 1, 0, 0],  # Pattern B variant
    ], dtype=torch.float32)

    print("Storing 6 distinct patterns...")
    print(" - 2 alternating patterns (A, B)")
    print(" - 2 pair patterns (C, D)")
    print(" - 2 pattern variants")

    # Store patterns
    forest.store(patterns)

    # Test exact pattern retrieval
    print("\nTesting exact pattern retrieval:")
    results = forest.retrieve(patterns[:4])  # Query first 4 patterns

    for i, (items, sims) in enumerate(results):
        if len(items) > 0:
            best_sim = sims[0].item()
            print(f" Pattern {i}: Found {len(items)} matches, best similarity: {best_sim:.3f}")
        else:
            print(f" Pattern {i}: No matches found")

    # Test noisy pattern retrieval: small Gaussian perturbation should still
    # match the stored originals with high similarity.
    print("\nTesting noisy pattern retrieval:")
    noisy_patterns = patterns[:2] + 0.1 * torch.randn_like(patterns[:2])
    noisy_results = forest.retrieve(noisy_patterns)

    for i, (items, sims) in enumerate(noisy_results):
        if len(items) > 0:
            best_sim = sims[0].item()
            print(f" Noisy pattern {i}: Found {len(items)} matches, best similarity: {best_sim:.3f}")
        else:
            print(f" Noisy pattern {i}: No matches found")

    # Show forest organization
    stats = forest.get_forest_stats()
    used_buckets = sum(1 for b in stats['bucket_stats'] if b['occupancy_rate'] > 0)
    print(f"\nForest organization:")
    print(f" - Used {used_buckets} buckets out of {len(stats['bucket_stats'])}")
    print(f" - Trees routed patterns to different memory locations")
    print(f" - Associative clustering groups similar patterns")

    print("\n Demo completed. Memory Forest successfully organized and retrieved patterns.")
1014
+
1015
if __name__ == "__main__":
    # Run the full functional test first, then the small illustrative demo.
    test_memory_forest()
    simple_demo()
1017
+ simple_demo()
1018
+
1019
+ ###########################################################################################################################################
1020
+ ###########################################################################################################################################