""" Contextual Query Masking via XOR Attention (Phase 4.0) ====================================================== Implements an XOR-based soft attention mechanism over Binary HDV space. How it works: 1. A "context key" is constructed by bundling recent HOT-tier vectors. 2. A XOR attention mask is generated: mask = query XOR context_key This creates a residual vector that is ORTHOGONAL to the context, effectively suppressing already-known dimensions and amplifying novel ones. 3. Query results are re-ranked by a composite score: composite = alpha * raw_similarity + beta * novelty_boost(mask, mem_hdv) 4. The mask is also available for downstream gap-detection. Motivation (VSA theory): - XOR in binary HDV space is the self-inverse binding operator. - query.xor(context) ≈ "what about this query is NOT already represented in context?" - Hamming similarity(mask, candidate) ≈ novelty of candidate relative to context. Phase 4.1: XOR-based Project Isolation ====================================== XORIsolationMask provides deterministic project-based memory isolation: - Each project_id derives a unique binary mask via SHA256(project_id) -> seed -> RNG - store(): masked_hdv = original_hdv XOR project_mask - query(): unmasked_query = query_hdv XOR project_mask (then search in masked space) - Memories from different projects are effectively orthogonal (~50% similarity) """ from __future__ import annotations import hashlib from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple import numpy as np from loguru import logger from .binary_hdv import BinaryHDV, majority_bundle @dataclass class AttentionConfig: """Tunable hyperparameters for XOR attention.""" alpha: float = 0.6 # Weight for raw similarity beta: float = 0.4 # Weight for novelty score from XOR mask context_sample_n: int = 50 # How many HOT nodes to include in context key min_novelty_boost: float = 0.0 # Floor for novelty contribution enabled: bool = True def validate(self) -> None: assert 0.0 <= self.alpha <= 1.0, "alpha must be in [0, 1]" assert 0.0 <= self.beta <= 1.0, "beta must be in [0, 1]" assert abs((self.alpha + self.beta) - 1.0) < 1e-6, "alpha + beta must equal 1.0" @dataclass class AttentionResult: """Enriched result from contextual reranking.""" node_id: str raw_score: float novelty_score: float composite_score: float attention_mask: Optional[BinaryHDV] = field(default=None, repr=False) class XORAttentionMasker: """ Contextual query masking using XOR binding in binary HDV space. Usage: masker = XORAttentionMasker(config) mask = masker.build_attention_mask(query_vec, context_vecs) reranked = masker.rerank(raw_scores, memory_vectors, mask) """ def __init__(self, config: Optional[AttentionConfig] = None): self.config = config or AttentionConfig() def build_context_key(self, context_nodes_hdv: List[BinaryHDV]) -> BinaryHDV: """ Bundle HOT-tier vectors into a single context summary key. Uses majority vote bundling (sum > threshold → 1, else → 0). Falls back to zero-vector if no context is available. """ if not context_nodes_hdv: return BinaryHDV.zeros(context_nodes_hdv[0].dimension if context_nodes_hdv else 16384) return majority_bundle(context_nodes_hdv) def build_attention_mask( self, query_vec: BinaryHDV, context_key: BinaryHDV, ) -> BinaryHDV: """ Compute XOR attention mask: mask = query XOR context_key. The mask represents "query minus context" — bits that are unique to the query compared to what the system already holds in working memory. High Hamming similarity between mask and a candidate → that candidate is novel / peripheral relative to the current context. """ mask = query_vec.xor_bind(context_key) logger.debug( "Built XOR attention mask — " f"query/context Hamming dist = {query_vec.normalized_distance(context_key):.4f}" ) return mask def novelty_score(self, mask: BinaryHDV, candidate_hdv: BinaryHDV) -> float: """ Calculate novelty of a candidate relative to the context. Defined as: Hamming similarity(mask, candidate) in [0, 1]. Higher value → candidate is more "attention-worthy" given the query context. """ return mask.similarity(candidate_hdv) def rerank( self, raw_scores: Dict[str, float], memory_vectors: Dict[str, BinaryHDV], mask: BinaryHDV, ) -> List[AttentionResult]: """ Re-rank retrieved memories using the composite XOR attention score. Args: raw_scores: {node_id: raw_similarity} from initial retrieval. memory_vectors: {node_id: BinaryHDV} for novelty calculation. mask: XOR attention mask built from query and context. Returns: Sorted list of AttentionResult (highest composite first). """ cfg = self.config results: List[AttentionResult] = [] for node_id, raw in raw_scores.items(): hdv = memory_vectors.get(node_id) if hdv is None: novelty = cfg.min_novelty_boost else: novelty = max(self.novelty_score(mask, hdv), cfg.min_novelty_boost) composite = cfg.alpha * raw + cfg.beta * novelty results.append( AttentionResult( node_id=node_id, raw_score=raw, novelty_score=novelty, composite_score=composite, attention_mask=mask, ) ) results.sort(key=lambda r: r.composite_score, reverse=True) return results def extract_scores( self, results: List[AttentionResult] ) -> List[Tuple[str, float]]: """Convert AttentionResult list to the standard (node_id, score) tuple format.""" return [(r.node_id, r.composite_score) for r in results] # ============================================================================== # Phase 4.1: XOR-based Project Isolation # ============================================================================== @dataclass class IsolationConfig: """Configuration for XOR-based project isolation.""" enabled: bool = True dimension: int = 16384 def validate(self) -> None: assert self.dimension > 0, "dimension must be positive" assert self.dimension % 8 == 0, "dimension must be multiple of 8" class XORIsolationMask: """ Deterministic XOR-based isolation mask for multi-tenant memory isolation. Design: ------- Each project_id derives a unique binary mask through: SHA256(project_id) -> 256-bit digest -> seed -> np.random.Generator -> binary mask The mask is applied via XOR binding: - store(content, project_id="A"): masked_hdv = original_hdv XOR mask_A - query(query_text, project_id="A"): unmasked = query_hdv XOR mask_A Properties: ----------- - Self-inverse: XOR twice with the same mask recovers the original vector - Deterministic: Same project_id always produces the same mask - Orthogonal isolation: Different projects' masks are ~50% different (random) - No key management: project_id IS the key (no external secrets needed) Security Model: --------------- This provides cryptographic isolation via the one-time pad principle: - A masked vector reveals NO information about the original without the mask - Cross-project queries will match random noise (~50% similarity baseline) - The isolation strength depends on the secrecy of project_ids Usage: ------ masker = XORIsolationMask(config) mask = masker.get_mask("project-alpha") # Deterministic mask # Store masked_hdv = masker.apply_mask(original_hdv, "project-alpha") # Query (apply same mask to query to search in masked space) masked_query = masker.apply_mask(query_hdv, "project-alpha") # Remove mask (if needed for inspection) original = masker.remove_mask(masked_hdv, "project-alpha") """ def __init__(self, config: Optional[IsolationConfig] = None): self.config = config or IsolationConfig() self._mask_cache: Dict[str, BinaryHDV] = {} def _derive_seed(self, project_id: str) -> int: """ Derive a deterministic 64-bit seed from project_id using SHA256. Args: project_id: Unique project identifier string. Returns: 64-bit integer seed for numpy's Generator. """ digest = hashlib.sha256(f"mnemo_isolation_v1:{project_id}".encode()).digest() return int.from_bytes(digest[:8], byteorder="big", signed=False) def get_mask(self, project_id: str) -> BinaryHDV: """ Get or create the deterministic isolation mask for a project. The mask is cached for efficiency. Same project_id always returns the same BinaryHDV mask. Args: project_id: Unique project identifier. Returns: BinaryHDV mask of dimension self.config.dimension. """ if project_id in self._mask_cache: return self._mask_cache[project_id] seed = self._derive_seed(project_id) rng = np.random.default_rng(seed) # Generate random binary mask n_bytes = self.config.dimension // 8 mask_bytes = rng.integers(0, 256, size=n_bytes, dtype=np.uint8) mask = BinaryHDV(data=mask_bytes, dimension=self.config.dimension) self._mask_cache[project_id] = mask logger.debug(f"Generated isolation mask for project '{project_id}' (seed={seed})") return mask def apply_mask(self, hdv: BinaryHDV, project_id: str) -> BinaryHDV: """ Apply project isolation mask to a vector (XOR binding). Args: hdv: The BinaryHDV to mask. project_id: Project identifier for mask derivation. Returns: Masked BinaryHDV (original XOR project_mask). """ if not self.config.enabled: return hdv mask = self.get_mask(project_id) return hdv.xor_bind(mask) def remove_mask(self, masked_hdv: BinaryHDV, project_id: str) -> BinaryHDV: """ Remove project isolation mask from a vector (XOR is self-inverse). Note: This is identical to apply_mask() due to XOR's self-inverse property. Kept as a separate method for semantic clarity. Args: masked_hdv: The masked BinaryHDV. project_id: Project identifier used for masking. Returns: Original unmasked BinaryHDV. """ return self.apply_mask(masked_hdv, project_id) def clear_cache(self) -> None: """Clear the mask cache (useful for testing).""" self._mask_cache.clear() def is_isolated( self, hdv_a: BinaryHDV, project_id_a: str, hdv_b: BinaryHDV, project_id_b: str, threshold: float = 0.55, ) -> bool: """ Check if two vectors are properly isolated (different projects). After masking, vectors from different projects should have ~50% similarity. This method checks if the cross-project similarity is within expected bounds. Args: hdv_a: First (unmasked) vector. project_id_a: First vector's project. hdv_b: Second (unmasked) vector. project_id_b: Second vector's project. threshold: Maximum similarity for "isolated" (default 0.55). Returns: True if vectors are isolated (different projects), False otherwise. """ if project_id_a == project_id_b: return False # Same project = not isolated masked_a = self.apply_mask(hdv_a, project_id_a) masked_b = self.apply_mask(hdv_b, project_id_b) similarity = masked_a.similarity(masked_b) return similarity < threshold