# MnemoCore/src/mnemocore/core/attention.py
# Repository page header: Granis87 — "Initial upload of MnemoCore" — dbb04e4 (verified)
"""
Contextual Query Masking via XOR Attention (Phase 4.0)
======================================================
Implements an XOR-based soft attention mechanism over Binary HDV space.
How it works:
1. A "context key" is constructed by bundling recent HOT-tier vectors.
2. A XOR attention mask is generated: mask = query XOR context_key
This creates a residual vector that is ORTHOGONAL to the context,
effectively suppressing already-known dimensions and amplifying novel ones.
3. Query results are re-ranked by a composite score:
composite = alpha * raw_similarity + beta * novelty_boost(mask, mem_hdv)
4. The mask is also available for downstream gap-detection.
Motivation (VSA theory):
- XOR in binary HDV space is the self-inverse binding operator.
- query.xor(context) ≈ "what about this query is NOT already represented in context?"
- Hamming similarity(mask, candidate) ≈ novelty of candidate relative to context.
Phase 4.1: XOR-based Project Isolation
======================================
XORIsolationMask provides deterministic project-based memory isolation:
- Each project_id derives a unique binary mask via SHA256(project_id) -> seed -> RNG
- store(): masked_hdv = original_hdv XOR project_mask
- query(): masked_query = query_hdv XOR project_mask (then search in masked space)
- Memories from different projects are effectively orthogonal (~50% similarity)
"""
from __future__ import annotations
import hashlib
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
import numpy as np
from loguru import logger
from .binary_hdv import BinaryHDV, majority_bundle
@dataclass
class AttentionConfig:
    """Tunable hyperparameters for XOR attention.

    ``alpha`` and ``beta`` are the blend weights of the composite score
    ``alpha * raw_similarity + beta * novelty``; ``validate()`` requires
    each to lie in [0, 1] and the pair to sum to 1.0.
    """

    alpha: float = 0.6  # Weight for raw similarity
    beta: float = 0.4  # Weight for novelty score from XOR mask
    context_sample_n: int = 50  # How many HOT nodes to include in context key
    min_novelty_boost: float = 0.0  # Floor for novelty contribution
    # On/off switch. Nothing in this class reads it; presumably callers gate
    # the attention step on it — verify against the call sites.
    enabled: bool = True

    def validate(self) -> None:
        """Check that the blend weights are usable.

        Raises:
            AssertionError: if ``alpha`` or ``beta`` is outside [0, 1], or
                if ``alpha + beta`` differs from 1.0 by 1e-6 or more.
        """
        # Raise explicitly rather than via `assert`: assert statements are
        # removed when Python runs with -O, which would silently turn this
        # method into a no-op. AssertionError is kept so that existing
        # callers observe the same exception type and messages.
        if not (0.0 <= self.alpha <= 1.0):
            raise AssertionError("alpha must be in [0, 1]")
        if not (0.0 <= self.beta <= 1.0):
            raise AssertionError("beta must be in [0, 1]")
        if not (abs((self.alpha + self.beta) - 1.0) < 1e-6):
            raise AssertionError("alpha + beta must equal 1.0")
@dataclass
class AttentionResult:
    """One re-ranked candidate plus the individual scores behind its rank."""

    # Identifier of the memory node this entry describes.
    node_id: str
    # Similarity supplied by the initial retrieval step.
    raw_score: float
    # Novelty term that was blended into the composite score.
    novelty_score: float
    # Final blended score; rerank() orders its output by this value.
    composite_score: float
    # Mask the candidate was scored against; kept out of repr() output.
    attention_mask: Optional[BinaryHDV] = field(repr=False, default=None)
class XORAttentionMasker:
    """
    Contextual query masking using XOR binding in binary HDV space.

    Usage:
        masker = XORAttentionMasker(config)
        context_key = masker.build_context_key(context_vecs)
        mask = masker.build_attention_mask(query_vec, context_key)
        reranked = masker.rerank(raw_scores, memory_vectors, mask)
        scores = masker.extract_scores(reranked)
    """

    def __init__(self, config: Optional[AttentionConfig] = None):
        """Store the given config, or a default AttentionConfig if none is passed."""
        self.config = config or AttentionConfig()

    def build_context_key(
        self,
        context_nodes_hdv: List[BinaryHDV],
        fallback_dimension: int = 16384,
    ) -> BinaryHDV:
        """
        Bundle HOT-tier vectors into a single context summary key.

        Args:
            context_nodes_hdv: Vectors to bundle; may be empty.
            fallback_dimension: Dimension of the zero vector returned when
                no context is available. Pass the query's dimension when it
                is not the default 16384.

        Returns:
            majority_bundle(context_nodes_hdv) when context is available,
            otherwise a zero vector of ``fallback_dimension`` bits.
        """
        if not context_nodes_hdv:
            # The list is empty here, so there is no vector to read a
            # dimension from; the caller-supplied fallback is the only source.
            return BinaryHDV.zeros(fallback_dimension)
        return majority_bundle(context_nodes_hdv)

    def build_attention_mask(
        self,
        query_vec: BinaryHDV,
        context_key: BinaryHDV,
    ) -> BinaryHDV:
        """
        Compute XOR attention mask: mask = query XOR context_key.

        The mask represents "query minus context" — bits that are unique
        to the query compared to what the system already holds in working memory.
        High Hamming similarity between mask and a candidate → that candidate
        is novel / peripheral relative to the current context.

        Args:
            query_vec: The query vector.
            context_key: Context summary, e.g. from build_context_key().

        Returns:
            The result of query_vec.xor_bind(context_key).
        """
        mask = query_vec.xor_bind(context_key)
        logger.debug(
            "Built XOR attention mask — "
            f"query/context Hamming dist = {query_vec.normalized_distance(context_key):.4f}"
        )
        return mask

    def novelty_score(self, mask: BinaryHDV, candidate_hdv: BinaryHDV) -> float:
        """
        Calculate novelty of a candidate relative to the context.

        Defined as mask.similarity(candidate_hdv) (presumably a Hamming
        similarity in [0, 1] — confirm against BinaryHDV.similarity).
        Higher value → candidate is more "attention-worthy" given the query context.
        """
        return mask.similarity(candidate_hdv)

    def rerank(
        self,
        raw_scores: Dict[str, float],
        memory_vectors: Dict[str, BinaryHDV],
        mask: BinaryHDV,
    ) -> List[AttentionResult]:
        """
        Re-rank retrieved memories using the composite XOR attention score.

        composite = alpha * raw + beta * max(novelty, min_novelty_boost)

        Args:
            raw_scores: {node_id: raw_similarity} from initial retrieval.
            memory_vectors: {node_id: BinaryHDV} for novelty calculation.
            mask: XOR attention mask built from query and context.

        Returns:
            Sorted list of AttentionResult (highest composite first). Every
            key of raw_scores yields exactly one result.
        """
        cfg = self.config
        results: List[AttentionResult] = []
        for node_id, raw in raw_scores.items():
            hdv = memory_vectors.get(node_id)
            if hdv is None:
                # No vector available for this node: use the configured floor
                # so the node is still ranked on its raw score.
                novelty = cfg.min_novelty_boost
            else:
                novelty = max(self.novelty_score(mask, hdv), cfg.min_novelty_boost)
            composite = cfg.alpha * raw + cfg.beta * novelty
            results.append(
                AttentionResult(
                    node_id=node_id,
                    raw_score=raw,
                    novelty_score=novelty,
                    composite_score=composite,
                    attention_mask=mask,
                )
            )
        # list.sort is stable, so ties keep the iteration order of raw_scores.
        results.sort(key=lambda r: r.composite_score, reverse=True)
        return results

    def extract_scores(
        self, results: List[AttentionResult]
    ) -> List[Tuple[str, float]]:
        """Convert AttentionResult list to the standard (node_id, score) tuple format."""
        return [(r.node_id, r.composite_score) for r in results]
# ==============================================================================
# Phase 4.1: XOR-based Project Isolation
# ==============================================================================
@dataclass
class IsolationConfig:
    """Configuration for XOR-based project isolation."""

    # When False, XORIsolationMask.apply_mask returns vectors unchanged.
    enabled: bool = True
    # Mask length in bits; masks are generated as dimension // 8 bytes.
    dimension: int = 16384

    def validate(self) -> None:
        """Check that ``dimension`` can be packed into whole bytes.

        Raises:
            AssertionError: if ``dimension`` is not positive or is not a
                multiple of 8.
        """
        # Raise explicitly rather than via `assert`: assert statements are
        # removed when Python runs with -O, which would silently disable
        # these checks. AssertionError is kept so that existing callers
        # observe the same exception type and messages.
        if not (self.dimension > 0):
            raise AssertionError("dimension must be positive")
        if not (self.dimension % 8 == 0):
            raise AssertionError("dimension must be multiple of 8")
class XORIsolationMask:
    """
    Deterministic XOR-based isolation mask for multi-tenant memory isolation.

    Design:
    -------
    Each project_id derives a unique binary mask through:
        SHA256(project_id) -> first 64 bits -> seed -> np.random.Generator -> binary mask

    The mask is applied via XOR binding:
    - store(content, project_id="A"): masked_hdv = original_hdv XOR mask_A
    - query(query_text, project_id="A"): masked_query = query_hdv XOR mask_A
      (the query is masked the same way so it can be compared in masked space)

    Properties:
    -----------
    - Self-inverse: XOR twice with the same mask recovers the original vector
    - Deterministic: Same project_id always produces the same mask
      (for a given NumPy bit-generator stream)
    - Orthogonal isolation: Different projects' masks are ~50% different (random)
    - No key management: project_id IS the key (no external secrets needed)

    Security Model:
    ---------------
    This is a namespacing / soft-isolation mechanism, NOT encryption:
    - The mask is a pure function of project_id; anyone who knows or guesses
      a project_id can regenerate the mask and unmask that project's vectors.
    - The same mask is reused for every vector of a project, so one-time-pad
      guarantees do not apply. XOR with a fixed mask preserves the Hamming
      distance between any two vectors masked with it.
    - The mask bits come from NumPy's default generator seeded with 64 bits;
      NumPy documents its generators as unsuitable for cryptographic use.
    - Cross-project comparisons of unrelated vectors land near the ~50%
      similarity baseline, which keeps projects from matching each other's
      memories by accident.

    Usage:
    ------
        masker = XORIsolationMask(config)
        mask = masker.get_mask("project-alpha")  # Deterministic mask

        # Store
        masked_hdv = masker.apply_mask(original_hdv, "project-alpha")

        # Query (apply same mask to query to search in masked space)
        masked_query = masker.apply_mask(query_hdv, "project-alpha")

        # Remove mask (if needed for inspection)
        original = masker.remove_mask(masked_hdv, "project-alpha")
    """

    def __init__(self, config: Optional[IsolationConfig] = None):
        """Store the config (default IsolationConfig) and start with an empty mask cache."""
        self.config = config or IsolationConfig()
        # project_id -> generated mask; grows by one entry per distinct project.
        self._mask_cache: Dict[str, BinaryHDV] = {}

    def _derive_seed(self, project_id: str) -> int:
        """
        Derive a deterministic 64-bit seed from project_id using SHA256.

        The digest input is the project_id prefixed with the fixed
        domain-separation tag "mnemo_isolation_v1:"; the seed is the first
        8 digest bytes read as a big-endian unsigned integer.

        Args:
            project_id: Unique project identifier string.

        Returns:
            64-bit integer seed for numpy's Generator.
        """
        digest = hashlib.sha256(f"mnemo_isolation_v1:{project_id}".encode()).digest()
        return int.from_bytes(digest[:8], byteorder="big", signed=False)

    def get_mask(self, project_id: str) -> BinaryHDV:
        """
        Get or create the deterministic isolation mask for a project.

        The mask is cached for efficiency. Same project_id always returns
        the same BinaryHDV mask object until clear_cache() is called.

        Args:
            project_id: Unique project identifier.

        Returns:
            BinaryHDV mask of dimension self.config.dimension.
        """
        # Single lookup instead of a membership test followed by indexing.
        cached = self._mask_cache.get(project_id)
        if cached is not None:
            return cached

        seed = self._derive_seed(project_id)
        rng = np.random.default_rng(seed)

        # Generate random binary mask, one uniformly random byte per 8 bits.
        n_bytes = self.config.dimension // 8
        mask_bytes = rng.integers(0, 256, size=n_bytes, dtype=np.uint8)
        mask = BinaryHDV(data=mask_bytes, dimension=self.config.dimension)

        self._mask_cache[project_id] = mask
        # NOTE(review): this writes project_id and seed to the debug log.
        # Either value is enough to regenerate the mask, so consider
        # redacting them if debug logs are shared across tenants.
        logger.debug(f"Generated isolation mask for project '{project_id}' (seed={seed})")
        return mask

    def apply_mask(self, hdv: BinaryHDV, project_id: str) -> BinaryHDV:
        """
        Apply project isolation mask to a vector (XOR binding).

        Args:
            hdv: The BinaryHDV to mask.
            project_id: Project identifier for mask derivation.

        Returns:
            Masked BinaryHDV (original XOR project_mask), or ``hdv`` itself
            when isolation is disabled in the config.
        """
        if not self.config.enabled:
            return hdv
        return hdv.xor_bind(self.get_mask(project_id))

    def remove_mask(self, masked_hdv: BinaryHDV, project_id: str) -> BinaryHDV:
        """
        Remove project isolation mask from a vector (XOR is self-inverse).

        Note: This is identical to apply_mask() due to XOR's self-inverse property.
        Kept as a separate method for semantic clarity.

        Args:
            masked_hdv: The masked BinaryHDV.
            project_id: Project identifier used for masking.

        Returns:
            Original unmasked BinaryHDV.
        """
        return self.apply_mask(masked_hdv, project_id)

    def clear_cache(self) -> None:
        """Clear the mask cache (useful for testing)."""
        self._mask_cache.clear()

    def is_isolated(
        self,
        hdv_a: BinaryHDV,
        project_id_a: str,
        hdv_b: BinaryHDV,
        project_id_b: str,
        threshold: float = 0.55,
    ) -> bool:
        """
        Check if two vectors are properly isolated (different projects).

        After masking, vectors from different projects should have ~50% similarity.
        This method checks if the cross-project similarity is within expected bounds.
        When isolation is disabled in the config, apply_mask is a no-op, so
        the comparison is made on the raw vectors.

        Args:
            hdv_a: First (unmasked) vector.
            project_id_a: First vector's project.
            hdv_b: Second (unmasked) vector.
            project_id_b: Second vector's project.
            threshold: Maximum similarity for "isolated" (default 0.55).

        Returns:
            True if the project ids differ and the masked similarity is
            strictly below ``threshold``; False otherwise.
        """
        if project_id_a == project_id_b:
            return False  # Same project = not isolated

        masked_a = self.apply_mask(hdv_a, project_id_a)
        masked_b = self.apply_mask(hdv_b, project_id_b)
        return masked_a.similarity(masked_b) < threshold