Fabuilds committed on
Commit
66363ae
·
verified ·
1 Parent(s): 3eb08b0

Delete in_memory_index.py

Browse files
Files changed (1) hide show
  1. in_memory_index.py +0 -471
in_memory_index.py DELETED
@@ -1,471 +0,0 @@
1
- """
2
- IN-MEMORY PATTERN INDEX
3
- Fast lookup without HDD writes - merge existing + conversation + Gemini chat patterns
4
- """
5
- import sys
6
- import os
7
- import json
8
- import time
9
- import re
10
-
11
- try:
12
- from System.semantic_embedder import SemanticEmbedder
13
- except ImportError:
14
- try:
15
- from semantic_embedder import SemanticEmbedder
16
- except ImportError:
17
- # Final fallback for scripts in Shop/
18
- sys.path.append(os.path.dirname(os.path.abspath(__file__)))
19
- from semantic_embedder import SemanticEmbedder
20
- # Existing 5 lattice patterns
21
- LATTICE_PATTERNS = {
22
- "PATTERN_SINGLETON_DATABASE": {
23
- "lba": 8534859776,
24
- "domain": "SOFTWARE_ARCHITECTURE",
25
- "problem": "Need to ensure only one database connection exists",
26
- "solution": "Singleton pattern with thread-safe initialization",
27
- "reusability": 9,
28
- "confidence": 0.82
29
- },
30
- "PATTERN_REACT_HOOKS_DEPS": {
31
- "lba": 3371401216,
32
- "domain": "WEB_DEVELOPMENT",
33
- "problem": "React component not re-rendering when props change",
34
- "solution": "Add dependency array to useEffect",
35
- "reusability": 10,
36
- "confidence": 0.85
37
- }
38
- }
39
-
40
- CONVERSATION_PATTERNS = {
41
- "AGENT_IS_LATTICE": {
42
- "domain": "CONCEPTUAL",
43
- "problem": "Separation between agent and data structure",
44
- "solution": "Agent is non-orientable surface - no inside/outside separation",
45
- "confidence": 0.95
46
- }
47
- }
48
-
49
- class InMemoryIndex:
50
- """
51
- Adaptive Distillation Index.
52
-
53
- Tracks pattern hit counts to distinguish signal from noise:
54
- - Once-patterns (1 hit) = UNCONFIRMED (might be noise)
55
- - Twice-patterns (2 hits) = PLAUSIBLE
56
- - Multi-patterns (3+ hits) = CONFIRMED (logic)
57
-
58
- The lattice self-cleans through use. Signal persists, noise decays.
59
- """
60
-
61
- # Hit tracking file handled dynamically in __init__
62
- HIT_LOG_PATH = None
63
-
64
- # Magnitude layers: logic exists in layers
65
- # Layer 0: Surface (keyword substring match) = low magnitude
66
- # Layer 1: Structural (multi-word + domain match) = medium magnitude
67
- # Layer 2: Conceptual (phrase match in problem/solution) = high magnitude
68
- # Decay: magnitude halves every DECAY_HALF_LIFE seconds without a hit
69
- DECAY_HALF_LIFE = 86400 # 24 hours
70
-
71
- MAGNITUDE_LAYERS = {
72
- "surface": 0.3, # keyword substring match (low relevance)
73
- "structural": 0.6, # multi-word + domain match (medium)
74
- "conceptual": 1.0, # full phrase match in problem/solution (high)
75
- }
76
-
77
- def __init__(self):
78
- # Handle relative pathing for portability
79
- BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
80
- self.LATTICE_DB_DIR = os.path.join(BASE_DIR, "Lattice_DB")
81
- self.HIT_LOG_PATH = os.path.join(self.LATTICE_DB_DIR, "pattern_hits.json")
82
-
83
- index_path = os.path.join(self.LATTICE_DB_DIR, "dual_anchor_index.json")
84
-
85
- if os.path.exists(index_path):
86
- with open(index_path, 'r') as f:
87
- data = json.load(f)
88
- self.patterns = data.get('patterns', {})
89
- sources = data.get('sources', {})
90
- print(f"[INDEX] Loaded {len(self.patterns)} dual-anchor patterns")
91
- else:
92
- # Fallback to original patterns
93
- self.patterns = {}
94
- self.load_lattice_patterns()
95
- self.load_conversation_patterns()
96
- print("[INDEX] Dual-anchor index not found, using original 16 patterns")
97
-
98
- # Load hit tracking (magnitude-weighted)
99
- self.hits = self._load_hits()
100
-
101
- # Calculate adaptive threshold based on pattern count
102
- self.base_threshold = 0.3 + (0.4 * min(len(self.patterns) / 200, 1.0))
103
-
104
- # Initialize Semantic Engine
105
- print("[INDEX] Initializing Semantic Manifold...")
106
- self.embedder = SemanticEmbedder()
107
- self.pattern_vectors = {}
108
- self._reindex_vectors()
109
-
110
- confirmed = sum(1 for h in self.hits.values() if self._total_magnitude(h) >= 2.0)
111
- unconfirmed = sum(1 for h in self.hits.values() if 0 < self._total_magnitude(h) < 1.0)
112
- print(f"[DISTILLER] Confirmed: {confirmed} | Unconfirmed: {unconfirmed} | Threshold: {self.base_threshold:.2f}")
113
- self.word_freq = self._calculate_word_freq()
114
-
115
- def _reindex_vectors(self):
116
- """Pre-calculates semantic embeddings for all known patterns."""
117
- print(f"[INDEX]: Generating embeddings for {len(self.patterns)} patterns...")
118
- for label, p in self.patterns.items():
119
- # Combine problem + solution for semantic context
120
- context = f"{p.get('problem', '')} {p.get('solution', '')} {label}"
121
- self.pattern_vectors[label] = self.embedder.embed_text(context)
122
- print(f"[INDEX]: ✅ Semantic manifold mapped ({len(self.pattern_vectors)} vectors).")
123
-
124
- def _calculate_word_freq(self):
125
- """Calculate inverse pattern frequency (IPF) for lean semantic weighting."""
126
- freq = {}
127
- for p in self.patterns.values():
128
- text = (p.get('problem','') + " " + p.get('solution','')).lower()
129
- words = set(re.findall(r'\w+', text))
130
- for w in words:
131
- freq[w] = freq.get(w, 0) + 1
132
- return freq
133
-
134
- def _get_word_weight(self, word, structural_weight):
135
- """Calculate semantic weight: rare words matter more."""
136
- count = self.word_freq.get(word, 0)
137
- if count == 0: return structural_weight
138
- # Logarithmic scale for IPF: weight = 1 + log(total / count)
139
- import math
140
- ipf = 1.0 + math.log(len(self.patterns) / count)
141
- return structural_weight * ipf
142
-
143
- def _fuzzy_match(self, w1, w2):
144
- """Lightweight Jaccard similarity for fuzzy matching."""
145
- if w1 == w2: return 1.0
146
- if len(w1) < 4 or len(w2) < 4: return 0.0
147
- s1, s2 = set(w1), set(w2)
148
- intersection = len(s1 & s2)
149
- union = len(s1 | s2)
150
- score = intersection / union
151
- return score if score > 0.7 else 0.0
152
-
153
- def _load_hits(self):
154
- """Load magnitude-weighted hit data from disk."""
155
- if os.path.exists(self.HIT_LOG_PATH):
156
- with open(self.HIT_LOG_PATH, 'r') as f:
157
- data = json.load(f)
158
- # Backward compat: convert flat counts to magnitude format
159
- for label, val in data.items():
160
- if isinstance(val, (int, float)):
161
- data[label] = {"count": int(val), "magnitude": float(val) * 0.5, "layers": []}
162
- return data
163
- return {}
164
-
165
- def _save_hits(self):
166
- """Persist hit data to disk."""
167
- with open(self.HIT_LOG_PATH, 'w') as f:
168
- json.dump(self.hits, f, indent=2)
169
-
170
- def _total_magnitude(self, hit_data):
171
- """Get current magnitude with decay applied."""
172
- if isinstance(hit_data, dict):
173
- raw_mag = hit_data.get('magnitude', 0)
174
- last_hit = hit_data.get('last_hit', 0)
175
- if last_hit > 0 and raw_mag > 0:
176
- elapsed = time.time() - last_hit
177
- # Halve every DECAY_HALF_LIFE seconds
178
- decay_factor = 0.5 ** (elapsed / self.DECAY_HALF_LIFE)
179
- return raw_mag * decay_factor
180
- return raw_mag
181
- return float(hit_data) * 0.5 # backward compat
182
-
183
- def _classify_relevance(self, relevance):
184
- """Classify match into magnitude layer based on relevance score."""
185
- if relevance >= 0.7:
186
- return "conceptual", self.MAGNITUDE_LAYERS["conceptual"]
187
- elif relevance >= 0.4:
188
- return "structural", self.MAGNITUDE_LAYERS["structural"]
189
- else:
190
- return "surface", self.MAGNITUDE_LAYERS["surface"]
191
-
192
- def _record_hit(self, label, relevance):
193
- """Record a hit. Re-mention restores magnitude to peak."""
194
- layer_name, magnitude = self._classify_relevance(relevance)
195
-
196
- if label not in self.hits:
197
- self.hits[label] = {"count": 0, "magnitude": 0.0, "peak": 0.0, "layers": [], "last_hit": 0}
198
-
199
- h = self.hits[label]
200
- h["count"] += 1
201
- h["last_hit"] = time.time()
202
-
203
- # Restore to peak first (re-mention recovery), then add new magnitude
204
- current_peak = h.get("peak", h["magnitude"])
205
- h["magnitude"] = current_peak + magnitude
206
- h["peak"] = h["magnitude"] # new peak
207
-
208
- # Track which layers have been hit
209
- if layer_name not in h["layers"]:
210
- h["layers"].append(layer_name)
211
-
212
- def get_status(self, label):
213
- """Get distillation status based on decayed magnitude."""
214
- hit_data = self.hits.get(label, {})
215
- mag = self._total_magnitude(hit_data) # applies decay
216
- layers = hit_data.get('layers', []) if isinstance(hit_data, dict) else []
217
-
218
- if mag == 0:
219
- return "NEW"
220
- elif mag < 1.0:
221
- return "UNCONFIRMED" # surface-only = might be noise
222
- elif mag < 2.0:
223
- return "PLAUSIBLE"
224
- elif len(layers) >= 2:
225
- return "DEEP_LOGIC" # hit at multiple layers = real
226
- else:
227
- return "CONFIRMED" # high magnitude single layer
228
-
229
- def add_note(self, text, domain="NOTE", forced_label=None):
230
- """Add a new pattern from freeform text. Self-organizing entry point."""
231
- if forced_label:
232
- label = forced_label
233
- else:
234
- # Auto-generate label from text
235
- words = re.sub(r'[^a-zA-Z0-9\s]', '', text).upper().split()
236
- # Take first 4 meaningful words for label
237
- label_words = [w for w in words if len(w) > 2][:4]
238
- label = "_".join(label_words) if label_words else "NOTE_" + str(int(time.time()))
239
-
240
- # Don't overwrite existing patterns unless forced
241
- if label in self.patterns and not forced_label:
242
- label = label + "_" + str(int(time.time()) % 10000)
243
-
244
- self.patterns[label] = {
245
- "problem": text,
246
- "solution": text,
247
- "domain": domain,
248
- "confidence": 0.5, # starts neutral
249
- "source": "notepad",
250
- "type": "NOTE",
251
- "created": time.time(),
252
- }
253
-
254
- # Initial hit at conceptual layer (you wrote it = you meant it)
255
- self._record_hit(label, 1.0)
256
- self._save_hits()
257
-
258
- # Update threshold for new pattern count
259
- self.base_threshold = 0.3 + (0.4 * min(len(self.patterns) / 200, 1.0))
260
-
261
- return label
262
-
263
- def load_lattice_patterns(self):
264
- """Load existing 5 patterns from lattice."""
265
- for label, data in LATTICE_PATTERNS.items():
266
- self.patterns[label] = {
267
- **data,
268
- "source": "lattice",
269
- "type": "CODE_PATTERN"
270
- }
271
-
272
- def load_conversation_patterns(self):
273
- """Load 11 patterns from this conversation."""
274
- for label, data in CONVERSATION_PATTERNS.items():
275
- self.patterns[label] = {
276
- **data,
277
- "source": "conversation_0938ac6c",
278
- "type": "INSIGHT"
279
- }
280
-
281
- def search(self, query, threshold=None, record=True):
282
- """
283
- Adaptive distillation search.
284
-
285
- - Matches patterns using phrase + word relevance
286
- - Integrates 384-dim semantic similarity from manifolds
287
- - Records hits for matched patterns
288
- """
289
- if threshold is None:
290
- threshold = self.base_threshold
291
-
292
- results = []
293
- query_lower = query.lower()
294
-
295
- # 1. Generate Query Vector
296
- query_vector = self.embedder.embed_text(query)
297
-
298
- # 2. Hard matching patterns
299
- STRUCTURAL_WORDS = { 'a', 'an', 'the', 'is', 'it', 'in', 'on', 'at', 'to', 'of', 'and', 'or', 'but' }
300
- query_words = [(w, self._get_word_weight(w, 0.3 if w in STRUCTURAL_WORDS else 1.0)) for w in query_lower.split()]
301
- links = re.findall(r'\[\[(\w+)\]\]', query_lower)
302
-
303
- for label, pattern in self.patterns.items():
304
- problem = pattern.get('problem', '').lower()
305
- solution = pattern.get('solution', '').lower()
306
- label_text = label.lower()
307
-
308
- relevance = 0
309
-
310
- # Semantic Boost (Manifold Pathfinding)
311
- pattern_vector = self.pattern_vectors.get(label)
312
- semantic_score = 0 # Initialize semantic_score
313
- if pattern_vector:
314
- semantic_score = self.embedder.cosine_similarity(query_vector, pattern_vector)
315
- # Apply high weight to semantic resonance (The "LOVE" Anchor)
316
- relevance += (semantic_score * 0.8)
317
-
318
- # Exact phrase match (The 0x52 Anchor)
319
- if query_lower in problem: relevance += 0.4
320
- if query_lower in solution: relevance += 0.3
321
- if query_lower in label_text: relevance += 0.5
322
-
323
- # Link boost
324
- if label.lower() in links: relevance += 2.0
325
-
326
- # Combine logic
327
- if relevance >= threshold:
328
- status = self.get_status(label)
329
-
330
- # Record magnitude-weighted hit
331
- if record:
332
- self._record_hit(label, relevance)
333
-
334
- hit_data = self.hits.get(label, {})
335
- results.append({
336
- "label": label,
337
- "relevance": relevance,
338
- "confidence": pattern.get('confidence', 0.5),
339
- "status": status,
340
- "hits": hit_data.get('count', 0) if isinstance(hit_data, dict) else 0,
341
- "magnitude": self._total_magnitude(hit_data),
342
- "layers": hit_data.get('layers', []) if isinstance(hit_data, dict) else [],
343
- **pattern
344
- })
345
-
346
- # Sort by: confirmed first, then relevance, then confidence
347
- status_order = {"DEEP_LOGIC": 4, "CONFIRMED": 3, "PLAUSIBLE": 2, "UNCONFIRMED": 1, "NEW": 0}
348
- results.sort(key=lambda x: (
349
- status_order.get(x.get('status', 'NEW'), 0),
350
- x['relevance'],
351
- x['confidence']
352
- ), reverse=True)
353
-
354
- # Save hits after search
355
- if record:
356
- self._save_hits()
357
-
358
- return results
359
-
360
- def distillation_report(self):
361
- """Report on pattern distillation with magnitude layers."""
362
- deep_logic = []
363
- confirmed = []
364
- plausible = []
365
- unconfirmed = []
366
- new_patterns = []
367
-
368
- for label in self.patterns:
369
- status = self.get_status(label)
370
- hit_data = self.hits.get(label, {})
371
- mag = self._total_magnitude(hit_data)
372
- layers = hit_data.get('layers', []) if isinstance(hit_data, dict) else []
373
-
374
- entry = (label, mag, layers)
375
- if status == "DEEP_LOGIC":
376
- deep_logic.append(entry)
377
- elif status == "CONFIRMED":
378
- confirmed.append(entry)
379
- elif status == "PLAUSIBLE":
380
- plausible.append(entry)
381
- elif status == "UNCONFIRMED":
382
- unconfirmed.append(entry)
383
- else:
384
- new_patterns.append(entry)
385
-
386
- print(f"\n{'='*60}")
387
- print(f"DISTILLATION REPORT (Magnitude Layers)")
388
- print(f"{'='*60}")
389
- print(f"Total patterns: {len(self.patterns)}")
390
- print(f" DEEP_LOGIC (multi-layer): {len(deep_logic)} = verified across layers")
391
- print(f" CONFIRMED (mag >= 2.0): {len(confirmed)} = strong signal")
392
- print(f" PLAUSIBLE (mag 1.0-2.0): {len(plausible)} = growing")
393
- print(f" UNCONFIRMED (mag < 1.0): {len(unconfirmed)} = potential noise")
394
- print(f" NEW (untested): {len(new_patterns)}")
395
- print(f"\nAdaptive threshold: {self.base_threshold:.2f}")
396
-
397
- if deep_logic:
398
- print(f"\nDEEP LOGIC (multi-layer verified):")
399
- for label, mag, layers in sorted(deep_logic, key=lambda x: x[1], reverse=True):
400
- print(f" [mag:{mag:.1f}] [{'+'.join(layers)}] {label}")
401
-
402
- if confirmed:
403
- print(f"\nCONFIRMED (strong signal):")
404
- for label, mag, layers in sorted(confirmed, key=lambda x: x[1], reverse=True):
405
- print(f" [mag:{mag:.1f}] [{'+'.join(layers)}] {label}")
406
-
407
- if unconfirmed:
408
- print(f"\nUNCONFIRMED (potential noise):")
409
- for label, mag, layers in unconfirmed:
410
- print(f" [mag:{mag:.1f}] [{'+'.join(layers)}] {label}")
411
-
412
- return {
413
- "confirmed": len(confirmed),
414
- "plausible": len(plausible),
415
- "unconfirmed": len(unconfirmed),
416
- "new": len(new_patterns),
417
- "threshold": self.base_threshold
418
- }
419
-
420
- def save_to_json(self, path):
421
- """Persist to JSON for inspection."""
422
- with open(path, 'w') as f:
423
- json.dump({
424
- "total_patterns": len(self.patterns),
425
- "sources": {
426
- "lattice": len(LATTICE_PATTERNS),
427
- "conversation": len(CONVERSATION_PATTERNS)
428
- },
429
- "patterns": self.patterns
430
- }, f, indent=2)
431
- print(f"\n💾 Saved index to: {path}")
432
-
433
- def stats(self):
434
- """Print statistics."""
435
- print(f"\n{'='*60}")
436
- print(f"IN-MEMORY PATTERN INDEX")
437
- print(f"{'='*60}")
438
- print(f"Total patterns: {len(self.patterns)}")
439
- print(f" From lattice: {len(LATTICE_PATTERNS)}")
440
- print(f" From conversation: {len(CONVERSATION_PATTERNS)}")
441
- print(f"Average confidence: {sum(p.get('confidence', 0.5) for p in self.patterns.values()) / len(self.patterns):.0%}")
442
-
443
- # Domain breakdown
444
- domains = {}
445
- for p in self.patterns.values():
446
- d = p.get('domain', 'UNKNOWN')
447
- domains[d] = domains.get(d, 0) + 1
448
-
449
- print(f"\nDomains:")
450
- for domain, count in sorted(domains.items(), key=lambda x: x[1], reverse=True):
451
- print(f" {domain}: {count}")
452
-
453
- if __name__ == "__main__":
454
- index = InMemoryIndex()
455
- index.stats()
456
-
457
- # Save to JSON
458
- save_path = os.path.join(index.LATTICE_DB_DIR, "in_memory_index.json")
459
- index.save_to_json(save_path)
460
-
461
- # Test search
462
- print(f"\n{'='*60}")
463
- print(f"TEST SEARCHES")
464
- print(f"{'='*60}\n")
465
-
466
- for query in ["singleton", "react", "lattice", "honest"]:
467
- results = index.search(query)
468
- print(f"Query: '{query}' → {len(results)} results")
469
- if results:
470
- print(f" Top: {results[0]['label']} ({results[0]['confidence']:.0%})")
471
- print()