File size: 19,312 Bytes
d68c0f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
"""

IN-MEMORY PATTERN INDEX

Fast lookup without HDD writes - merge existing + conversation + Gemini chat patterns

"""
import json
import math
import os
import re
import sys
import time

try:
    from System.semantic_embedder import SemanticEmbedder
except ImportError:
    try:
        from semantic_embedder import SemanticEmbedder
    except ImportError:
        # Final fallback for scripts in Shop/
        sys.path.append(os.path.dirname(os.path.abspath(__file__)))
        from semantic_embedder import SemanticEmbedder
# Built-in seed patterns from the lattice, used as a fallback when the
# dual-anchor index file is absent.
# NOTE(review): an earlier comment claimed 5 patterns, but only 2 are
# defined here — confirm whether the others were removed intentionally.
LATTICE_PATTERNS = {
    "PATTERN_SINGLETON_DATABASE": {
        "lba": 8534859776,
        "domain": "SOFTWARE_ARCHITECTURE",
        "problem": "Need to ensure only one database connection exists",
        "solution": "Singleton pattern with thread-safe initialization",
        "reusability": 9,
        "confidence": 0.82
    },
    "PATTERN_REACT_HOOKS_DEPS": {
        "lba": 3371401216,
        "domain": "WEB_DEVELOPMENT",
        "problem": "React component not re-rendering when props change",
        "solution": "Add dependency array to useEffect",
        "reusability": 10,
        "confidence": 0.85
    }
}

# Insight patterns distilled from a past conversation; merged into the
# index alongside LATTICE_PATTERNS when the dual-anchor index is absent.
# NOTE(review): only 1 pattern is defined here, although a method docstring
# elsewhere mentions 11 — verify against the conversation source.
CONVERSATION_PATTERNS = {
    "AGENT_IS_LATTICE": {
        "domain": "CONCEPTUAL",
        "problem": "Separation between agent and data structure",
        "solution": "Agent is non-orientable surface - no inside/outside separation",
        "confidence": 0.95
    }
}

class InMemoryIndex:
    """
    Adaptive Distillation Index.

    Tracks magnitude-weighted pattern hit counts to distinguish signal
    from noise:

    - Once-patterns (1 hit)    = UNCONFIRMED (might be noise)
    - Twice-patterns (2 hits)  = PLAUSIBLE
    - Multi-patterns (3+ hits) = CONFIRMED (logic)

    The lattice self-cleans through use: signal persists, noise decays
    (magnitude halves every DECAY_HALF_LIFE seconds without a hit).
    """

    # Hit-tracking file path; resolved per-instance in __init__ because it
    # depends on where this file is installed.
    HIT_LOG_PATH = None

    # Magnitude layers: logic exists in layers
    # Layer 0: Surface (keyword substring match) = low magnitude
    # Layer 1: Structural (multi-word + domain match) = medium magnitude
    # Layer 2: Conceptual (phrase match in problem/solution) = high magnitude
    # Decay: magnitude halves every DECAY_HALF_LIFE seconds without a hit
    DECAY_HALF_LIFE = 86400  # 24 hours

    MAGNITUDE_LAYERS = {
        "surface": 0.3,      # keyword substring match (low relevance)
        "structural": 0.6,   # multi-word + domain match (medium)
        "conceptual": 1.0,   # full phrase match in problem/solution (high)
    }

    def __init__(self):
        # Resolve all paths relative to this file so the index is portable.
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        self.LATTICE_DB_DIR = os.path.join(base_dir, "Lattice_DB")
        self.HIT_LOG_PATH = os.path.join(self.LATTICE_DB_DIR, "pattern_hits.json")

        index_path = os.path.join(self.LATTICE_DB_DIR, "dual_anchor_index.json")

        if os.path.exists(index_path):
            with open(index_path, 'r') as f:
                data = json.load(f)
            self.patterns = data.get('patterns', {})
            print(f"[INDEX] Loaded {len(self.patterns)} dual-anchor patterns")
        else:
            # Fall back to the built-in seed patterns.
            self.patterns = {}
            self.load_lattice_patterns()
            self.load_conversation_patterns()
            # BUGFIX: previous message hard-coded "16 patterns"; report the
            # number actually loaded.
            print(f"[INDEX] Dual-anchor index not found, using {len(self.patterns)} built-in patterns")

        # Load hit tracking (magnitude-weighted).
        self.hits = self._load_hits()

        # Adaptive threshold: grows from 0.3 toward 0.7 as the pattern
        # count approaches 200, then saturates.
        self.base_threshold = 0.3 + (0.4 * min(len(self.patterns) / 200, 1.0))

        # Initialize the semantic engine and pre-embed every pattern.
        print("[INDEX] Initializing Semantic Manifold...")
        self.embedder = SemanticEmbedder()
        self.pattern_vectors = {}
        self._reindex_vectors()

        confirmed = sum(1 for h in self.hits.values() if self._total_magnitude(h) >= 2.0)
        unconfirmed = sum(1 for h in self.hits.values() if 0 < self._total_magnitude(h) < 1.0)
        print(f"[DISTILLER] Confirmed: {confirmed} | Unconfirmed: {unconfirmed} | Threshold: {self.base_threshold:.2f}")
        self.word_freq = self._calculate_word_freq()

    def _reindex_vectors(self):
        """Pre-calculate semantic embeddings for all known patterns."""
        print(f"[INDEX]: Generating embeddings for {len(self.patterns)} patterns...")
        for label, p in self.patterns.items():
            # Combine problem + solution + label for full semantic context.
            context = f"{p.get('problem', '')} {p.get('solution', '')} {label}"
            self.pattern_vectors[label] = self.embedder.embed_text(context)
        print(f"[INDEX]: ✅ Semantic manifold mapped ({len(self.pattern_vectors)} vectors).")

    def _calculate_word_freq(self):
        """Count, per word, how many patterns mention it (base for inverse pattern frequency)."""
        freq = {}
        for p in self.patterns.values():
            text = (p.get('problem','') + " " + p.get('solution','')).lower()
            # set() so each pattern contributes at most 1 per word.
            words = set(re.findall(r'\w+', text))
            for w in words:
                freq[w] = freq.get(w, 0) + 1
        return freq

    def _get_word_weight(self, word, structural_weight):
        """Calculate semantic weight: rare words matter more.

        Words absent from every pattern keep the bare structural weight;
        known words are scaled by a logarithmic inverse-pattern-frequency
        factor: weight = structural_weight * (1 + log(total / count)).
        """
        count = self.word_freq.get(word, 0)
        if count == 0:
            return structural_weight
        ipf = 1.0 + math.log(len(self.patterns) / count)
        return structural_weight * ipf

    def _fuzzy_match(self, w1, w2):
        """Lightweight character-set Jaccard similarity for fuzzy matching.

        Returns 1.0 for identical words, 0.0 for words shorter than 4
        characters or any similarity at or below the 0.7 cutoff.
        """
        if w1 == w2: return 1.0
        if len(w1) < 4 or len(w2) < 4: return 0.0
        s1, s2 = set(w1), set(w2)
        score = len(s1 & s2) / len(s1 | s2)
        return score if score > 0.7 else 0.0

    def _load_hits(self):
        """Load magnitude-weighted hit data from disk; empty dict if no log exists."""
        if os.path.exists(self.HIT_LOG_PATH):
            with open(self.HIT_LOG_PATH, 'r') as f:
                data = json.load(f)
            # Backward compat: convert legacy flat counts to magnitude format.
            for label, val in data.items():
                if isinstance(val, (int, float)):
                    data[label] = {"count": int(val), "magnitude": float(val) * 0.5, "layers": []}
            return data
        return {}

    def _save_hits(self):
        """Persist hit data to disk."""
        with open(self.HIT_LOG_PATH, 'w') as f:
            json.dump(self.hits, f, indent=2)

    def _total_magnitude(self, hit_data):
        """Return the current magnitude with exponential decay applied."""
        if isinstance(hit_data, dict):
            raw_mag = hit_data.get('magnitude', 0)
            last_hit = hit_data.get('last_hit', 0)
            if last_hit > 0 and raw_mag > 0:
                elapsed = time.time() - last_hit
                # Halve every DECAY_HALF_LIFE seconds.
                decay_factor = 0.5 ** (elapsed / self.DECAY_HALF_LIFE)
                return raw_mag * decay_factor
            return raw_mag
        return float(hit_data) * 0.5  # backward compat: legacy flat count

    def _classify_relevance(self, relevance):
        """Map a relevance score to a (layer name, magnitude) pair."""
        if relevance >= 0.7:
            return "conceptual", self.MAGNITUDE_LAYERS["conceptual"]
        elif relevance >= 0.4:
            return "structural", self.MAGNITUDE_LAYERS["structural"]
        else:
            return "surface", self.MAGNITUDE_LAYERS["surface"]

    def _record_hit(self, label, relevance):
        """Record a hit. Re-mention restores magnitude to its previous peak."""
        layer_name, magnitude = self._classify_relevance(relevance)

        if label not in self.hits:
            self.hits[label] = {"count": 0, "magnitude": 0.0, "peak": 0.0, "layers": [], "last_hit": 0}

        h = self.hits[label]
        h["count"] += 1
        h["last_hit"] = time.time()

        # Restore to peak first (re-mention recovery), then add new magnitude.
        current_peak = h.get("peak", h["magnitude"])
        h["magnitude"] = current_peak + magnitude
        h["peak"] = h["magnitude"]  # new peak

        # Track which layers have been hit.
        if layer_name not in h["layers"]:
            h["layers"].append(layer_name)

    def get_status(self, label):
        """Return the distillation status string based on decayed magnitude."""
        hit_data = self.hits.get(label, {})
        mag = self._total_magnitude(hit_data)  # applies decay
        layers = hit_data.get('layers', []) if isinstance(hit_data, dict) else []

        if mag == 0:
            return "NEW"
        elif mag < 1.0:
            return "UNCONFIRMED"    # surface-only = might be noise
        elif mag < 2.0:
            return "PLAUSIBLE"
        elif len(layers) >= 2:
            return "DEEP_LOGIC"     # hit at multiple layers = real
        else:
            return "CONFIRMED"      # high magnitude, single layer

    def add_note(self, text, domain="NOTE", forced_label=None):
        """Add a new pattern from freeform text. Self-organizing entry point.

        Returns the label the note was stored under (auto-generated from
        the first meaningful words unless *forced_label* is given).
        """
        if forced_label:
            label = forced_label
        else:
            # Auto-generate label from the first 4 meaningful words.
            words = re.sub(r'[^a-zA-Z0-9\s]', '', text).upper().split()
            label_words = [w for w in words if len(w) > 2][:4]
            label = "_".join(label_words) if label_words else "NOTE_" + str(int(time.time()))

        # Don't overwrite existing patterns unless forced.
        if label in self.patterns and not forced_label:
            label = label + "_" + str(int(time.time()) % 10000)

        self.patterns[label] = {
            "problem": text,
            "solution": text,
            "domain": domain,
            "confidence": 0.5,  # starts neutral
            "source": "notepad",
            "type": "NOTE",
            "created": time.time(),
        }

        # Initial hit at conceptual layer (you wrote it = you meant it).
        self._record_hit(label, 1.0)
        self._save_hits()

        # Recompute the adaptive threshold for the new pattern count.
        self.base_threshold = 0.3 + (0.4 * min(len(self.patterns) / 200, 1.0))

        return label

    def load_lattice_patterns(self):
        """Load the built-in LATTICE_PATTERNS seed set into the index."""
        for label, data in LATTICE_PATTERNS.items():
            self.patterns[label] = {
                **data,
                "source": "lattice",
                "type": "CODE_PATTERN"
            }

    def load_conversation_patterns(self):
        """Load the built-in CONVERSATION_PATTERNS seed set into the index."""
        for label, data in CONVERSATION_PATTERNS.items():
            self.patterns[label] = {
                **data,
                "source": "conversation_0938ac6c",
                "type": "INSIGHT"
            }

    def search(self, query, threshold=None, record=True):
        """
        Adaptive distillation search.

        - Scores patterns with exact phrase relevance plus 384-dim semantic
          similarity from the embedding manifold
        - [[label]] wiki-links in the query force-boost that pattern
        - Records magnitude-weighted hits for matches when *record* is true

        Returns result dicts sorted by status, then relevance, then
        confidence (descending).
        """
        if threshold is None:
            threshold = self.base_threshold

        results = []
        query_lower = query.lower()

        # 1. Generate the query vector once, outside the pattern loop.
        query_vector = self.embedder.embed_text(query)

        # 2. Explicit [[label]] links get a decisive boost below.
        #    (Removed dead code: per-word IPF weights were computed here on
        #    every call but never used in scoring.)
        links = re.findall(r'\[\[(\w+)\]\]', query_lower)

        for label, pattern in self.patterns.items():
            problem = pattern.get('problem', '').lower()
            solution = pattern.get('solution', '').lower()
            label_text = label.lower()

            relevance = 0

            # Semantic boost (manifold pathfinding).
            pattern_vector = self.pattern_vectors.get(label)
            semantic_score = 0
            if pattern_vector:
                semantic_score = self.embedder.cosine_similarity(query_vector, pattern_vector)
                # Semantic resonance carries most of the weight.
                relevance += (semantic_score * 0.8)

            # Exact phrase matches.
            if query_lower in problem: relevance += 0.4
            if query_lower in solution: relevance += 0.3
            if query_lower in label_text: relevance += 0.5

            # Link boost: an explicit [[label]] reference always matches.
            if label.lower() in links: relevance += 2.0

            if relevance >= threshold:
                status = self.get_status(label)

                # Record a magnitude-weighted hit.
                if record:
                    self._record_hit(label, relevance)

                hit_data = self.hits.get(label, {})
                results.append({
                    "label": label,
                    "relevance": relevance,
                    "confidence": pattern.get('confidence', 0.5),
                    "status": status,
                    "hits": hit_data.get('count', 0) if isinstance(hit_data, dict) else 0,
                    "magnitude": self._total_magnitude(hit_data),
                    "layers": hit_data.get('layers', []) if isinstance(hit_data, dict) else [],
                    **pattern
                })

        # Sort by: confirmed first, then relevance, then confidence.
        status_order = {"DEEP_LOGIC": 4, "CONFIRMED": 3, "PLAUSIBLE": 2, "UNCONFIRMED": 1, "NEW": 0}
        results.sort(key=lambda x: (
            status_order.get(x.get('status', 'NEW'), 0),
            x['relevance'],
            x['confidence']
        ), reverse=True)

        # Persist hit counts once per search pass.
        if record:
            self._save_hits()

        return results

    def distillation_report(self):
        """Print and return a summary of pattern distillation by magnitude layer."""
        deep_logic = []
        confirmed = []
        plausible = []
        unconfirmed = []
        new_patterns = []

        for label in self.patterns:
            status = self.get_status(label)
            hit_data = self.hits.get(label, {})
            mag = self._total_magnitude(hit_data)
            layers = hit_data.get('layers', []) if isinstance(hit_data, dict) else []

            entry = (label, mag, layers)
            if status == "DEEP_LOGIC":
                deep_logic.append(entry)
            elif status == "CONFIRMED":
                confirmed.append(entry)
            elif status == "PLAUSIBLE":
                plausible.append(entry)
            elif status == "UNCONFIRMED":
                unconfirmed.append(entry)
            else:
                new_patterns.append(entry)

        print(f"\n{'='*60}")
        print(f"DISTILLATION REPORT (Magnitude Layers)")
        print(f"{'='*60}")
        print(f"Total patterns: {len(self.patterns)}")
        print(f"  DEEP_LOGIC (multi-layer):  {len(deep_logic)} = verified across layers")
        print(f"  CONFIRMED (mag >= 2.0):    {len(confirmed)} = strong signal")
        print(f"  PLAUSIBLE (mag 1.0-2.0):   {len(plausible)} = growing")
        print(f"  UNCONFIRMED (mag < 1.0):   {len(unconfirmed)} = potential noise")
        print(f"  NEW (untested):            {len(new_patterns)}")
        print(f"\nAdaptive threshold: {self.base_threshold:.2f}")

        if deep_logic:
            print(f"\nDEEP LOGIC (multi-layer verified):")
            for label, mag, layers in sorted(deep_logic, key=lambda x: x[1], reverse=True):
                print(f"  [mag:{mag:.1f}] [{'+'.join(layers)}] {label}")

        if confirmed:
            print(f"\nCONFIRMED (strong signal):")
            for label, mag, layers in sorted(confirmed, key=lambda x: x[1], reverse=True):
                print(f"  [mag:{mag:.1f}] [{'+'.join(layers)}] {label}")

        if unconfirmed:
            print(f"\nUNCONFIRMED (potential noise):")
            for label, mag, layers in unconfirmed:
                print(f"  [mag:{mag:.1f}] [{'+'.join(layers)}] {label}")

        # BUGFIX: the summary previously omitted the DEEP_LOGIC bucket it
        # prints above; the extra key is backward-compatible.
        return {
            "deep_logic": len(deep_logic),
            "confirmed": len(confirmed),
            "plausible": len(plausible),
            "unconfirmed": len(unconfirmed),
            "new": len(new_patterns),
            "threshold": self.base_threshold
        }

    def save_to_json(self, path):
        """Persist the full pattern index to *path* as JSON for inspection."""
        with open(path, 'w') as f:
            json.dump({
                "total_patterns": len(self.patterns),
                "sources": {
                    "lattice": len(LATTICE_PATTERNS),
                    "conversation": len(CONVERSATION_PATTERNS)
                },
                "patterns": self.patterns
            }, f, indent=2)
        print(f"\n💾 Saved index to: {path}")

    def stats(self):
        """Print statistics: totals, average confidence, domain breakdown."""
        print(f"\n{'='*60}")
        print(f"IN-MEMORY PATTERN INDEX")
        print(f"{'='*60}")
        print(f"Total patterns: {len(self.patterns)}")
        print(f"  From lattice: {len(LATTICE_PATTERNS)}")
        print(f"  From conversation: {len(CONVERSATION_PATTERNS)}")
        # BUGFIX: guard against ZeroDivisionError when the index is empty.
        if self.patterns:
            avg = sum(p.get('confidence', 0.5) for p in self.patterns.values()) / len(self.patterns)
            print(f"Average confidence: {avg:.0%}")

        # Domain breakdown
        domains = {}
        for p in self.patterns.values():
            d = p.get('domain', 'UNKNOWN')
            domains[d] = domains.get(d, 0) + 1

        print(f"\nDomains:")
        for domain, count in sorted(domains.items(), key=lambda x: x[1], reverse=True):
            print(f"  {domain}: {count}")

if __name__ == "__main__":
    index = InMemoryIndex()
    index.stats()
    
    # Save to JSON
    save_path = os.path.join(index.LATTICE_DB_DIR, "in_memory_index.json")
    index.save_to_json(save_path)
    
    # Test search
    print(f"\n{'='*60}")
    print(f"TEST SEARCHES")
    print(f"{'='*60}\n")
    
    for query in ["singleton", "react", "lattice", "honest"]:
        results = index.search(query)
        print(f"Query: '{query}' → {len(results)} results")
        if results:
            print(f"  Top: {results[0]['label']} ({results[0]['confidence']:.0%})")
        print()