FerrellSyntheticIntelligence committed on
Commit
9a93ed4
·
1 Parent(s): cdbaac1

feat: expose sovereign retrieval matrix and explainable tracking roots via Flask API

Browse files
Files changed (1) hide show
  1. src/core/retrieval_engine.py +73 -0
src/core/retrieval_engine.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import torch
5
+ import time
6
+
7
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+
9
class LocalRetrievalEngine:
    """Embedding-based retriever over a manifest/vector cache stored on disk.

    The engine reads two files from ``storage_dir``: ``chunks_manifest.json``
    (chunk metadata) and ``vectors_cache.pt`` (a tensor loaded with
    ``torch.load``). Row ``i`` of the tensor is treated as the embedding of
    manifest entry ``i``.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2", storage_dir: str = "storage/knowledge"):
        """Create the sentence embedder and resolve the storage file paths.

        Args:
            model_name: Model identifier handed to ``SentenceTransformer``.
            storage_dir: Directory holding the manifest and vector cache. It is
                joined onto the current working directory, so an absolute path
                is used as-is.
        """
        # Imported lazily so that importing this module does not require
        # sentence_transformers to be installed.
        from sentence_transformers import SentenceTransformer

        self.embedder = SentenceTransformer(model_name)
        self.storage_dir = os.path.join(os.getcwd(), storage_dir)
        self.manifest_path = os.path.join(self.storage_dir, "chunks_manifest.json")
        self.vectors_path = os.path.join(self.storage_dir, "vectors_cache.pt")

    def _load_memory_vault(self):
        """Load the chunk manifest and the vector tensor from disk.

        Returns:
            A ``(manifest, vectors)`` tuple, or ``(None, None)`` (after
            printing a warning) when either file is missing.
        """
        if not os.path.exists(self.manifest_path) or not os.path.exists(self.vectors_path):
            print("[-] Retrieval Warning: Memory vault matrix files do not exist on disk yet.")
            return None, None

        with open(self.manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)
        # NOTE(review): torch.load unpickles the file; on PyTorch versions where
        # weights_only defaults to False this can execute arbitrary code if the
        # cache file is not trusted. Consider passing weights_only=True once the
        # minimum supported PyTorch version is confirmed.
        vectors = torch.load(self.vectors_path, map_location='cpu')
        return manifest, vectors

    def query(self, query_string, top_k: int = 3, temporal_ceiling=None):
        """Return the stored chunks most similar to ``query_string``.

        Args:
            query_string: Text to embed and compare against the cached vectors.
            top_k: Maximum number of matches to return. Values <= 0 yield an
                empty list.
            temporal_ceiling: When not ``None``, only chunks whose
                ``'timestamp'`` (treated as 0 when absent) is <= this value
                are considered.

        Returns:
            A list of shallow copies of the matching manifest entries, ordered
            by descending cosine similarity, each with an added ``'score'``
            float. Empty when the storage files are missing, nothing passes
            the temporal filter, or ``top_k`` is not positive.
        """
        # A non-positive k can never produce results; a negative k would make
        # torch.topk raise, so bail out before touching disk or the embedder.
        if top_k <= 0:
            return []

        manifest, vectors = self._load_memory_vault()
        if manifest is None or vectors is None:
            return []

        # Step 1: Compute query vector locally
        query_vector = self.embedder.encode(
            query_string, convert_to_tensor=True, show_progress_bar=False
        ).cpu()

        # Step 2: Compute exact cosine similarities across the stacked tensor array
        similarities = torch.nn.functional.cosine_similarity(
            vectors, query_vector.unsqueeze(0), dim=1
        )

        # The manifest and the vector cache are aligned by position. If they
        # disagree in length, only the common prefix can be paired up safely;
        # indexing past the shorter one would raise IndexError.
        manifest_len = len(manifest)
        vector_rows = similarities.shape[0]
        usable = min(manifest_len, vector_rows)
        if manifest_len != vector_rows:
            print(
                f"[-] Retrieval Warning: Manifest holds {manifest_len} chunks but vector cache "
                f"holds {vector_rows} rows; using the first {usable}."
            )

        # Step 3: Apply Temporal Constraints if active
        if temporal_ceiling is not None:
            valid_indices = [
                idx for idx in range(usable)
                if manifest[idx].get('timestamp', 0) <= temporal_ceiling
            ]
        else:
            valid_indices = list(range(usable))

        if not valid_indices:
            return []

        # Keep only the scores of chunks that survived the filter
        filtered_similarities = similarities[valid_indices]

        # Step 4: Extract the top-K positions within the filtered scores
        actual_k = min(top_k, len(valid_indices))
        top_results = torch.topk(filtered_similarities, actual_k)

        matched_context = []
        # tolist() converts both tensors to plain Python numbers in one pass.
        for score, local_idx in zip(top_results.values.tolist(), top_results.indices.tolist()):
            # local_idx indexes the filtered scores; map it back to the manifest.
            chunk_data = manifest[valid_indices[local_idx]].copy()
            chunk_data['score'] = float(score)
            matched_context.append(chunk_data)

        return matched_context
67
+
68
if __name__ == "__main__":
    # Smoke test: build the engine, run one sample query, report the hit count.
    engine = LocalRetrievalEngine()
    print("[*] Local Retrieval Subsystem Initialized. Testing internal matrix queries...")
    probe_text = "quantum mechanics psychiatry architecture configuration"
    hits = engine.query(probe_text, top_k=2)
    hit_count = len(hits)
    print(f"[+] Operational Check: Extracted {hit_count} matches from local database.")