Spaces:

shyamilicious
/

neuro-hack

Sleeping

shyamilicious committed on Feb 8

Commit

dd50ca1

1 Parent(s): d8211f5

Fix_Semantic_Retrieval_Threshold

Files changed (2) hide show

config.yaml CHANGED Viewed

@@ -13,7 +13,7 @@ vector:
   # Using a smaller model to avoid OOM on standard machines during demo
   embedding_model: "all-MiniLM-L6-v2"
   dim: 384
-  similarity_threshold: 0.4 # Slightly lower for MiniLM
 evaluation:
   checkpoints: [100, 500, 937, 1000, 1200]
   recall_k: 6

   # Using a smaller model to avoid OOM on standard machines during demo
   embedding_model: "all-MiniLM-L6-v2"
   dim: 384
+  similarity_threshold: 0.5 # Filter out weak semantic matches (was 0.4)
 evaluation:
   checkpoints: [100, 500, 937, 1000, 1200]
   recall_k: 6

src/neurohack_memory/system.py CHANGED Viewed

@@ -108,10 +108,18 @@ class MemorySystem:
     def retrieve(self, query):
         cfgm = self.cfg["memory"]
         t = Timer.start()
         hits = self.vindex.search(query, top_k=max(10, cfgm["top_k"]*3))
         candidates = []
         for mid, base_score in hits:
             m = self._memory_cache.get(mid)
             if not m:
                 continue

     def retrieve(self, query):
         cfgm = self.cfg["memory"]
+        cfgv = self.cfg.get("vector", {})
+        min_similarity = cfgv.get("similarity_threshold", 0.4)
         t = Timer.start()
         hits = self.vindex.search(query, top_k=max(10, cfgm["top_k"]*3))
         candidates = []
         for mid, base_score in hits:
+            # CRITICAL FIX: Filter out semantically irrelevant matches
+            # If base_score (cosine similarity) is below threshold, skip it
+            if base_score < min_similarity:
+                continue
             m = self._memory_cache.get(mid)
             if not m:
                 continue