shyamilicious committed on
Commit
dd50ca1
·
1 Parent(s): d8211f5

Fix_Semantic_Retrieval_Threshold

Browse files
Files changed (2) hide show
  1. config.yaml +1 -1
  2. src/neurohack_memory/system.py +8 -0
config.yaml CHANGED
@@ -13,7 +13,7 @@ vector:
13
  # Using a smaller model to avoid OOM on standard machines during demo
14
  embedding_model: "all-MiniLM-L6-v2"
15
  dim: 384
16
- similarity_threshold: 0.4 # Slightly lower for MiniLM
17
  evaluation:
18
  checkpoints: [100, 500, 937, 1000, 1200]
19
  recall_k: 6
 
13
  # Using a smaller model to avoid OOM on standard machines during demo
14
  embedding_model: "all-MiniLM-L6-v2"
15
  dim: 384
16
+ similarity_threshold: 0.5 # Filter out weak semantic matches (was 0.4)
17
  evaluation:
18
  checkpoints: [100, 500, 937, 1000, 1200]
19
  recall_k: 6
src/neurohack_memory/system.py CHANGED
@@ -108,10 +108,18 @@ class MemorySystem:
108
 
109
  def retrieve(self, query):
110
  cfgm = self.cfg["memory"]
 
 
 
111
  t = Timer.start()
112
  hits = self.vindex.search(query, top_k=max(10, cfgm["top_k"]*3))
113
  candidates = []
114
  for mid, base_score in hits:
 
 
 
 
 
115
  m = self._memory_cache.get(mid)
116
  if not m:
117
  continue
 
108
 
109
  def retrieve(self, query):
110
  cfgm = self.cfg["memory"]
111
+ cfgv = self.cfg.get("vector", {})
112
+ min_similarity = cfgv.get("similarity_threshold", 0.4)
113
+
114
  t = Timer.start()
115
  hits = self.vindex.search(query, top_k=max(10, cfgm["top_k"]*3))
116
  candidates = []
117
  for mid, base_score in hits:
118
+ # CRITICAL FIX: Filter out semantically irrelevant matches
119
+ # If base_score (cosine similarity) is below threshold, skip it
120
+ if base_score < min_similarity:
121
+ continue
122
+
123
  m = self._memory_cache.get(mid)
124
  if not m:
125
  continue