Spaces:
Sleeping
Sleeping
Commit ·
dd50ca1
1
Parent(s): d8211f5
Fix_Semantic_Retrieval_Threshold
Browse files
- config.yaml +1 -1
- src/neurohack_memory/system.py +8 -0
config.yaml
CHANGED
|
@@ -13,7 +13,7 @@ vector:
|
|
| 13 |
# Using a smaller model to avoid OOM on standard machines during demo
|
| 14 |
embedding_model: "all-MiniLM-L6-v2"
|
| 15 |
dim: 384
|
| 16 |
-
similarity_threshold: 0.4
|
| 17 |
evaluation:
|
| 18 |
checkpoints: [100, 500, 937, 1000, 1200]
|
| 19 |
recall_k: 6
|
|
|
|
| 13 |
# Using a smaller model to avoid OOM on standard machines during demo
|
| 14 |
embedding_model: "all-MiniLM-L6-v2"
|
| 15 |
dim: 384
|
| 16 |
+
similarity_threshold: 0.5 # Filter out weak semantic matches (was 0.4)
|
| 17 |
evaluation:
|
| 18 |
checkpoints: [100, 500, 937, 1000, 1200]
|
| 19 |
recall_k: 6
|
src/neurohack_memory/system.py
CHANGED
|
@@ -108,10 +108,18 @@ class MemorySystem:
|
|
| 108 |
|
| 109 |
def retrieve(self, query):
|
| 110 |
cfgm = self.cfg["memory"]
|
|
|
|
|
|
|
|
|
|
| 111 |
t = Timer.start()
|
| 112 |
hits = self.vindex.search(query, top_k=max(10, cfgm["top_k"]*3))
|
| 113 |
candidates = []
|
| 114 |
for mid, base_score in hits:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
m = self._memory_cache.get(mid)
|
| 116 |
if not m:
|
| 117 |
continue
|
|
|
|
| 108 |
|
| 109 |
def retrieve(self, query):
|
| 110 |
cfgm = self.cfg["memory"]
|
| 111 |
+
cfgv = self.cfg.get("vector", {})
|
| 112 |
+
min_similarity = cfgv.get("similarity_threshold", 0.4)
|
| 113 |
+
|
| 114 |
t = Timer.start()
|
| 115 |
hits = self.vindex.search(query, top_k=max(10, cfgm["top_k"]*3))
|
| 116 |
candidates = []
|
| 117 |
for mid, base_score in hits:
|
| 118 |
+
# CRITICAL FIX: Filter out semantically irrelevant matches
|
| 119 |
+
# If base_score (cosine similarity) is below threshold, skip it
|
| 120 |
+
if base_score < min_similarity:
|
| 121 |
+
continue
|
| 122 |
+
|
| 123 |
m = self._memory_cache.get(mid)
|
| 124 |
if not m:
|
| 125 |
continue
|