Spaces:
Sleeping
Sleeping
Create semantic_memory.py
Browse files- semantic_memory.py +62 -0
semantic_memory.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import json
import os
import time
from typing import Optional

import numpy as np
from sentence_transformers import SentenceTransformer
|
| 5 |
+
def _cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
|
| 6 |
+
an = np.linalg.norm(a)
|
| 7 |
+
bn = np.linalg.norm(b)
|
| 8 |
+
if an == 0 or bn == 0:
|
| 9 |
+
return 0.0
|
| 10 |
+
return float(np.dot(a, b) / (an * bn))
|
| 11 |
+
|
class SemanticMemory:
    """Per-agent vector memory backed by a JSON file.

    Each entry stores (text, category, embedding, ts). Embeddings come from
    a CPU-friendly sentence-transformers model; retrieval is a brute-force
    cosine scan over all entries, which is fine for small stores but O(n)
    per query (every call also re-reads the whole JSON file from disk).
    """

    def __init__(self, agent_id: str, file_path: Optional[str] = None,
                 model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """Load the embedding model and ensure the backing store exists.

        Args:
            agent_id: Identifier used to derive the default store filename.
            file_path: Explicit store path; defaults to
                ``semantic_memory_<agent_id>.json``.
            model_name: sentence-transformers model to load (CPU-only use).
        """
        self.agent_id = agent_id
        self.file_path = file_path or f"semantic_memory_{agent_id}.json"
        self.model = SentenceTransformer(model_name)
        self._init_store()

    def _init_store(self) -> None:
        """Create an empty store on first use; never clobber an existing file."""
        if not os.path.exists(self.file_path):
            with open(self.file_path, "w", encoding="utf-8") as f:
                json.dump({"entries": []}, f)

    def _load(self) -> dict:
        """Read and return the full store ({"entries": [...]})."""
        with open(self.file_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _save(self, obj: dict) -> None:
        """Overwrite the store file with *obj* (pretty-printed JSON)."""
        with open(self.file_path, "w", encoding="utf-8") as f:
            json.dump(obj, f, indent=2)

    def add(self, text: str, category: str) -> None:
        """Embed *text* and append it to the store under *category*."""
        store = self._load()
        # .tolist() makes the embedding JSON-serializable (list[float]).
        emb = self.model.encode([text])[0].tolist()
        store["entries"].append({
            "text": text,
            "category": category,
            "embedding": emb,
            "ts": int(time.time()),
        })
        self._save(store)

    def query(self, query_text: str, category: Optional[str] = None,
              top_k: int = 5, min_score: float = 0.35):
        """Return up to *top_k* entries most similar to *query_text*.

        Args:
            query_text: Text to embed and compare against stored entries.
            category: If given, restrict the search to entries whose
                category matches exactly.
            top_k: Maximum number of entries to return.
            min_score: Confidence cutoff — entries with cosine similarity
                <= this value are dropped (was a hard-coded 0.35; pass a
                negative value to disable filtering).

        Returns:
            List of entry dicts, most similar first; [] when nothing matches.
        """
        store = self._load()
        entries = store.get("entries", [])
        if category:
            entries = [e for e in entries if e.get("category") == category]
        if not entries:
            return []

        q = self.model.encode([query_text])[0]
        scored = [
            (_cosine_sim(q, np.array(e["embedding"], dtype=np.float32)), e)
            for e in entries
        ]
        scored.sort(key=lambda item: item[0], reverse=True)
        return [e for (s, e) in scored[:top_k] if s > min_score]