MnemoSense / rag_store.py
Vineetha00's picture
Update rag_store.py
83661a8 verified
import os
import json
import uuid
from typing import List, Dict
import numpy as np
from embedder import embed_texts
# JSON Lines file: one {"id": ..., "text": ...} object per stored memory.
STORE_JSON = "memories.jsonl"
# NumPy .npy file holding the embedding array saved together with the records.
STORE_EMB_NPY = "memories.npy"
def _load_store():
    """Read the persisted memories from disk.

    Returns:
        A tuple ``(ids, texts, embs)``: two parallel lists built from the
        ``"id"`` and ``"text"`` fields of each non-blank line in
        ``STORE_JSON``, and the array loaded from ``STORE_EMB_NPY``. When
        there are no texts or the embeddings file does not exist, ``embs``
        is an empty ``(0, 0)`` float32 array.
    """
    no_vectors = np.zeros((0, 0), dtype="float32")
    memory_ids, memory_texts = [], []

    # No records file means no texts, hence no embeddings either.
    if not os.path.exists(STORE_JSON):
        return memory_ids, memory_texts, no_vectors

    with open(STORE_JSON, "r", encoding="utf-8") as handle:
        for raw in handle:
            entry = raw.strip()
            if entry:  # blank lines are skipped
                record = json.loads(entry)
                memory_ids.append(record["id"])
                memory_texts.append(record["text"])

    vectors_on_disk = bool(memory_texts) and os.path.exists(STORE_EMB_NPY)
    vectors = np.load(STORE_EMB_NPY) if vectors_on_disk else no_vectors
    return memory_ids, memory_texts, vectors
def _save_store(ids, texts, embs):
    """Persist the memories and their embeddings to disk.

    Both files are first written to temporary siblings and then moved over
    the real paths with ``os.replace``, so a failure while serializing
    (crash, full disk, non-serializable value) leaves the previous store
    intact instead of a truncated file.

    Args:
        ids: Memory identifiers, parallel to ``texts``.
        texts: Memory texts, parallel to ``ids``.
        embs: Embedding array written with ``np.save``.
    """
    json_tmp = STORE_JSON + ".tmp"
    emb_tmp = STORE_EMB_NPY + ".tmp"

    with open(json_tmp, "w", encoding="utf-8") as f:
        for i, t in zip(ids, texts):
            json.dump({"id": i, "text": t}, f, ensure_ascii=False)
            f.write("\n")

    # Pass an open file object: given a path, np.save would append ".npy"
    # to the temporary name.
    with open(emb_tmp, "wb") as f:
        np.save(f, embs)

    # Swap the records in first: if the second swap never happens, the store
    # has more records than embedding rows rather than rows with no record.
    os.replace(json_tmp, STORE_JSON)
    os.replace(emb_tmp, STORE_EMB_NPY)
def add_memory(text: str):
    """Add one memory text to the store.

    The text is stripped; ``None`` or blank input is ignored. Otherwise the
    text is embedded, given a fresh UUID4 id, appended to the store and the
    store is saved.

    Args:
        text: The memory text to remember.
    """
    text = (text or "").strip()
    if not text:
        return

    ids, texts, embs = _load_store()

    # If the embeddings file is missing or stale, the rows no longer line up
    # with the stored texts; stacking the new vector onto them would pair it
    # with the wrong record. Rebuild the rows from the texts first.
    # NOTE(review): assumes embed_texts(list) yields one vector per input,
    # in order - confirm against embedder.
    if texts and (embs.ndim != 2 or embs.shape[0] != len(texts)):
        embs = np.asarray(embed_texts(texts))

    new_emb = np.asarray(embed_texts([text])[0]).reshape(1, -1)
    if embs.size == 0:
        embs = new_emb
    else:
        embs = np.vstack([embs, new_emb])

    ids.append(str(uuid.uuid4()))
    texts.append(text)
    _save_store(ids, texts, embs)
def search(query: str, k: int = 5) -> List[Dict]:
    """Return top-k most similar memories to the query.

    Args:
        query: Text to compare against the stored memories. ``None`` or a
            blank string yields no results, mirroring how ``add_memory``
            ignores blank input.
        k: Maximum number of results. Zero or a negative value yields no
            results; ``None`` returns every memory.

    Returns:
        A list of ``{"id", "text", "score"}`` dicts ordered by descending
        cosine similarity; empty when the store has no usable entries.
    """
    # A negative k would otherwise slice "all but the last |k|" results.
    if k is not None and k <= 0:
        return []
    if not (query or "").strip():
        return []

    ids, texts, embs = _load_store()
    if not texts or embs.size == 0:
        return []

    # Rank only rows that have a matching record, so extra embedding rows
    # cannot index past the end of ids/texts.
    embs = embs[: len(texts)]

    q_emb = embed_texts([query])[0]
    # embed_texts already returns normalized embeddings, but normalize again just in case
    q_emb = q_emb / (np.linalg.norm(q_emb) + 1e-8)
    embs_norm = embs / (np.linalg.norm(embs, axis=1, keepdims=True) + 1e-8)
    sims = embs_norm @ q_emb

    top = np.argsort(-sims)[:k]
    return [
        {"id": ids[i], "text": texts[i], "score": float(sims[i])}
        for i in map(int, top)
    ]