"""Flat-file memory store.

Memory texts and their ids live in a JSON-lines file (``STORE_JSON``); the
embedding for the n-th record is the n-th row of the array saved in
``STORE_EMB_NPY``.
"""
import json
import os
import uuid
from typing import Dict, List

import numpy as np

from embedder import embed_texts

STORE_JSON = "memories.jsonl"
STORE_EMB_NPY = "memories.npy"


def _load_store():
    """Read the store from disk.

    Returns:
        A tuple ``(ids, texts, embs)``. ``ids`` and ``texts`` are parallel
        lists read from ``STORE_JSON`` (blank lines are skipped). ``embs`` is
        the array loaded from ``STORE_EMB_NPY``, or an empty ``(0, 0)``
        float32 array when there are no texts or the embedding file is
        missing. No check is made here that ``embs`` has one row per text;
        callers must verify alignment themselves.
    """
    ids, texts = [], []
    if os.path.exists(STORE_JSON):
        with open(STORE_JSON, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                obj = json.loads(line)
                ids.append(obj["id"])
                texts.append(obj["text"])

    if texts and os.path.exists(STORE_EMB_NPY):
        embs = np.load(STORE_EMB_NPY)
    else:
        embs = np.zeros((0, 0), dtype="float32")
    return ids, texts, embs


def _save_store(ids, texts, embs):
    """Write ids/texts to ``STORE_JSON`` and ``embs`` to ``STORE_EMB_NPY``.

    Both files are first written to ``<name>.tmp`` siblings and only then
    moved into place with ``os.replace``, so a failure while serializing
    leaves the previous store untouched instead of a truncated one.
    """
    json_tmp = STORE_JSON + ".tmp"
    emb_tmp = STORE_EMB_NPY + ".tmp"

    with open(json_tmp, "w", encoding="utf-8") as f:
        for mem_id, text in zip(ids, texts):
            json.dump({"id": mem_id, "text": text}, f, ensure_ascii=False)
            f.write("\n")

    # Pass an open file object: np.save would append ".npy" to a path that
    # does not already end with it, which would break the temp-file name.
    with open(emb_tmp, "wb") as f:
        np.save(f, embs)

    os.replace(json_tmp, STORE_JSON)
    os.replace(emb_tmp, STORE_EMB_NPY)


def add_memory(text: str) -> None:
    """Add one memory text to the store.

    The text is stripped; empty or ``None`` input is ignored. A fresh UUID4
    string is assigned as the record id, the text is embedded with
    ``embed_texts``, and the whole store is rewritten to disk.

    If the stored embedding matrix does not have exactly one row per stored
    text (for example the ``.npy`` file was deleted), the existing texts are
    re-embedded before appending, so rows and records stay aligned.
    """
    text = (text or "").strip()
    if not text:
        return

    ids, texts, embs = _load_store()

    if texts and (embs.ndim != 2 or embs.shape[0] != len(texts)):
        # Appending to a misaligned matrix would permanently pair rows with
        # the wrong texts, so rebuild it first.
        # NOTE(review): assumes embed_texts returns one vector per input
        # text, in input order - confirm against embedder.
        embs = np.vstack(embed_texts(texts))

    new_emb = embed_texts([text])[0].reshape(1, -1)
    embs = new_emb if embs.size == 0 else np.vstack([embs, new_emb])

    ids.append(str(uuid.uuid4()))
    texts.append(text)
    _save_store(ids, texts, embs)


def search(query: str, k: int = 5) -> List[Dict]:
    """Return top-k most similar memories to the query.

    Args:
        query: Text to search for. A blank or ``None`` query yields ``[]``.
        k: Maximum number of results. ``k <= 0`` yields ``[]``.

    Returns:
        A list of dicts with keys ``"id"``, ``"text"`` and ``"score"``
        (cosine similarity as a float), ordered from most to least similar.
        Empty when the store has no texts or no embeddings.

    Raises:
        ValueError: If the embedding matrix is non-empty but does not have
            exactly one row per stored text, since scores could not be
            mapped back to the right records.
    """
    # A negative k would make the [:k] slice drop items from the end rather
    # than return nothing, so reject non-positive k up front.
    if k <= 0 or not (query or "").strip():
        return []

    ids, texts, embs = _load_store()
    if not texts or embs.size == 0:
        return []
    if embs.ndim != 2 or embs.shape[0] != len(texts):
        raise ValueError(
            f"memory store is inconsistent: {len(texts)} texts in "
            f"{STORE_JSON} but embeddings of shape {embs.shape} in "
            f"{STORE_EMB_NPY}"
        )

    q_emb = embed_texts([query])[0]
    # The original author notes embed_texts already returns normalized
    # vectors; both sides are re-normalized anyway so the dot product below
    # is a cosine similarity. The epsilon avoids division by zero.
    q_emb = q_emb / (np.linalg.norm(q_emb) + 1e-8)
    embs_norm = embs / (np.linalg.norm(embs, axis=1, keepdims=True) + 1e-8)
    sims = embs_norm @ q_emb

    top = np.argsort(-sims)[:k]
    return [
        {"id": ids[i], "text": texts[i], "score": float(sims[i])}
        for i in map(int, top)
    ]