Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import uuid | |
| from typing import List, Dict | |
| import numpy as np | |
| from embedder import embed_texts | |
# JSON Lines file: one {"id": ..., "text": ...} object per stored memory.
STORE_JSON = "memories.jsonl"
# NumPy .npy file holding the embedding array saved together with the records.
STORE_EMB_NPY = "memories.npy"
def _load_store():
    """Read the persisted memories and their embedding array from disk.

    Returns:
        A tuple ``(ids, texts, embs)``. ``ids`` and ``texts`` are parallel
        lists taken from the ``"id"`` and ``"text"`` fields of every
        non-blank line of ``STORE_JSON`` (both empty when the file does not
        exist). ``embs`` is the array loaded from ``STORE_EMB_NPY`` when at
        least one text was read and that file exists; otherwise it is an
        empty ``(0, 0)`` float32 array.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``"id"`` or ``"text"`` field.
    """
    pairs = []
    if os.path.exists(STORE_JSON):
        with open(STORE_JSON, "r", encoding="utf-8") as handle:
            # Lazy pipeline: strip each line, drop blanks, then parse; records
            # are decoded and unpacked one at a time, in file order.
            non_blank = filter(None, map(str.strip, handle))
            pairs = [
                (record["id"], record["text"])
                for record in map(json.loads, non_blank)
            ]

    ids = [memory_id for memory_id, _ in pairs]
    texts = [memory_text for _, memory_text in pairs]

    # Without any texts (or without an embedding file) there is nothing to load.
    if not texts or not os.path.exists(STORE_EMB_NPY):
        return ids, texts, np.zeros((0, 0), dtype="float32")
    return ids, texts, np.load(STORE_EMB_NPY)
def _save_store(ids, texts, embs):
    """Overwrite the on-disk store with the given records and embeddings.

    Args:
        ids: Memory identifiers, paired positionally with ``texts``.
        texts: Memory texts; pairing stops at the shorter of the two lists.
        embs: Array written to ``STORE_EMB_NPY`` with ``np.save``.
    """
    with open(STORE_JSON, "w", encoding="utf-8") as out:
        # One JSON object per line; non-ASCII characters are written as-is.
        out.writelines(
            json.dumps({"id": memory_id, "text": memory_text}, ensure_ascii=False)
            + "\n"
            for memory_id, memory_text in zip(ids, texts)
        )
    np.save(STORE_EMB_NPY, embs)
def add_memory(text: str):
    """Add one memory text to the store.

    The text is stripped; ``None`` or blank input is ignored. Otherwise the
    text is embedded, given a fresh UUID4 id, appended to the stored records
    and the whole store is written back to disk.

    If the stored embedding array does not line up with the stored texts
    (embedding file missing, a different number of rows, or a different
    vector width than the new embedding), the array is rebuilt by
    re-embedding every stored text. Appending to a mismatched array would
    leave ids/texts and embedding rows permanently out of step, so later
    searches would pair scores with the wrong text.

    Args:
        text: The memory text to store.
    """
    text = (text or "").strip()
    if not text:
        return

    ids, texts, embs = _load_store()

    # NOTE(review): assumes embed_texts returns one vector per input text, in
    # order (it is defined in the embedder module, not visible here).
    new_emb = np.asarray(embed_texts([text])[0]).reshape(1, -1)

    if texts:
        aligned = embs.ndim == 2 and embs.shape == (len(texts), new_emb.shape[1])
        if not aligned:
            # Recover from a missing/stale embedding file instead of saving a
            # matrix whose rows no longer correspond to the stored texts.
            embs = np.asarray(embed_texts(texts)).reshape(len(texts), -1)
        embs = np.vstack([embs, new_emb])
    else:
        # First memory: the new vector is the whole matrix.
        embs = new_emb

    ids.append(str(uuid.uuid4()))
    texts.append(text)
    _save_store(ids, texts, embs)
def search(query: str, k: int = 5) -> List[Dict]:
    """Return top-k most similar memories to the query.

    Similarity is the dot product of the L2-normalised query embedding with
    each L2-normalised stored embedding (i.e. cosine similarity).

    Args:
        query: Text to embed and compare against the stored memories.
        k: Maximum number of results. Values of zero or below yield no
            results.

    Returns:
        Up to ``k`` dicts with keys ``"id"``, ``"text"`` and ``"score"``
        (a float), ordered from highest to lowest score. Empty when the
        store has no texts or no embeddings.
    """
    # A negative k would otherwise slice "all but the last |k|" results.
    if k <= 0:
        return []

    ids, texts, embs = _load_store()
    if not texts or embs.size == 0:
        return []

    # Rank only rows that have a matching record, so an embedding file with
    # extra rows cannot index past the end of ids/texts.
    usable = min(len(ids), len(texts), embs.shape[0])
    embs = embs[:usable]

    q_emb = embed_texts([query])[0]
    # embed_texts is expected to return normalized embeddings already;
    # normalize again defensively (epsilon avoids division by zero).
    q_emb = q_emb / (np.linalg.norm(q_emb) + 1e-8)
    embs_norm = embs / (np.linalg.norm(embs, axis=1, keepdims=True) + 1e-8)

    sims = embs_norm @ q_emb
    # Negate so that ascending argsort yields descending similarity.
    best = np.argsort(-sims)[:k]

    return [
        {"id": ids[i], "text": texts[i], "score": float(sims[i])}
        for i in map(int, best)
    ]