File size: 2,183 Bytes
83661a8
 
 
 
 
6df4ebe
 
 
83661a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import json
import uuid
from typing import List, Dict

import numpy as np
from embedder import embed_texts

# On-disk locations of the memory store. Both are relative paths, so they
# resolve against the process's current working directory.
# JSON-lines file: one {"id": ..., "text": ...} object per line.
STORE_JSON = "memories.jsonl"
# NumPy array file holding the embedding matrix written with np.save; rows are
# intended to line up one-to-one, in order, with the records in STORE_JSON.
STORE_EMB_NPY = "memories.npy"


def _load_store():
    """Read the persisted store and return ``(ids, texts, embs)``.

    ``ids`` and ``texts`` are parallel lists built from the JSON-lines file at
    ``STORE_JSON`` (blank lines are skipped). ``embs`` is whatever array is
    stored at ``STORE_EMB_NPY``; when there are no texts or that file does not
    exist, an empty ``(0, 0)`` float32 array is returned instead.
    """
    pairs = []
    if os.path.exists(STORE_JSON):
        with open(STORE_JSON, "r", encoding="utf-8") as handle:
            non_blank = (entry for entry in (raw.strip() for raw in handle) if entry)
            # map() is lazy, so each record is parsed and then unpacked before
            # the next line is touched.
            pairs = [(rec["id"], rec["text"]) for rec in map(json.loads, non_blank)]

    ids = [identifier for identifier, _ in pairs]
    texts = [body for _, body in pairs]

    # The embedding file is only consulted when there is at least one record.
    matrix_available = bool(texts) and os.path.exists(STORE_EMB_NPY)
    embs = (
        np.load(STORE_EMB_NPY)
        if matrix_available
        else np.zeros((0, 0), dtype="float32")
    )

    return ids, texts, embs


def _save_store(ids, texts, embs):
    """Persist the store: records to ``STORE_JSON``, embeddings to ``STORE_EMB_NPY``.

    Args:
        ids: Sequence of record ids.
        texts: Sequence of record texts, parallel to ``ids`` (pairs are formed
            with ``zip``, so extra items in the longer sequence are dropped).
        embs: Array of embeddings saved with ``np.save``.

    Both files are first written to sibling ``*.tmp`` paths and only then moved
    over the real files with ``os.replace``. Opening the live file with mode
    ``"w"`` would truncate it immediately, so an error part-way through the
    write (disk full, unserialisable value, interrupt) would destroy the
    existing store; with the temp-then-replace approach the previous files stay
    intact until the new content is completely written.
    """
    json_tmp = STORE_JSON + ".tmp"
    emb_tmp = STORE_EMB_NPY + ".tmp"
    try:
        with open(json_tmp, "w", encoding="utf-8") as f:
            for i, t in zip(ids, texts):
                json.dump({"id": i, "text": t}, f, ensure_ascii=False)
                f.write("\n")

        # Hand np.save an open file object: given a path that does not end in
        # ".npy" it would append that suffix and write to a different name.
        with open(emb_tmp, "wb") as f:
            np.save(f, embs)

        # Swap both files in only after both were written successfully, which
        # keeps the window in which the two files disagree as small as possible.
        os.replace(json_tmp, STORE_JSON)
        os.replace(emb_tmp, STORE_EMB_NPY)
    finally:
        # After a successful replace the temp paths no longer exist; this only
        # cleans up leftovers from a failed write.
        for tmp_path in (json_tmp, emb_tmp):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)


def add_memory(text: str):
    """Add one memory text to the store.

    The text is stripped; ``None``, empty, or whitespace-only input is ignored
    and nothing is written. Otherwise the text is embedded with
    ``embed_texts``, given a fresh UUID4 id, appended to the loaded store, and
    the whole store is saved again.

    If the loaded embedding matrix does not line up with the loaded records
    (embedding file missing, different number of rows, or a different vector
    width than the current embedder produces), the matrix is rebuilt from the
    stored texts before the new row is added. Appending to a mismatched matrix
    would permanently pair ids with the wrong vectors.
    """
    text = (text or "").strip()
    if not text:
        return

    ids, texts, embs = _load_store()
    new_id = str(uuid.uuid4())
    new_row = np.asarray(embed_texts([text])[0]).reshape(1, -1)

    if not texts:
        # First record: the new vector is the whole matrix.
        embs = new_row
    elif (
        embs.ndim == 2
        and embs.shape[0] == len(texts)
        and embs.shape[1] == new_row.shape[1]
    ):
        embs = np.vstack([embs, new_row])
    else:
        # Stored matrix is unusable for these records; regenerate it.
        # NOTE(review): assumes embed_texts accepts a list of several texts and
        # yields one vector per text, in order — confirm against embedder.
        rebuilt = [np.asarray(vec).reshape(1, -1) for vec in embed_texts(texts)]
        embs = np.vstack(rebuilt + [new_row])

    ids.append(new_id)
    texts.append(text)
    _save_store(ids, texts, embs)


def search(query: str, k: int = 5) -> List[Dict]:
    """Return the top-k stored memories most similar to ``query``.

    Args:
        query: Text to embed and compare against the stored embeddings.
        k: Maximum number of results. Zero or a negative value yields an empty
            list.

    Returns:
        A list of dicts with keys ``"id"``, ``"text"`` and ``"score"``, ordered
        by descending score. The score is the cosine similarity between the
        query embedding and the stored embedding. The list is empty when the
        store has no records or no embeddings.
    """
    # A negative k would otherwise be used as a slice bound below, so k=-1
    # would return everything except the worst match instead of nothing.
    # Checked first so no file or embedding work is done for an empty request.
    if k is not None and k <= 0:
        return []

    ids, texts, embs = _load_store()
    if not texts or embs.size == 0:
        return []

    q_emb = embed_texts([query])[0]
    # embed_texts is expected to return normalized vectors already; normalize
    # again defensively (the epsilon avoids division by zero).
    q_emb = q_emb / (np.linalg.norm(q_emb) + 1e-8)
    embs_norm = embs / (np.linalg.norm(embs, axis=1, keepdims=True) + 1e-8)

    sims = embs_norm @ q_emb
    # Negate so that ascending argsort gives the highest similarity first.
    top = np.argsort(-sims)[:k]

    return [
        {"id": ids[i], "text": texts[i], "score": float(sims[i])}
        for i in map(int, top)
    ]