File size: 4,730 Bytes
9e84bed
 
 
80d286d
9e84bed
 
 
 
 
 
 
 
 
80d286d
 
9e84bed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80d286d
9e84bed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80d286d
9e84bed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80d286d
9e84bed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# semantic_memory.py
import json
import math
import os
import time
from typing import Any, Dict, List, Optional, Tuple

from sentence_transformers import SentenceTransformer, util

DEFAULT_STORE = "mem_store.json"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Keyword -> importance boost. Applied in _base_weight_from_text via a
# case-insensitive substring match against the memory text; each matching
# keyword adds its boost to the base weight.
EMOTION_WORDS = {
    "love": 0.25, "hate": 0.25, "excited": 0.2, "hyped": 0.2, "proud": 0.2,
    "stressed": 0.2, "angry": 0.2, "furious": 0.25, "grateful": 0.15,
    "dream": 0.15, "goal": 0.15, "mission": 0.15, "ambitious": 0.15
}

class SemanticMemory:
    """
    Vector memory with importance weighting, recency decay, and a simple
    emotion-keyword boost.

    Each memory is a dict with keys ``text``, ``source``, ``tags``, ``ts``
    (unix seconds), ``weight`` (clamped to [0.2, 1.0]) and ``embedding``
    (a list of floats). The store persists to a small JSON file so it
    survives restarts.
    """

    def __init__(self, store_path: str = DEFAULT_STORE):
        """Load (or create) the JSON store and the sentence-embedding model."""
        self.store_path = store_path
        # Give HF a writable cache location unless the caller already set one.
        os.environ.setdefault("TRANSFORMERS_CACHE", "/home/user/.cache")
        self.model = SentenceTransformer(MODEL_NAME)
        self._load()

    # ---------- persistence ----------
    def _load(self) -> None:
        """Read the store from disk; create an empty store file if missing."""
        if os.path.exists(self.store_path):
            # Explicit encoding: the platform default could mis-decode
            # non-ASCII memory text on some systems.
            with open(self.store_path, "r", encoding="utf-8") as f:
                self.store: List[Dict[str, Any]] = json.load(f)
        else:
            self.store = []
            self._flush()

    def _flush(self) -> None:
        """Write the in-memory store back to its JSON file."""
        with open(self.store_path, "w", encoding="utf-8") as f:
            json.dump(self.store, f, indent=2, ensure_ascii=False)

    # ---------- scoring helpers ----------
    @staticmethod
    def _now() -> float:
        """Current unix time in seconds."""
        return time.time()

    @staticmethod
    def _base_weight_from_text(text: str) -> float:
        """
        Heuristic importance in [0.2, 1.0]: 0.2 base plus boosts for
        heavy capitalization (emphasis), digits (often facts), and
        emotion keywords from EMOTION_WORDS.
        """
        t = text.strip()
        weight = 0.2
        # More than 6 uppercase characters reads as emphasis/shouting.
        # (The former `any(c.isupper() ...) and` guard was redundant:
        # a count > 6 already implies at least one uppercase char.)
        if sum(1 for c in t if c.isupper()) > 6:
            weight += 0.15
        if any(ch.isdigit() for ch in t):
            weight += 0.1
        lower = t.lower()
        for word, boost in EMOTION_WORDS.items():
            if word in lower:
                weight += boost
        return max(0.2, min(weight, 1.0))

    @staticmethod
    def _decay(age_hours: float, half_life_hours: float = 48.0) -> float:
        """
        Exponential recency decay: halves every `half_life_hours`.
        Non-positive ages (e.g. clock skew) return exactly 1.0.
        """
        if age_hours <= 0:
            return 1.0
        return 0.5 ** (age_hours / half_life_hours)

    # ---------- public API ----------
    def add(self, text: str, source: str = "user",
            tags: Optional[List[str]] = None,
            weight: Optional[float] = None) -> None:
        """
        Embed `text`, append it to the store, and persist immediately.

        Blank/empty text is ignored. When `weight` is None it is derived
        from the text via `_base_weight_from_text`.
        """
        if not text or not text.strip():
            return
        tags = tags or []
        emb = self.model.encode(text, convert_to_tensor=True).tolist()
        w = weight if weight is not None else self._base_weight_from_text(text)
        item = {
            "text": text.strip(),
            "source": source,
            "tags": tags,
            "ts": self._now(),
            "weight": float(round(w, 4)),
            "embedding": emb,
        }
        self.store.append(item)
        self._flush()

    def _torch_tensor(self, x):
        # Lazy import keeps module import time snappy.
        import torch
        return torch.tensor(x)

    def search(self, query: str, top_k: int = 5,
               alpha: float = 0.65, beta: float = 0.35) -> List[Tuple[Dict[str, Any], float]]:
        """
        Return up to `top_k` (memory_item, score) pairs, best first.

        score = alpha * cosine_similarity + beta * (weight * recency_decay)
        """
        if not self.store:
            return []

        # NOTE: the previous version did a dead `import torch` here; torch
        # is only needed inside _torch_tensor.
        q_emb = self.model.encode(query, convert_to_tensor=True)
        mem_embs = self._torch_tensor([m["embedding"] for m in self.store])

        sims = util.cos_sim(q_emb, mem_embs).squeeze(0)  # shape [N]

        now = self._now()
        scored: List[Tuple[int, float]] = []
        for i, m in enumerate(self.store):
            age_hours = (now - m["ts"]) / 3600.0
            weighted = m["weight"] * self._decay(age_hours)
            scored.append((i, float(alpha * sims[i].item() + beta * weighted)))

        scored.sort(key=lambda pair: pair[1], reverse=True)
        return [(self.store[idx], float(round(sc, 4)))
                for idx, sc in scored[:top_k]]

    def summarize_context(self, query: str, top_k: int = 5) -> str:
        """Format the top_k search hits as a bulleted context string."""
        hits = self.search(query, top_k=top_k)
        if not hits:
            return "No memory yet."
        bullets = [f"- {m['text']}  (score: {sc})" for m, sc in hits]
        return "Relevant memories:\n" + "\n".join(bullets)