# NOTE(review): removed non-code scrape residue that preceded this module
# (page header, runtime-error banners, file size, commit hashes, and a
# line-number gutter from the hosting page).
# semantic_memory.py
import json
import math
import os
import time
from typing import Any, Dict, List, Optional, Tuple

from sentence_transformers import SentenceTransformer, util
# Default on-disk location of the JSON memory store.
DEFAULT_STORE = "mem_store.json"
# Sentence-embedding model loaded by SemanticMemory.__init__.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# Keyword -> additive importance boost; _base_weight_from_text adds the boost
# when the keyword occurs (substring match) in the lower-cased memory text.
EMOTION_WORDS = {
    "love": 0.25, "hate": 0.25, "excited": 0.2, "hyped": 0.2, "proud": 0.2,
    "stressed": 0.2, "angry": 0.2, "furious": 0.25, "grateful": 0.15,
    "dream": 0.15, "goal": 0.15, "mission": 0.15, "ambitious": 0.15
}
class SemanticMemory:
    """
    Vector memory with importance weighting, recency decay, and simple emotion boost.

    Each stored memory is a dict with keys: ``text``, ``source``, ``tags``,
    ``ts`` (epoch seconds), ``weight`` (importance in [0.2, 1.0]), and
    ``embedding`` (list of floats from the sentence-transformer model).
    The whole store persists to a small JSON file so it survives restarts.
    """

    def __init__(self, store_path: str = DEFAULT_STORE):
        """Load (or create) the JSON store and the embedding model.

        :param store_path: path of the JSON file backing this memory.
        """
        self.store_path = store_path
        # Only set the model cache dir if the caller has not configured one.
        # NOTE(review): hard-coded /home/user/.cache — confirm this path is
        # valid in every deployment environment.
        os.environ.setdefault("TRANSFORMERS_CACHE", "/home/user/.cache")
        self.model = SentenceTransformer(MODEL_NAME)
        self._load()

    # ---------- persistence ----------
    def _load(self):
        """Read the store from disk; start empty when missing or corrupt."""
        self.store: List[Dict[str, Any]] = []
        if os.path.exists(self.store_path):
            try:
                # Explicit encoding: JSON is UTF-8 by spec — do not depend on
                # the platform default (mojibake on non-UTF-8 locales).
                with open(self.store_path, "r", encoding="utf-8") as f:
                    self.store = json.load(f)
                return
            except (json.JSONDecodeError, OSError):
                # A corrupt/unreadable store previously crashed every startup.
                # Quarantine it for inspection and begin with an empty store.
                try:
                    os.replace(self.store_path, self.store_path + ".corrupt")
                except OSError:
                    pass
        self._flush()

    def _flush(self):
        """Persist the store as pretty-printed JSON.

        Writes to a temp file and renames it into place so a crash mid-write
        cannot truncate the existing store (os.replace is atomic on POSIX).
        """
        tmp_path = self.store_path + ".tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(self.store, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, self.store_path)

    # ---------- scoring helpers ----------
    @staticmethod
    def _now() -> float:
        """Current wall-clock time in epoch seconds."""
        return time.time()

    @staticmethod
    def _base_weight_from_text(text: str) -> float:
        """
        Heuristic importance score clamped to [0.2, 1.0].

        Starts at 0.2 and adds:
        - +0.15 when the text has more than 6 uppercase characters
          (caps/emphasis),
        - +0.10 when it contains any digit (often a concrete fact),
        - per-keyword boosts from EMOTION_WORDS.
        """
        t = text.strip()
        weight = 0.2
        # "> 6 uppercase chars" already implies "any uppercase", so the old
        # extra any() pre-check was redundant.
        if sum(1 for c in t if c.isupper()) > 6:
            weight += 0.15
        if any(ch.isdigit() for ch in t):
            weight += 0.1
        lower = t.lower()
        # NOTE(review): substring match, so e.g. "love" also fires inside
        # "glove"; word-boundary matching would be stricter — confirm intent.
        for word, boost in EMOTION_WORDS.items():
            if word in lower:
                weight += boost
        return max(0.2, min(weight, 1.0))

    @staticmethod
    def _decay(age_hours: float, half_life_hours: float = 48.0) -> float:
        """
        Exponential recency decay: halves every `half_life_hours`.

        :returns: factor in (0, 1]; exactly 1.0 for non-positive ages.
        """
        if age_hours <= 0:
            return 1.0
        return 0.5 ** (age_hours / half_life_hours)

    # ---------- public API ----------
    def add(self, text: str, source: str = "user",
            tags: Optional[List[str]] = None,
            weight: Optional[float] = None):
        """
        Embed `text`, append it to the store, and persist.

        :param text: memory content; blank/empty input is silently ignored.
        :param source: provenance label stored with the memory.
        :param tags: optional tag list (defaults to an empty list).
        :param weight: explicit importance; when None it is derived from the
            text via _base_weight_from_text.
        """
        if not text or not text.strip():
            return
        tags = tags or []
        emb = self.model.encode(text, convert_to_tensor=True).tolist()
        w = weight if weight is not None else self._base_weight_from_text(text)
        self.store.append({
            "text": text.strip(),
            "source": source,
            "tags": tags,
            "ts": self._now(),
            "weight": float(round(w, 4)),
            "embedding": emb,
        })
        self._flush()

    def _torch_tensor(self, x):
        # Lazy import torch to keep module import time snappy.
        import torch
        return torch.tensor(x)

    def search(self, query: str, top_k: int = 5,
               alpha: float = 0.65, beta: float = 0.35) -> List[Tuple[Dict[str, Any], float]]:
        """
        Rank stored memories against `query`.

        score = alpha * cosine_similarity + beta * (weight * recency_decay)

        :returns: up to `top_k` (memory_item, score) pairs, best first;
            empty list when the store is empty.
        """
        if not self.store:
            return []
        q_emb = self.model.encode(query, convert_to_tensor=True)
        mem_embs = self._torch_tensor([m["embedding"] for m in self.store])
        # One .tolist() here replaces a per-item .item() call in the loop.
        sims = util.cos_sim(q_emb, mem_embs).squeeze(0).tolist()  # length N
        now = self._now()
        scored: List[Tuple[int, float]] = []
        for i, m in enumerate(self.store):
            age_hours = (now - m["ts"]) / 3600.0
            weighted = m["weight"] * self._decay(age_hours)
            scored.append((i, float(alpha * sims[i] + beta * weighted)))
        scored.sort(key=lambda item: item[1], reverse=True)
        return [(self.store[idx], float(round(sc, 4)))
                for idx, sc in scored[:top_k]]

    def summarize_context(self, query: str, top_k: int = 5) -> str:
        """
        Render the top_k search hits as a bulleted context string.

        :returns: "No memory yet." when nothing is stored/matched, otherwise
            a "Relevant memories:" header followed by one bullet per hit.
        """
        hits = self.search(query, top_k=top_k)
        if not hits:
            return "No memory yet."
        bullets = [f"- {m['text']} (score: {sc})" for m, sc in hits]
        return "Relevant memories:\n" + "\n".join(bullets)