Spaces:
Runtime error
Runtime error
# semantic_memory.py

import json, os, math, time
from typing import List, Dict, Any, Tuple
from sentence_transformers import SentenceTransformer, util

# Path of the JSON file used to persist memories across restarts.
DEFAULT_STORE = "mem_store.json"
# Sentence-embedding model name passed to SentenceTransformer.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Keyword -> additive importance boost applied when the word appears
# (case-insensitively, as a substring) in a stored memory's text.
EMOTION_WORDS = {
    "love": 0.25, "hate": 0.25, "excited": 0.2, "hyped": 0.2, "proud": 0.2,
    "stressed": 0.2, "angry": 0.2, "furious": 0.25, "grateful": 0.15,
    "dream": 0.15, "goal": 0.15, "mission": 0.15, "ambitious": 0.15
}
class SemanticMemory:
    """
    Vector memory with importance weighting, recency decay, and simple emotion boost.
    Persists to a small JSON file so it survives restarts.

    Each stored item is a dict with keys:
        text, source, tags, ts (unix seconds), weight (0.2..1.0), embedding (list[float]).
    """

    def __init__(self, store_path: str = DEFAULT_STORE):
        """
        Args:
            store_path: JSON file used to persist memories (created on first use).
        """
        self.store_path = store_path
        # Point the HF model cache somewhere writable before the model loads.
        os.environ.setdefault("TRANSFORMERS_CACHE", "/home/user/.cache")
        self.model = SentenceTransformer(MODEL_NAME)
        self._load()

    # ---------- persistence ----------
    def _load(self):
        """Load the store from disk; create an empty store file if none exists."""
        if os.path.exists(self.store_path):
            with open(self.store_path, "r") as f:
                self.store: List[Dict[str, Any]] = json.load(f)
        else:
            self.store = []
            self._flush()

    def _flush(self):
        """Write the entire store to disk as pretty-printed JSON."""
        with open(self.store_path, "w") as f:
            json.dump(self.store, f, indent=2)

    # ---------- scoring helpers ----------
    # BUG FIX: these three helpers take no instance state but were defined
    # without `self` and called as `self._now()` etc., which raised
    # `TypeError: _now() takes 0 positional arguments but 1 was given`.
    # Decorating them as @staticmethod makes the existing call sites valid.
    @staticmethod
    def _now() -> float:
        """Current UNIX timestamp in seconds."""
        return time.time()

    @staticmethod
    def _base_weight_from_text(text: str) -> float:
        """
        Heuristic importance weight in [0.2, 1.0].

        0.2 base + features:
        - caps/emphasis (more than 6 uppercase characters)
        - contains numbers (often facts)
        - emotion keywords (additive boosts from EMOTION_WORDS)
        """
        t = text.strip()
        weight = 0.2
        # Heavy capitalization reads as emphasis; require > 6 uppercase chars.
        if any(c.isupper() for c in t) and sum(map(str.isupper, t)) > 6:
            weight += 0.15
        if any(ch.isdigit() for ch in t):
            weight += 0.1
        lower = t.lower()
        for w, boost in EMOTION_WORDS.items():
            if w in lower:
                weight += boost
        # Clamp into [0.2, 1.0].
        return max(0.2, min(weight, 1.0))

    @staticmethod
    def _decay(age_hours: float, half_life_hours: float = 48.0) -> float:
        """
        Exponential recency decay: halves every `half_life_hours`.

        Returns 1.0 for non-positive ages (fresh or clock-skewed items).
        """
        if age_hours <= 0:
            return 1.0
        # factor = 0.5 ** (age/half_life)
        return 0.5 ** (age_hours / half_life_hours)

    # ---------- public API ----------
    def add(self, text: str, source: str = "user", tags: List[str] = None, weight: float = None):
        """
        Embed and persist one memory.

        Args:
            text: memory content; empty/whitespace-only input is silently ignored.
            source: origin label stored with the item (e.g. "user").
            tags: optional list of string tags (defaults to []).
            weight: explicit importance; when None it is derived from the text.
        """
        if not text or not text.strip():
            return
        tags = tags or []
        emb = self.model.encode(text, convert_to_tensor=True).tolist()
        w = weight if weight is not None else self._base_weight_from_text(text)
        item = {
            "text": text.strip(),
            "source": source,
            "tags": tags,
            "ts": self._now(),
            "weight": float(round(w, 4)),
            "embedding": emb,
        }
        self.store.append(item)
        self._flush()

    def _torch_tensor(self, x):
        # Lazy import torch to keep import time snappy
        import torch
        return torch.tensor(x)

    def search(self, query: str, top_k: int = 5,
               alpha: float = 0.65, beta: float = 0.35) -> List[Tuple[Dict[str, Any], float]]:
        """
        Returns list of (memory_item, score) sorted by score desc.
        score = alpha * cosine_similarity + beta * (weight * recency_decay)
        """
        if not self.store:
            return []
        q_emb = self.model.encode(query, convert_to_tensor=True)
        mem_embs = self._torch_tensor([m["embedding"] for m in self.store])
        sims = util.cos_sim(q_emb, mem_embs).squeeze(0)  # shape [N]
        now = self._now()
        scored: List[Tuple[int, float]] = []
        for i, m in enumerate(self.store):
            age_hours = (now - m["ts"]) / 3600.0
            decay = self._decay(age_hours)
            weighted = m["weight"] * decay
            score = float(alpha * sims[i].item() + beta * weighted)
            scored.append((i, score))
        scored.sort(key=lambda x: x[1], reverse=True)
        results: List[Tuple[Dict[str, Any], float]] = []
        for idx, sc in scored[:top_k]:
            results.append((self.store[idx], float(round(sc, 4))))
        return results

    def summarize_context(self, query: str, top_k: int = 5) -> str:
        """
        Lightweight summarizer over top_k hits.
        """
        hits = self.search(query, top_k=top_k)
        if not hits:
            return "No memory yet."
        bullets = []
        for m, sc in hits:
            bullets.append(f"- {m['text']} (score: {sc})")
        return "Relevant memories:\n" + "\n".join(bullets)