# semantic_memory.py
import json
import math
import os
import re
import time
from typing import Any, Dict, List, Optional, Tuple

from sentence_transformers import SentenceTransformer, util

DEFAULT_STORE = "mem_store.json"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Keyword -> importance boost added when the keyword occurs in a memory text.
EMOTION_WORDS = {
    "love": 0.25, "hate": 0.25, "excited": 0.2, "hyped": 0.2, "proud": 0.2,
    "stressed": 0.2, "angry": 0.2, "furious": 0.25, "grateful": 0.15,
    "dream": 0.15, "goal": 0.15, "mission": 0.15, "ambitious": 0.15
}


class SemanticMemory:
    """
    Vector memory with importance weighting, recency decay, and simple emotion boost.
    Persists to a small JSON file so it survives restarts.

    Each stored item is a dict with the keys ``text``, ``source``, ``tags``,
    ``ts`` (seconds since the epoch), ``weight`` and ``embedding`` (a list of
    floats produced by the sentence-transformer model).
    """

    def __init__(self, store_path: str = DEFAULT_STORE):
        """Load the embedding model and the persisted store at `store_path`."""
        self.store_path = store_path
        self.store: List[Dict[str, Any]] = []
        # NOTE(review): sentence_transformers is already imported by the time
        # this runs, so this variable may be read too late to have any effect,
        # and the path is machine-specific -- confirm it is still needed.
        os.environ.setdefault("TRANSFORMERS_CACHE", "/home/user/.cache")
        self.model = SentenceTransformer(MODEL_NAME)
        self._load()

    # ---------- persistence ----------
    def _load(self) -> None:
        """
        Populate `self.store` from disk, creating an empty store file if none exists.

        Raises:
            ValueError: if the file holds invalid JSON (``json.JSONDecodeError``
                is a ``ValueError`` subclass) or its top-level value is not a list.
        """
        if not os.path.exists(self.store_path):
            self.store = []
            self._flush()
            return

        with open(self.store_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, list):
            # Fail here rather than later with an opaque AttributeError on append.
            raise ValueError(
                f"{self.store_path} must contain a JSON list of memory items, "
                f"got {type(data).__name__}"
            )
        self.store = data

    def _flush(self) -> None:
        """
        Write `self.store` to `self.store_path` as indented JSON.

        The data goes to a sibling temporary file first and is then swapped in
        with `os.replace`, so an interrupted write cannot leave a truncated store.
        """
        tmp_path = f"{self.store_path}.tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(self.store, f, indent=2)
            os.replace(tmp_path, self.store_path)
        finally:
            # Only still present when the dump or the swap failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # ---------- scoring helpers ----------
    @staticmethod
    def _now() -> float:
        """Return the current time in seconds since the epoch."""
        return time.time()

    @staticmethod
    def _base_weight_from_text(text: str) -> float:
        """
        Heuristic importance of `text`, clamped to [0.2, 1.0].

        0.2 base + features:
        - caps/emphasis: more than 6 uppercase characters (+0.15)
        - contains numbers (often facts) (+0.1)
        - emotion keywords from EMOTION_WORDS (each keyword counted once)
        """
        t = text.strip()
        weight = 0.2
        if sum(1 for ch in t if ch.isupper()) > 6:
            weight += 0.15
        if any(ch.isdigit() for ch in t):
            weight += 0.1
        lower = t.lower()
        for word, boost in EMOTION_WORDS.items():
            # Anchor at a word start so inflections ("loved", "dreams") still
            # count but incidental substrings ("whatever" -> "hate",
            # "permission" -> "mission") do not.
            if re.search(r"\b" + re.escape(word), lower):
                weight += boost
        return max(0.2, min(weight, 1.0))

    @staticmethod
    def _decay(age_hours: float, half_life_hours: float = 48.0) -> float:
        """
        Exponential decay: 0.5 every `half_life_hours`.

        Returns 1.0 for a non-positive age.
        """
        if age_hours <= 0:
            return 1.0
        # factor = 0.5 ** (age/half_life)
        return 0.5 ** (age_hours / half_life_hours)

    # ---------- public API ----------
    def add(
        self,
        text: str,
        source: str = "user",
        tags: Optional[List[str]] = None,
        weight: Optional[float] = None,
    ) -> None:
        """
        Embed `text`, append it to the store and persist the store.

        Empty or whitespace-only text is ignored.

        Args:
            text: Memory content; stored with surrounding whitespace stripped.
            source: Free-form origin label saved with the item.
            tags: Optional labels; copied so later caller-side mutation does
                not change the stored item.
            weight: Explicit importance; when None it is derived from the text.
        """
        if not text or not text.strip():
            return

        emb = self.model.encode(text, convert_to_tensor=True).tolist()
        w = weight if weight is not None else self._base_weight_from_text(text)
        self.store.append({
            "text": text.strip(),
            "source": source,
            "tags": list(tags) if tags else [],
            "ts": self._now(),
            "weight": float(round(w, 4)),
            "embedding": emb,
        })
        self._flush()

    def _torch_tensor(self, x):
        """Convert `x` to a torch tensor."""
        # Lazy import torch to keep import time snappy
        import torch
        return torch.tensor(x)

    def search(
        self,
        query: str,
        top_k: int = 5,
        alpha: float = 0.65,
        beta: float = 0.35,
    ) -> List[Tuple[Dict[str, Any], float]]:
        """
        Returns list of (memory_item, score) sorted by score desc.
        score = alpha * cosine_similarity + beta * (weight * recency_decay)

        Scores are rounded to 4 decimals. An empty store or a non-positive
        `top_k` yields an empty list.
        """
        if not self.store or top_k <= 0:
            return []

        q_emb = self.model.encode(query, convert_to_tensor=True)
        # Stored embeddings come back from JSON as plain lists, i.e. a CPU
        # tensor; align device and dtype with the query so cos_sim also works
        # when the model runs on an accelerator.
        mem_embs = self._torch_tensor(
            [m["embedding"] for m in self.store]
        ).to(device=q_emb.device, dtype=q_emb.dtype)
        sims = util.cos_sim(q_emb, mem_embs).squeeze(0).tolist()  # length N

        now = self._now()
        scored: List[Tuple[int, float]] = []
        for i, (m, sim) in enumerate(zip(self.store, sims)):
            age_hours = (now - m["ts"]) / 3600.0
            weighted = m["weight"] * self._decay(age_hours)
            scored.append((i, float(alpha * sim + beta * weighted)))

        scored.sort(key=lambda pair: pair[1], reverse=True)
        return [
            (self.store[idx], float(round(sc, 4)))
            for idx, sc in scored[:top_k]
        ]

    def summarize_context(self, query: str, top_k: int = 5) -> str:
        """
        Lightweight summarizer over top_k hits.

        Returns a bullet list of the best-matching memory texts with their
        scores, or "No memory yet." when the search finds nothing.
        """
        hits = self.search(query, top_k=top_k)
        if not hits:
            return "No memory yet."
        bullets = [f"- {m['text']} (score: {sc})" for m, sc in hits]
        return "Relevant memories:\n" + "\n".join(bullets)