"""
Mnemo v4 MCP Server - SLM-Inspired Memory API
==============================================

REST API for Mnemo v4 memory system.

Features:
- Three-tiered memory hierarchy
- Neural link pathways
- Memory utility predictor
- Self-tuning parameters
"""

import hashlib
import json
import time
from collections import defaultdict
from dataclasses import asdict, dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Tuple

import numpy as np
from flask import Flask, jsonify, request

app = Flask(__name__)


# =============================================================================
# MNEMO v4 CORE
# =============================================================================

class MemoryTier(Enum):
    """Names of the memory tiers a ``Memory`` can be tagged with."""

    WORKING = "working"
    TOKEN = "token"
    SEMANTIC = "semantic"


class LinkType(Enum):
    """Kinds of links between memories.

    Only ``SEMANTIC_SIMILARITY`` links are created by this module; the other
    members are declared together with their properties in
    ``LINK_PROPERTIES``.
    """

    DIRECT_REFERENCE = "direct_reference"
    SEMANTIC_SIMILARITY = "semantic_similarity"
    CO_OCCURRENCE = "co_occurrence"
    HIERARCHICAL = "hierarchical"
    TEMPORAL = "temporal"
    CAUSAL = "causal"
    CROSS_DOMAIN = "cross_domain"
    ASSOCIATIVE = "associative"


# Per-link-type tuning values. This module reads only the "threshold" and
# "strength" entries of SEMANTIC_SIMILARITY (see MnemoV4._create_links).
LINK_PROPERTIES = {
    LinkType.DIRECT_REFERENCE: {"threshold": 0.85, "strength": 0.90, "decay": 0.005},
    LinkType.SEMANTIC_SIMILARITY: {"threshold": 0.50, "strength": 0.75, "decay": 0.010},
    LinkType.CO_OCCURRENCE: {"threshold": 0.60, "strength": 0.70, "decay": 0.015},
    LinkType.HIERARCHICAL: {"threshold": 0.80, "strength": 0.85, "decay": 0.003},
    LinkType.TEMPORAL: {"threshold": 0.55, "strength": 0.65, "decay": 0.020},
    LinkType.CAUSAL: {"threshold": 0.75, "strength": 0.80, "decay": 0.005},
    LinkType.CROSS_DOMAIN: {"threshold": 0.70, "strength": 0.65, "decay": 0.008},
    LinkType.ASSOCIATIVE: {"threshold": 0.45, "strength": 0.60, "decay": 0.025},
}

# Substrings that make should_inject() recommend injecting memory context.
INJECTION_SIGNALS = [
    "previous", "earlier", "before", "you said", "you mentioned",
    "based on", "using your", "your analysis", "your framework",
    "compare", "contrast", "synthesize", "combine", "apply your",
    "you previously", "your earlier"
]

# Substrings that make should_inject() recommend skipping memory context.
# They are checked before INJECTION_SIGNALS and therefore take precedence.
SKIP_SIGNALS = ["this is a new", "new topic", "what is", "define"]


@dataclass
class Memory:
    """A stored piece of content together with its embedding and bookkeeping."""

    id: str
    content: str
    embedding: np.ndarray
    tier: str = MemoryTier.SEMANTIC.value
    namespace: str = "default"
    quality_score: float = 0.5
    access_count: int = 0
    priority: float = 1.0
    created_at: float = field(default_factory=time.time)
    last_accessed: float = field(default_factory=time.time)
    metadata: Dict = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Return the JSON-serialisable view of this memory.

        The embedding, ``priority`` and ``last_accessed`` are not included.
        """
        return {
            "id": self.id,
            "content": self.content,
            "tier": self.tier,
            "namespace": self.namespace,
            "quality_score": self.quality_score,
            "access_count": self.access_count,
            "created_at": self.created_at,
            "metadata": self.metadata
        }


@dataclass
class NeuralLink:
    """A directed link from ``source_id`` to ``target_id``."""

    source_id: str
    target_id: str
    link_type: str
    strength: float
    created_at: float = field(default_factory=time.time)


class MnemoV4:
    """Mnemo v4: SLM-Inspired Memory System"""

    # Minimum combined score for a memory to appear in search results.
    SIMILARITY_THRESHOLD = 0.10
    # Minimum estimated quality for add() to accept content.
    QUALITY_THRESHOLD = 0.35

    # Weights used by search() to blend the two score components.
    SEMANTIC_WEIGHT = 0.7
    LINK_WEIGHT = 0.3
    # Link boost given to every target linked from one of the top seeds.
    LINK_BOOST = 0.2
    # Number of best semantic matches whose outgoing links are followed.
    LINK_SEED_COUNT = 3

    # should_inject(): a history longer than this many words may already
    # cover the query ...
    HISTORY_MIN_WORDS = 500
    # ... when more than this fraction of the query keywords occur in it.
    HISTORY_OVERLAP_RATIO = 0.6

    # Upper bound on cached embeddings; the oldest entry is evicted first.
    EMBEDDING_CACHE_SIZE = 10_000

    def __init__(self, embedding_dim: int = 384):
        """Create an empty store producing ``embedding_dim``-sized embeddings."""
        self.embedding_dim = embedding_dim
        self.memories: Dict[str, Memory] = {}
        self.links: Dict[str, NeuralLink] = {}
        # memory id -> ids of links whose source is that memory
        self.outgoing: Dict[str, set] = defaultdict(set)
        # Parallel lists: _embeddings[i] is the embedding of memory _ids[i].
        self._embeddings: List[np.ndarray] = []
        self._ids: List[str] = []
        self._cache: Dict[str, np.ndarray] = {}

        self.stats = {
            "adds": 0,
            "adds_rejected": 0,
            "searches": 0,
            "links_created": 0,
            "inject_recommended": 0,
            "skip_recommended": 0
        }

    def _get_embedding(self, text: str) -> np.ndarray:
        """Return a unit-length hashed bag-of-words embedding for ``text``.

        Each whitespace-separated, lower-cased word is hashed to a bucket and
        contributes ``1 / (position + 1)``, so earlier words weigh more.
        Results are cached by the MD5 of ``text``; the returned array is the
        cached object and must not be mutated by callers.
        """
        cache_key = hashlib.md5(text.encode()).hexdigest()
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached

        embedding = np.zeros(self.embedding_dim, dtype=np.float32)
        for position, word in enumerate(text.lower().split()):
            # NOTE: built-in hash() of str is salted per process unless
            # PYTHONHASHSEED is set, so buckets differ between runs.
            embedding[hash(word) % self.embedding_dim] += 1.0 / (position + 1)

        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        # Every distinct query lands here, so keep the cache bounded.
        if len(self._cache) >= self.EMBEDDING_CACHE_SIZE:
            self._cache.pop(next(iter(self._cache)))
        self._cache[cache_key] = embedding
        return embedding

    def _estimate_quality(self, content: str) -> float:
        """Heuristically score ``content`` in ``[0.0, 1.0]``.

        Starts at 0.5; fewer than 5 words costs 0.3, more than 20 words adds
        0.1, and containing "because", "therefore" or "shows" adds 0.2.
        """
        score = 0.5
        word_count = len(content.split())

        if word_count < 5:
            score -= 0.3
        elif word_count > 20:
            score += 0.1

        lowered = content.lower()
        if any(marker in lowered for marker in ("because", "therefore", "shows")):
            score += 0.2

        return max(0.0, min(1.0, score))

    def should_inject(self, query: str, context: str = "",
                      conversation_history: str = "") -> Tuple[bool, str]:
        """Decide whether memory context should be injected for ``query``.

        Returns ``(decision, reason)``. Skip signals are checked first, then
        injection signals, both as substrings of the lower-cased
        ``query + " " + context``. A matched injection signal is overridden
        with ``"context_window_sufficient"`` when ``conversation_history`` is
        long and already contains most of the query's keywords. Updates the
        ``inject_recommended`` / ``skip_recommended`` counters.
        """
        combined = (query + " " + context).lower()

        for signal in SKIP_SIGNALS:
            if signal in combined:
                self.stats["skip_recommended"] += 1
                return False, f"skip:{signal}"

        matched = next((s for s in INJECTION_SIGNALS if s in combined), None)
        if matched is None:
            self.stats["skip_recommended"] += 1
            return False, "no_signal"

        if (conversation_history
                and len(conversation_history.split()) > self.HISTORY_MIN_WORDS):
            history = conversation_history.lower()  # lower-case once, not per keyword
            query_kws = set(query.lower().split())
            overlap = sum(1 for kw in query_kws if kw in history)
            if overlap > len(query_kws) * self.HISTORY_OVERLAP_RATIO:
                self.stats["skip_recommended"] += 1
                return False, "context_window_sufficient"

        self.stats["inject_recommended"] += 1
        return True, f"inject:{matched}"

    def add(self, content: str, namespace: str = "default",
            metadata: Optional[Dict] = None) -> Optional[str]:
        """Store ``content`` and return its memory id.

        Returns ``None`` (and counts a rejection) when the estimated quality
        is below ``QUALITY_THRESHOLD``. The id is derived from the first 8 hex
        digits of the content's MD5, so adding the same content again yields
        the same id: the stored record is replaced, but the search index is
        not given a second entry and no links are re-created.
        """
        quality = self._estimate_quality(content)
        if quality < self.QUALITY_THRESHOLD:
            self.stats["adds_rejected"] += 1
            return None

        memory_id = f"mem_{hashlib.md5(content.encode()).hexdigest()[:8]}"
        embedding = self._get_embedding(content)
        is_new = memory_id not in self.memories

        self.memories[memory_id] = Memory(
            id=memory_id,
            content=content,
            embedding=embedding,
            namespace=namespace,
            quality_score=quality,
            metadata=metadata or {}
        )

        if is_new:
            self._ids.append(memory_id)
            self._embeddings.append(embedding)
            self._create_links(memory_id, embedding)
        else:
            # Keep the index aligned with the record that was just replaced.
            self._embeddings[self._ids.index(memory_id)] = embedding

        self.stats["adds"] += 1
        return memory_id

    def _create_links(self, memory_id: str, embedding: np.ndarray):
        """Link ``memory_id`` to every other indexed memory similar enough.

        Similarity is the dot product of the embeddings; a link is created
        when it reaches the SEMANTIC_SIMILARITY threshold.
        """
        props = LINK_PROPERTIES[LinkType.SEMANTIC_SIMILARITY]
        link_type = LinkType.SEMANTIC_SIMILARITY.value

        for other_id, other_emb in zip(self._ids, self._embeddings):
            if other_id == memory_id:
                continue  # never link a memory to itself
            if float(np.dot(embedding, other_emb)) < props["threshold"]:
                continue

            link_id = f"{memory_id}:{other_id}:semantic"
            self.links[link_id] = NeuralLink(
                source_id=memory_id,
                target_id=other_id,
                link_type=link_type,
                strength=props["strength"]
            )
            self.outgoing[memory_id].add(link_id)
            self.stats["links_created"] += 1

    def search(self, query: str, top_k: int = 5,
               namespace: Optional[str] = None) -> List[Dict]:
        """Return up to ``top_k`` memories ranked by combined score.

        The combined score is ``SEMANTIC_WEIGHT * semantic + LINK_WEIGHT *
        link``, where ``semantic`` is the dot product with the query embedding
        and ``link`` is ``LINK_BOOST`` for each link pointing at the memory
        from one of the ``LINK_SEED_COUNT`` best semantic matches. Memories
        scoring below ``SIMILARITY_THRESHOLD`` are dropped. When ``namespace``
        is truthy, only memories in that namespace are returned. Returned
        memories have their ``access_count`` and ``last_accessed`` updated.
        """
        if not self.memories:
            return []

        self.stats["searches"] += 1
        query_embedding = self._get_embedding(query)

        semantic_scores = {
            mem_id: float(np.dot(query_embedding, emb))
            for mem_id, emb in zip(self._ids, self._embeddings)
        }

        link_scores: Dict[str, float] = {}
        seeds = sorted(semantic_scores.items(), key=lambda item: item[1],
                       reverse=True)[:self.LINK_SEED_COUNT]
        for seed_id, _ in seeds:
            for link_id in self.outgoing.get(seed_id, ()):
                link = self.links.get(link_id)
                if link:
                    link_scores[link.target_id] = (
                        link_scores.get(link.target_id, 0) + self.LINK_BOOST
                    )

        ranked = []
        for mem_id, semantic in semantic_scores.items():
            if namespace and self.memories[mem_id].namespace != namespace:
                continue
            combined = (semantic * self.SEMANTIC_WEIGHT
                        + link_scores.get(mem_id, 0) * self.LINK_WEIGHT)
            if combined >= self.SIMILARITY_THRESHOLD:
                ranked.append((combined, mem_id))

        ranked.sort(key=lambda pair: pair[0], reverse=True)

        results = []
        now = time.time()
        # max() keeps a negative top_k from turning into a "drop last N" slice.
        for combined, mem_id in ranked[:max(top_k, 0)]:
            memory = self.memories[mem_id]
            # Only memories actually handed back count as accessed.
            memory.access_count += 1
            memory.last_accessed = now
            results.append({
                "id": mem_id,
                "content": memory.content,
                "score": round(combined, 3),
                "tier": memory.tier,
                "semantic_score": round(semantic_scores[mem_id], 3),
                "link_score": round(link_scores.get(mem_id, 0), 3)
            })

        return results

    def get_context(self, query: str, top_k: int = 3) -> str:
        """Format the search results for ``query`` as a context text block.

        Returns an empty string when the search yields nothing.
        """
        results = self.search(query, top_k=top_k)
        if not results:
            return ""

        parts = ["[RELEVANT CONTEXT FROM MEMORY]"]
        parts.extend(f"• [{r['tier'].upper()}] {r['content']}" for r in results)
        parts.append("[END CONTEXT]\n")
        return "\n".join(parts)

    def get(self, memory_id: str) -> Optional[Dict]:
        """Return the dict form of ``memory_id``, or ``None`` if unknown."""
        memory = self.memories.get(memory_id)
        return memory.to_dict() if memory is not None else None

    def delete(self, memory_id: str) -> bool:
        """Remove ``memory_id`` everywhere; return ``False`` if it is unknown.

        Besides the record itself, the search index entry and every link that
        starts or ends at the memory are removed, so later searches never
        reference a deleted memory.
        """
        if memory_id not in self.memories:
            return False

        del self.memories[memory_id]

        if memory_id in self._ids:
            position = self._ids.index(memory_id)
            del self._ids[position]
            del self._embeddings[position]

        stale_link_ids = [
            link_id for link_id, link in self.links.items()
            if memory_id in (link.source_id, link.target_id)
        ]
        for link_id in stale_link_ids:
            link = self.links.pop(link_id)
            # .get() avoids creating empty defaultdict entries.
            source_links = self.outgoing.get(link.source_id)
            if source_links is not None:
                source_links.discard(link_id)
        self.outgoing.pop(memory_id, None)

        return True

    def list_all(self) -> List[Dict]:
        """Return the dict form of every stored memory."""
        return [mem.to_dict() for mem in self.memories.values()]

    def get_stats(self) -> Dict:
        """Return the counters plus current memory and link totals."""
        link_counts = defaultdict(int)
        for link in self.links.values():
            link_counts[link.link_type] += 1

        return {
            "total_memories": len(self.memories),
            "total_links": len(self.links),
            "links_by_type": dict(link_counts),
            **self.stats
        }

    def clear(self):
        """Drop all memories, links, index entries and cached embeddings.

        The ``stats`` counters are left untouched.
        """
        self.memories.clear()
        self.links.clear()
        self.outgoing.clear()
        self._embeddings.clear()
        self._ids.clear()
        self._cache.clear()


# Global instance
mnemo = MnemoV4()


# =============================================================================
# REST API ENDPOINTS
# =============================================================================

def _json_body() -> Dict:
    """Return the request's JSON object, or ``{}`` if absent or not an object."""
    data = request.get_json(silent=True)
    return data if isinstance(data, dict) else {}


def _error(message: str, status: int = 400):
    """Build the JSON error response shared by all endpoints."""
    return jsonify({"status": "error", "message": message}), status


def _parse_top_k(data: Dict, default: int) -> Optional[int]:
    """Return ``top_k`` from ``data``, or ``None`` if not a positive integer."""
    top_k = data.get("top_k", default)
    # bool is a subclass of int; reject it explicitly.
    if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 1:
        return None
    return top_k


@app.route("/")
def index():
    """Describe the service and list its endpoints."""
    return jsonify({
        "name": "Mnemo v4 MCP Server",
        "version": "4.0.0",
        "features": [
            "Three-tiered memory hierarchy",
            "Neural link pathways (8 types)",
            "Memory utility predictor",
            "Self-tuning parameters"
        ],
        "endpoints": [
            "POST /add",
            "POST /search",
            "POST /should_inject",
            "POST /get_context",
            "GET /get/<memory_id>",
            "DELETE /delete/<memory_id>",
            "GET /list",
            "GET /stats",
            "POST /clear"
        ]
    })


@app.route("/add", methods=["POST"])
def add_memory():
    """Store a memory from ``content`` (required), ``namespace``, ``metadata``."""
    data = _json_body()
    content = data.get("content", "")
    namespace = data.get("namespace", "default")
    metadata = data.get("metadata", {})

    if not isinstance(content, str) or not content:
        return _error("Content required")

    memory_id = mnemo.add(content, namespace, metadata)

    if memory_id:
        return jsonify({
            "status": "success",
            "memory_id": memory_id,
            "message": "Memory stored successfully"
        })
    return jsonify({
        "status": "rejected",
        "message": "Memory rejected (low quality)"
    })


@app.route("/search", methods=["POST"])
def search_memories():
    """Search memories for ``query`` (required); optional ``top_k``, ``namespace``."""
    data = _json_body()
    query = data.get("query", "")
    namespace = data.get("namespace")

    if not isinstance(query, str) or not query:
        return _error("Query required")

    top_k = _parse_top_k(data, 5)
    if top_k is None:
        return _error("top_k must be a positive integer")

    results = mnemo.search(query, top_k, namespace)
    return jsonify({
        "status": "success",
        "count": len(results),
        "results": results
    })


@app.route("/should_inject", methods=["POST"])
def should_inject():
    """Report whether memory context should be injected for ``query``."""
    data = _json_body()
    query = data.get("query", "")
    # Treat explicit JSON null like a missing field.
    context = data.get("context") or ""
    conversation_history = data.get("conversation_history") or ""

    if not isinstance(query, str) or not query:
        return _error("Query required")
    if not isinstance(context, str) or not isinstance(conversation_history, str):
        return _error("context and conversation_history must be strings")

    should, reason = mnemo.should_inject(query, context, conversation_history)
    return jsonify({
        "should_inject": should,
        "reason": reason,
        "recommendation": "Inject memory context" if should else "Skip memory - use direct response"
    })


@app.route("/get_context", methods=["POST"])
def get_context():
    """Return a formatted context block for ``query`` (required)."""
    data = _json_body()
    query = data.get("query", "")

    if not isinstance(query, str) or not query:
        return _error("Query required")

    top_k = _parse_top_k(data, 3)
    if top_k is None:
        return _error("top_k must be a positive integer")

    context = mnemo.get_context(query, top_k)
    return jsonify({
        "status": "success",
        "context": context if context else None,
        "message": "Context retrieved" if context else "No relevant context found"
    })


@app.route("/get/<memory_id>", methods=["GET"])
def get_memory(memory_id):
    """Return the memory identified by the ``memory_id`` URL segment."""
    memory = mnemo.get(memory_id)
    if memory:
        return jsonify({"status": "success", "memory": memory})
    return _error("Memory not found", 404)


@app.route("/delete/<memory_id>", methods=["DELETE"])
def delete_memory(memory_id):
    """Delete the memory identified by the ``memory_id`` URL segment."""
    if mnemo.delete(memory_id):
        return jsonify({"status": "success", "message": f"Memory {memory_id} deleted"})
    return _error("Memory not found", 404)


@app.route("/list", methods=["GET"])
def list_memories():
    """Return every stored memory."""
    memories = mnemo.list_all()
    return jsonify({
        "status": "success",
        "count": len(memories),
        "memories": memories
    })


@app.route("/stats", methods=["GET"])
def get_stats():
    """Return the store's statistics."""
    stats = mnemo.get_stats()
    return jsonify({"status": "success", "stats": stats})


@app.route("/clear", methods=["POST"])
def clear_memories():
    """Clear all memories; requires a truthy ``confirm`` in the JSON body."""
    data = _json_body()
    if not data.get("confirm", False):
        return _error("Set confirm=true to clear all memories")

    mnemo.clear()
    return jsonify({"status": "success", "message": "All memories cleared"})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)