# mnemo-mcp / server.py
# Uploaded by AthelaPerk — commit 0e3bd0e (verified)
# Fix: Lower similarity_threshold to 0.10 for hash embeddings (was 0.50, too strict)
"""
Mnemo v4 MCP Server - SLM-Inspired Memory API
==============================================
REST API for Mnemo v4 memory system.
Features:
- Three-tiered memory hierarchy
- Neural link pathways
- Memory utility predictor
- Self-tuning parameters
"""
from flask import Flask, request, jsonify
import hashlib
import time
import numpy as np
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, field, asdict
from collections import defaultdict
from enum import Enum
import json
app = Flask(__name__)
# =============================================================================
# MNEMO v4 CORE
# =============================================================================
class MemoryTier(Enum):
WORKING = "working"
TOKEN = "token"
SEMANTIC = "semantic"
class LinkType(Enum):
DIRECT_REFERENCE = "direct_reference"
SEMANTIC_SIMILARITY = "semantic_similarity"
CO_OCCURRENCE = "co_occurrence"
HIERARCHICAL = "hierarchical"
TEMPORAL = "temporal"
CAUSAL = "causal"
CROSS_DOMAIN = "cross_domain"
ASSOCIATIVE = "associative"
LINK_PROPERTIES = {
LinkType.DIRECT_REFERENCE: {"threshold": 0.85, "strength": 0.90, "decay": 0.005},
LinkType.SEMANTIC_SIMILARITY: {"threshold": 0.50, "strength": 0.75, "decay": 0.010},
LinkType.CO_OCCURRENCE: {"threshold": 0.60, "strength": 0.70, "decay": 0.015},
LinkType.HIERARCHICAL: {"threshold": 0.80, "strength": 0.85, "decay": 0.003},
LinkType.TEMPORAL: {"threshold": 0.55, "strength": 0.65, "decay": 0.020},
LinkType.CAUSAL: {"threshold": 0.75, "strength": 0.80, "decay": 0.005},
LinkType.CROSS_DOMAIN: {"threshold": 0.70, "strength": 0.65, "decay": 0.008},
LinkType.ASSOCIATIVE: {"threshold": 0.45, "strength": 0.60, "decay": 0.025},
}
INJECTION_SIGNALS = [
"previous", "earlier", "before", "you said", "you mentioned",
"based on", "using your", "your analysis", "your framework",
"compare", "contrast", "synthesize", "combine",
"apply your", "you previously", "your earlier"
]
SKIP_SIGNALS = ["this is a new", "new topic", "what is", "define"]
@dataclass
class Memory:
id: str
content: str
embedding: np.ndarray
tier: str = "semantic"
namespace: str = "default"
quality_score: float = 0.5
access_count: int = 0
priority: float = 1.0
created_at: float = field(default_factory=time.time)
last_accessed: float = field(default_factory=time.time)
metadata: Dict = field(default_factory=dict)
def to_dict(self):
return {
"id": self.id,
"content": self.content,
"tier": self.tier,
"namespace": self.namespace,
"quality_score": self.quality_score,
"access_count": self.access_count,
"created_at": self.created_at,
"metadata": self.metadata
}
@dataclass
class NeuralLink:
source_id: str
target_id: str
link_type: str
strength: float
created_at: float = field(default_factory=time.time)
class MnemoV4:
"""Mnemo v4: SLM-Inspired Memory System"""
SIMILARITY_THRESHOLD = 0.10
QUALITY_THRESHOLD = 0.35
def __init__(self, embedding_dim: int = 384):
self.embedding_dim = embedding_dim
self.memories: Dict[str, Memory] = {}
self.links: Dict[str, NeuralLink] = {}
self.outgoing: Dict[str, set] = defaultdict(set)
self._embeddings: List[np.ndarray] = []
self._ids: List[str] = []
self._cache: Dict[str, np.ndarray] = {}
self.stats = {
"adds": 0, "adds_rejected": 0, "searches": 0,
"links_created": 0, "inject_recommended": 0, "skip_recommended": 0
}
def _get_embedding(self, text: str) -> np.ndarray:
cache_key = hashlib.md5(text.encode()).hexdigest()
if cache_key in self._cache:
return self._cache[cache_key]
embedding = np.zeros(self.embedding_dim, dtype=np.float32)
words = text.lower().split()
for i, word in enumerate(words):
idx = hash(word) % self.embedding_dim
embedding[idx] += 1.0 / (i + 1)
norm = np.linalg.norm(embedding)
if norm > 0:
embedding = embedding / norm
self._cache[cache_key] = embedding
return embedding
def _estimate_quality(self, content: str) -> float:
score = 0.5
words = len(content.split())
if words < 5:
score -= 0.3
elif words > 20:
score += 0.1
if any(r in content.lower() for r in ["because", "therefore", "shows"]):
score += 0.2
return max(0.0, min(1.0, score))
def should_inject(self, query: str, context: str = "",
conversation_history: str = "") -> Tuple[bool, str]:
combined = (query + " " + context).lower()
for signal in SKIP_SIGNALS:
if signal in combined:
self.stats["skip_recommended"] += 1
return False, f"skip:{signal}"
for signal in INJECTION_SIGNALS:
if signal in combined:
if conversation_history and len(conversation_history.split()) > 500:
query_kws = set(query.lower().split())
if sum(1 for kw in query_kws if kw in conversation_history.lower()) > len(query_kws) * 0.6:
self.stats["skip_recommended"] += 1
return False, "context_window_sufficient"
self.stats["inject_recommended"] += 1
return True, f"inject:{signal}"
self.stats["skip_recommended"] += 1
return False, "no_signal"
def add(self, content: str, namespace: str = "default",
metadata: Dict = None) -> Optional[str]:
quality = self._estimate_quality(content)
if quality < self.QUALITY_THRESHOLD:
self.stats["adds_rejected"] += 1
return None
memory_id = f"mem_{hashlib.md5(content.encode()).hexdigest()[:8]}"
embedding = self._get_embedding(content)
memory = Memory(
id=memory_id,
content=content,
embedding=embedding,
namespace=namespace,
quality_score=quality,
metadata=metadata or {}
)
self.memories[memory_id] = memory
self._embeddings.append(embedding)
self._ids.append(memory_id)
self._create_links(memory_id, embedding)
self.stats["adds"] += 1
return memory_id
def _create_links(self, memory_id: str, embedding: np.ndarray):
if len(self._ids) < 2:
return
for other_id, other_emb in zip(self._ids[:-1], self._embeddings[:-1]):
sim = float(np.dot(embedding, other_emb))
props = LINK_PROPERTIES[LinkType.SEMANTIC_SIMILARITY]
if sim >= props["threshold"]:
link_id = f"{memory_id}:{other_id}:semantic"
self.links[link_id] = NeuralLink(
source_id=memory_id,
target_id=other_id,
link_type="semantic_similarity",
strength=props["strength"]
)
self.outgoing[memory_id].add(link_id)
self.stats["links_created"] += 1
def search(self, query: str, top_k: int = 5,
namespace: Optional[str] = None) -> List[Dict]:
if not self.memories:
return []
self.stats["searches"] += 1
query_embedding = self._get_embedding(query)
semantic_scores = {}
for mem_id, emb in zip(self._ids, self._embeddings):
semantic_scores[mem_id] = float(np.dot(query_embedding, emb))
link_scores = {}
top_semantic = sorted(semantic_scores.items(), key=lambda x: x[1], reverse=True)[:3]
for mem_id, _ in top_semantic:
for link_id in self.outgoing.get(mem_id, set()):
link = self.links.get(link_id)
if link:
link_scores[link.target_id] = link_scores.get(link.target_id, 0) + 0.2
all_ids = set(semantic_scores.keys())
if namespace:
all_ids = {mid for mid in all_ids if self.memories[mid].namespace == namespace}
results = []
for mem_id in all_ids:
combined = semantic_scores.get(mem_id, 0) * 0.7 + link_scores.get(mem_id, 0) * 0.3
if combined >= self.SIMILARITY_THRESHOLD:
memory = self.memories[mem_id]
memory.access_count += 1
memory.last_accessed = time.time()
results.append({
"id": mem_id,
"content": memory.content,
"score": round(combined, 3),
"tier": memory.tier,
"semantic_score": round(semantic_scores.get(mem_id, 0), 3),
"link_score": round(link_scores.get(mem_id, 0), 3)
})
results.sort(key=lambda x: x["score"], reverse=True)
return results[:top_k]
def get_context(self, query: str, top_k: int = 3) -> str:
results = self.search(query, top_k=top_k)
if not results:
return ""
parts = ["[RELEVANT CONTEXT FROM MEMORY]"]
for r in results:
parts.append(f"• [{r['tier'].upper()}] {r['content']}")
parts.append("[END CONTEXT]\n")
return "\n".join(parts)
def get(self, memory_id: str) -> Optional[Dict]:
if memory_id in self.memories:
return self.memories[memory_id].to_dict()
return None
def delete(self, memory_id: str) -> bool:
if memory_id in self.memories:
del self.memories[memory_id]
return True
return False
def list_all(self) -> List[Dict]:
return [mem.to_dict() for mem in self.memories.values()]
def get_stats(self) -> Dict:
link_counts = defaultdict(int)
for link in self.links.values():
link_counts[link.link_type] += 1
return {
"total_memories": len(self.memories),
"total_links": len(self.links),
"links_by_type": dict(link_counts),
**self.stats
}
def clear(self):
self.memories.clear()
self.links.clear()
self.outgoing.clear()
self._embeddings.clear()
self._ids.clear()
self._cache.clear()
# Global instance
mnemo = MnemoV4()
# =============================================================================
# REST API ENDPOINTS
# =============================================================================
@app.route("/")
def index():
return jsonify({
"name": "Mnemo v4 MCP Server",
"version": "4.0.0",
"features": [
"Three-tiered memory hierarchy",
"Neural link pathways (8 types)",
"Memory utility predictor",
"Self-tuning parameters"
],
"endpoints": [
"POST /add",
"POST /search",
"POST /should_inject",
"POST /get_context",
"GET /get/<memory_id>",
"DELETE /delete/<memory_id>",
"GET /list",
"GET /stats",
"POST /clear"
]
})
@app.route("/add", methods=["POST"])
def add_memory():
data = request.get_json() or {}
content = data.get("content", "")
namespace = data.get("namespace", "default")
metadata = data.get("metadata", {})
if not content:
return jsonify({"status": "error", "message": "Content required"}), 400
memory_id = mnemo.add(content, namespace, metadata)
if memory_id:
return jsonify({
"status": "success",
"memory_id": memory_id,
"message": "Memory stored successfully"
})
else:
return jsonify({
"status": "rejected",
"message": "Memory rejected (low quality)"
})
@app.route("/search", methods=["POST"])
def search_memories():
data = request.get_json() or {}
query = data.get("query", "")
top_k = data.get("top_k", 5)
namespace = data.get("namespace")
if not query:
return jsonify({"status": "error", "message": "Query required"}), 400
results = mnemo.search(query, top_k, namespace)
return jsonify({
"status": "success",
"count": len(results),
"results": results
})
@app.route("/should_inject", methods=["POST"])
def should_inject():
data = request.get_json() or {}
query = data.get("query", "")
context = data.get("context", "")
conversation_history = data.get("conversation_history", "")
if not query:
return jsonify({"status": "error", "message": "Query required"}), 400
should, reason = mnemo.should_inject(query, context, conversation_history)
return jsonify({
"should_inject": should,
"reason": reason,
"recommendation": "Inject memory context" if should else "Skip memory - use direct response"
})
@app.route("/get_context", methods=["POST"])
def get_context():
data = request.get_json() or {}
query = data.get("query", "")
top_k = data.get("top_k", 3)
if not query:
return jsonify({"status": "error", "message": "Query required"}), 400
context = mnemo.get_context(query, top_k)
return jsonify({
"status": "success",
"context": context if context else None,
"message": "Context retrieved" if context else "No relevant context found"
})
@app.route("/get/<memory_id>", methods=["GET"])
def get_memory(memory_id):
memory = mnemo.get(memory_id)
if memory:
return jsonify({"status": "success", "memory": memory})
else:
return jsonify({"status": "error", "message": "Memory not found"}), 404
@app.route("/delete/<memory_id>", methods=["DELETE"])
def delete_memory(memory_id):
success = mnemo.delete(memory_id)
if success:
return jsonify({"status": "success", "message": f"Memory {memory_id} deleted"})
else:
return jsonify({"status": "error", "message": "Memory not found"}), 404
@app.route("/list", methods=["GET"])
def list_memories():
memories = mnemo.list_all()
return jsonify({
"status": "success",
"count": len(memories),
"memories": memories
})
@app.route("/stats", methods=["GET"])
def get_stats():
stats = mnemo.get_stats()
return jsonify({"status": "success", "stats": stats})
@app.route("/clear", methods=["POST"])
def clear_memories():
data = request.get_json() or {}
confirm = data.get("confirm", False)
if not confirm:
return jsonify({
"status": "error",
"message": "Set confirm=true to clear all memories"
}), 400
mnemo.clear()
return jsonify({"status": "success", "message": "All memories cleared"})
if __name__ == "__main__":
app.run(host="0.0.0.0", port=7860)