# mnemo-mcp / server.py
# Uploaded by AthelaPerk — commit 0e3bd0e (verified)
# Fix: Lower similarity_threshold to 0.10 for hash embeddings (was 0.50, too strict)
"""
Mnemo v4 MCP Server - SLM-Inspired Memory API
==============================================
REST API for Mnemo v4 memory system.
Features:
- Three-tiered memory hierarchy
- Neural link pathways
- Memory utility predictor
- Self-tuning parameters
"""
from flask import Flask, request, jsonify
import hashlib
import time
import numpy as np
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, field, asdict
from collections import defaultdict
from enum import Enum
import json
app = Flask(__name__)
# =============================================================================
# MNEMO v4 CORE
# =============================================================================
class MemoryTier(Enum):
WORKING = "working"
TOKEN = "token"
SEMANTIC = "semantic"
class LinkType(Enum):
DIRECT_REFERENCE = "direct_reference"
SEMANTIC_SIMILARITY = "semantic_similarity"
CO_OCCURRENCE = "co_occurrence"
HIERARCHICAL = "hierarchical"
TEMPORAL = "temporal"
CAUSAL = "causal"
CROSS_DOMAIN = "cross_domain"
ASSOCIATIVE = "associative"
LINK_PROPERTIES = {
LinkType.DIRECT_REFERENCE: {"threshold": 0.85, "strength": 0.90, "decay": 0.005},
LinkType.SEMANTIC_SIMILARITY: {"threshold": 0.50, "strength": 0.75, "decay": 0.010},
LinkType.CO_OCCURRENCE: {"threshold": 0.60, "strength": 0.70, "decay": 0.015},
LinkType.HIERARCHICAL: {"threshold": 0.80, "strength": 0.85, "decay": 0.003},
LinkType.TEMPORAL: {"threshold": 0.55, "strength": 0.65, "decay": 0.020},
LinkType.CAUSAL: {"threshold": 0.75, "strength": 0.80, "decay": 0.005},
LinkType.CROSS_DOMAIN: {"threshold": 0.70, "strength": 0.65, "decay": 0.008},
LinkType.ASSOCIATIVE: {"threshold": 0.45, "strength": 0.60, "decay": 0.025},
}
INJECTION_SIGNALS = [
"previous", "earlier", "before", "you said", "you mentioned",
"based on", "using your", "your analysis", "your framework",
"compare", "contrast", "synthesize", "combine",
"apply your", "you previously", "your earlier"
]
SKIP_SIGNALS = ["this is a new", "new topic", "what is", "define"]
@dataclass
class Memory:
id: str
content: str
embedding: np.ndarray
tier: str = "semantic"
namespace: str = "default"
quality_score: float = 0.5
access_count: int = 0
priority: float = 1.0
created_at: float = field(default_factory=time.time)
last_accessed: float = field(default_factory=time.time)
metadata: Dict = field(default_factory=dict)
def to_dict(self):
return {
"id": self.id,
"content": self.content,
"tier": self.tier,
"namespace": self.namespace,
"quality_score": self.quality_score,
"access_count": self.access_count,
"created_at": self.created_at,
"metadata": self.metadata
}
@dataclass
class NeuralLink:
source_id: str
target_id: str
link_type: str
strength: float
created_at: float = field(default_factory=time.time)
class MnemoV4:
"""Mnemo v4: SLM-Inspired Memory System"""
SIMILARITY_THRESHOLD = 0.10
QUALITY_THRESHOLD = 0.35
def __init__(self, embedding_dim: int = 384):
self.embedding_dim = embedding_dim
self.memories: Dict[str, Memory] = {}
self.links: Dict[str, NeuralLink] = {}
self.outgoing: Dict[str, set] = defaultdict(set)
self._embeddings: List[np.ndarray] = []
self._ids: List[str] = []
self._cache: Dict[str, np.ndarray] = {}
self.stats = {
"adds": 0, "adds_rejected": 0, "searches": 0,
"links_created": 0, "inject_recommended": 0, "skip_recommended": 0
}
def _get_embedding(self, text: str) -> np.ndarray:
cache_key = hashlib.md5(text.encode()).hexdigest()
if cache_key in self._cache:
return self._cache[cache_key]
embedding = np.zeros(self.embedding_dim, dtype=np.float32)
words = text.lower().split()
for i, word in enumerate(words):
idx = hash(word) % self.embedding_dim
embedding[idx] += 1.0 / (i + 1)
norm = np.linalg.norm(embedding)
if norm > 0:
embedding = embedding / norm
self._cache[cache_key] = embedding
return embedding
def _estimate_quality(self, content: str) -> float:
score = 0.5
words = len(content.split())
if words < 5:
score -= 0.3
elif words > 20:
score += 0.1
if any(r in content.lower() for r in ["because", "therefore", "shows"]):
score += 0.2
return max(0.0, min(1.0, score))
def should_inject(self, query: str, context: str = "",
conversation_history: str = "") -> Tuple[bool, str]:
combined = (query + " " + context).lower()
for signal in SKIP_SIGNALS:
if signal in combined:
self.stats["skip_recommended"] += 1
return False, f"skip:{signal}"
for signal in INJECTION_SIGNALS:
if signal in combined:
if conversation_history and len(conversation_history.split()) > 500:
query_kws = set(query.lower().split())
if sum(1 for kw in query_kws if kw in conversation_history.lower()) > len(query_kws) * 0.6:
self.stats["skip_recommended"] += 1
return False, "context_window_sufficient"
self.stats["inject_recommended"] += 1
return True, f"inject:{signal}"
self.stats["skip_recommended"] += 1
return False, "no_signal"
def add(self, content: str, namespace: str = "default",
metadata: Dict = None) -> Optional[str]:
quality = self._estimate_quality(content)
if quality < self.QUALITY_THRESHOLD:
self.stats["adds_rejected"] += 1
return None
memory_id = f"mem_{hashlib.md5(content.encode()).hexdigest()[:8]}"
embedding = self._get_embedding(content)
memory = Memory(
id=memory_id,
content=content,
embedding=embedding,
namespace=namespace,
quality_score=quality,
metadata=metadata or {}
)
self.memories[memory_id] = memory
self._embeddings.append(embedding)
self._ids.append(memory_id)
self._create_links(memory_id, embedding)
self.stats["adds"] += 1
return memory_id
def _create_links(self, memory_id: str, embedding: np.ndarray):
if len(self._ids) < 2:
return
for other_id, other_emb in zip(self._ids[:-1], self._embeddings[:-1]):
sim = float(np.dot(embedding, other_emb))
props = LINK_PROPERTIES[LinkType.SEMANTIC_SIMILARITY]
if sim >= props["threshold"]:
link_id = f"{memory_id}:{other_id}:semantic"
self.links[link_id] = NeuralLink(
source_id=memory_id,
target_id=other_id,
link_type="semantic_similarity",
strength=props["strength"]
)
self.outgoing[memory_id].add(link_id)
self.stats["links_created"] += 1
def search(self, query: str, top_k: int = 5,
namespace: Optional[str] = None) -> List[Dict]:
if not self.memories:
return []
self.stats["searches"] += 1
query_embedding = self._get_embedding(query)
semantic_scores = {}
for mem_id, emb in zip(self._ids, self._embeddings):
semantic_scores[mem_id] = float(np.dot(query_embedding, emb))
link_scores = {}
top_semantic = sorted(semantic_scores.items(), key=lambda x: x[1], reverse=True)[:3]
for mem_id, _ in top_semantic:
for link_id in self.outgoing.get(mem_id, set()):
link = self.links.get(link_id)
if link:
link_scores[link.target_id] = link_scores.get(link.target_id, 0) + 0.2
all_ids = set(semantic_scores.keys())
if namespace:
all_ids = {mid for mid in all_ids if self.memories[mid].namespace == namespace}
results = []
for mem_id in all_ids:
combined = semantic_scores.get(mem_id, 0) * 0.7 + link_scores.get(mem_id, 0) * 0.3
if combined >= self.SIMILARITY_THRESHOLD:
memory = self.memories[mem_id]
memory.access_count += 1
memory.last_accessed = time.time()
results.append({
"id": mem_id,
"content": memory.content,
"score": round(combined, 3),
"tier": memory.tier,
"semantic_score": round(semantic_scores.get(mem_id, 0), 3),
"link_score": round(link_scores.get(mem_id, 0), 3)
})
results.sort(key=lambda x: x["score"], reverse=True)
return results[:top_k]
def get_context(self, query: str, top_k: int = 3) -> str:
results = self.search(query, top_k=top_k)
if not results:
return ""
parts = ["[RELEVANT CONTEXT FROM MEMORY]"]
for r in results:
parts.append(f"• [{r['tier'].upper()}] {r['content']}")
parts.append("[END CONTEXT]\n")
return "\n".join(parts)
def get(self, memory_id: str) -> Optional[Dict]:
if memory_id in self.memories:
return self.memories[memory_id].to_dict()
return None
def delete(self, memory_id: str) -> bool:
if memory_id in self.memories:
del self.memories[memory_id]
return True
return False
def list_all(self) -> List[Dict]:
return [mem.to_dict() for mem in self.memories.values()]
def get_stats(self) -> Dict:
link_counts = defaultdict(int)
for link in self.links.values():
link_counts[link.link_type] += 1
return {
"total_memories": len(self.memories),
"total_links": len(self.links),
"links_by_type": dict(link_counts),
**self.stats
}
def clear(self):
self.memories.clear()
self.links.clear()
self.outgoing.clear()
self._embeddings.clear()
self._ids.clear()
self._cache.clear()
# Global instance
mnemo = MnemoV4()
# =============================================================================
# REST API ENDPOINTS
# =============================================================================
@app.route("/")
def index():
return jsonify({
"name": "Mnemo v4 MCP Server",
"version": "4.0.0",
"features": [
"Three-tiered memory hierarchy",
"Neural link pathways (8 types)",
"Memory utility predictor",
"Self-tuning parameters"
],
"endpoints": [
"POST /add",
"POST /search",
"POST /should_inject",
"POST /get_context",
"GET /get/<memory_id>",
"DELETE /delete/<memory_id>",
"GET /list",
"GET /stats",
"POST /clear"
]
})
@app.route("/add", methods=["POST"])
def add_memory():
data = request.get_json() or {}
content = data.get("content", "")
namespace = data.get("namespace", "default")
metadata = data.get("metadata", {})
if not content:
return jsonify({"status": "error", "message": "Content required"}), 400
memory_id = mnemo.add(content, namespace, metadata)
if memory_id:
return jsonify({
"status": "success",
"memory_id": memory_id,
"message": "Memory stored successfully"
})
else:
return jsonify({
"status": "rejected",
"message": "Memory rejected (low quality)"
})
@app.route("/search", methods=["POST"])
def search_memories():
data = request.get_json() or {}
query = data.get("query", "")
top_k = data.get("top_k", 5)
namespace = data.get("namespace")
if not query:
return jsonify({"status": "error", "message": "Query required"}), 400
results = mnemo.search(query, top_k, namespace)
return jsonify({
"status": "success",
"count": len(results),
"results": results
})
@app.route("/should_inject", methods=["POST"])
def should_inject():
data = request.get_json() or {}
query = data.get("query", "")
context = data.get("context", "")
conversation_history = data.get("conversation_history", "")
if not query:
return jsonify({"status": "error", "message": "Query required"}), 400
should, reason = mnemo.should_inject(query, context, conversation_history)
return jsonify({
"should_inject": should,
"reason": reason,
"recommendation": "Inject memory context" if should else "Skip memory - use direct response"
})
@app.route("/get_context", methods=["POST"])
def get_context():
data = request.get_json() or {}
query = data.get("query", "")
top_k = data.get("top_k", 3)
if not query:
return jsonify({"status": "error", "message": "Query required"}), 400
context = mnemo.get_context(query, top_k)
return jsonify({
"status": "success",
"context": context if context else None,
"message": "Context retrieved" if context else "No relevant context found"
})
@app.route("/get/<memory_id>", methods=["GET"])
def get_memory(memory_id):
memory = mnemo.get(memory_id)
if memory:
return jsonify({"status": "success", "memory": memory})
else:
return jsonify({"status": "error", "message": "Memory not found"}), 404
@app.route("/delete/<memory_id>", methods=["DELETE"])
def delete_memory(memory_id):
success = mnemo.delete(memory_id)
if success:
return jsonify({"status": "success", "message": f"Memory {memory_id} deleted"})
else:
return jsonify({"status": "error", "message": "Memory not found"}), 404
@app.route("/list", methods=["GET"])
def list_memories():
memories = mnemo.list_all()
return jsonify({
"status": "success",
"count": len(memories),
"memories": memories
})
@app.route("/stats", methods=["GET"])
def get_stats():
stats = mnemo.get_stats()
return jsonify({"status": "success", "stats": stats})
@app.route("/clear", methods=["POST"])
def clear_memories():
data = request.get_json() or {}
confirm = data.get("confirm", False)
if not confirm:
return jsonify({
"status": "error",
"message": "Set confirm=true to clear all memories"
}), 400
mnemo.clear()
return jsonify({"status": "success", "message": "All memories cleared"})
if __name__ == "__main__":
app.run(host="0.0.0.0", port=7860)