Spaces:

AthelaPerk
/

mnemo-mcp

Running

File size: 15,338 Bytes

"""
Mnemo v4 MCP Server - SLM-Inspired Memory API
==============================================

REST API for Mnemo v4 memory system.

Features:
- Three-tiered memory hierarchy
- Neural link pathways
- Memory utility predictor
- Self-tuning parameters
"""

from flask import Flask, request, jsonify
import hashlib
import time
import numpy as np
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, field, asdict
from collections import defaultdict
from enum import Enum
import json

# Flask application object; the @app.route decorators further down register
# their handlers on it, and the __main__ guard at the bottom runs it.
app = Flask(__name__)

# =============================================================================
# MNEMO v4 CORE
# =============================================================================

class MemoryTier(Enum):
    """Names of the memory tiers.

    NOTE(review): nothing else in the visible code references this enum;
    ``Memory.tier`` stores a plain string that defaults to ``"semantic"``.
    """
    WORKING = "working"
    TOKEN = "token"
    SEMANTIC = "semantic"


class LinkType(Enum):
    """Kinds of link that can connect two memories.

    Each member is a key of ``LINK_PROPERTIES``. In the visible code only
    ``SEMANTIC_SIMILARITY`` is ever used to create a link
    (see ``MnemoV4._create_links``).
    """
    DIRECT_REFERENCE = "direct_reference"
    SEMANTIC_SIMILARITY = "semantic_similarity"
    CO_OCCURRENCE = "co_occurrence"
    HIERARCHICAL = "hierarchical"
    TEMPORAL = "temporal"
    CAUSAL = "causal"
    CROSS_DOMAIN = "cross_domain"
    ASSOCIATIVE = "associative"


# Per-link-type tuning values, keyed by LinkType member.
# Columns of the table below: (link type, threshold, strength, decay).
# In the visible code only the SEMANTIC_SIMILARITY row's "threshold" and
# "strength" are read (by MnemoV4._create_links); "decay" is not read anywhere
# in this file.
LINK_PROPERTIES = {
    kind: {"threshold": threshold, "strength": strength, "decay": decay}
    for kind, threshold, strength, decay in (
        (LinkType.DIRECT_REFERENCE, 0.85, 0.90, 0.005),
        (LinkType.SEMANTIC_SIMILARITY, 0.50, 0.75, 0.010),
        (LinkType.CO_OCCURRENCE, 0.60, 0.70, 0.015),
        (LinkType.HIERARCHICAL, 0.80, 0.85, 0.003),
        (LinkType.TEMPORAL, 0.55, 0.65, 0.020),
        (LinkType.CAUSAL, 0.75, 0.80, 0.005),
        (LinkType.CROSS_DOMAIN, 0.70, 0.65, 0.008),
        (LinkType.ASSOCIATIVE, 0.45, 0.60, 0.025),
    )
}

# Lower-case substrings that, when found in "query + context", make
# MnemoV4.should_inject recommend injecting memory. They are tried in list
# order and the first hit is reported in the returned reason.
INJECTION_SIGNALS = [
    # references to earlier conversation
    "previous",
    "earlier",
    "before",
    "you said",
    "you mentioned",
    # references to prior output
    "based on",
    "using your",
    "your analysis",
    "your framework",
    # multi-source operations
    "compare",
    "contrast",
    "synthesize",
    "combine",
    # explicit call-backs
    "apply your",
    "you previously",
    "your earlier",
]

# Substrings that make should_inject recommend skipping memory; these are
# checked before INJECTION_SIGNALS, so they take precedence.
SKIP_SIGNALS = [
    "this is a new",
    "new topic",
    "what is",
    "define",
]


@dataclass
class Memory:
    """One stored memory: its text, its embedding vector and bookkeeping fields."""

    id: str
    content: str
    embedding: np.ndarray
    tier: str = "semantic"
    namespace: str = "default"
    quality_score: float = 0.5
    access_count: int = 0
    priority: float = 1.0
    # Both timestamps default to the wall-clock time at construction.
    created_at: float = field(default_factory=time.time)
    last_accessed: float = field(default_factory=time.time)
    metadata: Dict = field(default_factory=dict)

    def to_dict(self):
        """Return the exported view of this memory as a plain dict.

        Only the fields named below are included, in this order; ``embedding``,
        ``priority`` and ``last_accessed`` are not part of the result.
        """
        exported = (
            "id",
            "content",
            "tier",
            "namespace",
            "quality_score",
            "access_count",
            "created_at",
            "metadata",
        )
        return {name: getattr(self, name) for name in exported}


@dataclass
class NeuralLink:
    """Directed link from one memory to another."""
    # id of the memory the link starts from
    source_id: str
    # id of the memory the link points to
    target_id: str
    # stored as a plain string; the visible code only ever writes
    # "semantic_similarity" here
    link_type: str
    strength: float
    # wall-clock time at construction
    created_at: float = field(default_factory=time.time)


class MnemoV4:
    """Mnemo v4: SLM-Inspired Memory System

    An in-memory store of text memories. Each memory gets a hashed
    bag-of-words embedding; new memories are linked to sufficiently similar
    existing ones, and search combines embedding similarity with a bonus for
    memories linked from the best semantic matches.

    Internal layout:
      * ``memories``  - memory id -> record
      * ``_ids`` / ``_embeddings`` - parallel lists forming the search index,
        one row per stored memory id
      * ``links``     - link id -> link record
      * ``outgoing``  - source memory id -> set of link ids starting there
    """

    # Minimum combined score for a memory to appear in search results.
    SIMILARITY_THRESHOLD = 0.10
    # Minimum estimated quality for add() to accept content.
    QUALITY_THRESHOLD = 0.35

    def __init__(self, embedding_dim: int = 384):
        """Create an empty store whose embeddings have ``embedding_dim`` buckets."""
        self.embedding_dim = embedding_dim
        self.memories: Dict[str, Memory] = {}
        self.links: Dict[str, NeuralLink] = {}
        self.outgoing: Dict[str, set] = defaultdict(set)
        self._embeddings: List[np.ndarray] = []
        self._ids: List[str] = []
        self._cache: Dict[str, np.ndarray] = {}

        self.stats = {
            "adds": 0, "adds_rejected": 0, "searches": 0,
            "links_created": 0, "inject_recommended": 0, "skip_recommended": 0
        }

    def _get_embedding(self, text: str) -> np.ndarray:
        """Return the unit-length hashed bag-of-words embedding of ``text``.

        Each whitespace-separated, lower-cased word adds ``1 / (position + 1)``
        to one bucket, so earlier words weigh more. Results are cached by the
        MD5 of the text. Text with no words yields the zero vector.
        """
        cache_key = hashlib.md5(text.encode()).hexdigest()
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached

        embedding = np.zeros(self.embedding_dim, dtype=np.float32)
        for position, word in enumerate(text.lower().split()):
            # Use a digest instead of the built-in hash(): str hashes are
            # salted per interpreter process, which made bucket assignment
            # (and therefore every similarity score) differ between runs.
            digest = hashlib.md5(word.encode()).digest()
            idx = int.from_bytes(digest[:8], "big") % self.embedding_dim
            embedding[idx] += 1.0 / (position + 1)

        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        self._cache[cache_key] = embedding
        return embedding

    def _estimate_quality(self, content: str) -> float:
        """Heuristic quality score in [0, 1] for ``content``.

        Starts at 0.5; fewer than 5 words costs 0.3, more than 20 words adds
        0.1, and containing "because", "therefore" or "shows" adds 0.2.
        """
        score = 0.5
        words = len(content.split())
        if words < 5:
            score -= 0.3
        elif words > 20:
            score += 0.1
        lowered = content.lower()
        if any(marker in lowered for marker in ("because", "therefore", "shows")):
            score += 0.2
        return max(0.0, min(1.0, score))

    def should_inject(self, query: str, context: str = "",
                      conversation_history: str = "") -> Tuple[bool, str]:
        """Decide whether memory context should be injected for ``query``.

        Returns ``(decision, reason)``. Skip signals win over injection
        signals; both are matched as substrings of the lower-cased
        ``query + " " + context``. When an injection signal matches but the
        conversation history is longer than 500 words and already contains
        more than 60% of the query's distinct words, injection is skipped with
        reason ``"context_window_sufficient"``. Updates the
        ``inject_recommended`` / ``skip_recommended`` counters.
        """
        combined = (query + " " + context).lower()

        for signal in SKIP_SIGNALS:
            if signal in combined:
                self.stats["skip_recommended"] += 1
                return False, f"skip:{signal}"

        matched = next((s for s in INJECTION_SIGNALS if s in combined), None)
        if matched is None:
            self.stats["skip_recommended"] += 1
            return False, "no_signal"

        if conversation_history and len(conversation_history.split()) > 500:
            # Lower-case the history once rather than once per keyword.
            history = conversation_history.lower()
            query_kws = set(query.lower().split())
            covered = sum(1 for kw in query_kws if kw in history)
            if covered > len(query_kws) * 0.6:
                self.stats["skip_recommended"] += 1
                return False, "context_window_sufficient"

        self.stats["inject_recommended"] += 1
        return True, f"inject:{matched}"

    def add(self, content: str, namespace: str = "default",
            metadata: Optional[Dict] = None) -> Optional[str]:
        """Store ``content`` and return its memory id, or ``None`` if rejected.

        Content whose estimated quality is below ``QUALITY_THRESHOLD`` is
        rejected (counted in ``adds_rejected``). The id is derived from the
        MD5 of the content, so adding the same content again yields the same
        id: the stored record is replaced, but the search index and links are
        left as they are.
        """
        quality = self._estimate_quality(content)

        if quality < self.QUALITY_THRESHOLD:
            self.stats["adds_rejected"] += 1
            return None

        memory_id = f"mem_{hashlib.md5(content.encode()).hexdigest()[:8]}"
        embedding = self._get_embedding(content)
        already_indexed = memory_id in self.memories

        self.memories[memory_id] = Memory(
            id=memory_id,
            content=content,
            embedding=embedding,
            namespace=namespace,
            quality_score=quality,
            metadata=metadata or {}
        )

        # Index and link only on first sight of this id. Re-indexing a
        # duplicate used to append a second row for the same id and link the
        # memory to itself (self-similarity is 1.0).
        if not already_indexed:
            self._ids.append(memory_id)
            self._embeddings.append(embedding)
            self._create_links(memory_id, embedding)

        self.stats["adds"] += 1
        return memory_id

    def _create_links(self, memory_id: str, embedding: np.ndarray):
        """Link ``memory_id`` to every other indexed memory that is similar enough.

        Similarity is the dot product of the (unit-length) embeddings; a link
        is created when it reaches the SEMANTIC_SIMILARITY threshold. Links
        are directed from ``memory_id`` to the other memory.
        """
        props = LINK_PROPERTIES[LinkType.SEMANTIC_SIMILARITY]

        for other_id, other_emb in zip(self._ids, self._embeddings):
            if other_id == memory_id:
                continue  # never link a memory to itself

            sim = float(np.dot(embedding, other_emb))
            if sim >= props["threshold"]:
                link_id = f"{memory_id}:{other_id}:semantic"
                self.links[link_id] = NeuralLink(
                    source_id=memory_id,
                    target_id=other_id,
                    link_type="semantic_similarity",
                    strength=props["strength"]
                )
                self.outgoing[memory_id].add(link_id)
                self.stats["links_created"] += 1

    def search(self, query: str, top_k: int = 5,
               namespace: Optional[str] = None) -> List[Dict]:
        """Return up to ``top_k`` memories matching ``query``, best first.

        The combined score is ``0.7 * semantic + 0.3 * link`` where the link
        score is 0.2 per link arriving from one of the three best semantic
        matches. Memories scoring below ``SIMILARITY_THRESHOLD`` are dropped.
        When ``namespace`` is truthy only memories in that namespace are
        returned. Every memory that passes the threshold has its
        ``access_count`` and ``last_accessed`` updated. Ties keep insertion
        order. A non-positive ``top_k`` yields an empty list.
        """
        if not self.memories:
            return []

        self.stats["searches"] += 1
        query_embedding = self._get_embedding(query)

        # Dict order follows the index, giving a deterministic tie order.
        # The membership test guards against an index row with no record.
        semantic_scores = {
            mem_id: float(np.dot(query_embedding, emb))
            for mem_id, emb in zip(self._ids, self._embeddings)
            if mem_id in self.memories
        }

        link_scores: Dict[str, float] = {}
        top_semantic = sorted(semantic_scores.items(), key=lambda x: x[1], reverse=True)[:3]
        for mem_id, _ in top_semantic:
            for link_id in self.outgoing.get(mem_id, set()):
                link = self.links.get(link_id)
                if link:
                    link_scores[link.target_id] = link_scores.get(link.target_id, 0) + 0.2

        results = []
        for mem_id, semantic in semantic_scores.items():
            memory = self.memories[mem_id]
            if namespace and memory.namespace != namespace:
                continue

            linked = link_scores.get(mem_id, 0)
            combined = semantic * 0.7 + linked * 0.3
            if combined < self.SIMILARITY_THRESHOLD:
                continue

            memory.access_count += 1
            memory.last_accessed = time.time()

            results.append({
                "id": mem_id,
                "content": memory.content,
                "score": round(combined, 3),
                "tier": memory.tier,
                "semantic_score": round(semantic, 3),
                "link_score": round(linked, 3)
            })

        results.sort(key=lambda x: x["score"], reverse=True)
        # A negative top_k would otherwise slice from the end of the list.
        return results[:max(top_k, 0)]

    def get_context(self, query: str, top_k: int = 3) -> str:
        """Format the top search results for ``query`` as a text block.

        Returns an empty string when nothing matches.
        """
        results = self.search(query, top_k=top_k)
        if not results:
            return ""

        parts = ["[RELEVANT CONTEXT FROM MEMORY]"]
        for r in results:
            parts.append(f"• [{r['tier'].upper()}] {r['content']}")
        parts.append("[END CONTEXT]\n")
        return "\n".join(parts)

    def get(self, memory_id: str) -> Optional[Dict]:
        """Return the dict form of the memory, or ``None`` if the id is unknown."""
        memory = self.memories.get(memory_id)
        return memory.to_dict() if memory is not None else None

    def delete(self, memory_id: str) -> bool:
        """Remove a memory together with its index row and links.

        Returns ``True`` if the memory existed. Previously only the record was
        removed, leaving the id in the search index, so the next search raised
        ``KeyError`` when it looked the record up.
        """
        if memory_id not in self.memories:
            return False

        del self.memories[memory_id]

        # Drop the index row(s); in-place so existing references stay valid.
        keep = [i for i, mid in enumerate(self._ids) if mid != memory_id]
        self._embeddings[:] = [self._embeddings[i] for i in keep]
        self._ids[:] = [self._ids[i] for i in keep]

        # Drop links in both directions.
        stale = [
            link_id for link_id, link in self.links.items()
            if memory_id in (link.source_id, link.target_id)
        ]
        for link_id in stale:
            link = self.links.pop(link_id)
            siblings = self.outgoing.get(link.source_id)
            if siblings is not None:
                siblings.discard(link_id)
        self.outgoing.pop(memory_id, None)

        return True

    def list_all(self) -> List[Dict]:
        """Return the dict form of every stored memory."""
        return [mem.to_dict() for mem in self.memories.values()]

    def get_stats(self) -> Dict:
        """Return current totals, a per-type link count, and the counters in ``stats``."""
        link_counts = defaultdict(int)
        for link in self.links.values():
            link_counts[link.link_type] += 1

        return {
            "total_memories": len(self.memories),
            "total_links": len(self.links),
            "links_by_type": dict(link_counts),
            **self.stats
        }

    def clear(self):
        """Remove all memories, links, index rows and cached embeddings.

        The counters in ``stats`` are left untouched.
        """
        self.memories.clear()
        self.links.clear()
        self.outgoing.clear()
        self._embeddings.clear()
        self._ids.clear()
        self._cache.clear()


# Global instance: a single in-memory store, created at import time with the
# default embedding size, that every endpoint below reads and writes.
mnemo = MnemoV4()


# =============================================================================
# REST API ENDPOINTS
# =============================================================================

@app.route("/")
def index():
    """Return a JSON description of the service: name, version, features, endpoints."""
    features = [
        "Three-tiered memory hierarchy",
        "Neural link pathways (8 types)",
        "Memory utility predictor",
        "Self-tuning parameters"
    ]
    endpoints = [
        "POST /add",
        "POST /search",
        "POST /should_inject",
        "POST /get_context",
        "GET /get/<memory_id>",
        "DELETE /delete/<memory_id>",
        "GET /list",
        "GET /stats",
        "POST /clear"
    ]
    description = {
        "name": "Mnemo v4 MCP Server",
        "version": "4.0.0",
        "features": features,
        "endpoints": endpoints
    }
    return jsonify(description)


@app.route("/add", methods=["POST"])
def add_memory():
    """Store a memory from the JSON body.

    Body fields: ``content`` (required, non-empty string), ``namespace``
    (default ``"default"``) and ``metadata`` (default ``{}``).

    Responds 400 when ``content`` is missing, empty or not a string;
    otherwise 200 with status ``"success"`` and the new ``memory_id``, or
    status ``"rejected"`` when the store declines the content as low quality.
    """
    data = request.get_json() or {}
    # A JSON array or scalar has no .get(); treat it like an empty object so
    # the caller receives the normal 400 instead of a server error.
    if not isinstance(data, dict):
        data = {}

    content = data.get("content", "")
    namespace = data.get("namespace", "default")
    metadata = data.get("metadata", {})

    # Non-string content (number, list, ...) would crash in the store's
    # text handling, so it is rejected here as well.
    if not content or not isinstance(content, str):
        return jsonify({"status": "error", "message": "Content required"}), 400

    memory_id = mnemo.add(content, namespace, metadata)

    if memory_id:
        return jsonify({
            "status": "success",
            "memory_id": memory_id,
            "message": "Memory stored successfully"
        })
    else:
        return jsonify({
            "status": "rejected",
            "message": "Memory rejected (low quality)"
        })


@app.route("/search", methods=["POST"])
def search_memories():
    """Search stored memories.

    Body fields: ``query`` (required, non-empty string), ``top_k``
    (non-negative integer, default 5) and ``namespace`` (optional filter).

    Responds 400 when ``query`` is missing/not a string or ``top_k`` is not a
    non-negative integer; otherwise 200 with ``count`` and ``results``.
    """
    data = request.get_json() or {}
    # A JSON array or scalar has no .get(); treat it like an empty object.
    if not isinstance(data, dict):
        data = {}

    query = data.get("query", "")
    top_k = data.get("top_k", 5)
    namespace = data.get("namespace")

    if not query or not isinstance(query, str):
        return jsonify({"status": "error", "message": "Query required"}), 400

    # An explicit null falls back to the default.
    if top_k is None:
        top_k = 5
    # top_k is used as a slice bound by the store: a string or float raises
    # TypeError there, and a negative value slices from the wrong end.
    # bool is excluded explicitly because it is a subclass of int.
    if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 0:
        return jsonify({
            "status": "error",
            "message": "top_k must be a non-negative integer"
        }), 400

    results = mnemo.search(query, top_k, namespace)

    return jsonify({
        "status": "success",
        "count": len(results),
        "results": results
    })


@app.route("/should_inject", methods=["POST"])
def should_inject():
    """Report whether memory context should be injected for a query.

    Body fields: ``query`` (required, non-empty string), ``context`` and
    ``conversation_history`` (optional strings; missing or null means empty).

    Responds 400 when ``query`` is missing/not a string or when either
    optional field is present but not a string; otherwise 200 with
    ``should_inject``, ``reason`` and a human-readable ``recommendation``.
    """
    data = request.get_json() or {}
    # A JSON array or scalar has no .get(); treat it like an empty object.
    if not isinstance(data, dict):
        data = {}

    query = data.get("query", "")
    # "or" also maps an explicit JSON null to the empty string; the store
    # concatenates these values, which fails on None.
    context = data.get("context") or ""
    conversation_history = data.get("conversation_history") or ""

    if not query or not isinstance(query, str):
        return jsonify({"status": "error", "message": "Query required"}), 400
    if not isinstance(context, str) or not isinstance(conversation_history, str):
        return jsonify({
            "status": "error",
            "message": "context and conversation_history must be strings"
        }), 400

    should, reason = mnemo.should_inject(query, context, conversation_history)

    return jsonify({
        "should_inject": should,
        "reason": reason,
        "recommendation": "Inject memory context" if should else "Skip memory - use direct response"
    })


@app.route("/get_context", methods=["POST"])
def get_context():
    """Return a formatted context block built from the best matches for a query.

    Body fields: ``query`` (required, non-empty string) and ``top_k``
    (non-negative integer, default 3).

    Responds 400 when ``query`` is missing/not a string or ``top_k`` is not a
    non-negative integer; otherwise 200 with ``context`` (``null`` when
    nothing matched) and a ``message``.
    """
    data = request.get_json() or {}
    # A JSON array or scalar has no .get(); treat it like an empty object.
    if not isinstance(data, dict):
        data = {}

    query = data.get("query", "")
    top_k = data.get("top_k", 3)

    if not query or not isinstance(query, str):
        return jsonify({"status": "error", "message": "Query required"}), 400

    # An explicit null falls back to the default.
    if top_k is None:
        top_k = 3
    # top_k ends up as a slice bound in the store; reject values that would
    # raise there or slice from the wrong end. bool is a subclass of int, so
    # it is excluded explicitly.
    if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 0:
        return jsonify({
            "status": "error",
            "message": "top_k must be a non-negative integer"
        }), 400

    context = mnemo.get_context(query, top_k)

    return jsonify({
        "status": "success",
        "context": context if context else None,
        "message": "Context retrieved" if context else "No relevant context found"
    })


@app.route("/get/<memory_id>", methods=["GET"])
def get_memory(memory_id):
    """Return the stored memory with the given id, or a 404 error payload."""
    found = mnemo.get(memory_id)

    # Guard clause: unknown id (or an empty result) is reported as not found.
    if not found:
        return jsonify({"status": "error", "message": "Memory not found"}), 404

    return jsonify({"status": "success", "memory": found})


@app.route("/delete/<memory_id>", methods=["DELETE"])
def delete_memory(memory_id):
    """Delete the memory with the given id; respond 404 if it does not exist."""
    if not mnemo.delete(memory_id):
        return jsonify({"status": "error", "message": "Memory not found"}), 404

    return jsonify({"status": "success", "message": f"Memory {memory_id} deleted"})


@app.route("/list", methods=["GET"])
def list_memories():
    """Return every stored memory together with the total count."""
    everything = mnemo.list_all()
    payload = {
        "status": "success",
        "count": len(everything),
        "memories": everything
    }
    return jsonify(payload)


@app.route("/stats", methods=["GET"])
def get_stats():
    """Return the store's statistics wrapped in a success envelope."""
    return jsonify({"status": "success", "stats": mnemo.get_stats()})


@app.route("/clear", methods=["POST"])
def clear_memories():
    """Clear the whole store, but only when the body carries a truthy ``confirm``.

    Without it the request is refused with a 400 so that a stray POST cannot
    wipe the store.
    """
    body = request.get_json() or {}

    if body.get("confirm", False):
        mnemo.clear()
        return jsonify({"status": "success", "message": "All memories cleared"})

    refusal = {
        "status": "error",
        "message": "Set confirm=true to clear all memories"
    }
    return jsonify(refusal), 400


if __name__ == "__main__":
    # Run the Flask server directly when executed as a script:
    # listen on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")