Kishor Ramanan committed
Commit 0a25329 · 1 Parent(s): 6e68a92
Files changed (8)
  1. .python-version +1 -0
  2. README.md +44 -1
  3. agent.py +210 -0
  4. app.py +20 -4
  5. clients.py +52 -0
  6. config.py +11 -0
  7. pyproject.toml +15 -0
  8. utility.py +144 -0
.python-version ADDED
@@ -0,0 +1 @@
+ 3.12
README.md CHANGED
@@ -11,4 +11,47 @@ license: mit
  short_description: Storing Memories and Agentic Retrieval with MCP
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Central Memory Agent
+
+ Central Memory Agent is a Gradio-based chatbot application for storing and retrieving information. It provides a user-friendly interface and exposes its tools as MCP (Model Context Protocol) endpoints for seamless integration with MCP clients.
+
+ ---
+
+ ## Features
+
+ - **Chatbot Interface**: Interact with the memory system to store and retrieve information.
+ - **Memory Storage**: Add content to memory with metadata (category, topic).
+ - **Memory Retrieval**: Search stored information using agentic retrieval.
+ - **MCP Endpoints**: Access the `populate_memory` and `search_memory` tools via MCP clients.
+
+ ---
+
+ ## Usage
+
+ 1. **Run the Application**:
+    ```bash
+    uv run app.py
+    ```
+
+ 2. **Interact with the Chatbot**:
+    - Use the chatbot interface to store and retrieve memories.
+
+ 3. **Connect MCP Clients**:
+    - Access the `populate_memory` and `search_memory` tools via MCP endpoints.
+
+ ---
+
+ ## Project Structure
+
+ - `app.py`: The main application file that launches the Gradio interface.
+ - `utility.py`: Contains the `populate_memory` and `search_memory` tools.
+ - `agent.py`: Manages the retrieval state and builds the retrieval graph.
+ - `clients.py`: Defines the language model and vector store clients.
+ - `pyproject.toml`: Project configuration and dependencies.
+
+ ---
+
+ ## Acknowledgments
+
+ - Built with [Gradio](https://gradio.app/).
+ - Powered by [LangChain](https://langchain.com/) and [Qdrant](https://qdrant.tech/).
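
The README's Usage section points MCP clients at this server. As a concrete starting point, here is a minimal client sketch (not part of this commit); it assumes the app runs locally on Gradio's default port and that Gradio serves its MCP SSE endpoint at `/gradio_api/mcp/sse`, so check the app's startup logs for the exact URL.

```python
# Minimal MCP client sketch. Assumptions: the server runs at 127.0.0.1:7860
# and exposes Gradio's MCP SSE endpoint at /gradio_api/mcp/sse.
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client


async def main() -> None:
    async with sse_client("http://127.0.0.1:7860/gradio_api/mcp/sse") as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            # Expect populate_memory and search_memory in the listing
            print([t.name for t in tools.tools])


asyncio.run(main())
```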
agent.py ADDED
@@ -0,0 +1,210 @@
+ from typing import List, Literal, Optional, TypedDict
+
+ from langchain_core.documents import Document
+ from langchain_core.prompts import ChatPromptTemplate
+ from langgraph.graph import END, START, StateGraph
+ from pydantic import BaseModel, Field
+ from qdrant_client.http.models import (
+     FieldCondition,
+     Filter,
+     MatchValue,
+ )
+
+ from clients import LLM, VECTOR_STORE
+
+
+ class RetrievalState(TypedDict):
+     """State for the agentic retrieval graph."""
+
+     original_query: str
+     current_query: str
+     category: Optional[str]
+     topic: Optional[str]
+     documents: List[Document]
+     relevant_documents: List[Document]
+     generation: str
+     retry_count: int
+     max_retries: int
+
+
+ class GradeDocuments(BaseModel):
+     """Grade whether a document is relevant to the query."""
+
+     is_relevant: Literal["yes", "no"] = Field(
+         description="Is the document relevant to the query? 'yes' or 'no'"
+     )
+     reason: str = Field(description="Brief reason for the relevance decision")
+
+
+ def retrieve_documents(state: RetrievalState) -> RetrievalState:
+     """Retrieve documents from the vector store."""
+     query = state["current_query"]
+     category = state.get("category")
+     topic = state.get("topic")
+
+     # Build the Qdrant metadata filter
+     conditions = []
+     if category:
+         conditions.append(
+             FieldCondition(key="metadata.category", match=MatchValue(value=category))
+         )
+     if topic:
+         conditions.append(
+             FieldCondition(key="metadata.topic", match=MatchValue(value=topic))
+         )
+
+     qdrant_filter = Filter(must=conditions) if conditions else None
+
+     documents = VECTOR_STORE.similarity_search(
+         query,
+         k=5,
+         filter=qdrant_filter,
+     )
+
+     return {**state, "documents": documents}
+
+
+ def grade_documents(state: RetrievalState) -> RetrievalState:
+     """Grade documents for relevance using the LLM."""
+     query = state["original_query"]
+     documents = state["documents"]
+
+     if not documents:
+         return {**state, "relevant_documents": []}
+
+     # Create a grader with structured output
+     grader_llm = LLM.with_structured_output(GradeDocuments)
+
+     grading_prompt = ChatPromptTemplate.from_messages(
+         [
+             (
+                 "system",
+                 """You are a grader assessing relevance of a retrieved document to a user query.
+
+ If the document contains keywords or semantic meaning related to the query, grade it as relevant.
+ Be lenient - even partial relevance should be marked as 'yes'.
+ Only mark 'no' if the document is completely unrelated.""",
+             ),
+             (
+                 "human",
+                 """Query: {query}
+
+ Document content: {document}
+
+ Is this document relevant to the query?""",
+             ),
+         ]
+     )
+
+     relevant_docs = []
+     for doc in documents:
+         try:
+             result = grader_llm.invoke(
+                 grading_prompt.format_messages(
+                     query=query,
+                     document=doc.page_content[:1000],  # Limit content length
+                 )
+             )
+             if result.is_relevant == "yes":
+                 relevant_docs.append(doc)
+         except Exception:
+             # If grading fails, include the document (fail-safe)
+             relevant_docs.append(doc)
+
+     return {**state, "relevant_documents": relevant_docs}
+
+
+ def rewrite_query(state: RetrievalState) -> RetrievalState:
+     """Rewrite the query for better retrieval."""
+     original_query = state["original_query"]
+     retry_count = state["retry_count"]
+
+     rewrite_prompt = ChatPromptTemplate.from_messages(
+         [
+             (
+                 "system",
+                 """You are an expert at reformulating search queries.
+ Given the original query, generate a better search query that might retrieve more relevant documents.
+
+ Focus on:
+ - Extracting key concepts and entities
+ - Using synonyms or related terms
+ - Being more specific or more general as appropriate
+
+ Return ONLY the rewritten query, nothing else.""",
+             ),
+             ("human", "Original query: {query}\n\nRewritten query:"),
+         ]
+     )
+
+     response = LLM.invoke(rewrite_prompt.format_messages(query=original_query))
+
+     new_query = response.content.strip()
+
+     return {
+         **state,
+         "current_query": new_query,
+         "retry_count": retry_count + 1,
+     }
+
+
+ def generate_response(state: RetrievalState) -> RetrievalState:
+     """Generate the final response from the relevant documents."""
+     relevant_docs = state["relevant_documents"]
+
+     if not relevant_docs:
+         return {**state, "generation": "No relevant memories found."}
+
+     # Format the documents as a numbered list
+     formatted = []
+     for i, doc in enumerate(relevant_docs, 1):
+         meta = doc.metadata
+         formatted.append(
+             f"{i}. [{meta.get('category', 'N/A')}/{meta.get('topic', 'N/A')}]: {doc.page_content}"
+         )
+
+     return {**state, "generation": "\n".join(formatted)}
+
+
+ def should_retry(state: RetrievalState) -> Literal["rewrite", "generate"]:
+     """Decide whether to retry with a rewritten query."""
+     relevant_docs = state["relevant_documents"]
+     retry_count = state["retry_count"]
+     max_retries = state["max_retries"]
+
+     # If we have relevant docs, generate a response
+     if relevant_docs:
+         return "generate"
+
+     # If there are no relevant docs and retries remain, rewrite the query
+     if retry_count < max_retries:
+         return "rewrite"
+
+     # Max retries reached, generate an (empty) response
+     return "generate"
+
+
+ def build_retrieval_graph():
+     workflow = StateGraph(RetrievalState)
+
+     # Add nodes
+     workflow.add_node("retrieve", retrieve_documents)
+     workflow.add_node("grade", grade_documents)
+     workflow.add_node("rewrite", rewrite_query)
+     workflow.add_node("generate", generate_response)
+
+     # Add edges
+     workflow.add_edge(START, "retrieve")
+     workflow.add_edge("retrieve", "grade")
+     workflow.add_conditional_edges(
+         "grade",
+         should_retry,
+         {
+             "rewrite": "rewrite",
+             "generate": "generate",
+         },
+     )
+     workflow.add_edge("rewrite", "retrieve")
+     workflow.add_edge("generate", END)
+
+     return workflow.compile()
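
The graph above compiles to a LangGraph state machine: retrieve, grade, then either generate or rewrite and loop back. To make its contract concrete, here is a minimal sketch of invoking the compiled graph directly (the query and filter values are illustrative; `search_memory` in `utility.py` builds the same initial state):

```python
# Sketch: run the retrieval graph directly with a fully populated initial state.
from agent import build_retrieval_graph

graph = build_retrieval_graph()
final_state = graph.invoke(
    {
        "original_query": "what is my favorite color?",  # illustrative query
        "current_query": "what is my favorite color?",
        "category": None,  # optional metadata filters
        "topic": None,
        "documents": [],
        "relevant_documents": [],
        "generation": "",
        "retry_count": 0,
        "max_retries": 2,  # allow up to two query rewrites
    }
)
print(final_state["generation"])
```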
app.py CHANGED
@@ -1,7 +1,23 @@
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"
+ from utility import chat, populate_memory, search_memory

- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ with gr.Blocks(title="Central Memory", theme=gr.themes.Soft()) as app:
+     gr.ChatInterface(
+         fn=chat,
+         title="Central Memory ChatBot",
+         examples=[
+             "Remember that my favorite color is blue",
+             "Store this: I'm learning Rust to make an OS",
+             "Search the memories about learning Rust",
+         ],
+         api_visibility="private",
+     )
+     gr.api(populate_memory.func)
+     gr.api(search_memory.func)
+     gr.Markdown("""---
+     **Note:** `search_memory` uses agentic retrieval. This application exposes all tools as MCP endpoints.
+     Connect your MCP client to this server to access the `populate_memory` and `search_memory` tools.
+     """)
+
+ app.launch(mcp_server=True, share=False)
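
Beyond MCP, `gr.api()` also exposes the two functions as plain HTTP endpoints. Here is a hypothetical sketch of calling them with `gradio_client`, assuming a local server and that each endpoint is registered under the wrapped function's name (the argument values are illustrative):

```python
# Hypothetical gradio_client usage; endpoint names assume gr.api() registers
# each function under /<function_name>.
from gradio_client import Client

client = Client("http://127.0.0.1:7860")
print(
    client.predict(
        "My favorite color is blue",  # content
        "personal",  # category
        "preferences",  # topic
        api_name="/populate_memory",
    )
)
print(client.predict("favorite color", None, None, api_name="/search_memory"))
```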
clients.py ADDED
@@ -0,0 +1,52 @@
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+ from langchain_qdrant import QdrantVectorStore
+ from qdrant_client import QdrantClient
+ from qdrant_client.http.models import (
+     Distance,
+     VectorParams,
+ )
+
+ from config import (
+     COLLECTION_NAME,
+     OPENAI_API_KEY,
+     OPENAI_BASE_URL,
+     QDRANT_API_KEY,
+     QDRANT_URL,
+ )
+
+ EMBEDDING = OpenAIEmbeddings(
+     openai_api_key=OPENAI_API_KEY,
+     openai_api_base=OPENAI_BASE_URL,
+     model="Qwen/Qwen3-Embedding-8B",
+     check_embedding_ctx_length=False,
+ )
+
+ QDRANT_CLIENT = QdrantClient(
+     url=QDRANT_URL,
+     api_key=QDRANT_API_KEY,
+     port=443,
+     https=True,
+ )
+
+ if not QDRANT_CLIENT.collection_exists(COLLECTION_NAME):
+     QDRANT_CLIENT.create_collection(
+         collection_name=COLLECTION_NAME,
+         vectors_config=VectorParams(
+             size=4096,
+             distance=Distance.COSINE,
+         ),
+     )
+
+ VECTOR_STORE = QdrantVectorStore(
+     client=QDRANT_CLIENT,
+     collection_name=COLLECTION_NAME,
+     embedding=EMBEDDING,
+ )
+
+ LLM = ChatOpenAI(
+     openai_api_key=OPENAI_API_KEY,
+     openai_api_base=OPENAI_BASE_URL,
+     model="openai/gpt-oss-120b",
+     temperature=0.3,
+     streaming=True,
+ )
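
One coupling worth noting: the collection is created with 4096-dimensional vectors, so the embedding model must produce vectors of exactly that size. A quick sanity-check sketch (not part of this commit):

```python
# Sketch: confirm the embedding size matches the Qdrant collection config.
from clients import EMBEDDING

dim = len(EMBEDDING.embed_query("dimension check"))
assert dim == 4096, f"collection expects 4096-dim vectors, model returned {dim}"
```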
config.py ADDED
@@ -0,0 +1,11 @@
+ import os
+
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
+ QDRANT_URL = os.getenv("QDRANT_URL")
+ QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
+ COLLECTION_NAME = "memories"
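
`os.getenv` returns `None` for any variable missing from the environment, and the failure only surfaces later when `clients.py` tries to connect. A minimal fail-fast sketch (an illustrative addition, not in this commit):

```python
# Sketch: fail fast on missing settings instead of erroring later in clients.py.
import sys

from config import OPENAI_API_KEY, OPENAI_BASE_URL, QDRANT_API_KEY, QDRANT_URL

required = {
    "OPENAI_API_KEY": OPENAI_API_KEY,
    "OPENAI_BASE_URL": OPENAI_BASE_URL,
    "QDRANT_URL": QDRANT_URL,
    "QDRANT_API_KEY": QDRANT_API_KEY,
}
missing = [name for name, value in required.items() if not value]
if missing:
    sys.exit(f"Missing environment variables: {', '.join(missing)}")
```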
pyproject.toml ADDED
@@ -0,0 +1,15 @@
+ [project]
+ name = "central-memory"
+ version = "0.1.0"
+ description = "Gradio chatbot for storing memories and agentic retrieval over MCP"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+     "gradio[mcp]>=6.0.1",
+     "langchain-openai>=1.1.0",
+     "langchain-qdrant>=1.1.0",
+     "langgraph>=1.0.4",
+     "mcp>=1.22.0",
+     "python-dotenv>=1.2.1",
+     "qdrant-client>=1.16.1",
+ ]
utility.py ADDED
@@ -0,0 +1,144 @@
+ from typing import Generator, Optional
+
+ from langchain_core.documents import Document
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
+ from langchain_core.tools import tool
+
+ from agent import RetrievalState, build_retrieval_graph
+ from clients import LLM, VECTOR_STORE
+
+
+ @tool
+ def populate_memory(
+     content: str,
+     category: str,
+     topic: str,
+ ) -> str:
+     """Add content with metadata to the memory for later retrieval. Use this to store important information the user wants to remember.
+
+     Args:
+         content: The content to store in memory
+         category: Category of the memory (e.g., 'personal', 'work', 'learning')
+         topic: Specific topic of the memory
+     """
+     VECTOR_STORE.add_documents(
+         documents=[
+             Document(
+                 page_content=content, metadata={"category": category, "topic": topic}
+             )
+         ]
+     )
+     return f"Successfully stored memory about '{topic}' in category '{category}'"
+
+
+ @tool
+ def search_memory(
+     query: str,
+     category: Optional[str] = None,
+     topic: Optional[str] = None,
+ ) -> str:
+     """Search and retrieve relevant information from memory using intelligent agentic retrieval.
+
+     This tool uses advanced retrieval with:
+     - Document relevance grading
+     - Automatic query rewriting if no relevant results are found
+     - Self-correction with retry logic
+
+     Args:
+         query: The search query to find relevant memories
+         category: Optional category filter
+         topic: Optional topic filter
+     """
+     try:
+         initial_state: RetrievalState = {
+             "original_query": query,
+             "current_query": query,
+             "category": category,
+             "topic": topic,
+             "documents": [],
+             "relevant_documents": [],
+             "generation": "",
+             "retry_count": 0,
+             "max_retries": 2,  # Allow up to 2 query rewrites
+         }
+
+         final_state = _get_retrieval_agent().invoke(initial_state)
+         result = final_state["generation"]
+
+         return result
+     except Exception as e:
+         error_msg = f"Error in search_memory: {str(e)}"
+         print(f"DEBUG: {error_msg}")
+         return error_msg
+
+
+ # Create the tools list and the tool-bound LLM
+ TOOLS = [search_memory, populate_memory]
+ CHAT_LLM = LLM.bind_tools(TOOLS)
+
+ # Lazy initialization to avoid circular imports
+ _retrieval_agent = None
+
+
+ def _get_retrieval_agent():
+     global _retrieval_agent
+     if _retrieval_agent is None:
+         _retrieval_agent = build_retrieval_graph()
+     return _retrieval_agent
+
+
+ def chat(
+     message: str,
+     history: list[dict],
+ ) -> Generator[str, None, None]:
+     messages = [
+         SystemMessage(content="Whenever the user asks you a question, you must always use the search_memory tool first to look for relevant information in your memory. If you find relevant information, use it to answer the user's question. If you don't find any relevant information, answer the question to the best of your ability.")
+     ]
+     for msg in history:
+         if msg["role"] == "user":
+             messages.append(HumanMessage(content=msg["content"]))
+         elif msg["role"] == "assistant":
+             messages.append(AIMessage(content=msg["content"]))
+
+     messages.append(HumanMessage(content=message))
+
+     max_iterations = 10
+     iteration = 0
+
+     while iteration < max_iterations:
+         iteration += 1
+
+         response = CHAT_LLM.invoke(messages)
+         messages.append(response)
+
+         if not response.tool_calls:
+             if response.content:
+                 yield response.content
+             else:
+                 yield "Done!"
+             return
+
+         tool_map = {t.name: t for t in TOOLS}
+
+         for tool_call in response.tool_calls:
+             tool_name = tool_call["name"]
+             tool_args = tool_call["args"]
+
+             yield f"🔧 Using {tool_name}..."
+
+             if tool_name in tool_map:
+                 try:
+                     result = tool_map[tool_name].invoke(tool_args)
+                 except Exception as e:
+                     result = f"Error: {str(e)}"
+             else:
+                 result = f"Unknown tool: {tool_name}"
+
+             messages.append(
+                 ToolMessage(
+                     content=str(result),
+                     tool_call_id=tool_call["id"],
+                 )
+             )
+
+     yield "I processed your request but couldn't generate a final response."