Selcan Yukcu committed on
Commit
6a5afc3
·
2 Parent(s): 0d6f96c 0fe55ab

Merge remote-tracking branch 'origin/main' into selcan_test

Browse files

# Conflicts:
# .env.sample
# gradio_app.py
# langchain_mcp_client.py

.env.sample CHANGED
@@ -1,3 +1,2 @@
1
- API_KEY = ""
2
- DSN =
3
- SCHEMA =
 
1
+ API_KEY=
2
+ MCP_SERVER_PATH=/home/andre/andre/postgre_mcp_server.py
 
.gitignore CHANGED
@@ -1,2 +1,125 @@
1
  .idea
2
- .env
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  .idea
2
+ .env
3
+ .vscode
4
+
5
+ # Byte-compiled / optimized / DLL files
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ *.cover
46
+ *.py,cover
47
+ .hypothesis/
48
+ .pytest_cache/
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.coveragerc
52
+ *.pylintrc
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # celery
88
+ celerybeat-schedule
89
+ celerybeat.pid
90
+
91
+ # SageMath
92
+ *.sage.py
93
+
94
+ # Environments
95
+ .env/
96
+ .venv/
97
+ env/
98
+ venv/
99
+ ENV/
100
+ env.bak/
101
+ venv.bak/
102
+
103
+ # Spyder project settings
104
+ .spyderproject
105
+ .spyproject
106
+
107
+ # Rope project settings
108
+ .ropeproject
109
+
110
+ # mkdocs documentation
111
+ /site
112
+
113
+ # mypy
114
+ .mypy_cache/
115
+ .dmypy.json
116
+ dmypy.json
117
+
118
+ # Pyre type checker
119
+ .pyre/
120
+
121
+ # pytype static type analyzer
122
+ .pytype/
123
+
124
+ # Cython debug symbols
125
+ cython_debug/
chat_history.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "# Result\nThe tables in the database are:\n* dim\\_agreement\n* dim\\_customer\n* dim\\_product\n* dim\\_product\\_order\\_item\n\n# Explanation\nThe `list_tables` tool was called to retrieve a list of all available tables in the database schema. The result shows the names of these tables.\n\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "what was my previous request ?", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "# Result\nlist all tables\n# Explanation\nThe user's previous request was to list all the tables in the database. This was determined by reviewing the previous turn in the conversation history.\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "count tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\n4\n# Explanation\nThe user requested the number of tables. 
The previous response listed the tables, and counting them gives the answer.\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "what was my last message ?", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\ncount tables\n# Explanation\nThe user's last message was \"count tables\". This was determined by reviewing the previous turn in the conversation history.\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "what was my very first message in this conversation?", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\nlist all tables\n# Explanation\nThe user's very first message in this conversation was \"list all tables\". 
This was determined by reviewing the conversation history.\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "total number of sales in 2024", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\nThe tables in the database are:\n* dim\\_agreement\n* dim\\_customer\n* dim\\_product\n* dim\\_product\\_order\\_item\n\n# Explanation\nThe `list_tables` tool was called to retrieve a list of all available tables in the database schema. The result shows the names of these tables.\n\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\nThe tables in the database are:\n* dim_agreement\n* dim_customer\n* dim_product\n* dim_product_order_item\n\n# Explanation\nThe `list_tables` tool was called to retrieve a list of all available tables in the database schema. 
The result shows the names of these tables.\n\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}]
gradio_app.py CHANGED
@@ -19,13 +19,13 @@ def load_db_configs():
19
  return configs["db_configs"]
20
 
21
  # Async-compatible wrapper
22
- async def run_agent(request):
23
  # configs = load_db_configs()
24
  # final_answer, last_tool_answer, = await pg_mcp_exec(request)
25
  # return final_answer, last_tool_answer
26
 
27
- result = await lc_mcp_exec(request)
28
- return result
29
 
30
  # Gradio UI
31
  demo = gr.Interface(
 
19
  return configs["db_configs"]
20
 
21
  # Async-compatible wrapper
22
+ async def run_agent(request, history):
23
  # configs = load_db_configs()
24
  # final_answer, last_tool_answer, = await pg_mcp_exec(request)
25
  # return final_answer, last_tool_answer
26
 
27
+ response, message = await lc_mcp_exec(request, history)
28
+ return response
29
 
30
  # Gradio UI
31
  demo = gr.Interface(
langchain_mcp_client.py CHANGED
@@ -5,66 +5,112 @@ from mcp import ClientSession, StdioServerParameters
5
  from mcp.client.stdio import stdio_client
6
  from langchain_mcp_adapters.tools import load_mcp_tools
7
  from langgraph.prebuilt import create_react_agent
 
 
 
 
8
  from langchain.chat_models import init_chat_model
9
  from utils import parse_mcp_output, classify_intent
10
- from langchain.memory import ConversationBufferMemory
11
- from langchain_core.messages import AIMessage, HumanMessage
12
- import asyncio
13
  import logging
14
- import json
15
  from dotenv import load_dotenv
16
 
 
17
  load_dotenv()
18
  logger = logging.getLogger(__name__)
19
 
20
 
21
  async def lc_mcp_exec(request: str) -> tuple[Any, Any]:
22
  """
23
- Execute the full PostgreSQL MCP pipeline: load summary, connect session,
24
- load memory and tools, build prompt, run agent, update memory.
 
 
25
 
26
- Args:
27
- request (str): User's request input.
28
- llm (Any): Language model for reasoning agent.
 
29
 
30
- Returns:
31
- str: Agent response message.
32
- """
33
- # TODO: give summary file path from config
34
- table_summary = load_table_summary("table_summary.txt")
35
- server_params = get_server_params()
36
 
37
- api_key = os.getenv("API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- llm = init_chat_model(
40
- model="gemini-2.0-flash",
41
- model_provider="google_genai",
42
- api_key=api_key,
43
- temperature=0.5,
44
- )
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- async with stdio_client(server_params) as (read, write):
47
- async with ClientSession(read, write) as session:
48
- await session.initialize()
49
- memory = load_or_create_memory()
50
 
51
- tools = await load_and_enrich_tools(session, table_summary)
52
- past_data = get_memory_snapshot(memory)
 
 
53
 
54
- intent = classify_intent(request)
55
- prompt = await build_prompt(session, intent, request, tools, table_summary, past_data)
 
56
 
57
- agent = create_react_agent(llm, tools)
58
- agent_response = await agent.ainvoke({"messages": prompt})
59
 
60
- parsed_steps, final_answer, last_tool_answer, _ = parse_mcp_output(agent_response)
61
- # Add memory update before return
62
- memory.chat_memory.add_message(HumanMessage(content=request))
63
- memory.chat_memory.add_message(AIMessage(content=final_answer))
64
 
65
- await handle_memory_save_or_reset(memory, request)
 
 
 
 
 
 
 
66
 
67
- return final_answer, last_tool_answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
 
70
  # ---------------- Helper Functions ---------------- #
@@ -77,40 +123,18 @@ def get_server_params() -> StdioServerParameters:
77
  # TODO: give server params from config
78
  return StdioServerParameters(
79
  command="python",
80
- args=[r"C:\Users\yukcus\Desktop\query_mcp_server\postgre_mcp_server.py"],
81
  )
82
 
83
- def load_or_create_memory() -> ConversationBufferMemory:
84
- memory = ConversationBufferMemory(return_messages=True)
85
- # You can optionally load from a file or a store if needed
86
- if os.path.exists("memory.json"):
87
- try:
88
- with open("memory.json", "r") as f:
89
- history = json.load(f)
90
- for msg in history:
91
- if msg["type"] == "human":
92
- memory.chat_memory.add_message(HumanMessage(content=msg["content"]))
93
- elif msg["type"] == "ai":
94
- memory.chat_memory.add_message(AIMessage(content=msg["content"]))
95
- except Exception as e:
96
- logger.warning(f"Failed to load memory: {e}")
97
- return memory
98
-
99
-
100
- async def load_and_enrich_tools(session: ClientSession, summary: str):
101
  tools = await load_mcp_tools(session)
102
  return tools
103
 
104
- def get_memory_snapshot(memory: ConversationBufferMemory) -> dict:
105
- return {
106
- "chat_history": "\n".join([f"{m.type}: {m.content}" for m in memory.chat_memory.messages])
107
- }
108
-
109
-
110
- async def build_prompt(session, intent, request, tools, summary, history):
111
  superset_prompt = await session.read_resource("resource://last_prompt")
112
  conversation_prompt = await session.read_resource("resource://base_prompt")
113
 
 
114
  if intent == "superset_request":
115
  template = superset_prompt.contents[0].text
116
  return template.format(
@@ -119,27 +143,25 @@ async def build_prompt(session, intent, request, tools, summary, history):
119
  else:
120
  template = conversation_prompt.contents[0].text
121
  tools_str = "\n".join([f"- {tool.name}: {tool.description}" for tool in tools])
 
 
 
 
 
 
 
 
 
 
 
 
122
  return template.format(
123
  new_request=request,
124
  tools=tools_str,
125
  descriptions=summary,
126
- chat_history = history
 
 
 
127
  )
128
 
129
-
130
- async def handle_memory_save_or_reset(memory: ConversationBufferMemory, request: str):
131
- if request.strip().lower() == "stop":
132
- memory.clear()
133
- if os.path.exists("memory.json"):
134
- os.remove("memory.json")
135
- logger.info("Conversation memory reset.")
136
- else:
137
- history = []
138
- for msg in memory.chat_memory.messages:
139
- if isinstance(msg, HumanMessage):
140
- history.append({"type": "human", "content": msg.content})
141
- elif isinstance(msg, AIMessage):
142
- history.append({"type": "ai", "content": msg.content})
143
- with open("memory.json", "w") as f:
144
- json.dump(history, f, indent=2)
145
- logger.info("Conversation memory saved.")
 
5
  from mcp.client.stdio import stdio_client
6
  from langchain_mcp_adapters.tools import load_mcp_tools
7
  from langgraph.prebuilt import create_react_agent
8
+ from langchain_core.prompts import PromptTemplate
9
+ from langchain_core.messages import AIMessage, HumanMessage
10
+ from langchain.memory import ChatMessageHistory
11
+ from langchain_community.chat_message_histories import FileChatMessageHistory
12
  from langchain.chat_models import init_chat_model
13
  from utils import parse_mcp_output, classify_intent
 
 
 
14
  import logging
 
15
  from dotenv import load_dotenv
16
 
17
+
18
  load_dotenv()
19
  logger = logging.getLogger(__name__)
20
 
21
 
22
  async def lc_mcp_exec(request: str) -> tuple[Any, Any]:
23
  """
24
+ Execute the full PostgreSQL MCP pipeline with persistent memory.
25
+ """
26
+ try:
27
+ history_file = os.path.join(os.path.dirname(__file__), "chat_history.json")
28
 
29
+ # Initialize chat history file if it doesn't exist or is empty
30
+ if not os.path.exists(history_file) or os.path.getsize(history_file) == 0:
31
+ with open(history_file, 'w') as f:
32
+ json.dump({"messages": []}, f)
33
 
34
+ message_history = FileChatMessageHistory(file_path=history_file)
 
 
 
 
 
35
 
36
+ try:
37
+ # Load existing messages or handle bootstrap scenario
38
+ existing_messages = message_history.messages
39
+ except json.JSONDecodeError:
40
+ # If JSON is corrupted, reinitialize the file
41
+ logger.warning("Chat history file corrupted, reinitializing...")
42
+ with open(history_file, 'w') as f:
43
+ json.dump({"messages": []}, f)
44
+ existing_messages = []
45
+
46
+ # Format existing messages properly
47
+ formatted_history = []
48
+ for msg in existing_messages:
49
+ if isinstance(msg, HumanMessage):
50
+ formatted_history.append(HumanMessage(content=msg.content))
51
+ elif isinstance(msg, AIMessage):
52
+ formatted_history.append(AIMessage(content=msg.content))
53
 
54
+ # TODO: give summary file path from config
55
+ table_summary = load_table_summary("table_summary.txt")
56
+ server_params = get_server_params()
57
+
58
+ api_key = os.getenv("API_KEY")
59
+ llm = init_chat_model(model="gemini-2.0-flash", model_provider="google_genai",
60
+ api_key=api_key)
61
+
62
+ async with stdio_client(server_params) as (read, write):
63
+ async with ClientSession(read, write) as session:
64
+ await session.initialize()
65
+
66
+ tools = await load_and_enrich_tools(session)
67
+ intent = classify_intent(request)
68
+
69
+ # Add new user message before processing
70
+ message_history.add_user_message(request)
71
 
72
+ # Create agent and prepare system message
73
+ agent = create_react_agent(llm, tools)
 
 
74
 
75
+ # Create base messages list with system message
76
+ base_message = HumanMessage(content="""You are a PostgreSQL database expert assistant.
77
+ Use the conversation history for context when available.""")
78
+ messages = [base_message]
79
 
80
+ # Add history if exists
81
+ if formatted_history:
82
+ messages.extend(formatted_history)
83
 
84
+ # Add current request
85
+ messages.append(HumanMessage(content=request))
86
 
87
+ # Build prompt with conversation context
88
+ prompt = await build_prompt(session, intent, request, tools, table_summary, formatted_history)
 
 
89
 
90
+ # Invoke agent with proper message structure
91
+ agent_response = await agent.ainvoke(
92
+ {
93
+ "messages": prompt,
94
+ "chat_history": [msg.content for msg in formatted_history]
95
+ },
96
+ config={"configurable": {"thread_id": "conversation_123"}}
97
+ )
98
 
99
+ if "messages" in agent_response:
100
+ response = agent_response["messages"][-1].content
101
+ # Save assistant response
102
+ message_history.add_ai_message(response)
103
+ else:
104
+ response = "No response generated"
105
+ message_history.add_ai_message(response)
106
+
107
+ # Return current response and up-to-date messages
108
+ return response, message_history.messages
109
+
110
+ except Exception as e:
111
+ logger.error(f"Error in chat history handling: {str(e)}", exc_info=True)
112
+ # Fallback to stateless response if history fails
113
+ return f"Error in conversation: {str(e)}", []
114
 
115
 
116
  # ---------------- Helper Functions ---------------- #
 
123
  # TODO: give server params from config
124
  return StdioServerParameters(
125
  command="python",
126
+ args=[os.environ["MCP_SERVER_PATH"]],
127
  )
128
 
129
+ async def load_and_enrich_tools(session: ClientSession):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  tools = await load_mcp_tools(session)
131
  return tools
132
 
133
+ async def build_prompt(session, intent, request, tools, summary, chat_history):
 
 
 
 
 
 
134
  superset_prompt = await session.read_resource("resource://last_prompt")
135
  conversation_prompt = await session.read_resource("resource://base_prompt")
136
 
137
+
138
  if intent == "superset_request":
139
  template = superset_prompt.contents[0].text
140
  return template.format(
 
143
  else:
144
  template = conversation_prompt.contents[0].text
145
  tools_str = "\n".join([f"- {tool.name}: {tool.description}" for tool in tools])
146
+
147
+ # Handle history formatting with proper message access
148
+ history_str = ""
149
+ if chat_history:
150
+ history_sections = []
151
+ for msg in chat_history:
152
+ if isinstance(msg, HumanMessage):
153
+ history_sections.append(f"Previous Human Question:\n{msg.content}\n")
154
+ elif isinstance(msg, AIMessage):
155
+ history_sections.append(f"Previous Assistant Response:\n{msg.content}\n")
156
+ history_str = "\n".join(history_sections)
157
+
158
  return template.format(
159
  new_request=request,
160
  tools=tools_str,
161
  descriptions=summary,
162
+ chat_history=f"\nPrevious Conversation History:\n{history_str}" if history_str else "\nThis is a new conversation.",
163
+ system_instructions="""You are a PostgreSQL database expert assistant.
164
+ Use the conversation history when available to maintain context.
165
+ For new conversations, focus on understanding the initial request."""
166
  )
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
postgre_smolagent_client.py CHANGED
@@ -8,6 +8,8 @@ from conversation_memory import ConversationMemory
8
  from utils import parse_mcp_output, classify_intent
9
  import logging
10
  from smolagents import LiteLLMModel, ToolCollection, CodeAgent
 
 
11
 
12
  logger = logging.getLogger(__name__)
13
 
@@ -63,9 +65,10 @@ def load_table_summary(path: str) -> str:
63
 
64
  def get_server_params() -> StdioServerParameters:
65
  # TODO: give server params from config
 
66
  return StdioServerParameters(
67
  command="python",
68
- args=[r"/home/amirkia/Desktop/query_mcp_server/postgre_mcp_server.py"],
69
  )
70
 
71
  async def load_or_create_memory() -> ConversationMemory:
 
8
  from utils import parse_mcp_output, classify_intent
9
  import logging
10
  from smolagents import LiteLLMModel, ToolCollection, CodeAgent
11
+ from dotenv import load_dotenv
12
+
13
 
14
  logger = logging.getLogger(__name__)
15
 
 
65
 
66
  def get_server_params() -> StdioServerParameters:
67
  # TODO: give server params from config
68
+ load_dotenv()
69
  return StdioServerParameters(
70
  command="python",
71
+ args=[os.environ["MCP_SERVER_PATH"]],
72
  )
73
 
74
  async def load_or_create_memory() -> ConversationMemory:
run.sh CHANGED
@@ -1,6 +1,6 @@
1
  #!/bin/bash
2
 
3
  # Replace 'myenv' with the name of your conda environment
4
- conda activate myenv
5
 
6
  python gradio_app.py
 
1
  #!/bin/bash
2
 
3
  # Replace 'myenv' with the name of your conda environment
4
+ # conda activate myenv
5
 
6
  python gradio_app.py