amirkiarafiei commited on
Commit
1acd5e4
·
1 Parent(s): 434a9c4

feat: enhance chat history management and update .gitignore

Browse files
Files changed (4) hide show
  1. .gitignore +124 -1
  2. chat_history.json +1 -0
  3. langchain_mcp_client.py +104 -62
  4. run.sh +1 -1
.gitignore CHANGED
@@ -1,2 +1,125 @@
1
  .idea
2
- .env
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  .idea
2
+ .env
3
+ .vscode
4
+
5
+ # Byte-compiled / optimized / DLL files
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ *.cover
46
+ *.py,cover
47
+ .hypothesis/
48
+ .pytest_cache/
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.coveragerc
52
+ *.pylintrc
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # celery
88
+ celerybeat-schedule
89
+ celerybeat.pid
90
+
91
+ # SageMath
92
+ *.sage.py
93
+
94
+ # Environments
95
+ .env/
96
+ .venv/
97
+ env/
98
+ venv/
99
+ ENV/
100
+ env.bak/
101
+ venv.bak/
102
+
103
+ # Spyder project settings
104
+ .spyderproject
105
+ .spyproject
106
+
107
+ # Rope project settings
108
+ .ropeproject
109
+
110
+ # mkdocs documentation
111
+ /site
112
+
113
+ # mypy
114
+ .mypy_cache/
115
+ .dmypy.json
116
+ dmypy.json
117
+
118
+ # Pyre type checker
119
+ .pyre/
120
+
121
+ # pytype static type analyzer
122
+ .pytype/
123
+
124
+ # Cython debug symbols
125
+ cython_debug/
chat_history.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "# Result\nThe tables in the database are:\n* dim\\_agreement\n* dim\\_customer\n* dim\\_product\n* dim\\_product\\_order\\_item\n\n# Explanation\nThe `list_tables` tool was called to retrieve a list of all available tables in the database schema. The result shows the names of these tables.\n\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "what was my previous request ?", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "# Result\nlist all tables\n# Explanation\nThe user's previous request was to list all the tables in the database. This was determined by reviewing the previous turn in the conversation history.\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "count tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\n4\n# Explanation\nThe user requested the number of tables. 
The previous response listed the tables, and counting them gives the answer.\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "what was my last message ?", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\ncount tables\n# Explanation\nThe user's last message was \"count tables\". This was determined by reviewing the previous turn in the conversation history.\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "what was my very first message in this conversation?", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\nlist all tables\n# Explanation\nThe user's very first message in this conversation was \"list all tables\". 
This was determined by reviewing the conversation history.\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "total number of sales in 2024", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\nThe tables in the database are:\n* dim\\_agreement\n* dim\\_customer\n* dim\\_product\n* dim\\_product\\_order\\_item\n\n# Explanation\nThe `list_tables` tool was called to retrieve a list of all available tables in the database schema. The result shows the names of these tables.\n\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}, {"type": "human", "data": {"content": "list all tables", "additional_kwargs": {}, "response_metadata": {}, "type": "human", "name": null, "id": null, "example": false}}, {"type": "ai", "data": {"content": "```markdown\n# Result\nThe tables in the database are:\n* dim_agreement\n* dim_customer\n* dim_product\n* dim_product_order_item\n\n# Explanation\nThe `list_tables` tool was called to retrieve a list of all available tables in the database schema. 
The result shows the names of these tables.\n\n# Query\n```sql\nN/A\n```", "additional_kwargs": {}, "response_metadata": {}, "type": "ai", "name": null, "id": null, "example": false, "tool_calls": [], "invalid_tool_calls": [], "usage_metadata": null}}]
langchain_mcp_client.py CHANGED
@@ -1,4 +1,5 @@
1
  import os.path
 
2
  from typing import Tuple, Any
3
 
4
  from mcp import ClientSession, StdioServerParameters
@@ -7,11 +8,12 @@ from langchain_mcp_adapters.tools import load_mcp_tools
7
  from langgraph.prebuilt import create_react_agent
8
  from langchain_core.prompts import PromptTemplate
9
  from langchain_core.messages import AIMessage, HumanMessage
 
 
10
  from langchain.chat_models import init_chat_model
11
  from utils import parse_mcp_output, classify_intent
12
  import logging
13
  from dotenv import load_dotenv
14
- from langgraph.checkpoint.memory import MemorySaver
15
 
16
 
17
 
@@ -19,71 +21,96 @@ logger = logging.getLogger(__name__)
19
  load_dotenv()
20
  async def lc_mcp_exec(request: str, history) -> tuple[Any, Any]:
21
  """
22
- Execute the full PostgreSQL MCP pipeline: load summary, connect session,
23
- load memory and tools, build prompt, run agent, update memory.
24
-
25
- Args:
26
- request (str): User's request input.
27
- llm (Any): Language model for reasoning agent.
28
-
29
- Returns:
30
- str: Agent response message.
31
  """
32
- # TODO: give summary file path from config
33
- table_summary = load_table_summary("table_summary.txt")
34
- server_params = get_server_params()
35
-
36
- api_key = os.getenv("API_KEY")
37
- llm = init_chat_model(model="gemini-2.0-flash", model_provider="google_genai",
38
- api_key=api_key)
39
-
40
- async with stdio_client(server_params) as (read, write):
41
- async with ClientSession(read, write) as session:
42
- await session.initialize()
43
-
44
- tools = await load_and_enrich_tools(session)
45
-
46
- intent = classify_intent(request)
47
-
48
- messages = []
49
- memory = MemorySaver()
50
- agent = create_react_agent(llm, tools, checkpointer=memory)
51
-
52
- messages.append(HumanMessage(content=request))
53
- if history:
54
- # Clear existing messages and rebuild from history
55
- messages = []
56
-
57
- # Process Gradio chat history format
58
- for msg in history:
59
- # Gradio format: {'role': 'user/assistant', 'metadata': None, 'content': 'message', 'options': None}
60
- role = msg.get('role', '')
61
- content = msg.get('content', '')
62
-
63
- if role == 'user' and content:
64
- messages.append(HumanMessage(content=content))
65
- elif role == 'assistant' and content:
66
- messages.append(AIMessage(content=content))
67
-
68
- # Add the current query
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  messages.append(HumanMessage(content=request))
70
 
71
- prompt = await build_prompt(session, intent, request, tools, table_summary, messages)
72
- config = {"configurable": {"thread_id": "conversation_123"}}
73
- agent_response = await agent.ainvoke(
74
- {"messages": prompt},
75
- config
76
- )
77
 
78
- if "messages" in agent_response:
79
- response = agent_response["messages"][-1].content
80
- else:
81
- response = "No response generated"
 
 
 
 
82
 
83
- messages.append(AIMessage(content=response))
 
 
 
 
 
 
84
 
 
 
85
 
86
- return response, messages
 
 
 
87
 
88
 
89
  # ---------------- Helper Functions ---------------- #
@@ -104,10 +131,10 @@ async def load_and_enrich_tools(session: ClientSession):
104
  tools = await load_mcp_tools(session)
105
  return tools
106
 
107
- async def build_prompt(session, intent, request, tools, summary, messages):
108
  superset_prompt = await session.read_resource("resource://last_prompt")
109
  conversation_prompt = await session.read_resource("resource://base_prompt")
110
- # TODO: add uri's from config
111
  if intent == "superset_request":
112
  template = superset_prompt.contents[0].text
113
  return template.format(
@@ -116,10 +143,25 @@ async def build_prompt(session, intent, request, tools, summary, messages):
116
  else:
117
  template = conversation_prompt.contents[0].text
118
  tools_str = "\n".join([f"- {tool.name}: {tool.description}" for tool in tools])
 
 
 
 
 
 
 
 
 
 
 
 
119
  return template.format(
120
  new_request=request,
121
  tools=tools_str,
122
  descriptions=summary,
123
- chat_history = messages
 
 
 
124
  )
125
 
 
1
  import os.path
2
+ import json
3
  from typing import Tuple, Any
4
 
5
  from mcp import ClientSession, StdioServerParameters
 
8
  from langgraph.prebuilt import create_react_agent
9
  from langchain_core.prompts import PromptTemplate
10
  from langchain_core.messages import AIMessage, HumanMessage
11
+ from langchain.memory import ChatMessageHistory
12
+ from langchain_community.chat_message_histories import FileChatMessageHistory
13
  from langchain.chat_models import init_chat_model
14
  from utils import parse_mcp_output, classify_intent
15
  import logging
16
  from dotenv import load_dotenv
 
17
 
18
 
19
 
 
21
  load_dotenv()
22
async def lc_mcp_exec(request: str, history) -> tuple[Any, Any]:
    """
    Execute the full PostgreSQL MCP pipeline with persistent, file-backed memory.

    Loads prior conversation turns from chat_history.json, runs the ReAct agent
    against the MCP session, appends the new human/AI turns to the history file,
    and returns the agent's answer together with the persisted message list.

    Args:
        request: The user's natural-language request.
        history: UI-side chat history (unused here; persistence is file-based).

    Returns:
        tuple[Any, Any]: (response text, list of persisted chat messages).
        On any failure an error string and an empty list are returned instead
        of raising, so the calling UI never crashes.
    """
    try:
        history_file = os.path.join(os.path.dirname(__file__), "chat_history.json")

        # FileChatMessageHistory persists messages as a top-level JSON *list*
        # (the committed chat_history.json confirms this), so seed the file
        # with [] — seeding {"messages": []} would make the first load fail.
        if not os.path.exists(history_file) or os.path.getsize(history_file) == 0:
            with open(history_file, 'w') as f:
                json.dump([], f)

        message_history = FileChatMessageHistory(file_path=history_file)

        try:
            existing_messages = message_history.messages
        except json.JSONDecodeError:
            # A corrupted history file should not take the assistant down;
            # reinitialize it and continue with an empty history.
            logger.warning("Chat history file corrupted, reinitializing...")
            with open(history_file, 'w') as f:
                json.dump([], f)
            existing_messages = []

        # Re-wrap persisted turns as plain Human/AI messages, dropping
        # tool-call metadata so the prompt carries only conversational text.
        formatted_history = []
        for msg in existing_messages:
            if isinstance(msg, HumanMessage):
                formatted_history.append(HumanMessage(content=msg.content))
            elif isinstance(msg, AIMessage):
                formatted_history.append(AIMessage(content=msg.content))

        # TODO: give summary file path from config
        table_summary = load_table_summary("table_summary.txt")
        server_params = get_server_params()

        api_key = os.getenv("API_KEY")
        llm = init_chat_model(model="gemini-2.0-flash",
                              model_provider="google_genai",
                              api_key=api_key)

        async with stdio_client(server_params) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()

                tools = await load_and_enrich_tools(session)
                intent = classify_intent(request)

                # Persist the user's turn before invoking the agent so it is
                # recorded even if the agent call fails.
                message_history.add_user_message(request)

                agent = create_react_agent(llm, tools)

                # The prompt template already embeds the formatted history,
                # the tool descriptions, and the current request.
                prompt = await build_prompt(session, intent, request, tools,
                                            table_summary, formatted_history)

                # NOTE(review): "chat_history" is not part of the prebuilt
                # ReAct agent's state schema — confirm the installed langgraph
                # version tolerates the extra key.
                agent_response = await agent.ainvoke(
                    {
                        "messages": prompt,
                        "chat_history": [msg.content for msg in formatted_history]
                    },
                    config={"configurable": {"thread_id": "conversation_123"}}
                )

                if "messages" in agent_response:
                    response = agent_response["messages"][-1].content
                else:
                    response = "No response generated"
                # Persist the assistant's turn in either case.
                message_history.add_ai_message(response)

                # Return current response and up-to-date messages.
                return response, message_history.messages

    except Exception as e:
        logger.error(f"Error in chat history handling: {str(e)}", exc_info=True)
        # Fallback to stateless response if history fails
        return f"Error in conversation: {str(e)}", []
114
 
115
 
116
  # ---------------- Helper Functions ---------------- #
 
131
  tools = await load_mcp_tools(session)
132
  return tools
133
 
134
+ async def build_prompt(session, intent, request, tools, summary, chat_history):
135
  superset_prompt = await session.read_resource("resource://last_prompt")
136
  conversation_prompt = await session.read_resource("resource://base_prompt")
137
+
138
  if intent == "superset_request":
139
  template = superset_prompt.contents[0].text
140
  return template.format(
 
143
  else:
144
  template = conversation_prompt.contents[0].text
145
  tools_str = "\n".join([f"- {tool.name}: {tool.description}" for tool in tools])
146
+
147
+ # Handle history formatting with proper message access
148
+ history_str = ""
149
+ if chat_history:
150
+ history_sections = []
151
+ for msg in chat_history:
152
+ if isinstance(msg, HumanMessage):
153
+ history_sections.append(f"Previous Human Question:\n{msg.content}\n")
154
+ elif isinstance(msg, AIMessage):
155
+ history_sections.append(f"Previous Assistant Response:\n{msg.content}\n")
156
+ history_str = "\n".join(history_sections)
157
+
158
  return template.format(
159
  new_request=request,
160
  tools=tools_str,
161
  descriptions=summary,
162
+ chat_history=f"\nPrevious Conversation History:\n{history_str}" if history_str else "\nThis is a new conversation.",
163
+ system_instructions="""You are a PostgreSQL database expert assistant.
164
+ Use the conversation history when available to maintain context.
165
+ For new conversations, focus on understanding the initial request."""
166
  )
167
 
run.sh CHANGED
@@ -1,6 +1,6 @@
1
  #!/bin/bash
2
 
3
  # Replace 'myenv' with the name of your conda environment
4
- conda activate myenv
5
 
6
  python gradio_app.py
 
1
  #!/bin/bash
2
 
3
  # Optional: activate your conda environment first (replace 'myenv' below and uncomment)
4
+ # conda activate myenv
5
 
6
  python gradio_app.py