talk2data

Paused

App Files Files Community

Selcan Yukcu committed on Apr 14, 2025

Commit

0e5cf1e

1 Parent(s): a3f399c

feat: intent classification, intent-based prompt choice

Browse files

Files changed (4) hide show

conversation_memory.py +3 -2
postgre_mcp_client.py +31 -14
postgre_mcp_server.py +118 -5
utils.py +23 -0

conversation_memory.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import json
 class ConversationMemory:
     def __init__(self):
         self.history = []  # All parsed steps from all requests
@@ -36,7 +36,8 @@ class ConversationMemory:
     def get_all_user_messages(self):
         return list(set(self.user_messages))
-    def reset(self):
         self.__init__()  # Re-initialize the object
     def summary(self):

 import json
+import os
 class ConversationMemory:
     def __init__(self):
         self.history = []  # All parsed steps from all requests
     def get_all_user_messages(self):
         return list(set(self.user_messages))
+    def reset(self, path = "memory.json"):
+        os.remove(path)
         self.__init__()  # Re-initialize the object
     def summary(self):

postgre_mcp_client.py CHANGED Viewed

@@ -7,7 +7,7 @@ from langgraph.prebuilt import create_react_agent
 from langchain.chat_models import init_chat_model
 from conversation_memory import ConversationMemory
-from utils import parse_mcp_output
 llm = init_chat_model(model="gemini-2.0-flash-lite", model_provider="google_genai",api_key ="AIzaSyAuxYmci0DVU5l5L_YcxLlxHzR5MLn70js")
@@ -24,6 +24,7 @@ The posts table represents content created by users, such as blog posts or messa
 request = "can you show me the result of the join of all tables?"
 request2 = "how many columns are there in this joined table?"
 async def main():
     async with stdio_client(server_params) as (read, write):
         async with ClientSession(read, write) as session:
@@ -43,22 +44,38 @@ async def main():
             past_results = memory.get_last_n_results()
             past_requests = memory.get_all_user_messages()
-            uri = f"resource://base_prompt_table"
-            resource = await session.read_resource(uri)
-            base_prompt = resource.contents[0].text
-            # Create a formatted string of tools
-            tools_str = "\n".join([f"- {tool.name}: {tool.description}" for tool in tools])
-            prompt = base_prompt.format(
-                user_requests=past_requests,
-                past_tools=past_tools,
-                last_queries=past_queries,
-                last_results=past_results,
-                new_request = request2,
-                tools = tools_str
-            )

 from langchain.chat_models import init_chat_model
 from conversation_memory import ConversationMemory
+from utils import parse_mcp_output, classify_intent
 llm = init_chat_model(model="gemini-2.0-flash-lite", model_provider="google_genai",api_key ="AIzaSyAuxYmci0DVU5l5L_YcxLlxHzR5MLn70js")
 request = "can you show me the result of the join of all tables?"
 request2 = "how many columns are there in this joined table?"
+request3 = "send the last table"
 async def main():
     async with stdio_client(server_params) as (read, write):
         async with ClientSession(read, write) as session:
             past_results = memory.get_last_n_results()
             past_requests = memory.get_all_user_messages()
+            intent = classify_intent(request)
+            if intent == "superset_request":
+                uri = f"resource://last_prompt"
+                resource = await session.read_resource(uri)
+                base_prompt = resource.contents[0].text
+                prompt = base_prompt.format(
+                    user_requests=past_requests,
+                    past_tools=past_tools,
+                    last_queries=past_queries,
+                    last_results=past_results,
+                    new_request=request3
+                )
+            else:
+                uri = f"resource://base_prompt"
+                resource = await session.read_resource(uri)
+                base_prompt = resource.contents[0].text
+                # Create a formatted string of tools
+                tools_str = "\n".join([f"- {tool.name}: {tool.description}" for tool in tools])
+                prompt = base_prompt.format(
+                    user_requests=past_requests,
+                    past_tools=past_tools,
+                    last_queries=past_queries,
+                    last_results=past_results,
+                    new_request = request2,
+                    tools = tools_str
+                )

postgre_mcp_server.py CHANGED Viewed

@@ -50,12 +50,12 @@ mcp = FastMCP(
 @mcp.resource(
-    uri="resource://base_prompt_table",
-    name="base_prompt_table",
-    description="A base prompt to generate description of a table"
 )
-async def base_prompt_table() -> str:
-    """Returns a base prompt to generate description of a table"""
     base_prompt = """
@@ -181,6 +181,15 @@ async def base_prompt_table() -> str:
                        ```sql
                        SELECT name FROM customers WHERE country = 'Germany';
                     ### Invalid Example — DELETE Operation (Not Allowed):
                     **User Request:** "Delete all customers from Germany."
@@ -204,6 +213,110 @@ async def base_prompt_table() -> str:
     return base_prompt
 @mcp.tool(description="tests the database connection and returns the PostgreSQL version or an error message.")
 async def test_connection(ctx: Context) -> str:
     """Test database connection"""

 @mcp.resource(
+    uri="resource://base_prompt",
+    name="base_prompt",
+    description="A base prompt to generate SQL queries and answer questions"
 )
+async def base_prompt_query() -> str:
+    """Returns a base prompt to generate sql queries and answer questions"""
     base_prompt = """
                        ```sql
                        SELECT name FROM customers WHERE country = 'Germany';
+                    ## Example 6 — Basic Aggregation**
+                    **User Request:** "Get total sales for each product"
+                    **Steps:**
+                    1.  Use memory or List Tables → Get schema for `sales_data`
+                    2. Generate and execute query:
+                    ```sql
+                    SELECT product_name, SUM(total_sales) FROM sales_data GROUP BY product_name;
                     ### Invalid Example — DELETE Operation (Not Allowed):
                     **User Request:** "Delete all customers from Germany."
     return base_prompt
@mcp.resource(
    uri="resource://last_prompt",
    name="last_prompt",
    description="A prompt that identifies the most recent SQL query related to the user's request and reformats it into ANSI SQL syntax for use in Superset."
)
async def last_prompt() -> str:
    """Return the prompt template for re-sending a previous query as ANSI SQL.

    The template instructs the model to find, in the conversation memory,
    the SQL query the user is referring to and to answer with that query
    converted to ANSI SQL and nothing else.

    The returned text is a ``str.format`` template. The caller must supply
    these placeholders: ``user_requests``, ``past_tools``, ``last_queries``,
    ``last_results`` and ``new_request``.

    Returns:
        The unformatted prompt template.
    """
    # NOTE: any literal curly braces added to this template must be doubled
    # ({{ }}), because the caller runs ``.format(...)`` on the result.
    base_prompt = """
==========================
# Your Role
==========================
You are an expert at reading and understanding SQL queries.
Your task is to retrieve the **exact SQL query** that produced a previously seen result, convert the query to the **ANSI SQL query** and return **only the ANSI SQL query** — no explanation, reasoning, or commentary.
You have access to a **short-term memory**, which stores relevant context from earlier interactions in the current conversation.
---
==========================
# Your Objective
==========================
When a user submits a request (e.g., *"send me that table"*, *"send the last query"*, etc.), follow these steps:
1. Identify which previous result the user is referring to, using your short-term memory.
2. Retrieve the corresponding SQL query that produced that result.
3. Convert the SQL query to the ANSI SQL query
4. Return **only** that ANSI SQL query.
---
==========================
# Critical Rules
==========================
- Do **not** ask questions or request clarification.
- Do **not** explain anything to the user.
- Only use the **memory** to determine which query is relevant.
- Respond with the **exact ANSI SQL query only**, formatted cleanly.
- Do **not** guess — only retrieve queries that actually exist in memory.
- If no query fits, respond with: "Query not found."
---
==========================
# Short-Term Memory
==========================
You have access to the following memory from this conversation:
- **Previous user requests**:
  `{user_requests}`
- **Tools used so far**:
  `{past_tools}`
- **Recent SQL queries**:
  `{last_queries}`
- **Result preview from last query**:
  `{last_results}`
Use this memory to resolve any references in the user's latest request.
---
==========================
# Examples
==========================
### Example 1 — Referring last query (Check the memory and find the most recent query that generates a table) :
**User Request:** "send the last table"
**You return:**
```sql
SELECT * FROM posts INNER JOIN users ON posts.user_id = users.id;
### Example 2 — Referring to a specific query (check the memory and find the query that returns the count of users):
**User Request:** "send the query that gave us the count of users"
**You return:**
```sql
SELECT COUNT(*) FROM users;
### Example 3 — Referring latest known query (Check the memory and find the most recent query.)
**User Request:** "send"
**You return:**
(latest known query):
```sql
SELECT * FROM posts WHERE user_id = 1;
Remember: Only respond with valid SQL from memory converted to the ANSI SQL. No assumptions. No explanations.
=========================
# New User Request
=========================
{new_request}
                        """
    return base_prompt
 @mcp.tool(description="tests the database connection and returns the PostgreSQL version or an error message.")
 async def test_connection(ctx: Context) -> str:
     """Test database connection"""

utils.py CHANGED Viewed

@@ -1,3 +1,6 @@
 def parse_mcp_output(output_dict):
     result = []
     messages = output_dict.get("messages", [])
@@ -72,3 +75,23 @@ def parse_mcp_output(output_dict):
     return result, query_store

+import re
 def parse_mcp_output(output_dict):
     result = []
     messages = output_dict.get("messages", [])
     return result, query_store
def classify_intent(user_input: str) -> str:
    """Classify a user request as a Superset request or a plain SQL request.

    The request is lower-cased and searched for the Superset-related
    keywords listed below. Keywords are matched as whole words or phrases,
    optionally followed by a simple inflection (``s``, ``d``, ``ed``,
    ``ing``), rather than as raw substrings. Words that merely contain a
    keyword, such as "paragraph", "demographics" or "sender", therefore do
    not trigger a false "superset_request".

    Args:
        user_input: The raw request text from the user.

    Returns:
        ``"superset_request"`` if any keyword is present, otherwise the
        fallback ``"sql_request"``.
    """
    text = user_input.lower().strip()
    superset_keywords = [
        "send to superset", "chart", "visualize", "visualise",
        "plot", "graph", "send this", "send that", "create a chart",
        "push to superset", "make a chart", "show chart", "dashboard", "send"
    ]
    # \b on both sides keeps a keyword from matching inside a longer word;
    # the optional suffix still accepts "charts", "visualized", "sending".
    alternatives = "|".join(re.escape(kw) for kw in superset_keywords)
    pattern = r"\b(?:" + alternatives + r")(?:s|d|ed|ing)?\b"
    # Check for superset intent
    if re.search(pattern, text):
        return "superset_request"
    # Fallback
    return "sql_request"