Timothy Eastridge committed on
Commit
7faf776
·
1 Parent(s): 84473fd

commit step 6

Browse files
agent/main.py CHANGED
@@ -3,11 +3,23 @@ import time
3
  import json
4
  import requests
5
  from datetime import datetime
 
 
6
 
7
  MCP_URL = os.getenv("MCP_URL", "http://mcp:8000/mcp")
8
  API_KEY = os.getenv("MCP_API_KEY", "dev-key-123")
9
  POLL_INTERVAL = int(os.getenv("AGENT_POLL_INTERVAL", "30"))
10
 
 
 
 
 
 
 
 
 
 
 
11
  def call_mcp(tool, params=None):
12
  response = requests.post(
13
  MCP_URL,
@@ -16,6 +28,28 @@ def call_mcp(tool, params=None):
16
  )
17
  return response.json()
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def handle_discover_schema(instruction):
20
  """Discover PostgreSQL schema and store in Neo4j"""
21
  print(f"[{datetime.now()}] Discovering PostgreSQL schema...")
@@ -115,6 +149,95 @@ def handle_discover_schema(instruction):
115
  "columns_discovered": sum(len(cols) for cols in schema.values())
116
  }
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def main():
119
  print(f"[{datetime.now()}] Agent starting, polling every {POLL_INTERVAL}s")
120
 
@@ -135,6 +258,8 @@ def main():
135
  # Handle different instruction types
136
  if instruction['type'] == 'discover_schema':
137
  exec_result = handle_discover_schema(instruction)
 
 
138
  else:
139
  # Default dummy execution
140
  exec_result = {"status": "success", "result": "Dummy execution"}
 
3
  import json
4
  import requests
5
  from datetime import datetime
6
+ import openai
7
+ from anthropic import Anthropic
8
 
9
  MCP_URL = os.getenv("MCP_URL", "http://mcp:8000/mcp")
10
  API_KEY = os.getenv("MCP_API_KEY", "dev-key-123")
11
  POLL_INTERVAL = int(os.getenv("AGENT_POLL_INTERVAL", "30"))
12
 
13
+ # Configure LLM
14
+ LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4")
15
+ LLM_API_KEY = os.getenv("LLM_API_KEY")
16
+
17
+ if "gpt" in LLM_MODEL:
18
+ openai.api_key = LLM_API_KEY
19
+ llm_client = None
20
+ else:
21
+ llm_client = Anthropic(api_key=LLM_API_KEY)
22
+
23
  def call_mcp(tool, params=None):
24
  response = requests.post(
25
  MCP_URL,
 
28
  )
29
  return response.json()
30
 
31
+ def get_llm_response(prompt):
32
+ """Get response from configured LLM"""
33
+ if "gpt" in LLM_MODEL:
34
+ response = openai.ChatCompletion.create(
35
+ model=LLM_MODEL,
36
+ messages=[
37
+ {"role": "system", "content": "You are a SQL expert. Generate only valid PostgreSQL queries."},
38
+ {"role": "user", "content": prompt}
39
+ ],
40
+ temperature=0,
41
+ max_tokens=500
42
+ )
43
+ return response.choices[0].message.content
44
+ else:
45
+ response = llm_client.messages.create(
46
+ model=LLM_MODEL,
47
+ max_tokens=500,
48
+ temperature=0,
49
+ messages=[{"role": "user", "content": prompt}]
50
+ )
51
+ return response.content[0].text
52
+
53
  def handle_discover_schema(instruction):
54
  """Discover PostgreSQL schema and store in Neo4j"""
55
  print(f"[{datetime.now()}] Discovering PostgreSQL schema...")
 
149
  "columns_discovered": sum(len(cols) for cols in schema.values())
150
  }
151
 
152
+ def handle_generate_sql(instruction):
153
+ """Generate SQL from natural language using LLM"""
154
+ print(f"[{datetime.now()}] Generating SQL from natural language...")
155
+
156
+ # Get the user question from instruction parameters
157
+ params = json.loads(instruction.get('parameters', '{}'))
158
+ user_question = params.get('question', 'Show all data')
159
+
160
+ # Fetch schema from Neo4j
161
+ schema_result = call_mcp("query_graph", {
162
+ "query": """
163
+ MATCH (t:Table)-[:HAS_COLUMN]->(c:Column)
164
+ RETURN t.name as table_name,
165
+ collect({
166
+ name: c.name,
167
+ type: c.data_type,
168
+ nullable: c.nullable
169
+ }) as columns
170
+ """
171
+ })
172
+
173
+ # Format schema for LLM
174
+ schema_text = "PostgreSQL Schema:\n"
175
+ for record in schema_result['data']:
176
+ table = record['table_name']
177
+ columns = record['columns']
178
+ schema_text += f"\nTable: {table}\n"
179
+ for col in columns:
180
+ nullable = "NULL" if col['nullable'] else "NOT NULL"
181
+ schema_text += f" - {col['name']}: {col['type']} {nullable}\n"
182
+
183
+ # Create prompt
184
+ prompt = f"""Given this PostgreSQL schema:
185
+
186
+ {schema_text}
187
+
188
+ Generate a SQL query for this question: {user_question}
189
+
190
+ Return ONLY the SQL query, no explanations or markdown."""
191
+
192
+ try:
193
+ # Get SQL from LLM
194
+ generated_sql = get_llm_response(prompt)
195
+
196
+ # Clean up the SQL (remove markdown if present)
197
+ generated_sql = generated_sql.strip()
198
+ if generated_sql.startswith("```"):
199
+ generated_sql = generated_sql.split("```")[1]
200
+ if generated_sql.startswith("sql"):
201
+ generated_sql = generated_sql[3:]
202
+ generated_sql = generated_sql.strip()
203
+
204
+ print(f"[{datetime.now()}] Generated SQL: {generated_sql}")
205
+
206
+ # Execute the SQL
207
+ query_result = call_mcp("query_postgres", {"query": generated_sql})
208
+
209
+ if "error" in query_result:
210
+ return {
211
+ "status": "failed",
212
+ "generated_sql": generated_sql,
213
+ "error": query_result["error"]
214
+ }
215
+
216
+ # Store successful query as template
217
+ call_mcp("write_graph", {
218
+ "action": "create_node",
219
+ "label": "QueryTemplate",
220
+ "properties": {
221
+ "id": f"generated-{int(time.time())}",
222
+ "query": generated_sql,
223
+ "question": user_question,
224
+ "created_at": datetime.now().isoformat()
225
+ }
226
+ })
227
+
228
+ return {
229
+ "status": "success",
230
+ "generated_sql": generated_sql,
231
+ "row_count": query_result.get("row_count", 0),
232
+ "data": query_result.get("data", [])[:10] # Limit to 10 rows for storage
233
+ }
234
+
235
+ except Exception as e:
236
+ return {
237
+ "status": "failed",
238
+ "error": str(e)
239
+ }
240
+
241
  def main():
242
  print(f"[{datetime.now()}] Agent starting, polling every {POLL_INTERVAL}s")
243
 
 
258
  # Handle different instruction types
259
  if instruction['type'] == 'discover_schema':
260
  exec_result = handle_discover_schema(instruction)
261
+ elif instruction['type'] == 'generate_sql':
262
+ exec_result = handle_generate_sql(instruction)
263
  else:
264
  # Default dummy execution
265
  exec_result = {"status": "success", "result": "Dummy execution"}
agent/requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  requests==2.31.0
2
  python-dotenv==1.0.0
 
 
 
1
  requests==2.31.0
2
  python-dotenv==1.0.0
3
+ openai==0.28.1
4
+ anthropic==0.7.0
docker-compose.yml CHANGED
@@ -56,6 +56,8 @@ services:
56
  - MCP_URL=http://mcp:8000/mcp
57
  - MCP_API_KEY=dev-key-123
58
  - AGENT_POLL_INTERVAL=${AGENT_POLL_INTERVAL:-30}
 
 
59
  depends_on:
60
  - mcp
61
  - neo4j
 
56
  - MCP_URL=http://mcp:8000/mcp
57
  - MCP_API_KEY=dev-key-123
58
  - AGENT_POLL_INTERVAL=${AGENT_POLL_INTERVAL:-30}
59
+ - LLM_API_KEY=${LLM_API_KEY}
60
+ - LLM_MODEL=${LLM_MODEL:-gpt-4}
61
  depends_on:
62
  - mcp
63
  - neo4j
ops/scripts/seed.py CHANGED
@@ -28,9 +28,30 @@ print(f"Created workflow: {workflow}")
28
 
29
  # Create three instructions
30
  instructions = [
31
- {"id": "inst-1", "sequence": 1, "type": "discover_schema", "status": "pending", "pause_duration": 300},
32
- {"id": "inst-2", "sequence": 2, "type": "generate_sql", "status": "pending", "pause_duration": 300},
33
- {"id": "inst-3", "sequence": 3, "type": "review_results", "status": "pending", "pause_duration": 300}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  ]
35
 
36
  for inst in instructions:
 
28
 
29
  # Create three instructions
30
  instructions = [
31
+ {
32
+ "id": "inst-1",
33
+ "sequence": 1,
34
+ "type": "discover_schema",
35
+ "status": "pending",
36
+ "pause_duration": 300,
37
+ "parameters": "{}"
38
+ },
39
+ {
40
+ "id": "inst-2",
41
+ "sequence": 2,
42
+ "type": "generate_sql",
43
+ "status": "pending",
44
+ "pause_duration": 300,
45
+ "parameters": json.dumps({"question": "Show all customers who have placed orders"})
46
+ },
47
+ {
48
+ "id": "inst-3",
49
+ "sequence": 3,
50
+ "type": "review_results",
51
+ "status": "pending",
52
+ "pause_duration": 300,
53
+ "parameters": "{}"
54
+ }
55
  ]
56
 
57
  for inst in instructions:
seed_localhost.py CHANGED
@@ -1,73 +1,62 @@
1
  import requests
2
  import json
3
 
4
- MCP_URL = "http://localhost:8000/mcp"
5
- API_KEY = "dev-key-123"
6
-
7
  def call_mcp(tool, params=None):
8
  response = requests.post(
9
- MCP_URL,
10
- headers={"X-API-Key": API_KEY, "Content-Type": "application/json"},
11
- json={"tool": tool, "params": params or {}}
12
  )
13
  return response.json()
14
 
15
  # Create demo workflow
16
- workflow = call_mcp("write_graph", {
17
- "action": "create_node",
18
- "label": "Workflow",
19
- "properties": {
20
- "id": "demo-workflow-1",
21
- "name": "Entity Resolution Demo",
22
- "status": "active",
23
- "max_iterations": 10,
24
- "current_iteration": 0
25
  }
26
  })
27
- print(f"Created workflow: {workflow}")
28
 
29
- # Create three instructions
30
  instructions = [
31
- {"id": "inst-1", "sequence": 1, "type": "discover_schema", "status": "pending", "pause_duration": 300},
32
- {"id": "inst-2", "sequence": 2, "type": "generate_sql", "status": "pending", "pause_duration": 300},
33
- {"id": "inst-3", "sequence": 3, "type": "review_results", "status": "pending", "pause_duration": 300}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  ]
35
 
36
  for inst in instructions:
37
- result = call_mcp("write_graph", {
38
- "action": "create_node",
39
- "label": "Instruction",
40
- "properties": inst
41
  })
42
- print(f"Created instruction: {inst['id']}")
43
-
44
- # Link to workflow
45
- call_mcp("query_graph", {
46
- "query": '''
47
- MATCH (w:Workflow {id: }), (i:Instruction {id: })
48
- CREATE (w)-[:HAS_INSTRUCTION]->(i)
49
- ''',
50
- "parameters": {"wid": "demo-workflow-1", "iid": inst['id']}
51
- })
52
-
53
- # Create instruction chain
54
- call_mcp("query_graph", {
55
- "query": '''
56
- MATCH (i1:Instruction {id: 'inst-1'}), (i2:Instruction {id: 'inst-2'})
57
- CREATE (i1)-[:NEXT_INSTRUCTION]->(i2)
58
- '''
59
- })
60
-
61
- call_mcp("query_graph", {
62
- "query": '''
63
- MATCH (i2:Instruction {id: 'inst-2'}), (i3:Instruction {id: 'inst-3'})
64
- CREATE (i2)-[:NEXT_INSTRUCTION]->(i3)
65
- '''
66
- })
67
-
68
- # Create indexes
69
- call_mcp("query_graph", {"query": "CREATE INDEX workflow_id IF NOT EXISTS FOR (w:Workflow) ON (w.id)"})
70
- call_mcp("query_graph", {"query": "CREATE INDEX instruction_id IF NOT EXISTS FOR (i:Instruction) ON (i.id)"})
71
- call_mcp("query_graph", {"query": "CREATE INDEX instruction_status_seq IF NOT EXISTS FOR (i:Instruction) ON (i.status, i.sequence)"})
72
 
73
- print(" Seeding complete!")
 
1
  import requests
2
  import json
3
 
 
 
 
4
  def call_mcp(tool, params=None):
5
  response = requests.post(
6
+ 'http://localhost:8000/mcp',
7
+ headers={'X-API-Key': 'dev-key-123', 'Content-Type': 'application/json'},
8
+ json={'tool': tool, 'params': params or {}}
9
  )
10
  return response.json()
11
 
12
  # Create demo workflow
13
+ workflow = call_mcp('write_graph', {
14
+ 'action': 'create_node',
15
+ 'label': 'Workflow',
16
+ 'properties': {
17
+ 'id': 'demo-workflow-1',
18
+ 'name': 'Entity Resolution Demo',
19
+ 'status': 'active',
20
+ 'max_iterations': 10,
21
+ 'current_iteration': 0
22
  }
23
  })
24
+ print(f'Created workflow: {workflow}')
25
 
26
+ # Create three instructions with parameters
27
  instructions = [
28
+ {
29
+ 'id': 'inst-1',
30
+ 'sequence': 1,
31
+ 'type': 'discover_schema',
32
+ 'status': 'pending',
33
+ 'pause_duration': 300,
34
+ 'parameters': '{}'
35
+ },
36
+ {
37
+ 'id': 'inst-2',
38
+ 'sequence': 2,
39
+ 'type': 'generate_sql',
40
+ 'status': 'pending',
41
+ 'pause_duration': 300,
42
+ 'parameters': json.dumps({'question': 'Show all customers who have placed orders'})
43
+ },
44
+ {
45
+ 'id': 'inst-3',
46
+ 'sequence': 3,
47
+ 'type': 'review_results',
48
+ 'status': 'pending',
49
+ 'pause_duration': 300,
50
+ 'parameters': '{}'
51
+ }
52
  ]
53
 
54
  for inst in instructions:
55
+ result = call_mcp('write_graph', {
56
+ 'action': 'create_node',
57
+ 'label': 'Instruction',
58
+ 'properties': inst
59
  })
60
+ print(f'Created instruction: {inst["id"]}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ print('✅ Seeding complete!')