ohmygaugh committed on
Commit
a0eb181
·
1 Parent(s): 9d411a7

demo working

Browse files
Makefile CHANGED
@@ -32,15 +32,12 @@ clean:
32
  docker-compose down -v
33
  docker system prune -f
34
  @if [ -d "neo4j/data" ]; then rm -rf neo4j/data; fi
35
- @if [ -d "frontend/.next" ]; then rm -rf frontend/.next; fi
36
- @if [ -d "frontend/node_modules" ]; then rm -rf frontend/node_modules; fi
37
 
38
  # Health check all services
39
  health:
40
  @echo "Checking service health..."
41
  @docker-compose exec neo4j cypher-shell -u neo4j -p password "MATCH (n) RETURN count(n) LIMIT 1" > /dev/null 2>&1 && echo "βœ… Neo4j: Healthy" || echo "❌ Neo4j: Unhealthy"
42
  @curl -s http://localhost:8000/health > /dev/null && echo "βœ… MCP Server: Healthy" || echo "❌ MCP Server: Unhealthy"
43
- @curl -s http://localhost:3000 > /dev/null && echo "βœ… Frontend: Healthy" || echo "❌ Frontend: Unhealthy"
44
  @curl -s http://localhost:8501 > /dev/null && echo "βœ… Streamlit: Healthy" || echo "❌ Streamlit: Unhealthy"
45
  @docker-compose ps agent | grep -q "Up" && echo "βœ… Agent: Running" || echo "❌ Agent: Not running"
46
 
@@ -49,7 +46,7 @@ test: health
49
  @echo "Running integration test..."
50
  @make seed
51
  @sleep 5
52
- @echo "Check http://localhost:3000 and Neo4j Browser at http://localhost:7474"
53
 
54
  # Demo workflow
55
  demo:
@@ -59,11 +56,10 @@ demo:
59
  @make seed
60
  @echo ""
61
  @echo "πŸŽ‰ Demo Ready!"
62
- @echo "1. Open http://localhost:3000 in your browser (Main Chat Interface)"
63
- @echo "2. Open http://localhost:8501 in your browser (Streamlit Monitor)"
64
- @echo "3. Ask a question like: 'Show me all customers who have placed orders'"
65
- @echo "4. Watch the agent process through the workflow"
66
- @echo "5. Check Neo4j Browser at http://localhost:7474 (neo4j/password)"
67
  @echo ""
68
  @echo "During 5-minute pauses, you can edit instructions in Neo4j Browser:"
69
  @echo "MATCH (i:Instruction {status: 'pending'}) SET i.parameters = '{\"question\": \"new question\"}'"
@@ -79,8 +75,6 @@ restart-agent:
79
  restart-mcp:
80
  docker-compose restart mcp
81
 
82
- restart-frontend:
83
- docker-compose restart frontend
84
 
85
  restart-streamlit:
86
  docker-compose restart streamlit
@@ -92,8 +86,6 @@ debug-agent:
92
  debug-mcp:
93
  docker-compose logs mcp
94
 
95
- debug-frontend:
96
- docker-compose logs frontend
97
 
98
  debug-streamlit:
99
  docker-compose logs streamlit
 
32
  docker-compose down -v
33
  docker system prune -f
34
  @if [ -d "neo4j/data" ]; then rm -rf neo4j/data; fi
 
 
35
 
36
  # Health check all services
37
  health:
38
  @echo "Checking service health..."
39
  @docker-compose exec neo4j cypher-shell -u neo4j -p password "MATCH (n) RETURN count(n) LIMIT 1" > /dev/null 2>&1 && echo "βœ… Neo4j: Healthy" || echo "❌ Neo4j: Unhealthy"
40
  @curl -s http://localhost:8000/health > /dev/null && echo "βœ… MCP Server: Healthy" || echo "❌ MCP Server: Unhealthy"
 
41
  @curl -s http://localhost:8501 > /dev/null && echo "βœ… Streamlit: Healthy" || echo "❌ Streamlit: Unhealthy"
42
  @docker-compose ps agent | grep -q "Up" && echo "βœ… Agent: Running" || echo "❌ Agent: Not running"
43
 
 
46
  @echo "Running integration test..."
47
  @make seed
48
  @sleep 5
49
+ @echo "Check http://localhost:8501 and Neo4j Browser at http://localhost:7474"
50
 
51
  # Demo workflow
52
  demo:
 
56
  @make seed
57
  @echo ""
58
  @echo "πŸŽ‰ Demo Ready!"
59
+ @echo "1. Open http://localhost:8501 in your browser (Main Chat Interface)"
60
+ @echo "2. Ask a question like: 'Show me all customers who have placed orders'"
61
+ @echo "3. Watch the agent process through the workflow"
62
+ @echo "4. Check Neo4j Browser at http://localhost:7474 (neo4j/password)"
 
63
  @echo ""
64
  @echo "During 5-minute pauses, you can edit instructions in Neo4j Browser:"
65
  @echo "MATCH (i:Instruction {status: 'pending'}) SET i.parameters = '{\"question\": \"new question\"}'"
 
75
  restart-mcp:
76
  docker-compose restart mcp
77
 
 
 
78
 
79
  restart-streamlit:
80
  docker-compose restart streamlit
 
86
  debug-mcp:
87
  docker-compose logs mcp
88
 
 
 
89
 
90
  debug-streamlit:
91
  docker-compose logs streamlit
agent/main.py CHANGED
@@ -2,17 +2,15 @@ import os
2
  import sys
3
  import logging
4
  import json
5
- from typing import Annotated, List, TypedDict
6
  from contextlib import asynccontextmanager
7
  from fastapi import FastAPI
8
  from pydantic import BaseModel
9
  import uvicorn
10
  from fastapi.responses import StreamingResponse
11
 
12
- from langchain_core.messages import BaseMessage, ToolMessage, AIMessage
13
  from langchain_openai import ChatOpenAI
14
- from langgraph.graph import StateGraph, START, END
15
- from langgraph.prebuilt import ToolNode
16
 
17
  from tools import MCPClient, SchemaSearchTool, JoinPathFinderTool, QueryExecutorTool
18
 
@@ -24,9 +22,16 @@ MCP_URL = os.getenv("MCP_URL", "http://mcp:8000/mcp")
24
  API_KEY = os.getenv("MCP_API_KEY", "dev-key-123")
25
  LLM_API_KEY = os.getenv("LLM_API_KEY")
26
 
27
- # --- Agent State Definition ---
28
- class AgentState(TypedDict):
29
- messages: List[BaseMessage]
 
 
 
 
 
 
 
30
 
31
  # --- Agent Initialization ---
32
  class GraphRAGAgent:
@@ -45,53 +50,48 @@ class GraphRAGAgent:
45
  QueryExecutorTool(mcp_client=mcp_client),
46
  ]
47
 
48
- self.llm_with_tools = llm.bind_tools(tools)
49
- self.tool_node = ToolNode(tools)
50
-
51
- # Define the agent graph
52
- graph = StateGraph(AgentState)
53
- graph.add_node("llm", self.call_llm)
54
- graph.add_node("tools", self.tool_node)
55
-
56
- graph.add_edge(START, "llm")
57
- graph.add_conditional_edges("llm", self.should_call_tools)
58
- graph.add_edge("tools", "llm")
59
-
60
- self.graph = graph.compile()
61
-
62
- def should_call_tools(self, state: AgentState) -> str:
63
- """Determines whether to call tools or end the execution."""
64
- last_message = state["messages"][-1]
65
- if not last_message.tool_calls:
66
- return END
67
- return "tools"
68
-
69
- def call_llm(self, state: AgentState) -> dict:
70
- """Calls the LLM with the current state to decide the next action."""
71
- response = self.llm_with_tools.invoke(state["messages"])
72
- return {"messages": [response]}
73
 
74
  async def stream_query(self, question: str):
75
  """Processes a question and streams the intermediate steps."""
76
- inputs = {"messages": [("user", question)]}
77
- async for event in self.graph.astream(inputs, stream_mode="values"):
78
- last_message = event["messages"][-1]
79
- if isinstance(last_message, AIMessage) and last_message.tool_calls:
80
- # Agent is thinking and calling a tool
81
- tool_call = last_message.tool_calls[0]
82
- yield json.dumps({
83
- "type": "thought",
84
- "content": f"πŸ€– Calling tool `{tool_call['name']}` with args: {tool_call['args']}"
85
- }) + "\\n\\n"
86
- elif isinstance(last_message, ToolMessage):
87
- # A tool has returned its result
88
- yield json.dumps({
89
- "type": "observation",
90
- "content": f"πŸ› οΈ Tool `{last_message.name}` returned:\n\n```\n{last_message.content}\n```"
91
- }) + "\\n\\n"
92
- elif isinstance(last_message, AIMessage):
93
- # This is the final answer
94
- yield json.dumps({"type": "final_answer", "content": last_message.content}) + "\\n\\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  # --- FastAPI Application ---
97
  agent = None
 
2
  import sys
3
  import logging
4
  import json
 
5
  from contextlib import asynccontextmanager
6
  from fastapi import FastAPI
7
  from pydantic import BaseModel
8
  import uvicorn
9
  from fastapi.responses import StreamingResponse
10
 
11
+ from langchain_core.messages import ToolMessage, AIMessage
12
  from langchain_openai import ChatOpenAI
13
+ from langgraph.prebuilt import create_react_agent
 
14
 
15
  from tools import MCPClient, SchemaSearchTool, JoinPathFinderTool, QueryExecutorTool
16
 
 
22
  API_KEY = os.getenv("MCP_API_KEY", "dev-key-123")
23
  LLM_API_KEY = os.getenv("LLM_API_KEY")
24
 
25
+ # --- System Prompt ---
26
+ SYSTEM_PROMPT = """You are a helpful assistant for querying life sciences databases.
27
+
28
+ You have access to these tools:
29
+ - schema_search: Find relevant database tables and columns based on keywords
30
+ - find_join_path: Discover how to join tables together using the knowledge graph
31
+ - execute_query: Run SQL queries against the databases
32
+
33
+ Always use schema_search first to understand the available data, then construct appropriate SQL queries.
34
+ When querying, be specific about what tables and columns you're using."""
35
 
36
  # --- Agent Initialization ---
37
  class GraphRAGAgent:
 
50
  QueryExecutorTool(mcp_client=mcp_client),
51
  ]
52
 
53
+ # Use LangGraph's prebuilt create_react_agent for proper message handling
54
+ self.graph = create_react_agent(llm, tools, state_modifier=SYSTEM_PROMPT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  async def stream_query(self, question: str):
57
  """Processes a question and streams the intermediate steps."""
58
+ try:
59
+ async for event in self.graph.astream(
60
+ {"messages": [("user", question)]},
61
+ stream_mode="values"
62
+ ):
63
+ # create_react_agent uses standard message format
64
+ messages = event.get("messages", [])
65
+ if not messages:
66
+ continue
67
+
68
+ last_message = messages[-1]
69
+
70
+ if isinstance(last_message, AIMessage) and last_message.tool_calls:
71
+ # Agent is deciding to call a tool
72
+ tool_call = last_message.tool_calls[0]
73
+ yield json.dumps({
74
+ "type": "thought",
75
+ "content": f"πŸ€– Calling tool `{tool_call['name']}` with args: {tool_call['args']}"
76
+ }) + "\n\n"
77
+ elif isinstance(last_message, ToolMessage):
78
+ # A tool has returned its result
79
+ yield json.dumps({
80
+ "type": "observation",
81
+ "content": f"πŸ› οΈ Tool `{last_message.name}` returned:\n\n```\n{last_message.content}\n```"
82
+ }) + "\n\n"
83
+ elif isinstance(last_message, AIMessage) and last_message.content:
84
+ # This is the final answer (AIMessage with content but no tool_calls)
85
+ yield json.dumps({
86
+ "type": "final_answer",
87
+ "content": last_message.content
88
+ }) + "\n\n"
89
+ except Exception as e:
90
+ logger.error(f"Error in agent workflow: {e}", exc_info=True)
91
+ yield json.dumps({
92
+ "type": "final_answer",
93
+ "content": f"I encountered an error while processing your request. Please try rephrasing your question or asking something simpler."
94
+ }) + "\n\n"
95
 
96
  # --- FastAPI Application ---
97
  agent = None
agent/tools.py CHANGED
@@ -14,7 +14,7 @@ class MCPClient:
14
  def __init__(self, mcp_url: str, api_key: str):
15
  self.mcp_url = mcp_url
16
  self.headers = {
17
- "Authorization": f"Bearer {api_key}",
18
  "Content-Type": "application/json"
19
  }
20
 
@@ -64,7 +64,8 @@ class SchemaSearchTool(BaseTool):
64
  return f"Error searching schemas: {response.get('message', 'Unknown error')}"
65
 
66
  async def _arun(self, query: str) -> str:
67
- raise NotImplementedError("SchemaSearchTool does not support async")
 
68
 
69
 
70
  class JoinPathFinderTool(BaseTool):
@@ -99,7 +100,8 @@ class JoinPathFinderTool(BaseTool):
99
  return f"Failed to find join path: {str(e)}"
100
 
101
  async def _arun(self, table_names: str) -> str:
102
- raise NotImplementedError("JoinPathFinderTool does not support async")
 
103
 
104
 
105
  class QueryExecutorTool(BaseTool):
@@ -147,4 +149,5 @@ class QueryExecutorTool(BaseTool):
147
  return f"Failed to execute query: {str(e)}"
148
 
149
  async def _arun(self, sql: str) -> str:
150
- raise NotImplementedError("QueryExecutorTool does not support async")
 
 
14
  def __init__(self, mcp_url: str, api_key: str):
15
  self.mcp_url = mcp_url
16
  self.headers = {
17
+ "x-api-key": api_key,
18
  "Content-Type": "application/json"
19
  }
20
 
 
64
  return f"Error searching schemas: {response.get('message', 'Unknown error')}"
65
 
66
  async def _arun(self, query: str) -> str:
67
+ """Async version - just calls sync version."""
68
+ return self._run(query)
69
 
70
 
71
  class JoinPathFinderTool(BaseTool):
 
100
  return f"Failed to find join path: {str(e)}"
101
 
102
  async def _arun(self, table_names: str) -> str:
103
+ """Async version - just calls sync version."""
104
+ return self._run(table_names)
105
 
106
 
107
  class QueryExecutorTool(BaseTool):
 
149
  return f"Failed to execute query: {str(e)}"
150
 
151
  async def _arun(self, sql: str) -> str:
152
+ """Async version - just calls sync version."""
153
+ return self._run(sql)
frontend/Dockerfile DELETED
@@ -1,12 +0,0 @@
1
- FROM node:18-alpine
2
-
3
- WORKDIR /app
4
-
5
- COPY package*.json ./
6
- RUN npm install
7
-
8
- COPY . .
9
- RUN npm run build
10
-
11
- EXPOSE 3000
12
- CMD ["npm", "start"]
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/app/globals.css DELETED
@@ -1,7 +0,0 @@
1
- @tailwind base;
2
- @tailwind components;
3
- @tailwind utilities;
4
-
5
- body {
6
- @apply bg-gray-50;
7
- }
 
 
 
 
 
 
 
 
frontend/app/layout.tsx DELETED
@@ -1,13 +0,0 @@
1
- import './globals.css'
2
-
3
- export default function RootLayout({
4
- children,
5
- }: {
6
- children: React.ReactNode
7
- }) {
8
- return (
9
- <html lang="en">
10
- <body>{children}</body>
11
- </html>
12
- )
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/app/page.tsx DELETED
@@ -1,532 +0,0 @@
1
- 'use client';
2
-
3
- import { useState, useEffect, useRef } from 'react';
4
- import cytoscape from 'cytoscape';
5
- import fcose from 'cytoscape-fcose';
6
-
7
- cytoscape.use(fcose);
8
-
9
- const MCP_URL = process.env.NEXT_PUBLIC_MCP_URL || 'http://localhost:8000';
10
- const API_KEY = 'dev-key-123';
11
-
12
- interface Message {
13
- id: string;
14
- role: 'user' | 'assistant' | 'system';
15
- content: string;
16
- timestamp: Date;
17
- }
18
-
19
- interface WorkflowStatus {
20
- workflow_id: string;
21
- current_instruction: string;
22
- status: string;
23
- pause_remaining?: number;
24
- }
25
-
26
- export default function ChatPage() {
27
- const [messages, setMessages] = useState<Message[]>([]);
28
- const [input, setInput] = useState('');
29
- const [loading, setLoading] = useState(false);
30
- const [workflowStatus, setWorkflowStatus] = useState<WorkflowStatus | null>(null);
31
- const [lastInstructions, setLastInstructions] = useState<any[]>([]);
32
- const [graphData, setGraphData] = useState<any>(null);
33
- const cyContainer = useRef<HTMLDivElement>(null);
34
- const cyRef = useRef<any>(null);
35
-
36
- // MCP API call helper
37
- const callMCP = async (tool: string, params: any = {}) => {
38
- const response = await fetch(`${MCP_URL}/mcp`, {
39
- method: 'POST',
40
- headers: {
41
- 'Content-Type': 'application/json',
42
- 'X-API-Key': API_KEY,
43
- },
44
- body: JSON.stringify({ tool, params }),
45
- });
46
- return response.json();
47
- };
48
-
49
- // Poll for workflow status
50
- useEffect(() => {
51
- const interval = setInterval(async () => {
52
- try {
53
- // Get active workflow
54
- const result = await callMCP('query_graph', {
55
- query: `
56
- MATCH (w:Workflow {status: 'active'})-[:HAS_INSTRUCTION]->(i:Instruction {status: 'executing'})
57
- RETURN w.id as workflow_id, i.id as current_instruction, i.type as instruction_type,
58
- i.status as status, i.pause_duration as pause_duration
59
- ORDER BY i.sequence DESC
60
- LIMIT 1
61
- `,
62
- });
63
-
64
- if (result.data && result.data.length > 0) {
65
- const data = result.data[0];
66
- setWorkflowStatus({
67
- workflow_id: data.workflow_id,
68
- current_instruction: data.instruction_type,
69
- status: data.status,
70
- });
71
- } else {
72
- setWorkflowStatus(null);
73
- }
74
-
75
- // Get last 5 instructions
76
- const instructionsResult = await callMCP('query_graph', {
77
- query: `
78
- MATCH (i:Instruction)-[:EXECUTED_AS]->(e:Execution)
79
- RETURN i.id as id, i.type as type, i.status as status,
80
- e.completed_at as completed_at
81
- ORDER BY e.completed_at DESC
82
- LIMIT 5
83
- `,
84
- });
85
-
86
- if (instructionsResult.data) {
87
- setLastInstructions(instructionsResult.data);
88
- }
89
-
90
- // Update graph visualization
91
- await updateGraph();
92
-
93
- } catch (error) {
94
- console.error('Error polling status:', error);
95
- }
96
- }, 5000); // Poll every 5 seconds
97
-
98
- return () => clearInterval(interval);
99
- }, []);
100
-
101
- // Initialize and update graph
102
- const updateGraph = async () => {
103
- try {
104
- const result = await callMCP('query_graph', {
105
- query: `
106
- MATCH (w:Workflow {status: 'active'})-[:HAS_INSTRUCTION]->(i:Instruction)
107
- OPTIONAL MATCH (i)-[:NEXT_INSTRUCTION]->(next)
108
- RETURN w, i, next
109
- `,
110
- });
111
-
112
- if (!result.data || result.data.length === 0) return;
113
-
114
- const nodes: any[] = [];
115
- const edges: any[] = [];
116
- const nodeIds = new Set();
117
-
118
- // Add workflow node
119
- const workflow = result.data[0].w;
120
- if (workflow && !nodeIds.has(workflow.id)) {
121
- nodes.push({
122
- data: {
123
- id: workflow.id,
124
- label: workflow.name || 'Workflow',
125
- type: 'workflow',
126
- status: workflow.status
127
- }
128
- });
129
- nodeIds.add(workflow.id);
130
- }
131
-
132
- // Add instruction nodes and edges
133
- result.data.forEach((row: any) => {
134
- const instruction = row.i;
135
- const next = row.next;
136
-
137
- if (instruction && !nodeIds.has(instruction.id)) {
138
- nodes.push({
139
- data: {
140
- id: instruction.id,
141
- label: instruction.type,
142
- type: 'instruction',
143
- status: instruction.status
144
- }
145
- });
146
- nodeIds.add(instruction.id);
147
-
148
- // Add edge from workflow to instruction
149
- if (workflow) {
150
- edges.push({
151
- data: {
152
- id: `${workflow.id}-${instruction.id}`,
153
- source: workflow.id,
154
- target: instruction.id
155
- }
156
- });
157
- }
158
- }
159
-
160
- // Add next instruction edge
161
- if (instruction && next) {
162
- edges.push({
163
- data: {
164
- id: `${instruction.id}-${next.id}`,
165
- source: instruction.id,
166
- target: next.id
167
- }
168
- });
169
- }
170
- });
171
-
172
- // Initialize or update Cytoscape
173
- if (!cyRef.current && cyContainer.current) {
174
- cyRef.current = cytoscape({
175
- container: cyContainer.current,
176
- elements: { nodes, edges },
177
- style: [
178
- {
179
- selector: 'node',
180
- style: {
181
- 'label': 'data(label)',
182
- 'text-valign': 'center',
183
- 'text-halign': 'center',
184
- 'width': '80px',
185
- 'height': '80px',
186
- 'font-size': '12px',
187
- 'background-color': (ele: any) => {
188
- const status = ele.data('status');
189
- if (status === 'complete') return '#10B981';
190
- if (status === 'executing') return '#FCD34D';
191
- if (status === 'failed') return '#EF4444';
192
- return '#9CA3AF';
193
- }
194
- }
195
- },
196
- {
197
- selector: 'edge',
198
- style: {
199
- 'width': 2,
200
- 'line-color': '#9CA3AF',
201
- 'target-arrow-color': '#9CA3AF',
202
- 'target-arrow-shape': 'triangle',
203
- 'curve-style': 'bezier'
204
- }
205
- }
206
- ],
207
- layout: {
208
- name: 'fcose'
209
- }
210
- });
211
-
212
- // Add click handler for nodes
213
- cyRef.current.on('tap', 'node', function(evt: any) {
214
- const node = evt.target;
215
- alert(`Node: ${node.data('label')}\nStatus: ${node.data('status')}\nID: ${node.data('id')}`);
216
- });
217
-
218
- } else if (cyRef.current) {
219
- // Update existing graph
220
- cyRef.current.json({ elements: { nodes, edges } });
221
- cyRef.current.layout({ name: 'fcose' }).run();
222
- }
223
-
224
- } catch (error) {
225
- console.error('Error updating graph:', error);
226
- }
227
- };
228
-
229
- // Handle message submission
230
- const handleSubmit = async (e: React.FormEvent) => {
231
- e.preventDefault();
232
- if (!input.trim() || loading) return;
233
-
234
- const userMessage: Message = {
235
- id: Date.now().toString(),
236
- role: 'user',
237
- content: input,
238
- timestamp: new Date(),
239
- };
240
-
241
- setMessages(prev => [...prev, userMessage]);
242
- setInput('');
243
- setLoading(true);
244
-
245
- try {
246
- // Create a new workflow with the user's question
247
- const workflowResult = await callMCP('write_graph', {
248
- action: 'create_node',
249
- label: 'Workflow',
250
- properties: {
251
- id: `workflow-${Date.now()}`,
252
- name: `Query: ${input.substring(0, 50)}`,
253
- status: 'active',
254
- created_at: new Date().toISOString(),
255
- },
256
- });
257
-
258
- // Create instructions for the workflow
259
- const instructions = [
260
- { type: 'discover_schema', sequence: 1 },
261
- { type: 'generate_sql', sequence: 2, parameters: JSON.stringify({ question: input }) },
262
- { type: 'review_results', sequence: 3 },
263
- ];
264
-
265
- for (const inst of instructions) {
266
- const instResult = await callMCP('write_graph', {
267
- action: 'create_node',
268
- label: 'Instruction',
269
- properties: {
270
- id: `inst-${Date.now()}-${inst.sequence}`,
271
- type: inst.type,
272
- sequence: inst.sequence,
273
- status: 'pending',
274
- pause_duration: 30, // Shorter pause for demo
275
- parameters: inst.parameters || '{}',
276
- },
277
- });
278
-
279
- // Link instruction to workflow
280
- await callMCP('query_graph', {
281
- query: `
282
- MATCH (w:Workflow {id: $wid}), (i:Instruction {id: $iid})
283
- CREATE (w)-[:HAS_INSTRUCTION]->(i)
284
- `,
285
- parameters: {
286
- wid: workflowResult.created.id,
287
- iid: instResult.created.id,
288
- },
289
- });
290
- }
291
-
292
- // Create instruction chain
293
- const instIds = instructions.map((_, i) => `inst-${Date.now()}-${i + 1}`);
294
- for (let i = 0; i < instIds.length - 1; i++) {
295
- await callMCP('query_graph', {
296
- query: `
297
- MATCH (i1:Instruction {id: $id1}), (i2:Instruction {id: $id2})
298
- CREATE (i1)-[:NEXT_INSTRUCTION]->(i2)
299
- `,
300
- parameters: { id1: instIds[i], id2: instIds[i + 1] },
301
- });
302
- }
303
-
304
- const systemMessage: Message = {
305
- id: (Date.now() + 1).toString(),
306
- role: 'system',
307
- content: 'Workflow created! The agent will now process your request...',
308
- timestamp: new Date(),
309
- };
310
- setMessages(prev => [...prev, systemMessage]);
311
-
312
- // Poll for results
313
- const pollForResults = async () => {
314
- let attempts = 0;
315
- const maxAttempts = 60; // 5 minutes max
316
-
317
- while (attempts < maxAttempts) {
318
- await new Promise(resolve => setTimeout(resolve, 5000));
319
-
320
- // Check if SQL generation is complete
321
- const execResult = await callMCP('query_graph', {
322
- query: `
323
- MATCH (i:Instruction {type: 'generate_sql'})-[:EXECUTED_AS]->(e:Execution)
324
- WHERE i.id IN $inst_ids
325
- RETURN e.result as result
326
- ORDER BY e.completed_at DESC
327
- LIMIT 1
328
- `,
329
- parameters: { inst_ids: instIds },
330
- });
331
-
332
- if (execResult.data && execResult.data.length > 0) {
333
- const result = JSON.parse(execResult.data[0].result);
334
-
335
- if (result.status === 'success') {
336
- const assistantMessage: Message = {
337
- id: (Date.now() + 2).toString(),
338
- role: 'assistant',
339
- content: formatSQLResult(result),
340
- timestamp: new Date(),
341
- };
342
- setMessages(prev => [...prev, assistantMessage]);
343
- break;
344
- }
345
- }
346
-
347
- attempts++;
348
- }
349
- };
350
-
351
- pollForResults();
352
-
353
- } catch (error) {
354
- console.error('Error creating workflow:', error);
355
- const errorMessage: Message = {
356
- id: (Date.now() + 2).toString(),
357
- role: 'system',
358
- content: 'Error: Failed to process your request',
359
- timestamp: new Date(),
360
- };
361
- setMessages(prev => [...prev, errorMessage]);
362
- } finally {
363
- setLoading(false);
364
- }
365
- };
366
-
367
- // Format SQL results for display
368
- const formatSQLResult = (result: any) => {
369
- if (!result.data || result.data.length === 0) {
370
- return `Query executed successfully but returned no results.\n\nSQL: ${result.generated_sql}`;
371
- }
372
-
373
- const columns = Object.keys(result.data[0]);
374
- const rows = result.data;
375
-
376
- let table = '<table class="min-w-full border border-gray-300">';
377
- table += '<thead><tr class="bg-gray-100">';
378
- columns.forEach(col => {
379
- table += `<th class="px-4 py-2 border">${col}</th>`;
380
- });
381
- table += '</tr></thead><tbody>';
382
-
383
- rows.forEach((row: any) => {
384
- table += '<tr>';
385
- columns.forEach(col => {
386
- table += `<td class="px-4 py-2 border">${row[col] ?? 'null'}</td>`;
387
- });
388
- table += '</tr>';
389
- });
390
- table += '</tbody></table>';
391
-
392
- return `
393
- <div>
394
- <p class="mb-2"><strong>Generated SQL:</strong></p>
395
- <code class="block bg-gray-100 p-2 mb-4 rounded">${result.generated_sql}</code>
396
- <p class="mb-2"><strong>Results (${result.row_count} rows):</strong></p>
397
- ${table}
398
- </div>
399
- `;
400
- };
401
-
402
- // Handle stop button
403
- const handleStop = async () => {
404
- if (!workflowStatus) return;
405
-
406
- await callMCP('query_graph', {
407
- query: `MATCH (w:Workflow {id: $id}) SET w.status = 'stopped'`,
408
- parameters: { id: workflowStatus.workflow_id },
409
- });
410
-
411
- setWorkflowStatus(null);
412
- };
413
-
414
- return (
415
- <div className="flex h-screen">
416
- {/* Main Chat Area */}
417
- <div className="flex-1 flex flex-col">
418
- {/* Header */}
419
- <div className="bg-white border-b px-6 py-4">
420
- <h1 className="text-2xl font-bold">Graph-Driven Agent Chat</h1>
421
- {workflowStatus && (
422
- <div className="mt-2 flex items-center space-x-4">
423
- <span className="text-sm text-gray-600">
424
- Status: <span className="font-semibold">{workflowStatus.status}</span>
425
- </span>
426
- <span className="text-sm text-gray-600">
427
- Current: <span className="font-semibold">{workflowStatus.current_instruction}</span>
428
- </span>
429
- {loading && (
430
- <span className="text-sm text-yellow-600 animate-pulse">
431
- Agent thinking...
432
- </span>
433
- )}
434
- </div>
435
- )}
436
- </div>
437
-
438
- {/* Messages */}
439
- <div className="flex-1 overflow-y-auto p-6 space-y-4">
440
- {messages.map(message => (
441
- <div
442
- key={message.id}
443
- className={`flex ${message.role === 'user' ? 'justify-end' : 'justify-start'}`}
444
- >
445
- <div
446
- className={`max-w-2xl px-4 py-2 rounded-lg ${
447
- message.role === 'user'
448
- ? 'bg-blue-500 text-white'
449
- : message.role === 'assistant'
450
- ? 'bg-gray-200'
451
- : 'bg-yellow-100'
452
- }`}
453
- >
454
- {message.content.startsWith('<div>') ? (
455
- <div dangerouslySetInnerHTML={{ __html: message.content }} />
456
- ) : (
457
- <p>{message.content}</p>
458
- )}
459
- <p className="text-xs mt-1 opacity-70">
460
- {message.timestamp.toLocaleTimeString()}
461
- </p>
462
- </div>
463
- </div>
464
- ))}
465
- </div>
466
-
467
- {/* Input Form */}
468
- <form onSubmit={handleSubmit} className="border-t bg-white p-4">
469
- <div className="flex space-x-4">
470
- <input
471
- type="text"
472
- value={input}
473
- onChange={e => setInput(e.target.value)}
474
- placeholder="Ask a question about your data..."
475
- className="flex-1 px-4 py-2 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500"
476
- disabled={loading}
477
- />
478
- <button
479
- type="submit"
480
- disabled={loading || !input.trim()}
481
- className="px-6 py-2 bg-blue-500 text-white rounded-lg hover:bg-blue-600 disabled:bg-gray-400"
482
- >
483
- Send
484
- </button>
485
- {workflowStatus && (
486
- <button
487
- type="button"
488
- onClick={handleStop}
489
- className="px-6 py-2 bg-red-500 text-white rounded-lg hover:bg-red-600"
490
- >
491
- STOP
492
- </button>
493
- )}
494
- </div>
495
- </form>
496
- </div>
497
-
498
- {/* Right Sidebar */}
499
- <div className="w-96 bg-gray-50 border-l flex flex-col">
500
- {/* Graph Visualization */}
501
- <div className="flex-1 p-4">
502
- <h2 className="text-lg font-semibold mb-2">Workflow Graph</h2>
503
- <div
504
- ref={cyContainer}
505
- className="w-full h-64 bg-white border rounded-lg"
506
- />
507
- </div>
508
-
509
- {/* Recent Instructions */}
510
- <div className="p-4 border-t">
511
- <h2 className="text-lg font-semibold mb-2">Recent Instructions</h2>
512
- <div className="space-y-2">
513
- {lastInstructions.map((inst, idx) => (
514
- <div key={idx} className="text-sm bg-white p-2 rounded border">
515
- <span className={`font-semibold ${
516
- inst.status === 'complete' ? 'text-green-600' :
517
- inst.status === 'failed' ? 'text-red-600' :
518
- 'text-gray-600'
519
- }`}>
520
- {inst.type}
521
- </span>
522
- <span className="text-gray-500 ml-2">
523
- {inst.status}
524
- </span>
525
- </div>
526
- ))}
527
- </div>
528
- </div>
529
- </div>
530
- </div>
531
- );
532
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/next-env.d.ts DELETED
@@ -1,5 +0,0 @@
1
- /// <reference types="next" />
2
- /// <reference types="next/image-types/global" />
3
-
4
- // NOTE: This file should not be edited
5
- // see https://nextjs.org/docs/basic-features/typescript for more information.
 
 
 
 
 
 
frontend/next.config.js DELETED
@@ -1,6 +0,0 @@
1
- /** @type {import('next').NextConfig} */
2
- const nextConfig = {
3
- // App directory is enabled by default in Next.js 13+
4
- }
5
-
6
- module.exports = nextConfig
 
 
 
 
 
 
 
frontend/package.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "name": "agent-frontend",
3
- "version": "1.0.0",
4
- "scripts": {
5
- "dev": "next dev",
6
- "build": "next build",
7
- "start": "next start"
8
- },
9
- "dependencies": {
10
- "next": "14.0.0",
11
- "react": "18.2.0",
12
- "react-dom": "18.2.0",
13
- "typescript": "5.2.2",
14
- "@types/react": "18.2.0",
15
- "@types/node": "20.8.0",
16
- "cytoscape": "3.27.0",
17
- "cytoscape-fcose": "2.2.0",
18
- "@types/cytoscape": "3.19.0"
19
- },
20
- "devDependencies": {
21
- "tailwindcss": "3.3.5",
22
- "autoprefixer": "10.4.16",
23
- "postcss": "8.4.31"
24
- }
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/postcss.config.js DELETED
@@ -1,6 +0,0 @@
1
- module.exports = {
2
- plugins: {
3
- tailwindcss: {},
4
- autoprefixer: {},
5
- },
6
- }
 
 
 
 
 
 
 
frontend/tailwind.config.js DELETED
@@ -1,11 +0,0 @@
1
- module.exports = {
2
- content: [
3
- './pages/**/*.{js,ts,jsx,tsx}',
4
- './components/**/*.{js,ts,jsx,tsx}',
5
- './app/**/*.{js,ts,jsx,tsx}',
6
- ],
7
- theme: {
8
- extend: {},
9
- },
10
- plugins: [],
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
frontend/tsconfig.json DELETED
@@ -1,43 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "es5",
4
- "lib": [
5
- "dom",
6
- "dom.iterable",
7
- "esnext"
8
- ],
9
- "allowJs": true,
10
- "skipLibCheck": true,
11
- "strict": true,
12
- "forceConsistentCasingInFileNames": true,
13
- "noEmit": true,
14
- "esModuleInterop": true,
15
- "module": "esnext",
16
- "moduleResolution": "node",
17
- "resolveJsonModule": true,
18
- "isolatedModules": true,
19
- "jsx": "preserve",
20
- "incremental": true,
21
- "baseUrl": ".",
22
- "paths": {
23
- "@/*": [
24
- "./*"
25
- ]
26
- },
27
- "plugins": [
28
- {
29
- "name": "next"
30
- }
31
- ]
32
- },
33
- "include": [
34
- "next-env.d.ts",
35
- "**/*.ts",
36
- "**/*.tsx",
37
- "types/**/*.d.ts",
38
- ".next/types/**/*.ts"
39
- ],
40
- "exclude": [
41
- "node_modules"
42
- ]
43
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/types/cytoscape-fcose.d.ts DELETED
@@ -1,16 +0,0 @@
1
- declare module 'cytoscape-fcose' {
2
- import { Core } from 'cytoscape';
3
-
4
- interface FcoseLayoutOptions {
5
- name: string;
6
- animate?: boolean;
7
- randomize?: boolean;
8
- fit?: boolean;
9
- padding?: number;
10
- nodeRepulsion?: number;
11
- idealEdgeLength?: number;
12
- }
13
-
14
- function fcose(cytoscape: (options?: any) => Core): void;
15
- export = fcose;
16
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
mcp/core/discovery.py CHANGED
@@ -3,6 +3,7 @@ from sqlalchemy.engine import Engine
3
  from typing import Dict, Any, List
4
  import logging
5
  from concurrent.futures import ThreadPoolExecutor, as_completed
 
6
 
7
  from .database import get_db_connections
8
 
@@ -24,47 +25,83 @@ def _discover_single_db_schema(db_name: str, engine: Engine) -> Dict[str, Any]:
24
  })
25
  return db_schema
26
 
27
- async def get_relevant_schemas(query: str) -> List[Dict[str, Any]]:
28
  """
29
- Discovers schemas from all connected databases and performs a simple keyword search.
30
- A more advanced implementation would use embeddings for semantic search.
31
  """
32
  db_engines = get_db_connections()
33
  all_schemas = []
 
34
 
35
  with ThreadPoolExecutor() as executor:
36
- # Discover all schemas in parallel
37
- future_to_db = {executor.submit(_discover_single_db_schema, name, eng): name for name, eng in db_engines.items()}
38
- for future in as_completed(future_to_db):
 
 
 
 
 
39
  try:
40
- all_schemas.append(future.result())
 
41
  except Exception as e:
42
- db_name = future_to_db[future]
43
- logger.error(f"Failed to discover schema for {db_name}: {e}")
 
 
 
 
 
 
 
 
44
 
45
  if not query:
46
- return all_schemas
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- # Simple keyword filtering
49
  keywords = query.lower().split()
50
  relevant_schemas = []
51
  for db_schema in all_schemas:
52
  for table in db_schema.get("tables", []):
 
53
  if any(keyword in table['name'].lower() for keyword in keywords):
54
- relevant_schemas.append({
55
- "database": db_schema["database_name"],
56
- "table": table['name'],
57
- "columns": table['columns']
58
- })
59
  else:
60
  for col in table.get("columns", []):
61
  if any(keyword in col['name'].lower() for keyword in keywords):
62
- relevant_schemas.append({
63
- "database": db_schema["database_name"],
64
- "table": table['name'],
65
- "columns": table['columns'] # Return full table if a column matches
66
- })
67
- break # Move to next table
68
-
69
- # Deduplicate results (in case multiple keywords match the same table)
70
- return [dict(t) for t in {tuple(d.items()) for d in relevant_schemas}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from typing import Dict, Any, List
4
  import logging
5
  from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ import asyncio
7
 
8
  from .database import get_db_connections
9
 
 
25
  })
26
  return db_schema
27
 
28
async def discover_all_schemas() -> List[Dict[str, Any]]:
    """
    Discovers the full schema for all connected databases in parallel.

    Each database's introspection runs in a worker thread (SQLAlchemy
    inspection is blocking), so the event loop stays responsive. A failure
    for one database is logged *with that database's name* and does not
    abort discovery of the others.

    Returns:
        A list of per-database schema dicts (one per database that was
        discovered successfully), in the order of get_db_connections().
    """
    db_engines = get_db_connections()
    all_schemas: List[Dict[str, Any]] = []
    loop = asyncio.get_running_loop()

    with ThreadPoolExecutor() as executor:
        names = list(db_engines.keys())
        futures = [
            loop.run_in_executor(executor, _discover_single_db_schema, name, db_engines[name])
            for name in names
        ]
        # gather (instead of as_completed) preserves the name <-> result
        # pairing, so an error can be attributed to the database that
        # actually caused it.
        results = await asyncio.gather(*futures, return_exceptions=True)

    for name, result in zip(names, results):
        if isinstance(result, BaseException):
            logger.error(f"Schema discovery for database '{name}' failed: {result}", exc_info=result)
        else:
            all_schemas.append(result)

    return all_schemas
52
+
53
async def get_relevant_schemas(query: str) -> List[Dict[str, Any]]:
    """
    Discovers schemas and performs a simple keyword search.
    If no query is provided, returns the full schema.
    """
    discovered = await discover_all_schemas()

    def _column_entries(db_name, table):
        # One flat record per column; `type` is wrapped in a list to match
        # the shape the UI consumes.
        for column in table.get("columns", []):
            yield {
                "database": db_name,
                "table": table["name"],
                "name": column["name"],
                "type": [column["type"]],
            }

    if not query:
        # No search terms: flatten every table/column for the UI.
        return [
            entry
            for db in discovered
            for tbl in db.get("tables", [])
            for entry in _column_entries(db["database_name"], tbl)
        ]

    keywords = query.lower().split()

    def _table_matches(table):
        # A table is relevant when its name, or any of its column names,
        # contains any keyword.
        if any(kw in table["name"].lower() for kw in keywords):
            return True
        return any(
            any(kw in col["name"].lower() for kw in keywords)
            for col in table.get("columns", [])
        )

    hits = [
        entry
        for db in discovered
        for tbl in db.get("tables", [])
        if _table_matches(tbl)
        for entry in _column_entries(db["database_name"], tbl)
    ]

    # Deduplicate while preserving first-seen order (lists aren't hashable,
    # so key on the identifying triple only).
    seen = set()
    unique = []
    for entry in hits:
        key = (entry["database"], entry["table"], entry["name"])
        if key not in seen:
            seen.add(key)
            unique.append(entry)
    return unique
mcp/core/graph.py CHANGED
@@ -33,6 +33,61 @@ def _ensure_constraints(driver: Driver):
33
  session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (c:Column) REQUIRE c.unique_name IS UNIQUE")
34
  logger.info("Neo4j constraints ensured.")
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def _keyword_search(keyword: str) -> List[Dict[str, Any]]:
37
  """Internal helper to search for table nodes by keyword."""
38
  driver = get_graph_driver()
 
33
  session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (c:Column) REQUIRE c.unique_name IS UNIQUE")
34
  logger.info("Neo4j constraints ensured.")
35
 
36
def import_schema(schema_data: dict) -> None:
    """
    Imports a discovered database schema into the Neo4j graph.

    Graph model created:
        (:Database {name})-[:HAS_TABLE]->(:Table {name, unique_name})
            -[:HAS_COLUMN]->(:Column {name, unique_name, type})

    `unique_name` is the fully qualified name ("db.table" for tables,
    "db.table.column" for columns) and is the MERGE key, so re-running
    the import is idempotent: existing nodes are updated in place
    rather than duplicated.

    Args:
        schema_data: Dict with 'database_name' and 'tables'; each table
            has 'name' and 'columns', each column 'name' and 'type'
            (shape produced by the discovery module — confirm there).
    """
    driver = get_graph_driver()
    db_name = schema_data['database_name']

    with driver.session() as session:
        # Create Database node
        session.run("MERGE (d:Database {name: $db_name})", db_name=db_name)

        for table in schema_data['tables']:
            # Qualify the table name so identically named tables in
            # different databases get distinct nodes.
            table_unique_name = f"{db_name}.{table['name']}"
            table_properties = {
                "name": table['name'],
                "unique_name": table_unique_name,
            }

            # Create Table node and HAS_TABLE relationship.
            # SET on both CREATE and MATCH keeps properties current on re-import.
            session.run(
                """
                MATCH (d:Database {name: $db_name})
                MERGE (t:Table {unique_name: $unique_name})
                ON CREATE SET t += $props
                ON MATCH SET t += $props
                MERGE (d)-[:HAS_TABLE]->(t)
                """,
                db_name=db_name,
                unique_name=table_unique_name,
                props=table_properties
            )

            for column in table['columns']:
                column_unique_name = f"{table_unique_name}.{column['name']}"
                column_properties = {
                    "name": column['name'],
                    "unique_name": column_unique_name,
                    "type": column['type'],
                }

                # Create Column node and HAS_COLUMN relationship
                session.run(
                    """
                    MATCH (t:Table {unique_name: $table_unique_name})
                    MERGE (c:Column {unique_name: $column_unique_name})
                    ON CREATE SET c += $props
                    ON MATCH SET c += $props
                    MERGE (t)-[:HAS_COLUMN]->(c)
                    """,
                    table_unique_name=table_unique_name,
                    column_unique_name=column_unique_name,
                    props=column_properties
                )
    logger.info(f"Successfully imported schema for database: {db_name}")
90
+
91
  def _keyword_search(keyword: str) -> List[Dict[str, Any]]:
92
  """Internal helper to search for table nodes by keyword."""
93
  driver = get_graph_driver()
mcp/requirements.txt CHANGED
@@ -5,3 +5,4 @@ pydantic==2.4.0
5
  requests==2.31.0
6
  SQLAlchemy==2.0.29
7
  sqlparse==0.5.0
 
 
5
  requests==2.31.0
6
  SQLAlchemy==2.0.29
7
  sqlparse==0.5.0
8
+ mcp==1.1.1
ops/scripts/ingest.py CHANGED
@@ -1,71 +1,51 @@
1
  import os
2
  import sys
3
  import logging
4
- from sqlalchemy import create_engine
5
 
6
- # Add project root to path to allow imports from mcp
7
- project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
- sys.path.append(project_root)
9
 
10
- from core.discovery import discover_schema
11
- from core.graph import GraphStore
12
- from core.config import SQLITE_DATA_DIR
13
 
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
  logger = logging.getLogger(__name__)
16
 
17
- def ingest_sqlite_database(db_file: str, graph_store: GraphStore):
18
- """Discovers schema from a SQLite DB and ingests it into Neo4j."""
19
- db_path = os.path.join(SQLITE_DATA_DIR, db_file)
20
- logger.info(f"Processing database: {db_path}")
21
-
22
- if not os.path.exists(db_path):
23
- logger.error(f"Database file not found: {db_path}")
24
- return
25
-
26
- try:
27
- engine = create_engine(f"sqlite:///{db_path}")
28
- schema_data = discover_schema(engine)
29
-
30
- if schema_data:
31
- logger.info(f"Discovered schema for {db_file}, ingesting into Neo4j...")
32
- graph_store.import_schema(schema_data)
33
- logger.info(f"Successfully ingested schema for {db_file}")
34
- else:
35
- logger.warning(f"Could not discover schema for {db_file}. Skipping.")
36
-
37
- except Exception as e:
38
- logger.error(f"An error occurred while processing {db_file}: {e}")
39
-
40
- def main():
41
  """
42
- Main function to run the ingestion process for all SQLite databases
43
- found in the data directory.
44
  """
45
  logger.info("Starting schema ingestion process...")
46
 
47
- if not os.path.exists(SQLITE_DATA_DIR) or not os.path.isdir(SQLITE_DATA_DIR):
48
- logger.error(f"Data directory not found: {SQLITE_DATA_DIR}")
49
- return
50
-
51
- db_files = [f for f in os.listdir(SQLITE_DATA_DIR) if f.endswith(".db")]
52
-
53
- if not db_files:
54
- logger.warning(f"No SQLite database files (.db) found in {SQLITE_DATA_DIR}.")
55
- return
56
-
57
  try:
58
- graph_store = GraphStore()
59
- logger.info("Successfully connected to Neo4j.")
60
- except Exception as e:
61
- logger.error(f"Failed to connect to Neo4j. Aborting ingestion. Error: {e}")
62
- return
63
-
64
- for db_file in db_files:
65
- ingest_sqlite_database(db_file, graph_store)
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- graph_store.close()
68
- logger.info("Schema ingestion process completed.")
 
 
 
 
69
 
70
  if __name__ == "__main__":
71
- main()
 
1
  import os
2
  import sys
3
  import logging
4
+ import asyncio
5
 
6
+ # The script runs inside the 'mcp' container where the WORKDIR is '/app'.
7
+ # The 'core' module is at '/app/core'. We need to add '/app' to the Python path.
8
+ sys.path.insert(0, '/app')
9
 
10
+ from core.discovery import discover_all_schemas
11
+ from core.graph import import_schema, close_graph_driver
 
12
 
13
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
  logger = logging.getLogger(__name__)
15
 
16
async def main() -> None:
    """
    Main asynchronous function to run the full schema discovery and
    ingestion process.

    Steps:
        1. Discover schemas from all connected databases in parallel.
        2. Import each discovered schema into Neo4j, continuing past
           per-database failures.
        3. Always close the Neo4j driver, even on error.
    """
    logger.info("Starting schema ingestion process...")

    try:
        # Step 1: Discover schemas from all connected SQLite databases
        logger.info("Discovering schemas from all databases...")
        all_schemas = await discover_all_schemas()

        if not all_schemas:
            logger.warning("No schemas were discovered. Ingestion cannot proceed.")
            return

        logger.info(f"Discovered {len(all_schemas)} schemas. Now ingesting into Neo4j...")

        # Step 2: Import each discovered schema into Neo4j, tracking outcomes
        # so the final log line reflects what actually happened.
        succeeded = 0
        failed = 0
        for schema_data in all_schemas:
            try:
                import_schema(schema_data)
                succeeded += 1
                logger.info(f"Successfully ingested schema for: {schema_data['database_name']}")
            except Exception as e:
                failed += 1
                logger.error(f"Failed to ingest schema for {schema_data['database_name']}: {e}", exc_info=True)

        # Honest summary: don't claim success when imports failed.
        if failed:
            logger.warning(f"Schema ingestion finished with errors: {succeeded} succeeded, {failed} failed.")
        else:
            logger.info("Schema ingestion process completed successfully.")

    except Exception as e:
        logger.critical(f"A critical error occurred during the ingestion process: {e}", exc_info=True)
    finally:
        # Step 3: Ensure all connections are closed
        close_graph_driver()
        logger.info("Neo4j connection closed.")

if __name__ == "__main__":
    asyncio.run(main())
+ asyncio.run(main())
streamlit/app.py CHANGED
@@ -36,13 +36,12 @@ def stream_agent_response(question: str):
36
  try:
37
  with requests.post(AGENT_URL, json={"question": question}, stream=True, timeout=300) as r:
38
  r.raise_for_status()
39
- for chunk in r.iter_content(chunk_size=None):
40
- if chunk:
41
  try:
42
- yield json.loads(chunk)
43
  except json.JSONDecodeError:
44
- # Handle potential parsing errors if chunks are not perfect JSON
45
- logger.warning(f"Could not decode JSON chunk: {chunk}")
46
  continue
47
  except requests.exceptions.RequestException as e:
48
  yield {"error": f"Failed to connect to agent: {e}"}
@@ -52,7 +51,7 @@ def fetch_schema_info() -> str:
52
  try:
53
  response = requests.post(
54
  f"{MCP_URL}/discovery/get_relevant_schemas",
55
- headers={"Authorization": f"Bearer {MCP_API_KEY}", "Content-Type": "application/json"},
56
  json={"query": ""}
57
  )
58
  response.raise_for_status()
@@ -87,12 +86,13 @@ def get_cached_schema():
87
  """Cache the schema info to avoid repeated calls."""
88
  return fetch_schema_info()
89
 
 
90
  def check_service_health(service_name: str, url: str) -> bool:
91
- """Checks if a service is reachable."""
92
  try:
93
- response = requests.get(url, timeout=5)
94
  return response.status_code in [200, 401]
95
- except requests.exceptions.RequestException:
96
  return False
97
 
98
  # --- UI Components ---
@@ -109,8 +109,13 @@ def display_sidebar():
109
  st.markdown("---")
110
  st.title("πŸ”Œ Service Status")
111
 
112
- neo4j_status = "βœ… Online" if check_service_health("Neo4j", NEO4J_URL) else "❌ Offline"
113
- mcp_status = "βœ… Online" if check_service_health("MCP", MCP_URL.replace("/mcp", "/health")) else "❌ Offline"
 
 
 
 
 
114
 
115
  st.markdown(f"**Neo4j:** {neo4j_status}")
116
  st.markdown(f"**MCP Server:** {mcp_status}")
 
36
  try:
37
  with requests.post(AGENT_URL, json={"question": question}, stream=True, timeout=300) as r:
38
  r.raise_for_status()
39
+ for line in r.iter_lines():
40
+ if line:
41
  try:
42
+ yield json.loads(line.decode('utf-8'))
43
  except json.JSONDecodeError:
44
+ # Skip malformed JSON lines
 
45
  continue
46
  except requests.exceptions.RequestException as e:
47
  yield {"error": f"Failed to connect to agent: {e}"}
 
51
  try:
52
  response = requests.post(
53
  f"{MCP_URL}/discovery/get_relevant_schemas",
54
+ headers={"x-api-key": MCP_API_KEY, "Content-Type": "application/json"},
55
  json={"query": ""}
56
  )
57
  response.raise_for_status()
 
86
  """Cache the schema info to avoid repeated calls."""
87
  return fetch_schema_info()
88
 
89
@st.cache_data(ttl=10)
def check_service_health(service_name: str, url: str) -> bool:
    """Return True when the service at *url* responds. Cached for 10 seconds."""
    try:
        status = requests.get(url, timeout=2).status_code
    except Exception:
        # Any connection/timeout problem counts as "down".
        return False
    # 401 still proves the service is up — it just wants credentials.
    return status in (200, 401)
97
 
98
  # --- UI Components ---
 
109
  st.markdown("---")
110
  st.title("πŸ”Œ Service Status")
111
 
112
+ try:
113
+ neo4j_status = "βœ… Online" if check_service_health("Neo4j", NEO4J_URL) else "❌ Offline"
114
+ mcp_health_url = "http://mcp:8000/health"
115
+ mcp_status = "βœ… Online" if check_service_health("MCP", mcp_health_url) else "❌ Offline"
116
+ except Exception as e:
117
+ neo4j_status = "❓ Unknown"
118
+ mcp_status = "❓ Unknown"
119
 
120
  st.markdown(f"**Neo4j:** {neo4j_status}")
121
  st.markdown(f"**MCP Server:** {mcp_status}")