ohmygaugh committed on
Commit
a0eb181
·
1 Parent(s): 9d411a7

demo working

Browse files
Makefile CHANGED
@@ -32,15 +32,12 @@ clean:
32
  docker-compose down -v
33
  docker system prune -f
34
  @if [ -d "neo4j/data" ]; then rm -rf neo4j/data; fi
35
- @if [ -d "frontend/.next" ]; then rm -rf frontend/.next; fi
36
- @if [ -d "frontend/node_modules" ]; then rm -rf frontend/node_modules; fi
37
 
38
  # Health check all services
39
  health:
40
  @echo "Checking service health..."
41
  @docker-compose exec neo4j cypher-shell -u neo4j -p password "MATCH (n) RETURN count(n) LIMIT 1" > /dev/null 2>&1 && echo "βœ… Neo4j: Healthy" || echo "❌ Neo4j: Unhealthy"
42
  @curl -s http://localhost:8000/health > /dev/null && echo "βœ… MCP Server: Healthy" || echo "❌ MCP Server: Unhealthy"
43
- @curl -s http://localhost:3000 > /dev/null && echo "βœ… Frontend: Healthy" || echo "❌ Frontend: Unhealthy"
44
  @curl -s http://localhost:8501 > /dev/null && echo "βœ… Streamlit: Healthy" || echo "❌ Streamlit: Unhealthy"
45
  @docker-compose ps agent | grep -q "Up" && echo "βœ… Agent: Running" || echo "❌ Agent: Not running"
46
 
@@ -49,7 +46,7 @@ test: health
49
  @echo "Running integration test..."
50
  @make seed
51
  @sleep 5
52
- @echo "Check http://localhost:3000 and Neo4j Browser at http://localhost:7474"
53
 
54
  # Demo workflow
55
  demo:
@@ -59,11 +56,10 @@ demo:
59
  @make seed
60
  @echo ""
61
  @echo "πŸŽ‰ Demo Ready!"
62
- @echo "1. Open http://localhost:3000 in your browser (Main Chat Interface)"
63
- @echo "2. Open http://localhost:8501 in your browser (Streamlit Monitor)"
64
- @echo "3. Ask a question like: 'Show me all customers who have placed orders'"
65
- @echo "4. Watch the agent process through the workflow"
66
- @echo "5. Check Neo4j Browser at http://localhost:7474 (neo4j/password)"
67
  @echo ""
68
  @echo "During 5-minute pauses, you can edit instructions in Neo4j Browser:"
69
  @echo "MATCH (i:Instruction {status: 'pending'}) SET i.parameters = '{\"question\": \"new question\"}'"
@@ -79,8 +75,6 @@ restart-agent:
79
  restart-mcp:
80
  docker-compose restart mcp
81
 
82
- restart-frontend:
83
- docker-compose restart frontend
84
 
85
  restart-streamlit:
86
  docker-compose restart streamlit
@@ -92,8 +86,6 @@ debug-agent:
92
  debug-mcp:
93
  docker-compose logs mcp
94
 
95
- debug-frontend:
96
- docker-compose logs frontend
97
 
98
  debug-streamlit:
99
  docker-compose logs streamlit
 
32
  docker-compose down -v
33
  docker system prune -f
34
  @if [ -d "neo4j/data" ]; then rm -rf neo4j/data; fi
 
 
35
 
36
  # Health check all services
37
  health:
38
  @echo "Checking service health..."
39
  @docker-compose exec neo4j cypher-shell -u neo4j -p password "MATCH (n) RETURN count(n) LIMIT 1" > /dev/null 2>&1 && echo "βœ… Neo4j: Healthy" || echo "❌ Neo4j: Unhealthy"
40
  @curl -s http://localhost:8000/health > /dev/null && echo "βœ… MCP Server: Healthy" || echo "❌ MCP Server: Unhealthy"
 
41
  @curl -s http://localhost:8501 > /dev/null && echo "βœ… Streamlit: Healthy" || echo "❌ Streamlit: Unhealthy"
42
  @docker-compose ps agent | grep -q "Up" && echo "βœ… Agent: Running" || echo "❌ Agent: Not running"
43
 
 
46
  @echo "Running integration test..."
47
  @make seed
48
  @sleep 5
49
+ @echo "Check http://localhost:8501 and Neo4j Browser at http://localhost:7474"
50
 
51
  # Demo workflow
52
  demo:
 
56
  @make seed
57
  @echo ""
58
  @echo "πŸŽ‰ Demo Ready!"
59
+ @echo "1. Open http://localhost:8501 in your browser (Main Chat Interface)"
60
+ @echo "2. Ask a question like: 'Show me all customers who have placed orders'"
61
+ @echo "3. Watch the agent process through the workflow"
62
+ @echo "4. Check Neo4j Browser at http://localhost:7474 (neo4j/password)"
 
63
  @echo ""
64
  @echo "During 5-minute pauses, you can edit instructions in Neo4j Browser:"
65
  @echo "MATCH (i:Instruction {status: 'pending'}) SET i.parameters = '{\"question\": \"new question\"}'"
 
75
  restart-mcp:
76
  docker-compose restart mcp
77
 
 
 
78
 
79
  restart-streamlit:
80
  docker-compose restart streamlit
 
86
  debug-mcp:
87
  docker-compose logs mcp
88
 
 
 
89
 
90
  debug-streamlit:
91
  docker-compose logs streamlit
agent/main.py CHANGED
@@ -2,17 +2,15 @@ import os
2
  import sys
3
  import logging
4
  import json
5
- from typing import Annotated, List, TypedDict
6
  from contextlib import asynccontextmanager
7
  from fastapi import FastAPI
8
  from pydantic import BaseModel
9
  import uvicorn
10
  from fastapi.responses import StreamingResponse
11
 
12
- from langchain_core.messages import BaseMessage, ToolMessage, AIMessage
13
  from langchain_openai import ChatOpenAI
14
- from langgraph.graph import StateGraph, START, END
15
- from langgraph.prebuilt import ToolNode
16
 
17
  from tools import MCPClient, SchemaSearchTool, JoinPathFinderTool, QueryExecutorTool
18
 
@@ -24,9 +22,16 @@ MCP_URL = os.getenv("MCP_URL", "http://mcp:8000/mcp")
24
  API_KEY = os.getenv("MCP_API_KEY", "dev-key-123")
25
  LLM_API_KEY = os.getenv("LLM_API_KEY")
26
 
27
- # --- Agent State Definition ---
28
- class AgentState(TypedDict):
29
- messages: List[BaseMessage]
 
 
 
 
 
 
 
30
 
31
  # --- Agent Initialization ---
32
  class GraphRAGAgent:
@@ -45,53 +50,48 @@ class GraphRAGAgent:
45
  QueryExecutorTool(mcp_client=mcp_client),
46
  ]
47
 
48
- self.llm_with_tools = llm.bind_tools(tools)
49
- self.tool_node = ToolNode(tools)
50
-
51
- # Define the agent graph
52
- graph = StateGraph(AgentState)
53
- graph.add_node("llm", self.call_llm)
54
- graph.add_node("tools", self.tool_node)
55
-
56
- graph.add_edge(START, "llm")
57
- graph.add_conditional_edges("llm", self.should_call_tools)
58
- graph.add_edge("tools", "llm")
59
-
60
- self.graph = graph.compile()
61
-
62
- def should_call_tools(self, state: AgentState) -> str:
63
- """Determines whether to call tools or end the execution."""
64
- last_message = state["messages"][-1]
65
- if not last_message.tool_calls:
66
- return END
67
- return "tools"
68
-
69
- def call_llm(self, state: AgentState) -> dict:
70
- """Calls the LLM with the current state to decide the next action."""
71
- response = self.llm_with_tools.invoke(state["messages"])
72
- return {"messages": [response]}
73
 
74
  async def stream_query(self, question: str):
75
  """Processes a question and streams the intermediate steps."""
76
- inputs = {"messages": [("user", question)]}
77
- async for event in self.graph.astream(inputs, stream_mode="values"):
78
- last_message = event["messages"][-1]
79
- if isinstance(last_message, AIMessage) and last_message.tool_calls:
80
- # Agent is thinking and calling a tool
81
- tool_call = last_message.tool_calls[0]
82
- yield json.dumps({
83
- "type": "thought",
84
- "content": f"πŸ€– Calling tool `{tool_call['name']}` with args: {tool_call['args']}"
85
- }) + "\\n\\n"
86
- elif isinstance(last_message, ToolMessage):
87
- # A tool has returned its result
88
- yield json.dumps({
89
- "type": "observation",
90
- "content": f"πŸ› οΈ Tool `{last_message.name}` returned:\n\n```\n{last_message.content}\n```"
91
- }) + "\\n\\n"
92
- elif isinstance(last_message, AIMessage):
93
- # This is the final answer
94
- yield json.dumps({"type": "final_answer", "content": last_message.content}) + "\\n\\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  # --- FastAPI Application ---
97
  agent = None
 
2
  import sys
3
  import logging
4
  import json
 
5
  from contextlib import asynccontextmanager
6
  from fastapi import FastAPI
7
  from pydantic import BaseModel
8
  import uvicorn
9
  from fastapi.responses import StreamingResponse
10
 
11
+ from langchain_core.messages import ToolMessage, AIMessage
12
  from langchain_openai import ChatOpenAI
13
+ from langgraph.prebuilt import create_react_agent
 
14
 
15
  from tools import MCPClient, SchemaSearchTool, JoinPathFinderTool, QueryExecutorTool
16
 
 
22
  API_KEY = os.getenv("MCP_API_KEY", "dev-key-123")
23
  LLM_API_KEY = os.getenv("LLM_API_KEY")
24
 
25
+ # --- System Prompt ---
26
+ SYSTEM_PROMPT = """You are a helpful assistant for querying life sciences databases.
27
+
28
+ You have access to these tools:
29
+ - schema_search: Find relevant database tables and columns based on keywords
30
+ - find_join_path: Discover how to join tables together using the knowledge graph
31
+ - execute_query: Run SQL queries against the databases
32
+
33
+ Always use schema_search first to understand the available data, then construct appropriate SQL queries.
34
+ When querying, be specific about what tables and columns you're using."""
35
 
36
  # --- Agent Initialization ---
37
  class GraphRAGAgent:
 
50
  QueryExecutorTool(mcp_client=mcp_client),
51
  ]
52
 
53
+ # Use LangGraph's prebuilt create_react_agent for proper message handling
54
+ self.graph = create_react_agent(llm, tools, state_modifier=SYSTEM_PROMPT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  async def stream_query(self, question: str):
57
  """Processes a question and streams the intermediate steps."""
58
+ try:
59
+ async for event in self.graph.astream(
60
+ {"messages": [("user", question)]},
61
+ stream_mode="values"
62
+ ):
63
+ # create_react_agent uses standard message format
64
+ messages = event.get("messages", [])
65
+ if not messages:
66
+ continue
67
+
68
+ last_message = messages[-1]
69
+
70
+ if isinstance(last_message, AIMessage) and last_message.tool_calls:
71
+ # Agent is deciding to call a tool
72
+ tool_call = last_message.tool_calls[0]
73
+ yield json.dumps({
74
+ "type": "thought",
75
+ "content": f"πŸ€– Calling tool `{tool_call['name']}` with args: {tool_call['args']}"
76
+ }) + "\n\n"
77
+ elif isinstance(last_message, ToolMessage):
78
+ # A tool has returned its result
79
+ yield json.dumps({
80
+ "type": "observation",
81
+ "content": f"πŸ› οΈ Tool `{last_message.name}` returned:\n\n```\n{last_message.content}\n```"
82
+ }) + "\n\n"
83
+ elif isinstance(last_message, AIMessage) and last_message.content:
84
+ # This is the final answer (AIMessage with content but no tool_calls)
85
+ yield json.dumps({
86
+ "type": "final_answer",
87
+ "content": last_message.content
88
+ }) + "\n\n"
89
+ except Exception as e:
90
+ logger.error(f"Error in agent workflow: {e}", exc_info=True)
91
+ yield json.dumps({
92
+ "type": "final_answer",
93
+ "content": f"I encountered an error while processing your request. Please try rephrasing your question or asking something simpler."
94
+ }) + "\n\n"
95
 
96
  # --- FastAPI Application ---
97
  agent = None
agent/tools.py CHANGED
@@ -14,7 +14,7 @@ class MCPClient:
14
  def __init__(self, mcp_url: str, api_key: str):
15
  self.mcp_url = mcp_url
16
  self.headers = {
17
- "Authorization": f"Bearer {api_key}",
18
  "Content-Type": "application/json"
19
  }
20
 
@@ -64,7 +64,8 @@ class SchemaSearchTool(BaseTool):
64
  return f"Error searching schemas: {response.get('message', 'Unknown error')}"
65
 
66
  async def _arun(self, query: str) -> str:
67
- raise NotImplementedError("SchemaSearchTool does not support async")
 
68
 
69
 
70
  class JoinPathFinderTool(BaseTool):
@@ -99,7 +100,8 @@ class JoinPathFinderTool(BaseTool):
99
  return f"Failed to find join path: {str(e)}"
100
 
101
  async def _arun(self, table_names: str) -> str:
102
- raise NotImplementedError("JoinPathFinderTool does not support async")
 
103
 
104
 
105
  class QueryExecutorTool(BaseTool):
@@ -147,4 +149,5 @@ class QueryExecutorTool(BaseTool):
147
  return f"Failed to execute query: {str(e)}"
148
 
149
  async def _arun(self, sql: str) -> str:
150
- raise NotImplementedError("QueryExecutorTool does not support async")
 
 
14
  def __init__(self, mcp_url: str, api_key: str):
15
  self.mcp_url = mcp_url
16
  self.headers = {
17
+ "x-api-key": api_key,
18
  "Content-Type": "application/json"
19
  }
20
 
 
64
  return f"Error searching schemas: {response.get('message', 'Unknown error')}"
65
 
66
  async def _arun(self, query: str) -> str:
67
+ """Async version - just calls sync version."""
68
+ return self._run(query)
69
 
70
 
71
  class JoinPathFinderTool(BaseTool):
 
100
  return f"Failed to find join path: {str(e)}"
101
 
102
  async def _arun(self, table_names: str) -> str:
103
+ """Async version - just calls sync version."""
104
+ return self._run(table_names)
105
 
106
 
107
  class QueryExecutorTool(BaseTool):
 
149
  return f"Failed to execute query: {str(e)}"
150
 
151
  async def _arun(self, sql: str) -> str:
152
+ """Async version - just calls sync version."""
153
+ return self._run(sql)
frontend/Dockerfile DELETED
@@ -1,12 +0,0 @@
1
- FROM node:18-alpine
2
-
3
- WORKDIR /app
4
-
5
- COPY package*.json ./
6
- RUN npm install
7
-
8
- COPY . .
9
- RUN npm run build
10
-
11
- EXPOSE 3000
12
- CMD ["npm", "start"]
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/app/globals.css DELETED
@@ -1,7 +0,0 @@
1
- @tailwind base;
2
- @tailwind components;
3
- @tailwind utilities;
4
-
5
- body {
6
- @apply bg-gray-50;
7
- }
 
 
 
 
 
 
 
 
frontend/app/layout.tsx DELETED
@@ -1,13 +0,0 @@
1
- import './globals.css'
2
-
3
- export default function RootLayout({
4
- children,
5
- }: {
6
- children: React.ReactNode
7
- }) {
8
- return (
9
- <html lang="en">
10
- <body>{children}</body>
11
- </html>
12
- )
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/app/page.tsx DELETED
@@ -1,532 +0,0 @@
1
- 'use client';
2
-
3
- import { useState, useEffect, useRef } from 'react';
4
- import cytoscape from 'cytoscape';
5
- import fcose from 'cytoscape-fcose';
6
-
7
- cytoscape.use(fcose);
8
-
9
- const MCP_URL = process.env.NEXT_PUBLIC_MCP_URL || 'http://localhost:8000';
10
- const API_KEY = 'dev-key-123';
11
-
12
- interface Message {
13
- id: string;
14
- role: 'user' | 'assistant' | 'system';
15
- content: string;
16
- timestamp: Date;
17
- }
18
-
19
- interface WorkflowStatus {
20
- workflow_id: string;
21
- current_instruction: string;
22
- status: string;
23
- pause_remaining?: number;
24
- }
25
-
26
- export default function ChatPage() {
27
- const [messages, setMessages] = useState<Message[]>([]);
28
- const [input, setInput] = useState('');
29
- const [loading, setLoading] = useState(false);
30
- const [workflowStatus, setWorkflowStatus] = useState<WorkflowStatus | null>(null);
31
- const [lastInstructions, setLastInstructions] = useState<any[]>([]);
32
- const [graphData, setGraphData] = useState<any>(null);
33
- const cyContainer = useRef<HTMLDivElement>(null);
34
- const cyRef = useRef<any>(null);
35
-
36
- // MCP API call helper
37
- const callMCP = async (tool: string, params: any = {}) => {
38
- const response = await fetch(`${MCP_URL}/mcp`, {
39
- method: 'POST',
40
- headers: {
41
- 'Content-Type': 'application/json',
42
- 'X-API-Key': API_KEY,
43
- },
44
- body: JSON.stringify({ tool, params }),
45
- });
46
- return response.json();
47
- };
48
-
49
- // Poll for workflow status
50
- useEffect(() => {
51
- const interval = setInterval(async () => {
52
- try {
53
- // Get active workflow
54
- const result = await callMCP('query_graph', {
55
- query: `
56
- MATCH (w:Workflow {status: 'active'})-[:HAS_INSTRUCTION]->(i:Instruction {status: 'executing'})
57
- RETURN w.id as workflow_id, i.id as current_instruction, i.type as instruction_type,
58
- i.status as status, i.pause_duration as pause_duration
59
- ORDER BY i.sequence DESC
60
- LIMIT 1
61
- `,
62
- });
63
-
64
- if (result.data && result.data.length > 0) {
65
- const data = result.data[0];
66
- setWorkflowStatus({
67
- workflow_id: data.workflow_id,
68
- current_instruction: data.instruction_type,
69
- status: data.status,
70
- });
71
- } else {
72
- setWorkflowStatus(null);
73
- }
74
-
75
- // Get last 5 instructions
76
- const instructionsResult = await callMCP('query_graph', {
77
- query: `
78
- MATCH (i:Instruction)-[:EXECUTED_AS]->(e:Execution)
79
- RETURN i.id as id, i.type as type, i.status as status,
80
- e.completed_at as completed_at
81
- ORDER BY e.completed_at DESC
82
- LIMIT 5
83
- `,
84
- });
85
-
86
- if (instructionsResult.data) {
87
- setLastInstructions(instructionsResult.data);
88
- }
89
-
90
- // Update graph visualization
91
- await updateGraph();
92
-
93
- } catch (error) {
94
- console.error('Error polling status:', error);
95
- }
96
- }, 5000); // Poll every 5 seconds
97
-
98
- return () => clearInterval(interval);
99
- }, []);
100
-
101
- // Initialize and update graph
102
- const updateGraph = async () => {
103
- try {
104
- const result = await callMCP('query_graph', {
105
- query: `
106
- MATCH (w:Workflow {status: 'active'})-[:HAS_INSTRUCTION]->(i:Instruction)
107
- OPTIONAL MATCH (i)-[:NEXT_INSTRUCTION]->(next)
108
- RETURN w, i, next
109
- `,
110
- });
111
-
112
- if (!result.data || result.data.length === 0) return;
113
-
114
- const nodes: any[] = [];
115
- const edges: any[] = [];
116
- const nodeIds = new Set();
117
-
118
- // Add workflow node
119
- const workflow = result.data[0].w;
120
- if (workflow && !nodeIds.has(workflow.id)) {
121
- nodes.push({
122
- data: {
123
- id: workflow.id,
124
- label: workflow.name || 'Workflow',
125
- type: 'workflow',
126
- status: workflow.status
127
- }
128
- });
129
- nodeIds.add(workflow.id);
130
- }
131
-
132
- // Add instruction nodes and edges
133
- result.data.forEach((row: any) => {
134
- const instruction = row.i;
135
- const next = row.next;
136
-
137
- if (instruction && !nodeIds.has(instruction.id)) {
138
- nodes.push({
139
- data: {
140
- id: instruction.id,
141
- label: instruction.type,
142
- type: 'instruction',
143
- status: instruction.status
144
- }
145
- });
146
- nodeIds.add(instruction.id);
147
-
148
- // Add edge from workflow to instruction
149
- if (workflow) {
150
- edges.push({
151
- data: {
152
- id: `${workflow.id}-${instruction.id}`,
153
- source: workflow.id,
154
- target: instruction.id
155
- }
156
- });
157
- }
158
- }
159
-
160
- // Add next instruction edge
161
- if (instruction && next) {
162
- edges.push({
163
- data: {
164
- id: `${instruction.id}-${next.id}`,
165
- source: instruction.id,
166
- target: next.id
167
- }
168
- });
169
- }
170
- });
171
-
172
- // Initialize or update Cytoscape
173
- if (!cyRef.current && cyContainer.current) {
174
- cyRef.current = cytoscape({
175
- container: cyContainer.current,
176
- elements: { nodes, edges },
177
- style: [
178
- {
179
- selector: 'node',
180
- style: {
181
- 'label': 'data(label)',
182
- 'text-valign': 'center',
183
- 'text-halign': 'center',
184
- 'width': '80px',
185
- 'height': '80px',
186
- 'font-size': '12px',
187
- 'background-color': (ele: any) => {
188
- const status = ele.data('status');
189
- if (status === 'complete') return '#10B981';
190
- if (status === 'executing') return '#FCD34D';
191
- if (status === 'failed') return '#EF4444';
192
- return '#9CA3AF';
193
- }
194
- }
195
- },
196
- {
197
- selector: 'edge',
198
- style: {
199
- 'width': 2,
200
- 'line-color': '#9CA3AF',
201
- 'target-arrow-color': '#9CA3AF',
202
- 'target-arrow-shape': 'triangle',
203
- 'curve-style': 'bezier'
204
- }
205
- }
206
- ],
207
- layout: {
208
- name: 'fcose'
209
- }
210
- });
211
-
212
- // Add click handler for nodes
213
- cyRef.current.on('tap', 'node', function(evt: any) {
214
- const node = evt.target;
215
- alert(`Node: ${node.data('label')}\nStatus: ${node.data('status')}\nID: ${node.data('id')}`);
216
- });
217
-
218
- } else if (cyRef.current) {
219
- // Update existing graph
220
- cyRef.current.json({ elements: { nodes, edges } });
221
- cyRef.current.layout({ name: 'fcose' }).run();
222
- }
223
-
224
- } catch (error) {
225
- console.error('Error updating graph:', error);
226
- }
227
- };
228
-
229
- // Handle message submission
230
- const handleSubmit = async (e: React.FormEvent) => {
231
- e.preventDefault();
232
- if (!input.trim() || loading) return;
233
-
234
- const userMessage: Message = {
235
- id: Date.now().toString(),
236
- role: 'user',
237
- content: input,
238
- timestamp: new Date(),
239
- };
240
-
241
- setMessages(prev => [...prev, userMessage]);
242
- setInput('');
243
- setLoading(true);
244
-
245
- try {
246
- // Create a new workflow with the user's question
247
- const workflowResult = await callMCP('write_graph', {
248
- action: 'create_node',
249
- label: 'Workflow',
250
- properties: {
251
- id: `workflow-${Date.now()}`,
252
- name: `Query: ${input.substring(0, 50)}`,
253
- status: 'active',
254
- created_at: new Date().toISOString(),
255
- },
256
- });
257
-
258
- // Create instructions for the workflow
259
- const instructions = [
260
- { type: 'discover_schema', sequence: 1 },
261
- { type: 'generate_sql', sequence: 2, parameters: JSON.stringify({ question: input }) },
262
- { type: 'review_results', sequence: 3 },
263
- ];
264
-
265
- for (const inst of instructions) {
266
- const instResult = await callMCP('write_graph', {
267
- action: 'create_node',
268
- label: 'Instruction',
269
- properties: {
270
- id: `inst-${Date.now()}-${inst.sequence}`,
271
- type: inst.type,
272
- sequence: inst.sequence,
273
- status: 'pending',
274
- pause_duration: 30, // Shorter pause for demo
275
- parameters: inst.parameters || '{}',
276
- },
277
- });
278
-
279
- // Link instruction to workflow
280
- await callMCP('query_graph', {
281
- query: `
282
- MATCH (w:Workflow {id: $wid}), (i:Instruction {id: $iid})
283
- CREATE (w)-[:HAS_INSTRUCTION]->(i)
284
- `,
285
- parameters: {
286
- wid: workflowResult.created.id,
287
- iid: instResult.created.id,
288
- },
289
- });
290
- }
291
-
292
- // Create instruction chain
293
- const instIds = instructions.map((_, i) => `inst-${Date.now()}-${i + 1}`);
294
- for (let i = 0; i < instIds.length - 1; i++) {
295
- await callMCP('query_graph', {
296
- query: `
297
- MATCH (i1:Instruction {id: $id1}), (i2:Instruction {id: $id2})
298
- CREATE (i1)-[:NEXT_INSTRUCTION]->(i2)
299
- `,
300
- parameters: { id1: instIds[i], id2: instIds[i + 1] },
301
- });
302
- }
303
-
304
- const systemMessage: Message = {
305
- id: (Date.now() + 1).toString(),
306
- role: 'system',
307
- content: 'Workflow created! The agent will now process your request...',
308
- timestamp: new Date(),
309
- };
310
- setMessages(prev => [...prev, systemMessage]);
311
-
312
- // Poll for results
313
- const pollForResults = async () => {
314
- let attempts = 0;
315
- const maxAttempts = 60; // 5 minutes max
316
-
317
- while (attempts < maxAttempts) {
318
- await new Promise(resolve => setTimeout(resolve, 5000));
319
-
320
- // Check if SQL generation is complete
321
- const execResult = await callMCP('query_graph', {
322
- query: `
323
- MATCH (i:Instruction {type: 'generate_sql'})-[:EXECUTED_AS]->(e:Execution)
324
- WHERE i.id IN $inst_ids
325
- RETURN e.result as result
326
- ORDER BY e.completed_at DESC
327
- LIMIT 1
328
- `,
329
- parameters: { inst_ids: instIds },
330
- });
331
-
332
- if (execResult.data && execResult.data.length > 0) {
333
- const result = JSON.parse(execResult.data[0].result);
334
-
335
- if (result.status === 'success') {
336
- const assistantMessage: Message = {
337
- id: (Date.now() + 2).toString(),
338
- role: 'assistant',
339
- content: formatSQLResult(result),
340
- timestamp: new Date(),
341
- };
342
- setMessages(prev => [...prev, assistantMessage]);
343
- break;
344
- }
345
- }
346
-
347
- attempts++;
348
- }
349
- };
350
-
351
- pollForResults();
352
-
353
- } catch (error) {
354
- console.error('Error creating workflow:', error);
355
- const errorMessage: Message = {
356
- id: (Date.now() + 2).toString(),
357
- role: 'system',
358
- content: 'Error: Failed to process your request',
359
- timestamp: new Date(),
360
- };
361
- setMessages(prev => [...prev, errorMessage]);
362
- } finally {
363
- setLoading(false);
364
- }
365
- };
366
-
367
- // Format SQL results for display
368
- const formatSQLResult = (result: any) => {
369
- if (!result.data || result.data.length === 0) {
370
- return `Query executed successfully but returned no results.\n\nSQL: ${result.generated_sql}`;
371
- }
372
-
373
- const columns = Object.keys(result.data[0]);
374
- const rows = result.data;
375
-
376
- let table = '<table class="min-w-full border border-gray-300">';
377
- table += '<thead><tr class="bg-gray-100">';
378
- columns.forEach(col => {
379
- table += `<th class="px-4 py-2 border">${col}</th>`;
380
- });
381
- table += '</tr></thead><tbody>';
382
-
383
- rows.forEach((row: any) => {
384
- table += '<tr>';
385
- columns.forEach(col => {
386
- table += `<td class="px-4 py-2 border">${row[col] ?? 'null'}</td>`;
387
- });
388
- table += '</tr>';
389
- });
390
- table += '</tbody></table>';
391
-
392
- return `
393
- <div>
394
- <p class="mb-2"><strong>Generated SQL:</strong></p>
395
- <code class="block bg-gray-100 p-2 mb-4 rounded">${result.generated_sql}</code>
396
- <p class="mb-2"><strong>Results (${result.row_count} rows):</strong></p>
397
- ${table}
398
- </div>
399
- `;
400
- };
401
-
402
- // Handle stop button
403
- const handleStop = async () => {
404
- if (!workflowStatus) return;
405
-
406
- await callMCP('query_graph', {
407
- query: `MATCH (w:Workflow {id: $id}) SET w.status = 'stopped'`,
408
- parameters: { id: workflowStatus.workflow_id },
409
- });
410
-
411
- setWorkflowStatus(null);
412
- };
413
-
414
- return (
415
- <div className="flex h-screen">
416
- {/* Main Chat Area */}
417
- <div className="flex-1 flex flex-col">
418
- {/* Header */}
419
- <div className="bg-white border-b px-6 py-4">
420
- <h1 className="text-2xl font-bold">Graph-Driven Agent Chat</h1>
421
- {workflowStatus && (
422
- <div className="mt-2 flex items-center space-x-4">
423
- <span className="text-sm text-gray-600">
424
- Status: <span className="font-semibold">{workflowStatus.status}</span>
425
- </span>
426
- <span className="text-sm text-gray-600">
427
- Current: <span className="font-semibold">{workflowStatus.current_instruction}</span>
428
- </span>
429
- {loading && (
430
- <span className="text-sm text-yellow-600 animate-pulse">
431
- Agent thinking...
432
- </span>
433
- )}
434
- </div>
435
- )}
436
- </div>
437
-
438
- {/* Messages */}
439
- <div className="flex-1 overflow-y-auto p-6 space-y-4">
440
- {messages.map(message => (
441
- <div
442
- key={message.id}
443
- className={`flex ${message.role === 'user' ? 'justify-end' : 'justify-start'}`}
444
- >
445
- <div
446
- className={`max-w-2xl px-4 py-2 rounded-lg ${
447
- message.role === 'user'
448
- ? 'bg-blue-500 text-white'
449
- : message.role === 'assistant'
450
- ? 'bg-gray-200'
451
- : 'bg-yellow-100'
452
- }`}
453
- >
454
- {message.content.startsWith('<div>') ? (
455
- <div dangerouslySetInnerHTML={{ __html: message.content }} />
456
- ) : (
457
- <p>{message.content}</p>
458
- )}
459
- <p className="text-xs mt-1 opacity-70">
460
- {message.timestamp.toLocaleTimeString()}
461
- </p>
462
- </div>
463
- </div>
464
- ))}
465
- </div>
466
-
467
- {/* Input Form */}
468
- <form onSubmit={handleSubmit} className="border-t bg-white p-4">
469
- <div className="flex space-x-4">
470
- <input
471
- type="text"
472
- value={input}
473
- onChange={e => setInput(e.target.value)}
474
- placeholder="Ask a question about your data..."
475
- className="flex-1 px-4 py-2 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500"
476
- disabled={loading}
477
- />
478
- <button
479
- type="submit"
480
- disabled={loading || !input.trim()}
481
- className="px-6 py-2 bg-blue-500 text-white rounded-lg hover:bg-blue-600 disabled:bg-gray-400"
482
- >
483
- Send
484
- </button>
485
- {workflowStatus && (
486
- <button
487
- type="button"
488
- onClick={handleStop}
489
- className="px-6 py-2 bg-red-500 text-white rounded-lg hover:bg-red-600"
490
- >
491
- STOP
492
- </button>
493
- )}
494
- </div>
495
- </form>
496
- </div>
497
-
498
- {/* Right Sidebar */}
499
- <div className="w-96 bg-gray-50 border-l flex flex-col">
500
- {/* Graph Visualization */}
501
- <div className="flex-1 p-4">
502
- <h2 className="text-lg font-semibold mb-2">Workflow Graph</h2>
503
- <div
504
- ref={cyContainer}
505
- className="w-full h-64 bg-white border rounded-lg"
506
- />
507
- </div>
508
-
509
- {/* Recent Instructions */}
510
- <div className="p-4 border-t">
511
- <h2 className="text-lg font-semibold mb-2">Recent Instructions</h2>
512
- <div className="space-y-2">
513
- {lastInstructions.map((inst, idx) => (
514
- <div key={idx} className="text-sm bg-white p-2 rounded border">
515
- <span className={`font-semibold ${
516
- inst.status === 'complete' ? 'text-green-600' :
517
- inst.status === 'failed' ? 'text-red-600' :
518
- 'text-gray-600'
519
- }`}>
520
- {inst.type}
521
- </span>
522
- <span className="text-gray-500 ml-2">
523
- {inst.status}
524
- </span>
525
- </div>
526
- ))}
527
- </div>
528
- </div>
529
- </div>
530
- </div>
531
- );
532
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/next-env.d.ts DELETED
@@ -1,5 +0,0 @@
1
- /// <reference types="next" />
2
- /// <reference types="next/image-types/global" />
3
-
4
- // NOTE: This file should not be edited
5
- // see https://nextjs.org/docs/basic-features/typescript for more information.
 
 
 
 
 
 
frontend/next.config.js DELETED
@@ -1,6 +0,0 @@
1
- /** @type {import('next').NextConfig} */
2
- const nextConfig = {
3
- // App directory is enabled by default in Next.js 13+
4
- }
5
-
6
- module.exports = nextConfig
 
 
 
 
 
 
 
frontend/package.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "name": "agent-frontend",
3
- "version": "1.0.0",
4
- "scripts": {
5
- "dev": "next dev",
6
- "build": "next build",
7
- "start": "next start"
8
- },
9
- "dependencies": {
10
- "next": "14.0.0",
11
- "react": "18.2.0",
12
- "react-dom": "18.2.0",
13
- "typescript": "5.2.2",
14
- "@types/react": "18.2.0",
15
- "@types/node": "20.8.0",
16
- "cytoscape": "3.27.0",
17
- "cytoscape-fcose": "2.2.0",
18
- "@types/cytoscape": "3.19.0"
19
- },
20
- "devDependencies": {
21
- "tailwindcss": "3.3.5",
22
- "autoprefixer": "10.4.16",
23
- "postcss": "8.4.31"
24
- }
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/postcss.config.js DELETED
@@ -1,6 +0,0 @@
1
- module.exports = {
2
- plugins: {
3
- tailwindcss: {},
4
- autoprefixer: {},
5
- },
6
- }
 
 
 
 
 
 
 
frontend/tailwind.config.js DELETED
@@ -1,11 +0,0 @@
1
- module.exports = {
2
- content: [
3
- './pages/**/*.{js,ts,jsx,tsx}',
4
- './components/**/*.{js,ts,jsx,tsx}',
5
- './app/**/*.{js,ts,jsx,tsx}',
6
- ],
7
- theme: {
8
- extend: {},
9
- },
10
- plugins: [],
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
frontend/tsconfig.json DELETED
@@ -1,43 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "es5",
4
- "lib": [
5
- "dom",
6
- "dom.iterable",
7
- "esnext"
8
- ],
9
- "allowJs": true,
10
- "skipLibCheck": true,
11
- "strict": true,
12
- "forceConsistentCasingInFileNames": true,
13
- "noEmit": true,
14
- "esModuleInterop": true,
15
- "module": "esnext",
16
- "moduleResolution": "node",
17
- "resolveJsonModule": true,
18
- "isolatedModules": true,
19
- "jsx": "preserve",
20
- "incremental": true,
21
- "baseUrl": ".",
22
- "paths": {
23
- "@/*": [
24
- "./*"
25
- ]
26
- },
27
- "plugins": [
28
- {
29
- "name": "next"
30
- }
31
- ]
32
- },
33
- "include": [
34
- "next-env.d.ts",
35
- "**/*.ts",
36
- "**/*.tsx",
37
- "types/**/*.d.ts",
38
- ".next/types/**/*.ts"
39
- ],
40
- "exclude": [
41
- "node_modules"
42
- ]
43
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/types/cytoscape-fcose.d.ts DELETED
@@ -1,16 +0,0 @@
1
- declare module 'cytoscape-fcose' {
2
- import { Core } from 'cytoscape';
3
-
4
- interface FcoseLayoutOptions {
5
- name: string;
6
- animate?: boolean;
7
- randomize?: boolean;
8
- fit?: boolean;
9
- padding?: number;
10
- nodeRepulsion?: number;
11
- idealEdgeLength?: number;
12
- }
13
-
14
- function fcose(cytoscape: (options?: any) => Core): void;
15
- export = fcose;
16
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
mcp/core/discovery.py CHANGED
@@ -3,6 +3,7 @@ from sqlalchemy.engine import Engine
3
  from typing import Dict, Any, List
4
  import logging
5
  from concurrent.futures import ThreadPoolExecutor, as_completed
 
6
 
7
  from .database import get_db_connections
8
 
@@ -24,47 +25,83 @@ def _discover_single_db_schema(db_name: str, engine: Engine) -> Dict[str, Any]:
24
  })
25
  return db_schema
26
 
27
- async def get_relevant_schemas(query: str) -> List[Dict[str, Any]]:
28
  """
29
- Discovers schemas from all connected databases and performs a simple keyword search.
30
- A more advanced implementation would use embeddings for semantic search.
31
  """
32
  db_engines = get_db_connections()
33
  all_schemas = []
 
34
 
35
  with ThreadPoolExecutor() as executor:
36
- # Discover all schemas in parallel
37
- future_to_db = {executor.submit(_discover_single_db_schema, name, eng): name for name, eng in db_engines.items()}
38
- for future in as_completed(future_to_db):
 
 
 
 
 
39
  try:
40
- all_schemas.append(future.result())
 
41
  except Exception as e:
42
- db_name = future_to_db[future]
43
- logger.error(f"Failed to discover schema for {db_name}: {e}")
 
 
 
 
 
 
 
 
44
 
45
  if not query:
46
- return all_schemas
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- # Simple keyword filtering
49
  keywords = query.lower().split()
50
  relevant_schemas = []
51
  for db_schema in all_schemas:
52
  for table in db_schema.get("tables", []):
 
53
  if any(keyword in table['name'].lower() for keyword in keywords):
54
- relevant_schemas.append({
55
- "database": db_schema["database_name"],
56
- "table": table['name'],
57
- "columns": table['columns']
58
- })
59
  else:
60
  for col in table.get("columns", []):
61
  if any(keyword in col['name'].lower() for keyword in keywords):
62
- relevant_schemas.append({
63
- "database": db_schema["database_name"],
64
- "table": table['name'],
65
- "columns": table['columns'] # Return full table if a column matches
66
- })
67
- break # Move to next table
68
-
69
- # Deduplicate results (in case multiple keywords match the same table)
70
- return [dict(t) for t in {tuple(d.items()) for d in relevant_schemas}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from typing import Dict, Any, List
4
  import logging
5
  from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ import asyncio
7
 
8
  from .database import get_db_connections
9
 
 
25
  })
26
  return db_schema
27
 
28
async def discover_all_schemas() -> List[Dict[str, Any]]:
    """
    Discovers the full schema for all connected databases in parallel.

    Each database's introspection runs in a worker thread (SQLAlchemy
    inspection is blocking), so the event loop stays responsive. A failure
    for one database is logged *with that database's name* and does not
    abort discovery of the others.

    Returns:
        A list of per-database schema dicts (one per database that was
        discovered successfully), in the order of get_db_connections().
    """
    db_engines = get_db_connections()
    all_schemas: List[Dict[str, Any]] = []
    loop = asyncio.get_running_loop()

    with ThreadPoolExecutor() as executor:
        names = list(db_engines.keys())
        futures = [
            loop.run_in_executor(executor, _discover_single_db_schema, name, db_engines[name])
            for name in names
        ]
        # gather (instead of as_completed) preserves the name <-> result
        # pairing, so an error can be attributed to the database that
        # actually caused it.
        results = await asyncio.gather(*futures, return_exceptions=True)

    for name, result in zip(names, results):
        if isinstance(result, BaseException):
            logger.error(f"Schema discovery for database '{name}' failed: {result}", exc_info=result)
        else:
            all_schemas.append(result)

    return all_schemas
52
+
53
async def get_relevant_schemas(query: str) -> List[Dict[str, Any]]:
    """
    Discovers schemas and performs a simple keyword search.
    If no query is provided, returns the full schema.
    """
    discovered = await discover_all_schemas()

    def _column_entries(db_name, table):
        # One flat record per column; `type` is wrapped in a list to match
        # the shape the UI consumes.
        for column in table.get("columns", []):
            yield {
                "database": db_name,
                "table": table["name"],
                "name": column["name"],
                "type": [column["type"]],
            }

    if not query:
        # No search terms: flatten every table/column for the UI.
        return [
            entry
            for db in discovered
            for tbl in db.get("tables", [])
            for entry in _column_entries(db["database_name"], tbl)
        ]

    keywords = query.lower().split()

    def _table_matches(table):
        # A table is relevant when its name, or any of its column names,
        # contains any keyword.
        if any(kw in table["name"].lower() for kw in keywords):
            return True
        return any(
            any(kw in col["name"].lower() for kw in keywords)
            for col in table.get("columns", [])
        )

    hits = [
        entry
        for db in discovered
        for tbl in db.get("tables", [])
        if _table_matches(tbl)
        for entry in _column_entries(db["database_name"], tbl)
    ]

    # Deduplicate while preserving first-seen order (lists aren't hashable,
    # so key on the identifying triple only).
    seen = set()
    unique = []
    for entry in hits:
        key = (entry["database"], entry["table"], entry["name"])
        if key not in seen:
            seen.add(key)
            unique.append(entry)
    return unique
mcp/core/graph.py CHANGED
@@ -33,6 +33,61 @@ def _ensure_constraints(driver: Driver):
33
  session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (c:Column) REQUIRE c.unique_name IS UNIQUE")
34
  logger.info("Neo4j constraints ensured.")
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def _keyword_search(keyword: str) -> List[Dict[str, Any]]:
37
  """Internal helper to search for table nodes by keyword."""
38
  driver = get_graph_driver()
 
33
  session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (c:Column) REQUIRE c.unique_name IS UNIQUE")
34
  logger.info("Neo4j constraints ensured.")
35
 
36
def import_schema(schema_data: dict) -> None:
    """
    Imports a discovered database schema into the Neo4j graph.

    Graph model created:
        (:Database {name})-[:HAS_TABLE]->(:Table {name, unique_name})
            -[:HAS_COLUMN]->(:Column {name, unique_name, type})

    `unique_name` is the fully qualified name ("db.table" for tables,
    "db.table.column" for columns) and is the MERGE key, so re-running
    the import is idempotent: existing nodes are updated in place
    rather than duplicated.

    Args:
        schema_data: Dict with 'database_name' and 'tables'; each table
            has 'name' and 'columns', each column 'name' and 'type'
            (shape produced by the discovery module — confirm there).
    """
    driver = get_graph_driver()
    db_name = schema_data['database_name']

    with driver.session() as session:
        # Create Database node
        session.run("MERGE (d:Database {name: $db_name})", db_name=db_name)

        for table in schema_data['tables']:
            # Qualify the table name so identically named tables in
            # different databases get distinct nodes.
            table_unique_name = f"{db_name}.{table['name']}"
            table_properties = {
                "name": table['name'],
                "unique_name": table_unique_name,
            }

            # Create Table node and HAS_TABLE relationship.
            # SET on both CREATE and MATCH keeps properties current on re-import.
            session.run(
                """
                MATCH (d:Database {name: $db_name})
                MERGE (t:Table {unique_name: $unique_name})
                ON CREATE SET t += $props
                ON MATCH SET t += $props
                MERGE (d)-[:HAS_TABLE]->(t)
                """,
                db_name=db_name,
                unique_name=table_unique_name,
                props=table_properties
            )

            for column in table['columns']:
                column_unique_name = f"{table_unique_name}.{column['name']}"
                column_properties = {
                    "name": column['name'],
                    "unique_name": column_unique_name,
                    "type": column['type'],
                }

                # Create Column node and HAS_COLUMN relationship
                session.run(
                    """
                    MATCH (t:Table {unique_name: $table_unique_name})
                    MERGE (c:Column {unique_name: $column_unique_name})
                    ON CREATE SET c += $props
                    ON MATCH SET c += $props
                    MERGE (t)-[:HAS_COLUMN]->(c)
                    """,
                    table_unique_name=table_unique_name,
                    column_unique_name=column_unique_name,
                    props=column_properties
                )
    logger.info(f"Successfully imported schema for database: {db_name}")
90
+
91
  def _keyword_search(keyword: str) -> List[Dict[str, Any]]:
92
  """Internal helper to search for table nodes by keyword."""
93
  driver = get_graph_driver()
mcp/requirements.txt CHANGED
@@ -5,3 +5,4 @@ pydantic==2.4.0
5
  requests==2.31.0
6
  SQLAlchemy==2.0.29
7
  sqlparse==0.5.0
 
 
5
  requests==2.31.0
6
  SQLAlchemy==2.0.29
7
  sqlparse==0.5.0
8
+ mcp==1.1.1
ops/scripts/ingest.py CHANGED
@@ -1,71 +1,51 @@
1
  import os
2
  import sys
3
  import logging
4
- from sqlalchemy import create_engine
5
 
6
- # Add project root to path to allow imports from mcp
7
- project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
- sys.path.append(project_root)
9
 
10
- from core.discovery import discover_schema
11
- from core.graph import GraphStore
12
- from core.config import SQLITE_DATA_DIR
13
 
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
  logger = logging.getLogger(__name__)
16
 
17
- def ingest_sqlite_database(db_file: str, graph_store: GraphStore):
18
- """Discovers schema from a SQLite DB and ingests it into Neo4j."""
19
- db_path = os.path.join(SQLITE_DATA_DIR, db_file)
20
- logger.info(f"Processing database: {db_path}")
21
-
22
- if not os.path.exists(db_path):
23
- logger.error(f"Database file not found: {db_path}")
24
- return
25
-
26
- try:
27
- engine = create_engine(f"sqlite:///{db_path}")
28
- schema_data = discover_schema(engine)
29
-
30
- if schema_data:
31
- logger.info(f"Discovered schema for {db_file}, ingesting into Neo4j...")
32
- graph_store.import_schema(schema_data)
33
- logger.info(f"Successfully ingested schema for {db_file}")
34
- else:
35
- logger.warning(f"Could not discover schema for {db_file}. Skipping.")
36
-
37
- except Exception as e:
38
- logger.error(f"An error occurred while processing {db_file}: {e}")
39
-
40
- def main():
41
  """
42
- Main function to run the ingestion process for all SQLite databases
43
- found in the data directory.
44
  """
45
  logger.info("Starting schema ingestion process...")
46
 
47
- if not os.path.exists(SQLITE_DATA_DIR) or not os.path.isdir(SQLITE_DATA_DIR):
48
- logger.error(f"Data directory not found: {SQLITE_DATA_DIR}")
49
- return
50
-
51
- db_files = [f for f in os.listdir(SQLITE_DATA_DIR) if f.endswith(".db")]
52
-
53
- if not db_files:
54
- logger.warning(f"No SQLite database files (.db) found in {SQLITE_DATA_DIR}.")
55
- return
56
-
57
  try:
58
- graph_store = GraphStore()
59
- logger.info("Successfully connected to Neo4j.")
60
- except Exception as e:
61
- logger.error(f"Failed to connect to Neo4j. Aborting ingestion. Error: {e}")
62
- return
63
-
64
- for db_file in db_files:
65
- ingest_sqlite_database(db_file, graph_store)
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- graph_store.close()
68
- logger.info("Schema ingestion process completed.")
 
 
 
 
69
 
70
  if __name__ == "__main__":
71
- main()
 
1
  import os
2
  import sys
3
  import logging
4
+ import asyncio
5
 
6
+ # The script runs inside the 'mcp' container where the WORKDIR is '/app'.
7
+ # The 'core' module is at '/app/core'. We need to add '/app' to the Python path.
8
+ sys.path.insert(0, '/app')
9
 
10
+ from core.discovery import discover_all_schemas
11
+ from core.graph import import_schema, close_graph_driver
 
12
 
13
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
  logger = logging.getLogger(__name__)
15
 
16
async def main() -> None:
    """
    Main asynchronous function to run the full schema discovery and
    ingestion process.

    Steps:
        1. Discover schemas from all connected databases in parallel.
        2. Import each discovered schema into Neo4j, continuing past
           per-database failures.
        3. Always close the Neo4j driver, even on error.
    """
    logger.info("Starting schema ingestion process...")

    try:
        # Step 1: Discover schemas from all connected SQLite databases
        logger.info("Discovering schemas from all databases...")
        all_schemas = await discover_all_schemas()

        if not all_schemas:
            logger.warning("No schemas were discovered. Ingestion cannot proceed.")
            return

        logger.info(f"Discovered {len(all_schemas)} schemas. Now ingesting into Neo4j...")

        # Step 2: Import each discovered schema into Neo4j, tracking outcomes
        # so the final log line reflects what actually happened.
        succeeded = 0
        failed = 0
        for schema_data in all_schemas:
            try:
                import_schema(schema_data)
                succeeded += 1
                logger.info(f"Successfully ingested schema for: {schema_data['database_name']}")
            except Exception as e:
                failed += 1
                logger.error(f"Failed to ingest schema for {schema_data['database_name']}: {e}", exc_info=True)

        # Honest summary: don't claim success when imports failed.
        if failed:
            logger.warning(f"Schema ingestion finished with errors: {succeeded} succeeded, {failed} failed.")
        else:
            logger.info("Schema ingestion process completed successfully.")

    except Exception as e:
        logger.critical(f"A critical error occurred during the ingestion process: {e}", exc_info=True)
    finally:
        # Step 3: Ensure all connections are closed
        close_graph_driver()
        logger.info("Neo4j connection closed.")

if __name__ == "__main__":
    asyncio.run(main())
+ asyncio.run(main())
streamlit/app.py CHANGED
@@ -36,13 +36,12 @@ def stream_agent_response(question: str):
36
  try:
37
  with requests.post(AGENT_URL, json={"question": question}, stream=True, timeout=300) as r:
38
  r.raise_for_status()
39
- for chunk in r.iter_content(chunk_size=None):
40
- if chunk:
41
  try:
42
- yield json.loads(chunk)
43
  except json.JSONDecodeError:
44
- # Handle potential parsing errors if chunks are not perfect JSON
45
- logger.warning(f"Could not decode JSON chunk: {chunk}")
46
  continue
47
  except requests.exceptions.RequestException as e:
48
  yield {"error": f"Failed to connect to agent: {e}"}
@@ -52,7 +51,7 @@ def fetch_schema_info() -> str:
52
  try:
53
  response = requests.post(
54
  f"{MCP_URL}/discovery/get_relevant_schemas",
55
- headers={"Authorization": f"Bearer {MCP_API_KEY}", "Content-Type": "application/json"},
56
  json={"query": ""}
57
  )
58
  response.raise_for_status()
@@ -87,12 +86,13 @@ def get_cached_schema():
87
  """Cache the schema info to avoid repeated calls."""
88
  return fetch_schema_info()
89
 
 
90
  def check_service_health(service_name: str, url: str) -> bool:
91
- """Checks if a service is reachable."""
92
  try:
93
- response = requests.get(url, timeout=5)
94
  return response.status_code in [200, 401]
95
- except requests.exceptions.RequestException:
96
  return False
97
 
98
  # --- UI Components ---
@@ -109,8 +109,13 @@ def display_sidebar():
109
  st.markdown("---")
110
  st.title("πŸ”Œ Service Status")
111
 
112
- neo4j_status = "βœ… Online" if check_service_health("Neo4j", NEO4J_URL) else "❌ Offline"
113
- mcp_status = "βœ… Online" if check_service_health("MCP", MCP_URL.replace("/mcp", "/health")) else "❌ Offline"
 
 
 
 
 
114
 
115
  st.markdown(f"**Neo4j:** {neo4j_status}")
116
  st.markdown(f"**MCP Server:** {mcp_status}")
 
36
  try:
37
  with requests.post(AGENT_URL, json={"question": question}, stream=True, timeout=300) as r:
38
  r.raise_for_status()
39
+ for line in r.iter_lines():
40
+ if line:
41
  try:
42
+ yield json.loads(line.decode('utf-8'))
43
  except json.JSONDecodeError:
44
+ # Skip malformed JSON lines
 
45
  continue
46
  except requests.exceptions.RequestException as e:
47
  yield {"error": f"Failed to connect to agent: {e}"}
 
51
  try:
52
  response = requests.post(
53
  f"{MCP_URL}/discovery/get_relevant_schemas",
54
+ headers={"x-api-key": MCP_API_KEY, "Content-Type": "application/json"},
55
  json={"query": ""}
56
  )
57
  response.raise_for_status()
 
86
  """Cache the schema info to avoid repeated calls."""
87
  return fetch_schema_info()
88
 
89
@st.cache_data(ttl=10)
def check_service_health(service_name: str, url: str) -> bool:
    """Return True when the service at *url* responds. Cached for 10 seconds."""
    try:
        status = requests.get(url, timeout=2).status_code
    except Exception:
        # Any connection/timeout problem counts as "down".
        return False
    # 401 still proves the service is up — it just wants credentials.
    return status in (200, 401)
97
 
98
  # --- UI Components ---
 
109
  st.markdown("---")
110
  st.title("πŸ”Œ Service Status")
111
 
112
+ try:
113
+ neo4j_status = "βœ… Online" if check_service_health("Neo4j", NEO4J_URL) else "❌ Offline"
114
+ mcp_health_url = "http://mcp:8000/health"
115
+ mcp_status = "βœ… Online" if check_service_health("MCP", mcp_health_url) else "❌ Offline"
116
+ except Exception as e:
117
+ neo4j_status = "❓ Unknown"
118
+ mcp_status = "❓ Unknown"
119
 
120
  st.markdown(f"**Neo4j:** {neo4j_status}")
121
  st.markdown(f"**MCP Server:** {mcp_status}")