agent-mcp-sql / ops /scripts /seed_comprehensive.py
Timothy Eastridge
clean up on seeding and system overview
da2713e
#!/usr/bin/env python3
"""
Comprehensive seed script for the agentic system.
Creates multiple workflow templates and instruction types for various scenarios.
"""
import requests
import json
import time
import os
# Configuration
MCP_URL = os.getenv("MCP_URL", "http://localhost:8000/mcp")
API_KEY = os.getenv("MCP_API_KEY", "dev-key-123")
def call_mcp(tool, params=None):
"""Call the MCP API"""
response = requests.post(
MCP_URL,
headers={"X-API-Key": API_KEY, "Content-Type": "application/json"},
json={"tool": tool, "params": params or {}}
)
return response.json()
def create_workflow_templates():
"""Create different workflow templates for various use cases"""
print("🌱 Creating workflow templates...")
workflows = [
{
"id": "template-basic-query",
"name": "Basic Data Query",
"description": "Simple question-to-SQL workflow",
"status": "template"
},
{
"id": "template-analysis",
"name": "Data Analysis Workflow",
"description": "Multi-step analysis with validation",
"status": "template"
},
{
"id": "template-report",
"name": "Report Generation",
"description": "Generate formatted reports from data",
"status": "template"
}
]
for workflow in workflows:
result = call_mcp("write_graph", {
"action": "create_node",
"label": "WorkflowTemplate",
"properties": {
**workflow,
"created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ")
}
})
print(f"βœ… Created workflow template: {workflow['name']}")
def create_instruction_types():
"""Create instruction type definitions"""
print("πŸ”§ Creating instruction type definitions...")
instruction_types = [
{
"type": "discover_schema",
"name": "Schema Discovery",
"description": "Discover and analyze database schema",
"default_pause": 60,
"parameters_schema": "{}"
},
{
"type": "generate_sql",
"name": "SQL Generation",
"description": "Convert natural language to SQL",
"default_pause": 300,
"parameters_schema": json.dumps({
"question": "string",
"context": "string (optional)"
})
},
{
"type": "execute_sql",
"name": "SQL Execution",
"description": "Execute SQL query against database",
"default_pause": 120,
"parameters_schema": json.dumps({
"query": "string",
"limit": "integer (optional)"
})
},
{
"type": "validate_results",
"name": "Result Validation",
"description": "Validate and check query results",
"default_pause": 60,
"parameters_schema": json.dumps({
"validation_rules": "array (optional)"
})
},
{
"type": "format_output",
"name": "Output Formatting",
"description": "Format results for presentation",
"default_pause": 30,
"parameters_schema": json.dumps({
"format": "string (table|chart|json)",
"title": "string (optional)"
})
},
{
"type": "review_results",
"name": "Human Review",
"description": "Human review checkpoint",
"default_pause": 300,
"parameters_schema": "{}"
}
]
for inst_type in instruction_types:
result = call_mcp("write_graph", {
"action": "create_node",
"label": "InstructionType",
"properties": {
**inst_type,
"created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ")
}
})
print(f"βœ… Created instruction type: {inst_type['name']}")
def create_query_library():
"""Create a library of common queries"""
print("πŸ“š Creating query library...")
queries = [
{
"id": "query-customer-count",
"category": "basic",
"question": "How many customers do we have?",
"sql": "SELECT COUNT(*) as customer_count FROM customers",
"description": "Total customer count"
},
{
"id": "query-recent-orders",
"category": "basic",
"question": "Show me recent orders",
"sql": "SELECT o.id, o.order_date, c.name, o.total_amount FROM orders o JOIN customers c ON o.customer_id = c.id ORDER BY o.order_date DESC LIMIT 10",
"description": "Last 10 orders with customer info"
},
{
"id": "query-revenue-total",
"category": "analytics",
"question": "What's our total revenue?",
"sql": "SELECT SUM(total_amount) as total_revenue FROM orders",
"description": "Sum of all order amounts"
},
{
"id": "query-top-customers",
"category": "analytics",
"question": "Who are our top customers by revenue?",
"sql": "SELECT c.name, c.email, SUM(o.total_amount) as total_spent FROM customers c JOIN orders o ON c.id = o.customer_id GROUP BY c.id, c.name, c.email ORDER BY total_spent DESC LIMIT 5",
"description": "Top 5 customers by total spending"
},
{
"id": "query-monthly-trend",
"category": "analytics",
"question": "Show monthly revenue trend",
"sql": "SELECT DATE_TRUNC('month', order_date) as month, SUM(total_amount) as monthly_revenue FROM orders GROUP BY DATE_TRUNC('month', order_date) ORDER BY month",
"description": "Revenue by month"
},
{
"id": "query-customer-orders",
"category": "detailed",
"question": "Show customers with their order details",
"sql": "SELECT c.name, c.email, o.order_date, o.total_amount, o.status FROM customers c LEFT JOIN orders o ON c.id = o.customer_id ORDER BY c.name, o.order_date DESC",
"description": "Customer and order details"
}
]
for query in queries:
result = call_mcp("write_graph", {
"action": "create_node",
"label": "QueryTemplate",
"properties": {
**query,
"created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ")
}
})
print(f"βœ… Created query: {query['description']}")
def create_demo_workflows():
"""Create ready-to-run demo workflows"""
print("🎯 Creating demo workflows...")
# Demo Workflow 1: Simple Query
workflow1 = call_mcp("write_graph", {
"action": "create_node",
"label": "Workflow",
"properties": {
"id": "demo-simple-query",
"name": "Simple Customer Count",
"description": "Demo: Count total customers",
"status": "active",
"created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ")
}
})
# Instructions for workflow 1
inst1 = call_mcp("write_graph", {
"action": "create_node",
"label": "Instruction",
"properties": {
"id": "demo-simple-1",
"type": "generate_sql",
"sequence": 1,
"status": "pending",
"pause_duration": 60, # 1 minute for demo
"parameters": json.dumps({"question": "How many customers do we have?"}),
"created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ")
}
})
# Link instruction to workflow
call_mcp("query_graph", {
"query": "MATCH (w:Workflow {id: 'demo-simple-query'}), (i:Instruction {id: 'demo-simple-1'}) CREATE (w)-[:HAS_INSTRUCTION]->(i)"
})
print("βœ… Created simple demo workflow")
# Demo Workflow 2: Multi-step Analysis
workflow2 = call_mcp("write_graph", {
"action": "create_node",
"label": "Workflow",
"properties": {
"id": "demo-analysis",
"name": "Customer Revenue Analysis",
"description": "Demo: Multi-step customer analysis",
"status": "template", # Not active by default
"created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ")
}
})
# Multi-step instructions
analysis_instructions = [
{
"id": "demo-analysis-1",
"type": "discover_schema",
"sequence": 1,
"description": "Discover customer and order tables",
"parameters": "{}"
},
{
"id": "demo-analysis-2",
"type": "generate_sql",
"sequence": 2,
"description": "Generate customer revenue query",
"parameters": json.dumps({"question": "Show me top customers by total revenue"})
},
{
"id": "demo-analysis-3",
"type": "review_results",
"sequence": 3,
"description": "Review results before final output",
"parameters": "{}"
}
]
for inst in analysis_instructions:
call_mcp("write_graph", {
"action": "create_node",
"label": "Instruction",
"properties": {
**inst,
"status": "template",
"pause_duration": 120,
"created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ")
}
})
# Link to workflow
call_mcp("query_graph", {
"query": "MATCH (w:Workflow {id: 'demo-analysis'}), (i:Instruction {id: $iid}) CREATE (w)-[:HAS_INSTRUCTION]->(i)",
"parameters": {"iid": inst["id"]}
})
# Create instruction chain
for i in range(len(analysis_instructions) - 1):
current = analysis_instructions[i]["id"]
next_inst = analysis_instructions[i + 1]["id"]
call_mcp("query_graph", {
"query": "MATCH (i1:Instruction {id: $id1}), (i2:Instruction {id: $id2}) CREATE (i1)-[:NEXT_INSTRUCTION]->(i2)",
"parameters": {"id1": current, "id2": next_inst}
})
print("βœ… Created multi-step analysis workflow")
def create_system_config():
"""Create system configuration nodes"""
print("βš™οΈ Creating system configuration...")
config = {
"system_version": "1.0.0",
"default_pause_duration": 300,
"max_retry_attempts": 3,
"default_polling_interval": 30,
"supported_instruction_types": json.dumps([
"discover_schema", "generate_sql", "execute_sql",
"validate_results", "format_output", "review_results"
]),
"created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ")
}
result = call_mcp("write_graph", {
"action": "create_node",
"label": "SystemConfig",
"properties": config
})
print("βœ… Created system configuration")
def verify_seeding():
"""Verify all seeded data"""
print("\nπŸ” Verifying seeded data...")
# Count nodes by type
counts = call_mcp("query_graph", {
"query": """
MATCH (n)
RETURN labels(n)[0] as label, count(n) as count
ORDER BY count DESC
"""
})
print("\nπŸ“Š Node Statistics:")
for item in counts.get("data", []):
print(f" - {item['label']}: {item['count']} nodes")
# Check active workflows
active_workflows = call_mcp("query_graph", {
"query": "MATCH (w:Workflow {status: 'active'}) RETURN w.name as name"
})
if active_workflows.get("data"):
print(f"\n🎯 Active Workflows:")
for wf in active_workflows["data"]:
print(f" - {wf['name']}")
print(f"\nβœ… Comprehensive seeding completed successfully!")
def main():
print("πŸš€ Starting comprehensive seed process...")
# Check services first
try:
health_response = requests.get(f"{MCP_URL.replace('/mcp', '/health')}", timeout=5)
if health_response.status_code != 200:
print("❌ MCP service not available")
return False
except Exception as e:
print(f"❌ Service check failed: {e}")
return False
print("βœ… Services are available\n")
# Run all seeding functions
create_workflow_templates()
create_instruction_types()
create_query_library()
create_demo_workflows()
create_system_config()
verify_seeding()
print("\nπŸ“‹ What's Available:")
print("1. Open http://localhost:3000 - Frontend interface")
print("2. Open http://localhost:7474 - Neo4j Browser (neo4j/password)")
print("3. Try asking: 'How many customers do we have?'")
print("4. Check the 'Customer Revenue Analysis' workflow template")
print("5. Explore the query library for more examples")
return True
if __name__ == "__main__":
if main():
exit(0)
else:
exit(1)