#!/usr/bin/env python3 """ Comprehensive seed script for the agentic system. Creates multiple workflow templates and instruction types for various scenarios. """ import requests import json import time import os # Configuration MCP_URL = os.getenv("MCP_URL", "http://localhost:8000/mcp") API_KEY = os.getenv("MCP_API_KEY", "dev-key-123") def call_mcp(tool, params=None): """Call the MCP API""" response = requests.post( MCP_URL, headers={"X-API-Key": API_KEY, "Content-Type": "application/json"}, json={"tool": tool, "params": params or {}} ) return response.json() def create_workflow_templates(): """Create different workflow templates for various use cases""" print("🌱 Creating workflow templates...") workflows = [ { "id": "template-basic-query", "name": "Basic Data Query", "description": "Simple question-to-SQL workflow", "status": "template" }, { "id": "template-analysis", "name": "Data Analysis Workflow", "description": "Multi-step analysis with validation", "status": "template" }, { "id": "template-report", "name": "Report Generation", "description": "Generate formatted reports from data", "status": "template" } ] for workflow in workflows: result = call_mcp("write_graph", { "action": "create_node", "label": "WorkflowTemplate", "properties": { **workflow, "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ") } }) print(f"āœ… Created workflow template: {workflow['name']}") def create_instruction_types(): """Create instruction type definitions""" print("šŸ”§ Creating instruction type definitions...") instruction_types = [ { "type": "discover_schema", "name": "Schema Discovery", "description": "Discover and analyze database schema", "default_pause": 60, "parameters_schema": "{}" }, { "type": "generate_sql", "name": "SQL Generation", "description": "Convert natural language to SQL", "default_pause": 300, "parameters_schema": json.dumps({ "question": "string", "context": "string (optional)" }) }, { "type": "execute_sql", "name": "SQL Execution", "description": "Execute SQL query against database", "default_pause": 120, "parameters_schema": json.dumps({ "query": "string", "limit": "integer (optional)" }) }, { "type": "validate_results", "name": "Result Validation", "description": "Validate and check query results", "default_pause": 60, "parameters_schema": json.dumps({ "validation_rules": "array (optional)" }) }, { "type": "format_output", "name": "Output Formatting", "description": "Format results for presentation", "default_pause": 30, "parameters_schema": json.dumps({ "format": "string (table|chart|json)", "title": "string (optional)" }) }, { "type": "review_results", "name": "Human Review", "description": "Human review checkpoint", "default_pause": 300, "parameters_schema": "{}" } ] for inst_type in instruction_types: result = call_mcp("write_graph", { "action": "create_node", "label": "InstructionType", "properties": { **inst_type, "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ") } }) print(f"āœ… Created instruction type: {inst_type['name']}") def create_query_library(): """Create a library of common queries""" print("šŸ“š Creating query library...") queries = [ { "id": "query-customer-count", "category": "basic", "question": "How many customers do we have?", "sql": "SELECT COUNT(*) as customer_count FROM customers", "description": "Total customer count" }, { "id": "query-recent-orders", "category": "basic", "question": "Show me recent orders", "sql": "SELECT o.id, o.order_date, c.name, o.total_amount FROM orders o JOIN customers c ON o.customer_id = c.id ORDER BY o.order_date DESC LIMIT 10", "description": "Last 10 orders with customer info" }, { "id": "query-revenue-total", "category": "analytics", "question": "What's our total revenue?", "sql": "SELECT SUM(total_amount) as total_revenue FROM orders", "description": "Sum of all order amounts" }, { "id": "query-top-customers", "category": "analytics", "question": "Who are our top customers by revenue?", "sql": "SELECT c.name, c.email, SUM(o.total_amount) as total_spent FROM customers c JOIN orders o ON c.id = o.customer_id GROUP BY c.id, c.name, c.email ORDER BY total_spent DESC LIMIT 5", "description": "Top 5 customers by total spending" }, { "id": "query-monthly-trend", "category": "analytics", "question": "Show monthly revenue trend", "sql": "SELECT DATE_TRUNC('month', order_date) as month, SUM(total_amount) as monthly_revenue FROM orders GROUP BY DATE_TRUNC('month', order_date) ORDER BY month", "description": "Revenue by month" }, { "id": "query-customer-orders", "category": "detailed", "question": "Show customers with their order details", "sql": "SELECT c.name, c.email, o.order_date, o.total_amount, o.status FROM customers c LEFT JOIN orders o ON c.id = o.customer_id ORDER BY c.name, o.order_date DESC", "description": "Customer and order details" } ] for query in queries: result = call_mcp("write_graph", { "action": "create_node", "label": "QueryTemplate", "properties": { **query, "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ") } }) print(f"āœ… Created query: {query['description']}") def create_demo_workflows(): """Create ready-to-run demo workflows""" print("šŸŽÆ Creating demo workflows...") # Demo Workflow 1: Simple Query workflow1 = call_mcp("write_graph", { "action": "create_node", "label": "Workflow", "properties": { "id": "demo-simple-query", "name": "Simple Customer Count", "description": "Demo: Count total customers", "status": "active", "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ") } }) # Instructions for workflow 1 inst1 = call_mcp("write_graph", { "action": "create_node", "label": "Instruction", "properties": { "id": "demo-simple-1", "type": "generate_sql", "sequence": 1, "status": "pending", "pause_duration": 60, # 1 minute for demo "parameters": json.dumps({"question": "How many customers do we have?"}), "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ") } }) # Link instruction to workflow call_mcp("query_graph", { "query": "MATCH (w:Workflow {id: 'demo-simple-query'}), (i:Instruction {id: 'demo-simple-1'}) CREATE (w)-[:HAS_INSTRUCTION]->(i)" }) print("āœ… Created simple demo workflow") # Demo Workflow 2: Multi-step Analysis workflow2 = call_mcp("write_graph", { "action": "create_node", "label": "Workflow", "properties": { "id": "demo-analysis", "name": "Customer Revenue Analysis", "description": "Demo: Multi-step customer analysis", "status": "template", # Not active by default "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ") } }) # Multi-step instructions analysis_instructions = [ { "id": "demo-analysis-1", "type": "discover_schema", "sequence": 1, "description": "Discover customer and order tables", "parameters": "{}" }, { "id": "demo-analysis-2", "type": "generate_sql", "sequence": 2, "description": "Generate customer revenue query", "parameters": json.dumps({"question": "Show me top customers by total revenue"}) }, { "id": "demo-analysis-3", "type": "review_results", "sequence": 3, "description": "Review results before final output", "parameters": "{}" } ] for inst in analysis_instructions: call_mcp("write_graph", { "action": "create_node", "label": "Instruction", "properties": { **inst, "status": "template", "pause_duration": 120, "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ") } }) # Link to workflow call_mcp("query_graph", { "query": "MATCH (w:Workflow {id: 'demo-analysis'}), (i:Instruction {id: $iid}) CREATE (w)-[:HAS_INSTRUCTION]->(i)", "parameters": {"iid": inst["id"]} }) # Create instruction chain for i in range(len(analysis_instructions) - 1): current = analysis_instructions[i]["id"] next_inst = analysis_instructions[i + 1]["id"] call_mcp("query_graph", { "query": "MATCH (i1:Instruction {id: $id1}), (i2:Instruction {id: $id2}) CREATE (i1)-[:NEXT_INSTRUCTION]->(i2)", "parameters": {"id1": current, "id2": next_inst} }) print("āœ… Created multi-step analysis workflow") def create_system_config(): """Create system configuration nodes""" print("āš™ļø Creating system configuration...") config = { "system_version": "1.0.0", "default_pause_duration": 300, "max_retry_attempts": 3, "default_polling_interval": 30, "supported_instruction_types": json.dumps([ "discover_schema", "generate_sql", "execute_sql", "validate_results", "format_output", "review_results" ]), "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ") } result = call_mcp("write_graph", { "action": "create_node", "label": "SystemConfig", "properties": config }) print("āœ… Created system configuration") def verify_seeding(): """Verify all seeded data""" print("\nšŸ” Verifying seeded data...") # Count nodes by type counts = call_mcp("query_graph", { "query": """ MATCH (n) RETURN labels(n)[0] as label, count(n) as count ORDER BY count DESC """ }) print("\nšŸ“Š Node Statistics:") for item in counts.get("data", []): print(f" - {item['label']}: {item['count']} nodes") # Check active workflows active_workflows = call_mcp("query_graph", { "query": "MATCH (w:Workflow {status: 'active'}) RETURN w.name as name" }) if active_workflows.get("data"): print(f"\nšŸŽÆ Active Workflows:") for wf in active_workflows["data"]: print(f" - {wf['name']}") print(f"\nāœ… Comprehensive seeding completed successfully!") def main(): print("šŸš€ Starting comprehensive seed process...") # Check services first try: health_response = requests.get(f"{MCP_URL.replace('/mcp', '/health')}", timeout=5) if health_response.status_code != 200: print("āŒ MCP service not available") return False except Exception as e: print(f"āŒ Service check failed: {e}") return False print("āœ… Services are available\n") # Run all seeding functions create_workflow_templates() create_instruction_types() create_query_library() create_demo_workflows() create_system_config() verify_seeding() print("\nšŸ“‹ What's Available:") print("1. Open http://localhost:3000 - Frontend interface") print("2. Open http://localhost:7474 - Neo4j Browser (neo4j/password)") print("3. Try asking: 'How many customers do we have?'") print("4. Check the 'Customer Revenue Analysis' workflow template") print("5. Explore the query library for more examples") return True if __name__ == "__main__": if main(): exit(0) else: exit(1)