import gradio as gr from groq import Groq from pydantic import BaseModel import json import sqlite3 import pandas as pd from datetime import datetime, timedelta import random # Pydantic models for structured output class ValidationStatus(BaseModel): is_valid: bool syntax_errors: list[str] class SQLQueryGeneration(BaseModel): query: str query_type: str tables_used: list[str] estimated_complexity: str execution_notes: list[str] validation_status: ValidationStatus # Sample data generators def generate_sample_customers(count=10): """Generate sample customer data""" first_names = ["Alice", "Bob", "Carol", "David", "Emma", "Frank", "Grace", "Henry", "Ivy", "Jack"] last_names = ["Johnson", "Smith", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez"] customers = [] for i in range(1, count + 1): fname = random.choice(first_names) lname = random.choice(last_names) customers.append({ 'customer_id': i, 'name': f"{fname} {lname}", 'email': f"{fname.lower()}{i}@example.com" }) return customers def generate_sample_orders(customer_count=10, order_count=20): """Generate sample order data""" orders = [] base_date = datetime.now() for i in range(1, order_count + 1): days_ago = random.randint(0, 60) order_date = (base_date - timedelta(days=days_ago)).strftime('%Y-%m-%d') orders.append({ 'order_id': 100 + i, 'customer_id': random.randint(1, customer_count), 'total_amount': random.choice([250, 350, 450, 600, 800, 1200, 1500, 300]), 'order_date': order_date }) return orders def generate_sample_products(count=15): """Generate sample product data""" products = [] categories = ["Electronics", "Clothing", "Home", "Sports", "Books"] product_names = ["Widget", "Gadget", "Tool", "Item", "Device"] for i in range(1, count + 1): products.append({ 'product_id': i, 'product_name': f"{random.choice(product_names)} {i}", 'category': random.choice(categories), 'price': round(random.uniform(10, 500), 2), 'stock_quantity': random.randint(0, 100) }) return products def create_database_from_tables(tables_used): """Create SQLite database with sample data based on tables mentioned in query""" conn = sqlite3.connect(':memory:') cursor = conn.cursor() sample_data = {} # Generate data based on tables mentioned if 'customers' in tables_used: customers = generate_sample_customers(10) df_customers = pd.DataFrame(customers) df_customers.to_sql('customers', conn, index=False, if_exists='replace') sample_data['customers'] = df_customers if 'orders' in tables_used: orders = generate_sample_orders(10, 20) df_orders = pd.DataFrame(orders) df_orders.to_sql('orders', conn, index=False, if_exists='replace') sample_data['orders'] = df_orders if 'products' in tables_used: products = generate_sample_products(15) df_products = pd.DataFrame(products) df_products.to_sql('products', conn, index=False, if_exists='replace') sample_data['products'] = df_products return conn, sample_data def execute_sql_on_sample_data(sql_query, conn): """Execute the generated SQL query on sample database""" try: df_result = pd.read_sql_query(sql_query, conn) return df_result, None except Exception as e: return None, str(e) def process_nl_query(api_key, natural_query): """Main function to process natural language query""" if not api_key: return "❌ Please enter your Groq API key", "", "", "" if not natural_query: return "❌ Please enter a natural language query", "", "", "" try: # Initialize Groq client client = Groq(api_key=api_key) # Step 1: Generate SQL from natural language output_text = "## 📋 STEP-BY-STEP PROCESS\n\n" output_text += "### Step 1: Understanding User Intent\n" output_text += f"**User Query:** {natural_query}\n\n" # Call Groq API for SQL generation response = client.chat.completions.create( model="mixtral-8x7b-32768", messages=[ { "role": "system", "content": "You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata. Use standard SQL syntax compatible with SQLite.", }, {"role": "user", "content": natural_query}, ], response_format={ "type": "json_object" } ) # Parse the response response_content = response.choices[0].message.content sql_data = json.loads(response_content) # Try to map to our Pydantic model try: sql_query_gen = SQLQueryGeneration(**sql_data) except: # If response doesn't match exact schema, create it manually sql_query_gen = SQLQueryGeneration( query=sql_data.get('query', ''), query_type=sql_data.get('query_type', 'SELECT'), tables_used=sql_data.get('tables_used', []), estimated_complexity=sql_data.get('estimated_complexity', 'medium'), execution_notes=sql_data.get('execution_notes', []), validation_status=ValidationStatus( is_valid=sql_data.get('validation_status', {}).get('is_valid', True), syntax_errors=sql_data.get('validation_status', {}).get('syntax_errors', []) ) ) # Step 2: Display Structured SQL Output output_text += "### Step 2: Generated Structured SQL\n\n" output_text += "```json\n" output_text += json.dumps(sql_query_gen.model_dump(), indent=2) output_text += "\n```\n\n" # Step 3: Generate Sample Database Tables output_text += "### Step 3: Auto-Generated Sample Database Tables\n\n" conn, sample_data = create_database_from_tables(sql_query_gen.tables_used) # Display sample tables for table_name, df in sample_data.items(): output_text += f"**📊 Sample `{table_name}` Table:**\n\n" output_text += df.to_markdown(index=False) output_text += "\n\n" # Step 4: Execute SQL Query output_text += "### Step 4: Execute Generated SQL on Sample Tables\n\n" output_text += f"**SQL Query:**\n```sql\n{sql_query_gen.query}\n```\n\n" result_df, error = execute_sql_on_sample_data(sql_query_gen.query, conn) if error: output_text += f"❌ **Execution Error:** {error}\n" result_table = None else: output_text += "✅ **Query executed successfully!**\n\n" output_text += "**📈 SQL Execution Result:**\n\n" output_text += result_df.to_markdown(index=False) result_table = result_df conn.close() # Format outputs for Gradio json_output = json.dumps(sql_query_gen.model_dump(), indent=2) if result_df is not None: result_display = result_df else: result_display = pd.DataFrame({"Error": [error]}) return output_text, json_output, result_display, sql_query_gen.query except Exception as e: error_msg = f"❌ **Error:** {str(e)}\n\nPlease check your API key and query." return error_msg, "", pd.DataFrame(), "" # Create Gradio Interface with gr.Blocks(title="Natural Language to SQL Query Executor", theme=gr.themes.Soft()) as demo: gr.Markdown(""" # 🔍 Natural Language to SQL Query Executor Convert natural language queries into SQL, generate sample data, and execute queries automatically! **Example queries to try:** - "Find all customers who made orders over $500 in the last 30 days, show their name, email, and total order amount" - "Show all products with stock quantity less than 10" - "List top 5 customers by total order amount" """) with gr.Row(): with gr.Column(scale=1): api_key_input = gr.Textbox( label="🔑 Groq API Key", type="password", placeholder="Enter your Groq API key here...", info="Get your API key from https://console.groq.com" ) query_input = gr.Textbox( label="💬 Natural Language Query", placeholder="e.g., Find all customers who made orders over $500 in the last 30 days...", lines=3 ) submit_btn = gr.Button("🚀 Generate & Execute SQL", variant="primary", size="lg") gr.Markdown("### 📝 Generated SQL Query") sql_output = gr.Code(label="SQL Query", language="sql") with gr.Row(): with gr.Column(): gr.Markdown("### 📊 Process & Results") process_output = gr.Markdown() with gr.Row(): with gr.Column(): gr.Markdown("### 🎯 Structured JSON Output") json_output = gr.Code(label="JSON Response", language="json") with gr.Row(): with gr.Column(): gr.Markdown("### 📈 Query Execution Result") result_output = gr.Dataframe( label="Result Table", interactive=False ) # Connect the button to the processing function submit_btn.click( fn=process_nl_query, inputs=[api_key_input, query_input], outputs=[process_output, json_output, result_output, sql_output] ) gr.Markdown(""" --- ### 📖 How it works: 1. **Enter your Groq API key** - Required for SQL generation 2. **Write your query in plain English** - Describe what data you want to find 3. **Click Generate & Execute** - The system will: - Convert your query to SQL - Generate sample database tables - Execute the query - Show you the results ### 🎯 Features: - ✅ Natural language to SQL conversion - ✅ Automatic sample data generation - ✅ Query validation and metadata - ✅ SQL execution on sample data - ✅ Structured JSON output format """) # Launch the app if __name__ == "__main__": demo.launch()