Spaces:
Paused
Paused
| import gradio as gr | |
| from groq import Groq | |
| from pydantic import BaseModel | |
| import json | |
| import sqlite3 | |
| import pandas as pd | |
| from datetime import datetime, timedelta | |
| import random | |
| # Pydantic models for structured output | |
| class ValidationStatus(BaseModel): | |
| is_valid: bool | |
| syntax_errors: list[str] | |
| class SQLQueryGeneration(BaseModel): | |
| query: str | |
| query_type: str | |
| tables_used: list[str] | |
| estimated_complexity: str | |
| execution_notes: list[str] | |
| validation_status: ValidationStatus | |
| # Sample data generators | |
| def generate_sample_customers(count=10): | |
| """Generate sample customer data""" | |
| first_names = ["Alice", "Bob", "Carol", "David", "Emma", "Frank", "Grace", "Henry", "Ivy", "Jack"] | |
| last_names = ["Johnson", "Smith", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez"] | |
| customers = [] | |
| for i in range(1, count + 1): | |
| fname = random.choice(first_names) | |
| lname = random.choice(last_names) | |
| customers.append({ | |
| 'customer_id': i, | |
| 'name': f"{fname} {lname}", | |
| 'email': f"{fname.lower()}{i}@example.com" | |
| }) | |
| return customers | |
| def generate_sample_orders(customer_count=10, order_count=20): | |
| """Generate sample order data""" | |
| orders = [] | |
| base_date = datetime.now() | |
| for i in range(1, order_count + 1): | |
| days_ago = random.randint(0, 60) | |
| order_date = (base_date - timedelta(days=days_ago)).strftime('%Y-%m-%d') | |
| orders.append({ | |
| 'order_id': 100 + i, | |
| 'customer_id': random.randint(1, customer_count), | |
| 'total_amount': random.choice([250, 350, 450, 600, 800, 1200, 1500, 300]), | |
| 'order_date': order_date | |
| }) | |
| return orders | |
| def generate_sample_products(count=15): | |
| """Generate sample product data""" | |
| products = [] | |
| categories = ["Electronics", "Clothing", "Home", "Sports", "Books"] | |
| product_names = ["Widget", "Gadget", "Tool", "Item", "Device"] | |
| for i in range(1, count + 1): | |
| products.append({ | |
| 'product_id': i, | |
| 'product_name': f"{random.choice(product_names)} {i}", | |
| 'category': random.choice(categories), | |
| 'price': round(random.uniform(10, 500), 2), | |
| 'stock_quantity': random.randint(0, 100) | |
| }) | |
| return products | |
| def create_database_from_tables(tables_used): | |
| """Create SQLite database with sample data based on tables mentioned in query""" | |
| conn = sqlite3.connect(':memory:') | |
| cursor = conn.cursor() | |
| sample_data = {} | |
| # Generate data based on tables mentioned | |
| if 'customers' in tables_used: | |
| customers = generate_sample_customers(10) | |
| df_customers = pd.DataFrame(customers) | |
| df_customers.to_sql('customers', conn, index=False, if_exists='replace') | |
| sample_data['customers'] = df_customers | |
| if 'orders' in tables_used: | |
| orders = generate_sample_orders(10, 20) | |
| df_orders = pd.DataFrame(orders) | |
| df_orders.to_sql('orders', conn, index=False, if_exists='replace') | |
| sample_data['orders'] = df_orders | |
| if 'products' in tables_used: | |
| products = generate_sample_products(15) | |
| df_products = pd.DataFrame(products) | |
| df_products.to_sql('products', conn, index=False, if_exists='replace') | |
| sample_data['products'] = df_products | |
| return conn, sample_data | |
| def execute_sql_on_sample_data(sql_query, conn): | |
| """Execute the generated SQL query on sample database""" | |
| try: | |
| df_result = pd.read_sql_query(sql_query, conn) | |
| return df_result, None | |
| except Exception as e: | |
| return None, str(e) | |
| def process_nl_query(api_key, natural_query): | |
| """Main function to process natural language query""" | |
| if not api_key: | |
| return "β Please enter your Groq API key", "", "", "" | |
| if not natural_query: | |
| return "β Please enter a natural language query", "", "", "" | |
| try: | |
| # Initialize Groq client | |
| client = Groq(api_key=api_key) | |
| # Step 1: Generate SQL from natural language | |
| output_text = "## π STEP-BY-STEP PROCESS\n\n" | |
| output_text += "### Step 1: Understanding User Intent\n" | |
| output_text += f"**User Query:** {natural_query}\n\n" | |
| # Call Groq API for SQL generation | |
| response = client.chat.completions.create( | |
| model="mixtral-8x7b-32768", | |
| messages=[ | |
| { | |
| "role": "system", | |
| "content": "You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata. Use standard SQL syntax compatible with SQLite.", | |
| }, | |
| {"role": "user", "content": natural_query}, | |
| ], | |
| response_format={ | |
| "type": "json_object" | |
| } | |
| ) | |
| # Parse the response | |
| response_content = response.choices[0].message.content | |
| sql_data = json.loads(response_content) | |
| # Try to map to our Pydantic model | |
| try: | |
| sql_query_gen = SQLQueryGeneration(**sql_data) | |
| except: | |
| # If response doesn't match exact schema, create it manually | |
| sql_query_gen = SQLQueryGeneration( | |
| query=sql_data.get('query', ''), | |
| query_type=sql_data.get('query_type', 'SELECT'), | |
| tables_used=sql_data.get('tables_used', []), | |
| estimated_complexity=sql_data.get('estimated_complexity', 'medium'), | |
| execution_notes=sql_data.get('execution_notes', []), | |
| validation_status=ValidationStatus( | |
| is_valid=sql_data.get('validation_status', {}).get('is_valid', True), | |
| syntax_errors=sql_data.get('validation_status', {}).get('syntax_errors', []) | |
| ) | |
| ) | |
| # Step 2: Display Structured SQL Output | |
| output_text += "### Step 2: Generated Structured SQL\n\n" | |
| output_text += "```json\n" | |
| output_text += json.dumps(sql_query_gen.model_dump(), indent=2) | |
| output_text += "\n```\n\n" | |
| # Step 3: Generate Sample Database Tables | |
| output_text += "### Step 3: Auto-Generated Sample Database Tables\n\n" | |
| conn, sample_data = create_database_from_tables(sql_query_gen.tables_used) | |
| # Display sample tables | |
| for table_name, df in sample_data.items(): | |
| output_text += f"**π Sample `{table_name}` Table:**\n\n" | |
| output_text += df.to_markdown(index=False) | |
| output_text += "\n\n" | |
| # Step 4: Execute SQL Query | |
| output_text += "### Step 4: Execute Generated SQL on Sample Tables\n\n" | |
| output_text += f"**SQL Query:**\n```sql\n{sql_query_gen.query}\n```\n\n" | |
| result_df, error = execute_sql_on_sample_data(sql_query_gen.query, conn) | |
| if error: | |
| output_text += f"β **Execution Error:** {error}\n" | |
| result_table = None | |
| else: | |
| output_text += "β **Query executed successfully!**\n\n" | |
| output_text += "**π SQL Execution Result:**\n\n" | |
| output_text += result_df.to_markdown(index=False) | |
| result_table = result_df | |
| conn.close() | |
| # Format outputs for Gradio | |
| json_output = json.dumps(sql_query_gen.model_dump(), indent=2) | |
| if result_df is not None: | |
| result_display = result_df | |
| else: | |
| result_display = pd.DataFrame({"Error": [error]}) | |
| return output_text, json_output, result_display, sql_query_gen.query | |
| except Exception as e: | |
| error_msg = f"β **Error:** {str(e)}\n\nPlease check your API key and query." | |
| return error_msg, "", pd.DataFrame(), "" | |
| # Create Gradio Interface | |
| with gr.Blocks(title="Natural Language to SQL Query Executor", theme=gr.themes.Soft()) as demo: | |
| gr.Markdown(""" | |
| # π Natural Language to SQL Query Executor | |
| Convert natural language queries into SQL, generate sample data, and execute queries automatically! | |
| **Example queries to try:** | |
| - "Find all customers who made orders over $500 in the last 30 days, show their name, email, and total order amount" | |
| - "Show all products with stock quantity less than 10" | |
| - "List top 5 customers by total order amount" | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| api_key_input = gr.Textbox( | |
| label="π Groq API Key", | |
| type="password", | |
| placeholder="Enter your Groq API key here...", | |
| info="Get your API key from https://console.groq.com" | |
| ) | |
| query_input = gr.Textbox( | |
| label="π¬ Natural Language Query", | |
| placeholder="e.g., Find all customers who made orders over $500 in the last 30 days...", | |
| lines=3 | |
| ) | |
| submit_btn = gr.Button("π Generate & Execute SQL", variant="primary", size="lg") | |
| gr.Markdown("### π Generated SQL Query") | |
| sql_output = gr.Code(label="SQL Query", language="sql") | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("### π Process & Results") | |
| process_output = gr.Markdown() | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("### π― Structured JSON Output") | |
| json_output = gr.Code(label="JSON Response", language="json") | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("### π Query Execution Result") | |
| result_output = gr.Dataframe( | |
| label="Result Table", | |
| interactive=False | |
| ) | |
| # Connect the button to the processing function | |
| submit_btn.click( | |
| fn=process_nl_query, | |
| inputs=[api_key_input, query_input], | |
| outputs=[process_output, json_output, result_output, sql_output] | |
| ) | |
| gr.Markdown(""" | |
| --- | |
| ### π How it works: | |
| 1. **Enter your Groq API key** - Required for SQL generation | |
| 2. **Write your query in plain English** - Describe what data you want to find | |
| 3. **Click Generate & Execute** - The system will: | |
| - Convert your query to SQL | |
| - Generate sample database tables | |
| - Execute the query | |
| - Show you the results | |
| ### π― Features: | |
| - β Natural language to SQL conversion | |
| - β Automatic sample data generation | |
| - β Query validation and metadata | |
| - β SQL execution on sample data | |
| - β Structured JSON output format | |
| """) | |
| # Launch the app | |
| if __name__ == "__main__": | |
| demo.launch() |