shukdevdattaEX committed on
Commit
944a160
·
verified ·
1 Parent(s): f9d7c74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +256 -439
app.py CHANGED
@@ -3,10 +3,9 @@ from groq import Groq
3
  from pydantic import BaseModel
4
  import json
5
  import sqlite3
6
- import pandas as pd
7
  from datetime import datetime, timedelta
8
  import random
9
- import re
10
 
11
  # Pydantic models for structured output
12
  class ValidationStatus(BaseModel):
@@ -21,510 +20,328 @@ class SQLQueryGeneration(BaseModel):
21
  execution_notes: list[str]
22
  validation_status: ValidationStatus
23
 
24
- def extract_table_schema_from_sql(sql_query):
25
- """Extract all column names and table names from SQL query"""
26
- # Extract table names
27
- table_pattern = r'FROM\s+(\w+)|JOIN\s+(\w+)'
28
- tables = re.findall(table_pattern, sql_query, re.IGNORECASE)
29
- table_names = [t[0] or t[1] for t in tables]
30
-
31
- # Extract column names from SELECT, WHERE, GROUP BY, ORDER BY
32
- # Remove aliases (AS something)
33
- cleaned_query = re.sub(r'\s+AS\s+\w+', '', sql_query, flags=re.IGNORECASE)
34
-
35
- # Find all potential column references (table.column or column)
36
- column_pattern = r'(?:[\w]+\.)?(\w+)'
37
-
38
- # Extract from different parts
39
- columns = set()
40
-
41
- # From SELECT clause
42
- select_match = re.search(r'SELECT\s+(.+?)\s+FROM', sql_query, re.IGNORECASE | re.DOTALL)
43
- if select_match:
44
- select_part = select_match.group(1)
45
- # Remove aggregation functions
46
- select_part = re.sub(r'(SUM|COUNT|AVG|MAX|MIN|DISTINCT)\s*\(', '', select_part, flags=re.IGNORECASE)
47
- select_part = re.sub(r'\)', '', select_part)
48
- cols = re.findall(r'[\w]+\.(\w+)|(?:^|,\s*)(\w+)', select_part)
49
- for col in cols:
50
- c = col[0] or col[1]
51
- if c and c.upper() not in ['SELECT', 'FROM', 'WHERE', 'AS', 'ON']:
52
- columns.add(c.lower())
53
-
54
- # From WHERE clause
55
- where_match = re.search(r'WHERE\s+(.+?)(?:GROUP|ORDER|LIMIT|$)', sql_query, re.IGNORECASE | re.DOTALL)
56
- if where_match:
57
- where_part = where_match.group(1)
58
- cols = re.findall(r'[\w]+\.(\w+)|(\w+)\s*[=<>!]', where_part)
59
- for col in cols:
60
- c = col[0] or col[1]
61
- if c and c.upper() not in ['AND', 'OR', 'NOT', 'IN', 'LIKE', 'IS', 'NULL']:
62
- columns.add(c.lower())
63
-
64
- # From JOIN ON clause
65
- join_matches = re.findall(r'ON\s+(.+?)(?:WHERE|GROUP|ORDER|JOIN|$)', sql_query, re.IGNORECASE)
66
- for join_match in join_matches:
67
- cols = re.findall(r'[\w]+\.(\w+)', join_match)
68
- columns.update([c.lower() for c in cols])
69
-
70
- # From GROUP BY
71
- group_match = re.search(r'GROUP\s+BY\s+(.+?)(?:ORDER|HAVING|LIMIT|$)', sql_query, re.IGNORECASE)
72
- if group_match:
73
- group_part = group_match.group(1)
74
- cols = re.findall(r'[\w]+\.(\w+)|(\w+)', group_part)
75
- for col in cols:
76
- c = col[0] or col[1]
77
- if c:
78
- columns.add(c.lower())
79
-
80
- # From ORDER BY
81
- order_match = re.search(r'ORDER\s+BY\s+(.+?)(?:LIMIT|$)', sql_query, re.IGNORECASE)
82
- if order_match:
83
- order_part = order_match.group(1)
84
- cols = re.findall(r'[\w]+\.(\w+)|(\w+)', order_part)
85
- for col in cols:
86
- c = col[0] or col[1]
87
- if c and c.upper() not in ['ASC', 'DESC']:
88
- columns.add(c.lower())
89
-
90
- return list(set(table_names)), list(columns)
91
-
92
- def generate_table_with_columns(table_name, required_columns, row_count=15):
93
- """Generate table data ensuring ALL required columns exist"""
94
-
95
- # Helper functions
96
- def gen_id():
97
- return list(range(1, row_count + 1))
98
-
99
- def gen_names():
100
- first = ["Alice", "Bob", "Carol", "David", "Emma", "Frank", "Grace", "Henry", "Ivy", "Jack",
101
- "Karen", "Leo", "Maria", "Nathan", "Olivia"]
102
- last = ["Johnson", "Smith", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis",
103
- "Rodriguez", "Martinez", "Anderson", "Taylor", "Thomas", "Moore", "Jackson"]
104
- return [f"{random.choice(first)} {random.choice(last)}" for _ in range(row_count)]
105
-
106
- def gen_emails():
107
- return [f"user{i}@example.com" for i in range(1, row_count + 1)]
108
-
109
- def gen_dates(days_back=365):
110
- base = datetime.now()
111
- return [(base - timedelta(days=random.randint(0, days_back))).strftime('%Y-%m-%d')
112
- for _ in range(row_count)]
113
-
114
- def gen_years():
115
- return [random.randint(2000, 2025) for _ in range(row_count)]
116
-
117
- def gen_amounts():
118
- return [round(random.uniform(100, 5000), 2) for _ in range(row_count)]
119
-
120
- def gen_salaries():
121
- return [random.choice([45000, 55000, 65000, 75000, 85000, 95000, 105000, 120000])
122
- for _ in range(row_count)]
123
-
124
- def gen_prices():
125
- return [round(random.uniform(10, 1000), 2) for _ in range(row_count)]
126
-
127
- def gen_quantities():
128
- return [random.randint(0, 100) for _ in range(row_count)]
129
-
130
- def gen_ratings():
131
- return [round(random.uniform(1.0, 10.0), 1) for _ in range(row_count)]
132
-
133
- def gen_scores():
134
- return [random.randint(60, 100) for _ in range(row_count)]
135
-
136
- def gen_ages():
137
- return [random.randint(18, 80) for _ in range(row_count)]
138
-
139
- def gen_boolean():
140
- return [random.choice([True, False, True, True]) for _ in range(row_count)]
141
-
142
- def gen_status():
143
- return [random.choice(['Active', 'Inactive', 'Pending', 'Active', 'Active'])
144
- for _ in range(row_count)]
145
-
146
- def gen_categories():
147
- return [random.choice(['Category A', 'Category B', 'Category C', 'Category D'])
148
- for _ in range(row_count)]
149
-
150
- def gen_foreign_key():
151
- return [random.randint(1, 15) for _ in range(row_count)]
152
-
153
- def gen_phone():
154
- return [f"+1-555-{random.randint(1000, 9999)}" for _ in range(row_count)]
155
-
156
- def gen_text():
157
- return [f"Text content {i}" for i in range(1, row_count + 1)]
158
-
159
- def gen_duration():
160
- return [random.randint(60, 240) for _ in range(row_count)]
161
-
162
- # Column type mapping based on name patterns
163
- def infer_column_data(col_name):
164
- col_lower = col_name.lower()
165
-
166
- # ID columns
167
- if col_lower.endswith('_id') or col_lower == 'id':
168
- if col_lower == f'{table_name}_id' or col_lower == 'id':
169
- return gen_id()
170
- return gen_foreign_key()
171
-
172
- # Name columns
173
- if 'name' in col_lower or 'title' in col_lower:
174
- return gen_names() if 'name' in col_lower else gen_text()
175
-
176
- # Email columns
177
- if 'email' in col_lower:
178
- return gen_emails()
179
-
180
- # Phone columns
181
- if 'phone' in col_lower:
182
- return gen_phone()
183
-
184
- # Date columns
185
- if any(word in col_lower for word in ['date', 'created', 'updated', 'joined', 'registered', 'hired', 'published', 'visited', 'appointed', 'enrolled']):
186
- return gen_dates()
187
-
188
- # Year columns
189
- if 'year' in col_lower or col_lower.endswith('_year'):
190
- return gen_years()
191
-
192
- # Money/Amount columns
193
- if any(word in col_lower for word in ['salary', 'amount', 'price', 'cost', 'revenue', 'budget']):
194
- if 'salary' in col_lower:
195
- return gen_salaries()
196
- elif 'price' in col_lower or 'cost' in col_lower:
197
- return gen_prices()
198
- return gen_amounts()
199
-
200
- # Rating columns
201
- if 'rating' in col_lower or 'score' in col_lower:
202
- if 'rating' in col_lower:
203
- return gen_ratings()
204
- return gen_scores()
205
-
206
- # Age columns
207
- if 'age' in col_lower:
208
- return gen_ages()
209
-
210
- # Quantity/Stock columns
211
- if any(word in col_lower for word in ['quantity', 'stock', 'count', 'level']):
212
- return gen_quantities()
213
-
214
- # Status columns
215
- if 'status' in col_lower:
216
- return gen_status()
217
-
218
- # Category/Type columns
219
- if any(word in col_lower for word in ['category', 'type', 'genre', 'department', 'major', 'subject']):
220
- return gen_categories()
221
-
222
- # Boolean columns
223
- if any(word in col_lower for word in ['available', 'active', 'enabled', 'verified', 'completed']):
224
- return gen_boolean()
225
-
226
- # Duration/Time columns
227
- if any(word in col_lower for word in ['duration', 'time', 'minutes', 'hours']):
228
- return gen_duration()
229
-
230
- # Position/Role columns
231
- if any(word in col_lower for word in ['position', 'role', 'job', 'title']):
232
- return [random.choice(['Manager', 'Engineer', 'Analyst', 'Developer', 'Designer'])
233
- for _ in range(row_count)]
234
-
235
- # Default to text
236
- return gen_text()
237
-
238
- # Build the table schema
239
- table_data = {}
240
-
241
- # Ensure primary ID exists
242
- primary_id = f'{table_name}_id'
243
- if primary_id not in required_columns and 'id' not in required_columns:
244
- table_data[primary_id] = gen_id()
245
-
246
- # Add all required columns
247
- for col in required_columns:
248
- if col not in table_data:
249
- table_data[col] = infer_column_data(col)
250
-
251
- return table_data
252
-
253
- def create_database_from_sql(sql_query, tables_used):
254
- """Create SQLite database with sample data based on SQL query analysis"""
255
  conn = sqlite3.connect(':memory:')
256
-
257
- # Extract schema from SQL
258
- detected_tables, detected_columns = extract_table_schema_from_sql(sql_query)
259
-
260
- # Merge with provided tables
261
- all_tables = list(set(tables_used + detected_tables))
262
 
263
  sample_data = {}
264
 
265
- # For each table, determine which columns it needs
266
- for table in all_tables:
267
- table_name = table.lower().strip()
268
-
269
- # Find columns that belong to this table from SQL
270
- table_columns = []
271
-
272
- # Look for table.column references
273
- table_col_pattern = rf'{table_name}\.(\w+)'
274
- table_specific_cols = re.findall(table_col_pattern, sql_query, re.IGNORECASE)
275
- table_columns.extend([col.lower() for col in table_specific_cols])
276
-
277
- # If no table-specific columns found, add common columns based on detected columns
278
- if not table_columns:
279
- table_columns = detected_columns
280
-
281
- # Ensure we have at least some basic columns
282
- if not table_columns:
283
- table_columns = ['id', 'name', 'created_date', 'status']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
- # Generate table with required columns
286
- row_count = 5 if table_name == 'departments' else 15
287
- table_dict = generate_table_with_columns(table_name, table_columns, row_count)
 
 
 
 
288
 
289
- df = pd.DataFrame(table_dict)
290
- df.to_sql(table_name, conn, index=False, if_exists='replace')
291
- sample_data[table_name] = df
292
 
293
- return conn, sample_data
294
 
295
- def execute_sql_on_sample_data(sql_query, conn):
296
- """Execute the generated SQL query on sample database"""
297
  try:
298
- df_result = pd.read_sql_query(sql_query, conn)
299
- return df_result, None
 
 
 
 
 
 
 
300
  except Exception as e:
301
- return None, str(e)
302
 
303
- def process_nl_query(api_key, natural_query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  """Main function to process natural language query"""
305
  if not api_key:
306
- return "❌ Please enter your Groq API key", "", pd.DataFrame(), ""
307
 
308
- if not natural_query:
309
- return "❌ Please enter a natural language query", "", pd.DataFrame(), ""
310
 
311
  try:
312
- # Initialize Groq client
313
  client = Groq(api_key=api_key)
314
 
315
- # Step 1: Generate SQL from natural language
316
- output_text = "## πŸ“‹ STEP-BY-STEP PROCESS\n\n"
317
- output_text += "### Step 1: Understanding User Intent\n"
318
- output_text += f"**User Query:** {natural_query}\n\n"
319
-
320
- # Call Groq API for SQL generation with Kimi model
321
  response = client.chat.completions.create(
322
  model="moonshotai/kimi-k2-instruct-0905",
323
  messages=[
324
  {
325
  "role": "system",
326
- "content": """You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata.
327
-
328
- IMPORTANT: Return your response in JSON format with the following structure:
329
- {
330
- "query": "SQL query string",
331
- "query_type": "SELECT/INSERT/UPDATE/DELETE",
332
- "tables_used": ["table1", "table2"],
333
- "estimated_complexity": "low/medium/high",
334
- "execution_notes": ["note1", "note2"],
335
- "validation_status": {
336
- "is_valid": true/false,
337
- "syntax_errors": []
338
- }
339
- }
340
-
341
- CRITICAL SQL GENERATION RULES:
342
- - Use standard SQL syntax compatible with SQLite
343
- - Always use proper JOINs when multiple tables are involved
344
- - Use WHERE clauses for filtering
345
- - Use GROUP BY for aggregations
346
- - For date/year comparisons, use column names like 'release_year' NOT 'release_date' for year-based filtering
347
- - Common date columns: created_date, updated_date, order_date, hire_date, publication_year, release_year
348
- - Extract ALL table names mentioned or implied in the query and list them in "tables_used"
349
- - If a query mentions departments and employees, include BOTH tables
350
- - Be thorough in identifying all tables needed for the query
351
- - Use consistent column naming: prefer release_year over release_date for movies, publication_year for books
352
- - When filtering by years or time periods, use the appropriate column (release_year, publication_year, etc.)""",
353
- },
354
- {
355
- "role": "user",
356
- "content": f"Convert this natural language query to SQL and return as JSON. Use proper column names (e.g., release_year instead of release_date for year-based filters): {natural_query}"
357
  },
 
358
  ],
359
  response_format={
360
- "type": "json_object"
361
- },
362
- temperature=0.3
 
 
 
363
  )
364
 
365
- # Parse the response
366
- response_content = response.choices[0].message.content
367
- sql_data = json.loads(response_content)
368
-
369
- # Try to map to our Pydantic model with better error handling
370
- try:
371
- sql_query_gen = SQLQueryGeneration(**sql_data)
372
- except Exception as e:
373
- # If response doesn't match exact schema, create it manually
374
- sql_query_gen = SQLQueryGeneration(
375
- query=sql_data.get('query', sql_data.get('sql_query', '')),
376
- query_type=sql_data.get('query_type', 'SELECT'),
377
- tables_used=sql_data.get('tables_used', sql_data.get('tables', [])),
378
- estimated_complexity=sql_data.get('estimated_complexity', 'medium'),
379
- execution_notes=sql_data.get('execution_notes', sql_data.get('notes', [])),
380
- validation_status=ValidationStatus(
381
- is_valid=sql_data.get('validation_status', {}).get('is_valid', True),
382
- syntax_errors=sql_data.get('validation_status', {}).get('syntax_errors', [])
383
- )
384
- )
385
-
386
- # Step 2: Display Structured SQL Output
387
- output_text += "### Step 2: Generated Structured SQL\n\n"
388
- output_text += "```json\n"
389
- output_text += json.dumps(sql_query_gen.model_dump(), indent=2)
390
- output_text += "\n```\n\n"
391
-
392
- # Step 3: Generate Sample Database Tables - INTELLIGENT SCHEMA DETECTION
393
- output_text += "### Step 3: Auto-Generated Sample Database Tables\n\n"
394
- output_text += f"**Analyzing SQL query to create appropriate table schemas...**\n\n"
395
-
396
- conn, sample_data = create_database_from_sql(sql_query_gen.query, sql_query_gen.tables_used)
397
 
398
- # Display sample tables (show first 10 rows for readability)
399
- for table_name, df in sample_data.items():
400
- output_text += f"**πŸ“Š Sample `{table_name}` Table** ({len(df)} rows):\n\n"
401
- output_text += f"*Columns: {', '.join(df.columns.tolist())}*\n\n"
402
- display_df = df.head(10)
403
- output_text += display_df.to_markdown(index=False)
404
- if len(df) > 10:
405
- output_text += f"\n\n*...and {len(df) - 10} more rows*"
406
- output_text += "\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
 
408
- # Step 4: Execute SQL Query
409
- output_text += "### Step 4: Execute Generated SQL on Sample Tables\n\n"
410
- output_text += f"**SQL Query:**\n```sql\n{sql_query_gen.query}\n```\n\n"
 
 
411
 
412
- result_df, error = execute_sql_on_sample_data(sql_query_gen.query, conn)
 
413
 
414
  if error:
415
- output_text += f"❌ **Execution Error:** {error}\n\n"
416
- output_text += "**Troubleshooting:** The SQL query may reference columns that don't exist in the generated tables. "
417
- output_text += "This can happen if the AI model uses different column names than what was generated.\n"
418
- result_table = pd.DataFrame({"Error": [error]})
 
 
 
419
  else:
420
- output_text += "βœ… **Query executed successfully!**\n\n"
421
- output_text += f"**πŸ“ˆ SQL Execution Result** ({len(result_df)} rows returned):\n\n"
422
- if len(result_df) > 0:
423
- output_text += result_df.to_markdown(index=False)
424
- else:
425
- output_text += "*No results found matching the criteria*"
426
- result_table = result_df
 
 
 
427
 
428
  conn.close()
429
 
430
- # Format outputs for Gradio
431
- json_output = json.dumps(sql_query_gen.model_dump(), indent=2)
432
-
433
- return output_text, json_output, result_table, sql_query_gen.query
434
 
435
  except Exception as e:
436
- error_msg = f"❌ **Error:** {str(e)}\n\n**Full error details:**\n```\n{repr(e)}\n```\n\nPlease check your API key and try again."
437
- return error_msg, "", pd.DataFrame({"Error": [str(e)]}), ""
438
 
439
- # Create Gradio Interface
440
- with gr.Blocks(title="Natural Language to SQL Query Executor", theme=gr.themes.Ocean()) as demo:
441
  gr.Markdown("""
442
- # πŸ” Natural Language to SQL Query Executor with Intelligent Schema Detection
443
 
444
- Convert **ANY** natural language query into SQL, automatically generate matching database schemas, and execute queries!
445
 
446
  **Example queries to try:**
447
  - "Find all customers who made orders over $500 in the last 30 days, show their name, email, and total order amount"
448
- - "Show all employees who earn more than $75,000 and work in the Engineering department"
449
- - "List students who scored above 85% in Mathematics"
450
- - "Find all movies released in the last 5 years with rating above 8.0"
451
- - "Show properties with price between $200,000 and $500,000"
452
- - "List all books published after 2020 that are available"
453
- - "Show active gym members whose membership expires in the next 30 days"
454
  """)
455
 
456
  with gr.Row():
457
- with gr.Column(scale=1):
458
  api_key_input = gr.Textbox(
459
  label="πŸ”‘ Groq API Key",
460
- type="password",
461
  placeholder="Enter your Groq API key here...",
462
- info="Get your API key from https://console.groq.com"
463
  )
464
-
465
  query_input = gr.Textbox(
466
  label="πŸ’¬ Natural Language Query",
467
- placeholder="e.g., Find all movies released in the last 5 years with rating above 8.0...",
468
  lines=3
469
  )
470
-
471
  submit_btn = gr.Button("πŸš€ Generate & Execute SQL", variant="primary", size="lg")
472
-
473
- gr.Markdown("### πŸ“ Generated SQL Query")
474
- sql_output = gr.Code(label="SQL Query", language="sql")
475
-
476
- with gr.Row():
477
- with gr.Column():
478
- gr.Markdown("### πŸ“Š Process & Results")
479
- process_output = gr.Markdown()
480
 
481
  with gr.Row():
482
  with gr.Column():
483
- gr.Markdown("### 🎯 Structured JSON Output")
484
- json_output = gr.Code(label="JSON Response", language="json")
 
 
485
 
486
- with gr.Row():
487
- with gr.Column():
488
- gr.Markdown("### πŸ“ˆ Query Execution Result")
489
- result_output = gr.Dataframe(
490
- label="Result Table",
491
- interactive=False,
492
- wrap=True
493
- )
494
-
495
- # Connect the button to the processing function
496
  submit_btn.click(
497
- fn=process_nl_query,
498
  inputs=[api_key_input, query_input],
499
- outputs=[process_output, json_output, result_output, sql_output]
500
  )
501
 
502
  gr.Markdown("""
503
  ---
504
- ### πŸ“– How it works:
505
- 1. **Enter your Groq API key** - Required for SQL generation (using Kimi K2 Instruct model)
506
- 2. **Write your query in plain English** - Describe what data you want to find
507
- 3. **Click Generate & Execute** - The system will:
508
- - Convert your query to SQL
509
- - **Intelligently analyze the SQL to detect required columns**
510
- - Automatically create tables with the exact columns needed
511
- - Generate realistic sample data matching the schema
512
- - Execute the query
513
- - Show you the results
514
-
515
- ### 🎯 Revolutionary Features:
516
- - βœ… **AI-powered SQL generation** using Kimi K2 Instruct
517
- - βœ… **Intelligent schema detection** - Analyzes SQL to create matching tables
518
- - βœ… **Dynamic column inference** - Automatically determines column types from SQL
519
- - βœ… **Handles ANY query** - No predefined schemas, works with any table/column combination
520
- - βœ… **Smart data generation** - Creates realistic data based on column names
521
- - βœ… **Zero errors** - Tables always match the generated SQL
522
- - βœ… **Universal support** - Works with employees, movies, students, products, and ANY other domain!
523
-
524
- ### 🧠 Intelligence:
525
- The system analyzes your SQL query to understand what columns are needed, then generates tables with exactly those columns!
526
  """)
527
 
528
- # Launch the app
529
  if __name__ == "__main__":
530
- demo.launch()
 
3
  from pydantic import BaseModel
4
  import json
5
  import sqlite3
6
+ import re
7
  from datetime import datetime, timedelta
8
  import random
 
9
 
10
  # Pydantic models for structured output
11
  class ValidationStatus(BaseModel):
 
20
  execution_notes: list[str]
21
  validation_status: ValidationStatus
22
 
23
+ def generate_sample_data(query, tables_used):
24
+ """Generate sample data based on the query and tables used"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  conn = sqlite3.connect(':memory:')
26
+ cursor = conn.cursor()
 
 
 
 
 
27
 
28
  sample_data = {}
29
 
30
+ # Generate data based on common table patterns
31
+ if 'customers' in tables_used:
32
+ cursor.execute('''
33
+ CREATE TABLE customers (
34
+ customer_id INTEGER PRIMARY KEY,
35
+ name TEXT,
36
+ email TEXT
37
+ )
38
+ ''')
39
+
40
+ customers = [
41
+ (1, 'Alice Johnson', 'alice@example.com'),
42
+ (2, 'Bob Smith', 'bob@example.com'),
43
+ (3, 'Carol Williams', 'carol@example.com'),
44
+ (4, 'David Brown', 'david@example.com'),
45
+ (5, 'Eve Davis', 'eve@example.com')
46
+ ]
47
+
48
+ cursor.executemany('INSERT INTO customers VALUES (?, ?, ?)', customers)
49
+ sample_data['customers'] = customers
50
+
51
+ if 'orders' in tables_used:
52
+ cursor.execute('''
53
+ CREATE TABLE orders (
54
+ order_id INTEGER PRIMARY KEY,
55
+ customer_id INTEGER,
56
+ total_amount REAL,
57
+ order_date TEXT
58
+ )
59
+ ''')
60
+
61
+ today = datetime.now()
62
+ orders = [
63
+ (101, 1, 600, (today - timedelta(days=10)).strftime('%Y-%m-%d')),
64
+ (102, 1, 450, (today - timedelta(days=5)).strftime('%Y-%m-%d')),
65
+ (103, 2, 1200, (today - timedelta(days=15)).strftime('%Y-%m-%d')),
66
+ (104, 3, 300, (today - timedelta(days=20)).strftime('%Y-%m-%d')),
67
+ (105, 3, 800, (today - timedelta(days=2)).strftime('%Y-%m-%d')),
68
+ (106, 4, 550, (today - timedelta(days=7)).strftime('%Y-%m-%d')),
69
+ (107, 5, 1500, (today - timedelta(days=12)).strftime('%Y-%m-%d'))
70
+ ]
71
+
72
+ cursor.executemany('INSERT INTO orders VALUES (?, ?, ?, ?)', orders)
73
+ sample_data['orders'] = orders
74
+
75
+ if 'products' in tables_used:
76
+ cursor.execute('''
77
+ CREATE TABLE products (
78
+ product_id INTEGER PRIMARY KEY,
79
+ product_name TEXT,
80
+ price REAL,
81
+ category TEXT
82
+ )
83
+ ''')
84
+
85
+ products = [
86
+ (1, 'Laptop', 999.99, 'Electronics'),
87
+ (2, 'Mouse', 29.99, 'Electronics'),
88
+ (3, 'Keyboard', 79.99, 'Electronics'),
89
+ (4, 'Monitor', 299.99, 'Electronics'),
90
+ (5, 'Desk', 199.99, 'Furniture')
91
+ ]
92
+
93
+ cursor.executemany('INSERT INTO products VALUES (?, ?, ?, ?)', products)
94
+ sample_data['products'] = products
95
+
96
+ if 'employees' in tables_used:
97
+ cursor.execute('''
98
+ CREATE TABLE employees (
99
+ employee_id INTEGER PRIMARY KEY,
100
+ name TEXT,
101
+ department TEXT,
102
+ salary REAL
103
+ )
104
+ ''')
105
 
106
+ employees = [
107
+ (1, 'John Doe', 'Engineering', 85000),
108
+ (2, 'Jane Smith', 'Marketing', 75000),
109
+ (3, 'Mike Johnson', 'Sales', 70000),
110
+ (4, 'Sarah Williams', 'Engineering', 90000),
111
+ (5, 'Tom Brown', 'HR', 65000)
112
+ ]
113
 
114
+ cursor.executemany('INSERT INTO employees VALUES (?, ?, ?, ?)', employees)
115
+ sample_data['employees'] = employees
 
116
 
117
+ return conn, cursor, sample_data
118
 
119
+ def execute_sql_query(cursor, query):
120
+ """Execute the SQL query and return results"""
121
  try:
122
+ # Convert MySQL/PostgreSQL specific functions to SQLite
123
+ sqlite_query = query.replace('DATE_SUB(NOW(), INTERVAL 30 DAY)',
124
+ f"date('now', '-30 days')")
125
+ sqlite_query = sqlite_query.replace('NOW()', "date('now')")
126
+
127
+ cursor.execute(sqlite_query)
128
+ results = cursor.fetchall()
129
+ columns = [description[0] for description in cursor.description]
130
+ return results, columns, None
131
  except Exception as e:
132
+ return None, None, str(e)
133
 
134
+ def format_sample_tables(sample_data):
135
+ """Format sample tables as HTML for display"""
136
+ html = "<div style='margin: 20px 0;'>"
137
+
138
+ for table_name, data in sample_data.items():
139
+ html += f"<h3>πŸ“Š Sample {table_name} Table</h3>"
140
+ html += "<table style='border-collapse: collapse; width: 100%; margin-bottom: 20px;'>"
141
+
142
+ if table_name == 'customers':
143
+ html += "<tr style='background-color: #f0f0f0;'><th style='border: 1px solid #ddd; padding: 8px;'>customer_id</th><th style='border: 1px solid #ddd; padding: 8px;'>name</th><th style='border: 1px solid #ddd; padding: 8px;'>email</th></tr>"
144
+ for row in data:
145
+ html += f"<tr><td style='border: 1px solid #ddd; padding: 8px;'>{row[0]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[1]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[2]}</td></tr>"
146
+
147
+ elif table_name == 'orders':
148
+ html += "<tr style='background-color: #f0f0f0;'><th style='border: 1px solid #ddd; padding: 8px;'>order_id</th><th style='border: 1px solid #ddd; padding: 8px;'>customer_id</th><th style='border: 1px solid #ddd; padding: 8px;'>total_amount</th><th style='border: 1px solid #ddd; padding: 8px;'>order_date</th></tr>"
149
+ for row in data:
150
+ html += f"<tr><td style='border: 1px solid #ddd; padding: 8px;'>{row[0]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[1]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[2]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[3]}</td></tr>"
151
+
152
+ elif table_name == 'products':
153
+ html += "<tr style='background-color: #f0f0f0;'><th style='border: 1px solid #ddd; padding: 8px;'>product_id</th><th style='border: 1px solid #ddd; padding: 8px;'>product_name</th><th style='border: 1px solid #ddd; padding: 8px;'>price</th><th style='border: 1px solid #ddd; padding: 8px;'>category</th></tr>"
154
+ for row in data:
155
+ html += f"<tr><td style='border: 1px solid #ddd; padding: 8px;'>{row[0]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[1]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[2]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[3]}</td></tr>"
156
+
157
+ elif table_name == 'employees':
158
+ html += "<tr style='background-color: #f0f0f0;'><th style='border: 1px solid #ddd; padding: 8px;'>employee_id</th><th style='border: 1px solid #ddd; padding: 8px;'>name</th><th style='border: 1px solid #ddd; padding: 8px;'>department</th><th style='border: 1px solid #ddd; padding: 8px;'>salary</th></tr>"
159
+ for row in data:
160
+ html += f"<tr><td style='border: 1px solid #ddd; padding: 8px;'>{row[0]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[1]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[2]}</td><td style='border: 1px solid #ddd; padding: 8px;'>{row[3]}</td></tr>"
161
+
162
+ html += "</table>"
163
+
164
+ html += "</div>"
165
+ return html
166
+
167
+ def format_execution_result(results, columns):
168
+ """Format SQL execution results as HTML table"""
169
+ if not results:
170
+ return "<p>No results found.</p>"
171
+
172
+ html = "<div style='margin: 20px 0;'>"
173
+ html += "<h3>βœ… SQL Execution Result (Final Output Table)</h3>"
174
+ html += "<table style='border-collapse: collapse; width: 100%;'>"
175
+
176
+ # Header
177
+ html += "<tr style='background-color: #4CAF50; color: white;'>"
178
+ for col in columns:
179
+ html += f"<th style='border: 1px solid #ddd; padding: 8px;'>{col}</th>"
180
+ html += "</tr>"
181
+
182
+ # Rows
183
+ for row in results:
184
+ html += "<tr>"
185
+ for cell in row:
186
+ html += f"<td style='border: 1px solid #ddd; padding: 8px;'>{cell}</td>"
187
+ html += "</tr>"
188
+
189
+ html += "</table></div>"
190
+ return html
191
+
192
+ def process_query(api_key, user_query):
193
  """Main function to process natural language query"""
194
  if not api_key:
195
+ return "❌ Please enter your Groq API key", "", "", ""
196
 
197
+ if not user_query:
198
+ return "❌ Please enter a query", "", "", ""
199
 
200
  try:
201
+ # Step 1: Generate SQL using Groq
202
  client = Groq(api_key=api_key)
203
 
 
 
 
 
 
 
204
  response = client.chat.completions.create(
205
  model="moonshotai/kimi-k2-instruct-0905",
206
  messages=[
207
  {
208
  "role": "system",
209
+ "content": "You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata. Use standard SQL syntax compatible with MySQL.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  },
211
+ {"role": "user", "content": user_query},
212
  ],
213
  response_format={
214
+ "type": "json_schema",
215
+ "json_schema": {
216
+ "name": "sql_query_generation",
217
+ "schema": SQLQueryGeneration.model_json_schema()
218
+ }
219
+ }
220
  )
221
 
222
+ sql_query_generation = SQLQueryGeneration.model_validate(
223
+ json.loads(response.choices[0].message.content)
224
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
+ # Step 2: Format the structured output
227
+ step1_output = f"""
228
+ ## 🎯 Step 1: Understand User Intent
229
+
230
+ **User Query:** "{user_query}"
231
+
232
+ **Identified Components:**
233
+ - **Tables:** {', '.join(sql_query_generation.tables_used)}
234
+ - **Query Type:** {sql_query_generation.query_type}
235
+ - **Complexity:** {sql_query_generation.estimated_complexity}
236
+ """
237
+
238
+ step2_output = f"""
239
+ ## πŸ”§ Step 2: Generate Structured SQL
240
+
241
+ ```json
242
+ {json.dumps(sql_query_generation.model_dump(), indent=2)}
243
+ ```
244
+
245
+ **Generated SQL Query:**
246
+ ```sql
247
+ {sql_query_generation.query}
248
+ ```
249
+ """
250
+
251
+ # Step 3: Generate sample data
252
+ conn, cursor, sample_data = generate_sample_data(
253
+ sql_query_generation.query,
254
+ sql_query_generation.tables_used
255
+ )
256
 
257
+ step3_output = f"""
258
+ ## πŸ“Š Step 3: Auto-Generate Sample Database Tables
259
+
260
+ {format_sample_tables(sample_data)}
261
+ """
262
 
263
+ # Step 4: Execute query
264
+ results, columns, error = execute_sql_query(cursor, sql_query_generation.query)
265
 
266
  if error:
267
+ step4_output = f"""
268
+ ## ⚠️ Step 4: SQL Execution
269
+
270
+ **Error:** {error}
271
+
272
+ **Note:** The query might use database-specific functions. The sample execution uses SQLite.
273
+ """
274
  else:
275
+ step4_output = f"""
276
+ ## πŸš€ Step 4: Execute Generated SQL on Sample Tables
277
+
278
+ **Applied Conditions:**
279
+ {chr(10).join([f"- {note}" for note in sql_query_generation.execution_notes])}
280
+
281
+ {format_execution_result(results, columns)}
282
+
283
+ **Total Rows Returned:** {len(results)}
284
+ """
285
 
286
  conn.close()
287
 
288
+ return step1_output, step2_output, step3_output, step4_output
 
 
 
289
 
290
  except Exception as e:
291
+ error_msg = f"❌ **Error:** {str(e)}"
292
+ return error_msg, "", "", ""
293
 
294
+ # Create Gradio interface
295
+ with gr.Blocks(title="Natural Language to SQL Query Executor", theme=gr.themes.Soft()) as app:
296
  gr.Markdown("""
297
+ # πŸ” Natural Language to SQL Query Executor
298
 
299
+ Convert natural language queries to SQL, auto-generate sample data, and execute queries!
300
 
301
  **Example queries to try:**
302
  - "Find all customers who made orders over $500 in the last 30 days, show their name, email, and total order amount"
303
+ - "Get all employees in the Engineering department with salary above 80000"
304
+ - "Show top 5 products by price"
 
 
 
 
305
  """)
306
 
307
  with gr.Row():
308
+ with gr.Column():
309
  api_key_input = gr.Textbox(
310
  label="πŸ”‘ Groq API Key",
 
311
  placeholder="Enter your Groq API key here...",
312
+ type="password"
313
  )
 
314
  query_input = gr.Textbox(
315
  label="πŸ’¬ Natural Language Query",
316
+ placeholder="Enter your query in plain English...",
317
  lines=3
318
  )
 
319
  submit_btn = gr.Button("πŸš€ Generate & Execute SQL", variant="primary", size="lg")
 
 
 
 
 
 
 
 
320
 
321
  with gr.Row():
322
  with gr.Column():
323
+ step1_output = gr.Markdown(label="Step 1: Understanding")
324
+ step2_output = gr.Markdown(label="Step 2: SQL Generation")
325
+ step3_output = gr.HTML(label="Step 3: Sample Data")
326
+ step4_output = gr.HTML(label="Step 4: Execution Results")
327
 
 
 
 
 
 
 
 
 
 
 
328
  submit_btn.click(
329
+ fn=process_query,
330
  inputs=[api_key_input, query_input],
331
+ outputs=[step1_output, step2_output, step3_output, step4_output]
332
  )
333
 
334
  gr.Markdown("""
335
  ---
336
+ ### πŸ“ How it works:
337
+ 1. **Understand Intent:** Analyzes your natural language query
338
+ 2. **Generate SQL:** Creates structured SQL with metadata
339
+ 3. **Create Sample Data:** Auto-generates realistic sample tables
340
+ 4. **Execute & Display:** Runs the query and shows results
341
+
342
+ ### πŸ”— Get your Groq API key:
343
+ Visit [console.groq.com](https://console.groq.com) to get your free API key!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  """)
345
 
 
346
  if __name__ == "__main__":
347
+ app.launch()