Spaces:

shukdevdattaEX
/

SQLGenie

Paused

App Files Files Community

shukdevdattaEX committed on 26 days ago

Commit

1326ed2

verified ·

1 Parent(s): 850c2bc

Update app.py

Browse files

Files changed (1) hide show

app.py +217 -87

app.py CHANGED Viewed

@@ -3,10 +3,12 @@ from pydantic import BaseModel
 import json
 import gradio as gr
 import pandas as pd
 class ValidationStatus(BaseModel):
     is_valid: bool
     syntax_errors: list[str]
 class SQLQueryGeneration(BaseModel):
     query: str
@@ -19,6 +21,8 @@ class SQLQueryGeneration(BaseModel):
     sample_data: str
     execution_results: str
     optimization_notes: list[str]
 def parse_execution_results_to_dataframe(execution_results):
     """Convert text-based table results to pandas DataFrame"""
@@ -27,11 +31,9 @@ def parse_execution_results_to_dataframe(execution_results):
         if len(lines) < 3:
             return None
-        # Extract header
         header_line = lines[0]
         headers = [col.strip() for col in header_line.split('|')]
-        # Extract data rows (skip separator line)
         data_rows = []
         for line in lines[2:]:
             if line.strip() and not line.strip().startswith('-'):
@@ -47,49 +49,73 @@ def parse_execution_results_to_dataframe(execution_results):
         print(f"Error parsing results: {e}")
         return None
-def generate_sql_query(api_key, user_query):
     """Generate SQL query from natural language using GROQ API"""
     try:
         if not api_key:
-            return "Error: Please enter your GROQ API key", "", "", "", None, ""
         if not user_query:
-            return "Error: Please enter a query description", "", "", "", None, ""
         client = Groq(api_key=api_key)
-        response = client.chat.completions.create(
-            model="moonshotai/kimi-k2-instruct-0905",
-            messages=[
-                {
-                    "role": "system",
-                    "content": """You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata.
-After generating the SQL query, you must:
-1. Create a sample SQL table schema based on the natural language description, including all necessary columns with appropriate data types
-2. Populate the table with realistic sample data that demonstrates the query's functionality
-3. Execute the generated SQL query against the sample table
-4. Display the SQL table structure and data clearly
-5. Show the query execution results in a pipe-delimited table format
-IMPORTANT: The execution_results field must contain a properly formatted table with:
-- Header row with column names separated by pipes (|)
-- A separator row with dashes
-- Data rows with values separated by pipes (|)
-Example format:
 column1 | column2 | column3
 --------|---------|--------
 value1  | value2  | value3
-value4  | value5  | value6
-Always present your response in this order:
-- Generated SQL query with syntax explanation
-- Table schema (CREATE TABLE statement)
-- Sample data (INSERT statements or table visualization)
-- Query execution results (in pipe-delimited table format)
-- Any relevant notes about assumptions made or query optimization suggestions""",
-                },
-                {
-                    "role": "user",
-                    "content": user_query
-                },
             ],
             response_format={
                 "type": "json_schema",
@@ -104,23 +130,43 @@ Always present your response in this order:
             json.loads(response.choices[0].message.content)
         )
-        # Format validation status
-        validation_text = f"Valid: {sql_query_generation.validation_status.is_valid}\n"
         if sql_query_generation.validation_status.syntax_errors:
-            validation_text += "Errors:\n" + "\n".join(
-                f"- {error}" for error in sql_query_generation.validation_status.syntax_errors
             )
         else:
-            validation_text += "No syntax errors found"
         # Format metadata
-        metadata = f"""Query Type: {sql_query_generation.query_type}
-Tables Used: {', '.join(sql_query_generation.tables_used)}
-Complexity: {sql_query_generation.estimated_complexity}
-Execution Notes:
-{chr(10).join(f"- {note}" for note in sql_query_generation.execution_notes)}
-Optimization Notes:
-{chr(10).join(f"- {note}" for note in sql_query_generation.optimization_notes)}"""
         # Convert execution results to DataFrame
         results_df = parse_execution_results_to_dataframe(sql_query_generation.execution_results)
@@ -131,50 +177,96 @@ Optimization Notes:
             sql_query_generation.table_schema,
             sql_query_generation.sample_data,
             results_df,
-            validation_text
         )
     except Exception as e:
-        error_msg = f"Error: {str(e)}"
-        return error_msg, "", "", "", None, ""
-# Create Gradio interface
-with gr.Blocks(title="SQL Query Generator", theme=gr.themes.Ocean()) as demo:
     gr.Markdown(
         """
-        # 🗄️ Natural Language to SQL Query Generator
-        Convert your natural language descriptions into structured SQL queries with validation and execution results.
         """
     )
     with gr.Row():
-        with gr.Column():
             api_key_input = gr.Textbox(
-                label="GROQ API Key",
                 type="password",
                 placeholder="Enter your GROQ API key here...",
                 info="Your API key is not stored and only used for this session"
             )
             query_input = gr.Textbox(
                 label="Natural Language Query",
-                placeholder="e.g., Find all the students who scored more than 90 out of 100",
-                lines=3,
                 value="Find all the students who scored more than 90 out of 100"
             )
-            generate_btn = gr.Button("Generate SQL Query", variant="primary", size="lg")
             gr.Examples(
                 examples=[
                     ["Find all the students who scored more than 90 out of 100"],
-                    ["Get the top 5 customers by total purchase amount"],
-                    ["List all employees hired in the last 6 months"],
-                    ["Find products with price between $50 and $100"],
-                    ["Show average salary by department"]
                 ],
                 inputs=query_input,
-                label="Example Queries"
             )
     with gr.Row():
@@ -182,32 +274,40 @@ with gr.Blocks(title="SQL Query Generator", theme=gr.themes.Ocean()) as demo:
             sql_output = gr.Code(
                 label="Generated SQL Query",
                 language="sql",
-                lines=5
             )
-            metadata_output = gr.Textbox(
-                label="Query Metadata",
-                lines=8
             )
-            validation_output = gr.Textbox(
-                label="Validation Status",
-                lines=3
-            )
     with gr.Row():
         with gr.Column():
             schema_output = gr.Code(
-                label="Table Schema",
                 language="sql",
-                lines=8
             )
         with gr.Column():
             sample_data_output = gr.Code(
-                label="Sample Data",
                 language="sql",
-                lines=8
             )
     with gr.Row():
@@ -221,32 +321,62 @@ with gr.Blocks(title="SQL Query Generator", theme=gr.themes.Ocean()) as demo:
             interactive=False
         )
     generate_btn.click(
         fn=generate_sql_query,
-        inputs=[api_key_input, query_input],
         outputs=[
             sql_output,
             metadata_output,
             schema_output,
             sample_data_output,
             execution_output,
-            validation_output
         ]
     )
     gr.Markdown(
         """
         ---
-        ### How to use:
-        1. Enter your GROQ API key (get one from [console.groq.com](https://console.groq.com))
-        2. Type your natural language query description
-        3. Click "Generate SQL Query" to see the results
-        The app will provide:
-        - A validated SQL query
-        - Table schema and sample data
-        - Execution results in Excel-style table format
-        - Optimization suggestions
         """
     )

 import json
 import gradio as gr
 import pandas as pd
+from datetime import datetime
 class ValidationStatus(BaseModel):
     is_valid: bool
     syntax_errors: list[str]
+    warnings: list[str] = []
 class SQLQueryGeneration(BaseModel):
     query: str
     sample_data: str
     execution_results: str
     optimization_notes: list[str]
+    explanation: str = ""
+    alternative_queries: list[str] = []
 def parse_execution_results_to_dataframe(execution_results):
     """Convert text-based table results to pandas DataFrame"""
         if len(lines) < 3:
             return None
         header_line = lines[0]
         headers = [col.strip() for col in header_line.split('|')]
         data_rows = []
         for line in lines[2:]:
             if line.strip() and not line.strip().startswith('-'):
         print(f"Error parsing results: {e}")
         return None
+def generate_sql_query(api_key, user_query, sql_dialect, include_comments, complexity_level):
     """Generate SQL query from natural language using GROQ API"""
     try:
         if not api_key:
+            return "Error: Please enter your GROQ API key", "", "", "", None, "", "", ""
         if not user_query:
+            return "Error: Please enter a query description", "", "", "", None, "", "", ""
         client = Groq(api_key=api_key)
+        # Enhanced system prompt
+        system_prompt = f"""You are an expert SQL database architect and query optimizer. Generate production-ready SQL queries from natural language descriptions.
+**SQL Dialect**: {sql_dialect}
+**Include Comments**: {include_comments}
+**Target Complexity**: {complexity_level}
+## Core Requirements:
+1. Generate syntactically correct {sql_dialect} queries
+2. Follow {sql_dialect}-specific best practices and conventions
+3. Use proper indexing hints where applicable
+4. Include CTEs (Common Table Expressions) for complex queries when appropriate
+5. Add inline comments explaining complex logic (if comments enabled)
+6. Consider query performance and optimization
+## Security Guidelines:
+- Use parameterized query patterns (e.g., WHERE id = $1)
+- Never include actual sensitive data in examples
+- Validate that queries follow principle of least privilege
+- Flag any potential SQL injection vulnerabilities
+## Query Analysis:
+- Identify query type (SELECT, INSERT, UPDATE, DELETE, etc.)
+- Estimate complexity (Simple, Moderate, Complex, Advanced)
+- List all tables and joins involved
+- Provide optimization suggestions (indexes, query rewriting, etc.)
+- Warn about potential performance issues (N+1 queries, missing indexes, etc.)
+## Sample Data Requirements:
+1. Create realistic table schemas with appropriate:
+   - Primary keys and foreign keys
+   - Indexes on commonly queried columns
+   - Constraints (NOT NULL, UNIQUE, CHECK)
+   - Appropriate data types for {sql_dialect}
+2. Generate 5-10 rows of realistic sample data
+3. Execute the query against sample data
+4. Show results in pipe-delimited table format:
 column1 | column2 | column3
 --------|---------|--------
 value1  | value2  | value3
+## Additional Features:
+- Provide a clear explanation of what the query does
+- Suggest 1-2 alternative approaches if applicable
+- Include execution notes about assumptions made
+- List optimization opportunities
+## Response Structure:
+Return a complete JSON object with all fields populated, including explanation and alternative_queries arrays."""
+        response = client.chat.completions.create(
+            model="moonshotai/kimi-k2-instruct-0905",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_query},
             ],
             response_format={
                 "type": "json_schema",
             json.loads(response.choices[0].message.content)
         )
+        # Format validation status with warnings
+        validation_text = f"✓ Valid: {sql_query_generation.validation_status.is_valid}\n"
         if sql_query_generation.validation_status.syntax_errors:
+            validation_text += "\n❌ Errors:\n" + "\n".join(
+                f"  • {error}" for error in sql_query_generation.validation_status.syntax_errors
             )
         else:
+            validation_text += "✓ No syntax errors found"
+        if sql_query_generation.validation_status.warnings:
+            validation_text += "\n\n⚠️ Warnings:\n" + "\n".join(
+                f"  • {warning}" for warning in sql_query_generation.validation_status.warnings
+            )
         # Format metadata
+        metadata = f"""📊 Query Type: {sql_query_generation.query_type}
+📁 Tables Used: {', '.join(sql_query_generation.tables_used)}
+⚡ Complexity: {sql_query_generation.estimated_complexity}
+🔍 Execution Notes:
+{chr(10).join(f"  • {note}" for note in sql_query_generation.execution_notes)}
+⚙️ Optimization Notes:
+{chr(10).join(f"  • {note}" for note in sql_query_generation.optimization_notes)}"""
+        # Format explanation
+        explanation = sql_query_generation.explanation or "No explanation provided"
+        # Format alternative queries
+        alternatives = ""
+        if sql_query_generation.alternative_queries:
+            alternatives = "\n\n".join(
+                f"Alternative {i+1}:\n{query}"
+                for i, query in enumerate(sql_query_generation.alternative_queries)
+            )
+        else:
+            alternatives = "No alternative approaches suggested"
         # Convert execution results to DataFrame
         results_df = parse_execution_results_to_dataframe(sql_query_generation.execution_results)
             sql_query_generation.table_schema,
             sql_query_generation.sample_data,
             results_df,
+            validation_text,
+            explanation,
+            alternatives
         )
     except Exception as e:
+        error_msg = f"❌ Error: {str(e)}"
+        return error_msg, "", "", "", None, "", "", ""
def export_query(sql_query, schema, sample_data):
    """Export the query, schema and sample data as a downloadable SQL file.

    The three sections are written, in the order schema -> sample data ->
    query, into a single ``.sql`` file separated by dashed comment lines.

    Args:
        sql_query: Text of the generated SQL query.
        schema: Text of the table schema (CREATE TABLE statements).
        sample_data: Text of the sample data (INSERT statements).

    Returns:
        Path of the written ``.sql`` file. The click handler sends this
        value to a ``gr.File`` output, which needs a file path rather than
        the file's contents.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    # Take one timestamp so the header and the filename always agree.
    now = datetime.now()
    divider = "-" * 60  # starts with "--", so it is a valid SQL comment line

    # An empty input component may hand over None; avoid emitting the
    # literal text "None" into the exported file.
    schema_text = schema or ""
    sample_text = sample_data or ""
    query_text = sql_query or ""

    header_time = now.strftime("%Y-%m-%d %H:%M:%S")
    content = f"""-- Generated SQL Query
-- Timestamp: {header_time}
--
{divider}
-- TABLE SCHEMA
{schema_text}
{divider}
-- SAMPLE DATA
{sample_text}
{divider}
-- QUERY
{query_text}
"""

    # A fresh directory per export keeps the readable, timestamped filename
    # from colliding when two exports land in the same second.
    export_dir = tempfile.mkdtemp(prefix="sql_export_")
    file_name = f"query_{now.strftime('%Y%m%d_%H%M%S')}.sql"
    file_path = os.path.join(export_dir, file_name)
    with open(file_path, "w", encoding="utf-8") as handle:
        handle.write(content)
    return file_path
+# Create Gradio interface with enhanced features
+with gr.Blocks(title="SQL Query Generator Pro", theme=gr.themes.Ocean()) as demo:
     gr.Markdown(
         """
+        # 🗄️ Natural Language to SQL Query Generator Pro
+        Convert natural language descriptions into production-ready SQL queries with validation, optimization, and execution results.
         """
     )
     with gr.Row():
+        with gr.Column(scale=1):
             api_key_input = gr.Textbox(
+                label="🔑 GROQ API Key",
                 type="password",
                 placeholder="Enter your GROQ API key here...",
                 info="Your API key is not stored and only used for this session"
             )
+            sql_dialect = gr.Dropdown(
+                label="SQL Dialect",
+                choices=["PostgreSQL", "MySQL", "SQLite", "SQL Server", "Oracle"],
+                value="PostgreSQL",
+                info="Select your target database system"
+            )
+            with gr.Row():
+                include_comments = gr.Checkbox(
+                    label="Include inline comments",
+                    value=True
+                )
+                complexity_level = gr.Radio(
+                    label="Target Complexity",
+                    choices=["Simple", "Moderate", "Advanced"],
+                    value="Moderate"
+                )
             query_input = gr.Textbox(
                 label="Natural Language Query",
+                placeholder="e.g., Find all customers who made purchases over $1000 in the last quarter and group by region",
+                lines=4,
                 value="Find all the students who scored more than 90 out of 100"
             )
+            generate_btn = gr.Button("🚀 Generate SQL Query", variant="primary", size="lg")
             gr.Examples(
                 examples=[
                     ["Find all the students who scored more than 90 out of 100"],
+                    ["Get the top 5 customers by total purchase amount with their contact info"],
+                    ["List all employees hired in the last 6 months with their department and salary"],
+                    ["Find products with price between $50 and $100 ordered by popularity"],
+                    ["Show average salary by department with employee count"],
+                    ["Get customers who haven't made a purchase in the last 90 days"],
+                    ["Find duplicate email addresses in the users table"],
+                    ["Calculate running total of sales by date for each product category"]
                 ],
                 inputs=query_input,
+                label="📚 Example Queries"
             )
     with gr.Row():
             sql_output = gr.Code(
                 label="Generated SQL Query",
                 language="sql",
+                lines=10
             )
+            explanation_output = gr.Textbox(
+                label="📖 Query Explanation",
+                lines=4
             )
+            with gr.Row():
+                with gr.Column():
+                    metadata_output = gr.Textbox(
+                        label="📊 Query Metadata",
+                        lines=10
+                    )
+                with gr.Column():
+                    validation_output = gr.Textbox(
+                        label="✅ Validation Status",
+                        lines=10
+                    )
     with gr.Row():
         with gr.Column():
             schema_output = gr.Code(
+                label="🏗️ Table Schema",
                 language="sql",
+                lines=10
             )
         with gr.Column():
             sample_data_output = gr.Code(
+                label="📝 Sample Data",
                 language="sql",
+                lines=10
             )
     with gr.Row():
             interactive=False
         )
+    with gr.Row():
+        alternatives_output = gr.Code(
+            label="🔄 Alternative Query Approaches",
+            language="sql",
+            lines=8
+        )
+    with gr.Row():
+        export_btn = gr.Button("💾 Export Complete SQL File", variant="secondary")
+        export_output = gr.File(label="Download SQL File")
     generate_btn.click(
         fn=generate_sql_query,
+        inputs=[api_key_input, query_input, sql_dialect, include_comments, complexity_level],
         outputs=[
             sql_output,
             metadata_output,
             schema_output,
             sample_data_output,
             execution_output,
+            validation_output,
+            explanation_output,
+            alternatives_output
         ]
     )
+    export_btn.click(
+        fn=export_query,
+        inputs=[sql_output, schema_output, sample_data_output],
+        outputs=export_output
+    )
     gr.Markdown(
         """
         ---
+        ### 📋 How to use:
+        1. **API Key**: Enter your GROQ API key (get one from [console.groq.com](https://console.groq.com))
+        2. **Configure**: Select your SQL dialect and preferences
+        3. **Query**: Type your natural language description
+        4. **Generate**: Click the button to get your SQL query
+        5. **Export**: Download the complete SQL file with schema and sample data
+        ### ✨ Features:
+        - ✅ Multi-dialect SQL support (PostgreSQL, MySQL, SQLite, SQL Server, Oracle)
+        - 🔍 Syntax validation with warnings
+        - ⚡ Performance optimization suggestions
+        - 📊 Live query execution with sample data
+        - 🔄 Alternative query approaches
+        - 📖 Clear explanations of query logic
+        - 💾 Export complete SQL files
+        - 🎯 Complexity level control
+        ### 🔒 Security:
+        - Your API key is never stored
+        - Queries use parameterized patterns
+        - No sensitive data in examples
         """
     )