Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| from dotenv import load_dotenv | |
| from nl_parser import NaturalLanguageParser | |
| from schema_analyzer import SchemaAnalyzer | |
| from sql_validator import SQLValidator | |
| from rag.template_store import get_template_store | |
| from testing.test_runner import get_test_runner | |
| from export.dbt_exporter import get_dbt_exporter | |
# Load environment variables (e.g. ANTHROPIC_API_KEY) from a local .env file.
load_dotenv()

# --- One-time component initialization, runs at module import ---
print("Initializing CodeFlow AI - Advanced NL-to-SQL Engine...")

# The NL parser is the core dependency; abort startup if it cannot be built
# (most commonly because the Anthropic API key is missing).
# NOTE(review): `exit()` is the interactive-site helper; `sys.exit(1)` is the
# conventional choice for scripts — confirm before changing.
try:
    parser = NaturalLanguageParser()
    print("β Natural Language Parser ready")
except Exception as e:
    print(f"β Error initializing parser: {e}")
    print("Make sure your ANTHROPIC_API_KEY is set in .env file!")
    exit(1)

schema_analyzer = SchemaAnalyzer()
print("β Schema Analyzer ready")

# Get database schema.
# DB_SCHEMA is iterated as table -> list of column dicts (see the schema
# accordion in create_app, which reads col['name'], col['type'], etc.);
# it is shared by the parser, the validator, and the UI.
DB_SCHEMA = schema_analyzer.get_schema()
print(f"β Loaded schema with {len(DB_SCHEMA)} tables")

# Initialize SQL Validator (schema-aware static validation + suggestions).
validator = SQLValidator(DB_SCHEMA)
print("β SQL Validator ready")

# The remaining components are optional: on failure each is set to None and
# the handlers below report the feature as unavailable instead of crashing.

# Initialize RAG Template Store (PRIMARY SPONSOR - LlamaIndex)
try:
    template_store = get_template_store()
    print("β RAG Template Store ready (LlamaIndex)")
except Exception as e:
    print(f"β οΈ Warning: RAG Template Store initialization failed: {e}")
    template_store = None

# Initialize Test Runner (SECONDARY SPONSOR - Modal)
try:
    test_runner = get_test_runner()
    print("β SQL Test Runner ready (Modal/DuckDB)")
except Exception as e:
    print(f"β οΈ Warning: Test Runner initialization failed: {e}")
    test_runner = None

# Initialize dbt Exporter (generates downloadable dbt model files).
try:
    dbt_exporter = get_dbt_exporter()
    print("β dbt Exporter ready")
except Exception as e:
    print(f"β οΈ Warning: dbt Exporter initialization failed: {e}")
    dbt_exporter = None
def generate_sql(description, dialect):
    """Generate SQL from a natural-language description, with full analysis.

    Returns a 3-tuple ``(sql_text, markdown_report, "")``; the trailing empty
    string feeds a hidden placeholder output in the UI wiring.
    """
    if not description.strip():
        return "β οΈ Please enter a description of your query.", "", ""
    try:
        # Ask the enhanced parser for SQL plus its self-reported analysis.
        parsed = parser.generate_sql(description, DB_SCHEMA, dialect)

        sql = parsed.get("sql", "-- No SQL generated")
        explanation = parsed.get("explanation", "")
        query_type = parsed.get("query_type", "unknown")
        parser_warnings = parsed.get("warnings", [])
        optimizations = parsed.get("optimizations", [])

        # Schema-aware static validation and extra optimization hints.
        validation = validator.validate(sql)
        suggestions = validator.suggest_optimizations(sql)

        # Best-effort RAG lookup of similar templates (LlamaIndex); failures
        # are logged and the feature is simply skipped.
        matches = []
        if template_store:
            try:
                matches = template_store.find_similar(description, top_k=3)
            except Exception as e:
                print(f"β οΈ RAG query failed: {e}")

        # Assemble the markdown analysis report, section by section.
        report = [f"**Query Type:** {query_type.title()}"]
        if explanation:
            report.append(f"\n**Explanation:**\n{explanation}")
        if optimizations:
            report.append("\n**β¨ Optimizations Applied:**")
            report.extend(f" β’ {opt}" for opt in optimizations)
        if suggestions:
            report.append("\n**π‘ Additional Optimization Suggestions:**")
            report.extend(f" β’ {s}" for s in suggestions)
        if validation.get("errors"):
            report.append("\n**β Validation Errors:**")
            report.extend(f" β’ {err}" for err in validation["errors"])
        if validation.get("warnings"):
            report.append("\n**β οΈ Validation Warnings:**")
            report.extend(f" β’ {w}" for w in validation["warnings"])
        if parser_warnings:
            report.append("\n**β οΈ Parser Warnings:**")
            report.extend(f" β’ {w}" for w in parser_warnings)
        if matches:
            report.append("\n**π Similar Patterns (LlamaIndex RAG):**")
            for idx, pattern in enumerate(matches, 1):
                report.append(f" {idx}. **{pattern.get('template_name', 'Unknown')}**")
                excerpt = pattern.get("excerpt")
                if excerpt:
                    # Show only the first two non-blank excerpt lines.
                    for line in excerpt.split('\n')[:2]:
                        if line.strip():
                            report.append(f" `{line.strip()}`")

        # Prepend a short comment header to the SQL.
        header = (
            "-- CodeFlow AI - Advanced NL-to-SQL Engine\n"
            f"-- Dialect: {dialect}\n"
            f"-- Query Type: {query_type.title()}\n"
        )
        return header + sql, "\n".join(report), ""
    except Exception as e:
        error_msg = f"-- Error generating SQL\n-- {str(e)}\n\n-- Please try rephrasing your request."
        metadata_msg = f"**β Error:**\n{str(e)}"
        return error_msg, metadata_msg, ""
def test_sql_query(sql_code):
    """Execute the generated SQL via the test runner and format the outcome.

    Returns a markdown string describing success (columns, sample rows) or
    the failure reason.
    """
    if not sql_code or not sql_code.strip():
        return "β οΈ No SQL to test. Generate SQL first."
    # Strip comment-only lines; keep original line text for the executable part.
    executable = '\n'.join(
        line for line in sql_code.split('\n')
        if line.strip() and not line.strip().startswith('--')
    ).strip()
    if not executable:
        return "β οΈ No executable SQL found."
    if not test_runner:
        return "β οΈ Test runner not available. Install Modal or DuckDB."
    try:
        outcome = test_runner.test_sql(executable)
        if not outcome["success"]:
            return f"**β Query Execution Failed**\n\n**Error:** {outcome.get('error', 'Unknown error')}"
        parts = [
            "**β Query Executed Successfully**",
            f"**Execution Method:** {outcome.get('execution_method', 'Unknown')}",
            f"**Rows Returned:** {outcome['row_count']}",
        ]
        if outcome.get("columns"):
            parts.append(f"\n**Columns:** {', '.join(outcome['columns'])}")
        if outcome.get("rows"):
            parts.append("\n**Sample Results (first 10 rows):**")
            parts.append("```")
            parts.extend(f"{i}. {row}" for i, row in enumerate(outcome['rows'][:10], 1))
            parts.append("```")
            if outcome['row_count'] > 10:
                parts.append(f"\n_(Showing 10 of {outcome['row_count']} rows)_")
        return "\n".join(parts)
    except Exception as e:
        return f"**β Test Error:**\n{str(e)}"
def export_to_dbt(sql_code, model_name, description):
    """Package the generated SQL as a downloadable dbt model.

    Returns ``(status_markdown, zip_bytes_or_None)``.
    """
    if not sql_code or not sql_code.strip():
        return "β οΈ No SQL to export. Generate SQL first.", None
    if not model_name or not model_name.strip():
        return "β οΈ Please provide a model name.", None
    if not dbt_exporter:
        return "β οΈ dbt Exporter not available.", None
    try:
        # Normalize the model name: lowercase, spaces/dashes to underscores,
        # then drop anything that is not alphanumeric or underscore.
        normalized = model_name.strip().lower().replace(' ', '_').replace('-', '_')
        normalized = ''.join(ch for ch in normalized if ch.isalnum() or ch == '_')
        if not normalized:
            return "β οΈ Invalid model name. Use letters, numbers, and underscores only.", None

        # Generate the dbt files and bundle them into a single ZIP.
        files = dbt_exporter.export_model(
            sql=sql_code,
            model_name=normalized,
            description=description or f"Generated model: {normalized}",
            materialization="table",
            schema="analytics",
            tags="codeflow_generated"
        )
        zip_bytes = dbt_exporter.create_zip(files, normalized)

        message = f"""**β dbt Model Exported Successfully!**

**Model Name:** `{normalized}`

**Files Generated:**
- `models/{normalized}.sql` - dbt model
- `models/schema.yml` - Documentation and tests
- `README.md` - Usage instructions

**Next Steps:**
1. Download the ZIP file below
2. Extract to your dbt project's `models/` directory
3. Run: `dbt run --select {normalized}`
4. Test: `dbt test --select {normalized}`

The model is configured as a table in the `analytics` schema.
You can modify the configuration at the top of the model file.
"""
        return message, zip_bytes
    except Exception as e:
        return f"**β Export Error:**\n{str(e)}", None
def create_app():
    """Build and return the Gradio Blocks UI.

    Layout: input column (description + dialect), output column (SQL code +
    analysis), test-results accordion, dbt export section, examples, and a
    schema reference accordion. Event wiring connects the module-level
    handlers (generate_sql, test_sql_query, export_to_dbt).

    Fix: the SQL output used ``language="python"``, which applied Python
    syntax highlighting to SQL; it now uses ``language="sql"``.
    """
    with gr.Blocks(title="CodeFlow AI - Advanced NL-to-SQL") as app:
        gr.Markdown("""
        # π CodeFlow AI - Advanced Natural Language to SQL Engine

        Transform plain English into **production-ready, optimized SQL** instantly!

        **Advanced Features:**
        - β¨ Automatic query optimization
        - π Schema-aware validation
        - π Support for CTEs, window functions, and complex analytics
        - β οΈ Intelligent warnings and suggestions
        - π― Multiple SQL dialect support
        - π **RAG-powered similar patterns** (LlamaIndex)
        - π‘ **Advanced optimization suggestions**
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### π Input")
                description = gr.Textbox(
                    label="Describe your data transformation",
                    placeholder="Example: Find top 5 customers by total order value with running totals",
                    lines=6
                )
                dialect = gr.Dropdown(
                    choices=["PostgreSQL", "MySQL", "SQLite", "SQL Server"],
                    value="PostgreSQL",
                    label="SQL Dialect"
                )
                with gr.Row():
                    generate_btn = gr.Button("β¨ Generate SQL", variant="primary", scale=2)
                    clear_btn = gr.Button("ποΈ Clear", scale=1)

            with gr.Column(scale=2):
                gr.Markdown("### π» Generated SQL")
                # FIX: was language="python" — the component shows SQL.
                output = gr.Code(
                    label="SQL Output",
                    language="sql",
                    lines=15
                )
                # Test Query button (SECONDARY SPONSOR - Modal)
                with gr.Row():
                    test_btn = gr.Button("π§ͺ Test Query (Modal)", variant="secondary")
                gr.Markdown("### π Query Analysis")
                metadata = gr.Markdown(
                    value="",
                    label="Analysis"
                )

        # Test results section
        with gr.Accordion("π§ͺ Test Results", open=False):
            test_results = gr.Markdown(
                value="Click 'Test Query' to execute the SQL and see results.",
                label="Test Output"
            )

        # dbt Export section
        gr.Markdown("### π¦ Export to dbt")
        with gr.Row():
            with gr.Column(scale=2):
                model_name_input = gr.Textbox(
                    label="Model Name",
                    placeholder="customer_analytics",
                    value="my_model"
                )
            with gr.Column(scale=3):
                model_desc_input = gr.Textbox(
                    label="Model Description",
                    placeholder="Describe what this model does...",
                    value=""
                )
        export_btn = gr.Button("π¦ Export to dbt", variant="secondary")
        with gr.Accordion("π¦ Export Status", open=False):
            export_status = gr.Markdown(
                value="Configure model details above and click 'Export to dbt' to generate production-ready dbt files.",
                label="Export Output"
            )
        # Hidden until an export succeeds; handle_export toggles visibility.
        download_file = gr.File(label="Download dbt Model", visible=False)

        # Example queries
        gr.Markdown("### π‘ Try These Advanced Examples")
        gr.Examples(
            examples=[
                ["Find top 10 customers by total order value with their rank"],
                ["Show monthly revenue trends with year-over-year comparison"],
                ["Get customers who have never placed an order"],
                ["Calculate running total of sales by date for each product"],
                ["Find the 3 most popular products in each category"],
                ["Show customers with above-average order values"],
                ["List orders with customer details, handling missing customers gracefully"],
                ["Calculate customer cohorts based on first purchase month"],
                ["Find products that were ordered in 2023 but not in 2024"],
                ["Show daily order count with 7-day moving average"]
            ],
            inputs=[description]
        )

        # Event handlers. generate_sql returns a 3-tuple; the third output is
        # an unused hidden placeholder component.
        generate_btn.click(
            fn=generate_sql,
            inputs=[description, dialect],
            outputs=[output, metadata, gr.Textbox(visible=False)]
        )
        test_btn.click(
            fn=test_sql_query,
            inputs=[output],
            outputs=[test_results]
        )

        def handle_export(sql_code, model_name, description):
            """Handle dbt export and show/hide download button"""
            message, zip_bytes = export_to_dbt(sql_code, model_name, description)
            if zip_bytes:
                # Save to a temp file so gr.File has a path to serve.
                import tempfile
                with tempfile.NamedTemporaryFile(delete=False, suffix='.zip', mode='wb') as f:
                    f.write(zip_bytes)
                    temp_path = f.name
                return message, gr.File(value=temp_path, visible=True)
            else:
                return message, gr.File(visible=False)

        export_btn.click(
            fn=handle_export,
            inputs=[output, model_name_input, model_desc_input],
            outputs=[export_status, download_file]
        )
        # Reset all six bound components to their initial states.
        clear_btn.click(
            fn=lambda: ("", "", "", "Click 'Test Query' to execute the SQL and see results.",
                        "Configure model details above and click 'Export to dbt' to generate production-ready dbt files.",
                        gr.File(visible=False)),
            outputs=[description, output, metadata, test_results, export_status, download_file]
        )

        # Schema display: render DB_SCHEMA as one markdown table per DB table.
        with gr.Accordion("π Database Schema", open=False):
            schema_md = "## Available Tables and Columns\n\n"
            for table, columns in DB_SCHEMA.items():
                schema_md += f"### π {table}\n\n"
                schema_md += "| Column | Type | Constraints |\n"
                schema_md += "|--------|------|-------------|\n"
                for col in columns:
                    constraints = []
                    if col.get('primary_key'):
                        constraints.append("PRIMARY KEY")
                    if not col.get('nullable', True):
                        constraints.append("NOT NULL")
                    constraint_str = ", ".join(constraints) if constraints else "-"
                    schema_md += f"| {col['name']} | {col['type']} | {constraint_str} |\n"
                schema_md += "\n"
            gr.Markdown(schema_md)

        gr.Markdown("---")
        gr.Markdown("""
        **π CodeFlow AI - Advanced NL-to-SQL Engine**

        **Core Features:**
        - π§  Powered by Claude 3 Opus (Anthropic)
        - π **LlamaIndex RAG** - Similar pattern suggestions from template library
        - β¨ Automatic query optimization with validation
        - π Complex query support (CTEs, Window Functions, Subqueries)
        - β οΈ Intelligent error handling and warnings

        **Sponsor Integrations:**
        - π― **LlamaIndex** - RAG-powered template matching (PRIMARY SPONSOR)
        - β‘ **Modal** - Serverless SQL testing (coming soon)

        Built for: MCP 1st Birthday Hackathon
        """)
    return app
if __name__ == "__main__":
    # Print a startup banner, then serve the Gradio app locally.
    banner = "=" * 50
    print("\n" + banner)
    print("Starting CodeFlow AI - Advanced NL-to-SQL Engine...")
    print(banner + "\n")
    demo = create_app()
    demo.launch(share=False)