codeflow-ai / app.py
unknown
Enable RAG without OpenAI by always calling find_similar
246c95b
import gradio as gr
import os
from dotenv import load_dotenv
from nl_parser import NaturalLanguageParser
from schema_analyzer import SchemaAnalyzer
from sql_validator import SQLValidator
from rag.template_store import get_template_store
from testing.test_runner import get_test_runner
from export.dbt_exporter import get_dbt_exporter
# Load environment variables
load_dotenv()
# Initialize components
print("Initializing CodeFlow AI - Advanced NL-to-SQL Engine...")
try:
parser = NaturalLanguageParser()
print("βœ“ Natural Language Parser ready")
except Exception as e:
print(f"βœ— Error initializing parser: {e}")
print("Make sure your ANTHROPIC_API_KEY is set in .env file!")
exit(1)
schema_analyzer = SchemaAnalyzer()
print("βœ“ Schema Analyzer ready")
# Get database schema
DB_SCHEMA = schema_analyzer.get_schema()
print(f"βœ“ Loaded schema with {len(DB_SCHEMA)} tables")
# Initialize SQL Validator
validator = SQLValidator(DB_SCHEMA)
print("βœ“ SQL Validator ready")
# Initialize RAG Template Store (PRIMARY SPONSOR - LlamaIndex)
try:
template_store = get_template_store()
print("βœ“ RAG Template Store ready (LlamaIndex)")
except Exception as e:
print(f"⚠️ Warning: RAG Template Store initialization failed: {e}")
template_store = None
# Initialize Test Runner (SECONDARY SPONSOR - Modal)
try:
test_runner = get_test_runner()
print("βœ“ SQL Test Runner ready (Modal/DuckDB)")
except Exception as e:
print(f"⚠️ Warning: Test Runner initialization failed: {e}")
test_runner = None
# Initialize dbt Exporter
try:
dbt_exporter = get_dbt_exporter()
print("βœ“ dbt Exporter ready")
except Exception as e:
print(f"⚠️ Warning: dbt Exporter initialization failed: {e}")
dbt_exporter = None
def generate_sql(description, dialect):
"""Main function to generate SQL with advanced analysis"""
if not description.strip():
return "⚠️ Please enter a description of your query.", "", ""
try:
# Generate SQL using enhanced parser
result = parser.generate_sql(description, DB_SCHEMA, dialect)
# Extract components
sql = result.get("sql", "-- No SQL generated")
explanation = result.get("explanation", "")
query_type = result.get("query_type", "unknown")
warnings = result.get("warnings", [])
optimizations = result.get("optimizations", [])
# Validate SQL using sql_validator (NEW: Integration)
validation_result = validator.validate(sql)
validator_suggestions = validator.suggest_optimizations(sql)
# Find similar patterns using RAG (NEW: PRIMARY SPONSOR - LlamaIndex)
similar_patterns = []
if template_store:
try:
similar_patterns = template_store.find_similar(description, top_k=3)
except Exception as e:
print(f"⚠️ RAG query failed: {e}")
# Build metadata display
metadata_parts = []
# Query type
metadata_parts.append(f"**Query Type:** {query_type.title()}")
# Explanation
if explanation:
metadata_parts.append(f"\n**Explanation:**\n{explanation}")
# Optimizations applied
if optimizations:
metadata_parts.append("\n**✨ Optimizations Applied:**")
for opt in optimizations:
metadata_parts.append(f" β€’ {opt}")
# Validator suggestions (NEW)
if validator_suggestions:
metadata_parts.append("\n**πŸ’‘ Additional Optimization Suggestions:**")
for suggestion in validator_suggestions:
metadata_parts.append(f" β€’ {suggestion}")
# Validation errors (NEW)
if validation_result.get("errors"):
metadata_parts.append("\n**❌ Validation Errors:**")
for error in validation_result["errors"]:
metadata_parts.append(f" β€’ {error}")
# Validation warnings (NEW)
if validation_result.get("warnings"):
metadata_parts.append("\n**⚠️ Validation Warnings:**")
for warn in validation_result["warnings"]:
metadata_parts.append(f" β€’ {warn}")
# Original warnings from parser
if warnings:
metadata_parts.append("\n**⚠️ Parser Warnings:**")
for warn in warnings:
metadata_parts.append(f" β€’ {warn}")
# Similar patterns from RAG (NEW: PRIMARY SPONSOR)
if similar_patterns:
metadata_parts.append("\n**πŸ” Similar Patterns (LlamaIndex RAG):**")
for i, pattern in enumerate(similar_patterns, 1):
template_name = pattern.get("template_name", "Unknown")
metadata_parts.append(f" {i}. **{template_name}**")
if pattern.get("excerpt"):
# Show first 2 lines of excerpt
excerpt_lines = pattern["excerpt"].split('\n')[:2]
for line in excerpt_lines:
if line.strip():
metadata_parts.append(f" `{line.strip()}`")
metadata_text = "\n".join(metadata_parts)
# Format SQL with header
formatted_sql = f"""-- CodeFlow AI - Advanced NL-to-SQL Engine
-- Dialect: {dialect}
-- Query Type: {query_type.title()}
{sql}"""
return formatted_sql, metadata_text, ""
except Exception as e:
error_msg = f"-- Error generating SQL\n-- {str(e)}\n\n-- Please try rephrasing your request."
metadata_msg = f"**❌ Error:**\n{str(e)}"
return error_msg, metadata_msg, ""
def test_sql_query(sql_code):
"""Test SQL query execution (SECONDARY SPONSOR - Modal)"""
if not sql_code or not sql_code.strip():
return "⚠️ No SQL to test. Generate SQL first."
# Extract just the SQL (remove comments)
sql_lines = []
for line in sql_code.split('\n'):
stripped = line.strip()
if stripped and not stripped.startswith('--'):
sql_lines.append(line)
clean_sql = '\n'.join(sql_lines).strip()
if not clean_sql:
return "⚠️ No executable SQL found."
if not test_runner:
return "⚠️ Test runner not available. Install Modal or DuckDB."
try:
# Test the SQL
result = test_runner.test_sql(clean_sql)
# Format results
if result["success"]:
output_parts = []
output_parts.append(f"**βœ… Query Executed Successfully**")
output_parts.append(f"**Execution Method:** {result.get('execution_method', 'Unknown')}")
output_parts.append(f"**Rows Returned:** {result['row_count']}")
if result.get("columns"):
output_parts.append(f"\n**Columns:** {', '.join(result['columns'])}")
if result.get("rows"):
output_parts.append(f"\n**Sample Results (first 10 rows):**")
output_parts.append("```")
for i, row in enumerate(result['rows'][:10], 1):
output_parts.append(f"{i}. {row}")
output_parts.append("```")
if result['row_count'] > 10:
output_parts.append(f"\n_(Showing 10 of {result['row_count']} rows)_")
return "\n".join(output_parts)
else:
return f"**❌ Query Execution Failed**\n\n**Error:** {result.get('error', 'Unknown error')}"
except Exception as e:
return f"**❌ Test Error:**\n{str(e)}"
def export_to_dbt(sql_code, model_name, description):
"""Export SQL as dbt model with downloadable files"""
if not sql_code or not sql_code.strip():
return "⚠️ No SQL to export. Generate SQL first.", None
if not model_name or not model_name.strip():
return "⚠️ Please provide a model name.", None
if not dbt_exporter:
return "⚠️ dbt Exporter not available.", None
try:
# Clean model name (replace spaces with underscores, lowercase)
clean_model_name = model_name.strip().lower().replace(' ', '_').replace('-', '_')
clean_model_name = ''.join(c for c in clean_model_name if c.isalnum() or c == '_')
if not clean_model_name:
return "⚠️ Invalid model name. Use letters, numbers, and underscores only.", None
# Export to dbt
files = dbt_exporter.export_model(
sql=sql_code,
model_name=clean_model_name,
description=description or f"Generated model: {clean_model_name}",
materialization="table",
schema="analytics",
tags="codeflow_generated"
)
# Create ZIP file
zip_bytes = dbt_exporter.create_zip(files, clean_model_name)
# Create success message
message = f"""**βœ… dbt Model Exported Successfully!**
**Model Name:** `{clean_model_name}`
**Files Generated:**
- `models/{clean_model_name}.sql` - dbt model
- `models/schema.yml` - Documentation and tests
- `README.md` - Usage instructions
**Next Steps:**
1. Download the ZIP file below
2. Extract to your dbt project's `models/` directory
3. Run: `dbt run --select {clean_model_name}`
4. Test: `dbt test --select {clean_model_name}`
The model is configured as a table in the `analytics` schema.
You can modify the configuration at the top of the model file.
"""
return message, zip_bytes
except Exception as e:
return f"**❌ Export Error:**\n{str(e)}", None
def create_app():
"""Create Gradio interface"""
with gr.Blocks(title="CodeFlow AI - Advanced NL-to-SQL") as app:
gr.Markdown("""
# πŸš€ CodeFlow AI - Advanced Natural Language to SQL Engine
Transform plain English into **production-ready, optimized SQL** instantly!
**Advanced Features:**
- ✨ Automatic query optimization
- πŸ” Schema-aware validation
- πŸ“Š Support for CTEs, window functions, and complex analytics
- ⚠️ Intelligent warnings and suggestions
- 🎯 Multiple SQL dialect support
- πŸ” **RAG-powered similar patterns** (LlamaIndex)
- πŸ’‘ **Advanced optimization suggestions**
""")
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### πŸ“ Input")
description = gr.Textbox(
label="Describe your data transformation",
placeholder="Example: Find top 5 customers by total order value with running totals",
lines=6
)
dialect = gr.Dropdown(
choices=["PostgreSQL", "MySQL", "SQLite", "SQL Server"],
value="PostgreSQL",
label="SQL Dialect"
)
with gr.Row():
generate_btn = gr.Button("✨ Generate SQL", variant="primary", scale=2)
clear_btn = gr.Button("πŸ—‘οΈ Clear", scale=1)
with gr.Column(scale=2):
gr.Markdown("### πŸ’» Generated SQL")
output = gr.Code(
label="SQL Output",
language="python",
lines=15
)
# Test Query button (SECONDARY SPONSOR - Modal)
with gr.Row():
test_btn = gr.Button("πŸ§ͺ Test Query (Modal)", variant="secondary")
gr.Markdown("### πŸ“Š Query Analysis")
metadata = gr.Markdown(
value="",
label="Analysis"
)
# Test results section
with gr.Accordion("πŸ§ͺ Test Results", open=False):
test_results = gr.Markdown(
value="Click 'Test Query' to execute the SQL and see results.",
label="Test Output"
)
# dbt Export section
gr.Markdown("### πŸ“¦ Export to dbt")
with gr.Row():
with gr.Column(scale=2):
model_name_input = gr.Textbox(
label="Model Name",
placeholder="customer_analytics",
value="my_model"
)
with gr.Column(scale=3):
model_desc_input = gr.Textbox(
label="Model Description",
placeholder="Describe what this model does...",
value=""
)
export_btn = gr.Button("πŸ“¦ Export to dbt", variant="secondary")
with gr.Accordion("πŸ“¦ Export Status", open=False):
export_status = gr.Markdown(
value="Configure model details above and click 'Export to dbt' to generate production-ready dbt files.",
label="Export Output"
)
download_file = gr.File(label="Download dbt Model", visible=False)
# Example queries
gr.Markdown("### πŸ’‘ Try These Advanced Examples")
gr.Examples(
examples=[
["Find top 10 customers by total order value with their rank"],
["Show monthly revenue trends with year-over-year comparison"],
["Get customers who have never placed an order"],
["Calculate running total of sales by date for each product"],
["Find the 3 most popular products in each category"],
["Show customers with above-average order values"],
["List orders with customer details, handling missing customers gracefully"],
["Calculate customer cohorts based on first purchase month"],
["Find products that were ordered in 2023 but not in 2024"],
["Show daily order count with 7-day moving average"]
],
inputs=[description]
)
# Event handlers
generate_btn.click(
fn=generate_sql,
inputs=[description, dialect],
outputs=[output, metadata, gr.Textbox(visible=False)]
)
test_btn.click(
fn=test_sql_query,
inputs=[output],
outputs=[test_results]
)
def handle_export(sql_code, model_name, description):
"""Handle dbt export and show/hide download button"""
message, zip_bytes = export_to_dbt(sql_code, model_name, description)
if zip_bytes:
# Save to temp file for download
import tempfile
with tempfile.NamedTemporaryFile(delete=False, suffix='.zip', mode='wb') as f:
f.write(zip_bytes)
temp_path = f.name
return message, gr.File(value=temp_path, visible=True)
else:
return message, gr.File(visible=False)
export_btn.click(
fn=handle_export,
inputs=[output, model_name_input, model_desc_input],
outputs=[export_status, download_file]
)
clear_btn.click(
fn=lambda: ("", "", "", "Click 'Test Query' to execute the SQL and see results.",
"Configure model details above and click 'Export to dbt' to generate production-ready dbt files.",
gr.File(visible=False)),
outputs=[description, output, metadata, test_results, export_status, download_file]
)
# Schema display
with gr.Accordion("πŸ“‹ Database Schema", open=False):
schema_md = "## Available Tables and Columns\n\n"
for table, columns in DB_SCHEMA.items():
schema_md += f"### πŸ“Š {table}\n\n"
schema_md += "| Column | Type | Constraints |\n"
schema_md += "|--------|------|-------------|\n"
for col in columns:
constraints = []
if col.get('primary_key'):
constraints.append("PRIMARY KEY")
if not col.get('nullable', True):
constraints.append("NOT NULL")
constraint_str = ", ".join(constraints) if constraints else "-"
schema_md += f"| {col['name']} | {col['type']} | {constraint_str} |\n"
schema_md += "\n"
gr.Markdown(schema_md)
gr.Markdown("---")
gr.Markdown("""
**πŸ† CodeFlow AI - Advanced NL-to-SQL Engine**
**Core Features:**
- 🧠 Powered by Claude 3 Opus (Anthropic)
- πŸ” **LlamaIndex RAG** - Similar pattern suggestions from template library
- ✨ Automatic query optimization with validation
- πŸ“Š Complex query support (CTEs, Window Functions, Subqueries)
- ⚠️ Intelligent error handling and warnings
**Sponsor Integrations:**
- 🎯 **LlamaIndex** - RAG-powered template matching (PRIMARY SPONSOR)
- ⚑ **Modal** - Serverless SQL testing (coming soon)
Built for: MCP 1st Birthday Hackathon
""")
return app
if __name__ == "__main__":
print("\n" + "="*50)
print("Starting CodeFlow AI - Advanced NL-to-SQL Engine...")
print("="*50 + "\n")
app = create_app()
app.launch(share=False)