shukdevdattaEX committed on
Commit
1326ed2
·
verified ·
1 Parent(s): 850c2bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -87
app.py CHANGED
@@ -3,10 +3,12 @@ from pydantic import BaseModel
3
  import json
4
  import gradio as gr
5
  import pandas as pd
 
6
 
7
  class ValidationStatus(BaseModel):
8
  is_valid: bool
9
  syntax_errors: list[str]
 
10
 
11
  class SQLQueryGeneration(BaseModel):
12
  query: str
@@ -19,6 +21,8 @@ class SQLQueryGeneration(BaseModel):
19
  sample_data: str
20
  execution_results: str
21
  optimization_notes: list[str]
 
 
22
 
23
  def parse_execution_results_to_dataframe(execution_results):
24
  """Convert text-based table results to pandas DataFrame"""
@@ -27,11 +31,9 @@ def parse_execution_results_to_dataframe(execution_results):
27
  if len(lines) < 3:
28
  return None
29
 
30
- # Extract header
31
  header_line = lines[0]
32
  headers = [col.strip() for col in header_line.split('|')]
33
 
34
- # Extract data rows (skip separator line)
35
  data_rows = []
36
  for line in lines[2:]:
37
  if line.strip() and not line.strip().startswith('-'):
@@ -47,49 +49,73 @@ def parse_execution_results_to_dataframe(execution_results):
47
  print(f"Error parsing results: {e}")
48
  return None
49
 
50
- def generate_sql_query(api_key, user_query):
51
  """Generate SQL query from natural language using GROQ API"""
52
  try:
53
  if not api_key:
54
- return "Error: Please enter your GROQ API key", "", "", "", None, ""
55
 
56
  if not user_query:
57
- return "Error: Please enter a query description", "", "", "", None, ""
58
 
59
  client = Groq(api_key=api_key)
60
 
61
- response = client.chat.completions.create(
62
- model="moonshotai/kimi-k2-instruct-0905",
63
- messages=[
64
- {
65
- "role": "system",
66
- "content": """You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata.
67
- After generating the SQL query, you must:
68
- 1. Create a sample SQL table schema based on the natural language description, including all necessary columns with appropriate data types
69
- 2. Populate the table with realistic sample data that demonstrates the query's functionality
70
- 3. Execute the generated SQL query against the sample table
71
- 4. Display the SQL table structure and data clearly
72
- 5. Show the query execution results in a pipe-delimited table format
73
- IMPORTANT: The execution_results field must contain a properly formatted table with:
74
- - Header row with column names separated by pipes (|)
75
- - A separator row with dashes
76
- - Data rows with values separated by pipes (|)
77
- Example format:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  column1 | column2 | column3
79
  --------|---------|--------
80
  value1 | value2 | value3
81
- value4 | value5 | value6
82
- Always present your response in this order:
83
- - Generated SQL query with syntax explanation
84
- - Table schema (CREATE TABLE statement)
85
- - Sample data (INSERT statements or table visualization)
86
- - Query execution results (in pipe-delimited table format)
87
- - Any relevant notes about assumptions made or query optimization suggestions""",
88
- },
89
- {
90
- "role": "user",
91
- "content": user_query
92
- },
 
 
 
93
  ],
94
  response_format={
95
  "type": "json_schema",
@@ -104,23 +130,43 @@ Always present your response in this order:
104
  json.loads(response.choices[0].message.content)
105
  )
106
 
107
- # Format validation status
108
- validation_text = f"Valid: {sql_query_generation.validation_status.is_valid}\n"
109
  if sql_query_generation.validation_status.syntax_errors:
110
- validation_text += "Errors:\n" + "\n".join(
111
- f"- {error}" for error in sql_query_generation.validation_status.syntax_errors
112
  )
113
  else:
114
- validation_text += "No syntax errors found"
 
 
 
 
 
115
 
116
  # Format metadata
117
- metadata = f"""Query Type: {sql_query_generation.query_type}
118
- Tables Used: {', '.join(sql_query_generation.tables_used)}
119
- Complexity: {sql_query_generation.estimated_complexity}
120
- Execution Notes:
121
- {chr(10).join(f"- {note}" for note in sql_query_generation.execution_notes)}
122
- Optimization Notes:
123
- {chr(10).join(f"- {note}" for note in sql_query_generation.optimization_notes)}"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  # Convert execution results to DataFrame
126
  results_df = parse_execution_results_to_dataframe(sql_query_generation.execution_results)
@@ -131,50 +177,96 @@ Optimization Notes:
131
  sql_query_generation.table_schema,
132
  sql_query_generation.sample_data,
133
  results_df,
134
- validation_text
 
 
135
  )
136
 
137
  except Exception as e:
138
- error_msg = f"Error: {str(e)}"
139
- return error_msg, "", "", "", None, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
- # Create Gradio interface
142
- with gr.Blocks(title="SQL Query Generator", theme=gr.themes.Ocean()) as demo:
 
 
 
 
 
143
  gr.Markdown(
144
  """
145
- # πŸ—„οΈ Natural Language to SQL Query Generator
146
- Convert your natural language descriptions into structured SQL queries with validation and execution results.
147
  """
148
  )
149
 
150
  with gr.Row():
151
- with gr.Column():
152
  api_key_input = gr.Textbox(
153
- label="GROQ API Key",
154
  type="password",
155
  placeholder="Enter your GROQ API key here...",
156
  info="Your API key is not stored and only used for this session"
157
  )
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  query_input = gr.Textbox(
160
  label="Natural Language Query",
161
- placeholder="e.g., Find all the students who scored more than 90 out of 100",
162
- lines=3,
163
  value="Find all the students who scored more than 90 out of 100"
164
  )
165
 
166
- generate_btn = gr.Button("Generate SQL Query", variant="primary", size="lg")
167
 
168
  gr.Examples(
169
  examples=[
170
  ["Find all the students who scored more than 90 out of 100"],
171
- ["Get the top 5 customers by total purchase amount"],
172
- ["List all employees hired in the last 6 months"],
173
- ["Find products with price between $50 and $100"],
174
- ["Show average salary by department"]
 
 
 
175
  ],
176
  inputs=query_input,
177
- label="Example Queries"
178
  )
179
 
180
  with gr.Row():
@@ -182,32 +274,40 @@ with gr.Blocks(title="SQL Query Generator", theme=gr.themes.Ocean()) as demo:
182
  sql_output = gr.Code(
183
  label="Generated SQL Query",
184
  language="sql",
185
- lines=5
186
  )
187
 
188
- metadata_output = gr.Textbox(
189
- label="Query Metadata",
190
- lines=8
191
  )
192
 
193
- validation_output = gr.Textbox(
194
- label="Validation Status",
195
- lines=3
196
- )
 
 
 
 
 
 
 
 
197
 
198
  with gr.Row():
199
  with gr.Column():
200
  schema_output = gr.Code(
201
- label="Table Schema",
202
  language="sql",
203
- lines=8
204
  )
205
 
206
  with gr.Column():
207
  sample_data_output = gr.Code(
208
- label="Sample Data",
209
  language="sql",
210
- lines=8
211
  )
212
 
213
  with gr.Row():
@@ -221,32 +321,62 @@ with gr.Blocks(title="SQL Query Generator", theme=gr.themes.Ocean()) as demo:
221
  interactive=False
222
  )
223
 
 
 
 
 
 
 
 
 
 
 
 
224
  generate_btn.click(
225
  fn=generate_sql_query,
226
- inputs=[api_key_input, query_input],
227
  outputs=[
228
  sql_output,
229
  metadata_output,
230
  schema_output,
231
  sample_data_output,
232
  execution_output,
233
- validation_output
 
 
234
  ]
235
  )
236
 
 
 
 
 
 
 
237
  gr.Markdown(
238
  """
239
  ---
240
- ### How to use:
241
- 1. Enter your GROQ API key (get one from [console.groq.com](https://console.groq.com))
242
- 2. Type your natural language query description
243
- 3. Click "Generate SQL Query" to see the results
244
-
245
- The app will provide:
246
- - A validated SQL query
247
- - Table schema and sample data
248
- - Execution results in Excel-style table format
249
- - Optimization suggestions
 
 
 
 
 
 
 
 
 
 
 
250
  """
251
  )
252
 
 
3
  import json
4
  import gradio as gr
5
  import pandas as pd
6
+ from datetime import datetime
7
 
8
# Validation verdict attached to a generated SQL query.
# Field order and defaults are part of the model definition; do not reorder.
class ValidationStatus(BaseModel):
    # Overall verdict; shown on the "Valid:" line of the validation text.
    is_valid: bool
    # Syntax problems; listed under "Errors:" when the list is non-empty.
    syntax_errors: list[str]
    # Optional advisories; listed under "Warnings:" when the list is non-empty.
    # May be omitted from the input, in which case it is an empty list.
    warnings: list[str] = []
12
 
13
  class SQLQueryGeneration(BaseModel):
14
  query: str
 
21
  sample_data: str
22
  execution_results: str
23
  optimization_notes: list[str]
24
+ explanation: str = ""
25
+ alternative_queries: list[str] = []
26
 
27
  def parse_execution_results_to_dataframe(execution_results):
28
  """Convert text-based table results to pandas DataFrame"""
 
31
  if len(lines) < 3:
32
  return None
33
 
 
34
  header_line = lines[0]
35
  headers = [col.strip() for col in header_line.split('|')]
36
 
 
37
  data_rows = []
38
  for line in lines[2:]:
39
  if line.strip() and not line.strip().startswith('-'):
 
49
  print(f"Error parsing results: {e}")
50
  return None
51
 
52
+ def generate_sql_query(api_key, user_query, sql_dialect, include_comments, complexity_level):
53
  """Generate SQL query from natural language using GROQ API"""
54
  try:
55
  if not api_key:
56
+ return "Error: Please enter your GROQ API key", "", "", "", None, "", "", ""
57
 
58
  if not user_query:
59
+ return "Error: Please enter a query description", "", "", "", None, "", "", ""
60
 
61
  client = Groq(api_key=api_key)
62
 
63
+ # Enhanced system prompt
64
+ system_prompt = f"""You are an expert SQL database architect and query optimizer. Generate production-ready SQL queries from natural language descriptions.
65
+
66
+ **SQL Dialect**: {sql_dialect}
67
+ **Include Comments**: {include_comments}
68
+ **Target Complexity**: {complexity_level}
69
+
70
+ ## Core Requirements:
71
+ 1. Generate syntactically correct {sql_dialect} queries
72
+ 2. Follow {sql_dialect}-specific best practices and conventions
73
+ 3. Use proper indexing hints where applicable
74
+ 4. Include CTEs (Common Table Expressions) for complex queries when appropriate
75
+ 5. Add inline comments explaining complex logic (if comments enabled)
76
+ 6. Consider query performance and optimization
77
+
78
+ ## Security Guidelines:
79
+ - Use parameterized query patterns (e.g., WHERE id = $1)
80
+ - Never include actual sensitive data in examples
81
+ - Validate that queries follow principle of least privilege
82
+ - Flag any potential SQL injection vulnerabilities
83
+
84
+ ## Query Analysis:
85
+ - Identify query type (SELECT, INSERT, UPDATE, DELETE, etc.)
86
+ - Estimate complexity (Simple, Moderate, Complex, Advanced)
87
+ - List all tables and joins involved
88
+ - Provide optimization suggestions (indexes, query rewriting, etc.)
89
+ - Warn about potential performance issues (N+1 queries, missing indexes, etc.)
90
+
91
+ ## Sample Data Requirements:
92
+ 1. Create realistic table schemas with appropriate:
93
+ - Primary keys and foreign keys
94
+ - Indexes on commonly queried columns
95
+ - Constraints (NOT NULL, UNIQUE, CHECK)
96
+ - Appropriate data types for {sql_dialect}
97
+ 2. Generate 5-10 rows of realistic sample data
98
+ 3. Execute the query against sample data
99
+ 4. Show results in pipe-delimited table format:
100
+
101
  column1 | column2 | column3
102
  --------|---------|--------
103
  value1 | value2 | value3
104
+
105
+ ## Additional Features:
106
+ - Provide a clear explanation of what the query does
107
+ - Suggest 1-2 alternative approaches if applicable
108
+ - Include execution notes about assumptions made
109
+ - List optimization opportunities
110
+
111
+ ## Response Structure:
112
+ Return a complete JSON object with all fields populated, including explanation and alternative_queries arrays."""
113
+
114
+ response = client.chat.completions.create(
115
+ model="moonshotai/kimi-k2-instruct-0905",
116
+ messages=[
117
+ {"role": "system", "content": system_prompt},
118
+ {"role": "user", "content": user_query},
119
  ],
120
  response_format={
121
  "type": "json_schema",
 
130
  json.loads(response.choices[0].message.content)
131
  )
132
 
133
+ # Format validation status with warnings
134
+ validation_text = f"βœ“ Valid: {sql_query_generation.validation_status.is_valid}\n"
135
  if sql_query_generation.validation_status.syntax_errors:
136
+ validation_text += "\n❌ Errors:\n" + "\n".join(
137
+ f" β€’ {error}" for error in sql_query_generation.validation_status.syntax_errors
138
  )
139
  else:
140
+ validation_text += "βœ“ No syntax errors found"
141
+
142
+ if sql_query_generation.validation_status.warnings:
143
+ validation_text += "\n\n⚠️ Warnings:\n" + "\n".join(
144
+ f" β€’ {warning}" for warning in sql_query_generation.validation_status.warnings
145
+ )
146
 
147
  # Format metadata
148
+ metadata = f"""πŸ“Š Query Type: {sql_query_generation.query_type}
149
+ πŸ“ Tables Used: {', '.join(sql_query_generation.tables_used)}
150
+ ⚑ Complexity: {sql_query_generation.estimated_complexity}
151
+
152
+ πŸ” Execution Notes:
153
+ {chr(10).join(f" β€’ {note}" for note in sql_query_generation.execution_notes)}
154
+
155
+ βš™οΈ Optimization Notes:
156
+ {chr(10).join(f" β€’ {note}" for note in sql_query_generation.optimization_notes)}"""
157
+
158
+ # Format explanation
159
+ explanation = sql_query_generation.explanation or "No explanation provided"
160
+
161
+ # Format alternative queries
162
+ alternatives = ""
163
+ if sql_query_generation.alternative_queries:
164
+ alternatives = "\n\n".join(
165
+ f"Alternative {i+1}:\n{query}"
166
+ for i, query in enumerate(sql_query_generation.alternative_queries)
167
+ )
168
+ else:
169
+ alternatives = "No alternative approaches suggested"
170
 
171
  # Convert execution results to DataFrame
172
  results_df = parse_execution_results_to_dataframe(sql_query_generation.execution_results)
 
177
  sql_query_generation.table_schema,
178
  sql_query_generation.sample_data,
179
  results_df,
180
+ validation_text,
181
+ explanation,
182
+ alternatives
183
  )
184
 
185
  except Exception as e:
186
+ error_msg = f"❌ Error: {str(e)}"
187
+ return error_msg, "", "", "", None, "", "", ""
188
+
189
def export_query(sql_query, schema, sample_data):
    """Write the query, schema and sample data to a .sql file and return its path.

    The export button routes this function's return value into a ``gr.File``
    component, which expects a path to a file on disk rather than the file's
    contents, so the assembled SQL text is saved to a timestamped file inside
    a fresh directory under the system temp dir.

    Args:
        sql_query: Generated SQL query text (``None`` is treated as empty).
        schema: CREATE TABLE statements (``None`` is treated as empty).
        sample_data: INSERT statements / sample rows (``None`` is treated as empty).

    Returns:
        str: Path of the written ``query_<YYYYmmdd_HHMMSS>.sql`` file.

    Raises:
        OSError: If the temporary directory or file cannot be created.
    """
    # Local imports keep this block self-contained; the module only imports
    # ``datetime`` at top level.
    import os
    import tempfile

    # Take the time once so the header and the file name always agree.
    now = datetime.now()
    divider = "-" * 60  # a run of dashes is itself a valid SQL line comment

    # Code panes that were never populated come through as None; export them
    # as empty sections instead of the literal text "None".
    sql_query = "" if sql_query is None else sql_query
    schema = "" if schema is None else schema
    sample_data = "" if sample_data is None else sample_data

    content = (
        "-- Generated SQL Query\n"
        f"-- Timestamp: {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
        "--\n"
        f"{divider}\n"
        "\n"
        "-- TABLE SCHEMA\n"
        f"{schema}\n"
        "\n"
        f"{divider}\n"
        "\n"
        "-- SAMPLE DATA\n"
        f"{sample_data}\n"
        "\n"
        f"{divider}\n"
        "\n"
        "-- QUERY\n"
        f"{sql_query}\n"
    )

    # A per-export directory avoids name clashes between exports made within
    # the same second while keeping a readable download file name.
    export_dir = tempfile.mkdtemp(prefix="sql_export_")
    export_path = os.path.join(
        export_dir, f"query_{now.strftime('%Y%m%d_%H%M%S')}.sql"
    )
    with open(export_path, "w", encoding="utf-8") as sql_file:
        sql_file.write(content)
    return export_path
211
+
212
+ # Create Gradio interface with enhanced features
213
+ with gr.Blocks(title="SQL Query Generator Pro", theme=gr.themes.Ocean()) as demo:
214
  gr.Markdown(
215
  """
216
+ # πŸ—„οΈ Natural Language to SQL Query Generator Pro
217
+ Convert natural language descriptions into production-ready SQL queries with validation, optimization, and execution results.
218
  """
219
  )
220
 
221
  with gr.Row():
222
+ with gr.Column(scale=1):
223
  api_key_input = gr.Textbox(
224
+ label="πŸ”‘ GROQ API Key",
225
  type="password",
226
  placeholder="Enter your GROQ API key here...",
227
  info="Your API key is not stored and only used for this session"
228
  )
229
 
230
+ sql_dialect = gr.Dropdown(
231
+ label="SQL Dialect",
232
+ choices=["PostgreSQL", "MySQL", "SQLite", "SQL Server", "Oracle"],
233
+ value="PostgreSQL",
234
+ info="Select your target database system"
235
+ )
236
+
237
+ with gr.Row():
238
+ include_comments = gr.Checkbox(
239
+ label="Include inline comments",
240
+ value=True
241
+ )
242
+ complexity_level = gr.Radio(
243
+ label="Target Complexity",
244
+ choices=["Simple", "Moderate", "Advanced"],
245
+ value="Moderate"
246
+ )
247
+
248
  query_input = gr.Textbox(
249
  label="Natural Language Query",
250
+ placeholder="e.g., Find all customers who made purchases over $1000 in the last quarter and group by region",
251
+ lines=4,
252
  value="Find all the students who scored more than 90 out of 100"
253
  )
254
 
255
+ generate_btn = gr.Button("πŸš€ Generate SQL Query", variant="primary", size="lg")
256
 
257
  gr.Examples(
258
  examples=[
259
  ["Find all the students who scored more than 90 out of 100"],
260
+ ["Get the top 5 customers by total purchase amount with their contact info"],
261
+ ["List all employees hired in the last 6 months with their department and salary"],
262
+ ["Find products with price between $50 and $100 ordered by popularity"],
263
+ ["Show average salary by department with employee count"],
264
+ ["Get customers who haven't made a purchase in the last 90 days"],
265
+ ["Find duplicate email addresses in the users table"],
266
+ ["Calculate running total of sales by date for each product category"]
267
  ],
268
  inputs=query_input,
269
+ label="πŸ“š Example Queries"
270
  )
271
 
272
  with gr.Row():
 
274
  sql_output = gr.Code(
275
  label="Generated SQL Query",
276
  language="sql",
277
+ lines=10
278
  )
279
 
280
+ explanation_output = gr.Textbox(
281
+ label="πŸ“– Query Explanation",
282
+ lines=4
283
  )
284
 
285
+ with gr.Row():
286
+ with gr.Column():
287
+ metadata_output = gr.Textbox(
288
+ label="πŸ“Š Query Metadata",
289
+ lines=10
290
+ )
291
+
292
+ with gr.Column():
293
+ validation_output = gr.Textbox(
294
+ label="βœ… Validation Status",
295
+ lines=10
296
+ )
297
 
298
  with gr.Row():
299
  with gr.Column():
300
  schema_output = gr.Code(
301
+ label="πŸ—οΈ Table Schema",
302
  language="sql",
303
+ lines=10
304
  )
305
 
306
  with gr.Column():
307
  sample_data_output = gr.Code(
308
+ label="πŸ“ Sample Data",
309
  language="sql",
310
+ lines=10
311
  )
312
 
313
  with gr.Row():
 
321
  interactive=False
322
  )
323
 
324
+ with gr.Row():
325
+ alternatives_output = gr.Code(
326
+ label="πŸ”„ Alternative Query Approaches",
327
+ language="sql",
328
+ lines=8
329
+ )
330
+
331
+ with gr.Row():
332
+ export_btn = gr.Button("πŸ’Ύ Export Complete SQL File", variant="secondary")
333
+ export_output = gr.File(label="Download SQL File")
334
+
335
  generate_btn.click(
336
  fn=generate_sql_query,
337
+ inputs=[api_key_input, query_input, sql_dialect, include_comments, complexity_level],
338
  outputs=[
339
  sql_output,
340
  metadata_output,
341
  schema_output,
342
  sample_data_output,
343
  execution_output,
344
+ validation_output,
345
+ explanation_output,
346
+ alternatives_output
347
  ]
348
  )
349
 
350
+ export_btn.click(
351
+ fn=export_query,
352
+ inputs=[sql_output, schema_output, sample_data_output],
353
+ outputs=export_output
354
+ )
355
+
356
  gr.Markdown(
357
  """
358
  ---
359
+ ### πŸ“‹ How to use:
360
+ 1. **API Key**: Enter your GROQ API key (get one from [console.groq.com](https://console.groq.com))
361
+ 2. **Configure**: Select your SQL dialect and preferences
362
+ 3. **Query**: Type your natural language description
363
+ 4. **Generate**: Click the button to get your SQL query
364
+ 5. **Export**: Download the complete SQL file with schema and sample data
365
+
366
+ ### ✨ Features:
367
+ - βœ… Multi-dialect SQL support (PostgreSQL, MySQL, SQLite, SQL Server, Oracle)
368
+ - πŸ” Syntax validation with warnings
369
+ - ⚑ Performance optimization suggestions
370
+ - πŸ“Š Live query execution with sample data
371
+ - πŸ”„ Alternative query approaches
372
+ - πŸ“– Clear explanations of query logic
373
+ - πŸ’Ύ Export complete SQL files
374
+ - 🎯 Complexity level control
375
+
376
+ ### πŸ”’ Security:
377
+ - Your API key is never stored
378
+ - Queries use parameterized patterns
379
+ - No sensitive data in examples
380
  """
381
  )
382