shukdevdattaEX committed on
Commit
d8ff681
·
verified ·
1 Parent(s): 2d1df48

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +318 -0
app.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from groq import Groq
3
+ from pydantic import BaseModel
4
+ import json
5
+ import sqlite3
6
+ import pandas as pd
7
+ from typing import List, Optional
8
+ import re
9
+
10
+ # Pydantic Models
11
# Validation verdict attached to a generated SQL query.
# Used as the `validation_status` field of SQLQueryGeneration; its values come
# from the model response that is parsed in generate_sql_query (this file does
# not run any SQL syntax check of its own to fill them in).
class ValidationStatus(BaseModel):
    # True when the query is reported as valid.
    is_valid: bool
    # Syntax problems reported for the query; presumably empty when is_valid
    # is True -- nothing in this file enforces that.
    syntax_errors: list[str]
14
+
15
# Structured result of turning a natural-language request into SQL.
# generate_sql_query passes this model's JSON schema to the chat completion as
# the requested response format and validates the reply against it.
# Documented with comments rather than a docstring so the schema produced by
# model_json_schema() is left untouched.
class SQLQueryGeneration(BaseModel):
    # SQL text; process_query executes it against the in-memory database.
    query: str
    # Free-form label for the kind of statement (no fixed set of values is
    # enforced here).
    query_type: str
    # Names of the tables the query is reported to touch.
    tables_used: list[str]
    # Free-form complexity label supplied with the query.
    estimated_complexity: str
    # Additional remarks supplied with the query.
    execution_notes: list[str]
    # Nested validity verdict (see ValidationStatus).
    validation_status: ValidationStatus
22
+
23
# Description of a single table: its name, column descriptors and sample rows.
# NOTE(review): not referenced anywhere else in the visible part of this file;
# it appears to mirror the per-table JSON structure requested in the
# generate_sample_data prompt -- confirm before relying on it.
class TableSchema(BaseModel):
    # Name of the table.
    table_name: str
    # Column descriptors; presumably {"name": ..., "type": ...} dicts as in the
    # prompt used by generate_sample_data.
    columns: list[dict]
    # Sample rows; presumably one dict per row keyed by column name.
    sample_data: list[dict]
27
+
28
def generate_sample_data(user_query: str, groq_api_key: str) -> dict:
    """Ask the Groq chat model for a table schema plus sample rows.

    The model is prompted to return JSON of the form
    ``{"tables": [{"table_name", "columns", "sample_data"}, ...]}`` that fits
    the user's request.

    Args:
        user_query: Natural-language request the sample data should suit.
        groq_api_key: API key used to create the Groq client.

    Returns:
        The parsed JSON object; guaranteed to contain a ``"tables"`` list.

    Raises:
        Exception: On any failure (API error, invalid JSON, unexpected
            structure). The message starts with
            ``"Error generating sample data: "`` and the original error is
            chained as the cause.
    """
    try:
        client = Groq(api_key=groq_api_key)

        # Request to generate table schema and sample data
        schema_prompt = f"""Based on this query: "{user_query}"

Generate a realistic database schema with sample data. Return ONLY valid JSON with this structure:
{{
    "tables": [
        {{
            "table_name": "table_name",
            "columns": [
                {{"name": "column_name", "type": "INTEGER|TEXT|REAL|DATE"}},
                ...
            ],
            "sample_data": [
                {{"column_name": value, ...}},
                ...at least 10-15 rows
            ]
        }}
    ]
}}

Make the data realistic and relevant to the query. Include enough variety to make the query results meaningful."""

        response = client.chat.completions.create(
            model="moonshotai/kimi-k2-instruct-0905",
            messages=[
                {"role": "system", "content": "You are a database expert. Generate realistic table schemas and sample data. Return ONLY valid JSON, no markdown formatting."},
                {"role": "user", "content": schema_prompt}
            ],
            temperature=0.7
        )

        # The content can be None; treat that as empty so the failure below is
        # a clear JSON error instead of an AttributeError.
        content = (response.choices[0].message.content or "").strip()

        # Despite the instructions the model may wrap the JSON in a markdown
        # fence, with or without a language tag and possibly after a preamble.
        # Keep only the fenced payload when a complete fence is present.
        fenced = re.search(r"```[A-Za-z0-9_-]*\s*(.*?)\s*```", content, re.DOTALL)
        if fenced:
            content = fenced.group(1)

        schema_data = json.loads(content)

        # Fail here with a readable message rather than with a bare KeyError
        # in the callers that index schema_data['tables'].
        if not isinstance(schema_data, dict) or not isinstance(schema_data.get("tables"), list):
            raise ValueError("model response does not contain a 'tables' list")

        return schema_data
    except Exception as e:
        raise Exception(f"Error generating sample data: {str(e)}") from e
74
+
75
# Conservative shape of a column type: words, optionally followed by
# "(n)" or "(n, m)". Anything else is replaced by TEXT.
_COLUMN_TYPE_RE = re.compile(r"[A-Z][A-Z0-9_ ]*(\(\s*\d+\s*(,\s*\d+\s*)?\))?")


def _quote_identifier(name) -> str:
    """Return *name* as a double-quoted SQL identifier with quotes escaped."""
    return '"' + str(name).replace('"', '""') + '"'


def _to_sqlite_value(value):
    """Convert values sqlite3 cannot bind (dict/list) to JSON text."""
    if isinstance(value, (dict, list)):
        return json.dumps(value)
    return value


def create_tables_in_db(schema_data: dict) -> sqlite3.Connection:
    """Create an in-memory SQLite database from a generated schema.

    Table and column names come from model output, so they are always quoted;
    this keeps reserved words (``order``, ``group``) and names with spaces
    usable and prevents them from altering the statement. Column types that do
    not look like a plain type declaration fall back to ``TEXT``.

    Args:
        schema_data: Dict with a ``"tables"`` list; each table has
            ``"table_name"``, ``"columns"`` (dicts with ``"name"`` and
            ``"type"``) and optionally ``"sample_data"`` (row dicts keyed by
            column name). Missing cells are inserted as NULL.

    Returns:
        An open connection to the populated in-memory database. The caller
        owns it and must close it.

    Raises:
        KeyError: If a required key is missing from the schema.
        sqlite3.Error: If a statement fails. The connection is closed before
            the error propagates.
    """
    conn = sqlite3.connect(':memory:')
    try:
        cursor = conn.cursor()

        for table in schema_data['tables']:
            quoted_table = _quote_identifier(table['table_name'])
            columns = table['columns']
            col_names = [col['name'] for col in columns]

            # Create table
            column_defs = []
            for col in columns:
                col_type = str(col.get('type') or 'TEXT').strip().upper()
                if not _COLUMN_TYPE_RE.fullmatch(col_type):
                    col_type = 'TEXT'
                column_defs.append(f"{_quote_identifier(col['name'])} {col_type}")

            cursor.execute(f"CREATE TABLE {quoted_table} ({', '.join(column_defs)})")

            # Insert sample data
            sample_data = table.get('sample_data') or []
            if sample_data:
                quoted_cols = ', '.join(_quote_identifier(name) for name in col_names)
                placeholders = ', '.join('?' for _ in col_names)
                insert_sql = f"INSERT INTO {quoted_table} ({quoted_cols}) VALUES ({placeholders})"
                rows = [
                    [_to_sqlite_value(row.get(name)) for name in col_names]
                    for row in sample_data
                ]
                cursor.executemany(insert_sql, rows)

        conn.commit()
    except Exception:
        # Do not leak the connection when the schema turns out to be unusable.
        conn.close()
        raise
    return conn
106
+
107
def generate_sql_query(user_query: str, groq_api_key: str, schema_info: str) -> SQLQueryGeneration:
    """Turn a natural-language request into a structured SQL query via Groq.

    The schema description and the user's request are sent to the chat model,
    which is asked to answer in the JSON shape of ``SQLQueryGeneration``.

    Args:
        user_query: Natural-language description of the wanted query.
        groq_api_key: API key used to create the Groq client.
        schema_info: Text description of the available tables and columns.

    Returns:
        The model's answer validated as a ``SQLQueryGeneration``.

    Raises:
        Exception: On any failure; the message starts with
            ``"Error generating SQL query: "``.
    """
    system_message = {
        "role": "system",
        "content": "You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata. Use standard SQL syntax compatible with SQLite.",
    }

    try:
        groq_client = Groq(api_key=groq_api_key)

        # Schema first, then the request, then the instruction.
        prompt = (
            "Database Schema:\n"
            f"{schema_info}\n"
            "\n"
            f"User Request: {user_query}\n"
            "\n"
            "Generate a SQL query that works with the above schema."
        )

        # Ask for a reply that conforms to the pydantic model's JSON schema.
        structured_format = {
            "type": "json_schema",
            "json_schema": {
                "name": "sql_query_generation",
                "schema": SQLQueryGeneration.model_json_schema(),
            },
        }

        completion = groq_client.chat.completions.create(
            model="moonshotai/kimi-k2-instruct-0905",
            messages=[system_message, {"role": "user", "content": prompt}],
            response_format=structured_format,
        )

        raw_payload = completion.choices[0].message.content
        return SQLQueryGeneration.model_validate(json.loads(raw_payload))
    except Exception as err:
        raise Exception(f"Error generating SQL query: {err!s}")
143
+
144
def execute_sql_query(conn: sqlite3.Connection, query: str) -> pd.DataFrame:
    """Run *query* on *conn* and hand back the result set as a DataFrame.

    Args:
        conn: Open SQLite connection to query.
        query: SQL text to execute.

    Returns:
        A DataFrame holding the rows produced by the query.

    Raises:
        Exception: If pandas or SQLite reports an error; the message starts
            with ``"Error executing SQL query: "``.
    """
    try:
        return pd.read_sql_query(query, conn)
    except Exception as err:
        raise Exception(f"Error executing SQL query: {err!s}")
151
+
152
def format_schema_info(schema_data: dict) -> str:
    """Render the generated schema as plain text, one section per table.

    Each section lists the table name, its columns with their types, and the
    number of sample rows.

    Args:
        schema_data: Dict with a ``"tables"`` list whose items provide
            ``"table_name"``, ``"columns"`` (dicts with ``"name"`` and
            ``"type"``) and ``"sample_data"``.

    Returns:
        The newline-joined description; an empty string when there are no
        tables.
    """
    lines = []
    for tbl in schema_data['tables']:
        # Header: a blank separator line precedes every table name.
        lines.extend([f"\nTable: {tbl['table_name']}", "Columns:"])
        lines.extend(f" - {column['name']} ({column['type']})" for column in tbl['columns'])
        row_count = len(tbl['sample_data'])
        lines.append(f"Sample rows: {row_count}")
    return '\n'.join(lines)
162
+
163
def process_query(user_query: str, groq_api_key: str):
    """Run the whole pipeline for one request and build the UI outputs.

    Steps: generate a sample schema with data, load it into an in-memory
    SQLite database, generate a SQL query for the request, execute it.

    Args:
        user_query: Natural-language request typed by the user.
        groq_api_key: Groq API key forwarded to both model calls.

    Returns:
        A 5-tuple ``(process_log, sql_metadata, sample_data_html,
        result_html, unused)``. On success ``sql_metadata`` is a JSON string.
        When input validation fails or any step raises, the first element
        carries the error message, ``sql_metadata`` is ``None`` (an empty
        string is not valid JSON for a JSON output component) and the other
        elements are empty strings.
    """
    import html  # function-scope import: only used to escape table names below

    if not groq_api_key or not groq_api_key.strip():
        return "❌ Please enter your Groq API key", None, "", "", ""

    if not user_query or not user_query.strip():
        return "❌ Please enter a query", None, "", "", ""

    conn = None
    try:
        output_log = []

        # Step 1: Generate sample data
        output_log.append("### Step 1: Generating Sample Database Schema and Data")
        output_log.append(f"Query: {user_query}\n")

        schema_data = generate_sample_data(user_query, groq_api_key)
        schema_info = format_schema_info(schema_data)

        output_log.append("✅ Generated database schema:")
        output_log.append(schema_info)
        output_log.append("")

        # Step 2: Create tables
        output_log.append("### Step 2: Creating In-Memory SQLite Database")
        conn = create_tables_in_db(schema_data)
        output_log.append("✅ Tables created and populated with sample data\n")

        # Display sample data
        sample_tables_html = []
        for table in schema_data['tables']:
            df_sample = pd.DataFrame(table['sample_data'][:5])  # Show first 5 rows
            # The table name is model output; escape it before embedding in HTML.
            safe_name = html.escape(str(table['table_name']))
            sample_tables_html.append(f"<h4>Sample Data from '{safe_name}' (first 5 rows):</h4>")
            sample_tables_html.append(df_sample.to_html(index=False, border=1))

        # Step 3: Generate SQL query
        output_log.append("### Step 3: Generating SQL Query")
        sql_generation = generate_sql_query(user_query, groq_api_key, schema_info)

        # Format the SQL generation output
        sql_output = {
            "query": sql_generation.query,
            "query_type": sql_generation.query_type,
            "tables_used": sql_generation.tables_used,
            "estimated_complexity": sql_generation.estimated_complexity,
            "execution_notes": sql_generation.execution_notes,
            "validation_status": {
                "is_valid": sql_generation.validation_status.is_valid,
                "syntax_errors": sql_generation.validation_status.syntax_errors
            }
        }

        sql_output_formatted = json.dumps(sql_output, indent=2)
        output_log.append("✅ SQL Query Generated:\n")

        # Step 4: Execute query
        output_log.append("\n### Step 4: Executing SQL Query")
        output_log.append(f"Executing: {sql_generation.query}\n")

        result_df = execute_sql_query(conn, sql_generation.query)

        if len(result_df) == 0:
            output_log.append("ℹ️ Query executed successfully but returned 0 rows")
            result_html = "<p>No results found</p>"
        else:
            output_log.append(f"✅ Query executed successfully! Returned {len(result_df)} row(s)\n")
            result_html = result_df.to_html(index=False, border=1)

        # Combine all outputs
        process_log = '\n'.join(output_log)
        sample_data_html = '\n'.join(sample_tables_html)

        return process_log, sql_output_formatted, sample_data_html, result_html, ""

    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        return error_msg, None, "", "", ""
    finally:
        # Close the database on every path, including failures in steps 3-4.
        if conn is not None:
            conn.close()
241
+
242
+ # Gradio Interface
243
# Layout: inputs at the top, then one full-width row each for the process log,
# the sample tables, the structured SQL metadata and the query results.
with gr.Blocks(title="SQL Query Generator & Executor", theme=gr.themes.Soft()) as app:
    gr.Markdown("""
    # 🔍 SQL Query Generator & Executor

    This app uses Groq's Kimi-K2 model to:
    1. Generate realistic sample database tables based on your query
    2. Generate a structured SQL query from natural language
    3. Execute the query and show results

    ### How to use:
    1. Enter your Groq API key ([Get one here](https://console.groq.com/keys))
    2. Enter your query in plain English
    3. Click "Generate & Execute SQL"
    """)

    with gr.Row():
        with gr.Column(scale=2):
            api_key_input = gr.Textbox(
                label="Groq API Key",
                placeholder="Enter your Groq API key here...",
                type="password"
            )

            query_input = gr.Textbox(
                label="Natural Language Query",
                placeholder="Example: Find all customers who made orders over $500 in the last 30 days, show their name, email, and total order amount",
                lines=3
            )

            submit_btn = gr.Button("🚀 Generate & Execute SQL", variant="primary", size="lg")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📋 Process Log")
            process_output = gr.Textbox(
                label="Execution Steps",
                lines=12,
                max_lines=20
            )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🗂️ Sample Database Tables")
            sample_data_output = gr.HTML(label="Sample Data")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📝 Generated SQL Query (Structured Output)")
            sql_output = gr.JSON(label="SQL Query Metadata")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### ✨ Query Execution Results")
            result_output = gr.HTML(label="Results")

    # Examples
    gr.Examples(
        examples=[
            ["Find all customers who made orders over $500 in the last 30 days, show their name, email, and total order amount"],
            ["List all products that are out of stock along with their supplier information"],
            ["Show the top 5 employees by total sales in the last quarter"],
            ["Find all students who scored above 85% in Mathematics and their contact details"],
            ["Get all active users who haven't logged in for more than 60 days"]
        ],
        inputs=query_input,
        label="Example Queries"
    )

    # The outputs follow the order of process_query's 5-tuple; the hidden
    # textbox only absorbs the unused fifth element.
    submit_btn.click(
        fn=process_query,
        inputs=[query_input, api_key_input],
        outputs=[process_output, sql_output, sample_data_output, result_output, gr.Textbox(visible=False)]
    )

if __name__ == "__main__":
    app.launch()