shukdevdattaEX committed on
Commit
dea7244
·
verified ·
1 Parent(s): 2f64b1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +274 -91
app.py CHANGED
@@ -6,6 +6,7 @@ import sqlite3
6
  import pandas as pd
7
  from datetime import datetime, timedelta
8
  import random
 
9
 
10
  # Pydantic models for structured output
11
  class ValidationStatus(BaseModel):
@@ -20,81 +21,251 @@ class SQLQueryGeneration(BaseModel):
20
  execution_notes: list[str]
21
  validation_status: ValidationStatus
22
 
23
- # Sample data generators
24
def generate_sample_customers(count=10):
    """Generate sample customer data"""
    given = ["Alice", "Bob", "Carol", "David", "Emma", "Frank", "Grace", "Henry", "Ivy", "Jack"]
    family = ["Johnson", "Smith", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez"]

    def build_row(idx):
        # Pick a random first/last name pair; the email is derived from the
        # first name plus the customer id so every address is unique.
        first = random.choice(given)
        last = random.choice(family)
        return {
            'customer_id': idx,
            'name': f"{first} {last}",
            'email': f"{first.lower()}{idx}@example.com"
        }

    return [build_row(idx) for idx in range(1, count + 1)]
39
-
40
def generate_sample_orders(customer_count=10, order_count=20):
    """Generate sample order data"""
    today = datetime.now()
    amounts = [250, 350, 450, 600, 800, 1200, 1500, 300]

    def make_order(seq):
        # Order date falls on a random day within the last two months.
        placed = today - timedelta(days=random.randint(0, 60))
        return {
            'order_id': 100 + seq,
            'customer_id': random.randint(1, customer_count),
            'total_amount': random.choice(amounts),
            'order_date': placed.strftime('%Y-%m-%d')
        }

    return [make_order(seq) for seq in range(1, order_count + 1)]
56
-
57
def generate_sample_products(count=15):
    """Generate sample product data"""
    category_pool = ["Electronics", "Clothing", "Home", "Sports", "Books"]
    base_names = ["Widget", "Gadget", "Tool", "Item", "Device"]

    # One dict per product; the numeric suffix keeps product names unique.
    return [
        {
            'product_id': pid,
            'product_name': f"{random.choice(base_names)} {pid}",
            'category': random.choice(category_pool),
            'price': round(random.uniform(10, 500), 2),
            'stock_quantity': random.randint(0, 100)
        }
        for pid in range(1, count + 1)
    ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
def create_database_from_tables(tables_used):
    """Create an in-memory SQLite database with sample data for the tables
    mentioned in the generated query.

    Args:
        tables_used: iterable of table names referenced by the generated SQL.
            Only 'customers', 'orders', and 'products' are recognized; any
            other names are silently ignored.

    Returns:
        (conn, sample_data): an open in-memory sqlite3 connection holding the
        populated tables, and a dict mapping table name -> the pandas
        DataFrame that was written to it. The caller owns the connection.
    """
    conn = sqlite3.connect(':memory:')
    sample_data = {}

    # Only materialize the tables the query actually references.
    if 'customers' in tables_used:
        df_customers = pd.DataFrame(generate_sample_customers(10))
        df_customers.to_sql('customers', conn, index=False, if_exists='replace')
        sample_data['customers'] = df_customers

    if 'orders' in tables_used:
        df_orders = pd.DataFrame(generate_sample_orders(10, 20))
        df_orders.to_sql('orders', conn, index=False, if_exists='replace')
        sample_data['orders'] = df_orders

    if 'products' in tables_used:
        df_products = pd.DataFrame(generate_sample_products(15))
        df_products.to_sql('products', conn, index=False, if_exists='replace')
        sample_data['products'] = df_products

    return conn, sample_data
100
 
@@ -109,10 +280,10 @@ def execute_sql_on_sample_data(sql_query, conn):
109
  def process_nl_query(api_key, natural_query):
110
  """Main function to process natural language query"""
111
  if not api_key:
112
- return "❌ Please enter your Groq API key", "", "", ""
113
 
114
  if not natural_query:
115
- return "❌ Please enter a natural language query", "", "", ""
116
 
117
  try:
118
  # Initialize Groq client
@@ -123,7 +294,7 @@ def process_nl_query(api_key, natural_query):
123
  output_text += "### Step 1: Understanding User Intent\n"
124
  output_text += f"**User Query:** {natural_query}\n\n"
125
 
126
- # Call Groq API for SQL generation - FIXED: Added "JSON" to system message
127
  response = client.chat.completions.create(
128
  model="moonshotai/kimi-k2-instruct-0905",
129
  messages=[
@@ -131,7 +302,7 @@ def process_nl_query(api_key, natural_query):
131
  "role": "system",
132
  "content": """You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata.
133
 
134
- Return your response in JSON format with the following structure:
135
  {
136
  "query": "SQL query string",
137
  "query_type": "SELECT/INSERT/UPDATE/DELETE",
@@ -144,7 +315,14 @@ def process_nl_query(api_key, natural_query):
144
  }
145
  }
146
 
147
- Use standard SQL syntax compatible with SQLite. Always use proper JOINs, WHERE clauses, and GROUP BY when needed.""",
 
 
 
 
 
 
 
148
  },
149
  {
150
  "role": "user",
@@ -186,12 +364,16 @@ def process_nl_query(api_key, natural_query):
186
 
187
  # Step 3: Generate Sample Database Tables
188
  output_text += "### Step 3: Auto-Generated Sample Database Tables\n\n"
 
 
189
  conn, sample_data = create_database_from_tables(sql_query_gen.tables_used)
190
 
191
- # Display sample tables
192
  for table_name, df in sample_data.items():
193
- output_text += f"**πŸ“Š Sample `{table_name}` Table:**\n\n"
194
- output_text += df.to_markdown(index=False)
 
 
195
  output_text += "\n\n"
196
 
197
  # Step 4: Execute SQL Query
@@ -202,10 +384,10 @@ def process_nl_query(api_key, natural_query):
202
 
203
  if error:
204
  output_text += f"❌ **Execution Error:** {error}\n"
205
- result_table = None
206
  else:
207
  output_text += "βœ… **Query executed successfully!**\n\n"
208
- output_text += "**πŸ“ˆ SQL Execution Result:**\n\n"
209
  if len(result_df) > 0:
210
  output_text += result_df.to_markdown(index=False)
211
  else:
@@ -217,19 +399,14 @@ def process_nl_query(api_key, natural_query):
217
  # Format outputs for Gradio
218
  json_output = json.dumps(sql_query_gen.model_dump(), indent=2)
219
 
220
- if result_df is not None:
221
- result_display = result_df
222
- else:
223
- result_display = pd.DataFrame({"Error": [error]})
224
-
225
- return output_text, json_output, result_display, sql_query_gen.query
226
 
227
  except Exception as e:
228
- error_msg = f"❌ **Error:** {str(e)}\n\nPlease check your API key and query."
229
- return error_msg, "", pd.DataFrame(), ""
230
 
231
  # Create Gradio Interface
232
- with gr.Blocks(title="Natural Language to SQL Query Executor", theme=gr.themes.Soft()) as demo:
233
  gr.Markdown("""
234
  # πŸ” Natural Language to SQL Query Executor
235
 
@@ -237,8 +414,10 @@ with gr.Blocks(title="Natural Language to SQL Query Executor", theme=gr.themes.S
237
 
238
  **Example queries to try:**
239
  - "Find all customers who made orders over $500 in the last 30 days, show their name, email, and total order amount"
240
- - "Show all products with stock quantity less than 10"
241
- - "List top 5 customers by total order amount"
 
 
242
  """)
243
 
244
  with gr.Row():
@@ -276,7 +455,8 @@ with gr.Blocks(title="Natural Language to SQL Query Executor", theme=gr.themes.S
276
  gr.Markdown("### πŸ“ˆ Query Execution Result")
277
  result_output = gr.Dataframe(
278
  label="Result Table",
279
- interactive=False
 
280
  )
281
 
282
  # Connect the button to the processing function
@@ -289,20 +469,23 @@ with gr.Blocks(title="Natural Language to SQL Query Executor", theme=gr.themes.S
289
  gr.Markdown("""
290
  ---
291
  ### πŸ“– How it works:
292
- 1. **Enter your Groq API key** - Required for SQL generation
293
  2. **Write your query in plain English** - Describe what data you want to find
294
  3. **Click Generate & Execute** - The system will:
295
  - Convert your query to SQL
296
- - Generate sample database tables
 
297
  - Execute the query
298
  - Show you the results
299
 
300
  ### 🎯 Features:
301
- - βœ… Natural language to SQL conversion
302
- - βœ… Automatic sample data generation
 
303
  - βœ… Query validation and metadata
304
  - βœ… SQL execution on sample data
305
  - βœ… Structured JSON output format
 
306
  """)
307
 
308
  # Launch the app
 
6
  import pandas as pd
7
  from datetime import datetime, timedelta
8
  import random
9
+ import re
10
 
11
  # Pydantic models for structured output
12
  class ValidationStatus(BaseModel):
 
21
  execution_notes: list[str]
22
  validation_status: ValidationStatus
23
 
24
+ # Enhanced data generators for ANY table type
25
def generate_generic_table_data(table_name, row_count=15):
    """Generate sample data for ANY table based on common patterns.

    Args:
        table_name: Name of the table to generate data for (matched
            case-insensitively). Known names (e.g. 'employees', 'books',
            'customers') get a realistic predefined schema; unknown names get
            a generic fallback schema.
        row_count: Number of rows to generate for most tables. Fixed-size
            lookup tables (e.g. 'departments') keep their own row count.

    Returns:
        dict mapping column name -> list of column values, suitable for
        ``pd.DataFrame(...)``.
    """

    # --- Reusable field generators --------------------------------------
    def gen_id():
        return list(range(1, row_count + 1))

    def gen_names():
        first = ["Alice", "Bob", "Carol", "David", "Emma", "Frank", "Grace", "Henry", "Ivy", "Jack",
                 "Karen", "Leo", "Maria", "Nathan", "Olivia"]
        last = ["Johnson", "Smith", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis",
                "Rodriguez", "Martinez", "Anderson", "Taylor", "Thomas", "Moore", "Jackson"]
        return [f"{random.choice(first)} {random.choice(last)}" for _ in range(row_count)]

    def gen_emails(names=None):
        # Derive emails from the given names so name/email columns agree;
        # fall back to numbered addresses when no names are supplied.
        if names:
            return [f"{name.lower().replace(' ', '.')}@example.com" for name in names]
        return [f"user{i}@example.com" for i in range(1, row_count + 1)]

    def gen_dates(days_back=365):
        base = datetime.now()
        return [(base - timedelta(days=random.randint(0, days_back))).strftime('%Y-%m-%d')
                for _ in range(row_count)]

    def gen_amounts():
        return [round(random.uniform(100, 5000), 2) for _ in range(row_count)]

    def gen_salaries():
        return [random.choice([45000, 55000, 65000, 75000, 85000, 95000, 105000, 120000])
                for _ in range(row_count)]

    def gen_prices():
        return [round(random.uniform(10, 1000), 2) for _ in range(row_count)]

    def gen_quantities():
        return [random.randint(0, 100) for _ in range(row_count)]

    def gen_ratings():
        return [round(random.uniform(1, 10), 1) for _ in range(row_count)]

    def gen_scores():
        return [random.randint(60, 100) for _ in range(row_count)]

    def gen_ages():
        return [random.randint(18, 80) for _ in range(row_count)]

    def gen_boolean():
        # Weighted 3:1 toward True so "available" style columns look realistic.
        return [random.choice([True, False, True, True]) for _ in range(row_count)]

    def gen_status():
        # Weighted toward 'Active' for realism.
        return [random.choice(['Active', 'Inactive', 'Pending', 'Active', 'Active'])
                for _ in range(row_count)]

    # Names are generated once per table so email columns match name columns.
    employee_names = gen_names()
    student_names = gen_names()
    patient_names = gen_names()
    member_names = gen_names()
    customer_names = gen_names()

    # --- Table-specific schemas ------------------------------------------
    table_schemas = {
        'employees': {
            'employee_id': gen_id(),
            'name': employee_names,
            'email': gen_emails(employee_names),
            'department_id': [random.randint(1, 5) for _ in range(row_count)],
            'salary': gen_salaries(),
            'hire_date': gen_dates(1825),
            'position': [random.choice(['Engineer', 'Manager', 'Analyst', 'Developer', 'Designer'])
                         for _ in range(row_count)]
        },
        # Fixed 5-row lookup table; ignores row_count by design.
        # (Previously written as ``{...}[:5]`` — slicing a dict raises
        # TypeError, so every call crashed; the slice is removed.)
        'departments': {
            'id': list(range(1, 6)),
            'name': ['Engineering', 'Sales', 'Marketing', 'HR', 'Finance'],
            'manager_id': [random.randint(1, 15) for _ in range(5)],
            'budget': [random.randint(100000, 1000000) for _ in range(5)]
        },
        'books': {
            'book_id': gen_id(),
            'title': [f"Book Title {i}" for i in range(1, row_count + 1)],
            'author': gen_names(),
            'publication_year': [random.randint(2000, 2025) for _ in range(row_count)],
            'isbn': [f"978-{random.randint(1000000000, 9999999999)}" for _ in range(row_count)],
            'available': gen_boolean(),
            'category': [random.choice(['Fiction', 'Science', 'History', 'Technology', 'Arts'])
                         for _ in range(row_count)]
        },
        'students': {
            'student_id': gen_id(),
            'name': student_names,
            'email': gen_emails(student_names),
            'age': [random.randint(18, 25) for _ in range(row_count)],
            'major': [random.choice(['Computer Science', 'Engineering', 'Business', 'Mathematics', 'Physics'])
                      for _ in range(row_count)],
            'gpa': [round(random.uniform(2.5, 4.0), 2) for _ in range(row_count)],
            'enrollment_year': [random.randint(2020, 2025) for _ in range(row_count)]
        },
        'courses': {
            'course_id': gen_id(),
            'course_name': [f"Course {i}" for i in range(1, row_count + 1)],
            'subject': [random.choice(['Mathematics', 'Computer Science', 'Physics', 'Chemistry'])
                        for _ in range(row_count)],
            'credits': [random.choice([3, 4, 5]) for _ in range(row_count)],
            'instructor': gen_names()
        },
        'grades': {
            'grade_id': gen_id(),
            'student_id': [random.randint(1, 15) for _ in range(row_count)],
            'course_id': [random.randint(1, 15) for _ in range(row_count)],
            'score': gen_scores(),
            'grade_date': gen_dates(180)
        },
        'items': {
            'item_id': gen_id(),
            'item_name': [f"Item {i}" for i in range(1, row_count + 1)],
            'category': [random.choice(['Electronics', 'Furniture', 'Supplies', 'Equipment'])
                         for _ in range(row_count)],
            'stock_level': gen_quantities(),
            'reorder_point': [random.randint(10, 30) for _ in range(row_count)],
            'price': gen_prices()
        },
        'movies': {
            'movie_id': gen_id(),
            'title': [f"Movie Title {i}" for i in range(1, row_count + 1)],
            'director': gen_names(),
            'release_year': [random.randint(2015, 2025) for _ in range(row_count)],
            'rating': gen_ratings(),
            'genre': [random.choice(['Action', 'Drama', 'Comedy', 'Sci-Fi', 'Thriller'])
                      for _ in range(row_count)],
            'duration_minutes': [random.randint(90, 180) for _ in range(row_count)]
        },
        'patients': {
            'patient_id': gen_id(),
            'name': patient_names,
            'age': gen_ages(),
            'email': gen_emails(patient_names),
            'phone': [f"+1-555-{random.randint(1000, 9999)}" for _ in range(row_count)],
            'last_visit': gen_dates(90),
            'condition': [random.choice(['Diabetes', 'Hypertension', 'Asthma', 'Healthy'])
                          for _ in range(row_count)]
        },
        'appointments': {
            'appointment_id': gen_id(),
            'patient_id': [random.randint(1, 15) for _ in range(row_count)],
            'doctor_name': gen_names(),
            'appointment_date': gen_dates(60),
            'status': [random.choice(['Scheduled', 'Completed', 'Cancelled']) for _ in range(row_count)]
        },
        'properties': {
            'property_id': gen_id(),
            'address': [f"{random.randint(100, 9999)} Main St" for _ in range(row_count)],
            'city': [random.choice(['Downtown', 'Suburbs', 'Uptown', 'Eastside']) for _ in range(row_count)],
            'price': [random.randint(150000, 800000) for _ in range(row_count)],
            'bedrooms': [random.randint(1, 5) for _ in range(row_count)],
            'bathrooms': [random.randint(1, 3) for _ in range(row_count)],
            'sqft': [random.randint(800, 3500) for _ in range(row_count)],
            'status': [random.choice(['Available', 'Sold', 'Pending']) for _ in range(row_count)]
        },
        'events': {
            'event_id': gen_id(),
            'event_name': [f"Event {i}" for i in range(1, row_count + 1)],
            # NOTE(review): event dates are hard-coded to January 2026 —
            # presumably "upcoming" events; confirm this stays in the future.
            'event_date': [datetime(2026, 1, random.randint(1, 31)).strftime('%Y-%m-%d')
                           for _ in range(row_count)],
            'location': [random.choice(['Hall A', 'Conference Room', 'Auditorium', 'Stadium'])
                         for _ in range(row_count)],
            'attendees': [random.randint(10, 200) for _ in range(row_count)],
            'status': [random.choice(['Upcoming', 'Completed', 'Cancelled']) for _ in range(row_count)]
        },
        'dishes': {
            'dish_id': gen_id(),
            'dish_name': [f"Dish {i}" for i in range(1, row_count + 1)],
            'category': [random.choice(['Appetizer', 'Main Course', 'Dessert', 'Beverage'])
                         for _ in range(row_count)],
            'price': [round(random.uniform(5, 50), 2) for _ in range(row_count)],
            'preparation_time': [random.randint(10, 60) for _ in range(row_count)]
        },
        'orders': {
            'order_id': gen_id(),
            'customer_id': [random.randint(1, 15) for _ in range(row_count)],
            'dish_id': [random.randint(1, 15) for _ in range(row_count)],
            'quantity': [random.randint(1, 5) for _ in range(row_count)],
            'order_date': gen_dates(30),
            'total_amount': gen_amounts()
        },
        'members': {
            'member_id': gen_id(),
            'name': member_names,
            'email': gen_emails(member_names),
            'membership_type': [random.choice(['Basic', 'Premium', 'VIP']) for _ in range(row_count)],
            'join_date': gen_dates(730),
            # Expiry may be slightly in the past so "expired member" queries
            # have matching rows.
            'expiry_date': [(datetime.now() + timedelta(days=random.randint(-30, 90))).strftime('%Y-%m-%d')
                            for _ in range(row_count)],
            'status': [random.choice(['Active', 'Active', 'Active', 'Inactive']) for _ in range(row_count)]
        },
        'customers': {
            'customer_id': gen_id(),
            'name': customer_names,
            'email': gen_emails(customer_names),
            'phone': [f"+1-555-{random.randint(1000, 9999)}" for _ in range(row_count)],
            'registration_date': gen_dates(365),
            'status': gen_status()
        },
        'products': {
            'product_id': gen_id(),
            'product_name': [f"Product {i}" for i in range(1, row_count + 1)],
            'category': [random.choice(['Electronics', 'Clothing', 'Home', 'Sports', 'Books'])
                         for _ in range(row_count)],
            'price': gen_prices(),
            'stock_quantity': gen_quantities(),
            'supplier_id': [random.randint(1, 5) for _ in range(row_count)]
        }
    }

    # Return predefined schema if it exists, otherwise build a generic one
    # whose id column is named after the table (e.g. 'widgets_id').
    if table_name.lower() in table_schemas:
        return table_schemas[table_name.lower()]

    return {
        f'{table_name}_id': gen_id(),
        'name': gen_names(),
        'created_date': gen_dates(),
        'status': gen_status(),
        'value': gen_amounts()
    }
247
 
248
def create_database_from_tables(tables_used):
    """Create an in-memory SQLite database with sample data for ALL tables mentioned.

    Args:
        tables_used: iterable of table names referenced by the generated SQL.
            Names are lower-cased and stripped before use, so the model's
            capitalization does not matter.

    Returns:
        (conn, sample_data): an open in-memory sqlite3 connection holding the
        populated tables, and a dict mapping (normalized) table name -> the
        pandas DataFrame that was written to it. The caller owns the
        connection and is responsible for closing it.
    """
    conn = sqlite3.connect(':memory:')
    sample_data = {}

    # Generate data for each table mentioned in the query.
    for table in tables_used:
        table_name = table.lower().strip()

        # Generate appropriate sample data for this table.
        table_dict = generate_generic_table_data(table_name, row_count=15)

        # Small lookup tables keep a fixed 5-row size regardless of row_count.
        if table_name in ('departments',):
            table_dict = {k: v[:5] if isinstance(v, list) else v for k, v in table_dict.items()}

        df = pd.DataFrame(table_dict)
        df.to_sql(table_name, conn, index=False, if_exists='replace')
        sample_data[table_name] = df

    return conn, sample_data
271
 
 
280
  def process_nl_query(api_key, natural_query):
281
  """Main function to process natural language query"""
282
  if not api_key:
283
+ return "❌ Please enter your Groq API key", "", pd.DataFrame(), ""
284
 
285
  if not natural_query:
286
+ return "❌ Please enter a natural language query", "", pd.DataFrame(), ""
287
 
288
  try:
289
  # Initialize Groq client
 
294
  output_text += "### Step 1: Understanding User Intent\n"
295
  output_text += f"**User Query:** {natural_query}\n\n"
296
 
297
+ # Call Groq API for SQL generation with Kimi model
298
  response = client.chat.completions.create(
299
  model="moonshotai/kimi-k2-instruct-0905",
300
  messages=[
 
302
  "role": "system",
303
  "content": """You are a SQL expert. Generate structured SQL queries from natural language descriptions with proper syntax validation and metadata.
304
 
305
+ IMPORTANT: Return your response in JSON format with the following structure:
306
  {
307
  "query": "SQL query string",
308
  "query_type": "SELECT/INSERT/UPDATE/DELETE",
 
315
  }
316
  }
317
 
318
+ Use standard SQL syntax compatible with SQLite.
319
+ - Always use proper JOINs when multiple tables are involved
320
+ - Use WHERE clauses for filtering
321
+ - Use GROUP BY for aggregations
322
+ - For date comparisons, use date('now') and datetime functions
323
+ - Extract ALL table names mentioned or implied in the query and list them in "tables_used"
324
+ - If a query mentions departments and employees, include BOTH tables
325
+ - Be thorough in identifying all tables needed for the query""",
326
  },
327
  {
328
  "role": "user",
 
364
 
365
  # Step 3: Generate Sample Database Tables
366
  output_text += "### Step 3: Auto-Generated Sample Database Tables\n\n"
367
+ output_text += f"**Tables to be created:** {', '.join(sql_query_gen.tables_used)}\n\n"
368
+
369
  conn, sample_data = create_database_from_tables(sql_query_gen.tables_used)
370
 
371
+ # Display sample tables (show first 10 rows for readability)
372
  for table_name, df in sample_data.items():
373
+ output_text += f"**πŸ“Š Sample `{table_name}` Table** ({len(df)} rows):\n\n"
374
+ output_text += df.head(10).to_markdown(index=False)
375
+ if len(df) > 10:
376
+ output_text += f"\n\n*...and {len(df) - 10} more rows*"
377
  output_text += "\n\n"
378
 
379
  # Step 4: Execute SQL Query
 
384
 
385
  if error:
386
  output_text += f"❌ **Execution Error:** {error}\n"
387
+ result_table = pd.DataFrame({"Error": [error]})
388
  else:
389
  output_text += "βœ… **Query executed successfully!**\n\n"
390
+ output_text += f"**πŸ“ˆ SQL Execution Result** ({len(result_df)} rows returned):\n\n"
391
  if len(result_df) > 0:
392
  output_text += result_df.to_markdown(index=False)
393
  else:
 
399
  # Format outputs for Gradio
400
  json_output = json.dumps(sql_query_gen.model_dump(), indent=2)
401
 
402
+ return output_text, json_output, result_table, sql_query_gen.query
 
 
 
 
 
403
 
404
  except Exception as e:
405
+ error_msg = f"❌ **Error:** {str(e)}\n\n**Full error details:**\n```\n{repr(e)}\n```\n\nPlease check your API key and try again."
406
+ return error_msg, "", pd.DataFrame({"Error": [str(e)]}), ""
407
 
408
  # Create Gradio Interface
409
+ with gr.Blocks(title="Natural Language to SQL Query Executor", theme=gr.themes.Ocean()) as demo:
410
  gr.Markdown("""
411
  # πŸ” Natural Language to SQL Query Executor
412
 
 
414
 
415
  **Example queries to try:**
416
  - "Find all customers who made orders over $500 in the last 30 days, show their name, email, and total order amount"
417
+ - "Show all employees who earn more than $75,000 and work in the Engineering department"
418
+ - "List students who scored above 85% in Mathematics"
419
+ - "Find all books published after 2020 that are currently available"
420
+ - "Show properties with price between $200,000 and $500,000"
421
  """)
422
 
423
  with gr.Row():
 
455
  gr.Markdown("### πŸ“ˆ Query Execution Result")
456
  result_output = gr.Dataframe(
457
  label="Result Table",
458
+ interactive=False,
459
+ wrap=True
460
  )
461
 
462
  # Connect the button to the processing function
 
469
  gr.Markdown("""
470
  ---
471
  ### πŸ“– How it works:
472
+ 1. **Enter your Groq API key** - Required for SQL generation (using Kimi K2 Instruct model)
473
  2. **Write your query in plain English** - Describe what data you want to find
474
  3. **Click Generate & Execute** - The system will:
475
  - Convert your query to SQL
476
+ - Automatically detect and create ALL required tables
477
+ - Generate realistic sample data for those tables
478
  - Execute the query
479
  - Show you the results
480
 
481
  ### 🎯 Features:
482
+ - βœ… Natural language to SQL conversion using Kimi K2 Instruct
483
+ - βœ… **Smart table detection** - Creates ANY table mentioned in your query
484
+ - βœ… Automatic sample data generation for 15+ table types
485
  - βœ… Query validation and metadata
486
  - βœ… SQL execution on sample data
487
  - βœ… Structured JSON output format
488
+ - βœ… Support for employees, books, students, movies, patients, properties, events, and more!
489
  """)
490
 
491
  # Launch the app