bhavika24 committed on
Commit
29a0e19
·
verified ·
1 Parent(s): 71ba3ec

Update engine.py

Browse files
Files changed (1) hide show
  1. engine.py +429 -421
engine.py CHANGED
@@ -1,421 +1,429 @@
1
- import os
2
- import sqlite3
3
- from openai import OpenAI
4
- from difflib import get_close_matches
5
- from datetime import datetime
6
-
7
- # =========================
8
- # SETUP
9
- # =========================
10
-
11
# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Module-wide SQLite connection. check_same_thread=False allows use from
# threads other than the one that created it.
conn = sqlite3.connect("hospital.db", check_same_thread=False)

# =========================
# CONVERSATION STATE
# =========================

# Set by process_question when an answer came back empty ("NO_DATA"), together
# with the latest data date reported alongside it.
LAST_PROMPT_TYPE = LAST_SUGGESTED_DATE = None
20
-
21
- # =========================
22
- # HUMAN RESPONSE HELPERS
23
- # =========================
24
-
25
def humanize(text):
    """Prefix *text* with a short, friendly acknowledgement."""
    return "Sure 🙂\n\n{}".format(text)
27
-
28
def friendly(text):
    """Append an offer of further help to *text*."""
    closing = "If you want, I can help you explore this further 🙂"
    return "{}\n\n{}".format(text, closing)
30
-
31
def is_confirmation(text):
    """Return True when *text* is a bare affirmative reply such as "yes" or "ok".

    Case, surrounding whitespace and trailing punctuation are ignored, so
    "Yes!" and "ok." count as confirmations; longer sentences do not.
    """
    affirmatives = {"yes", "yep", "yeah", "ok", "okay", "sure"}
    # Short chat replies are often typed with closing punctuation.
    reply = text.strip().lower().rstrip(".!,").strip()
    return reply in affirmatives
33
-
34
def is_why_question(text):
    """Return True when *text*, ignoring case and outer whitespace, begins with "why"."""
    normalized = text.strip().lower()
    return normalized[:3] == "why"
36
-
37
- # =========================
38
- # SPELL CORRECTION
39
- # =========================
40
-
41
# Domain vocabulary that near-miss words in a question are snapped to.
KNOWN_TERMS = [
    "patient", "patients", "condition", "conditions",
    "encounter", "encounters", "visit", "visits",
    "medication", "medications",
    "admitted", "admission",
    "year", "month", "last", "recent", "today",
]


def correct_spelling(q):
    """Replace misspelled domain words in *q* with their closest known term.

    Each whitespace-separated word is compared, lower-cased and without
    leading/trailing ",.?" characters, against KNOWN_TERMS (similarity cutoff
    0.8). A matching word is replaced by the known term; the punctuation that
    surrounded it is kept, so "pateints?" becomes "patients?". Words without a
    close match are left untouched. Words are re-joined with single spaces.
    """
    edge_punctuation = ",.?"
    fixed = []
    for word in q.split():
        core = word.strip(edge_punctuation)
        match = get_close_matches(core.lower(), KNOWN_TERMS, n=1, cutoff=0.8)
        if not match:
            fixed.append(word)
            continue
        # Re-attach whatever punctuation was stripped for the comparison.
        lead = word[: len(word) - len(word.lstrip(edge_punctuation))]
        trail = word[len(word.rstrip(edge_punctuation)):]
        fixed.append(lead + match[0] + trail)
    return " ".join(fixed)
57
-
58
- # =========================
59
- # SCHEMA
60
- # =========================
61
-
62
def load_ai_schema():
    """Read the AI-visible schema from the ai_tables / ai_columns metadata tables.

    Returns a dict keyed by table name. Each value holds the table's
    "description" and "columns", a list of (column_name, description) rows.
    Only tables with ai_enabled = 1 and columns with ai_allowed = 1 are included.
    """
    cursor = conn.cursor()

    enabled_tables = cursor.execute("""
        SELECT table_name, description
        FROM ai_tables
        WHERE ai_enabled = 1
    """).fetchall()

    column_query = """
        SELECT column_name, description
        FROM ai_columns
        WHERE table_name = ? AND ai_allowed = 1
    """

    return {
        name: {
            "description": description,
            "columns": cursor.execute(column_query, (name,)).fetchall(),
        }
        for name, description in enabled_tables
    }
85
-
86
- # =========================
87
- # HUMAN SCHEMA DESCRIPTION
88
- # =========================
89
-
90
def describe_schema():
    """Render the AI-visible schema as a bulleted, chat-friendly overview.

    Lists every table from load_ai_schema() with its description and columns,
    followed by a few example questions.
    """
    parts = ["Here’s the data I currently have access to:\n\n"]

    for table_name, info in load_ai_schema().items():
        parts.append(f"• **{table_name.capitalize()}** — {info['description']}\n")
        parts.extend(f" - {column}: {text}\n" for column, text in info["columns"])
        parts.append("\n")

    parts.append(
        "You can ask things like:\n"
        "• How many patients are there?\n"
        "• Patient count by gender\n"
        "• Admissions by year\n\n"
        "Just tell me what you want to explore 🙂"
    )
    return "".join(parts)
110
-
111
- # =========================
112
- # TIME HANDLING
113
- # =========================
114
-
115
def get_latest_data_date():
    """Return MAX(start_date) of the encounters table (None when SQLite yields NULL)."""
    row = conn.cursor().execute("SELECT MAX(start_date) FROM encounters").fetchone()
    latest = row[0]
    return latest
119
-
120
def normalize_time_question(q):
    """Anchor relative day words in *q* to the newest date present in the data.

    Whole-word, case-insensitive occurrences of "today" and "yesterday" are
    replaced by "on <date>", where <date> is the first ten characters of the
    value returned by get_latest_data_date(). The question is returned
    unchanged when there is no latest date.
    """
    import re  # local import keeps this block self-contained

    latest = get_latest_data_date()
    if not latest:
        return q

    anchor = f"on {str(latest)[:10]}"
    # One pass handles both words, so a question mentioning both is fully
    # rewritten and capitalised forms ("Yesterday") are caught as well.
    # NOTE(review): "yesterday" maps to the same date as "today", as before -
    # confirm that this is the intended meaning.
    return re.sub(
        r"\b(?:today|yesterday)\b",
        lambda _match: anchor,
        q,
        flags=re.IGNORECASE,
    )
132
-
133
- # =========================
134
- # UNSUPPORTED QUESTIONS
135
- # =========================
136
-
137
def get_unsupported_reason(q):
    """Explain why a question cannot be answered, or return None if it can be tried.

    The lower-cased question is searched for keywords of topics with no data
    (doctors/consultants, then departments/specializations). The first topic
    that matches yields a dict with "reason" and "suggestion".
    """
    lowered = q.lower()

    rules = (
        (
            ("consultant", "doctor"),
            "Doctor or consultant-level data is not available.",
            "Try asking about patients, visits, or admissions.",
        ),
        (
            ("department", "specialization"),
            "Department-level data is not stored.",
            "Try patient or visit related questions.",
        ),
    )

    for keywords, reason, suggestion in rules:
        for keyword in keywords:
            if keyword in lowered:
                return {"reason": reason, "suggestion": suggestion}
    return None
153
-
154
- # =========================
155
- # SQL GENERATION
156
- # =========================
157
-
158
def build_prompt(question):
    """Assemble the text-to-SQL prompt: fixed rules, the allowed schema, then the question.

    The schema section lists each table from load_ai_schema() with its
    columns and their descriptions.
    """
    sections = [
        "\n"
        "You are a hospital SQL assistant.\n"
        "\n"
        "Rules:\n"
        "- Use only SELECT\n"
        "- SQLite syntax\n"
        "- Use only listed tables/columns\n"
        "- Return ONLY SQL or NOT_ANSWERABLE\n"
    ]

    for table_name, info in load_ai_schema().items():
        sections.append(f"\nTable: {table_name}\n")
        sections.extend(f"- {column}: {text}\n" for column, text in info["columns"])

    sections.append(f"\nQuestion: {question}\n")
    return "".join(sections)
178
-
179
def call_llm(prompt):
    """Send *prompt* to the chat model and return its reply without outer whitespace.

    The request uses model "gpt-4.1-mini" at temperature 0, with a system
    message restricting the reply to SQL or NOT_ANSWERABLE. A reply that
    carries no text content is reported as "NOT_ANSWERABLE" instead of failing
    with AttributeError on None.
    """
    response = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": "Return only SQL or NOT_ANSWERABLE"},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    content = response.choices[0].message.content
    # The content field is optional in the API response.
    if content is None:
        return "NOT_ANSWERABLE"
    return content.strip()
189
-
190
- # =========================
191
- # SQL SAFETY
192
- # =========================
193
-
194
def sanitize_sql(sql):
    """Reduce a model reply to a single one-line SQL statement.

    Markdown code-fence markers (three backticks, optionally followed by an
    sql/sqlite language tag) are removed, everything from the first ";" onward
    is dropped, and newlines become spaces. Only a language tag directly after
    a fence is removed, so identifiers or literals that merely contain "sql"
    stay intact.
    """
    import re  # local import keeps this block self-contained

    without_fences = re.sub(
        r"```(?:(?:sqlite|sql)\b)?", "", sql, flags=re.IGNORECASE
    )
    first_statement = without_fences.split(";")[0]
    return first_statement.replace("\n", " ").strip()
198
-
199
def validate_sql(sql):
    """Return *sql* unchanged if it is a SELECT statement; otherwise raise.

    The check is case-insensitive, ignores leading whitespace and requires
    SELECT to be a whole word (so "selective ..." is rejected).

    Raises:
        ValueError: if the statement does not start with SELECT. ValueError is
            a subclass of Exception, so existing ``except Exception`` handlers
            keep working.
    """
    import re  # local import keeps this block self-contained

    if re.match(r"\s*select\b", sql, flags=re.IGNORECASE) is None:
        raise ValueError("Only SELECT allowed")
    return sql
203
-
204
def run_query(sql):
    """Execute *sql* on the shared connection and return (column_names, rows)."""
    cursor = conn.cursor()
    cursor.execute(sql)
    rows = cursor.fetchall()
    # The first item of each description entry is the column name.
    column_names = [column[0] for column in cursor.description]
    return column_names, rows
209
-
210
- # =========================
211
- # AGGREGATE SAFETY
212
- # =========================
213
-
214
def is_aggregate_only_query(sql):
    """Return True for SQL that calls COUNT, SUM or AVG and has no GROUP BY.

    Detection is a case-insensitive substring search for "count(", "sum(",
    "avg(" and "group by".
    """
    lowered = sql.lower()
    if "group by" in lowered:
        return False
    return any(call in lowered for call in ("count(", "sum(", "avg("))
217
-
218
def has_underlying_data(sql):
    """Report whether the FROM/WHERE part of *sql* matches at least one row.

    Builds the probe ``SELECT 1 FROM <rest of query> LIMIT 1`` from the text
    after the first FROM keyword (cut at GROUP BY, if present) and runs it on
    the shared connection. Returns False when the query has no FROM keyword.

    The query text keeps its original case: lower-casing it would also
    lower-case string literals (``gender = 'Male'`` -> ``'male'``) and could
    make the probe miss rows that the real query matches.
    """
    import re  # local import keeps this block self-contained

    from_keyword = re.search(r"\bfrom\b", sql, flags=re.IGNORECASE)
    if from_keyword is None:
        return False

    source = sql[from_keyword.end():]
    source = re.split(r"\bgroup\s+by\b", source, maxsplit=1, flags=re.IGNORECASE)[0]
    probe = "SELECT 1 FROM " + source + " LIMIT 1"

    return conn.cursor().execute(probe).fetchone() is not None
228
-
229
def is_patient_summary_question(text):
    """Return True when *text* contains one of the patient-summary phrases.

    Matching is a case-insensitive substring search.
    """
    lowered = text.lower()

    phrases = (
        "patient summary",
        "patients summary",
        "patient overview",
        "summary of patients",
        "give a patient summary",
    )

    for phrase in phrases:
        if phrase in lowered:
            return True
    return False
240
-
241
- # =========================
242
- # PATIENT SUMMARY
243
- # =========================
244
-
245
def build_patient_summary():
    """Build a short text overview of the patients table.

    The message gives the total patient count and, when any rows exist, the
    count per gender, followed by suggested follow-up questions. Both figures
    are queried from the shared connection.
    """
    cursor = conn.cursor()

    total = cursor.execute("SELECT COUNT(*) FROM patients").fetchone()[0]
    gender_counts = cursor.execute(
        "SELECT gender, COUNT(*) FROM patients GROUP BY gender"
    ).fetchall()

    lines = [
        "Here’s a quick summary of patients:\n\n",
        f"• Total patients: {total}\n",
    ]

    if gender_counts:
        lines.append("• Gender distribution:\n")
        lines.extend(f" - {gender}: {count}\n" for gender, count in gender_counts)

    lines.append(
        "\nYou can also ask:\n"
        "• Patients admitted by year\n"
        "• Patient count by age\n"
        "• Visit trends"
    )
    return "".join(lines)
272
-
273
-
274
- # =========================
275
- # MAIN ENGINE
276
- # =========================
277
-
278
def process_question(question):
    """Answer one user question and return a response dict.

    Steps, in order:
      1. Patient-summary requests are answered from fixed queries.
      2. A "why ..." or confirmation reply that follows a NO_DATA answer gets
         an explanation or a list of example questions.
      3. Otherwise the question is spell-corrected and time-normalised, the
         conversation state is reset, and schema ("what data ...") or
         unsupported (doctor/department) questions are answered directly.
      4. Remaining questions are turned into SQL by the LLM, validated and
         run. Empty results record a NO_DATA state for a later follow-up.

    Every response has "status" and "data"; "message", "note", "sql" and
    "columns" are present only on the branches that set them. The module-level
    LAST_PROMPT_TYPE / LAST_SUGGESTED_DATE are updated as a side effect.

    Raises:
        Whatever validate_sql or the database raises when the generated SQL is
        rejected or fails to run.
    """
    global LAST_PROMPT_TYPE, LAST_SUGGESTED_DATE

    q = question.strip().lower()

    # -------------------------------
    # Patient summary intent
    # -------------------------------
    if is_patient_summary_question(q):
        return {
            "status": "ok",
            "message": build_patient_summary(),
            "data": [],
            "sql": None,
            "note": None
        }

    # -------------------------------
    # Follow-ups to a previous NO_DATA answer
    # -------------------------------
    if LAST_PROMPT_TYPE == "NO_DATA":
        # Year of the newest data, taken from the date stored with that answer,
        # so the suggestions below name the year that actually has data.
        latest_year = str(LAST_SUGGESTED_DATE)[:4] if LAST_SUGGESTED_DATE else None

        # WHY follow-up
        if is_why_question(q):
            year = latest_year or "the latest available year"
            return {
                "status": "ok",
                "message": (
                    f"I suggested **{year}** because that’s the most recent year "
                    "for which data exists in the system.\n\n"
                    "Your database doesn’t contain newer records yet.\n\n"
                    "You can explore:\n"
                    f"• Data from {year}\n"
                    "• Trends over time\n"
                    "• Patient summaries"
                ),
                "data": []
            }

        # YES / confirmation
        if is_confirmation(q):
            admitted_when = f"in {latest_year}" if latest_year else "last year"
            return {
                "status": "ok",
                "message": (
                    "Great 🙂\n\n"
                    "Here are some things you can ask:\n"
                    f"• How many patients were admitted {admitted_when}?\n"
                    "• Patient count by gender\n"
                    "• Total visits by month\n"
                    "• Most common conditions"
                ),
                "data": []
            }

    # -------------------------------
    # Normalize question
    # -------------------------------
    question = correct_spelling(question)
    question = normalize_time_question(question)

    # Reset state once user asks a fresh question
    LAST_PROMPT_TYPE = None
    LAST_SUGGESTED_DATE = None

    # -------------------------------
    # Metadata queries
    # -------------------------------
    # Compare in lower case so "What data do you have?" is recognised too.
    lowered = question.lower()
    if any(phrase in lowered for phrase in ("what data", "what tables", "which data")):
        return {
            "status": "ok",
            "message": humanize(describe_schema()),
            "data": []
        }

    # -------------------------------
    # Unsupported questions
    # -------------------------------
    unsupported = get_unsupported_reason(question)
    if unsupported:
        return {
            "status": "ok",
            "message": (
                f"{unsupported['reason']}\n\n"
                f"{unsupported['suggestion']}\n\n"
                "Example questions:\n"
                "• How many patients were admitted last year?\n"
                "• Total visits by month\n"
                "• Patient count by gender"
            ),
            "data": []
        }

    # -------------------------------
    # LLM → SQL
    # -------------------------------
    sql = call_llm(build_prompt(question))

    # The model may wrap the sentinel in a code fence or add a trailing period.
    if sql.strip("` \n\t.").upper() == "NOT_ANSWERABLE":
        return {
            "status": "ok",
            "message": "I don’t have enough data to answer that.",
            "data": []
        }

    sql = validate_sql(sanitize_sql(sql))
    cols, rows = run_query(sql)

    # -------------------------------
    # No data (aggregate case)
    # -------------------------------
    if is_aggregate_only_query(sql):
        try:
            data_exists = has_underlying_data(sql)
        except sqlite3.Error:
            # The probe is rebuilt from the query text and may not be valid
            # SQL; the real query already ran, so keep its result.
            data_exists = True

        if not data_exists:
            LAST_PROMPT_TYPE = "NO_DATA"
            LAST_SUGGESTED_DATE = get_latest_data_date()

            return {
                "status": "ok",
                "message": friendly("No data is available for that time period."),
                "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
                "data": [],
                "sql": None
            }

    # -------------------------------
    # Empty result set
    # -------------------------------
    if not rows:
        LAST_PROMPT_TYPE = "NO_DATA"
        LAST_SUGGESTED_DATE = get_latest_data_date()

        return {
            "status": "ok",
            "message": friendly("No records found."),
            "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
            "data": []
        }

    # -------------------------------
    # Successful response
    # -------------------------------
    return {
        "status": "ok",
        "sql": sql,
        "columns": cols,
        "data": rows
    }
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sqlite3
3
+ from openai import OpenAI
4
+ from difflib import get_close_matches
5
+ from datetime import datetime
6
+
7
+ # =========================
8
+ # SETUP
9
+ # =========================
10
+
11
# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Module-wide SQLite connection. check_same_thread=False allows use from
# threads other than the one that created it.
conn = sqlite3.connect("hospital.db", check_same_thread=False)

# =========================
# CONVERSATION STATE
# =========================

# Set by process_question when an answer came back empty ("NO_DATA"), together
# with the latest data date reported alongside it.
LAST_PROMPT_TYPE = LAST_SUGGESTED_DATE = None
20
+
21
+ # =========================
22
+ # HUMAN RESPONSE HELPERS
23
+ # =========================
24
+
25
def humanize(text):
    """Prefix *text* with a short, friendly acknowledgement."""
    return "Sure 🙂\n\n{}".format(text)
27
+
28
def friendly(text):
    """Append an offer of further help to *text*."""
    closing = "If you want, I can help you explore this further 🙂"
    return "{}\n\n{}".format(text, closing)
30
+
31
def is_confirmation(text):
    """Return True when *text* is a bare affirmative reply such as "yes" or "ok".

    Case, surrounding whitespace and trailing punctuation are ignored, so
    "Yes!" and "ok." count as confirmations; longer sentences do not.
    """
    affirmatives = {"yes", "yep", "yeah", "ok", "okay", "sure"}
    # Short chat replies are often typed with closing punctuation.
    reply = text.strip().lower().rstrip(".!,").strip()
    return reply in affirmatives
33
+
34
def is_why_question(text):
    """Return True when *text*, ignoring case and outer whitespace, begins with "why"."""
    normalized = text.strip().lower()
    return normalized[:3] == "why"
36
+
37
+ # =========================
38
+ # SPELL CORRECTION
39
+ # =========================
40
+
41
# Domain vocabulary that near-miss words in a question are snapped to.
KNOWN_TERMS = [
    "patient", "patients", "condition", "conditions",
    "encounter", "encounters", "visit", "visits",
    "medication", "medications",
    "admitted", "admission",
    "year", "month", "last", "recent", "today",
]


def correct_spelling(q):
    """Replace misspelled domain words in *q* with their closest known term.

    Each whitespace-separated word is compared, lower-cased and without
    leading/trailing ",.?" characters, against KNOWN_TERMS (similarity cutoff
    0.8). A matching word is replaced by the known term; the punctuation that
    surrounded it is kept, so "pateints?" becomes "patients?". Words without a
    close match are left untouched. Words are re-joined with single spaces.
    """
    edge_punctuation = ",.?"
    fixed = []
    for word in q.split():
        core = word.strip(edge_punctuation)
        match = get_close_matches(core.lower(), KNOWN_TERMS, n=1, cutoff=0.8)
        if not match:
            fixed.append(word)
            continue
        # Re-attach whatever punctuation was stripped for the comparison.
        lead = word[: len(word) - len(word.lstrip(edge_punctuation))]
        trail = word[len(word.rstrip(edge_punctuation)):]
        fixed.append(lead + match[0] + trail)
    return " ".join(fixed)
57
+
58
+ # =========================
59
+ # SCHEMA
60
+ # =========================
61
+
62
def load_ai_schema():
    """Read the AI-visible schema from the ai_tables / ai_columns metadata tables.

    Returns a dict keyed by table name. Each value holds the table's
    "description" and "columns", a list of (column_name, description) rows.
    Only tables with ai_enabled = 1 and columns with ai_allowed = 1 are included.
    """
    cursor = conn.cursor()

    enabled_tables = cursor.execute("""
        SELECT table_name, description
        FROM ai_tables
        WHERE ai_enabled = 1
    """).fetchall()

    column_query = """
        SELECT column_name, description
        FROM ai_columns
        WHERE table_name = ? AND ai_allowed = 1
    """

    return {
        name: {
            "description": description,
            "columns": cursor.execute(column_query, (name,)).fetchall(),
        }
        for name, description in enabled_tables
    }
85
+
86
+ # =========================
87
+ # HUMAN SCHEMA DESCRIPTION
88
+ # =========================
89
+
90
def describe_schema():
    """Render the AI-visible schema as a bulleted, chat-friendly overview.

    Lists every table from load_ai_schema() with its description and columns,
    followed by a few example questions.
    """
    parts = ["Here’s the data I currently have access to:\n\n"]

    for table_name, info in load_ai_schema().items():
        parts.append(f"• **{table_name.capitalize()}** — {info['description']}\n")
        parts.extend(f" - {column}: {text}\n" for column, text in info["columns"])
        parts.append("\n")

    parts.append(
        "You can ask things like:\n"
        "• How many patients are there?\n"
        "• Patient count by gender\n"
        "• Admissions by year\n\n"
        "Just tell me what you want to explore 🙂"
    )
    return "".join(parts)
110
+
111
+ # =========================
112
+ # TIME HANDLING
113
+ # =========================
114
+
115
def get_latest_data_date():
    """Return MAX(start_date) of the encounters table (None when SQLite yields NULL)."""
    row = conn.cursor().execute("SELECT MAX(start_date) FROM encounters").fetchone()
    latest = row[0]
    return latest
119
+
120
def normalize_time_question(q):
    """Anchor relative day words in *q* to the newest date present in the data.

    Whole-word, case-insensitive occurrences of "today" and "yesterday" are
    replaced by "on <date>", where <date> is the first ten characters of the
    value returned by get_latest_data_date(). The question is returned
    unchanged when there is no latest date.
    """
    import re  # local import keeps this block self-contained

    latest = get_latest_data_date()
    if not latest:
        return q

    anchor = f"on {str(latest)[:10]}"
    # One pass handles both words, so a question mentioning both is fully
    # rewritten and capitalised forms ("Yesterday") are caught as well.
    # NOTE(review): "yesterday" maps to the same date as "today", as before -
    # confirm that this is the intended meaning.
    return re.sub(
        r"\b(?:today|yesterday)\b",
        lambda _match: anchor,
        q,
        flags=re.IGNORECASE,
    )
132
+
133
+ # =========================
134
+ # UNSUPPORTED QUESTIONS
135
+ # =========================
136
+
137
def get_unsupported_reason(q):
    """Explain why a question cannot be answered, or return None if it can be tried.

    The lower-cased question is searched for keywords of topics with no data
    (doctors/consultants, then departments/specializations). The first topic
    that matches yields a dict with "reason" and "suggestion".
    """
    lowered = q.lower()

    rules = (
        (
            ("consultant", "doctor"),
            "Doctor or consultant-level data is not available.",
            "Try asking about patients, visits, or admissions.",
        ),
        (
            ("department", "specialization"),
            "Department-level data is not stored.",
            "Try patient or visit related questions.",
        ),
    )

    for keywords, reason, suggestion in rules:
        for keyword in keywords:
            if keyword in lowered:
                return {"reason": reason, "suggestion": suggestion}
    return None
153
+
154
+ # =========================
155
+ # SQL GENERATION
156
+ # =========================
157
+
158
def build_prompt(question):
    """Assemble the text-to-SQL prompt: fixed rules, the allowed schema, then the question.

    The schema section lists each table from load_ai_schema() with its
    columns and their descriptions.
    """
    sections = [
        "\n"
        "You are a hospital SQL assistant.\n"
        "\n"
        "Rules:\n"
        "- Use only SELECT\n"
        "- SQLite syntax\n"
        "- Use only listed tables/columns\n"
        "- Return ONLY SQL or NOT_ANSWERABLE\n"
    ]

    for table_name, info in load_ai_schema().items():
        sections.append(f"\nTable: {table_name}\n")
        sections.extend(f"- {column}: {text}\n" for column, text in info["columns"])

    sections.append(f"\nQuestion: {question}\n")
    return "".join(sections)
178
+
179
def call_llm(prompt):
    """Send *prompt* to the chat model and return its reply without outer whitespace.

    The request uses model "gpt-4.1-mini" at temperature 0, with a system
    message restricting the reply to SQL or NOT_ANSWERABLE. A reply that
    carries no text content is reported as "NOT_ANSWERABLE" instead of failing
    with AttributeError on None.
    """
    response = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": "Return only SQL or NOT_ANSWERABLE"},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    content = response.choices[0].message.content
    # The content field is optional in the API response.
    if content is None:
        return "NOT_ANSWERABLE"
    return content.strip()
189
+
190
+ # =========================
191
+ # SQL SAFETY
192
+ # =========================
193
+
194
def sanitize_sql(sql):
    """Reduce a model reply to a single one-line SQL statement.

    Markdown code-fence markers (three backticks, optionally followed by an
    sql/sqlite language tag) are removed, everything from the first ";" onward
    is dropped, and newlines become spaces. Only a language tag directly after
    a fence is removed, so identifiers or literals that merely contain "sql"
    stay intact.
    """
    import re  # local import keeps this block self-contained

    without_fences = re.sub(
        r"```(?:(?:sqlite|sql)\b)?", "", sql, flags=re.IGNORECASE
    )
    first_statement = without_fences.split(";")[0]
    return first_statement.replace("\n", " ").strip()
198
+
199
def validate_sql(sql):
    """Return *sql* unchanged if it is a SELECT statement; otherwise raise.

    The check is case-insensitive, ignores leading whitespace and requires
    SELECT to be a whole word (so "selective ..." is rejected).

    Raises:
        ValueError: if the statement does not start with SELECT. ValueError is
            a subclass of Exception, so existing ``except Exception`` handlers
            keep working.
    """
    import re  # local import keeps this block self-contained

    if re.match(r"\s*select\b", sql, flags=re.IGNORECASE) is None:
        raise ValueError("Only SELECT allowed")
    return sql
203
+
204
def run_query(sql):
    """Execute *sql* on the shared connection and return (column_names, rows)."""
    cursor = conn.cursor()
    cursor.execute(sql)
    rows = cursor.fetchall()
    # The first item of each description entry is the column name.
    column_names = [column[0] for column in cursor.description]
    return column_names, rows
209
+
210
+ # =========================
211
+ # AGGREGATE SAFETY
212
+ # =========================
213
+
214
def is_aggregate_only_query(sql):
    """Return True for SQL that calls COUNT, SUM or AVG and has no GROUP BY.

    Detection is a case-insensitive substring search for "count(", "sum(",
    "avg(" and "group by".
    """
    lowered = sql.lower()
    if "group by" in lowered:
        return False
    return any(call in lowered for call in ("count(", "sum(", "avg("))
217
+
218
def has_underlying_data(sql):
    """Report whether the FROM/WHERE part of *sql* matches at least one row.

    Builds the probe ``SELECT 1 FROM <rest of query> LIMIT 1`` from the text
    after the first FROM keyword (cut at GROUP BY, if present) and runs it on
    the shared connection. Returns False when the query has no FROM keyword.

    The query text keeps its original case: lower-casing it would also
    lower-case string literals (``gender = 'Male'`` -> ``'male'``) and could
    make the probe miss rows that the real query matches.
    """
    import re  # local import keeps this block self-contained

    from_keyword = re.search(r"\bfrom\b", sql, flags=re.IGNORECASE)
    if from_keyword is None:
        return False

    source = sql[from_keyword.end():]
    source = re.split(r"\bgroup\s+by\b", source, maxsplit=1, flags=re.IGNORECASE)[0]
    probe = "SELECT 1 FROM " + source + " LIMIT 1"

    return conn.cursor().execute(probe).fetchone() is not None
228
+
229
def is_patient_summary_question(text):
    """Return True when *text* contains a patient-summary phrase.

    Matching is a case-insensitive substring search over the usual wordings
    plus a fixed list of misspellings.
    """
    lowered = text.lower()

    phrases = (
        # usual wordings
        "patient summary",
        "patients summary",
        "patient overview",
        "summary of patients",
        "give a patient summary",
        # misspellings
        "patient summery",
        "patients summery",
        "patent summary",
        "patant summary",
        "patient sumary",
        "patients sumery",
    )

    for phrase in phrases:
        if phrase in lowered:
            return True
    return False
247
+
248
+
249
+ # =========================
250
+ # PATIENT SUMMARY
251
+ # =========================
252
+
253
def build_patient_summary():
    """Build a short text overview of the patients table.

    The message gives the total patient count and, when any rows exist, the
    count per gender, followed by suggested follow-up questions. Both figures
    are queried from the shared connection.
    """
    cursor = conn.cursor()

    total = cursor.execute("SELECT COUNT(*) FROM patients").fetchone()[0]
    gender_counts = cursor.execute(
        "SELECT gender, COUNT(*) FROM patients GROUP BY gender"
    ).fetchall()

    lines = [
        "Here’s a quick summary of patients:\n\n",
        # Bulleted like the other top-level entries of this message.
        f"• Total patients: {total}\n",
    ]

    if gender_counts:
        lines.append("• Gender distribution:\n")
        lines.extend(f" - {gender}: {count}\n" for gender, count in gender_counts)

    lines.append(
        "\nYou can also ask:\n"
        "• Patients admitted by year\n"
        "• Patient count by age\n"
        "• Visit trends"
    )
    return "".join(lines)
280
+
281
+
282
+ # =========================
283
+ # MAIN ENGINE
284
+ # =========================
285
+
286
def process_question(question):
    """Answer one user question and return a response dict.

    Steps, in order:
      1. Patient-summary requests are answered from fixed queries.
      2. A "why ..." or confirmation reply that follows a NO_DATA answer gets
         an explanation or a list of example questions.
      3. Otherwise the question is spell-corrected and time-normalised, the
         conversation state is reset, and schema ("what data ...") or
         unsupported (doctor/department) questions are answered directly.
      4. Remaining questions are turned into SQL by the LLM, validated and
         run. Empty results record a NO_DATA state for a later follow-up.

    Every response has "status" and "data"; "message", "note", "sql" and
    "columns" are present only on the branches that set them. The module-level
    LAST_PROMPT_TYPE / LAST_SUGGESTED_DATE are updated as a side effect.

    Raises:
        Whatever validate_sql or the database raises when the generated SQL is
        rejected or fails to run.
    """
    global LAST_PROMPT_TYPE, LAST_SUGGESTED_DATE

    q = question.strip().lower()

    # -------------------------------
    # Patient summary intent
    # -------------------------------
    if is_patient_summary_question(q):
        return {
            "status": "ok",
            "message": build_patient_summary(),
            "data": [],
            "sql": None,
            "note": None
        }

    # -------------------------------
    # Follow-ups to a previous NO_DATA answer
    # -------------------------------
    if LAST_PROMPT_TYPE == "NO_DATA":
        # Year of the newest data, taken from the date stored with that answer,
        # so the suggestions below name the year that actually has data.
        latest_year = str(LAST_SUGGESTED_DATE)[:4] if LAST_SUGGESTED_DATE else None

        # WHY follow-up
        if is_why_question(q):
            year = latest_year or "the latest available year"
            return {
                "status": "ok",
                "message": (
                    f"I suggested **{year}** because that’s the most recent year "
                    "for which data exists in the system.\n\n"
                    "Your database doesn’t contain newer records yet.\n\n"
                    "You can explore:\n"
                    f"• Data from {year}\n"
                    "• Trends over time\n"
                    "• Patient summaries"
                ),
                "data": []
            }

        # YES / confirmation
        if is_confirmation(q):
            admitted_when = f"in {latest_year}" if latest_year else "last year"
            return {
                "status": "ok",
                "message": (
                    "Great 🙂\n\n"
                    "Here are some things you can ask:\n"
                    f"• How many patients were admitted {admitted_when}?\n"
                    "• Patient count by gender\n"
                    "• Total visits by month\n"
                    "• Most common conditions"
                ),
                "data": []
            }

    # -------------------------------
    # Normalize question
    # -------------------------------
    question = correct_spelling(question)
    question = normalize_time_question(question)

    # Reset state once user asks a fresh question
    LAST_PROMPT_TYPE = None
    LAST_SUGGESTED_DATE = None

    # -------------------------------
    # Metadata queries
    # -------------------------------
    # Compare in lower case so "What data do you have?" is recognised too.
    lowered = question.lower()
    if any(phrase in lowered for phrase in ("what data", "what tables", "which data")):
        return {
            "status": "ok",
            "message": humanize(describe_schema()),
            "data": []
        }

    # -------------------------------
    # Unsupported questions
    # -------------------------------
    unsupported = get_unsupported_reason(question)
    if unsupported:
        return {
            "status": "ok",
            "message": (
                f"{unsupported['reason']}\n\n"
                f"{unsupported['suggestion']}\n\n"
                "Example questions:\n"
                "• How many patients were admitted last year?\n"
                "• Total visits by month\n"
                "• Patient count by gender"
            ),
            "data": []
        }

    # -------------------------------
    # LLM → SQL
    # -------------------------------
    sql = call_llm(build_prompt(question))

    # The model may wrap the sentinel in a code fence or add a trailing period.
    if sql.strip("` \n\t.").upper() == "NOT_ANSWERABLE":
        return {
            "status": "ok",
            "message": "I don’t have enough data to answer that.",
            "data": []
        }

    sql = validate_sql(sanitize_sql(sql))
    cols, rows = run_query(sql)

    # -------------------------------
    # No data (aggregate case)
    # -------------------------------
    if is_aggregate_only_query(sql):
        try:
            data_exists = has_underlying_data(sql)
        except sqlite3.Error:
            # The probe is rebuilt from the query text and may not be valid
            # SQL; the real query already ran, so keep its result.
            data_exists = True

        if not data_exists:
            LAST_PROMPT_TYPE = "NO_DATA"
            LAST_SUGGESTED_DATE = get_latest_data_date()

            return {
                "status": "ok",
                "message": friendly("No data is available for that time period."),
                "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
                "data": [],
                "sql": None
            }

    # -------------------------------
    # Empty result set
    # -------------------------------
    if not rows:
        LAST_PROMPT_TYPE = "NO_DATA"
        LAST_SUGGESTED_DATE = get_latest_data_date()

        return {
            "status": "ok",
            "message": friendly("No records found."),
            "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
            "data": []
        }

    # -------------------------------
    # Successful response
    # -------------------------------
    return {
        "status": "ok",
        "sql": sql,
        "columns": cols,
        "data": rows
    }