bhavika24 committed on
Commit
1932ae9
·
verified ·
1 Parent(s): 52c5aa6

Upload engine.py

Browse files
Files changed (1) hide show
  1. engine.py +429 -471
engine.py CHANGED
@@ -1,471 +1,429 @@
1
- import os
2
- import sqlite3
3
- from openai import OpenAI
4
- from difflib import get_close_matches
5
- from datetime import datetime
6
-
7
- # =========================
8
- # SETUP
9
- # =========================
10
-
11
# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# One module-wide SQLite connection, opened with same-thread checking disabled.
conn = sqlite3.connect("hospital.db", check_same_thread=False)

# =========================
# CONVERSATION STATE
# =========================

# Both values are set by process_question when a query finds no data and are
# cleared again when a fresh question is processed.
LAST_PROMPT_TYPE = LAST_SUGGESTED_DATE = None
20
-
21
-
22
-
23
- # =========================
24
- # HUMAN RESPONSE HELPERS
25
- # =========================
26
-
27
def humanize(text):
    """Return *text* prefixed with a short, friendly acknowledgement."""
    greeting = "Sure 🙂"
    return "{}\n\n{}".format(greeting, text)
29
-
30
def friendly(text):
    """Return *text* followed by an offer to keep exploring the data."""
    follow_up = "If you want, I can help you explore this further 🙂"
    return "\n\n".join((f"{text}", follow_up))
32
-
33
def is_confirmation(text):
    """Return True when *text* is nothing but a short affirmative reply."""
    affirmatives = ("yes", "yep", "yeah", "ok", "okay", "sure")
    normalized = text.strip().lower()
    return normalized in affirmatives
35
-
36
def is_why_question(text):
    """Return True when *text* begins with "why", ignoring case and outer whitespace."""
    lowered = text.lower().strip()
    return lowered[:3] == "why"
38
-
39
- # =========================
40
- # SPELL CORRECTION
41
- # =========================
42
-
43
# Vocabulary that misspelled words are snapped to.
KNOWN_TERMS = [
    "patient", "patients", "condition", "conditions",
    "encounter", "encounters", "visit", "visits",
    "medication", "medications",
    "admitted", "admission",
    "year", "month", "last", "recent", "today",
]


def correct_spelling(q):
    """Replace near-misses of KNOWN_TERMS in *q* with the known term.

    Each whitespace-separated word is compared (lower-cased, without its
    leading/trailing ",", "." or "?") against KNOWN_TERMS. A sufficiently
    close word is replaced by the lower-case known term; punctuation that
    was attached to the word is kept, so "patiens?" becomes "patients?".

    Args:
        q: The question text.

    Returns:
        The question with corrected words, joined by single spaces.
    """
    punctuation = ",.?"
    fixed = []
    for word in q.split():
        core = word.strip(punctuation)
        match = (
            get_close_matches(core.lower(), KNOWN_TERMS, n=1, cutoff=0.8)
            if core
            else []
        )
        if not match:
            fixed.append(word)
            continue
        # Re-attach whatever punctuation surrounded the original word.
        lead = len(word) - len(word.lstrip(punctuation))
        fixed.append(word[:lead] + match[0] + word[lead + len(core):])
    return " ".join(fixed)
59
-
60
-
61
-
62
- # =========================
63
- # SCHEMA
64
- # =========================
65
- from functools import lru_cache
66
-
67
@lru_cache(maxsize=1)
def load_ai_schema():
    """Load the AI-visible schema from the ai_tables / ai_columns tables.

    The result is cached by lru_cache, so the database is only read on the
    first call.

    Returns:
        A dict mapping each enabled table name to
        {"description": <table description>, "columns": [(name, description), ...]}.
    """
    tables_sql = """
        SELECT table_name, description
        FROM ai_tables
        WHERE ai_enabled = 1
    """
    columns_sql = """
        SELECT column_name, description
        FROM ai_columns
        WHERE table_name = ? AND ai_allowed = 1
    """

    cursor = conn.cursor()
    enabled_tables = cursor.execute(tables_sql).fetchall()

    result = {}
    for name, description in enabled_tables:
        allowed_columns = cursor.execute(columns_sql, (name,)).fetchall()
        result[name] = {"description": description, "columns": allowed_columns}
    return result
91
-
92
- # =========================
93
- # TABLE MATCHING (CORE LOGIC)
94
- # =========================
95
-
96
def extract_relevant_tables(question):
    """Return the schema tables that *question* mentions.

    A table counts as mentioned when its own name, or the name of one of
    its AI-visible columns, appears in the question as a whole word
    (case-insensitive, optionally followed by a plural "s"/"es"). Whole-word
    matching avoids false hits such as a column "age" inside "average".

    Args:
        question: The question text.

    Returns:
        A list of table names without duplicates, in schema order.
    """
    import re

    text = question.lower()

    def mentioned(name):
        pattern = r"(?<!\w)" + re.escape(name.lower()) + r"(?:e?s)?(?!\w)"
        return re.search(pattern, text) is not None

    matched = []
    for table, meta in load_ai_schema().items():
        if mentioned(table) or any(mentioned(col) for col, _ in meta["columns"]):
            matched.append(table)
    return matched
115
-
116
-
117
- # =========================
118
- # HUMAN SCHEMA DESCRIPTION
119
- # =========================
120
-
121
def describe_schema():
    """Build a human-readable listing of the AI-visible tables and columns.

    Returns:
        A text block with one bullet per table, one line per column, and a
        few example questions at the end.
    """
    pieces = ["Here’s the data I currently have access to:\n\n"]

    for name, info in load_ai_schema().items():
        pieces.append(f"• **{name.capitalize()}** — {info['description']}\n")
        pieces.extend(f" - {column}: {note}\n" for column, note in info["columns"])
        pieces.append("\n")

    pieces.append(
        "You can ask things like:\n"
        "• How many patients are there?\n"
        "• Patient count by gender\n"
        "• Admissions by year\n\n"
        "Just tell me what you want to explore 🙂"
    )
    return "".join(pieces)
141
-
142
- # =========================
143
- # TIME HANDLING
144
- # =========================
145
-
146
def get_latest_data_date():
    """Return the largest start_date stored in the encounters table."""
    cursor = conn.cursor()
    cursor.execute("SELECT MAX(start_date) FROM encounters")
    row = cursor.fetchone()
    return row[0]
150
-
151
def normalize_time_question(q):
    """Rewrite "today"/"yesterday" in *q* relative to the newest data date.

    The newest encounter date stands in for "today"; "yesterday" becomes
    the day before it. Matching is case-insensitive and whole-word, and
    both words are handled when they occur in the same question.

    Args:
        q: The question text.

    Returns:
        The rewritten question, or *q* unchanged when no latest date exists.
    """
    import re
    from datetime import datetime, timedelta

    latest = get_latest_data_date()
    if not latest:
        return q

    latest_day = latest[:10]
    try:
        parsed = datetime.strptime(latest_day, "%Y-%m-%d")
        previous_day = (parsed - timedelta(days=1)).strftime("%Y-%m-%d")
    except ValueError:
        # Stored value is not YYYY-MM-DD; fall back to the latest date itself.
        previous_day = latest_day

    # Callables are used as replacements so the dates are inserted literally.
    q = re.sub(r"\btoday\b", lambda _m: f"on {latest_day}", q, flags=re.IGNORECASE)
    q = re.sub(r"\byesterday\b", lambda _m: f"on {previous_day}", q, flags=re.IGNORECASE)
    return q
163
-
164
- # =========================
165
- # UNSUPPORTED QUESTIONS
166
- # =========================
167
-
168
def is_question_supported(question):
    """Return True when *question* mentions any AI-visible table or column.

    Both the question and the schema names are lower-cased before the
    substring comparison, so schema names containing capitals still match.

    Args:
        question: The question text.

    Returns:
        True if a table name or column name occurs in the question.
    """
    text = question.lower()
    for table, meta in load_ai_schema().items():
        if table.lower() in text:
            return True
        if any(col.lower() in text for col, _ in meta["columns"]):
            return True
    return False
180
-
181
-
182
- # =========================
183
- # SQL GENERATION
184
- # =========================
185
-
186
def build_prompt(question):
    """Assemble the LLM prompt: rules, the AI-visible schema, then the question.

    Args:
        question: The question text to append to the prompt.

    Returns:
        The complete prompt string.
    """
    header = """
You are a hospital SQL assistant.

Rules:
- Use only SELECT
- SQLite syntax
- Use only listed tables/columns
- Return ONLY SQL or NOT_ANSWERABLE
"""
    sections = [header]
    for name, info in load_ai_schema().items():
        sections.append(f"\nTable: {name}\n")
        sections.extend(f"- {column}: {note}\n" for column, note in info["columns"])
    sections.append(f"\nQuestion: {question}\n")
    return "".join(sections)
206
-
207
def call_llm(prompt):
    """Send *prompt* to the chat model and return its reply text.

    Args:
        prompt: The user-message content.

    Returns:
        The first choice's message content with outer whitespace removed,
        or an empty string when the message carries no content.
    """
    res = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": "Return only SQL or NOT_ANSWERABLE"},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    # content can be None (e.g. a refusal); avoid calling .strip() on None.
    content = res.choices[0].message.content
    return (content or "").strip()
217
-
218
- # =========================
219
- # SQL SAFETY
220
- # =========================
221
-
222
def sanitize_sql(sql):
    """Reduce an LLM reply to a single one-line SQL statement.

    Removes a Markdown code fence (with an optional sql/sqlite language
    tag), keeps only the text before the first ";", and collapses newlines
    to spaces. Unlike a blanket removal of "sql", identifiers such as
    "sqlite_master" are left intact.

    Args:
        sql: Raw text returned by the model.

    Returns:
        The cleaned statement.
    """
    import re

    text = sql.strip()
    # Drop an opening fence together with its language tag, if present.
    opening = re.match(r"```(?:sqlite|sql)?[ \t]*\n?", text, flags=re.IGNORECASE)
    if opening:
        text = text[opening.end():]
    text = text.replace("```", "")
    text = text.split(";")[0]
    return text.replace("\n", " ").strip()
226
-
227
def validate_sql(sql):
    """Return *sql* unchanged if it is a SELECT statement, else raise.

    Leading whitespace is tolerated and "select" must be a whole word, so
    "selection ..." is rejected.

    Args:
        sql: The statement to check.

    Returns:
        The same *sql* value.

    Raises:
        ValueError: If *sql* is not a string starting with SELECT.
            ValueError is a subclass of Exception, so existing
            ``except Exception`` handlers still catch it.
    """
    import re

    if not isinstance(sql, str) or not re.match(r"\s*select\b", sql, flags=re.IGNORECASE):
        raise ValueError("Only SELECT allowed")
    return sql
231
-
232
def run_query(sql):
    """Execute *sql* on the shared connection.

    Returns:
        A (column_names, rows) tuple: the names taken from the cursor
        description and every fetched row.
    """
    cursor = conn.cursor()
    cursor.execute(sql)
    records = cursor.fetchall()
    headers = [column[0] for column in cursor.description]
    return headers, records
237
-
238
- # =========================
239
- # AGGREGATE SAFETY
240
- # =========================
241
-
242
def is_aggregate_only_query(sql):
    """Return True when *sql* uses count/sum/avg and has no GROUP BY clause."""
    lowered = sql.lower()
    if "group by" in lowered:
        return False
    return any(marker in lowered for marker in ("count(", "sum(", "avg("))
245
-
246
def has_underlying_data(sql):
    """Check whether the rows an aggregate query runs over exist at all.

    Takes everything after the first FROM keyword (up to a GROUP BY, if
    any) and probes it with ``SELECT 1 ... LIMIT 1``. The statement's
    original casing is preserved so string literals in WHERE clauses keep
    their meaning, and FROM / GROUP BY are matched as whole words.

    Args:
        sql: The aggregate statement that was executed.

    Returns:
        True if the probe yields a row; False if it yields none or the
        statement has no FROM clause.
    """
    import re

    from_keyword = re.search(r"\bfrom\b", sql, flags=re.IGNORECASE)
    if from_keyword is None:
        return False

    source = sql[from_keyword.end():]
    source = re.split(r"\bgroup\s+by\b", source, maxsplit=1, flags=re.IGNORECASE)[0]
    probe = "SELECT 1 FROM " + source + " LIMIT 1"

    cur = conn.cursor()
    return cur.execute(probe).fetchone() is not None
256
-
257
-
258
-
259
-
260
- # =========================
261
- # PATIENT SUMMARY
262
- # =========================
263
-
264
def build_table_summary(table_name):
    """Build a text summary of one table: row count plus top values per text column.

    For every column whose declared type contains "char", "clob" or "text",
    the five most frequent values are listed. Identifiers are double-quoted
    before being placed in SQL, and only database errors are skipped when a
    column cannot be grouped.

    Args:
        table_name: Name of the table to summarise.

    Returns:
        The summary text.
    """
    def quote(identifier):
        # Standard SQL identifier quoting; embedded quotes are doubled.
        return '"' + str(identifier).replace('"', '""') + '"'

    cur = conn.cursor()
    table = quote(table_name)

    total = cur.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
    columns = cur.execute(f"PRAGMA table_info({table})").fetchall()

    parts = [
        f"Here’s a summary of **{table_name}**:\n\n",
        f"• Total records: {total}\n",
    ]

    for col in columns:
        col_name = col[1]
        col_type = (col[2] or "").lower()
        if not any(tag in col_type for tag in ("char", "clob", "text")):
            continue

        column = quote(col_name)
        try:
            rows = cur.execute(
                f"""
                SELECT {column}, COUNT(*)
                FROM {table}
                GROUP BY {column}
                ORDER BY COUNT(*) DESC
                LIMIT 5
                """
            ).fetchall()
        except sqlite3.Error:
            # Best effort: skip columns that cannot be grouped.
            continue

        if rows:
            parts.append(f"\n• {col_name.capitalize()} breakdown:\n")
            parts.extend(f" - {val}: {count}\n" for val, count in rows)

    parts.append("\nYou can ask more detailed questions about this data.")
    return "".join(parts)
307
-
308
-
309
-
310
- # =========================
311
- # MAIN ENGINE
312
- # =========================
313
-
314
def process_question(question):
    """Answer one user question and return a response dictionary.

    Handling order: table-summary shortcut, "why"/confirmation follow-ups
    to a previous NO_DATA reply, schema description, questions that mention
    no known table or column, and finally LLM-generated SQL run against the
    database. Sets the module-level LAST_PROMPT_TYPE / LAST_SUGGESTED_DATE
    when a query finds no data and clears them for a fresh question.

    Args:
        question: Raw question text from the user.

    Returns:
        A dict that always has "status" and "data"; "message", "note",
        "sql" and "columns" are present depending on the branch taken.

    Raises:
        Exception: Propagated from validate_sql when the generated
            statement is rejected; errors raised while running the query
            are not caught here either.
    """
    global LAST_PROMPT_TYPE, LAST_SUGGESTED_DATE

    q = question.strip().lower()

    # -------------------------------
    # Table summary intent
    # -------------------------------
    # NOTE(review): the shortcut is limited to questions that actually ask
    # for a summary; without this guard any question naming a table returned
    # a summary and the SQL path below was effectively unreachable.
    if any(word in q for word in ("summary", "summarize", "summarise", "overview")):
        matched_tables = extract_relevant_tables(q)
        if len(matched_tables) == 1:
            return {
                "status": "ok",
                "message": build_table_summary(matched_tables[0]),
                "data": [],
            }
        if len(matched_tables) > 1:
            return {
                "status": "ok",
                "message": (
                    "Your question matches multiple datasets:\n"
                    + "\n".join(f"- {t}" for t in matched_tables)
                    + "\n\nPlease be more specific."
                ),
                "data": [],
            }

    # -------------------------------
    # WHY follow-up handling
    # -------------------------------
    if is_why_question(q) and LAST_PROMPT_TYPE == "NO_DATA":
        year = LAST_SUGGESTED_DATE[:4] if LAST_SUGGESTED_DATE else "the latest available year"
        return {
            "status": "ok",
            "message": (
                f"I suggested **{year}** because that’s the most recent year "
                f"for which data exists in the system.\n\n"
                "Your database doesn’t contain newer records yet.\n\n"
                "You can explore:\n"
                f"• Data from {year}\n"
                "• Trends over time\n"
                "• Patient summaries"
            ),
            "data": [],
        }

    # -------------------------------
    # YES / confirmation handling
    # -------------------------------
    if is_confirmation(q) and LAST_PROMPT_TYPE == "NO_DATA":
        example_year = LAST_SUGGESTED_DATE[:4] if LAST_SUGGESTED_DATE else "2021"
        return {
            "status": "ok",
            "message": (
                "Great 🙂\n\n"
                "Here are some things you can ask:\n"
                f"• How many patients were admitted in {example_year}?\n"
                "• Patient count by gender\n"
                "• Total visits by month\n"
                "• Most common conditions"
            ),
            "data": [],
        }

    # -------------------------------
    # Normalize question
    # -------------------------------
    question = correct_spelling(question)
    question = normalize_time_question(question)

    # Reset state once user asks a fresh question
    LAST_PROMPT_TYPE = None
    LAST_SUGGESTED_DATE = None

    # -------------------------------
    # Metadata queries
    # -------------------------------
    # Compare in lower case so "What data ..." is recognised too.
    asked = question.lower()
    if any(x in asked for x in ["what data", "what tables", "which data"]):
        return {
            "status": "ok",
            "message": humanize(describe_schema()),
            "data": [],
        }

    # -------------------------------
    # Unsupported questions (metadata-based)
    # -------------------------------
    if not is_question_supported(question):
        return {
            "status": "ok",
            "message": (
                "That information isn’t available in the system.\n\n"
                "You can ask about:\n"
                "• Patients\n"
                "• Visits\n"
                "• Conditions\n"
                "• Medications"
            ),
            "data": [],
            "sql": None,
            "note": None,
        }

    # -------------------------------
    # LLM → SQL
    # -------------------------------
    sql = call_llm(build_prompt(question))

    # Tolerate case differences or trailing punctuation in the sentinel.
    if sql.strip().upper().startswith("NOT_ANSWERABLE"):
        return {
            "status": "ok",
            "message": "I don’t have enough data to answer that.",
            "data": [],
        }

    sql = validate_sql(sanitize_sql(sql))
    cols, rows = run_query(sql)

    # -------------------------------
    # No data (aggregate case)
    # -------------------------------
    if is_aggregate_only_query(sql) and not has_underlying_data(sql):
        LAST_PROMPT_TYPE = "NO_DATA"
        LAST_SUGGESTED_DATE = get_latest_data_date()

        return {
            "status": "ok",
            "message": friendly("No data is available for that time period."),
            "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
            "data": [],
            "sql": None,
        }

    # -------------------------------
    # Empty result set
    # -------------------------------
    if not rows:
        LAST_PROMPT_TYPE = "NO_DATA"
        LAST_SUGGESTED_DATE = get_latest_data_date()

        return {
            "status": "ok",
            "message": friendly("No records found."),
            "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
            "data": [],
        }

    # -------------------------------
    # Successful response
    # -------------------------------
    return {
        "status": "ok",
        "sql": sql,
        "columns": cols,
        "data": rows,
    }
 
1
+ import os
2
+ import sqlite3
3
+ from openai import OpenAI
4
+ from difflib import get_close_matches
5
+ from datetime import datetime
6
+
7
+ # =========================
8
+ # SETUP
9
+ # =========================
10
+
11
# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# One module-wide SQLite connection, opened with same-thread checking disabled.
conn = sqlite3.connect("hospital.db", check_same_thread=False)

# =========================
# CONVERSATION STATE
# =========================

# Both values are set by process_question when a query finds no data and are
# cleared again when a fresh question is processed.
LAST_PROMPT_TYPE = LAST_SUGGESTED_DATE = None
20
+
21
+ # =========================
22
+ # HUMAN RESPONSE HELPERS
23
+ # =========================
24
+
25
def humanize(text):
    """Return *text* prefixed with a short, friendly acknowledgement."""
    greeting = "Sure 🙂"
    return "{}\n\n{}".format(greeting, text)
27
+
28
def friendly(text):
    """Return *text* followed by an offer to keep exploring the data."""
    follow_up = "If you want, I can help you explore this further 🙂"
    return "\n\n".join((f"{text}", follow_up))
30
+
31
def is_confirmation(text):
    """Return True when *text* is nothing but a short affirmative reply."""
    affirmatives = ("yes", "yep", "yeah", "ok", "okay", "sure")
    normalized = text.strip().lower()
    return normalized in affirmatives
33
+
34
def is_why_question(text):
    """Return True when *text* begins with "why", ignoring case and outer whitespace."""
    lowered = text.lower().strip()
    return lowered[:3] == "why"
36
+
37
+ # =========================
38
+ # SPELL CORRECTION
39
+ # =========================
40
+
41
# Vocabulary that misspelled words are snapped to.
KNOWN_TERMS = [
    "patient", "patients", "condition", "conditions",
    "encounter", "encounters", "visit", "visits",
    "medication", "medications",
    "admitted", "admission",
    "year", "month", "last", "recent", "today",
]


def correct_spelling(q):
    """Replace near-misses of KNOWN_TERMS in *q* with the known term.

    Each whitespace-separated word is compared (lower-cased, without its
    leading/trailing ",", "." or "?") against KNOWN_TERMS. A sufficiently
    close word is replaced by the lower-case known term; punctuation that
    was attached to the word is kept, so "patiens?" becomes "patients?".

    Args:
        q: The question text.

    Returns:
        The question with corrected words, joined by single spaces.
    """
    punctuation = ",.?"
    fixed = []
    for word in q.split():
        core = word.strip(punctuation)
        match = (
            get_close_matches(core.lower(), KNOWN_TERMS, n=1, cutoff=0.8)
            if core
            else []
        )
        if not match:
            fixed.append(word)
            continue
        # Re-attach whatever punctuation surrounded the original word.
        lead = len(word) - len(word.lstrip(punctuation))
        fixed.append(word[:lead] + match[0] + word[lead + len(core):])
    return " ".join(fixed)
57
+
58
+ # =========================
59
+ # SCHEMA
60
+ # =========================
61
+
62
def load_ai_schema():
    """Load the AI-visible schema from the ai_tables / ai_columns tables.

    Returns:
        A dict mapping each enabled table name to
        {"description": <table description>, "columns": [(name, description), ...]}.
    """
    tables_sql = """
        SELECT table_name, description
        FROM ai_tables
        WHERE ai_enabled = 1
    """
    columns_sql = """
        SELECT column_name, description
        FROM ai_columns
        WHERE table_name = ? AND ai_allowed = 1
    """

    cursor = conn.cursor()
    enabled_tables = cursor.execute(tables_sql).fetchall()

    result = {}
    for name, description in enabled_tables:
        allowed_columns = cursor.execute(columns_sql, (name,)).fetchall()
        result[name] = {"description": description, "columns": allowed_columns}
    return result
85
+
86
+ # =========================
87
+ # HUMAN SCHEMA DESCRIPTION
88
+ # =========================
89
+
90
def describe_schema():
    """Build a human-readable listing of the AI-visible tables and columns.

    Returns:
        A text block with one bullet per table, one line per column, and a
        few example questions at the end.
    """
    pieces = ["Here’s the data I currently have access to:\n\n"]

    for name, info in load_ai_schema().items():
        pieces.append(f"• **{name.capitalize()}** — {info['description']}\n")
        pieces.extend(f" - {column}: {note}\n" for column, note in info["columns"])
        pieces.append("\n")

    pieces.append(
        "You can ask things like:\n"
        "• How many patients are there?\n"
        "• Patient count by gender\n"
        "• Admissions by year\n\n"
        "Just tell me what you want to explore 🙂"
    )
    return "".join(pieces)
110
+
111
+ # =========================
112
+ # TIME HANDLING
113
+ # =========================
114
+
115
def get_latest_data_date():
    """Return the largest start_date stored in the encounters table."""
    cursor = conn.cursor()
    cursor.execute("SELECT MAX(start_date) FROM encounters")
    row = cursor.fetchone()
    return row[0]
119
+
120
def normalize_time_question(q):
    """Rewrite "today"/"yesterday" in *q* relative to the newest data date.

    The newest encounter date stands in for "today"; "yesterday" becomes
    the day before it. Matching is case-insensitive and whole-word, and
    both words are handled when they occur in the same question.

    Args:
        q: The question text.

    Returns:
        The rewritten question, or *q* unchanged when no latest date exists.
    """
    import re
    from datetime import datetime, timedelta

    latest = get_latest_data_date()
    if not latest:
        return q

    latest_day = latest[:10]
    try:
        parsed = datetime.strptime(latest_day, "%Y-%m-%d")
        previous_day = (parsed - timedelta(days=1)).strftime("%Y-%m-%d")
    except ValueError:
        # Stored value is not YYYY-MM-DD; fall back to the latest date itself.
        previous_day = latest_day

    # Callables are used as replacements so the dates are inserted literally.
    q = re.sub(r"\btoday\b", lambda _m: f"on {latest_day}", q, flags=re.IGNORECASE)
    q = re.sub(r"\byesterday\b", lambda _m: f"on {previous_day}", q, flags=re.IGNORECASE)
    return q
132
+
133
+ # =========================
134
+ # UNSUPPORTED QUESTIONS
135
+ # =========================
136
+
137
def get_unsupported_reason(q):
    """Explain why *q* cannot be answered, if it touches a known-missing topic.

    Args:
        q: The question text (compared in lower case).

    Returns:
        A dict with "reason" and "suggestion" when the question mentions
        doctors/consultants or departments/specializations; otherwise None.
    """
    lowered = q.lower()

    # (trigger words, reason, suggestion) — checked in this order.
    unsupported_topics = (
        (
            ("consultant", "doctor"),
            "Doctor or consultant-level data is not available.",
            "Try asking about patients, visits, or admissions.",
        ),
        (
            ("department", "specialization"),
            "Department-level data is not stored.",
            "Try patient or visit related questions.",
        ),
    )

    for triggers, reason, suggestion in unsupported_topics:
        for trigger in triggers:
            if trigger in lowered:
                return {"reason": reason, "suggestion": suggestion}
    return None
153
+
154
+ # =========================
155
+ # SQL GENERATION
156
+ # =========================
157
+
158
def build_prompt(question):
    """Assemble the LLM prompt: rules, the AI-visible schema, then the question.

    Args:
        question: The question text to append to the prompt.

    Returns:
        The complete prompt string.
    """
    header = """
You are a hospital SQL assistant.

Rules:
- Use only SELECT
- SQLite syntax
- Use only listed tables/columns
- Return ONLY SQL or NOT_ANSWERABLE
"""
    sections = [header]
    for name, info in load_ai_schema().items():
        sections.append(f"\nTable: {name}\n")
        sections.extend(f"- {column}: {note}\n" for column, note in info["columns"])
    sections.append(f"\nQuestion: {question}\n")
    return "".join(sections)
178
+
179
def call_llm(prompt):
    """Send *prompt* to the chat model and return its reply text.

    Args:
        prompt: The user-message content.

    Returns:
        The first choice's message content with outer whitespace removed,
        or an empty string when the message carries no content.
    """
    res = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": "Return only SQL or NOT_ANSWERABLE"},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    # content can be None (e.g. a refusal); avoid calling .strip() on None.
    content = res.choices[0].message.content
    return (content or "").strip()
189
+
190
+ # =========================
191
+ # SQL SAFETY
192
+ # =========================
193
+
194
def sanitize_sql(sql):
    """Reduce an LLM reply to a single one-line SQL statement.

    Removes a Markdown code fence (with an optional sql/sqlite language
    tag), keeps only the text before the first ";", and collapses newlines
    to spaces. Unlike a blanket removal of "sql", identifiers such as
    "sqlite_master" are left intact.

    Args:
        sql: Raw text returned by the model.

    Returns:
        The cleaned statement.
    """
    import re

    text = sql.strip()
    # Drop an opening fence together with its language tag, if present.
    opening = re.match(r"```(?:sqlite|sql)?[ \t]*\n?", text, flags=re.IGNORECASE)
    if opening:
        text = text[opening.end():]
    text = text.replace("```", "")
    text = text.split(";")[0]
    return text.replace("\n", " ").strip()
198
+
199
def validate_sql(sql):
    """Return *sql* unchanged if it is a SELECT statement, else raise.

    Leading whitespace is tolerated and "select" must be a whole word, so
    "selection ..." is rejected.

    Args:
        sql: The statement to check.

    Returns:
        The same *sql* value.

    Raises:
        ValueError: If *sql* is not a string starting with SELECT.
            ValueError is a subclass of Exception, so existing
            ``except Exception`` handlers still catch it.
    """
    import re

    if not isinstance(sql, str) or not re.match(r"\s*select\b", sql, flags=re.IGNORECASE):
        raise ValueError("Only SELECT allowed")
    return sql
203
+
204
def run_query(sql):
    """Execute *sql* on the shared connection.

    Returns:
        A (column_names, rows) tuple: the names taken from the cursor
        description and every fetched row.
    """
    cursor = conn.cursor()
    cursor.execute(sql)
    records = cursor.fetchall()
    headers = [column[0] for column in cursor.description]
    return headers, records
209
+
210
+ # =========================
211
+ # AGGREGATE SAFETY
212
+ # =========================
213
+
214
def is_aggregate_only_query(sql):
    """Return True when *sql* uses count/sum/avg and has no GROUP BY clause."""
    lowered = sql.lower()
    if "group by" in lowered:
        return False
    return any(marker in lowered for marker in ("count(", "sum(", "avg("))
217
+
218
def has_underlying_data(sql):
    """Check whether the rows an aggregate query runs over exist at all.

    Takes everything after the first FROM keyword (up to a GROUP BY, if
    any) and probes it with ``SELECT 1 ... LIMIT 1``. The statement's
    original casing is preserved so string literals in WHERE clauses keep
    their meaning, and FROM / GROUP BY are matched as whole words.

    Args:
        sql: The aggregate statement that was executed.

    Returns:
        True if the probe yields a row; False if it yields none or the
        statement has no FROM clause.
    """
    import re

    from_keyword = re.search(r"\bfrom\b", sql, flags=re.IGNORECASE)
    if from_keyword is None:
        return False

    source = sql[from_keyword.end():]
    source = re.split(r"\bgroup\s+by\b", source, maxsplit=1, flags=re.IGNORECASE)[0]
    probe = "SELECT 1 FROM " + source + " LIMIT 1"

    cur = conn.cursor()
    return cur.execute(probe).fetchone() is not None
228
+
229
def is_patient_summary_question(text):
    """Return True when *text* contains a known "patient summary" phrasing.

    The phrase list includes several common misspellings; matching is a
    case-insensitive substring test.
    """
    phrases = (
        "patient summary",
        "patients summary",
        "patient overview",
        "summary of patients",
        "give a patient summary",
        "patient summery",
        "patients summery",
        "patent summary",
        "patant summary",
        "patient sumary",
        "patients sumery",
    )
    lowered = text.lower()
    for phrase in phrases:
        if phrase in lowered:
            return True
    return False
247
+
248
+
249
+ # =========================
250
+ # PATIENT SUMMARY
251
+ # =========================
252
+
253
def build_patient_summary():
    """Build a short text summary of the patients table.

    Reports the total number of patients and the count per gender value,
    followed by a few suggested follow-up questions.

    Returns:
        The summary text.
    """
    cur = conn.cursor()

    total = cur.execute(
        "SELECT COUNT(*) FROM patients"
    ).fetchone()[0]

    genders = cur.execute(
        "SELECT gender, COUNT(*) FROM patients GROUP BY gender"
    ).fetchall()

    parts = [
        "Here’s a quick summary of patients:\n\n",
        f"• Total patients: {total}\n",
    ]

    if genders:
        parts.append("• Gender distribution:\n")
        parts.extend(f" - {g}: {c}\n" for g, c in genders)

    # Every suggestion carries a bullet (the first one was missing it).
    parts.append(
        "\nYou can also ask:\n"
        "• Patients admitted by year\n"
        "• Patient count by age\n"
        "• Visit trends"
    )

    return "".join(parts)
280
+
281
+
282
+ # =========================
283
+ # MAIN ENGINE
284
+ # =========================
285
+
286
def process_question(question):
    """Answer one user question and return a response dictionary.

    Handling order: patient-summary shortcut, "why"/confirmation follow-ups
    to a previous NO_DATA reply, schema description, known-unsupported
    topics, and finally LLM-generated SQL run against the database. Sets
    the module-level LAST_PROMPT_TYPE / LAST_SUGGESTED_DATE when a query
    finds no data and clears them for a fresh question.

    Args:
        question: Raw question text from the user.

    Returns:
        A dict that always has "status" and "data"; "message", "note",
        "sql" and "columns" are present depending on the branch taken.

    Raises:
        Exception: Propagated from validate_sql when the generated
            statement is rejected; errors raised while running the query
            are not caught here either.
    """
    global LAST_PROMPT_TYPE, LAST_SUGGESTED_DATE

    q = question.strip().lower()

    # -------------------------------
    # Patient summary intent
    # -------------------------------
    if is_patient_summary_question(q):
        return {
            "status": "ok",
            "message": build_patient_summary(),
            "data": [],
            "sql": None,
            "note": None,
        }

    # -------------------------------
    # WHY follow-up handling
    # -------------------------------
    if is_why_question(q) and LAST_PROMPT_TYPE == "NO_DATA":
        year = LAST_SUGGESTED_DATE[:4] if LAST_SUGGESTED_DATE else "the latest available year"
        return {
            "status": "ok",
            "message": (
                f"I suggested **{year}** because that’s the most recent year "
                f"for which data exists in the system.\n\n"
                "Your database doesn’t contain newer records yet.\n\n"
                "You can explore:\n"
                f"• Data from {year}\n"
                "• Trends over time\n"
                "• Patient summaries"
            ),
            "data": [],
        }

    # -------------------------------
    # YES / confirmation handling
    # -------------------------------
    if is_confirmation(q) and LAST_PROMPT_TYPE == "NO_DATA":
        example_year = LAST_SUGGESTED_DATE[:4] if LAST_SUGGESTED_DATE else "2021"
        return {
            "status": "ok",
            "message": (
                "Great 🙂\n\n"
                "Here are some things you can ask:\n"
                f"• How many patients were admitted in {example_year}?\n"
                "• Patient count by gender\n"
                "• Total visits by month\n"
                "• Most common conditions"
            ),
            "data": [],
        }

    # -------------------------------
    # Normalize question
    # -------------------------------
    question = correct_spelling(question)
    question = normalize_time_question(question)

    # Reset state once user asks a fresh question
    LAST_PROMPT_TYPE = None
    LAST_SUGGESTED_DATE = None

    # -------------------------------
    # Metadata queries
    # -------------------------------
    # Compare in lower case so "What data ..." is recognised too.
    asked = question.lower()
    if any(x in asked for x in ["what data", "what tables", "which data"]):
        return {
            "status": "ok",
            "message": humanize(describe_schema()),
            "data": [],
        }

    # -------------------------------
    # Unsupported questions
    # -------------------------------
    unsupported = get_unsupported_reason(question)
    if unsupported:
        return {
            "status": "ok",
            "message": (
                f"{unsupported['reason']}\n\n"
                f"{unsupported['suggestion']}\n\n"
                "Example questions:\n"
                "• How many patients were admitted last year?\n"
                "• Total visits by month\n"
                "• Patient count by gender"
            ),
            "data": [],
        }

    # -------------------------------
    # LLM → SQL
    # -------------------------------
    sql = call_llm(build_prompt(question))

    # Tolerate case differences or trailing punctuation in the sentinel.
    if sql.strip().upper().startswith("NOT_ANSWERABLE"):
        return {
            "status": "ok",
            "message": "I don’t have enough data to answer that.",
            "data": [],
        }

    sql = validate_sql(sanitize_sql(sql))
    cols, rows = run_query(sql)

    # -------------------------------
    # No data (aggregate case)
    # -------------------------------
    if is_aggregate_only_query(sql) and not has_underlying_data(sql):
        LAST_PROMPT_TYPE = "NO_DATA"
        LAST_SUGGESTED_DATE = get_latest_data_date()

        return {
            "status": "ok",
            "message": friendly("No data is available for that time period."),
            "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
            "data": [],
            "sql": None,
        }

    # -------------------------------
    # Empty result set
    # -------------------------------
    if not rows:
        LAST_PROMPT_TYPE = "NO_DATA"
        LAST_SUGGESTED_DATE = get_latest_data_date()

        return {
            "status": "ok",
            "message": friendly("No records found."),
            "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
            "data": [],
        }

    # -------------------------------
    # Successful response
    # -------------------------------
    return {
        "status": "ok",
        "sql": sql,
        "columns": cols,
        "data": rows,
    }