bhavika24 committed on
Commit
52c5aa6
·
verified ·
1 Parent(s): 29a0e19

Upload engine.py

Browse files
Files changed (1) hide show
  1. engine.py +471 -429
engine.py CHANGED
@@ -1,429 +1,471 @@
1
- import os
2
- import sqlite3
3
- from openai import OpenAI
4
- from difflib import get_close_matches
5
- from datetime import datetime
6
-
7
- # =========================
8
- # SETUP
9
- # =========================
10
-
11
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
12
- conn = sqlite3.connect("hospital.db", check_same_thread=False)
13
-
14
- # =========================
15
- # CONVERSATION STATE
16
- # =========================
17
-
18
- LAST_PROMPT_TYPE = None
19
- LAST_SUGGESTED_DATE = None
20
-
21
- # =========================
22
- # HUMAN RESPONSE HELPERS
23
- # =========================
24
-
25
- def humanize(text):
26
- return f"Sure 🙂\n\n{text}"
27
-
28
- def friendly(text):
29
- return f"{text}\n\nIf you want, I can help you explore this further 🙂"
30
-
31
- def is_confirmation(text):
32
- return text.strip().lower() in ["yes", "yep", "yeah", "ok", "okay", "sure"]
33
-
34
- def is_why_question(text):
35
- return text.strip().lower().startswith("why")
36
-
37
- # =========================
38
- # SPELL CORRECTION
39
- # =========================
40
-
41
- KNOWN_TERMS = [
42
- "patient", "patients", "condition", "conditions",
43
- "encounter", "encounters", "visit", "visits",
44
- "medication", "medications",
45
- "admitted", "admission",
46
- "year", "month", "last", "recent", "today"
47
- ]
48
-
49
- def correct_spelling(q):
50
- words = q.split()
51
- fixed = []
52
- for w in words:
53
- clean = w.lower().strip(",.?")
54
- match = get_close_matches(clean, KNOWN_TERMS, n=1, cutoff=0.8)
55
- fixed.append(match[0] if match else w)
56
- return " ".join(fixed)
57
-
58
- # =========================
59
- # SCHEMA
60
- # =========================
61
-
62
- def load_ai_schema():
63
- cur = conn.cursor()
64
- schema = {}
65
-
66
- tables = cur.execute("""
67
- SELECT table_name, description
68
- FROM ai_tables
69
- WHERE ai_enabled = 1
70
- """).fetchall()
71
-
72
- for table, desc in tables:
73
- cols = cur.execute("""
74
- SELECT column_name, description
75
- FROM ai_columns
76
- WHERE table_name = ? AND ai_allowed = 1
77
- """, (table,)).fetchall()
78
-
79
- schema[table] = {
80
- "description": desc,
81
- "columns": cols
82
- }
83
-
84
- return schema
85
-
86
- # =========================
87
- # HUMAN SCHEMA DESCRIPTION
88
- # =========================
89
-
90
- def describe_schema():
91
- schema = load_ai_schema()
92
-
93
- response = "Here’s the data I currently have access to:\n\n"
94
-
95
- for table, meta in schema.items():
96
- response += f"• **{table.capitalize()}** — {meta['description']}\n"
97
- for col, desc in meta["columns"]:
98
- response += f" - {col}: {desc}\n"
99
- response += "\n"
100
-
101
- response += (
102
- "You can ask things like:\n"
103
- "• How many patients are there?\n"
104
- "• Patient count by gender\n"
105
- "• Admissions by year\n\n"
106
- "Just tell me what you want to explore 🙂"
107
- )
108
-
109
- return response
110
-
111
- # =========================
112
- # TIME HANDLING
113
- # =========================
114
-
115
- def get_latest_data_date():
116
- cur = conn.cursor()
117
- r = cur.execute("SELECT MAX(start_date) FROM encounters").fetchone()
118
- return r[0]
119
-
120
- def normalize_time_question(q):
121
- latest = get_latest_data_date()
122
- if not latest:
123
- return q
124
-
125
- if "today" in q:
126
- return q.replace("today", f"on {latest[:10]}")
127
-
128
- if "yesterday" in q:
129
- return q.replace("yesterday", f"on {latest[:10]}")
130
-
131
- return q
132
-
133
- # =========================
134
- # UNSUPPORTED QUESTIONS
135
- # =========================
136
-
137
- def get_unsupported_reason(q):
138
- q = q.lower()
139
-
140
- if any(w in q for w in ["consultant", "doctor"]):
141
- return {
142
- "reason": "Doctor or consultant-level data is not available.",
143
- "suggestion": "Try asking about patients, visits, or admissions."
144
- }
145
-
146
- if any(w in q for w in ["department", "specialization"]):
147
- return {
148
- "reason": "Department-level data is not stored.",
149
- "suggestion": "Try patient or visit related questions."
150
- }
151
-
152
- return None
153
-
154
- # =========================
155
- # SQL GENERATION
156
- # =========================
157
-
158
- def build_prompt(question):
159
- schema = load_ai_schema()
160
-
161
- prompt = """
162
- You are a hospital SQL assistant.
163
-
164
- Rules:
165
- - Use only SELECT
166
- - SQLite syntax
167
- - Use only listed tables/columns
168
- - Return ONLY SQL or NOT_ANSWERABLE
169
- """
170
-
171
- for table, meta in schema.items():
172
- prompt += f"\nTable: {table}\n"
173
- for col, desc in meta["columns"]:
174
- prompt += f"- {col}: {desc}\n"
175
-
176
- prompt += f"\nQuestion: {question}\n"
177
- return prompt
178
-
179
- def call_llm(prompt):
180
- res = client.chat.completions.create(
181
- model="gpt-4.1-mini",
182
- messages=[
183
- {"role": "system", "content": "Return only SQL or NOT_ANSWERABLE"},
184
- {"role": "user", "content": prompt}
185
- ],
186
- temperature=0
187
- )
188
- return res.choices[0].message.content.strip()
189
-
190
- # =========================
191
- # SQL SAFETY
192
- # =========================
193
-
194
- def sanitize_sql(sql):
195
- sql = sql.replace("```", "").replace("sql", "").strip()
196
- sql = sql.split(";")[0]
197
- return sql.replace("\n", " ").strip()
198
-
199
- def validate_sql(sql):
200
- if not sql.lower().startswith("select"):
201
- raise Exception("Only SELECT allowed")
202
- return sql
203
-
204
- def run_query(sql):
205
- cur = conn.cursor()
206
- rows = cur.execute(sql).fetchall()
207
- cols = [c[0] for c in cur.description]
208
- return cols, rows
209
-
210
- # =========================
211
- # AGGREGATE SAFETY
212
- # =========================
213
-
214
- def is_aggregate_only_query(sql):
215
- s = sql.lower()
216
- return ("count(" in s or "sum(" in s or "avg(" in s) and "group by" not in s
217
-
218
- def has_underlying_data(sql):
219
- base = sql.lower()
220
- if "from" not in base:
221
- return False
222
-
223
- base = base.split("from", 1)[1]
224
- test_sql = "SELECT 1 FROM " + base.split("group by")[0] + " LIMIT 1"
225
-
226
- cur = conn.cursor()
227
- return cur.execute(test_sql).fetchone() is not None
228
-
229
- def is_patient_summary_question(text):
230
- t = text.lower()
231
-
232
- keywords = [
233
- "patient summary",
234
- "patients summary",
235
- "patient overview",
236
- "summary of patients",
237
- "give a patient summary",
238
- "patient summery",
239
- "patients summery",
240
- "patent summary",
241
- "patant summary",
242
- "patient sumary",
243
- "patients sumery"
244
- ]
245
-
246
- return any(k in t for k in keywords)
247
-
248
-
249
- # =========================
250
- # PATIENT SUMMARY
251
- # =========================
252
-
253
- def build_patient_summary():
254
- cur = conn.cursor()
255
-
256
- total = cur.execute(
257
- "SELECT COUNT(*) FROM patients"
258
- ).fetchone()[0]
259
-
260
- genders = cur.execute(
261
- "SELECT gender, COUNT(*) FROM patients GROUP BY gender"
262
- ).fetchall()
263
-
264
- msg = "Here’s a quick summary of patients:\n\n"
265
- msg += f"• Total patients: {total}\n"
266
-
267
- if genders:
268
- msg += "• Gender distribution:\n"
269
- for g, c in genders:
270
- msg += f" - {g}: {c}\n"
271
-
272
- msg += (
273
- "\nYou can also ask:\n"
274
- " Patients admitted by year\n"
275
- "• Patient count by age\n"
276
- "• Visit trends"
277
- )
278
-
279
- return msg
280
-
281
-
282
- # =========================
283
- # MAIN ENGINE
284
- # =========================
285
-
286
- def process_question(question):
287
- global LAST_PROMPT_TYPE, LAST_SUGGESTED_DATE
288
-
289
- q = question.strip().lower()
290
-
291
- # -------------------------------
292
- # Patient summary intent
293
- # -------------------------------
294
- if is_patient_summary_question(q):
295
- return {
296
- "status": "ok",
297
- "message": build_patient_summary(),
298
- "data": [],
299
- "sql": None,
300
- "note": None
301
- }
302
-
303
- # -------------------------------
304
- # WHY follow-up handling
305
- # -------------------------------
306
- if is_why_question(q) and LAST_PROMPT_TYPE == "NO_DATA":
307
- year = LAST_SUGGESTED_DATE[:4] if LAST_SUGGESTED_DATE else "the latest available year"
308
- return {
309
- "status": "ok",
310
- "message": (
311
- f"I suggested **{year}** because that’s the most recent year "
312
- f"for which data exists in the system.\n\n"
313
- "Your database doesn’t contain newer records yet.\n\n"
314
- "You can explore:\n"
315
- "• Data from 2021\n"
316
- "• Trends over time\n"
317
- "• Patient summaries"
318
- ),
319
- "data": []
320
- }
321
-
322
- # -------------------------------
323
- # YES / confirmation handling
324
- # -------------------------------
325
- if is_confirmation(q) and LAST_PROMPT_TYPE == "NO_DATA":
326
- return {
327
- "status": "ok",
328
- "message": (
329
- "Great 🙂\n\n"
330
- "Here are some things you can ask:\n"
331
- " How many patients were admitted in 2021?\n"
332
- " Patient count by gender\n"
333
- " Total visits by month\n"
334
- " Most common conditions"
335
- ),
336
- "data": []
337
- }
338
-
339
- # -------------------------------
340
- # Normalize question
341
- # -------------------------------
342
- question = correct_spelling(question)
343
- question = normalize_time_question(question)
344
-
345
- # Reset state once user asks a fresh question
346
- LAST_PROMPT_TYPE = None
347
- LAST_SUGGESTED_DATE = None
348
-
349
- # -------------------------------
350
- # Metadata queries
351
- # -------------------------------
352
- if any(x in question for x in ["what data", "what tables", "which data"]):
353
- return {
354
- "status": "ok",
355
- "message": humanize(describe_schema()),
356
- "data": []
357
- }
358
-
359
- # -------------------------------
360
- # Unsupported questions
361
- # -------------------------------
362
- unsupported = get_unsupported_reason(question)
363
- if unsupported:
364
- return {
365
- "status": "ok",
366
- "message": (
367
- f"{unsupported['reason']}\n\n"
368
- f"{unsupported['suggestion']}\n\n"
369
- "Example questions:\n"
370
- " How many patients were admitted last year?\n"
371
- "• Total visits by month\n"
372
- "• Patient count by gender"
373
- ),
374
- "data": []
375
- }
376
-
377
- # -------------------------------
378
- # LLM → SQL
379
- # -------------------------------
380
- sql = call_llm(build_prompt(question))
381
-
382
- if sql == "NOT_ANSWERABLE":
383
- return {
384
- "status": "ok",
385
- "message": "I don’t have enough data to answer that.",
386
- "data": []
387
- }
388
-
389
- sql = validate_sql(sanitize_sql(sql))
390
- cols, rows = run_query(sql)
391
-
392
- # -------------------------------
393
- # No data (aggregate case)
394
- # -------------------------------
395
- if is_aggregate_only_query(sql) and not has_underlying_data(sql):
396
- LAST_PROMPT_TYPE = "NO_DATA"
397
- LAST_SUGGESTED_DATE = get_latest_data_date()
398
-
399
- return {
400
- "status": "ok",
401
- "message": friendly("No data is available for that time period."),
402
- "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
403
- "data": [],
404
- "sql": None
405
- }
406
-
407
- # -------------------------------
408
- # Empty result set
409
- # -------------------------------
410
- if not rows:
411
- LAST_PROMPT_TYPE = "NO_DATA"
412
- LAST_SUGGESTED_DATE = get_latest_data_date()
413
-
414
- return {
415
- "status": "ok",
416
- "message": friendly("No records found."),
417
- "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
418
- "data": []
419
- }
420
-
421
- # -------------------------------
422
- # Successful response
423
- # -------------------------------
424
- return {
425
- "status": "ok",
426
- "sql": sql,
427
- "columns": cols,
428
- "data": rows
429
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sqlite3
3
+ from openai import OpenAI
4
+ from difflib import get_close_matches
5
+ from datetime import datetime
6
+
7
# =========================
# SETUP
# =========================

# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# One module-wide SQLite connection. check_same_thread=False disables sqlite3's
# check that the connection is only used from the thread that created it.
conn = sqlite3.connect("hospital.db", check_same_thread=False)

# =========================
# CONVERSATION STATE
# =========================

# process_question() sets these after a "no data" reply so that a follow-up
# such as "why?" or "yes" can be answered, and clears them on a fresh question.
LAST_PROMPT_TYPE = LAST_SUGGESTED_DATE = None
20
+
21
+
22
+
23
+ # =========================
24
+ # HUMAN RESPONSE HELPERS
25
+ # =========================
26
+
27
def humanize(text):
    """Return *text* preceded by a "Sure 🙂" line and a blank line."""
    return "Sure 🙂\n\n{}".format(text)
29
+
30
def friendly(text):
    """Return *text* followed by a blank line and an offer to explore further."""
    closing = "If you want, I can help you explore this further 🙂"
    return "{}\n\n{}".format(text, closing)
32
+
33
def is_confirmation(text):
    """Return True when *text* is a short affirmative reply such as "yes" or "ok".

    The comparison ignores case, surrounding whitespace and surrounding
    sentence punctuation, so "Yes!" and " ok. " both count. Longer replies
    ("yes please") are not treated as confirmations.
    """
    affirmatives = {"yes", "yep", "yeah", "ok", "okay", "sure"}
    # Strip punctuation after lowercasing so "OK." and "ok" normalise identically.
    normalized = text.strip().lower().strip(" \t.,!?")
    return normalized in affirmatives
35
+
36
def is_why_question(text):
    """Tell whether *text*, ignoring case and surrounding whitespace, begins with "why"."""
    lowered = text.strip().lower()
    return lowered[:3] == "why"
38
+
39
+ # =========================
40
+ # SPELL CORRECTION
41
+ # =========================
42
+
43
# Vocabulary that misspelled words in a question are corrected towards.
KNOWN_TERMS = [
    "patient", "patients", "condition", "conditions",
    "encounter", "encounters", "visit", "visits",
    "medication", "medications",
    "admitted", "admission",
    "year", "month", "last", "recent", "today",
]


def correct_spelling(q):
    """Replace near-misses of KNOWN_TERMS in *q* with the known spelling.

    The question is split on whitespace. Each word is compared (lowercased,
    with leading/trailing ",", "." and "?" removed) against KNOWN_TERMS using
    difflib.get_close_matches with a 0.8 cutoff. A matching word is replaced
    by the known term; punctuation that surrounded the word is kept, so
    "patiens?" becomes "patients?" rather than losing the question mark.
    Words without a close match are left exactly as typed.

    Returns the words re-joined with single spaces.
    """
    punctuation = ",.?"
    fixed = []
    for word in q.split():
        # Separate the word into leading punctuation, core and trailing punctuation.
        without_lead = word.lstrip(punctuation)
        prefix = word[:len(word) - len(without_lead)]
        core = without_lead.rstrip(punctuation)
        suffix = without_lead[len(core):]

        match = get_close_matches(core.lower(), KNOWN_TERMS, n=1, cutoff=0.8)
        if match:
            fixed.append(prefix + match[0] + suffix)
        else:
            fixed.append(word)
    return " ".join(fixed)
59
+
60
+
61
+
62
+ # =========================
63
+ # SCHEMA
64
+ # =========================
65
+ from functools import lru_cache
66
+
67
@lru_cache(maxsize=1)
def load_ai_schema():
    """Read the AI-visible schema from the ai_tables / ai_columns metadata tables.

    Returns a dict keyed by table name. Each value is a dict with:
      - "description": the table's description from ai_tables
      - "columns": a list of (column_name, description) rows from ai_columns

    Only tables with ai_enabled = 1 and columns with ai_allowed = 1 are
    included. Because of lru_cache, the queries run on the first call only and
    every later call returns the same dict object.
    """
    cursor = conn.cursor()

    enabled_tables = cursor.execute("""
        SELECT table_name, description
        FROM ai_tables
        WHERE ai_enabled = 1
    """).fetchall()

    column_query = """
        SELECT column_name, description
        FROM ai_columns
        WHERE table_name = ? AND ai_allowed = 1
    """

    result = {}
    for name, description in enabled_tables:
        allowed_columns = cursor.execute(column_query, (name,)).fetchall()
        result[name] = {"description": description, "columns": allowed_columns}
    return result
91
+
92
+ # =========================
93
+ # TABLE MATCHING (CORE LOGIC)
94
+ # =========================
95
+
96
def extract_relevant_tables(question):
    """Return the AI-enabled tables that *question* mentions by table or column name.

    A table is selected when its own name, or the name of any of its
    AI-allowed columns, appears in the question as a whole word
    (case-insensitive). A simple singular/plural variation is tolerated, so
    "patient" and "patients" both select a table called "patients". Matching
    on whole words avoids false hits such as a column "age" matching the word
    "average".

    Returns a list of table names, each at most once, in schema order.
    """
    import re

    schema = load_ai_schema()
    q = question.lower()

    def mentioned(name):
        # Whole-word, plural-tolerant search for a schema name in the question.
        name = name.lower()
        if not name:
            return False
        # Drop a trailing "s" so a plural schema name also matches its singular.
        stem = name[:-1] if len(name) > 3 and name.endswith("s") else name
        pattern = r"(?<!\w)" + re.escape(stem) + r"(?:s|es)?(?!\w)"
        return re.search(pattern, q) is not None

    matched = []
    for table, meta in schema.items():
        if mentioned(table) or any(mentioned(col) for col, _ in meta["columns"]):
            matched.append(table)
    return matched
115
+
116
+
117
+ # =========================
118
+ # HUMAN SCHEMA DESCRIPTION
119
+ # =========================
120
+
121
def describe_schema():
    """Build a human-readable overview of the tables and columns the assistant may use.

    The text lists every table returned by load_ai_schema() with its
    description, then each allowed column with its description, and ends with
    a few example questions.
    """
    schema = load_ai_schema()

    parts = ["Here’s the data I currently have access to:\n\n"]

    for table, meta in schema.items():
        parts.append(f"• **{table.capitalize()}** — {meta['description']}\n")
        parts.extend(f" - {col}: {desc}\n" for col, desc in meta["columns"])
        parts.append("\n")

    parts.append(
        "You can ask things like:\n"
        "• How many patients are there?\n"
        # This bullet was a mis-encoded character sequence; restored to "•".
        "• Patient count by gender\n"
        "• Admissions by year\n\n"
        "Just tell me what you want to explore 🙂"
    )

    return "".join(parts)
141
+
142
+ # =========================
143
+ # TIME HANDLING
144
+ # =========================
145
+
146
def get_latest_data_date():
    """Return MAX(start_date) from the encounters table (None when there is no value)."""
    row = conn.cursor().execute("SELECT MAX(start_date) FROM encounters").fetchone()
    return row[0]
150
+
151
def normalize_time_question(q, latest=None):
    """Rewrite "today" / "yesterday" in *q* relative to the newest date in the data.

    The most recent encounter date plays the role of "today", so that relative
    questions still hit existing rows.

    Args:
        q: The user's question.
        latest: Optional date string to anchor on (its first 10 characters are
            used). When omitted, get_latest_data_date() is queried.

    Returns:
        *q* with every whole-word, case-insensitive "today" replaced by
        "on <latest date>" and every "yesterday" replaced by
        "on <latest date minus one day>". *q* is returned unchanged when no
        latest date is available.
    """
    import re
    from datetime import datetime, timedelta

    if latest is None:
        latest = get_latest_data_date()
    if not latest:
        return q

    today_str = str(latest)[:10]
    try:
        previous_day = datetime.strptime(today_str, "%Y-%m-%d") - timedelta(days=1)
        yesterday_str = previous_day.strftime("%Y-%m-%d")
    except ValueError:
        # Date is not in YYYY-MM-DD form: fall back to the anchor date itself.
        yesterday_str = today_str

    q = re.sub(r"\byesterday\b", f"on {yesterday_str}", q, flags=re.IGNORECASE)
    q = re.sub(r"\btoday\b", f"on {today_str}", q, flags=re.IGNORECASE)
    return q
163
+
164
+ # =========================
165
+ # UNSUPPORTED QUESTIONS
166
+ # =========================
167
+
168
def is_question_supported(question):
    """Return True when *question* mentions at least one AI-enabled table or column.

    Delegates to extract_relevant_tables() so that both functions apply the
    same, case-insensitive matching rules. Previously the schema names were
    compared case-sensitively against the lowercased question, so a table
    stored as "Patients" could never match.
    """
    return bool(extract_relevant_tables(question))
180
+
181
+
182
+ # =========================
183
+ # SQL GENERATION
184
+ # =========================
185
+
186
def build_prompt(question):
    """Assemble the LLM prompt: fixed rules, the allowed schema, then the question.

    Each table from load_ai_schema() is listed as "Table: <name>" followed by
    one "- <column>: <description>" line per allowed column.
    """
    schema = load_ai_schema()

    sections = ["""
You are a hospital SQL assistant.

Rules:
- Use only SELECT
- SQLite syntax
- Use only listed tables/columns
- Return ONLY SQL or NOT_ANSWERABLE
"""]

    for table, meta in schema.items():
        sections.append(f"\nTable: {table}\n")
        sections.extend(f"- {col}: {desc}\n" for col, desc in meta["columns"])

    sections.append(f"\nQuestion: {question}\n")
    return "".join(sections)
206
+
207
def call_llm(prompt):
    """Send *prompt* to the chat model and return its reply text, stripped.

    The request uses temperature 0 and a system message that asks for SQL or
    NOT_ANSWERABLE only. An empty string is returned when the reply carries no
    text content, instead of failing on None.strip().
    """
    res = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": "Return only SQL or NOT_ANSWERABLE"},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    content = res.choices[0].message.content
    # message.content is optional in the API response.
    return (content or "").strip()
217
+
218
+ # =========================
219
+ # SQL SAFETY
220
+ # =========================
221
+
222
def sanitize_sql(sql):
    """Reduce an LLM reply to a single-line SQL statement.

    Steps:
      1. Remove Markdown code-fence markers, including an attached "sql" or
         "sqlite" language tag. Only the tag on a fence is removed; the text
         "sql" elsewhere in the query (for example inside an identifier) is
         left intact.
      2. Keep only the text before the first ";".
      3. Replace newlines with spaces and trim surrounding whitespace.
    """
    import re

    text = re.sub(r"```(?:(?:sqlite|sql)\b)?", "", sql, flags=re.IGNORECASE)
    text = text.strip().split(";")[0]
    return text.replace("\n", " ").strip()
226
+
227
def validate_sql(sql):
    """Return *sql* unchanged if it starts with SELECT (any case); otherwise raise.

    Raises:
        Exception: with the message "Only SELECT allowed".
    """
    if sql.lower().startswith("select"):
        return sql
    raise Exception("Only SELECT allowed")
231
+
232
def run_query(sql):
    """Execute *sql* on the shared connection.

    Returns:
        (cols, rows): the result column names taken from cursor.description,
        and all fetched rows.
    """
    cursor = conn.cursor()
    cursor.execute(sql)
    rows = cursor.fetchall()
    cols = [column[0] for column in cursor.description]
    return cols, rows
237
+
238
+ # =========================
239
+ # AGGREGATE SAFETY
240
+ # =========================
241
+
242
def is_aggregate_only_query(sql):
    """Return True when *sql* calls an aggregate function and has no GROUP BY.

    Such a query returns a row even when nothing matched (COUNT gives 0, the
    others NULL), so an empty result cannot be detected from the row count.

    COUNT, SUM, AVG, MIN and MAX are recognised, case-insensitively, with
    optional whitespace before the parenthesis. The function name must start
    at a word boundary, so "discount(" is not taken for COUNT.
    """
    import re

    s = sql.lower()
    has_aggregate = re.search(r"\b(?:count|sum|avg|min|max)\s*\(", s) is not None
    has_group_by = re.search(r"\bgroup\s+by\b", s) is not None
    return has_aggregate and not has_group_by
245
+
246
def has_underlying_data(sql):
    """Check whether the rows an aggregate query runs over exist at all.

    Builds and executes "SELECT 1 FROM <source and filters> LIMIT 1" from the
    part of *sql* after its first FROM keyword, cut off before any GROUP BY,
    ORDER BY or LIMIT clause.

    The original casing of the SQL is preserved, so string literals in the
    WHERE clause (e.g. gender = 'Female') compare exactly as in the real
    query. FROM is matched as a whole word, so a column such as "valid_from"
    is not taken for the keyword.

    Returns:
        True if the probe yields a row; False if it yields none or *sql* has
        no FROM clause.
    """
    import re

    from_match = re.search(r"\bfrom\b", sql, flags=re.IGNORECASE)
    if from_match is None:
        return False

    tail = sql[from_match.end():]
    # Trailing clauses would clash with the LIMIT 1 appended below.
    tail = re.split(
        r"\b(?:group\s+by|order\s+by|limit)\b",
        tail,
        maxsplit=1,
        flags=re.IGNORECASE,
    )[0]

    test_sql = "SELECT 1 FROM " + tail + " LIMIT 1"

    cur = conn.cursor()
    return cur.execute(test_sql).fetchone() is not None
256
+
257
+
258
+
259
+
260
+ # =========================
261
+ # PATIENT SUMMARY
262
+ # =========================
263
+
264
def build_table_summary(table_name):
    """Build a text summary of *table_name*: row count plus top values per text column.

    For each column whose declared type starts with "text", "varchar" or
    "char", the five most frequent values are listed with their counts.

    When the table appears in load_ai_schema(), only columns listed there
    (ai_allowed = 1) are broken down, so the summary does not reveal values
    of columns the assistant is not allowed to use.

    Identifiers are double-quoted before being placed in SQL. A column whose
    breakdown query fails with a database error is skipped.
    """
    def quote(identifier):
        # Double-quote an SQL identifier, escaping embedded quotes.
        return '"' + str(identifier).replace('"', '""') + '"'

    cur = conn.cursor()
    table = quote(table_name)

    # Total rows
    total = cur.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]

    # Column info; each row is (cid, name, type, notnull, dflt_value, pk)
    columns = cur.execute(f"PRAGMA table_info({table})").fetchall()

    schema = load_ai_schema()
    allowed = None
    if table_name in schema:
        allowed = {col for col, _ in schema[table_name]["columns"]}

    parts = [
        f"Here’s a summary of **{table_name}**:\n\n",
        f"• Total records: {total}\n",
    ]

    # Summarize categorical (text-typed) columns
    for col in columns:
        col_name = col[1]
        col_type = (col[2] or "").lower()

        if allowed is not None and col_name not in allowed:
            continue
        # startswith also covers sized declarations such as varchar(50).
        if not col_type.startswith(("text", "varchar", "char")):
            continue

        quoted_col = quote(col_name)
        try:
            rows = cur.execute(
                f"""
                SELECT {quoted_col}, COUNT(*)
                FROM {table}
                GROUP BY {quoted_col}
                ORDER BY COUNT(*) DESC
                LIMIT 5
                """
            ).fetchall()
        except sqlite3.Error:
            # Best effort: skip a column that cannot be grouped.
            continue

        if rows:
            parts.append(f"\n• {col_name.capitalize()} breakdown:\n")
            parts.extend(f" - {val}: {count}\n" for val, count in rows)

    parts.append("\nYou can ask more detailed questions about this data.")

    return "".join(parts)
307
+
308
+
309
+
310
+ # =========================
311
+ # MAIN ENGINE
312
+ # =========================
313
+
314
def _is_summary_request(text):
    """Return True when lowercased *text* asks for a summary or overview."""
    # Includes common misspellings of "summary".
    keywords = ("summary", "summery", "sumary", "sumery", "overview")
    return any(k in text for k in keywords)


def process_question(question):
    """Answer one user question and return a response dict.

    Processing order:
      1. Summary requests that name exactly one table get a table summary;
         ones naming several tables are asked to be more specific.
      2. "why..." and confirmation follow-ups are answered when the previous
         reply was a "no data" response.
      3. The question is spell-corrected and relative dates are rewritten;
         the conversation state is reset.
      4. Questions about available data get the schema description.
      5. Questions that mention no known table or column are declined.
      6. Otherwise the LLM generates SQL, which is sanitized, validated and
         executed.

    Previously step 1 ran for every question that mentioned a table, so a
    question like "how many patients were admitted in 2021?" was answered
    with a table summary and never reached the SQL path. It is now limited
    to summary/overview requests.

    Returns:
        A dict that always has "status" and "data"; depending on the branch
        it also has "message", "note", "sql" and/or "columns".

    Raises:
        Exception: from validate_sql() when the generated SQL is not a
            SELECT. Errors raised while executing the SQL also propagate.
    """
    global LAST_PROMPT_TYPE, LAST_SUGGESTED_DATE

    q = question.strip().lower()

    # -------------------------------
    # Table summary intent
    # -------------------------------
    if _is_summary_request(q):
        matched_tables = extract_relevant_tables(q)
        if len(matched_tables) == 1:
            return {
                "status": "ok",
                "message": build_table_summary(matched_tables[0]),
                "data": []
            }
        if len(matched_tables) > 1:
            return {
                "status": "ok",
                "message": (
                    "Your question matches multiple datasets:\n"
                    + "\n".join(f"- {t}" for t in matched_tables)
                    + "\n\nPlease be more specific."
                ),
                "data": []
            }

    # -------------------------------
    # WHY follow-up handling
    # -------------------------------
    if is_why_question(q) and LAST_PROMPT_TYPE == "NO_DATA":
        year = LAST_SUGGESTED_DATE[:4] if LAST_SUGGESTED_DATE else "the latest available year"
        return {
            "status": "ok",
            "message": (
                f"I suggested **{year}** because that’s the most recent year "
                f"for which data exists in the system.\n\n"
                "Your database doesn’t contain newer records yet.\n\n"
                "You can explore:\n"
                # Use the computed year instead of a hard-coded 2021.
                f"• Data from {year}\n"
                "• Trends over time\n"
                "• Patient summaries"
            ),
            "data": []
        }

    # -------------------------------
    # YES / confirmation handling
    # -------------------------------
    if is_confirmation(q) and LAST_PROMPT_TYPE == "NO_DATA":
        example_year = LAST_SUGGESTED_DATE[:4] if LAST_SUGGESTED_DATE else "2021"
        return {
            "status": "ok",
            "message": (
                "Great 🙂\n\n"
                "Here are some things you can ask:\n"
                f"• How many patients were admitted in {example_year}?\n"
                "• Patient count by gender\n"
                "• Total visits by month\n"
                "• Most common conditions"
            ),
            "data": []
        }

    # -------------------------------
    # Normalize question
    # -------------------------------
    question = correct_spelling(question)
    question = normalize_time_question(question)

    # Reset state once user asks a fresh question
    LAST_PROMPT_TYPE = None
    LAST_SUGGESTED_DATE = None

    # -------------------------------
    # Metadata queries
    # -------------------------------
    # Compare in lowercase so "What data do you have?" is recognised too.
    lowered_question = question.lower()
    if any(x in lowered_question for x in ["what data", "what tables", "which data"]):
        return {
            "status": "ok",
            "message": humanize(describe_schema()),
            "data": []
        }

    # -------------------------------
    # Unsupported questions (metadata-based)
    # -------------------------------
    if not is_question_supported(question):
        return {
            "status": "ok",
            "message": (
                "That information isn’t available in the system.\n\n"
                "You can ask about:\n"
                "• Patients\n"
                "• Visits\n"
                "• Conditions\n"
                "• Medications"
            ),
            "data": [],
            "sql": None,
            "note": None
        }

    # -------------------------------
    # LLM → SQL
    # -------------------------------
    sql = call_llm(build_prompt(question))

    # Substring check tolerates extra punctuation or casing around the marker.
    if "NOT_ANSWERABLE" in sql.upper():
        return {
            "status": "ok",
            "message": "I don’t have enough data to answer that.",
            "data": []
        }

    sql = validate_sql(sanitize_sql(sql))
    cols, rows = run_query(sql)

    # -------------------------------
    # No data (aggregate case)
    # -------------------------------
    if is_aggregate_only_query(sql) and not has_underlying_data(sql):
        LAST_PROMPT_TYPE = "NO_DATA"
        LAST_SUGGESTED_DATE = get_latest_data_date()

        return {
            "status": "ok",
            "message": friendly("No data is available for that time period."),
            "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
            "data": [],
            "sql": None
        }

    # -------------------------------
    # Empty result set
    # -------------------------------
    if not rows:
        LAST_PROMPT_TYPE = "NO_DATA"
        LAST_SUGGESTED_DATE = get_latest_data_date()

        return {
            "status": "ok",
            "message": friendly("No records found."),
            "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
            "data": []
        }

    # -------------------------------
    # Successful response
    # -------------------------------
    return {
        "status": "ok",
        "sql": sql,
        "columns": cols,
        "data": rows
    }