bhavika24 committed on
Commit
bfa0b78
·
verified ·
1 Parent(s): 1932ae9

Upload engine.py

Browse files
Files changed (1) hide show
  1. engine.py +465 -429
engine.py CHANGED
@@ -1,429 +1,465 @@
1
- import os
2
- import sqlite3
3
- from openai import OpenAI
4
- from difflib import get_close_matches
5
- from datetime import datetime
6
-
7
- # =========================
8
- # SETUP
9
- # =========================
10
-
11
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
12
- conn = sqlite3.connect("hospital.db", check_same_thread=False)
13
-
14
- # =========================
15
- # CONVERSATION STATE
16
- # =========================
17
-
18
- LAST_PROMPT_TYPE = None
19
- LAST_SUGGESTED_DATE = None
20
-
21
- # =========================
22
- # HUMAN RESPONSE HELPERS
23
- # =========================
24
-
25
- def humanize(text):
26
- return f"Sure 🙂\n\n{text}"
27
-
28
- def friendly(text):
29
- return f"{text}\n\nIf you want, I can help you explore this further 🙂"
30
-
31
- def is_confirmation(text):
32
- return text.strip().lower() in ["yes", "yep", "yeah", "ok", "okay", "sure"]
33
-
34
- def is_why_question(text):
35
- return text.strip().lower().startswith("why")
36
-
37
- # =========================
38
- # SPELL CORRECTION
39
- # =========================
40
-
41
- KNOWN_TERMS = [
42
- "patient", "patients", "condition", "conditions",
43
- "encounter", "encounters", "visit", "visits",
44
- "medication", "medications",
45
- "admitted", "admission",
46
- "year", "month", "last", "recent", "today"
47
- ]
48
-
49
- def correct_spelling(q):
50
- words = q.split()
51
- fixed = []
52
- for w in words:
53
- clean = w.lower().strip(",.?")
54
- match = get_close_matches(clean, KNOWN_TERMS, n=1, cutoff=0.8)
55
- fixed.append(match[0] if match else w)
56
- return " ".join(fixed)
57
-
58
- # =========================
59
- # SCHEMA
60
- # =========================
61
-
62
- def load_ai_schema():
63
- cur = conn.cursor()
64
- schema = {}
65
-
66
- tables = cur.execute("""
67
- SELECT table_name, description
68
- FROM ai_tables
69
- WHERE ai_enabled = 1
70
- """).fetchall()
71
-
72
- for table, desc in tables:
73
- cols = cur.execute("""
74
- SELECT column_name, description
75
- FROM ai_columns
76
- WHERE table_name = ? AND ai_allowed = 1
77
- """, (table,)).fetchall()
78
-
79
- schema[table] = {
80
- "description": desc,
81
- "columns": cols
82
- }
83
-
84
- return schema
85
-
86
- # =========================
87
- # HUMAN SCHEMA DESCRIPTION
88
- # =========================
89
-
90
- def describe_schema():
91
- schema = load_ai_schema()
92
-
93
- response = "Here’s the data I currently have access to:\n\n"
94
-
95
- for table, meta in schema.items():
96
- response += f"• **{table.capitalize()}** — {meta['description']}\n"
97
- for col, desc in meta["columns"]:
98
- response += f" - {col}: {desc}\n"
99
- response += "\n"
100
-
101
- response += (
102
- "You can ask things like:\n"
103
- "• How many patients are there?\n"
104
- "• Patient count by gender\n"
105
- "• Admissions by year\n\n"
106
- "Just tell me what you want to explore 🙂"
107
- )
108
-
109
- return response
110
-
111
- # =========================
112
- # TIME HANDLING
113
- # =========================
114
-
115
- def get_latest_data_date():
116
- cur = conn.cursor()
117
- r = cur.execute("SELECT MAX(start_date) FROM encounters").fetchone()
118
- return r[0]
119
-
120
- def normalize_time_question(q):
121
- latest = get_latest_data_date()
122
- if not latest:
123
- return q
124
-
125
- if "today" in q:
126
- return q.replace("today", f"on {latest[:10]}")
127
-
128
- if "yesterday" in q:
129
- return q.replace("yesterday", f"on {latest[:10]}")
130
-
131
- return q
132
-
133
- # =========================
134
- # UNSUPPORTED QUESTIONS
135
- # =========================
136
-
137
- def get_unsupported_reason(q):
138
- q = q.lower()
139
-
140
- if any(w in q for w in ["consultant", "doctor"]):
141
- return {
142
- "reason": "Doctor or consultant-level data is not available.",
143
- "suggestion": "Try asking about patients, visits, or admissions."
144
- }
145
-
146
- if any(w in q for w in ["department", "specialization"]):
147
- return {
148
- "reason": "Department-level data is not stored.",
149
- "suggestion": "Try patient or visit related questions."
150
- }
151
-
152
- return None
153
-
154
- # =========================
155
- # SQL GENERATION
156
- # =========================
157
-
158
- def build_prompt(question):
159
- schema = load_ai_schema()
160
-
161
- prompt = """
162
- You are a hospital SQL assistant.
163
-
164
- Rules:
165
- - Use only SELECT
166
- - SQLite syntax
167
- - Use only listed tables/columns
168
- - Return ONLY SQL or NOT_ANSWERABLE
169
- """
170
-
171
- for table, meta in schema.items():
172
- prompt += f"\nTable: {table}\n"
173
- for col, desc in meta["columns"]:
174
- prompt += f"- {col}: {desc}\n"
175
-
176
- prompt += f"\nQuestion: {question}\n"
177
- return prompt
178
-
179
- def call_llm(prompt):
180
- res = client.chat.completions.create(
181
- model="gpt-4.1-mini",
182
- messages=[
183
- {"role": "system", "content": "Return only SQL or NOT_ANSWERABLE"},
184
- {"role": "user", "content": prompt}
185
- ],
186
- temperature=0
187
- )
188
- return res.choices[0].message.content.strip()
189
-
190
- # =========================
191
- # SQL SAFETY
192
- # =========================
193
-
194
- def sanitize_sql(sql):
195
- sql = sql.replace("```", "").replace("sql", "").strip()
196
- sql = sql.split(";")[0]
197
- return sql.replace("\n", " ").strip()
198
-
199
- def validate_sql(sql):
200
- if not sql.lower().startswith("select"):
201
- raise Exception("Only SELECT allowed")
202
- return sql
203
-
204
- def run_query(sql):
205
- cur = conn.cursor()
206
- rows = cur.execute(sql).fetchall()
207
- cols = [c[0] for c in cur.description]
208
- return cols, rows
209
-
210
- # =========================
211
- # AGGREGATE SAFETY
212
- # =========================
213
-
214
- def is_aggregate_only_query(sql):
215
- s = sql.lower()
216
- return ("count(" in s or "sum(" in s or "avg(" in s) and "group by" not in s
217
-
218
- def has_underlying_data(sql):
219
- base = sql.lower()
220
- if "from" not in base:
221
- return False
222
-
223
- base = base.split("from", 1)[1]
224
- test_sql = "SELECT 1 FROM " + base.split("group by")[0] + " LIMIT 1"
225
-
226
- cur = conn.cursor()
227
- return cur.execute(test_sql).fetchone() is not None
228
-
229
- def is_patient_summary_question(text):
230
- t = text.lower()
231
-
232
- keywords = [
233
- "patient summary",
234
- "patients summary",
235
- "patient overview",
236
- "summary of patients",
237
- "give a patient summary",
238
- "patient summery",
239
- "patients summery",
240
- "patent summary",
241
- "patant summary",
242
- "patient sumary",
243
- "patients sumery"
244
- ]
245
-
246
- return any(k in t for k in keywords)
247
-
248
-
249
- # =========================
250
- # PATIENT SUMMARY
251
- # =========================
252
-
253
- def build_patient_summary():
254
- cur = conn.cursor()
255
-
256
- total = cur.execute(
257
- "SELECT COUNT(*) FROM patients"
258
- ).fetchone()[0]
259
-
260
- genders = cur.execute(
261
- "SELECT gender, COUNT(*) FROM patients GROUP BY gender"
262
- ).fetchall()
263
-
264
- msg = "Here’s a quick summary of patients:\n\n"
265
- msg += f"• Total patients: {total}\n"
266
-
267
- if genders:
268
- msg += "• Gender distribution:\n"
269
- for g, c in genders:
270
- msg += f" - {g}: {c}\n"
271
-
272
- msg += (
273
- "\nYou can also ask:\n"
274
- "• Patients admitted by year\n"
275
- "• Patient count by age\n"
276
- "• Visit trends"
277
- )
278
-
279
- return msg
280
-
281
-
282
- # =========================
283
- # MAIN ENGINE
284
- # =========================
285
-
286
- def process_question(question):
287
- global LAST_PROMPT_TYPE, LAST_SUGGESTED_DATE
288
-
289
- q = question.strip().lower()
290
-
291
- # -------------------------------
292
- # Patient summary intent
293
- # -------------------------------
294
- if is_patient_summary_question(q):
295
- return {
296
- "status": "ok",
297
- "message": build_patient_summary(),
298
- "data": [],
299
- "sql": None,
300
- "note": None
301
- }
302
-
303
- # -------------------------------
304
- # WHY follow-up handling
305
- # -------------------------------
306
- if is_why_question(q) and LAST_PROMPT_TYPE == "NO_DATA":
307
- year = LAST_SUGGESTED_DATE[:4] if LAST_SUGGESTED_DATE else "the latest available year"
308
- return {
309
- "status": "ok",
310
- "message": (
311
- f"I suggested **{year}** because that’s the most recent year "
312
- f"for which data exists in the system.\n\n"
313
- "Your database doesn’t contain newer records yet.\n\n"
314
- "You can explore:\n"
315
- "• Data from 2021\n"
316
- "• Trends over time\n"
317
- "• Patient summaries"
318
- ),
319
- "data": []
320
- }
321
-
322
- # -------------------------------
323
- # YES / confirmation handling
324
- # -------------------------------
325
- if is_confirmation(q) and LAST_PROMPT_TYPE == "NO_DATA":
326
- return {
327
- "status": "ok",
328
- "message": (
329
- "Great 🙂\n\n"
330
- "Here are some things you can ask:\n"
331
- "• How many patients were admitted in 2021?\n"
332
- "• Patient count by gender\n"
333
- "• Total visits by month\n"
334
- "• Most common conditions"
335
- ),
336
- "data": []
337
- }
338
-
339
- # -------------------------------
340
- # Normalize question
341
- # -------------------------------
342
- question = correct_spelling(question)
343
- question = normalize_time_question(question)
344
-
345
- # Reset state once user asks a fresh question
346
- LAST_PROMPT_TYPE = None
347
- LAST_SUGGESTED_DATE = None
348
-
349
- # -------------------------------
350
- # Metadata queries
351
- # -------------------------------
352
- if any(x in question for x in ["what data", "what tables", "which data"]):
353
- return {
354
- "status": "ok",
355
- "message": humanize(describe_schema()),
356
- "data": []
357
- }
358
-
359
- # -------------------------------
360
- # Unsupported questions
361
- # -------------------------------
362
- unsupported = get_unsupported_reason(question)
363
- if unsupported:
364
- return {
365
- "status": "ok",
366
- "message": (
367
- f"{unsupported['reason']}\n\n"
368
- f"{unsupported['suggestion']}\n\n"
369
- "Example questions:\n"
370
- "• How many patients were admitted last year?\n"
371
- "• Total visits by month\n"
372
- "• Patient count by gender"
373
- ),
374
- "data": []
375
- }
376
-
377
- # -------------------------------
378
- # LLM → SQL
379
- # -------------------------------
380
- sql = call_llm(build_prompt(question))
381
-
382
- if sql == "NOT_ANSWERABLE":
383
- return {
384
- "status": "ok",
385
- "message": "I don’t have enough data to answer that.",
386
- "data": []
387
- }
388
-
389
- sql = validate_sql(sanitize_sql(sql))
390
- cols, rows = run_query(sql)
391
-
392
- # -------------------------------
393
- # No data (aggregate case)
394
- # -------------------------------
395
- if is_aggregate_only_query(sql) and not has_underlying_data(sql):
396
- LAST_PROMPT_TYPE = "NO_DATA"
397
- LAST_SUGGESTED_DATE = get_latest_data_date()
398
-
399
- return {
400
- "status": "ok",
401
- "message": friendly("No data is available for that time period."),
402
- "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
403
- "data": [],
404
- "sql": None
405
- }
406
-
407
- # -------------------------------
408
- # Empty result set
409
- # -------------------------------
410
- if not rows:
411
- LAST_PROMPT_TYPE = "NO_DATA"
412
- LAST_SUGGESTED_DATE = get_latest_data_date()
413
-
414
- return {
415
- "status": "ok",
416
- "message": friendly("No records found."),
417
- "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
418
- "data": []
419
- }
420
-
421
- # -------------------------------
422
- # Successful response
423
- # -------------------------------
424
- return {
425
- "status": "ok",
426
- "sql": sql,
427
- "columns": cols,
428
- "data": rows
429
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sqlite3
3
+ from openai import OpenAI
4
+ from difflib import get_close_matches
5
+ from datetime import datetime
6
+
7
+ # =========================
8
+ # SETUP
9
+ # =========================
10
+
11
# OpenAI client used for SQL generation; the key comes from the
# OPENAI_API_KEY environment variable (None when it is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# One module-wide SQLite connection. check_same_thread=False disables
# sqlite3's same-thread check so the connection can be used from other threads.
conn = sqlite3.connect("hospital.db", check_same_thread=False)
13
+
14
+ # =========================
15
+ # CONVERSATION STATE
16
+ # =========================
17
+
18
# Module-level conversation state; both values start out unset.
LAST_PROMPT_TYPE = LAST_SUGGESTED_DATE = None
20
+
21
+
22
+
23
+ # =========================
24
+ # HUMAN RESPONSE HELPERS
25
+ # =========================
26
+
27
def humanize(text):
    """Return *text* prefixed with a short "Sure" acknowledgement and a blank line."""
    return "Sure \n\n{}".format(text)
29
+
30
def friendly(text):
    """Return *text* followed by a blank line and an offer to explore further."""
    offer = "\n\nIf you want, I can help you explore this further "
    return "{}{}".format(text, offer)
32
+
33
def is_confirmation(text):
    """Return True when *text*, trimmed and lowercased, is exactly an affirmative word."""
    affirmatives = ("yes", "yep", "yeah", "ok", "okay", "sure")
    normalized = text.strip().lower()
    return normalized in affirmatives
35
+
36
def is_why_question(text):
    """Return True when *text*, trimmed and lowercased, begins with "why"."""
    normalized = text.strip().lower()
    return normalized[:3] == "why"
38
+
39
+ # =========================
40
+ # SPELL CORRECTION
41
+ # =========================
42
+
43
# Vocabulary used as the candidate list for fuzzy spelling correction.
KNOWN_TERMS = [
    # entities (singular / plural)
    "patient",
    "patients",
    "condition",
    "conditions",
    "encounter",
    "encounters",
    "visit",
    "visits",
    "medication",
    "medications",
    # admission wording
    "admitted",
    "admission",
    # time wording
    "year",
    "month",
    "last",
    "recent",
    "today",
]
50
+
51
def correct_spelling(q):
    """Snap near-miss words in *q* onto the entries of KNOWN_TERMS.

    The question is split on whitespace. Each word is lowercased and
    stripped of leading/trailing ",", "." and "?" before being compared
    with difflib.get_close_matches (best single match, cutoff 0.8). A
    matched word is replaced by the known term itself; an unmatched word
    is kept exactly as typed. The words are re-joined with single spaces.
    """
    def _fix(token):
        bare = token.lower().strip(",.?")
        candidates = get_close_matches(bare, KNOWN_TERMS, n=1, cutoff=0.8)
        return candidates[0] if candidates else token

    return " ".join(_fix(token) for token in q.split())
59
+
60
+
61
+
62
+ # =========================
63
+ # SCHEMA
64
+ # =========================
65
+ from functools import lru_cache
66
+
67
@lru_cache(maxsize=1)
def load_ai_schema():
    """Read the AI-visible schema from the ai_tables / ai_columns tables.

    Returns:
        dict mapping each table name with ai_enabled = 1 to
        {"description": <table description>,
         "columns": [(column_name, description), ...]},
        listing only columns with ai_allowed = 1.

    The result is memoized by lru_cache: after the first successful call
    the same dict object is handed back to every caller.
    """
    cursor = conn.cursor()

    enabled_tables = cursor.execute("""
        SELECT table_name, description
        FROM ai_tables
        WHERE ai_enabled = 1
    """).fetchall()

    column_query = """
        SELECT column_name, description
        FROM ai_columns
        WHERE table_name = ? AND ai_allowed = 1
    """

    return {
        name: {
            "description": description,
            "columns": cursor.execute(column_query, (name,)).fetchall(),
        }
        for name, description in enabled_tables
    }
91
+
92
+ # =========================
93
+ # TABLE MATCHING (CORE LOGIC)
94
+ # =========================
95
+
96
def extract_relevant_tables(question):
    """Return up to five schema tables that *question* appears to mention.

    A table is selected when its own name, or the name of any of its
    AI-allowed columns, occurs as a case-insensitive substring of the
    question.

    Args:
        question: the user's question text.

    Returns:
        list of table names, in the order the schema lists them, capped
        at five entries.
    """
    text = question.lower()
    matched = []

    for table, meta in load_ai_schema().items():
        names = [table, *(col for col, _ in meta["columns"])]
        # Empty/NULL names are skipped: "" is a substring of every question
        # and None has no .lower().
        if any(name and name.lower() in text for name in names):
            matched.append(table)

    # Each table is appended at most once, so no de-duplication is needed;
    # slicing the list directly keeps the result order deterministic
    # (a set round-trip would reorder it from run to run).
    return matched[:5]
115
+
116
+
117
+
118
+ # =========================
119
+ # HUMAN SCHEMA DESCRIPTION
120
+ # =========================
121
+
122
def describe_schema():
    """Build a human-readable overview of the AI-visible tables and columns.

    Returns:
        A multi-line string with one bulleted entry per table (capitalized
        name and description), its columns listed beneath it, and a few
        example questions. When no tables are configured, a short notice
        is returned instead.
    """
    schema = load_ai_schema()

    # Bail out before building anything when there is nothing to describe.
    if not schema:
        return "No AI-enabled tables are configured."

    parts = ["Here’s the data I currently have access to:\n\n"]

    for table, meta in schema.items():
        parts.append(f"• **{table.capitalize()}** — {meta['description']}\n")
        parts.extend(f" - {col}: {desc}\n" for col, desc in meta["columns"])
        parts.append("\n")

    parts.append(
        "You can ask things like:\n"
        "• How many patients are there?\n"
        "• Patient count by gender\n"
        "• Admissions by year\n\n"
        "Just tell me what you want to explore "
    )

    return "".join(parts)
146
+
147
+ # =========================
148
+ # TIME HANDLING
149
+ # =========================
150
+
151
def get_latest_data_date():
    """Return MAX(start_date) from the encounters table.

    The value is whatever SQLite yields for the aggregate; it is None when
    the aggregate is NULL (e.g. no rows with a start_date).
    """
    row = conn.cursor().execute(
        "SELECT MAX(start_date) FROM encounters"
    ).fetchone()
    latest = row[0]
    return latest
155
+
156
def normalize_time_question(q):
    """Rewrite "today"/"yesterday" in *q* as the latest date present in the data.

    Only the first keyword found (checking "today" before "yesterday") is
    rewritten; every occurrence of it becomes "on <first 10 characters of
    the latest date>". The question is returned unchanged when no latest
    date exists or neither keyword occurs.
    """
    latest = get_latest_data_date()
    if latest:
        for keyword in ("today", "yesterday"):
            if keyword in q:
                return q.replace(keyword, f"on {latest[:10]}")
    return q
168
+
169
+ # =========================
170
+ # UNSUPPORTED QUESTIONS
171
+ # =========================
172
+
173
def is_question_supported(question):
    """Decide whether *question* looks answerable from the configured schema.

    A question is supported when it contains an analytic cue word
    ("count", "how many", "trend", ...) or mentions any AI-enabled table
    or column name. All comparisons are case-insensitive substring tests.

    Args:
        question: the user's question text.

    Returns:
        True when the question is considered supported, otherwise False.
    """
    text = question.lower()

    analytic_cues = (
        "count", "total", "average", "sum",
        "how many", "number of", "trend",
    )
    if any(cue in text for cue in analytic_cues):
        return True

    for table, meta in load_ai_schema().items():
        names = [table, *(col for col, _ in meta["columns"])]
        # Names are lowercased to match the lowercased question (the same
        # rule extract_relevant_tables uses); empty/NULL names are skipped
        # because "" is a substring of everything.
        if any(name and name.lower() in text for name in names):
            return True

    return False
191
+
192
+
193
+
194
+
195
+ # =========================
196
+ # SQL GENERATION
197
+ # =========================
198
+
199
def build_prompt(question):
    """Assemble the LLM prompt: fixed rules, schema listing, then the question.

    Only the tables picked by extract_relevant_tables() are listed; when
    none match, the whole AI-visible schema is listed instead.
    """
    relevant = extract_relevant_tables(question)
    full_schema = load_ai_schema()
    if relevant:
        schema = {name: full_schema[name] for name in relevant}
    else:
        schema = full_schema  # fallback if nothing matched

    pieces = [
        "\n"
        "You are a hospital SQL assistant.\n"
        "\n"
        "Rules:\n"
        "- Use only SELECT\n"
        "- SQLite syntax\n"
        "- Use only listed tables/columns\n"
        "- Return ONLY SQL or NOT_ANSWERABLE\n"
    ]

    for table, meta in schema.items():
        pieces.append(f"\nTable: {table}\n")
        pieces.extend(f"- {col}: {desc}\n" for col, desc in meta["columns"])

    pieces.append(f"\nQuestion: {question}\n")
    return "".join(pieces)
224
+
225
+
226
def call_llm(prompt):
    """Send *prompt* to the chat-completions API and return the trimmed reply text."""
    conversation = [
        {"role": "system", "content": "Return only SQL or NOT_ANSWERABLE"},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=conversation,
        temperature=0,
    )
    reply = completion.choices[0].message.content
    return reply.strip()
236
+
237
+ # =========================
238
+ # SQL SAFETY
239
+ # =========================
240
+
241
def sanitize_sql(sql):
    """Reduce an LLM reply to a single one-line SQL statement.

    Markdown code fences are removed, a leading "sql" language tag (any
    case) is dropped, everything after the first ";" is discarded, and
    newlines are turned into spaces.

    Args:
        sql: raw text returned by the model.

    Returns:
        The cleaned statement, stripped of surrounding whitespace.
    """
    import re  # local import: the module does not import re at file level

    text = sql.replace("```", "").strip()
    # Drop only a leading language tag. Removing every "sql" substring
    # would also corrupt identifiers and literals such as "mysql_id".
    text = re.sub(r"^sql\s+", "", text, flags=re.IGNORECASE)
    # Keep the first statement only.
    text = text.split(";")[0]
    return text.replace("\n", " ").strip()
245
+
246
def validate_sql(sql):
    """Return *sql* unchanged when it starts with SELECT (any case).

    Raises:
        Exception: with the message "Only SELECT allowed" otherwise.
    """
    if sql.lower().startswith("select"):
        return sql
    raise Exception("Only SELECT allowed")
250
+
251
def run_query(sql):
    """Execute *sql* on the shared connection.

    Returns:
        (column_names, rows): the names taken from the cursor description
        and every fetched row.
    """
    cursor = conn.cursor()
    cursor.execute(sql)
    records = cursor.fetchall()
    headers = [column[0] for column in cursor.description]
    return headers, records
255
+ return cols, rows
256
+
257
+ # =========================
258
+ # AGGREGATE SAFETY
259
+ # =========================
260
+
261
def is_aggregate_only_query(sql):
    """Return True when *sql* calls COUNT/SUM/AVG and contains no GROUP BY.

    The check is a case-insensitive substring test on the statement text.
    """
    lowered = sql.lower()
    if "group by" in lowered:
        return False
    return any(call in lowered for call in ("count(", "sum(", "avg("))
264
+
265
def has_underlying_data(sql):
    """Check whether any row feeds the aggregate computed by *sql*.

    Everything after the first FROM keyword (up to a GROUP BY, if any) is
    re-used in a "SELECT 1 FROM ... LIMIT 1" probe.

    Args:
        sql: the SELECT statement to inspect.

    Returns:
        True when the probe yields a row; False when it yields none or
        when *sql* has no FROM clause.
    """
    import re  # local import: the module does not import re at file level

    # Keywords are located case-insensitively on whole words, but the
    # statement itself keeps its original case: lowercasing it would also
    # lowercase string literals in WHERE clauses and change what the probe
    # matches.
    from_match = re.search(r"\bfrom\b", sql, flags=re.IGNORECASE)
    if from_match is None:
        return False

    tail = sql[from_match.end():]
    tail = re.split(r"\bgroup\s+by\b", tail, maxsplit=1, flags=re.IGNORECASE)[0]
    probe = "SELECT 1 FROM " + tail + " LIMIT 1"

    cur = conn.cursor()
    return cur.execute(probe).fetchone() is not None
275
+
276
+
277
+
278
+
279
+ # =========================
280
+ # PATIENT SUMMARY
281
+ # =========================
282
+
283
def build_table_summary(table_name):
    """Build a text summary of one table: row count plus top values per text column.

    For every column whose declared type contains CHAR, CLOB or TEXT
    (this covers e.g. VARCHAR(50)), the five most frequent values are
    listed with their counts.

    Args:
        table_name: name of the table to summarize.

    Returns:
        A multi-line, human-readable summary string.

    Raises:
        sqlite3.Error: if the row count or column listing cannot be read.
        Failures while grouping an individual column are skipped.
    """
    def _quote(identifier):
        # Double-quote an identifier so names with spaces, reserved words
        # or embedded quotes cannot break the statement.
        return '"' + str(identifier).replace('"', '""') + '"'

    cur = conn.cursor()
    table = _quote(table_name)

    # Total rows
    total = cur.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]

    # Column info: each row is (cid, name, type, notnull, dflt_value, pk)
    columns = cur.execute(f"PRAGMA table_info({table})").fetchall()

    parts = [
        f"Here’s a summary of **{table_name}**:\n\n",
        f"• Total records: {total}\n",
    ]

    for col in columns:
        col_name = col[1]
        declared_type = (col[2] or "").lower()

        # Summarize text-like columns only.
        if not any(tag in declared_type for tag in ("char", "clob", "text")):
            continue

        column = _quote(col_name)
        try:
            rows = cur.execute(
                f"SELECT {column}, COUNT(*) FROM {table} "
                f"GROUP BY {column} ORDER BY COUNT(*) DESC LIMIT 5"
            ).fetchall()
        except sqlite3.Error:
            # Best effort: skip a column that cannot be grouped, but let
            # non-database errors surface.
            continue

        if rows:
            parts.append(f"\n{col_name.capitalize()} breakdown:\n")
            parts.extend(f" - {val}: {count}\n" for val, count in rows)

    parts.append("\nYou can ask more detailed questions about this data.")
    return "".join(parts)
326
+
327
+
328
+
329
+ # =========================
330
+ # MAIN ENGINE
331
+ # =========================
332
+
333
def process_question(question):
    """Answer one user question and return a response dict.

    Steps, in order:
      1. spelling correction and today/yesterday normalization;
      2. "how recent is the data" questions;
      3. single-table summary requests;
      4. questions matching several tables (asks the user to narrow down);
      5. schema discovery questions;
      6. rejection of unsupported questions;
      7. SQL generation by the LLM, validation and execution;
      8. no-data handling, otherwise the query result.

    Every response carries "status" and "data"; text replies add
    "message" (and "note" for no-data replies), successful queries add
    "sql" and "columns". LAST_PROMPT_TYPE / LAST_SUGGESTED_DATE are reset
    on entry and set again only by a no-data reply.
    """
    global LAST_PROMPT_TYPE, LAST_SUGGESTED_DATE

    # Keyword checks below run against the raw (uncorrected) lowercase text.
    q = question.strip().lower()

    def mentions(*phrases):
        return any(phrase in q for phrase in phrases)

    def reply(message):
        return {"status": "ok", "message": message, "data": []}

    def no_data_reply(text):
        global LAST_PROMPT_TYPE, LAST_SUGGESTED_DATE
        LAST_PROMPT_TYPE = "NO_DATA"
        LAST_SUGGESTED_DATE = get_latest_data_date()
        return {
            "status": "ok",
            "message": friendly(text),
            "note": f"Available data is only up to {LAST_SUGGESTED_DATE}.",
            "data": [],
        }

    # Normalize first
    question = normalize_time_question(correct_spelling(question))

    LAST_PROMPT_TYPE = None
    LAST_SUGGESTED_DATE = None

    # Handle "data updated till"
    if mentions("updated", "upto", "up to", "latest data"):
        return reply(f"Data is available up to {get_latest_data_date()}")

    # Extract relevant tables
    matched_tables = extract_relevant_tables(question)
    match_count = len(matched_tables)

    # Summary only if the user asks for it and is not asking for a figure
    if (
        match_count == 1
        and mentions("summary", "overview", "describe")
        and not mentions("count", "total", "how many", "average")
    ):
        return reply(build_table_summary(matched_tables[0]))

    if match_count > 1:
        listing = "\n".join(f"- {t}" for t in matched_tables)
        return reply(
            "Your question matches multiple datasets:\n"
            + listing
            + "\n\nPlease be more specific."
        )

    # Metadata discovery
    if mentions("what data", "what tables", "which data"):
        return reply(humanize(describe_schema()))

    # Unsupported question check
    if not is_question_supported(question):
        return reply(
            "That information isn’t available in the system.\n\n"
            "You can ask about:\n"
            "• Patients\n"
            "• Visits\n"
            "• Conditions\n"
            "• Medications"
        )

    # Generate SQL
    sql = call_llm(build_prompt(question))
    if sql == "NOT_ANSWERABLE":
        return reply("I don’t have enough data to answer that.")

    sql = validate_sql(sanitize_sql(sql))
    cols, rows = run_query(sql)

    # No data handling
    if is_aggregate_only_query(sql) and not has_underlying_data(sql):
        return no_data_reply("No data is available for that time period.")

    if not rows:
        return no_data_reply("No records found.")

    # Success
    return {
        "status": "ok",
        "sql": sql,
        "columns": cols,
        "data": rows,
    }
465
+