Spaces:
Sleeping
Sleeping
Upload engine.py
Browse files
engine.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import sqlite3
|
| 3 |
from openai import OpenAI
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
# =========================
|
|
@@ -11,6 +12,34 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
| 11 |
conn = sqlite3.connect("hospital.db", check_same_thread=False)
|
| 12 |
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# =========================
|
| 15 |
# Metadata Loader
|
| 16 |
# =========================
|
|
@@ -152,13 +181,10 @@ def run_query(sql: str):
|
|
| 152 |
|
| 153 |
def is_question_answerable(question):
|
| 154 |
schema = load_ai_schema()
|
| 155 |
-
schema_text = " ".join(schema.keys()).lower()
|
| 156 |
-
|
| 157 |
keywords = ["patient", "encounter", "condition", "observation", "medication", "visit", "diagnosis", "lab", "vital"]
|
| 158 |
|
| 159 |
q = question.lower()
|
| 160 |
|
| 161 |
-
# If none of the core domain keywords are present, likely out of scope
|
| 162 |
if not any(k in q for k in keywords):
|
| 163 |
return False
|
| 164 |
|
|
@@ -197,6 +223,9 @@ def interpret_empty_result(question: str):
|
|
| 197 |
# =========================
|
| 198 |
|
| 199 |
def process_question(question: str):
|
|
|
|
|
|
|
|
|
|
| 200 |
# 1. Guardrail
|
| 201 |
if not is_question_answerable(question):
|
| 202 |
return {
|
|
|
|
| 1 |
import os
|
| 2 |
import sqlite3
|
| 3 |
from openai import OpenAI
|
| 4 |
+
from difflib import get_close_matches
|
| 5 |
|
| 6 |
|
| 7 |
# =========================
|
|
|
|
| 12 |
conn = sqlite3.connect("hospital.db", check_same_thread=False)
|
| 13 |
|
| 14 |
|
| 15 |
+
# =========================
|
| 16 |
+
# Known Terms for Spell Correction
|
| 17 |
+
# =========================
|
| 18 |
+
|
| 19 |
+
# Vocabulary that misspelled question words are snapped to.
KNOWN_TERMS = [
    "patient", "patients", "condition", "conditions", "diagnosis", "encounter", "encounters",
    "visit", "visits", "observation", "observations", "lab", "labs", "test", "tests",
    "medication", "medications", "drug", "drugs", "prescription", "prescriptions",
    "diabetes", "hypertension", "asthma", "cancer",
]

# Punctuation that may be attached to a word and must survive correction.
_EDGE_PUNCTUATION = ",.?!;:\"'()[]{}"


def correct_spelling(question: str) -> str:
    """Return *question* with near-miss domain words replaced by known terms.

    The question is split on whitespace. Each word is compared
    (case-insensitively, ignoring leading/trailing punctuation) against
    ``KNOWN_TERMS`` using ``difflib.get_close_matches`` with a 0.8 cutoff.

    * Words that already are a known term are left untouched, including
      their original casing.
    * Misspelled words are replaced by the closest known term, and the
      punctuation that surrounded them is re-attached, so
      ``"pateints?"`` becomes ``"patients?"`` rather than ``"patients"``.
    * Everything else is passed through unchanged.

    Words are re-joined with single spaces, so runs of whitespace collapse.
    """
    corrected_words = []

    for word in question.split():
        # Separate the word into leading punctuation, core, trailing punctuation.
        without_prefix = word.lstrip(_EDGE_PUNCTUATION)
        prefix = word[:len(word) - len(without_prefix)]
        core = without_prefix.rstrip(_EDGE_PUNCTUATION)
        suffix = without_prefix[len(core):]

        lowered = core.lower()

        # Pure punctuation or an already-correct term: nothing to fix.
        if not lowered or lowered in KNOWN_TERMS:
            corrected_words.append(word)
            continue

        matches = get_close_matches(lowered, KNOWN_TERMS, n=1, cutoff=0.8)
        if matches:
            corrected_words.append(prefix + matches[0] + suffix)
        else:
            corrected_words.append(word)

    return " ".join(corrected_words)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
# =========================
|
| 44 |
# Metadata Loader
|
| 45 |
# =========================
|
|
|
|
| 181 |
|
| 182 |
def is_question_answerable(question):
|
| 183 |
schema = load_ai_schema()
|
|
|
|
|
|
|
| 184 |
keywords = ["patient", "encounter", "condition", "observation", "medication", "visit", "diagnosis", "lab", "vital"]
|
| 185 |
|
| 186 |
q = question.lower()
|
| 187 |
|
|
|
|
| 188 |
if not any(k in q for k in keywords):
|
| 189 |
return False
|
| 190 |
|
|
|
|
| 223 |
# =========================
|
| 224 |
|
| 225 |
def process_question(question: str):
|
| 226 |
+
# 0. Spell correction
|
| 227 |
+
question = correct_spelling(question)
|
| 228 |
+
|
| 229 |
# 1. Guardrail
|
| 230 |
if not is_question_answerable(question):
|
| 231 |
return {
|