Spaces:

bhavika24
/

Text_to_sql

Sleeping

App Files Files Community

bhavika24 committed 14 days ago

Commit

eb31619

verified ·

1 Parent(s): bfa0b78

Upload engine.py

Browse files

Files changed (1) hide show

engine.py +73 -23

engine.py CHANGED Viewed

@@ -93,25 +93,56 @@ def load_ai_schema():
 # TABLE MATCHING (CORE LOGIC)
 # =========================
-def extract_relevant_tables(question):
     schema = load_ai_schema()
     q = question.lower()
     matched = []
     for table, meta in schema.items():
-        # match table name
-        if table.lower() in q:
-            matched.append(table)
-            continue
-        # match column names
         for col, _ in meta["columns"]:
-            if col.lower() in q:
-                matched.append(table)
-                break
-    return list(set(matched))[:5]
@@ -137,8 +168,7 @@ def describe_schema():
         "• Admissions by year\n\n"
         "Just tell me what you want to explore "
     )
-    if not schema:
-        return "No AI-enabled tables are configured."
@@ -172,24 +202,44 @@ def normalize_time_question(q):
 def is_question_supported(question):
     q = question.lower()
-    if any(k in q for k in [
-        "count", "total", "average", "sum",
-        "how many", "number of", "trend"
-    ]):
         return True
     schema = load_ai_schema()
-    for table, meta in schema.items():
-        if table in q:
-            return True
-        for col, _ in meta["columns"]:
-            if col in q:
-                return True
-    return False
 # =========================

 # TABLE MATCHING (CORE LOGIC)
 # =========================
def extract_relevant_tables(question, max_tables=5, schema=None):
    """Rank schema tables by how relevant they look for a question.

    A table collects points from four signals:

    * +5 when the table name appears in the question,
    * +2 per meaningful word shared between the question and the table
      description,
    * +3 per column whose full name appears in the question, otherwise +1
      when a meaningful question word is contained in the column name,
    * +2 per semantic hint (e.g. "drug" -> tables containing "medication").

    "Meaningful" words are at least three characters long and are not common
    function words; without that filter a token such as "a" or "of" matches
    nearly every column name and every table ends up with a positive score.

    Args:
        question: Natural-language question. ``None`` or blank yields ``[]``.
        max_tables: Maximum number of table names returned; values below 1
            yield ``[]``.
        schema: Optional mapping of table name to metadata. The metadata must
            provide ``"columns"`` (two-item entries whose first item is the
            column name) and may provide a ``"description"`` string. When
            omitted, the schema is obtained from ``load_ai_schema()``.

    Returns:
        Names of tables with a positive score, highest score first. Tables
        with equal scores keep the order in which the schema lists them.
    """
    q = (question or "").lower()
    if max_tables <= 0 or not q.strip():
        return []
    if schema is None:
        schema = load_ai_schema()

    # Function words that carry no table/column meaning on their own.
    stopwords = frozenset({
        "the", "and", "for", "are", "was", "were", "with", "from", "that",
        "this", "what", "which", "who", "how", "all", "any", "has", "have",
        "had", "not", "but", "you", "can", "does", "did", "per", "each",
        "there", "their", "them", "its", "out", "into", "about",
    })

    def words(text):
        # Treat every non-alphanumeric character (except "_") as a separator
        # so trailing punctuation such as "patients." does not block a match.
        cleaned = "".join(
            ch if ch.isalnum() or ch == "_" else " " for ch in text
        )
        return {
            w for w in cleaned.split()
            if len(w) >= 3 and w not in stopwords
        }

    tokens = words(q)

    # Weak semantic hints: table-name fragment -> question words implying it.
    semantic_map = {
        "patient": ["patient", "patients"],
        "visit": ["visit", "encounter"],
        "medication": ["drug", "medicine"],
        "admission": ["admit", "admission"],
        "date": ["date", "year", "month"],
    }
    # The question is the same for every table, so resolve the hints once.
    hinted_keys = [
        key for key, hint_words in semantic_map.items()
        if any(w in q for w in hint_words)
    ]

    matched = []
    for table, meta in schema.items():
        score = 0
        table_l = table.lower()

        # 1. Table name match (strong signal)
        if table_l in q:
            score += 5

        # 2. Description match
        description = meta.get("description")
        if description:
            score += len(tokens & words(description.lower())) * 2

        # 3. Column name matches
        for col, _ in meta["columns"]:
            col_l = col.lower()
            if col_l in q:
                score += 3
            elif any(tok in col_l for tok in tokens):
                score += 1

        # 4. Weak semantic hints
        score += 2 * sum(1 for key in hinted_keys if key in table_l)

        if score > 0:
            matched.append((table, score))

    # list.sort is stable, so equally scored tables keep schema order.
    matched.sort(key=lambda item: item[1], reverse=True)
    return [table for table, _ in matched[:max_tables]]
         "• Admissions by year\n\n"
         "Just tell me what you want to explore "
     )
 def is_question_supported(question, schema=None):
     """Decide whether a question looks answerable from the AI schema.

     A question is accepted immediately when it contains an analytical
     keyword ("count", "how many", "trend", ...). Otherwise every table
     contributes to a single running score:

     * +3 when the table name appears in the question,
     * +2 per column whose full name appears in the question, otherwise +1
       when a meaningful question word is contained in the column name,
     * +1 per meaningful word shared with the table description.

     "Meaningful" words are at least three characters long and are not
     common function words. Without that filter a one-letter token such as
     "a" is a substring of most column names, so unrelated questions reach
     the threshold.

     Args:
         question: Natural-language question. ``None`` or blank is rejected.
         schema: Optional mapping of table name to metadata. The metadata
             must provide ``"columns"`` (two-item entries whose first item
             is the column name) and may provide a ``"description"`` string.
             When omitted, the schema is obtained from ``load_ai_schema()``
             (only if no analytical keyword matched first).

     Returns:
         ``True`` when an analytical keyword is present or the schema score
         reaches 2, ``False`` otherwise.
     """
     q = (question or "").lower()
     if not q.strip():
         return False

     # 1. Allow analytical intent even if no table is mentioned.
     # NOTE: this is a plain substring test, so "sum" also matches "summary"
     # and "count" also matches "country".
     analytic_keywords = (
         "count", "total", "average", "avg", "sum",
         "how many", "number of", "trend", "trendline",
         "increase", "decrease", "compare",
     )
     if any(k in q for k in analytic_keywords):
         return True

     # 2. Schema-based scoring
     if schema is None:
         schema = load_ai_schema()

     # Function words that carry no table/column meaning on their own.
     stopwords = frozenset({
         "the", "and", "for", "are", "was", "were", "with", "from", "that",
         "this", "what", "which", "who", "how", "all", "any", "has", "have",
         "had", "not", "but", "you", "can", "does", "did", "per", "each",
         "there", "their", "them", "its", "out", "into", "about",
     })

     def words(text):
         # Treat every non-alphanumeric character (except "_") as a
         # separator so trailing punctuation does not block a match.
         cleaned = "".join(
             ch if ch.isalnum() or ch == "_" else " " for ch in text
         )
         return {
             w for w in cleaned.split()
             if len(w) >= 3 and w not in stopwords
         }

     tokens = words(q)
     threshold = 2  # minimum evidence required; rejects unrelated questions
     score = 0
     for table, meta in schema.items():
         # Table name match
         if table.lower() in q:
             score += 3

         # Column name match
         for col, _ in meta["columns"]:
             col_l = col.lower()
             if col_l in q:
                 score += 2
             elif any(tok in col_l for tok in tokens):
                 score += 1

         # Description match
         description = meta.get("description")
         if description:
             score += len(tokens & words(description.lower()))

         # The score never decreases, so stop as soon as it is sufficient.
         if score >= threshold:
             return True

     # 3. Threshold not reached
     return False
 # =========================