Spaces:

bhavika24
/

Text_to_sql

Sleeping

App Files Files Community

bhavika24 committed on Jan 21

Commit

28620f4

verified ·

1 Parent(s): 7b117b4

Upload engine.py

Browse files

Files changed (1) hide show

engine.py +68 -27

engine.py CHANGED Viewed

@@ -80,8 +80,19 @@ from functools import lru_cache
 @lru_cache(maxsize=1)
 def load_ai_schema():
-    with open("hospital_metadata.json", "r") as f:
-        return json.load(f)
 # =========================
 # TABLE MATCHING (CORE LOGIC)
@@ -94,20 +105,33 @@ def extract_relevant_tables(question, max_tables=4):
     matched = []
-    # Lightweight intent hints (NO hard dependency)
-    DOMAIN_HINTS = {
-        "consultant": ["encounters"],
-        "doctor": ["encounters"],
-        "visit": ["encounters"],
-        "visited": ["encounters"],  # Handle past tense
-        "visits": ["encounters"],   # Handle plural
-        "appointment": ["encounters"],
-        "patient": ["patients"],
-        "medication": ["medications"],
-        "drug": ["medications"],
-        "condition": ["conditions"],
-        "diagnosis": ["conditions"]
     }
     # Early exit threshold - if we find a perfect match, we can stop early
     VERY_HIGH_SCORE = 10
@@ -125,7 +149,7 @@ def extract_relevant_tables(question, max_tables=4):
                 continue
         # 2️⃣ Column relevance
-        for col, _ in meta["columns"]:
             col_l = col.lower()
             if col_l in q:
                 score += 3
@@ -173,7 +197,7 @@ def describe_schema(max_tables=10):
     for table, meta in shown_tables:
         response += f"• **{table.capitalize()}** — {meta['description']}\n"
         # Show only first 5 columns per table
-        for col, desc in list(meta["columns"])[:5]:
             response += f"  - {col}: {desc}\n"
         if len(meta["columns"]) > 5:
             response += f"  ... and {len(meta['columns']) - 5} more columns\n"
@@ -198,12 +222,30 @@ def describe_schema(max_tables=10):
 # =========================
 def get_latest_data_date():
-    try:
-        return conn.execute(
-            "SELECT MAX(admittime) FROM admissions"
-        ).fetchone()[0]
-    except:
-        return None
 def normalize_time_question(q):
@@ -249,7 +291,7 @@ def is_question_supported(question):
             score += 3
         # Column name match
-        for col, _ in meta["columns"]:
             col_l = col.lower()
             if col_l in q:
                 score += 2
@@ -295,8 +337,7 @@ Rules:
 - Use only listed tables/columns
 - Return ONLY SQL or NOT_ANSWERABLE
-IMPORTANT: If the question mentions "visit", "visited", or "visits", use the table name "encounters" (NOT "visits" or "visit").
-If the question mentions "consultant" or "doctor", use the table name "encounters".
 """
     for table, meta in schema.items():
@@ -447,7 +488,7 @@ def build_table_summary(table_name):
     except sqlite3.Error as e:
         return f"Error querying table {table_name}: {str(e)}"
-    columns = schema[table_name]["columns"]  # [(col_name, description), ...]
     summary = f"Here's a summary of **{table_name}**:\n\n"
     summary += f"• Total records: {total}\n"

 @lru_cache(maxsize=1)
 def load_ai_schema():
     """Load the table metadata from ``hospital_metadata.json``.

     The path is relative to the current working directory. The result is
     memoized by ``lru_cache``, so the file is parsed once per successful
     load; call ``load_ai_schema.cache_clear()`` to force a re-read.

     Returns:
         dict: The parsed top-level JSON object.

     Raises:
         FileNotFoundError: If ``hospital_metadata.json`` does not exist.
         ValueError: If the file cannot be read or decoded, is not valid
             JSON, or its top-level value is not a JSON object.
     """
     try:
         # JSON text is UTF-8; do not depend on the platform default encoding.
         with open("hospital_metadata.json", "r", encoding="utf-8") as f:
             schema = json.load(f)
     except FileNotFoundError as e:
         raise FileNotFoundError(
             "hospital_metadata.json file not found. Please create it with your table metadata."
         ) from e
     except json.JSONDecodeError as e:
         raise ValueError(f"Invalid JSON in hospital_metadata.json: {str(e)}") from e
     except Exception as e:
         raise ValueError(f"Error loading metadata: {str(e)}") from e

     # Validate outside the try block: raised inside it, this ValueError would
     # be caught by the catch-all handler above and re-wrapped with a
     # misleading "Error loading metadata: ..." prefix.
     if not isinstance(schema, dict):
         raise ValueError("Invalid metadata format: expected a dictionary")
     return schema
 # =========================
 # TABLE MATCHING (CORE LOGIC)
     matched = []
+    # Lightweight intent hints - dynamically filter to only include tables that exist
+    # Map natural language terms to potential table names (check against schema)
+    all_tables = list(schema.keys())
+    table_names_lower = [t.lower() for t in all_tables]
+    DOMAIN_HINTS = {}
+    # Build hints only for tables that actually exist
+    hint_mappings = {
+        "consultant": ["encounter", "encounters", "visit", "visits"],
+        "doctor": ["encounter", "encounters", "provider", "providers"],
+        "visit": ["encounter", "encounters", "visit", "visits"],
+        "visited": ["encounter", "encounters", "visit", "visits"],
+        "visits": ["encounter", "encounters", "visit", "visits"],
+        "appointment": ["encounter", "encounters", "appointment", "appointments"],
+        "patient": ["patient", "patients"],
+        "medication": ["medication", "medications", "drug", "drugs"],
+        "drug": ["medication", "medications", "drug", "drugs"],
+        "condition": ["condition", "conditions", "diagnosis", "diagnoses"],
+        "diagnosis": ["condition", "conditions", "diagnosis", "diagnoses"]
     }
+    # Only include hints for tables that exist in the schema
+    for intent, possible_tables in hint_mappings.items():
+        matching_tables = [t for t in possible_tables if t in table_names_lower]
+        if matching_tables:
+            DOMAIN_HINTS[intent] = matching_tables
     # Early exit threshold - if we find a perfect match, we can stop early
     VERY_HIGH_SCORE = 10
                 continue
         # 2️⃣ Column relevance
+        for col, desc in meta["columns"].items():
             col_l = col.lower()
             if col_l in q:
                 score += 3
     for table, meta in shown_tables:
         response += f"• **{table.capitalize()}** — {meta['description']}\n"
         # Show only first 5 columns per table
+        for col, desc in list(meta["columns"].items())[:5]:
             response += f"  - {col}: {desc}\n"
         if len(meta["columns"]) > 5:
             response += f"  ... and {len(meta['columns']) - 5} more columns\n"
 # =========================
 def get_latest_data_date():
     """Return the newest value found in a date-like column, or None.

     Tables are visited in the order they appear in ``load_ai_schema()``.
     For every column whose lowercased name contains one of the known
     date-like fragments, ``SELECT MAX(column) FROM table`` is run on
     ``conn`` (defined outside this function). The first truthy result is
     returned immediately, so the value is the maximum of the first usable
     column, not a maximum taken across all tables.

     Returns:
         The value produced by the query, or None when no candidate column
         yields a non-empty result.
     """
     schema = load_ai_schema()

     # Name fragments that mark a column as date-like.
     date_markers = [
         "date", "start_date", "end_date", "admission_date",
         "admittime", "dischtime", "created_at", "updated_at",
     ]

     for table_name, table_meta in schema.items():
         # "columns" is expected to be a mapping of column name -> description.
         for col_name in table_meta.get("columns", {}).keys():
             lowered = col_name.lower()
             looks_like_date = any(marker in lowered for marker in date_markers)
             if not looks_like_date:
                 continue

             try:
                 row = conn.execute(
                     f"SELECT MAX({col_name}) FROM {table_name}"
                 ).fetchone()
             except sqlite3.Error:
                 # Query failed (e.g. missing table/column): try the next one.
                 continue

             if row and row[0]:
                 return row[0]

     return None
 def normalize_time_question(q):
             score += 3
         # Column name match
+        for col, desc in meta["columns"].items():
             col_l = col.lower()
             if col_l in q:
                 score += 2
 - Use only listed tables/columns
 - Return ONLY SQL or NOT_ANSWERABLE
+IMPORTANT: Use EXACTLY the table names provided in the list below. Do not pluralize, modify, or guess table names.
 """
     for table, meta in schema.items():
     except sqlite3.Error as e:
         return f"Error querying table {table_name}: {str(e)}"
+    columns = schema[table_name]["columns"]  # {col_name: description, ...}
     summary = f"Here's a summary of **{table_name}**:\n\n"
     summary += f"• Total records: {total}\n"