bhavika24 commited on
Commit
3a7a8cd
·
verified ·
1 Parent(s): cf60b47

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +11 -20
  3. UI.py +69 -0
  4. engine.py +236 -0
  5. hospital.db +3 -0
  6. requirements.txt +3 -3
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hospital.db filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1,20 +1,11 @@
1
- FROM python:3.13.5-slim
2
-
3
- WORKDIR /app
4
-
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
-
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
-
14
- RUN pip3 install -r requirements.txt
15
-
16
- EXPOSE 8501
17
-
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
-
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
FROM python:3.10-slim

WORKDIR /app

COPY . /app

RUN pip install --no-cache-dir -r requirements.txt

EXPOSE 8501

# FIX: the uploaded entry-point file is "UI.py" (uppercase). The container's
# Linux filesystem is case-sensitive, so "streamlit run ui.py" would fail at
# startup with "File does not exist".
CMD ["streamlit", "run", "UI.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
 
 
 
 
 
 
 
UI.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from engine import process_question

st.set_page_config(page_title="Hospital AI Assistant", layout="wide")

st.title("🏥 Hospital AI Assistant")
st.caption("Ask questions about patients, conditions, visits, medications, labs")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay chat history.  Assistant replies embed raw HTML (the <details> SQL
# toggle below), so they must be rendered with unsafe_allow_html=True —
# otherwise Streamlit escapes the tags and shows them as literal text
# (fix over the original, which never rendered the toggle).
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"], unsafe_allow_html=(msg["role"] == "assistant"))

# Chat input
user_input = st.chat_input("Ask a question about hospital data...")

if user_input:
    # Show user message
    st.session_state.messages.append({"role": "user", "content": user_input})
    with st.chat_message("user"):
        st.markdown(user_input)

    # Call AI engine directly; surface any failure as an error payload instead
    # of crashing the Streamlit script run.
    with st.spinner("Thinking..."):
        try:
            result = process_question(user_input)
        except Exception as e:
            result = {"status": "error", "message": str(e)}

    # Build assistant reply
    if result.get("status") == "ok":
        reply = ""

        # Time note (if any)
        if result.get("note"):
            reply += f"🕒 *{result['note']}*\n\n"

        # Data table rendered as markdown, capped at 10 rows for readability
        if result.get("data"):
            columns = result.get("columns", [])
            data = result["data"]

            table_md = "| " + " | ".join(columns) + " |\n"
            table_md += "| " + " | ".join(["---"] * len(columns)) + " |\n"

            for row in data[:10]:
                table_md += "| " + " | ".join(str(x) for x in row) + " |\n"

            reply += table_md
        else:
            reply += result.get("message", "No data found.")

        # Collapsible generated-SQL section (raw HTML — see unsafe_allow_html)
        reply += "\n\n---\n"
        reply += "<details><summary><b>Generated SQL</b></summary>\n\n"
        reply += f"```sql\n{result['sql']}\n```"
        reply += "\n</details>"

    else:
        reply = f"❌ {result.get('message', 'Something went wrong')}"

    # Show assistant message (HTML allowed so the <details> toggle renders)
    st.session_state.messages.append({"role": "assistant", "content": reply})
    with st.chat_message("assistant"):
        st.markdown(reply, unsafe_allow_html=True)
engine.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sqlite3
3
+ from openai import OpenAI
4
+
5
+
6
+ # =========================
7
+ # Setup
8
+ # =========================
9
+
10
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
11
+ conn = sqlite3.connect("hospital.db", check_same_thread=False)
12
+
13
+
14
+ # =========================
15
+ # Metadata Loader
16
+ # =========================
17
+
18
def load_ai_schema():
    """Load the AI-exposed schema from the metadata tables.

    Returns a dict mapping each AI-enabled table name to
    {"description": str, "columns": [(column_name, description), ...]},
    including only columns flagged ai_allowed.
    """
    cursor = conn.cursor()

    enabled_tables = cursor.execute("""
        SELECT table_name, description
        FROM ai_tables
        WHERE ai_enabled = 1
    """).fetchall()

    schema = {}
    for name, description in enabled_tables:
        allowed_columns = cursor.execute("""
            SELECT column_name, description
            FROM ai_columns
            WHERE table_name = ? AND ai_allowed = 1
        """, (name,)).fetchall()

        schema[name] = {
            "description": description,
            "columns": allowed_columns,
        }

    return schema
42
+
43
+
44
+ # =========================
45
+ # Prompt Builder
46
+ # =========================
47
+
48
def build_prompt(question: str) -> str:
    """Assemble the LLM prompt: generation rules, allowed schema, question."""
    schema = load_ai_schema()

    pieces = ["""
You are a hospital data assistant.

Rules:
- Generate only SELECT SQL queries.
- Use only the tables and columns provided.
- Do not invent tables or columns.
- This database is SQLite. Use SQLite-compatible date functions.
- For recent days use: date('now', '-N day')
- Use case-insensitive matching for text fields.
- Prefer LIKE with wildcards for medical condition names.
- Use COUNT, AVG, MIN, MAX, GROUP BY when the question asks for totals, averages, or comparisons.
- If the question cannot be answered using the schema, return NOT_ANSWERABLE.
- Do not explain the query.
- Return only SQL or NOT_ANSWERABLE.

Available schema:
"""]

    # One line per table, then one indented line per allowed column.
    for table, meta in schema.items():
        pieces.append(f"\nTable: {table} - {meta['description']}\n")
        for col, desc in meta["columns"]:
            pieces.append(f" - {col}: {desc}\n")

    pieces.append(f"\nUser question: {question}\n")
    return "".join(pieces)
77
+
78
+
79
+ # =========================
80
+ # LLM Call
81
+ # =========================
82
+
83
def call_llm(prompt: str) -> str:
    """Send *prompt* to the chat model and return its reply text, stripped."""
    messages = [
        {"role": "system", "content": "You are a SQL generator. Return only SQL. No explanation."},
        {"role": "user", "content": prompt},
    ]

    # temperature=0.0 keeps SQL generation as deterministic as possible.
    completion = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=messages,
        temperature=0.0,
    )

    return completion.choices[0].message.content.strip()
94
+
95
+
96
+ # =========================
97
+ # SQL Generation
98
+ # =========================
99
+
100
def generate_sql(question: str) -> str:
    """Produce candidate SQL for *question* via the LLM; whitespace-stripped."""
    return call_llm(build_prompt(question)).strip()
104
+
105
+
106
+ # =========================
107
+ # SQL Cleaning & Validation
108
+ # =========================
109
+
110
def clean_sql(sql: str) -> str:
    """Normalize raw LLM output into bare SQL.

    Chat models frequently wrap the answer in markdown fences
    (```sql ... ```).  This strips the fence and an optional leading
    "sql" language tag.

    FIX: the original ran sql.replace("sql\\n", "") on the whole string,
    which could delete that substring from *inside* a query (e.g. in a
    string literal).  Now only a standalone leading language tag is
    removed, and only when the input was actually fenced.
    """
    sql = sql.strip()

    if sql.startswith("```"):
        # Keep the first fenced segment.
        parts = sql.split("```")
        if len(parts) > 1:
            sql = parts[1]
        sql = sql.strip()
        # Drop a leading "sql" tag only when it stands alone before whitespace,
        # so e.g. a query starting with "sqlite_master" is left intact.
        if sql[:3].lower() == "sql" and (len(sql) == 3 or sql[3].isspace()):
            sql = sql[3:]

    return sql.strip()
121
+
122
+
123
def validate_sql(sql: str) -> str:
    """Clean the generated SQL and enforce a read-only policy.

    Raises Exception when the statement is not a SELECT or contains a
    write/DDL keyword.

    FIX: the original used plain substring checks, so legitimate queries
    referencing identifiers such as "last_update" or "drop_in_visits"
    were rejected.  Keywords are now matched as whole words ("_" is a
    word character, so it does not create a boundary).
    """
    import re  # local import keeps the module's top-level imports untouched

    sql = clean_sql(sql)
    s = sql.lower()

    if not s.startswith("select"):
        raise Exception("Only SELECT queries allowed")

    if re.search(r"\b(insert|update|delete|drop|alter|truncate)\b", s):
        raise Exception("Forbidden SQL operation detected")

    return sql
136
+
137
+
138
+ # =========================
139
+ # Query Runner
140
+ # =========================
141
+
142
def run_query(sql: str):
    """Execute *sql* against the shared connection.

    Returns (column_names, rows) where rows is a list of tuples.
    """
    cursor = conn.cursor()
    rows = cursor.execute(sql).fetchall()
    column_names = [entry[0] for entry in cursor.description]
    return column_names, rows
147
+
148
+
149
+ # =========================
150
+ # Guardrails
151
+ # =========================
152
+
153
def is_question_answerable(question):
    """Cheap guardrail: accept only questions in the hospital-data domain.

    Returns True when the question mentions at least one core domain
    keyword (case-insensitive substring match), else False.

    FIX: the original also called load_ai_schema() and built a
    schema_text string that was never used — dead code costing a DB
    round-trip per question.  The decision logic is unchanged.
    """
    keywords = [
        "patient", "encounter", "condition", "observation", "medication",
        "visit", "diagnosis", "lab", "vital",
    ]

    q = question.lower()

    # If none of the core domain keywords are present, likely out of scope.
    return any(k in q for k in keywords)
166
+
167
+
168
+ # =========================
169
+ # Time Awareness
170
+ # =========================
171
+
172
def get_latest_data_date():
    """Return the most recent encounter start date in the database.

    Returns None when the encounters table is empty (MAX of no rows).
    """
    _cols, rows = run_query("SELECT MAX(start_date) FROM encounters;")
    return rows[0][0]
176
+
177
+
178
def check_time_relevance(question: str):
    """If the question implies a recent timeframe, note the data cutoff.

    Returns a note string mentioning the latest available data date, or
    None when the question has no time-related wording.
    """
    lowered = question.lower()
    time_words = ("last", "recent", "today", "this month", "this year")

    for word in time_words:
        if word in lowered:
            latest = get_latest_data_date()
            return f"Note: Latest available data is from {latest}."

    return None
184
+
185
+
186
+ # =========================
187
+ # Empty Result Interpreter
188
+ # =========================
189
+
190
def interpret_empty_result(question: str):
    """Explain an empty result set by reporting the data cutoff date.

    The *question* parameter is currently unused but kept for interface
    stability with process_question.
    """
    cutoff = get_latest_data_date()
    return f"No results found. Available data is up to {cutoff}."
193
+
194
+
195
+ # =========================
196
+ # ORCHESTRATOR (Single Entry Point)
197
+ # =========================
198
+
199
def process_question(question: str):
    """Single entry point: guardrail -> SQL generation -> validation -> run.

    Returns a dict with a "status" key:
      - "rejected" with a "message" when the question is out of scope;
      - "ok" with "sql", "note", and either "columns"/"data" (capped at
        50 rows) or an empty "data" plus an explanatory "message".
    Validation/execution errors propagate to the caller.
    """
    # 1. Guardrail: refuse questions outside the supported domain.
    if not is_question_answerable(question):
        return {
            "status": "rejected",
            "message": "This question is not supported by the available data.",
        }

    # 2. Time relevance note (may be None).
    time_note = check_time_relevance(question)

    # 3-4. Generate candidate SQL and enforce the read-only policy.
    sql = validate_sql(generate_sql(question))

    # 5. Execute the validated query.
    columns, rows = run_query(sql)

    # 6. Empty result: explain the data cutoff instead of a bare table.
    if not rows:
        return {
            "status": "ok",
            "sql": sql,
            "message": interpret_empty_result(question),
            "data": [],
            "note": time_note,
        }

    # 7. Normal response.
    return {
        "status": "ok",
        "sql": sql,
        "columns": columns,
        "data": rows[:50],  # demo safety limit
        "note": time_note,
    }
hospital.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d70473d08ef49bcb62c9c1edbcdb824014bd102e5235631167fb28b0d5732ad5
3
+ size 40407040
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- altair
2
- pandas
3
- streamlit
 
1
+ streamlit
2
+ openai
3
+ pandas