Spaces:

Nishauri
/

ClinicianAssistant

Sleeping

App Files Files Community

JDFPalladium committed on Jul 1, 2025

Commit

97facdb

1 Parent(s): 7023043

adding sql pull from start

Browse files

Files changed (5) hide show

chatlib/patient_all_data.py +162 -0
chatlib/state_types.py +0 -13
iit_test.sqlite +0 -0
main.py +14 -21
patient_demonstration.sqlite +0 -0

chatlib/patient_all_data.py ADDED Viewed

	@@ -0,0 +1,162 @@

+import sqlite3
+import pandas as pd
+from langchain_openai import ChatOpenAI
+llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
+from .state_types import AppState
+# Define the SQL query tool
+def sql_chain(state: AppState) -> AppState:
+    """
+    Annotated function that takes a patient identifer (pk_hash) and returns
+    all data related to that patient from the SQL database.
+    It writes an SQL query to retrieve relevant data, executes the query,
+    and generates a natural language answer based on the query results.
+    Returns the final answer as a string.
+    The function uses the QuerySQLDatabaseTool to handle the SQL operations.
+    The state should contain the following fields:
+    - question: str - the question seeking information on patient data
+    - pk_hash: str - the patient identifier to query the database
+    - rag_result: str - context information from the guidelines retrieval
+    The function will update the state with the answer to the question.
+    The answer will be generated based on the SQL query results and the context information.
+    The function will return the updated state with the answer.
+    """
+    pk_hash = state.get("pk_hash")
+    if not pk_hash:
+        raise ValueError("pk_hash is required in state for SQL queries.")
+    conn = sqlite3.connect('data/patient_demonstration.sqlite')
+    cursor = conn.cursor()
+    # Write the SQL query using the QuerySQLDatabaseTool
+    cursor.execute("SELECT * FROM clinical_visits WHERE PatientPKHash = :pk_hash", {"pk_hash": pk_hash})
+    rows = cursor.fetchall()
+    visits_data = pd.DataFrame(rows, columns=[column[0] for column in cursor.description])
+    def summarize_visits(df):
+        if df.empty:
+            return "No clinical visit data available."
+        def safe(val):
+            if pd.isnull(val) or val in ("", "NULL"):
+                return 'missing'
+            return val
+        summaries = []
+        for _, row in df.sort_values("VisitDate", ascending=False).head(5).iterrows():
+            summaries.append(f"- {row['VisitDate']}: WHO Stage {safe(row['WHOStage'])}, Weight {safe(row['Weight'])}kg, "
+                             f"NextAppointmentDate {safe(row['NextAppointmentDate'])}, VisityType {safe(row['VisitType'])}, "
+                             f"VisitBy {safe(row['VisitBy'])}, Pregnant {safe(row['Pregnant'])}, Breastfeeding {safe(row['Breastfeeding'])}, "
+                             f"WHOStage {safe(row['WHOStage'])}, StabilityAssessment {safe(row['StabilityAssessment'])}, "
+                             f"DifferentiatedCare {safe(row['DifferentiatedCare'])}, WHOStagingOI {safe(row['WHOStagingOI'])}, "
+                             f"Height {safe(row['Height'])}cm, Adherence {safe(row['Adherence'])}, BP {safe(row['BP'])}, "
+                             f"OI {safe(row['OI'])}, CurrentRegimen {safe(row['CurrentRegimen'])}"
+            )
+        return "\n".join(summaries)
+    visits_summary = summarize_visits(visits_data)
+    print(visits_summary)
+    cursor.execute("SELECT * FROM pharmacy WHERE PatientPKHash = :pk_hash", {"pk_hash": pk_hash})
+    rows = cursor.fetchall()
+    pharmacy_data = pd.DataFrame(rows, columns=[column[0] for column in cursor.description])
+    def summarize_pharmacy(df):
+        if df.empty:
+            return "No pharmacy data available."
+        def safe(val):
+            if pd.isnull(val) or val in ("", "NULL"):
+                return 'missing'
+            return val
+        summaries = []
+        for _, row in df.sort_values("DispenseDate", ascending=False).head(5).iterrows():
+            summaries.append(f"- {row['DispenseDate']}: ExpectedReturn {safe(row['ExpectedReturn'])}, Drug {safe(row['Drug'])}, "
+                             f"Duration {safe(row['Duration'])}, TreatmentType {safe(row['TreatmentType'])}, "
+                             f"RegimenLine {safe(row['RegimenLine'])}, "
+                             f"RegimenChangedSwitched {safe(row['RegimenChangedSwitched'])}, "
+                             f"RegimenChangeSwitchedReason {safe(row['RegimenChangeSwitchedReason'])}, "
+            )
+        return "\n".join(summaries)
+    pharmacy_summary = summarize_pharmacy(pharmacy_data)
+    print(pharmacy_summary)
+    cursor.execute("SELECT * FROM lab WHERE PatientPKHash = :pk_hash", {"pk_hash": pk_hash})
+    rows = cursor.fetchall()
+    lab_data = pd.DataFrame(rows, columns=[column[0] for column in cursor.description])
+    def summarize_lab(df):
+        if df.empty:
+            return "No lab data available."
+        def safe(val):
+            if pd.isnull(val) or val in ("", "NULL"):
+                return 'missing'
+            return val
+        summaries = []
+        for _, row in df.sort_values("OrderedbyDate", ascending=False).head(5).iterrows():
+            summaries.append(f"- {row['OrderedbyDate']}: TestName {safe(row['TestName'])}, TestResult {safe(row['TestResult'])},"
+            )
+        return "\n".join(summaries)
+    lab_summary = summarize_lab(lab_data)
+    print(lab_summary)
+    cursor.execute("SELECT * FROM demographics WHERE PatientPKHash = :pk_hash", {"pk_hash": pk_hash})
+    rows = cursor.fetchall()
+    demographic_data = pd.DataFrame(rows, columns=[column[0] for column in cursor.description])
+    def summarize_demographics(df):
+        if df.empty:
+            return "No demographic data available."
+        def safe(val):
+            if pd.isnull(val) or val in ("", "NULL"):
+                return 'missing'
+            return val
+        row = df.iloc[0]
+        summary = (
+            f"Sex: {safe(row['Sex'].values[0])}\n"
+            f"MaritalStatus: {safe(row['MaritalStatus'].values[0])}\n"
+            f"EducationLevel: {safe(row['EducationLevel'].values[0])}\n"
+            f"Occupation: {safe(row['Occupation'].values[0])}\n"
+            f"OnIPT: {safe(row['OnIPT'].values[0])}\n"
+            f"ARTOutcomeDescription: {safe(row['ARTOutcomeDescription'].values[0])}\n"
+            f"StartARTDate: {safe(row['StartARTDate'].values[0])}\n"
+            f"Date Of Birth: {safe(row['DOB'].values[0])}"
+        )
+        return summary
+    demographic_summary = summarize_demographics(demographic_data)
+    print(demographic_summary)
+    # cursor.execute("SELECT * FROM data_dictionary")
+    # rows = cursor.fetchall()
+    # data_dictionary = pd.DataFrame(rows, columns=[column[0] for column in cursor.description])
+    conn.close()
+    prompt = (
+        "Given the following user question, contextual clinical guidance, "
+        "patient clinical data, patient lab data, patient pharmacy data, "
+        "patient demographic data, answer the user question. "
+        "Try to answer based on the provided data."
+        "If there is essential patient information missing that you need in order to answer, "
+        "do not provide an answer and instead explain what information is missing. \n\n"
+        f'Question: {state["question"]}\n'
+        f'Context: {state.get("rag_result", "No guidelines provided.")}\n'
+        f'Patient Clinical Visits: {visits_summary}\n'
+        f'Patient Pharmacy Data: {pharmacy_summary}\n'
+        f'Patient Lab Data: {lab_summary}\n'
+        f'Patient Demographic Data: {demographic_summary}\n'
+        # f'Data Dictionary: {data_dictionary}\n'
+    )
+    response = llm.invoke(prompt)
+    state["answer"] = response.content
+    return state

chatlib/state_types.py CHANGED Viewed

@@ -27,18 +27,5 @@ class AppState(TypedDict):
     messages: Annotated[list[AnyMessage], add_messages]
     question: str
     rag_result: str
-    query: str
-    result: str
     answer: str
     pk_hash: str
-# initialize state with patient pk hash
-# input_state:State = {
-#     "messages": [HumanMessage(content="was this person typically late or on time to their visits?")],
-#     "question": "",
-#     "rag_result": "",
-#     "query": "",
-#     "result": "",
-#     "answer": "",
-#     "pk_hash": "962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73"
-# }

     messages: Annotated[list[AnyMessage], add_messages]
     question: str
     rag_result: str
     answer: str
     pk_hash: str

iit_test.sqlite ADDED Viewed

File without changes

main.py CHANGED Viewed

@@ -5,6 +5,7 @@ from langgraph.graph import START, StateGraph
 from langchain_core.messages import HumanMessage, SystemMessage
 from langgraph.prebuilt import tools_condition, ToolNode
 from langgraph.checkpoint.memory import MemorySaver
 memory = MemorySaver()
 load_dotenv("config.env")
@@ -13,7 +14,7 @@ os.environ.get("LANGSMITH_API_KEY")
 from chatlib.state_types import AppState
 from chatlib.guidlines_rag_agent_li import rag_retrieve
-from chatlib.patient_sql_agent import sql_chain
 # from langchain_ollama.chat_models import ChatOllama
 # llm = ChatOllama(model="mistral:latest", temperature=0)
@@ -27,25 +28,19 @@ sys_msg = SystemMessage(content="""
                         You are a helpful assistant tasked with helping clinicians
                         meeting with patients. You have two tools available,
                         rag_retrieve to access information from HIV clinical guidelines,
-                        and sql_chain to access patient data.
-                        In most cases, you should use both tools to answer a question.
-                        In these cases, first call rag_retrieve to get the relevant information,
-                        then call sql_chain to get the patient data, and finally combine the results
-                        to provide a complete answer. For example, if the question is about whether
-                        a patient is on the correct treatment, first retrieve the treatment guidelines
-                        using rag_retrieve, then check the patient's treatment history using sql_chain.
-                        Another example is if the question is about when they should have their next viral load test,
-                        first retrieve the guidelines for viral load testing using rag_retrieve,
-                        then check the patient's last viral load test date and result using sql_chain.
                         You must respond only with a JSON object specifying the tool to call and its arguments.
-                        Do not generate any SQL queries, results or answers yourself. Only the sql_chain
-                        tool should do that.
-                        When calling a tool, provide only the necessary fields required for that tool to run.
-                        Do not include the full state or raw query results in the tool call arguments.
-                        For example, include the question and pk_hash, but exclude the query or result.
                         """
                         )
@@ -90,15 +85,13 @@ builder.add_edge("tools", "assistant")
 react_graph = builder.compile(checkpointer=memory)
 # Specify a thread
-config = {"configurable": {"thread_id": "25"}}
 # initialize state with patient pk hash
 input_state:AppState = {
-    "messages": [HumanMessage(content="my patient is complaining about feeling headaches. should i consider switching their regimen?")],
     "question": "",
     "rag_result": "",
-    "query": "",
-    "result": "",
     "answer": "",
     "pk_hash": "962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73"
 }

 from langchain_core.messages import HumanMessage, SystemMessage
 from langgraph.prebuilt import tools_condition, ToolNode
 from langgraph.checkpoint.memory import MemorySaver
 memory = MemorySaver()
 load_dotenv("config.env")
 from chatlib.state_types import AppState
 from chatlib.guidlines_rag_agent_li import rag_retrieve
+from chatlib.patient_all_data import sql_chain
 # from langchain_ollama.chat_models import ChatOllama
 # llm = ChatOllama(model="mistral:latest", temperature=0)
                         You are a helpful assistant tasked with helping clinicians
                         meeting with patients. You have two tools available,
                         rag_retrieve to access information from HIV clinical guidelines,
+                        and sql_chain to access patient data. When a clinican asks a question about a patient,
+                        you should first run rag_retrieve to get contextual information from the guidelines,
+                        then use sql_chain to query the patient's data from the SQL database.
                         You must respond only with a JSON object specifying the tool to call and its arguments.
+                        Keep your responses concise and focused on the task at hand. Remember, you are
+                        talking to a clinician who needs quick and accurate information about their patient.
+                        Do not tell them to consult a healthcare professional - they are the healthcare professional.
+                        If the clinican questions is not clear, ask for clarification or more information.
+                        If the clinican asks a question that is not related to the patient, then use the rag_retrieve tool
+                        to provide general information about HIV clinical guidelines.
                         """
                         )
 react_graph = builder.compile(checkpointer=memory)
 # Specify a thread
+config = {"configurable": {"thread_id": "30"}}
 # initialize state with patient pk hash
 input_state:AppState = {
+    "messages": [HumanMessage(content="the patient is 30 and is not pregnant or breastfeeding?")],
     "question": "",
     "rag_result": "",
     "answer": "",
     "pk_hash": "962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73"
 }

patient_demonstration.sqlite ADDED Viewed

File without changes