Spaces:

Nishauri
/

ClinicianAssistant

Sleeping

App Files Files Community

JDFPalladium committed on Jun 24, 2025

Commit

5486ae5

1 Parent(s): 6e5b890

adding main.py with multiple tools to invoke

Browse files

Files changed (6) hide show

README.md +6 -1
chat.ipynb +196 -15
chatlib/patient_sql_agent.py +45 -9
chatlib/state_types.py +2 -1
main.py +40 -7
sql_agent.ipynb +34 -22

README.md CHANGED Viewed

	@@ -1 +1,6 @@
1	- # clinician-assistant-lg

+# clinician-assistant-lg
+curl -fsSL https://ollama.com/install.sh | sh
+ollama pull llama3.1:8b
+ollama serve
+ollama run llama3.1:8b

chat.ipynb CHANGED Viewed

@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "73bd3df7",
    "metadata": {},
    "outputs": [
@@ -56,13 +56,15 @@
     "    return \"RAG search results for: \" + retrieved_text\n",
     "\n",
     "tools = [rag_retrieve]\n",
-    "llm_lc = ChatOpenAI(temperature = 0.0, model=\"gpt-4o\")\n",
     "llm_with_tools = llm_lc.bind_tools([rag_retrieve])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
    "id": "2cb76d17",
    "metadata": {},
    "outputs": [],
@@ -84,7 +86,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
    "id": "e561b005",
    "metadata": {},
    "outputs": [
@@ -131,27 +133,206 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
    "id": "01fd23c5",
    "metadata": {},
    "outputs": [
     {
-     "ename": "NameError",
-     "evalue": "name 'HumanMessage' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;66;03m# Specify a thread\u001b[39;00m\n\u001b[32m      2\u001b[39m config = {\u001b[33m\"\u001b[39m\u001b[33mconfigurable\u001b[39m\u001b[33m\"\u001b[39m: {\u001b[33m\"\u001b[39m\u001b[33mthread_id\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33m1\u001b[39m\u001b[33m\"\u001b[39m}}\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m messages = [\u001b[43mHumanMessage\u001b[49m(content=\u001b[33m\"\u001b[39m\u001b[33mwhat if the patient is pregnant and just found out they have HIV?\u001b[39m\u001b[33m\"\u001b[39m)]\n\u001b[32m      5\u001b[39m messages = react_graph.invoke({\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m: messages}, config)\n\u001b[32m      6\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m m \u001b[38;5;129;01min\u001b[39;00m messages[\u001b[33m'\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m'\u001b[39m]:\n",
-      "\u001b[31mNameError\u001b[39m: name 'HumanMessage' is not defined"
      ]
     }
    ],
    "source": [
     "# Specify a thread\n",
-    "config = {\"configurable\": {\"thread_id\": \"1\"}}\n",
     "\n",
-    "messages = [HumanMessage(content=\"what if the patient is pregnant and just found out they have HIV?\")]\n",
     "messages = react_graph.invoke({\"messages\": messages}, config)\n",
     "for m in messages['messages']:\n",
     "    m.pretty_print()"

   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "73bd3df7",
    "metadata": {},
    "outputs": [
     "    return \"RAG search results for: \" + retrieved_text\n",
     "\n",
     "tools = [rag_retrieve]\n",
+    "# llm_lc = ChatOpenAI(temperature = 0.0, model=\"gpt-4o\")\n",
+    "from langchain_ollama import ChatOllama\n",
+    "llm_lc = ChatOllama(model=\"llama3.2:1b\")\n",
     "llm_with_tools = llm_lc.bind_tools([rag_retrieve])"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "id": "2cb76d17",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "id": "e561b005",
    "metadata": {},
    "outputs": [
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "id": "01fd23c5",
    "metadata": {},
    "outputs": [
     {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "when should viral loads be taken?\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "Tool Calls:\n",
+      "  rag_retrieve (d52d2a7a-e00d-4cb7-afff-4183e6859985)\n",
+      " Call ID: d52d2a7a-e00d-4cb7-afff-4183e6859985\n",
+      "  Args:\n",
+      "    user_prompt: When should viral loads be taken?\n",
+      "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
+      "Name: rag_retrieve\n",
+      "\n",
+      "RAG search results for: Source 1: discontinued  \n",
+      "HIV Viral Load   ● For PCR positive HEIs: baseline at the time of ART initiation  \n",
+      "● Age 0 -24 years: at month 3, then every 6 months  \n",
+      "● Age ≥ 25 years: at month 3, then month 12, then annually thereafter if \n",
+      "suppressed  \n",
+      "● For all: before any drug substitution for patients on ART for a t least 6 \n",
+      "months with no valid VL, at month 3 after regimen modification, and \n",
+      "then as per population group  \n",
+      "● Any patient with a detectable VL during routine monitoring, follow viral \n",
+      "load monitoring algorithm (Figure 6.6)  \n",
+      "HIV Viral Load \n",
+      "(pregnant/ \n",
+      "breastfeeding)   ● If on ART at time of confirming pregnancy: VL done at confirmation of \n",
+      "pregnancy (regardless of when previously done), then every 6 months \n",
+      "until complete cessation of breastfeeding  \n",
+      "● If starting ART during pregnancy or breastfeeding, VL at 3 months after \n",
+      "initiation, and then every 6 months until complete cessation of \n",
+      "breastfeeding\n",
+      "\n",
+      "Source 2: Whenever possible, use same -day point -of-care methods for viral load \n",
+      "testing of pregnant and breastfeeding women to expedite the return of \n",
+      "results and clinical decision -making. If this is not available, viral load \n",
+      "specimens and results for pregnant and breastfeeding women should be \n",
+      "given priority across the laboratory referral process  (including specimen \n",
+      "collection, testing and return of results).  \n",
+      "● For pregnant and breastfeeding women newly initiated on ART, obtain VL 3 \n",
+      "months after initiation, and then every 6 months until complete cessation of \n",
+      "breastfeeding  \n",
+      "● For HIV positive women already on ART at the time of confirming pregnancy \n",
+      "or breastfeeding, obtain a VL irrespective of when prior VL was done, and \n",
+      "then every 6 months until complete cessation of breastfeeding  \n",
+      "● For pregnant or breastfeeding women with a VL ≥ 50 copies/ml: assess for \n",
+      "and address potential reasons for viremia, including intensifying adherence \n",
+      "support, repeat the VL after 3 months of excellent adherence, including \n",
+      "daily witnessed ingestion, where feasible and appropriate  \n",
+      "o If the repeat VL is 200 - 999 copies/ml consul t the Regional or National \n",
+      "HIV Clinical TWG  \n",
+      "o If the repeat VL is ≥ 1,000 copies/ml, change to an effective regimen. \n",
+      "Refer to Table 6.10  \n",
+      "o If the repeat VL is < 200 copies/ml (LDL) then continue routine \n",
+      "monitoring\n",
+      "\n",
+      "Source 3: Kenya HIV Prevention and Treatment Guidelines, 2022  \n",
+      "6 - 18 \n",
+      "Schedule for routine viral load testing1 \n",
+      "• Age 0 -24 years old: at month 3, then every 6 months \n",
+      "• Age ≥ 25 years old: at month 3, then month 12 and then annually \n",
+      "• Pregnant or breastfeeding: at confirmation of pregnancy (if already on ART) or 3 months after ART initiation (if ART initiate d during \n",
+      "pregnancy/breastfeeding), and then every 6 months until complete cessation of breastfeeding \n",
+      "• Before any drug substitution (if no VL result available from the prior 6 months) \n",
+      "• Three months after any regimen modification (including single -drug substitutions) \n",
+      "VL < 200 copies/ml\n",
+      " VL 200 – 999 copies/ml\n",
+      " VL ≥ 1,000 copies/ml\n",
+      "Increased risk of progression to \n",
+      "treatment failure\n",
+      "Suspected treatment \n",
+      "failure\n",
+      "• Discuss patient in MDT \n",
+      "• Assign a case manager \n",
+      "• Assess for and address likely causes of non -adherence2 \n",
+      "• Provide enhanced adherence support/intervention as appropriate (Section \n",
+      "5.4 of guidelines for enhanced adherence protocol) \n",
+      "• Assess for other causes of viremia and manage as needed3 \n",
+      "• Support daily witnessed ingestion by treatment buddy or healthcare worker\n",
+      "• After 3 months of excellent adherence, repeat VL \n",
+      "VL < 200 copies/ml (LDL)\n",
+      "VL 200 – 999 copies/ml\n",
+      "VL ≥ 1,000 copies/ml\n",
+      "• Continue ART regimen\n",
+      "• Routine adherence \n",
+      "counselling and monitoring\n",
+      "• Routine VL monitoring\n",
+      "• Reassess adherence and other \n",
+      "causes of viremia2,3\n",
+      "• Repeat VL after another 3 months of \n",
+      "excellent adherence\n",
+      "Confirms treatment failure: \n",
+      "• Begin treatment preparation for new regimen and \n",
+      "continue failing regimen until adherence \n",
+      "preparation completed \n",
+      "• Continue enhanced adherence support \n",
+      "• Take sample for CD4 count and assess for and \n",
+      "manage any OIs \n",
+      "• If failing a DTG or PI based regimen a DRT is \n",
+      "recommended in consult ation with the  regional or \n",
+      "National HIV Clinical TWG or call Uliza Hotline \n",
+      "       (0726 460 000)  \n",
+      "• Schedule clinical appointment at 2 weeks after\n",
+      "\n",
+      "Source 4: Annexes  \n",
+      " \n",
+      "13 - 9 Annex 8: Cont.  \n",
+      "Section 3: Viral load  \n",
+      "• What is viral load  \n",
+      "- Viral load is the amount of HIV in your body  \n",
+      "- When your viral load is high it means you have a lot of HIV in your body; this causes damage to \n",
+      "your body  \n",
+      "- Viral load is measured by a blood test  \n",
+      " \n",
+      "• How often is viral load measured  \n",
+      "- Viral load is measured after being on treatment for 3 months  \n",
+      "- After 3 months of treatment, we expect the amount of virus in your body to be undetectable; if \n",
+      "your VL is detectable then we have to discuss the reasons  \n",
+      "- Having an “undetectable” VL means the test cannot measure the virus in your blood because \n",
+      "your ART is working, but it does not mean you are no longer infected with HIV  \n",
+      "- Repeat viral load tests are done dependin g on how you are doing; if you are doing well on \n",
+      "treatment then the viral load is measured again every 6 months (for children/adolescents and \n",
+      "pregnant/breastfeeding) or annually  \n",
+      "- For HEI with positive PCR, we also measure viral load at the start of treatmen t \n",
+      " \n",
+      "• What do viral load measurements mean  \n",
+      "- After being on treatment for 3 or more months, your viral load should be undetectable  \n",
+      "- If your viral load is undetectable, it means your treatment is working well and you should \n",
+      "continue taking it the same; the virus is not damaging your body any more  \n",
+      "- If your viral load is detectable, it means your treatment is not working properly, usually because \n",
+      "you have been missing some of your pills; the virus is damaging your body and you and the clinic \n",
+      "team will need to work together to figure out how to fix the problem  \n",
+      "Section 4: CD4 cells  \n",
+      "• What are CD4 cells  \n",
+      "- CD4 cells are the immune cells that protect the body from infections  \n",
+      "- CD4 cells are measured through a blood test, called CD4 count. For adults a normal CD4 count is \n",
+      "above 500  \n",
+      " \n",
+      "• How are CD4 cells affected by HIV  \n",
+      "- HIV attacks and destroys CD4 cells\n",
+      "\n",
+      "Source 5: Kenya HIV Prevention and Treatment Guidelines, 2022  \n",
+      " \n",
+      " 13 - 20 Annex 9A: Cont.  \n",
+      "Session 3 (usually 2 weeks after Session 2, preferably with the same provider)  \n",
+      "Review Adherence Plan  \n",
+      "• Ask the patient if he/she thinks adherence has improved since the last visit. Enquire in a \n",
+      "friendly way if any doses have been missed  \n",
+      "• Review the patient’s barriers to adherence documented during the first session and if \n",
+      "strategies identified have been taken up. If not, discuss why  \n",
+      " \n",
+      "Identify Any New Issues  \n",
+      "• Discuss specific reasons why the patient may have missed their pills or a clinic appointment \n",
+      "since the last counselling session, and determine if it is a new issue that wasn’t addressed \n",
+      "during the first session  \n",
+      "• Discuss if other issues have come up because of implementing the adherence plan (e.g., perhaps \n",
+      "the disclosure process had unintended results)  \n",
+      " \n",
+      "Referrals and Networking  \n",
+      "• Follow -up on any referrals made during the previous session  \n",
+      "• Determine if the patient could benefit from a home visit  \n",
+      " \n",
+      "Develop Adherence Plan  \n",
+      "• Go through each of the adherence challenges identified during the session and assist the patient \n",
+      "to modify their original adherence plan to address each of the issues. It is important to let the \n",
+      "patient come up with the solutions so that they own them  \n",
+      "• Give another short motivati onal speech on how you believe in the patient! You know they \n",
+      "can do this! Together you will make sure that they suppress their viral load!!  \n",
+      "• Agree on a follow -up date for the next session  \n",
+      " \n",
+      "Repeat Viral Load  \n",
+      "• If the adherence is good: plan for the next VL testing after 3 months and explain possible ways \n",
+      "forward, emphasizing the roles of the patient, the support systems and the health facility. You \n",
+      "can continue follow -up adherence counselling sessions during the 3 -month period if you and \n",
+      "the patient think th ere would be a benefit to them  \n",
+      "“If your results come back and your VL is undetectable then you will be able to continue with same ART. \n",
+      "If your viral load is still greater than 1,000 copies/ml then you will need to switch to a new regimen, \n",
+      "probably after do ing some additional testing to see which regimen\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "\n",
+      "Based on the original question \"When should viral loads be taken?\", I would answer:\n",
+      "\n",
+      "* For HEIs (HIV positive individuals) who are newly initiated on ART: Viral load should be taken at 3 months after initiation.\n",
+      "* If ART is started during pregnancy or breastfeeding, viral load testing should be done at 3 months after initiation, and then every 6 months until complete cessation of breastfeeding.\n",
+      "\n",
+      "These guidelines recommend taking viral loads at 3 months after ART initiation for newly initiated HEIs, as well as at 3 months if the ART starts during pregnancy or breastfeeding.\n"
      ]
     }
    ],
    "source": [
     "# Specify a thread\n",
+    "config = {\"configurable\": {\"thread_id\": \"100\"}}\n",
     "\n",
+    "messages = [HumanMessage(content=\"when should viral loads be taken?\")]\n",
     "messages = react_graph.invoke({\"messages\": messages}, config)\n",
     "for m in messages['messages']:\n",
     "    m.pretty_print()"

chatlib/patient_sql_agent.py CHANGED Viewed

@@ -4,8 +4,8 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.tools import tool
 from langchain_openai import ChatOpenAI
 from typing_extensions import TypedDict, Annotated
 from .state_types import State
 db = SQLDatabase.from_uri("sqlite:///data/patient_demonstration.sqlite")
 llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
@@ -14,8 +14,9 @@ system_message = """
 Given an input question, create a syntactically correct {dialect} query to
 run to help find the answer. Unless the user specifies in his question a
 specific number of examples they wish to obtain, always limit your query to
-at most {top_k} results. You can order the results by a relevant column to
-return the most interesting examples in the database.
 Never query for all the columns from a specific table, only ask for a the
 few relevant columns given the question.
@@ -24,6 +25,31 @@ Pay attention to use only the column names that you can see in the schema
 description. Be careful to not query for columns that do not exist. Also,
 pay attention to which column is in which table.
 Only use the following tables:
 {table_info}
 """
@@ -40,7 +66,7 @@ class QueryOutput(TypedDict):
     query: Annotated[str, ..., "Syntactically valid SQL query."]
-def write_query(state: State) -> State:
     """Generate SQL query to fetch information."""
     prompt = query_prompt_template.invoke(
         {
@@ -48,32 +74,42 @@ def write_query(state: State) -> State:
             "top_k": 10,
             "table_info": db.get_table_info(),
             "input": state["question"],
         }
     )
     structured_llm = llm.with_structured_output(QueryOutput)
     result = structured_llm.invoke(prompt)
     return {**state, "query": result["query"]}
-def execute_query(state: State) -> State:
     """Execute SQL query."""
     execute_query_tool = QuerySQLDatabaseTool(db=db)
     return {**state, "result": execute_query_tool.invoke(state["query"])}
-def generate_answer(state: State) -> State:
-    """Answer question using retrieved information as context."""
     prompt = (
         "Given the following user question, corresponding SQL query, "
         "and SQL result, answer the user question.\n\n"
         f'Question: {state["question"]}\n'
         f'SQL Query: {state["query"]}\n'
-        f'SQL Result: {state["result"]}'
     )
     response = llm.invoke(prompt)
     return {**state, "answer": response.content}
 # now define a stateful tool that does the same thing
 @tool
-def sql_chain(state: State) -> State:
     """
     Annotated function that takes a question string seeking information on patient data
     from a SQL database, writes an SQL query to retrieve relevant data, executes the query,

 from langchain_core.tools import tool
 from langchain_openai import ChatOpenAI
 from typing_extensions import TypedDict, Annotated
 from .state_types import State
 db = SQLDatabase.from_uri("sqlite:///data/patient_demonstration.sqlite")
 llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
 Given an input question, create a syntactically correct {dialect} query to
 run to help find the answer. Unless the user specifies in his question a
 specific number of examples they wish to obtain, always limit your query to
+at most {top_k} results. For questions about specific patients, filter the
+PatientPKHash column using exactly the provided value: {pk_hash}. If questions
+are about all patients or not about a specific patient, do not filter.
 Never query for all the columns from a specific table, only ask for a the
 few relevant columns given the question.
 description. Be careful to not query for columns that do not exist. Also,
 pay attention to which column is in which table.
+When checking if a patient was late for an appointment, for each visit, compare the NextAppointmentDate from the previous visit to the VisitDate of the current visit.
+Do not compare NextAppointmentDate to the VisitDate in the same row. Use SQL to find, for each patient, the next VisitDate after a given VisitDate, and compare it to the NextAppointmentDate from the previous visit.
+Here is an example of how to do this in SQL:
+SELECT
+v1.PatientPKHash,
+v1.VisitDate AS PreviousVisitDate,
+v1.NextAppointmentDate,
+v2.VisitDate AS NextVisitDate,
+CASE
+    WHEN v2.VisitDate <= v1.NextAppointmentDate THEN 'On time'
+    ELSE 'Late'
+END AS AttendanceStatus
+FROM clinical_visits v1
+JOIN clinical_visits v2
+ON v1.PatientPKHash = v2.PatientPKHash
+AND v2.VisitDate > v1.VisitDate
+WHERE NOT EXISTS (
+SELECT 1 FROM clinical_visits v3
+WHERE v3.PatientPKHash = v1.PatientPKHash
+    AND v3.VisitDate > v1.VisitDate
+    AND v3.VisitDate < v2.VisitDate
+)
+ORDER BY v1.PatientPKHash, v1.VisitDate;
 Only use the following tables:
 {table_info}
 """
     query: Annotated[str, ..., "Syntactically valid SQL query."]
+def write_query(state:State) -> State:
     """Generate SQL query to fetch information."""
     prompt = query_prompt_template.invoke(
         {
             "top_k": 10,
             "table_info": db.get_table_info(),
             "input": state["question"],
+            "pk_hash": state["pk_hash"]
         }
     )
     structured_llm = llm.with_structured_output(QueryOutput)
     result = structured_llm.invoke(prompt)
     return {**state, "query": result["query"]}
+def execute_query(state:State) -> State:
     """Execute SQL query."""
     execute_query_tool = QuerySQLDatabaseTool(db=db)
     return {**state, "result": execute_query_tool.invoke(state["query"])}
+def generate_answer(state:State) -> State:
+    """
+    Answer question using retrieved information as context.
+    For awareness, NextAppointmentDate is set during the VisitDate of the same entry.
+    To determine if the patient came on time to their next appointment, compare NextAppointmentDate
+    with the next recorded VisitDate. For example, if a patient has a VisitDate of
+    2023-01-01 and a NextAppointmentDate of 2023-01-15, check if the next VisitDate is on or before
+    2023-01-15 to determine if the patient came on time.
+    """
     prompt = (
         "Given the following user question, corresponding SQL query, "
         "and SQL result, answer the user question.\n\n"
         f'Question: {state["question"]}\n'
         f'SQL Query: {state["query"]}\n'
+        f'SQL Result: {state["result"]}'
     )
     response = llm.invoke(prompt)
     return {**state, "answer": response.content}
 # now define a stateful tool that does the same thing
 @tool
+def sql_chain(state:State) -> State:
     """
     Annotated function that takes a question string seeking information on patient data
     from a SQL database, writes an SQL query to retrieve relevant data, executes the query,

chatlib/state_types.py CHANGED Viewed

@@ -8,4 +8,5 @@ class State(TypedDict):
     rag_result: str
     query: str
     result: str
-    answer: str

     rag_result: str
     query: str
     result: str
+    answer: str
+    pk_hash: str

main.py CHANGED Viewed

@@ -25,15 +25,35 @@ sys_msg = SystemMessage(content="""
                         meeting with patients. You have two tools available,
                         one to access information from HIV clinical guidelines, the other is
                         a SQL tool to access patient data.
                         """
                         )
 # Assistant Node
-def assistant(state: MessagesState):
-   return {"messages": [llm_with_tools.invoke([sys_msg] + state["messages"])]}
 # Graph
-builder = StateGraph(MessagesState)
 # Define nodes: these do the work
 builder.add_node("assistant", assistant)
@@ -51,9 +71,22 @@ builder.add_edge("tools", "assistant")
 react_graph = builder.compile(checkpointer=memory)
 # Specify a thread
-config = {"configurable": {"thread_id": "13"}}
-messages = [HumanMessage(content="what is the proper course of treatment for someone with opportunistic infections?")]
-messages = react_graph.invoke({"messages": messages}, config)
-for m in messages['messages']:
     m.pretty_print()

                         meeting with patients. You have two tools available,
                         one to access information from HIV clinical guidelines, the other is
                         a SQL tool to access patient data.
+                        You must respond only with a JSON object specifying the tool to call and its arguments.
+                        Do not generate any SQL queries or answers yourself.
                         """
                         )
 # Assistant Node
+def assistant(state: State) -> State:
+    pk_hash = state.get("pk_hash", None)
+    if pk_hash:
+        pk_msg = SystemMessage(content=f"The patient identifier (pk_hash) is: {pk_hash}")
+        messages = [sys_msg, pk_msg] + state["messages"]
+    else:
+        messages = [sys_msg] + state["messages"]
+    # Get the LLM/tool response
+    new_message = llm_with_tools.invoke(messages)
+    # Extract the question from the latest HumanMessage, if present
+    latest_question = ""
+    for msg in reversed(messages):
+        if isinstance(msg, HumanMessage):
+            latest_question = msg.content
+            break
+    return {**state, "messages": state['messages'] + [new_message], "question": latest_question}
 # Graph
+builder = StateGraph(State)
 # Define nodes: these do the work
 builder.add_node("assistant", assistant)
 react_graph = builder.compile(checkpointer=memory)
 # Specify a thread
+memory.delete_thread("25")
+config = {"configurable": {"thread_id": "25", "user_id": "1"}}
+# initialize state with patient pk hash
+input_state:State = {
+    "messages": [HumanMessage(content="how many visits were recorded in 2024?")],
+    "question": "",
+    "rag_result": "",
+    "query": "",
+    "result": "",
+    "answer": "",
+    "pk_hash": "962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73"
+}
+# messages = [HumanMessage(content="how many appointments has this patient had?")]
+message_output = react_graph.invoke(input_state, config)
+for m in message_output['messages']:
     m.pretty_print()

sql_agent.ipynb CHANGED Viewed

@@ -18,7 +18,8 @@
     "    question: str\n",
     "    query: str\n",
     "    result: str\n",
-    "    answer: str"
    ]
   },
   {
@@ -36,12 +37,15 @@
     "os.environ.get(\"OPENAI_API_KEY\")\n",
     "\n",
     "db = SQLDatabase.from_uri(\"sqlite:///data/patient_demonstration.sqlite\")\n",
-    "llm = ChatOpenAI(temperature = 0.0, model=\"gpt-4o\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "f9c96976",
    "metadata": {},
    "outputs": [
@@ -56,7 +60,8 @@
       "run to help find the answer. Unless the user specifies in his question a\n",
       "specific number of examples they wish to obtain, always limit your query to\n",
       "at most \u001b[33;1m\u001b[1;3m{top_k}\u001b[0m results. You can order the results by a relevant column to\n",
-      "return the most interesting examples in the database.\n",
       "\n",
       "Never query for all the columns from a specific table, only ask for a the\n",
       "few relevant columns given the question.\n",
@@ -82,7 +87,8 @@
     "run to help find the answer. Unless the user specifies in his question a\n",
     "specific number of examples they wish to obtain, always limit your query to\n",
     "at most {top_k} results. You can order the results by a relevant column to\n",
-    "return the most interesting examples in the database.\n",
     "\n",
     "Never query for all the columns from a specific table, only ask for a the\n",
     "few relevant columns given the question.\n",
@@ -101,13 +107,13 @@
     "    [(\"system\", system_message), (\"user\", user_prompt)]\n",
     ")\n",
     "\n",
-    "# for message in query_prompt_template.messages:\n",
-    "#     message.pretty_print()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "id": "fee4ebcb",
    "metadata": {},
    "outputs": [],
@@ -129,6 +135,7 @@
     "            \"top_k\": 10,\n",
     "            \"table_info\": db.get_table_info(),\n",
     "            \"input\": state[\"question\"],\n",
     "        }\n",
     "    )\n",
     "    structured_llm = llm.with_structured_output(QueryOutput)\n",
@@ -138,7 +145,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
    "id": "cfa94f19",
    "metadata": {},
    "outputs": [],
@@ -153,7 +160,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
    "id": "7f4e8039",
    "metadata": {},
    "outputs": [],
@@ -181,7 +188,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
    "id": "fea8652c",
    "metadata": {},
    "outputs": [],
@@ -202,7 +209,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
    "id": "07429d93",
    "metadata": {},
    "outputs": [
@@ -210,12 +217,12 @@
      "data": {
       "text/plain": [
        "{'question': 'What proportion of all regimens is accounted for by the most common regimen?',\n",
-       " 'query': 'SELECT CurrentRegimen, COUNT(*) * 1.0 / (SELECT COUNT(*) FROM clinical_visits) AS proportion\\nFROM clinical_visits\\nGROUP BY CurrentRegimen\\nORDER BY COUNT(*) DESC\\nLIMIT 1;',\n",
-       " 'result': \"[('TDF/3TC/DTG', 0.6232196237031827)]\",\n",
-       " 'answer': 'The most common regimen is \"TDF/3TC/DTG,\" and it accounts for approximately 62.32% of all regimens.'}"
       ]
      },
-     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -227,7 +234,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
    "id": "c51497e2",
    "metadata": {},
    "outputs": [],
@@ -254,7 +261,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
    "id": "495a5e45",
    "metadata": {},
    "outputs": [],
@@ -268,6 +275,8 @@
     "                        You are a helpful assistant tasked with helping clinicians\n",
     "                        access information from patient records.\n",
     "                        Only call the SQL tool when the user asks questions about patient data. \n",
     "                        For greetings, thanks, or unrelated topics, respond directly without calling any tools.\n",
     "\n",
     "                        \"\"\"\n",
@@ -280,7 +289,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
    "id": "3f17bccf",
    "metadata": {},
    "outputs": [
@@ -327,21 +336,24 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'assistant': {'messages': [AIMessage(content=\"You're welcome! If you have any more questions, feel free to ask.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 3327, 'total_tokens': 3343, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_07871e2ad8', 'id': 'chatcmpl-BkBjCRm7HUlUQZV3ntGa2Sbpz8UPJ', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--45e27132-7994-4010-91e7-cd18113d643c-0', usage_metadata={'input_tokens': 3327, 'output_tokens': 16, 'total_tokens': 3343, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]}}\n"
      ]
     }
    ],
    "source": [
     "# Specify a thread\n",
-    "config = {\"configurable\": {\"thread_id\": \"4\"}}\n",
     "\n",
-    "user_prompt = \"thanks!?\"\n",
     "input_state = {\n",
     "    \"messages\": [HumanMessage(content=user_prompt)],\n",
     "    \"question\": user_prompt,\n",
     "    \"query\": \"\",\n",
     "    \"result\": \"\",\n",
     "    \"answer\": \"\",\n",
     "}\n",
     "# messages = react_graph.invoke(input_state, config)\n",
     "\n",

     "    question: str\n",
     "    query: str\n",
     "    result: str\n",
+    "    answer: str\n",
+    "    pk_hash: str"
    ]
   },
   {
     "os.environ.get(\"OPENAI_API_KEY\")\n",
     "\n",
     "db = SQLDatabase.from_uri(\"sqlite:///data/patient_demonstration.sqlite\")\n",
+    "llm = ChatOpenAI(temperature = 0.0, model=\"gpt-4o\")\n",
+    "\n",
+    "# from langchain_ollama import ChatOllama\n",
+    "# llm = ChatOllama(model=\"llama3.2:1b\")"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "id": "f9c96976",
    "metadata": {},
    "outputs": [
       "run to help find the answer. Unless the user specifies in his question a\n",
       "specific number of examples they wish to obtain, always limit your query to\n",
       "at most \u001b[33;1m\u001b[1;3m{top_k}\u001b[0m results. You can order the results by a relevant column to\n",
+      "return the most interesting examples in the database. For questions about specific patients, filter using the \n",
+      "PatientPKHash column and the provided value \u001b[33;1m\u001b[1;3m{pk_hash}\u001b[0m.\n",
       "\n",
       "Never query for all the columns from a specific table, only ask for a the\n",
       "few relevant columns given the question.\n",
     "run to help find the answer. Unless the user specifies in his question a\n",
     "specific number of examples they wish to obtain, always limit your query to\n",
     "at most {top_k} results. You can order the results by a relevant column to\n",
+    "return the most interesting examples in the database. For questions about specific patients, filter using the \n",
+    "PatientPKHash column and the provided value {pk_hash}.\n",
     "\n",
     "Never query for all the columns from a specific table, only ask for a the\n",
     "few relevant columns given the question.\n",
     "    [(\"system\", system_message), (\"user\", user_prompt)]\n",
     ")\n",
     "\n",
+    "for message in query_prompt_template.messages:\n",
+    "    message.pretty_print()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "id": "fee4ebcb",
    "metadata": {},
    "outputs": [],
     "            \"top_k\": 10,\n",
     "            \"table_info\": db.get_table_info(),\n",
     "            \"input\": state[\"question\"],\n",
+    "            \"pk_hash\": state[\"pk_hash\"],\n",
     "        }\n",
     "    )\n",
     "    structured_llm = llm.with_structured_output(QueryOutput)\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "id": "cfa94f19",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "id": "7f4e8039",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "fea8652c",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "07429d93",
    "metadata": {},
    "outputs": [
      "data": {
       "text/plain": [
        "{'question': 'What proportion of all regimens is accounted for by the most common regimen?',\n",
+       " 'query': 'SELECT SUM(CASE WHEN CurrentRegimen = ( SELECT MIN(CurrentRegimen) FROM data_dictionary ) THEN 1 ELSE 0 END) / COUNT(DISTINCT VisitBy) FROM clinical_visits',\n",
+       " 'result': 'Error: (sqlite3.OperationalError) misuse of aggregate: MIN()\\n[SQL: SELECT SUM(CASE WHEN CurrentRegimen = ( SELECT MIN(CurrentRegimen) FROM data_dictionary ) THEN 1 ELSE 0 END) / COUNT(DISTINCT VisitBy) FROM clinical_visits]\\n(Background on this error at: https://sqlalche.me/e/20/e3q8)',\n",
+       " 'answer': \"The issue here is that you are trying to calculate the proportion of regimens by using the `MIN` function, which is not allowed in SQL. The `MIN` function returns a single value, but you need a count or sum to get an accurate result.\\n\\nTo fix this, you can use a subquery to find the minimum regimen and then divide the count of regimens by this minimum value. Here's how you can modify your query:\\n\\n```sql\\nSELECT \\n    SUM(CASE WHEN CurrentRegimen = ( SELECT MIN(CurrentRegimen) FROM data_dictionary ) THEN 1 ELSE 0 END) / COUNT(DISTINCT VisitBy)\\nFROM clinical_visits;\\n```\\n\\nThis will give you the proportion of all regimens that are accounted for by the most common regimen. \\n\\nNote: If there are multiple most common regimens, this query will return one result with the highest proportion value. If you want to get all results or handle ties in a specific way (e.g., average proportion), you would need a more complex query.\"}"
       ]
      },
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "c51497e2",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 14,
    "id": "495a5e45",
    "metadata": {},
    "outputs": [],
     "                        You are a helpful assistant tasked with helping clinicians\n",
     "                        access information from patient records.\n",
     "                        Only call the SQL tool when the user asks questions about patient data. \n",
+    "                        If the question is about a particular patient, filter using the PatientPKHash column\n",
+    "                        and the provided value.\n",
     "                        For greetings, thanks, or unrelated topics, respond directly without calling any tools.\n",
     "\n",
     "                        \"\"\"\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 15,
    "id": "3f17bccf",
    "metadata": {},
    "outputs": [
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "{'assistant': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_cOYoWvk66qbVV8cLl3SYbRGe', 'function': {'arguments': '{\"state\":{\"messages\":[{\"content\":\"How many visits has the patient with PatientPKHash \\'962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\\' had?\",\"type\":\"human\"}],\"question\":\"How many visits has the patient with PatientPKHash \\'962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\\' had?\",\"query\":\"SELECT COUNT(*) FROM clinical_visits WHERE PatientPKHash = \\'962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\\';\",\"result\":\"\",\"answer\":\"\",\"pk_hash\":\"962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\"}}', 'name': 'sql_chain'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 219, 'prompt_tokens': 3445, 'total_tokens': 3664, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 3328}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_07871e2ad8', 'id': 'chatcmpl-Ble8i22AvKkpBDYLnESGEflGoWmZx', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--5f0b7cec-37ad-46a0-8d1d-f77a54adc4d8-0', tool_calls=[{'name': 'sql_chain', 'args': {'state': {'messages': [{'content': \"How many visits has the patient with PatientPKHash '962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73' had?\", 'type': 'human'}], 'question': \"How many visits has the patient with PatientPKHash '962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73' had?\", 'query': \"SELECT COUNT(*) FROM clinical_visits WHERE PatientPKHash = '962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73';\", 'result': '', 'answer': '', 'pk_hash': '962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73'}}, 
'id': 'call_cOYoWvk66qbVV8cLl3SYbRGe', 'type': 'tool_call'}], usage_metadata={'input_tokens': 3445, 'output_tokens': 219, 'total_tokens': 3664, 'input_token_details': {'audio': 0, 'cache_read': 3328}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]}}\n",
+      "{'tools': {'messages': [ToolMessage(content='{\\'messages\\': [HumanMessage(content=\"How many visits has the patient with PatientPKHash \\'962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\\' had?\", additional_kwargs={}, response_metadata={})], \\'question\\': \"How many visits has the patient with PatientPKHash \\'962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\\' had?\", \\'query\\': \"SELECT COUNT(*) AS visit_count FROM clinical_visits WHERE PatientPKHash = \\'962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\\';\", \\'result\\': \\'[(5,)]\\', \\'answer\\': \"The patient with PatientPKHash \\'962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\\' has had 5 visits.\", \\'pk_hash\\': \\'962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\\'}', name='sql_chain', id='3f79fa70-b4b4-488a-8764-78ac01c894cc', tool_call_id='call_cOYoWvk66qbVV8cLl3SYbRGe')]}}\n",
+      "{'assistant': {'messages': [AIMessage(content='The patient with PatientPKHash `962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73` has had 5 visits.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 50, 'prompt_tokens': 3950, 'total_tokens': 4000, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 3584}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_07871e2ad8', 'id': 'chatcmpl-Ble8nFeHj0Q5BMtPtYaT66g95yjb3', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--a837133b-1d53-4c6f-a8f6-d3d16ab61fda-0', usage_metadata={'input_tokens': 3950, 'output_tokens': 50, 'total_tokens': 4000, 'input_token_details': {'audio': 0, 'cache_read': 3584}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]}}\n"
      ]
     }
    ],
    "source": [
     "# Specify a thread\n",
+    "config = {\"configurable\": {\"thread_id\": \"6\"}}\n",
     "\n",
+    "user_prompt = \"how many visits has this patient had?\"\n",
     "input_state = {\n",
     "    \"messages\": [HumanMessage(content=user_prompt)],\n",
     "    \"question\": user_prompt,\n",
     "    \"query\": \"\",\n",
     "    \"result\": \"\",\n",
     "    \"answer\": \"\",\n",
+    "    \"pk_hash\": \"962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73\"\n",
     "}\n",
     "# messages = react_graph.invoke(input_state, config)\n",
     "\n",