JDFPalladium committed on
Commit
24e3e87
·
1 Parent(s): faaa805

resolving conflicts

Browse files
Files changed (4) hide show
  1. app.py +12 -11
  2. chatlib/assistant_node.py +73 -5
  3. chatlib/idsr_check.py +103 -24
  4. chatlib/state_types.py +5 -0
app.py CHANGED
@@ -45,7 +45,9 @@ def idsr_check_tool(query):
45
  """Check if the patient case description matches any known diseases."""
46
  result = idsr_check(query, llm=llm)
47
 
48
- return {"answer": result.get("answer", ""), "last_tool": "idsr_check"}
 
 
49
 
50
 
51
  tools = [rag_retrieve_tool, sql_chain_tool, idsr_check_tool]
@@ -58,7 +60,7 @@ You are a helpful assistant supporting clinicians during patient visits. You hav
58
 
59
  - rag_retrieve: to access HIV clinical guidelines
60
  - sql_chain: to access HIV data about the patient with whom the clinician is meeting. When using this tool, always run rag_retrieve first to get context
61
- - idsr_check: to check if the patient case description matches any known diseases
62
 
63
  When a tool is needed, respond only with a JSON object specifying the tool to call and its minimal arguments, for example:
64
  {
@@ -100,16 +102,9 @@ def chat_with_patient(question: str, thread_id: str = None): # type: ignore
100
 
101
  question = detect_and_redact_phi(question)["redacted_text"]
102
 
 
103
  input_state: AppState = {
104
- "messages": [HumanMessage(content=question)],
105
- "question": "",
106
- "rag_result": "",
107
- "answer": "",
108
- "last_answer": "",
109
- "last_user_message": "",
110
- "last_tool": None,
111
- "idsr_disclaimer": False,
112
- "summary": None,
113
  }
114
 
115
  config = {"configurable": {"thread_id": thread_id, "user_id": thread_id}}
@@ -125,6 +120,12 @@ def chat_with_patient(question: str, thread_id: str = None): # type: ignore
125
 
126
 
127
  with gr.Blocks() as app:
 
 
 
 
 
 
128
  question_input = gr.Textbox(label="Question")
129
  thread_id_state = gr.State()
130
  output_chat = gr.Textbox(label="Assistant Response")
 
45
  """Check if the patient case description matches any known diseases."""
46
  result = idsr_check(query, llm=llm)
47
 
48
+ return {"answer": result.get("answer", ""),
49
+ "last_tool": "idsr_check",
50
+ "context": result.get("context", None)}
51
 
52
 
53
  tools = [rag_retrieve_tool, sql_chain_tool, idsr_check_tool]
 
60
 
61
  - rag_retrieve: to access HIV clinical guidelines
62
  - sql_chain: to access HIV data about the patient with whom the clinician is meeting. When using this tool, always run rag_retrieve first to get context
63
+ - idsr_check: to check if the patient case description matches any known diseases.
64
 
65
  When a tool is needed, respond only with a JSON object specifying the tool to call and its minimal arguments, for example:
66
  {
 
102
 
103
  question = detect_and_redact_phi(question)["redacted_text"]
104
 
105
+ # First turn: initialize state
106
  input_state: AppState = {
107
+ "messages": [HumanMessage(content=question)]
 
 
 
 
 
 
 
 
108
  }
109
 
110
  config = {"configurable": {"thread_id": thread_id, "user_id": thread_id}}
 
120
 
121
 
122
  with gr.Blocks() as app:
123
+ gr.Markdown(
124
+ """
125
+ # Clinician Assistant
126
+ Welcome! Enter your clinical question below. The assistant can access HIV guidelines, patient data, and disease surveillance tools.
127
+ """
128
+ )
129
  question_input = gr.Textbox(label="Question")
130
  thread_id_state = gr.State()
131
  output_chat = gr.Textbox(label="Assistant Response")
chatlib/assistant_node.py CHANGED
@@ -33,6 +33,22 @@ def summarize_conversation(messages, llm):
33
 
34
 
35
  def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  messages = state.get("messages", [])
37
  base_messages = [sys_msg]
38
  messages = base_messages + [m for m in messages if not isinstance(m, SystemMessage)]
@@ -48,18 +64,66 @@ def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
48
  state["answer"] = ""
49
  state["rag_result"] = ""
50
 
51
- # Update state from any ToolMessages appended by previous tool calls
52
- # Only consider the most recent ToolMessage for updating state
53
  for msg in reversed(messages):
54
  if isinstance(msg, ToolMessage):
55
  try:
56
  content = msg.content
57
  data = json.loads(content) if isinstance(content, str) else content
58
- state.update(data)
59
- break # only process the most recent ToolMessage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  except json.JSONDecodeError:
61
  break
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  # Invoke LLM with tools (this returns AIMessage with tool_calls if tool call is needed)
64
  new_message = llm_with_tools.invoke(messages)
65
  messages.append(new_message)
@@ -99,6 +163,10 @@ def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
99
  final_content = disclaimer + final_content
100
  state["idsr_disclaimer_shown"] = True
101
 
 
 
 
 
102
  # Replace the last AIMessage content with final_content to avoid duplicates
103
  for i in reversed(range(len(messages))):
104
  if isinstance(messages[i], AIMessage):
@@ -114,7 +182,7 @@ def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
114
  m for m in non_sys_messages if isinstance(m, (HumanMessage, AIMessage))
115
  ]
116
 
117
- if len(human_ai_messages) > 15:
118
  summary_text = summarize_conversation(messages, llm)
119
  summary_msg = SystemMessage(
120
  content="Summary of earlier conversation:\n" + summary_text
 
33
 
34
 
35
  def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
36
+
37
+ # Initialize missing keys with defaults
38
+ state.setdefault("question", "")
39
+ state.setdefault("rag_result", "")
40
+ state.setdefault("answer", "")
41
+ state.setdefault("last_answer", None)
42
+ state.setdefault("last_user_message", None)
43
+ state.setdefault("last_tool", None)
44
+ state.setdefault("idsr_disclaimer_shown", False)
45
+ state.setdefault("summary", None)
46
+ state.setdefault("context", None)
47
+ state.setdefault("context_versions", {})
48
+ state.setdefault("last_context_injected_versions", {})
49
+ state.setdefault("context_version_ready_for_injection", 0)
50
+ state.setdefault("context_first_response_sent", True)
51
+
52
  messages = state.get("messages", [])
53
  base_messages = [sys_msg]
54
  messages = base_messages + [m for m in messages if not isinstance(m, SystemMessage)]
 
64
  state["answer"] = ""
65
  state["rag_result"] = ""
66
 
67
+ # Process latest ToolMessage and update context_version
 
68
  for msg in reversed(messages):
69
  if isinstance(msg, ToolMessage):
70
  try:
71
  content = msg.content
72
  data = json.loads(content) if isinstance(content, str) else content
73
+
74
+ tool_name = data.get("last_tool")
75
+ new_context = data.get("context")
76
+
77
+ if tool_name:
78
+ old_context = state.get("context", "")
79
+ old_version = state["context_versions"].get(tool_name, 0)
80
+
81
+ if new_context is not None and new_context != old_context:
82
+ state["context"] = new_context
83
+ state["context_versions"][tool_name] = old_version + 1
84
+ state["context_first_response_sent"] = False # Reset flag on new context
85
+
86
+ state["last_tool"] = tool_name
87
+
88
+ for k, v in data.items():
89
+ if k not in ("context", "last_tool"):
90
+ state[k] = v
91
+
92
+ break
93
  except json.JSONDecodeError:
94
  break
95
 
96
+ tool_name = "idsr_check"
97
+ current_version = state["context_versions"].get(tool_name, 0)
98
+ last_injected_version = state["last_context_injected_versions"].get(tool_name, 0)
99
+
100
+ # On turns where user message is unchanged, advance ready_for_injection to current_version
101
+ if not user_message_changed and state["context_version_ready_for_injection"] < current_version:
102
+ state["context_version_ready_for_injection"] = current_version
103
+
104
+ # Inject context system message only if:
105
+ # - last_tool matches tool_name
106
+ # - context exists
107
+ # - ready_for_injection > last injected version
108
+ # - AND first AI response after new context has been sent
109
+ if (
110
+ state.get("last_tool") == tool_name
111
+ and state.get("context")
112
+ and state["context_version_ready_for_injection"] > last_injected_version
113
+ and state.get("context_first_response_sent", True)
114
+ ):
115
+ context_msg = SystemMessage(
116
+ content=(
117
+ f"The following information was retrieved from the {tool_name.upper()} database and may help answer the user's question:\n\n"
118
+ f"{state['context']}\n\n"
119
+ "Use this information when responding."
120
+ )
121
+ )
122
+ messages.append(context_msg)
123
+
124
+ state["last_context_injected_versions"][tool_name] = state["context_version_ready_for_injection"]
125
+ state["last_tool"] = None
126
+
127
  # Invoke LLM with tools (this returns AIMessage with tool_calls if tool call is needed)
128
  new_message = llm_with_tools.invoke(messages)
129
  messages.append(new_message)
 
163
  final_content = disclaimer + final_content
164
  state["idsr_disclaimer_shown"] = True
165
 
166
+ # After generating AI message, mark first response sent
167
+ if state.get("last_tool") == tool_name or state.get("context_first_response_sent") is False:
168
+ state["context_first_response_sent"] = True
169
+
170
  # Replace the last AIMessage content with final_content to avoid duplicates
171
  for i in reversed(range(len(messages))):
172
  if isinstance(messages[i], AIMessage):
 
182
  m for m in non_sys_messages if isinstance(m, (HumanMessage, AIMessage))
183
  ]
184
 
185
+ if len(human_ai_messages) > 10:
186
  summary_text = summarize_conversation(messages, llm)
187
  summary_msg = SystemMessage(
188
  content="Summary of earlier conversation:\n" + summary_text
chatlib/idsr_check.py CHANGED
@@ -9,6 +9,8 @@ from langchain_core.output_parsers import PydanticOutputParser
9
  import json
10
  import math
11
  from collections import Counter
 
 
12
 
13
 
14
  with open("./guidance_docs/idsr_keywords.txt", "r", encoding="utf-8") as f:
@@ -39,6 +41,15 @@ keyword_weights = {
39
  kw: math.log(total_docs / (1 + count)) for kw, count in keyword_doc_counts.items()
40
  }
41
 
 
 
 
 
 
 
 
 
 
42
 
43
  def score_doc(doc_to_score, matched_keywords):
44
  doc_keywords = set(doc_to_score.metadata.get("matched_keywords", []))
@@ -110,9 +121,9 @@ def hybrid_search_with_query_keywords(
110
 
111
  ranked_docs = sorted(scored_docs, key=lambda x: -x[1])
112
  top_docs = [doc for doc, score in ranked_docs if score > 0]
113
- top_3_docs = top_docs[:3]
114
 
115
- merged = {doc.page_content: doc for doc in semantic_hits + top_3_docs}
116
  return list(merged.values())
117
 
118
 
@@ -130,47 +141,96 @@ def idsr_check(query: str, llm) -> AppState:
130
  results = hybrid_search_with_query_keywords(
131
  query, vectorstore, tagged_documents, keywords, llm
132
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  disease_definitions = "\n\n".join(
135
  [
136
- f"{doc.metadata.get('disease_name', 'Unknown Disease')}:\n{doc.page_content}"
137
  for doc in results
138
  ]
139
  )
140
 
141
- prompt = """
142
- You are a medical assistant reviewing a brief clinical case in Kenya to help identify which diseases the patient may plausibly have. You have access to several disease definitions.
143
-
144
- Your task is as follows:
145
- 1. Carefully compare the case description to each disease definition.
146
- 2. If a disease seems like a possible match based on the available information, list it and explain why.
147
- 3. Only include rare diseases (e.g., eradicated or non-endemic to Kenya) if the match is extremely strong. Prioritize common and plausible conditions.
148
- 4. If no disease clearly matches, say: "No strong match found."
149
- 5. Ask clarifying questions if helpful to make better match suggestions.
150
- 6. After asking clarifying questions, proceed with an assessment anyway based on what is already available.
151
 
152
- Case:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  {query}
154
 
155
- Diseases:
156
  {disease_definitions}
157
 
158
- Your response should be brief and include as appropriate:
 
 
 
 
 
 
 
 
 
159
 
160
  Possible matches:
161
- - Disease Name: [Likely] - Reason
162
- - Disease Name: [Probable] - Reason
163
- (Only include diseases that clearly fit based on the information. If none, say "No strong match found.")
164
 
165
- Clarifying questions (optional, only if needed):
166
  - Question 1
167
  - Question 2
168
 
169
- At the end, always give a brief recommendation like:
170
- - Recommendation: "Suggest monitoring for the listed conditions." OR "No disease meets criteria based on current data — suggest gathering additional history on [x, y, z]."
171
 
172
  """.format(
173
- query=query, disease_definitions=disease_definitions
 
 
 
174
  )
175
 
176
  llm_response = llm.invoke(prompt)
@@ -180,4 +240,23 @@ def idsr_check(query: str, llm) -> AppState:
180
  else "No relevant disease information found."
181
  )
182
 
183
- return {"answer": answer_text, "last_tool": "idsr_check"} # type: ignore
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  import json
10
  import math
11
  from collections import Counter
12
+ import sqlite3
13
+ import os
14
 
15
 
16
  with open("./guidance_docs/idsr_keywords.txt", "r", encoding="utf-8") as f:
 
41
  kw: math.log(total_docs / (1 + count)) for kw, count in keyword_doc_counts.items()
42
  }
43
 
44
+ ## prepare to get location data
45
+ # first, get sitecode from environment variable
46
+ sitecode = os.environ.get("SITECODE")
47
+ # next, connect to location database and get county where code = sitecode
48
+ conn = sqlite3.connect('data/location_data.sqlite')
49
+ cursor = conn.cursor()
50
+ cursor.execute("SELECT County FROM sitecode_county_xwalk WHERE Code = ?", (sitecode,))
51
+ county = cursor.fetchone()
52
+ conn.close()
53
 
54
  def score_doc(doc_to_score, matched_keywords):
55
  doc_keywords = set(doc_to_score.metadata.get("matched_keywords", []))
 
121
 
122
  ranked_docs = sorted(scored_docs, key=lambda x: -x[1])
123
  top_docs = [doc for doc, score in ranked_docs if score > 0]
124
+ top_5_docs = top_docs[:5]
125
 
126
+ merged = {doc.page_content: doc for doc in semantic_hits + top_5_docs}
127
  return list(merged.values())
128
 
129
 
 
141
  results = hybrid_search_with_query_keywords(
142
  query, vectorstore, tagged_documents, keywords, llm
143
  )
144
+
145
+ # set up connection to location database and get EpidemicInfo for any diseases in the disease_name metadata field of the results from the hybrid search
146
+ conn = sqlite3.connect('data/location_data.sqlite')
147
+ cursor = conn.cursor()
148
+ disease_names = [doc.metadata.get("disease_name") for doc in results]
149
+ placeholders = ",".join("?" * len(disease_names))
150
+ query_str = f"SELECT Disease, EpidemicInfo FROM who_bulletin WHERE Disease IN ({placeholders})"
151
+ cursor.execute(query_str, disease_names)
152
+ epidemic_info = cursor.fetchall()
153
+ conn.close()
154
+
155
+ # print(doc.metadata.get("disease_name") for doc in results)
156
+
157
+ # set up connection to location database and get results where County = county and Disease is in
158
+ # the disease_name metadata field of the results from the hybrid search
159
+ conn = sqlite3.connect('data/location_data.sqlite')
160
+ cursor = conn.cursor()
161
+ if county: # Ensure county is not None
162
+ county_name = county[0]
163
+ disease_names = [doc.metadata.get("disease_name") for doc in results]
164
+ placeholders = ",".join("?" * len(disease_names))
165
+ query_str = f"SELECT County, Disease, Prevalence, Notes FROM county_disease_info WHERE County = ? AND Disease IN ({placeholders})"
166
+ cursor.execute(query_str, (county_name, *disease_names))
167
+ county_info = cursor.fetchall()
168
+
169
+ # Get climate information for the county from the rainy seasons table
170
+ # Get the current month
171
+ from datetime import datetime
172
+ current_month = datetime.now().strftime("%B") # Full month name, e.g. "March"
173
+ cursor.execute("SELECT RainySeason FROM county_rainy_seasons WHERE County = ? and Month = ?", (county_name, current_month))
174
+ rainy_season = cursor.fetchone()
175
+ rainy_season = rainy_season[0] if rainy_season else "Unknown"
176
+
177
+ # close the connection
178
+ conn.close()
179
 
180
  disease_definitions = "\n\n".join(
181
  [
182
+ f"### Disease: {doc.metadata.get('disease_name', 'Unknown Disease')}:\n{doc.page_content}"
183
  for doc in results
184
  ]
185
  )
186
 
 
 
 
 
 
 
 
 
 
 
187
 
188
+ prompt = """
189
+ You are a medical assistant reviewing a brief clinical case in Kenya to help identify which diseases the patient may plausibly have.
190
+ You have access to several disease definitions. You also have access to information about the prevalence of each disease in the county
191
+ where the patient is located. The prevalence of some diseases varies by season, and some diseases are also more likely when there is a
192
+ declared epidemic. Information on the timing of the rainy season and any declared epidemics is also provided.
193
+
194
+ ## Instructions:
195
+ 1. Carefully compare the case description to each disease definition, taking into account the prevalence and seasonality information.
196
+ 2. If a disease seems like a possible match based on the available information, list it and explain why.
197
+ 3. Only include rare diseases, or diseases that don't fit seasonally, if the match is extremely strong. Prioritize common and plausible conditions.
198
+ 4. You don't need to suggest matches if none of the diseases seem relevant.
199
+ 5. Ask clarifying questions if helpful to make better match suggestions. Possible questions might include asking about specific symptoms, demographic characteristics, exposures, or travel history.
200
+ 6. At the end, give a brief recommendation on next steps, such as monitoring for certain conditions or gathering additional history.
201
+
202
+ ## Case:
203
  {query}
204
 
205
+ ## Diseases:
206
  {disease_definitions}
207
 
208
+ ## Locational context:
209
+ In {county_name}, the current rainy season status is {rainy_season}.
210
+
211
+ The above diseases have the following prevalence (county, disease name, prevalence, seasonality):
212
+ {county_info}
213
+
214
+ Here are any relevant epidemic alerts for these diseases:
215
+ {epidemic_info}
216
+
217
+ ## Expected Output
218
 
219
  Possible matches:
220
+ - Disease Name: Reason
221
+ - Disease Name: Reason
 
222
 
223
+ Clarifying questions:
224
  - Question 1
225
  - Question 2
226
 
227
+ Recommendation:
 
228
 
229
  """.format(
230
+ query=query, disease_definitions=disease_definitions, county_name=county_name if county else "Unknown County",
231
+ rainy_season=rainy_season if county else "Unknown",
232
+ county_info="\n".join([f"- {row[0]}, {row[1]}, Prevalence: {row[2]}, Seasonality: {row[3]}" for row in county_info]) if county else "No county information available.",
233
+ epidemic_info="\n".join([f"- {row[0]}: {row[1]}" for row in epidemic_info]) if epidemic_info else "No epidemic information available."
234
  )
235
 
236
  llm_response = llm.invoke(prompt)
 
240
  else "No relevant disease information found."
241
  )
242
 
243
+ # Set up context to return.
244
+ # First, use an LLM to identify which diseases from disease_definitions were mentioned in the answer_text
245
+ disease_names_in_answer = [doc.metadata.get("disease_name") for doc in results if doc.metadata.get("disease_name") in answer_text]
246
+ # Next, filter the results to only include those diseases
247
+ filtered_results = [doc for doc in results if doc.metadata.get("disease_name") in disease_names_in_answer]
248
+ # Finally, create context string with only those diseases, plus any county_info and epidemic_info
249
+ context_parts = []
250
+ if filtered_results:
251
+ context_parts.append("### Disease Definitions:\n" + "\n\n".join(
252
+ [
253
+ f"### Disease: {doc.metadata.get('disease_name', 'Unknown Disease')}:\n{doc.page_content}"
254
+ for doc in filtered_results
255
+ ]
256
+ ))
257
+ if county and county_info:
258
+ context_parts.append("### County Disease Information:\n" + "\n".join([f"- {row[0]}, {row[1]}, Prevalence: {row[2]}, Seasonality: {row[3]}" for row in county_info]))
259
+ if epidemic_info:
260
+ context_parts.append("### Epidemic Information:\n" + "\n".join([f"- {row[0]}: {row[1]}" for row in epidemic_info]))
261
+
262
+ return {"answer": answer_text, "last_tool": "idsr_check", "context": context_parts} # type: ignore
chatlib/state_types.py CHANGED
@@ -15,3 +15,8 @@ class AppState(TypedDict):
15
  last_tool: Optional[str] = None
16
  idsr_disclaimer_shown: bool = False
17
  summary: Optional[str] = None
 
 
 
 
 
 
15
  last_tool: Optional[str] = None
16
  idsr_disclaimer_shown: bool = False
17
  summary: Optional[str] = None
18
+ context: Optional[str] = None
19
+ context_versions: dict[str, int] = {}
20
+ last_context_injected_versions: dict[str, int] = {}
21
+ context_version_ready_for_injection: int = 0
22
+ context_first_response_sent: bool = True