Emmanuel Chinonye Nnajiofor committed
Commit 4c42be0 · 1 Parent(s): 8fbb0e2

Migrate project from pip to UV package manager


- Remove requirements.txt and the old pip-based install commands
- Add pyproject.toml and uv.lock for dependency declaration and locking
- Repoint `make install` from pip to `uv sync`
- Update Makefile targets (lint, test, format, run) to run tools from the project `.venv`
- Add chatlib/logger.py to enable consistent logging across the project
- Update README.md to include the new logger.py file
- Clean out stale comments
- Other minor changes

This change improves reproducibility by using UV's lockfile and
isolated virtual environments, and centralizes tool invocation on the
project's `.venv` so commands always execute in the correct environment.
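
With this setup, a typical developer loop looks like the following (illustrative commands that mirror the new Makefile targets below):

    uv venv .venv   # create the project virtual environment
    uv sync         # install locked dependencies from uv.lock
    make test       # run pytest against the project environment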

.python-version ADDED
@@ -0,0 +1 @@
+ 3.12
Makefile CHANGED
@@ -1,15 +1,39 @@
- install:
- 	pip install --upgrade pip &&\
- 		pip install -r requirements.txt
-
- lint:
- 	pylint --disable=R,C app.py chatlib
+ SHELL := /bin/bash
+
+ VENV := .venv
+
+ ifeq ($(OS),Windows_NT)
+     VENV_BIN := $(VENV)/Scripts
+     PYTHON := $(VENV_BIN)/python.exe
+     RM := del /s /q
+ else
+     VENV_BIN := $(VENV)/bin
+     PYTHON := $(VENV_BIN)/python
+     RM := rm -rf
+ endif
+
+ .PHONY: venv install install-dev lint test format run clean
+
+ venv:
+ 	uv venv $(VENV)
+
+ install: venv
+ 	$(VENV_BIN)/uv sync
+
+ lint:
+ 	$(VENV_BIN)/pylint --disable=R,C app.py chatlib
 
  test:
- 	PYTHONPATH=. pytest -vv
+ 	PYTHONPATH=. $(VENV_BIN)/pytest -vv
 
  format:
- 	black app.py chatlib
+ 	$(VENV_BIN)/black app.py chatlib
 
  run:
- 	python app.py
+ 	$(PYTHON) app.py
+
+ clean:
+ 	$(RM) $(VENV)
+ 	$(RM) .pytest_cache
+ 	$(RM) __pycache__
+ 	$(RM) .mypy_cache
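Note: the rewritten targets call tools straight from `$(VENV_BIN)` rather than through `uv run`, so no environment activation is needed. The `install` target invokes `$(VENV_BIN)/uv sync`, which assumes uv itself is present inside the virtual environment; with a system-wide uv install, a plain `uv sync` accomplishes the same thing.
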
README.md CHANGED
@@ -34,6 +34,7 @@ A conversational assistant designed to help clinicians in Kenya access patient d
  │ ├── assistant_node.py
  │ ├── guidlines_rag_agent_li.py
  │ ├── idsr_check.py
+ │ ├── logger.py
  │ ├── patient_all_data.py
  │ ├── patient_sql_agent.py
  │ ├── phi_filter.py
app.py CHANGED
@@ -8,7 +8,7 @@ from langchain_core.messages import HumanMessage, SystemMessage
  from langgraph.prebuilt import tools_condition, ToolNode
  from langgraph.checkpoint.memory import MemorySaver
 
- # Initialize your graph and checkpointer once - eventually make this persistent
+
  memory = MemorySaver()
 
  if os.path.exists("config.env"):
@@ -29,9 +29,7 @@ def rag_retrieve_tool(query):
      """Retrieve relevant HIV clinical guidelines for the given query."""
      result = rag_retrieve(query, llm=llm)
      return {
-         "rag_result": result.get(
-             "rag_result", ""
-         ),  # adjust based on your rag_retrieve output
+         "rag_result": result.get("rag_result", ""),
          "last_tool": "rag_retrieve",
      }
 
@@ -89,7 +87,7 @@ builder.add_edge("tools", "assistant")
  react_graph = builder.compile(checkpointer=memory)
 
 
- def chat_with_patient(question: str, thread_id: str = None):
+ def chat_with_patient(question: str, thread_id: str = None):  # type: ignore
      # Generate or reuse thread_id for session persistence
      if thread_id is None or thread_id == "":
          thread_id = str(uuid.uuid4())
@@ -97,8 +95,7 @@ def chat_with_patient(question: str, thread_id: str = None):
      # Check input for PHI and redact if necessary
      question = detect_and_redact_phi(question)["redacted_text"]
      print(question)
-     # Prepare input state with new user message and pk_hash
-     # initialize state with patient pk hash
+
      input_state: AppState = {
          "messages": [HumanMessage(content=question)],
          "question": "",
@@ -111,13 +108,11 @@ def chat_with_patient(question: str, thread_id: str = None):
 
      config = {"configurable": {"thread_id": thread_id, "user_id": thread_id}}
 
-     # Invoke the graph with persistent state
-     output_state = react_graph.invoke(input_state, config)
+     output_state = react_graph.invoke(input_state, config)  # type: ignore
 
      for m in output_state["messages"]:
          m.pretty_print()
 
-     # Extract the last AImessage
      assistant_message = output_state["messages"][-1].content
 
      return assistant_message, thread_id
@@ -125,7 +120,7 @@ def chat_with_patient(question: str, thread_id: str = None):
 
  with gr.Blocks() as app:
      question_input = gr.Textbox(label="Question")
-     thread_id_state = gr.State()  # to store thread_id between calls
+     thread_id_state = gr.State()
      output_chat = gr.Textbox(label="Assistant Response")
 
      submit_btn = gr.Button("Ask")
chat.py CHANGED
@@ -16,15 +16,13 @@ from chatlib.state_types import AppState
  from chatlib.guidlines_rag_agent_li import rag_retrieve
  from chatlib.patient_all_data import sql_chain
  from chatlib.idsr_check import idsr_check
-
- # from langchain_ollama.chat_models import ChatOllama
- # llm = ChatOllama(model="mistral:latest", temperature=0)
+ from chatlib.logger import get_logger
 
  tools = [rag_retrieve, sql_chain, idsr_check]
  llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
  llm_with_tools = llm.bind_tools([rag_retrieve, sql_chain, idsr_check])
 
- # System message
+
  sys_msg = SystemMessage(content="""
  You are a helpful assistant supporting clinicians during patient visits. You have three tools:
@@ -57,7 +55,7 @@ Do not include any text outside the JSON response.
  """)
 
 
- # Assistant Node
+
  def assistant(state: AppState) -> AppState:
 
      pk_hash = state.get("pk_hash", None)
@@ -68,9 +66,8 @@ def assistant(state: AppState) -> AppState:
      else:
          messages = [sys_msg] + state["messages"]
 
-     # Get the LLM/tool response
      new_message = llm_with_tools.invoke(messages)
-     # Extract the question from the latest HumanMessage, if present
+
 
      latest_question = ""
      for msg in reversed(messages):
@@ -78,39 +75,37 @@ def assistant(state: AppState) -> AppState:
          latest_question = msg.content
          break
 
-     state['messages'] = state['messages'] + [new_message]
-     state['question'] = latest_question
+     state['messages'] = state['messages'] + [new_message]  # type: ignore
+     state['question'] = latest_question  # type: ignore
      return state
-     # return {**state, "messages": state['messages'] + [new_message], "question": latest_question}
 
  # Graph
  builder = StateGraph(AppState)
 
- # Define nodes: these do the work
+
  builder.add_node("assistant", assistant)
  builder.add_node("tools", ToolNode(tools))
 
- # Define edges: these determine how the control flow moves
+
  builder.add_edge(START, "assistant")
  builder.add_conditional_edges("assistant", tools_condition)
  builder.add_edge("tools", "assistant")
  react_graph = builder.compile(checkpointer=memory)
 
- # Specify a thread
+
  config = {"configurable": {"thread_id": "30"}}
 
- # initialize state with patient pk hash
+
  input_state:AppState = {
      "messages": [HumanMessage(content="summarize the patient's clinical visits")],
      "question": "",
      "rag_result": "",
      "answer": "",
-     "pk_hash": "962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73"
+     "pk_hash": "962885FEADB7CCF19A2CC506D39818EC448D5396C4D1AEFDC59873090C7FBF73"  # type: ignore
  }
 
 
- # messages = [HumanMessage(content="how many appointments has this patient had?")]
- message_output = react_graph.invoke(input_state, config)
+ message_output = react_graph.invoke(input_state, config)  # type: ignore
 
  for m in message_output['messages']:
      m.pretty_print()
chatlib/assistant_node.py CHANGED
@@ -4,7 +4,6 @@ from langchain_core.messages import ToolMessage
  import json
 
 
- # Assistant Node
  def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
 
      if state.get("messages") and isinstance(state["messages"][-1], ToolMessage):
@@ -14,11 +13,10 @@ def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
          try:
              tool_content_dict = json.loads(tool_content)
              state.update(tool_content_dict)
-             # print("Merged tool content into state:", tool_content_dict)
          except json.JSONDecodeError:
              print("Failed to parse tool content as JSON")
      elif isinstance(tool_content, dict):
-         state.update(tool_content)
+         state.update(tool_content)  # type: ignore
 
      pk_hash = state.get("pk_hash", None)
@@ -30,20 +28,18 @@ def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
      else:
          messages = [sys_msg] + state.get("messages", [])
 
-     # Extract latest human question
      latest_question = ""
      for msg in reversed(messages):
          if isinstance(msg, HumanMessage):
              latest_question = msg.content
              break
 
-     # Generate AIMessage only if answer is new
      if "answer" in state and state["answer"]:
          if state.get("last_answer") != state["answer"]:
              last_tool = state.get("last_tool")
-
+
              if last_tool == "idsr_check":
-
+
                  disclaimer_needed = not state.get("idsr_disclaimer_shown", False)
                  print(disclaimer_needed)
                  format_instructions = """
@@ -72,20 +68,22 @@ def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
                      "Disclaimer: This is not a diagnosis. This is meant to help\n"
                      "identify possible matches based on priority IDSR diseases for clinician awareness.\n"
                  )
-                 state["idsr_disclaimer_shown"] = True
+                 state["idsr_disclaimer_shown"] = True  # type: ignore
              else:
                  disclaimer_text = ""
 
-             prompt = format_instructions.format(disclaimer=disclaimer_text) + f"\n\nResponse:\n{state['answer']}"
+             prompt = (
+                 format_instructions.format(disclaimer=disclaimer_text)
+                 + f"\n\nResponse:\n{state['answer']}"
+             )
              print("Prompt sent to LLM:\n", prompt)
-             # Call LLM to reformat the answer
+
              llm_response = llm.invoke(prompt)
              formatted_answer = llm_response.content.strip()
 
              ai_message = AIMessage(content=formatted_answer)
 
-             # Set the flag so disclaimer is not shown again
-             state["idsr_disclaimer_shown"] = True
+             state["idsr_disclaimer_shown"] = True  # type: ignore
 
          else:
              # For other tools, use the raw answer as is
@@ -93,13 +91,12 @@ def assistant(state: AppState, sys_msg, llm, llm_with_tools) -> AppState:
 
          messages = messages + [ai_message]
          state["messages"] = messages
-         state["question"] = latest_question
-         state["last_answer"] = state["answer"]  # track processed answer
+         state["question"] = latest_question  # type: ignore
+         state["last_answer"] = state["answer"]
          return state
 
-     # Otherwise, normal LLM with tools invocation
      new_message = llm_with_tools.invoke(messages)
      messages = messages + [new_message]
      state["messages"] = messages
-     state["question"] = latest_question
+     state["question"] = latest_question  # type: ignore
      return state
chatlib/guidlines_rag_agent_li.py CHANGED
@@ -1,12 +1,11 @@
  from llama_index.core import StorageContext, load_index_from_storage
  from .state_types import AppState
 
- # Load index for retrieval
+
  storage_context = StorageContext.from_defaults(persist_dir="guidance_docs/arv_metadata")
  index = load_index_from_storage(storage_context)
  retriever = index.as_retriever(
      similarity_top_k=3,
-     # Similarity threshold for filtering
      similarity_threshold=0.5,
  )
 
@@ -26,21 +25,6 @@ def rag_retrieve(query: str, llm) -> AppState:
          f"Guideline Text:\n{retrieved_text}"
      )
 
-     # Call your LLM to generate the summary
      summary_response = llm.invoke(summarization_prompt)
 
-     return {"rag_result": summary_response.content, "last_tool": "rag_retrieve"}
-
-
- # if __name__ == "__main__":
- #     # Test the function
- #     test_state = AppState(
- #         messages=[],
- #         question="What are the first-line treatments for HIV in Kenya?",
- #         rag_result="",
- #         query="",
- #         result="",
- #         answer=""
- #     )
- #     updated_state = rag_retrieve(test_state)
- #     print(updated_state["rag_result"])
+     return {"rag_result": summary_response.content, "last_tool": "rag_retrieve"}  # type: ignore
chatlib/idsr_check.py CHANGED
@@ -10,26 +10,23 @@
  import math
  from collections import Counter
 
- ## Keywords
- # load keywords from file
+
  with open("./guidance_docs/idsr_keywords.txt", "r", encoding="utf-8") as f:
      keywords = [line.strip() for line in f if line.strip()]
 
- # load vectorstore
  vectorstore = FAISS.load_local(
      "./guidance_docs/disease_vectorstore",
      OpenAIEmbeddings(),
      allow_dangerous_deserialization=True,
  )
 
- # load tagged documents from JSON for keyword matching to document metadata
+
  with open("./guidance_docs/tagged_documents.json", "r", encoding="utf-8") as f:
      doc_dicts = json.load(f)
 
  tagged_documents = [Document(**d) for d in doc_dicts]
 
- # Set up metrics for keywords
- # Count how many documents each keyword appears in
+
  keyword_doc_counts = Counter()
  total_docs = len(tagged_documents)
@@ -38,10 +35,9 @@ for doc in tagged_documents:
      for kw in seen:
          keyword_doc_counts[kw] += 1
 
- # Use log-scaled inverse frequency to avoid extreme values
+
  keyword_weights = {
-     kw: math.log(total_docs / (1 + count))  # add 1 to avoid div-by-zero
-     for kw, count in keyword_doc_counts.items()
+     kw: math.log(total_docs / (1 + count)) for kw, count in keyword_doc_counts.items()
  }
@@ -51,7 +47,6 @@ def score_doc(doc_to_score, matched_keywords):
      return sum(keyword_weights.get(kw, 0) for kw in overlap)
 
 
- ## Define helper functions
  class KeywordsOutput(BaseModel):
      keywords: List[str] = Field(
          description="List of relevant keywords extracted from the query"
@@ -75,7 +70,6 @@ Return the matching keywords as a JSON object with a single key "keywords" whose
  """
      )
 
-     # Compose the chain as a RunnableSequence: prompt -> llm -> parser
      chain = prompt | llm | parser
 
      output = chain.invoke(
@@ -86,24 +80,17 @@ Return the matching keywords as a JSON object with a single key "keywords" whose
          }
      )
 
-     # output is a list of strings, not a KeywordsOutput instance
      return output.keywords
 
 
- # function to perform hybrid search combining semantic search and keyword matching
  def hybrid_search_with_query_keywords(
      query, vstore, documents, keyword_list, llm, top_k=5
  ):
 
-     # Step 1: Semantic search
      semantic_hits = vstore.similarity_search(query, k=top_k)
 
-     # Step 2: Use GPT to extract keywords from the query
      matched_keywords = extract_keywords_with_gpt(query, llm, keyword_list)
 
-     # print("Matched keywords:", matched_keywords)
-
-     # Step 3: Filter docs whose metadata has any of those keywords
      keyword_hits = [
          doc
          for doc in documents
@@ -114,33 +101,22 @@ def hybrid_search_with_query_keywords(
          )
      ]
 
-     # print("Keyword hits:", len(keyword_hits))
-
-     # Step 4: Score keyword-matching documents by keyword rarity
      scored_docs = [
          (
              doc,
              score_doc(doc, matched_keywords),
-         )  # original (unnormalized) list used for scoring
+         )
          for doc in keyword_hits
      ]
 
-     # # print doc metadata and scores
-     # for doc, score in scored_docs:
-     #     print(f"Document: {doc.metadata.get('disease_name', 'Unknown')}, Score: {score}")
-     #     print(f"Matched Keywords: {doc.metadata.get('matched_keywords', [])}")
-
-     # Step 5: Rank and select top documents by score
      ranked_docs = sorted(scored_docs, key=lambda x: -x[1])
      top_docs = [doc for doc, score in ranked_docs if score > 0]
      top_3_docs = top_docs[:3]
 
-     # Step 4: Merge by unique content
      merged = {doc.page_content: doc for doc in semantic_hits + top_3_docs}
      return list(merged.values())
 
 
- # Main function to perform the IDSR check
  def idsr_check(query: str, llm) -> AppState:
      """
      Perform hybrid search combining semantic search and keyword matching.
@@ -151,7 +127,7 @@ def idsr_check(query: str, llm) -> AppState:
      Returns:
          AppState: Updated state with search results.
      """
-     # Perform hybrid search
+
      results = hybrid_search_with_query_keywords(
          query, vectorstore, tagged_documents, keywords, llm
      )
@@ -163,7 +139,6 @@ def idsr_check(query: str, llm) -> AppState:
          ]
      )
 
-     # Prepare prompt for the LLM
      prompt = """
      You are a medical assistant reviewing a brief clinical case in Kenya to help identify which diseases the patient may plausibly have. You have access to several disease definitions.
@@ -202,7 +177,6 @@ def idsr_check(query: str, llm) -> AppState:
          query=query, disease_definitions=disease_definitions
      )
 
-     # Call the LLM to generate the answer, passing the case description and disease definitions
      llm_response = llm.invoke(prompt)
      answer_text = (
          llm_response.content.strip()
@@ -210,4 +184,4 @@ def idsr_check(query: str, llm) -> AppState:
          else "No relevant disease information found."
      )
 
-     return {"answer": answer_text, "last_tool": "idsr_check"}
+     return {"answer": answer_text, "last_tool": "idsr_check"}  # type: ignore
chatlib/logger.py ADDED
@@ -0,0 +1,65 @@
+ import logging
+ import sys
+ import os
+ from pathlib import Path
+ from logging.handlers import RotatingFileHandler
+
+
+ LOG_DIR = Path(os.getenv("LOG_DIR", Path(__file__).resolve().parent.parent / "logs"))
+ LOG_DIR.mkdir(parents=True, exist_ok=True)
+
+ LOG_FILE = LOG_DIR / "app.log"
+
+ LOG_CONFIG = {
+     "console_format": "%(asctime)s | %(levelname)-8s | %(name)-20s | %(message)s",
+     "file_format": "%(asctime)s | %(levelname)-8s | %(name)-20s | %(funcName)s:%(lineno)d | %(message)s",
+     "date_format": "%Y-%m-%d %H:%M:%S",
+     "max_bytes": 5 * 1024 * 1024,  # 5MB
+     "backup_count": 3,
+ }
+
+ _logger_cache = {}
+
+
+ def get_logger(name: str = "text2sql-app") -> logging.Logger:
+     """Get a configured logger instance with console and file handlers
+
+     Args:
+         name: Logger name (usually __name__ of calling module)
+
+     Returns:
+         Configured Logger instance
+     """
+     if name in _logger_cache:
+         return _logger_cache[name]
+
+     logger = logging.getLogger(name)
+     logger.setLevel(logging.DEBUG)
+
+     if logger.hasHandlers():
+         _logger_cache[name] = logger
+         return logger
+
+     console_handler = logging.StreamHandler(sys.stdout)
+     console_handler.setLevel(logging.INFO)
+     console_formatter = logging.Formatter(
+         LOG_CONFIG["console_format"], LOG_CONFIG["date_format"]
+     )
+     console_handler.setFormatter(console_formatter)
+
+     file_handler = RotatingFileHandler(
+         LOG_FILE,
+         maxBytes=LOG_CONFIG["max_bytes"],
+         backupCount=LOG_CONFIG["backup_count"],
+     )
+     file_handler.setLevel(logging.DEBUG)
+     file_formatter = logging.Formatter(
+         LOG_CONFIG["file_format"], LOG_CONFIG["date_format"]
+     )
+     file_handler.setFormatter(file_formatter)
+
+     logger.addHandler(console_handler)
+     logger.addHandler(file_handler)
+
+     _logger_cache[name] = logger
+     return logger
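For reference, a minimal usage sketch of the new helper (the module name and messages here are illustrative):

    from chatlib.logger import get_logger

    logger = get_logger(__name__)
    logger.info("graph compiled")     # shown on the console (INFO and up) and written to logs/app.log
    logger.debug("full tool output")  # written to logs/app.log only; the console handler filters DEBUG
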
chatlib/patient_all_data.py CHANGED
@@ -3,14 +3,12 @@ import pandas as pd
  import os
 
 
- # define helper functions
  def safe(val):
      if pd.isnull(val) or val in ("", "NULL"):
          return "missing"
      return val
 
 
- # function to return only year of date
  def extract_year(date_str):
      if pd.isnull(date_str) or date_str in ("", "NULL"):
          return "missing"
@@ -20,7 +18,6 @@ def extract_year(date_str):
          return "invalid date"
 
 
- # Define the SQL query tool
  def sql_chain(query: str, llm, rag_result: str) -> dict:
      """
      Annotated function that takes a patient identifer (pk_hash) and returns
@@ -44,7 +41,6 @@ def sql_chain(query: str, llm, rag_result: str) -> dict:
      conn = sqlite3.connect("data/patient_demonstration.sqlite")
      cursor = conn.cursor()
 
-     # Write the SQL query using the QuerySQLDatabaseTool
      cursor.execute(
          "SELECT * FROM clinical_visits WHERE PatientPKHash = :pk_hash",
          {"pk_hash": pk_hash},
@@ -172,10 +168,6 @@ def sql_chain(query: str, llm, rag_result: str) -> dict:
 
      demographic_summary = summarize_demographics(demographic_data)
 
-     # cursor.execute("SELECT * FROM data_dictionary")
-     # rows = cursor.fetchall()
-     # data_dictionary = pd.DataFrame(rows, columns=[column[0] for column in cursor.description])
-
      conn.close()
 
      prompt = (
chatlib/patient_sql_agent.py CHANGED
@@ -11,10 +11,7 @@ from .state_types import AppState
  db = SQLDatabase.from_uri("sqlite:///data/patient_demonstration.sqlite")
  llm = ChatOpenAI(temperature=0.0, model="gpt-4o")
 
- # from langchain_ollama.chat_models import ChatOllama
- # local_llm = ChatOllama(model="mistral:latest", temperature=0)
 
- # setup template for sql query tool
  system_message = """
  Given an input question, create a syntactically correct {dialect} query to
  run to help find the answer. The database contains the following tables and columns:
@@ -109,10 +106,7 @@ def write_query(state: AppState) -> AppState:
      prompt = query_prompt_template.invoke(
          {
              "dialect": db.dialect,
-             # "top_k": 10,
-             "table_info": db.run(
-                 "SELECT * FROM data_dictionary;"
-             ),  # db.get_table_info(),
+             "table_info": db.run("SELECT * FROM data_dictionary;"),
              "input": state["question"],
              "guidelines": state.get("rag_result", "No guidelines provided."),
              "pk_hash": state.get("pk_hash", ""),
@@ -121,19 +115,16 @@
 
      structured_llm = llm.with_structured_output(QueryOutput)
      result = structured_llm.invoke(prompt)
-     # query_data["query"] = result["query"]
-     state["query"] = result["query"]
+     state["query"] = result["query"]  # type: ignore
      return state
-     # return {**state, "query": result["query"]}
 
 
  def execute_query(state: AppState) -> AppState:
      """Execute SQL query."""
 
      execute_query_tool = QuerySQLDatabaseTool(db=db)
-     state["result"] = execute_query_tool.invoke(state["query"])
+     state["result"] = execute_query_tool.invoke(state["query"])  # type: ignore
      return state
-     # return {**state, "result": execute_query_tool.invoke(state["query"])}
 
 
  def generate_answer(state: AppState) -> AppState:
@@ -153,19 +144,14 @@ def generate_answer(state: AppState) -> AppState:
          "In that case, ignore the SQL query too and generate an answer based only on the context. \n\n"
          f'Question: {state["question"]}\n'
          f'Context: {state.get("rag_result", "No guidelines provided.")}\n'
-         f'SQL Query: {state["query"]}\n'
-         f'SQL Result: {state["result"]}'
-         # f'Question: {state["question"]}\n'
-         # f'SQL Query: {state["query"]}\n'
-         # f'SQL Result: {state["result"]}'
+         f'SQL Query: {state["query"]}\n'  # type: ignore
+         f'SQL Result: {state["result"]}'  # type: ignore
      )
      response = llm.invoke(prompt)
-     state["answer"] = response.content
+     state["answer"] = response.content  # type: ignore
      return state
-     # return {**state, "answer": response.content}
 
 
- # now define a stateful tool that does the same thing
  @tool
  def sql_chain(state: AppState) -> dict:
      """
@@ -178,4 +164,4 @@ def sql_chain(state: AppState) -> dict:
      state = execute_query(state)
      state = generate_answer(state)
 
-     return state
+     return state  # type: ignore
chatlib/phi_filter.py CHANGED
@@ -4,7 +4,7 @@ import dateparser.search
  from datetime import datetime
  from dateutil.relativedelta import relativedelta
 
- # List of words indicating relative dates (to filter out)
+
  RELATIVE_INDICATORS = [
      "ago",
      "later",
@@ -28,7 +28,6 @@ def is_relative_date(text_relative):
      return any(word in text_lower for word in RELATIVE_INDICATORS)
 
 
- # Load Kenyan names list (basic txt file, one name per line, all lowercase for comparison)
  def load_kenyan_names(filepath="data/kenyan_names.txt"):
      if not Path(filepath).exists():
          return set()
@@ -78,12 +77,10 @@ def detect_and_redact_phi(text_input):
 
      phi_detected = bool(names_found or dates_found)
 
-     # Redact dates with relative descriptions
      for match, dt in dates_found:
          relative = describe_relative_date(dt)
          text_input = text_input.replace(match, relative)
 
-     # Redact Kenyan names
      for name in names_found:
          pattern = re.compile(rf"\b{name}\b", re.IGNORECASE)
          text_input = pattern.sub("[name]", text_input)
chatlib/state_types.py CHANGED
@@ -1,33 +1,13 @@
- from typing_extensions import TypedDict, Annotated
- from typing import Optional
+ from typing_extensions import TypedDict, Annotated, NotRequired
  from langchain_core.messages import AnyMessage
  from langgraph.graph.message import add_messages
 
- # class ConversationState(TypedDict):
- #     question: str
- #     answer: str
- #     rag_result: str
- #     pk_hash: Optional[str]
-
- # class QueryState(TypedDict):
- #     query: str
- #     result: Optional[str]
-
- # class AppState(TypedDict):
- #     messages: Annotated[list[AnyMessage], add_messages]
- #     conversation: ConversationState
- #     query_data: QueryState
-
- # class SqlChainOutputModel(BaseModel):
- #     messages: List[AnyMessage] = Field(...)
- #     conversation: ConversationState = Field(...)
-
 
  class AppState(TypedDict):
      messages: Annotated[list[AnyMessage], add_messages]
      question: str
      rag_result: str
      answer: str
-     last_answer: Optional[str] = None
-     last_tool: Optional[str] = None
-     idsr_disclaimer: bool = False
+     last_answer: NotRequired[str | None]
+     last_tool: NotRequired[str | None]
+     idsr_disclaimer: NotRequired[bool]
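Since TypedDict fields cannot carry defaults, the old `field: type = default` lines were never valid; with `NotRequired` the optional keys can simply be omitted when a state is constructed. A minimal sketch:

    state: AppState = {
        "messages": [],
        "question": "",
        "rag_result": "",
        "answer": "",
        # last_answer, last_tool, idsr_disclaimer may be left out entirely
    }

Note that `pk_hash` is still not declared on AppState, which is why the call sites above attach it under a `# type: ignore`.
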
main.py ADDED
@@ -0,0 +1,5 @@
+ def main():
+     print("Hello from clinicalassistant!")
+
+ if __name__ == "__main__":
+     main()
pyproject.toml ADDED
@@ -0,0 +1,26 @@
+ [project]
+ name = "clinicalassistant"
+ version = "0.1.0"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+     "black>=25.1.0",
+     "dateparser>=1.2.2",
+     "faiss-cpu>=1.11.0",
+     "gradio>=5.36.2",
+     "langchain-community>=0.3.27",
+     "langchain-openai>=0.3.27",
+     "langgraph>=0.5.2",
+     "llama-index>=0.12.48",
+     "pandas>=2.3.1",
+     "pylint>=3.3.7",
+     "python-dotenv>=1.1.1",
+ ]
+
+ [dependency-groups]
+ dev = [
+     "black>=25.1.0",
+     "mypy>=1.16.1",
+     "pytest>=8.4.1",
+ ]
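Note: with uv, the `dev` group under `[dependency-groups]` is installed by `uv sync` by default (pass `--no-dev` to skip it), and the `>=` constraints above are resolved to exact pinned versions recorded in uv.lock.
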
requirements.txt DELETED
@@ -1,15 +0,0 @@
- dateparser==1.2.2
- gradio==5.36.2
- langchain_community==0.3.27
- langchain_core==0.3.68
- langchain_openai==0.3.27
- langgraph==0.5.2
- llama_index==0.12.48
- pandas==2.3.1
- pydantic==2.11.7
- python-dotenv==1.1.1
- python_dateutil==2.9.0.post0
- typing_extensions==4.14.1
- pylint==3.3.7
- black==25.1.0
- faiss-cpu==1.11.0
uv.lock ADDED
The diff for this file is too large to render. See raw diff