Spaces:

Nishauri
/

ClinicianAssistant

Sleeping

App Files Files Community

JDFPalladium committed on Jul 9, 2025

Commit

c1f3739

1 Parent(s): e887897

adding phi detection

Browse files

Files changed (5) hide show

chatKenyaEMR.py +8 -5
chatlib/patient_all_data.py +4 -4
chatlib/phi_filter.py +99 -0
chatlib/state_types.py +1 -1
requirements.txt +1 -4

chatKenyaEMR.py CHANGED Viewed

@@ -21,6 +21,7 @@ from chatlib.state_types import AppState
 from chatlib.guidlines_rag_agent_li import rag_retrieve
 from chatlib.patient_all_data import sql_chain
 from chatlib.idsr_check import idsr_check
 tools = [rag_retrieve, sql_chain, idsr_check]
 llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
@@ -51,6 +52,7 @@ Do not include any text outside the JSON response.
 # Assistant Node
 def assistant(state: AppState) -> AppState:
     pk_hash = state.get("pk_hash", None)
     if pk_hash:
@@ -127,19 +129,21 @@ builder.add_conditional_edges("assistant", tools_condition)
 builder.add_edge("tools", "assistant")
 react_graph = builder.compile(checkpointer=memory)
-def chat_with_patient(question: str, pk_hash: str, thread_id: str = None):
     # Generate or reuse thread_id for session persistence
     if thread_id is None or thread_id == "":
         thread_id = str(uuid.uuid4())
     # Prepare input state with new user message and pk_hash
     # initialize state with patient pk hash
     input_state:AppState = {
         "messages": [HumanMessage(content=question)],
         "question": "",
         "rag_result": "",
-        "answer": "",
-        "pk_hash": pk_hash
     }
     config = {"configurable": {"thread_id": thread_id, "user_id": thread_id}}
@@ -157,7 +161,6 @@ def chat_with_patient(question: str, pk_hash: str, thread_id: str = None):
 with gr.Blocks() as demo:
     question_input = gr.Textbox(label="Question")
-    pk_hash_input = gr.Textbox(label="Patient pk_hash")
     thread_id_state = gr.State()  # to store thread_id between calls
     output_chat = gr.Textbox(label="Assistant Response")
@@ -165,7 +168,7 @@ with gr.Blocks() as demo:
     submit_btn.click(
         chat_with_patient,
-        inputs=[question_input, pk_hash_input, thread_id_state],
         outputs=[output_chat, thread_id_state],
     )

 from chatlib.guidlines_rag_agent_li import rag_retrieve
 from chatlib.patient_all_data import sql_chain
 from chatlib.idsr_check import idsr_check
+from chatlib.phi_filter import detect_and_redact_phi
 tools = [rag_retrieve, sql_chain, idsr_check]
 llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
 # Assistant Node
 def assistant(state: AppState) -> AppState:
     pk_hash = state.get("pk_hash", None)
     if pk_hash:
 builder.add_edge("tools", "assistant")
 react_graph = builder.compile(checkpointer=memory)
+def chat_with_patient(question: str, thread_id: str = None):
     # Generate or reuse thread_id for session persistence
     if thread_id is None or thread_id == "":
         thread_id = str(uuid.uuid4())
+    # Check input for PHI and redact if necessary
+    question = detect_and_redact_phi(question)["redacted_text"]
+    print(question)
     # Prepare input state with new user message and pk_hash
     # initialize state with patient pk hash
     input_state:AppState = {
         "messages": [HumanMessage(content=question)],
         "question": "",
         "rag_result": "",
+        "answer": ""
     }
     config = {"configurable": {"thread_id": thread_id, "user_id": thread_id}}
 with gr.Blocks() as demo:
     question_input = gr.Textbox(label="Question")
     thread_id_state = gr.State()  # to store thread_id between calls
     output_chat = gr.Textbox(label="Assistant Response")
     submit_btn.click(
         chat_with_patient,
+        inputs=[question_input, thread_id_state],
         outputs=[output_chat, thread_id_state],
     )

chatlib/patient_all_data.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import sqlite3
 import pandas as pd
 from langchain_openai import ChatOpenAI
 llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
@@ -25,7 +26,7 @@ def extract_year(date_str):
         return 'invalid date'
 # Define the SQL query tool
-def sql_chain(query: str, rag_result: str, pk_hash: str) -> dict:
     """
     Annotated function that takes a patient identifer (pk_hash) and returns
     all data related to that patient from the SQL database.
@@ -41,7 +42,7 @@ def sql_chain(query: str, rag_result: str, pk_hash: str) -> dict:
     The answer will be generated based on the SQL query results and the context information.
     The function will return the updated state with the answer.
     """
     if not pk_hash:
         raise ValueError("pk_hash is required in state for SQL queries.")
@@ -160,8 +161,7 @@ def sql_chain(query: str, rag_result: str, pk_hash: str) -> dict:
         "You are a clinical assistant. Given the user question, clinical guideline context, "
         "and summarized patient data below, answer the question accurately and concisely. "
         "Only use the provided data; do not guess or hallucinate. "
-        "If essential patient information is missing, explain what is missing instead of guessing. "
-        "Please answer in no more than 100 words. \n\n"
         f"Question: {query}\n"
         f"Guideline Context: {rag_result}\n"
         f"Clinical Visits Summary:\n{visits_summary}\n"

 import sqlite3
 import pandas as pd
+import os
 from langchain_openai import ChatOpenAI
 llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
         return 'invalid date'
 # Define the SQL query tool
+def sql_chain(query: str, rag_result: str) -> dict:
     """
     Annotated function that takes a patient identifer (pk_hash) and returns
     all data related to that patient from the SQL database.
     The answer will be generated based on the SQL query results and the context information.
     The function will return the updated state with the answer.
     """
+    pk_hash = os.environ.get("PK_HASH")
     if not pk_hash:
         raise ValueError("pk_hash is required in state for SQL queries.")
         "You are a clinical assistant. Given the user question, clinical guideline context, "
         "and summarized patient data below, answer the question accurately and concisely. "
         "Only use the provided data; do not guess or hallucinate. "
+        "If essential patient information is missing, explain what is missing instead of guessing. \n\n"
         f"Question: {query}\n"
         f"Guideline Context: {rag_result}\n"
         f"Clinical Visits Summary:\n{visits_summary}\n"

chatlib/phi_filter.py ADDED Viewed

	@@ -0,0 +1,99 @@

+from pathlib import Path
+import re
+import dateparser.search
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
# Words that mark a date expression as relative rather than absolute.
# Matches containing one of these are filtered out of the detected dates.
RELATIVE_INDICATORS = [
    "ago", "later", "before", "after", "yesterday", "tomorrow",
    "today", "tonight", "last", "next", "this", "coming",
    "previous", "past"
]

# Whole-word, case-insensitive matcher built once at import time. A plain
# substring test also fires inside unrelated words ("ago" in "Chicago",
# "past" in "pasta"), which would wrongly classify the text as relative.
# NOTE: built from RELATIVE_INDICATORS at import; later mutation of the list
# is not reflected here.
_RELATIVE_INDICATOR_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(word) for word in RELATIVE_INDICATORS) + r")\b",
    re.IGNORECASE,
)


def is_relative_date(text):
    """Return True if *text* contains a relative-date indicator word.

    Args:
        text: The string to inspect.

    Returns:
        True when any entry of ``RELATIVE_INDICATORS`` occurs in *text* as a
        whole word (case-insensitive), otherwise False.
    """
    return _RELATIVE_INDICATOR_RE.search(text) is not None
# Load Kenyan names list (basic txt file, one name per line, all lowercase for comparison)
def load_kenyan_names(filepath="data/kenyan_names.txt"):
    """Read the name list at *filepath* into a set of lowercase names.

    Args:
        filepath: Path to a UTF-8 text file with one name per line.

    Returns:
        A set of stripped, lowercased, non-empty lines. An empty set is
        returned when *filepath* does not exist or is not a regular file.
    """
    try:
        # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise stick to the first name and stop it matching.
        with open(filepath, "r", encoding="utf-8-sig") as f:
            return {line.strip().lower() for line in f if line.strip()}
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        # Open-and-handle avoids the gap between an existence check and the
        # open call, and keeps a directory path from raising.
        return set()


# Name set used by the detector, loaded once at import time. The contents are
# deliberately not printed: that would dump the whole list on every import.
kenyan_names = load_kenyan_names()
def name_list_detect(text, names=None):
    """Return the words in *text* that appear in the known-names set.

    Words joined by an apostrophe or hyphen (for example "Ng'ang'a") are
    first looked up as a single token, because splitting them on the joiner
    would never match a list entry that contains it. If the joined token is
    not a known name, each of its plain-word parts is looked up on its own,
    so "John's" still yields "John".

    Args:
        text: The text to scan.
        names: Optional collection of lowercase names to match against.
            Defaults to the module-level ``kenyan_names``.

    Returns:
        A list of matched substrings in order of appearance, with their
        original casing. Repeated occurrences are repeated in the list.
    """
    if names is None:
        names = kenyan_names
    matches = []
    for token in re.findall(r"\w+(?:['\u2019-]\w+)*", text):
        # Treat a typographic apostrophe like a straight one for the lookup.
        if token.lower().replace("\u2019", "'") in names:
            matches.append(token)
            continue
        matches.extend(
            part for part in re.findall(r"\w+", token) if part.lower() in names
        )
    return matches
def dateparser_detect(text):
    """Find date expressions in *text*, dropping the relative ones.

    Runs ``dateparser.search.search_dates`` over *text* and keeps every
    result whose first element is not flagged by ``is_relative_date``.

    Args:
        text: The text to scan.

    Returns:
        The retained search results as a list, or an empty list when the
        search finds nothing.
    """
    hits = dateparser.search.search_dates(text)
    if hits:
        return [hit for hit in hits if not is_relative_date(hit[0])]
    return []
def describe_relative_date(dt, reference=None):
    """Describe *dt* relative to *reference* in its largest whole unit.

    Only the largest non-zero unit is reported (years, then months, then
    weeks, then days); a gap of less than one full day is "today". Past
    dates read "N unit(s) ago" and future dates read "in N unit(s)".

    Args:
        dt: The datetime to describe.
        reference: The datetime to measure from. Defaults to the current
            time, taken in ``dt``'s timezone when ``dt`` is timezone-aware.
            An explicit reference must match ``dt``'s awareness.

    Returns:
        A string such as "3 years ago", "1 week ago", "in 2 months" or
        "today".
    """
    if reference is None:
        # Use dt's tzinfo so the two values are never a naive/aware mix,
        # which cannot be compared.
        reference = datetime.now(tz=dt.tzinfo)

    # Always measure from the earlier to the later instant. With a negative
    # delta none of the "> 0" checks below can succeed, so every future date
    # would fall through to "today".
    is_future = dt > reference
    if is_future:
        delta = relativedelta(dt, reference)
    else:
        delta = relativedelta(reference, dt)

    if delta.years > 0:
        count, unit = delta.years, "year"
    elif delta.months > 0:
        count, unit = delta.months, "month"
    elif delta.days >= 7:
        count, unit = delta.days // 7, "week"
    elif delta.days > 0:
        count, unit = delta.days, "day"
    else:
        return "today"

    label = f"{count} {unit}{'s' if count > 1 else ''}"
    return f"in {label}" if is_future else f"{label} ago"
def detect_and_redact_phi(text):
    """Detect names and absolute dates in *text* and return a redacted copy.

    Detected dates are replaced by the output of ``describe_relative_date``
    and detected names by the literal ``[name]``.

    Args:
        text: The text to scan and redact.

    Returns:
        A dict with the keys:
            ``phi_detected``: True if any name or date was found.
            ``kenyan_name_matches``: the list from ``name_list_detect``.
            ``dates``: the matched date substrings.
            ``redacted_text``: *text* with dates and names replaced.
    """
    names_found = name_list_detect(text)
    dates_found = dateparser_detect(text)
    phi_detected = bool(names_found or dates_found)

    # Redact dates with relative descriptions.
    # All date strings are substituted in a single pass, so a replacement
    # that was just inserted is never re-scanned by a later, shorter match.
    # For a repeated match string the first description wins.
    replacements = {}
    for match, dt in dates_found:
        if match and match not in replacements:
            replacements[match] = describe_relative_date(dt)
    if replacements:
        # Longest first, so a match that is a prefix of another cannot
        # pre-empt the longer one.
        ordered = sorted(replacements, key=len, reverse=True)
        date_pattern = re.compile("|".join(re.escape(m) for m in ordered))
        text = date_pattern.sub(lambda hit: replacements[hit.group(0)], text)

    # Redact Kenyan names.
    # One escaped, de-duplicated alternation, so name text is always matched
    # literally and the inserted "[name]" marker is never re-processed.
    unique_names = sorted(set(names_found), key=len, reverse=True)
    if unique_names:
        name_pattern = re.compile(
            r"\b(?:" + "|".join(re.escape(n) for n in unique_names) + r")\b",
            re.IGNORECASE,
        )
        text = name_pattern.sub("[name]", text)

    return {
        "phi_detected": phi_detected,
        "kenyan_name_matches": names_found,
        "dates": [d[0] for d in dates_found],
        "redacted_text": text
    }
# Interactive console loop: read text, report what was detected, show the
# redacted version, and repeat until the user enters 'q'.
if __name__ == "__main__":
    print("\n🔍 PHI Detection Tool (Kenyan context + redaction with relative dates)\n")

    while True:
        entry = input("Enter clinical text (or 'q' to quit):\n> ")
        if entry.lower() == 'q':
            break

        report = detect_and_redact_phi(entry)

        if not report["phi_detected"]:
            print("\n✅ No PHI detected.")
        else:
            print("\n⚠️  Possible PHI detected!")
            name_hits = report["kenyan_name_matches"]
            if name_hits:
                print(" - Possible Kenyan names:", name_hits)
            date_hits = report["dates"]
            if date_hits:
                print(" - Dates detected:", date_hits)
            print("\n🛡️  Redacted text:")
            print(report["redacted_text"])

        # Separator between consecutive inputs.
        print("\n---\n")

chatlib/state_types.py CHANGED Viewed

@@ -30,4 +30,4 @@ class AppState(TypedDict):
     answer: str
     last_answer: Optional[str] = None
     last_tool: Optional[str] = None
-    pk_hash: str

     answer: str
     last_answer: Optional[str] = None
     last_tool: Optional[str] = None

requirements.txt CHANGED Viewed

@@ -7,9 +7,6 @@ langchain-community
 langchain-core
 langchain-openai
 notebook
-tavily-python
-wikipedia
-trustcall
 langgraph-cli[inmem]
 llama_index==0.12.34
 pylint
@@ -20,4 +17,4 @@ gradio
 faiss-cpu
 tiktoken
 openai
-rapidfuzz

 langchain-core
 langchain-openai
 notebook
 langgraph-cli[inmem]
 llama_index==0.12.34
 pylint
 faiss-cpu
 tiktoken
 openai
+dateparser