JDFPalladium committed on
Commit
c1f3739
·
1 Parent(s): e887897

adding phi detection

Browse files
chatKenyaEMR.py CHANGED
@@ -21,6 +21,7 @@ from chatlib.state_types import AppState
21
  from chatlib.guidlines_rag_agent_li import rag_retrieve
22
  from chatlib.patient_all_data import sql_chain
23
  from chatlib.idsr_check import idsr_check
 
24
 
25
  tools = [rag_retrieve, sql_chain, idsr_check]
26
  llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
@@ -51,6 +52,7 @@ Do not include any text outside the JSON response.
51
 
52
  # Assistant Node
53
  def assistant(state: AppState) -> AppState:
 
54
  pk_hash = state.get("pk_hash", None)
55
 
56
  if pk_hash:
@@ -127,19 +129,21 @@ builder.add_conditional_edges("assistant", tools_condition)
127
  builder.add_edge("tools", "assistant")
128
  react_graph = builder.compile(checkpointer=memory)
129
 
130
- def chat_with_patient(question: str, pk_hash: str, thread_id: str = None):
131
  # Generate or reuse thread_id for session persistence
132
  if thread_id is None or thread_id == "":
133
  thread_id = str(uuid.uuid4())
134
 
 
 
 
135
  # Prepare input state with new user message and pk_hash
136
  # initialize state with patient pk hash
137
  input_state:AppState = {
138
  "messages": [HumanMessage(content=question)],
139
  "question": "",
140
  "rag_result": "",
141
- "answer": "",
142
- "pk_hash": pk_hash
143
  }
144
 
145
  config = {"configurable": {"thread_id": thread_id, "user_id": thread_id}}
@@ -157,7 +161,6 @@ def chat_with_patient(question: str, pk_hash: str, thread_id: str = None):
157
 
158
  with gr.Blocks() as demo:
159
  question_input = gr.Textbox(label="Question")
160
- pk_hash_input = gr.Textbox(label="Patient pk_hash")
161
  thread_id_state = gr.State() # to store thread_id between calls
162
  output_chat = gr.Textbox(label="Assistant Response")
163
 
@@ -165,7 +168,7 @@ with gr.Blocks() as demo:
165
 
166
  submit_btn.click(
167
  chat_with_patient,
168
- inputs=[question_input, pk_hash_input, thread_id_state],
169
  outputs=[output_chat, thread_id_state],
170
  )
171
 
 
21
  from chatlib.guidlines_rag_agent_li import rag_retrieve
22
  from chatlib.patient_all_data import sql_chain
23
  from chatlib.idsr_check import idsr_check
24
+ from chatlib.phi_filter import detect_and_redact_phi
25
 
26
  tools = [rag_retrieve, sql_chain, idsr_check]
27
  llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
 
52
 
53
  # Assistant Node
54
  def assistant(state: AppState) -> AppState:
55
+
56
  pk_hash = state.get("pk_hash", None)
57
 
58
  if pk_hash:
 
129
  builder.add_edge("tools", "assistant")
130
  react_graph = builder.compile(checkpointer=memory)
131
 
132
+ def chat_with_patient(question: str, thread_id: str = None):
133
  # Generate or reuse thread_id for session persistence
134
  if thread_id is None or thread_id == "":
135
  thread_id = str(uuid.uuid4())
136
 
137
+ # Check input for PHI and redact if necessary
138
+ question = detect_and_redact_phi(question)["redacted_text"]
139
+ print(question)
140
  # Prepare input state with new user message and pk_hash
141
  # initialize state with patient pk hash
142
  input_state:AppState = {
143
  "messages": [HumanMessage(content=question)],
144
  "question": "",
145
  "rag_result": "",
146
+ "answer": ""
 
147
  }
148
 
149
  config = {"configurable": {"thread_id": thread_id, "user_id": thread_id}}
 
161
 
162
  with gr.Blocks() as demo:
163
  question_input = gr.Textbox(label="Question")
 
164
  thread_id_state = gr.State() # to store thread_id between calls
165
  output_chat = gr.Textbox(label="Assistant Response")
166
 
 
168
 
169
  submit_btn.click(
170
  chat_with_patient,
171
+ inputs=[question_input, thread_id_state],
172
  outputs=[output_chat, thread_id_state],
173
  )
174
 
chatlib/patient_all_data.py CHANGED
@@ -1,5 +1,6 @@
1
  import sqlite3
2
  import pandas as pd
 
3
 
4
  from langchain_openai import ChatOpenAI
5
  llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
@@ -25,7 +26,7 @@ def extract_year(date_str):
25
  return 'invalid date'
26
 
27
  # Define the SQL query tool
28
- def sql_chain(query: str, rag_result: str, pk_hash: str) -> dict:
29
  """
30
  Annotated function that takes a patient identifer (pk_hash) and returns
31
  all data related to that patient from the SQL database.
@@ -41,7 +42,7 @@ def sql_chain(query: str, rag_result: str, pk_hash: str) -> dict:
41
  The answer will be generated based on the SQL query results and the context information.
42
  The function will return the updated state with the answer.
43
  """
44
-
45
  if not pk_hash:
46
  raise ValueError("pk_hash is required in state for SQL queries.")
47
 
@@ -160,8 +161,7 @@ def sql_chain(query: str, rag_result: str, pk_hash: str) -> dict:
160
  "You are a clinical assistant. Given the user question, clinical guideline context, "
161
  "and summarized patient data below, answer the question accurately and concisely. "
162
  "Only use the provided data; do not guess or hallucinate. "
163
- "If essential patient information is missing, explain what is missing instead of guessing. "
164
- "Please answer in no more than 100 words. \n\n"
165
  f"Question: {query}\n"
166
  f"Guideline Context: {rag_result}\n"
167
  f"Clinical Visits Summary:\n{visits_summary}\n"
 
1
  import sqlite3
2
  import pandas as pd
3
+ import os
4
 
5
  from langchain_openai import ChatOpenAI
6
  llm = ChatOpenAI(temperature = 0.0, model="gpt-4o")
 
26
  return 'invalid date'
27
 
28
  # Define the SQL query tool
29
+ def sql_chain(query: str, rag_result: str) -> dict:
30
  """
31
  Annotated function that takes a patient identifer (pk_hash) and returns
32
  all data related to that patient from the SQL database.
 
42
  The answer will be generated based on the SQL query results and the context information.
43
  The function will return the updated state with the answer.
44
  """
45
+ pk_hash = os.environ.get("PK_HASH")
46
  if not pk_hash:
47
  raise ValueError("pk_hash is required in state for SQL queries.")
48
 
 
161
  "You are a clinical assistant. Given the user question, clinical guideline context, "
162
  "and summarized patient data below, answer the question accurately and concisely. "
163
  "Only use the provided data; do not guess or hallucinate. "
164
+ "If essential patient information is missing, explain what is missing instead of guessing. \n\n"
 
165
  f"Question: {query}\n"
166
  f"Guideline Context: {rag_result}\n"
167
  f"Clinical Visits Summary:\n{visits_summary}\n"
chatlib/phi_filter.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import re
3
+ import dateparser.search
4
+ from datetime import datetime
5
+ from dateutil.relativedelta import relativedelta
6
+
7
# Words that mark a date expression as relative (e.g. "3 days ago",
# "next week"). Date matches containing one of these words are filtered out
# by the caller rather than treated as absolute dates.
RELATIVE_INDICATORS = [
    "ago", "later", "before", "after", "yesterday", "tomorrow",
    "today", "tonight", "last", "next", "this", "coming",
    "previous", "past"
]


def is_relative_date(text):
    """Return True when *text* contains a relative-date indicator word.

    Matching is case-insensitive and done on whole alphabetic words, so an
    indicator that merely occurs inside a longer word ("ago" in "Chicago",
    "past" in "pasta") does not count.

    Args:
        text: The date expression to inspect.

    Returns:
        True if any word of *text* is listed in RELATIVE_INDICATORS.
    """
    # Letters-only tokens: digits and punctuation glued to a word
    # ("5days", "ago,") do not hide the indicator.
    tokens = re.findall(r"[^\W\d_]+", text.lower())
    return not set(RELATIVE_INDICATORS).isdisjoint(tokens)
17
+
18
# Load the Kenyan names list (plain text file, one name per line); names are
# lower-cased so lookups can be case-insensitive.
def load_kenyan_names(filepath="data/kenyan_names.txt"):
    """Read a names file and return its entries as a lower-cased set.

    Args:
        filepath: Path to a UTF-8 text file with one name per line. The
            default is a relative path, so it is resolved against the
            current working directory.

    Returns:
        A set of stripped, lower-cased, non-blank lines. An empty set is
        returned when the file does not exist.
    """
    # Open first and handle the failure, instead of checking existence and
    # then opening: one filesystem access and no gap between check and use.
    try:
        with open(filepath, "r", encoding="utf-8") as handle:
            return {line.strip().lower() for line in handle if line.strip()}
    except FileNotFoundError:
        return set()
24
+
25
# Name lookup set, loaded once at import time and read by name_list_detect.
# (The debug print of the whole set was removed: it wrote the full name list
# to stdout on every import of this module.)
kenyan_names = load_kenyan_names()
27
def name_list_detect(text):
    """Return the words of *text* that appear in the ``kenyan_names`` set.

    Words are lower-cased for the lookup but returned as written in the
    text, in order of appearance; a word that occurs several times is
    returned once per occurrence.
    """
    hits = []
    for token in re.finditer(r"\b\w+\b", text):
        word = token.group(0)
        if word.lower() in kenyan_names:
            hits.append(word)
    return hits
31
+
32
def dateparser_detect(text):
    """Find date mentions in *text* that are not relative expressions.

    Runs ``dateparser.search.search_dates`` over the text and drops every
    hit whose matched substring (index 0 of the hit) is flagged by
    ``is_relative_date``.

    Returns:
        A list of the remaining ``search_dates`` hits, or an empty list
        when the search found nothing.
    """
    absolute_hits = []
    # search_dates may return a falsy value when nothing is found.
    for hit in dateparser.search.search_dates(text) or ():
        if is_relative_date(hit[0]):
            continue
        absolute_hits.append(hit)
    return absolute_hits
38
+
39
def describe_relative_date(dt, reference=None):
    """Describe *dt* relative to *reference* in coarse, human-readable terms.

    The largest non-zero unit wins: years, then months, then weeks (for
    seven or more leftover days), then days. Past dates read "N unit(s) ago",
    future dates read "in N unit(s)", and anything closer than one day in
    either direction reads "today".

    Args:
        dt: The datetime to describe.
        reference: The datetime to measure from. Defaults to the current
            time, using the same tzinfo as *dt* so that naive and aware
            datetimes are never mixed in the comparison below.

    Returns:
        A short phrase such as "2 years ago", "in 3 weeks" or "today".
    """
    if reference is None:
        # datetime.now(tz=None) is the plain naive "now", so this covers
        # both naive and timezone-aware inputs.
        reference = datetime.now(tz=dt.tzinfo)

    # Always subtract the earlier from the later moment so the delta's
    # components are non-negative; previously a future date produced
    # negative components and was reported as "today".
    is_future = dt > reference
    if is_future:
        delta = relativedelta(dt, reference)
    else:
        delta = relativedelta(reference, dt)

    if delta.years > 0:
        amount, unit = delta.years, "year"
    elif delta.months > 0:
        amount, unit = delta.months, "month"
    elif delta.days >= 7:
        amount, unit = delta.days // 7, "week"
    elif delta.days > 0:
        amount, unit = delta.days, "day"
    else:
        return "today"

    phrase = f"{amount} {unit}{'s' if amount > 1 else ''}"
    return f"in {phrase}" if is_future else f"{phrase} ago"
56
+
57
def detect_and_redact_phi(text):
    """Detect listed names and absolute dates in *text* and redact them.

    Dates found by ``dateparser_detect`` are replaced with the phrase from
    ``describe_relative_date``; words found by ``name_list_detect`` are
    replaced with the placeholder "[name]".

    Args:
        text: The free text to scan.

    Returns:
        A dict with:
            "phi_detected": True if any name or date was found.
            "kenyan_name_matches": the matched words, as found in the text.
            "dates": the matched date substrings.
            "redacted_text": *text* with dates and names replaced.
    """
    names_found = name_list_detect(text)
    dates_found = dateparser_detect(text)

    phi_detected = bool(names_found or dates_found)

    # Map each distinct matched date string to its relative description
    # (the first occurrence wins, as with sequential str.replace).
    date_replacements = {}
    for match, dt in dates_found:
        if match:
            date_replacements.setdefault(match, describe_relative_date(dt))

    if date_replacements:
        # One pass, longest alternative first: a short match such as "May"
        # can no longer rewrite part of a longer match such as
        # "5 May 2020" and leave the rest of that date unredacted.
        date_pattern = re.compile(
            "|".join(
                re.escape(match)
                for match in sorted(date_replacements, key=len, reverse=True)
            )
        )
        text = date_pattern.sub(
            lambda hit: date_replacements[hit.group(0)], text
        )

    # Redact names in a single case-insensitive pass over the distinct
    # names, so repeated names are not recompiled and the inserted
    # placeholder is never itself re-scanned.
    distinct_names = list(dict.fromkeys(name.lower() for name in names_found))
    if distinct_names:
        name_pattern = re.compile(
            r"\b(?:" + "|".join(re.escape(name) for name in distinct_names) + r")\b",
            re.IGNORECASE,
        )
        text = name_pattern.sub("[name]", text)

    return {
        "phi_detected": phi_detected,
        "kenyan_name_matches": names_found,
        "dates": [d[0] for d in dates_found],
        "redacted_text": text
    }
79
+
80
# Interactive command-line loop: read text, report detected PHI, and show the
# redacted version until the user quits.
if __name__ == "__main__":
    print("\n🔍 PHI Detection Tool (Kenyan context + redaction with relative dates)\n")
    while True:
        try:
            text = input("Enter clinical text (or 'q' to quit):\n> ")
        except (EOFError, KeyboardInterrupt):
            # End of piped input or Ctrl-C: leave quietly instead of
            # ending with a traceback.
            break
        # Tolerate stray whitespace around the quit command.
        if text.strip().lower() == 'q':
            break
        results = detect_and_redact_phi(text)

        if results["phi_detected"]:
            print("\n⚠️ Possible PHI detected!")
            if results["kenyan_name_matches"]:
                print(" - Possible Kenyan names:", results["kenyan_name_matches"])
            if results["dates"]:
                print(" - Dates detected:", results["dates"])

            print("\n🛡️ Redacted text:")
            print(results["redacted_text"])
        else:
            print("\n✅ No PHI detected.")
        print("\n---\n")
chatlib/state_types.py CHANGED
@@ -30,4 +30,4 @@ class AppState(TypedDict):
30
  answer: str
31
  last_answer: Optional[str] = None
32
  last_tool: Optional[str] = None
33
- pk_hash: str
 
30
  answer: str
31
  last_answer: Optional[str] = None
32
  last_tool: Optional[str] = None
33
+
requirements.txt CHANGED
@@ -7,9 +7,6 @@ langchain-community
7
  langchain-core
8
  langchain-openai
9
  notebook
10
- tavily-python
11
- wikipedia
12
- trustcall
13
  langgraph-cli[inmem]
14
  llama_index==0.12.34
15
  pylint
@@ -20,4 +17,4 @@ gradio
20
  faiss-cpu
21
  tiktoken
22
  openai
23
- rapidfuzz
 
7
  langchain-core
8
  langchain-openai
9
  notebook
 
 
 
10
  langgraph-cli[inmem]
11
  llama_index==0.12.34
12
  pylint
 
17
  faiss-cpu
18
  tiktoken
19
  openai
20
+ dateparser