sujataprakashdatycs committed on
Commit
a7fab2a
·
verified ·
1 Parent(s): baef59a

Update ComorbidityCheckerAgent.py

Browse files
Files changed (1) hide show
  1. ComorbidityCheckerAgent.py +231 -107
ComorbidityCheckerAgent.py CHANGED
@@ -1,168 +1,292 @@
1
- import os
2
  import json
3
- import pandas as pd
4
- from PyPDF2 import PdfReader
5
  from json_repair import repair_json
6
- from typing import List, Dict, Any, Optional
7
  from crewai import Agent, Task, Crew, Process
8
- from crewai_tools import SerperDevTool,tool
9
  from langchain_openai import ChatOpenAI
10
- from langchain_community.vectorstores import Chroma
11
- from embedding_manager import DirectoryEmbeddingManager
12
-
13
- SEED_SOURCES = [
14
- "https://www.cms.gov/medicare/payment/medicare-advantage-rates-statistics/risk-adjustment",
15
- "https://www.cms.gov/data-research/monitoring-programs/medicare-risk-adjustment-data-validation-program",
16
- "https://www.cms.gov/files/document/fy-2024-icd-10-cm-coding-guidelines-updated-02/01/2024.pdf",
17
- "https://www.aapc.com/blog/41212-include-meat-in-your-risk-adjustment-documentation/",
18
- ]
19
 
 
20
 
21
  class ComorbidityCheckerAgent:
22
  """
23
- Uses a two-step AI agent process to first identify potential comorbidities for a
24
- MEAT-validated diagnosis and then verifies each one against the patient chart context.
 
25
  """
26
- def __init__(self, pdf_dir: str, hcc_code: str, model_version: str):
27
- self.embed_manager = DirectoryEmbeddingManager(pdf_dir)
28
- self.llm = ChatOpenAI(model=os.environ.get("OPENAI_MODEL_NAME", "gpt-4-turbo"), temperature=0)
29
  self.hcc_code = hcc_code.strip()
30
  self.model_version = model_version.strip().upper()
31
- self.search_tool = SerperDevTool()
32
- #self.search_tool = SerperDevTool(seed_sources=SEED_SOURCES)
33
 
34
  @tool("patient_chart_search")
35
  def patient_chart_search(query: str) -> str:
36
  """
37
- Search the patient chart embeddings and return all top 15 results as a single string.
38
- Each result is preserved individually and then combined at the end.
39
  """
40
  print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
41
  vectordb = self.embed_manager.get_or_create_embeddings()
42
  results = vectordb.similarity_search(query, k=15)
 
43
 
44
- # Keep all 15 results separate internally
45
- all_results = [res.page_content for res in results]
46
-
47
- # Combine into a single string for output (same format as before)
48
- combined_results = "\n---\n".join(all_results)
49
-
50
- return combined_results
51
-
52
 
53
- # Register the agent with the tool
54
  self.agent = Agent(
55
  role="Clinical Coding and Comorbidity Analyst",
56
  goal=(
57
- "First, identify all clinically significant comorbidities for a given primary diagnosis, "
58
- "focusing on those relevant to HCC risk adjustment. Second, verify the presence of "
59
- "these comorbidities in a patient's chart and present the findings in a structured JSON format."
60
  ),
61
  backstory=(
62
- "You are an expert clinical coding analyst specializing in risk adjustment and Hierarchical Condition Categories (HCC). "
63
- "Your primary skill is to research disease patterns and then meticulously cross-reference them with patient records embeddings "
64
- "to ensure accurate documentation and coding. You provide clear, evidence-based findings."
65
  ),
66
- tools=[self.search_tool,patient_chart_search],
67
  verbose=True,
68
  memory=False,
69
  llm=self.llm,
70
  )
71
 
72
- def check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
73
- """
74
- Orchestrates the two-task process for a single primary diagnosis.
75
- """
76
  primary_diagnosis = diagnosis_entry["diagnosis"]
77
- final_result = {"diagnosis": primary_diagnosis, "comorbidities": []}
78
 
79
- # --- Task 1: Identify Potential Comorbidities ---
80
  identify_task = Task(
81
  description=(
82
- f"For the primary diagnosis of '{primary_diagnosis}', generate a full list of common and clinically "
83
- f"significant comorbidities. Focus on conditions relevant for HCC {self.hcc_code} risk adjustment "
84
- f"in the {self.model_version} model. Use your search tool for research if needed."
85
- ),
86
- expected_output=(
87
- "A JSON object with a single key 'potential_comorbidities' containing a list of strings. "
88
- "Example: {\"potential_comorbidities\": [\"Hypertension\", \"Diabetes Mellitus Type 2\"]}"
89
  ),
 
90
  agent=self.agent,
91
  json_mode=True
92
  )
93
 
94
- print(f"\n[TASK 1] Identifying potential comorbidities for '{primary_diagnosis}'...")
95
  crew = Crew(agents=[self.agent], tasks=[identify_task], process=Process.sequential)
96
- result = crew.kickoff()
97
-
98
  try:
99
- comorbidities_to_check = json.loads(repair_json(result)).get("potential_comorbidities", [])
100
- if not comorbidities_to_check:
101
- print("[INFO] No potential comorbidities were identified by the agent.")
102
- return final_result
103
- print(f"[INFO] Identified potential comorbidities: {comorbidities_to_check}")
104
- except (json.JSONDecodeError, TypeError):
105
- print("[ERROR] Failed to decode the list of potential comorbidities. Aborting.")
106
  return final_result
107
 
108
- # --- Task 2: Verify Each Comorbidity in the Chart ---
109
  verify_task = Task(
110
- description=(
111
- f"The patient has a primary diagnosis of '{primary_diagnosis}'.\n"
112
- f"A list of potential comorbidities has been identified: {comorbidities_to_check}.\n\n"
113
- "For EACH comorbidity, you MUST use the `patient_chart_search` tool, which queries the persistent "
114
- "embedding database of the patient's chart. **Use all 15 retrieved results individually** to "
115
- "determine presence or absence of each comorbidity.\n\n"
116
- "After reviewing all results, construct a final JSON object with a single key 'comorbidity_analysis'. "
117
- "Ensure there is one object for EACH comorbidity from the initial list. The 'context' field should "
118
- "combine all relevant evidence snippets into a single string."
119
- ),
120
- expected_output=(
121
- "A final JSON object with the key 'comorbidity_analysis'. This key should contain a list "
122
- "where each item has the structure: \n"
123
- '{\n'
124
- ' "condition": "<name of comorbidity>",\n'
125
- ' "is_present": true/false,\n'
126
- ' "context": "<Use all 15 retrieved results individually and combined them according to comorbidity>",\n'
127
- ' "rationale": "<one-line explanation of your finding>"\n'
128
- '}'
129
- ),
130
- agent=self.agent,
131
- json_mode=True
132
- )
133
-
134
-
135
-
136
- print(f"\n[TASK 2] Verifying identified comorbidities in the patient chart...")
137
- crew = Crew(agents=[self.agent], tasks=[verify_task], process=Process.sequential)
138
- result = crew.kickoff()
139
 
 
 
140
  try:
141
- analysis = json.loads(repair_json(result))
142
  final_result["comorbidities"] = analysis.get("comorbidity_analysis", [])
143
- except (json.JSONDecodeError, TypeError):
144
- print(f"[ERROR] Failed to decode the final comorbidity analysis for '{primary_diagnosis}'.")
145
  final_result["comorbidities"] = []
146
 
147
  return final_result
148
 
149
  def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
150
  """
151
- Main execution loop. It iterates through diagnoses that have met MEAT criteria
152
- and runs the comorbidity check for each.
153
  """
154
- final_results = []
155
  for entry in meat_validated_results:
156
- meat_criteria = entry.get("meat", {})
157
- if isinstance(meat_criteria, dict) and any(meat_criteria.values()):
158
- print(f"\n[INFO] Checking for structured comorbidities for: {entry['diagnosis']}")
159
- entry_with_comorbidities = self.check_comorbidities_for_one(entry)
160
- final_results.append(entry_with_comorbidities)
161
- print(f"[COMORBIDITIES CHECKED] Analysis complete for {entry['diagnosis']}.")
162
  else:
163
- if entry.get("answer", "").lower() == "yes":
 
164
  entry["comorbidities"] = {
165
  "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
166
  }
167
- final_results.append(entry)
168
- return final_results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # comorbidity_checker.py
2
  import json
3
+ from typing import List, Dict
 
4
  from json_repair import repair_json
 
5
  from crewai import Agent, Task, Crew, Process
6
+ from crewai_tools import tool, SerperDevTool
7
  from langchain_openai import ChatOpenAI
 
 
 
 
 
 
 
 
 
8
 
9
+ from embedding_manager import DirectoryEmbeddingManager
10
 
11
  class ComorbidityCheckerAgent:
12
  """
13
+ Two-step flow:
14
+ 1) Identify clinically significant comorbidities for the primary diagnosis (HCC-aware).
15
+ 2) Verify each comorbidity against the patient chart embeddings (top-15).
16
  """
17
+ def __init__(self, pdf_dir_or_file: str, hcc_code: str, model_version: str, model: str = "gpt-4o"):
18
+ self.embed_manager = DirectoryEmbeddingManager(pdf_dir_or_file)
19
+ self.llm = ChatOpenAI(model=model, temperature=0)
20
  self.hcc_code = hcc_code.strip()
21
  self.model_version = model_version.strip().upper()
22
+ self.search_tool = SerperDevTool() # available if you want to expand later
 
23
 
24
  @tool("patient_chart_search")
25
  def patient_chart_search(query: str) -> str:
26
  """
27
+ Query persistent patient-chart embeddings.
28
+ Returns the top-15 results concatenated with separators.
29
  """
30
  print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
31
  vectordb = self.embed_manager.get_or_create_embeddings()
32
  results = vectordb.similarity_search(query, k=15)
33
+ return "\n---\n".join([res.page_content for res in results])
34
 
35
+ self.patient_chart_search = patient_chart_search
 
 
 
 
 
 
 
36
 
 
37
  self.agent = Agent(
38
  role="Clinical Coding and Comorbidity Analyst",
39
  goal=(
40
+ "Identify clinically significant comorbidities for a primary diagnosis relevant to HCC; "
41
+ "verify presence in the patient's chart with embeddings."
 
42
  ),
43
  backstory=(
44
+ "Expert risk-adjustment analyst who cross-references guidelines with chart evidence."
 
 
45
  ),
46
+ tools=[self.patient_chart_search],
47
  verbose=True,
48
  memory=False,
49
  llm=self.llm,
50
  )
51
 
52
+ def _check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
 
 
 
53
  primary_diagnosis = diagnosis_entry["diagnosis"]
54
+ final_result: Dict = {"diagnosis": primary_diagnosis, "comorbidities": []}
55
 
56
+ # Task 1 Identify comorbidities
57
  identify_task = Task(
58
  description=(
59
+ f"For primary diagnosis '{primary_diagnosis}', list common and clinically meaningful comorbidities "
60
+ f"that matter for HCC {self.hcc_code} in {self.model_version}."
61
+ "Return STRICT JSON: {\"potential_comorbidities\": [\"...\"]}"
 
 
 
 
62
  ),
63
+ expected_output="Strict JSON with key potential_comorbidities (list of strings).",
64
  agent=self.agent,
65
  json_mode=True
66
  )
67
 
 
68
  crew = Crew(agents=[self.agent], tasks=[identify_task], process=Process.sequential)
69
+ identified = crew.kickoff()
 
70
  try:
71
+ comorbidities = json.loads(repair_json(identified)).get("potential_comorbidities", [])
72
+ except Exception:
73
+ comorbidities = []
74
+
75
+ if not comorbidities:
 
 
76
  return final_result
77
 
78
+ # Task 2 Verify each comorbidity via patient_chart_search
79
  verify_task = Task(
80
+ description=(
81
+ f"Primary diagnosis: '{primary_diagnosis}'. Potential comorbidities: {comorbidities}.\n"
82
+ "For EACH comorbidity, call the patient_chart_search tool (top-15). "
83
+ "Decide presence/absence using ONLY returned snippets.\n\n"
84
+ "Return STRICT JSON:\n"
85
+ "{ \"comorbidity_analysis\": [\n"
86
+ " {\"condition\":\"...\",\"is_present\":true/false,\"context\":\"<combined snippets>\",\"rationale\":\"...\"},\n"
87
+ " ... ] }"
88
+ ),
89
+ expected_output="Strict JSON with key comorbidity_analysis (list of objects).",
90
+ agent=self.agent,
91
+ json_mode=True
92
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
+ crew = Crew(agents=[self.agent], tasks=[verify_task], process=Process.sequential)
95
+ verified = crew.kickoff()
96
  try:
97
+ analysis = json.loads(repair_json(verified))
98
  final_result["comorbidities"] = analysis.get("comorbidity_analysis", [])
99
+ except Exception:
 
100
  final_result["comorbidities"] = []
101
 
102
  return final_result
103
 
104
  def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
105
  """
106
+ Accepts entries that already passed MEAT (i.e., meat dict exists and has True somewhere).
 
107
  """
108
+ out: List[Dict] = []
109
  for entry in meat_validated_results:
110
+ meat = entry.get("meat", {})
111
+ if isinstance(meat, dict) and any(meat.values()):
112
+ print(f"[INFO] Checking structured comorbidities for: {entry['diagnosis']}")
113
+ out.append(self._check_comorbidities_for_one(entry))
 
 
114
  else:
115
+ # If earlier stages claim 'yes' but MEAT not met, pass through with a note.
116
+ if entry.get("answer_explicit", "").lower() == "yes" or entry.get("answer_implicit", "").lower() == "yes":
117
  entry["comorbidities"] = {
118
  "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
119
  }
120
+ out.append(entry)
121
+ return out
122
+
123
+
124
+
125
+ # import os
126
+ # import json
127
+ # import pandas as pd
128
+ # from PyPDF2 import PdfReader
129
+ # from json_repair import repair_json
130
+ # from typing import List, Dict, Any, Optional
131
+ # from crewai import Agent, Task, Crew, Process
132
+ # from crewai_tools import SerperDevTool,tool
133
+ # from langchain_openai import ChatOpenAI
134
+ # from langchain_community.vectorstores import Chroma
135
+ # from embedding_manager import DirectoryEmbeddingManager
136
+
137
+ # SEED_SOURCES = [
138
+ # "https://www.cms.gov/medicare/payment/medicare-advantage-rates-statistics/risk-adjustment",
139
+ # "https://www.cms.gov/data-research/monitoring-programs/medicare-risk-adjustment-data-validation-program",
140
+ # "https://www.cms.gov/files/document/fy-2024-icd-10-cm-coding-guidelines-updated-02/01/2024.pdf",
141
+ # "https://www.aapc.com/blog/41212-include-meat-in-your-risk-adjustment-documentation/",
142
+ # ]
143
+
144
+
145
+ # class ComorbidityCheckerAgent:
146
+ # """
147
+ # Uses a two-step AI agent process to first identify potential comorbidities for a
148
+ # MEAT-validated diagnosis and then verifies each one against the patient chart context.
149
+ # """
150
+ # def __init__(self, pdf_dir: str, hcc_code: str, model_version: str):
151
+ # self.embed_manager = DirectoryEmbeddingManager(pdf_dir)
152
+ # self.llm = ChatOpenAI(model=os.environ.get("OPENAI_MODEL_NAME", "gpt-4o"), temperature=0)
153
+ # self.hcc_code = hcc_code.strip()
154
+ # self.model_version = model_version.strip().upper()
155
+ # self.search_tool = SerperDevTool()
156
+ # #self.search_tool = SerperDevTool(seed_sources=SEED_SOURCES)
157
+
158
+ # @tool("patient_chart_search")
159
+ # def patient_chart_search(query: str) -> str:
160
+ # """
161
+ # Search the patient chart embeddings and return all top 15 results as a single string.
162
+ # Each result is preserved individually and then combined at the end.
163
+ # """
164
+ # print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
165
+ # vectordb = self.embed_manager.get_or_create_embeddings()
166
+ # results = vectordb.similarity_search(query, k=15)
167
+
168
+ # # Keep all 15 results separate internally
169
+ # all_results = [res.page_content for res in results]
170
+
171
+ # # Combine into a single string for output (same format as before)
172
+ # combined_results = "\n---\n".join(all_results)
173
+
174
+ # return combined_results
175
+
176
+
177
+ # # Register the agent with the tool
178
+ # self.agent = Agent(
179
+ # role="Clinical Coding and Comorbidity Analyst",
180
+ # goal=(
181
+ # "First, identify all clinically significant comorbidities for a given primary diagnosis, "
182
+ # "focusing on those relevant to HCC risk adjustment. Second, verify the presence of "
183
+ # "these comorbidities in a patient's chart and present the findings in a structured JSON format."
184
+ # ),
185
+ # backstory=(
186
+ # "You are an expert clinical coding analyst specializing in risk adjustment and Hierarchical Condition Categories (HCC). "
187
+ # "Your primary skill is to research disease patterns and then meticulously cross-reference them with patient records embeddings "
188
+ # "to ensure accurate documentation and coding. You provide clear, evidence-based findings."
189
+ # ),
190
+ # tools=[patient_chart_search],
191
+ # verbose=True,
192
+ # memory=False,
193
+ # llm=self.llm,
194
+ # )
195
+
196
+ # def check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
197
+ # """
198
+ # Orchestrates the two-task process for a single primary diagnosis.
199
+ # """
200
+ # primary_diagnosis = diagnosis_entry["diagnosis"]
201
+ # final_result = {"diagnosis": primary_diagnosis, "comorbidities": []}
202
+
203
+ # # --- Task 1: Identify Potential Comorbidities ---
204
+ # identify_task = Task(
205
+ # description=(
206
+ # f"For the primary diagnosis of '{primary_diagnosis}', generate a full list of common and clinically "
207
+ # f"significant comorbidities. Focus on conditions relevant for HCC {self.hcc_code} risk adjustment "
208
+ # f"in the {self.model_version} model. Use your search tool for research if needed."
209
+ # ),
210
+ # expected_output=(
211
+ # "A JSON object with a single key 'potential_comorbidities' containing a list of strings. "
212
+ # "Example: {\"potential_comorbidities\": [\"Hypertension\", \"Diabetes Mellitus Type 2\"]}"
213
+ # ),
214
+ # agent=self.agent,
215
+ # json_mode=True
216
+ # )
217
+
218
+ # print(f"\n[TASK 1] Identifying potential comorbidities for '{primary_diagnosis}'...")
219
+ # crew = Crew(agents=[self.agent], tasks=[identify_task], process=Process.sequential)
220
+ # result = crew.kickoff()
221
+
222
+ # try:
223
+ # comorbidities_to_check = json.loads(repair_json(result)).get("potential_comorbidities", [])
224
+ # if not comorbidities_to_check:
225
+ # print("[INFO] No potential comorbidities were identified by the agent.")
226
+ # return final_result
227
+ # print(f"[INFO] Identified potential comorbidities: {comorbidities_to_check}")
228
+ # except (json.JSONDecodeError, TypeError):
229
+ # print("[ERROR] Failed to decode the list of potential comorbidities. Aborting.")
230
+ # return final_result
231
+
232
+ # # --- Task 2: Verify Each Comorbidity in the Chart ---
233
+ # verify_task = Task(
234
+ # description=(
235
+ # f"The patient has a primary diagnosis of '{primary_diagnosis}'.\n"
236
+ # f"A list of potential comorbidities has been identified: {comorbidities_to_check}.\n\n"
237
+ # "For EACH comorbidity, you MUST use the `patient_chart_search` tool, which queries the persistent "
238
+ # "embedding database of the patient's chart. **Use all 15 retrieved results individually** to "
239
+ # "determine presence or absence of each comorbidity.\n\n"
240
+ # "After reviewing all results, construct a final JSON object with a single key 'comorbidity_analysis'. "
241
+ # "Ensure there is one object for EACH comorbidity from the initial list. The 'context' field should "
242
+ # "combine all relevant evidence snippets into a single string."
243
+ # ),
244
+ # expected_output=(
245
+ # "A final JSON object with the key 'comorbidity_analysis'. This key should contain a list "
246
+ # "where each item has the structure: \n"
247
+ # '{\n'
248
+ # ' "condition": "<name of comorbidity>",\n'
249
+ # ' "is_present": true/false,\n'
250
+ # ' "context": "<Use all 15 retrieved results individually and combined them according to comorbidity>",\n'
251
+ # ' "rationale": "<one-line explanation of your finding>"\n'
252
+ # '}'
253
+ # ),
254
+ # agent=self.agent,
255
+ # json_mode=True
256
+ # )
257
+
258
+
259
+
260
+ # print(f"\n[TASK 2] Verifying identified comorbidities in the patient chart...")
261
+ # crew = Crew(agents=[self.agent], tasks=[verify_task], process=Process.sequential)
262
+ # result = crew.kickoff()
263
+
264
+ # try:
265
+ # analysis = json.loads(repair_json(result))
266
+ # final_result["comorbidities"] = analysis.get("comorbidity_analysis", [])
267
+ # except (json.JSONDecodeError, TypeError):
268
+ # print(f"[ERROR] Failed to decode the final comorbidity analysis for '{primary_diagnosis}'.")
269
+ # final_result["comorbidities"] = []
270
+
271
+ # return final_result
272
+
273
+ # def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
274
+ # """
275
+ # Main execution loop. It iterates through diagnoses that have met MEAT criteria
276
+ # and runs the comorbidity check for each.
277
+ # """
278
+ # final_results = []
279
+ # for entry in meat_validated_results:
280
+ # meat_criteria = entry.get("meat", {})
281
+ # if isinstance(meat_criteria, dict) and any(meat_criteria.values()):
282
+ # print(f"\n[INFO] Checking for structured comorbidities for: {entry['diagnosis']}")
283
+ # entry_with_comorbidities = self.check_comorbidities_for_one(entry)
284
+ # final_results.append(entry_with_comorbidities)
285
+ # print(f"[COMORBIDITIES CHECKED] Analysis complete for {entry['diagnosis']}.")
286
+ # else:
287
+ # if entry.get("answer", "").lower() == "yes":
288
+ # entry["comorbidities"] = {
289
+ # "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
290
+ # }
291
+ # final_results.append(entry)
292
+ # return final_results