# comorbidity_checker.py
import json
from typing import List, Dict

from json_repair import repair_json
from crewai import Agent, Task, Crew, Process
from crewai_tools import tool, SerperDevTool
from langchain_openai import ChatOpenAI

from embedding_manager import DirectoryEmbeddingManager


class ComorbidityCheckerAgent:
    """
    Two-step flow:
    1) Identify clinically significant comorbidities for the primary diagnosis (HCC-aware).
    2) Verify each comorbidity against the patient chart embeddings (top-15).
    """

    def __init__(self, pdf_dir_or_file: str, hcc_code: str, model_version: str, model: str = "gpt-4o"):
        """
        Build the embedding manager, the LLM, the chart-search tool and the agent.

        Args:
            pdf_dir_or_file: Location handed to ``DirectoryEmbeddingManager``.
            hcc_code: HCC code quoted in the prompts (surrounding whitespace is stripped).
            model_version: Model version quoted in the prompts (stripped and upper-cased).
            model: Chat model name passed to ``ChatOpenAI``.
        """
        self.embed_manager = DirectoryEmbeddingManager(pdf_dir_or_file)
        self.llm = ChatOpenAI(model=model, temperature=0)
        self.hcc_code = hcc_code.strip()
        self.model_version = model_version.strip().upper()
        self.search_tool = SerperDevTool()  # available if you want to expand later

        # Defined as a closure so the tool can reach this instance's embedding manager.
        @tool("patient_chart_search")
        def patient_chart_search(query: str) -> str:
            """
            Query persistent patient-chart embeddings.
            Returns the top-15 results concatenated with separators.
            """
            print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
            vectordb = self.embed_manager.get_or_create_embeddings()
            results = vectordb.similarity_search(query, k=15)
            return "\n---\n".join([res.page_content for res in results])

        self.patient_chart_search = patient_chart_search

        self.agent = Agent(
            role="Clinical Coding and Comorbidity Analyst",
            goal=(
                "Identify clinically significant comorbidities for a primary diagnosis relevant to HCC; "
                "verify presence in the patient's chart with embeddings."
            ),
            backstory=(
                "Expert risk-adjustment analyst who cross-references guidelines with chart evidence."
            ),
            tools=[self.patient_chart_search],
            verbose=True,
            memory=False,
            llm=self.llm,
        )

    def _kickoff_single_task(self, task: Task):
        """Run *task* alone in a sequential crew with this agent and return the crew result."""
        crew = Crew(agents=[self.agent], tasks=[task], process=Process.sequential)
        return crew.kickoff()

    @staticmethod
    def _extract_list(output, key: str, stage: str) -> list:
        """
        Parse a crew result as JSON and return the list stored under *key*.

        The result may be a plain string or an object exposing the text through a
        ``raw`` attribute; both are reduced to text before ``repair_json`` is applied.
        Parsing is best-effort: any failure is logged and yields an empty list.

        Args:
            output: Value returned by ``Crew.kickoff()``.
            key: Top-level JSON key expected to hold a list.
            stage: Short label used in the log line when parsing fails.

        Returns:
            The list found under *key*, or ``[]`` when the output cannot be parsed,
            is not a JSON object, or does not hold a list under *key*.
        """
        raw = getattr(output, "raw", None)
        text = raw if isinstance(raw, str) else str(output)
        try:
            payload = json.loads(repair_json(text))
        except Exception as exc:  # deliberate best-effort boundary: report, never crash
            print(f"[ERROR] Failed to decode {stage} output as JSON: {exc}")
            return []
        if not isinstance(payload, dict):
            print(f"[ERROR] Expected a JSON object for {stage}, got {type(payload).__name__}.")
            return []
        value = payload.get(key, [])
        if not isinstance(value, list):
            print(f"[ERROR] Expected a list under '{key}' for {stage}, got {type(value).__name__}.")
            return []
        return value

    @staticmethod
    def _answered_yes(entry: Dict, key: str) -> bool:
        """Return True when ``entry[key]`` equals 'yes' ignoring case; tolerates missing/None/non-string values."""
        return str(entry.get(key) or "").lower() == "yes"

    def _check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
        """
        Run the identify-then-verify flow for one diagnosis entry.

        Args:
            diagnosis_entry: Dict that must contain the key ``"diagnosis"``.

        Returns:
            ``{"diagnosis": <primary diagnosis>, "comorbidities": [...]}``. The list is
            empty when no comorbidities were identified or when either agent output
            could not be parsed.

        Raises:
            KeyError: If ``diagnosis_entry`` has no ``"diagnosis"`` key.
        """
        primary_diagnosis = diagnosis_entry["diagnosis"]
        final_result: Dict = {"diagnosis": primary_diagnosis, "comorbidities": []}

        # Task 1 — Identify comorbidities
        identify_task = Task(
            description=(
                f"For primary diagnosis '{primary_diagnosis}', list common and clinically meaningful comorbidities "
                f"that matter for HCC {self.hcc_code} in {self.model_version}. "
                "Return STRICT JSON: {\"potential_comorbidities\": [\"...\"]}"
            ),
            expected_output="Strict JSON with key potential_comorbidities (list of strings).",
            agent=self.agent,
            json_mode=True
        )
        identified = self._kickoff_single_task(identify_task)
        comorbidities = self._extract_list(
            identified, "potential_comorbidities", "comorbidity identification"
        )
        if not comorbidities:
            # Nothing to verify; skip the second (tool-using) task entirely.
            return final_result

        # Task 2 — Verify each comorbidity via patient_chart_search
        verify_task = Task(
            description=(
                f"Primary diagnosis: '{primary_diagnosis}'. Potential comorbidities: {comorbidities}.\n"
                "For EACH comorbidity, call the patient_chart_search tool (top-15). "
                "Decide presence/absence using ONLY returned snippets.\n\n"
                "Return STRICT JSON:\n"
                "{ \"comorbidity_analysis\": [\n"
                " {\"condition\":\"...\",\"is_present\":true/false,\"context\":\"\",\"rationale\":\"...\"},\n"
                " ... ] }"
            ),
            expected_output="Strict JSON with key comorbidity_analysis (list of objects).",
            agent=self.agent,
            json_mode=True
        )
        verified = self._kickoff_single_task(verify_task)
        final_result["comorbidities"] = self._extract_list(
            verified, "comorbidity_analysis", "comorbidity verification"
        )
        return final_result

    def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
        """
        Accepts entries that already passed MEAT (i.e., meat dict exists and has True somewhere).

        Entries whose ``"meat"`` value is a dict with at least one truthy value are
        analysed. Every other entry is passed through; if its ``"answer_explicit"`` or
        ``"answer_implicit"`` is "yes" (case-insensitive) a status note is written into
        that same entry dict under ``"comorbidities"`` (the input dict is mutated).

        Args:
            meat_validated_results: Entries to process; ``None`` is treated as empty.

        Returns:
            One item per input entry, in input order.
        """
        out: List[Dict] = []
        for entry in meat_validated_results or []:
            meat = entry.get("meat", {})
            if isinstance(meat, dict) and any(meat.values()):
                print(f"[INFO] Checking structured comorbidities for: {entry['diagnosis']}")
                # NOTE(review): the analysed item holds only "diagnosis" and "comorbidities";
                # other fields of the input entry are not carried over — confirm this is intended.
                out.append(self._check_comorbidities_for_one(entry))
                continue

            # If earlier stages claim 'yes' but MEAT not met, pass through with a note.
            if self._answered_yes(entry, "answer_explicit") or self._answered_yes(entry, "answer_implicit"):
                entry["comorbidities"] = {
                    "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
                }
            out.append(entry)
        return out


# import os
# import json
# import pandas as pd
# from PyPDF2 import PdfReader
# from json_repair import repair_json
# from typing import List, Dict, Any, Optional

# from crewai import Agent, Task, Crew, Process
# from crewai_tools import SerperDevTool,tool
# from langchain_openai import ChatOpenAI
# from langchain_community.vectorstores import Chroma
# from embedding_manager import DirectoryEmbeddingManager

# SEED_SOURCES = [
#     "https://www.cms.gov/medicare/payment/medicare-advantage-rates-statistics/risk-adjustment",
#     "https://www.cms.gov/data-research/monitoring-programs/medicare-risk-adjustment-data-validation-program",
#     "https://www.cms.gov/files/document/fy-2024-icd-10-cm-coding-guidelines-updated-02/01/2024.pdf",
#     "https://www.aapc.com/blog/41212-include-meat-in-your-risk-adjustment-documentation/",
# ]

# class ComorbidityCheckerAgent:
#     """
#     Uses a two-step AI agent process to first identify potential comorbidities for a
#     MEAT-validated diagnosis and then verifies each one against the patient chart context.
#     """
#     def __init__(self, pdf_dir: str, hcc_code: str, model_version: str):
#         self.embed_manager = DirectoryEmbeddingManager(pdf_dir)
#         self.llm = ChatOpenAI(model=os.environ.get("OPENAI_MODEL_NAME", "gpt-4o"), temperature=0)
#         self.hcc_code = hcc_code.strip()
#         self.model_version = model_version.strip().upper()
#         self.search_tool = SerperDevTool()
#         #self.search_tool = SerperDevTool(seed_sources=SEED_SOURCES)

#         @tool("patient_chart_search")
#         def patient_chart_search(query: str) -> str:
#             """
#             Search the patient chart embeddings and return all top 15 results as a single string.
#             Each result is preserved individually and then combined at the end.
#             """
#             print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
#             vectordb = self.embed_manager.get_or_create_embeddings()
#             results = vectordb.similarity_search(query, k=15)
#             # Keep all 15 results separate internally
#             all_results = [res.page_content for res in results]
#             # Combine into a single string for output (same format as before)
#             combined_results = "\n---\n".join(all_results)
#             return combined_results

#         # Register the agent with the tool
#         self.agent = Agent(
#             role="Clinical Coding and Comorbidity Analyst",
#             goal=(
#                 "First, identify all clinically significant comorbidities for a given primary diagnosis, "
#                 "focusing on those relevant to HCC risk adjustment. Second, verify the presence of "
#                 "these comorbidities in a patient's chart and present the findings in a structured JSON format."
#             ),
#             backstory=(
#                 "You are an expert clinical coding analyst specializing in risk adjustment and Hierarchical Condition Categories (HCC). "
#                 "Your primary skill is to research disease patterns and then meticulously cross-reference them with patient records embeddings "
#                 "to ensure accurate documentation and coding. You provide clear, evidence-based findings."
#             ),
#             tools=[patient_chart_search],
#             verbose=True,
#             memory=False,
#             llm=self.llm,
#         )

#     def check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
#         """
#         Orchestrates the two-task process for a single primary diagnosis.
#         """
#         primary_diagnosis = diagnosis_entry["diagnosis"]
#         final_result = {"diagnosis": primary_diagnosis, "comorbidities": []}

#         # --- Task 1: Identify Potential Comorbidities ---
#         identify_task = Task(
#             description=(
#                 f"For the primary diagnosis of '{primary_diagnosis}', generate a full list of common and clinically "
#                 f"significant comorbidities. Focus on conditions relevant for HCC {self.hcc_code} risk adjustment "
#                 f"in the {self.model_version} model. Use your search tool for research if needed."
#             ),
#             expected_output=(
#                 "A JSON object with a single key 'potential_comorbidities' containing a list of strings. "
#                 "Example: {\"potential_comorbidities\": [\"Hypertension\", \"Diabetes Mellitus Type 2\"]}"
#             ),
#             agent=self.agent,
#             json_mode=True
#         )
#         print(f"\n[TASK 1] Identifying potential comorbidities for '{primary_diagnosis}'...")
#         crew = Crew(agents=[self.agent], tasks=[identify_task], process=Process.sequential)
#         result = crew.kickoff()

#         try:
#             comorbidities_to_check = json.loads(repair_json(result)).get("potential_comorbidities", [])
#             if not comorbidities_to_check:
#                 print("[INFO] No potential comorbidities were identified by the agent.")
#                 return final_result
#             print(f"[INFO] Identified potential comorbidities: {comorbidities_to_check}")
#         except (json.JSONDecodeError, TypeError):
#             print("[ERROR] Failed to decode the list of potential comorbidities. Aborting.")
#             return final_result

#         # --- Task 2: Verify Each Comorbidity in the Chart ---
#         verify_task = Task(
#             description=(
#                 f"The patient has a primary diagnosis of '{primary_diagnosis}'.\n"
#                 f"A list of potential comorbidities has been identified: {comorbidities_to_check}.\n\n"
#                 "For EACH comorbidity, you MUST use the `patient_chart_search` tool, which queries the persistent "
#                 "embedding database of the patient's chart. **Use all 15 retrieved results individually** to "
#                 "determine presence or absence of each comorbidity.\n\n"
#                 "After reviewing all results, construct a final JSON object with a single key 'comorbidity_analysis'. "
#                 "Ensure there is one object for EACH comorbidity from the initial list. The 'context' field should "
#                 "combine all relevant evidence snippets into a single string."
#             ),
#             expected_output=(
#                 "A final JSON object with the key 'comorbidity_analysis'. This key should contain a list "
#                 "where each item has the structure: \n"
#                 '{\n'
#                 '  "condition": "",\n'
#                 '  "is_present": true/false,\n'
#                 '  "context": "",\n'
#                 '  "rationale": ""\n'
#                 '}'
#             ),
#             agent=self.agent,
#             json_mode=True
#         )
#         print(f"\n[TASK 2] Verifying identified comorbidities in the patient chart...")
#         crew = Crew(agents=[self.agent], tasks=[verify_task], process=Process.sequential)
#         result = crew.kickoff()

#         try:
#             analysis = json.loads(repair_json(result))
#             final_result["comorbidities"] = analysis.get("comorbidity_analysis", [])
#         except (json.JSONDecodeError, TypeError):
#             print(f"[ERROR] Failed to decode the final comorbidity analysis for '{primary_diagnosis}'.")
#             final_result["comorbidities"] = []

#         return final_result

#     def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
#         """
#         Main execution loop. It iterates through diagnoses that have met MEAT criteria
#         and runs the comorbidity check for each.
#         """
#         final_results = []
#         for entry in meat_validated_results:
#             meat_criteria = entry.get("meat", {})
#             if isinstance(meat_criteria, dict) and any(meat_criteria.values()):
#                 print(f"\n[INFO] Checking for structured comorbidities for: {entry['diagnosis']}")
#                 entry_with_comorbidities = self.check_comorbidities_for_one(entry)
#                 final_results.append(entry_with_comorbidities)
#                 print(f"[COMORBIDITIES CHECKED] Analysis complete for {entry['diagnosis']}.")
#             else:
#                 if entry.get("answer", "").lower() == "yes":
#                     entry["comorbidities"] = {
#                         "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
#                     }
#                 final_results.append(entry)
#         return final_results