import os
import json
from typing import List, Dict, Any, Optional

import pandas as pd
from PyPDF2 import PdfReader
from json_repair import repair_json
from crewai import Agent, Task, Crew, Process
from crewai_tools import SerperDevTool
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import Chroma


class ClinicalStatusAgent:
    """Classify confirmed diagnoses as ACTIVE, MONITORING, or HISTORICAL.

    Wraps a single CrewAI agent driven by a ChatOpenAI model at temperature 0.
    Each confirmed diagnosis is sent to the agent in its own one-task crew and
    the JSON answer is merged back into the diagnosis entry.
    """

    def __init__(self, model: str = "gpt-4o"):
        """Create the LLM client and the classifier agent.

        Args:
            model: Name of the chat model passed to ChatOpenAI.
        """
        self.llm = ChatOpenAI(model=model, temperature=0)
        self.agent = Agent(
            role="Clinical Status Classifier",
            goal="Determine the clinical status of confirmed diagnoses from patient chart excerpts.",
            backstory="You are a meticulous clinical analyst. Your expertise lies in reviewing "
                      "medical documentation to classify a patient's condition as either active, "
                      "under monitoring, or purely historical.",
            verbose=True,
            memory=False,
            llm=self.llm,
        )

    @staticmethod
    def _parse_status_output(output: Any) -> dict:
        """Convert the crew's output into a dict; return {} if no JSON object can be recovered."""
        # Recent CrewAI releases return a CrewOutput object whose text lives in
        # `.raw`; older releases returned a plain string. Support both.
        raw = getattr(output, "raw", None)
        if not isinstance(raw, str):
            raw = str(output)
        try:
            parsed = json.loads(repair_json(raw))
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return {}
        # The model may answer with a bare string or a list; only an object is usable.
        return parsed if isinstance(parsed, dict) else {}

    @staticmethod
    def _is_confirmed(entry: dict) -> bool:
        """Return True when the explicit or the implicit answer is 'yes' (case-insensitive)."""
        for key in ("answer_explicit", "answer_implicit"):
            answer = entry.get(key)
            # A key that is present but None (or any non-string) counts as "not yes"
            # instead of raising AttributeError on .lower().
            if isinstance(answer, str) and answer.lower() == "yes":
                return True
        return False

    def determine_status_one(self, diagnosis_entry: dict) -> dict:
        """Determine the clinical status for a single confirmed diagnosis.

        Args:
            diagnosis_entry: Must contain the keys "diagnosis", "icd10" and
                "context"; any other keys are carried over unchanged.

        Returns:
            A copy of ``diagnosis_entry`` extended with "clinical_status"
            (None when the model's reply could not be parsed into a JSON
            object) and "status_rationale" ("" when absent).

        Raises:
            KeyError: If one of the required keys is missing.
        """
        diagnosis = diagnosis_entry["diagnosis"]
        icd10 = diagnosis_entry["icd10"]
        context = diagnosis_entry["context"]

        task = Task(
            description=(
                f"Diagnosis: {diagnosis} (ICD-10: {icd10})\n\n"
                f"Patient chart excerpts:\n{context}\n\n"
                "Based *only* on the provided text, determine the Clinical Status of this diagnosis. \n"
                "Choose exactly one of the following: {ACTIVE, MONITORING, HISTORICAL}.\n"
                "- ACTIVE: The condition is currently under treatment, requires ongoing management, or is an acute issue.\n"
                "- MONITORING: The condition is being watched via surveillance visits, follow-ups, or observation without active treatment.\n"
                "- HISTORICAL: The condition is a past issue with no current clinical impact or management.\n\n"
                "Output must be strict JSON:\n"
                "{'clinical_status': 'ACTIVE/MONITORING/HISTORICAL', 'rationale': 'short justification for your choice'}"
            ),
            expected_output="JSON with keys clinical_status and rationale",
            agent=self.agent,
            # NOTE(review): `json_mode` is passed through unchanged; confirm the
            # installed CrewAI version accepts it on Task (the documented JSON
            # options appear to be `output_json` / `output_pydantic`).
            json_mode=True,
        )
        crew = Crew(
            agents=[self.agent],
            tasks=[task],
            process=Process.sequential,
            verbose=True,
        )

        parsed = self._parse_status_output(crew.kickoff())
        if not parsed:
            # Keep the batch going: one malformed reply should not discard the
            # results of every diagnosis processed before it.
            print(f"[WARN] Could not parse a JSON object from the model output for: {diagnosis} ({icd10})")

        return {
            **diagnosis_entry,
            "clinical_status": parsed.get("clinical_status"),
            "status_rationale": parsed.get("rationale", ""),
        }

    def run(self, confirmed_diagnoses: list[dict]) -> list[dict]:
        """Determine the clinical status of every confirmed ('yes') diagnosis.

        Args:
            confirmed_diagnoses: Diagnosis entries; an entry is treated as
                confirmed when "answer_explicit" or "answer_implicit" is "yes".

        Returns:
            A list in input order. Confirmed entries are replaced by their
            enriched copies; all other entries are passed through untouched.
        """
        enriched_results = []
        for entry in confirmed_diagnoses:
            if not self._is_confirmed(entry):
                # Pass through non-confirmed diagnoses.
                enriched_results.append(entry)
                continue

            print(f"\n[INFO] Determining clinical status for: {entry['diagnosis']} ({entry['icd10']})")
            enriched = self.determine_status_one(entry)
            enriched_results.append(enriched)
            print(f"[STATUS RESULT] {enriched}")
        return enriched_results