# Risk-Adjustment-Version1 / chartdiagnosischecker.py
# Author avatar: sujataprakashdatycs
# Commit message: Update chartdiagnosischecker.py
# Commit: 6b0ff68 (verified)
import os
import json
import pandas as pd
from PyPDF2 import PdfReader
from json_repair import repair_json
from typing import List, Dict, Any
from typing import List, Dict, Any, Optional
from crewai import Agent, Task, Crew, Process
from crewai_tools import SerperDevTool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from embedding_manager import DirectoryEmbeddingManager
# ---------- Chart Checker ----------
class ChartDiagnosisChecker:
    """Ask an LLM agent whether given diagnoses are documented in a patient chart.

    For each diagnosis the checker queries the chart embeddings for relevant
    excerpts, hands them to a CrewAI agent, and parses the agent's JSON verdict
    (explicit documentation vs. implicit evidence such as medications/tests).
    """

    def __init__(self, pdf_dir: str, model: str = "gpt-4o"):
        """Build the embedding manager, the LLM client and the reviewing agent.

        Args:
            pdf_dir: Directory handed to ``DirectoryEmbeddingManager``
                (presumably the folder holding the chart PDFs; confirm
                against ``embedding_manager``).
            model: Chat model name passed to ``ChatOpenAI``.
        """
        self.embed_manager = DirectoryEmbeddingManager(pdf_dir)
        self.llm = ChatOpenAI(model=model, temperature=0)
        self.agent = Agent(
            role="Patient Chart Diagnosis Checker",
            goal="Verify whether specific diagnoses are documented in the patient chart.",
            backstory="You are a medical chart reviewer specialized in confirming diagnoses for HCC coding validation.",
            verbose=True,
            memory=False,
            llm=self.llm,
        )

    @staticmethod
    def _build_description(diagnosis: str, icd10: str, ref: str, context: object) -> str:
        """Return the task prompt for one diagnosis.

        Every instruction ends with explicit punctuation/newline so adjacent
        string fragments cannot run together, and the JSON example is itself
        valid JSON (double quotes, all four keys quoted).
        """
        return (
            f"Diagnosis to validate: {diagnosis} (ICD-10: {icd10})\n\n"
            f"Reference: {ref}\n\n"
            f"Patient chart excerpts:\n{context}\n\n"
            f"Does the patient have {diagnosis} from the patient contexts provided?\n"
            "Answer Yes or No, with a short justification using ONLY the provided chart text.\n"
            "Also check whether the diagnosis can be inferred implicitly, e.g. from the "
            "medication taken by the patient or any other means. Compare the drugs and tests "
            "done by the patient with those required for the diagnosis.\n"
            "Output must be valid JSON in the form:\n"
            '{"answer_explicit": "yes/no", "rationale_explicit": "one-line rationale", '
            '"answer_implicit": "yes/no", "rationale_implicit": "one-line rationale"}'
        )

    @staticmethod
    def _parse_result(raw: object) -> dict:
        """Turn the crew's output into a dict without executing it.

        The model's text is untrusted, so it is parsed with ``json.loads``
        (never ``eval``). Strict parsing is tried first; ``repair_json`` is
        only used as a fallback for slightly malformed output. Anything that
        still is not a JSON object yields an empty dict, so callers fall back
        to their defaults instead of crashing.
        """
        if isinstance(raw, str):
            text = raw
        else:
            # NOTE(review): newer crewai releases appear to return an output
            # object exposing the text as ``.raw``; fall back to str() otherwise.
            text = str(getattr(raw, "raw", raw))

        try:
            parsed = json.loads(text)
        except (TypeError, ValueError):
            try:
                parsed = json.loads(repair_json(text))
            except (TypeError, ValueError):
                parsed = None

        return parsed if isinstance(parsed, dict) else {}

    def check_one(self, diagnosis_entry: dict) -> dict:
        """Check if the patient has the given diagnosis (Yes/No).

        Args:
            diagnosis_entry: Mapping with required keys ``"diagnosis"`` and
                ``"icd10"`` and an optional ``"reference"``.

        Returns:
            A dict with the diagnosis, ICD-10 code, reference, the explicit
            and implicit answers with their rationales (``"unknown"`` / ``""``
            when the model did not supply them), and the retrieved context.

        Raises:
            KeyError: If ``"diagnosis"`` or ``"icd10"`` is missing.
        """
        diagnosis = diagnosis_entry["diagnosis"]
        icd10 = diagnosis_entry["icd10"]
        ref = diagnosis_entry.get("reference", "N/A")

        # Query patient chart embeddings
        context = self.embed_manager.query(diagnosis, top_k=15)

        task = Task(
            description=self._build_description(diagnosis, icd10, ref, context),
            expected_output=(
                "JSON with keys answer_explicit, rationale_explicit, "
                "answer_implicit and rationale_implicit"
            ),
            agent=self.agent,
            json_mode=True,
        )
        crew = Crew(agents=[self.agent], tasks=[task], process=Process.sequential, verbose=True)
        result = self._parse_result(crew.kickoff())

        # Enrich output
        return {
            "diagnosis": diagnosis,
            "icd10": icd10,
            "reference": ref,
            "answer_explicit": result.get("answer_explicit", "unknown"),
            "rationale_explicit": result.get("rationale_explicit", ""),
            "answer_implicit": result.get("answer_implicit", "unknown"),
            "rationale_implicit": result.get("rationale_implicit", ""),
            "context": context,
        }

    def run(self, diagnoses: list[dict]) -> list[dict]:
        """Loop over all diagnoses and check them in the chart.

        Args:
            diagnoses: Entries in the shape accepted by ``check_one``.

        Returns:
            One result dict per entry, in input order.
        """
        results = []
        for entry in diagnoses:
            print(f"\n[INFO] Checking: {entry['diagnosis']} ({entry['icd10']})")
            result = self.check_one(entry)
            results.append(result)
            print(f"[ANSWER] {result}")
        return results