Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import pandas as pd | |
| from PyPDF2 import PdfReader | |
| from json_repair import repair_json | |
| from typing import List, Dict, Any | |
| from typing import List, Dict, Any, Optional | |
| from crewai import Agent, Task, Crew, Process | |
| from crewai_tools import SerperDevTool | |
| from langchain_openai import ChatOpenAI, OpenAIEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from embedding_manager import DirectoryEmbeddingManager | |
| # ---------- Chart Checker ---------- | |
class ChartDiagnosisChecker:
    """Check whether candidate diagnoses are supported by a patient chart.

    For each diagnosis, chart excerpts are retrieved from the embedding
    manager and a single CrewAI agent is asked whether the diagnosis is
    documented explicitly and whether it can be inferred implicitly
    (e.g. from medications or tests).
    """

    # Description of the JSON object the agent is asked to produce; kept in
    # sync with the keys read back in ``check_one``.
    _EXPECTED_OUTPUT = (
        "JSON with keys answer_explicit, rationale_explicit, "
        "answer_implicit and rationale_implicit"
    )

    def __init__(self, pdf_dir: str, model: str = "gpt-4o"):
        """Create the embedding manager, the LLM and the reviewing agent.

        Args:
            pdf_dir: Passed to ``DirectoryEmbeddingManager``; presumably the
                directory holding the patient chart PDFs (confirm against
                ``embedding_manager``).
            model: Chat model name handed to ``ChatOpenAI``.
        """
        self.embed_manager = DirectoryEmbeddingManager(pdf_dir)
        self.llm = ChatOpenAI(model=model, temperature=0)
        self.agent = Agent(
            role="Patient Chart Diagnosis Checker",
            goal="Verify whether specific diagnoses are documented in the patient chart.",
            backstory="You are a medical chart reviewer specialized in confirming diagnoses for HCC coding validation.",
            verbose=True,
            memory=False,
            llm=self.llm,
        )

    @staticmethod
    def _build_description(diagnosis: str, icd10: str, ref: str, context: object) -> str:
        """Return the task prompt for one diagnosis.

        Every sentence ends with an explicit separator so that adjacent
        string literals do not run together, and the example object is
        written as real JSON (double quotes, all four keys quoted).
        """
        return (
            f"Diagnosis to validate: {diagnosis} (ICD-10: {icd10})\n\n"
            f"Reference: {ref}\n\n"
            f"Patient chart excerpts:\n{context}\n\n"
            f"Does the patient have {diagnosis} from the patient contexts provided?\n"
            "Answer Yes or No, with a short justification using ONLY the provided chart text.\n"
            "Also check whether the diagnosis can be inferred implicitly, like by the medication taken by the patient "
            "or any other means. Compare the drugs and tests done by the patient with those required for the diagnosis.\n"
            "Output must be valid JSON in the form:\n"
            '{"answer_explicit": "yes/no", "rationale_explicit": "one-line rationale", '
            '"answer_implicit": "yes/no", "rationale_implicit": "one-line rationale"}'
        )

    @staticmethod
    def _parse_result(output: object) -> dict:
        """Turn the crew output into a dict without executing it.

        The text is parsed as strict JSON first; only if that fails is it
        passed through ``repair_json`` and parsed again. Anything that still
        cannot be parsed, or that parses to a non-object (list, string,
        number), yields an empty dict so the caller falls back to its
        "unknown" defaults instead of crashing.
        """
        # NOTE(review): newer crewai versions appear to return an output
        # object exposing the text as ``.raw`` rather than a plain string;
        # handle both, falling back to ``str()``.
        raw = getattr(output, "raw", output)
        text = raw if isinstance(raw, str) else str(raw)
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            try:
                # json.loads instead of eval(): model output is untrusted
                # text, and JSON literals such as true/false/null are not
                # valid Python expressions anyway.
                parsed = json.loads(repair_json(text))
            except json.JSONDecodeError:
                return {}
        return parsed if isinstance(parsed, dict) else {}

    def check_one(self, diagnosis_entry: dict) -> dict:
        """Check one diagnosis against the chart, explicitly and implicitly.

        Args:
            diagnosis_entry: Mapping with required keys ``"diagnosis"`` and
                ``"icd10"`` and optional ``"reference"`` (defaults to
                ``"N/A"``).

        Returns:
            A dict with the input fields, the agent's ``answer_explicit`` /
            ``rationale_explicit`` / ``answer_implicit`` /
            ``rationale_implicit`` (answers default to ``"unknown"`` and
            rationales to ``""`` when missing), and the retrieved
            ``context``.

        Raises:
            KeyError: If ``"diagnosis"`` or ``"icd10"`` is missing.
        """
        diagnosis = diagnosis_entry["diagnosis"]
        icd10 = diagnosis_entry["icd10"]
        ref = diagnosis_entry.get("reference", "N/A")

        # Query patient chart embeddings
        context = self.embed_manager.query(diagnosis, top_k=15)

        task = Task(
            description=self._build_description(diagnosis, icd10, ref, context),
            expected_output=self._EXPECTED_OUTPUT,
            agent=self.agent,
            # NOTE(review): kept from the original; confirm that Task
            # actually accepts/uses ``json_mode``.
            json_mode=True,
        )
        crew = Crew(agents=[self.agent], tasks=[task], process=Process.sequential, verbose=True)
        result = self._parse_result(crew.kickoff())

        # Enrich output
        return {
            "diagnosis": diagnosis,
            "icd10": icd10,
            "reference": ref,
            "answer_explicit": result.get("answer_explicit", "unknown"),
            "rationale_explicit": result.get("rationale_explicit", ""),
            "answer_implicit": result.get("answer_implicit", "unknown"),
            "rationale_implicit": result.get("rationale_implicit", ""),
            "context": context,
        }

    def run(self, diagnoses: list[dict]) -> list[dict]:
        """Check every diagnosis in order and return the per-diagnosis results.

        Progress and each result are printed to stdout as they complete.
        """
        results = []
        for entry in diagnoses:
            print(f"\n[INFO] Checking: {entry['diagnosis']} ({entry['icd10']})")
            result = self.check_one(entry)
            results.append(result)
            print(f"[ANSWER] {result}")
        return results