# Page-scrape residue (not program text): "Spaces: Sleeping Sleeping"
import json
import os
import re
from typing import List, Dict, Any, Optional

import pandas as pd
from PyPDF2 import PdfReader
from json_repair import repair_json
from crewai import Agent, Task, Crew, Process
from crewai_tools import SerperDevTool
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import Chroma
# Reference URLs (CMS risk-adjustment pages, the FY2024 ICD-10-CM coding
# guidelines PDF, and an AAPC article) handed to the search tool as
# ``seed_sources`` when TestFindingAgent is constructed.
SEED_SOURCES = [
    "https://www.cms.gov/medicare/payment/medicare-advantage-rates-statistics/risk-adjustment",
    "https://www.cms.gov/data-research/monitoring-programs/medicare-risk-adjustment-data-validation-program",
    "https://www.cms.gov/files/document/fy-2024-icd-10-cm-coding-guidelines-updated-02/01/2024.pdf",
    "https://www.aapc.com/blog/41212-include-meat-in-your-risk-adjustment-documentation/",
]
class TestFindingAgent:
    """Find the labs, procedures, and vitals that support an HCC diagnosis.

    Builds one CrewAI ``Agent`` (LLM: ``ChatOpenAI``; tool: ``SerperDevTool``)
    and, in :meth:`run`, executes one single-task crew per input diagnosis,
    attaching the parsed JSON answer to that diagnosis under ``"tests"``.
    """

    # Greedy on purpose: spans from the first "{" to the last "}" so nested
    # objects stay inside the candidate text. Compiled once for all calls.
    _JSON_OBJECT_RE = re.compile(r"\{.*\}", re.DOTALL)

    def __init__(self, hcc_code: str, model_version: str,
                 model: str = "gpt-4o", output_file: Optional[str] = None):
        """Configure the LLM, the search tool, and the extractor agent.

        Args:
            hcc_code: HCC code; surrounding whitespace is stripped.
            model_version: Model version label; stripped and upper-cased.
            model: Model name passed to ``ChatOpenAI``.
            output_file: Output file name. Defaults to
                ``"<code>_<version>_tests.json"`` built from the lower-cased
                code (spaces replaced by underscores) and version. Nothing in
                this class writes to it; it is only stored.
        """
        self.hcc_code = hcc_code.strip()
        self.model_version = model_version.strip().upper()
        self.llm = ChatOpenAI(model=model, temperature=0)
        # NOTE(review): confirm SerperDevTool accepts a `seed_sources`
        # argument in the installed crewai_tools version.
        self.search = SerperDevTool(seed_sources=SEED_SOURCES)

        safe_code = self.hcc_code.lower().replace(" ", "_")
        safe_ver = self.model_version.lower()
        self.output_file = output_file or f"{safe_code}_{safe_ver}_tests.json"

        self.agent = Agent(
            role="HCC Test & Procedure Extractor",
            goal="For each HCC diagnosis, find labs, procedures, and vitals required to support it.",
            backstory=(
                "You specialize in mapping diagnoses to supporting labs, vitals, and procedures. "
                "You always rely on CMS/AAPC sources to find the tests required for the diagnosis for the hcc code and extract available values from the patient chart context."
            ),
            tools=[self.search],
            verbose=True,
            memory=False,
            llm=self.llm,
        )

    def _extract_json_from_llm(self, raw_response: Any) -> Dict[str, Any]:
        """Extract a JSON object from an LLM response, repairing it if needed.

        Args:
            raw_response: The response text, or a result object. For a
                non-string, its ``raw`` attribute is used when present,
                otherwise ``str(raw_response)``.

        Returns:
            The parsed object as a dict, or ``{}`` when the response is
            ``None``, contains no ``{...}`` span, or cannot be parsed or
            repaired into an object. Failures are reported with ``print``.
        """
        if raw_response is None:
            print("[ERROR] No JSON object found in LLM response")
            return {}
        if not isinstance(raw_response, str):
            # run() passes whatever crew.kickoff() returned, which may be a
            # result object rather than plain text; re.search needs a str.
            raw_response = str(getattr(raw_response, "raw", raw_response))

        match = self._JSON_OBJECT_RE.search(raw_response)
        if not match:
            print("[ERROR] No JSON object found in LLM response")
            return {}
        clean_json_str = match.group(0)

        # Step 1: Try direct JSON parse. raw_decode reads exactly one value
        # from the start of the span, so an object followed by extra
        # brace-containing text still parses; the span starts with "{", so a
        # successful decode is always a dict.
        try:
            parsed, _ = json.JSONDecoder().raw_decode(clean_json_str)
        except json.JSONDecodeError as e:
            print(f"[WARN] Direct JSON parsing failed: {e}")
        else:
            return parsed

        # Step 2: Try repairing JSON. Broad catch is deliberate: this is a
        # best-effort boundary and any repair failure degrades to {}.
        try:
            repaired = json.loads(repair_json(clean_json_str))
        except Exception as e:
            print(f"[ERROR] Failed to repair and parse JSON: {e}")
            return {}
        if not isinstance(repaired, dict):
            print("[ERROR] Failed to repair and parse JSON: result is not an object")
            return {}
        return repaired

    def _build_task(self, diag: Dict[str, Any]) -> "Task":
        """Build the extraction task for one diagnosis (reads `context` and `diagnosis`)."""
        # NOTE(review): the JSON example in the prompt uses single quotes
        # while the prompt also demands strict JSON, and `expected_output`
        # says `test` while run() stores the result under "tests". Strings
        # are left unchanged here; confirm the intended wording.
        # NOTE(review): confirm `json_mode` is a supported Task argument.
        return Task(
            description=(
                f"For HCC {self.hcc_code} ({self.model_version}), analyze this patient context:\n\n"
                f"{diag['context']} for the diagnosis {diag['diagnosis']}\n\n"
                "Instructions:\n"
                "- Identify all **lab tests, procedures, and vitals** that are required to validate this diagnosis for that hcc given per CMS/AAPC.\n"
                "- Extract actual values if present in the `context`. For example: BMI, blood pressure, HbA1c, lipids.\n"
                "- If something is not in the context, return an empty dict for that category.\n"
                "- Give the output as JSON given below:\n"
                " {\n"
                " 'vitals': {...},\n"
                " 'procedures': {...},\n"
                " 'lab_test': {...}\n"
                " }\n"
                "- Return the output as strict JSON only."
            ),
            expected_output="One JSON object: the updated diagnosis with `test` included.",
            agent=self.agent,
            json_mode=True,
        )

    def run(self, input_diagnoses: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Run the extractor once per diagnosis and attach the parsed result.

        Args:
            input_diagnoses: Dicts that each provide ``"context"`` and
                ``"diagnosis"``. ``None`` is treated as an empty list.

        Returns:
            The same dict objects, in input order, each updated in place with
            a ``"tests"`` key holding the parsed JSON (``{}`` when the
            response could not be parsed).

        Raises:
            KeyError: If a diagnosis lacks ``"context"`` or ``"diagnosis"``.
        """
        updated_list = []
        for diag in input_diagnoses or []:
            crew = Crew(
                agents=[self.agent],
                tasks=[self._build_task(diag)],
                process=Process.sequential,
                verbose=True,
            )
            result = crew.kickoff()
            # The extractor normalises non-string results and never raises on
            # malformed output, so one bad answer does not abort the batch.
            diag["tests"] = self._extract_json_from_llm(result)
            updated_list.append(diag)
        return updated_list