Risk-Adjustment-Version1

Sleeping

App Files Community

sujataprakashdatycs committed on Oct 1, 2025

Commit

a7fab2a

verified ·

1 Parent(s): baef59a

Update ComorbidityCheckerAgent.py

Browse files

Files changed (1) hide show

ComorbidityCheckerAgent.py +231 -107

ComorbidityCheckerAgent.py CHANGED Viewed

@@ -1,168 +1,292 @@
-import os
 import json
-import pandas as pd
-from PyPDF2 import PdfReader
 from json_repair import repair_json
-from typing import List, Dict, Any, Optional
 from crewai import Agent, Task, Crew, Process
-from crewai_tools import SerperDevTool,tool
 from langchain_openai import ChatOpenAI
-from langchain_community.vectorstores import Chroma
-from embedding_manager import DirectoryEmbeddingManager
-SEED_SOURCES = [
-    "https://www.cms.gov/medicare/payment/medicare-advantage-rates-statistics/risk-adjustment",
-    "https://www.cms.gov/data-research/monitoring-programs/medicare-risk-adjustment-data-validation-program",
-    "https://www.cms.gov/files/document/fy-2024-icd-10-cm-coding-guidelines-updated-02/01/2024.pdf",
-    "https://www.aapc.com/blog/41212-include-meat-in-your-risk-adjustment-documentation/",
-]
 class ComorbidityCheckerAgent:
     """
-    Uses a two-step AI agent process to first identify potential comorbidities for a
-    MEAT-validated diagnosis and then verifies each one against the patient chart context.
     """
-    def __init__(self, pdf_dir: str, hcc_code: str, model_version: str):
-        self.embed_manager = DirectoryEmbeddingManager(pdf_dir)
-        self.llm = ChatOpenAI(model=os.environ.get("OPENAI_MODEL_NAME", "gpt-4-turbo"), temperature=0)
         self.hcc_code = hcc_code.strip()
         self.model_version = model_version.strip().upper()
-        self.search_tool = SerperDevTool()
-        #self.search_tool = SerperDevTool(seed_sources=SEED_SOURCES)
         @tool("patient_chart_search")
         def patient_chart_search(query: str) -> str:
             """
-            Search the patient chart embeddings and return all top 15 results as a single string.
-            Each result is preserved individually and then combined at the end.
             """
             print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
             vectordb = self.embed_manager.get_or_create_embeddings()
             results = vectordb.similarity_search(query, k=15)
-            # Keep all 15 results separate internally
-            all_results = [res.page_content for res in results]
-            # Combine into a single string for output (same format as before)
-            combined_results = "\n---\n".join(all_results)
-            return combined_results
-        # Register the agent with the tool
         self.agent = Agent(
             role="Clinical Coding and Comorbidity Analyst",
             goal=(
-                "First, identify all clinically significant comorbidities for a given primary diagnosis, "
-                "focusing on those relevant to HCC risk adjustment. Second, verify the presence of "
-                "these comorbidities in a patient's chart and present the findings in a structured JSON format."
             ),
             backstory=(
-                "You are an expert clinical coding analyst specializing in risk adjustment and Hierarchical Condition Categories (HCC). "
-                "Your primary skill is to research disease patterns and then meticulously cross-reference them with patient records embeddings "
-                "to ensure accurate documentation and coding. You provide clear, evidence-based findings."
             ),
-            tools=[self.search_tool,patient_chart_search],
             verbose=True,
             memory=False,
             llm=self.llm,
         )
-    def check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
-        """
-        Orchestrates the two-task process for a single primary diagnosis.
-        """
         primary_diagnosis = diagnosis_entry["diagnosis"]
-        final_result = {"diagnosis": primary_diagnosis, "comorbidities": []}
-        # --- Task 1: Identify Potential Comorbidities ---
         identify_task = Task(
             description=(
-                f"For the primary diagnosis of '{primary_diagnosis}', generate a full list of common and clinically "
-                f"significant comorbidities. Focus on conditions relevant for HCC {self.hcc_code} risk adjustment "
-                f"in the {self.model_version} model. Use your search tool for research if needed."
-            ),
-            expected_output=(
-                "A JSON object with a single key 'potential_comorbidities' containing a list of strings. "
-                "Example: {\"potential_comorbidities\": [\"Hypertension\", \"Diabetes Mellitus Type 2\"]}"
             ),
             agent=self.agent,
             json_mode=True
         )
-        print(f"\n[TASK 1] Identifying potential comorbidities for '{primary_diagnosis}'...")
         crew = Crew(agents=[self.agent], tasks=[identify_task], process=Process.sequential)
-        result = crew.kickoff()
         try:
-            comorbidities_to_check = json.loads(repair_json(result)).get("potential_comorbidities", [])
-            if not comorbidities_to_check:
-                print("[INFO] No potential comorbidities were identified by the agent.")
-                return final_result
-            print(f"[INFO] Identified potential comorbidities: {comorbidities_to_check}")
-        except (json.JSONDecodeError, TypeError):
-            print("[ERROR] Failed to decode the list of potential comorbidities. Aborting.")
             return final_result
-        # --- Task 2: Verify Each Comorbidity in the Chart ---
         verify_task = Task(
-        description=(
-            f"The patient has a primary diagnosis of '{primary_diagnosis}'.\n"
-            f"A list of potential comorbidities has been identified: {comorbidities_to_check}.\n\n"
-            "For EACH comorbidity, you MUST use the `patient_chart_search` tool, which queries the persistent "
-            "embedding database of the patient's chart. **Use all 15 retrieved results individually** to "
-            "determine presence or absence of each comorbidity.\n\n"
-            "After reviewing all results, construct a final JSON object with a single key 'comorbidity_analysis'. "
-            "Ensure there is one object for EACH comorbidity from the initial list. The 'context' field should "
-            "combine all relevant evidence snippets into a single string."
-        ),
-        expected_output=(
-            "A final JSON object with the key 'comorbidity_analysis'. This key should contain a list "
-            "where each item has the structure: \n"
-            '{\n'
-            '  "condition": "<name of comorbidity>",\n'
-            '  "is_present": true/false,\n'
-            '  "context": "<Use all 15 retrieved results individually and combined them according to comorbidity>",\n'
-            '  "rationale": "<one-line explanation of your finding>"\n'
-            '}'
-        ),
-        agent=self.agent,
-        json_mode=True
-    )
-        print(f"\n[TASK 2] Verifying identified comorbidities in the patient chart...")
-        crew = Crew(agents=[self.agent], tasks=[verify_task], process=Process.sequential)
-        result = crew.kickoff()
         try:
-            analysis = json.loads(repair_json(result))
             final_result["comorbidities"] = analysis.get("comorbidity_analysis", [])
-        except (json.JSONDecodeError, TypeError):
-            print(f"[ERROR] Failed to decode the final comorbidity analysis for '{primary_diagnosis}'.")
             final_result["comorbidities"] = []
         return final_result
     def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
         """
-        Main execution loop. It iterates through diagnoses that have met MEAT criteria
-        and runs the comorbidity check for each.
         """
-        final_results = []
         for entry in meat_validated_results:
-            meat_criteria = entry.get("meat", {})
-            if isinstance(meat_criteria, dict) and any(meat_criteria.values()):
-                print(f"\n[INFO] Checking for structured comorbidities for: {entry['diagnosis']}")
-                entry_with_comorbidities = self.check_comorbidities_for_one(entry)
-                final_results.append(entry_with_comorbidities)
-                print(f"[COMORBIDITIES CHECKED] Analysis complete for {entry['diagnosis']}.")
             else:
-                if entry.get("answer", "").lower() == "yes":
                     entry["comorbidities"] = {
                         "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
                     }
-                final_results.append(entry)
-        return final_results

+# comorbidity_checker.py
 import json
+from typing import List, Dict
 from json_repair import repair_json
 from crewai import Agent, Task, Crew, Process
+from crewai_tools import tool, SerperDevTool
 from langchain_openai import ChatOpenAI
+from embedding_manager import DirectoryEmbeddingManager
 class ComorbidityCheckerAgent:
     """
+    Two-step flow:
+    1) Identify clinically significant comorbidities for the primary diagnosis (HCC-aware).
+    2) Verify each comorbidity against the patient chart embeddings (top-15).
     """
+    def __init__(self, pdf_dir_or_file: str, hcc_code: str, model_version: str, model: str = "gpt-4o"):
+        self.embed_manager = DirectoryEmbeddingManager(pdf_dir_or_file)
+        self.llm = ChatOpenAI(model=model, temperature=0)
         self.hcc_code = hcc_code.strip()
         self.model_version = model_version.strip().upper()
+        self.search_tool = SerperDevTool()  # available if you want to expand later
         @tool("patient_chart_search")
         def patient_chart_search(query: str) -> str:
             """
+            Query persistent patient-chart embeddings.
+            Returns the top-15 results concatenated with separators.
             """
             print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
             vectordb = self.embed_manager.get_or_create_embeddings()
             results = vectordb.similarity_search(query, k=15)
+            return "\n---\n".join([res.page_content for res in results])
+        self.patient_chart_search = patient_chart_search
         self.agent = Agent(
             role="Clinical Coding and Comorbidity Analyst",
             goal=(
+                "Identify clinically significant comorbidities for a primary diagnosis relevant to HCC; "
+                "verify presence in the patient's chart with embeddings."
             ),
             backstory=(
+                "Expert risk-adjustment analyst who cross-references guidelines with chart evidence."
             ),
+            tools=[self.patient_chart_search],
             verbose=True,
             memory=False,
             llm=self.llm,
         )
+    def _check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
         primary_diagnosis = diagnosis_entry["diagnosis"]
+        final_result: Dict = {"diagnosis": primary_diagnosis, "comorbidities": []}
+        # Task 1 — Identify comorbidities
         identify_task = Task(
             description=(
+                f"For primary diagnosis '{primary_diagnosis}', list common and clinically meaningful comorbidities "
+                f"that matter for HCC {self.hcc_code} in {self.model_version}."
+                "Return STRICT JSON: {\"potential_comorbidities\": [\"...\"]}"
             ),
+            expected_output="Strict JSON with key potential_comorbidities (list of strings).",
             agent=self.agent,
             json_mode=True
         )
         crew = Crew(agents=[self.agent], tasks=[identify_task], process=Process.sequential)
+        identified = crew.kickoff()
         try:
+            comorbidities = json.loads(repair_json(identified)).get("potential_comorbidities", [])
+        except Exception:
+            comorbidities = []
+        if not comorbidities:
             return final_result
+        # Task 2 — Verify each comorbidity via patient_chart_search
         verify_task = Task(
+            description=(
+                f"Primary diagnosis: '{primary_diagnosis}'. Potential comorbidities: {comorbidities}.\n"
+                "For EACH comorbidity, call the patient_chart_search tool (top-15). "
+                "Decide presence/absence using ONLY returned snippets.\n\n"
+                "Return STRICT JSON:\n"
+                "{ \"comorbidity_analysis\": [\n"
+                "  {\"condition\":\"...\",\"is_present\":true/false,\"context\":\"<combined snippets>\",\"rationale\":\"...\"},\n"
+                "  ... ] }"
+            ),
+            expected_output="Strict JSON with key comorbidity_analysis (list of objects).",
+            agent=self.agent,
+            json_mode=True
+        )
+        crew = Crew(agents=[self.agent], tasks=[verify_task], process=Process.sequential)
+        verified = crew.kickoff()
         try:
+            analysis = json.loads(repair_json(verified))
             final_result["comorbidities"] = analysis.get("comorbidity_analysis", [])
+        except Exception:
             final_result["comorbidities"] = []
         return final_result
     def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
         """
+        Accepts entries that already passed MEAT (i.e., meat dict exists and has True somewhere).
         """
+        out: List[Dict] = []
         for entry in meat_validated_results:
+            meat = entry.get("meat", {})
+            if isinstance(meat, dict) and any(meat.values()):
+                print(f"[INFO] Checking structured comorbidities for: {entry['diagnosis']}")
+                out.append(self._check_comorbidities_for_one(entry))
             else:
+                # If earlier stages claim 'yes' but MEAT not met, pass through with a note.
+                if entry.get("answer_explicit", "").lower() == "yes" or entry.get("answer_implicit", "").lower() == "yes":
                     entry["comorbidities"] = {
                         "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
                     }
+                out.append(entry)
+        return out
+# import os
+# import json
+# import pandas as pd
+# from PyPDF2 import PdfReader
+# from json_repair import repair_json
+# from typing import List, Dict, Any, Optional
+# from crewai import Agent, Task, Crew, Process
+# from crewai_tools import SerperDevTool,tool
+# from langchain_openai import ChatOpenAI
+# from langchain_community.vectorstores import Chroma
+# from embedding_manager import DirectoryEmbeddingManager
+# SEED_SOURCES = [
+#     "https://www.cms.gov/medicare/payment/medicare-advantage-rates-statistics/risk-adjustment",
+#     "https://www.cms.gov/data-research/monitoring-programs/medicare-risk-adjustment-data-validation-program",
+#     "https://www.cms.gov/files/document/fy-2024-icd-10-cm-coding-guidelines-updated-02/01/2024.pdf",
+#     "https://www.aapc.com/blog/41212-include-meat-in-your-risk-adjustment-documentation/",
+# ]
+# class ComorbidityCheckerAgent:
+#     """
+#     Uses a two-step AI agent process to first identify potential comorbidities for a
+#     MEAT-validated diagnosis and then verifies each one against the patient chart context.
+#     """
+#     def __init__(self, pdf_dir: str, hcc_code: str, model_version: str):
+#         self.embed_manager = DirectoryEmbeddingManager(pdf_dir)
+#         self.llm = ChatOpenAI(model=os.environ.get("OPENAI_MODEL_NAME", "gpt-4o"), temperature=0)
+#         self.hcc_code = hcc_code.strip()
+#         self.model_version = model_version.strip().upper()
+#         self.search_tool = SerperDevTool()
+#         #self.search_tool = SerperDevTool(seed_sources=SEED_SOURCES)
+#         @tool("patient_chart_search")
+#         def patient_chart_search(query: str) -> str:
+#             """
+#             Search the patient chart embeddings and return all top 15 results as a single string.
+#             Each result is preserved individually and then combined at the end.
+#             """
+#             print(f"\n[TOOL LOG] Searching patient chart for: '{query}'")
+#             vectordb = self.embed_manager.get_or_create_embeddings()
+#             results = vectordb.similarity_search(query, k=15)
+#             # Keep all 15 results separate internally
+#             all_results = [res.page_content for res in results]
+#             # Combine into a single string for output (same format as before)
+#             combined_results = "\n---\n".join(all_results)
+#             return combined_results
+#         # Register the agent with the tool
+#         self.agent = Agent(
+#             role="Clinical Coding and Comorbidity Analyst",
+#             goal=(
+#                 "First, identify all clinically significant comorbidities for a given primary diagnosis, "
+#                 "focusing on those relevant to HCC risk adjustment. Second, verify the presence of "
+#                 "these comorbidities in a patient's chart and present the findings in a structured JSON format."
+#             ),
+#             backstory=(
+#                 "You are an expert clinical coding analyst specializing in risk adjustment and Hierarchical Condition Categories (HCC). "
+#                 "Your primary skill is to research disease patterns and then meticulously cross-reference them with patient records embeddings "
+#                 "to ensure accurate documentation and coding. You provide clear, evidence-based findings."
+#             ),
+#             tools=[patient_chart_search],
+#             verbose=True,
+#             memory=False,
+#             llm=self.llm,
+#         )
+#     def check_comorbidities_for_one(self, diagnosis_entry: Dict) -> Dict:
+#         """
+#         Orchestrates the two-task process for a single primary diagnosis.
+#         """
+#         primary_diagnosis = diagnosis_entry["diagnosis"]
+#         final_result = {"diagnosis": primary_diagnosis, "comorbidities": []}
+#         # --- Task 1: Identify Potential Comorbidities ---
+#         identify_task = Task(
+#             description=(
+#                 f"For the primary diagnosis of '{primary_diagnosis}', generate a full list of common and clinically "
+#                 f"significant comorbidities. Focus on conditions relevant for HCC {self.hcc_code} risk adjustment "
+#                 f"in the {self.model_version} model. Use your search tool for research if needed."
+#             ),
+#             expected_output=(
+#                 "A JSON object with a single key 'potential_comorbidities' containing a list of strings. "
+#                 "Example: {\"potential_comorbidities\": [\"Hypertension\", \"Diabetes Mellitus Type 2\"]}"
+#             ),
+#             agent=self.agent,
+#             json_mode=True
+#         )
+#         print(f"\n[TASK 1] Identifying potential comorbidities for '{primary_diagnosis}'...")
+#         crew = Crew(agents=[self.agent], tasks=[identify_task], process=Process.sequential)
+#         result = crew.kickoff()
+#         try:
+#             comorbidities_to_check = json.loads(repair_json(result)).get("potential_comorbidities", [])
+#             if not comorbidities_to_check:
+#                 print("[INFO] No potential comorbidities were identified by the agent.")
+#                 return final_result
+#             print(f"[INFO] Identified potential comorbidities: {comorbidities_to_check}")
+#         except (json.JSONDecodeError, TypeError):
+#             print("[ERROR] Failed to decode the list of potential comorbidities. Aborting.")
+#             return final_result
+#         # --- Task 2: Verify Each Comorbidity in the Chart ---
+#         verify_task = Task(
+#         description=(
+#             f"The patient has a primary diagnosis of '{primary_diagnosis}'.\n"
+#             f"A list of potential comorbidities has been identified: {comorbidities_to_check}.\n\n"
+#             "For EACH comorbidity, you MUST use the `patient_chart_search` tool, which queries the persistent "
+#             "embedding database of the patient's chart. **Use all 15 retrieved results individually** to "
+#             "determine presence or absence of each comorbidity.\n\n"
+#             "After reviewing all results, construct a final JSON object with a single key 'comorbidity_analysis'. "
+#             "Ensure there is one object for EACH comorbidity from the initial list. The 'context' field should "
+#             "combine all relevant evidence snippets into a single string."
+#         ),
+#         expected_output=(
+#             "A final JSON object with the key 'comorbidity_analysis'. This key should contain a list "
+#             "where each item has the structure: \n"
+#             '{\n'
+#             '  "condition": "<name of comorbidity>",\n'
+#             '  "is_present": true/false,\n'
+#             '  "context": "<Use all 15 retrieved results individually and combined them according to comorbidity>",\n'
+#             '  "rationale": "<one-line explanation of your finding>"\n'
+#             '}'
+#         ),
+#         agent=self.agent,
+#         json_mode=True
+#     )
+#         print(f"\n[TASK 2] Verifying identified comorbidities in the patient chart...")
+#         crew = Crew(agents=[self.agent], tasks=[verify_task], process=Process.sequential)
+#         result = crew.kickoff()
+#         try:
+#             analysis = json.loads(repair_json(result))
+#             final_result["comorbidities"] = analysis.get("comorbidity_analysis", [])
+#         except (json.JSONDecodeError, TypeError):
+#             print(f"[ERROR] Failed to decode the final comorbidity analysis for '{primary_diagnosis}'.")
+#             final_result["comorbidities"] = []
+#         return final_result
+#     def run(self, meat_validated_results: List[Dict]) -> List[Dict]:
+#         """
+#         Main execution loop. It iterates through diagnoses that have met MEAT criteria
+#         and runs the comorbidity check for each.
+#         """
+#         final_results = []
+#         for entry in meat_validated_results:
+#             meat_criteria = entry.get("meat", {})
+#             if isinstance(meat_criteria, dict) and any(meat_criteria.values()):
+#                 print(f"\n[INFO] Checking for structured comorbidities for: {entry['diagnosis']}")
+#                 entry_with_comorbidities = self.check_comorbidities_for_one(entry)
+#                 final_results.append(entry_with_comorbidities)
+#                 print(f"[COMORBIDITIES CHECKED] Analysis complete for {entry['diagnosis']}.")
+#             else:
+#                 if entry.get("answer", "").lower() == "yes":
+#                     entry["comorbidities"] = {
+#                         "status": "MEAT criteria not met; not proceeding with comorbidity analysis."
+#                     }
+#                 final_results.append(entry)
+#         return final_results