Spaces:
Sleeping
Sleeping
| """ | |
| MediGuard AI RAG-Helper | |
| Biomarker-Disease Linker Agent - Connects biomarker values to predicted disease | |
| """ | |
| from src.llm_config import llm_config | |
| from src.state import AgentOutput, GuildState, KeyDriver | |
class BiomarkerDiseaseLinkerAgent:
    """Agent that links specific biomarker values to the predicted disease.

    For every abnormal biomarker it selects, the agent queries the retriever
    for supporting passages, derives a heuristic contribution figure and asks
    the LLM for a short patient-friendly explanation.
    """

    # Upper bound on the number of key drivers reported per prediction.
    MAX_KEY_DRIVERS = 5
    # Evidence stored on a KeyDriver is truncated to this many characters.
    MAX_EVIDENCE_CHARS = 500
    # Length of the raw-content fallback used when no sentence matches.
    FALLBACK_EVIDENCE_CHARS = 300
    # Only the first few retrieved documents are mined for evidence sentences.
    MAX_EVIDENCE_DOCS = 2

    def __init__(self, retriever):
        """
        Initialize with a retriever for biomarker-disease connections.

        Args:
            retriever: Vector store retriever for biomarker evidence. It is
                called as ``retriever.invoke(query)`` and the returned items
                are read through their ``page_content`` attribute.
        """
        self.retriever = retriever
        self.llm = llm_config.explainer

    def link(self, state: GuildState) -> GuildState:
        """
        Link biomarkers to disease prediction.

        Args:
            state: Current guild state. Reads ``model_prediction["disease"]``,
                ``patient_biomarkers`` and the optional ``biomarker_analysis``
                produced by a previous agent.

        Returns:
            A dict with a single ``agent_outputs`` entry holding this agent's
            AgentOutput (disease, serialized key drivers, driver count and
            the ``citations_missing`` flag).
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Biomarker-Disease Linker Agent (RAG)")
        print("=" * 70)

        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        biomarkers = state["patient_biomarkers"]

        # The analysis may be absent or None when the previous agent did not run.
        biomarker_analysis = state.get("biomarker_analysis") or {}

        print(f"\nIdentifying key drivers for {disease}...")
        key_drivers, citations_missing = self._identify_key_drivers(disease, biomarkers, biomarker_analysis, state)
        print(f"Identified {len(key_drivers)} key biomarker drivers")

        output = AgentOutput(
            agent_name="Biomarker-Disease Linker",
            findings={
                "disease": disease,
                "key_drivers": [kd.model_dump() for kd in key_drivers],
                "total_drivers": len(key_drivers),
                "feature_importance_calculated": True,
                "citations_missing": citations_missing,
            },
        )

        print("\nBiomarker-disease linking complete")
        return {"agent_outputs": [output]}

    def _identify_key_drivers(
        self, disease: str, biomarkers: dict[str, float], analysis: dict, state: GuildState
    ) -> tuple[list[KeyDriver], bool]:
        """Identify which biomarkers are driving the disease prediction.

        Args:
            disease: Name of the predicted disease.
            biomarkers: Raw patient biomarker values (currently not consulted;
                selection relies on ``analysis`` only).
            analysis: Output of the biomarker analysis step; reads the
                ``biomarker_flags`` and ``relevant_biomarkers`` entries.
            state: Current guild state, forwarded to ``_create_key_driver``.

        Returns:
            A tuple ``(key_drivers, citations_missing)`` where the flag is True
            as soon as any single driver reports missing citations.
        """
        # ``or []`` mirrors the ``or {}`` guard in link(): a key that is present
        # but None must not break the iteration / membership test below.
        flags = analysis.get("biomarker_flags") or []
        relevant = analysis.get("relevant_biomarkers") or []

        abnormal_biomarkers = [f for f in flags if f["status"] != "NORMAL"]

        # Prefer biomarkers that are both abnormal AND disease-relevant;
        # otherwise fall back to the abnormal ones in their given order.
        key_biomarkers = [f for f in abnormal_biomarkers if f["name"] in relevant]
        if not key_biomarkers:
            key_biomarkers = abnormal_biomarkers
        key_biomarkers = key_biomarkers[: self.MAX_KEY_DRIVERS]

        print(f" Analyzing {len(key_biomarkers)} key biomarkers...")

        key_drivers: list[KeyDriver] = []
        citations_missing = False
        for biomarker_flag in key_biomarkers:
            driver, driver_missing = self._create_key_driver(biomarker_flag, disease, state)
            key_drivers.append(driver)
            citations_missing = citations_missing or driver_missing
        return key_drivers, citations_missing

    def _create_key_driver(self, biomarker_flag: dict, disease: str, state: GuildState) -> tuple[KeyDriver, bool]:
        """Create a KeyDriver object with evidence from RAG.

        Args:
            biomarker_flag: Flag dict providing ``name``, ``value``, ``unit``
                and ``status``.
            disease: Name of the predicted disease.
            state: Current guild state; ``state["sop"].require_pdf_citations``
                decides whether an empty retrieval counts as missing citations.

        Returns:
            A tuple ``(driver, citations_missing)``. The flag is True when the
            retrieval step raised, or when citations are required and nothing
            was retrieved.
        """
        name = biomarker_flag["name"]
        value = biomarker_flag["value"]
        unit = biomarker_flag["unit"]
        status = biomarker_flag["status"]

        query = f"How does {name} relate to {disease}? What does {status} {name} indicate?"
        citations_missing = False
        try:
            docs = self.retriever.invoke(query)
            if state["sop"].require_pdf_citations and not docs:
                evidence_text = "Insufficient evidence available in the knowledge base."
                contribution = "Unknown"
                citations_missing = True
            else:
                evidence_text = self._extract_evidence(docs, name, disease)
                contribution = self._estimate_contribution(biomarker_flag, len(docs))
        except Exception as e:
            # Best-effort: a failing retriever must not abort the whole agent,
            # so degrade to a generic statement and flag the missing citation.
            print(f" Warning: Evidence retrieval failed for {name}: {e}")
            evidence_text = f"{status} {name} may be related to {disease}."
            contribution = "Unknown"
            citations_missing = True

        explanation = self._generate_explanation(name, value, unit, status, disease, evidence_text)
        driver = KeyDriver(
            biomarker=name,
            value=value,
            contribution=contribution,
            explanation=explanation,
            evidence=evidence_text[: self.MAX_EVIDENCE_CHARS],  # Truncate long evidence
        )
        return driver, citations_missing

    def _extract_evidence(self, docs: list, biomarker: str, disease: str) -> str:
        """Extract relevant evidence from retrieved documents.

        Args:
            docs: Retrieved documents, each exposing ``page_content``.
            biomarker: Biomarker name to look for (case-insensitive).
            disease: Disease name to look for (case-insensitive).

        Returns:
            Up to three matching sentences joined into one string; the leading
            characters of the first document when no sentence matches; or a
            "Limited evidence" message when ``docs`` is empty.
        """
        if not docs:
            return f"Limited evidence available for {biomarker} in {disease}."

        # Lower-case the search terms once instead of once per sentence.
        biomarker_key = biomarker.lower()
        disease_key = disease.lower()

        top_docs = docs[: self.MAX_EVIDENCE_DOCS]
        evidence: list[str] = []
        for doc in top_docs:
            # Naive sentence split; keep sentences mentioning either term.
            matches = [
                sentence.strip()
                for sentence in doc.page_content.split(".")
                if biomarker_key in sentence.lower() or disease_key in sentence.lower()
            ]
            evidence.extend(matches[:2])

        if evidence:
            return ". ".join(evidence[:3]) + "."
        # No sentence matched: fall back to the first document explicitly
        # rather than whichever document the loop happened to visit last.
        return top_docs[0].page_content[: self.FALLBACK_EVIDENCE_CHARS]

    def _estimate_contribution(self, biomarker_flag: dict, doc_count: int) -> str:
        """Estimate the contribution percentage (simplified).

        Args:
            biomarker_flag: Flag dict; only ``status`` is read.
            doc_count: Number of retrieved documents backing the biomarker.

        Returns:
            A percentage string such as ``"46%"``.
        """
        status = biomarker_flag["status"]

        # Simple heuristic based on severity
        if "CRITICAL" in status:
            base = 40
        elif status in ["HIGH", "LOW"]:
            base = 25
        else:
            base = 10

        # Each document adds 2 points, capped at 15; the total is capped at 60.
        evidence_boost = min(doc_count * 2, 15)
        total = min(base + evidence_boost, 60)
        return f"{total}%"

    def _generate_explanation(
        self, biomarker: str, value: float, unit: str, status: str, disease: str, evidence: str
    ) -> str:
        """Generate patient-friendly explanation.

        Args:
            biomarker: Biomarker name.
            value: Measured value.
            unit: Unit of the measured value.
            status: Flag status of the biomarker (e.g. ``HIGH``).
            disease: Name of the predicted disease.
            evidence: Evidence text inserted into the prompt.

        Returns:
            The stripped LLM reply, or a templated sentence when the LLM call
            fails or yields only whitespace.
        """
        prompt = f"""Explain in 1-2 sentences how this biomarker result relates to {disease}:
Biomarker: {biomarker}
Value: {value} {unit}
Status: {status}
Medical Evidence: {evidence}
Write in patient-friendly language, explaining what this means for the diagnosis."""
        fallback = f"{biomarker} at {value} {unit} is {status}, which may be associated with {disease}."
        try:
            response = self.llm.invoke(prompt)
            explanation = response.content.strip()
        except Exception as e:
            # Surface the failure the same way retrieval failures are reported
            # instead of swallowing it silently.
            print(f" Warning: Explanation generation failed for {biomarker}: {e}")
            return fallback
        # A blank reply would leave the driver without any explanation.
        return explanation or fallback
def create_biomarker_linker_agent(retriever):
    """Build a BiomarkerDiseaseLinkerAgent wired to the given retriever.

    Args:
        retriever: Retriever object handed straight to the agent constructor.

    Returns:
        A new BiomarkerDiseaseLinkerAgent instance.
    """
    agent = BiomarkerDiseaseLinkerAgent(retriever)
    return agent