File size: 7,289 Bytes
6dc9d46
 
 
 
 
 
696f787
6dc9d46
 
 
 
696f787
6dc9d46
 
 
9659593
6dc9d46
 
 
 
 
696f787
6dc9d46
 
 
9659593
6dc9d46
 
9659593
6dc9d46
 
 
9659593
6dc9d46
9659593
696f787
9659593
 
 
696f787
6dc9d46
9659593
696f787
6dc9d46
 
9659593
696f787
aefac4f
696f787
6dc9d46
 
 
 
 
 
 
aefac4f
9659593
 
6dc9d46
696f787
6dc9d46
aefac4f
696f787
9659593
696f787
6dc9d46
9659593
696f787
6dc9d46
696f787
6dc9d46
9659593
 
696f787
6dc9d46
9659593
696f787
6dc9d46
9659593
696f787
6dc9d46
 
 
696f787
6dc9d46
696f787
6dc9d46
696f787
aefac4f
6dc9d46
9659593
6dc9d46
aefac4f
 
 
696f787
9659593
6dc9d46
696f787
9659593
 
 
 
696f787
6dc9d46
 
696f787
aefac4f
6dc9d46
 
9659593
aefac4f
 
 
 
 
 
6dc9d46
 
 
 
aefac4f
696f787
6dc9d46
9659593
696f787
aefac4f
6dc9d46
 
 
 
9659593
6dc9d46
aefac4f
 
696f787
6dc9d46
 
 
 
696f787
6dc9d46
 
 
 
 
9659593
6dc9d46
9659593
6dc9d46
 
696f787
6dc9d46
696f787
6dc9d46
 
9659593
696f787
6dc9d46
9659593
6dc9d46
9659593
6dc9d46
 
 
696f787
6dc9d46
 
696f787
6dc9d46
 
696f787
6dc9d46
9659593
6dc9d46
 
696f787
6dc9d46
 
 
 
 
 
 
 
 
696f787
6dc9d46
 
 
696f787
6dc9d46
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
"""
MediGuard AI RAG-Helper
Biomarker-Disease Linker Agent - Connects biomarker values to predicted disease
"""

from src.llm_config import llm_config
from src.state import AgentOutput, GuildState, KeyDriver


class BiomarkerDiseaseLinkerAgent:
    """Agent that links specific biomarker values to the predicted disease.

    For each abnormal biomarker (preferring those the previous analysis marked
    as disease-relevant) the agent queries the retriever for supporting
    passages, derives a heuristic contribution percentage and asks the LLM for
    a short patient-friendly explanation.
    """

    # Maximum number of biomarkers turned into key drivers per run.
    MAX_KEY_DRIVERS = 5
    # Number of retrieved documents scanned for evidence sentences.
    EVIDENCE_DOC_LIMIT = 2
    # Matching sentences kept per document / in the combined evidence text.
    SENTENCES_PER_DOC = 2
    MAX_EVIDENCE_SENTENCES = 3
    # Length of the raw-text fallback when no sentence matches.
    FALLBACK_EVIDENCE_CHARS = 300

    def __init__(self, retriever):
        """
        Initialize with a retriever for biomarker-disease connections.

        Args:
            retriever: Vector store retriever for biomarker evidence; it must
                expose ``invoke(query)`` returning a sequence of documents
                with a ``page_content`` attribute.
        """
        self.retriever = retriever
        self.llm = llm_config.explainer

    def link(self, state: GuildState) -> GuildState:
        """
        Link biomarkers to disease prediction.

        Args:
            state: Current guild state. Reads ``model_prediction["disease"]``,
                ``patient_biomarkers`` and the optional ``biomarker_analysis``.

        Returns:
            A dict with the single key ``agent_outputs`` holding this agent's
            ``AgentOutput`` (presumably merged into the guild state by the
            caller - the merge itself is not visible in this file).
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Biomarker-Disease Linker Agent (RAG)")
        print("=" * 70)

        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        biomarkers = state["patient_biomarkers"]

        # Get biomarker analysis from previous agent; tolerate a missing/None entry.
        biomarker_analysis = state.get("biomarker_analysis") or {}

        # Identify key drivers
        print(f"\nIdentifying key drivers for {disease}...")
        key_drivers, citations_missing = self._identify_key_drivers(disease, biomarkers, biomarker_analysis, state)

        print(f"Identified {len(key_drivers)} key biomarker drivers")

        # Create agent output
        output = AgentOutput(
            agent_name="Biomarker-Disease Linker",
            findings={
                "disease": disease,
                "key_drivers": [kd.model_dump() for kd in key_drivers],
                "total_drivers": len(key_drivers),
                "feature_importance_calculated": True,
                "citations_missing": citations_missing,
            },
        )

        print("\nBiomarker-disease linking complete")

        return {"agent_outputs": [output]}

    def _identify_key_drivers(
        self, disease: str, biomarkers: dict[str, float], analysis: dict, state: GuildState
    ) -> tuple[list[KeyDriver], bool]:
        """Identify which biomarkers are driving the disease prediction.

        Args:
            disease: Predicted disease name.
            biomarkers: Raw patient biomarker values (currently unused; kept
                for interface compatibility).
            analysis: Output of the biomarker analysis step; reads the
                ``biomarker_flags`` and ``relevant_biomarkers`` entries.
            state: Guild state, forwarded to ``_create_key_driver``.

        Returns:
            ``(key_drivers, citations_missing)`` where ``citations_missing`` is
            True if any driver was built without retrieved evidence.
        """
        # ``or []`` also covers keys that are present but set to None,
        # mirroring how ``link`` treats ``biomarker_analysis``.
        flags = analysis.get("biomarker_flags") or []
        relevant = analysis.get("relevant_biomarkers") or []

        # Out-of-range biomarkers from the analysis.
        abnormal_biomarkers = [f for f in flags if f["status"] != "NORMAL"]

        # Focus on biomarkers that are both abnormal AND disease-relevant;
        # if none qualify, fall back to the abnormal ones.
        key_biomarkers = [f for f in abnormal_biomarkers if f["name"] in relevant]
        if not key_biomarkers:
            key_biomarkers = abnormal_biomarkers

        # Cap once, before reporting, so the printed count matches the work done.
        key_biomarkers = key_biomarkers[: self.MAX_KEY_DRIVERS]

        print(f"  Analyzing {len(key_biomarkers)} key biomarkers...")

        # Generate key drivers with evidence
        key_drivers: list[KeyDriver] = []
        citations_missing = False
        for biomarker_flag in key_biomarkers:
            driver, driver_missing = self._create_key_driver(biomarker_flag, disease, state)
            key_drivers.append(driver)
            citations_missing = citations_missing or driver_missing

        return key_drivers, citations_missing

    def _create_key_driver(self, biomarker_flag: dict, disease: str, state: GuildState) -> tuple[KeyDriver, bool]:
        """Create a KeyDriver object with evidence from RAG.

        Args:
            biomarker_flag: Flag dict with ``name``, ``value``, ``unit`` and
                ``status`` keys.
            disease: Predicted disease name.
            state: Guild state; ``state["sop"].require_pdf_citations`` decides
                whether an empty retrieval counts as missing citations.

        Returns:
            ``(driver, citations_missing)``; the flag is True when retrieval
            failed or returned nothing while citations are required.
        """
        name = biomarker_flag["name"]
        value = biomarker_flag["value"]
        unit = biomarker_flag["unit"]
        status = biomarker_flag["status"]

        # Retrieve evidence linking this biomarker to the disease
        query = f"How does {name} relate to {disease}? What does {status} {name} indicate?"

        citations_missing = False
        try:
            docs = self.retriever.invoke(query)
            if state["sop"].require_pdf_citations and not docs:
                evidence_text = "Insufficient evidence available in the knowledge base."
                contribution = "Unknown"
                citations_missing = True
            else:
                evidence_text = self._extract_evidence(docs, name, disease)
                contribution = self._estimate_contribution(biomarker_flag, len(docs))
        except Exception as e:
            # Deliberate best-effort: a failing lookup must not abort the whole
            # analysis, so degrade to a generic statement and flag the gap.
            print(f"  Warning: Evidence retrieval failed for {name}: {e}")
            evidence_text = f"{status} {name} may be related to {disease}."
            contribution = "Unknown"
            citations_missing = True

        # Generate explanation using LLM
        explanation = self._generate_explanation(name, value, unit, status, disease, evidence_text)

        driver = KeyDriver(
            biomarker=name,
            value=value,
            contribution=contribution,
            explanation=explanation,
            evidence=evidence_text[:500],  # Truncate long evidence
        )

        return driver, citations_missing

    def _extract_evidence(self, docs: list, biomarker: str, disease: str) -> str:
        """Extract relevant evidence from retrieved documents.

        Scans the first ``EVIDENCE_DOC_LIMIT`` documents for sentences (split
        on ".") that mention the biomarker or the disease, case-insensitively.

        Args:
            docs: Retrieved documents exposing ``page_content``.
            biomarker: Biomarker name to look for.
            disease: Disease name to look for.

        Returns:
            Up to ``MAX_EVIDENCE_SENTENCES`` matching sentences joined into one
            string; if nothing matches, the leading characters of the first
            document; if ``docs`` is empty, a "limited evidence" notice.
        """
        if not docs:
            return f"Limited evidence available for {biomarker} in {disease}."

        # Lower-case the search terms once instead of once per sentence.
        biomarker_lc = biomarker.lower()
        disease_lc = disease.lower()

        top_docs = docs[: self.EVIDENCE_DOC_LIMIT]
        evidence: list[str] = []
        for doc in top_docs:
            relevant_sentences = [
                sentence.strip()
                for sentence in doc.page_content.split(".")
                if biomarker_lc in sentence.lower() or disease_lc in sentence.lower()
            ]
            evidence.extend(relevant_sentences[: self.SENTENCES_PER_DOC])

        if evidence:
            return ". ".join(evidence[: self.MAX_EVIDENCE_SENTENCES]) + "."

        # No sentence matched: quote the first document explicitly rather than
        # whichever document the loop happened to visit last.
        return top_docs[0].page_content[: self.FALLBACK_EVIDENCE_CHARS]

    def _estimate_contribution(self, biomarker_flag: dict, doc_count: int) -> str:
        """Estimate the contribution percentage (simplified).

        Args:
            biomarker_flag: Flag dict; only ``status`` is read.
            doc_count: Number of retrieved documents backing the biomarker.

        Returns:
            A percentage string such as ``"46%"``, capped at ``"60%"``.
        """
        status = biomarker_flag["status"]

        # Simple heuristic based on severity
        if "CRITICAL" in status:
            base = 40
        elif status in ("HIGH", "LOW"):
            base = 25
        else:
            base = 10

        # Adjust based on evidence strength: 2 points per document, at most 15.
        evidence_boost = min(doc_count * 2, 15)

        total = min(base + evidence_boost, 60)
        return f"{total}%"

    def _generate_explanation(
        self, biomarker: str, value: float, unit: str, status: str, disease: str, evidence: str
    ) -> str:
        """Generate patient-friendly explanation.

        Args:
            biomarker: Biomarker name.
            value: Measured value.
            unit: Unit of the measured value.
            status: Flag status (e.g. "HIGH").
            disease: Predicted disease name.
            evidence: Evidence text inserted into the prompt.

        Returns:
            The stripped LLM response, or a templated sentence when the LLM
            call fails or yields an empty answer.
        """

        prompt = f"""Explain in 1-2 sentences how this biomarker result relates to {disease}:

Biomarker: {biomarker}
Value: {value} {unit}
Status: {status}

Medical Evidence: {evidence}

Write in patient-friendly language, explaining what this means for the diagnosis."""

        fallback = f"{biomarker} at {value} {unit} is {status}, which may be associated with {disease}."

        try:
            response = self.llm.invoke(prompt)
            explanation = response.content.strip()
        except Exception:
            # Best-effort: any LLM failure degrades to the templated sentence.
            return fallback

        # A blank answer is as unhelpful as a failed call; use the same fallback.
        return explanation or fallback


def create_biomarker_linker_agent(retriever):
    """Build and return a BiomarkerDiseaseLinkerAgent bound to ``retriever``."""
    agent = BiomarkerDiseaseLinkerAgent(retriever)
    return agent