File size: 9,287 Bytes
6dc9d46
 
 
 
 
696f787
 
aefac4f
6dc9d46
696f787
6dc9d46
 
 
 
696f787
6dc9d46
 
696f787
6dc9d46
 
 
9659593
6dc9d46
 
9659593
6dc9d46
 
 
9659593
6dc9d46
9659593
696f787
9659593
 
 
 
 
696f787
6dc9d46
9659593
6dc9d46
 
696f787
6dc9d46
696f787
6dc9d46
9659593
696f787
6dc9d46
9659593
696f787
6dc9d46
9659593
696f787
6dc9d46
 
9659593
6dc9d46
696f787
6dc9d46
 
 
 
 
 
 
 
 
 
9659593
 
6dc9d46
696f787
6dc9d46
aefac4f
6dc9d46
 
 
696f787
9659593
696f787
6dc9d46
 
9659593
6dc9d46
 
 
696f787
6dc9d46
9659593
6dc9d46
 
696f787
6dc9d46
 
696f787
6dc9d46
9659593
 
6dc9d46
 
 
 
696f787
6dc9d46
9659593
6dc9d46
696f787
6dc9d46
9659593
6dc9d46
 
 
 
696f787
6dc9d46
 
 
 
 
 
 
696f787
6dc9d46
9659593
696f787
6dc9d46
 
696f787
6dc9d46
aefac4f
6dc9d46
 
 
696f787
6dc9d46
 
 
 
 
 
9659593
696f787
6dc9d46
9659593
 
 
6dc9d46
9659593
696f787
6dc9d46
9659593
6dc9d46
9659593
696f787
6dc9d46
696f787
9659593
6dc9d46
696f787
6dc9d46
696f787
6dc9d46
 
 
 
 
 
 
696f787
6dc9d46
 
 
 
 
 
 
696f787
6dc9d46
 
696f787
6dc9d46
 
 
 
 
 
 
696f787
6dc9d46
9659593
6dc9d46
 
696f787
6dc9d46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696f787
6dc9d46
 
 
 
 
 
 
 
696f787
 
6dc9d46
 
696f787
6dc9d46
 
 
9659593
 
 
696f787
6dc9d46
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
"""
MediGuard AI RAG-Helper
Confidence Assessor Agent - Evaluates prediction reliability
"""

from typing import Any

from src.biomarker_validator import BiomarkerValidator
from src.llm_config import llm_config
from src.state import AgentOutput, GuildState


class ConfidenceAssessorAgent:
    """Agent that assesses the reliability and limitations of the prediction.

    Combines the ML model's confidence with findings from earlier agents
    (biomarker analysis, disease explanation, biomarker-disease linking) to
    produce a categorical reliability rating, a list of limitations, an
    LLM-generated assessment summary, and alternative diagnoses.
    """

    def __init__(self) -> None:
        # Shared "analyzer" LLM from the project-wide configuration; used only
        # for generating the natural-language assessment summary.
        self.llm = llm_config.analyzer

    def assess(self, state: GuildState) -> GuildState:
        """
        Assess prediction confidence and identify limitations.

        Args:
            state: Current guild state.

        Returns:
            Partial state update of the form {"agent_outputs": [AgentOutput]};
            the orchestration framework merges it into the full guild state.
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Confidence Assessor Agent")
        print("=" * 70)

        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        ml_confidence = model_prediction["confidence"]
        probabilities = model_prediction.get("probabilities", {})
        biomarkers = state["patient_biomarkers"]

        # Collect previous agent findings (empty dicts when an agent did not run).
        biomarker_analysis = state.get("biomarker_analysis") or {}
        disease_explanation = self._get_agent_findings(state, "Disease Explainer")
        linker_findings = self._get_agent_findings(state, "Biomarker-Disease Linker")

        print(f"\nAssessing confidence for {disease} prediction...")

        # How strongly does the collected evidence support the prediction?
        evidence_strength = self._evaluate_evidence_strength(biomarker_analysis, disease_explanation, linker_findings)

        # What caveats / uncertainties apply to this prediction?
        limitations = self._identify_limitations(biomarkers, biomarker_analysis, probabilities)

        # Fold ML confidence, evidence strength, and limitation count into one rating.
        reliability = self._calculate_reliability(ml_confidence, evidence_strength, len(limitations))

        # Human-readable summary (LLM-generated, with a static fallback on failure).
        assessment_summary = self._generate_assessment(
            disease, ml_confidence, reliability, evidence_strength, limitations
        )

        # Package everything as this agent's output record.
        output = AgentOutput(
            agent_name="Confidence Assessor",
            findings={
                "prediction_reliability": reliability,
                "ml_confidence": ml_confidence,
                "evidence_strength": evidence_strength,
                "limitations": limitations,
                "assessment_summary": assessment_summary,
                "recommendation": self._get_recommendation(reliability),
                "alternative_diagnoses": self._get_alternatives(probabilities),
            },
        )

        print("\nConfidence assessment complete")
        print(f"  - Prediction reliability: {reliability}")
        print(f"  - Evidence strength: {evidence_strength}")
        print(f"  - Limitations identified: {len(limitations)}")

        return {"agent_outputs": [output]}

    def _get_agent_findings(self, state: GuildState, agent_name: str) -> dict:
        """Return the findings dict of the named agent, or {} if it has no output."""
        for output in state.get("agent_outputs", []):
            if output.agent_name == agent_name:
                return output.findings
        return {}

    def _evaluate_evidence_strength(
        self, biomarker_analysis: dict, disease_explanation: dict, linker_findings: dict
    ) -> str:
        """Score supporting evidence on a 0-5 scale, mapped to STRONG/MODERATE/WEAK."""

        score = 0

        # Biomarker validation quality: more abnormal flags -> stronger evidence.
        flags = biomarker_analysis.get("biomarker_flags", [])
        abnormal_count = sum(1 for f in flags if f.get("status") != "NORMAL")
        if abnormal_count >= 3:
            score += 1
        if abnormal_count >= 5:
            score += 1

        # Disease explanation quality (retrieval quality from the RAG agent).
        if disease_explanation.get("retrieval_quality", 0) >= 3:
            score += 1

        # Biomarker-disease linking: number of identified key drivers.
        key_drivers = linker_findings.get("key_drivers", [])
        if len(key_drivers) >= 2:
            score += 1
        if len(key_drivers) >= 4:
            score += 1

        # Map numeric score to categorical rating.
        if score >= 4:
            return "STRONG"
        elif score >= 2:
            return "MODERATE"
        else:
            return "WEAK"

    def _identify_limitations(
        self, biomarkers: dict[str, float], biomarker_analysis: dict, probabilities: dict[str, float]
    ) -> list[str]:
        """Identify limitations and uncertainties in the prediction inputs."""
        limitations: list[str] = []

        # Missing biomarkers relative to the validator's expected panel size.
        expected_biomarkers = BiomarkerValidator().expected_biomarker_count()
        if len(biomarkers) < expected_biomarkers:
            missing = expected_biomarkers - len(biomarkers)
            limitations.append(f"Missing data: {missing} biomarker(s) not provided")

        # Close alternative prediction: flag the runner-up when it exceeds 15%.
        sorted_probs = sorted(probabilities.items(), key=lambda x: x[1], reverse=True)
        if len(sorted_probs) >= 2:
            top2, prob2 = sorted_probs[1]
            if prob2 > 0.15:  # Alternative is significant
                limitations.append(f"Differential diagnosis: {top2} also possible ({prob2:.1%} probability)")

        # Disease-relevant biomarkers that are nevertheless within normal range.
        flags = biomarker_analysis.get("biomarker_flags", [])
        relevant = biomarker_analysis.get("relevant_biomarkers", [])
        normal_relevant = [f for f in flags if f.get("name") in relevant and f.get("status") == "NORMAL"]
        if len(normal_relevant) >= 2:
            limitations.append("Some disease-relevant biomarkers are within normal range")

        # Multiple safety alerts indicate case complexity.
        alerts = biomarker_analysis.get("safety_alerts", [])
        if len(alerts) >= 2:
            limitations.append("Multiple critical values detected; professional evaluation essential")

        return limitations

    def _calculate_reliability(self, ml_confidence: float, evidence_strength: str, limitation_count: int) -> str:
        """Combine ML confidence, evidence strength, and limitations into HIGH/MODERATE/LOW."""

        score = 0

        # ML confidence contribution (0-3 points).
        if ml_confidence >= 0.8:
            score += 3
        elif ml_confidence >= 0.6:
            score += 2
        elif ml_confidence >= 0.4:
            score += 1

        # Evidence strength contribution (1-3 points).
        if evidence_strength == "STRONG":
            score += 3
        elif evidence_strength == "MODERATE":
            score += 2
        else:
            score += 1

        # Limitation penalty, capped at 3 points.
        score -= min(limitation_count, 3)

        # Map combined score to categorical rating.
        if score >= 5:
            return "HIGH"
        elif score >= 3:
            return "MODERATE"
        else:
            return "LOW"

    def _generate_assessment(
        self, disease: str, ml_confidence: float, reliability: str, evidence_strength: str, limitations: list[str]
    ) -> str:
        """Generate a human-readable assessment summary via the LLM.

        Falls back to a static sentence if the LLM call raises, so the
        pipeline never fails at this stage.
        """

        prompt = f"""As a medical AI assessment system, provide a brief confidence statement about this prediction:

Disease Predicted: {disease}
ML Model Confidence: {ml_confidence:.1%}
Overall Reliability: {reliability}
Evidence Strength: {evidence_strength}
Limitations: {len(limitations)} identified

Write a 2-3 sentence assessment that:
1. States the overall reliability
2. Mentions key strengths or weaknesses
3. Emphasizes the need for professional medical consultation

Be honest about uncertainty. Patient safety is paramount."""

        try:
            response = self.llm.invoke(prompt)
            return response.content.strip()
        except Exception as e:
            # Broad catch is deliberate: any LLM failure degrades to a canned summary.
            print(f"Warning: Assessment generation failed: {e}")
            return f"The {disease} prediction has {reliability.lower()} reliability based on available data. Professional medical evaluation is strongly recommended for accurate diagnosis."

    def _get_recommendation(self, reliability: str) -> str:
        """Get action recommendation based on reliability"""
        if reliability == "HIGH":
            return "High confidence prediction. Schedule medical consultation to confirm diagnosis and discuss treatment options."
        elif reliability == "MODERATE":
            return "Moderate confidence prediction. Medical consultation recommended for professional evaluation and additional testing if needed."
        else:
            return "Low confidence prediction. Professional medical assessment essential. Additional tests may be required for accurate diagnosis."

    def _get_alternatives(self, probabilities: dict[str, float]) -> list[dict[str, Any]]:
        """Return up to three runner-up diagnoses with probability above 5%."""
        sorted_probs = sorted(probabilities.items(), key=lambda x: x[1], reverse=True)

        alternatives = []
        for disease, prob in sorted_probs[1:4]:  # Top 3 alternatives
            if prob > 0.05:  # Only significant alternatives
                alternatives.append(
                    {"disease": disease, "probability": prob, "note": "Consider discussing with healthcare provider"}
                )

        return alternatives


# Module-level singleton created at import time for other modules to import.
# NOTE(review): instantiation runs __init__, which reads llm_config.analyzer —
# importing this module therefore requires LLM configuration to be loaded.
confidence_assessor_agent = ConfidenceAssessorAgent()