File size: 8,763 Bytes
6dc9d46
 
 
 
 
 
696f787
6dc9d46
 
696f787
 
 
6dc9d46
 
 
696f787
6dc9d46
 
 
9659593
6dc9d46
 
 
 
 
696f787
6dc9d46
 
 
9659593
6dc9d46
 
9659593
6dc9d46
 
 
9659593
6dc9d46
9659593
696f787
9659593
 
 
696f787
ad2e847
9659593
ad2e847
9659593
696f787
6dc9d46
 
 
696f787
6dc9d46
 
696f787
ad2e847
 
 
 
 
aefac4f
 
 
9659593
aefac4f
 
 
 
9659593
aefac4f
 
 
 
 
 
9659593
 
 
 
aefac4f
 
 
9659593
 
aefac4f
 
 
 
 
9659593
696f787
6dc9d46
 
696f787
6dc9d46
 
696f787
6dc9d46
 
 
 
 
9659593
 
 
 
6dc9d46
 
aefac4f
9659593
 
6dc9d46
696f787
6dc9d46
aefac4f
6dc9d46
 
696f787
9659593
696f787
6dc9d46
 
696f787
6dc9d46
9659593
 
 
696f787
9659593
 
 
 
 
6dc9d46
 
 
 
 
 
 
9659593
 
 
 
 
6dc9d46
 
 
 
 
9659593
 
 
 
696f787
6dc9d46
696f787
6dc9d46
9659593
696f787
6dc9d46
 
 
696f787
6dc9d46
 
 
 
 
 
9659593
6dc9d46
696f787
6dc9d46
696f787
6dc9d46
 
9659593
696f787
6dc9d46
 
9659593
696f787
6dc9d46
 
696f787
9659593
 
 
 
 
 
 
 
6dc9d46
 
696f787
6dc9d46
 
9659593
696f787
6dc9d46
696f787
6dc9d46
 
 
696f787
6dc9d46
9659593
 
696f787
6dc9d46
9659593
6dc9d46
696f787
6dc9d46
9659593
6dc9d46
696f787
6dc9d46
696f787
6dc9d46
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
"""
MediGuard AI RAG-Helper
Disease Explainer Agent - Retrieves disease pathophysiology from medical PDFs
"""

import re
from pathlib import Path

from langchain_core.prompts import ChatPromptTemplate

from src.llm_config import llm_config
from src.state import AgentOutput, GuildState


class DiseaseExplainerAgent:
    """Agent that retrieves and explains disease mechanisms using RAG.

    The agent queries a document retriever for passages about the predicted
    disease, asks the configured LLM to turn them into a four-part explanation
    (pathophysiology, diagnostic criteria, clinical presentation, summary) and
    packages the result, together with source citations, as an ``AgentOutput``.
    """

    # Recognises a section heading at the START of a line, e.g.
    #   "1. PATHOPHYSIOLOGY: text", "## Diagnostic Criteria", "**Summary:** text".
    # A keyword that merely appears inside a sentence is NOT a heading; the
    # keyword must be followed by ":", by a dash plus whitespace, or by the end
    # of the line. Text after the separator is captured in the "inline" group.
    _SECTION_HEADER = re.compile(
        r"^[\s#>*_\-]*"  # markdown heading / quote / bullet / emphasis markers
        r"(?:\d+\s*[.)]\s*)?"  # optional list numbering such as "1." or "2)"
        r"[\s*_]*"  # emphasis opened after the numbering
        r"(?:"
        r"(?P<pathophysiology>PATHOPHYSIOLOGY)"
        r"|(?P<diagnostic_criteria>DIAGNOSTIC(?:[ _]+CRITERIA)?)"
        r"|(?P<clinical_presentation>CLINICAL(?:[ _]+PRESENTATION)?|PRESENTATION)"
        r"|(?P<summary>SUMMARY)"
        r")"
        r"[*_]*\s*"  # emphasis closed right after the keyword
        r"(?:(?::|[-\u2013\u2014](?=\s))[*_]*\s*(?P<inline>.*))?$",
        re.IGNORECASE,
    )

    def __init__(self, retriever):
        """
        Initialize with a retriever for medical PDFs.

        Args:
            retriever: Vector store retriever for disease documents. It must
                expose a ``search_kwargs`` mapping and an ``invoke(query)``
                method, both of which ``explain`` uses.
        """
        self.retriever = retriever
        self.llm = llm_config.explainer

    def explain(self, state: GuildState) -> GuildState:
        """
        Retrieve and explain disease pathophysiology.

        Args:
            state: Current guild state. Reads ``state["model_prediction"]``
                (keys ``disease`` and ``confidence``) and ``state["sop"]``
                (attributes ``disease_explainer_k`` and
                ``require_pdf_citations``).

        Returns:
            A dict with the single key ``"agent_outputs"`` holding a
            one-element list with this agent's ``AgentOutput``.
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Disease Explainer Agent (RAG)")
        print("=" * 70)

        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        confidence = model_prediction["confidence"]
        sop = state["sop"]
        retrieval_k = sop.disease_explainer_k

        print(f"\nRetrieving information about: {disease}")
        print(f"Retrieval k={retrieval_k}")

        query = f"""What is {disease}? Explain the pathophysiology, diagnostic criteria, 
        and clinical presentation. Focus on mechanisms relevant to blood biomarkers."""

        # The retriever is shared, so its search_kwargs are overridden only for
        # the duration of this call and always restored afterwards, even when
        # retrieval raises. NOTE(review): this temporary override is not safe
        # if the same retriever is used concurrently - confirm with callers.
        original_search_kwargs = dict(self.retriever.search_kwargs)
        self.retriever.search_kwargs = {**original_search_kwargs, "k": retrieval_k}
        try:
            docs = self.retriever.invoke(query)
        finally:
            self.retriever.search_kwargs = original_search_kwargs

        print(f"Retrieved {len(docs)} relevant document chunks")

        if sop.require_pdf_citations and not docs:
            # Citations are mandatory but nothing was retrieved: report the gap
            # instead of letting the LLM answer without sources.
            explanation = {
                "pathophysiology": "Insufficient evidence available in the knowledge base to explain this condition.",
                "diagnostic_criteria": "Insufficient evidence available to list diagnostic criteria.",
                "clinical_presentation": "Insufficient evidence available to describe clinical presentation.",
                "summary": "Insufficient evidence available for a detailed explanation.",
            }
            output = self._build_output(
                disease,
                explanation,
                citations=[],
                confidence=confidence,
                retrieval_quality=0,
                citations_missing=True,
            )

            print("\nDisease explanation generated")
            print("  - Pathophysiology: insufficient evidence")
            print("  - Citations: 0 sources")
            return {"agent_outputs": [output]}

        explanation = self._generate_explanation(disease, docs, confidence)
        citations = self._extract_citations(docs)
        output = self._build_output(
            disease,
            explanation,
            citations=citations,
            confidence=confidence,
            retrieval_quality=len(docs),
            citations_missing=False,
        )

        print("\nDisease explanation generated")
        print(f"  - Pathophysiology: {len(explanation['pathophysiology'])} chars")
        print(f"  - Citations: {len(citations)} sources")

        return {"agent_outputs": [output]}

    def _build_output(self, disease, explanation, *, citations, confidence, retrieval_quality, citations_missing):
        """Wrap an explanation dict and its metadata in an ``AgentOutput``.

        Args:
            disease: Name of the disease being explained.
            explanation: Dict with the keys ``pathophysiology``,
                ``diagnostic_criteria``, ``clinical_presentation``, ``summary``.
            citations: List of citation strings for the retrieved documents.
            confidence: Model prediction confidence, passed through unchanged.
            retrieval_quality: Number of retrieved document chunks.
            citations_missing: True when citations were required but none
                could be retrieved.

        Returns:
            The ``AgentOutput`` for the "Disease Explainer" agent.
        """
        return AgentOutput(
            agent_name="Disease Explainer",
            findings={
                "disease": disease,
                "pathophysiology": explanation["pathophysiology"],
                "diagnostic_criteria": explanation["diagnostic_criteria"],
                "clinical_presentation": explanation["clinical_presentation"],
                "mechanism_summary": explanation["summary"],
                "citations": citations,
                "confidence": confidence,
                "retrieval_quality": retrieval_quality,
                "citations_missing": citations_missing,
            },
        )

    def _generate_explanation(self, disease: str, docs: list, confidence: float) -> dict:
        """Generate structured disease explanation using LLM and retrieved docs.

        Args:
            disease: Name of the disease to explain.
            docs: Retrieved documents; each must expose ``metadata`` (a dict)
                and ``page_content``.
            confidence: Prediction confidence, rendered as a percentage in the
                prompt and in the fallback summary.

        Returns:
            Dict with the keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``. If the LLM call or the
            parsing fails, a generic fallback explanation is returned instead
            of raising.
        """
        # Each chunk is labelled with its source so the model can attribute facts.
        context = "\n\n---\n\n".join(
            f"Source: {doc.metadata.get('source', 'Unknown')}\n\n{doc.page_content}" for doc in docs
        )

        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    """You are a medical expert explaining diseases for patient self-assessment. 
            Based on the provided medical literature, explain the disease in clear, accessible language.
            Structure your response with these sections:
            1. PATHOPHYSIOLOGY: The underlying biological mechanisms
            2. DIAGNOSTIC_CRITERIA: How the disease is diagnosed
            3. CLINICAL_PRESENTATION: Common symptoms and signs
            4. SUMMARY: A 2-3 sentence overview
            
            Be accurate, cite-able, and patient-friendly. Focus on how the disease affects blood biomarkers.""",
                ),
                (
                    "human",
                    """Disease: {disease}
            Prediction Confidence: {confidence:.1%}
            
            Medical Literature Context:
            {context}
            
            Please provide a structured explanation.""",
                ),
            ]
        )

        chain = prompt | self.llm

        try:
            response = chain.invoke({"disease": disease, "confidence": confidence, "context": context})
            return self._parse_explanation(response.content)
        except Exception as e:
            # Deliberate best-effort: an LLM outage must not abort the whole
            # workflow, so degrade to a generic, clearly non-specific answer.
            print(f"Warning: LLM explanation generation failed: {e}")
            return {
                "pathophysiology": f"{disease} is a medical condition requiring professional diagnosis.",
                "diagnostic_criteria": "Consult medical guidelines for diagnostic criteria.",
                "clinical_presentation": "Clinical presentation varies by individual.",
                "summary": f"{disease} detected with {confidence:.1%} confidence. Consult healthcare provider.",
            }

    def _parse_explanation(self, content: str) -> dict:
        """Parse LLM response into structured sections.

        A line is treated as a section heading only when the section keyword
        starts the line (after optional numbering / markdown markers) and is
        followed by a colon, a spaced dash, or nothing. Text that follows the
        separator on the heading line is kept as the first line of that
        section. Sentences that merely mention a keyword stay in the body.

        Args:
            content: Raw text returned by the LLM.

        Returns:
            Dict with the keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``. Each value is the
            section's non-blank lines, every line terminated by ``"\\n"``.
            When no section could be filled, ``summary`` holds the first 500
            characters of ``content``.
        """
        sections = {"pathophysiology": "", "diagnostic_criteria": "", "clinical_presentation": "", "summary": ""}
        current_section = None

        for line in content.split("\n"):
            header = self._SECTION_HEADER.match(line)
            if header:
                # Exactly one of the named section groups participates in a match.
                current_section = next(key for key in sections if header.group(key))
                inline_text = (header.group("inline") or "").strip()
                if inline_text:
                    sections[current_section] += inline_text + "\n"
                continue

            # Lines before the first heading and blank lines are skipped.
            if current_section and line.strip():
                sections[current_section] += line + "\n"

        # If parsing failed, use full content as summary
        if not any(sections.values()):
            sections["summary"] = content[:500]

        return sections

    def _extract_citations(self, docs: list) -> list:
        """Extract citations from retrieved documents.

        Args:
            docs: Retrieved documents; each must expose a ``metadata`` dict.
                ``source`` and ``page`` entries are optional.

        Returns:
            One citation string per document, in input order: the source's
            file name (directories stripped for both ``/`` and ``\\`` paths),
            followed by ``" (Page N)"`` when a page is known. A missing or
            empty source is reported as ``"Unknown"``.
        """
        citations = []

        for doc in docs:
            # "or" also covers a key that is present but None / empty.
            source = str(doc.metadata.get("source") or "Unknown")
            page = doc.metadata.get("page")

            if "\\" in source or "/" in source:
                # Normalise backslashes first: on POSIX, Path does not treat
                # "\\" as a separator, so Windows paths would be kept whole.
                source = Path(source.replace("\\", "/")).name or source

            citation = source
            if page is not None and page != "N/A":
                citation += f" (Page {page})"

            citations.append(citation)

        return citations


def create_disease_explainer_agent(retriever):
    """Factory function to create agent with retriever.

    Args:
        retriever: Retriever handed unchanged to ``DiseaseExplainerAgent``.

    Returns:
        A new ``DiseaseExplainerAgent`` instance wrapping ``retriever``.
    """
    agent = DiseaseExplainerAgent(retriever)
    return agent