"""
MediGuard AI RAG-Helper
Disease Explainer Agent - Retrieves disease pathophysiology from medical PDFs
"""
from pathlib import Path
from langchain_core.prompts import ChatPromptTemplate
from src.llm_config import llm_config
from src.state import AgentOutput, GuildState
class DiseaseExplainerAgent:
    """Agent that retrieves and explains disease mechanisms using RAG.

    The agent queries a retriever for literature about the predicted disease,
    asks the configured LLM for a four-section explanation (pathophysiology,
    diagnostic criteria, clinical presentation, summary) and packages the
    result, together with source citations, into an ``AgentOutput``.
    """

    # Keyword a header must START with (case-insensitive) -> section key.
    _SECTION_PREFIXES = (
        ("PATHOPHYSIOLOGY", "pathophysiology"),
        ("DIAGNOSTIC", "diagnostic_criteria"),
        ("CLINICAL", "clinical_presentation"),
        ("PRESENTATION", "clinical_presentation"),
        ("SUMMARY", "summary"),
    )
    # Leading list/markdown decoration ignored when detecting a header,
    # e.g. "1. ", "## ", "**", "- ", "(2) ".
    _HEADER_MARKUP = " \t#*_>-0123456789.()"
    # A header title longer than this is treated as ordinary prose.
    _MAX_HEADER_WORDS = 6
    # A colon-less line ending in one of these reads as a sentence, not a title.
    _SENTENCE_ENDINGS = (".", "!", "?", ",", ";")

    _SYSTEM_PROMPT = (
        "You are a medical expert explaining diseases for patient self-assessment.\n"
        "Based on the provided medical literature, explain the disease in clear, accessible language.\n"
        "Structure your response with these sections:\n"
        "1. PATHOPHYSIOLOGY: The underlying biological mechanisms\n"
        "2. DIAGNOSTIC_CRITERIA: How the disease is diagnosed\n"
        "3. CLINICAL_PRESENTATION: Common symptoms and signs\n"
        "4. SUMMARY: A 2-3 sentence overview\n"
        "Be accurate, cite-able, and patient-friendly. Focus on how the disease affects blood biomarkers."
    )
    _HUMAN_PROMPT = (
        "Disease: {disease}\n"
        "Prediction Confidence: {confidence:.1%}\n"
        "Medical Literature Context:\n"
        "{context}\n"
        "Please provide a structured explanation."
    )

    def __init__(self, retriever):
        """
        Initialize with a retriever for medical PDFs.

        Args:
            retriever: Vector store retriever for disease documents. It must
                expose a ``search_kwargs`` mapping and an ``invoke(query)``
                method, both of which ``explain`` uses.
        """
        self.retriever = retriever
        self.llm = llm_config.explainer

    def explain(self, state: "GuildState") -> "GuildState":
        """
        Retrieve and explain disease pathophysiology.

        Args:
            state: Current guild state. Reads ``state["model_prediction"]``
                (keys ``disease`` and ``confidence``) and ``state["sop"]``
                (attributes ``disease_explainer_k`` and
                ``require_pdf_citations``).

        Returns:
            A state update of the form ``{"agent_outputs": [AgentOutput]}``.
            When citations are required but nothing was retrieved, the
            output carries "insufficient evidence" placeholders and
            ``citations_missing=True`` instead of an LLM-generated text.
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Disease Explainer Agent (RAG)")
        print("=" * 70)

        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        confidence = model_prediction["confidence"]
        sop = state["sop"]
        retrieval_k = sop.disease_explainer_k

        print(f"\nRetrieving information about: {disease}")
        print(f"Retrieval k={retrieval_k}")
        query = (
            f"What is {disease}? Explain the pathophysiology, diagnostic criteria,\n"
            "and clinical presentation. Focus on mechanisms relevant to blood biomarkers."
        )

        # NOTE: this temporarily overrides ``search_kwargs`` on the retriever
        # instance held by this agent and restores a copy of the previous
        # value afterwards, even if retrieval raises. Concurrent calls that
        # share the same retriever can observe each other's ``k`` while the
        # override is active.
        original_search_kwargs = dict(self.retriever.search_kwargs)
        self.retriever.search_kwargs = {**original_search_kwargs, "k": retrieval_k}
        try:
            docs = self.retriever.invoke(query)
        finally:
            self.retriever.search_kwargs = original_search_kwargs

        print(f"Retrieved {len(docs)} relevant document chunks")

        if sop.require_pdf_citations and not docs:
            # Citations are mandatory and there is nothing to cite: report the
            # gap explicitly rather than letting the LLM answer unsupported.
            explanation = {
                "pathophysiology": "Insufficient evidence available in the knowledge base to explain this condition.",
                "diagnostic_criteria": "Insufficient evidence available to list diagnostic criteria.",
                "clinical_presentation": "Insufficient evidence available to describe clinical presentation.",
                "summary": "Insufficient evidence available for a detailed explanation.",
            }
            output = self._build_output(
                disease,
                confidence,
                explanation,
                citations=[],
                retrieval_quality=0,
                citations_missing=True,
            )
            print("\nDisease explanation generated")
            print(" - Pathophysiology: insufficient evidence")
            print(" - Citations: 0 sources")
            return {"agent_outputs": [output]}

        explanation = self._generate_explanation(disease, docs, confidence)
        citations = self._extract_citations(docs)
        output = self._build_output(
            disease,
            confidence,
            explanation,
            citations=citations,
            retrieval_quality=len(docs),
            citations_missing=False,
        )

        print("\nDisease explanation generated")
        print(f" - Pathophysiology: {len(explanation['pathophysiology'])} chars")
        print(f" - Citations: {len(citations)} sources")
        return {"agent_outputs": [output]}

    def _build_output(self, disease, confidence, explanation, *, citations, retrieval_quality, citations_missing):
        """Assemble the ``AgentOutput`` shared by both branches of ``explain``."""
        return AgentOutput(
            agent_name="Disease Explainer",
            findings={
                "disease": disease,
                "pathophysiology": explanation["pathophysiology"],
                "diagnostic_criteria": explanation["diagnostic_criteria"],
                "clinical_presentation": explanation["clinical_presentation"],
                "mechanism_summary": explanation["summary"],
                "citations": citations,
                "confidence": confidence,
                "retrieval_quality": retrieval_quality,
                "citations_missing": citations_missing,
            },
        )

    def _generate_explanation(self, disease: str, docs: list, confidence: float) -> dict:
        """Generate structured disease explanation using LLM and retrieved docs.

        Args:
            disease: Name of the predicted disease.
            docs: Retrieved documents; each must expose ``metadata`` and
                ``page_content``.
            confidence: Prediction confidence, rendered as a percentage.

        Returns:
            Dict with keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``. If the LLM call or the
            parsing raises, a generic fallback explanation is returned instead
            of propagating the error.
        """
        context = "\n\n---\n\n".join(
            f"Source: {doc.metadata.get('source', 'Unknown')}\n\n{doc.page_content}" for doc in docs
        )
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self._SYSTEM_PROMPT),
                ("human", self._HUMAN_PROMPT),
            ]
        )
        chain = prompt | self.llm
        try:
            response = chain.invoke({"disease": disease, "confidence": confidence, "context": context})
            explanation = self._parse_explanation(response.content)
        except Exception as e:
            # Deliberate best-effort boundary: any failure degrades to a
            # generic explanation so the rest of the pipeline can continue.
            print(f"Warning: LLM explanation generation failed: {e}")
            try:
                confidence_text = f"{confidence:.1%}"
            except (TypeError, ValueError):
                # The fallback must not fail on the same bad value that may
                # have broken the prompt formatting in the first place.
                confidence_text = str(confidence)
            explanation = {
                "pathophysiology": f"{disease} is a medical condition requiring professional diagnosis.",
                "diagnostic_criteria": "Consult medical guidelines for diagnostic criteria.",
                "clinical_presentation": "Clinical presentation varies by individual.",
                "summary": f"{disease} detected with {confidence_text} confidence. Consult healthcare provider.",
            }
        return explanation

    @classmethod
    def _match_section_header(cls, line: str) -> "tuple[str, str] | None":
        """Classify *line* as a section header.

        A header starts (after list/markdown decoration) with a section
        keyword and is either ``<short title>: <optional text>`` or a short
        standalone title that does not end like a sentence.

        Returns:
            ``(section_key, inline_text)`` for a header, where ``inline_text``
            is the text following the colon (possibly empty); ``None`` for
            any other line.
        """
        candidate = line.strip().lstrip(cls._HEADER_MARKUP)
        upper = candidate.upper()
        section = next((key for prefix, key in cls._SECTION_PREFIXES if upper.startswith(prefix)), None)
        if section is None:
            return None

        title, colon, inline = candidate.partition(":")
        if len(title.split()) > cls._MAX_HEADER_WORDS:
            return None
        if colon:
            # Drop markdown emphasis left around the inline text ("**").
            return section, inline.strip(" \t*_")
        if title.rstrip(" \t*_").endswith(cls._SENTENCE_ENDINGS):
            return None
        return section, ""

    def _parse_explanation(self, content: str) -> dict:
        """Parse LLM response into structured sections.

        Header lines switch the current section; text on the same line as a
        header (after the colon) is kept as the first line of that section.
        Non-empty lines that are not headers are appended to the current
        section, each followed by a newline. Lines before the first header
        are ignored.

        Returns:
            Dict with keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``. If no section received
            any text, the first 500 characters of *content* become the
            summary.
        """
        collected = {"pathophysiology": [], "diagnostic_criteria": [], "clinical_presentation": [], "summary": []}
        current_section = None
        for line in content.splitlines():
            header = self._match_section_header(line)
            if header is not None:
                current_section, inline = header
                if inline:
                    collected[current_section].append(inline)
            elif current_section and line.strip():
                collected[current_section].append(line)

        sections = {key: "".join(f"{text}\n" for text in lines) for key, lines in collected.items()}
        # If parsing failed, use full content as summary
        if not any(sections.values()):
            sections["summary"] = content[:500]
        return sections

    def _extract_citations(self, docs: list) -> list:
        """Extract citations from retrieved documents.

        Each citation is the source's file name, followed by ``(Page N)``
        when the document metadata carries a page. A missing or empty source
        is reported as ``"Unknown"``.

        Args:
            docs: Retrieved documents exposing a ``metadata`` mapping.

        Returns:
            One citation string per document, in retrieval order.
        """
        citations = []
        for doc in docs:
            metadata = doc.metadata or {}
            source = str(metadata.get("source") or "Unknown")
            page = metadata.get("page", "N/A")
            if "\\" in source or "/" in source:
                # Normalise separators first: on POSIX, Path does not treat
                # "\" as a separator, so Windows-style sources would keep
                # their full path.
                source = Path(source.replace("\\", "/")).name or source
            citation = source
            if page is not None and page != "N/A":
                citation += f" (Page {page})"
            citations.append(citation)
        return citations
def create_disease_explainer_agent(retriever):
    """Build a DiseaseExplainerAgent wired to the given retriever.

    Args:
        retriever: Retriever passed straight through to the agent's
            constructor.

    Returns:
        The newly constructed DiseaseExplainerAgent.
    """
    agent = DiseaseExplainerAgent(retriever)
    return agent
|