"""
MediGuard AI RAG-Helper
Disease Explainer Agent - Retrieves disease pathophysiology from medical PDFs
"""

import re
from pathlib import Path

from langchain_core.prompts import ChatPromptTemplate

from src.llm_config import llm_config
from src.state import AgentOutput, GuildState

# Keys of the structured explanation, in the order the LLM is asked to emit them.
_SECTION_KEYS = ("pathophysiology", "diagnostic_criteria", "clinical_presentation", "summary")

# Maps the leading characters of a matched header keyword to its section key.
_SECTION_PREFIXES = (
    ("PATHO", "pathophysiology"),
    ("DIAG", "diagnostic_criteria"),
    ("CLIN", "clinical_presentation"),
    ("PRES", "clinical_presentation"),
    ("SUMM", "summary"),
)

# A section header is a keyword at the START of a line (optionally decorated
# with markdown markers and/or a list number) that is either alone on the line
# or followed by a ':' / dash separator and inline text.  Anchoring the match
# keeps ordinary sentences that merely mention "clinical" or "summary" from
# being mistaken for headers.
_SECTION_HEADER_RE = re.compile(
    r"""
    ^[\s#>*_\-]*                     # leading markdown decoration
    (?:\d+\s*[.)]\s*)?               # optional list number: "1." or "1)"
    [\s*_]*                          # emphasis markers after the number
    (?P<name>
        PATHOPHYSIOLOGY
      | DIAGNOSTIC(?:[\s_\-]+CRITERIA)?
      | CLINICAL(?:[\s_\-]+PRESENTATION)?
      | PRESENTATION
      | SUMMARY
    )
    [\s*_]*                          # closing emphasis markers
    (?:[:\-\u2013\u2014][\s*_]*(?P<rest>.*))?   # separator + inline text
    $
    """,
    re.IGNORECASE | re.VERBOSE,
)


def _section_key_for(header_name: str) -> str:
    """Return the section key for a keyword matched by ``_SECTION_HEADER_RE``."""
    upper = header_name.upper()
    for prefix, key in _SECTION_PREFIXES:
        if upper.startswith(prefix):
            return key
    # Unreachable for names produced by the regex; kept as a safe default.
    return "summary"


class DiseaseExplainerAgent:
    """Agent that retrieves and explains disease mechanisms using RAG"""

    def __init__(self, retriever):
        """
        Initialize with a retriever for medical PDFs.

        Args:
            retriever: Vector store retriever for disease documents
        """
        self.retriever = retriever
        self.llm = llm_config.explainer

    def explain(self, state: GuildState) -> GuildState:
        """
        Retrieve and explain disease pathophysiology.

        Reads ``state["model_prediction"]`` (``disease`` and ``confidence``)
        and ``state["sop"]`` (``disease_explainer_k`` and
        ``require_pdf_citations``), retrieves documents, and produces one
        ``AgentOutput``.

        Args:
            state: Current guild state

        Returns:
            A dict with a single ``"agent_outputs"`` key holding a one-element
            list with this agent's output.
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Disease Explainer Agent (RAG)")
        print("=" * 70)

        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        confidence = model_prediction["confidence"]
        sop = state["sop"]
        retrieval_k = sop.disease_explainer_k

        print(f"\nRetrieving information about: {disease}")
        print(f"Retrieval k={retrieval_k}")

        query = (
            f"What is {disease}? Explain the pathophysiology, diagnostic criteria, "
            "and clinical presentation. "
            "Focus on mechanisms relevant to blood biomarkers."
        )

        # The SOP's k is applied by temporarily overriding the retriever's
        # search_kwargs.  The retriever IS mutated for the duration of the
        # call; the ``finally`` puts a copy of the previous kwargs back even
        # when retrieval raises.
        original_search_kwargs = dict(self.retriever.search_kwargs)
        self.retriever.search_kwargs = {**original_search_kwargs, "k": retrieval_k}
        try:
            docs = self.retriever.invoke(query)
        finally:
            self.retriever.search_kwargs = original_search_kwargs

        print(f"Retrieved {len(docs)} relevant document chunks")

        if sop.require_pdf_citations and not docs:
            # Citations are mandatory but nothing was retrieved: report the
            # gap instead of letting the LLM answer without sources.
            explanation = {
                "pathophysiology": "Insufficient evidence available in the knowledge base to explain this condition.",
                "diagnostic_criteria": "Insufficient evidence available to list diagnostic criteria.",
                "clinical_presentation": "Insufficient evidence available to describe clinical presentation.",
                "summary": "Insufficient evidence available for a detailed explanation.",
            }
            output = self._build_output(
                disease, confidence, explanation, [], retrieval_quality=0, citations_missing=True
            )
            print("\nDisease explanation generated")
            print(" - Pathophysiology: insufficient evidence")
            print(" - Citations: 0 sources")
            return {"agent_outputs": [output]}

        explanation = self._generate_explanation(disease, docs, confidence)
        citations = self._extract_citations(docs)
        output = self._build_output(
            disease, confidence, explanation, citations, retrieval_quality=len(docs), citations_missing=False
        )

        print("\nDisease explanation generated")
        print(f" - Pathophysiology: {len(explanation['pathophysiology'])} chars")
        print(f" - Citations: {len(citations)} sources")

        return {"agent_outputs": [output]}

    def _build_output(
        self,
        disease,
        confidence,
        explanation: dict,
        citations: list,
        *,
        retrieval_quality: int,
        citations_missing: bool,
    ):
        """Assemble the ``AgentOutput`` shared by both branches of ``explain``."""
        return AgentOutput(
            agent_name="Disease Explainer",
            findings={
                "disease": disease,
                "pathophysiology": explanation["pathophysiology"],
                "diagnostic_criteria": explanation["diagnostic_criteria"],
                "clinical_presentation": explanation["clinical_presentation"],
                "mechanism_summary": explanation["summary"],
                "citations": citations,
                "confidence": confidence,
                "retrieval_quality": retrieval_quality,
                "citations_missing": citations_missing,
            },
        )

    def _generate_explanation(self, disease: str, docs: list, confidence: float) -> dict:
        """
        Generate structured disease explanation using LLM and retrieved docs.

        Args:
            disease: Name of the predicted disease.
            docs: Retrieved documents; each must expose ``metadata`` and
                ``page_content``.
            confidence: Prediction confidence, rendered as a percentage.

        Returns:
            Dict with the keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``.  If the LLM call or the
            parsing raises, a generic fallback explanation is returned instead.
        """
        # Format retrieved context, one "Source: ..." stanza per chunk.
        context = "\n\n---\n\n".join(
            f"Source: {doc.metadata.get('source', 'Unknown')}\n\n{doc.page_content}" for doc in docs
        )

        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a medical expert explaining diseases for patient self-assessment. "
                    "Based on the provided medical literature, explain the disease in clear, accessible language. "
                    "Structure your response with these sections: "
                    "1. PATHOPHYSIOLOGY: The underlying biological mechanisms "
                    "2. DIAGNOSTIC_CRITERIA: How the disease is diagnosed "
                    "3. CLINICAL_PRESENTATION: Common symptoms and signs "
                    "4. SUMMARY: A 2-3 sentence overview "
                    "Be accurate, cite-able, and patient-friendly. "
                    "Focus on how the disease affects blood biomarkers.",
                ),
                (
                    "human",
                    "Disease: {disease} "
                    "Prediction Confidence: {confidence:.1%} "
                    "Medical Literature Context: {context} "
                    "Please provide a structured explanation.",
                ),
            ]
        )

        chain = prompt | self.llm

        try:
            response = chain.invoke({"disease": disease, "confidence": confidence, "context": context})
            explanation = self._parse_explanation(response.content)
        except Exception as e:
            # Deliberate best-effort: a failed LLM call must not abort the
            # workflow, so report it and fall back to a generic explanation.
            print(f"Warning: LLM explanation generation failed: {e}")
            explanation = {
                "pathophysiology": f"{disease} is a medical condition requiring professional diagnosis.",
                "diagnostic_criteria": "Consult medical guidelines for diagnostic criteria.",
                "clinical_presentation": "Clinical presentation varies by individual.",
                "summary": f"{disease} detected with {confidence:.1%} confidence. Consult healthcare provider.",
            }

        return explanation

    def _parse_explanation(self, content: str) -> dict:
        """
        Parse LLM response into structured sections.

        A line counts as a section header only when the keyword starts the
        line (after optional markdown markers / list number) and is either
        alone or followed by ``:`` or a dash.  Text after the separator on the
        header line is kept as the first line of that section.  Non-blank
        lines that are not headers are appended to the current section; lines
        before the first header are ignored.

        Args:
            content: Raw text returned by the LLM.

        Returns:
            Dict with the keys ``pathophysiology``, ``diagnostic_criteria``,
            ``clinical_presentation`` and ``summary``; each value is the
            section text with one trailing newline per collected line.  If no
            section received any text, ``summary`` holds the first 500
            characters of ``content``.
        """
        collected = {key: [] for key in _SECTION_KEYS}
        current_section = None

        for line in content.split("\n"):
            header = _SECTION_HEADER_RE.match(line.rstrip())
            if header:
                current_section = _section_key_for(header.group("name"))
                inline_text = (header.group("rest") or "").strip()
                if inline_text:
                    collected[current_section].append(inline_text + "\n")
            elif current_section and line.strip():
                collected[current_section].append(line + "\n")

        sections = {key: "".join(parts) for key, parts in collected.items()}

        # If parsing failed, use full content as summary
        if not any(sections.values()):
            sections["summary"] = content[:500]

        return sections

    def _extract_citations(self, docs: list) -> list:
        """
        Extract citations from retrieved documents.

        Args:
            docs: Retrieved documents; each must expose a ``metadata`` mapping.

        Returns:
            One citation string per document, in order: the source's file name
            (directories stripped for both ``/`` and ``\\`` separated paths),
            followed by ``" (Page N)"`` when a page is present in the metadata.
        """
        citations = []

        for doc in docs:
            source = doc.metadata.get("source", "Unknown")
            page = doc.metadata.get("page", "N/A")

            # Metadata may hold None or a path object rather than a string.
            source = "Unknown" if source is None else str(source)

            # Clean up source path.  Backslashes are normalised first because
            # Path only splits on them when running on Windows.
            if "\\" in source or "/" in source:
                source = Path(source.replace("\\", "/")).name or source

            citation = source
            if page is not None and page != "N/A":
                citation += f" (Page {page})"

            citations.append(citation)

        return citations


def create_disease_explainer_agent(retriever):
    """Factory function to create agent with retriever"""
    return DiseaseExplainerAgent(retriever)