Agentic-RagBot / src /agents /biomarker_linker.py
T0X1N's picture
chore: codebase audit and fixes (ruff, mypy, pytest)
9659593
"""
MediGuard AI RAG-Helper
Biomarker-Disease Linker Agent - Connects biomarker values to predicted disease
"""
import re

from src.llm_config import llm_config
from src.state import AgentOutput, GuildState, KeyDriver
class BiomarkerDiseaseLinkerAgent:
    """Agent that links specific biomarker values to the predicted disease.

    For each abnormal biomarker flag found in the upstream analysis, the agent
    retrieves supporting passages, derives a heuristic contribution figure and
    asks the LLM for a short patient-friendly explanation.
    """

    # Upper bound on the number of key drivers produced per run.
    _MAX_KEY_DRIVERS = 5
    # Evidence stored on a KeyDriver is truncated to this many characters.
    _MAX_EVIDENCE_CHARS = 500
    # Length of the raw-text fallback used when no sentence matches.
    _FALLBACK_EVIDENCE_CHARS = 300
    # A period ends a sentence only when followed by whitespace or the end of
    # the text, so decimal values such as "6.5" are not cut in half.
    _SENTENCE_END = re.compile(r"\.(?=\s|$)")

    def __init__(self, retriever):
        """
        Initialize with a retriever for biomarker-disease connections.

        Args:
            retriever: Vector store retriever for biomarker evidence
        """
        self.retriever = retriever
        self.llm = llm_config.explainer

    def link(self, state: GuildState) -> GuildState:
        """
        Link biomarkers to disease prediction.

        Args:
            state: Current guild state. Reads "model_prediction" (for its
                "disease"), "patient_biomarkers", the optional
                "biomarker_analysis" and, during evidence retrieval, "sop".

        Returns:
            A partial state update whose "agent_outputs" list holds this
            agent's single AgentOutput.
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Biomarker-Disease Linker Agent (RAG)")
        print("=" * 70)

        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        biomarkers = state["patient_biomarkers"]

        # The analysis may be absent or None; treat both as "nothing flagged".
        biomarker_analysis = state.get("biomarker_analysis") or {}

        print(f"\nIdentifying key drivers for {disease}...")
        key_drivers, citations_missing = self._identify_key_drivers(disease, biomarkers, biomarker_analysis, state)
        print(f"Identified {len(key_drivers)} key biomarker drivers")

        output = AgentOutput(
            agent_name="Biomarker-Disease Linker",
            findings={
                "disease": disease,
                "key_drivers": [kd.model_dump() for kd in key_drivers],
                "total_drivers": len(key_drivers),
                "feature_importance_calculated": True,
                "citations_missing": citations_missing,
            },
        )

        print("\nBiomarker-disease linking complete")
        return {"agent_outputs": [output]}

    def _identify_key_drivers(
        self, disease: str, biomarkers: dict[str, float], analysis: dict, state: GuildState
    ) -> tuple[list[KeyDriver], bool]:
        """Identify which biomarkers are driving the disease prediction.

        Args:
            disease: Name of the predicted disease.
            biomarkers: Raw patient biomarker values (not consulted here; the
                selection is driven entirely by ``analysis``).
            analysis: Upstream analysis; "biomarker_flags" and
                "relevant_biomarkers" are read, and a missing or None value
                for either is treated as an empty list.
            state: Guild state, forwarded to evidence retrieval.

        Returns:
            A tuple of (key drivers, citations_missing), where the flag is
            True if any driver was built without retrieved evidence.
        """
        # `or []` also covers keys that are present but explicitly None.
        flags = analysis.get("biomarker_flags") or []
        abnormal_biomarkers = [f for f in flags if f["status"] != "NORMAL"]
        relevant = analysis.get("relevant_biomarkers") or []

        # Prefer biomarkers that are both abnormal AND disease-relevant;
        # otherwise fall back to the abnormal ones in their given order.
        key_biomarkers = [f for f in abnormal_biomarkers if f["name"] in relevant]
        if not key_biomarkers:
            key_biomarkers = abnormal_biomarkers

        # Truncate before logging so the reported count matches the work done.
        key_biomarkers = key_biomarkers[: self._MAX_KEY_DRIVERS]
        print(f" Analyzing {len(key_biomarkers)} key biomarkers...")

        key_drivers: list[KeyDriver] = []
        citations_missing = False
        for biomarker_flag in key_biomarkers:
            driver, driver_missing = self._create_key_driver(biomarker_flag, disease, state)
            key_drivers.append(driver)
            citations_missing = citations_missing or driver_missing
        return key_drivers, citations_missing

    def _create_key_driver(self, biomarker_flag: dict, disease: str, state: GuildState) -> tuple[KeyDriver, bool]:
        """Create a KeyDriver object with evidence from RAG.

        Args:
            biomarker_flag: Flag dict providing "name", "value", "unit" and
                "status".
            disease: Name of the predicted disease.
            state: Guild state; ``state["sop"].require_pdf_citations`` decides
                whether an empty retrieval counts as missing citations.

        Returns:
            A tuple of (driver, citations_missing).
        """
        name = biomarker_flag["name"]
        value = biomarker_flag["value"]
        unit = biomarker_flag["unit"]
        status = biomarker_flag["status"]

        query = f"How does {name} relate to {disease}? What does {status} {name} indicate?"
        citations_missing = False
        try:
            docs = self.retriever.invoke(query)
            if state["sop"].require_pdf_citations and not docs:
                evidence_text = "Insufficient evidence available in the knowledge base."
                contribution = "Unknown"
                citations_missing = True
            else:
                evidence_text = self._extract_evidence(docs, name, disease)
                contribution = self._estimate_contribution(biomarker_flag, len(docs))
        except Exception as e:
            # Best-effort: a retrieval failure must not abort the whole agent,
            # so substitute a generic statement and flag the missing citation.
            print(f" Warning: Evidence retrieval failed for {name}: {e}")
            evidence_text = f"{status} {name} may be related to {disease}."
            contribution = "Unknown"
            citations_missing = True

        explanation = self._generate_explanation(name, value, unit, status, disease, evidence_text)
        driver = KeyDriver(
            biomarker=name,
            value=value,
            contribution=contribution,
            explanation=explanation,
            evidence=evidence_text[: self._MAX_EVIDENCE_CHARS],  # Truncate long evidence
        )
        return driver, citations_missing

    def _extract_evidence(self, docs: list, biomarker: str, disease: str) -> str:
        """Extract relevant evidence from retrieved documents.

        Looks at the first two documents, keeps up to two sentences from each
        that mention the biomarker or the disease (case-insensitive), and
        joins at most three of them.

        Args:
            docs: Retrieved documents exposing a ``page_content`` string.
            biomarker: Biomarker name to look for.
            disease: Disease name to look for.

        Returns:
            The joined sentences; a fixed "limited evidence" sentence when
            ``docs`` is empty; or the opening characters of the first document
            when no sentence matches.
        """
        if not docs:
            return f"Limited evidence available for {biomarker} in {disease}."

        needles = (biomarker.lower(), disease.lower())
        evidence: list[str] = []
        for doc in docs[:2]:  # Top 2 docs
            matches = []
            for raw_sentence in self._SENTENCE_END.split(doc.page_content):
                sentence = raw_sentence.strip()
                lowered = sentence.lower()
                # Skip empty fragments left over from the split.
                if sentence and any(needle in lowered for needle in needles):
                    matches.append(sentence)
            evidence.extend(matches[:2])

        if evidence:
            return ". ".join(evidence[:3]) + "."
        # Nothing matched: fall back to the start of the first document rather
        # than whichever document the loop happened to visit last.
        return docs[0].page_content[: self._FALLBACK_EVIDENCE_CHARS]

    def _estimate_contribution(self, biomarker_flag: dict, doc_count: int) -> str:
        """Estimate the contribution percentage (simplified).

        The base is 40 when the status contains "CRITICAL", 25 for "HIGH" or
        "LOW", and 10 otherwise. Each retrieved document adds 2 points (at
        most 15), and the total is capped at 60.

        Args:
            biomarker_flag: Flag dict providing "status".
            doc_count: Number of documents retrieved for this biomarker.

        Returns:
            The estimate formatted as a percentage string, e.g. "46%".
        """
        status = biomarker_flag["status"]
        if "CRITICAL" in status:
            base = 40
        elif status in ("HIGH", "LOW"):
            base = 25
        else:
            base = 10

        evidence_boost = min(doc_count * 2, 15)
        total = min(base + evidence_boost, 60)
        return f"{total}%"

    def _generate_explanation(
        self, biomarker: str, value: float, unit: str, status: str, disease: str, evidence: str
    ) -> str:
        """Generate patient-friendly explanation.

        Args:
            biomarker: Biomarker name.
            value: Measured value.
            unit: Unit of the measured value.
            status: Flag status of the biomarker.
            disease: Name of the predicted disease.
            evidence: Evidence text to include in the prompt.

        Returns:
            The stripped LLM response, or a templated sentence if the LLM call
            fails for any reason.
        """
        prompt = "\n".join(
            [
                f"Explain in 1-2 sentences how this biomarker result relates to {disease}:",
                f"Biomarker: {biomarker}",
                f"Value: {value} {unit}",
                f"Status: {status}",
                f"Medical Evidence: {evidence}",
                "Write in patient-friendly language, explaining what this means for the diagnosis.",
            ]
        )
        try:
            response = self.llm.invoke(prompt)
            return response.content.strip()
        except Exception as e:
            # Best-effort fallback, but report the failure instead of hiding it.
            print(f" Warning: Explanation generation failed for {biomarker}: {e}")
            return f"{biomarker} at {value} {unit} is {status}, which may be associated with {disease}."
def create_biomarker_linker_agent(retriever):
    """Build a BiomarkerDiseaseLinkerAgent bound to the given retriever.

    Args:
        retriever: Passed unchanged to the agent constructor.

    Returns:
        The newly constructed BiomarkerDiseaseLinkerAgent.
    """
    agent = BiomarkerDiseaseLinkerAgent(retriever)
    return agent