Agentic-RagBot/src/agents/confidence_assessor.py
"""
MediGuard AI RAG-Helper
Confidence Assessor Agent - Evaluates prediction reliability
"""
from typing import Any

from src.biomarker_validator import BiomarkerValidator
from src.llm_config import llm_config
from src.state import AgentOutput, GuildState


class ConfidenceAssessorAgent:
    """Agent that assesses the reliability and limitations of the prediction"""

    def __init__(self) -> None:
        self.llm = llm_config.analyzer

    def assess(self, state: GuildState) -> GuildState:
        """
        Assess prediction confidence and identify limitations.

        Args:
            state: Current guild state

        Returns:
            Partial state update containing this agent's output
        """
        print("\n" + "=" * 70)
        print("EXECUTING: Confidence Assessor Agent")
        print("=" * 70)
        model_prediction = state["model_prediction"]
        disease = model_prediction["disease"]
        ml_confidence = model_prediction["confidence"]
        probabilities = model_prediction.get("probabilities", {})
        biomarkers = state["patient_biomarkers"]

        # Collect previous agent findings
        biomarker_analysis = state.get("biomarker_analysis") or {}
        disease_explanation = self._get_agent_findings(state, "Disease Explainer")
        linker_findings = self._get_agent_findings(state, "Biomarker-Disease Linker")

        print(f"\nAssessing confidence for {disease} prediction...")

        # Evaluate evidence strength
        evidence_strength = self._evaluate_evidence_strength(biomarker_analysis, disease_explanation, linker_findings)

        # Identify limitations
        limitations = self._identify_limitations(biomarkers, biomarker_analysis, probabilities)

        # Calculate aggregate reliability
        reliability = self._calculate_reliability(ml_confidence, evidence_strength, len(limitations))

        # Generate assessment summary
        assessment_summary = self._generate_assessment(
            disease, ml_confidence, reliability, evidence_strength, limitations
        )

        # Create agent output
        output = AgentOutput(
            agent_name="Confidence Assessor",
            findings={
                "prediction_reliability": reliability,
                "ml_confidence": ml_confidence,
                "evidence_strength": evidence_strength,
                "limitations": limitations,
                "assessment_summary": assessment_summary,
                "recommendation": self._get_recommendation(reliability),
                "alternative_diagnoses": self._get_alternatives(probabilities),
            },
        )

        print("\nConfidence assessment complete")
        print(f" - Prediction reliability: {reliability}")
        print(f" - Evidence strength: {evidence_strength}")
        print(f" - Limitations identified: {len(limitations)}")

        # Return a partial state update; only this agent's output is appended
        return {"agent_outputs": [output]}

    def _get_agent_findings(self, state: GuildState, agent_name: str) -> dict[str, Any]:
        """Extract findings from a specific agent"""
        for output in state.get("agent_outputs", []):
            if output.agent_name == agent_name:
                return output.findings
        return {}

    def _evaluate_evidence_strength(
        self, biomarker_analysis: dict, disease_explanation: dict, linker_findings: dict
    ) -> str:
        """Evaluate the strength of supporting evidence"""
        score = 0  # out of a maximum of 5

        # Check biomarker validation quality
        flags = biomarker_analysis.get("biomarker_flags", [])
        abnormal_count = len([f for f in flags if f.get("status") != "NORMAL"])
        if abnormal_count >= 3:
            score += 1
        if abnormal_count >= 5:
            score += 1

        # Check disease explanation quality
        if disease_explanation.get("retrieval_quality", 0) >= 3:
            score += 1

        # Check biomarker-disease linking
        key_drivers = linker_findings.get("key_drivers", [])
        if len(key_drivers) >= 2:
            score += 1
        if len(key_drivers) >= 4:
            score += 1
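
        # Worked example (illustrative values): 5 abnormal flags (+2), a
        # retrieval quality of 3 (+1), and 2 key drivers (+1) score 4 of 5,
        # which maps to STRONG below.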
        # Map score to categorical rating
        if score >= 4:
            return "STRONG"
        elif score >= 2:
            return "MODERATE"
        else:
            return "WEAK"

    def _identify_limitations(
        self, biomarkers: dict[str, float], biomarker_analysis: dict, probabilities: dict[str, float]
    ) -> list[str]:
        """Identify limitations and uncertainties"""
        limitations = []

        # Check for missing biomarkers
        expected_biomarkers = BiomarkerValidator().expected_biomarker_count()
        if len(biomarkers) < expected_biomarkers:
            missing = expected_biomarkers - len(biomarkers)
            limitations.append(f"Missing data: {missing} biomarker(s) not provided")

        # Check for close alternative predictions
        sorted_probs = sorted(probabilities.items(), key=lambda x: x[1], reverse=True)
        if len(sorted_probs) >= 2:
            runner_up, runner_up_prob = sorted_probs[1]
            if runner_up_prob > 0.15:  # Alternative is significant
                limitations.append(
                    f"Differential diagnosis: {runner_up} also possible ({runner_up_prob:.1%} probability)"
                )

        # Check for normal biomarkers despite prediction
        flags = biomarker_analysis.get("biomarker_flags", [])
        relevant = biomarker_analysis.get("relevant_biomarkers", [])
        normal_relevant = [f for f in flags if f.get("name") in relevant and f.get("status") == "NORMAL"]
        if len(normal_relevant) >= 2:
            limitations.append("Some disease-relevant biomarkers are within normal range")

        # Check for safety alerts (indicates complexity)
        alerts = biomarker_analysis.get("safety_alerts", [])
        if len(alerts) >= 2:
            limitations.append("Multiple critical values detected; professional evaluation essential")
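
        # Illustrative outcome: a panel missing two expected biomarkers with a
        # 21% runner-up probability would record two limitations here.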
        return limitations

    def _calculate_reliability(self, ml_confidence: float, evidence_strength: str, limitation_count: int) -> str:
        """Calculate overall prediction reliability"""
        score = 0

        # ML confidence contribution
        if ml_confidence >= 0.8:
            score += 3
        elif ml_confidence >= 0.6:
            score += 2
        elif ml_confidence >= 0.4:
            score += 1

        # Evidence strength contribution
        if evidence_strength == "STRONG":
            score += 3
        elif evidence_strength == "MODERATE":
            score += 2
        else:
            score += 1

        # Limitation penalty
        score -= min(limitation_count, 3)
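
        # Worked example (illustrative values): 0.62 ML confidence (+2) with
        # MODERATE evidence (+2) and one limitation (-1) scores 3, which maps
        # to MODERATE below.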
        # Map to categorical
        if score >= 5:
            return "HIGH"
        elif score >= 3:
            return "MODERATE"
        else:
            return "LOW"

    def _generate_assessment(
        self, disease: str, ml_confidence: float, reliability: str, evidence_strength: str, limitations: list[str]
    ) -> str:
        """Generate human-readable assessment summary"""
        prompt = f"""As a medical AI assessment system, provide a brief confidence statement about this prediction:

Disease Predicted: {disease}
ML Model Confidence: {ml_confidence:.1%}
Overall Reliability: {reliability}
Evidence Strength: {evidence_strength}
Limitations: {len(limitations)} identified

Write a 2-3 sentence assessment that:
1. States the overall reliability
2. Mentions key strengths or weaknesses
3. Emphasizes the need for professional medical consultation

Be honest about uncertainty. Patient safety is paramount."""
        try:
            response = self.llm.invoke(prompt)
            return response.content.strip()
        except Exception as e:
            print(f"Warning: Assessment generation failed: {e}")
            return (
                f"The {disease} prediction has {reliability.lower()} reliability based on available data. "
                "Professional medical evaluation is strongly recommended for accurate diagnosis."
            )

    def _get_recommendation(self, reliability: str) -> str:
        """Get action recommendation based on reliability"""
        if reliability == "HIGH":
            return "High confidence prediction. Schedule medical consultation to confirm diagnosis and discuss treatment options."
        elif reliability == "MODERATE":
            return "Moderate confidence prediction. Medical consultation recommended for professional evaluation and additional testing if needed."
        else:
            return "Low confidence prediction. Professional medical assessment essential. Additional tests may be required for accurate diagnosis."

    def _get_alternatives(self, probabilities: dict[str, float]) -> list[dict[str, Any]]:
        """Get alternative diagnoses to consider"""
        sorted_probs = sorted(probabilities.items(), key=lambda x: x[1], reverse=True)
        alternatives = []
        for disease, prob in sorted_probs[1:4]:  # Top 3 alternatives
            if prob > 0.05:  # Only significant alternatives
                alternatives.append(
                    {"disease": disease, "probability": prob, "note": "Consider discussing with healthcare provider"}
                )
        return alternatives

# Create agent instance for import
confidence_assessor_agent = ConfidenceAssessorAgent()
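
# Minimal smoke-test sketch (an assumption, not part of the original pipeline):
# exercises the pure scoring helpers without invoking the LLM. The disease
# labels and probabilities below are illustrative, not real model output.
if __name__ == "__main__":
    demo_probs = {"Diabetes": 0.62, "Thalassemia": 0.21, "Anemia": 0.09, "Heart Disease": 0.04}
    agent = confidence_assessor_agent
    # 0.62 confidence (+2) + MODERATE evidence (+2) - 1 limitation = 3 -> MODERATE
    print(agent._calculate_reliability(ml_confidence=0.62, evidence_strength="MODERATE", limitation_count=1))
    # Thalassemia (21%) and Anemia (9%) clear the 5% cutoff; Heart Disease does not
    print(agent._get_alternatives(demo_probs))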