Spaces:

A-R-F
/

Agentic-Reliability-Framework-v4

Running

App Files Files Community

Agentic-Reliability-Framework-v4 / hallucination_detective.py

petter2025

Update hallucination_detective.py

fa2c5e7 verified about 2 months ago

raw

history blame

2.42 kB

	import logging
	from typing import Dict, Any, Optional
	from agentic_reliability_framework.runtime.agents.base import BaseAgent, AgentSpecialization
	from ai_event import AIEvent
	from nli_detector import NLIDetector

	logger = logging.getLogger(__name__)

	class HallucinationDetectiveAgent(BaseAgent):
	    """Detects hallucinations using confidence and NLI consistency.

	    Two signals are evaluated per event: the event's ``confidence`` value
	    against a minimum threshold and, when an NLI pipeline is present, the
	    entailment value returned by the NLI detector for the prompt/response
	    pair. Each signal that falls below its threshold adds a flag.
	    """

	    def __init__(self, nli_detector: Optional[NLIDetector] = None):
	        """Create the agent.

	        Args:
	            nli_detector: Detector used for the entailment check. When it
	                is omitted (or falsy) a new ``NLIDetector()`` is built.
	        """
	        super().__init__(AgentSpecialization.DETECTIVE)
	        # Minimum acceptable values; a reading below one raises a flag.
	        self._thresholds = {
	            'confidence': 0.7,
	            'entailment': 0.6,
	        }
	        self.nli = nli_detector or NLIDetector()

	    async def analyze(self, event: AIEvent) -> Dict[str, Any]:
	        """Score a single AI event for signs of hallucination.

	        Args:
	            event: Event whose ``confidence``, ``prompt`` and ``response``
	                attributes are read.

	        Returns:
	            A dict with the keys ``specialization``, ``confidence``,
	            ``findings`` and ``recommendations``. ``findings`` carries
	            ``is_hallucination``, ``flags``, ``risk_score``, the event's own
	            ``confidence`` and the ``entailment`` value (``None`` when the
	            NLI check was skipped or returned ``None``). If anything raises,
	            the error is logged and a result with ``confidence`` 0.0 and
	            empty ``findings``/``recommendations`` is returned instead.
	        """
	        try:
	            flags = []
	            # NOTE: despite its name, this value starts at 1.0 and is scaled
	            # *down* by every flag; the reported confidence is 1 - risk_score.
	            risk_score = 1.0
	            # Defined up front so the result can report it even when the NLI
	            # check below is skipped.
	            entail_prob = None

	            # 1. Check confidence
	            if event.confidence < self._thresholds['confidence']:
	                flags.append('low_confidence')
	                risk_score *= 0.5

	            # 2. Check NLI entailment. Skipped when the prompt or response is
	            # empty or the detector has no pipeline (presumably the model
	            # failed to load -- confirm against NLIDetector).
	            # NOTE(review): check() is called synchronously inside this
	            # coroutine; if it is slow it will hold up the event loop.
	            if event.prompt and event.response and self.nli.pipeline is not None:
	                entail_prob = self.nli.check(event.prompt, event.response)
	                if entail_prob is not None and entail_prob < self._thresholds['entailment']:
	                    flags.append('low_entailment')
	                    risk_score *= 0.6

	            is_hallucination = bool(flags)
	            return {
	                'specialization': 'ai_hallucination',
	                # Float in every branch, matching the error fallback below.
	                'confidence': 1 - risk_score if is_hallucination else 0.0,
	                'findings': {
	                    'is_hallucination': is_hallucination,
	                    'flags': flags,
	                    'risk_score': risk_score,
	                    'confidence': event.confidence,
	                    'entailment': entail_prob,
	                },
	                'recommendations': [
	                    "Regenerate with lower temperature",
	                    "Provide more context",
	                    "Use a different model",
	                ] if is_hallucination else [],
	            }
	        except Exception as e:
	            # Best-effort boundary: a failing detector must not break the
	            # caller, so log with traceback and return a neutral result.
	            logger.error("HallucinationDetective error: %s", e, exc_info=True)
	            return {'specialization': 'ai_hallucination', 'confidence': 0.0, 'findings': {}, 'recommendations': []}