petter2025 committed on
Commit
9a21b1a
·
verified ·
1 Parent(s): 885d2fe

Create hallucination_detective.py

Browse files
Files changed (1) hide show
  1. hallucination_detective.py +49 -0
hallucination_detective.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
from typing import Dict, Any

# NOTE(review): AgentSpecialization is used by HallucinationDetectiveAgent.__init__
# but was never imported. It is assumed to be exported by the same module as
# BaseAgent -- confirm the import path against the framework.
from agentic_reliability_framework.runtime.agents.base import (
    AgentSpecialization,
    BaseAgent,
)
from ai_event import AIEvent
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class HallucinationDetectiveAgent(BaseAgent):
    """Detects hallucinations using confidence, perplexity, and consistency checks.

    Each check compares one field of the incoming event against a fixed
    threshold held in ``self._thresholds``. Every check that trips appends a
    flag and multiplies a running score (which starts at 1.0) by a penalty
    factor, so the score shrinks as more checks trip.
    """

    def __init__(self):
        """Initialise the base agent with the DETECTIVE specialization."""
        super().__init__(AgentSpecialization.DETECTIVE)
        self._thresholds = {
            'confidence': 0.7,            # flag when event.confidence is below this
            'perplexity': 10.0,           # flag when event.perplexity is above this
            'retrieval_similarity': 0.6,  # flag when the best retrieval score is below this
        }

    async def analyze(self, event: AIEvent) -> Dict[str, Any]:
        """Run the three hallucination checks against ``event``.

        Args:
            event: Object exposing ``confidence``, ``perplexity`` and
                ``retrieval_scores`` attributes. A ``None`` confidence or
                perplexity, or an empty/``None`` ``retrieval_scores``, skips
                the corresponding check instead of failing the analysis.

        Returns:
            A dict with the keys:

            * ``'specialization'``: always ``'ai_hallucination'``.
            * ``'confidence'``: ``1.0 - risk_score`` (``0.0`` when nothing
              was flagged).
            * ``'findings'``: ``is_hallucination`` (True when at least one
              flag was raised), ``flags`` (list of flag names) and
              ``risk_score``.
            * ``'recommendations'``: remediation hints, empty when nothing
              was flagged.

            If any exception occurs it is logged and a neutral result
            (confidence ``0.0``, empty findings and recommendations) is
            returned instead of propagating.

        Note:
            Despite its name, ``risk_score`` is 1.0 when no check trips and
            *decreases* with every flag (x0.5, x0.6, x0.7). The value is kept
            as-is for callers that already consume it.
        """
        try:
            flags = []
            score = 1.0

            # Confidence is guarded like the other optional signals: comparing
            # None with a float would raise TypeError and throw away the
            # perplexity / retrieval checks via the blanket handler below.
            confidence = event.confidence
            if confidence is not None and confidence < self._thresholds['confidence']:
                flags.append('low_confidence')
                score *= 0.5

            perplexity = event.perplexity
            if perplexity is not None and perplexity > self._thresholds['perplexity']:
                flags.append('high_perplexity')
                score *= 0.6

            # Truthiness check on purpose: max() of an empty sequence raises.
            retrieval_scores = event.retrieval_scores
            if retrieval_scores and max(retrieval_scores) < self._thresholds['retrieval_similarity']:
                flags.append('low_retrieval_relevance')
                score *= 0.7

            is_hallucination = bool(flags)
            return {
                'specialization': 'ai_hallucination',
                # Always a float (matches the error path); with no flags the
                # score is still 1.0, so this evaluates to 0.0.
                'confidence': 1.0 - score,
                'findings': {
                    'is_hallucination': is_hallucination,
                    'flags': flags,
                    'risk_score': score,
                },
                'recommendations': [
                    "Regenerate with lower temperature",
                    "Use a different model variant",
                    "Provide more context",
                ] if is_hallucination else [],
            }
        except Exception as e:
            # Boundary handler: one misbehaving event must not crash the agent.
            # Lazy %-formatting defers building the message to the logger.
            logger.error("HallucinationDetective error: %s", e, exc_info=True)
            return {'specialization': 'ai_hallucination', 'confidence': 0.0, 'findings': {}, 'recommendations': []}