petter2025 committed on
Commit
16a200d
·
verified ·
1 Parent(s): 1080e05

Delete hallucination_detective.py

Browse files
Files changed (1) hide show
  1. hallucination_detective.py +0 -65
hallucination_detective.py DELETED
@@ -1,65 +0,0 @@
1
- import logging
2
- from typing import Dict, Any, Optional
3
- from agentic_reliability_framework.runtime.agents.base import BaseAgent, AgentSpecialization
4
- from ai_event import AIEvent
5
- from nli_detector import NLIDetector
6
-
7
- logger = logging.getLogger(__name__)
8
-
9
class HallucinationDetectiveAgent(BaseAgent):
    """Flag likely hallucinations in generated text.

    Two independent signals are combined multiplicatively:
      * the model's own confidence score (below threshold -> riskier), and
      * a Natural Language Inference (NLI) entailment probability between
        prompt and response (below threshold -> riskier).
    """

    def __init__(self, nli_detector: Optional[NLIDetector] = None):
        """Create the detective.

        Args:
            nli_detector: entailment checker to use; a fresh ``NLIDetector``
                is constructed when none (or a falsy value) is supplied.
        """
        super().__init__(AgentSpecialization.DETECTIVE)
        # Minimum acceptable scores; observed values below these raise a flag.
        self._thresholds = {
            'confidence': 0.7,
            'entailment': 0.6,
        }
        self.nli = nli_detector or NLIDetector()

    async def analyze(self, event: AIEvent) -> Dict[str, Any]:
        """Score *event* for hallucination risk.

        Returns a result dict with keys ``specialization``, ``confidence``,
        ``findings`` and ``recommendations``. On any internal failure the
        error is logged and an empty, zero-confidence result is returned
        instead of raising.
        """
        try:
            detected: list = []
            # Multiplicative "survival" score: starts at 1.0 and shrinks as
            # evidence of hallucination accumulates (lower = riskier).
            score = 1.0
            entailment = None

            if event.confidence < self._thresholds['confidence']:
                detected.append('low_confidence')
                score = score * 0.5

            # Run the NLI check only when both texts exist and the
            # underlying pipeline actually loaded.
            if event.prompt and event.response and self.nli.pipeline is not None:
                entailment = self.nli.check(event.prompt, event.response)
                if entailment is not None and entailment < self._thresholds['entailment']:
                    detected.append('low_entailment')
                    score = score * 0.6

            flagged = bool(detected)
            advice = [
                "Regenerate with lower temperature",
                "Provide more context",
                "Use a different model",
            ] if flagged else []

            return {
                'specialization': 'ai_hallucination',
                # Agent confidence grows as the survival score shrinks.
                'confidence': (1 - score) if flagged else 0,
                'findings': {
                    'is_hallucination': flagged,
                    'flags': detected,
                    'risk_score': score,
                    'confidence': event.confidence,
                    'entailment': entailment,
                },
                'recommendations': advice,
            }
        except Exception as e:
            logger.error(f"HallucinationDetective error: {e}", exc_info=True)
            return {
                'specialization': 'ai_hallucination',
                'confidence': 0.0,
                'findings': {},
                'recommendations': [],
            }