petter2025 committed on
Commit
fa2c5e7
·
verified ·
1 Parent(s): d22a44b

Update hallucination_detective.py

Browse files
Files changed (1) hide show
  1. hallucination_detective.py +26 -17
hallucination_detective.py CHANGED
@@ -1,47 +1,56 @@
1
  import logging
2
- from typing import Dict, Any
3
  from agentic_reliability_framework.runtime.agents.base import BaseAgent, AgentSpecialization
4
  from ai_event import AIEvent
 
5
 
6
  logger = logging.getLogger(__name__)
7
 
8
  class HallucinationDetectiveAgent(BaseAgent):
9
- """Detects hallucinations using confidence, perplexity, and consistency checks."""
10
def __init__(self):
    """Set up the detective specialization and its flagging thresholds."""
    super().__init__(AgentSpecialization.DETECTIVE)
    # Responses scoring worse than these cutoffs get flagged during analysis.
    limits = {}
    limits['confidence'] = 0.7            # minimum acceptable model confidence
    limits['perplexity'] = 10.0           # maximum acceptable perplexity
    limits['retrieval_similarity'] = 0.6  # minimum acceptable best retrieval score
    self._thresholds = limits
 
17
 
18
  async def analyze(self, event: AIEvent) -> Dict[str, Any]:
19
  try:
20
  flags = []
21
- score = 1.0
 
 
22
  if event.confidence < self._thresholds['confidence']:
23
  flags.append('low_confidence')
24
- score *= 0.5
25
- if event.perplexity and event.perplexity > self._thresholds['perplexity']:
26
- flags.append('high_perplexity')
27
- score *= 0.6
28
- if event.retrieval_scores and max(event.retrieval_scores) < self._thresholds['retrieval_similarity']:
29
- flags.append('low_retrieval_relevance')
30
- score *= 0.7
 
 
 
 
31
 
32
  is_hallucination = len(flags) > 0
33
  return {
34
  'specialization': 'ai_hallucination',
35
- 'confidence': 1 - score if is_hallucination else 0,
36
  'findings': {
37
  'is_hallucination': is_hallucination,
38
  'flags': flags,
39
- 'risk_score': score
 
 
40
  },
41
  'recommendations': [
42
  "Regenerate with lower temperature",
43
- "Use a different model variant",
44
- "Provide more context"
45
  ] if is_hallucination else []
46
  }
47
  except Exception as e:
 
1
  import logging
2
+ from typing import Dict, Any, Optional
3
  from agentic_reliability_framework.runtime.agents.base import BaseAgent, AgentSpecialization
4
  from ai_event import AIEvent
5
+ from nli_detector import NLIDetector
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
  class HallucinationDetectiveAgent(BaseAgent):
10
+ """Detects hallucinations using confidence and NLI consistency."""
11
def __init__(self, nli_detector: Optional[NLIDetector] = None):
    """Set up the detective specialization, flagging thresholds, and NLI backend.

    Args:
        nli_detector: Optional pre-built NLI detector; when omitted (or falsy),
            a fresh NLIDetector() is constructed instead.
    """
    super().__init__(AgentSpecialization.DETECTIVE)
    # Responses scoring below these cutoffs get flagged during analysis.
    limits = {}
    limits['confidence'] = 0.7  # minimum acceptable model confidence
    limits['entailment'] = 0.6  # minimum acceptable NLI entailment probability
    self._thresholds = limits
    # Truthiness fallback (matches the original `or`): any falsy argument
    # is replaced by a default detector, not just None.
    self.nli = nli_detector if nli_detector else NLIDetector()
18
 
19
  async def analyze(self, event: AIEvent) -> Dict[str, Any]:
20
  try:
21
  flags = []
22
+ risk_score = 1.0
23
+
24
+ # 1. Check confidence
25
  if event.confidence < self._thresholds['confidence']:
26
  flags.append('low_confidence')
27
+ risk_score *= 0.5
28
+
29
+ # 2. Check NLI entailment (if available)
30
+ if event.prompt and event.response and self.nli.pipeline is not None:
31
+ entail_prob = self.nli.check(event.prompt, event.response)
32
+ if entail_prob is not None and entail_prob < self._thresholds['entailment']:
33
+ flags.append('low_entailment')
34
+ risk_score *= 0.6
35
+ else:
36
+ # No NLI, so just use confidence
37
+ pass
38
 
39
  is_hallucination = len(flags) > 0
40
  return {
41
  'specialization': 'ai_hallucination',
42
+ 'confidence': 1 - risk_score if is_hallucination else 0,
43
  'findings': {
44
  'is_hallucination': is_hallucination,
45
  'flags': flags,
46
+ 'risk_score': risk_score,
47
+ 'confidence': event.confidence,
48
+ 'entailment': entail_prob if 'entail_prob' in locals() else None
49
  },
50
  'recommendations': [
51
  "Regenerate with lower temperature",
52
+ "Provide more context",
53
+ "Use a different model"
54
  ] if is_hallucination else []
55
  }
56
  except Exception as e: