petter2025 committed on
Commit
fa2c5e7
·
verified ·
1 Parent(s): d22a44b

Update hallucination_detective.py

Browse files
Files changed (1) hide show
  1. hallucination_detective.py +26 -17
hallucination_detective.py CHANGED
@@ -1,47 +1,56 @@
1
  import logging
2
- from typing import Dict, Any
3
  from agentic_reliability_framework.runtime.agents.base import BaseAgent, AgentSpecialization
4
  from ai_event import AIEvent
 
5
 
6
  logger = logging.getLogger(__name__)
7
 
8
  class HallucinationDetectiveAgent(BaseAgent):
9
- """Detects hallucinations using confidence, perplexity, and consistency checks."""
10
def __init__(self):
    """Set up the detective specialization and its flagging thresholds."""
    super().__init__(AgentSpecialization.DETECTIVE)
    # Responses scoring worse than these cutoffs get flagged during analysis.
    limits = {}
    limits['confidence'] = 0.7            # minimum acceptable model confidence
    limits['perplexity'] = 10.0           # maximum acceptable perplexity
    limits['retrieval_similarity'] = 0.6  # minimum acceptable best retrieval score
    self._thresholds = limits
 
17
 
18
  async def analyze(self, event: AIEvent) -> Dict[str, Any]:
19
  try:
20
  flags = []
21
- score = 1.0
 
 
22
  if event.confidence < self._thresholds['confidence']:
23
  flags.append('low_confidence')
24
- score *= 0.5
25
- if event.perplexity and event.perplexity > self._thresholds['perplexity']:
26
- flags.append('high_perplexity')
27
- score *= 0.6
28
- if event.retrieval_scores and max(event.retrieval_scores) < self._thresholds['retrieval_similarity']:
29
- flags.append('low_retrieval_relevance')
30
- score *= 0.7
 
 
 
 
31
 
32
  is_hallucination = len(flags) > 0
33
  return {
34
  'specialization': 'ai_hallucination',
35
- 'confidence': 1 - score if is_hallucination else 0,
36
  'findings': {
37
  'is_hallucination': is_hallucination,
38
  'flags': flags,
39
- 'risk_score': score
 
 
40
  },
41
  'recommendations': [
42
  "Regenerate with lower temperature",
43
- "Use a different model variant",
44
- "Provide more context"
45
  ] if is_hallucination else []
46
  }
47
  except Exception as e:
 
1
  import logging
2
+ from typing import Dict, Any, Optional
3
  from agentic_reliability_framework.runtime.agents.base import BaseAgent, AgentSpecialization
4
  from ai_event import AIEvent
5
+ from nli_detector import NLIDetector
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
  class HallucinationDetectiveAgent(BaseAgent):
10
+ """Detects hallucinations using confidence and NLI consistency."""
11
def __init__(self, nli_detector: Optional[NLIDetector] = None):
    """Set up the detective specialization, flagging thresholds, and NLI backend.

    Args:
        nli_detector: Optional pre-built NLI detector; when omitted (or falsy),
            a fresh NLIDetector() is constructed instead.
    """
    super().__init__(AgentSpecialization.DETECTIVE)
    # Responses scoring below these cutoffs get flagged during analysis.
    limits = {}
    limits['confidence'] = 0.7  # minimum acceptable model confidence
    limits['entailment'] = 0.6  # minimum acceptable NLI entailment probability
    self._thresholds = limits
    # Truthiness fallback (matches the original `or`): any falsy argument
    # is replaced by a default detector, not just None.
    self.nli = nli_detector if nli_detector else NLIDetector()
18
 
19
  async def analyze(self, event: AIEvent) -> Dict[str, Any]:
20
  try:
21
  flags = []
22
+ risk_score = 1.0
23
+
24
+ # 1. Check confidence
25
  if event.confidence < self._thresholds['confidence']:
26
  flags.append('low_confidence')
27
+ risk_score *= 0.5
28
+
29
+ # 2. Check NLI entailment (if available)
30
+ if event.prompt and event.response and self.nli.pipeline is not None:
31
+ entail_prob = self.nli.check(event.prompt, event.response)
32
+ if entail_prob is not None and entail_prob < self._thresholds['entailment']:
33
+ flags.append('low_entailment')
34
+ risk_score *= 0.6
35
+ else:
36
+ # No NLI, so just use confidence
37
+ pass
38
 
39
  is_hallucination = len(flags) > 0
40
  return {
41
  'specialization': 'ai_hallucination',
42
+ 'confidence': 1 - risk_score if is_hallucination else 0,
43
  'findings': {
44
  'is_hallucination': is_hallucination,
45
  'flags': flags,
46
+ 'risk_score': risk_score,
47
+ 'confidence': event.confidence,
48
+ 'entailment': entail_prob if 'entail_prob' in locals() else None
49
  },
50
  'recommendations': [
51
  "Regenerate with lower temperature",
52
+ "Provide more context",
53
+ "Use a different model"
54
  ] if is_hallucination else []
55
  }
56
  except Exception as e: