petter2025 committed on
Commit
f6265f6
·
verified ·
1 Parent(s): 48e9ee1

Update hallucination_detective.py

Browse files
Files changed (1) hide show
  1. hallucination_detective.py +31 -8
hallucination_detective.py CHANGED
@@ -7,19 +7,39 @@ from nli_detector import NLIDetector
7
  logger = logging.getLogger(__name__)
8
 
9
  class HallucinationDetectiveAgent(BaseAgent):
10
- """Detects hallucinations using confidence and NLI consistency."""
 
 
 
 
 
11
  def __init__(self, nli_detector: Optional[NLIDetector] = None):
12
  super().__init__(AgentSpecialization.DETECTIVE)
 
13
  self._thresholds = {
14
- 'confidence': 0.7,
15
- 'entailment': 0.6
16
  }
17
  self.nli = nli_detector or NLIDetector()
18
 
19
  async def analyze(self, event: AIEvent) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  try:
21
  flags = []
22
  risk_score = 1.0
 
23
 
24
  # 1. Check confidence
25
  if event.confidence < self._thresholds['confidence']:
@@ -32,11 +52,9 @@ class HallucinationDetectiveAgent(BaseAgent):
32
  if entail_prob is not None and entail_prob < self._thresholds['entailment']:
33
  flags.append('low_entailment')
34
  risk_score *= 0.6
35
- else:
36
- # No NLI, so just use confidence
37
- pass
38
 
39
  is_hallucination = len(flags) > 0
 
40
  return {
41
  'specialization': 'ai_hallucination',
42
  'confidence': 1 - risk_score if is_hallucination else 0,
@@ -45,7 +63,7 @@ class HallucinationDetectiveAgent(BaseAgent):
45
  'flags': flags,
46
  'risk_score': risk_score,
47
  'confidence': event.confidence,
48
- 'entailment': entail_prob if 'entail_prob' in locals() else None
49
  },
50
  'recommendations': [
51
  "Regenerate with lower temperature",
@@ -55,4 +73,9 @@ class HallucinationDetectiveAgent(BaseAgent):
55
  }
56
  except Exception as e:
57
  logger.error(f"HallucinationDetective error: {e}", exc_info=True)
58
- return {'specialization': 'ai_hallucination', 'confidence': 0.0, 'findings': {}, 'recommendations': []}
 
 
 
 
 
 
7
  logger = logging.getLogger(__name__)
8
 
9
  class HallucinationDetectiveAgent(BaseAgent):
10
+ """
11
+ Detects potential hallucinations in generated text by combining:
12
+ - Model confidence score (lower confidence → higher risk)
13
+ - Natural Language Inference (NLI) entailment score (lower entailment → higher risk)
14
+ """
15
+
16
  def __init__(self, nli_detector: Optional[NLIDetector] = None):
17
  super().__init__(AgentSpecialization.DETECTIVE)
18
+ # Thresholds for flagging – can be overridden by subclass or config
19
  self._thresholds = {
20
+ 'confidence': 0.7, # below this → low confidence
21
+ 'entailment': 0.6 # below this → low entailment (possible hallucination)
22
  }
23
  self.nli = nli_detector or NLIDetector()
24
 
25
  async def analyze(self, event: AIEvent) -> Dict[str, Any]:
26
+ """
27
+ Analyze an AIEvent and return hallucination risk assessment.
28
+
29
+ Args:
30
+ event: AIEvent containing prompt, response, and confidence.
31
+
32
+ Returns:
33
+ Dictionary with keys:
34
+ - specialization: str
35
+ - confidence: float (0‑1, where higher means more likely hallucination)
36
+ - findings: dict with detailed flags
37
+ - recommendations: list of strings
38
+ """
39
  try:
40
  flags = []
41
  risk_score = 1.0
42
+ entail_prob = None
43
 
44
  # 1. Check confidence
45
  if event.confidence < self._thresholds['confidence']:
 
52
  if entail_prob is not None and entail_prob < self._thresholds['entailment']:
53
  flags.append('low_entailment')
54
  risk_score *= 0.6
 
 
 
55
 
56
  is_hallucination = len(flags) > 0
57
+
58
  return {
59
  'specialization': 'ai_hallucination',
60
  'confidence': 1 - risk_score if is_hallucination else 0,
 
63
  'flags': flags,
64
  'risk_score': risk_score,
65
  'confidence': event.confidence,
66
+ 'entailment': entail_prob
67
  },
68
  'recommendations': [
69
  "Regenerate with lower temperature",
 
73
  }
74
  except Exception as e:
75
  logger.error(f"HallucinationDetective error: {e}", exc_info=True)
76
+ return {
77
+ 'specialization': 'ai_hallucination',
78
+ 'confidence': 0.0,
79
+ 'findings': {},
80
+ 'recommendations': []
81
+ }