Spaces:

A-R-F
/

Agentic-Reliability-Framework-v4

Running

App Files Files Community

petter2025 commited on Feb 28

Commit

d22a44b

verified ·

1 Parent(s): 550ce84

Create nli_detector.py

Browse files

Files changed (1) hide show

nli_detector.py +46 -0

nli_detector.py ADDED Viewed

	@@ -0,0 +1,46 @@

+"""
+Natural Language Inference detector – checks if generated response is consistent with input.
+"""
+import logging
+from typing import Optional
+import torch
+from transformers import pipeline
+logger = logging.getLogger(__name__)
+class NLIDetector:
+    """Uses an NLI model to detect contradictions/hallucinations."""
+    def __init__(self, model_name: str = "typeform/distilroberta-base-mnli"):
+        try:
+            self.pipeline = pipeline(
+                "text-classification",
+                model=model_name,
+                device=0 if torch.cuda.is_available() else -1
+            )
+            logger.info(f"NLI model {model_name} loaded.")
+        except Exception as e:
+            logger.error(f"Failed to load NLI model: {e}")
+            self.pipeline = None
+    def check(self, premise: str, hypothesis: str) -> Optional[float]:
+        """
+        Returns probability of entailment (higher means more consistent).
+        """
+        if self.pipeline is None:
+            return None
+        try:
+            result = self.pipeline(f"{premise} </s></s> {hypothesis}")[0]
+            # The model outputs label and score
+            if result['label'] == 'ENTAILMENT':
+                return result['score']
+            else:
+                # For contradiction/neutral, return 1 - score? Better to return entailment probability directly.
+                # Some models give 'CONTRADICTION' and 'NEUTRAL' – we can treat as low consistency.
+                # We'll use the score of the entailment class if present, else 0.
+                # But the pipeline might return only the top label. Let's get probabilities for all labels.
+                # This is more complex. For simplicity, we'll assume the model gives entailment score.
+                # In practice, we'd use a dedicated NLI model that returns probabilities.
+                return 0.0
+        except Exception as e:
+            logger.error(f"NLI error: {e}")
+            return None