petter2025 committed on
Commit
141f2db
·
verified ·
1 Parent(s): 162cb4f

Update nli_detector.py

Browse files
Files changed (1) hide show
  1. nli_detector.py +33 -15
nli_detector.py CHANGED
@@ -9,15 +9,26 @@ from transformers import pipeline
9
  logger = logging.getLogger(__name__)
10
 
11
class NLIDetector:
    """Uses an NLI model to detect contradictions/hallucinations.

    ``check()`` returns the model's entailment probability for a
    premise/hypothesis pair: higher means more consistent.
    """

    def __init__(self, model_name: str = "typeform/distilroberta-base-mnli"):
        """Load the NLI text-classification pipeline.

        Args:
            model_name: Hugging Face model identifier for an NLI model.
        """
        try:
            # return_all_scores=True makes the pipeline emit a score for
            # every class, so check() can read the ENTAILMENT probability
            # directly instead of seeing only the top-scoring label.
            self.pipeline = pipeline(
                "text-classification",
                model=model_name,
                device=0 if torch.cuda.is_available() else -1,
                return_all_scores=True,
            )
            logger.info(f"NLI model {model_name} loaded.")
        except Exception as e:
            logger.error(f"Failed to load NLI model: {e}")
            # Degrade gracefully: check() will return None.
            self.pipeline = None

    def check(self, premise: str, hypothesis: str) -> Optional[float]:
        """
        Returns probability of entailment (higher means more consistent).

        Args:
            premise: The original input/context.
            hypothesis: The generated response to verify.

        Returns:
            Entailment probability in [0, 1], or None if the model is
            unavailable or inference failed.
        """
        if self.pipeline is None:
            return None
        try:
            # With return_all_scores=True a single input yields a list
            # containing one list of {'label', 'score'} dicts.
            scores = self.pipeline(f"{premise} </s></s> {hypothesis}")[0]
            # Fix: previously only the top label was available, so any
            # non-entailment prediction collapsed to 0.0 and the real
            # entailment probability was lost. Now we look it up directly.
            for item in scores:
                if item['label'] == 'ENTAILMENT':
                    return item['score']
            return 0.0
        except Exception as e:
            logger.error(f"NLI error: {e}")
            return None
 
9
  logger = logging.getLogger(__name__)
10
 
11
class NLIDetector:
    """
    Uses an NLI model to detect contradictions/hallucinations.
    Returns entailment probability (0 to 1) for a given premise-hypothesis pair.
    """

    def __init__(self, model_name: str = "typeform/distilroberta-base-mnli"):
        """
        Args:
            model_name: Hugging Face model identifier for NLI.
        """
        try:
            # Request all scores to obtain probabilities for each class.
            # The pipeline returns a list of lists of dicts: each dict has
            # 'label' and 'score'.
            # NOTE(review): return_all_scores is deprecated in newer
            # transformers releases in favor of top_k=None (which returns a
            # flat list of dicts); check() tolerates both output shapes.
            self.pipeline = pipeline(
                "text-classification",
                model=model_name,
                device=0 if torch.cuda.is_available() else -1,
                return_all_scores=True
            )
            logger.info(f"NLI model {model_name} loaded with return_all_scores=True.")
        except Exception as e:
            logger.error(f"Failed to load NLI model: {e}")
            self.pipeline = None

    def check(self, premise: str, hypothesis: str) -> Optional[float]:
        """
        Returns probability of entailment (higher means more consistent).

        Args:
            premise: The original input/context.
            hypothesis: The generated response.

        Returns:
            Float between 0 and 1, or None if model unavailable.
        """
        if self.pipeline is None:
            return None
        try:
            outputs = self.pipeline(f"{premise} </s></s> {hypothesis}")
            # return_all_scores=True nests the per-class scores for a single
            # input as [[{...}, ...]]; top_k=None yields a flat [{...}, ...].
            # Accept either shape so a transformers upgrade doesn't break us.
            if outputs and isinstance(outputs[0], list):
                scores = outputs[0]
            else:
                scores = outputs
            # Label casing varies across NLI models ('ENTAILMENT' vs
            # 'entailment'), so compare case-insensitively.
            for item in scores:
                if item['label'].upper() == 'ENTAILMENT':
                    return item['score']
            # If the label is not found (should not happen), fall back to 0.0.
            logger.warning("ENTAILMENT label not found in NLI output; returning 0.0.")
            return 0.0
        except Exception as e:
            logger.error(f"NLI error: {e}")
            return None