Spaces:
Sleeping
Sleeping
| """ | |
| Consistency Checker Module | |
| Uses NLI models to detect logical contradictions and inconsistencies | |
| """ | |
| from typing import List, Dict | |
| from transformers import pipeline | |
| import itertools | |
class ConsistencyChecker:
    """Check a list of claims for contradictions and near-duplicate wording.

    Contradictions are detected with a Hugging Face ``text-classification``
    pipeline used as an NLI classifier. "Circular definitions" are detected
    with a pure word-overlap heuristic that needs no model.

    If the pipeline cannot be created the checker stays usable: ``is_ready()``
    returns ``False`` and only the word-overlap heuristic runs.
    """

    # Function words ignored by the word-overlap heuristic in ``_is_circular``.
    # Built once here instead of on every call.
    _STOPWORDS = frozenset({
        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
        'should', 'may', 'might', 'can', 'to', 'of', 'in', 'for', 'on', 'with',
    })

    # Confidence reported when the classifier result carries no ``score``.
    # This is the placeholder value the checker historically reported for
    # every contradiction.
    _FALLBACK_CONFIDENCE = 0.85

    def __init__(self, model_name: str = "microsoft/deberta-v3-xsmall"):
        """Load the classification pipeline on CPU.

        Args:
            model_name: Hugging Face model id handed to ``pipeline``.

        Any failure while loading is printed and leaves the checker in the
        "not ready" state rather than raising.
        """
        try:
            # Using smaller model for speed - might upgrade to base later
            # NOTE(review): the default id looks like the base pretrained
            # checkpoint rather than an NLI fine-tune. If so, its labels will
            # never contain "contradiction"/"entailment" and no contradiction
            # can ever be reported - confirm, and pass an NLI checkpoint here.
            self.nli_model = pipeline(
                "text-classification",
                model=model_name,
                device=-1,  # CPU only for now
            )
            self._ready = True
        except Exception as e:
            print(f"NLI model initialization error: {e}")
            self._ready = False
            self.nli_model = None

    def is_ready(self) -> bool:
        """Return True when the NLI pipeline was loaded successfully."""
        return self._ready

    async def check_consistency(self, claims: List[Dict]) -> Dict:
        """
        Check logical consistency between claims using NLI

        Every unordered pair of claims is examined once. A pair counts as an
        issue when the classifier labels it a contradiction and/or when the
        word-overlap heuristic flags it as circular.

        Note: this coroutine never awaits; the classifier calls run
        synchronously inside it.

        Args:
            claims: dicts that each carry the claim string under ``'text'``.

        Returns:
            {
                'consistency_score': float (0-100),
                'contradictions': List[Dict],      # at most 5, highest confidence first
                'circular_definitions': List[Dict],  # at most 3
                'entailment_failures': List[Dict]  # always empty for now
            }
        """
        if not claims or len(claims) < 2:
            return {
                'consistency_score': 100.0,
                'contradictions': [],
                'circular_definitions': [],
                'entailment_failures': []
            }

        claim_texts = [claim['text'] for claim in claims]
        use_nli = bool(self._ready and self.nli_model)

        contradictions = []
        circular_refs = []

        # Check pairwise consistency
        for claim1, claim2 in itertools.combinations(claim_texts, 2):
            if use_nli:
                # _classify_pair handles its own errors and degrades to
                # 'neutral', so no extra guard is needed here.
                relation, score = self._classify_pair(claim1, claim2)
                if relation == 'contradiction':
                    contradictions.append({
                        'claim1': claim1,
                        'claim2': claim2,
                        'confidence': score,
                        'suggestion': 'These statements appear to contradict each other. Review the logical relationship.'
                    })

            # Check for circular definitions (simple heuristic)
            if self._is_circular(claim1, claim2):
                circular_refs.append({
                    'claim1': claim1,
                    'claim2': claim2
                })

        # Calculate consistency score
        # TODO: might need to adjust penalty weights based on user feedback
        n = len(claim_texts)
        total_pairs = n * (n - 1) // 2  # number of unordered pairs, C(n, 2)
        issues = len(contradictions) + len(circular_refs)
        # A pair can be both contradictory and circular, so the raw value can
        # drop below zero; clamp it and keep the result a float.
        consistency_score = max(0.0, 100 - (issues / max(total_pairs, 1)) * 100)

        # Strongest contradictions first so the truncation below really keeps
        # the "top" ones. The sort is stable, so ties keep discovery order.
        contradictions.sort(key=lambda item: item['confidence'], reverse=True)

        return {
            'consistency_score': consistency_score,
            'contradictions': contradictions[:5],  # Limit to top 5
            'circular_definitions': circular_refs[:3],
            'entailment_failures': []
        }

    def _classify_pair(self, premise: str, hypothesis: str) -> tuple:
        """Classify a premise/hypothesis pair.

        Returns:
            ``(relation, score)`` where ``relation`` is ``'contradiction'``,
            ``'entailment'`` or ``'neutral'`` and ``score`` is the classifier's
            score for its predicted label. Returns ``('neutral', 0.0)`` when no
            model is loaded or the classifier call fails.
        """
        if not self.nli_model:
            return 'neutral', 0.0

        try:
            # Pass the two sentences as an explicit pair so the tokenizer
            # builds the pair encoding itself, instead of gluing them together
            # with a literal "[SEP]" inside a single string.
            result = self.nli_model({'text': premise, 'text_pair': hypothesis})
            # The result may be a bare dict or wrapped in list(s), depending
            # on the input form / pipeline options; unwrap to one prediction.
            while isinstance(result, (list, tuple)):
                result = result[0]
            label = str(result['label']).lower()
            score = float(result.get('score', self._FALLBACK_CONFIDENCE))
        except Exception as e:
            print(f"Entailment check error: {e}")
            return 'neutral', 0.0

        # Map label to relationship
        if 'contradict' in label:
            return 'contradiction', score
        if 'entail' in label:
            return 'entailment', score
        return 'neutral', score

    def _check_entailment(self, premise: str, hypothesis: str) -> str:
        """Check logical relationship between two statements

        Returns ``'contradiction'``, ``'entailment'`` or ``'neutral'``;
        ``'neutral'`` is also the result when no model is loaded or the
        classifier call fails.
        """
        relation, _score = self._classify_pair(premise, hypothesis)
        return relation

    def _is_circular(self, claim1: str, claim2: str) -> bool:
        """Simple heuristic to detect circular definitions

        Both claims are lower-cased, split on whitespace and stripped of
        stopwords. The pair is flagged when each claim keeps more than two
        distinct words and the shared words cover at least 70% of the smaller
        word set.
        """
        # This is pretty basic - just checks word overlap
        # Works okay for most cases but could be improved
        words1 = set(claim1.lower().split()) - self._STOPWORDS
        words2 = set(claim2.lower().split()) - self._STOPWORDS

        # Check for high overlap (potential circular definition)
        if len(words1) > 2 and len(words2) > 2:
            overlap = len(words1 & words2)
            return overlap >= min(len(words1), len(words2)) * 0.7
        return False