File size: 5,087 Bytes
8176754
 
 
 
 
 
 
 
 
 
 
 
892d4dd
8176754
 
892d4dd
 
8176754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
892d4dd
8176754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
892d4dd
 
8176754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""
Consistency Checker Module
Uses NLI models to detect logical contradictions and inconsistencies
"""

from typing import List, Dict
from transformers import pipeline
import itertools

class ConsistencyChecker:
    """Detect logical contradictions and circular definitions among claims.

    Uses a HuggingFace NLI text-classification pipeline for contradiction
    detection and a word-overlap heuristic for circular definitions.
    Degrades gracefully: if the model fails to load, NLI checks are skipped
    and only the heuristic runs.
    """

    # Common function words ignored when measuring claim overlap.
    # Hoisted to class level so the set is built once, not once per pair
    # inside the O(n^2) comparison loop.
    _STOPWORDS = frozenset({
        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
        'should', 'may', 'might', 'can', 'to', 'of', 'in', 'for', 'on', 'with'
    })

    def __init__(self):
        """Load the NLI pipeline; on any failure, mark the checker not ready."""
        try:
            # Using smaller model for speed - might upgrade to base later.
            # NOTE(review): deberta-v3-xsmall is a base checkpoint, not an
            # NLI fine-tune, so its labels may not be entailment/contradiction
            # — confirm the intended checkpoint.
            self.nli_model = pipeline(
                "text-classification",
                model="microsoft/deberta-v3-xsmall",
                device=-1  # CPU only for now
            )
            self._ready = True
        except Exception as e:
            # Degrade gracefully: consistency checks fall back to heuristics.
            print(f"NLI model initialization error: {e}")
            self._ready = False
            self.nli_model = None

    def is_ready(self) -> bool:
        """Return True if the NLI model loaded successfully."""
        return self._ready

    async def check_consistency(self, claims: List[Dict]) -> Dict:
        """
        Check logical consistency between claims using NLI.

        Args:
            claims: dicts each containing at least a 'text' key.

        Returns:
            {
                'consistency_score': float (0-100),
                'contradictions': List[Dict],
                'circular_definitions': List[Dict],
                'entailment_failures': List[Dict]
            }
        """
        # Fewer than two claims cannot conflict with each other.
        if not claims or len(claims) < 2:
            return {
                'consistency_score': 100.0,
                'contradictions': [],
                'circular_definitions': [],
                'entailment_failures': []
            }

        contradictions = []
        circular_refs = []

        claim_texts = [claim['text'] for claim in claims]

        # Pairwise comparison of all claims (O(n^2) in the claim count).
        for i, j in itertools.combinations(range(len(claim_texts)), 2):
            claim1 = claim_texts[i]
            claim2 = claim_texts[j]

            # NLI contradiction check — only when the model is available.
            if self._ready and self.nli_model:
                try:
                    relation = self._check_entailment(claim1, claim2)

                    if relation == 'contradiction':
                        contradictions.append({
                            'claim1': claim1,
                            'claim2': claim2,
                            # TODO: surface the model's real score instead of
                            # this hard-coded placeholder confidence.
                            'confidence': 0.85,
                            'suggestion': 'These statements appear to contradict each other. Review the logical relationship.'
                        })
                except Exception as e:
                    # Best-effort: one failed pair must not abort the scan.
                    print(f"NLI check error: {e}")

            # Check for circular definitions (simple heuristic)
            if self._is_circular(claim1, claim2):
                circular_refs.append({
                    'claim1': claim1,
                    'claim2': claim2
                })

        # Fix: compute C(n, 2) arithmetically instead of materializing the
        # combinations a second time just to count them.
        n = len(claim_texts)
        total_pairs = n * (n - 1) // 2

        # Score: fraction of pairs with an issue, mapped onto 0-100.
        # TODO: might need to adjust penalty weights based on user feedback.
        issues = len(contradictions) + len(circular_refs)
        # Fix: float lower bound so the clamped score stays a float as
        # documented, instead of leaking int 0.
        consistency_score = max(0.0, 100 - (issues / max(total_pairs, 1)) * 100)

        return {
            'consistency_score': consistency_score,
            'contradictions': contradictions[:5],  # Limit to top 5
            'circular_definitions': circular_refs[:3],
            'entailment_failures': []
        }

    def _check_entailment(self, premise: str, hypothesis: str) -> str:
        """Classify the relation between two statements.

        Returns 'contradiction', 'entailment', or 'neutral'; 'neutral' is
        also the fallback when the model is unavailable or errors out.
        """
        if not self.nli_model:
            return 'neutral'

        try:
            # Fix: pass a text/text_pair dict so the pipeline tokenizes the
            # premise and hypothesis as a proper sentence pair; manually
            # gluing "[SEP]" into one string bypasses pair tokenization and
            # the model's segment handling.
            result = self.nli_model({"text": premise, "text_pair": hypothesis})

            # The pipeline may return a bare dict (single dict input) or a
            # one-element list depending on version — handle both.
            output = result[0] if isinstance(result, list) else result
            label = output['label'].lower()

            if 'contradiction' in label or 'contradict' in label:
                return 'contradiction'
            elif 'entailment' in label or 'entail' in label:
                return 'entailment'
            else:
                return 'neutral'
        except Exception as e:
            print(f"Entailment check error: {e}")
            return 'neutral'

    def _is_circular(self, claim1: str, claim2: str) -> bool:
        """Heuristic circular-definition check via content-word overlap.

        Flags a pair when they share >= 70% of the smaller claim's
        content words. Basic, but works okay for most cases.
        """
        words1 = set(claim1.lower().split()) - self._STOPWORDS
        words2 = set(claim2.lower().split()) - self._STOPWORDS

        # Require a few content words on each side so trivially short
        # claims don't trigger false positives.
        if len(words1) > 2 and len(words2) > 2:
            overlap = len(words1 & words2)
            return overlap >= min(len(words1), len(words2)) * 0.7

        return False