File size: 5,087 Bytes
8176754
 
 
 
 
 
 
 
 
 
 
 
892d4dd
8176754
 
892d4dd
 
8176754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
892d4dd
8176754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
892d4dd
 
8176754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""
Consistency Checker Module
Uses NLI models to detect logical contradictions and inconsistencies
"""

from typing import List, Dict
from transformers import pipeline
import itertools

class ConsistencyChecker:
    """Detect logical contradictions and circular definitions among claims.

    Uses a HuggingFace NLI text-classification pipeline for contradiction
    detection and a word-overlap heuristic for circular definitions.
    Degrades gracefully: if the model fails to load, NLI checks are skipped
    and only the heuristic runs.
    """

    # Common function words ignored when measuring claim overlap.
    # Hoisted to class level so the set is built once, not once per pair
    # inside the O(n^2) comparison loop.
    _STOPWORDS = frozenset({
        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
        'should', 'may', 'might', 'can', 'to', 'of', 'in', 'for', 'on', 'with'
    })

    def __init__(self):
        """Load the NLI pipeline; on any failure, mark the checker not ready."""
        try:
            # Using smaller model for speed - might upgrade to base later.
            # NOTE(review): deberta-v3-xsmall is a base checkpoint, not an
            # NLI fine-tune, so its labels may not be entailment/contradiction
            # — confirm the intended checkpoint.
            self.nli_model = pipeline(
                "text-classification",
                model="microsoft/deberta-v3-xsmall",
                device=-1  # CPU only for now
            )
            self._ready = True
        except Exception as e:
            # Degrade gracefully: consistency checks fall back to heuristics.
            print(f"NLI model initialization error: {e}")
            self._ready = False
            self.nli_model = None

    def is_ready(self) -> bool:
        """Return True if the NLI model loaded successfully."""
        return self._ready

    async def check_consistency(self, claims: List[Dict]) -> Dict:
        """
        Check logical consistency between claims using NLI.

        Args:
            claims: dicts each containing at least a 'text' key.

        Returns:
            {
                'consistency_score': float (0-100),
                'contradictions': List[Dict],
                'circular_definitions': List[Dict],
                'entailment_failures': List[Dict]
            }
        """
        # Fewer than two claims cannot conflict with each other.
        if not claims or len(claims) < 2:
            return {
                'consistency_score': 100.0,
                'contradictions': [],
                'circular_definitions': [],
                'entailment_failures': []
            }

        contradictions = []
        circular_refs = []

        claim_texts = [claim['text'] for claim in claims]

        # Pairwise comparison of all claims (O(n^2) in the claim count).
        for i, j in itertools.combinations(range(len(claim_texts)), 2):
            claim1 = claim_texts[i]
            claim2 = claim_texts[j]

            # NLI contradiction check — only when the model is available.
            if self._ready and self.nli_model:
                try:
                    relation = self._check_entailment(claim1, claim2)

                    if relation == 'contradiction':
                        contradictions.append({
                            'claim1': claim1,
                            'claim2': claim2,
                            # TODO: surface the model's real score instead of
                            # this hard-coded placeholder confidence.
                            'confidence': 0.85,
                            'suggestion': 'These statements appear to contradict each other. Review the logical relationship.'
                        })
                except Exception as e:
                    # Best-effort: one failed pair must not abort the scan.
                    print(f"NLI check error: {e}")

            # Check for circular definitions (simple heuristic)
            if self._is_circular(claim1, claim2):
                circular_refs.append({
                    'claim1': claim1,
                    'claim2': claim2
                })

        # Fix: compute C(n, 2) arithmetically instead of materializing the
        # combinations a second time just to count them.
        n = len(claim_texts)
        total_pairs = n * (n - 1) // 2

        # Score: fraction of pairs with an issue, mapped onto 0-100.
        # TODO: might need to adjust penalty weights based on user feedback.
        issues = len(contradictions) + len(circular_refs)
        # Fix: float lower bound so the clamped score stays a float as
        # documented, instead of leaking int 0.
        consistency_score = max(0.0, 100 - (issues / max(total_pairs, 1)) * 100)

        return {
            'consistency_score': consistency_score,
            'contradictions': contradictions[:5],  # Limit to top 5
            'circular_definitions': circular_refs[:3],
            'entailment_failures': []
        }

    def _check_entailment(self, premise: str, hypothesis: str) -> str:
        """Classify the relation between two statements.

        Returns 'contradiction', 'entailment', or 'neutral'; 'neutral' is
        also the fallback when the model is unavailable or errors out.
        """
        if not self.nli_model:
            return 'neutral'

        try:
            # Fix: pass a text/text_pair dict so the pipeline tokenizes the
            # premise and hypothesis as a proper sentence pair; manually
            # gluing "[SEP]" into one string bypasses pair tokenization and
            # the model's segment handling.
            result = self.nli_model({"text": premise, "text_pair": hypothesis})

            # The pipeline may return a bare dict (single dict input) or a
            # one-element list depending on version — handle both.
            output = result[0] if isinstance(result, list) else result
            label = output['label'].lower()

            if 'contradiction' in label or 'contradict' in label:
                return 'contradiction'
            elif 'entailment' in label or 'entail' in label:
                return 'entailment'
            else:
                return 'neutral'
        except Exception as e:
            print(f"Entailment check error: {e}")
            return 'neutral'

    def _is_circular(self, claim1: str, claim2: str) -> bool:
        """Heuristic circular-definition check via content-word overlap.

        Flags a pair when they share >= 70% of the smaller claim's
        content words. Basic, but works okay for most cases.
        """
        words1 = set(claim1.lower().split()) - self._STOPWORDS
        words2 = set(claim2.lower().split()) - self._STOPWORDS

        # Require a few content words on each side so trivially short
        # claims don't trigger false positives.
        if len(words1) > 2 and len(words2) > 2:
            overlap = len(words1 & words2)
            return overlap >= min(len(words1), len(words2)) * 0.7

        return False