conceptvector/analysis/consistency_checker.py
Tawhid Bin Omar
cleaned up code comments and docs
892d4dd
"""
Consistency Checker Module
Uses NLI models to detect logical contradictions and inconsistencies
"""
from typing import List, Dict
from transformers import pipeline
import itertools
class ConsistencyChecker:
    """Checks a set of claims for logical inconsistencies.

    Uses a HuggingFace text-classification pipeline as an NLI model to flag
    pairwise contradictions, plus a simple word-overlap heuristic to flag
    potential circular definitions. Degrades gracefully to heuristic-only
    mode if the model fails to load.
    """

    # Common function words ignored by the circular-definition heuristic.
    # Hoisted to a class constant so the set is built once, not per pair.
    _STOPWORDS = frozenset({
        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
        'should', 'may', 'might', 'can', 'to', 'of', 'in', 'for', 'on', 'with',
    })

    def __init__(self):
        """Load the NLI pipeline; on failure, fall back to heuristics only."""
        try:
            # Using smaller model for speed - might upgrade to base later.
            # NOTE(review): deberta-v3-xsmall is a *base* checkpoint, not an
            # NLI fine-tune, so its labels may be LABEL_0/LABEL_1 and never
            # contain "contradiction"/"entailment" — confirm and switch to an
            # MNLI fine-tuned model if real contradiction detection is needed.
            self.nli_model = pipeline(
                "text-classification",
                model="microsoft/deberta-v3-xsmall",
                device=-1,  # CPU only for now
            )
            self._ready = True
        except Exception as e:
            # Deliberate best-effort: keep the checker usable without NLI.
            print(f"NLI model initialization error: {e}")
            self._ready = False
            self.nli_model = None

    def is_ready(self) -> bool:
        """Return True if the NLI model loaded successfully."""
        return self._ready

    async def check_consistency(self, claims: List[Dict]) -> Dict:
        """
        Check logical consistency between claims using NLI.

        Args:
            claims: list of dicts, each carrying the claim under 'text'.

        Returns:
            {
                'consistency_score': float (0-100),
                'contradictions': List[Dict],
                'circular_definitions': List[Dict],
                'entailment_failures': List[Dict]
            }
        """
        # Fewer than two claims can never conflict; treat as fully consistent.
        if not claims or len(claims) < 2:
            return {
                'consistency_score': 100.0,
                'contradictions': [],
                'circular_definitions': [],
                'entailment_failures': []
            }

        contradictions = []
        circular_refs = []
        claim_texts = [claim['text'] for claim in claims]

        # Check every unordered pair of claims (iterate the texts directly
        # instead of index pairs — same pairs, less bookkeeping).
        for claim1, claim2 in itertools.combinations(claim_texts, 2):
            # Check for contradiction via the NLI model, when available.
            if self._ready and self.nli_model:
                try:
                    if self._check_entailment(claim1, claim2) == 'contradiction':
                        contradictions.append({
                            'claim1': claim1,
                            'claim2': claim2,
                            # TODO: surface the model's actual score instead
                            # of this hard-coded placeholder confidence.
                            'confidence': 0.85,
                            'suggestion': 'These statements appear to contradict each other. Review the logical relationship.'
                        })
                except Exception as e:
                    print(f"NLI check error: {e}")
            # Check for circular definitions (simple heuristic)
            if self._is_circular(claim1, claim2):
                circular_refs.append({
                    'claim1': claim1,
                    'claim2': claim2
                })

        # Score = 100 minus the fraction of pairs that raised an issue.
        # TODO: might need to adjust penalty weights based on user feedback
        n = len(claim_texts)
        # Closed form n*(n-1)/2 — no need to materialize the combinations
        # list a second time just to count it.
        total_pairs = n * (n - 1) // 2
        issues = len(contradictions) + len(circular_refs)
        consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)

        return {
            'consistency_score': consistency_score,
            'contradictions': contradictions[:5],  # Limit to top 5
            'circular_definitions': circular_refs[:3],
            'entailment_failures': []
        }

    def _check_entailment(self, premise: str, hypothesis: str) -> str:
        """Classify the relation between premise and hypothesis.

        Returns 'contradiction', 'entailment', or 'neutral'; 'neutral' is
        also the fallback when the model is unavailable or raises.
        """
        if not self.nli_model:
            return 'neutral'
        try:
            # Pass the sentences as a proper pair so the tokenizer inserts
            # its own separator and segment ids. Embedding a literal "[SEP]"
            # inside one string is tokenizer-specific and is NOT treated as
            # a two-sentence input by the pipeline.
            result = self.nli_model({"text": premise, "text_pair": hypothesis})
            # Map the model's label onto our three-way relationship.
            label = result[0]['label'].lower()
            if 'contradiction' in label or 'contradict' in label:
                return 'contradiction'
            elif 'entailment' in label or 'entail' in label:
                return 'entailment'
            else:
                return 'neutral'
        except Exception as e:
            print(f"Entailment check error: {e}")
            return 'neutral'

    def _is_circular(self, claim1: str, claim2: str) -> bool:
        """Heuristic circular-definition check via content-word overlap.

        Flags a pair when, after stopword removal, at least 70% of the
        smaller claim's words also appear in the other claim. Pretty basic
        - just word overlap - but works okay for most cases.
        """
        words1 = set(claim1.lower().split()) - self._STOPWORDS
        words2 = set(claim2.lower().split()) - self._STOPWORDS
        # Very short claims give a noisy signal; require 3+ content words.
        if len(words1) > 2 and len(words2) > 2:
            overlap = len(words1 & words2)
            return overlap >= min(len(words1), len(words2)) * 0.7
        return False