conceptvector/analysis/consistency_checker.py
Tawhid Bin Omar
cleaned up code comments and docs
892d4dd
"""
Consistency Checker Module
Uses NLI models to detect logical contradictions and inconsistencies
"""
from typing import List, Dict
from transformers import pipeline
import itertools
class ConsistencyChecker:
    """Checks a set of claims for logical inconsistencies.

    Uses a HuggingFace text-classification pipeline as an NLI model to flag
    pairwise contradictions, plus a simple word-overlap heuristic to flag
    potential circular definitions. Degrades gracefully to heuristic-only
    mode if the model fails to load.
    """

    # Common function words ignored by the circular-definition heuristic.
    # Hoisted to a class constant so the set is built once, not per pair.
    _STOPWORDS = frozenset({
        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
        'should', 'may', 'might', 'can', 'to', 'of', 'in', 'for', 'on', 'with',
    })

    def __init__(self):
        """Load the NLI pipeline; on failure, fall back to heuristics only."""
        try:
            # Using smaller model for speed - might upgrade to base later.
            # NOTE(review): deberta-v3-xsmall is a *base* checkpoint, not an
            # NLI fine-tune, so its labels may be LABEL_0/LABEL_1 and never
            # contain "contradiction"/"entailment" — confirm and switch to an
            # MNLI fine-tuned model if real contradiction detection is needed.
            self.nli_model = pipeline(
                "text-classification",
                model="microsoft/deberta-v3-xsmall",
                device=-1,  # CPU only for now
            )
            self._ready = True
        except Exception as e:
            # Deliberate best-effort: keep the checker usable without NLI.
            print(f"NLI model initialization error: {e}")
            self._ready = False
            self.nli_model = None

    def is_ready(self) -> bool:
        """Return True if the NLI model loaded successfully."""
        return self._ready

    async def check_consistency(self, claims: List[Dict]) -> Dict:
        """
        Check logical consistency between claims using NLI.

        Args:
            claims: list of dicts, each carrying the claim under 'text'.

        Returns:
            {
                'consistency_score': float (0-100),
                'contradictions': List[Dict],
                'circular_definitions': List[Dict],
                'entailment_failures': List[Dict]
            }
        """
        # Fewer than two claims can never conflict; treat as fully consistent.
        if not claims or len(claims) < 2:
            return {
                'consistency_score': 100.0,
                'contradictions': [],
                'circular_definitions': [],
                'entailment_failures': []
            }

        contradictions = []
        circular_refs = []
        claim_texts = [claim['text'] for claim in claims]

        # Check every unordered pair of claims (iterate the texts directly
        # instead of index pairs — same pairs, less bookkeeping).
        for claim1, claim2 in itertools.combinations(claim_texts, 2):
            # Check for contradiction via the NLI model, when available.
            if self._ready and self.nli_model:
                try:
                    if self._check_entailment(claim1, claim2) == 'contradiction':
                        contradictions.append({
                            'claim1': claim1,
                            'claim2': claim2,
                            # TODO: surface the model's actual score instead
                            # of this hard-coded placeholder confidence.
                            'confidence': 0.85,
                            'suggestion': 'These statements appear to contradict each other. Review the logical relationship.'
                        })
                except Exception as e:
                    print(f"NLI check error: {e}")
            # Check for circular definitions (simple heuristic)
            if self._is_circular(claim1, claim2):
                circular_refs.append({
                    'claim1': claim1,
                    'claim2': claim2
                })

        # Score = 100 minus the fraction of pairs that raised an issue.
        # TODO: might need to adjust penalty weights based on user feedback
        n = len(claim_texts)
        # Closed form n*(n-1)/2 — no need to materialize the combinations
        # list a second time just to count it.
        total_pairs = n * (n - 1) // 2
        issues = len(contradictions) + len(circular_refs)
        consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)

        return {
            'consistency_score': consistency_score,
            'contradictions': contradictions[:5],  # Limit to top 5
            'circular_definitions': circular_refs[:3],
            'entailment_failures': []
        }

    def _check_entailment(self, premise: str, hypothesis: str) -> str:
        """Classify the relation between premise and hypothesis.

        Returns 'contradiction', 'entailment', or 'neutral'; 'neutral' is
        also the fallback when the model is unavailable or raises.
        """
        if not self.nli_model:
            return 'neutral'
        try:
            # Pass the sentences as a proper pair so the tokenizer inserts
            # its own separator and segment ids. Embedding a literal "[SEP]"
            # inside one string is tokenizer-specific and is NOT treated as
            # a two-sentence input by the pipeline.
            result = self.nli_model({"text": premise, "text_pair": hypothesis})
            # Map the model's label onto our three-way relationship.
            label = result[0]['label'].lower()
            if 'contradiction' in label or 'contradict' in label:
                return 'contradiction'
            elif 'entailment' in label or 'entail' in label:
                return 'entailment'
            else:
                return 'neutral'
        except Exception as e:
            print(f"Entailment check error: {e}")
            return 'neutral'

    def _is_circular(self, claim1: str, claim2: str) -> bool:
        """Heuristic circular-definition check via content-word overlap.

        Flags a pair when, after stopword removal, at least 70% of the
        smaller claim's words also appear in the other claim. Pretty basic
        - just word overlap - but works okay for most cases.
        """
        words1 = set(claim1.lower().split()) - self._STOPWORDS
        words2 = set(claim2.lower().split()) - self._STOPWORDS
        # Very short claims give a noisy signal; require 3+ content words.
        if len(words1) > 2 and len(words2) > 2:
            overlap = len(words1 & words2)
            return overlap >= min(len(words1), len(words2)) * 0.7
        return False