# conceptvector/analysis/stability_tester.py
# Author: Tawhid Bin Omar
# Initial deployment of RealityCheck AI backend
# Commit: 8176754
"""
Stability Tester Module
Tests if understanding holds under reformulation and stress testing
"""
import os
import re
from typing import List, Dict, Optional

import numpy as np
import requests
from sentence_transformers import SentenceTransformer
class StabilityTester:
    """Tests whether an explanation of a concept holds up under
    reformulation and stress testing.

    Heuristic checks flag claims that are too brief, lean on vague
    language, or open with unclear references; per-claim drift scores
    are aggregated into an overall 0-100 stability score.
    """

    def __init__(self):
        # Embedding model is loaded for future semantic-drift measurement;
        # the current demo path uses only the heuristics below.
        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.hf_api_key = os.getenv('HUGGINGFACE_API_KEY')
        self.llm_endpoint = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        self._ready = True

    def is_ready(self) -> bool:
        """Return True once the tester has finished initializing."""
        return self._ready

    async def test_stability(
        self,
        concept: str,
        original_explanation: str,
        claims: List[Dict]
    ) -> Dict:
        """
        Test explanation stability through reformulation.

        Strategy:
        1. Generate re-prompts asking user to explain differently
        2. Simulate alternative explanations (or use original for drift)
        3. Measure semantic drift from original
        4. Identify claims that become unclear/contradictory

        Args:
            concept: the concept being explained.
            original_explanation: the user's explanation text (reserved
                for real drift measurement; unused by the demo heuristics).
            claims: list of dicts, each with at least 'id' and 'text' keys.

        Returns:
            {
                'stability_score': float (0-100),
                'drift_scores': Dict[str, float],
                'unstable_claims': List[Dict],
                'stress_test_results': List[Dict]
            }
        """
        # Generate stress test prompts
        stress_prompts = self._generate_stress_prompts(concept)

        # NOTE: In production this would re-prompt the user (or an LLM)
        # with the stress prompts and embed the alternatives to measure
        # real semantic drift against `original_explanation`. The demo
        # relies on per-claim heuristics only, so we deliberately skip
        # embedding here (previously the explanation was embedded and
        # the result discarded -- a wasted model inference).

        # Test each claim's stability and collect the unstable ones
        unstable_claims = []
        claim_drift_scores = {}
        for claim in claims:
            stability = await self._test_claim_stability(
                claim=claim,
                concept=concept,
                all_claims=claims
            )
            claim_drift_scores[claim['id']] = stability['drift_score']
            if stability['is_unstable']:
                unstable_claims.append({
                    'claim': claim['text'],
                    'reason': stability['reason'],
                    'drift_score': stability['drift_score']
                })

        # Overall stability: 100 minus mean drift expressed as a
        # percentage, clamped at 0. Cast to plain float because
        # np.mean returns np.float64, which is not JSON-serializable
        # by the stdlib encoder.
        if claim_drift_scores:
            avg_drift = float(np.mean(list(claim_drift_scores.values())))
        else:
            avg_drift = 0.0
        stability_score = max(0.0, 100.0 - (avg_drift * 100.0))

        return {
            'stability_score': stability_score,
            'drift_scores': claim_drift_scores,
            'unstable_claims': unstable_claims[:3],  # Top 3
            'stress_test_results': [
                {
                    'prompt': prompt,
                    'passes': len(unstable_claims) == 0
                }
                for prompt in stress_prompts[:2]
            ]
        }

    def _generate_stress_prompts(self, concept: str) -> List[str]:
        """Generate reformulation prompts used to stress-test understanding."""
        return [
            f"Explain {concept} in a different way",
            f"What would happen if {concept} didn't exist?",
            f"Explain {concept} to a 10-year-old",
            f"What are the limits or boundary conditions of {concept}?"
        ]

    async def _test_claim_stability(
        self,
        claim: Dict,
        concept: str,
        all_claims: List[Dict]
    ) -> Dict:
        """Heuristically test whether a single claim is stable.

        Args:
            claim: dict with at least a 'text' key.
            concept: the concept under test (reserved for future
                LLM-backed checks; unused by the current heuristics).
            all_claims: full claim list, used to judge whether a claim
                with unclear references could stand alone.

        Returns:
            dict with 'is_unstable' (bool), 'reason' (str),
            and 'drift_score' (float in [0, 1]).
        """
        claim_text = claim['text']
        word_count = len(claim_text.split())

        # Very short claims (<5 words) are often unstable
        if word_count < 5:
            return {
                'is_unstable': True,
                'reason': 'Claim is too brief to demonstrate understanding',
                'drift_score': 0.6
            }

        # Check for vague language. Match whole words/phrases so that
        # e.g. 'just' does not fire inside 'adjust' or 'justify', and
        # 'thing' does not fire inside 'nothing' or 'everything'.
        vague_terms = ['thing', 'stuff', 'kind of', 'sort of', 'basically', 'just', 'simply']
        lowered = claim_text.lower()
        vague_count = sum(
            1 for term in vague_terms
            if re.search(r'\b' + re.escape(term) + r'\b', lowered)
        )
        if vague_count >= 2:
            return {
                'is_unstable': True,
                'reason': 'Contains vague language suggesting surface understanding',
                'drift_score': 0.5
            }

        # Claims that open with a bare pronoun ('this', 'it', ...) likely
        # depend on another claim for their antecedent and may not stand
        # alone; only flag when there are other claims to depend on.
        unclear_refs = ['this', 'that', 'it', 'these', 'those']
        has_unclear_ref = any(lowered.startswith(ref + ' ') for ref in unclear_refs)
        if has_unclear_ref and len(all_claims) > 1:
            return {
                'is_unstable': True,
                'reason': 'Claim has unclear references and may not stand alone',
                'drift_score': 0.4
            }

        # Claim appears stable
        return {
            'is_unstable': False,
            'reason': 'Claim appears well-formed',
            'drift_score': 0.1
        }