Spaces:
Sleeping
Sleeping
| """ | |
| Stability Tester Module | |
| Tests if understanding holds under reformulation and stress testing | |
| """ | |
import os
import re
from typing import Dict, List, Optional

import numpy as np
import requests
from sentence_transformers import SentenceTransformer
class StabilityTester:
    """Scores how stable a user's explanation of a concept is.

    Uses lightweight lexical heuristics (claim length, vague wording,
    unresolved leading references) to flag claims that would likely not
    survive reformulation or stress testing.
    """

    # Wording that suggests surface-level understanding.  Single words are
    # matched on word boundaries so 'just' no longer fires inside
    # 'adjustment' or 'thing' inside 'nothing'; multi-word phrases match as
    # bounded substrings.
    _VAGUE_TERMS = ('thing', 'stuff', 'kind of', 'sort of', 'basically', 'just', 'simply')
    _VAGUE_PATTERNS = [re.compile(r'\b' + re.escape(term) + r'\b') for term in _VAGUE_TERMS]

    # Pronouns that make a claim depend on unstated context when they open
    # the sentence.
    _UNCLEAR_REFS = ('this', 'that', 'it', 'these', 'those')

    def __init__(self):
        # Embedding model kept for measuring semantic drift between
        # reformulations (not exercised by the current heuristics).
        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        # NOTE(review): the HF key/endpoint are configured but never used by
        # the visible methods — presumably reserved for LLM-generated
        # reformulations; confirm before removing.
        self.hf_api_key = os.getenv('HUGGINGFACE_API_KEY')
        self.llm_endpoint = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        self._ready = True

    def is_ready(self) -> bool:
        """Return True once the tester has finished initializing."""
        return self._ready

    async def test_stability(
        self,
        concept: str,
        original_explanation: str,
        claims: List[Dict]
    ) -> Dict:
        """
        Test explanation stability through reformulation.

        Strategy:
        1. Generate re-prompts asking the user to explain differently
        2. Simulate alternative explanations (or use original for drift)
        3. Measure semantic drift from the original
        4. Identify claims that become unclear/contradictory

        Args:
            concept: The concept being explained.
            original_explanation: The user's original explanation text.
            claims: Claim dicts; each must carry 'id' and 'text' keys.

        Returns:
            {
                'stability_score': float (0-100),
                'drift_scores': Dict[str, float],
                'unstable_claims': List[Dict],   # at most 3
                'stress_test_results': List[Dict]
            }
        """
        # Generate stress test prompts
        stress_prompts = self._generate_stress_prompts(concept)

        # NOTE(review): a previous revision encoded `original_explanation`
        # here but never used the embedding; the dead (and expensive) encode
        # call was dropped.  Re-introduce it when drift is actually measured
        # against reformulated explanations.

        # Test each claim's stability and collect the unstable ones.
        unstable_claims = []
        claim_drift_scores = {}
        for claim in claims:
            stability = await self._test_claim_stability(
                claim=claim,
                concept=concept,
                all_claims=claims
            )
            claim_drift_scores[claim['id']] = stability['drift_score']
            if stability['is_unstable']:
                unstable_claims.append({
                    'claim': claim['text'],
                    'reason': stability['reason'],
                    'drift_score': stability['drift_score']
                })

        # Overall score: 100 minus mean drift (as a percentage), floored at 0.
        # Cast to plain float so the result stays JSON-serializable
        # (np.mean returns a numpy scalar).
        avg_drift = float(np.mean(list(claim_drift_scores.values()))) if claim_drift_scores else 0.0
        stability_score = max(0.0, 100.0 - (avg_drift * 100.0))

        return {
            'stability_score': stability_score,
            'drift_scores': claim_drift_scores,
            'unstable_claims': unstable_claims[:3],  # Top 3
            'stress_test_results': [
                {
                    'prompt': prompt,
                    # A stress test "passes" only when no claim was unstable.
                    'passes': len(unstable_claims) == 0
                }
                for prompt in stress_prompts[:2]
            ]
        }

    def _generate_stress_prompts(self, concept: str) -> List[str]:
        """Generate stress test prompts for *concept*."""
        return [
            f"Explain {concept} in a different way",
            f"What would happen if {concept} didn't exist?",
            f"Explain {concept} to a 10-year-old",
            f"What are the limits or boundary conditions of {concept}?"
        ]

    async def _test_claim_stability(
        self,
        claim: Dict,
        concept: str,
        all_claims: List[Dict]
    ) -> Dict:
        """Test whether a single claim is stable.

        Returns a dict with 'is_unstable' (bool), 'reason' (str) and
        'drift_score' (float); checks are applied in order of severity.
        """
        claim_text = claim['text']
        word_count = len(claim_text.split())

        # Very short claims (<5 words) are often unstable.
        if word_count < 5:
            return {
                'is_unstable': True,
                'reason': 'Claim is too brief to demonstrate understanding',
                'drift_score': 0.6
            }

        # Vague language: count distinct vague terms present (word-bounded,
        # so e.g. 'adjustment' does not count as 'just').
        lowered = claim_text.lower()
        vague_count = sum(1 for pattern in self._VAGUE_PATTERNS if pattern.search(lowered))
        if vague_count >= 2:
            return {
                'is_unstable': True,
                'reason': 'Contains vague language suggesting surface understanding',
                'drift_score': 0.5
            }

        # Claims opening with "this"/"that"/"it"/... lack a clear antecedent
        # and may not stand alone when other claims exist.
        has_unclear_ref = any(lowered.startswith(ref + ' ') for ref in self._UNCLEAR_REFS)
        if has_unclear_ref and len(all_claims) > 1:
            return {
                'is_unstable': True,
                'reason': 'Claim has unclear references and may not stand alone',
                'drift_score': 0.4
            }

        # Claim appears stable.
        return {
            'is_unstable': False,
            'reason': 'Claim appears well-formed',
            'drift_score': 0.1
        }