Spaces:
Sleeping
Sleeping
Tawhid Bin Omar committed on
Commit ·
8176754
0
Parent(s):
Initial deployment of RealityCheck AI backend
Browse files- .env.example +1 -0
- .gitignore +34 -0
- Dockerfile +27 -0
- README.md +46 -0
- README_HF.md +35 -0
- analysis/__init__.py +16 -0
- analysis/claim_extractor.py +141 -0
- analysis/consistency_checker.py +133 -0
- analysis/coverage_analyzer.py +179 -0
- analysis/graph_generator.py +200 -0
- analysis/scorer.py +98 -0
- analysis/stability_tester.py +146 -0
- main.py +336 -0
- requirements.txt +13 -0
.env.example
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
HUGGINGFACE_API_KEY=your_huggingface_api_key_here
|
.gitignore
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
venv/
|
| 8 |
+
env/
|
| 9 |
+
ENV/
|
| 10 |
+
.venv
|
| 11 |
+
|
| 12 |
+
# Environment
|
| 13 |
+
.env
|
| 14 |
+
.env.local
|
| 15 |
+
|
| 16 |
+
# IDE
|
| 17 |
+
.vscode/
|
| 18 |
+
.idea/
|
| 19 |
+
*.swp
|
| 20 |
+
*.swo
|
| 21 |
+
|
| 22 |
+
# OS
|
| 23 |
+
.DS_Store
|
| 24 |
+
Thumbs.db
|
| 25 |
+
|
| 26 |
+
# Testing
|
| 27 |
+
.pytest_cache/
|
| 28 |
+
.coverage
|
| 29 |
+
htmlcov/
|
| 30 |
+
|
| 31 |
+
# Build
|
| 32 |
+
dist/
|
| 33 |
+
build/
|
| 34 |
+
*.egg-info/
|
Dockerfile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.9-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install system dependencies
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
build-essential \
|
| 8 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
+
|
| 10 |
+
# Copy requirements
|
| 11 |
+
COPY requirements.txt .
|
| 12 |
+
|
| 13 |
+
# Install Python dependencies
|
| 14 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 15 |
+
|
| 16 |
+
# Download models at build time
|
| 17 |
+
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
|
| 18 |
+
RUN python -c "from transformers import pipeline; pipeline('text-classification', model='microsoft/deberta-v3-xsmall')"
|
| 19 |
+
|
| 20 |
+
# Copy application code
|
| 21 |
+
COPY . .
|
| 22 |
+
|
| 23 |
+
# Expose port
|
| 24 |
+
EXPOSE 7860
|
| 25 |
+
|
| 26 |
+
# Run the application
|
| 27 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: RealityCheck AI Backend
|
| 3 |
+
emoji: 🧠
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
sdk_version: 3.9
|
| 8 |
+
app_port: 7860
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# RealityCheck AI - Backend API
|
| 14 |
+
|
| 15 |
+
Understanding analysis engine that evaluates how well someone understands a concept by analyzing their explanation.
|
| 16 |
+
|
| 17 |
+
## What This Does
|
| 18 |
+
|
| 19 |
+
- Extracts claims from explanations
|
| 20 |
+
- Checks logical consistency
|
| 21 |
+
- Analyzes concept coverage
|
| 22 |
+
- Tests explanation stability
|
| 23 |
+
- Returns understanding scores
|
| 24 |
+
|
| 25 |
+
## API Endpoints
|
| 26 |
+
|
| 27 |
+
- `POST /analyze` - Analyze user explanation
|
| 28 |
+
- `GET /concepts` - Sample concepts list
|
| 29 |
+
- `GET /health` - Health check
|
| 30 |
+
|
| 31 |
+
## Setup
|
| 32 |
+
|
| 33 |
+
This Space requires:
|
| 34 |
+
- `HUGGINGFACE_API_KEY` in Settings → Repository secrets
|
| 35 |
+
|
| 36 |
+
## Tech Stack
|
| 37 |
+
|
| 38 |
+
- FastAPI (Python)
|
| 39 |
+
- Sentence Transformers
|
| 40 |
+
- Mistral-7B-Instruct (via API)
|
| 41 |
+
- DeBERTa (NLI)
|
| 42 |
+
|
| 43 |
+
## Links
|
| 44 |
+
|
| 45 |
+
- Frontend: [Add your Netlify URL]
|
| 46 |
+
- GitHub: [Add your repo URL]
|
README_HF.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: RealityCheck AI Backend
|
| 2 |
+
emoji: 🧠
|
| 3 |
+
colorFrom: blue
|
| 4 |
+
colorTo: indigo
|
| 5 |
+
sdk: docker
|
| 6 |
+
pinned: false
|
| 7 |
+
license: mit
|
| 8 |
+
app_file: main.py
|
| 9 |
+
|
| 10 |
+
# RealityCheck AI - Conceptual Understanding Diagnostic Engine
|
| 11 |
+
|
| 12 |
+
This Space hosts the FastAPI backend for RealityCheck AI, a system that evaluates conceptual understanding through explanation analysis.
|
| 13 |
+
|
| 14 |
+
## Features
|
| 15 |
+
|
| 16 |
+
- Multi-signal understanding analysis
|
| 17 |
+
- Pretrained AI models (no custom training)
|
| 18 |
+
- Logical consistency checking
|
| 19 |
+
- Concept coverage analysis
|
| 20 |
+
- Stability testing
|
| 21 |
+
|
| 22 |
+
## API Endpoints
|
| 23 |
+
|
| 24 |
+
- `POST /analyze` - Analyze user explanation
|
| 25 |
+
- `GET /concepts` - Get sample concepts
|
| 26 |
+
- `GET /health` - Health check
|
| 27 |
+
|
| 28 |
+
## Environment Variables
|
| 29 |
+
|
| 30 |
+
Required:
|
| 31 |
+
- `HUGGINGFACE_API_KEY` - Your Hugging Face API token
|
| 32 |
+
|
| 33 |
+
## Usage
|
| 34 |
+
|
| 35 |
+
See full documentation at: [GitHub Repository URL]
|
analysis/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Analysis module initialization
|
| 2 |
+
from .claim_extractor import ClaimExtractor
|
| 3 |
+
from .graph_generator import ConceptGraphGenerator
|
| 4 |
+
from .consistency_checker import ConsistencyChecker
|
| 5 |
+
from .coverage_analyzer import CoverageAnalyzer
|
| 6 |
+
from .stability_tester import StabilityTester
|
| 7 |
+
from .scorer import UnderstandingScorer
|
| 8 |
+
|
| 9 |
+
__all__ = [
|
| 10 |
+
'ClaimExtractor',
|
| 11 |
+
'ConceptGraphGenerator',
|
| 12 |
+
'ConsistencyChecker',
|
| 13 |
+
'CoverageAnalyzer',
|
| 14 |
+
'StabilityTester',
|
| 15 |
+
'UnderstandingScorer'
|
| 16 |
+
]
|
analysis/claim_extractor.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Claim Extractor
|
| 3 |
+
Breaks down user explanations into individual claims/statements
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import List, Dict
|
| 7 |
+
import os
|
| 8 |
+
import requests
|
| 9 |
+
from sentence_transformers import SentenceTransformer
|
| 10 |
+
import json
|
| 11 |
+
|
| 12 |
+
class ClaimExtractor:
    """Breaks a free-text explanation into atomic claims with embeddings.

    Uses the Mistral-7B-Instruct HF Inference API for extraction (with a
    regex sentence-splitting fallback) and all-MiniLM-L6-v2 for embeddings.
    """

    def __init__(self):
        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.hf_api_key = os.getenv('HUGGINGFACE_API_KEY')
        self.llm_endpoint = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        self._ready = False
        self._initialize()

    def _initialize(self):
        """Warm up the embedding model and record readiness."""
        try:
            # The first encode triggers model load, which is slow on first run.
            self.embedding_model.encode("test")
            self._ready = True
        except Exception as e:
            # Keep the service alive even if the model fails to load; callers
            # should consult is_ready() before relying on embeddings.
            print(f"Claim extractor initialization error: {e}")  # TODO: better error handling
            self._ready = False

    def is_ready(self) -> bool:
        """Return True if the embedding model loaded successfully."""
        return self._ready

    async def extract_claims(self, explanation: str) -> List[Dict]:
        """Extract atomic claims from a user explanation.

        Returns:
            List of claim dicts with keys:
            - id: stable identifier ('claim_<i>')
            - text: the claim itself
            - type: 'definition' | 'causal' | 'assumption' | 'example' | 'statement'
            - embedding: semantic vector as a list of floats
            - confidence: extraction confidence (fixed placeholder for the demo)
        """
        # NOTE(review): the original annotated this as List[Dict[str, any]],
        # where lowercase `any` is the builtin function, not typing.Any.
        # NOTE(review): _llm_extract_claims performs a blocking requests.post
        # inside this async path; consider run_in_executor if it stalls the loop.
        claims_raw = await self._llm_extract_claims(explanation)

        # Attach embeddings and heuristic metadata to each raw claim.
        claims = []
        for i, claim_text in enumerate(claims_raw):
            embedding = self.embedding_model.encode(claim_text)
            claim_type = self._classify_claim_type(claim_text)

            claims.append({
                'id': f'claim_{i}',
                'text': claim_text,
                'type': claim_type,
                'embedding': embedding.tolist(),
                'confidence': 0.85  # Simplified for demo
            })

        return claims

    async def _llm_extract_claims(self, explanation: str) -> List[str]:
        """Ask the LLM for numbered atomic claims; fall back to sentence splitting."""
        prompt = f"""<s>[INST] You are a precise claim extraction system. Break down the following explanation into atomic claims. Each claim should be a single, testable statement.

Explanation: {explanation}

Extract each claim on a new line, numbered. Focus on:
1. Definitions (what things are)
2. Causal relationships (X causes Y)
3. Assumptions (implicit or explicit)
4. Properties and characteristics

Output only the numbered claims, nothing else. [/INST]"""

        try:
            headers = {"Authorization": f"Bearer {self.hf_api_key}"}
            payload = {
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 500,
                    "temperature": 0.3,
                    "return_full_text": False
                }
            }

            response = requests.post(self.llm_endpoint, headers=headers, json=payload, timeout=30)

            if response.status_code == 200:
                result = response.json()
                text = result[0]['generated_text'] if isinstance(result, list) else result.get('generated_text', '')

                # Keep lines that look like "1. ...", "2) ..." or "- ..." and
                # strip the numbering/bullet prefix.
                claims = []
                for line in text.split('\n'):
                    line = line.strip()
                    if line and (line[0].isdigit() or line.startswith('-')):
                        claim = line.lstrip('0123456789.-) ').strip()
                        if claim:
                            claims.append(claim)

                return claims if claims else [explanation]  # Fallback to full explanation
            else:
                # Non-200 from the inference API: degrade gracefully.
                return self._fallback_extraction(explanation)

        except Exception as e:
            print(f"LLM extraction error: {e}")
            return self._fallback_extraction(explanation)

    def _fallback_extraction(self, explanation: str) -> List[str]:
        """Fallback: split on sentence punctuation, dropping fragments of <= 10 chars."""
        import re
        sentences = re.split(r'[.!?]+', explanation)
        return [s.strip() for s in sentences if s.strip() and len(s.strip()) > 10]

    def _classify_claim_type(self, claim: str) -> str:
        """Classify a claim by keyword patterns; the first matching category wins."""
        claim_lower = claim.lower()

        # Definition patterns
        if any(pattern in claim_lower for pattern in ['is a', 'is the', 'refers to', 'means', 'defined as']):
            return 'definition'

        # Causal patterns
        elif any(pattern in claim_lower for pattern in ['causes', 'leads to', 'results in', 'because', 'therefore']):
            return 'causal'

        # Example patterns
        elif any(pattern in claim_lower for pattern in ['for example', 'such as', 'like', 'instance']):
            return 'example'

        # Assumption patterns (note: bare substring 'if' matches inside other words)
        elif any(pattern in claim_lower for pattern in ['assume', 'given that', 'suppose', 'if']):
            return 'assumption'

        else:
            return 'statement'
|
analysis/consistency_checker.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Consistency Checker Module
|
| 3 |
+
Uses NLI models to detect logical contradictions and inconsistencies
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import List, Dict
|
| 7 |
+
from transformers import pipeline
|
| 8 |
+
import itertools
|
| 9 |
+
|
| 10 |
+
class ConsistencyChecker:
    """Detects contradictions and circular definitions among claims via NLI."""

    def __init__(self):
        try:
            # Use a smaller NLI model for faster inference.
            # NOTE(review): deberta-v3-xsmall is a base checkpoint; unless a
            # fine-tuned NLI head is intended, its labels will never contain
            # 'contradiction'/'entailment' and _check_entailment always
            # returns 'neutral' -- confirm the model choice.
            self.nli_model = pipeline(
                "text-classification",
                model="microsoft/deberta-v3-xsmall",  # Smaller, faster model
                device=-1  # CPU
            )
            self._ready = True
        except Exception as e:
            print(f"NLI model initialization error: {e}")
            self._ready = False
            self.nli_model = None

    def is_ready(self) -> bool:
        """Return True if the NLI pipeline loaded successfully."""
        return self._ready

    async def check_consistency(self, claims: List[Dict]) -> Dict:
        """Check pairwise logical consistency between claims using NLI.

        Args:
            claims: dicts with at least a 'text' key.

        Returns:
            {
                'consistency_score': float (0-100),
                'contradictions': List[Dict] (top 5),
                'circular_definitions': List[Dict] (top 3),
                'entailment_failures': List[Dict] (currently always empty)
            }
        """
        # Fewer than two claims cannot conflict with each other.
        if not claims or len(claims) < 2:
            return {
                'consistency_score': 100.0,
                'contradictions': [],
                'circular_definitions': [],
                'entailment_failures': []
            }

        contradictions = []
        circular_refs = []
        claim_texts = [claim['text'] for claim in claims]

        # Check every unordered pair of claims.
        for i, j in itertools.combinations(range(len(claim_texts)), 2):
            claim1 = claim_texts[i]
            claim2 = claim_texts[j]

            # Contradiction check via NLI (skipped when the model is absent).
            if self._ready and self.nli_model:
                try:
                    relation = self._check_entailment(claim1, claim2)
                    if relation == 'contradiction':
                        contradictions.append({
                            'claim1': claim1,
                            'claim2': claim2,
                            'confidence': 0.85,
                            'suggestion': 'These statements appear to contradict each other. Review the logical relationship.'
                        })
                except Exception as e:
                    print(f"NLI check error: {e}")

            # Circular-definition check (cheap word-overlap heuristic).
            if self._is_circular(claim1, claim2):
                circular_refs.append({
                    'claim1': claim1,
                    'claim2': claim2
                })

        # Score: fraction of pairs with an issue, inverted to 0-100.
        # n choose 2 computed arithmetically instead of materializing the
        # combinations a second time just to count them (was O(n^2) memory).
        n = len(claim_texts)
        total_pairs = n * (n - 1) // 2
        issues = len(contradictions) + len(circular_refs)
        consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)

        return {
            'consistency_score': consistency_score,
            'contradictions': contradictions[:5],  # Limit to top 5
            'circular_definitions': circular_refs[:3],
            'entailment_failures': []
        }

    def _check_entailment(self, premise: str, hypothesis: str) -> str:
        """Return 'contradiction', 'entailment' or 'neutral' for two statements."""
        if not self.nli_model:
            return 'neutral'

        try:
            # NOTE(review): NLI pipelines normally take a (text, text_pair)
            # input; this single "[SEP]"-joined string may not be tokenized
            # as a sentence pair -- verify against the model's tokenizer.
            result = self.nli_model(f"{premise} [SEP] {hypothesis}")

            label = result[0]['label'].lower()

            if 'contradiction' in label or 'contradict' in label:
                return 'contradiction'
            elif 'entailment' in label or 'entail' in label:
                return 'entailment'
            else:
                return 'neutral'
        except Exception as e:
            print(f"Entailment check error: {e}")
            return 'neutral'

    def _is_circular(self, claim1: str, claim2: str) -> bool:
        """Heuristic: two claims sharing >= 70% of their content words look circular."""
        words1 = set(claim1.lower().split())
        words2 = set(claim2.lower().split())

        # Drop function words so overlap reflects content only.
        stopwords = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
                     'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
                     'should', 'may', 'might', 'can', 'to', 'of', 'in', 'for', 'on', 'with'}

        words1 = words1 - stopwords
        words2 = words2 - stopwords

        # Only meaningful when both claims retain a few content words.
        if len(words1) > 2 and len(words2) > 2:
            overlap = len(words1 & words2)
            return overlap >= min(len(words1), len(words2)) * 0.7

        return False
|
analysis/coverage_analyzer.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Coverage Analyzer Module
|
| 3 |
+
Analyzes how well user explanation covers canonical concept graph
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import List, Dict
|
| 7 |
+
import numpy as np
|
| 8 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 9 |
+
|
| 10 |
+
class CoverageAnalyzer:
    """Scores how well a user's explanation covers a canonical concept graph."""

    def __init__(self):
        # No models to load; this analyzer is pure heuristics.
        self._ready = True

    def is_ready(self) -> bool:
        """Always True: there is nothing to initialize."""
        return self._ready

    async def analyze_coverage(
        self,
        user_claims: List[Dict],
        canonical_graph: Dict,
        explanation: str
    ) -> Dict:
        """Analyze concept coverage by matching user claims to graph nodes.

        Args:
            user_claims: dicts with at least 'text' and 'embedding' keys.
            canonical_graph: {'nodes': [{'id', 'label', optional 'type'}], ...}
            explanation: the full original explanation text.

        Returns:
            {
                'coverage_score': float (0-100),
                'node_coverage': Dict[node_id, status dict],
                'missing_concepts': List[Dict],
                'weak_links': List[Dict],
                'name_dropping': List[str]
            }
        """
        claim_embeddings = [claim['embedding'] for claim in user_claims]
        claim_texts = [claim['text'] for claim in user_claims]

        node_coverage = {}
        missing_concepts = []
        weak_links = []

        for node in canonical_graph['nodes']:
            node_id = node['id']
            node_label = node['label']

            coverage_status = self._check_node_coverage(
                node_label=node_label,
                claim_texts=claim_texts,
                claim_embeddings=claim_embeddings,
                explanation=explanation
            )

            node_coverage[node_id] = coverage_status

            if coverage_status['status'] == 'missing':
                # Missing prerequisites are worse than missing components.
                severity = 'high' if node.get('type') == 'prerequisite' else 'medium'
                missing_concepts.append({
                    'concept': node_label,
                    'severity': severity,
                    'description': f"This is a key {'prerequisite' if node.get('type') == 'prerequisite' else 'component'} for understanding the concept."
                })
            elif coverage_status['status'] == 'weak':
                weak_links.append({
                    'concept': node_label,
                    'user_quote': coverage_status.get('user_quote', ''),
                    'suggestion': 'Explain the mechanism or relationship, not just mention the term.'
                })

        coverage_score = self._calculate_coverage_score(node_coverage, canonical_graph)

        # Detect name-dropping (mentioned but not explained).
        name_dropping = self._detect_name_dropping(claim_texts, node_coverage)

        return {
            'coverage_score': coverage_score,
            'node_coverage': node_coverage,
            'missing_concepts': missing_concepts,
            'weak_links': weak_links,
            'name_dropping': name_dropping
        }

    def _check_node_coverage(
        self,
        node_label: str,
        claim_texts: List[str],
        claim_embeddings: List[List[float]],
        explanation: str
    ) -> Dict:
        """Check if and how well a concept node is covered.

        NOTE(review): claim_embeddings is currently unused (kept for interface
        stability); coverage strength is a word-count heuristic, not semantic
        similarity as the surrounding comments once suggested.
        """
        node_lower = node_label.lower()

        # Not mentioned anywhere in the explanation -> missing outright.
        if node_lower not in explanation.lower():
            return {
                'status': 'missing',
                'user_quote': None,
                'coverage_strength': 0.0
            }

        # Find the best-covering claim that mentions the label.
        # Heuristic: a longer claim suggests an actual explanation
        # (saturating at 15 words).
        best_match_idx = None
        best_score = 0.0
        for idx, claim_text in enumerate(claim_texts):
            if node_lower in claim_text.lower():
                coverage_strength = min(1.0, len(claim_text.split()) / 15.0)
                if coverage_strength > best_score:
                    best_score = coverage_strength
                    best_match_idx = idx

        if best_match_idx is not None:
            # Map strength to a coarse status bucket.
            if best_score > 0.6:
                status = 'covered'
            elif best_score > 0.2:
                status = 'weak'
            else:
                status = 'missing'

            return {
                'status': status,
                'user_quote': claim_texts[best_match_idx],
                'coverage_strength': best_score
            }

        # Mentioned in the explanation but not in any claim (name-dropping).
        return {
            'status': 'weak',
            'user_quote': None,
            'coverage_strength': 0.1
        }

    def _calculate_coverage_score(self, node_coverage: Dict, canonical_graph: Dict) -> float:
        """Weighted coverage: prerequisites count double; weak coverage counts 40%."""
        if not node_coverage:
            return 0.0

        total_weight = 0.0
        covered_weight = 0.0

        for node in canonical_graph['nodes']:
            # Prerequisites are more important than components.
            weight = 2.0 if node.get('type') == 'prerequisite' else 1.0
            total_weight += weight

            status = node_coverage.get(node['id'], {}).get('status', 'missing')
            if status == 'covered':
                covered_weight += weight
            elif status == 'weak':
                covered_weight += weight * 0.4

        return (covered_weight / total_weight * 100) if total_weight > 0 else 0.0

    def _detect_name_dropping(self, claim_texts: List[str], node_coverage: Dict) -> List[str]:
        """Return quotes for concepts that are mentioned but barely explained (top 3)."""
        name_dropped = []

        # Original code tested user_quote twice and iterated .items() while
        # discarding the key; one truthiness check over values() suffices.
        for coverage in node_coverage.values():
            if coverage.get('user_quote') and coverage.get('coverage_strength', 0) < 0.3:
                name_dropped.append(coverage['user_quote'])

        return name_dropped[:3]  # Limit to top 3
|
analysis/graph_generator.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Concept Graph Generator
|
| 3 |
+
Generates canonical concept dependency graphs for given concepts
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Dict, List
|
| 7 |
+
import os
|
| 8 |
+
import requests
|
| 9 |
+
import json
|
| 10 |
+
|
| 11 |
+
class ConceptGraphGenerator:
    """Builds canonical prerequisite/component graphs for a given concept."""

    def __init__(self):
        # This generator calls a remote API, so it is ready immediately.
        self._ready = True
        # HF Inference API credentials and target LLM endpoint.
        self.hf_api_key = os.getenv('HUGGINGFACE_API_KEY')
        self.llm_endpoint = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"

    def is_ready(self) -> bool:
        """Always True: no local model is loaded."""
        return self._ready
|
| 19 |
+
|
| 20 |
+
async def generate_graph(self, concept: str) -> Dict:
    """Generate the canonical concept dependency graph for *concept*.

    Returns:
        {
            'nodes': [{'id': str, 'label': str, 'level': int}],
            'edges': [{'source': str, 'target': str, 'relationship': str}]
        }
    """
    # Ask the LLM for the raw prerequisite/component structure, then
    # validate and normalize it into the node/edge format above.
    raw_structure = await self._llm_generate_structure(concept)
    return self._format_graph(raw_structure, concept)
|
| 35 |
+
|
| 36 |
+
async def _llm_generate_structure(self, concept: str) -> Dict:
    """Use the LLM to propose prerequisites/components/relationships for *concept*.

    Falls back to a static template when the API call fails, returns a
    non-200 status, or the response contains no parseable JSON.
    """
    prompt = f"""<s>[INST] You are a concept structure expert. For the concept "{concept}", identify the core prerequisite concepts that must be understood first, and their relationships.

Output a JSON structure with:
1. "prerequisites": list of prerequisite concepts needed to understand {concept}
2. "core_components": main parts/aspects of {concept} itself
3. "relationships": how concepts connect (prerequisite, causal, etc.)

Be precise and pedagogical. Focus on understanding order.

Output only valid JSON, no other text. [/INST]"""

    try:
        headers = {"Authorization": f"Bearer {self.hf_api_key}"}
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 800,
                "temperature": 0.4,
                "return_full_text": False
            }
        }

        response = requests.post(self.llm_endpoint, headers=headers, json=payload, timeout=30)

        if response.status_code == 200:
            result = response.json()
            text = result[0]['generated_text'] if isinstance(result, list) else result.get('generated_text', '')

            # The model may wrap the JSON in prose; take the outermost braces.
            try:
                start = text.find('{')
                end = text.rfind('}') + 1
                if start != -1 and end > start:
                    return json.loads(text[start:end])
            except json.JSONDecodeError:
                # Was a bare `except:` that swallowed everything (including
                # KeyboardInterrupt); only malformed JSON should fall through
                # to the template fallback.
                pass

            return self._fallback_graph(concept)
        else:
            return self._fallback_graph(concept)

    except Exception as e:
        print(f"Graph generation error: {e}")
        return self._fallback_graph(concept)
|
| 84 |
+
|
| 85 |
+
def _fallback_graph(self, concept: str) -> Dict:
    """Fallback: return a basic, hand-curated graph structure.

    Used when the LLM call fails or returns unparseable output. A few
    well-known concepts have dedicated templates; anything else gets a
    generic one-prerequisite skeleton.
    """
    # Hand-curated structures for common demo concepts.
    templates = {
        'entropy': {
            'prerequisites': ['energy', 'system states', 'probability'],
            'core_components': ['disorder measure', 'thermodynamic entropy', 'information entropy'],
            'relationships': [
                ('energy', 'entropy', 'prerequisite'),
                ('system states', 'entropy', 'prerequisite'),
                ('probability', 'entropy', 'prerequisite'),
            ],
        },
        'neural networks': {
            'prerequisites': ['linear algebra', 'calculus', 'probability'],
            'core_components': ['neurons', 'layers', 'weights', 'activation functions', 'backpropagation'],
            'relationships': [
                ('linear algebra', 'neural networks', 'prerequisite'),
                ('neurons', 'layers', 'component'),
                ('weights', 'neurons', 'component'),
                ('backpropagation', 'weights', 'causal'),
            ],
        },
        'photosynthesis': {
            'prerequisites': ['energy', 'chemical reactions', 'cells'],
            'core_components': ['light reactions', 'dark reactions', 'chlorophyll', 'glucose production'],
            'relationships': [
                ('energy', 'light reactions', 'prerequisite'),
                ('light reactions', 'dark reactions', 'causal'),
                ('dark reactions', 'glucose production', 'causal'),
            ],
        },
    }

    # Substring match so e.g. "Entropy (Physics)" still hits the template.
    needle = concept.lower()
    matched = next((tpl for key, tpl in templates.items() if key in needle), None)
    if matched is not None:
        return matched

    # Generic skeleton for unknown concepts.
    return {
        'prerequisites': ['foundational knowledge'],
        'core_components': [concept, f'{concept} principles', f'{concept} applications'],
        'relationships': [
            ('foundational knowledge', concept, 'prerequisite')
        ],
    }
|
| 133 |
+
|
| 134 |
+
def _format_graph(self, structure: Dict, concept: str) -> Dict:
    """Format a raw graph structure into the frontend node/edge shape.

    Node ids are assigned sequentially (node_0, node_1, ...) in the order:
    prerequisites (level 0), the main concept (level 1), then core
    components (level 2).
    """
    nodes = []
    edges = []
    node_map = {}

    def _add_node(label, level, node_type):
        # Sequential id: one append per id, so len(nodes) is the next index.
        nid = f'node_{len(nodes)}'
        node_map[label] = nid
        nodes.append({
            'id': nid,
            'label': label,
            'level': level,
            'type': node_type,
        })
        return nid

    for prereq in structure.get('prerequisites', []):
        _add_node(prereq, 0, 'prerequisite')

    concept_node_id = _add_node(concept, 1, 'main')

    for component in structure.get('core_components', []):
        _add_node(component, 2, 'component')

    # Translate (source, target, type) relationship triples into edges;
    # unknown labels fall back to the main concept node.
    for rel in structure.get('relationships', []):
        if len(rel) < 3:
            continue
        src, dst, rel_type = rel[0], rel[1], rel[2]
        edges.append({
            'source': node_map.get(src, node_map.get(concept)),
            'target': node_map.get(dst, concept_node_id),
            'relationship': rel_type,
        })

    # If the structure carried no usable relationships, wire every
    # prerequisite straight into the main concept.
    if not edges:
        edges = [
            {
                'source': node_map[prereq],
                'target': concept_node_id,
                'relationship': 'prerequisite',
            }
            for prereq in structure.get('prerequisites', [])
        ]

    return {
        'nodes': nodes,
        'edges': edges,
        'concept': concept,
    }
|
analysis/scorer.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Understanding Scorer Module
|
| 3 |
+
Calculates final understanding scores from analysis results
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Dict, Optional
|
| 7 |
+
|
| 8 |
+
class UnderstandingScorer:
    """Combine per-dimension analysis results into understanding scores.

    The overall score is a weighted average of four dimensions:
    consistency, coverage, stability, and assumption completeness.
    """

    def __init__(self):
        # Relative weight of each dimension in the overall score (sums to 1.0).
        self.weights = {
            'consistency': 0.25,
            'coverage': 0.35,
            'stability': 0.25,
            'assumptions': 0.15
        }

    def calculate_scores(
        self,
        consistency_result: Dict,
        coverage_result: Dict,
        stability_result: Optional[Dict]
    ) -> Dict:
        """Calculate multi-dimensional understanding scores.

        Returns a dict with 'overall', 'consistency', 'coverage',
        'stability' and 'assumptions' keys, each a float in [0, 100]
        rounded to one decimal place.
        """
        dims = {
            'consistency': consistency_result.get('consistency_score', 0),
            'coverage': coverage_result.get('coverage_score', 0),
            # Stability testing is optional; an absent result counts as stable.
            'stability': stability_result.get('stability_score', 100) if stability_result else 100,
        }
        dims['assumptions'] = self._calculate_assumption_score(
            coverage_result=coverage_result,
            consistency_result=consistency_result,
        )

        overall = sum(dims[name] * weight for name, weight in self.weights.items())

        return {
            'overall': round(overall, 1),
            'consistency': round(dims['consistency'], 1),
            'coverage': round(dims['coverage'], 1),
            'stability': round(dims['stability'], 1),
            'assumptions': round(dims['assumptions'], 1),
        }

    def _calculate_assumption_score(
        self,
        coverage_result: Dict,
        consistency_result: Dict
    ) -> float:
        """Estimate how well implicit assumptions are handled.

        Heuristic: start from the mean of consistency and coverage, then
        penalize missing concepts (unstated prerequisites, up to -30) and
        contradictions (conflicting implicit assumptions, up to -20).
        Result is clamped to [0, 100].
        """
        consistency = consistency_result.get('consistency_score', 0)
        coverage = coverage_result.get('coverage_score', 0)

        missing_count = len(coverage_result.get('missing_concepts', []))
        contradiction_count = len(consistency_result.get('contradictions', []))

        score = (consistency + coverage) / 2
        score -= min(30, missing_count * 10)
        score -= min(20, contradiction_count * 15)

        return max(0, min(100, score))
|
analysis/stability_tester.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Stability Tester Module
|
| 3 |
+
Tests if understanding holds under reformulation and stress testing
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import List, Dict, Optional
|
| 7 |
+
import os
|
| 8 |
+
import requests
|
| 9 |
+
import numpy as np
|
| 10 |
+
from sentence_transformers import SentenceTransformer
|
| 11 |
+
|
| 12 |
+
class StabilityTester:
    """Tests whether an explanation of a concept holds up under reformulation.

    The current implementation judges each claim with lightweight lexical
    heuristics (brevity, vague wording, unresolved references). The embedding
    model and LLM endpoint are initialized for future semantic-drift
    measurement against real reformulations, but are not consulted yet.
    """

    def __init__(self):
        # Reserved for semantic-drift comparison of reformulations (not yet used
        # per-request; loading here keeps the cost at startup).
        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.hf_api_key = os.getenv('HUGGINGFACE_API_KEY')
        self.llm_endpoint = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        self._ready = True

    def is_ready(self) -> bool:
        """Report whether the tester is initialized."""
        return self._ready

    async def test_stability(
        self,
        concept: str,
        original_explanation: str,
        claims: List[Dict]
    ) -> Dict:
        """Test explanation stability through reformulation.

        Strategy:
        1. Generate stress-test re-prompts for the concept.
        2. Score each extracted claim for stability via heuristics.
        3. Aggregate per-claim drift into an overall stability score.

        Returns:
            {
                'stability_score': float (0-100, higher = more stable),
                'drift_scores': Dict[claim_id, float],
                'unstable_claims': List[Dict] (at most 3),
                'stress_test_results': List[Dict] (prompt + pass flag)
            }

        Note: ``original_explanation`` is kept in the signature for the
        future drift comparison; a previous version eagerly embedded it even
        though the embedding was never read, which wasted a full model
        inference per request. That dead call has been removed.
        """
        stress_prompts = self._generate_stress_prompts(concept)

        unstable_claims = []
        claim_drift_scores = {}

        for claim in claims:
            stability = await self._test_claim_stability(
                claim=claim,
                concept=concept,
                all_claims=claims
            )

            claim_drift_scores[claim['id']] = stability['drift_score']

            if stability['is_unstable']:
                unstable_claims.append({
                    'claim': claim['text'],
                    'reason': stability['reason'],
                    'drift_score': stability['drift_score']
                })

        # Overall stability: 100 minus mean drift (drift is on a 0-1 scale).
        avg_drift = np.mean(list(claim_drift_scores.values())) if claim_drift_scores else 0.0
        stability_score = max(0, 100 - (avg_drift * 100))

        return {
            'stability_score': stability_score,
            'drift_scores': claim_drift_scores,
            'unstable_claims': unstable_claims[:3],  # report at most the top 3
            'stress_test_results': [
                {
                    'prompt': prompt,
                    'passes': len(unstable_claims) == 0
                }
                for prompt in stress_prompts[:2]
            ]
        }

    def _generate_stress_prompts(self, concept: str) -> List[str]:
        """Generate reformulation prompts used as stress tests."""
        return [
            f"Explain {concept} in a different way",
            f"What would happen if {concept} didn't exist?",
            f"Explain {concept} to a 10-year-old",
            f"What are the limits or boundary conditions of {concept}?"
        ]

    async def _test_claim_stability(
        self,
        claim: Dict,
        concept: str,
        all_claims: List[Dict]
    ) -> Dict:
        """Heuristically judge whether a single claim is stable.

        Returns a dict with 'is_unstable' (bool), 'reason' (str) and
        'drift_score' (float, 0-1; higher = less stable).
        """
        claim_text = claim['text']
        word_count = len(claim_text.split())

        # Very short claims (<5 words) rarely demonstrate real understanding.
        if word_count < 5:
            return {
                'is_unstable': True,
                'reason': 'Claim is too brief to demonstrate understanding',
                'drift_score': 0.6
            }

        # Two or more vague filler terms suggest surface-level understanding.
        vague_terms = ['thing', 'stuff', 'kind of', 'sort of', 'basically', 'just', 'simply']
        vague_count = sum(1 for term in vague_terms if term in claim_text.lower())

        if vague_count >= 2:
            return {
                'is_unstable': True,
                'reason': 'Contains vague language suggesting surface understanding',
                'drift_score': 0.5
            }

        # Claims opening with a pronoun likely depend on other claims for
        # their antecedent and do not stand alone.
        unclear_refs = ['this', 'that', 'it', 'these', 'those']
        has_unclear_ref = any(claim_text.lower().startswith(ref + ' ') for ref in unclear_refs)

        if has_unclear_ref and len(all_claims) > 1:
            return {
                'is_unstable': True,
                'reason': 'Claim has unclear references and may not stand alone',
                'drift_score': 0.4
            }

        return {
            'is_unstable': False,
            'reason': 'Claim appears well-formed',
            'drift_score': 0.1
        }
|
main.py
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RealityCheck AI - Backend API
|
| 3 |
+
FastAPI server for analyzing how well someone understands a concept
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from fastapi import FastAPI, HTTPException
|
| 7 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
+
from pydantic import BaseModel
|
| 9 |
+
from typing import List, Dict, Optional
|
| 10 |
+
import os
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
from analysis.claim_extractor import ClaimExtractor
|
| 14 |
+
from analysis.graph_generator import ConceptGraphGenerator
|
| 15 |
+
from analysis.consistency_checker import ConsistencyChecker
|
| 16 |
+
from analysis.coverage_analyzer import CoverageAnalyzer
|
| 17 |
+
from analysis.stability_tester import StabilityTester
|
| 18 |
+
from analysis.scorer import UnderstandingScorer
|
| 19 |
+
|
| 20 |
+
load_dotenv()
|
| 21 |
+
|
| 22 |
+
# FastAPI application instance; metadata shows up in the generated OpenAPI docs.
app = FastAPI(
    title="RealityCheck AI API",
    description="Understanding analysis engine",
    version="1.0.0"
)

# CORS - TODO: lock this down for production
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True grants
# credentialed access from any origin — restrict origins before any real
# deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], # TODO: change this before deploying
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize analysis pipeline components.
# Constructed once at import time, so any model loading done by these
# constructors happens at startup rather than per request.
claim_extractor = ClaimExtractor()
graph_generator = ConceptGraphGenerator()
consistency_checker = ConsistencyChecker()
coverage_analyzer = CoverageAnalyzer()
stability_tester = StabilityTester()
scorer = UnderstandingScorer()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class AnalysisRequest(BaseModel):
    """Request body for POST /analyze."""
    concept: str  # the concept the user claims to understand
    explanation: str  # the user's free-text explanation of the concept
    test_stability: Optional[bool] = True  # run the optional stability analysis step
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class ConceptNode(BaseModel):
    """A node in the concept graph returned to the frontend."""
    id: str
    label: str
    status: str  # 'covered', 'weak', 'missing'
    user_quote: Optional[str] = None  # supporting quote from the user's explanation, when available
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class ConceptEdge(BaseModel):
    """A directed edge between two concept-graph nodes."""
    source: str  # id of the source node
    target: str  # id of the target node
    relationship: str  # 'prerequisite', 'causal', 'related'
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class ConceptGraph(BaseModel):
    """Concept graph (nodes + edges) annotated with user coverage."""
    nodes: List[ConceptNode]
    edges: List[ConceptEdge]
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class ScoreBreakdown(BaseModel):
    """Per-dimension understanding scores, each on a 0-100 scale."""
    consistency: float
    coverage: float
    stability: float
    assumption_completeness: float
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class FeedbackItem(BaseModel):
    """A single piece of targeted feedback about the explanation."""
    type: str  # 'missing_concept', 'contradiction', 'weak_link'
    severity: str  # 'high', 'medium', 'low'
    message: str  # what was detected
    suggestion: str  # how the user might improve
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class AnalysisResponse(BaseModel):
    """Full result payload returned by POST /analyze."""
    overall_score: float  # weighted 0-100 understanding score
    score_breakdown: ScoreBreakdown
    concept_graph: ConceptGraph
    feedback: List[FeedbackItem]
    confidence_mismatch_warning: Optional[str] = None  # set when confident tone meets a low score
    explanation_stability: Optional[Dict[str, float]] = None  # per-claim drift scores, if stability was tested
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
@app.get("/")
async def root():
    """Basic health-check endpoint reporting service identity and status."""
    return {
        "message": "RealityCheck AI API",
        "status": "operational",
        "version": "1.0.0",
    }
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
@app.get("/health")
async def health_check():
    """Detailed health check reporting readiness of each model-backed component."""
    model_status = {
        "embeddings": claim_extractor.is_ready(),
        "nli": consistency_checker.is_ready(),
        "llm": graph_generator.is_ready(),
    }
    return {"status": "healthy", "models_loaded": model_status}
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_understanding(request: AnalysisRequest):
    """
    Main endpoint: Analyze user's conceptual understanding

    This endpoint orchestrates the entire analysis pipeline:
    1. Extract claims from explanation
    2. Generate canonical concept graph
    3. Check logical consistency
    4. Analyze concept coverage
    5. Test explanation stability
    6. Calculate understanding scores

    Steps are order-dependent: claims feed consistency, coverage and
    stability; the canonical graph feeds coverage and the response graph.
    """
    try:
        # Step 1: Extract atomic claims from user explanation
        claims = await claim_extractor.extract_claims(request.explanation)

        # Step 2: Generate canonical concept graph for the concept
        canonical_graph = await graph_generator.generate_graph(request.concept)

        # Step 3: Check logical consistency between claims
        consistency_result = await consistency_checker.check_consistency(claims)

        # Step 4: Analyze concept coverage
        coverage_result = await coverage_analyzer.analyze_coverage(
            user_claims=claims,
            canonical_graph=canonical_graph,
            explanation=request.explanation
        )

        # Step 5: Test stability (if requested; skipping it defaults the
        # stability score to 100 in the scorer)
        stability_result = None
        if request.test_stability:
            stability_result = await stability_tester.test_stability(
                concept=request.concept,
                original_explanation=request.explanation,
                claims=claims
            )

        # Step 6: Calculate overall understanding score
        scores = scorer.calculate_scores(
            consistency_result=consistency_result,
            coverage_result=coverage_result,
            stability_result=stability_result
        )

        # Build concept graph with user coverage
        concept_graph = _build_concept_graph(
            canonical_graph=canonical_graph,
            coverage_result=coverage_result
        )

        # Generate targeted feedback
        feedback = _generate_feedback(
            consistency_result=consistency_result,
            coverage_result=coverage_result,
            stability_result=stability_result
        )

        # Detect confidence-understanding mismatch
        confidence_warning = _check_confidence_mismatch(
            explanation=request.explanation,
            overall_score=scores['overall']
        )

        return AnalysisResponse(
            overall_score=scores['overall'],
            score_breakdown=ScoreBreakdown(
                consistency=scores['consistency'],
                coverage=scores['coverage'],
                stability=scores['stability'],
                assumption_completeness=scores['assumptions']
            ),
            concept_graph=concept_graph,
            feedback=feedback,
            confidence_mismatch_warning=confidence_warning,
            explanation_stability=stability_result.get('drift_scores') if stability_result else None
        )

    # NOTE(review): this broad catch converts every failure (including bad
    # input from downstream components) into a 500 and leaks the raw
    # exception text to the client — consider narrowing and logging instead.
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
@app.get("/concepts")
async def get_sample_concepts():
    """Return a static list of sample concepts for testing the analyzer."""
    samples = [
        ("Entropy (Physics)", "Physics", "intermediate"),
        ("Neural Networks", "Computer Science", "intermediate"),
        ("Photosynthesis", "Biology", "beginner"),
        ("Supply and Demand", "Economics", "beginner"),
        ("Recursion", "Computer Science", "intermediate"),
        ("Natural Selection", "Biology", "intermediate"),
    ]
    return {
        "concepts": [
            {"name": name, "category": category, "difficulty": difficulty}
            for name, category, difficulty in samples
        ]
    }
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def _build_concept_graph(canonical_graph: Dict, coverage_result: Dict) -> ConceptGraph:
    """Merge coverage analysis into the canonical graph for the response.

    Nodes absent from the coverage map are marked 'missing'.
    """
    node_coverage = coverage_result.get('node_coverage', {})

    nodes = []
    for node in canonical_graph['nodes']:
        info = node_coverage.get(node['id'], {})
        nodes.append(ConceptNode(
            id=node['id'],
            label=node['label'],
            status=info.get('status', 'missing'),
            user_quote=info.get('user_quote'),
        ))

    edges = []
    for edge in canonical_graph['edges']:
        edges.append(ConceptEdge(
            source=edge['source'],
            target=edge['target'],
            relationship=edge['relationship'],
        ))

    return ConceptGraph(nodes=nodes, edges=edges)
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def _generate_feedback(
    consistency_result: Dict,
    coverage_result: Dict,
    stability_result: Optional[Dict]
) -> List[FeedbackItem]:
    """Generate targeted feedback items, ordered by category:
    contradictions, missing concepts, weak links, then instabilities.
    """
    feedback: List[FeedbackItem] = []

    # Logical contradictions between extracted claims.
    feedback.extend(
        FeedbackItem(
            type='contradiction',
            severity='high',
            message=f"Contradiction detected between: '{c['claim1']}' and '{c['claim2']}'",
            suggestion=c.get('suggestion', 'Review these claims for logical consistency'),
        )
        for c in consistency_result.get('contradictions', [])
    )

    # Prerequisite concepts the explanation never touched.
    feedback.extend(
        FeedbackItem(
            type='missing_concept',
            severity=m.get('severity', 'medium'),
            message=f"Missing prerequisite concept: {m['concept']}",
            suggestion=f"Consider explaining: {m.get('description', '')}",
        )
        for m in coverage_result.get('missing_concepts', [])
    )

    # Concepts that were mentioned but only weakly explained.
    feedback.extend(
        FeedbackItem(
            type='weak_link',
            severity='low',
            message=f"Weak explanation of: {w['concept']}",
            suggestion=w.get('suggestion', 'Provide more detail'),
        )
        for w in coverage_result.get('weak_links', [])
    )

    # Claims that did not survive the stability stress tests.
    if stability_result:
        feedback.extend(
            FeedbackItem(
                type='instability',
                severity='medium',
                message=f"Explanation becomes unclear when reformulated: {u['claim']}",
                suggestion="This may indicate surface-level understanding. Try explaining the underlying mechanism.",
            )
            for u in stability_result.get('unstable_claims', [])
        )

    return feedback
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def _check_confidence_mismatch(explanation: str, overall_score: float) -> Optional[str]:
    """Detect when an explanation sounds confident but scores low.

    Heuristic: if at least two confident-language markers appear and the
    overall score is below 60, return a warning string; otherwise None.
    """
    confident_markers = (
        'obviously', 'clearly', 'of course', 'everyone knows',
        'it is evident', 'undoubtedly', 'certainly', 'definitely',
    )

    lowered = explanation.lower()
    marker_hits = sum(marker in lowered for marker in confident_markers)

    if marker_hits < 2 or overall_score >= 60:
        return None

    return (
        "⚠️ Confidence-Understanding Mismatch Detected: "
        "Your explanation uses confident language, but analysis suggests potential gaps. "
        "This is common when we're familiar with terminology but haven't fully internalized the concepts."
    )
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
if __name__ == "__main__":
    # Local development entry point; binds on all interfaces at port 8000.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi>=0.100.0
|
| 2 |
+
uvicorn[standard]>=0.25.0
|
| 3 |
+
pydantic>=2.0.0
|
| 4 |
+
python-multipart>=0.0.6
|
| 5 |
+
sentence-transformers>=2.0.0
|
| 6 |
+
transformers>=4.30.0
|
| 7 |
+
torch>=2.0.0
|
| 8 |
+
numpy>=1.24.0
|
| 9 |
+
networkx>=3.0.0
|
| 10 |
+
python-dotenv>=1.0.0
|
| 11 |
+
huggingface-hub>=0.16.0
|
| 12 |
+
requests>=2.31.0
|
| 13 |
+
scikit-learn>=1.3.0
|