Tawhid Bin Omar committed on
Commit
8176754
·
0 Parent(s):

Initial deployment of RealityCheck AI backend

Browse files
.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ HUGGINGFACE_API_KEY=your_huggingface_api_key_here
.gitignore ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ venv/
8
+ env/
9
+ ENV/
10
+ .venv
11
+
12
+ # Environment
13
+ .env
14
+ .env.local
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+ *.swo
21
+
22
+ # OS
23
+ .DS_Store
24
+ Thumbs.db
25
+
26
+ # Testing
27
+ .pytest_cache/
28
+ .coverage
29
+ htmlcov/
30
+
31
+ # Build
32
+ dist/
33
+ build/
34
+ *.egg-info/
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the RealityCheck AI FastAPI backend.
# Models are baked in at build time so Space cold starts do not
# have to download them.
FROM python:3.9-slim

WORKDIR /app

# Install system dependencies
# build-essential compiles any wheels that ship without prebuilt binaries.
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements
# Copied before the app code so the dependency layer stays cached.
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Download models at build time
# NOTE(review): microsoft/deberta-v3-xsmall is a base checkpoint, not an
# NLI-finetuned one — confirm this is the intended model for the
# consistency checker.
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
RUN python -c "from transformers import pipeline; pipeline('text-classification', model='microsoft/deberta-v3-xsmall')"

# Copy application code
COPY . .

# Expose port
# 7860 matches app_port in the Space README.
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: RealityCheck AI Backend
3
+ emoji: 🧠
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: docker
7
+ sdk_version: 3.9
8
+ app_port: 7860
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # RealityCheck AI - Backend API
14
+
15
+ Understanding analysis engine that evaluates how well someone understands a concept by analyzing their explanation.
16
+
17
+ ## What This Does
18
+
19
+ - Extracts claims from explanations
20
+ - Checks logical consistency
21
+ - Analyzes concept coverage
22
+ - Tests explanation stability
23
+ - Returns understanding scores
24
+
25
+ ## API Endpoints
26
+
27
+ - `POST /analyze` - Analyze user explanation
28
+ - `GET /concepts` - Sample concepts list
29
+ - `GET /health` - Health check
30
+
31
+ ## Setup
32
+
33
+ This Space requires:
34
+ - `HUGGINGFACE_API_KEY` in Settings → Repository secrets
35
+
36
+ ## Tech Stack
37
+
38
+ - FastAPI (Python)
39
+ - Sentence Transformers
40
+ - Mistral-7B-Instruct (via API)
41
+ - DeBERTa (NLI)
42
+
43
+ ## Links
44
+
45
+ - Frontend: [Add your Netlify URL]
46
+ - GitHub: [Add your repo URL]
README_HF.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: RealityCheck AI Backend
2
+ emoji: 🧠
3
+ colorFrom: blue
4
+ colorTo: indigo
5
+ sdk: docker
6
+ pinned: false
7
+ license: mit
8
+ app_file: main.py
9
+
10
+ # RealityCheck AI - Conceptual Understanding Diagnostic Engine
11
+
12
+ This Space hosts the FastAPI backend for RealityCheck AI, a system that evaluates conceptual understanding through explanation analysis.
13
+
14
+ ## Features
15
+
16
+ - Multi-signal understanding analysis
17
+ - Pretrained AI models (no custom training)
18
+ - Logical consistency checking
19
+ - Concept coverage analysis
20
+ - Stability testing
21
+
22
+ ## API Endpoints
23
+
24
+ - `POST /analyze` - Analyze user explanation
25
+ - `GET /concepts` - Get sample concepts
26
+ - `GET /health` - Health check
27
+
28
+ ## Environment Variables
29
+
30
+ Required:
31
+ - `HUGGINGFACE_API_KEY` - Your Hugging Face API token
32
+
33
+ ## Usage
34
+
35
+ See full documentation at: [GitHub Repository URL]
analysis/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Analysis module initialization
# Re-exports the pipeline classes so callers can write
# `from analysis import ClaimExtractor` instead of importing submodules.
from .claim_extractor import ClaimExtractor
from .graph_generator import ConceptGraphGenerator
from .consistency_checker import ConsistencyChecker
from .coverage_analyzer import CoverageAnalyzer
from .stability_tester import StabilityTester
from .scorer import UnderstandingScorer

# Explicit public API for `from analysis import *`.
__all__ = [
    'ClaimExtractor',
    'ConceptGraphGenerator',
    'ConsistencyChecker',
    'CoverageAnalyzer',
    'StabilityTester',
    'UnderstandingScorer'
]
analysis/claim_extractor.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Claim Extractor
3
+ Breaks down user explanations into individual claims/statements
4
+ """
5
+
6
+ from typing import List, Dict
7
+ import os
8
+ import requests
9
+ from sentence_transformers import SentenceTransformer
10
+ import json
11
+
12
class ClaimExtractor:
    """Breaks user explanations down into atomic, typed claims.

    An LLM (Mistral-7B-Instruct via the HF Inference API) performs the
    extraction; a local sentence-transformer supplies per-claim embeddings.
    Falls back to naive sentence splitting when the API is unavailable.
    """

    def __init__(self):
        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.hf_api_key = os.getenv('HUGGINGFACE_API_KEY')
        self.llm_endpoint = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        self._ready = False
        self._initialize()

    def _initialize(self):
        """Warm up the embedding model and record readiness."""
        try:
            # The first encode() triggers the (slow) model load; success
            # means the extractor can embed claims.
            self.embedding_model.encode("test")
            self._ready = True
        except Exception as e:
            print(f"Claim extractor initialization error: {e}")  # TODO: better error handling
            self._ready = False

    def is_ready(self) -> bool:
        """Return True once the embedding model is usable."""
        return self._ready

    async def extract_claims(self, explanation: str) -> List[Dict]:
        """Extract atomic claims from a user explanation.

        Returns a list of claim dicts with keys:
            - id: stable identifier ('claim_0', 'claim_1', ...)
            - text: the claim itself
            - type: 'definition' | 'causal' | 'example' | 'assumption' | 'statement'
            - embedding: semantic vector as a list of floats
            - confidence: extraction confidence (fixed placeholder for demo)
        """
        # NOTE: return annotation fixed — the original said List[Dict[str, any]],
        # where lowercase `any` is the builtin function, not typing.Any.
        claims_raw = await self._llm_extract_claims(explanation)

        claims = []
        for i, claim_text in enumerate(claims_raw):
            embedding = self.embedding_model.encode(claim_text)
            claims.append({
                'id': f'claim_{i}',
                'text': claim_text,
                'type': self._classify_claim_type(claim_text),
                'embedding': embedding.tolist(),
                'confidence': 0.85  # Simplified for demo
            })
        return claims

    async def _llm_extract_claims(self, explanation: str) -> List[str]:
        """Ask the LLM for atomic claims; fall back to sentence splitting."""
        import re

        prompt = f"""<s>[INST] You are a precise claim extraction system. Break down the following explanation into atomic claims. Each claim should be a single, testable statement.

Explanation: {explanation}

Extract each claim on a new line, numbered. Focus on:
1. Definitions (what things are)
2. Causal relationships (X causes Y)
3. Assumptions (implicit or explicit)
4. Properties and characteristics

Output only the numbered claims, nothing else. [/INST]"""

        try:
            headers = {"Authorization": f"Bearer {self.hf_api_key}"}
            payload = {
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 500,
                    "temperature": 0.3,
                    "return_full_text": False
                }
            }

            # NOTE(review): requests.post is blocking inside an async def and
            # will stall the event loop while the API call runs; consider an
            # async HTTP client if this serves concurrent requests.
            response = requests.post(self.llm_endpoint, headers=headers, json=payload, timeout=30)

            if response.status_code != 200:
                # Fallback: simple sentence splitting
                return self._fallback_extraction(explanation)

            result = response.json()
            text = result[0]['generated_text'] if isinstance(result, list) else result.get('generated_text', '')

            # Keep only lines that look like list items ("1. ...", "- ...")
            # and strip just the leading marker. The original lstrip of a
            # character set could also eat digits/letters belonging to the
            # claim itself (e.g. "- 3D printing" -> "D printing").
            claims = []
            for line in text.split('\n'):
                line = line.strip()
                if line and (line[0].isdigit() or line.startswith('-')):
                    claim = re.sub(r'^(?:\d+[.)]?|-)\s*', '', line).strip()
                    if claim:
                        claims.append(claim)

            return claims if claims else [explanation]  # Fallback to full explanation

        except Exception as e:
            print(f"LLM extraction error: {e}")
            return self._fallback_extraction(explanation)

    def _fallback_extraction(self, explanation: str) -> List[str]:
        """Fallback: split on sentence punctuation, dropping trivial fragments."""
        import re
        sentences = re.split(r'[.!?]+', explanation)
        return [s.strip() for s in sentences if s.strip() and len(s.strip()) > 10]

    def _classify_claim_type(self, claim: str) -> str:
        """Classify a claim by simple keyword patterns (first match wins)."""
        claim_lower = claim.lower()

        # Definition patterns
        if any(p in claim_lower for p in ('is a', 'is the', 'refers to', 'means', 'defined as')):
            return 'definition'
        # Causal patterns
        if any(p in claim_lower for p in ('causes', 'leads to', 'results in', 'because', 'therefore')):
            return 'causal'
        # Example patterns
        if any(p in claim_lower for p in ('for example', 'such as', 'like', 'instance')):
            return 'example'
        # Assumption patterns
        if any(p in claim_lower for p in ('assume', 'given that', 'suppose', 'if')):
            return 'assumption'
        return 'statement'
analysis/consistency_checker.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Consistency Checker Module
3
+ Uses NLI models to detect logical contradictions and inconsistencies
4
+ """
5
+
6
+ from typing import List, Dict
7
+ from transformers import pipeline
8
+ import itertools
9
+
10
class ConsistencyChecker:
    """Detects logical contradictions and circular definitions among claims.

    Contradiction detection uses a transformers text-classification pipeline
    as an NLI head; circularity uses a cheap word-overlap heuristic.
    """

    def __init__(self):
        try:
            # NOTE(review): microsoft/deberta-v3-xsmall is a *base* checkpoint,
            # not NLI-finetuned, so its labels are typically LABEL_0/LABEL_1
            # and 'contradiction' will never match below. Confirm the intended
            # model (an NLI/MNLI-finetuned checkpoint) before relying on this.
            self.nli_model = pipeline(
                "text-classification",
                model="microsoft/deberta-v3-xsmall",  # Smaller, faster model
                device=-1  # CPU
            )
            self._ready = True
        except Exception as e:
            print(f"NLI model initialization error: {e}")
            self._ready = False
            self.nli_model = None

    def is_ready(self) -> bool:
        """Return True if the NLI pipeline loaded successfully."""
        return self._ready

    async def check_consistency(self, claims: List[Dict]) -> Dict:
        """Check pairwise logical consistency between claims using NLI.

        Returns:
            {
                'consistency_score': float (0-100),
                'contradictions': List[Dict],
                'circular_definitions': List[Dict],
                'entailment_failures': List[Dict]
            }
        """
        # Fewer than two claims cannot conflict with each other.
        if not claims or len(claims) < 2:
            return {
                'consistency_score': 100.0,
                'contradictions': [],
                'circular_definitions': [],
                'entailment_failures': []
            }

        contradictions = []
        circular_refs = []
        claim_texts = [claim['text'] for claim in claims]

        for i, j in itertools.combinations(range(len(claim_texts)), 2):
            claim1 = claim_texts[i]
            claim2 = claim_texts[j]

            if self._ready and self.nli_model:
                try:
                    if self._check_entailment(claim1, claim2) == 'contradiction':
                        contradictions.append({
                            'claim1': claim1,
                            'claim2': claim2,
                            'confidence': 0.85,
                            'suggestion': 'These statements appear to contradict each other. Review the logical relationship.'
                        })
                except Exception as e:
                    print(f"NLI check error: {e}")

            # Circularity check is independent of the NLI model.
            if self._is_circular(claim1, claim2):
                circular_refs.append({
                    'claim1': claim1,
                    'claim2': claim2
                })

        # Score: fraction of claim pairs without an issue, scaled to 0-100.
        total_pairs = len(list(itertools.combinations(range(len(claim_texts)), 2)))
        issues = len(contradictions) + len(circular_refs)
        consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)

        return {
            'consistency_score': consistency_score,
            'contradictions': contradictions[:5],  # Limit to top 5
            'circular_definitions': circular_refs[:3],
            'entailment_failures': []
        }

    def _check_entailment(self, premise: str, hypothesis: str) -> str:
        """Classify the pair as 'contradiction', 'entailment' or 'neutral'."""
        if not self.nli_model:
            return 'neutral'

        try:
            # Pass the sentences as a proper text pair. The original built a
            # "premise [SEP] hypothesis" string, which most tokenizers treat
            # as literal text rather than two segments.
            result = self.nli_model({"text": premise, "text_pair": hypothesis})
            first = result[0] if isinstance(result, list) else result
            label = first['label'].lower()

            # 'contradict' also matches 'contradiction'; no need for both.
            if 'contradict' in label:
                return 'contradiction'
            if 'entail' in label:
                return 'entailment'
            return 'neutral'
        except Exception as e:
            print(f"Entailment check error: {e}")
            return 'neutral'

    def _is_circular(self, claim1: str, claim2: str) -> bool:
        """Heuristic: near-identical content words suggest a circular definition."""
        words1 = set(claim1.lower().split())
        words2 = set(claim2.lower().split())

        # Remove common words before comparing content.
        stopwords = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
                     'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
                     'should', 'may', 'might', 'can', 'to', 'of', 'in', 'for', 'on', 'with'}

        words1 -= stopwords
        words2 -= stopwords

        # Require a few content words each, then test for ~70% overlap.
        if len(words1) > 2 and len(words2) > 2:
            overlap = len(words1 & words2)
            return overlap >= min(len(words1), len(words2)) * 0.7

        return False
analysis/coverage_analyzer.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Coverage Analyzer Module
3
+ Analyzes how well user explanation covers canonical concept graph
4
+ """
5
+
6
+ from typing import List, Dict
7
+ import numpy as np
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+
10
class CoverageAnalyzer:
    """Scores how well a user's explanation covers a canonical concept graph."""

    def __init__(self):
        # Purely heuristic; no models to load.
        self._ready = True

    def is_ready(self) -> bool:
        return self._ready

    async def analyze_coverage(
        self,
        user_claims: List[Dict],
        canonical_graph: Dict,
        explanation: str
    ) -> Dict:
        """Match user claims against graph nodes and score coverage.

        Returns:
            {
                'coverage_score': float (0-100),
                'node_coverage': Dict[node_id, status dict],
                'missing_concepts': List[Dict],
                'weak_links': List[Dict],
                'name_dropping': List[str]
            }
        """
        claim_embeddings = [claim['embedding'] for claim in user_claims]
        claim_texts = [claim['text'] for claim in user_claims]

        node_coverage = {}
        missing_concepts = []
        weak_links = []

        for node in canonical_graph['nodes']:
            node_id = node['id']
            node_label = node['label']

            coverage_status = self._check_node_coverage(
                node_label=node_label,
                claim_texts=claim_texts,
                claim_embeddings=claim_embeddings,
                explanation=explanation
            )
            node_coverage[node_id] = coverage_status

            if coverage_status['status'] == 'missing':
                # Missing prerequisites hurt more than missing components.
                is_prereq = node.get('type') == 'prerequisite'
                missing_concepts.append({
                    'concept': node_label,
                    'severity': 'high' if is_prereq else 'medium',
                    'description': f"This is a key {'prerequisite' if is_prereq else 'component'} for understanding the concept."
                })
            elif coverage_status['status'] == 'weak':
                weak_links.append({
                    'concept': node_label,
                    'user_quote': coverage_status.get('user_quote', ''),
                    'suggestion': 'Explain the mechanism or relationship, not just mention the term.'
                })

        coverage_score = self._calculate_coverage_score(node_coverage, canonical_graph)
        name_dropping = self._detect_name_dropping(claim_texts, node_coverage)

        return {
            'coverage_score': coverage_score,
            'node_coverage': node_coverage,
            'missing_concepts': missing_concepts,
            'weak_links': weak_links,
            'name_dropping': name_dropping
        }

    def _check_node_coverage(
        self,
        node_label: str,
        claim_texts: List[str],
        claim_embeddings: List[List[float]],
        explanation: str
    ) -> Dict:
        """Check if and how well a concept node is covered.

        claim_embeddings is currently unused (matching is keyword-based);
        it is kept so the interface is ready for true semantic matching.
        """
        node_lower = node_label.lower()

        # Not mentioned anywhere -> missing.
        if node_lower not in explanation.lower():
            return {
                'status': 'missing',
                'user_quote': None,
                'coverage_strength': 0.0
            }

        # Find the claim that best covers the concept. Heuristic: a longer
        # claim mentioning the term counts as stronger coverage, with full
        # strength at 15+ words.
        best_match_idx = None
        best_score = 0.0
        for idx, claim_text in enumerate(claim_texts):
            if node_lower in claim_text.lower():
                coverage_strength = min(1.0, len(claim_text.split()) / 15.0)
                if coverage_strength > best_score:
                    best_score = coverage_strength
                    best_match_idx = idx

        if best_match_idx is not None:
            if best_score > 0.6:
                status = 'covered'
            elif best_score > 0.2:
                status = 'weak'
            else:
                status = 'missing'
            return {
                'status': status,
                'user_quote': claim_texts[best_match_idx],
                'coverage_strength': best_score
            }

        # Mentioned in the explanation but in no extracted claim: name-dropping.
        return {
            'status': 'weak',
            'user_quote': None,
            'coverage_strength': 0.1
        }

    def _calculate_coverage_score(self, node_coverage: Dict, canonical_graph: Dict) -> float:
        """Weighted coverage score: prerequisites count double, weak = 40%."""
        if not node_coverage:
            return 0.0

        total_weight = 0.0
        covered_weight = 0.0

        for node in canonical_graph['nodes']:
            node_id = node['id']

            # Prerequisites are more important than components.
            weight = 2.0 if node.get('type') == 'prerequisite' else 1.0
            total_weight += weight

            status = node_coverage.get(node_id, {}).get('status', 'missing')
            if status == 'covered':
                covered_weight += weight
            elif status == 'weak':
                covered_weight += weight * 0.4

        return (covered_weight / total_weight * 100) if total_weight > 0 else 0.0

    def _detect_name_dropping(self, claim_texts: List[str], node_coverage: Dict) -> List[str]:
        """Return quotes for concepts that are mentioned but barely explained.

        claim_texts is currently unused but kept for interface stability.
        """
        # The original checked user_quote twice (outer and inner if); once
        # is enough.
        name_dropped = [
            coverage['user_quote']
            for coverage in node_coverage.values()
            if coverage.get('user_quote') and coverage.get('coverage_strength', 0) < 0.3
        ]
        return name_dropped[:3]  # Limit to top 3
analysis/graph_generator.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Concept Graph Generator
3
+ Generates canonical concept dependency graphs for given concepts
4
+ """
5
+
6
+ from typing import Dict, List
7
+ import os
8
+ import requests
9
+ import json
10
+
11
class ConceptGraphGenerator:
    """Generates canonical concept dependency graphs for a given concept.

    Tries the HF Inference API (Mistral-7B-Instruct) first and falls back to
    hand-written templates / a generic skeleton when the API is unavailable
    or returns unparseable output.
    """

    def __init__(self):
        self.hf_api_key = os.getenv('HUGGINGFACE_API_KEY')
        self.llm_endpoint = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        self._ready = True

    def is_ready(self) -> bool:
        return self._ready

    async def generate_graph(self, concept: str) -> Dict:
        """Generate a canonical concept dependency graph.

        Returns:
            {
                'nodes': [{'id', 'label', 'level', 'type'}],
                'edges': [{'source', 'target', 'relationship'}],
                'concept': str
            }
        """
        graph_structure = await self._llm_generate_structure(concept)
        return self._format_graph(graph_structure, concept)

    async def _llm_generate_structure(self, concept: str) -> Dict:
        """Ask the LLM for prerequisites/components/relationships as JSON."""
        prompt = f"""<s>[INST] You are a concept structure expert. For the concept "{concept}", identify the core prerequisite concepts that must be understood first, and their relationships.

Output a JSON structure with:
1. "prerequisites": list of prerequisite concepts needed to understand {concept}
2. "core_components": main parts/aspects of {concept} itself
3. "relationships": how concepts connect (prerequisite, causal, etc.)

Be precise and pedagogical. Focus on understanding order.

Output only valid JSON, no other text. [/INST]"""

        try:
            headers = {"Authorization": f"Bearer {self.hf_api_key}"}
            payload = {
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 800,
                    "temperature": 0.4,
                    "return_full_text": False
                }
            }

            # NOTE(review): blocking call inside an async def; consider an
            # async HTTP client if this runs on the serving event loop.
            response = requests.post(self.llm_endpoint, headers=headers, json=payload, timeout=30)

            if response.status_code != 200:
                return self._fallback_graph(concept)

            result = response.json()
            text = result[0]['generated_text'] if isinstance(result, list) else result.get('generated_text', '')

            # Extract the outermost {...} in case the model wrapped the JSON
            # in extra prose. The original used a bare `except:`; catch only
            # the parse failure we expect.
            start = text.find('{')
            end = text.rfind('}') + 1
            if start != -1 and end > start:
                try:
                    return json.loads(text[start:end])
                except json.JSONDecodeError:
                    pass  # fall through to the template fallback

            return self._fallback_graph(concept)

        except Exception as e:
            print(f"Graph generation error: {e}")
            return self._fallback_graph(concept)

    def _fallback_graph(self, concept: str) -> Dict:
        """Fallback: predefined templates for common concepts, else a skeleton."""
        templates = {
            'entropy': {
                'prerequisites': ['energy', 'system states', 'probability'],
                'core_components': ['disorder measure', 'thermodynamic entropy', 'information entropy'],
                'relationships': [
                    ('energy', 'entropy', 'prerequisite'),
                    ('system states', 'entropy', 'prerequisite'),
                    ('probability', 'entropy', 'prerequisite')
                ]
            },
            'neural networks': {
                'prerequisites': ['linear algebra', 'calculus', 'probability'],
                'core_components': ['neurons', 'layers', 'weights', 'activation functions', 'backpropagation'],
                'relationships': [
                    ('linear algebra', 'neural networks', 'prerequisite'),
                    ('neurons', 'layers', 'component'),
                    ('weights', 'neurons', 'component'),
                    ('backpropagation', 'weights', 'causal')
                ]
            },
            'photosynthesis': {
                'prerequisites': ['energy', 'chemical reactions', 'cells'],
                'core_components': ['light reactions', 'dark reactions', 'chlorophyll', 'glucose production'],
                'relationships': [
                    ('energy', 'light reactions', 'prerequisite'),
                    ('light reactions', 'dark reactions', 'causal'),
                    ('dark reactions', 'glucose production', 'causal')
                ]
            }
        }

        # Substring match so e.g. "entropy of gases" hits the entropy template.
        concept_lower = concept.lower()
        for key, template in templates.items():
            if key in concept_lower:
                return template

        # Generic fallback for unknown concepts.
        return {
            'prerequisites': ['foundational knowledge'],
            'core_components': [concept, f'{concept} principles', f'{concept} applications'],
            'relationships': [
                ('foundational knowledge', concept, 'prerequisite')
            ]
        }

    def _format_graph(self, structure: Dict, concept: str) -> Dict:
        """Flatten a structure dict into frontend-ready nodes and edges."""
        nodes = []
        edges = []
        node_id = 0
        node_map = {}  # label -> node id, for resolving relationship endpoints

        # Level 0: prerequisite nodes.
        for prereq in structure.get('prerequisites', []):
            node_map[prereq] = f'node_{node_id}'
            nodes.append({
                'id': f'node_{node_id}',
                'label': prereq,
                'level': 0,
                'type': 'prerequisite'
            })
            node_id += 1

        # Level 1: the main concept node.
        node_map[concept] = f'node_{node_id}'
        nodes.append({
            'id': f'node_{node_id}',
            'label': concept,
            'level': 1,
            'type': 'main'
        })
        concept_node_id = f'node_{node_id}'
        node_id += 1

        # Level 2: core component nodes.
        for component in structure.get('core_components', []):
            node_map[component] = f'node_{node_id}'
            nodes.append({
                'id': f'node_{node_id}',
                'label': component,
                'level': 2,
                'type': 'component'
            })
            node_id += 1

        # Edges from relationships; unknown endpoints fall back to the main
        # concept node so a malformed LLM relationship never yields a
        # dangling edge. (The original used two different spellings of the
        # same fallback for source vs target.)
        for rel in structure.get('relationships', []):
            if len(rel) >= 3:
                source_key, target_key, rel_type = rel[0], rel[1], rel[2]
                edges.append({
                    'source': node_map.get(source_key, concept_node_id),
                    'target': node_map.get(target_key, concept_node_id),
                    'relationship': rel_type
                })

        # If the LLM gave no usable relationships, wire prerequisites to the
        # main concept so the graph is still connected.
        if not edges:
            for prereq in structure.get('prerequisites', []):
                edges.append({
                    'source': node_map[prereq],
                    'target': concept_node_id,
                    'relationship': 'prerequisite'
                })

        return {
            'nodes': nodes,
            'edges': edges,
            'concept': concept
        }
analysis/scorer.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Understanding Scorer Module
3
+ Calculates final understanding scores from analysis results
4
+ """
5
+
6
+ from typing import Dict, Optional
7
+
8
class UnderstandingScorer:
    """Combines per-dimension analysis results into final understanding scores."""

    def __init__(self):
        # Relative importance of each analysis dimension (sums to 1.0).
        self.weights = {
            'consistency': 0.25,
            'coverage': 0.35,
            'stability': 0.25,
            'assumptions': 0.15
        }

    def calculate_scores(
        self,
        consistency_result: Dict,
        coverage_result: Dict,
        stability_result: Optional[Dict]
    ) -> Dict:
        """Compute the weighted overall score plus each dimension.

        All values are 0-100 floats, rounded to one decimal:
            {'overall', 'consistency', 'coverage', 'stability', 'assumptions'}
        A missing stability_result counts as a perfect 100.
        """
        dims = {
            'consistency': consistency_result.get('consistency_score', 0),
            'coverage': coverage_result.get('coverage_score', 0),
            'stability': stability_result.get('stability_score', 100) if stability_result else 100,
        }
        dims['assumptions'] = self._calculate_assumption_score(
            coverage_result=coverage_result,
            consistency_result=consistency_result
        )

        # Weighted blend of the (unrounded) dimension scores.
        weighted_total = sum(dims[name] * self.weights[name] for name in self.weights)

        scores = {name: round(value, 1) for name, value in dims.items()}
        scores['overall'] = round(weighted_total, 1)
        return scores

    def _calculate_assumption_score(
        self,
        coverage_result: Dict,
        consistency_result: Dict
    ) -> float:
        """Estimate how explicitly assumptions are handled (0-100).

        Heuristic: strong coverage plus no contradictions suggests assumptions
        are being stated; missing concepts and contradictions are penalized as
        signs of unstated or conflicting implicit assumptions.
        """
        baseline = (consistency_result.get('consistency_score', 0)
                    + coverage_result.get('coverage_score', 0)) / 2

        # Each missing concept hints at an unstated prerequisite (capped at 30).
        gap_penalty = min(30, 10 * len(coverage_result.get('missing_concepts', [])))
        # Each contradiction hints at conflicting assumptions (capped at 20).
        clash_penalty = min(20, 15 * len(consistency_result.get('contradictions', [])))

        return max(0, min(100, baseline - gap_penalty - clash_penalty))
analysis/stability_tester.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Stability Tester Module
3
+ Tests if understanding holds under reformulation and stress testing
4
+ """
5
+
6
+ from typing import List, Dict, Optional
7
+ import os
8
+ import requests
9
+ import numpy as np
10
+ from sentence_transformers import SentenceTransformer
11
+
12
class StabilityTester:
    """Stress-tests an explanation to see if understanding survives reformulation.

    Combines lightweight per-claim heuristics with (planned) LLM-driven
    reformulation to estimate how much an explanation would drift if the
    user were asked to restate it in different ways.
    """

    def __init__(self):
        # Embedding model used to measure semantic drift between phrasings.
        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.hf_api_key = os.getenv('HUGGINGFACE_API_KEY')
        # Hosted LLM endpoint reserved for generating alternative explanations.
        self.llm_endpoint = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
        self._ready = True

    def is_ready(self) -> bool:
        """Report whether the tester has finished initializing."""
        return self._ready

    async def test_stability(
        self,
        concept: str,
        original_explanation: str,
        claims: List[Dict]
    ) -> Dict:
        """Estimate how stable the explanation is under reformulation.

        Each claim is scored by `_test_claim_stability`; the mean drift
        across claims is then converted into a 0-100 stability score.
        Claims that look unstable are reported individually, and each
        stress-test prompt is marked as passing only when no claim is
        unstable.

        Returns:
            Dict with keys 'stability_score' (float, 0-100),
            'drift_scores' (claim id -> drift), 'unstable_claims'
            (top 3), and 'stress_test_results'.
        """
        prompts = self._generate_stress_prompts(concept)

        # Embed the original explanation up front; a production version would
        # compare this against embeddings of re-prompted alternatives instead
        # of relying on heuristics alone.
        baseline_embedding = self.embedding_model.encode(original_explanation)

        drift_by_claim = {}
        flagged = []
        for claim in claims:
            verdict = await self._test_claim_stability(
                claim=claim,
                concept=concept,
                all_claims=claims
            )
            drift_by_claim[claim['id']] = verdict['drift_score']
            if verdict['is_unstable']:
                flagged.append({
                    'claim': claim['text'],
                    'reason': verdict['reason'],
                    'drift_score': verdict['drift_score']
                })

        mean_drift = np.mean(list(drift_by_claim.values())) if drift_by_claim else 0.0
        all_stable = not flagged

        return {
            'stability_score': max(0, 100 - (mean_drift * 100)),
            'drift_scores': drift_by_claim,
            'unstable_claims': flagged[:3],  # cap at the three worst offenders
            'stress_test_results': [
                {'prompt': prompt, 'passes': all_stable}
                for prompt in prompts[:2]
            ]
        }

    def _generate_stress_prompts(self, concept: str) -> List[str]:
        """Build reformulation prompts that probe the concept from new angles."""
        return [
            f"Explain {concept} in a different way",
            f"What would happen if {concept} didn't exist?",
            f"Explain {concept} to a 10-year-old",
            f"What are the limits or boundary conditions of {concept}?"
        ]

    async def _test_claim_stability(
        self,
        claim: Dict,
        concept: str,
        all_claims: List[Dict]
    ) -> Dict:
        """Judge one claim's stability with cheap textual heuristics.

        Returns a dict with 'is_unstable', 'reason', and 'drift_score'.
        """
        text = claim['text']
        lowered = text.lower()

        # Heuristic 1: claims under five words rarely demonstrate real depth.
        if len(text.split()) < 5:
            return {
                'is_unstable': True,
                'reason': 'Claim is too brief to demonstrate understanding',
                'drift_score': 0.6
            }

        # Heuristic 2: two or more hedging/vague terms suggest surface knowledge.
        hedges = ['thing', 'stuff', 'kind of', 'sort of', 'basically', 'just', 'simply']
        if sum(term in lowered for term in hedges) >= 2:
            return {
                'is_unstable': True,
                'reason': 'Contains vague language suggesting surface understanding',
                'drift_score': 0.5
            }

        # Heuristic 3: a leading pronoun with no clear antecedent means the
        # claim cannot stand alone (only meaningful when other claims exist).
        pronouns = ['this', 'that', 'it', 'these', 'those']
        if len(all_claims) > 1 and any(lowered.startswith(p + ' ') for p in pronouns):
            return {
                'is_unstable': True,
                'reason': 'Claim has unclear references and may not stand alone',
                'drift_score': 0.4
            }

        # No heuristic fired: treat the claim as stable.
        return {
            'is_unstable': False,
            'reason': 'Claim appears well-formed',
            'drift_score': 0.1
        }
main.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RealityCheck AI - Backend API
3
+ FastAPI server for analyzing how well someone understands a concept
4
+ """
5
+
6
import os
import re
from typing import List, Dict, Optional

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from analysis.claim_extractor import ClaimExtractor
from analysis.graph_generator import ConceptGraphGenerator
from analysis.consistency_checker import ConsistencyChecker
from analysis.coverage_analyzer import CoverageAnalyzer
from analysis.stability_tester import StabilityTester
from analysis.scorer import UnderstandingScorer
19
+
20
# Load variables from a local .env file (e.g. HUGGINGFACE_API_KEY) into the
# environment before any pipeline component reads them.
load_dotenv()

app = FastAPI(
    title="RealityCheck AI API",
    description="Understanding analysis engine",
    version="1.0.0"
)

# CORS - TODO: lock this down for production
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests and is unsafe in production;
# replace with an explicit origin whitelist before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # TODO: change this before deploying
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize analysis pipeline components as module-level singletons shared by
# all requests. NOTE(review): these constructors appear to load ML models, so
# the cost is paid once at import/startup — confirm against the component code.
claim_extractor = ClaimExtractor()
graph_generator = ConceptGraphGenerator()
consistency_checker = ConsistencyChecker()
coverage_analyzer = CoverageAnalyzer()
stability_tester = StabilityTester()
scorer = UnderstandingScorer()
44
+
45
+
46
class AnalysisRequest(BaseModel):
    """Request body for POST /analyze."""
    # Name of the concept the user is explaining (e.g. "Entropy (Physics)").
    concept: str
    # The user's free-text explanation to be analyzed.
    explanation: str
    # When True (default), the stability-testing step also runs.
    test_stability: Optional[bool] = True
50
+
51
+
52
class ConceptNode(BaseModel):
    """A node of the concept graph, annotated with the user's coverage."""
    # Stable identifier; referenced by ConceptEdge.source/target.
    id: str
    # Human-readable concept name.
    label: str
    status: str  # 'covered', 'weak', 'missing'
    # Supporting quote from the user's explanation, when one was matched.
    user_quote: Optional[str] = None
57
+
58
+
59
class ConceptEdge(BaseModel):
    """A directed relationship between two concept-graph nodes."""
    # ConceptNode.id of the edge's origin.
    source: str
    # ConceptNode.id of the edge's destination.
    target: str
    relationship: str  # 'prerequisite', 'causal', 'related'
63
+
64
+
65
class ConceptGraph(BaseModel):
    """The canonical concept graph overlaid with user coverage status."""
    nodes: List[ConceptNode]
    edges: List[ConceptEdge]
68
+
69
+
70
class ScoreBreakdown(BaseModel):
    """Per-dimension understanding scores (each on a 0-100 scale)."""
    consistency: float
    coverage: float
    stability: float
    assumption_completeness: float
75
+
76
+
77
class FeedbackItem(BaseModel):
    """One actionable piece of feedback derived from the analysis."""
    type: str  # 'missing_concept', 'contradiction', 'weak_link'
    severity: str  # 'high', 'medium', 'low'
    # What was detected.
    message: str
    # How the user might address it.
    suggestion: str
82
+
83
+
84
class AnalysisResponse(BaseModel):
    """Full response payload of POST /analyze."""
    # Weighted aggregate of the per-dimension scores (0-100).
    overall_score: float
    score_breakdown: ScoreBreakdown
    concept_graph: ConceptGraph
    feedback: List[FeedbackItem]
    # Set only when confident wording coexists with a low overall score.
    confidence_mismatch_warning: Optional[str] = None
    # Per-claim drift scores; None when stability testing was skipped.
    explanation_stability: Optional[Dict[str, float]] = None
91
+
92
+
93
@app.get("/")
async def root():
    """Health check endpoint"""
    # Static service banner; no state is consulted.
    info = {
        "message": "RealityCheck AI API",
        "status": "operational",
        "version": "1.0.0"
    }
    return info
101
+
102
+
103
@app.get("/health")
async def health_check():
    """Detailed health check"""
    # Poll readiness of each model-backed pipeline component.
    model_status = {
        "embeddings": claim_extractor.is_ready(),
        "nli": consistency_checker.is_ready(),
        "llm": graph_generator.is_ready()
    }
    return {"status": "healthy", "models_loaded": model_status}
114
+
115
+
116
@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_understanding(request: AnalysisRequest):
    """
    Main endpoint: Analyze user's conceptual understanding

    This endpoint orchestrates the entire analysis pipeline:
    1. Extract claims from explanation
    2. Generate canonical concept graph
    3. Check logical consistency
    4. Analyze concept coverage
    5. Test explanation stability
    6. Calculate understanding scores

    Args:
        request: Concept name, explanation text, and a flag controlling
            whether the stability step (5) runs.

    Returns:
        AnalysisResponse with the overall and per-dimension scores, the
        coverage-annotated concept graph, targeted feedback items, and an
        optional confidence-mismatch warning.

    Raises:
        HTTPException: 500 wrapping any error raised inside the pipeline.
    """
    try:
        # Step 1: Extract atomic claims from user explanation
        claims = await claim_extractor.extract_claims(request.explanation)

        # Step 2: Generate canonical concept graph for the concept
        canonical_graph = await graph_generator.generate_graph(request.concept)

        # Step 3: Check logical consistency between claims
        consistency_result = await consistency_checker.check_consistency(claims)

        # Step 4: Analyze concept coverage
        coverage_result = await coverage_analyzer.analyze_coverage(
            user_claims=claims,
            canonical_graph=canonical_graph,
            explanation=request.explanation
        )

        # Step 5: Test stability (if requested)
        # stability_result stays None when skipped; downstream consumers
        # (scorer, feedback, response) all handle the None case.
        stability_result = None
        if request.test_stability:
            stability_result = await stability_tester.test_stability(
                concept=request.concept,
                original_explanation=request.explanation,
                claims=claims
            )

        # Step 6: Calculate overall understanding score
        scores = scorer.calculate_scores(
            consistency_result=consistency_result,
            coverage_result=coverage_result,
            stability_result=stability_result
        )

        # Build concept graph with user coverage
        concept_graph = _build_concept_graph(
            canonical_graph=canonical_graph,
            coverage_result=coverage_result
        )

        # Generate targeted feedback
        feedback = _generate_feedback(
            consistency_result=consistency_result,
            coverage_result=coverage_result,
            stability_result=stability_result
        )

        # Detect confidence-understanding mismatch
        confidence_warning = _check_confidence_mismatch(
            explanation=request.explanation,
            overall_score=scores['overall']
        )

        return AnalysisResponse(
            overall_score=scores['overall'],
            score_breakdown=ScoreBreakdown(
                consistency=scores['consistency'],
                coverage=scores['coverage'],
                stability=scores['stability'],
                assumption_completeness=scores['assumptions']
            ),
            concept_graph=concept_graph,
            feedback=feedback,
            confidence_mismatch_warning=confidence_warning,
            explanation_stability=stability_result.get('drift_scores') if stability_result else None
        )

    except Exception as e:
        # NOTE(review): this boundary converts every failure — including bad
        # input from upstream components — into a 500 and echoes the raw
        # exception text to the client; consider logging and sanitizing.
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
197
+
198
+
199
@app.get("/concepts")
async def get_sample_concepts():
    """Get list of sample concepts for testing"""
    # Static catalog of (name, category, difficulty) demo concepts.
    catalog = [
        ("Entropy (Physics)", "Physics", "intermediate"),
        ("Neural Networks", "Computer Science", "intermediate"),
        ("Photosynthesis", "Biology", "beginner"),
        ("Supply and Demand", "Economics", "beginner"),
        ("Recursion", "Computer Science", "intermediate"),
        ("Natural Selection", "Biology", "intermediate")
    ]
    return {
        "concepts": [
            {"name": name, "category": category, "difficulty": difficulty}
            for name, category, difficulty in catalog
        ]
    }
236
+
237
+
238
def _build_concept_graph(canonical_graph: Dict, coverage_result: Dict) -> ConceptGraph:
    """Overlay the user's coverage results onto the canonical concept graph.

    Nodes absent from the coverage map default to status 'missing' with no
    supporting quote; edges are passed through unchanged.
    """
    node_coverage = coverage_result.get('node_coverage', {})

    nodes = [
        ConceptNode(
            id=node['id'],
            label=node['label'],
            status=node_coverage.get(node['id'], {}).get('status', 'missing'),
            user_quote=node_coverage.get(node['id'], {}).get('user_quote')
        )
        for node in canonical_graph['nodes']
    ]

    edges = []
    for edge in canonical_graph['edges']:
        edges.append(ConceptEdge(
            source=edge['source'],
            target=edge['target'],
            relationship=edge['relationship']
        ))

    return ConceptGraph(nodes=nodes, edges=edges)
262
+
263
+
264
def _generate_feedback(
    consistency_result: Dict,
    coverage_result: Dict,
    stability_result: Optional[Dict]
) -> List[FeedbackItem]:
    """Translate the analysis results into an ordered list of feedback items.

    Order is fixed: contradictions, then missing concepts, then weak links,
    then (when stability testing ran) unstable claims.
    """
    items: List[FeedbackItem] = []

    # Contradictions are the most severe problem; surface them first.
    for pair in consistency_result.get('contradictions', []):
        items.append(FeedbackItem(
            type='contradiction',
            severity='high',
            message=f"Contradiction detected between: '{pair['claim1']}' and '{pair['claim2']}'",
            suggestion=pair.get('suggestion', 'Review these claims for logical consistency')
        ))

    # Concepts absent from the explanation entirely.
    for gap in coverage_result.get('missing_concepts', []):
        items.append(FeedbackItem(
            type='missing_concept',
            severity=gap.get('severity', 'medium'),
            message=f"Missing prerequisite concept: {gap['concept']}",
            suggestion=f"Consider explaining: {gap.get('description', '')}"
        ))

    # Concepts mentioned but explained thinly.
    for shallow in coverage_result.get('weak_links', []):
        items.append(FeedbackItem(
            type='weak_link',
            severity='low',
            message=f"Weak explanation of: {shallow['concept']}",
            suggestion=shallow.get('suggestion', 'Provide more detail')
        ))

    # Claims that did not survive the stability stress tests (if any ran).
    unstable = stability_result.get('unstable_claims') if stability_result else None
    for shaky in unstable or []:
        items.append(FeedbackItem(
            type='instability',
            severity='medium',
            message=f"Explanation becomes unclear when reformulated: {shaky['claim']}",
            suggestion="This may indicate surface-level understanding. Try explaining the underlying mechanism."
        ))

    return items
310
+
311
+
312
+ def _check_confidence_mismatch(explanation: str, overall_score: float) -> Optional[str]:
313
+ """Detect when explanation sounds confident but scores low"""
314
+ # Simple heuristic: check for confident language markers
315
+ confident_markers = [
316
+ 'obviously', 'clearly', 'of course', 'everyone knows',
317
+ 'it is evident', 'undoubtedly', 'certainly', 'definitely'
318
+ ]
319
+
320
+ explanation_lower = explanation.lower()
321
+ confidence_indicators = sum(1 for marker in confident_markers if marker in explanation_lower)
322
+
323
+ # If high confidence language but low score, warn
324
+ if confidence_indicators >= 2 and overall_score < 60:
325
+ return (
326
+ "⚠️ Confidence-Understanding Mismatch Detected: "
327
+ "Your explanation uses confident language, but analysis suggests potential gaps. "
328
+ "This is common when we're familiar with terminology but haven't fully internalized the concepts."
329
+ )
330
+
331
+ return None
332
+
333
+
334
if __name__ == "__main__":
    # Local development entry point; the container instead runs uvicorn
    # directly on port 7860 (see Dockerfile CMD).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.100.0
2
+ uvicorn[standard]>=0.25.0
3
+ pydantic>=2.0.0
4
+ python-multipart>=0.0.6
5
+ sentence-transformers>=2.0.0
6
+ transformers>=4.30.0
7
+ torch>=2.0.0
8
+ numpy>=1.24.0
9
+ networkx>=3.0.0
10
+ python-dotenv>=1.0.0
11
+ huggingface-hub>=0.16.0
12
+ requests>=2.31.0
13
+ scikit-learn>=1.3.0