Spaces:

tawhidbinomar
/

conceptvector

Sleeping

File size: 10,828 Bytes

"""
RealityCheck AI - Backend API
FastAPI server for analyzing how well someone understands a concept
"""

import logging
import os
import re
from typing import List, Dict, Optional

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from analysis.claim_extractor import ClaimExtractor
from analysis.graph_generator import ConceptGraphGenerator
from analysis.consistency_checker import ConsistencyChecker
from analysis.coverage_analyzer import CoverageAnalyzer
from analysis.stability_tester import StabilityTester
from analysis.scorer import UnderstandingScorer

# Load variables from a .env file (python-dotenv) into the process environment.
# NOTE(review): the analysis.* modules are imported before this call; if any of
# them read environment variables at import time they will not see values that
# only exist in .env — confirm.
load_dotenv()

# The FastAPI application object that every route below is registered on.
# NOTE(review): the title here says "RealityCheck AI" while root() reports
# "ConceptVector API" — confirm which name is current.
app = FastAPI(
    title="RealityCheck AI API",
    description="Understanding analysis engine",
    version="1.0.0"
)

# CORS configuration.
# Allowed origins are read from the ALLOWED_ORIGINS environment variable as a
# comma-separated list, e.g. "https://app.example.com,https://example.com".
# When the variable is unset or blank, the previous wide-open default ("*") is
# kept so existing deployments behave exactly as before.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins with allow_credentials=True — set ALLOWED_ORIGINS
# explicitly before deploying to production.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("ALLOWED_ORIGINS", "").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize analysis pipeline components.
# These are module-level instances created once at import time and shared by
# every request handler below.
# NOTE(review): presumably the constructors load ML models (see the
# "models_loaded" keys in health_check) — confirm start-up cost.
claim_extractor = ClaimExtractor()
graph_generator = ConceptGraphGenerator()
consistency_checker = ConsistencyChecker()
coverage_analyzer = CoverageAnalyzer()
stability_tester = StabilityTester()
scorer = UnderstandingScorer()


class AnalysisRequest(BaseModel):
    """Request body accepted by POST /analyze."""
    # Name of the concept being explained; passed to the graph generator and
    # the stability tester.
    concept: str
    # The user's free-text explanation; claims are extracted from this text.
    explanation: str
    # When falsy (False or null) the stability step of the pipeline is skipped.
    test_stability: Optional[bool] = True


class ConceptNode(BaseModel):
    """One node of the concept graph returned to the client."""
    # Identifier and display label copied from the canonical graph node.
    id: str
    label: str
    # 'covered', 'weak', 'missing' — _build_concept_graph falls back to
    # 'missing' when the coverage result has no entry for the node.
    status: str
    # Quote taken from the coverage result for this node, if one is present.
    user_quote: Optional[str] = None


class ConceptEdge(BaseModel):
    """Directed relationship between two concept nodes, referenced by node id."""
    # Values are copied verbatim from the canonical graph's edge entries.
    source: str
    target: str
    relationship: str  # 'prerequisite', 'causal', 'related'


class ConceptGraph(BaseModel):
    """Concept graph payload: the annotated nodes plus the edges between them."""
    nodes: List[ConceptNode]
    edges: List[ConceptEdge]


class ScoreBreakdown(BaseModel):
    """Per-dimension scores that accompany the overall score."""
    # Each field is filled from the dict returned by scorer.calculate_scores
    # (keys 'consistency', 'coverage', 'stability' and 'assumptions').
    # NOTE(review): presumably on a 0-100 scale, since the overall score is
    # compared against 60 in _check_confidence_mismatch — confirm.
    consistency: float
    coverage: float
    stability: float
    assumption_completeness: float


class FeedbackItem(BaseModel):
    """A single piece of targeted feedback shown to the user."""
    # 'missing_concept', 'contradiction', 'weak_link' or 'instability'
    # (the last one is emitted by _generate_feedback for unstable claims).
    type: str
    # 'high', 'medium', 'low' — missing-concept items take their severity from
    # the coverage result, so other values may appear; verify upstream.
    severity: str
    # Human-readable description of the issue.
    message: str
    # Human-readable hint on how to address the issue.
    suggestion: str


class AnalysisResponse(BaseModel):
    """Response body returned by POST /analyze."""
    # Overall score taken from scorer.calculate_scores()['overall'].
    overall_score: float
    score_breakdown: ScoreBreakdown
    # Canonical concept graph annotated with the user's coverage.
    concept_graph: ConceptGraph
    feedback: List[FeedbackItem]
    # Set when the explanation uses confident wording but the overall score is
    # low; None otherwise.
    confidence_mismatch_warning: Optional[str] = None
    # The stability result's 'drift_scores' entry; None when the stability
    # step was skipped or returned nothing.
    explanation_stability: Optional[Dict[str, float]] = None


@app.get("/")
async def root():
    """Return a small static payload with the API name, status and version."""
    payload = dict(
        message="ConceptVector API",
        status="operational",
        version="1.0.0",
    )
    return payload


@app.get("/health")
async def health_check():
    """Report the readiness flag of each model-backed pipeline component."""
    readiness = {}
    readiness["embeddings"] = claim_extractor.is_ready()
    readiness["nli"] = consistency_checker.is_ready()
    readiness["llm"] = graph_generator.is_ready()

    # NOTE(review): "status" is always "healthy", whatever the readiness
    # flags say — callers must inspect "models_loaded" themselves.
    return {"status": "healthy", "models_loaded": readiness}


@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_understanding(request: AnalysisRequest):
    """
    Main endpoint: analyze the user's conceptual understanding.

    Orchestrates the entire analysis pipeline:
    1. Extract claims from the explanation
    2. Generate the canonical concept graph
    3. Check logical consistency
    4. Analyze concept coverage
    5. Test explanation stability (only when request.test_stability is truthy)
    6. Calculate understanding scores

    Args:
        request: The concept name, the user's explanation and the
            stability-test flag.

    Returns:
        AnalysisResponse with the overall score, the score breakdown, the
        annotated concept graph, feedback items, an optional
        confidence-mismatch warning and optional stability drift scores.

    Raises:
        HTTPException: An HTTPException raised inside the pipeline is passed
            through unchanged; any other failure is logged with its traceback
            and reported as a 500 whose detail contains the error message.
    """
    try:
        # Step 1: Extract atomic claims from user explanation
        claims = await claim_extractor.extract_claims(request.explanation)

        # Step 2: Generate canonical concept graph for the concept
        canonical_graph = await graph_generator.generate_graph(request.concept)

        # Step 3: Check logical consistency between claims
        consistency_result = await consistency_checker.check_consistency(claims)

        # Step 4: Analyze concept coverage
        coverage_result = await coverage_analyzer.analyze_coverage(
            user_claims=claims,
            canonical_graph=canonical_graph,
            explanation=request.explanation
        )

        # Step 5: Test stability (if requested)
        stability_result = None
        if request.test_stability:
            stability_result = await stability_tester.test_stability(
                concept=request.concept,
                original_explanation=request.explanation,
                claims=claims
            )

        # Step 6: Calculate overall understanding score
        scores = scorer.calculate_scores(
            consistency_result=consistency_result,
            coverage_result=coverage_result,
            stability_result=stability_result
        )

        # Build concept graph with user coverage
        concept_graph = _build_concept_graph(
            canonical_graph=canonical_graph,
            coverage_result=coverage_result
        )

        # Generate targeted feedback
        feedback = _generate_feedback(
            consistency_result=consistency_result,
            coverage_result=coverage_result,
            stability_result=stability_result
        )

        # Detect confidence-understanding mismatch
        confidence_warning = _check_confidence_mismatch(
            explanation=request.explanation,
            overall_score=scores['overall']
        )

        return AnalysisResponse(
            overall_score=scores['overall'],
            score_breakdown=ScoreBreakdown(
                consistency=scores['consistency'],
                coverage=scores['coverage'],
                stability=scores['stability'],
                assumption_completeness=scores['assumptions']
            ),
            concept_graph=concept_graph,
            feedback=feedback,
            confidence_mismatch_warning=confidence_warning,
            explanation_stability=stability_result.get('drift_scores') if stability_result else None
        )

    except HTTPException:
        # A deliberate HTTP error raised inside the pipeline keeps its own
        # status code instead of being rewrapped as a 500.
        raise
    except Exception as e:
        # Record the full traceback server-side; previously it was lost.
        logging.getLogger(__name__).exception("Analysis pipeline failed")
        # NOTE(review): the detail still exposes the internal error message to
        # the client (unchanged behaviour) — consider a generic message.
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}") from e


@app.get("/concepts")
async def get_sample_concepts():
    """Return the fixed catalogue of sample concepts used for testing."""
    # (name, category, difficulty) triples, in the order they are returned.
    catalogue = (
        ("Entropy (Physics)", "Physics", "intermediate"),
        ("Neural Networks", "Computer Science", "intermediate"),
        ("Photosynthesis", "Biology", "beginner"),
        ("Supply and Demand", "Economics", "beginner"),
        ("Recursion", "Computer Science", "intermediate"),
        ("Natural Selection", "Biology", "intermediate"),
    )

    entries = []
    for title, field, level in catalogue:
        entries.append({"name": title, "category": field, "difficulty": level})

    return {"concepts": entries}


def _build_concept_graph(canonical_graph: Dict, coverage_result: Dict) -> ConceptGraph:
    """Combine the canonical graph with per-node coverage into a ConceptGraph.

    Every canonical node becomes a ConceptNode whose status and user quote
    come from coverage_result['node_coverage'][node id]; a node without a
    coverage entry gets status 'missing' and no quote. Edges are copied over
    as they are.
    """
    def _annotate(raw_node):
        concept_id = raw_node['id']
        info = coverage_result.get('node_coverage', {}).get(concept_id, {})
        return ConceptNode(
            id=concept_id,
            label=raw_node['label'],
            status=info.get('status', 'missing'),
            user_quote=info.get('user_quote'),
        )

    graph_nodes = [_annotate(raw_node) for raw_node in canonical_graph['nodes']]

    graph_edges = []
    for link in canonical_graph['edges']:
        graph_edges.append(
            ConceptEdge(
                source=link['source'],
                target=link['target'],
                relationship=link['relationship'],
            )
        )

    return ConceptGraph(nodes=graph_nodes, edges=graph_edges)


def _generate_feedback(
    consistency_result: Dict,
    coverage_result: Dict,
    stability_result: Optional[Dict]
) -> List[FeedbackItem]:
    """Turn the analysis results into an ordered list of FeedbackItem objects.

    Items are emitted in a fixed order: contradictions (severity 'high'),
    missing concepts (severity from the entry, default 'medium'), weak links
    (severity 'low') and finally unstable claims (severity 'medium'), the last
    group only when a stability result is given and lists any.
    """
    items = []

    # Contradictions between extracted claims
    items.extend(
        FeedbackItem(
            type='contradiction',
            severity='high',
            message=f"Contradiction detected between: '{pair['claim1']}' and '{pair['claim2']}'",
            suggestion=pair.get('suggestion', 'Review these claims for logical consistency'),
        )
        for pair in consistency_result.get('contradictions', [])
    )

    # Concepts the explanation never covered
    items.extend(
        FeedbackItem(
            type='missing_concept',
            severity=gap.get('severity', 'medium'),
            message=f"Missing prerequisite concept: {gap['concept']}",
            suggestion=f"Consider explaining: {gap.get('description', '')}",
        )
        for gap in coverage_result.get('missing_concepts', [])
    )

    # Concepts that were covered only weakly
    items.extend(
        FeedbackItem(
            type='weak_link',
            severity='low',
            message=f"Weak explanation of: {thin['concept']}",
            suggestion=thin.get('suggestion', 'Provide more detail'),
        )
        for thin in coverage_result.get('weak_links', [])
    )

    # Claims that did not hold up under reformulation
    shaky_claims = stability_result.get('unstable_claims') if stability_result else None
    if shaky_claims:
        items.extend(
            FeedbackItem(
                type='instability',
                severity='medium',
                message=f"Explanation becomes unclear when reformulated: {shaky['claim']}",
                suggestion="This may indicate surface-level understanding. Try explaining the underlying mechanism.",
            )
            for shaky in shaky_claims
        )

    return items


def _check_confidence_mismatch(explanation: str, overall_score: float) -> Optional[str]:
    """Detect when explanation sounds confident but scores low"""
    # Simple heuristic: check for confident language markers
    confident_markers = [
        'obviously', 'clearly', 'of course', 'everyone knows',
        'it is evident', 'undoubtedly', 'certainly', 'definitely'
    ]
    
    explanation_lower = explanation.lower()
    confidence_indicators = sum(1 for marker in confident_markers if marker in explanation_lower)
    
    # If high confidence language but low score, warn
    if confidence_indicators >= 2 and overall_score < 60:
        return (
            "⚠️ Confidence-Understanding Mismatch Detected: "
            "Your explanation uses confident language, but analysis suggests potential gaps. "
            "This is common when we're familiar with terminology but haven't fully internalized the concepts."
        )
    
    return None


if __name__ == "__main__":
    import uvicorn

    # The listening port can be overridden through the PORT environment
    # variable (a common hosting-platform convention); the default matches
    # the previously hard-coded value.
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8000")))