"""
IncludEd AI Service – FastAPI Application v4.0
===============================================
v3.0 features retained +
New in v4.0 (ML Components for Dyslexia Support):
  • POST /ner/extract               — Full NER character + location graph for a book
  • GET  /ner/section-view          — Spoiler-safe graph up to current section
  • POST /vocab/batch-analyze       — Pre-compute vocabulary for a full book at upload
  • POST /tts/synthesize            — TTS with word-level timestamps (disability-aware)
  • POST /quiz/record-attempt       — Record quiz result → update IRT difficulty model
  • GET  /quiz/recommend-difficulty — Get recommended difficulty for next quiz
  • GET  /quiz/student-state        — Full adaptive state for teacher dashboard
"""

from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
import os
from dotenv import load_dotenv

# Load environment variables IMMEDIATELY
load_dotenv(os.path.join(os.path.dirname(__file__), '.env'))

import asyncio
import time
from services.tts_service                import TTSService
from services.accessibility_adapter      import FreeAccessibilityAdapter
from services.rl_agent_service           import RLAgentService
from services.smart_question_generator   import SmartQuestionGenerator
from services.gemini_service             import GeminiService
from services.simplification_service     import SimplificationService
from services.learner_embedding          import LearnerEmbedding, SessionMetrics
from services.comprehension_tracker      import ComprehensionTracker
from services.teacher_intelligence       import TeacherIntelligence
from services.teacher_recommendations    import get_recommendation_engine, StudentRecommendation
from services.stt_service                import STTAssessmentService
from services.word_difficulty_service    import WordDifficultyService
from ml_pipeline import LiteratureAnalyzer, BookBrain
from ml_pipeline.quiz_generator      import PedagogicalQuestionGenerator
from ml_pipeline.ner_extractor       import get_ner_extractor
from ml_pipeline.vocab_analyzer      import get_vocab_analyzer
from ml_pipeline.difficulty_adapter  import get_difficulty_adapter
from services.tts_service            import get_tts_service
from services.hf_inference_service   import HFInferenceService
from services.pronunciation_service  import PronunciationService

# Application object shared by every route decorator below.
# NOTE(review): the version here is "3.0.0" while the module docstring
# describes v4.0 — confirm which one is intended.
app = FastAPI(title="IncludEd AI Service", version="3.0.0")

# CORS is fully open: any origin, method and header, with credentials allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is
# normally a development-only setting — confirm before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── Lazy proxy — instantiates service on first attribute access ───────────────
class _Lazy:
    """Proxy that defers building a service until it is first used.

    The wrapped object is created by calling ``factory()`` the first time an
    attribute is read from the proxy or the proxy itself is called. The result
    is cached, so the factory runs at most once per successful build — even
    when it returns ``None``. If the factory raises, nothing is cached and the
    next access tries again.

    Internal state is written with ``object.__setattr__`` and read with
    ``object.__getattribute__`` so it never goes through ``__getattr__``.
    """

    def __init__(self, factory):
        object.__setattr__(self, '_factory', factory)
        object.__setattr__(self, '_instance', None)
        # Separate flag: "already built" must not be inferred from the value of
        # _instance, otherwise a factory returning None would be re-run on
        # every access.
        object.__setattr__(self, '_loaded', False)

    def _load(self):
        """Return the wrapped instance, building it on the first call."""
        if not object.__getattribute__(self, '_loaded'):
            factory = object.__getattribute__(self, '_factory')
            object.__setattr__(self, '_instance', factory())
            object.__setattr__(self, '_loaded', True)
        return object.__getattribute__(self, '_instance')

    def __getattr__(self, name):
        # Only reached for names that are not found on the proxy itself, so
        # every such lookup is forwarded to the (lazily built) instance.
        return getattr(self._load(), name)

    def __call__(self, *args, **kwargs):
        # Calling the proxy calls the wrapped object.
        return self._load()(*args, **kwargs)

# Service singletons — every one is wrapped in _Lazy, so the underlying object
# is built on first use, not at import/startup time.
_literature_analyzer = _Lazy(LiteratureAnalyzer)
_book_brain          = _Lazy(BookBrain)
_quiz_generator      = _Lazy(PedagogicalQuestionGenerator)
question_gen         = _Lazy(SmartQuestionGenerator)
accessibility_adapter= _Lazy(FreeAccessibilityAdapter)
rl_agent             = _Lazy(RLAgentService)
tts_service          = _Lazy(TTSService)
# The API key is read from the environment when the service is first used.
gemini_service       = _Lazy(lambda: GeminiService(os.getenv("GEMINI_API_KEY")))
simplification_svc   = _Lazy(SimplificationService)
learner_embedding    = _Lazy(LearnerEmbedding)
comprehension_tracker= _Lazy(ComprehensionTracker)
teacher_intelligence = _Lazy(TeacherIntelligence)
stt_assessment       = _Lazy(STTAssessmentService)
word_difficulty      = _Lazy(WordDifficultyService)
ner_extractor        = _Lazy(get_ner_extractor)
# The vocab analyzer is handed the (still lazy) gemini_service proxy.
vocab_analyzer       = _Lazy(lambda: get_vocab_analyzer(gemini_service=gemini_service))
difficulty_adapter   = _Lazy(get_difficulty_adapter)
# NOTE(review): tts_svc and tts_service (above) both wrap a TTS service, built
# through different constructors — confirm that both are needed.
tts_svc              = _Lazy(get_tts_service)
# The token is read from the environment when the service is first used.
hf_inference         = _Lazy(lambda: HFInferenceService(os.getenv("HF_API_TOKEN")))
pronunciation_svc    = _Lazy(PronunciationService)

# ── Request/Response Models ──────────────────────────────────────────────────

class AnalyzeTextRequest(BaseModel):
    """Body of POST /reanalyze-text: raw text plus question-generation options."""
    text: str
    filename: Optional[str] = "document.txt"
    generate_questions: bool = True
    question_count: int = 5


class AnalyzeResponse(BaseModel):
    """Response of /analyze and /reanalyze-text.

    The first eight fields are copied from the literature analyzer result;
    ``book_brain`` carries the Book Brain pre-analysis as a plain dict.
    """
    document_type: str
    title: str
    author: Optional[str] = None
    confidence: float
    units: List[Dict[str, Any]]
    flat_units: List[Dict[str, Any]]
    questions: List[Dict[str, Any]]
    metadata: Dict[str, Any]
    book_brain: Optional[Dict[str, Any]] = None


class SimplifyRequest(BaseModel):
    """Body of POST /simplify.

    When ``student_id`` is given, the handler replaces ``reading_level`` with
    the level stored for that student; when both ``student_id`` and ``book_id``
    are given, the highlight is also recorded.
    """
    highlighted_text: str
    book_title: str = ""
    author: str = ""
    doc_type: str = "generic"
    chapter_context: str = ""
    speaker: str = ""
    reading_level: str = "intermediate"
    language: str = "en"
    student_id: Optional[str] = None
    book_id: Optional[str] = None


class ComprehensionRecordRequest(BaseModel):
    """Body of POST /comprehension/record: one section-read event for a student."""
    student_id: str
    book_id: str
    section_id: str
    section_title: str = ""
    chapter_title: str = ""
    time_spent_s: float = 0
    quiz_score: Optional[float] = None
    characters_seen: Optional[List[str]] = None
    section_text: str = ""  # raw section content for theme extraction (D4)


class HighlightRecordRequest(BaseModel):
    """Body of POST /comprehension/highlight: one highlighted passage."""
    student_id: str
    book_id: str
    highlighted_text: str
    section_id: str


class VocabRecordRequest(BaseModel):
    """Body shared by /comprehension/vocab and /comprehension/vocab-mastered."""
    student_id: str
    book_id: str
    word: str


class SessionUpdateRequest(BaseModel):
    """Body of POST /learner/update.

    Every field except ``student_id`` is passed through unchanged to
    ``SessionMetrics`` by the handler.
    """
    student_id: str
    session_duration_s: float = 0
    words_read: int = 0
    reading_speed_wpm: float = 0
    backtrack_count: int = 0
    scroll_events: int = 0
    attention_lapses: int = 0
    highlights_made: int = 0
    vocab_lookups: int = 0
    time_of_day_hour: int = 12
    disability_type: float = 0
    doc_type: str = "generic"
    adaptations_applied: List[int] = []
    adaptation_accepted: List[bool] = []
    quiz_score: Optional[float] = None
    avg_dwell_time_ms: float = 0
    session_fatigue: float = 0


class RLPredictRequest(BaseModel):
    """Body of POST /rl/predict: the state vector handed to the RL agent."""
    state_vector: List[float]
    content_type: float = 0.5  # 0.0=generic, 0.5=novel, 1.0=play


class StudentSummaryRequest(BaseModel):
    """Body of POST /teacher/student-summary."""
    student_name: str
    student_id: str
    book_id: str
    class_average_chapter: int = 0


class ClassAlertsRequest(BaseModel):
    """Body of POST /teacher/class-alerts: per-student summaries for one class."""
    student_summaries: List[Dict[str, Any]]
    book_title: str = ""


class RecapTextRequest(BaseModel):
    """Body of POST /teacher/recap: which student/book to recap, and the language."""
    student_id: str
    book_id: str
    language: str = "en"


class CommonHighlightRequest(BaseModel):
    """Body of POST /teacher/common-highlights."""
    highlights: List[Dict[str, Any]]  # [{student_name, text, section_id, timestamp}]
    book_title: str = ""
    min_students: int = 2


# ── Core Literature Analysis Endpoints ───────────────────────────────────────

@app.get("/")
async def root():
    """Health probe: report the service status, name and version."""
    payload = {
        "status": "healthy",
        "service": "IncludEd AI Service",
        "version": "3.0.0",
    }
    return payload


@app.post("/analyze", response_model=AnalyzeResponse, tags=["literature"])
async def analyze_pdf(
    file: UploadFile = File(...),
    generate_questions: bool = True,
    question_count: int = 5,
):
    """
    Analyse an uploaded file and attach the Book Brain pre-analysis.

    The upload is read fully into memory, then the literature analyzer and
    Book Brain are each run in a worker thread, one after the other.
    """
    raw_bytes = await file.read()
    analysis = await asyncio.to_thread(
        _literature_analyzer.analyze,
        raw_bytes, file.filename, generate_questions, question_count,
    )

    # Book Brain works on the units the analyzer just produced.
    brain = await asyncio.to_thread(
        _book_brain.analyze,
        analysis.units,
        analysis.document_type,
        analysis.language,
        analysis.title,
        analysis.author or "",
    )

    brain_fields = (
        "difficulty_map",
        "vocabulary",
        "characters",
        "summary_stats",
        "struggle_zones",
        "cultural_context_bank",
    )
    return AnalyzeResponse(
        document_type=analysis.document_type,
        title=analysis.title,
        author=analysis.author,
        confidence=analysis.confidence,
        units=analysis.units,
        flat_units=analysis.flat_units,
        questions=analysis.questions,
        metadata=analysis.metadata,
        book_brain={name: getattr(brain, name) for name in brain_fields},
    )


@app.post("/reanalyze-text", response_model=AnalyzeResponse, tags=["literature"])
async def reanalyze_text(req: AnalyzeTextRequest):
    """
    Analyse already-extracted text and attach the Book Brain pre-analysis.

    An empty or missing filename is replaced by "legacy_content". Both
    analysis steps run in worker threads, one after the other.
    """
    source_name = req.filename or "legacy_content"
    analysis = await asyncio.to_thread(
        _literature_analyzer.analyze_text,
        req.text, source_name, req.generate_questions, req.question_count,
    )

    brain = await asyncio.to_thread(
        _book_brain.analyze,
        analysis.units,
        analysis.document_type,
        analysis.language,
        analysis.title,
        analysis.author or "",
    )

    brain_payload = {
        "difficulty_map": brain.difficulty_map,
        "vocabulary": brain.vocabulary,
        "characters": brain.characters,
        "summary_stats": brain.summary_stats,
        "struggle_zones": brain.struggle_zones,
        "cultural_context_bank": brain.cultural_context_bank,
    }
    return AnalyzeResponse(
        document_type=analysis.document_type,
        title=analysis.title,
        author=analysis.author,
        confidence=analysis.confidence,
        units=analysis.units,
        flat_units=analysis.flat_units,
        questions=analysis.questions,
        metadata=analysis.metadata,
        book_brain=brain_payload,
    )


# ── Book Brain Standalone ────────────────────────────────────────────────────

class BookBrainRequest(BaseModel):
    """Body of POST /book-brain/analyze: already-parsed units plus book metadata."""
    units: List[Dict[str, Any]]
    doc_type: str = "generic"
    language: str = "en"
    title: str = ""
    author: str = ""


@app.post("/book-brain/analyze", tags=["book-brain"])
async def book_brain_analyze(req: BookBrainRequest):
    """Run the Book Brain pre-analysis on units that were parsed earlier."""
    brain = await asyncio.to_thread(
        _book_brain.analyze,
        req.units,
        req.doc_type,
        req.language,
        req.title,
        req.author,
    )
    exported = (
        "difficulty_map",
        "vocabulary",
        "characters",
        "summary_stats",
        "struggle_zones",
        "cultural_context_bank",
    )
    return {name: getattr(brain, name) for name in exported}


# ── Highlight-to-Understand (Simplification) ─────────────────────────────────

@app.post("/simplify", tags=["simplification"])
async def simplify_text(req: SimplifyRequest):
    """
    Simplify a highlighted passage using its book/chapter context.

    The reading level comes from the learner embedding when a student id is
    supplied, otherwise from the request. When both student and book ids are
    present the highlight is also logged (with an empty section id). The
    simplification service's result is returned unchanged.
    """
    level = (
        learner_embedding.get_reading_level(req.student_id)
        if req.student_id
        else req.reading_level
    )

    simplified = await asyncio.to_thread(
        simplification_svc.simplify,
        req.highlighted_text, req.book_title, req.author, req.doc_type,
        req.chapter_context, req.speaker, level, req.language,
    )

    has_student_context = req.student_id and req.book_id
    if has_student_context:
        comprehension_tracker.record_highlight(
            req.student_id, req.book_id, req.highlighted_text, "",
        )

    return simplified


# ── Introduction Generation ──────────────────────────────────────────────────

class IntroductionRequest(BaseModel):
    """Body of POST /introduction/generate; every field is interpolated into the prompt."""
    title:           str
    author:          str
    content_summary: str = ""
    doc_type:        str = "generic"
    language:        str = "en"


@app.post("/introduction/generate", tags=["literature"])
async def generate_introduction(req: IntroductionRequest):
    """
    Generate a short pedagogical introduction for a book (v3.0).

    Gemini is tried first; when it is unavailable or returns nothing, a static
    template built from the title, author and genre is returned instead.

    Returns a dict with "introduction" (the text) and "tier" ("gemini" or
    "template").
    """
    prompt = f"""Generate an engaging, highly accurate, and short pedagogical introduction (2-3 paragraphs) for a student about to read:
Title: {req.title}
Author: {req.author}
Content Genre: {req.doc_type}

Summary of initial content:
{req.content_summary}

Do not hallucinate plot points. Base your introduction strictly on the provided summary and the known context of the book. Focus on the themes, characters introduced, and why this work matters.
Format: Return only the text of the introduction.
"""
    system_instruction = "You are an expert literature teacher helping students with learning differences feel excited about reading."

    # Primary: Gemini. The call blocks on the model, so it runs in a worker
    # thread to keep the event loop free for other requests.
    if gemini_service.is_available():
        intro = await asyncio.to_thread(
            gemini_service.generate, prompt, system_instruction
        )
        if intro:
            return {"introduction": intro, "tier": "gemini"}

    # Fallback: Static template
    return {
        "introduction": f"Welcome to {req.title} by {req.author}. This {req.doc_type} is a classic work that explores important themes and characters.",
        "tier": "template"
    }


# ── Quiz Generation ─────────────────────────────────────────────────────────

class QuizGenerateRequest(BaseModel):
    """Body of POST /quiz/generate: the content to quiz on and generation options."""
    content: str
    doc_type: str = "generic"
    count: int = 5
    language: str = "en"


@app.post("/quiz/generate", tags=["quiz"])
async def quiz_generate(req: QuizGenerateRequest):
    """Generate quiz questions for the supplied content, in a worker thread."""
    generator_args = (req.content, req.doc_type, req.count, req.language)
    generated = await asyncio.to_thread(_quiz_generator.generate, *generator_args)
    return {"questions": generated}


# ── Character Intelligence ────────────────────────────────────────────────────

class CharacterDescribeRequest(BaseModel):
    """Body of POST /characters/describe: a character name and the text read so far."""
    character: str
    context: str
    max_context_chars: Optional[int] = 4000

class CharacterNERRequest(BaseModel):
    """Body of POST /characters/extract-names: the text to scan for names."""
    text: str

@app.post("/characters/describe", tags=["characters"])
async def character_describe(req: CharacterDescribeRequest):
    """Describe a character based on the text read so far (DeBERTa Q&A, no spoilers)."""
    # Imported on demand so the character service is only loaded when used.
    from services.character_service import get_character_service

    character_service = get_character_service()
    return await asyncio.to_thread(
        character_service.describe_character,
        req.character,
        req.context,
        req.max_context_chars,
    )

@app.post("/characters/extract-names", tags=["characters"])
async def character_extract_names(req: CharacterNERRequest):
    """Extract PERSON entity names from text using BERT NER."""
    # Imported on demand so the character service is only loaded when used.
    from services.character_service import get_character_service

    character_service = get_character_service()
    found = await asyncio.to_thread(character_service.extract_person_names, req.text)
    response = {"names": found, "count": len(found)}
    return response


# ── Comprehension Tracking ───────────────────────────────────────────────────

@app.post("/comprehension/record", tags=["comprehension"])
async def record_section_read(req: ComprehensionRecordRequest):
    """Log a section-read event with the comprehension tracker."""
    event = (
        req.student_id,
        req.book_id,
        req.section_id,
        req.section_title,
        req.chapter_title,
        req.time_spent_s,
        req.quiz_score,
        req.characters_seen,
        req.section_text,
    )
    comprehension_tracker.record_section_read(*event)
    return {"status": "recorded"}


@app.post("/comprehension/highlight", tags=["comprehension"])
async def record_highlight(req: HighlightRecordRequest):
    """Log a highlighted passage, together with the section it came from."""
    highlight = (req.student_id, req.book_id, req.highlighted_text, req.section_id)
    comprehension_tracker.record_highlight(*highlight)
    return {"status": "recorded"}


@app.post("/comprehension/vocab", tags=["comprehension"])
async def record_vocab_lookup(req: VocabRecordRequest):
    """Log that a student looked up a vocabulary word."""
    lookup = (req.student_id, req.book_id, req.word)
    comprehension_tracker.record_vocab_lookup(*lookup)
    return {"status": "recorded"}


@app.post("/comprehension/vocab-mastered", tags=["comprehension"])
async def record_vocab_mastered(req: VocabRecordRequest):
    """Log that a student has mastered a vocabulary word."""
    mastered = (req.student_id, req.book_id, req.word)
    comprehension_tracker.record_vocab_mastered(*mastered)
    return {"status": "recorded"}


class VocabExplainRequest(BaseModel):
    """Body of POST /vocab/explain: a word and the sentence/passage it appears in."""
    word: str
    context: str = ""
    language: str = "en"


@app.post("/vocab/explain", tags=["vocabulary"])
async def explain_vocab(req: VocabExplainRequest):
    """
    Generate a child-friendly explanation for a word in context.

    Gemini is asked for a JSON object with "modern_meaning", "analogy" and
    "category"; whatever it returns is passed through as-is. When Gemini is
    unavailable or returns nothing, a generic placeholder with the same three
    keys is returned.
    """
    prompt = f"""Explain the word "{req.word}" in this context: "{req.context}"
Provide:
1. modern_meaning: a very simple definition for a 10-year old.
2. analogy: a simple comparison helpful for a child.
3. category: a short label for the word type (e.g. Vocabulary, Archaic, Idiom).

Respond in JSON with keys: "modern_meaning", "analogy", "category".
"""
    system_instruction = "You are a kind teacher explaining difficult words to children with dyslexia and ADHD. Keep it simple, visual, and encouraging."

    # Primary: Gemini. The call blocks on the model, so it runs in a worker
    # thread to keep the event loop free for other requests.
    if gemini_service.is_available():
        res = await asyncio.to_thread(
            gemini_service.generate_json, prompt, system_instruction
        )
        if res:
            return res

    # Fallback: generic placeholder
    return {
        "modern_meaning": "A word used to describe something in the story.",
        "analogy": "Like a puzzle piece that fits in this sentence.",
        "category": "Vocabulary"
    }


# ── Word Phonics & Pronunciation (Project Revamp) ───────────────────────────

class PhonicsRequest(BaseModel):
    """Body shared by /word/phonics and /word/pronunciation-guide: a single word."""
    word: str

@app.post("/word/phonics", tags=["vocabulary"])
async def get_word_phonics(req: PhonicsRequest):
    """Return the pronunciation service's phonics breakdown for a word."""
    breakdown = pronunciation_svc.get_phonics_breakdown(req.word)
    return breakdown

@app.post("/word/pronunciation-guide", tags=["vocabulary"])
async def get_word_pronunciation_guide(req: PhonicsRequest):
    """
    Get a Google-style pronunciation guide.

    Currently this returns the same phonics breakdown as /word/phonics, taken
    from the dedicated pronunciation service.
    """
    # TODO: optionally use HF Inference (Mistral) for better phonetics when
    # USE_HF_INFERENCE == "1" and a token is configured. The previous
    # placeholder branch did nothing except force the HF client to be built,
    # so it has been removed until that path is actually implemented.
    return pronunciation_svc.get_phonics_breakdown(req.word)


@app.get("/comprehension/summary", tags=["comprehension"])
async def get_comprehension_summary(student_id: str, book_id: str):
    """Return the tracker's comprehension summary for one student and book."""
    summary = comprehension_tracker.get_summary(student_id, book_id)
    return summary


@app.get("/comprehension/recap", tags=["comprehension"])
async def get_comprehension_recap(student_id: str, book_id: str):
    """Return the tracker's 'Story So Far' recap data for one student and book."""
    recap = comprehension_tracker.get_recap(student_id, book_id)
    return recap


# ── Learner Embedding ────────────────────────────────────────────────────────

@app.post("/learner/update", tags=["learner"])
async def update_learner_embedding(req: SessionUpdateRequest):
    """
    Fold a finished reading session into the student's learner embedding.

    Returns the embedding length and the stored session count for the student
    (0 when no count is recorded).
    """
    # Request fields that map one-to-one onto SessionMetrics keyword arguments.
    metric_fields = (
        "session_duration_s",
        "words_read",
        "reading_speed_wpm",
        "backtrack_count",
        "scroll_events",
        "attention_lapses",
        "highlights_made",
        "vocab_lookups",
        "time_of_day_hour",
        "disability_type",
        "doc_type",
        "adaptations_applied",
        "adaptation_accepted",
        "quiz_score",
        "avg_dwell_time_ms",
        "session_fatigue",
    )
    session = SessionMetrics(**{name: getattr(req, name) for name in metric_fields})

    embedding = learner_embedding.update_from_session(req.student_id, session)
    sessions_so_far = learner_embedding._session_counts.get(req.student_id, 0)
    return {
        "status": "updated",
        "embedding_dim": len(embedding),
        "session_count": sessions_so_far,
    }


@app.get("/learner/profile", tags=["learner"])
async def get_learner_profile(student_id: str):
    """Return the learner embedding's profile summary for a student."""
    profile = learner_embedding.get_profile_summary(student_id)
    return profile


@app.get("/learner/reading-level", tags=["learner"])
async def get_reading_level(student_id: str):
    """Return the reading level derived from the student's embedding."""
    return {
        "student_id": student_id,
        "reading_level": learner_embedding.get_reading_level(student_id),
    }


class HighlightFeedbackRequest(BaseModel):
    """Body of POST /learner/highlight-feedback: one categorised highlight."""
    student_id: str
    category: str  # figurative_language | archaic_idiom | cultural_reference | vocabulary_gap | general
    highlighted_text: str = ""
    difficulty_estimate: float = 0.5  # 0-1 how hard the passage seemed


@app.post("/learner/highlight-feedback", tags=["learner"])
async def learner_highlight_feedback(req: HighlightFeedbackRequest):
    """
    Nudge a learner's embedding in response to one categorised highlight.

    Each affected slot moves toward a target with an exponential moving
    average: new = (1 - ALPHA) * old + ALPHA * target.

    Category → slots updated (target in parentheses):
      figurative_language → vec[95] (difficulty), vec[67] (0.7)
      archaic_idiom       → vec[65] (difficulty), vec[117] (0.8)
      cultural_reference  → vec[67] (0.75), vec[65] (difficulty × 0.5)
      vocabulary_gap      → vec[117] (0.7), vec[65] (difficulty × 0.4)
      any other category  → vec[116] (0.6)

    Slot roles as used here: 65 frustration, 67 help-seeking, 95 literary
    device recognition, 116 highlight frequency, 117 vocab lookup frequency.

    Afterwards the whole vector is clipped to [0, 1], cached and saved.
    """
    import numpy as np

    ALPHA = 0.2  # stronger signal than session-end EMA (α=0.15) for real-time feedback
    difficulty = req.difficulty_estimate

    # Each entry is (slot, signal, weight); the EMA target is signal * weight.
    plans = {
        "figurative_language": ((95, difficulty, 1.0), (67, 0.7, 1.0)),
        "archaic_idiom": ((65, difficulty, 1.0), (117, 0.8, 1.0)),
        "cultural_reference": ((67, 0.75, 1.0), (65, difficulty, 0.5)),
        "vocabulary_gap": ((117, 0.7, 1.0), (65, difficulty, 0.4)),
    }
    # Unknown categories (including "general") only bump highlight frequency.
    general_plan = ((116, 0.6, 1.0),)

    vec = learner_embedding.get_or_create(req.student_id)
    for slot, signal, weight in plans.get(req.category, general_plan):
        vec[slot] = (1 - ALPHA) * vec[slot] + ALPHA * signal * weight

    # Keep every component inside [0, 1] before persisting.
    clipped = np.clip(vec, 0.0, 1.0)
    learner_embedding._cache[req.student_id] = clipped
    learner_embedding._save(req.student_id, clipped)

    return {
        "student_id": req.student_id,
        "category": req.category,
        "embedding_updated": True,
    }


# ── Teacher Intelligence ─────────────────────────────────────────────────────

@app.post("/teacher/student-summary", tags=["teacher"])
async def teacher_student_summary(req: StudentSummaryRequest):
    """Build a natural-language summary of one student's progress on a book."""
    comprehension = comprehension_tracker.get_summary(req.student_id, req.book_id)
    learner_profile = learner_embedding.get_profile_summary(req.student_id)
    summary = teacher_intelligence.student_summary(
        req.student_name,
        comprehension,
        learner_profile,
        req.class_average_chapter,
    )
    return summary


@app.post("/teacher/class-alerts", tags=["teacher"])
async def teacher_class_alerts(req: ClassAlertsRequest):
    """Produce class-wide pattern alerts from per-student summaries."""
    return {
        "alerts": teacher_intelligence.class_alerts(
            req.student_summaries, req.book_title
        )
    }


@app.post("/teacher/common-highlights", tags=["teacher"])
async def teacher_common_highlights(req: CommonHighlightRequest):
    """
    Find passages that several students highlighted (D6 deliverable).

    The request's highlights, book title and min_students threshold are passed
    to the teacher-intelligence service, whose alerts are returned.
    """
    shared = teacher_intelligence.common_highlight_alerts(
        req.highlights,
        req.book_title,
        req.min_students,
    )
    return {"alerts": shared}


@app.post("/teacher/recap", tags=["teacher"])
async def teacher_generate_recap(req: RecapTextRequest):
    """Return 'Story So Far' recap text together with the data it was built from."""
    data = comprehension_tracker.get_recap(req.student_id, req.book_id)
    text = teacher_intelligence.generate_recap_text(data, req.language)
    return {"recap": text, "data": data}


# ── Teacher Recommendations (D6: Actionable insights) ──────────────────────────

class StudentRecommendationRequest(BaseModel):
    """Body of POST /teacher/recommendations/student."""
    student_id: str
    student_name: str
    student_profile: Dict[str, Any]
    recent_sessions: List[Dict[str, Any]]


class ClassRecommendationRequest(BaseModel):
    """Body of POST /teacher/recommendations/class."""
    class_id: str
    students_profiles: List[Dict[str, Any]]
    current_book: Optional[Dict[str, Any]] = None


class RiskAlertRequest(BaseModel):
    """Body of POST /teacher/alerts/risk; ``alert_threshold`` is forwarded to the engine."""
    student_id: str
    student_name: str
    student_profile: Dict[str, Any]
    recent_sessions: List[Dict[str, Any]]
    alert_threshold: float = 0.3


@app.post("/teacher/recommendations/student", tags=["recommendations"])
async def get_student_recommendations(req: StudentRecommendationRequest):
    """
    Produce actionable recommendations for a single student (D6).

    The recommendation engine receives the student's profile and recent
    sessions; each recommendation it returns is serialised to its priority,
    action, rationale and expected impact.

    Illustrative outcomes:
    - low attention score → schedule sessions in the morning
    - high frustration → simplified text or smaller chunks
    - frequent backtracking → pre-reading vocabulary
    """
    engine = get_recommendation_engine()
    suggestions = engine.recommend_for_student(
        req.student_id, req.student_name, req.student_profile, req.recent_sessions,
    )

    exported = ("priority", "action", "rationale", "expected_impact")
    serialised = [
        {field: getattr(item, field) for field in exported}
        for item in suggestions
    ]
    return {"student_id": req.student_id, "recommendations": serialised}


@app.post("/teacher/recommendations/class", tags=["recommendations"])
async def get_class_recommendations(req: ClassRecommendationRequest):
    """
    Produce cohort-level recommendations from class-wide patterns (D6).

    Each recommendation returned by the engine is serialised to its pattern,
    affected students, intervention, resource and timeline.

    Illustrative patterns:
    - widespread attention drift → 10-minute chunks with breathing breaks
    - high vocabulary lookup rates → pre-teach key words
    - subgroup disengagement → check in or switch book genre
    """
    engine = get_recommendation_engine()
    cohort_plans = engine.recommend_for_class(
        req.class_id, req.students_profiles, req.current_book,
    )

    exported = ("pattern", "affected_students", "intervention", "resource", "timeline")
    serialised = [
        {field: getattr(plan, field) for field in exported}
        for plan in cohort_plans
    ]
    return {"class_id": req.class_id, "recommendations": serialised}


@app.post("/teacher/alerts/risk", tags=["recommendations"])
async def get_risk_alert(req: RiskAlertRequest):
    """
    Check whether a student is at risk of disengagement or learning loss (D6).

    The engine is given the student's profile, recent sessions and the alert
    threshold. A truthy result is returned under "alert"; otherwise the
    response is {"alert": None, "status": "low_risk"}.

    Signals described for this alert:
    - Critically low attention (< 0.3)
    - High frustration signals
    - Few sessions completed (early dropout indicators)
    - Declining comprehension scores
    """
    engine = get_recommendation_engine()
    risk = engine.generate_risk_alert(
        req.student_id, req.student_name, req.student_profile,
        req.recent_sessions, req.alert_threshold,
    )
    if not risk:
        return {"alert": None, "status": "low_risk"}
    return {"alert": risk}


# ── Poem Analysis (Phase 2 + Phase 7) ────────────────────────────────────────

class PoemAnalyzeRequest(BaseModel):
    """Body of POST /poem/analyze: the poem text and its language code."""
    text: str
    language: str = "en"


@app.post("/poem/analyze", tags=["poem"])
async def poem_analyze(req: PoemAnalyzeRequest):
    """
    Analyse a poem stanza by stanza (emotion + rhyme scheme).

    Response shape:
      {
        "stanzas": [
          {
            "stanza_index": int,
            "lines": [str],
            "emotion": str,
            "intensity": float,
            "rhyme_scheme": str,
            "end_words": [str],
            "color_tint": str
          }
        ],
        "dominant_emotion": str,   # most frequent non-neutral stanza emotion
        "rhyme_pattern": str       # rhyme scheme of the first stanza
      }

    When the analyzer yields no stanzas, the response is an empty stanza list
    with "neutral" / "free verse".
    """
    from ml_pipeline.emotion_analyzer import get_emotion_analyzer

    emotion_analyzer = get_emotion_analyzer()
    stanzas = await asyncio.to_thread(
        emotion_analyzer.analyze_poem_stanzas, req.text, req.language
    )
    if not stanzas:
        return {"stanzas": [], "dominant_emotion": "neutral", "rhyme_pattern": "free verse"}

    from collections import Counter as _Counter

    # Tally stanza emotions, ignoring neutral ones.
    tally = _Counter(
        stanza["emotion"] for stanza in stanzas if stanza["emotion"] != "neutral"
    )
    if tally:
        dominant = tally.most_common(1)[0][0]
    else:
        dominant = "neutral"

    # The overall pattern is simply the first stanza's scheme; stanzas is
    # known to be non-empty at this point.
    overall_scheme = stanzas[0]["rhyme_scheme"]

    return {
        "stanzas": stanzas,
        "dominant_emotion": dominant,
        "rhyme_pattern": overall_scheme,
    }


# ── Helper Endpoints ─────────────────────────────────────────────────────────

@app.post("/adapt-text")
async def adapt_text(req: Dict[str, Any]):
    """
    Batch adaptation endpoint used by background workers (D2).

    Reads "text" and "doc_type" from the JSON body and simplifies the text at
    the "intermediate" level in English with no book context. Empty text
    short-circuits with strategy "empty". If the result lacks "simple_version"
    the original text is returned; a missing "tier" is reported as
    "rule_based".
    """
    source_text = req.get("text", "")
    if not source_text:
        return {"adaptedText": "", "strategy": "empty"}

    genre = req.get("doc_type", "generic")
    # Positional order: text, book_title, author, doc_type, chapter_context,
    # speaker, reading_level, language.
    simplify_args = (source_text, "", "", genre, "", "", "intermediate", "en")
    outcome = await asyncio.to_thread(simplification_svc.simplify, *simplify_args)

    adapted = outcome.get("simple_version", source_text)
    strategy = outcome.get("tier", "rule_based")
    return {"adaptedText": adapted, "strategy": strategy}

@app.post("/rl/predict", tags=["rl"])
async def rl_predict(req: RLPredictRequest):
    """
    Ask the RL agent for a pedagogical action for the given state.

    "fallback" in the response is True when the agent's model is not ready.
    """
    prediction = rl_agent.predict_from_state_vector(req.state_vector, req.content_type)
    action_id, action_label = prediction
    using_fallback = not rl_agent.model_ready
    return {
        "action_id": action_id,
        "action_label": action_label,
        "fallback": using_fallback,
    }

class _LegacyTTSGenerateRequest(BaseModel):
    """JSON body of POST /tts/generate: the text to synthesise."""
    text: str


@app.post("/tts/generate")
async def generate_tts(request: _LegacyTTSGenerateRequest):
    """
    Synthesise speech for ``request.text`` and return the TTS service's result.

    The parameter was previously annotated ``Any``, which FastAPI parses as a
    required query string; ``request.text`` then failed with AttributeError on
    every call. A body model makes the endpoint accept ``{"text": "..."}``.
    """
    return await tts_service.generate_with_timestamps(text=request.text)

@app.post("/session/start")
async def start_session(request: Optional[Dict[str, Any]] = None):
    """
    Stub: acknowledge a session start with a fixed mock session id.

    The payload is ignored. It is declared as an optional JSON body because a
    bare ``Any`` annotation is parsed by FastAPI as a required query parameter,
    which rejects clients that POST a JSON body (or nothing) with 422.
    """
    return {"session_id": "mock_session"}

@app.post("/session/telemetry")
async def push_telemetry(request: Optional[Dict[str, Any]] = None):
    """
    Stub: accept session telemetry and acknowledge it.

    The payload is ignored. It is declared as an optional JSON body because a
    bare ``Any`` annotation is parsed by FastAPI as a required query parameter,
    which rejects clients that POST a JSON body (or nothing) with 422.
    """
    return {"status": "ok"}

@app.post("/session/end")
async def end_session(request: Optional[Dict[str, Any]] = None):
    """
    Stub: acknowledge the end of a session.

    The payload is ignored. It is declared as an optional JSON body because a
    bare ``Any`` annotation is parsed by FastAPI as a required query parameter,
    which rejects clients that POST a JSON body (or nothing) with 422.
    """
    return {"status": "finished"}


@app.post("/teacher/insights", tags=["teacher"])
async def teacher_insights(req: Dict[str, Any]):
    """
    Turn analytics data into a natural-language teaching report via Gemini.

    Reads "analytics_data" from the JSON body. A fixed message is returned
    when the data is missing/empty, when Gemini is not available, or when
    generation raises; otherwise Gemini's output is returned under "insights".
    """
    import json

    analytics_data = req.get("analytics_data", {})

    # Guard clauses: nothing to analyse, or no model to analyse it with.
    if not analytics_data:
        return {"insights": "No data available to generate insights."}
    if not gemini_service.is_available():
        return {"insights": "Gemini AI service is not configured. Please check API keys."}

    prompt = f"""You are an expert special education teacher. Analyze the following student platform data and provide actionable teaching insights, specifically focusing on ADHD and Dyslexia.
    
Data: {json.dumps(analytics_data)}

Format your response as a professional, encouraging report. Include:
- A brief summary of overall class engagement.
- Specific insights regarding students with ADHD/Dyslexia.
- Actionable recommendations based on the struggle zones and quiz scores.
Keep it strictly under 250 words."""

    try:
        # Generation blocks on the model, so it runs in a worker thread.
        report = await asyncio.to_thread(gemini_service.generate, prompt)
    except Exception as e:
        print(f"Error generating insights: {e}")
        return {"insights": "Error generating insights."}
    return {"insights": report}


# ── STT Reading Assessment ──────────────────────────────────────────────────

class STTAssessmentRequest(BaseModel):
    # Request body for POST /stt/assess.
    expected_text: str  # reference text; first argument to stt_assessment.assess_reading
    spoken_text: str  # text compared against expected_text (second argument)
    duration_seconds: float = 0  # forwarded to the assessor and, when tracking applies, to the comprehension tracker
    student_id: Optional[str] = None  # the comprehension tracker is updated only when
    book_id: Optional[str] = None  #   both student_id and book_id are provided


@app.post("/stt/assess", tags=["stt"])
async def assess_reading(req: STTAssessmentRequest):
    """
    Score a read-aloud attempt by comparing the expected and spoken text.

    The comparison runs in ``stt_assessment.assess_reading`` on a worker
    thread and its result is returned unchanged. When both ``student_id`` and
    ``book_id`` are present, the attempt is also recorded with the
    comprehension tracker, using the result's ``accuracy`` (default 0)
    divided by 100.
    """
    assessment = await asyncio.to_thread(
        stt_assessment.assess_reading,
        req.expected_text,
        req.spoken_text,
        req.duration_seconds,
    )

    has_student_context = bool(req.student_id and req.book_id)
    if not has_student_context:
        return assessment

    # The tracker receives accuracy / 100 rather than the raw value.
    accuracy_fraction = assessment.get("accuracy", 0) / 100
    comprehension_tracker.record_section_read(
        req.student_id,
        req.book_id,
        "stt_assessment",
        "Reading Assessment",
        "",
        req.duration_seconds,
        accuracy_fraction,
    )
    return assessment


# ── Word Difficulty & Pronunciation ─────────────────────────────────────────

class WordDifficultyRequest(BaseModel):
    # Request body for POST /vocab/difficulty.
    text: str  # passage handed to word_difficulty.analyze_passage
    difficulty_threshold: float = 0.5  # passed through as the analyzer's second argument


@app.post("/vocab/difficulty", tags=["vocabulary"])
async def analyze_word_difficulty(req: WordDifficultyRequest):
    """
    Run the word-difficulty analyzer over a passage.

    ``word_difficulty.analyze_passage`` is executed on a worker thread with
    the request's text and threshold. The response carries the analyzer's
    output under ``difficult_words`` together with its length as ``count``.
    """
    analyzer_args = (req.text, req.difficulty_threshold)
    flagged = await asyncio.to_thread(word_difficulty.analyze_passage, *analyzer_args)

    response = {"difficult_words": flagged}
    response["count"] = len(flagged)
    return response


class PronunciationRequest(BaseModel):
    # Request body for POST /vocab/pronunciation.
    word: str  # the single word to look up (pronunciation, syllables, difficulty)


@app.post("/vocab/pronunciation", tags=["vocabulary"])
async def get_pronunciation(req: PronunciationRequest):
    """
    Build a pronunciation card for one word.

    Combines three ``word_difficulty`` lookups: the pronunciation guide, the
    syllable count, and the difficulty estimate (rounded to three decimals).
    """
    target = req.word

    card = {"word": target}
    card["pronunciation"] = word_difficulty.generate_pronunciation(target)
    card["syllables"] = word_difficulty.count_syllables(target)
    card["difficulty"] = round(word_difficulty.estimate_difficulty(target), 3)
    return card


# ── NER / Character Graph ─────────────────────────────────────────────────────

class NERExtractRequest(BaseModel):
    # Request body for POST /ner/extract.
    sections:              List[str]           # list of chapter/section texts
    title:                 Optional[str] = ""  # a None/empty title reaches the extractor as ""
    existing_characters:   Optional[List[str]] = None  # forwarded unchanged to ner_extractor.extract


class NERSectionViewRequest(BaseModel):
    # Request body for POST /ner/section-view: the three parts of a graph
    # plus the section index to filter up to. Element schemas are not
    # validated here (List[Any]).
    characters:    List[Any]  # becomes the graph's "characters" entry
    relationships: List[Any]  # becomes the graph's "relationships" entry
    locations:     List[Any]  # becomes the graph's "locations" entry
    up_to_section: int  # second argument to ner_extractor.extract_for_section


@app.post("/ner/extract", tags=["characters"])
async def ner_extract(req: NERExtractRequest):
    """
    Build the character + location graph for a whole book.

    Delegates to ``ner_extractor.extract`` on a worker thread, passing the
    section texts, the title (``""`` when absent) and any already-known
    characters, and returns the extractor's graph as-is.

    Design note carried over from the endpoint's original description: this
    is meant to be called once at upload time, with the result stored in
    Literature.bookBrain; the graph is expected to hold characters,
    relationships and locations.
    """
    book_title = req.title or ""
    extractor_args = (req.sections, book_title, req.existing_characters)
    return await asyncio.to_thread(ner_extractor.extract, *extractor_args)


@app.post("/ner/section-view", tags=["characters"])
async def ner_section_view(req: NERSectionViewRequest):
    """
    Filter a previously extracted graph down to the current section.

    Reassembles the graph dict from the request's ``characters``,
    ``relationships`` and ``locations`` lists and hands it, with
    ``up_to_section``, to ``ner_extractor.extract_for_section``. Intended as
    the spoiler-safe view consumed by CharacterMapPanel.tsx.
    """
    graph_fields = ("characters", "relationships", "locations")
    full_graph = {field: getattr(req, field) for field in graph_fields}
    return ner_extractor.extract_for_section(full_graph, req.up_to_section)


# ── Vocabulary Batch Analysis ─────────────────────────────────────────────────

class VocabBatchRequest(BaseModel):
    # Request body for POST /vocab/batch-analyze.
    sections:        List[str]           # chapter texts
    section_titles:  Optional[List[str]] = None  # forwarded unchanged to vocab_analyzer.analyze_book


class VocabSectionRequest(BaseModel):
    # Request body for POST /vocab/section-words.
    section_text:    str  # text of the section currently being read
    all_words:       List[Any]           # previously computed word list
    chapter_index:   int = 0  # passed through to VocabAnalyzer.words_for_section
    max_display:     int = 20  # passed through to VocabAnalyzer.words_for_section


class VocabIdentifyRequest(BaseModel):
    # Request body for POST /vocab/identify.
    text:            str  # snippet scanned for hard words
    language:        str = "en"  # NOTE(review): not read by the /vocab/identify handler in this file
    max_words:       int = 8  # the handler slices the hard-word list to this length


@app.post("/vocab/batch-analyze", tags=["vocabulary"])
async def vocab_batch_analyze(req: VocabBatchRequest):
    """
    Analyze the vocabulary of an entire book in one call.

    Runs ``vocab_analyzer.analyze_book`` on a worker thread with the section
    texts and optional titles. The response carries the analyzer's word list
    under ``vocabulary`` together with its length as ``count``.

    Design note carried over from the endpoint's original description: the
    result is meant to be stored in Literature.bookBrain.vocabulary at upload
    time (word schema: see vocab_analyzer.py).
    """
    # The module-level vocab_analyzer is used as-is; per the v4.0 notes it is
    # already initialized with gemini_service.
    book_words = await asyncio.to_thread(
        vocab_analyzer.analyze_book, req.sections, req.section_titles
    )

    response = {"vocabulary": book_words}
    response["count"] = len(book_words)
    return response


@app.post("/vocab/section-words", tags=["vocabulary"])
async def vocab_section_words(req: VocabSectionRequest):
    """
    Narrow a book-wide vocabulary list to the current section.

    Delegates to ``VocabAnalyzer.words_for_section`` with the full word list,
    the section text, the chapter index and the display cap, then returns the
    resulting words with their count. Used by VocabSidebar.tsx on chapter
    navigation.
    """
    from ml_pipeline.vocab_analyzer import VocabAnalyzer

    selection_args = (req.all_words, req.section_text, req.chapter_index, req.max_display)
    section_vocab = VocabAnalyzer.words_for_section(*selection_args)

    return {"words": section_vocab, "count": len(section_vocab)}


@app.post("/vocab/identify", tags=["vocabulary"])
async def vocab_identify(req: VocabIdentifyRequest):
    """
    Identify hard words in a text snippet and provide child-friendly
    definitions + analogies (proactive mode).

    Steps:
      1. Find candidate words with the analyzer's difficulty service, using a
         fixed threshold of 0.45 (lower than the 0.5 default of
         /vocab/difficulty, to catch more "stretch" words).
      2. Keep at most ``max_words`` of them; negative values are treated as 0.
      3. Enrich each kept word via ``analyzer.explain_word``, one at a time.

    NOTE(review): ``req.language`` is accepted but not used here.

    Returns:
        ``{"words": [...], "count": <number of enriched words>}``.
    """
    from ml_pipeline.vocab_analyzer import get_vocab_analyzer

    # Use the analyzer with Gemini when it is available, otherwise without.
    analyzer = get_vocab_analyzer(
        gemini_service=gemini_service if gemini_service.is_available() else None
    )

    candidates = await asyncio.to_thread(
        analyzer._difficulty_svc.analyze_passage,
        req.text,
        difficulty_threshold=0.45,
    )

    # Clamp the limit: a negative max_words would otherwise slice from the
    # end of the list and return almost every word instead of none.
    limit = max(0, req.max_words)
    candidates = candidates[:limit]

    # Enrichment stays sequential so the explanation backend is never hit
    # concurrently from a single request.
    results = []
    for entry in candidates:
        enriched = await asyncio.to_thread(
            analyzer.explain_word,
            entry["word"],
            entry["context"],
            0,  # chapter index placeholder
        )
        results.append(enriched)

    return {"words": results, "count": len(results)}


# ── TTS with word-level sync ──────────────────────────────────────────────────

class TTSSynthesizeRequest(BaseModel):
    # Request body for POST /tts/synthesize.
    text:            str  # text to synthesize
    disability_type: Optional[str] = "none"   # none | dyslexia | adhd | both
    language:        Optional[str] = "english"  # a None/empty value is sent to the service as "english"
    voice_override:  Optional[str] = None  # forwarded unchanged to tts_svc.synthesize
    rate_override:   Optional[str] = None  # forwarded unchanged to tts_svc.synthesize


@app.post("/tts/synthesize", tags=["tts"])
async def tts_synthesize(req: TTSSynthesizeRequest):
    """
    Synthesize speech for a text via ``tts_svc.synthesize``.

    ``disability_type`` and ``language`` fall back to ``"none"`` and
    ``"english"`` when empty; the voice and rate overrides are passed through
    untouched. The service result is returned as-is.

    Documented service contract (implemented in the TTS service, not here):
    a disability-appropriate voice and rate are selected automatically, long
    texts are chunked at sentence boundaries with timestamps offset to
    absolute positions, and the result holds audio_base64 (MP3),
    timestamps [{word, start_ms, end_ms}], duration_ms, voice, rate and
    word_count.
    """
    synth_kwargs = {
        "text": req.text,
        "disability_type": req.disability_type or "none",
        "language": req.language or "english",
        "voice_override": req.voice_override,
        "rate_override": req.rate_override,
    }
    return await tts_svc.synthesize(**synth_kwargs)


# ── Adaptive Quiz Difficulty ──────────────────────────────────────────────────

class QuizAttemptRequest(BaseModel):
    # Request body for POST /quiz/record-attempt.
    student_id:      str  # forwarded to difficulty_adapter.record_attempt
    literature_id:   str  # forwarded to difficulty_adapter.record_attempt
    chapter_index:   int = 0  # forwarded to difficulty_adapter.record_attempt
    score:           float                    # fraction correct 0.0–1.0 (the handler clamps it into that range)
    difficulty:      Optional[str] = "medium" # easy | medium | hard
    disability_type: Optional[str] = "none"  # a None/empty value is sent to the adapter as "none"


class QuizDifficultyQuery(BaseModel):
    # Request body for POST /quiz/recommend-difficulty.
    student_id:      str  # forwarded to difficulty_adapter.recommend_difficulty
    literature_id:   str  # forwarded to difficulty_adapter.recommend_difficulty
    disability_type: Optional[str] = "none"  # a None/empty value is sent to the adapter as "none"


@app.post("/quiz/record-attempt", tags=["quiz"])
async def quiz_record_attempt(req: QuizAttemptRequest):
    """
    Record a completed quiz attempt and update the student's IRT ability model.

    The score is clamped into [0.0, 1.0] before being handed to
    ``difficulty_adapter.record_attempt``; ``difficulty`` and
    ``disability_type`` fall back to ``"medium"`` and ``"none"`` when empty.

    Returns (as documented for the adapter's result):
      - next_difficulty: recommended difficulty for the next quiz
      - theta: updated ability estimate (–2.5 to +2.5)
      - performance_message: encouraging feedback for the student
      - recommendation: teaching insight for the teacher

    Raises:
        HTTPException: 422 when ``score`` is NaN.
    """
    import math

    # NaN compares false against every number, so the clamp below would turn
    # it into 1.0 and record a perfect score. Reject it explicitly instead.
    if math.isnan(req.score):
        raise HTTPException(
            status_code=422,
            detail="score must be a number between 0.0 and 1.0",
        )

    clamped_score = max(0.0, min(1.0, req.score))

    return difficulty_adapter.record_attempt(
        student_id      = req.student_id,
        literature_id   = req.literature_id,
        chapter_index   = req.chapter_index,
        score           = clamped_score,
        difficulty      = req.difficulty or "medium",
        disability_type = req.disability_type or "none",
    )


@app.post("/quiz/recommend-difficulty", tags=["quiz"])
async def quiz_recommend_difficulty(req: QuizDifficultyQuery):
    """
    Look up the difficulty to use for a student's next quiz.

    Asks ``difficulty_adapter.recommend_difficulty`` for the given student and
    book (``disability_type`` falls back to ``"none"`` when empty). Call this
    before generating questions.
    Returns: { "difficulty": "easy" | "medium" | "hard" }
    """
    lookup = {
        "student_id": req.student_id,
        "literature_id": req.literature_id,
        "disability_type": req.disability_type or "none",
    }
    recommended = difficulty_adapter.recommend_difficulty(**lookup)
    return {"difficulty": recommended}


@app.get("/quiz/student-state", tags=["quiz"])
async def quiz_student_state(student_id: str, literature_id: str):
    """
    Fetch the adaptive-difficulty state for one student + book pair.

    Used by the teacher dashboard to show progress history and ability level.
    When ``difficulty_adapter.get_state`` yields nothing (or an empty value),
    a ``{"message": ...}`` placeholder is returned instead of the state.
    """
    state = difficulty_adapter.get_state(student_id, literature_id)
    if state:
        return state
    return {"message": "No quiz attempts recorded yet."}


# ── System ───────────────────────────────────────────────────────────────────

@app.get("/health", tags=["system"])
async def health():
    """
    Liveness probe: reports status, version, RL-model readiness, the list of
    enabled feature flags, and the current Unix timestamp.
    """
    enabled_features = [
        "highlight_to_understand",
        "book_brain_preanalysis",
        "learner_embedding_128dim",
        "comprehension_graph",
        "teacher_intelligence",
        "teacher_recommendations",
        "story_recaps",
        "poem_mode",
        "adhd_chunking",
        "dyslexia_rendering",
        "focus_sounds",
        "gemini_acceleration",
        "stt_reading_assessment",
        "word_difficulty_analysis",
        "pronunciation_guides",
        "vocabulary_mastery_tracking",
        # v4.0
        "ner_character_graph",
        "vocab_batch_analysis",
        "tts_word_sync",
        "adaptive_quiz_difficulty_irt",
    ]

    # NOTE(review): the reported version is 3.1.0 although the module header
    # describes this service as v4.0 — confirm which one clients should see.
    report = {"status": "healthy", "version": "3.1.0"}
    report["rl_model_ready"] = rl_agent.model_ready
    report["features"] = enabled_features
    report["timestamp"] = time.time()
    return report


# ── Startup: Model Size Check (D7 compliance) ────────────────────────────────

@app.on_event("startup")
async def check_model_sizes():
    """
    Verify that model files on disk fit within the 500 MB offline spec (D7).

    Walks the ``models`` and ``services`` directories next to this file and
    the sibling ``../rl-engine`` directory, sums the sizes of files with a
    known model extension, and logs the outcome on the ``included.startup``
    logger: a warning naming the three largest files when the total exceeds
    the limit, an info line otherwise.

    Advisory only: the result is logged, never raised, and files that cannot
    be stat-ed are skipped.
    """
    import logging

    MODEL_SIZE_LIMIT_MB = 500
    # A tuple so it can be passed straight to str.endswith.
    MODEL_EXTENSIONS = (".bin", ".pt", ".gguf", ".safetensors", ".pkl", ".joblib")
    BYTES_PER_MB = 1024 * 1024

    # Directories to scan (relative to this file)
    base_dir = os.path.dirname(os.path.abspath(__file__))
    scan_dirs = [
        os.path.join(base_dir, "models"),
        os.path.join(base_dir, "services"),
        os.path.join(base_dir, "..", "rl-engine"),
    ]

    total_bytes = 0
    model_files: list[tuple[str, float]] = []  # (path, size in MB)

    for scan_dir in scan_dirs:
        if not os.path.isdir(scan_dir):
            continue
        for root, _, files in os.walk(scan_dir):
            for fname in files:
                if not fname.endswith(MODEL_EXTENSIONS):
                    continue
                fpath = os.path.join(root, fname)
                try:
                    size_bytes = os.path.getsize(fpath)
                except OSError:
                    # Unreadable or vanished file (e.g. a broken symlink):
                    # this check is best-effort, so skip it.
                    continue
                total_bytes += size_bytes
                model_files.append((fpath, size_bytes / BYTES_PER_MB))

    total_mb = total_bytes / BYTES_PER_MB
    logger = logging.getLogger("included.startup")

    if total_mb > MODEL_SIZE_LIMIT_MB:
        # Rank every model file, not only those above a fixed size, so the
        # warning still names culprits when the limit is exceeded by many
        # mid-sized files.
        largest = sorted(model_files, key=lambda item: item[1], reverse=True)[:3]
        logger.warning(
            "[MODEL SIZE] ⚠️  Total model files = %.1f MB — EXCEEDS %d MB offline spec (D7)! "
            "Consider INT4 quantization. Largest files: %s",
            total_mb,
            MODEL_SIZE_LIMIT_MB,
            ", ".join(f"{os.path.basename(path)} ({size:.0f} MB)" for path, size in largest),
        )
    else:
        logger.info(
            "[MODEL SIZE] ✅ Total model files = %.1f MB — within %d MB offline spec (D7).",
            total_mb,
            MODEL_SIZE_LIMIT_MB,
        )


# Script entry point: when this file is executed directly, serve the app with
# uvicorn on all network interfaces (0.0.0.0), port 8082. uvicorn is imported
# here so that merely importing this module does not require it.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8082)