IncludEd-AI / main.py
nkubana0's picture
initial: IncludEd AI service
162cb6f
"""
IncludEd AI Service – FastAPI Application v4.0
===============================================
v3.0 features retained +
New in v4.0 (ML Components for Dyslexia Support):
  • POST /ner/extract — Full NER character + location graph for a book
  • POST /ner/section-view — Spoiler-safe graph up to current section
  • POST /vocab/batch-analyze — Pre-compute vocabulary for a full book at upload
  • POST /tts/synthesize — TTS with word-level timestamps (disability-aware)
  • POST /quiz/record-attempt — Record quiz result → update IRT difficulty model
  • GET /quiz/recommend-difficulty — Get recommended difficulty for next quiz
  • GET /quiz/student-state — Full adaptive state for teacher dashboard
"""
from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
import os
from dotenv import load_dotenv
# Load environment variables IMMEDIATELY
load_dotenv(os.path.join(os.path.dirname(__file__), '.env'))
import asyncio
import time
from services.tts_service import TTSService
from services.accessibility_adapter import FreeAccessibilityAdapter
from services.rl_agent_service import RLAgentService
from services.smart_question_generator import SmartQuestionGenerator
from services.gemini_service import GeminiService
from services.simplification_service import SimplificationService
from services.learner_embedding import LearnerEmbedding, SessionMetrics
from services.comprehension_tracker import ComprehensionTracker
from services.teacher_intelligence import TeacherIntelligence
from services.teacher_recommendations import get_recommendation_engine, StudentRecommendation
from services.stt_service import STTAssessmentService
from services.word_difficulty_service import WordDifficultyService
from ml_pipeline import LiteratureAnalyzer, BookBrain
from ml_pipeline.quiz_generator import PedagogicalQuestionGenerator
from ml_pipeline.ner_extractor import get_ner_extractor
from ml_pipeline.vocab_analyzer import get_vocab_analyzer
from ml_pipeline.difficulty_adapter import get_difficulty_adapter
from services.tts_service import get_tts_service
from services.hf_inference_service import HFInferenceService
from services.pronunciation_service import PronunciationService
# FastAPI application object; every route below is registered on it.
# NOTE(review): the module docstring calls this service v4.0, but the OpenAPI
# version string here is still "3.0.0" — confirm which one is intended.
app = FastAPI(title="IncludEd AI Service", version="3.0.0")
# NOTE(review): wildcard origins together with allow_credentials=True lets any
# site issue credentialed cross-origin requests — consider an explicit origin
# list for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Lazy proxy — instantiates service on first attribute access ───────────────
class _Lazy:
def __init__(self, factory):
object.__setattr__(self, '_factory', factory)
object.__setattr__(self, '_instance', None)
def _load(self):
if object.__getattribute__(self, '_instance') is None:
factory = object.__getattribute__(self, '_factory')
object.__setattr__(self, '_instance', factory())
return object.__getattribute__(self, '_instance')
def __getattr__(self, name):
return getattr(self._load(), name)
def __call__(self, *args, **kwargs):
return self._load()(*args, **kwargs)
# ML pipeline singletons — loaded on first use, not at startup.
# Each name below is a _Lazy proxy: the factory handed to it runs only when an
# attribute of the proxy is first accessed (or the proxy is first called).
_literature_analyzer = _Lazy(LiteratureAnalyzer)
_book_brain = _Lazy(BookBrain)
_quiz_generator = _Lazy(PedagogicalQuestionGenerator)
question_gen = _Lazy(SmartQuestionGenerator)
accessibility_adapter = _Lazy(FreeAccessibilityAdapter)
rl_agent = _Lazy(RLAgentService)
# NOTE(review): `tts_service` (TTSService constructor) and `tts_svc` further
# down (get_tts_service factory) are both defined — confirm that two separate
# TTS handles are intended.
tts_service = _Lazy(TTSService)
# The API key is read from the environment when the proxy first loads,
# not at import time.
gemini_service = _Lazy(lambda: GeminiService(os.getenv("GEMINI_API_KEY")))
simplification_svc = _Lazy(SimplificationService)
learner_embedding = _Lazy(LearnerEmbedding)
comprehension_tracker = _Lazy(ComprehensionTracker)
teacher_intelligence = _Lazy(TeacherIntelligence)
stt_assessment = _Lazy(STTAssessmentService)
word_difficulty = _Lazy(WordDifficultyService)
ner_extractor = _Lazy(get_ner_extractor)
# The analyzer is handed the lazy gemini_service proxy rather than a concrete
# GeminiService instance.
vocab_analyzer = _Lazy(lambda: get_vocab_analyzer(gemini_service=gemini_service))
difficulty_adapter = _Lazy(get_difficulty_adapter)
tts_svc = _Lazy(get_tts_service)
# Token is likewise read from the environment on first load.
hf_inference = _Lazy(lambda: HFInferenceService(os.getenv("HF_API_TOKEN")))
pronunciation_svc = _Lazy(PronunciationService)
# ── Request/Response Models ──────────────────────────────────────────────────
# Request body for POST /reanalyze-text: raw text to analyse instead of an
# uploaded file.
class AnalyzeTextRequest(BaseModel):
    text: str
    # The endpoint substitutes "legacy_content" when this is empty or None.
    filename: Optional[str] = "document.txt"
    generate_questions: bool = True
    question_count: int = 5
# Response model shared by POST /analyze and POST /reanalyze-text.
class AnalyzeResponse(BaseModel):
    document_type: str
    title: str
    author: Optional[str] = None
    confidence: float
    units: List[Dict[str, Any]]
    flat_units: List[Dict[str, Any]]
    questions: List[Dict[str, Any]]
    metadata: Dict[str, Any]
    # Book Brain pre-analysis; both endpoints fill it with the keys
    # difficulty_map, vocabulary, characters, summary_stats, struggle_zones
    # and cultural_context_bank.
    book_brain: Optional[Dict[str, Any]] = None
# Request body for POST /simplify (highlight-to-understand).
class SimplifyRequest(BaseModel):
    highlighted_text: str
    book_title: str = ""
    author: str = ""
    doc_type: str = "generic"
    chapter_context: str = ""
    speaker: str = ""
    # Ignored by the endpoint when student_id is set: the level is then taken
    # from the learner embedding instead.
    reading_level: str = "intermediate"
    language: str = "en"
    # When both ids are present the endpoint also records the highlight in
    # the comprehension tracker.
    student_id: Optional[str] = None
    book_id: Optional[str] = None
# Request body for POST /comprehension/record: one section read by a student.
class ComprehensionRecordRequest(BaseModel):
    student_id: str
    book_id: str
    section_id: str
    section_title: str = ""
    chapter_title: str = ""
    time_spent_s: float = 0
    quiz_score: Optional[float] = None
    characters_seen: Optional[List[str]] = None
    section_text: str = ""  # raw section content for theme extraction (D4)
# Request body for POST /comprehension/highlight.
class HighlightRecordRequest(BaseModel):
    student_id: str
    book_id: str
    highlighted_text: str
    section_id: str
# Request body shared by POST /comprehension/vocab and
# POST /comprehension/vocab-mastered.
class VocabRecordRequest(BaseModel):
    student_id: str
    book_id: str
    word: str
# Request body for POST /learner/update. Every field except student_id is
# copied one-to-one into a SessionMetrics object by the endpoint.
class SessionUpdateRequest(BaseModel):
    student_id: str
    session_duration_s: float = 0
    words_read: int = 0
    reading_speed_wpm: float = 0
    backtrack_count: int = 0
    scroll_events: int = 0
    attention_lapses: int = 0
    highlights_made: int = 0
    vocab_lookups: int = 0
    time_of_day_hour: int = 12
    # NOTE(review): typed as float, not an enum/string — confirm the encoding
    # expected by SessionMetrics.
    disability_type: float = 0
    doc_type: str = "generic"
    # Pydantic copies field defaults per instance, so these list defaults are
    # not shared between requests.
    adaptations_applied: List[int] = []
    adaptation_accepted: List[bool] = []
    quiz_score: Optional[float] = None
    avg_dwell_time_ms: float = 0
    session_fatigue: float = 0
# Request body for POST /rl/predict.
class RLPredictRequest(BaseModel):
    # NOTE(review): expected length/layout of the vector is defined by the RL
    # agent service, not visible here.
    state_vector: List[float]
    content_type: float = 0.5  # 0.0=generic, 0.5=novel, 1.0=play
# Request body for POST /teacher/student-summary.
class StudentSummaryRequest(BaseModel):
    student_name: str
    student_id: str
    book_id: str
    class_average_chapter: int = 0
# Request body for POST /teacher/class-alerts.
class ClassAlertsRequest(BaseModel):
    # Passed through unchanged to teacher_intelligence.class_alerts.
    student_summaries: List[Dict[str, Any]]
    book_title: str = ""
# Request body for POST /teacher/recap.
class RecapTextRequest(BaseModel):
    student_id: str
    book_id: str
    language: str = "en"
# Request body for POST /teacher/common-highlights.
class CommonHighlightRequest(BaseModel):
    highlights: List[Dict[str, Any]]  # [{student_name, text, section_id, timestamp}]
    book_title: str = ""
    # Forwarded to teacher_intelligence.common_highlight_alerts as the
    # student-count threshold.
    min_students: int = 2
# ── Core Literature Analysis Endpoints ───────────────────────────────────────
@app.get("/")
async def root():
    """Health check: report that the service is up, with its name and version.

    The name and version are read from the FastAPI application object so the
    payload cannot drift from the metadata the app was created with.
    """
    return {"status": "healthy", "service": app.title, "version": app.version}
@app.post("/analyze", response_model=AnalyzeResponse, tags=["literature"])
async def analyze_pdf(
    file: UploadFile = File(...),
    generate_questions: bool = True,
    question_count: int = 5,
):
    """Analyse an uploaded document and attach Book Brain pre-analysis.

    The upload is read fully into memory and handed to the literature
    analyzer on a worker thread; the units it produces are then passed to
    Book Brain, also on a worker thread.
    """
    payload = await file.read()
    analysis = await asyncio.to_thread(
        _literature_analyzer.analyze,
        payload, file.filename, generate_questions, question_count,
    )

    # Book Brain pre-analysis works on the units produced above.
    brain = await asyncio.to_thread(
        _book_brain.analyze,
        analysis.units,
        analysis.document_type,
        analysis.language,
        analysis.title,
        analysis.author or "",
    )

    brain_fields = (
        "difficulty_map",
        "vocabulary",
        "characters",
        "summary_stats",
        "struggle_zones",
        "cultural_context_bank",
    )
    return AnalyzeResponse(
        document_type=analysis.document_type,
        title=analysis.title,
        author=analysis.author,
        confidence=analysis.confidence,
        units=analysis.units,
        flat_units=analysis.flat_units,
        questions=analysis.questions,
        metadata=analysis.metadata,
        book_brain={field: getattr(brain, field) for field in brain_fields},
    )
@app.post("/reanalyze-text", response_model=AnalyzeResponse, tags=["literature"])
async def reanalyze_text(req: AnalyzeTextRequest):
    """Run the literature analysis on raw text rather than an uploaded file.

    Mirrors ``/analyze``: the text goes through the literature analyzer and
    the resulting units through Book Brain, each on a worker thread.
    """
    # An empty or missing filename is replaced by a fixed placeholder.
    source_name = req.filename or "legacy_content"
    analysis = await asyncio.to_thread(
        _literature_analyzer.analyze_text,
        req.text, source_name, req.generate_questions, req.question_count,
    )

    brain = await asyncio.to_thread(
        _book_brain.analyze,
        analysis.units,
        analysis.document_type,
        analysis.language,
        analysis.title,
        analysis.author or "",
    )

    brain_fields = (
        "difficulty_map",
        "vocabulary",
        "characters",
        "summary_stats",
        "struggle_zones",
        "cultural_context_bank",
    )
    return AnalyzeResponse(
        document_type=analysis.document_type,
        title=analysis.title,
        author=analysis.author,
        confidence=analysis.confidence,
        units=analysis.units,
        flat_units=analysis.flat_units,
        questions=analysis.questions,
        metadata=analysis.metadata,
        book_brain={field: getattr(brain, field) for field in brain_fields},
    )
# ── Book Brain Standalone ────────────────────────────────────────────────────
# Request body for POST /book-brain/analyze: already-parsed units plus the
# book metadata that Book Brain takes as arguments.
class BookBrainRequest(BaseModel):
    units: List[Dict[str, Any]]
    doc_type: str = "generic"
    language: str = "en"
    title: str = ""
    author: str = ""
@app.post("/book-brain/analyze", tags=["book-brain"])
async def book_brain_analyze(req: BookBrainRequest):
    """Run the Book Brain pre-analysis on units that were parsed earlier."""
    brain = await asyncio.to_thread(
        _book_brain.analyze,
        req.units,
        req.doc_type,
        req.language,
        req.title,
        req.author,
    )
    # Expose the same six result attributes that /analyze nests under book_brain.
    exported = (
        "difficulty_map",
        "vocabulary",
        "characters",
        "summary_stats",
        "struggle_zones",
        "cultural_context_bank",
    )
    return {field: getattr(brain, field) for field in exported}
# ── Highlight-to-Understand (Simplification) ─────────────────────────────────
@app.post("/simplify", tags=["simplification"])
async def simplify_text(req: SimplifyRequest):
    """Simplify a highlighted passage using its book context.

    The simplification service's result is returned unchanged. When a
    student id is supplied, the reading level comes from the learner
    embedding instead of the request; when a book id is supplied as well,
    the highlight is also logged in the comprehension tracker.
    """
    if req.student_id:
        level = learner_embedding.get_reading_level(req.student_id)
    else:
        level = req.reading_level

    simplify_args = (
        req.highlighted_text,
        req.book_title,
        req.author,
        req.doc_type,
        req.chapter_context,
        req.speaker,
        level,
        req.language,
    )
    outcome = await asyncio.to_thread(simplification_svc.simplify, *simplify_args)

    known_reader = req.student_id and req.book_id
    if known_reader:
        # The section id is not part of this request, so it is logged empty.
        comprehension_tracker.record_highlight(
            req.student_id, req.book_id, req.highlighted_text, "",
        )
    return outcome
# ── Introduction Generation ──────────────────────────────────────────────────
# Request body for POST /introduction/generate.
class IntroductionRequest(BaseModel):
    title: str
    author: str
    content_summary: str = ""
    doc_type: str = "generic"
    # NOTE(review): not referenced by the endpoint's prompt or fallback text.
    language: str = "en"
@app.post("/introduction/generate", tags=["literature"])
async def generate_introduction(req: IntroductionRequest):
    """Generate a short pedagogical introduction for a book.

    Gemini is tried first when it reports itself available; a non-empty
    answer is returned with ``tier="gemini"``. Otherwise a static template
    built from the title, author and document type is returned with
    ``tier="template"``.
    """
    prompt = f"""Generate an engaging, highly accurate, and short pedagogical introduction (2-3 paragraphs) for a student about to read:
Title: {req.title}
Author: {req.author}
Content Genre: {req.doc_type}
Summary of initial content:
{req.content_summary}
Do not hallucinate plot points. Base your introduction strictly on the provided summary and the known context of the book. Focus on the themes, characters introduced, and why this work matters.
Format: Return only the text of the introduction.
"""
    system_instruction = "You are an expert literature teacher helping students with learning differences feel excited about reading."

    # Primary: Gemini. The client call is synchronous, so it runs on a worker
    # thread (as /teacher/insights does) to avoid stalling the event loop
    # for the duration of the model request.
    if gemini_service.is_available():
        intro = await asyncio.to_thread(
            gemini_service.generate, prompt, system_instruction
        )
        if intro:
            return {"introduction": intro, "tier": "gemini"}

    # Fallback: static template.
    return {
        "introduction": f"Welcome to {req.title} by {req.author}. This {req.doc_type} is a classic work that explores important themes and characters.",
        "tier": "template"
    }
# ── Quiz Generation ─────────────────────────────────────────────────────────
# Request body for POST /quiz/generate.
class QuizGenerateRequest(BaseModel):
    content: str
    doc_type: str = "generic"
    count: int = 5
    language: str = "en"
@app.post("/quiz/generate", tags=["quiz"])
async def quiz_generate(req: QuizGenerateRequest):
    """Generate quiz questions for the supplied content on demand."""
    generator_args = (req.content, req.doc_type, req.count, req.language)
    generated = await asyncio.to_thread(_quiz_generator.generate, *generator_args)
    return {"questions": generated}
# ── Character Intelligence ────────────────────────────────────────────────────
# Request body for POST /characters/describe.
class CharacterDescribeRequest(BaseModel):
    character: str
    # Text to describe the character from; the endpoint's own notes say this
    # should be the text read so far, to avoid spoilers.
    context: str
    max_context_chars: Optional[int] = 4000
# Request body for POST /characters/extract-names.
class CharacterNERRequest(BaseModel):
    text: str
@app.post("/characters/describe", tags=["characters"])
async def character_describe(req: CharacterDescribeRequest):
    """Describe a character from the supplied context text (no spoilers beyond it)."""
    # Imported on demand so the character service is only loaded when used.
    from services.character_service import get_character_service

    describe = get_character_service().describe_character
    description = await asyncio.to_thread(
        describe, req.character, req.context, req.max_context_chars
    )
    return description
@app.post("/characters/extract-names", tags=["characters"])
async def character_extract_names(req: CharacterNERRequest):
    """Extract person names from text and return them with their count."""
    # Imported on demand so the character service is only loaded when used.
    from services.character_service import get_character_service

    extractor = get_character_service()
    found = await asyncio.to_thread(extractor.extract_person_names, req.text)
    return {"names": found, "count": len(found)}
# ── Comprehension Tracking ───────────────────────────────────────────────────
@app.post("/comprehension/record", tags=["comprehension"])
async def record_section_read(req: ComprehensionRecordRequest):
    """Log in the comprehension tracker that a student read a section."""
    tracker_args = (
        req.student_id,
        req.book_id,
        req.section_id,
        req.section_title,
        req.chapter_title,
        req.time_spent_s,
        req.quiz_score,
        req.characters_seen,
        req.section_text,
    )
    comprehension_tracker.record_section_read(*tracker_args)
    return {"status": "recorded"}
@app.post("/comprehension/highlight", tags=["comprehension"])
async def record_highlight(req: HighlightRecordRequest):
    """Log a highlighted passage for a student, book and section."""
    tracker_args = (req.student_id, req.book_id, req.highlighted_text, req.section_id)
    comprehension_tracker.record_highlight(*tracker_args)
    return {"status": "recorded"}
@app.post("/comprehension/vocab", tags=["comprehension"])
async def record_vocab_lookup(req: VocabRecordRequest):
    """Log that a student looked up a vocabulary word."""
    tracker_args = (req.student_id, req.book_id, req.word)
    comprehension_tracker.record_vocab_lookup(*tracker_args)
    return {"status": "recorded"}
@app.post("/comprehension/vocab-mastered", tags=["comprehension"])
async def record_vocab_mastered(req: VocabRecordRequest):
    """Log that a student has mastered a vocabulary word."""
    tracker_args = (req.student_id, req.book_id, req.word)
    comprehension_tracker.record_vocab_mastered(*tracker_args)
    return {"status": "recorded"}
# Request body for POST /vocab/explain.
class VocabExplainRequest(BaseModel):
    word: str
    context: str = ""
    # NOTE(review): not referenced by the endpoint's prompt.
    language: str = "en"
@app.post("/vocab/explain", tags=["vocabulary"])
async def explain_vocab(req: VocabExplainRequest):
    """Generate a child-friendly explanation for a word in context.

    Returns Gemini's JSON answer when the service is available and yields a
    truthy result; otherwise returns a generic placeholder with the keys
    ``modern_meaning``, ``analogy`` and ``category``.
    """
    prompt = f"""Explain the word "{req.word}" in this context: "{req.context}"
Provide:
1. modern_meaning: a very simple definition for a 10-year old.
2. analogy: a simple comparison helpful for a child.
3. category: a short label for the word type (e.g. Vocabulary, Archaic, Idiom).
Respond in JSON with keys: "modern_meaning", "analogy", "category".
"""
    system_instruction = "You are a kind teacher explaining difficult words to children with dyslexia and ADHD. Keep it simple, visual, and encouraging."

    # Primary: Gemini. The client call is synchronous, so it runs on a worker
    # thread to keep the event loop free while the model request is pending.
    if gemini_service.is_available():
        res = await asyncio.to_thread(
            gemini_service.generate_json, prompt, system_instruction
        )
        if res:
            return res

    # Fallback: generic placeholder explanation.
    return {
        "modern_meaning": "A word used to describe something in the story.",
        "analogy": "Like a puzzle piece that fits in this sentence.",
        "category": "Vocabulary"
    }
# ── Word Phonics & Pronunciation (Project Revamp) ───────────────────────────
# Request body shared by POST /word/phonics and POST /word/pronunciation-guide.
class PhonicsRequest(BaseModel):
    word: str
@app.post("/word/phonics", tags=["vocabulary"])
async def get_word_phonics(req: PhonicsRequest):
    """Return the pronunciation service's phonics breakdown for a word."""
    breakdown = pronunciation_svc.get_phonics_breakdown(req.word)
    return breakdown
@app.post("/word/pronunciation-guide", tags=["vocabulary"])
async def get_word_pronunciation_guide(req: PhonicsRequest):
    """Return a pronunciation guide for a word.

    Currently this returns the same phonics breakdown as ``/word/phonics``,
    produced by the dedicated pronunciation service.
    """
    # TODO: optionally refine the phonetics through HF Inference when
    # USE_HF_INFERENCE=1. The earlier placeholder branch did nothing except
    # force the HF client to be built, so it has been dropped until the
    # feature exists.
    return pronunciation_svc.get_phonics_breakdown(req.word)
@app.get("/comprehension/summary", tags=["comprehension"])
async def get_comprehension_summary(student_id: str, book_id: str):
    """Return the tracker's comprehension summary for one student and book."""
    summary = comprehension_tracker.get_summary(student_id, book_id)
    return summary
@app.get("/comprehension/recap", tags=["comprehension"])
async def get_comprehension_recap(student_id: str, book_id: str):
    """Return the 'Story So Far' recap data for one student and book."""
    recap = comprehension_tracker.get_recap(student_id, book_id)
    return recap
# ── Learner Embedding ────────────────────────────────────────────────────────
@app.post("/learner/update", tags=["learner"])
async def update_learner_embedding(req: SessionUpdateRequest):
    """Fold a completed reading session into the student's learner embedding.

    Returns the embedding length and the session count the embedding store
    holds for this student (0 when it has none).
    """
    # Request fields that map one-to-one onto SessionMetrics keyword arguments.
    metric_fields = (
        "session_duration_s",
        "words_read",
        "reading_speed_wpm",
        "backtrack_count",
        "scroll_events",
        "attention_lapses",
        "highlights_made",
        "vocab_lookups",
        "time_of_day_hour",
        "disability_type",
        "doc_type",
        "adaptations_applied",
        "adaptation_accepted",
        "quiz_score",
        "avg_dwell_time_ms",
        "session_fatigue",
    )
    metrics = SessionMetrics(**{name: getattr(req, name) for name in metric_fields})
    embedding = learner_embedding.update_from_session(req.student_id, metrics)

    embedding_dim = len(embedding)
    # NOTE(review): reads a private attribute of the embedding store.
    session_count = learner_embedding._session_counts.get(req.student_id, 0)
    return {
        "status": "updated",
        "embedding_dim": embedding_dim,
        "session_count": session_count,
    }
@app.get("/learner/profile", tags=["learner"])
async def get_learner_profile(student_id: str):
    """Return the human-readable profile summary for a student."""
    profile = learner_embedding.get_profile_summary(student_id)
    return profile
@app.get("/learner/reading-level", tags=["learner"])
async def get_reading_level(student_id: str):
    """Return the reading level derived from the student's embedding."""
    return {
        "student_id": student_id,
        "reading_level": learner_embedding.get_reading_level(student_id),
    }
# Request body for POST /learner/highlight-feedback.
class HighlightFeedbackRequest(BaseModel):
    student_id: str
    # Any value other than the four named categories is treated as "general"
    # by the endpoint.
    category: str  # figurative_language | archaic_idiom | cultural_reference | vocabulary_gap | general
    # NOTE(review): not read by the endpoint.
    highlighted_text: str = ""
    difficulty_estimate: float = 0.5  # 0-1 how hard the passage seemed
@app.post("/learner/highlight-feedback", tags=["learner"])
async def learner_highlight_feedback(req: HighlightFeedbackRequest):
    """Apply a targeted EMA update to the learner embedding for one highlight.

    Which embedding slots move depends on the highlight category:
      figurative_language -> vec[95] (literary device recognition) and
                             vec[67] (help-seeking)
      archaic_idiom       -> vec[65] (frustration) and
                             vec[117] (vocab lookup frequency)
      cultural_reference  -> vec[67] (help-seeking) and
                             vec[65] (mild frustration)
      vocabulary_gap      -> vec[117] (vocab lookup frequency) and
                             vec[65] (mild frustration)
      anything else       -> vec[116] (highlight frequency)

    The whole vector is then clipped to [0, 1], cached and saved.
    """
    import numpy as np

    # Stronger than the session-end EMA (alpha=0.15): this is real-time feedback.
    ALPHA = 0.2
    vec = learner_embedding.get_or_create(req.student_id)
    diff = req.difficulty_estimate
    category = req.category

    def blend(slot, weighted_signal):
        # One EMA step; the caller supplies the signal already scaled by ALPHA.
        vec[slot] = (1 - ALPHA) * vec[slot] + weighted_signal

    if category == "figurative_language":
        # Struggled with a literary device: recognition signal + help-seeking.
        blend(95, ALPHA * diff)
        blend(67, ALPHA * 0.7)
    elif category == "archaic_idiom":
        # Archaic language: frustration + high vocab lookup.
        blend(65, ALPHA * diff)
        blend(117, ALPHA * 0.8)
    elif category == "cultural_reference":
        # Cultural gap: help-seeking + mild frustration.
        blend(67, ALPHA * 0.75)
        blend(65, ALPHA * diff * 0.5)
    elif category == "vocabulary_gap":
        # Pure vocabulary difficulty.
        blend(117, ALPHA * 0.7)
        blend(65, ALPHA * diff * 0.4)
    else:
        # General highlight: only the highlight-frequency slot moves.
        blend(116, ALPHA * 0.6)

    clipped = np.clip(vec, 0.0, 1.0)
    # NOTE(review): writes through private members of the embedding store.
    learner_embedding._cache[req.student_id] = clipped
    learner_embedding._save(req.student_id, clipped)
    return {
        "student_id": req.student_id,
        "category": req.category,
        "embedding_updated": True,
    }
# ── Teacher Intelligence ─────────────────────────────────────────────────────
@app.post("/teacher/student-summary", tags=["teacher"])
async def teacher_student_summary(req: StudentSummaryRequest):
    """Build a natural-language summary of one student's progress on a book.

    Combines the comprehension tracker's summary with the learner profile
    and passes both to the teacher-intelligence service.
    """
    comprehension = comprehension_tracker.get_summary(req.student_id, req.book_id)
    learner_profile = learner_embedding.get_profile_summary(req.student_id)
    summary = teacher_intelligence.student_summary(
        req.student_name,
        comprehension,
        learner_profile,
        req.class_average_chapter,
    )
    return summary
@app.post("/teacher/class-alerts", tags=["teacher"])
async def teacher_class_alerts(req: ClassAlertsRequest):
    """Produce class-wide pattern alerts from per-student summaries."""
    return {
        "alerts": teacher_intelligence.class_alerts(
            req.student_summaries, req.book_title
        )
    }
@app.post("/teacher/common-highlights", tags=["teacher"])
async def teacher_common_highlights(req: CommonHighlightRequest):
    """Detect passages highlighted by several students (D6 deliverable).

    The teacher is alerted when at least ``min_students`` students
    highlighted the same passage.
    """
    detector_args = (req.highlights, req.book_title, req.min_students)
    found = teacher_intelligence.common_highlight_alerts(*detector_args)
    return {"alerts": found}
@app.post("/teacher/recap", tags=["teacher"])
async def teacher_generate_recap(req: RecapTextRequest):
    """Generate 'Story So Far' recap text and return it with its source data."""
    data = comprehension_tracker.get_recap(req.student_id, req.book_id)
    text = teacher_intelligence.generate_recap_text(data, req.language)
    return {"recap": text, "data": data}
# ── Teacher Recommendations (D6: Actionable insights) ──────────────────────────
# Request body for POST /teacher/recommendations/student.
class StudentRecommendationRequest(BaseModel):
    student_id: str
    student_name: str
    # Both structures are passed through unchanged to the recommendation engine.
    student_profile: Dict[str, Any]
    recent_sessions: List[Dict[str, Any]]
# Request body for POST /teacher/recommendations/class.
class ClassRecommendationRequest(BaseModel):
    class_id: str
    students_profiles: List[Dict[str, Any]]
    current_book: Optional[Dict[str, Any]] = None
# Request body for POST /teacher/alerts/risk.
class RiskAlertRequest(BaseModel):
    student_id: str
    student_name: str
    student_profile: Dict[str, Any]
    recent_sessions: List[Dict[str, Any]]
    # Forwarded to the recommendation engine's generate_risk_alert.
    alert_threshold: float = 0.3
@app.post("/teacher/recommendations/student", tags=["recommendations"])
async def get_student_recommendations(req: StudentRecommendationRequest):
    """Generate 1-3 actionable recommendations for an individual student (D6).

    Example uses:
      - low attention score -> schedule sessions in the morning
      - high frustration -> simplified text or smaller chunks
      - frequent backtracking -> pre-reading vocabulary
    """
    engine = get_recommendation_engine()
    suggestions = engine.recommend_for_student(
        req.student_id,
        req.student_name,
        req.student_profile,
        req.recent_sessions,
    )

    # Flatten each recommendation object into a plain JSON-friendly dict.
    serialised = []
    for item in suggestions:
        serialised.append({
            "priority": item.priority,
            "action": item.action,
            "rationale": item.rationale,
            "expected_impact": item.expected_impact,
        })
    return {"student_id": req.student_id, "recommendations": serialised}
@app.post("/teacher/recommendations/class", tags=["recommendations"])
async def get_class_recommendations(req: ClassRecommendationRequest):
    """Analyse class-wide patterns and generate cohort recommendations (D6).

    Patterns it is meant to detect include:
      - widespread attention drift -> 10-minute chunks with breathing breaks
      - high vocabulary lookup rates -> pre-teach key words
      - subgroup disengagement -> check-in or switch book genre
    """
    engine = get_recommendation_engine()
    suggestions = engine.recommend_for_class(
        req.class_id,
        req.students_profiles,
        req.current_book,
    )

    # Flatten each recommendation object into a plain JSON-friendly dict.
    serialised = []
    for item in suggestions:
        serialised.append({
            "pattern": item.pattern,
            "affected_students": item.affected_students,
            "intervention": item.intervention,
            "resource": item.resource,
            "timeline": item.timeline,
        })
    return {"class_id": req.class_id, "recommendations": serialised}
@app.post("/teacher/alerts/risk", tags=["recommendations"])
async def get_risk_alert(req: RiskAlertRequest):
    """Identify a student at risk of disengagement or learning loss (D6).

    A high-priority alert is expected when the student shows:
      - critically low attention (< 0.3)
      - high frustration signals
      - few sessions completed (early dropout indicators)
      - declining comprehension scores

    When the engine returns nothing truthy, the response carries
    ``alert: None`` together with ``status: "low_risk"``.
    """
    engine = get_recommendation_engine()
    alert = engine.generate_risk_alert(
        req.student_id,
        req.student_name,
        req.student_profile,
        req.recent_sessions,
        req.alert_threshold,
    )
    if not alert:
        return {"alert": None, "status": "low_risk"}
    return {"alert": alert}
# ── Poem Analysis (Phase 2 + Phase 7) ────────────────────────────────────────
# Request body for POST /poem/analyze.
class PoemAnalyzeRequest(BaseModel):
    text: str
    language: str = "en"
@app.post("/poem/analyze", tags=["poem"])
async def poem_analyze(req: PoemAnalyzeRequest):
    """Analyse a poem stanza by stanza (emotion and rhyme scheme).

    Returns:
        {
            "stanzas": [
                {
                    "stanza_index": int,
                    "lines": [str],
                    "emotion": str,
                    "intensity": float,
                    "rhyme_scheme": str,
                    "end_words": [str],
                    "color_tint": str
                }
            ],
            "dominant_emotion": str,
            "rhyme_pattern": str   # taken from the first stanza
        }

    An empty analysis yields no stanzas, a "neutral" dominant emotion and a
    "free verse" rhyme pattern.
    """
    from collections import Counter as _Counter
    from ml_pipeline.emotion_analyzer import get_emotion_analyzer

    stanzas = await asyncio.to_thread(
        get_emotion_analyzer().analyze_poem_stanzas, req.text, req.language
    )
    if not stanzas:
        return {"stanzas": [], "dominant_emotion": "neutral", "rhyme_pattern": "free verse"}

    # Dominant emotion = most frequent non-neutral stanza emotion.
    tally = _Counter()
    for stanza in stanzas:
        emotion = stanza["emotion"]
        if emotion != "neutral":
            tally[emotion] += 1
    if tally:
        dominant = tally.most_common(1)[0][0]
    else:
        dominant = "neutral"

    # The overall pattern is simply the first stanza's scheme; stanzas is
    # known to be non-empty at this point.
    return {
        "stanzas": stanzas,
        "dominant_emotion": dominant,
        "rhyme_pattern": stanzas[0]["rhyme_scheme"],
    }
# ── Helper Endpoints ─────────────────────────────────────────────────────────
@app.post("/adapt-text")
async def adapt_text(req: Dict[str, Any]):
    """Batch adaptation endpoint used by background workers (D2).

    Reads ``text`` and ``doc_type`` from the JSON body, runs the
    simplification service with no book context at the "intermediate"
    level in English, and returns the simplified text plus the tier used.
    """
    source_text = req.get("text", "")
    if not source_text:
        return {"adaptedText": "", "strategy": "empty"}

    simplify_args = (
        source_text,
        "",  # book_title
        "",  # author
        req.get("doc_type", "generic"),
        "",  # chapter_context
        "",  # speaker
        "intermediate",
        "en",
    )
    outcome = await asyncio.to_thread(simplification_svc.simplify, *simplify_args)

    # Fall back to the untouched text / a default tier label when a key is missing.
    adapted = outcome.get("simple_version", source_text)
    strategy = outcome.get("tier", "rule_based")
    return {"adaptedText": adapted, "strategy": strategy}
@app.post("/rl/predict", tags=["rl"])
async def rl_predict(req: RLPredictRequest):
    """Ask the RL agent for a pedagogical action recommendation.

    ``fallback`` in the response is true when the agent reports that its
    model is not ready.
    """
    prediction = rl_agent.predict_from_state_vector(req.state_vector, req.content_type)
    action_id, action_label = prediction
    used_fallback = not rl_agent.model_ready
    return {
        "action_id": action_id,
        "action_label": action_label,
        "fallback": used_fallback,
    }
# JSON body for POST /tts/generate.
class TTSGenerateRequest(BaseModel):
    text: str


@app.post("/tts/generate")
async def generate_tts(request: TTSGenerateRequest):
    """Synthesise speech for ``text`` and return the TTS service's result.

    The body is a JSON object with a ``text`` field. The parameter was
    previously annotated ``Any``, which FastAPI treats as a required query
    string, so ``request.text`` could never succeed.
    """
    return await tts_service.generate_with_timestamps(text=request.text)
@app.post("/session/start")
async def start_session(request: Optional[Dict[str, Any]] = None):
    """Placeholder: accept an optional JSON body and return a fixed mock session id.

    The parameter was previously annotated ``Any``, which FastAPI exposes as
    a required ``request`` query parameter; callers posting a JSON body
    without it were rejected with 422. The body is accepted and ignored.
    """
    return {"session_id": "mock_session"}
@app.post("/session/telemetry")
async def push_telemetry(request: Optional[Dict[str, Any]] = None):
    """Placeholder: accept an optional JSON telemetry body and acknowledge it.

    The parameter was previously annotated ``Any``, which FastAPI exposes as
    a required ``request`` query parameter; callers posting a JSON body
    without it were rejected with 422. The body is accepted and ignored.
    """
    return {"status": "ok"}
@app.post("/session/end")
async def end_session(request: Optional[Dict[str, Any]] = None):
    """Placeholder: accept an optional JSON body and report the session finished.

    The parameter was previously annotated ``Any``, which FastAPI exposes as
    a required ``request`` query parameter; callers posting a JSON body
    without it were rejected with 422. The body is accepted and ignored.
    """
    return {"status": "finished"}
@app.post("/teacher/insights", tags=["teacher"])
async def teacher_insights(req: Dict[str, Any]):
    """Generate natural-language teaching insights from analytics data via Gemini.

    Expects a JSON body with an ``analytics_data`` object. Returns a fixed
    message when that data is missing or empty, when Gemini is not
    configured, or when generation raises.
    """
    import json

    analytics_data = req.get("analytics_data", {})
    if not analytics_data:
        return {"insights": "No data available to generate insights."}
    if not gemini_service.is_available():
        return {"insights": "Gemini AI service is not configured. Please check API keys."}

    prompt = f"""You are an expert special education teacher. Analyze the following student platform data and provide actionable teaching insights, specifically focusing on ADHD and Dyslexia.
Data: {json.dumps(analytics_data)}
Format your response as a professional, encouraging report. Include:
- A brief summary of overall class engagement.
- Specific insights regarding students with ADHD/Dyslexia.
- Actionable recommendations based on the struggle zones and quiz scores.
Keep it strictly under 250 words."""

    try:
        # The Gemini call is synchronous; run it off the event loop. The
        # module-level asyncio import is used (the redundant local re-import
        # has been removed).
        insights = await asyncio.to_thread(gemini_service.generate, prompt)
        return {"insights": insights}
    except Exception as e:
        # Endpoint boundary: report the failure and degrade to a fixed message.
        print(f"Error generating insights: {e}")
        return {"insights": "Error generating insights."}
# ── STT Reading Assessment ──────────────────────────────────────────────────
# Request body for POST /stt/assess.
class STTAssessmentRequest(BaseModel):
    expected_text: str
    spoken_text: str
    duration_seconds: float = 0
    # When both ids are present the endpoint also logs the assessment in the
    # comprehension tracker.
    student_id: Optional[str] = None
    book_id: Optional[str] = None
@app.post("/stt/assess", tags=["stt"])
async def assess_reading(req: STTAssessmentRequest):
    """Assess a student's reading by comparing expected and spoken text.

    Returns the assessment service's result (accuracy, WPM, missed words,
    mispronunciations and feedback, per the service's own description).
    When student and book ids are both supplied, the attempt is also logged
    in the comprehension tracker under the section id "stt_assessment".
    """
    assessment = await asyncio.to_thread(
        stt_assessment.assess_reading,
        req.expected_text, req.spoken_text, req.duration_seconds,
    )

    has_student_context = req.student_id and req.book_id
    if has_student_context:
        # accuracy is divided by 100 before being stored as the quiz score —
        # presumably it is reported as a percentage; confirm against the service.
        comprehension_tracker.record_section_read(
            req.student_id,
            req.book_id,
            "stt_assessment",
            "Reading Assessment",
            "",
            req.duration_seconds,
            assessment.get("accuracy", 0) / 100,
        )
    return assessment
# ── Word Difficulty & Pronunciation ─────────────────────────────────────────
# Request body for POST /vocab/difficulty.
class WordDifficultyRequest(BaseModel):
    text: str
    # Forwarded to word_difficulty.analyze_passage as its threshold argument.
    difficulty_threshold: float = 0.5
@app.post("/vocab/difficulty", tags=["vocabulary"])
async def analyze_word_difficulty(req: WordDifficultyRequest):
    """Find the difficult words in a passage, with pronunciation guides."""
    hard_words = await asyncio.to_thread(
        word_difficulty.analyze_passage, req.text, req.difficulty_threshold
    )
    return {"difficult_words": hard_words, "count": len(hard_words)}
# Request body for POST /vocab/pronunciation.
class PronunciationRequest(BaseModel):
    word: str
@app.post("/vocab/pronunciation", tags=["vocabulary"])
async def get_pronunciation(req: PronunciationRequest):
    """Return pronunciation, syllable count and difficulty for a single word."""
    word = req.word
    guide = word_difficulty.generate_pronunciation(word)
    syllable_count = word_difficulty.count_syllables(word)
    score = word_difficulty.estimate_difficulty(word)
    return {
        "word": word,
        "pronunciation": guide,
        "syllables": syllable_count,
        "difficulty": round(score, 3),  # three decimal places
    }
# ── NER / Character Graph ─────────────────────────────────────────────────────
# Request body for POST /ner/extract.
class NERExtractRequest(BaseModel):
    sections: List[str]  # list of chapter/section texts
    # The endpoint passes "" to the extractor when this is empty or None.
    title: Optional[str] = ""
    existing_characters: Optional[List[str]] = None
# Request body for POST /ner/section-view: a previously extracted graph plus
# the section index up to which it should be filtered.
class NERSectionViewRequest(BaseModel):
    characters: List[Any]
    relationships: List[Any]
    locations: List[Any]
    up_to_section: int
@app.post("/ner/extract", tags=["characters"])
async def ner_extract(req: NERExtractRequest):
    """Extract the full character + location graph from all book sections.

    Intended to be called once at upload time, with the result stored in
    Literature.bookBrain. The extractor's graph is returned unchanged:
    characters (with importance, first_seen_index, relationships),
    a flat relationships list, and locations.
    """
    book_title = req.title or ""
    extractor_args = (req.sections, book_title, req.existing_characters)
    return await asyncio.to_thread(ner_extractor.extract, *extractor_args)
@app.post("/ner/section-view", tags=["characters"])
async def ner_section_view(req: NERSectionViewRequest):
    """Return a spoiler-safe view of the graph up to the current section.

    The caller (CharacterMapPanel.tsx) sends the full graph back; it is
    reassembled here and filtered by the NER extractor.
    """
    graph = dict(
        characters=req.characters,
        relationships=req.relationships,
        locations=req.locations,
    )
    visible = ner_extractor.extract_for_section(graph, req.up_to_section)
    return visible
# ── Vocabulary Batch Analysis ─────────────────────────────────────────────────
class VocabBatchRequest(BaseModel):
    # Request body for POST /vocab/batch-analyze.

    # Chapter texts; forwarded as the first argument to
    # vocab_analyzer.analyze_book.
    sections: List[str]
    # Forwarded unchanged as the second argument to analyze_book.
    # NOTE(review): presumably parallel to `sections` (one title per
    # chapter) — confirm against the analyzer.
    section_titles: Optional[List[str]] = None
class VocabSectionRequest(BaseModel):
    # Request body for POST /vocab/section-words. The handler passes the
    # fields to VocabAnalyzer.words_for_section in the order
    # (all_words, section_text, chapter_index, max_display).

    # Text of the section currently being viewed.
    section_text: str
    # Previously computed word list for the book (untyped entries).
    all_words: List[Any]
    chapter_index: int = 0
    # NOTE(review): presumably an upper bound on returned words — confirm
    # against VocabAnalyzer.words_for_section.
    max_display: int = 20
class VocabIdentifyRequest(BaseModel):
    # Request body for POST /vocab/identify.

    # Text snippet to scan for hard words.
    text: str
    # NOTE(review): accepted but not read by the /vocab/identify handler as
    # written in this file — confirm whether it should be forwarded.
    language: str = "en"
    # Used by the handler as a slice bound on the list of detected words.
    max_words: int = 8
@app.post("/vocab/batch-analyze", tags=["vocabulary"])
async def vocab_batch_analyze(req: VocabBatchRequest):
    """
    Pre-compute the vocabulary analysis for an entire book.

    Runs ``vocab_analyzer.analyze_book`` over the section texts (and optional
    titles) on a worker thread. Meant to run at upload time, with the result
    stored in Literature.bookBrain.vocabulary.

    Returns ``{"vocabulary": <analyzer result>, "count": <its length>}``.
    The analyzer result is expected to be a flat list of word dicts (schema
    in vocab_analyzer.py).
    """
    # NOTE(review): vocab_analyzer is the module-level analyzer; it is
    # expected to be constructed with gemini_service elsewhere in this
    # module — confirm.
    book_vocabulary = await asyncio.to_thread(
        vocab_analyzer.analyze_book, req.sections, req.section_titles
    )
    payload = {"vocabulary": book_vocabulary}
    payload["count"] = len(book_vocabulary)
    return payload
@app.post("/vocab/section-words", tags=["vocabulary"])
async def vocab_section_words(req: VocabSectionRequest):
    """
    Narrow the book-wide vocabulary down to the current section.

    Delegates to ``VocabAnalyzer.words_for_section`` with the precomputed
    word list, the section text, the chapter index and the display limit,
    and wraps the outcome as ``{"words": [...], "count": n}``.
    Used by VocabSidebar.tsx on chapter navigation.
    """
    # Imported at call time, as elsewhere in this module.
    from ml_pipeline.vocab_analyzer import VocabAnalyzer

    section_vocab = VocabAnalyzer.words_for_section(
        req.all_words, req.section_text, req.chapter_index, req.max_display
    )
    return {"words": section_vocab, "count": len(section_vocab)}
@app.post("/vocab/identify", tags=["vocabulary"])
async def vocab_identify(req: VocabIdentifyRequest):
    """
    Identify hard words in a text snippet and explain each one (proactive mode).

    Steps:
      1. Run the analyzer's difficulty service over ``req.text`` with a
         0.45 threshold.
      2. Keep at most ``req.max_words`` of the returned words; a negative
         ``max_words`` is treated as 0.
      3. Call ``analyzer.explain_word`` once per kept word, sequentially,
         each call on a worker thread.

    Returns ``{"words": [...], "count": n}`` where every entry is whatever
    ``explain_word`` returned (expected: child-friendly definition plus
    analogy; the schema is owned by the analyzer).

    NOTE(review): ``req.language`` is accepted by the request model but is
    not used here.
    """
    from ml_pipeline.vocab_analyzer import get_vocab_analyzer

    # Only hand Gemini to the analyzer when the service reports itself usable.
    gemini = gemini_service if gemini_service.is_available() else None
    analyzer = get_vocab_analyzer(gemini_service=gemini)

    # A 0.45 threshold is used for proactive suggestions so that more
    # "stretch" words are caught. This reaches into the analyzer's wrapped
    # WordDifficultyService (private attribute).
    candidates = await asyncio.to_thread(
        analyzer._difficulty_svc.analyze_passage,
        req.text,
        difficulty_threshold=0.45,
    )

    # Clamp the limit: a negative bound would make the slice drop items from
    # the END (e.g. [:-2] keeps all but the last two) instead of capping the
    # count, which could trigger an enrichment call for almost every word.
    limit = max(0, req.max_words)
    selected = candidates[:limit]

    results = []
    for entry in selected:
        enriched = await asyncio.to_thread(
            analyzer.explain_word,
            entry["word"],
            entry["context"],
            0,  # chapter index placeholder
        )
        results.append(enriched)
    return {"words": results, "count": len(results)}
# ── TTS with word-level sync ──────────────────────────────────────────────────
class TTSSynthesizeRequest(BaseModel):
    # Request body for POST /tts/synthesize. Every field is forwarded by
    # keyword to tts_svc.synthesize.

    # Text to be spoken.
    text: str
    # none | dyslexia | adhd | both — the handler substitutes "none" when
    # this is None or empty.
    disability_type: Optional[str] = "none"
    # The handler substitutes "english" when this is None or empty.
    language: Optional[str] = "english"
    # Passed through unchanged (None when not supplied).
    # NOTE(review): presumably override the automatically chosen voice and
    # speaking rate — confirm against TTSService.
    voice_override: Optional[str] = None
    rate_override: Optional[str] = None
@app.post("/tts/synthesize", tags=["tts"])
async def tts_synthesize(req: TTSSynthesizeRequest):
    """
    Turn text into speech with word-level timing.

    Forwards the request to ``tts_svc.synthesize``, defaulting the disability
    type to "none" and the language to "english" when they are not provided,
    and returns the service's result untouched.

    Behavior expected of the service (implemented in TTSService, not here):
    it picks a disability-appropriate voice and rate, chunks long texts at
    sentence boundaries, and offsets timestamps across chunks so they are
    absolute positions.

    Expected result keys: audio_base64 (MP3), timestamps
    [{word, start_ms, end_ms}], duration_ms, voice, rate, word_count.
    """
    synth_options = {
        "text": req.text,
        "disability_type": req.disability_type or "none",
        "language": req.language or "english",
        "voice_override": req.voice_override,
        "rate_override": req.rate_override,
    }
    return await tts_svc.synthesize(**synth_options)
# ── Adaptive Quiz Difficulty ──────────────────────────────────────────────────
class QuizAttemptRequest(BaseModel):
    # Request body for POST /quiz/record-attempt. Fields are forwarded by
    # keyword to difficulty_adapter.record_attempt.

    student_id: str
    literature_id: str
    chapter_index: int = 0
    # Fraction correct, 0.0–1.0; the handler clamps it into that range
    # before recording.
    score: float
    # easy | medium | hard — the handler substitutes "medium" when this is
    # None or empty.
    difficulty: Optional[str] = "medium"
    # The handler substitutes "none" when this is None or empty.
    disability_type: Optional[str] = "none"
class QuizDifficultyQuery(BaseModel):
    # Request body for POST /quiz/recommend-difficulty. Fields are forwarded
    # by keyword to difficulty_adapter.recommend_difficulty.

    student_id: str
    literature_id: str
    # The handler substitutes "none" when this is None or empty.
    disability_type: Optional[str] = "none"
@app.post("/quiz/record-attempt", tags=["quiz"])
async def quiz_record_attempt(req: QuizAttemptRequest):
    """
    Record a completed quiz attempt and update the student's IRT ability model.

    The score is clamped into [0.0, 1.0] before it is passed to
    ``difficulty_adapter.record_attempt``; a missing difficulty or disability
    type falls back to "medium" / "none". The adapter's result is returned
    unchanged.

    Expected result keys (defined by the adapter):
    - next_difficulty: recommended difficulty for the next quiz
    - theta: updated ability estimate (-2.5 to +2.5)
    - performance_message: encouraging feedback for the student
    - recommendation: teaching insight for the teacher

    Raises:
        HTTPException: 422 when ``score`` is NaN.
    """
    import math

    # NaN must be rejected explicitly: every comparison with NaN is False,
    # so max(0.0, min(1.0, nan)) evaluates to 1.0 and the attempt would be
    # recorded as a perfect score.
    if math.isnan(req.score):
        raise HTTPException(
            status_code=422,
            detail="score must be a number between 0.0 and 1.0",
        )

    clamped_score = max(0.0, min(1.0, req.score))
    result = difficulty_adapter.record_attempt(
        student_id=req.student_id,
        literature_id=req.literature_id,
        chapter_index=req.chapter_index,
        score=clamped_score,
        difficulty=req.difficulty or "medium",
        disability_type=req.disability_type or "none",
    )
    return result
@app.post("/quiz/recommend-difficulty", tags=["quiz"])
async def quiz_recommend_difficulty(req: QuizDifficultyQuery):
    """
    Suggest the difficulty level for a student's next quiz.

    Registered as POST with a JSON body. Call it before generating questions
    so they can be pitched at the right level. The disability type defaults
    to "none" when not supplied.

    Returns: { "difficulty": <value from the adapter> }, expected to be one
    of "easy" | "medium" | "hard".
    """
    disability = req.disability_type or "none"
    recommended = difficulty_adapter.recommend_difficulty(
        student_id=req.student_id,
        literature_id=req.literature_id,
        disability_type=disability,
    )
    return {"difficulty": recommended}
@app.get("/quiz/student-state", tags=["quiz"])
async def quiz_student_state(student_id: str, literature_id: str):
    """
    Fetch the adaptive-difficulty state for one student and one book.

    Used by the teacher dashboard to show progress history and ability level.
    Returns whatever ``difficulty_adapter.get_state`` yields; when that is
    empty or None, a ``{"message": ...}`` placeholder is returned instead
    (not null).
    """
    recorded_state = difficulty_adapter.get_state(student_id, literature_id)
    if not recorded_state:
        return {"message": "No quiz attempts recorded yet."}
    return recorded_state
# ── System ───────────────────────────────────────────────────────────────────
@app.get("/health", tags=["system"])
async def health():
    """
    Liveness/status probe.

    Returns a fixed status and version string, whether the RL agent reports
    its model as ready, the list of advertised feature flags, and the current
    Unix timestamp.
    """
    advertised_features = [
        "highlight_to_understand",
        "book_brain_preanalysis",
        "learner_embedding_128dim",
        "comprehension_graph",
        "teacher_intelligence",
        "teacher_recommendations",
        "story_recaps",
        "poem_mode",
        "adhd_chunking",
        "dyslexia_rendering",
        "focus_sounds",
        "gemini_acceleration",
        "stt_reading_assessment",
        "word_difficulty_analysis",
        "pronunciation_guides",
        "vocabulary_mastery_tracking",
        # v4.0
        "ner_character_graph",
        "vocab_batch_analysis",
        "tts_word_sync",
        "adaptive_quiz_difficulty_irt",
    ]
    # NOTE(review): the version below still reads 3.1.0 although the feature
    # list advertises v4.0 entries — confirm whether it should be bumped.
    status_report = {
        "status": "healthy",
        "version": "3.1.0",
        "rl_model_ready": rl_agent.model_ready,
        "features": advertised_features,
        "timestamp": time.time(),
    }
    return status_report
# ── Startup: Model Size Check (D7 compliance) ────────────────────────────────
@app.on_event("startup")
async def check_model_sizes():
    """
    Advisory startup check of on-disk model size against the offline spec (D7).

    Scans the ``models`` and ``services`` directories next to this file and
    the sibling ``../rl-engine`` directory, summing the size of every file
    whose name ends in a known model extension. When the total exceeds the
    limit a warning is logged (naming up to three of the largest files above
    50 MB); otherwise an info line is logged.

    The check is non-fatal: missing directories are skipped and unreadable
    files are logged at debug level and ignored, so the service starts
    regardless. The scan itself runs synchronously inside this handler.
    """
    import logging

    MODEL_SIZE_LIMIT_MB = 500
    LARGE_FILE_THRESHOLD_MB = 50
    # A tuple so it can be passed straight to str.endswith.
    MODEL_EXTENSIONS = (".bin", ".pt", ".gguf", ".safetensors", ".pkl", ".joblib")
    logger = logging.getLogger("included.startup")

    # Directories to scan (relative to this file)
    base_dir = os.path.dirname(os.path.abspath(__file__))
    scan_dirs = [
        os.path.join(base_dir, "models"),
        os.path.join(base_dir, "services"),
        os.path.join(base_dir, "..", "rl-engine"),
    ]

    total_bytes, large_files = _scan_model_files(
        scan_dirs, MODEL_EXTENSIONS, LARGE_FILE_THRESHOLD_MB, logger
    )
    total_mb = total_bytes / (1024 * 1024)

    if total_mb > MODEL_SIZE_LIMIT_MB:
        # Biggest offenders first; only the top three are reported.
        large_files.sort(key=lambda item: item[1], reverse=True)
        logger.warning(
            "[MODEL SIZE] ⚠️ Total model files = %.1f MB — EXCEEDS %d MB offline spec (D7)! "
            "Consider INT4 quantization. Largest files: %s",
            total_mb,
            MODEL_SIZE_LIMIT_MB,
            ", ".join(
                f"{os.path.basename(path)} ({size_mb:.0f} MB)"
                for path, size_mb in large_files[:3]
            ),
        )
    else:
        logger.info(
            "[MODEL SIZE] ✅ Total model files = %.1f MB — within %d MB offline spec (D7).",
            total_mb,
            MODEL_SIZE_LIMIT_MB,
        )


def _scan_model_files(scan_dirs, extensions, large_threshold_mb, logger):
    """Return ``(total_bytes, large_files)`` for model files under *scan_dirs*.

    A file counts when its name ends with one of *extensions* (a tuple of
    suffixes). ``large_files`` lists ``(path, size_mb)`` for every counted
    file strictly larger than *large_threshold_mb*. Directories that do not
    exist are skipped; files whose size cannot be read are reported to
    *logger* at debug level and left out of the total.
    """
    total_bytes = 0
    large_files = []
    for scan_dir in scan_dirs:
        if not os.path.isdir(scan_dir):
            continue
        for root, _, files in os.walk(scan_dir):
            for fname in files:
                if not fname.endswith(extensions):
                    continue
                fpath = os.path.join(root, fname)
                try:
                    size_bytes = os.path.getsize(fpath)
                except OSError as exc:
                    # Best-effort scan: e.g. a broken symlink or a file
                    # removed mid-walk must not abort startup.
                    logger.debug(
                        "[MODEL SIZE] Skipping unreadable file %s: %s", fpath, exc
                    )
                    continue
                total_bytes += size_bytes
                size_mb = size_bytes / (1024 * 1024)
                if size_mb > large_threshold_mb:
                    large_files.append((fpath, size_mb))
    return total_bytes, large_files
if __name__ == "__main__":
    # Direct-run entry point: serve the app with uvicorn on all interfaces,
    # port 8082. Not executed when the module is imported by an ASGI server.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8082}
    uvicorn.run(app, **server_options)