""" Maria AI Tutor — FastAPI Backend Hugging Face Spaces (Docker) """ import os, io, re, json, base64, logging, wave, struct, urllib.request, urllib.parse from contextlib import asynccontextmanager from typing import Optional, List, Dict, Any import torch import numpy as np import pandas as pd import faiss import httpx from fastapi import FastAPI, Request, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel, Field, ConfigDict from transformers import AutoModelForCausalLM, AutoTokenizer # datasets import removed — HF dataset not used at runtime from sentence_transformers import SentenceTransformer # ── Logging ──────────────────────────────────────────────────────────────────── logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") log = logging.getLogger(__name__) # ── Config ───────────────────────────────────────────────────────────────────── BASE_DIR = os.path.dirname(os.path.abspath(__file__)) PIPER_MODEL_PATH = os.path.join(BASE_DIR, "models", "en_US-lessac-medium.onnx") INSTRUCT_MODEL = "Qwen/Qwen2.5-1.5B-Instruct" CODER_MODEL = "Qwen/Qwen2.5-Coder-1.5B-Instruct" EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2" DATASET_NAME = "digifreely/Maria" DATASET_BASE_URL = "https://huggingface.co/datasets/digifreely/Maria/resolve/main" MAX_HISTORY = 20 GEN_MAX_TOKENS = 220 HF_TOKEN = os.environ.get("HF_TOKEN", "") EXPECTED_HASH = os.environ.get("EXPECTED_HASH", "") CF_TURNSTILE_SECRET = os.environ.get("CF_TURNSTILE_SECRET", "") CF_API_TOKEN = os.environ.get("CF_API_TOKEN", "") CF_ZONE_ID = os.environ.get("CF_ZONE_ID", "") ALLOWED_DOMAIN = os.environ.get("ALLOWED_DOMAIN", "buildwithsupratim.github.io") # ── Global State ─────────────────────────────────────────────────────────────── _models: Dict[str, Any] = {} _tokenizers: Dict[str, Any] = {} _embed: Any = None _faiss: Dict[str, Any] = {} _meta: Dict[str, Any] = {} _piper: Any = None # 
# ══════════════════════════════════════════════════════════════════════════════
# Pydantic Schemas
# ══════════════════════════════════════════════════════════════════════════════

# One agent reasoning step recorded for a given chat turn.
class ScratchpadItem(BaseModel):
    chat_id: int
    thought: str = ""
    action: str = ""
    action_input: str = ""
    observation: str = ""


# One completed student/teacher exchange.
class ChatHistoryItem(BaseModel):
    chat_id: int
    user_input: str
    system_output: str


# Per-goal progress flags; every stage starts as "Not_Complete".
class LearningObjectiveStatus(BaseModel):
    goal: str
    teach: str = "Not_Complete"
    re_teach: str = "Not_Complete"
    show_and_tell: str = "Not_Complete"
    assess: str = "Not_Complete"


# A topic currently being taught, with the status of each of its objectives.
class CurrentLearningItem(BaseModel):
    topic: str
    content: str
    learning_objectives: List[LearningObjectiveStatus] = []


# Container for the topics currently in progress.
class AssessmentStages(BaseModel):
    current_learning: List[CurrentLearningItem] = []


# A curriculum entry as supplied by the client.
class CurriculumObjective(BaseModel):
    topics: str
    content: str
    learning_objectives: List[str] = []


# Full session state sent by (and returned to) the client on every request.
class LearningPath(BaseModel):
    # Accept either the field name ("class_name") or its alias ("class").
    model_config = ConfigDict(populate_by_name=True)

    board: str
    # "class" is a Python keyword, hence the alias.
    class_name: str = Field(alias="class")
    subject: str
    student_name: str
    teacher_persona: str
    curriculum_objectives: List[CurriculumObjective] = []
    chat_history: List[ChatHistoryItem] = []
    scratchpad: List[ScratchpadItem] = []
    assessment_stages: Optional[AssessmentStages] = None


# The student's incoming message.
class QueryIn(BaseModel):
    request_message: str


# Request body: session state plus the new message.
class ChatRequest(BaseModel):
    learning_path: LearningPath
    query: QueryIn


# The tutor's reply: text plus optional visual and audio payloads.
class ResponseMessage(BaseModel):
    text: str
    visual: str = "No"
    visual_content: str = ""
    audio_output: str = ""


# Wrapper around the reply message.
class QueryOut(BaseModel):
    response_message: ResponseMessage


# Response body: updated session state plus the reply.
class ChatResponse(BaseModel):
    model_config = ConfigDict(populate_by_name=True)

    learning_path: LearningPath
    query: QueryOut


# ══════════════════════════════════════════════════════════════════════════════
# Model / Dataset Loading (called once at startup)
# ══════════════════════════════════════════════════════════════════════════════

def _load_transformer(name: str, key: str) -> None:
    """Load a causal LM and its tokenizer and register both under ``key``.

    With CUDA available the model is loaded in 4-bit NF4 via bitsandbytes.
    Otherwise it is loaded in float32 and int4 weight quantization through
    ``optimum.quanto`` is attempted; if that fails for any reason the
    float32 model is kept.

    Args:
        name: Hugging Face model id passed to ``from_pretrained``.
        key: Registry key used in ``_tokenizers`` and ``_models``.
    """
    log.info(f"Loading {key}: {name}")
    tok = AutoTokenizer.from_pretrained(name, token=HF_TOKEN or None)
    _tokenizers[key] = tok

    # An empty HF_TOKEN is passed as None rather than as an empty string.
    common_kw: Dict[str, Any] = {"token": HF_TOKEN or None}

    if torch.cuda.is_available():
        # Imported lazily: only needed on the GPU path.
        from transformers import BitsAndBytesConfig

        qc = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )
        mdl = AutoModelForCausalLM.from_pretrained(
            name, quantization_config=qc, device_map="auto", **common_kw
        )
        log.info(f"{key} → GPU int4 (bitsandbytes)")
    else:
        mdl = AutoModelForCausalLM.from_pretrained(
            name, torch_dtype=torch.float32, **common_kw
        )
        try:
            # Optional dependency; any failure leaves the float32 model as is.
            from optimum.quanto import quantize, qint4, freeze

            quantize(mdl, weights=qint4)
            freeze(mdl)
            log.info(f"{key} → CPU int4 (quanto)")
        except Exception as e:
            log.warning(f"{key} → CPU float32 fallback ({e})")

    mdl.eval()
    _models[key] = mdl
    log.info(f"{key} ready")


def load_all_models() -> None:
    """Load the instruct and coder LLMs and the sentence-embedding model."""
    _load_transformer(INSTRUCT_MODEL, "instruct")
    _load_transformer(CODER_MODEL, "coder")

    global _embed
    log.info("Loading embedding model…")
    _embed = SentenceTransformer(EMBED_MODEL)
    log.info("Embedding model ready")


def load_piper() -> None:
    """Load the Piper TTS voice if the package and model file are present.

    Best-effort: on a missing model file or any import/load error a warning
    is logged and ``_piper`` is left unchanged, which disables audio.
    """
    global _piper
    try:
        from piper.voice import PiperVoice

        if os.path.exists(PIPER_MODEL_PATH):
            _piper = PiperVoice.load(PIPER_MODEL_PATH)
            log.info("Piper TTS ready")
        else:
            log.warning("Piper .onnx not found — audio disabled")
    except Exception as e:
        log.warning(f"Piper unavailable: {e}")


# ══════════════════════════════════════════════════════════════════════════════
# FastAPI lifespan
# ══════════════════════════════════════════════════════════════════════════════

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load all models and the TTS voice before the app starts serving."""
    load_all_models()
    load_piper()
    yield


app = FastAPI(title="Maria AI Tutor", version="1.0.0", lifespan=lifespan)

# Only the configured front-end origin may call the API from a browser.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[f"https://{ALLOWED_DOMAIN}"],
    allow_methods=["GET", "POST"],
    allow_headers=["*"],
)


# ══════════════════════════════════════════════════════════════════════════════
# Inference helper
══════════════════════════════════════════════════════════════════════════════ def generate(key: str, system: str, user: str, max_tokens: int = GEN_MAX_TOKENS) -> str: tok = _tokenizers[key] mdl = _models[key] messages = [{"role": "system", "content": system}, {"role": "user", "content": user}] prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) inputs = tok(prompt, return_tensors="pt") # Move inputs to the model's device try: device = next(mdl.parameters()).device inputs = {k: v.to(device) for k, v in inputs.items()} except Exception: pass # device_map="auto" handles it with torch.no_grad(): out = mdl.generate( **inputs, max_new_tokens=max_tokens, do_sample=True, temperature=0.7, top_p=0.9, pad_token_id=tok.eos_token_id, ) new_tokens = out[0][inputs["input_ids"].shape[1]:] return tok.decode(new_tokens, skip_special_tokens=True).strip() # ══════════════════════════════════════════════════════════════════════════════ # RAG / FAISS # ══════════════════════════════════════════════════════════════════════════════ def _ensure_faiss(board: str, cls: str, subject: str): key = f"{board}/{cls}/{subject}" if key in _faiss: return _faiss[key], _meta[key] try: base = "{}/knowledgebase/{}/{}/{}".format(DATASET_BASE_URL, urllib.parse.quote(board, safe=""), urllib.parse.quote(cls, safe=""), urllib.parse.quote(subject, safe="")) headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {} def _download(url: str, dest: str): req = urllib.request.Request(url, headers=headers) with urllib.request.urlopen(req, timeout=30) as r, open(dest, "wb") as f: f.write(r.read()) fp = f"/tmp/fi_{board}_{cls}_{subject}.bin" mp = f"/tmp/mt_{board}_{cls}_{subject}.parquet" _download(f"{base}/faiss_index.bin", fp) _download(f"{base}/metadata.parquet", mp) index = faiss.read_index(fp) meta = pd.read_parquet(mp) _faiss[key], _meta[key] = index, meta log.info(f"FAISS loaded for {key}") return index, meta except Exception as e: log.error(f"FAISS load error 
[{key}]: {e}") return None, None def rag_search(board: str, cls: str, subject: str, query: str, top_k: int = 3) -> str: if _embed is None: return "" index, meta = _ensure_faiss(board, cls, subject) if index is None: return "" vec = _embed.encode([query])[0].astype("float32").reshape(1, -1) faiss.normalize_L2(vec) _, idxs = index.search(vec, top_k) chunks = [] text_cols = ["text", "content", "chunk", "passage"] for i in idxs[0]: if 0 <= i < len(meta): row = meta.iloc[i] for col in text_cols: if col in meta.columns: chunks.append(str(row[col])) break else: chunks.append(str(row.iloc[0])) return "\n---\n".join(chunks) # ══════════════════════════════════════════════════════════════════════════════ # TTS # ══════════════════════════════════════════════════════════════════════════════ def tts_base64(text: str) -> str: if _piper is None or not text: return "" try: buf = io.BytesIO() with wave.open(buf, "wb") as wf: _piper.synthesize(text, wf) return base64.b64encode(buf.getvalue()).decode() except Exception as e: log.error(f"TTS error: {e}") return "" # ══════════════════════════════════════════════════════════════════════════════ # Security # ══════════════════════════════════════════════════════════════════════════════ def check_auth_code(code: str) -> bool: if not EXPECTED_HASH or not code: return False import hashlib return hashlib.sha256(code.encode()).hexdigest() == EXPECTED_HASH async def verify_turnstile(token: str, ip: str) -> bool: if not CF_TURNSTILE_SECRET: return True # secret not configured → allow try: async with httpx.AsyncClient(timeout=10) as client: r = await client.post( "https://challenges.cloudflare.com/turnstile/v0/siteverify", data={"secret": CF_TURNSTILE_SECRET, "response": token, "remoteip": ip}, ) return r.json().get("success", False) except Exception: return False async def block_ip(ip: str) -> None: if not CF_API_TOKEN or not CF_ZONE_ID: return try: async with httpx.AsyncClient(timeout=10) as client: await client.post( 
f"https://api.cloudflare.com/client/v4/zones/{CF_ZONE_ID}/firewall/access_rules/rules", headers={"Authorization": f"Bearer {CF_API_TOKEN}", "Content-Type": "application/json"}, json={ "mode": "block", "configuration": {"target": "ip", "value": ip}, "notes": "Auto-blocked by Maria AI", }, ) except Exception as e: log.error(f"IP block failed: {e}") # ══════════════════════════════════════════════════════════════════════════════ # Agent helpers # ══════════════════════════════════════════════════════════════════════════════ def _context(lp: LearningPath) -> str: hist = "\n".join( f"Student: {h.user_input}\nTeacher: {h.system_output}" for h in lp.chat_history[-8:] ) pad = "\n".join( f"[{s.action}] {s.observation}" for s in lp.scratchpad[-4:] if s.action ) return f"Chat History:\n{hist}\n\nScratchpad:\n{pad}" def _current_topic(lp: LearningPath) -> str: if lp.assessment_stages and lp.assessment_stages.current_learning: return lp.assessment_stages.current_learning[0].topic return "the current lesson" # ── fn_brain ────────────────────────────────────────────────────────────────── def fn_brain(lp: LearningPath, msg: str) -> str: system = ( "You are a routing decision maker for a children's educational AI tutor. " "Choose EXACTLY ONE word from: Block, Question, Curriculum, ChitChat\n\n" "Block — disrespectful, inappropriate, adult, abusive content\n" "Question — curiosity question clearly outside the current curriculum\n" "Curriculum — the student is engaging with lesson content, asking about it, or ready to learn\n" "ChitChat — casual talk, greetings, sharing feelings, general conversation\n\n" "Reply with ONLY one word." 
) ctx = _context(lp) user = f"Current topic: {_current_topic(lp)}\n{ctx}\n\nStudent: {msg}" decision = generate("instruct", system, user, max_tokens=5).strip().lower() for kw in ("block", "question", "curriculum", "chitchat"): if kw in decision: return kw return "curriculum" # safe default # ── fn_block ────────────────────────────────────────────────────────────────── def fn_block(lp: LearningPath, msg: str) -> tuple[str, str]: ctx = _context(lp) # Count recent block events in scratchpad block_count = sum(1 for s in lp.scratchpad if s.action == "block") permanent = block_count >= 3 system = ( f"You are a teacher for children aged 6-12. Student: {lp.student_name}. " f"Style: {lp.teacher_persona}. " "The student said something inappropriate or disrespectful. " + ( "This has happened multiple times. Say: 'Please show this chat to your parent or teacher.' " "Say nothing else and do not engage further." if permanent else "Respond with gentle humor/sarcasm to discourage the behavior — never be rude or arrogant. " f"Try to redirect back to: {_current_topic(lp)}. Keep to 2-3 lines max." ) ) response = generate("instruct", system, f"{ctx}\nStudent: {msg}", max_tokens=120) return response, "inappropriate content — blocking" # ── fn_chitchat ─────────────────────────────────────────────────────────────── def fn_chitchat(lp: LearningPath, msg: str) -> tuple[str, str]: system = ( f"You are a friendly teacher for children aged 6-12. " f"Student: {lp.student_name}. Style: {lp.teacher_persona}. " "Engage warmly and briefly, then gently guide back to the lesson. " f"Current topic: {_current_topic(lp)}. Keep to 2-3 lines. Age-appropriate only." 
) response = generate("instruct", system, f"{_context(lp)}\nStudent: {msg}", max_tokens=150) return response, "chitchat — engaging and redirecting" # ── fn_question ─────────────────────────────────────────────────────────────── def fn_question(lp: LearningPath, msg: str) -> tuple[str, str]: rag = rag_search(lp.board, lp.class_name, lp.subject, msg) ref = f"\n\nReference material:\n{rag}" if rag else "" system = ( f"You are a teacher for children aged 6-12. " f"Student: {lp.student_name}. Style: {lp.teacher_persona}. " "Answer the student's question using the reference material if available. " "If you don't have an answer, say so kindly. " "After answering, gently nudge the student back to the current lesson. " "Keep to 2-3 lines. Age-appropriate only." ) user = f"{_context(lp)}{ref}\n\nStudent: {msg}" response = generate("instruct", system, user, max_tokens=200) thought = f"question answered — RAG {'found' if rag else 'empty'}" return response, thought # ── fn_visualconstruct ──────────────────────────────────────────────────────── def fn_visualconstruct(instruction: str) -> str: system = ( "You are an HTML/CSS/JavaScript developer making simple educational visuals for children aged 6-12. " "Generate ONLY executable code inside a single