Spaces:

Deign86
/

mathpulse-api-v3test

Running

App Files Files Community

github-actions[bot] committed 25 days ago

Commit

1959397

1 Parent(s): 61efd60

🚀 Auto-deploy backend from GitHub (baba3a4)

Browse files

Files changed (8) hide show

.env.example +6 -1
main.py +5 -0
rag/pdf_ingestion.py +368 -0
requirements.txt +2 -0
routes/quiz_battle.py +205 -0
services/question_bank_service.py +123 -0
services/variance_engine.py +115 -0
tests/test_quiz_battle.py +223 -0

.env.example CHANGED Viewed

@@ -25,4 +25,9 @@ VITE_HF_MODEL_ID=Qwen/QwQ-32B
 HF_MODEL_ID=deepseek-chat
 # PRODUCTION — deepseek-reasoner for step-by-step solutions
-# HF_MODEL_ID=deepseek-reasoner

 HF_MODEL_ID=deepseek-chat
 # PRODUCTION — deepseek-reasoner for step-by-step solutions
+# HF_MODEL_ID=deepseek-reasoner
+# ── Quiz Battle Internal Auth ─────────────────────────────────────
+# Shared secret between Firebase Cloud Functions and FastAPI backend
+# Used to authenticate server-to-server requests for correct answers
+QUIZ_BATTLE_INTERNAL_SECRET=change_this_to_a_random_string

main.py CHANGED Viewed

@@ -81,6 +81,7 @@ from routes.rag_routes import router as rag_router
 from routes.admin_model_routes import router as admin_model_router
 from routes.diagnostic import router as diagnostic_router
 from routes.video_routes import router as video_router
 from rag.curriculum_rag import (
     build_analysis_curriculum_context,
     build_lesson_prompt,
@@ -365,6 +366,9 @@ ROLE_POLICIES: Dict[str, Set[str]] = {
     "/api/admin/model-config/override": ADMIN_ONLY,
     "/api/admin/model-config/reset": ADMIN_ONLY,
     "/api/lessons/videos/search": ALL_APP_ROLES,
 }
 if not os.getenv("DEEPSEEK_API_KEY"):
@@ -1016,6 +1020,7 @@ app.include_router(rag_router)
 app.include_router(admin_model_router)
 app.include_router(diagnostic_router)
 app.include_router(video_router)
 # ─── Global Exception Handler ─────────────────────────────────

 from routes.admin_model_routes import router as admin_model_router
 from routes.diagnostic import router as diagnostic_router
 from routes.video_routes import router as video_router
+from routes.quiz_battle import router as quiz_battle_router
 from rag.curriculum_rag import (
     build_analysis_curriculum_context,
     build_lesson_prompt,
     "/api/admin/model-config/override": ADMIN_ONLY,
     "/api/admin/model-config/reset": ADMIN_ONLY,
     "/api/lessons/videos/search": ALL_APP_ROLES,
+    "/api/quiz-battle/generate": ALL_APP_ROLES,
+    "/api/quiz-battle/ingest-pdf": TEACHER_OR_ADMIN,
+    "/api/quiz-battle/bank-status": TEACHER_OR_ADMIN,
 }
 if not os.getenv("DEEPSEEK_API_KEY"):
 app.include_router(admin_model_router)
 app.include_router(diagnostic_router)
 app.include_router(video_router)
+app.include_router(quiz_battle_router)
 # ─── Global Exception Handler ─────────────────────────────────

rag/pdf_ingestion.py ADDED Viewed

	@@ -0,0 +1,368 @@

+"""
+PDF Ingestion Module for Quiz Battle RAG Question Bank.
+Ingests PDFs from Firebase Storage, extracts text, chunks content,
+generates embeddings, calls DeepSeek to produce base questions,
+and stores results in Firestore.
+"""
+import asyncio
+import hashlib
+import io
+import json
+import logging
+import os
+import random
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Optional
+from google.cloud.firestore import Client
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from sentence_transformers import SentenceTransformer
+import pypdf
+from rag.firebase_storage_loader import _init_firebase_storage
+from services.ai_client import get_deepseek_client, CHAT_MODEL
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Model name handed to SentenceTransformer; overridable via EMBEDDING_MODEL.
+EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
+# Firebase project ID: FIREBASE_AUTH_PROJECT_ID if set, else the literal fallback.
+DEFAULT_FIREBASE_PROJECT = os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026")
+@dataclass
+class IngestionResult:
+    """Result of a PDF ingestion operation."""
+    # Last "/"-separated segment of storage_path.
+    filename: str
+    # False when the download or text extraction failed; True for a completed
+    # run and for the "already processed" short-circuit in ingest_pdf.
+    processed: bool
+    # Number of questions written (or recorded in the stored manifest).
+    question_count: int
+    grade_level: int
+    topic: str
+    # Firebase Storage path the PDF was read from.
+    storage_path: str
+    # Timezone-aware (UTC) time when ingest_pdf builds it with datetime.now(timezone.utc).
+    timestamp: datetime
+def _extract_filename(storage_path: str) -> str:
+    """Return the final "/"-separated segment of a Firebase Storage path."""
+    _, _, tail = storage_path.rpartition("/")
+    return tail
+def _generate_chunk_id(source_chunk_id: str, question_text: str) -> str:
+    """Return the MD5 hex digest of "<source_chunk_id>:<question_text>" as a document ID."""
+    payload = "{}:{}".format(source_chunk_id, question_text).encode()
+    return hashlib.md5(payload).hexdigest()
+def _strip_json_fences(text: str) -> str:
+    """Remove a leading ```json / ``` fence and a trailing ``` fence, then trim whitespace."""
+    stripped = text.strip()
+    # Order matters: drop the tagged fence first, then any bare fence that remains.
+    for opening in ("```json", "```"):
+        stripped = stripped.removeprefix(opening)
+    return stripped.removesuffix("```").strip()
+async def _generate_questions_for_chunk(
+    chunk_text: str,
+    chunk_id: str,
+    topic: str,
+    grade_level: int,
+    deepseek_client,
+) -> list[dict]:
+    """Call DeepSeek to generate MCQs for a text chunk.
+    The SDK call is synchronous, so it is run in a worker thread to keep the
+    event loop responsive while the model generates.
+    Args:
+        chunk_text: Curriculum excerpt inserted into the prompt.
+        chunk_id: Identifier echoed into the prompt as "source_chunk_id".
+        topic: Topic label echoed into the prompt.
+        grade_level: Grade level echoed into the prompt.
+        deepseek_client: Client exposing ``chat.completions.create``.
+    Returns:
+        The parsed JSON objects from the response. Returns an empty list when
+        the call fails, the content is empty, the payload is not valid JSON,
+        or the payload is not a JSON array. Non-object array items are dropped
+        because callers treat every item as a dict.
+    """
+    system_prompt = (
+        "You are a DepEd-aligned math question generator for Filipino students. "
+        "Given a curriculum excerpt, generate 5 multiple-choice questions. "
+        "Return ONLY a JSON array. No markdown, no explanation."
+    )
+    user_prompt = f"""Given this curriculum excerpt:
+<chunk>
+{chunk_text}
+</chunk>
+Generate 5 multiple-choice questions. For each question output JSON:
+{{
+  "question": "...",
+  "choices": ["A) ...", "B) ...", "C) ...", "D) ..."],
+  "correct_answer": "A",
+  "explanation": "...",
+  "topic": "{topic}",
+  "difficulty": "easy|medium|hard",
+  "grade_level": {grade_level},
+  "source_chunk_id": "{chunk_id}"
+}}
+Return a JSON array only, no extra text."""
+    try:
+        # Blocking network call: off-load it instead of stalling the event loop.
+        response = await asyncio.to_thread(
+            deepseek_client.chat.completions.create,
+            model=CHAT_MODEL,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            temperature=0.7,
+        )
+        raw_response = response.choices[0].message.content
+        if not raw_response:
+            logger.error("Empty DeepSeek response for chunk %s", chunk_id)
+            return []
+        questions = json.loads(_strip_json_fences(raw_response))
+    except json.JSONDecodeError as e:
+        logger.error("Failed to parse DeepSeek response as JSON for chunk %s: %s", chunk_id, e)
+        return []
+    except Exception as e:
+        # Best-effort per chunk: one failed chunk must not abort the whole ingestion.
+        logger.error("Error calling DeepSeek for chunk %s: %s", chunk_id, e)
+        return []
+    if not isinstance(questions, list):
+        return []
+    return [item for item in questions if isinstance(item, dict)]
+def _chunk_text(text: str) -> list[str]:
+    """Split text into 500-character chunks with a 50-character overlap."""
+    splitter_options = {
+        "chunk_size": 500,
+        "chunk_overlap": 50,
+        "length_function": len,
+        "separators": ["\n\n", "\n", " ", ""],
+    }
+    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)
+def _extract_pdf_text(pdf_bytes: bytes) -> str:
+    """Return the text of every page of the PDF, joined with newlines."""
+    document = pypdf.PdfReader(io.BytesIO(pdf_bytes))
+    page_texts = [page.extract_text() for page in document.pages]
+    return "\n".join(page_texts)
+async def _save_questions_batch(
+    firestore_client: Client,
+    questions: list[dict],
+    grade_level: int,
+    topic: str,
+) -> int:
+    """Save questions to Firestore using batch writes.
+    Documents are written under
+    ``question_bank/<grade_level>/<topic>/questions/questions/<doc_id>``.
+    ``commit()`` is awaited only when it returns an awaitable, so the function
+    works with the synchronous ``Client`` imported by this module as well as
+    with an async client or async test double.
+    Args:
+        firestore_client: Firestore client used to build references and batches.
+        questions: Question dicts; missing fields fall back to defaults.
+        grade_level: Grade level used in the document path and as a default.
+        topic: Topic used in the document path and as a default.
+    Returns:
+        Number of questions queued and committed.
+    """
+    import inspect
+    async def _commit(pending_batch) -> None:
+        outcome = pending_batch.commit()
+        if inspect.isawaitable(outcome):
+            await outcome
+    questions_ref = (
+        firestore_client.collection("question_bank")
+        .document(str(grade_level))
+        .collection(topic)
+        .document("questions")
+        .collection("questions")
+    )
+    batch = firestore_client.batch()
+    pending_writes = 0
+    question_count = 0
+    for question in questions:
+        doc_id = question.get("id") or _generate_chunk_id(
+            question.get("source_chunk_id", ""),
+            question.get("question", ""),
+        )
+        doc_data = {
+            "question": question.get("question", ""),
+            "choices": question.get("choices", []),
+            "correct_answer": question.get("correct_answer", ""),
+            "explanation": question.get("explanation", ""),
+            "topic": question.get("topic", topic),
+            "difficulty": question.get("difficulty", "medium"),
+            "grade_level": question.get("grade_level", grade_level),
+            "source_chunk_id": question.get("source_chunk_id", ""),
+            # Random key used by the question bank's pseudo-random range query.
+            "random_seed": random.random(),
+            "created_at": datetime.now(timezone.utc),
+        }
+        batch.set(questions_ref.document(doc_id), doc_data)
+        question_count += 1
+        pending_writes += 1
+        # Firestore caps a batch at 500 writes, so flush and start a new one.
+        if pending_writes == 500:
+            await _commit(batch)
+            batch = firestore_client.batch()
+            pending_writes = 0
+    if pending_writes:
+        await _commit(batch)
+    return question_count
+async def _save_embeddings_batch(
+    firestore_client: Client,
+    chunks: list[dict],
+    filename: str,
+) -> int:
+    """Save chunk embeddings to the ``question_bank_embeddings`` collection.
+    ``commit()`` is awaited only when it returns an awaitable, so both the
+    synchronous ``Client`` and an async client are supported.
+    Args:
+        firestore_client: Firestore client used to build references and batches.
+        chunks: Dicts with "id", "text" and "embedding" keys.
+        filename: Source PDF filename stored alongside each chunk.
+    Returns:
+        Number of chunk documents written.
+    """
+    import inspect
+    async def _commit(pending_batch) -> None:
+        outcome = pending_batch.commit()
+        if inspect.isawaitable(outcome):
+            await outcome
+    embeddings_ref = firestore_client.collection("question_bank_embeddings")
+    batch = firestore_client.batch()
+    pending_writes = 0
+    count = 0
+    for chunk in chunks:
+        chunk_id = chunk["id"]
+        doc_data = {
+            "chunk_id": chunk_id,
+            "text": chunk["text"],
+            "embedding": chunk["embedding"],
+            "filename": filename,
+            "created_at": datetime.now(timezone.utc),
+        }
+        batch.set(embeddings_ref.document(chunk_id), doc_data)
+        count += 1
+        pending_writes += 1
+        # Firestore caps a batch at 500 writes, so flush and start a new one.
+        if pending_writes == 500:
+            await _commit(batch)
+            batch = firestore_client.batch()
+            pending_writes = 0
+    if pending_writes:
+        await _commit(batch)
+    return count
+async def _save_processing_manifest(
+    firestore_client: Client,
+    filename: str,
+    question_count: int,
+    chunk_count: int,
+    grade_level: int,
+    topic: str,
+    storage_path: str,
+) -> None:
+    """Save the processing manifest to ``pdf_processing_status/<filename>``.
+    Besides ``processed_at`` and ``status``, the manifest also stores
+    ``processed`` and ``timestamp``, which are the keys the status readers
+    look up. ``set()`` is awaited only when it returns an awaitable, so both
+    the synchronous ``Client`` and an async client are supported.
+    """
+    import inspect
+    completed_at = datetime.now(timezone.utc)
+    doc_ref = firestore_client.collection("pdf_processing_status").document(filename)
+    doc_data = {
+        "filename": filename,
+        "question_count": question_count,
+        "chunk_count": chunk_count,
+        "grade_level": grade_level,
+        "topic": topic,
+        "storage_path": storage_path,
+        "processed_at": completed_at,
+        "status": "completed",
+        # Aliases read by the ingestion short-circuit and the bank-status endpoint.
+        "processed": True,
+        "timestamp": completed_at,
+    }
+    outcome = doc_ref.set(doc_data)
+    if inspect.isawaitable(outcome):
+        await outcome
+async def ingest_pdf(
+    storage_path: str,
+    grade_level: int,
+    topic: str,
+    force_reingest: bool = False,
+) -> IngestionResult:
+    """
+    Ingest a PDF from Firebase Storage, generate questions, and store in Firestore.
+    The status lookup is awaited only when ``get()`` returns an awaitable, so
+    the synchronous ``Client`` imported by this module works as well as an
+    async client or async test double.
+    Args:
+        storage_path: Path to PDF in Firebase Storage (e.g., "rag-pdfs/filename.pdf")
+        grade_level: Grade level the generated questions are filed under
+        topic: Topic identifier for the questions
+        force_reingest: If True, reprocess even if already processed
+    Returns:
+        IngestionResult with processing summary. ``processed`` is False when
+        the download or text extraction failed, and True otherwise, including
+        the already-processed short-circuit.
+    """
+    import inspect
+    filename = _extract_filename(storage_path)
+    project_id = os.getenv("FIREBASE_AUTH_PROJECT_ID", DEFAULT_FIREBASE_PROJECT)
+    firestore_client = Client(project=project_id)
+    def _failed() -> IngestionResult:
+        # Shared result for the download and extraction failure paths.
+        return IngestionResult(
+            filename=filename,
+            processed=False,
+            question_count=0,
+            grade_level=grade_level,
+            topic=topic,
+            storage_path=storage_path,
+            timestamp=datetime.now(timezone.utc),
+        )
+    # Step 1: Check if already processed
+    if not force_reingest:
+        status_ref = firestore_client.collection("pdf_processing_status").document(filename)
+        status_doc = status_ref.get()
+        if inspect.isawaitable(status_doc):
+            status_doc = await status_doc
+        if status_doc.exists:
+            logger.info(f"PDF {filename} already processed, skipping (use force_reingest=True to override)")
+            data = status_doc.to_dict() or {}
+            # The manifest stores its time as "processed_at"; "timestamp" is
+            # accepted as well for documents that carry that key.
+            recorded_at = (
+                data.get("timestamp")
+                or data.get("processed_at")
+                or datetime.now(timezone.utc)
+            )
+            return IngestionResult(
+                filename=filename,
+                processed=True,
+                question_count=data.get("question_count", 0),
+                grade_level=data.get("grade_level", grade_level),
+                topic=data.get("topic", topic),
+                storage_path=data.get("storage_path", storage_path),
+                timestamp=recorded_at,
+            )
+    # Step 2: Download PDF from Firebase Storage
+    try:
+        _, bucket = _init_firebase_storage()
+        blob = bucket.blob(storage_path)
+        pdf_bytes = blob.download_as_bytes()
+    except Exception as e:
+        logger.error(f"Failed to download PDF from Firebase Storage: {e}")
+        return _failed()
+    # Step 3: Extract text from PDF
+    try:
+        text = _extract_pdf_text(pdf_bytes)
+    except Exception as e:
+        logger.error(f"Failed to extract text from PDF: {e}")
+        return _failed()
+    # Step 4: Chunk text
+    chunks = _chunk_text(text)
+    # Step 5: Generate embeddings
+    embedding_model = SentenceTransformer(EMBEDDING_MODEL)
+    chunk_data = []
+    for i, chunk_text in enumerate(chunks):
+        # ID combines filename, position and a text prefix so it is stable across re-runs.
+        chunk_id = hashlib.md5(f"{filename}:{i}:{chunk_text[:100]}".encode()).hexdigest()
+        chunk_data.append({
+            "id": chunk_id,
+            "text": chunk_text,
+            "embedding": embedding_model.encode(chunk_text).tolist(),
+        })
+    # Step 6: Initialize DeepSeek client
+    deepseek_client = get_deepseek_client()
+    # Step 7: Generate questions for each chunk
+    all_questions = []
+    for chunk in chunk_data:
+        questions = await _generate_questions_for_chunk(
+            chunk["text"], chunk["id"], topic, grade_level, deepseek_client
+        )
+        for q in questions:
+            q["id"] = _generate_chunk_id(chunk["id"], q.get("question", ""))
+        all_questions.extend(questions)
+    # Step 8: Save questions to Firestore
+    question_count = await _save_questions_batch(
+        firestore_client, all_questions, grade_level, topic
+    )
+    # Step 9: Save embeddings to Firestore
+    await _save_embeddings_batch(firestore_client, chunk_data, filename)
+    # Step 10: Save manifest to Firestore
+    await _save_processing_manifest(
+        firestore_client, filename, question_count, len(chunks),
+        grade_level, topic, storage_path
+    )
+    logger.info(
+        f"Completed ingestion for {filename}: {question_count} questions, "
+        f"{len(chunks)} chunks"
+    )
+    return IngestionResult(
+        filename=filename,
+        processed=True,
+        question_count=question_count,
+        grade_level=grade_level,
+        topic=topic,
+        storage_path=storage_path,
+        timestamp=datetime.now(timezone.utc),
+    )

requirements.txt CHANGED Viewed

@@ -22,4 +22,6 @@ redis[hiredis]>=5.0.0
 PyYAML>=6.0.0
 mypy>=1.20.0
 pytest>=9.0.0
 google-api-python-client>=2.0.0

 PyYAML>=6.0.0
 mypy>=1.20.0
 pytest>=9.0.0
+pytest-asyncio>=0.23.0
 google-api-python-client>=2.0.0
+pypdf>=4.0.0

routes/quiz_battle.py ADDED Viewed

	@@ -0,0 +1,205 @@

+"""
+Quiz Battle API Routes.
+Endpoints:
+- POST /api/quiz-battle/generate       → Generate varied questions for a battle session
+- POST /api/quiz-battle/ingest-pdf     → Trigger PDF ingestion (teacher/admin)
+- GET  /api/quiz-battle/bank-status    → List processed PDFs (teacher/admin)
+"""
+import os
+from typing import List, Optional, Dict, Any
+from datetime import datetime, timezone
+from fastapi import APIRouter, Request, HTTPException, Depends
+from pydantic import BaseModel, Field
+from rag.pdf_ingestion import ingest_pdf, IngestionResult
+from services.question_bank_service import get_questions_for_battle, cache_session_questions, get_cached_session
+from services.variance_engine import apply_variance
+# All routes in this module are mounted under /api/quiz-battle.
+router = APIRouter(prefix="/api/quiz-battle", tags=["quiz-battle"])
+# ── Pydantic Models ──────────────────────────────────────────────────
+# Request body for POST /generate.
+class GenerateRequest(BaseModel):
+    # Grade level, validated to the range 7-12.
+    grade_level: int = Field(..., ge=7, le=12)
+    topic: str = Field(..., min_length=1)
+    # Number of questions requested (1-50, default 10).
+    question_count: int = Field(default=10, ge=1, le=50)
+    # Battle session identifier; also used as the cache key for varied questions.
+    session_id: str = Field(..., min_length=1)
+    player_ids: List[str] = Field(default_factory=list)
+# Response body for POST /generate.
+class GenerateResponse(BaseModel):
+    # Question dicts; "correct_answer" is removed for non-internal callers.
+    questions: List[Dict[str, Any]]
+    session_id: str
+# Request body for POST /ingest-pdf.
+class IngestPdfRequest(BaseModel):
+    # Path of the PDF inside Firebase Storage.
+    storage_path: str = Field(..., min_length=1)
+    grade_level: int = Field(..., ge=7, le=12)
+    topic: str = Field(..., min_length=1)
+    # When True, the PDF is reprocessed even if a status document exists.
+    force_reingest: bool = False
+# Response body for POST /ingest-pdf; mirrors IngestionResult plus a status label.
+class IngestPdfResponse(BaseModel):
+    # "processed" or "skipped", derived from IngestionResult.processed.
+    status: str
+    filename: str
+    question_count: int
+    grade_level: int
+    topic: str
+    storage_path: str
+    timestamp: datetime
+# One entry of the bank-status listing, built from a pdf_processing_status document.
+class BankStatusItem(BaseModel):
+    # Firestore document ID of the status document.
+    filename: str
+    processed: bool
+    # None when the status document carries no timestamp.
+    timestamp: Optional[datetime]
+    question_count: int
+    grade_level: int
+    topic: str
+    storage_path: str
+# Response body for GET /bank-status.
+class BankStatusResponse(BaseModel):
+    pdfs: List[BankStatusItem]
+# ── Helper ───────────────────────────────────────────────────────────
+def _get_current_user(request: Request):
+    """Return the user stored on ``request.state``; raise 401 when none is set."""
+    current = getattr(request.state, "user", None)
+    if current is not None:
+        return current
+    raise HTTPException(status_code=401, detail="Authentication required")
+def _is_internal_request(request: Request) -> bool:
+    """Check if request is from an internal service (Cloud Functions).
+    The ``X-Internal-Service`` header must equal the
+    ``QUIZ_BATTLE_INTERNAL_SECRET`` environment variable. The comparison is
+    constant-time so response timing does not leak how much of the secret
+    matched. Returns False when the secret is unset or empty, or when the
+    header is missing.
+    """
+    import hmac
+    expected = os.getenv("QUIZ_BATTLE_INTERNAL_SECRET")
+    provided = request.headers.get("X-Internal-Service")
+    if not expected or provided is None:
+        return False
+    # Compare bytes: compare_digest rejects non-ASCII str arguments.
+    return hmac.compare_digest(provided.encode("utf-8"), expected.encode("utf-8"))
+# ── Endpoints ────────────────────────────────────────────────────────
+@router.post("/generate", response_model=GenerateResponse)
+async def generate_questions(
+    body: GenerateRequest,
+    request: Request,
+):
+    """
+    Build the question set for one quiz battle session.
+    Base questions are fetched for the requested grade and topic, passed
+    through the variance engine, and cached with the session metadata.
+    "correct_answer" is stripped from every question unless the caller
+    presents a valid X-Internal-Service header. Responds with 404 when no
+    questions are available.
+    """
+    base_questions = await get_questions_for_battle(
+        body.grade_level,
+        body.topic,
+        body.question_count,
+    )
+    if not base_questions:
+        raise HTTPException(
+            status_code=404,
+            detail=f"No questions found for grade {body.grade_level}, topic '{body.topic}'",
+        )
+    session_questions = await apply_variance(base_questions, body.session_id)
+    await cache_session_questions(
+        body.session_id,
+        session_questions,
+        body.player_ids,
+        body.grade_level,
+        body.topic,
+    )
+    if _is_internal_request(request):
+        payload = [dict(item) for item in session_questions]
+    else:
+        # Public callers never receive the answer key.
+        payload = [
+            {key: value for key, value in item.items() if key != "correct_answer"}
+            for item in session_questions
+        ]
+    return GenerateResponse(questions=payload, session_id=body.session_id)
+@router.post("/ingest-pdf", response_model=IngestPdfResponse)
+async def ingest_pdf_endpoint(
+    body: IngestPdfRequest,
+    user=Depends(_get_current_user),
+):
+    """
+    Trigger PDF ingestion into the question bank.
+    Requires teacher or admin role (403 otherwise). Exceptions raised by the
+    ingestion are mapped to 404 (FileNotFoundError), 400 (ValueError) or 500
+    (anything else), with the original exception chained as the cause.
+    NOTE(review): ``status`` is "skipped" whenever ``result.processed`` is
+    False. In this file ``ingest_pdf`` sets that only on download/extraction
+    failure, so confirm whether a failure status was intended.
+    """
+    if user.role not in ("teacher", "admin"):
+        raise HTTPException(status_code=403, detail="Teacher or admin access required")
+    try:
+        result = await ingest_pdf(
+            storage_path=body.storage_path,
+            grade_level=body.grade_level,
+            topic=body.topic,
+            force_reingest=body.force_reingest,
+        )
+    except FileNotFoundError as e:
+        raise HTTPException(status_code=404, detail=str(e)) from e
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e)) from e
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Ingestion failed: {str(e)}") from e
+    return IngestPdfResponse(
+        status="processed" if result.processed else "skipped",
+        filename=result.filename,
+        question_count=result.question_count,
+        grade_level=result.grade_level,
+        topic=result.topic,
+        storage_path=result.storage_path,
+        timestamp=result.timestamp,
+    )
+@router.get("/bank-status", response_model=BankStatusResponse)
+async def bank_status(
+    user=Depends(_get_current_user),
+):
+    """
+    Get the status of all processed PDFs in the question bank.
+    Requires teacher or admin role (403 otherwise).
+    Each status document is read tolerantly: ``processed`` falls back to
+    ``status == "completed"`` and ``timestamp`` falls back to
+    ``processed_at``, which are the keys the ingestion manifest stores.
+    """
+    if user.role not in ("teacher", "admin"):
+        raise HTTPException(status_code=403, detail="Teacher or admin access required")
+    from google.cloud import firestore
+    db = firestore.Client(project=os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026"))
+    pdfs = []
+    for doc in db.collection("pdf_processing_status").stream():
+        data = doc.to_dict() or {}
+        processed = data.get("processed")
+        if processed is None:
+            # Manifests written by the ingestion module mark completion via "status".
+            processed = data.get("status") == "completed"
+        pdfs.append(BankStatusItem(
+            filename=doc.id,
+            processed=bool(processed),
+            timestamp=data.get("timestamp") or data.get("processed_at"),
+            question_count=data.get("question_count", 0),
+            grade_level=data.get("grade_level", 0),
+            topic=data.get("topic", ""),
+            storage_path=data.get("storage_path", ""),
+        ))
+    return BankStatusResponse(pdfs=pdfs)

services/question_bank_service.py ADDED Viewed

	@@ -0,0 +1,123 @@

+"""
+Question Bank Service for Quiz Battle.
+Handles querying the question bank with random ordering,
+caching session questions, and 24-hour debounce for variance results.
+"""
+import os
+import random
+from datetime import datetime, timezone, timedelta
+from typing import List, Dict, Optional
+from google.cloud import firestore
+# Firebase project ID: FIREBASE_AUTH_PROJECT_ID if set, else the literal fallback.
+DEFAULT_FIREBASE_PROJECT = os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026")
+def _get_db() -> firestore.Client:
+    """Create a Firestore client bound to DEFAULT_FIREBASE_PROJECT."""
+    client = firestore.Client(project=DEFAULT_FIREBASE_PROJECT)
+    return client
+async def get_questions_for_battle(
+    grade_level: int,
+    topic: str,
+    count: int = 10,
+) -> List[Dict]:
+    """
+    Fetch random questions from the question bank for a battle session.
+    Questions are read from
+    ``question_bank/<grade_level>/<topic>/questions/questions``. A Firestore
+    collection path needs an odd number of segments, so the reference is
+    built segment by segment instead of from a four-segment path string.
+    Uses the Firestore random_seed field for pseudo-random ordering: take up
+    to `count` documents at or above a random threshold, then wrap around to
+    the documents below it to fill any shortfall.
+    If fewer than `count` questions exist, returns all available. Documents
+    missing any of "question", "choices", "correct_answer" or "difficulty"
+    are dropped.
+    """
+    db = _get_db()
+    collection_ref = (
+        db.collection("question_bank")
+        .document(str(grade_level))
+        .collection(topic)
+        .document("questions")
+        .collection("questions")
+    )
+    # Pseudo-random query using random_seed >= random threshold
+    threshold = random.random()
+    query = (
+        collection_ref
+        .where("random_seed", ">=", threshold)
+        .order_by("random_seed")
+        .limit(count)
+    )
+    docs = list(query.stream())
+    # If we didn't get enough, query from the start to fill shortfall
+    if len(docs) < count:
+        remaining = count - len(docs)
+        fallback_query = (
+            collection_ref
+            .where("random_seed", "<", threshold)
+            .order_by("random_seed")
+            .limit(remaining)
+        )
+        docs.extend(fallback_query.stream())
+    required_fields = ("question", "choices", "correct_answer", "difficulty")
+    candidates = (doc.to_dict() for doc in docs)
+    return [
+        q for q in candidates
+        if q and all(field in q for field in required_fields)
+    ]
+async def cache_session_questions(
+    session_id: str,
+    questions: List[Dict],
+    player_ids: List[str],
+    grade_level: int,
+    topic: str,
+) -> None:
+    """Store a session's metadata and its questions, valid for 24 hours.
+    The session document is overwritten, and each question is written to the
+    "questions" subcollection under its list index as document ID.
+    """
+    db = _get_db()
+    session_ref = db.collection("quiz_battle_sessions").document(session_id)
+    expires_at = datetime.now(timezone.utc) + timedelta(hours=24)
+    metadata = {
+        "player_ids": player_ids,
+        "grade_level": grade_level,
+        "topic": topic,
+        "created_at": firestore.SERVER_TIMESTAMP,
+        "variance_cached_until": expires_at,
+    }
+    session_ref.set(metadata)
+    # All question documents go out in a single batch commit.
+    writer = db.batch()
+    questions_ref = session_ref.collection("questions")
+    for position, payload in enumerate(questions):
+        writer.set(questions_ref.document(str(position)), payload)
+    writer.commit()
+async def get_cached_session(session_id: str) -> Optional[List[Dict]]:
+    """
+    Check if a session has cached varied questions within 24 hours.
+    Returns the cached questions in their original order if the cache is
+    still valid, otherwise None. Question documents are keyed by their list
+    index ("0", "1", ... "10"), and Firestore streams documents in
+    lexicographic ID order, so the snapshots are re-sorted numerically.
+    """
+    db = _get_db()
+    session_ref = db.collection("quiz_battle_sessions").document(session_id)
+    session_doc = session_ref.get()
+    if not session_doc.exists:
+        return None
+    data = session_doc.to_dict() or {}
+    cached_until = data.get("variance_cached_until")
+    if not cached_until:
+        return None
+    if isinstance(cached_until, datetime):
+        if cached_until.tzinfo is None:
+            cached_until = cached_until.replace(tzinfo=timezone.utc)
+    elif hasattr(cached_until, 'timestamp'):
+        # Timestamp-like object exposing .timestamp()
+        cached_until = datetime.fromtimestamp(cached_until.timestamp(), tz=timezone.utc)
+    else:
+        # Unknown expiry representation: treat the cache as invalid.
+        return None
+    if cached_until <= datetime.now(timezone.utc):
+        return None
+    def _position(snapshot):
+        # Numeric IDs sort by value; anything else keeps stream order after them.
+        try:
+            return (0, int(snapshot.id))
+        except (TypeError, ValueError):
+            return (1, 0)
+    q_docs = sorted(session_ref.collection("questions").stream(), key=_position)
+    questions = [doc.to_dict() for doc in q_docs]
+    return questions if questions else None

services/variance_engine.py ADDED Viewed

	@@ -0,0 +1,115 @@

+"""
+Variance Engine for Quiz Battle Questions.
+Applies per-session variance techniques via DeepSeek,
+with pure-Python fallback for choice shuffling.
+"""
+import json
+import random
+import re
+from typing import List, Dict
+from services.ai_client import get_deepseek_client, CHAT_MODEL
+from services.question_bank_service import get_cached_session, cache_session_questions
+def _fallback_shuffle(questions: List[Dict], seed: int) -> List[Dict]:
+    """
+    Pure-Python fallback: shuffle choices deterministically.
+    Args:
+        questions: Question dicts with "choices" and a letter "correct_answer".
+        seed: Seed for the private random generator; the same seed and input
+            give the same output.
+    Returns:
+        A new list of shallow-copied questions; the input dicts and their
+        choice lists are not modified. For each question the choices are
+        shuffled and "correct_answer" is set to the letter of the correct
+        choice's new position. When every choice carries its positional label
+        ("A) ...", "B) ...", ...), the labels are rewritten after the shuffle
+        so they stay in A, B, C order and agree with "correct_answer". A
+        question whose correct choice cannot be located (missing choices,
+        malformed or out-of-range letter) is returned unshuffled with an
+        empty "variance_applied" list.
+    """
+    label_pattern = re.compile(r"^([A-Za-z])([\).:]\s*)")
+    rng = random.Random(seed)
+    shuffled_questions = []
+    for original in questions:
+        q = dict(original)
+        choices = list(q.get("choices") or [])
+        answer = str(q.get("correct_answer", "")).strip()
+        # Accept "B", "b" or "B)"; only the leading letter identifies the choice.
+        correct_index = ord(answer[0].upper()) - ord("A") if answer[:1].isalpha() else -1
+        if not 0 <= correct_index < len(choices):
+            q["variance_applied"] = []
+            shuffled_questions.append(q)
+            continue
+        labels = [
+            label_pattern.match(choice) if isinstance(choice, str) else None
+            for choice in choices
+        ]
+        is_labelled = all(
+            found is not None and found.group(1).upper() == chr(ord("A") + position)
+            for position, found in enumerate(labels)
+        )
+        # Shuffling the index list yields the same permutation as shuffling the choices.
+        order = list(range(len(choices)))
+        rng.shuffle(order)
+        if is_labelled:
+            q["choices"] = [
+                f"{chr(ord('A') + position)}{labels[source].group(2)}"
+                f"{choices[source][labels[source].end():]}"
+                for position, source in enumerate(order)
+            ]
+        else:
+            q["choices"] = [choices[source] for source in order]
+        q["correct_answer"] = chr(ord("A") + order.index(correct_index))
+        q["variance_applied"] = ["choice_shuffle"]
+        shuffled_questions.append(q)
+    return shuffled_questions
+async def apply_variance(questions: List[Dict], session_id: str) -> List[Dict]:
+    """
+    Apply per-session variance to a list of questions.
+    1. Check 24h Firestore cache first
+    2. Derive a seed from session_id (SHA-256 based, so it is identical in
+       every process; the built-in ``hash()`` of a string is salted per
+       interpreter run)
+    3. Call DeepSeek with the variance prompt in a worker thread, because the
+       SDK call is synchronous
+    4. Parse and validate the JSON response
+    5. Fall back to the pure-Python shuffle if DeepSeek fails
+    6. Cache the result for 24 hours
+    Returns:
+        The varied questions (cached, model-generated, or shuffled).
+    """
+    import asyncio
+    import hashlib
+    import logging
+    # 1. Check cache
+    cached = await get_cached_session(session_id)
+    if cached:
+        return cached
+    # 2. Generate deterministic seed from session_id
+    digest = hashlib.sha256(session_id.encode("utf-8")).digest()
+    seed = int.from_bytes(digest[:4], "big")
+    # 3. Call DeepSeek
+    client = get_deepseek_client()
+    system_prompt = (
+        "You are a math quiz variance engine for MathPulse AI, an educational platform for "
+        "Filipino high school students following the DepEd K-12 curriculum. "
+        "Your job is to make quiz questions feel fresh each session WITHOUT changing the "
+        "correct answer or difficulty level."
+    )
+    user_prompt = f"""Given these {len(questions)} quiz battle questions as JSON:
+{json.dumps(questions, indent=2)}
+Apply the following variance techniques. Use session_seed={seed} for deterministic but varied output:
+PARAPHRASE (30% chance per question): Reword the question stem using different phrasing, synonyms, or sentence structure. Do NOT change the math or the answer.
+CHOICE SHUFFLE (always): Randomize the order of answer choices A/B/C/D. Update "correct_answer" to reflect the new position.
+DISTRACTOR REFRESH (20% chance per question): Replace 1-2 wrong choices with new plausible-but-incorrect distractors that represent common student misconceptions for this topic. Keep the correct answer unchanged.
+CONTEXT SWAP (10% chance per question): Replace real-world context variables (names, objects, currencies) with Filipino-localized equivalents (e.g., "pesos", "jeepney", "barangay") to increase cultural relevance.
+NUMERIC SCALING (10% chance, only for computation problems): Scale numbers by a small integer factor (2x or 3x) so the method remains the same but the answer changes. Recompute the correct answer and all distractors accordingly.
+Return the full modified questions array as valid JSON only. Keep all original fields.
+Add a "variance_applied": ["paraphrase", "distractor_refresh", ...] field per question.
+Do NOT change "topic", "difficulty", "grade_level", or "source_chunk_id"."""
+    required_fields = ("question", "choices", "correct_answer", "variance_applied")
+    try:
+        response = await asyncio.to_thread(
+            client.chat.completions.create,
+            model=CHAT_MODEL,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            temperature=0.5,
+            max_tokens=4000,
+        )
+        content = response.choices[0].message.content.strip()
+        # Strip markdown code fences, with or without a "json" tag
+        content = re.sub(r"^```(?:json)?\s*", "", content, flags=re.IGNORECASE)
+        content = re.sub(r"\s*```$", "", content)
+        varied_questions = json.loads(content)
+        if not isinstance(varied_questions, list) or len(varied_questions) != len(questions):
+            raise ValueError("Invalid response format from DeepSeek")
+        # Validate required fields
+        for q in varied_questions:
+            if not isinstance(q, dict) or not all(k in q for k in required_fields):
+                raise ValueError("Missing required fields in varied question")
+    except Exception as e:
+        # Any model or parsing failure degrades to the local shuffle.
+        logging.getLogger(__name__).warning(
+            "[variance_engine] DeepSeek variance failed, falling back to shuffle: %s", e
+        )
+        varied_questions = _fallback_shuffle(questions, seed)
+    # 4. Cache for 24 hours
+    # player_ids are not known here; grade_level and topic come from the first question.
+    player_ids = []
+    grade_level = questions[0].get("grade_level", 11) if questions else 11
+    topic = questions[0].get("topic", "general_mathematics") if questions else "general_mathematics"
+    await cache_session_questions(session_id, varied_questions, player_ids, grade_level, topic)
+    return varied_questions

tests/test_quiz_battle.py ADDED Viewed

	@@ -0,0 +1,223 @@

+"""
+Tests for Quiz Battle RAG-powered question bank.
+"""
+import pytest
+from unittest.mock import patch, MagicMock, AsyncMock
+from datetime import datetime, timezone, timedelta
+from fastapi.testclient import TestClient
+# Mock firebase_admin before imports
+import sys
+from unittest.mock import MagicMock
+# Remember any firebase_admin module that was already loaded (None if absent)
+# so the module-scoped cleanup fixture can put it back.
+_original_firebase_admin = sys.modules.get("firebase_admin")
+firebase_mock = MagicMock()
+# Install mocks in sys.modules so the `main` import below resolves these
+# module names to MagicMock objects.
+sys.modules["firebase_admin"] = firebase_mock
+sys.modules["firebase_admin.credentials"] = MagicMock()
+sys.modules["google.cloud.firestore"] = MagicMock()
+from main import app
+client = TestClient(app)
+@pytest.fixture(scope="module", autouse=True)
+def _cleanup_firebase_mock():
+    """Undo the module-level sys.modules mocking after all tests in this module.
+    Restores the original ``firebase_admin`` (or removes the mock when there
+    was none) and also drops the ``firebase_admin.credentials`` and
+    ``google.cloud.firestore`` mocks, so later test modules import the real
+    packages instead of a leaked MagicMock.
+    """
+    yield
+    if _original_firebase_admin is not None:
+        sys.modules["firebase_admin"] = _original_firebase_admin
+    else:
+        sys.modules.pop("firebase_admin", None)
+    for mocked_name in ("firebase_admin.credentials", "google.cloud.firestore"):
+        # Only remove entries that are still mocks; a real module is left alone.
+        if isinstance(sys.modules.get(mocked_name), MagicMock):
+            del sys.modules[mocked_name]
+# ── PDF Ingestion Tests ──────────────────────────────────────────────
+class TestPdfIngestion:
+    @pytest.mark.asyncio
+    async def test_ingest_pdf_skips_already_processed(self):
+        """An existing pdf_processing_status document short-circuits ingestion."""
+        stored_manifest = {
+            "processed": True,
+            "question_count": 10,
+            "grade_level": 8,
+            "topic": "linear_equations",
+            "storage_path": "quiz_pdfs/grade_8/test.pdf",
+            "timestamp": datetime.now(timezone.utc),
+        }
+        snapshot = MagicMock()
+        snapshot.exists = True
+        snapshot.to_dict.return_value = stored_manifest
+        # get() must be awaitable because ingest_pdf awaits it.
+        status_ref = MagicMock()
+        status_ref.get = AsyncMock(return_value=snapshot)
+        with patch("rag.pdf_ingestion.Client") as mock_firestore:
+            mock_firestore.return_value.collection.return_value.document.return_value = status_ref
+            from rag.pdf_ingestion import ingest_pdf
+            result = await ingest_pdf("quiz_pdfs/grade_8/test.pdf", 8, "linear_equations")
+        assert result.processed is True
+        assert result.question_count == 10
+    @pytest.mark.asyncio
+    async def test_ingest_pdf_force_reingest(self):
+        """If force_reingest=True, process even if already done."""
+        with patch("rag.pdf_ingestion.Client") as mock_firestore, \
+             patch("rag.pdf_ingestion._init_firebase_storage") as mock_storage, \
+             patch("rag.pdf_ingestion._extract_pdf_text") as mock_extract, \
+             patch("rag.pdf_ingestion._chunk_text") as mock_chunk, \
+             patch("rag.pdf_ingestion._generate_questions_for_chunk") as mock_gen, \
+             patch("rag.pdf_ingestion._save_questions_batch") as mock_save, \
+             patch("rag.pdf_ingestion._save_embeddings_batch") as mock_save_emb, \
+             patch("rag.pdf_ingestion._save_processing_manifest") as mock_save_status, \
+             patch("rag.pdf_ingestion.get_deepseek_client") as mock_deepseek:
+            mock_doc = MagicMock()
+            mock_doc.exists = True
+            mock_doc.to_dict.return_value = {"processed": True}
+            async def async_get():
+                return mock_doc
+            mock_ref = MagicMock()
+            mock_ref.get = async_get
+            mock_firestore.return_value.collection.return_value.document.return_value = mock_ref
+            mock_blob = MagicMock()
+            mock_blob.exists.return_value = True
+            mock_blob.download_as_bytes.return_value = b"pdf bytes"
+            mock_storage.return_value = (None, MagicMock())
+            mock_storage.return_value[1].blob.return_value = mock_blob
+            mock_extract.return_value = "Some math content"
+            mock_chunk.return_value = ["chunk1"]
+            mock_gen.return_value = [{
+                "question": "What is 2+2?",
+                "choices": ["A) 3", "B) 4", "C) 5", "D) 6"],
+                "correct_answer": "B",
+                "explanation": "Basic addition",
+                "topic": "linear_equations",
+                "difficulty": "easy",
+                "grade_level": 8,
+                "source_chunk_id": "chunk1",
+            }]
+            mock_save.return_value = 1
+            mock_deepseek.return_value = MagicMock()
+            from rag.pdf_ingestion import ingest_pdf
+            result = await ingest_pdf("quiz_pdfs/grade_8/test.pdf", 8, "linear_equations", force_reingest=True)
+            assert result.processed is True
+            assert result.question_count == 1
+# ── Question Bank Service Tests ──────────────────────────────────────
+class TestQuestionBankService:
+    @pytest.mark.asyncio
+    async def test_get_questions_for_battle(self):
+        """Fetch questions with random ordering."""
+        with patch("services.question_bank_service._get_db") as mock_db:
+            mock_doc = MagicMock()
+            mock_doc.to_dict.return_value = {
+                "question": "What is 2+2?",
+                "choices": ["A) 3", "B) 4", "C) 5", "D) 6"],
+                "correct_answer": "B",
+                "difficulty": "easy",
+                "random_seed": 0.5,
+            }
+            mock_collection = MagicMock()
+            mock_collection.where.return_value.order_by.return_value.limit.return_value.stream.return_value = [mock_doc]
+            mock_collection.where.return_value.order_by.return_value.limit.return_value.stream.return_value = [mock_doc]
+            mock_db.return_value.collection.return_value = mock_collection
+            from services.question_bank_service import get_questions_for_battle
+            questions = await get_questions_for_battle(8, "linear_equations", 1)
+            assert len(questions) == 1
+            assert questions[0]["question"] == "What is 2+2?"
+    @pytest.mark.asyncio
+    async def test_cache_session_questions(self):
+        """Cache questions for 24 hours."""
+        with patch("services.question_bank_service._get_db") as mock_db:
+            mock_session_ref = MagicMock()
+            mock_db.return_value.collection.return_value.document.return_value = mock_session_ref
+            from services.question_bank_service import cache_session_questions
+            await cache_session_questions(
+                "session_123",
+                [{"question": "Q1", "correct_answer": "A"}],
+                ["uid1"],
+                8,
+                "linear_equations",
+            )
+            mock_session_ref.set.assert_called_once()
+# ── Variance Engine Tests ────────────────────────────────────────────
+class TestVarianceEngine:
+    @pytest.mark.asyncio
+    async def test_apply_variance_uses_cache(self):
+        """If cache exists, return cached questions."""
+        with patch("services.variance_engine.get_cached_session") as mock_cache:
+            mock_cache.return_value = [{"question": "Cached?", "correct_answer": "A"}]
+            from services.variance_engine import apply_variance
+            result = await apply_variance([], "session_123")
+            assert result[0]["question"] == "Cached?"
+    @pytest.mark.asyncio
+    async def test_apply_variance_fallback_shuffle(self):
+        """If DeepSeek fails, fallback to pure Python shuffle."""
+        with patch("services.variance_engine.get_cached_session") as mock_cache, \
+             patch("services.variance_engine.get_deepseek_client") as mock_client, \
+             patch("services.variance_engine.cache_session_questions") as mock_save:
+            mock_cache.return_value = None
+            mock_client.return_value.chat.completions.create.side_effect = Exception("API error")
+            mock_save.return_value = None
+            from services.variance_engine import apply_variance
+            questions = [{
+                "question": "What is 2+2?",
+                "choices": ["A) 3", "B) 4", "C) 5", "D) 6"],
+                "correct_answer": "B",
+                "difficulty": "easy",
+                "topic": "math",
+                "grade_level": 8,
+                "source_chunk_id": "c1",
+            }]
+            result = await apply_variance(questions, "session_123")
+            assert len(result) == 1
+            assert result[0]["variance_applied"] == ["choice_shuffle"]
+            # Correct answer should still point to the right text
+            correct_index = ord(result[0]["correct_answer"]) - ord("A")
+            assert "4" in result[0]["choices"][correct_index]
+# ── Route Integration Tests ──────────────────────────────────────────
+class TestQuizBattleRoutes:
+    def test_generate_unauthorized(self):
+        """Generate without auth should 401 or 403 depending on middleware."""
+        response = client.post("/api/quiz-battle/generate", json={
+            "grade_level": 8,
+            "topic": "linear_equations",
+            "question_count": 10,
+            "session_id": "test-session",
+            "player_ids": ["uid1"],
+        })
+        # Auth middleware may reject or allow in test env
+        assert response.status_code in (200, 401, 403)
+    def test_ingest_pdf_unauthorized(self):
+        """Ingest-pdf without teacher role should 403."""
+        response = client.post("/api/quiz-battle/ingest-pdf", json={
+            "storage_path": "quiz_pdfs/grade_8/test.pdf",
+            "grade_level": 8,
+            "topic": "linear_equations",
+        })
+        assert response.status_code in (401, 403)
+    def test_bank_status_unauthorized(self):
+        """Bank-status without teacher role should 403."""
+        response = client.get("/api/quiz-battle/bank-status")
+        assert response.status_code in (401, 403)