Spaces:
Running
Running
github-actions[bot] committed on
Commit ·
1959397
1
Parent(s): 61efd60
๐ Auto-deploy backend from GitHub (baba3a4)
Browse files- .env.example +6 -1
- main.py +5 -0
- rag/pdf_ingestion.py +368 -0
- requirements.txt +2 -0
- routes/quiz_battle.py +205 -0
- services/question_bank_service.py +123 -0
- services/variance_engine.py +115 -0
- tests/test_quiz_battle.py +223 -0
.env.example
CHANGED
|
@@ -25,4 +25,9 @@ VITE_HF_MODEL_ID=Qwen/QwQ-32B
|
|
| 25 |
HF_MODEL_ID=deepseek-chat
|
| 26 |
|
| 27 |
# PRODUCTION — deepseek-reasoner for step-by-step solutions
|
| 28 |
-
# HF_MODEL_ID=deepseek-reasoner
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
HF_MODEL_ID=deepseek-chat
|
| 26 |
|
| 27 |
# PRODUCTION — deepseek-reasoner for step-by-step solutions
|
| 28 |
+
# HF_MODEL_ID=deepseek-reasoner
|
| 29 |
+
|
| 30 |
+
# ── Quiz Battle Internal Auth ─────────────────────────────────────
|
| 31 |
+
# Shared secret between Firebase Cloud Functions and FastAPI backend
|
| 32 |
+
# Used to authenticate server-to-server requests for correct answers
|
| 33 |
+
QUIZ_BATTLE_INTERNAL_SECRET=change_this_to_a_random_string
|
main.py
CHANGED
|
@@ -81,6 +81,7 @@ from routes.rag_routes import router as rag_router
|
|
| 81 |
from routes.admin_model_routes import router as admin_model_router
|
| 82 |
from routes.diagnostic import router as diagnostic_router
|
| 83 |
from routes.video_routes import router as video_router
|
|
|
|
| 84 |
from rag.curriculum_rag import (
|
| 85 |
build_analysis_curriculum_context,
|
| 86 |
build_lesson_prompt,
|
|
@@ -365,6 +366,9 @@ ROLE_POLICIES: Dict[str, Set[str]] = {
|
|
| 365 |
"/api/admin/model-config/override": ADMIN_ONLY,
|
| 366 |
"/api/admin/model-config/reset": ADMIN_ONLY,
|
| 367 |
"/api/lessons/videos/search": ALL_APP_ROLES,
|
|
|
|
|
|
|
|
|
|
| 368 |
}
|
| 369 |
|
| 370 |
if not os.getenv("DEEPSEEK_API_KEY"):
|
|
@@ -1016,6 +1020,7 @@ app.include_router(rag_router)
|
|
| 1016 |
app.include_router(admin_model_router)
|
| 1017 |
app.include_router(diagnostic_router)
|
| 1018 |
app.include_router(video_router)
|
|
|
|
| 1019 |
|
| 1020 |
|
| 1021 |
# โโโ Global Exception Handler โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
| 81 |
from routes.admin_model_routes import router as admin_model_router
|
| 82 |
from routes.diagnostic import router as diagnostic_router
|
| 83 |
from routes.video_routes import router as video_router
|
| 84 |
+
from routes.quiz_battle import router as quiz_battle_router
|
| 85 |
from rag.curriculum_rag import (
|
| 86 |
build_analysis_curriculum_context,
|
| 87 |
build_lesson_prompt,
|
|
|
|
| 366 |
"/api/admin/model-config/override": ADMIN_ONLY,
|
| 367 |
"/api/admin/model-config/reset": ADMIN_ONLY,
|
| 368 |
"/api/lessons/videos/search": ALL_APP_ROLES,
|
| 369 |
+
"/api/quiz-battle/generate": ALL_APP_ROLES,
|
| 370 |
+
"/api/quiz-battle/ingest-pdf": TEACHER_OR_ADMIN,
|
| 371 |
+
"/api/quiz-battle/bank-status": TEACHER_OR_ADMIN,
|
| 372 |
}
|
| 373 |
|
| 374 |
if not os.getenv("DEEPSEEK_API_KEY"):
|
|
|
|
| 1020 |
app.include_router(admin_model_router)
|
| 1021 |
app.include_router(diagnostic_router)
|
| 1022 |
app.include_router(video_router)
|
| 1023 |
+
app.include_router(quiz_battle_router)
|
| 1024 |
|
| 1025 |
|
| 1026 |
# โโโ Global Exception Handler โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
rag/pdf_ingestion.py
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PDF Ingestion Module for Quiz Battle RAG Question Bank.
|
| 3 |
+
|
| 4 |
+
Ingests PDFs from Firebase Storage, extracts text, chunks content,
|
| 5 |
+
generates embeddings, calls DeepSeek to produce base questions,
|
| 6 |
+
and stores results in Firestore.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import asyncio
|
| 10 |
+
import hashlib
|
| 11 |
+
import io
|
| 12 |
+
import json
|
| 13 |
+
import logging
|
| 14 |
+
import os
|
| 15 |
+
import random
|
| 16 |
+
from dataclasses import dataclass
|
| 17 |
+
from datetime import datetime, timezone
|
| 18 |
+
from typing import Optional
|
| 19 |
+
|
| 20 |
+
from google.cloud.firestore import Client
|
| 21 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 22 |
+
from sentence_transformers import SentenceTransformer
|
| 23 |
+
import pypdf
|
| 24 |
+
|
| 25 |
+
from rag.firebase_storage_loader import _init_firebase_storage
|
| 26 |
+
from services.ai_client import get_deepseek_client, CHAT_MODEL
|
| 27 |
+
|
| 28 |
+
logger = logging.getLogger(__name__)
|
| 29 |
+
|
| 30 |
+
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
|
| 31 |
+
DEFAULT_FIREBASE_PROJECT = os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@dataclass
class IngestionResult:
    """Summary returned by ``ingest_pdf`` for a single storage path."""

    # Last segment of the storage path; also used as the manifest document id.
    filename: str
    # False when the PDF could not be downloaded or its text extracted.
    processed: bool
    # Number of questions written to Firestore (0 on failure).
    question_count: int
    grade_level: int
    topic: str
    storage_path: str
    timestamp: datetime
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _extract_filename(storage_path: str) -> str:
    """Return the final segment of a Firebase Storage path.

    Trailing slashes are ignored, so ``"rag-pdfs/file.pdf/"`` yields
    ``"file.pdf"`` rather than an empty string. The result is used as a
    Firestore document id, which must not be empty.
    """
    return storage_path.rstrip("/").rsplit("/", 1)[-1]
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _generate_chunk_id(source_chunk_id: str, question_text: str) -> str:
    """Return a deterministic document id for a question.

    The id is the hex MD5 digest of ``"<source_chunk_id>:<question_text>"``,
    so the same question from the same chunk always maps to the same id.
    MD5 is used only as a content fingerprint; ``usedforsecurity=False``
    keeps it available on FIPS-restricted Python builds.
    """
    fingerprint = f"{source_chunk_id}:{question_text}".encode("utf-8")
    return hashlib.md5(fingerprint, usedforsecurity=False).hexdigest()
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _strip_json_fences(text: str) -> str:
    """Remove a surrounding Markdown code fence from a model response.

    Handles an opening fence with or without a ``json`` language tag (the
    tag is matched case-insensitively) and a closing fence. Empty or missing
    content yields an empty string instead of raising.
    """
    if not text:
        return ""
    text = text.strip()
    if text.startswith("```"):
        text = text[3:]
        # Drop the optional language tag directly after the opening fence.
        if text[:4].lower() == "json":
            text = text[4:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
async def _generate_questions_for_chunk(
    chunk_text: str,
    chunk_id: str,
    topic: str,
    grade_level: int,
    deepseek_client,
) -> list[dict]:
    """Ask DeepSeek for multiple-choice questions about one text chunk.

    Args:
        chunk_text: Curriculum excerpt to generate questions from.
        chunk_id: Identifier of the chunk, echoed into the prompt.
        topic: Topic label echoed into the prompt.
        grade_level: Grade level echoed into the prompt.
        deepseek_client: Client exposing ``chat.completions.create``.

    Returns:
        The parsed question dicts. Returns an empty list when the call fails,
        the response is not valid JSON, or the JSON is not a list. List
        entries that are not dicts are dropped.
    """
    system_prompt = (
        "You are a DepEd-aligned math question generator for Filipino students. "
        "Given a curriculum excerpt, generate 5 multiple-choice questions. "
        "Return ONLY a JSON array. No markdown, no explanation."
    )

    user_prompt = f"""Given this curriculum excerpt:
<chunk>
{chunk_text}
</chunk>

Generate 5 multiple-choice questions. For each question output JSON:
{{
  "question": "...",
  "choices": ["A) ...", "B) ...", "C) ...", "D) ..."],
  "correct_answer": "A",
  "explanation": "...",
  "topic": "{topic}",
  "difficulty": "easy|medium|hard",
  "grade_level": {grade_level},
  "source_chunk_id": "{chunk_id}"
}}
Return a JSON array only, no extra text."""

    try:
        # The SDK call is synchronous; run it in a worker thread so the
        # event loop is not blocked for the duration of the request.
        response = await asyncio.to_thread(
            deepseek_client.chat.completions.create,
            model=CHAT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
        )
        raw_response = response.choices[0].message.content
        questions = json.loads(_strip_json_fences(raw_response))
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse DeepSeek response as JSON for chunk {chunk_id}: {e}")
        return []
    except Exception as e:
        # Best-effort: one failed chunk must not abort the whole ingestion.
        logger.error(f"Error calling DeepSeek for chunk {chunk_id}: {e}")
        return []

    if not isinstance(questions, list):
        return []
    # Callers use dict methods on every entry, so drop anything else.
    return [q for q in questions if isinstance(q, dict)]
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _chunk_text(text: str, chunk_size: int = 500, chunk_overlap: int = 50) -> list[str]:
    """Split text into overlapping chunks with RecursiveCharacterTextSplitter.

    Args:
        text: Text to split.
        chunk_size: Maximum chunk length in characters (default 500).
        chunk_overlap: Characters shared between consecutive chunks (default 50).

    Returns:
        The chunks in document order.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        # Prefer paragraph, then line, then word boundaries before a hard cut.
        separators=["\n\n", "\n", " ", ""],
    )
    return splitter.split_text(text)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def _extract_pdf_text(pdf_bytes: bytes) -> str:
    """Return the text of every page of a PDF, joined with newlines."""
    document = pypdf.PdfReader(io.BytesIO(pdf_bytes))
    return "\n".join([page.extract_text() for page in document.pages])
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
async def _save_questions_batch(
    firestore_client: Client,
    questions: list[dict],
    grade_level: int,
    topic: str,
) -> int:
    """Write questions to Firestore in batches of 500 and return the count saved.

    Documents go to
    ``question_bank/<grade_level>/<topic>/questions/questions/<doc_id>``.
    The id is the question's ``"id"`` if present, otherwise a hash of its
    source chunk id and question text. Entries that are not dicts are skipped.
    """
    import inspect

    async def _commit(pending) -> None:
        # The synchronous client's commit() blocks and returns a plain list,
        # so it cannot be awaited directly. Run it in a worker thread and
        # only await the result if the client handed back an awaitable.
        outcome = await asyncio.to_thread(pending.commit)
        if inspect.isawaitable(outcome):
            await outcome

    batch = firestore_client.batch()
    question_count = 0

    for question in questions:
        if not isinstance(question, dict):
            continue

        doc_id = question.get("id") or _generate_chunk_id(
            question.get("source_chunk_id", ""),
            question.get("question", ""),
        )
        doc_ref = firestore_client.collection("question_bank").document(
            str(grade_level)
        ).collection(topic).document("questions").collection("questions").document(doc_id)

        doc_data = {
            "question": question.get("question", ""),
            "choices": question.get("choices", []),
            "correct_answer": question.get("correct_answer", ""),
            "explanation": question.get("explanation", ""),
            "topic": question.get("topic", topic),
            "difficulty": question.get("difficulty", "medium"),
            "grade_level": question.get("grade_level", grade_level),
            "source_chunk_id": question.get("source_chunk_id", ""),
            # Stored so readers can pick a pseudo-random slice with a range query.
            "random_seed": random.random(),
            "created_at": datetime.now(timezone.utc),
        }
        batch.set(doc_ref, doc_data)
        question_count += 1

        # Flush every 500 writes and start a fresh batch.
        if question_count % 500 == 0:
            await _commit(batch)
            batch = firestore_client.batch()

    await _commit(batch)
    return question_count
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
async def _save_embeddings_batch(
    firestore_client: Client,
    chunks: list[dict],
    filename: str,
) -> int:
    """Write chunk embeddings to ``question_bank_embeddings`` and return the count.

    Each chunk dict must provide ``"id"``, ``"text"`` and ``"embedding"``.
    Writes are flushed every 500 documents.
    """
    import inspect

    async def _commit(pending) -> None:
        # The synchronous client's commit() blocks and returns a plain list,
        # so it cannot be awaited directly. Run it in a worker thread and
        # only await the result if the client handed back an awaitable.
        outcome = await asyncio.to_thread(pending.commit)
        if inspect.isawaitable(outcome):
            await outcome

    batch = firestore_client.batch()
    count = 0

    for chunk in chunks:
        chunk_id = chunk["id"]
        doc_ref = firestore_client.collection("question_bank_embeddings").document(chunk_id)
        doc_data = {
            "chunk_id": chunk_id,
            "text": chunk["text"],
            "embedding": chunk["embedding"],
            "filename": filename,
            "created_at": datetime.now(timezone.utc),
        }
        batch.set(doc_ref, doc_data)
        count += 1

        # Flush every 500 writes and start a fresh batch.
        if count % 500 == 0:
            await _commit(batch)
            batch = firestore_client.batch()

    await _commit(batch)
    return count
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
async def _save_processing_manifest(
    firestore_client: Client,
    filename: str,
    question_count: int,
    chunk_count: int,
    grade_level: int,
    topic: str,
    storage_path: str,
) -> None:
    """Record a finished ingestion in ``pdf_processing_status/<filename>``.

    Besides ``processed_at`` and ``status``, the document also carries
    ``processed`` and ``timestamp``, which are the keys the status readers
    in this codebase look up.
    """
    import inspect

    doc_ref = firestore_client.collection("pdf_processing_status").document(filename)
    processed_at = datetime.now(timezone.utc)
    doc_data = {
        "filename": filename,
        "question_count": question_count,
        "chunk_count": chunk_count,
        "grade_level": grade_level,
        "topic": topic,
        "storage_path": storage_path,
        "processed_at": processed_at,
        "status": "completed",
        "processed": True,
        "timestamp": processed_at,
    }
    # The synchronous client's set() blocks and is not awaitable. Run it in
    # a worker thread and only await the result if it is an awaitable.
    outcome = await asyncio.to_thread(doc_ref.set, doc_data)
    if inspect.isawaitable(outcome):
        await outcome
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
async def ingest_pdf(
    storage_path: str,
    grade_level: int,
    topic: str,
    force_reingest: bool = False,
) -> IngestionResult:
    """
    Ingest a PDF from Firebase Storage, generate questions, and store in Firestore.

    Args:
        storage_path: Path to PDF in Firebase Storage (e.g., "rag-pdfs/filename.pdf")
        grade_level: Grade level (11 or 12)
        topic: Topic identifier for the questions
        force_reingest: If True, reprocess even if already processed

    Returns:
        IngestionResult with processing summary. ``processed`` is False when
        the PDF could not be downloaded or parsed, contained no extractable
        text, or produced no questions. In those cases no manifest is
        written, so a later call retries instead of skipping.
    """
    import inspect

    async def _offload(func, *args):
        # Blocking client calls run in a worker thread. If the client is an
        # async one, the call returns an awaitable, which is awaited here.
        outcome = await asyncio.to_thread(func, *args)
        if inspect.isawaitable(outcome):
            outcome = await outcome
        return outcome

    def _failed() -> IngestionResult:
        return IngestionResult(
            filename=filename,
            processed=False,
            question_count=0,
            grade_level=grade_level,
            topic=topic,
            storage_path=storage_path,
            timestamp=datetime.now(timezone.utc),
        )

    filename = _extract_filename(storage_path)
    project_id = os.getenv("FIREBASE_AUTH_PROJECT_ID", DEFAULT_FIREBASE_PROJECT)
    firestore_client = Client(project=project_id)

    # Step 1: Check if already processed
    if not force_reingest:
        status_ref = firestore_client.collection("pdf_processing_status").document(filename)
        status_doc = await _offload(status_ref.get)
        if status_doc.exists:
            logger.info(f"PDF {filename} already processed, skipping (use force_reingest=True to override)")
            data = status_doc.to_dict() or {}
            return IngestionResult(
                filename=filename,
                processed=True,
                question_count=data.get("question_count", 0),
                grade_level=data.get("grade_level", grade_level),
                topic=data.get("topic", topic),
                storage_path=data.get("storage_path", storage_path),
                # Manifests written by this module store the time under
                # "processed_at"; fall back to it when "timestamp" is absent.
                timestamp=(
                    data.get("timestamp")
                    or data.get("processed_at")
                    or datetime.now(timezone.utc)
                ),
            )

    # Step 2: Download PDF from Firebase Storage
    try:
        _, bucket = _init_firebase_storage()
        blob = bucket.blob(storage_path)
        pdf_bytes = await _offload(blob.download_as_bytes)
    except Exception as e:
        logger.error(f"Failed to download PDF from Firebase Storage: {e}")
        return _failed()

    # Step 3: Extract text from PDF
    try:
        text = await asyncio.to_thread(_extract_pdf_text, pdf_bytes)
    except Exception as e:
        logger.error(f"Failed to extract text from PDF: {e}")
        return _failed()

    # Step 4: Chunk text
    chunks = _chunk_text(text)
    if not chunks:
        # Nothing to embed or ask about, e.g. a scanned PDF with no text layer.
        logger.warning(f"No extractable text found in PDF {filename}")
        return _failed()

    # Step 5: Generate embeddings (model load and encoding are CPU-bound)
    embedding_model = await asyncio.to_thread(SentenceTransformer, EMBEDDING_MODEL)

    def _embed_chunks() -> list:
        return [embedding_model.encode(piece).tolist() for piece in chunks]

    embeddings = await asyncio.to_thread(_embed_chunks)
    chunk_data = [
        {
            "id": hashlib.md5(f"{filename}:{i}:{piece[:100]}".encode()).hexdigest(),
            "text": piece,
            "embedding": vector,
        }
        for i, (piece, vector) in enumerate(zip(chunks, embeddings))
    ]

    # Step 6: Initialize DeepSeek client
    deepseek_client = get_deepseek_client()

    # Step 7: Generate questions for each chunk
    all_questions = []
    for chunk in chunk_data:
        questions = await _generate_questions_for_chunk(
            chunk["text"], chunk["id"], topic, grade_level, deepseek_client
        )
        for q in questions:
            if not isinstance(q, dict):
                continue
            q["id"] = _generate_chunk_id(chunk["id"], q.get("question", ""))
            all_questions.append(q)

    if not all_questions:
        # Every generation call failed or returned nothing usable. Do not
        # mark the PDF as completed, so the next request can retry.
        logger.error(f"No questions generated for PDF {filename}")
        return _failed()

    # Step 8: Save questions to Firestore
    question_count = await _save_questions_batch(
        firestore_client, all_questions, grade_level, topic
    )

    # Step 9: Save embeddings to Firestore
    await _save_embeddings_batch(firestore_client, chunk_data, filename)

    # Step 10: Save manifest to Firestore
    await _save_processing_manifest(
        firestore_client, filename, question_count, len(chunks),
        grade_level, topic, storage_path
    )

    logger.info(
        f"Completed ingestion for {filename}: {question_count} questions, "
        f"{len(chunks)} chunks"
    )

    return IngestionResult(
        filename=filename,
        processed=True,
        question_count=question_count,
        grade_level=grade_level,
        topic=topic,
        storage_path=storage_path,
        timestamp=datetime.now(timezone.utc),
    )
|
requirements.txt
CHANGED
|
@@ -22,4 +22,6 @@ redis[hiredis]>=5.0.0
|
|
| 22 |
PyYAML>=6.0.0
|
| 23 |
mypy>=1.20.0
|
| 24 |
pytest>=9.0.0
|
|
|
|
| 25 |
google-api-python-client>=2.0.0
|
|
|
|
|
|
| 22 |
PyYAML>=6.0.0
|
| 23 |
mypy>=1.20.0
|
| 24 |
pytest>=9.0.0
|
| 25 |
+
pytest-asyncio>=0.23.0
|
| 26 |
google-api-python-client>=2.0.0
|
| 27 |
+
pypdf>=4.0.0
|
routes/quiz_battle.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Quiz Battle API Routes.
|
| 3 |
+
|
| 4 |
+
Endpoints:
|
| 5 |
+
- POST /api/quiz-battle/generate — Generate varied questions for a battle session
|
| 6 |
+
- POST /api/quiz-battle/ingest-pdf — Trigger PDF ingestion (teacher/admin)
|
| 7 |
+
- GET /api/quiz-battle/bank-status — List processed PDFs (teacher/admin)
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
from typing import List, Optional, Dict, Any
|
| 12 |
+
from datetime import datetime, timezone
|
| 13 |
+
|
| 14 |
+
from fastapi import APIRouter, Request, HTTPException, Depends
|
| 15 |
+
from pydantic import BaseModel, Field
|
| 16 |
+
|
| 17 |
+
from rag.pdf_ingestion import ingest_pdf, IngestionResult
|
| 18 |
+
from services.question_bank_service import get_questions_for_battle, cache_session_questions, get_cached_session
|
| 19 |
+
from services.variance_engine import apply_variance
|
| 20 |
+
|
| 21 |
+
router = APIRouter(prefix="/api/quiz-battle", tags=["quiz-battle"])
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# ── Pydantic Models ──────────────────────────────────────────────────
|
| 25 |
+
|
| 26 |
+
class GenerateRequest(BaseModel):
    # Request body for POST /api/quiz-battle/generate.
    grade_level: int = Field(..., ge=7, le=12)
    topic: str = Field(..., min_length=1)
    # How many questions to return; defaults to 10, capped at 50.
    question_count: int = Field(default=10, ge=1, le=50)
    session_id: str = Field(..., min_length=1)
    player_ids: List[str] = Field(default_factory=list)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class GenerateResponse(BaseModel):
    # Response body for POST /api/quiz-battle/generate.
    questions: List[Dict[str, Any]]
    # Echo of the session id supplied in the request.
    session_id: str
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class IngestPdfRequest(BaseModel):
    # Request body for POST /api/quiz-battle/ingest-pdf.
    storage_path: str = Field(..., min_length=1)
    grade_level: int = Field(..., ge=7, le=12)
    topic: str = Field(..., min_length=1)
    # Passed through to ingest_pdf() as its force_reingest argument.
    force_reingest: bool = False
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class IngestPdfResponse(BaseModel):
    # Response body for POST /api/quiz-battle/ingest-pdf.
    # The endpoint sets status to "processed" or "skipped".
    status: str
    filename: str
    question_count: int
    grade_level: int
    topic: str
    storage_path: str
    timestamp: datetime
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class BankStatusItem(BaseModel):
    # One entry of the bank-status listing, built from a
    # pdf_processing_status document.
    filename: str
    processed: bool
    timestamp: Optional[datetime]
    question_count: int
    grade_level: int
    topic: str
    storage_path: str
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class BankStatusResponse(BaseModel):
    # Response body for GET /api/quiz-battle/bank-status.
    pdfs: List[BankStatusItem]
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# ── Helper ───────────────────────────────────────────────────────────
|
| 71 |
+
|
| 72 |
+
def _get_current_user(request: Request):
    """Return ``request.state.user``, or raise HTTP 401 when it is missing or None."""
    current = getattr(request.state, "user", None)
    if current is not None:
        return current
    raise HTTPException(status_code=401, detail="Authentication required")
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _is_internal_request(request: Request) -> bool:
    """Check if request is from an internal service (Cloud Functions).

    The ``X-Internal-Service`` header must equal the
    ``QUIZ_BATTLE_INTERNAL_SECRET`` environment variable. Returns False when
    the variable is unset or empty, or when the header is absent. The
    comparison is constant-time so response timing does not leak how much of
    a guessed secret matched.
    """
    import hmac

    expected = os.getenv("QUIZ_BATTLE_INTERNAL_SECRET")
    provided = request.headers.get("X-Internal-Service")
    if not expected or provided is None:
        return False
    # compare_digest rejects non-ASCII str arguments, so compare bytes.
    return hmac.compare_digest(provided.encode("utf-8"), expected.encode("utf-8"))
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# ── Endpoints ────────────────────────────────────────────────────────
|
| 89 |
+
|
| 90 |
+
@router.post("/generate", response_model=GenerateResponse)
async def generate_questions(
    body: GenerateRequest,
    request: Request,
):
    """
    Build the question set for a quiz battle session.

    Fetches base questions for the requested grade and topic, applies
    variance, caches the varied set under the session id, and returns it.
    ``correct_answer`` is removed from every question unless the request
    carries a valid X-Internal-Service header. Responds 404 when no
    questions are found.
    """
    base_questions = await get_questions_for_battle(
        body.grade_level,
        body.topic,
        body.question_count,
    )
    if not base_questions:
        raise HTTPException(
            status_code=404,
            detail=f"No questions found for grade {body.grade_level}, topic '{body.topic}'",
        )

    varied = await apply_variance(base_questions, body.session_id)

    await cache_session_questions(
        body.session_id,
        varied,
        body.player_ids,
        body.grade_level,
        body.topic,
    )

    # NOTE(review): "explanation" is still returned to non-internal callers
    # and may reveal the answer; confirm that is intended.
    if _is_internal_request(request):
        response_questions = [dict(q) for q in varied]
    else:
        response_questions = [
            {key: value for key, value in dict(q).items() if key != "correct_answer"}
            for q in varied
        ]

    return GenerateResponse(questions=response_questions, session_id=body.session_id)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
@router.post("/ingest-pdf", response_model=IngestPdfResponse)
async def ingest_pdf_endpoint(
    body: IngestPdfRequest,
    user=Depends(_get_current_user),
):
    """
    Trigger PDF ingestion into the question bank.

    Requires teacher or admin role (403 otherwise). FileNotFoundError maps
    to 404, ValueError to 400, and any other exception to 500. The original
    exception is chained as the cause so it appears in server tracebacks.
    """
    if user.role not in ("teacher", "admin"):
        raise HTTPException(status_code=403, detail="Teacher or admin access required")

    try:
        result = await ingest_pdf(
            storage_path=body.storage_path,
            grade_level=body.grade_level,
            topic=body.topic,
            force_reingest=body.force_reingest,
        )
    except FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Ingestion failed: {str(e)}") from e

    # NOTE(review): ingest_pdf() sets processed=False when the download or
    # text extraction fails, which is reported here as "skipped"; confirm
    # whether a distinct failure status is wanted.
    return IngestPdfResponse(
        status="processed" if result.processed else "skipped",
        filename=result.filename,
        question_count=result.question_count,
        grade_level=result.grade_level,
        topic=result.topic,
        storage_path=result.storage_path,
        timestamp=result.timestamp,
    )
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
@router.get("/bank-status", response_model=BankStatusResponse)
async def bank_status(
    user=Depends(_get_current_user),
):
    """
    Get the status of all processed PDFs in the question bank.

    Requires teacher or admin role (403 otherwise). Reads every document in
    ``pdf_processing_status``. Manifests that lack ``processed`` or
    ``timestamp`` are interpreted through their ``status`` and
    ``processed_at`` fields instead.
    """
    if user.role not in ("teacher", "admin"):
        raise HTTPException(status_code=403, detail="Teacher or admin access required")

    import asyncio

    from google.cloud import firestore
    db = firestore.Client(project=os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026"))

    # stream() is a blocking call; drain it in a worker thread.
    docs = await asyncio.to_thread(
        lambda: list(db.collection("pdf_processing_status").stream())
    )

    pdfs = []
    for doc in docs:
        data = doc.to_dict() or {}
        pdfs.append(BankStatusItem(
            filename=doc.id,
            # The ingestion manifest records status="completed" and
            # processed_at, so fall back to those when the direct keys are absent.
            processed=data.get("processed", data.get("status") == "completed"),
            timestamp=data.get("timestamp") or data.get("processed_at"),
            question_count=data.get("question_count", 0),
            grade_level=data.get("grade_level", 0),
            topic=data.get("topic", ""),
            storage_path=data.get("storage_path", ""),
        ))

    return BankStatusResponse(pdfs=pdfs)
|
services/question_bank_service.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Question Bank Service for Quiz Battle.
|
| 3 |
+
|
| 4 |
+
Handles querying the question bank with random ordering,
|
| 5 |
+
caching session questions, and 24-hour debounce for variance results.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import random
|
| 10 |
+
from datetime import datetime, timezone, timedelta
|
| 11 |
+
from typing import List, Dict, Optional
|
| 12 |
+
|
| 13 |
+
from google.cloud import firestore
|
| 14 |
+
|
| 15 |
+
DEFAULT_FIREBASE_PROJECT = os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _get_db() -> firestore.Client:
    """Create a Firestore client for the configured Firebase project."""
    client = firestore.Client(project=DEFAULT_FIREBASE_PROJECT)
    return client
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
async def get_questions_for_battle(
    grade_level: int,
    topic: str,
    count: int = 10,
) -> List[Dict]:
    """
    Fetch random questions from the question bank for a battle session.

    Reads from ``question_bank/<grade_level>/<topic>/questions/questions``,
    the collection the PDF ingester writes to. A random threshold is drawn;
    documents with ``random_seed >= threshold`` are taken first, and any
    shortfall is filled from documents below the threshold. If fewer than
    ``count`` questions exist, all available are returned.

    Returns:
        Question dicts that contain ``question``, ``choices``,
        ``correct_answer`` and ``difficulty``; others are dropped. Returns an
        empty list when ``topic`` contains ``"/"``, since that would change
        the shape of the Firestore path.
    """
    import asyncio

    if "/" in topic:
        return []

    db = _get_db()
    # A collection path needs an odd number of segments.
    collection_path = f"question_bank/{grade_level}/{topic}/questions/questions"
    collection_ref = db.collection(collection_path)

    # Pseudo-random query using random_seed >= random threshold
    threshold = random.random()
    query = (
        collection_ref
        .where("random_seed", ">=", threshold)
        .order_by("random_seed")
        .limit(count)
    )
    # stream() is blocking; drain it in a worker thread.
    docs = await asyncio.to_thread(lambda: list(query.stream()))

    # If we didn't get enough, query from the start to fill shortfall
    if len(docs) < count:
        remaining = count - len(docs)
        fallback_query = (
            collection_ref
            .where("random_seed", "<", threshold)
            .order_by("random_seed")
            .limit(remaining)
        )
        docs.extend(await asyncio.to_thread(lambda: list(fallback_query.stream())))

    required = ("question", "choices", "correct_answer", "difficulty")
    questions = (doc.to_dict() for doc in docs)
    return [q for q in questions if q and all(key in q for key in required)]
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
async def cache_session_questions(
    session_id: str,
    questions: List[Dict],
    player_ids: List[str],
    grade_level: int,
    topic: str,
) -> None:
    """Store a session's varied questions together with a 24-hour expiry.

    The ``quiz_battle_sessions/{session_id}`` document is (over)written with
    the session metadata, then every question is written to its
    ``questions`` subcollection under a document id equal to the question's
    position in ``questions``. All question writes go through one batch.
    """
    db = _get_db()
    session_ref = db.collection("quiz_battle_sessions").document(session_id)

    session_meta = {
        "player_ids": player_ids,
        "grade_level": grade_level,
        "topic": topic,
        "created_at": firestore.SERVER_TIMESTAMP,
        # Readers treat the cache as stale once this moment has passed.
        "variance_cached_until": datetime.now(timezone.utc) + timedelta(hours=24),
    }
    session_ref.set(session_meta)

    # One batched commit for all question documents.
    writer = db.batch()
    for position, payload in enumerate(questions):
        target = session_ref.collection("questions").document(str(position))
        writer.set(target, payload)
    writer.commit()
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def _question_doc_position(doc) -> tuple:
    """Sort key that puts cached question documents back in numeric id order."""
    doc_id = getattr(doc, "id", None)
    try:
        return (0, int(doc_id), "")
    except (TypeError, ValueError):
        # Non-numeric ids sort after the numeric ones, alphabetically.
        return (1, 0, str(doc_id))


async def get_cached_session(session_id: str) -> Optional[List[Dict]]:
    """
    Return the cached varied questions of a session while the cache is fresh.

    The session document's ``variance_cached_until`` field is compared with
    the current UTC time. Naive datetimes are interpreted as UTC; other
    objects exposing ``timestamp()`` are converted to an aware datetime.

    Returns:
        The cached questions ordered by their numeric document id (the
        position they were cached under), or ``None`` when the session does
        not exist, the expiry is missing, unreadable or in the past, or no
        question documents are stored.
    """
    db = _get_db()
    session_ref = db.collection("quiz_battle_sessions").document(session_id)
    session_doc = session_ref.get()
    if not session_doc.exists:
        return None

    data = session_doc.to_dict() or {}
    cached_until = data.get("variance_cached_until")
    if not cached_until:
        return None

    if isinstance(cached_until, datetime):
        if cached_until.tzinfo is None:
            cached_until = cached_until.replace(tzinfo=timezone.utc)
    elif hasattr(cached_until, "timestamp"):
        # Timestamp-like object: normalise to an aware UTC datetime.
        cached_until = datetime.fromtimestamp(cached_until.timestamp(), tz=timezone.utc)
    else:
        # Unknown expiry representation: treat as a cache miss instead of
        # failing on the comparison below.
        return None

    if cached_until <= datetime.now(timezone.utc):
        return None

    # Question documents are stored under ids "0", "1", ..., so a plain
    # stream yields "10" before "2". Sort numerically to restore the order
    # in which the questions were cached.
    q_docs = sorted(
        session_ref.collection("questions").stream(),
        key=_question_doc_position,
    )
    questions = [doc.to_dict() for doc in q_docs]
    return questions or None
|
services/variance_engine.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Variance Engine for Quiz Battle Questions.
|
| 3 |
+
|
| 4 |
+
Applies per-session variance techniques via DeepSeek,
|
| 5 |
+
with pure-Python fallback for choice shuffling.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
import random
|
| 10 |
+
import re
|
| 11 |
+
from typing import List, Dict
|
| 12 |
+
|
| 13 |
+
from services.ai_client import get_deepseek_client, CHAT_MODEL
|
| 14 |
+
from services.question_bank_service import get_cached_session, cache_session_questions
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _fallback_shuffle(questions: List[Dict], seed: int) -> List[Dict]:
    """
    Pure-Python fallback: shuffle each question's choices deterministically.

    Args:
        questions: Question dicts with a ``choices`` list and a
            ``correct_answer`` letter ("A" refers to the first choice).
        seed: Seed for the private random generator; the same seed and input
            always produce the same output.

    Returns:
        A new list of shallow-copied questions. The input dicts are not
        modified. Each shuffled question has ``correct_answer`` updated to the
        new position and ``variance_applied`` set to ``["choice_shuffle"]``.
        A question whose ``correct_answer`` does not identify one of its
        choices is returned unshuffled with ``variance_applied`` set to
        ``[]``, because its answer key could not be kept consistent.
    """
    rng = random.Random(seed)
    shuffled: List[Dict] = []
    for original in questions:
        q = dict(original)  # work on a copy so the caller's dict stays intact
        choices = list(q.get("choices") or [])
        answer = str(q.get("correct_answer", "")).strip().upper()
        correct_index = ord(answer) - ord("A") if len(answer) == 1 else -1

        if not 0 <= correct_index < len(choices):
            q["variance_applied"] = []
            shuffled.append(q)
            continue

        # Shuffle positions rather than texts so the correct choice is tracked
        # by index even when two choices carry identical text.
        order = list(range(len(choices)))
        rng.shuffle(order)
        q["choices"] = [choices[i] for i in order]
        q["correct_answer"] = chr(ord("A") + order.index(correct_index))
        q["variance_applied"] = ["choice_shuffle"]
        shuffled.append(q)
    return shuffled
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
async def apply_variance(questions: List[Dict], session_id: str) -> List[Dict]:
    """
    Apply per-session variance to a list of questions.

    Steps:
        1. Return the cached questions for ``session_id`` when a fresh cache
           entry exists.
        2. Otherwise send the questions to DeepSeek with the variance prompt.
        3. Parse the reply as JSON and validate its shape.
        4. If the call, parsing or validation fails, fall back to
           ``_fallback_shuffle``.
        5. Cache the result through ``cache_session_questions``.

    Args:
        questions: Source questions for the session.
        session_id: Session identifier; also the source of the variance seed.

    Returns:
        The varied questions (cached, DeepSeek-generated, or shuffled).
    """
    import zlib  # local import: only needed for the stable seed below

    # 1. Check cache
    cached = await get_cached_session(session_id)
    if cached:
        return cached

    # 2. Derive a seed that is stable across processes. The built-in hash()
    # of a str is salted per interpreter process, so it would give a
    # different seed for the same session after a restart or on another worker.
    seed = zlib.crc32(session_id.encode("utf-8"))

    # 3. Call DeepSeek
    client = get_deepseek_client()
    system_prompt = (
        "You are a math quiz variance engine for MathPulse AI, an educational platform for "
        "Filipino high school students following the DepEd K-12 curriculum. "
        "Your job is to make quiz questions feel fresh each session WITHOUT changing the "
        "correct answer or difficulty level."
    )

    user_prompt = f"""Given these {len(questions)} quiz battle questions as JSON:
{json.dumps(questions, indent=2)}

Apply the following variance techniques. Use session_seed={seed} for deterministic but varied output:

PARAPHRASE (30% chance per question): Reword the question stem using different phrasing, synonyms, or sentence structure. Do NOT change the math or the answer.

CHOICE SHUFFLE (always): Randomize the order of answer choices A/B/C/D. Update "correct_answer" to reflect the new position.

DISTRACTOR REFRESH (20% chance per question): Replace 1-2 wrong choices with new plausible-but-incorrect distractors that represent common student misconceptions for this topic. Keep the correct answer unchanged.

CONTEXT SWAP (10% chance per question): Replace real-world context variables (names, objects, currencies) with Filipino-localized equivalents (e.g., "pesos", "jeepney", "barangay") to increase cultural relevance.

NUMERIC SCALING (10% chance, only for computation problems): Scale numbers by a small integer factor (2x or 3x) so the method remains the same but the answer changes. Recompute the correct answer and all distractors accordingly.

Return the full modified questions array as valid JSON only. Keep all original fields.
Add a "variance_applied": ["paraphrase", "distractor_refresh", ...] field per question.
Do NOT change "topic", "difficulty", "grade_level", or "source_chunk_id"."""

    try:
        # NOTE(review): this call is not awaited, so it presumably blocks the
        # event loop for the duration of the request - confirm and consider
        # running it in a worker thread.
        response = client.chat.completions.create(
            model=CHAT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.5,
            max_tokens=4000,
        )
        content = response.choices[0].message.content.strip()
        # Strip markdown code fences, with or without a "json" language tag.
        content = re.sub(r"^```(?:json)?\s*", "", content, flags=re.IGNORECASE)
        content = re.sub(r"\s*```$", "", content)
        varied_questions = json.loads(content)

        if not isinstance(varied_questions, list) or len(varied_questions) != len(questions):
            raise ValueError("Invalid response format from DeepSeek")

        # Validate required fields and that the answer key is usable.
        for q in varied_questions:
            if not isinstance(q, dict) or not all(
                k in q for k in ("question", "choices", "correct_answer", "variance_applied")
            ):
                raise ValueError("Missing required fields in varied question")
            choices = q["choices"]
            answer = q["correct_answer"]
            if (
                not isinstance(choices, list)
                or not isinstance(answer, str)
                or len(answer) != 1
                or not 0 <= ord(answer) - ord("A") < len(choices)
            ):
                raise ValueError("correct_answer does not reference one of the choices")

    except Exception as e:
        # Deliberate best-effort: any failure above degrades to a plain shuffle.
        print(f"[variance_engine] DeepSeek variance failed, falling back to shuffle: {e}")
        varied_questions = _fallback_shuffle(questions, seed)

    # 4. Cache for 24 hours
    # player_ids are not available here; grade_level/topic come from the first
    # original question when present, otherwise the defaults below are used.
    player_ids = []
    grade_level = questions[0].get("grade_level", 11) if questions else 11
    topic = questions[0].get("topic", "general_mathematics") if questions else "general_mathematics"
    await cache_session_questions(session_id, varied_questions, player_ids, grade_level, topic)

    return varied_questions
|
tests/test_quiz_battle.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for Quiz Battle RAG-powered question bank.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
from unittest.mock import patch, MagicMock, AsyncMock
|
| 7 |
+
from datetime import datetime, timezone, timedelta
|
| 8 |
+
|
| 9 |
+
from fastapi.testclient import TestClient
|
| 10 |
+
|
| 11 |
+
# Mock firebase_admin before imports
|
| 12 |
+
import sys
|
| 13 |
+
from unittest.mock import MagicMock
|
| 14 |
+
|
| 15 |
+
# Modules replaced by mocks below so that importing `main` does not load the
# real Firebase / Firestore packages.
_MOCKED_MODULE_NAMES = (
    "firebase_admin",
    "firebase_admin.credentials",
    "google.cloud.firestore",
)

# Remember whatever was registered before, so that every replaced entry can be
# put back once this module's tests are done.
_original_modules = {name: sys.modules.get(name) for name in _MOCKED_MODULE_NAMES}
_original_firebase_admin = _original_modules["firebase_admin"]

firebase_mock = MagicMock()
sys.modules["firebase_admin"] = firebase_mock
sys.modules["firebase_admin.credentials"] = MagicMock()
sys.modules["google.cloud.firestore"] = MagicMock()

from main import app

client = TestClient(app)


@pytest.fixture(scope="module", autouse=True)
def _cleanup_firebase_mock():
    """Restore every module mocked above after all tests in this module ran."""
    yield
    for name, original in _original_modules.items():
        if original is not None:
            sys.modules[name] = original
        else:
            # Nothing was registered before: drop the mock entry again.
            sys.modules.pop(name, None)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# ── PDF Ingestion Tests ──────────────────────────────────────────────
|
| 38 |
+
|
| 39 |
+
class TestPdfIngestion:
    """Tests for ``rag.pdf_ingestion.ingest_pdf`` with all collaborators mocked."""

    @pytest.mark.asyncio
    async def test_ingest_pdf_skips_already_processed(self):
        """If pdf_processing_status says processed, skip re-ingestion."""
        status_snapshot = MagicMock()
        status_snapshot.exists = True
        status_snapshot.to_dict.return_value = {
            "processed": True,
            "question_count": 10,
            "grade_level": 8,
            "topic": "linear_equations",
            "storage_path": "quiz_pdfs/grade_8/test.pdf",
            "timestamp": datetime.now(timezone.utc),
        }

        # get() has to be awaitable, so it is a coroutine function.
        async def fetch_status():
            return status_snapshot

        status_ref = MagicMock()
        status_ref.get = fetch_status

        with patch("rag.pdf_ingestion.Client") as mock_firestore:
            firestore_client = mock_firestore.return_value
            firestore_client.collection.return_value.document.return_value = status_ref

            from rag.pdf_ingestion import ingest_pdf
            result = await ingest_pdf("quiz_pdfs/grade_8/test.pdf", 8, "linear_equations")
            assert result.processed is True
            assert result.question_count == 10

    @pytest.mark.asyncio
    async def test_ingest_pdf_force_reingest(self):
        """If force_reingest=True, process even if already done."""
        status_snapshot = MagicMock()
        status_snapshot.exists = True
        status_snapshot.to_dict.return_value = {"processed": True}

        async def fetch_status():
            return status_snapshot

        status_ref = MagicMock()
        status_ref.get = fetch_status

        # Storage bucket whose blob yields fake PDF bytes.
        pdf_blob = MagicMock()
        pdf_blob.exists.return_value = True
        pdf_blob.download_as_bytes.return_value = b"pdf bytes"
        bucket = MagicMock()
        bucket.blob.return_value = pdf_blob

        generated_question = {
            "question": "What is 2+2?",
            "choices": ["A) 3", "B) 4", "C) 5", "D) 6"],
            "correct_answer": "B",
            "explanation": "Basic addition",
            "topic": "linear_equations",
            "difficulty": "easy",
            "grade_level": 8,
            "source_chunk_id": "chunk1",
        }

        with patch("rag.pdf_ingestion.Client") as mock_firestore, \
             patch("rag.pdf_ingestion._init_firebase_storage") as mock_storage, \
             patch("rag.pdf_ingestion._extract_pdf_text") as mock_extract, \
             patch("rag.pdf_ingestion._chunk_text") as mock_chunk, \
             patch("rag.pdf_ingestion._generate_questions_for_chunk") as mock_gen, \
             patch("rag.pdf_ingestion._save_questions_batch") as mock_save, \
             patch("rag.pdf_ingestion._save_embeddings_batch") as mock_save_emb, \
             patch("rag.pdf_ingestion._save_processing_manifest") as mock_save_status, \
             patch("rag.pdf_ingestion.get_deepseek_client") as mock_deepseek:

            firestore_client = mock_firestore.return_value
            firestore_client.collection.return_value.document.return_value = status_ref
            mock_storage.return_value = (None, bucket)
            mock_extract.return_value = "Some math content"
            mock_chunk.return_value = ["chunk1"]
            mock_gen.return_value = [generated_question]
            mock_save.return_value = 1
            mock_deepseek.return_value = MagicMock()

            from rag.pdf_ingestion import ingest_pdf
            result = await ingest_pdf(
                "quiz_pdfs/grade_8/test.pdf",
                8,
                "linear_equations",
                force_reingest=True,
            )
            assert result.processed is True
            assert result.question_count == 1
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# ── Question Bank Service Tests ──────────────────────────────────────
|
| 114 |
+
|
| 115 |
+
class TestQuestionBankService:
    """Tests for ``services.question_bank_service`` with Firestore mocked out."""

    @pytest.mark.asyncio
    async def test_get_questions_for_battle(self):
        """Fetch questions with random ordering."""
        with patch("services.question_bank_service._get_db") as mock_db:
            mock_doc = MagicMock()
            mock_doc.to_dict.return_value = {
                "question": "What is 2+2?",
                "choices": ["A) 3", "B) 4", "C) 5", "D) 6"],
                "correct_answer": "B",
                "difficulty": "easy",
                "random_seed": 0.5,
            }
            mock_collection = MagicMock()
            # Every where().order_by().limit() chain on a MagicMock resolves to
            # the same child mock, so one assignment covers both the primary
            # and the wrap-around query.
            mock_collection.where.return_value.order_by.return_value.limit.return_value.stream.return_value = [mock_doc]
            mock_db.return_value.collection.return_value = mock_collection

            from services.question_bank_service import get_questions_for_battle
            questions = await get_questions_for_battle(8, "linear_equations", 1)
            assert len(questions) == 1
            assert questions[0]["question"] == "What is 2+2?"

    @pytest.mark.asyncio
    async def test_cache_session_questions(self):
        """Cache questions for 24 hours."""
        with patch("services.question_bank_service._get_db") as mock_db:
            mock_session_ref = MagicMock()
            mock_db.return_value.collection.return_value.document.return_value = mock_session_ref
            mock_batch = mock_db.return_value.batch.return_value

            from services.question_bank_service import cache_session_questions
            await cache_session_questions(
                "session_123",
                [{"question": "Q1", "correct_answer": "A"}],
                ["uid1"],
                8,
                "linear_equations",
            )
            mock_session_ref.set.assert_called_once()
            # The single question must be queued on the batch and committed.
            mock_batch.set.assert_called_once()
            mock_batch.commit.assert_called_once()
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# ── Variance Engine Tests ────────────────────────────────────────────
|
| 157 |
+
|
| 158 |
+
class TestVarianceEngine:
    """Tests for ``services.variance_engine.apply_variance``."""

    @pytest.mark.asyncio
    async def test_apply_variance_uses_cache(self):
        """If cache exists, return cached questions."""
        cached_payload = [{"question": "Cached?", "correct_answer": "A"}]
        with patch("services.variance_engine.get_cached_session") as mock_cache:
            mock_cache.return_value = cached_payload
            from services.variance_engine import apply_variance
            result = await apply_variance([], "session_123")
            assert result[0]["question"] == "Cached?"

    @pytest.mark.asyncio
    async def test_apply_variance_fallback_shuffle(self):
        """If DeepSeek fails, fallback to pure Python shuffle."""
        source_questions = [{
            "question": "What is 2+2?",
            "choices": ["A) 3", "B) 4", "C) 5", "D) 6"],
            "correct_answer": "B",
            "difficulty": "easy",
            "topic": "math",
            "grade_level": 8,
            "source_chunk_id": "c1",
        }]

        with patch("services.variance_engine.get_cached_session") as mock_cache, \
             patch("services.variance_engine.get_deepseek_client") as mock_client, \
             patch("services.variance_engine.cache_session_questions") as mock_save:
            # No cache entry, and the DeepSeek call blows up.
            mock_cache.return_value = None
            completions = mock_client.return_value.chat.completions
            completions.create.side_effect = Exception("API error")
            mock_save.return_value = None

            from services.variance_engine import apply_variance
            result = await apply_variance(source_questions, "session_123")
            assert len(result) == 1

            varied = result[0]
            assert varied["variance_applied"] == ["choice_shuffle"]
            # Correct answer should still point to the right text
            answer_position = ord(varied["correct_answer"]) - ord("A")
            assert "4" in varied["choices"][answer_position]
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# ── Route Integration Tests ──────────────────────────────────────────
|
| 197 |
+
|
| 198 |
+
class TestQuizBattleRoutes:
    """Unauthenticated smoke tests against the quiz-battle HTTP routes."""

    def test_generate_unauthorized(self):
        """Generate without auth should 401 or 403 depending on middleware."""
        payload = {
            "grade_level": 8,
            "topic": "linear_equations",
            "question_count": 10,
            "session_id": "test-session",
            "player_ids": ["uid1"],
        }
        response = client.post("/api/quiz-battle/generate", json=payload)
        # Auth middleware may reject or allow in test env
        accepted_statuses = (200, 401, 403)
        assert response.status_code in accepted_statuses

    def test_ingest_pdf_unauthorized(self):
        """Ingest-pdf without teacher role should 403."""
        payload = {
            "storage_path": "quiz_pdfs/grade_8/test.pdf",
            "grade_level": 8,
            "topic": "linear_equations",
        }
        response = client.post("/api/quiz-battle/ingest-pdf", json=payload)
        rejected_statuses = (401, 403)
        assert response.status_code in rejected_statuses

    def test_bank_status_unauthorized(self):
        """Bank-status without teacher role should 403."""
        response = client.get("/api/quiz-battle/bank-status")
        rejected_statuses = (401, 403)
        assert response.status_code in rejected_statuses
|