Spaces:
Running
Running
github-actions[bot] committed on
Commit ·
f717a11
1
Parent(s): ac19778
🚀 Auto-deploy backend from GitHub (46778ac)
Browse files- backend/main.py +0 -0
- backend/rag/curriculum_rag.py +0 -318
- backend/routes/rag_routes.py +0 -427
- main.py +2 -0
- rag/curriculum_rag.py +3 -3
- routes/rag_routes.py +115 -58
- test_full_rag.py +75 -0
- test_retrieval.py +39 -0
- tests/test_rag_pipeline.py +16 -10
backend/main.py
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
backend/rag/curriculum_rag.py
DELETED
|
@@ -1,318 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Updated curriculum RAG with exact match retrieval and 7-section notebook output.
|
| 3 |
-
"""
|
| 4 |
-
|
| 5 |
-
from __future__ import annotations
|
| 6 |
-
|
| 7 |
-
from typing import Dict, List, Optional, Tuple
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
def _to_where(
|
| 11 |
-
subject: Optional[str] = None,
|
| 12 |
-
quarter: Optional[int] = None,
|
| 13 |
-
content_domain: Optional[str] = None,
|
| 14 |
-
chunk_type: Optional[str] = None,
|
| 15 |
-
module_id: Optional[str] = None,
|
| 16 |
-
lesson_id: Optional[str] = None,
|
| 17 |
-
competency_code: Optional[str] = None,
|
| 18 |
-
storage_path: Optional[str] = None,
|
| 19 |
-
) -> Optional[Dict[str, object]]:
|
| 20 |
-
clauses = []
|
| 21 |
-
if subject:
|
| 22 |
-
clauses.append({"subject": {"$eq": subject}})
|
| 23 |
-
if quarter is not None:
|
| 24 |
-
clauses.append({"quarter": {"$eq": int(quarter)}})
|
| 25 |
-
if content_domain:
|
| 26 |
-
clauses.append({"content_domain": {"$eq": content_domain}})
|
| 27 |
-
if chunk_type:
|
| 28 |
-
clauses.append({"chunk_type": {"$eq": chunk_type}})
|
| 29 |
-
if module_id:
|
| 30 |
-
clauses.append({"module_id": {"$eq": module_id}})
|
| 31 |
-
if lesson_id:
|
| 32 |
-
clauses.append({"lesson_id": {"$eq": lesson_id}})
|
| 33 |
-
if competency_code:
|
| 34 |
-
clauses.append({"competency_code": {"$eq": competency_code}})
|
| 35 |
-
if storage_path:
|
| 36 |
-
clauses.append({"storage_path": {"$eq": storage_path}})
|
| 37 |
-
if not clauses:
|
| 38 |
-
return None
|
| 39 |
-
if len(clauses) == 1:
|
| 40 |
-
return clauses[0]
|
| 41 |
-
return {"$and": clauses}
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def _distance_to_score(distance: float) -> float:
|
| 45 |
-
return round(1.0 / (1.0 + max(distance, 0.0)), 4)
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
def retrieve_curriculum_context(
    query: str,
    subject: str | None = None,
    quarter: int | None = None,
    content_domain: str | None = None,
    chunk_type: str | None = None,
    module_id: str | None = None,
    lesson_id: str | None = None,
    competency_code: str | None = None,
    storage_path: str | None = None,
    top_k: int = 8,
) -> list[dict]:
    """Run a filtered semantic search and return normalised result rows.

    The query is prefixed with a retrieval instruction, embedded with
    normalised embeddings, and sent to the collection together with the
    metadata filter built by ``_to_where``. At least one result is always
    requested, even if ``top_k`` is below 1.

    Returns:
        One dict per returned document with the keys ``content``, ``subject``,
        ``quarter``, ``content_domain``, ``chunk_type``, ``source_file``,
        ``storage_path``, ``module_id``, ``lesson_id``, ``competency_code``,
        ``page`` and ``score``. Missing metadata falls back to defaults.
    """
    # Imported lazily, exactly as the original did.
    from rag.vectorstore_loader import get_vectorstore_components

    _, collection, embedder = get_vectorstore_components()
    metadata_filter = _to_where(
        subject,
        quarter,
        content_domain,
        chunk_type,
        module_id,
        lesson_id,
        competency_code,
        storage_path,
    )

    instruction = "Represent this sentence for searching relevant passages: "
    query_vector = embedder.encode(
        f"{instruction}{query}",
        normalize_embeddings=True,
    ).tolist()

    response = collection.query(
        query_embeddings=[query_vector],
        n_results=max(1, top_k),
        where=metadata_filter,
        include=["documents", "metadatas", "distances"],
    )

    def _first_batch(field: str) -> list:
        # Only one query embedding is sent, so only the first batch is read.
        return (response.get(field) or [[]])[0]

    documents = _first_batch("documents")
    metadatas = _first_batch("metadatas")
    distances = _first_batch("distances")

    def _as_row(position: int, content: object) -> dict:
        has_meta = position < len(metadatas) and isinstance(metadatas[position], dict)
        meta = metadatas[position] if has_meta else {}
        # A missing distance is treated as 1.0.
        gap = float(distances[position]) if position < len(distances) else 1.0
        return {
            "content": str(content or ""),
            "subject": str(meta.get("subject") or "unknown"),
            "quarter": int(meta.get("quarter") or 0),
            "content_domain": str(meta.get("content_domain") or "general"),
            "chunk_type": str(meta.get("chunk_type") or "concept"),
            "source_file": str(meta.get("source_file") or ""),
            "storage_path": str(meta.get("storage_path") or ""),
            "module_id": str(meta.get("module_id") or ""),
            "lesson_id": str(meta.get("lesson_id") or ""),
            "competency_code": str(meta.get("competency_code") or ""),
            "page": int(meta.get("page") or 0),
            "score": _distance_to_score(gap),
        }

    return [_as_row(position, content) for position, content in enumerate(documents)]
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
def build_exact_lesson_query(
    topic: str,
    subject: str,
    quarter: int,
    lesson_title: str | None = None,
    competency: str | None = None,
    module_unit: str | None = None,
    learner_level: str | None = None,
    competency_code: str | None = None,
) -> str:
    """Join the lesson descriptors into one ``" | "``-separated query string.

    ``topic``, ``subject`` and ``"Quarter <quarter>"`` always appear; each
    optional descriptor is appended only if it is non-blank after stripping.
    """
    optional_parts = (lesson_title, competency, module_unit, learner_level, competency_code)
    cleaned = [str(part or "").strip() for part in optional_parts]
    return " | ".join(
        [topic, subject, f"Quarter {quarter}"] + [text for text in cleaned if text]
    )
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
def build_lesson_query(
    topic: str,
    subject: str,
    quarter: int,
    *,
    lesson_title: Optional[str] = None,
    competency: Optional[str] = None,
    module_unit: Optional[str] = None,
    learner_level: Optional[str] = None,
) -> str:
    """Compose a ``" | "``-separated retrieval query for a lesson.

    The first three segments are always ``topic``, ``subject`` and
    ``"Quarter <quarter>"``. The keyword-only extras follow in declaration
    order, skipping any that are ``None`` or blank once stripped.
    """
    segments = [topic, subject, f"Quarter {quarter}"]
    extras = (lesson_title, competency, module_unit, learner_level)
    # filter(None, ...) drops the empty strings left after stripping.
    segments.extend(filter(None, (str(extra or "").strip() for extra in extras)))
    return " | ".join(segments)
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
def retrieve_lesson_pdf_context(
    topic: str,
    subject: str,
    quarter: int,
    lesson_title: str | None = None,
    competency: str | None = None,
    module_id: str | None = None,
    lesson_id: str | None = None,
    competency_code: str | None = None,
    storage_path: str | None = None,
    top_k: int = 8,
) -> Tuple[list[dict], str]:
    """Retrieve lesson chunks, preferring those from an exact ``storage_path``.

    Strategy:
      1. With a ``storage_path``, search restricted to that path. If any hit
         scores at least 0.65, return those hits with mode ``"exact"``.
      2. Otherwise run an unrestricted subject/quarter search.
      3. If the restricted search had hits (all below the threshold), merge
         both result sets, drop duplicates, sort by score descending, and
         return the top ``top_k`` with mode ``"hybrid"``.
      4. Otherwise return the unrestricted hits with mode ``"general"``.

    ``lesson_title``, ``competency``, ``module_id``, ``lesson_id`` and
    ``competency_code`` are accepted but not used by this function.
    """

    def _search(**extra_filters):
        return retrieve_curriculum_context(
            query=topic,
            subject=subject,
            quarter=quarter,
            top_k=top_k,
            **extra_filters,
        )

    pinned = []
    if storage_path:
        pinned = _search(storage_path=storage_path)
        if pinned and any(hit["score"] >= 0.65 for hit in pinned):
            return pinned, "exact"

    broad = _search()

    if not (storage_path and pinned):
        return broad, "general"

    # First occurrence wins, so path-restricted hits beat their duplicates.
    unique = {}
    for hit in pinned + broad:
        fingerprint = f"{hit.get('source_file')}:{hit.get('page')}:{hit.get('content', '')[:60]}"
        if fingerprint not in unique:
            unique[fingerprint] = hit

    ranked = sorted(unique.values(), key=lambda hit: hit.get("score", 0), reverse=True)
    return ranked[:top_k], "hybrid"
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
def format_retrieved_chunks(curriculum_chunks: list[dict]) -> str:
    """Render retrieved chunks as a numbered reference list for a prompt.

    Each chunk becomes two lines: a header with source file, page,
    domain/type and score, then an excerpt line. Returns a fixed placeholder
    sentence when there are no chunks.
    """
    entries = [
        f"{number}. [{item.get('source_file')} p.{item.get('page')}] "
        f"({item.get('content_domain')}/{item.get('chunk_type')}) score={item.get('score')}\n"
        f" Excerpt: {item.get('content', '')}"
        for number, item in enumerate(curriculum_chunks, start=1)
    ]
    if not entries:
        return "No curriculum context retrieved."
    return "\n".join(entries)
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, object]:
    """Summarise retrieval quality from the scores of the first five chunks.

    Args:
        curriculum_chunks: Retrieved rows; each may carry a numeric ``score``.
            Missing or falsy scores count as 0.0.

    Returns:
        ``{"confidence": <mean score rounded to 3 decimals>, "band": <label>}``
        where the band is ``"high"`` for a mean of at least 0.72, ``"medium"``
        for at least 0.5, and ``"low"`` otherwise. Empty input gives
        confidence 0.0 and band ``"low"``.
    """
    if not curriculum_chunks:
        return {"confidence": 0.0, "band": "low"}

    # Only the first five rows contribute to the confidence estimate.
    top_scores = [float(chunk.get("score") or 0.0) for chunk in curriculum_chunks[:5]]
    mean_score = sum(top_scores) / len(top_scores)

    if mean_score >= 0.72:
        band = "high"
    elif mean_score >= 0.5:
        band = "medium"
    else:
        band = "low"
    return {"confidence": round(mean_score, 3), "band": band}
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
    """Group retrieved chunks into lesson-section buckets by ``content_domain``.

    Args:
        chunks: Retrieved rows; each may carry a ``content_domain`` value.

    Returns:
        A dict with the fixed keys ``introduction``, ``key_concepts``,
        ``worked_examples``, ``important_notes``, ``practice``, ``summary``,
        ``assessment`` and ``general`` (in that order). A chunk whose domain
        is missing, unknown, or not a string goes into ``general``. Input
        order is preserved within each bucket.
    """
    section_names = (
        "introduction",
        "key_concepts",
        "worked_examples",
        "important_notes",
        "practice",
        "summary",
        "assessment",
        "general",
    )
    sections: Dict[str, List[dict]] = {name: [] for name in section_names}

    for chunk in chunks:
        domain = chunk.get("content_domain", "general")
        # The isinstance guard keeps unhashable values (e.g. a list) from
        # raising TypeError on the membership test.
        target = domain if isinstance(domain, str) and domain in sections else "general"
        sections[target].append(chunk)
    return sections
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
def build_lesson_prompt(
    *,
    lesson_title: str,
    competency: str,
    grade_level: str,
    subject: str,
    quarter: int,
    learner_level: Optional[str],
    module_unit: Optional[str],
    curriculum_chunks: list[dict],
    competency_code: Optional[str] = None,
) -> str:
    """Build the LLM prompt that asks for a 7-section lesson as JSON.

    The prompt contains the lesson metadata, the retrieved chunks rendered by
    ``format_retrieved_chunks``, the required JSON skeleton, and the grounding
    rules. Missing ``competency_code`` and ``module_unit`` are shown as
    ``'n/a'``; a missing ``learner_level`` is shown as ``'Grade 11-12'``.

    Returns:
        The complete prompt text.
    """
    # The previous version also grouped the chunks by section and discarded
    # the result; that unused computation has been removed.
    refs_text = format_retrieved_chunks(curriculum_chunks)

    return (
        "You are a DepEd-aligned Grade 11-12 mathematics instructional designer.\n"
        "Generate a lesson in JSON format. Use ONLY the retrieved curriculum evidence below.\n"
        "Do NOT invent content. Do NOT add generic motivational text. All content must be grounded in the retrieved excerpts.\n\n"
        f"Lesson title: {lesson_title}\n"
        f"Competency code: {competency_code or 'n/a'}\n"
        f"Curriculum competency: {competency}\n"
        f"Grade level: {grade_level}\n"
        f"Subject: {subject}\n"
        f"Quarter: Q{quarter}\n"
        f"Learner level: {learner_level or 'Grade 11-12'}\n"
        f"Module/unit: {module_unit or 'n/a'}\n\n"
        "[CURRICULUM CONTEXT]\n"
        f"{refs_text}\n\n"
        "Return ONLY valid JSON with this exact structure. All 7 sections are required:\n"
        "{\n"
        ' "sections": [\n'
        ' {"type": "introduction", "title": "Introduction", "content": "..."},\n'
        ' {"type": "key_concepts", "title": "Key Concepts", "content": "...", "callouts": [{"type":"important|ti..."}]\n},'
        ' {"type": "video", "title": "Video Lesson", "content": "...", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},\n'
        ' {"type": "worked_examples", "title": "Worked Examples", "examples": [{"problem":"...","steps":["Step 1: ...","Step 2: ..."],"answer":"..."}]},\n'
        ' {"type": "important_notes", "title": "Important Notes", "bulletPoints": ["...","..."]},\n'
        ' {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": [{"question":"...","solution":"..."}]},\n'
        ' {"type": "summary", "title": "Summary", "content": "..."}\n'
        " ],\n"
        ' "needsReview": false\n'
        "}\n\n"
        "Rules:\n"
        "- content in introduction, key_concepts, important_notes, summary: use paragraph/bullet text grounded in retrieved chunks\n"
        "- examples must reflect actual content from the retrieved curriculum (real formulas, real contexts)\n"
        "- practiceProblems should be derivable from worked examples\n"
        "- callouts: type is 'important', 'tip', or 'warning'\n"
        "- video section: content is a brief sentence, leave videoId empty (will be filled by backend)\n"
        "- Do not use placeholder text like 'placeholder' or 'example text'\n"
        "- Do not fabricate worked examples - use actual curriculum content\n"
    )
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
def build_problem_generation_prompt(topic: str, difficulty: str, curriculum_chunks: list[dict]) -> str:
    """Build the LLM prompt for one curriculum-aligned practice problem.

    The retrieved chunks are listed one per line (source, page, domain/type,
    content) under a ``[CURRICULUM CONTEXT]`` heading; a fixed placeholder
    sentence is used when there are none. The model is asked to return JSON
    with the keys ``problem``, ``solution`` and ``competencyReference``.
    """
    context_lines = [
        f"{number}. [{item.get('source_file')} p.{item.get('page')}] "
        f"({item.get('content_domain')}/{item.get('chunk_type')}) {item.get('content', '')}"
        for number, item in enumerate(curriculum_chunks, start=1)
    ]
    if context_lines:
        refs_text = "\n".join(context_lines)
    else:
        refs_text = "No curriculum context retrieved."

    prompt_parts = [
        "Generate one practice problem strictly aligned to the retrieved DepEd competency scope.\n",
        "Do not include topics outside the competency context.\n\n",
        f"Topic: {topic}\n",
        f"Difficulty: {difficulty}\n\n",
        "[CURRICULUM CONTEXT]\n",
        f"{refs_text}\n\n",
        "Return JSON with keys: problem, solution, competencyReference",
    ]
    return "".join(prompt_parts)
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
def build_analysis_curriculum_context(weak_topics: list[str], subject: str) -> list[dict]:
    """Collect de-duplicated learning-competency chunks for the weak topics.

    For each weak topic, the two best ``learning_competency`` chunks for the
    subject are retrieved. Rows sharing source file, page and the first 80
    characters of content are treated as duplicates, and the first one seen
    is kept.
    """
    unique_rows = {}
    for topic_name in weak_topics:
        matches = retrieve_curriculum_context(
            query=f"DepEd learning competency for {topic_name}",
            subject=subject,
            chunk_type="learning_competency",
            top_k=2,
        )
        for match in matches:
            fingerprint = (
                f"{match.get('source_file')}::{match.get('page')}::{match.get('content', '')[:80]}"
            )
            # setdefault keeps the first row seen for a fingerprint.
            unique_rows.setdefault(fingerprint, match)
    return list(unique_rows.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/routes/rag_routes.py
DELETED
|
@@ -1,427 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import json
|
| 4 |
-
import logging
|
| 5 |
-
import os
|
| 6 |
-
import re
|
| 7 |
-
from datetime import datetime, timezone
|
| 8 |
-
from threading import Lock
|
| 9 |
-
from typing import Any, Dict, List, Optional
|
| 10 |
-
|
| 11 |
-
from fastapi import APIRouter, HTTPException, Request
|
| 12 |
-
from pydantic import BaseModel, Field
|
| 13 |
-
|
| 14 |
-
from services.inference_client import (
|
| 15 |
-
InferenceRequest,
|
| 16 |
-
create_default_client,
|
| 17 |
-
is_sequential_model,
|
| 18 |
-
get_model_for_task,
|
| 19 |
-
)
|
| 20 |
-
from rag.curriculum_rag import (
|
| 21 |
-
build_analysis_curriculum_context,
|
| 22 |
-
build_lesson_prompt,
|
| 23 |
-
build_lesson_query,
|
| 24 |
-
build_problem_generation_prompt,
|
| 25 |
-
format_retrieved_chunks,
|
| 26 |
-
retrieve_curriculum_context,
|
| 27 |
-
retrieve_lesson_pdf_context,
|
| 28 |
-
summarize_retrieval_confidence,
|
| 29 |
-
)
|
| 30 |
-
from rag.vectorstore_loader import get_vectorstore_health, reset_vectorstore_singleton
|
| 31 |
-
|
| 32 |
-
try:
|
| 33 |
-
from firebase_admin import firestore as firebase_firestore
|
| 34 |
-
except Exception:
|
| 35 |
-
firebase_firestore = None
|
| 36 |
-
|
| 37 |
-
# Module-level logger used by every helper and route in this file.
logger = logging.getLogger("mathpulse.rag")
# All endpoints declared below are mounted under the /api/rag prefix.
router = APIRouter(prefix="/api/rag", tags=["rag"])

# Shared inference client, created on first use by _get_inference_client().
_inference_client = None
# Held while the shared client is created.
_inference_lock = Lock()
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def _get_inference_client():
    """Return the shared inference client, creating it on first use.

    Uses double-checked locking: the unlocked check skips the lock once the
    client exists, and the check under the lock ensures only one caller runs
    ``create_default_client()``.
    """
    global _inference_client
    if _inference_client is not None:
        return _inference_client

    with _inference_lock:
        # Another thread may have created the client while we waited.
        if _inference_client is None:
            _inference_client = create_default_client()
    return _inference_client
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
async def _generate_text(
    prompt: str,
    task_type: str,
    max_new_tokens: int = 900,
    enable_thinking: bool = False,
) -> str:
    """Send ``prompt`` to the shared inference client and return its output.

    The prompt is sent as the user turn after a fixed system message, with
    temperature 0.2 and top_p 0.9.
    """
    # NOTE(review): generate_from_messages is called without await; presumably
    # it is synchronous and blocks the event loop while running. Confirm.
    conversation = [
        {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
        {"role": "user", "content": prompt},
    ]
    inference_request = InferenceRequest(
        messages=conversation,
        task_type=task_type,
        max_new_tokens=max_new_tokens,
        temperature=0.2,
        top_p=0.9,
        enable_thinking=enable_thinking,
    )
    client = _get_inference_client()
    return client.generate_from_messages(inference_request)
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
def _log_rag_usage(
    request: Request,
    *,
    event_type: str,
    topic: str,
    subject: str,
    quarter: Optional[int],
    chunks: List[Dict[str, Any]],
) -> None:
    """Best-effort write of one usage record to the ``rag_usage`` collection.

    Does nothing when the Firestore module could not be imported. Any failure
    while building or writing the record is logged as a warning and never
    propagates to the caller.
    """
    if firebase_firestore is None:
        return

    try:
        current_user = getattr(request.state, "user", None)
        uid = getattr(current_user, "uid", None)

        # Distinct, non-empty content domains across the retrieved chunks.
        domains_hit = set()
        for chunk in chunks:
            if chunk.get("content_domain"):
                domains_hit.add(str(chunk.get("content_domain") or "").strip())

        scores = [float(chunk.get("score") or 0.0) for chunk in chunks]
        best_score = max(scores) if scores else 0.0

        record = {
            "userId": uid,
            "type": event_type,
            "topic": topic,
            "subject": subject,
            "quarter": quarter,
            "retrievedChunks": len(chunks),
            "topScore": best_score,
            "curriculumDomainsHit": sorted(domains_hit),
            "timestamp": firebase_firestore.SERVER_TIMESTAMP,
            "createdAtIso": datetime.now(timezone.utc).isoformat(),
        }
        firebase_firestore.client().collection("rag_usage").add(record)
    except Exception as exc:
        logger.warning("rag_usage logging skipped: %s", exc)
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
def _strip_thinking_and_parse(text: str) -> dict:
|
| 107 |
-
cleaned = text.strip()
|
| 108 |
-
cleaned = re.sub(r" </think>", "", cleaned, flags=re.DOTALL).strip()
|
| 109 |
-
if "{" in cleaned and "}" in cleaned:
|
| 110 |
-
try:
|
| 111 |
-
start = cleaned.find("{")
|
| 112 |
-
end = cleaned.rfind("}") + 1
|
| 113 |
-
parsed = json.loads(cleaned[start:end])
|
| 114 |
-
if isinstance(parsed, dict):
|
| 115 |
-
return parsed
|
| 116 |
-
except Exception:
|
| 117 |
-
pass
|
| 118 |
-
return {"explanation": text}
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
# Request body for the POST /lesson endpoint.
# Only topic, subject and quarter are required; the other fields refine the
# retrieval query and the generated prompt.
class RagLessonRequest(BaseModel):
    topic: str
    subject: str
    quarter: int
    # The handler falls back to `topic` when lessonTitle is missing.
    lessonTitle: Optional[str] = None
    # The handler falls back to `topic` when learningCompetency is missing.
    learningCompetency: Optional[str] = None
    moduleUnit: Optional[str] = None
    learnerLevel: Optional[str] = None
    # NOTE(review): the lesson handler does not read userId; the logged user
    # id comes from request.state.user instead.
    userId: Optional[str] = None
    moduleId: Optional[str] = None
    lessonId: Optional[str] = None
    competencyCode: Optional[str] = None
    # Forwarded as storage_path to the lesson retrieval helper.
    storagePath: Optional[str] = None
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
# Request body for the POST /generate-problem endpoint.
class RagProblemRequest(BaseModel):
    topic: str
    subject: str
    quarter: int
    # Free-form difficulty label inserted into the prompt; defaults to "medium".
    difficulty: str = Field(default="medium")
    # NOTE(review): the problem handler does not read userId.
    userId: Optional[str] = None
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
# Request body for the POST /analysis-context endpoint.
class RagAnalysisContextRequest(BaseModel):
    # Must be non-empty; the handler answers 400 otherwise.
    weakTopics: List[str]
    subject: str
    # NOTE(review): the analysis handler does not read userId.
    userId: Optional[str] = None
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
# Health probe for the RAG subsystem. Reports vector-store statistics and the
# model configured for the "rag_lesson" task. Any failure while reading the
# vector store yields a "degraded" payload with the error text, not an HTTP
# error.
@router.get("/health")
async def rag_health():
    active_model = get_model_for_task("rag_lesson")
    model_info = {
        "activeModel": active_model,
        "isSequentialModel": is_sequential_model(active_model),
    }
    try:
        store = get_vectorstore_health()
        return {
            "status": "ok",
            "chunkCount": store["chunkCount"],
            "subjects": store["subjects"],
            # NOTE(review): this is the time of the health check, not a stored
            # ingestion timestamp. Confirm whether that is intended.
            "lastIngested": datetime.now(timezone.utc).isoformat(),
            **model_info,
        }
    except Exception as exc:
        return {
            "status": "degraded",
            "chunkCount": 0,
            "subjects": {},
            "lastIngested": None,
            **model_info,
            "warning": str(exc),
        }
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
def _fetch_youtube_video(lesson_title: str, subject: str, competency: str, quarter: int) -> dict:
|
| 177 |
-
try:
|
| 178 |
-
from backend.services.youtube_service import get_video_for_lesson
|
| 179 |
-
except ImportError:
|
| 180 |
-
return {}
|
| 181 |
-
try:
|
| 182 |
-
video = get_video_for_lesson(lesson_title, subject, competency, quarter)
|
| 183 |
-
return video or {}
|
| 184 |
-
except Exception as e:
|
| 185 |
-
logger.warning("YouTube search failed: %s", e)
|
| 186 |
-
return {}
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
|
| 190 |
-
sections = lesson_data.get("sections", [])
|
| 191 |
-
section_types = {s.get("type") for s in sections}
|
| 192 |
-
required = ["introduction", "key_concepts", "video", "worked_examples", "important_notes", "try_it_yourself", "summary"]
|
| 193 |
-
|
| 194 |
-
default_content = {
|
| 195 |
-
"introduction": {"type": "introduction", "title": "Introduction", "content": f"Welcome to the lesson on {lesson_title}."},
|
| 196 |
-
"key_concepts": {"type": "key_concepts", "title": "Key Concepts", "content": "Below are the key concepts covered in this lesson.", "callouts": []},
|
| 197 |
-
"video": {"type": "video", "title": "Video Lesson", "content": "Watch this explanation to understand the concepts visually.", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},
|
| 198 |
-
"worked_examples": {"type": "worked_examples", "title": "Worked Examples", "examples": []},
|
| 199 |
-
"important_notes": {"type": "important_notes", "title": "Important Notes", "bulletPoints": []},
|
| 200 |
-
"try_it_yourself": {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": []},
|
| 201 |
-
"summary": {"type": "summary", "title": "Summary", "content": f"Great job completing the lesson on {lesson_title}!"},
|
| 202 |
-
}
|
| 203 |
-
|
| 204 |
-
filled = {}
|
| 205 |
-
for req_type in required:
|
| 206 |
-
for existing in sections:
|
| 207 |
-
if existing.get("type") == req_type:
|
| 208 |
-
filled[req_type] = existing
|
| 209 |
-
break
|
| 210 |
-
else:
|
| 211 |
-
filled[req_type] = default_content[req_type]
|
| 212 |
-
|
| 213 |
-
ordered = [filled[t] for t in required]
|
| 214 |
-
|
| 215 |
-
for i, section in enumerate(ordered):
|
| 216 |
-
s_type = section.get("type")
|
| 217 |
-
if s_type == "key_concepts" and not section.get("callouts"):
|
| 218 |
-
section["callouts"] = []
|
| 219 |
-
if s_type == "worked_examples" and not section.get("examples"):
|
| 220 |
-
section["examples"] = []
|
| 221 |
-
if s_type == "important_notes" and not section.get("bulletPoints"):
|
| 222 |
-
section["bulletPoints"] = []
|
| 223 |
-
if s_type == "try_it_yourself" and not section.get("practiceProblems"):
|
| 224 |
-
section["practiceProblems"] = []
|
| 225 |
-
ordered[i] = section
|
| 226 |
-
|
| 227 |
-
return {**lesson_data, "sections": ordered}
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
# Generate a curriculum-grounded, 7-section lesson.
#
# Flow: retrieve chunks -> 404 if none -> build prompt -> generate -> parse and
# normalise to 7 sections -> fill the video section -> log usage -> respond.
#
# Errors: a deliberate HTTPException (such as the 404 for missing curriculum
# context) is re-raised unchanged; any other failure becomes a 500 whose detail
# carries the error type, message and traceback.
@router.post("/lesson")
async def rag_lesson(request: Request, payload: RagLessonRequest):
    try:
        # NOTE(review): the retrieval helper is called with `query=`; confirm
        # the imported rag.curriculum_rag.retrieve_lesson_pdf_context accepts
        # that keyword (an older copy names this parameter `topic`).
        chunks, retrieval_mode = retrieve_lesson_pdf_context(
            query=build_lesson_query(
                payload.topic,
                payload.subject,
                payload.quarter,
                lesson_title=payload.lessonTitle,
                competency=payload.learningCompetency,
                module_unit=payload.moduleUnit,
                learner_level=payload.learnerLevel,
            ),
            subject=payload.subject,
            quarter=payload.quarter,
            lesson_title=payload.lessonTitle,
            competency=payload.learningCompetency,
            module_id=payload.moduleId,
            lesson_id=payload.lessonId,
            competency_code=payload.competencyCode,
            storage_path=payload.storagePath,
            top_k=8,
        )

        if not chunks:
            raise HTTPException(
                status_code=404,
                detail={
                    "error": "no_curriculum_context",
                    "message": f"No curriculum content found for lesson '{payload.lessonTitle}' ({payload.subject} Q{payload.quarter}). Please ensure the PDF has been ingested.",
                    "retrievalBand": "low",
                    "sources": [],
                },
            )

        prompt = build_lesson_prompt(
            lesson_title=payload.lessonTitle or payload.topic,
            competency=payload.learningCompetency or payload.topic,
            grade_level="Grade 11-12",
            subject=payload.subject,
            quarter=payload.quarter,
            learner_level=payload.learnerLevel,
            module_unit=payload.moduleUnit,
            curriculum_chunks=chunks,
            competency_code=payload.competencyCode,
        )

        raw_explanation = await _generate_text(
            prompt,
            task_type="lesson_generation",
            max_new_tokens=1800,
            enable_thinking=True,
        )

        parsed_lesson = _strip_thinking_and_parse(raw_explanation)
        parsed_lesson = _ensure_7_sections(parsed_lesson, payload.lessonTitle or payload.topic)

        if parsed_lesson.get("sections"):
            video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
            if video_section:
                video_data = _fetch_youtube_video(
                    payload.lessonTitle or payload.topic,
                    payload.subject,
                    payload.learningCompetency or "",
                    payload.quarter,
                )
                if video_data:
                    # The prompt tells the model to leave these fields empty;
                    # they are filled in here.
                    video_section["videoId"] = video_data.get("videoId", "")
                    video_section["videoTitle"] = video_data.get("videoTitle", "")
                    video_section["videoChannel"] = video_data.get("videoChannel", "")
                    video_section["embedUrl"] = video_data.get("embedUrl", "")
                    video_section["thumbnailUrl"] = video_data.get("thumbnailUrl", "")

        retrieval_summary = summarize_retrieval_confidence(chunks)

        _log_rag_usage(
            request,
            event_type="lesson",
            topic=build_lesson_query(payload.topic, payload.subject, payload.quarter, lesson_title=payload.lessonTitle),
            subject=payload.subject,
            quarter=payload.quarter,
            chunks=chunks,
        )

        # Low retrieval confidence always forces a review flag.
        needs_review = parsed_lesson.get("needsReview", False)
        if retrieval_summary.get("band") == "low":
            needs_review = True

        return {
            **parsed_lesson,
            "retrievalConfidence": retrieval_summary.get("confidence", 0.0),
            "retrievalBand": retrieval_summary.get("band", "low"),
            "retrievalMode": retrieval_mode,
            "needsReview": needs_review,
            "sources": [
                {
                    "subject": row.get("subject"),
                    "quarter": row.get("quarter"),
                    "source_file": row.get("source_file"),
                    "storage_path": row.get("storage_path"),
                    "page": row.get("page"),
                    "score": row.get("score"),
                    "content_domain": row.get("content_domain"),
                    "chunk_type": row.get("chunk_type"),
                    "content": row.get("content"),
                }
                for row in chunks
            ],
            "activeModel": get_model_for_task("rag_lesson"),
        }
    except HTTPException:
        # HTTPException subclasses Exception; without this clause the 404
        # above would be reported to the client as a 500.
        raise
    except Exception as exc:
        import traceback

        # logger.exception records the traceback with the log entry.
        logger.exception("RAG lesson error: %s: %s", type(exc).__name__, exc)
        # NOTE(review): the traceback is returned to the client, which exposes
        # internals; kept for compatibility, consider removing in production.
        raise HTTPException(
            status_code=500,
            detail={
                "error": type(exc).__name__,
                "message": str(exc),
                "traceback": traceback.format_exc(),
            },
        ) from exc
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
# Generate one practice problem grounded in retrieved curriculum chunks.
# Responds with the problem text, its solution, a competency reference, and
# the sources (subject, quarter, file, page, score) of the chunks used.
@router.post("/generate-problem")
async def rag_generate_problem(request: Request, payload: RagProblemRequest):
    chunks = retrieve_curriculum_context(
        query=payload.topic,
        subject=payload.subject,
        quarter=payload.quarter,
        top_k=5,
    )
    generation_prompt = build_problem_generation_prompt(payload.topic, payload.difficulty, chunks)
    raw = await _generate_text(
        generation_prompt,
        task_type="quiz_generation",
        max_new_tokens=600,
        enable_thinking=False,
    )

    parsed = _strip_thinking_and_parse(raw)

    # Fallback chain for the problem text: the "problem" key, then "content"
    # or the stringified dict, and finally the raw model output whenever the
    # candidate is too short or still looks like JSON.
    problem = str(parsed.get("problem") or raw)
    if not problem or problem.startswith("{"):
        problem = str(parsed.get("content") or str(parsed))
    if len(problem) < 3 or problem.startswith("{"):
        problem = raw

    solution = str(parsed.get("solution") or "")
    competency_ref = str(parsed.get("competencyReference") or "DepEd competency-aligned")

    _log_rag_usage(
        request,
        event_type="problem_generation",
        topic=payload.topic,
        subject=payload.subject,
        quarter=payload.quarter,
        chunks=chunks,
    )

    source_fields = ("subject", "quarter", "source_file", "page", "score")
    sources = [{field: row.get(field) for field in source_fields} for row in chunks]

    return {
        "problem": problem,
        "solution": solution,
        "competencyReference": competency_ref,
        "sources": sources,
    }
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
@router.post("/analysis-context")
|
| 406 |
-
async def rag_analysis_context(request: Request, payload: RagAnalysisContextRequest):
|
| 407 |
-
if not payload.weakTopics:
|
| 408 |
-
raise HTTPException(status_code=400, detail="weakTopics must be a non-empty list")
|
| 409 |
-
|
| 410 |
-
chunks = build_analysis_curriculum_context(payload.weakTopics, payload.subject)
|
| 411 |
-
lines = ["LEARNING COMPETENCIES:"]
|
| 412 |
-
for index, row in enumerate(chunks, start=1):
|
| 413 |
-
lines.append(
|
| 414 |
-
f"{index}. {row.get('content')} (Source: {row.get('source_file')} p.{row.get('page')}, "
|
| 415 |
-
f"Q{row.get('quarter')}, {row.get('content_domain')})"
|
| 416 |
-
)
|
| 417 |
-
|
| 418 |
-
_log_rag_usage(
|
| 419 |
-
request,
|
| 420 |
-
event_type="analysis_context",
|
| 421 |
-
topic=", ".join(payload.weakTopics),
|
| 422 |
-
subject=payload.subject,
|
| 423 |
-
quarter=None,
|
| 424 |
-
chunks=chunks,
|
| 425 |
-
)
|
| 426 |
-
|
| 427 |
-
return {"curriculumContext": "\n".join(lines)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
CHANGED
|
@@ -1000,6 +1000,8 @@ class RequestMiddleware(BaseHTTPMiddleware):
|
|
| 1000 |
status_code=500,
|
| 1001 |
content={
|
| 1002 |
"detail": "Internal server error",
|
|
|
|
|
|
|
| 1003 |
"requestId": request_id,
|
| 1004 |
},
|
| 1005 |
headers={"X-Request-ID": request_id},
|
|
|
|
| 1000 |
status_code=500,
|
| 1001 |
content={
|
| 1002 |
"detail": "Internal server error",
|
| 1003 |
+
"error": type(exc).__name__,
|
| 1004 |
+
"message": str(exc),
|
| 1005 |
"requestId": request_id,
|
| 1006 |
},
|
| 1007 |
headers={"X-Request-ID": request_id},
|
rag/curriculum_rag.py
CHANGED
|
@@ -57,7 +57,7 @@ def retrieve_curriculum_context(
|
|
| 57 |
storage_path: str | None = None,
|
| 58 |
top_k: int = 8,
|
| 59 |
) -> list[dict]:
|
| 60 |
-
from
|
| 61 |
|
| 62 |
_, collection, embedder = get_vectorstore_components()
|
| 63 |
where = _to_where(subject, quarter, content_domain, chunk_type, module_id, lesson_id, competency_code, storage_path)
|
|
@@ -195,12 +195,12 @@ def format_retrieved_chunks(curriculum_chunks: list[dict]) -> str:
|
|
| 195 |
|
| 196 |
def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, any]:
|
| 197 |
if not curriculum_chunks:
|
| 198 |
-
return {"confidence": 0.0, "band": "low"}
|
| 199 |
|
| 200 |
top_scores = [float(c.get("score") or 0.0) for c in curriculum_chunks[:5]]
|
| 201 |
score = sum(top_scores) / max(1, len(top_scores))
|
| 202 |
band = "high" if score >= 0.72 else "medium" if score >= 0.5 else "low"
|
| 203 |
-
return {"confidence": round(score, 3), "band": band}
|
| 204 |
|
| 205 |
|
| 206 |
def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
|
|
|
|
| 57 |
storage_path: str | None = None,
|
| 58 |
top_k: int = 8,
|
| 59 |
) -> list[dict]:
|
| 60 |
+
from rag.vectorstore_loader import get_vectorstore_components
|
| 61 |
|
| 62 |
_, collection, embedder = get_vectorstore_components()
|
| 63 |
where = _to_where(subject, quarter, content_domain, chunk_type, module_id, lesson_id, competency_code, storage_path)
|
|
|
|
| 195 |
|
| 196 |
def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, any]:
|
| 197 |
if not curriculum_chunks:
|
| 198 |
+
return {"confidence": 0.0, "band": "low", "chunkCount": 0}
|
| 199 |
|
| 200 |
top_scores = [float(c.get("score") or 0.0) for c in curriculum_chunks[:5]]
|
| 201 |
score = sum(top_scores) / max(1, len(top_scores))
|
| 202 |
band = "high" if score >= 0.72 else "medium" if score >= 0.5 else "low"
|
| 203 |
+
return {"confidence": round(score, 3), "band": band, "chunkCount": len(curriculum_chunks)}
|
| 204 |
|
| 205 |
|
| 206 |
def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
|
routes/rag_routes.py
CHANGED
|
@@ -229,26 +229,39 @@ def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
|
|
| 229 |
|
| 230 |
@router.post("/lesson")
|
| 231 |
async def rag_lesson(request: Request, payload: RagLessonRequest):
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
lesson_title=payload.lessonTitle,
|
| 238 |
competency=payload.learningCompetency,
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
if not chunks:
|
| 254 |
raise HTTPException(
|
|
@@ -261,54 +274,98 @@ async def rag_lesson(request: Request, payload: RagLessonRequest):
|
|
| 261 |
},
|
| 262 |
)
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
-
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
|
|
|
| 286 |
if parsed_lesson.get("sections"):
|
| 287 |
video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
|
| 288 |
if video_section:
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
retrieval_summary = summarize_retrieval_confidence(chunks)
|
| 303 |
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
needs_review = parsed_lesson.get("needsReview", False)
|
| 314 |
if retrieval_summary.get("band") == "low":
|
|
|
|
| 229 |
|
| 230 |
@router.post("/lesson")
|
| 231 |
async def rag_lesson(request: Request, payload: RagLessonRequest):
|
| 232 |
+
# ── Step 1: Retrieve curriculum chunks ───────────────────────────────────
|
| 233 |
+
try:
|
| 234 |
+
chunks, retrieval_mode = retrieve_lesson_pdf_context(
|
| 235 |
+
topic=build_lesson_query(
|
| 236 |
+
payload.topic,
|
| 237 |
+
payload.subject,
|
| 238 |
+
payload.quarter,
|
| 239 |
+
lesson_title=payload.lessonTitle,
|
| 240 |
+
competency=payload.learningCompetency,
|
| 241 |
+
module_unit=payload.moduleUnit,
|
| 242 |
+
learner_level=payload.learnerLevel,
|
| 243 |
+
),
|
| 244 |
+
subject=payload.subject,
|
| 245 |
+
quarter=payload.quarter,
|
| 246 |
lesson_title=payload.lessonTitle,
|
| 247 |
competency=payload.learningCompetency,
|
| 248 |
+
module_id=payload.moduleId,
|
| 249 |
+
lesson_id=payload.lessonId,
|
| 250 |
+
competency_code=payload.competencyCode,
|
| 251 |
+
storage_path=payload.storagePath,
|
| 252 |
+
top_k=8,
|
| 253 |
+
)
|
| 254 |
+
except Exception as exc:
|
| 255 |
+
import traceback
|
| 256 |
+
logger.error(f"RAG retrieval error: {type(exc).__name__}: {exc}\n{traceback.format_exc()}")
|
| 257 |
+
raise HTTPException(
|
| 258 |
+
status_code=503,
|
| 259 |
+
detail={
|
| 260 |
+
"error": "retrieval_failed",
|
| 261 |
+
"message": f"Curriculum retrieval failed: {exc}",
|
| 262 |
+
"type": type(exc).__name__,
|
| 263 |
+
},
|
| 264 |
+
)
|
| 265 |
|
| 266 |
if not chunks:
|
| 267 |
raise HTTPException(
|
|
|
|
| 274 |
},
|
| 275 |
)
|
| 276 |
|
| 277 |
+
# ── Step 2: Build prompt ─────────────────────────────────────────────────
|
| 278 |
+
try:
|
| 279 |
+
prompt = build_lesson_prompt(
|
| 280 |
+
lesson_title=payload.lessonTitle or payload.topic,
|
| 281 |
+
competency=payload.learningCompetency or payload.topic,
|
| 282 |
+
grade_level="Grade 11-12",
|
| 283 |
+
subject=payload.subject,
|
| 284 |
+
quarter=payload.quarter,
|
| 285 |
+
learner_level=payload.learnerLevel,
|
| 286 |
+
module_unit=payload.moduleUnit,
|
| 287 |
+
curriculum_chunks=chunks,
|
| 288 |
+
competency_code=payload.competencyCode,
|
| 289 |
+
)
|
| 290 |
+
except Exception as exc:
|
| 291 |
+
logger.error(f"RAG prompt build error: {type(exc).__name__}: {exc}")
|
| 292 |
+
raise HTTPException(
|
| 293 |
+
status_code=500,
|
| 294 |
+
detail={
|
| 295 |
+
"error": "prompt_build_failed",
|
| 296 |
+
"message": f"Failed to build lesson prompt: {exc}",
|
| 297 |
+
"type": type(exc).__name__,
|
| 298 |
+
},
|
| 299 |
+
)
|
| 300 |
|
| 301 |
+
# ── Step 3: AI inference ─────────────────────────────────────────────────
|
| 302 |
+
try:
|
| 303 |
+
raw_explanation = await _generate_text(
|
| 304 |
+
prompt,
|
| 305 |
+
task_type="rag_lesson",
|
| 306 |
+
max_new_tokens=1800,
|
| 307 |
+
enable_thinking=True,
|
| 308 |
+
)
|
| 309 |
+
except Exception as exc:
|
| 310 |
+
logger.error(f"RAG inference error: {type(exc).__name__}: {exc}")
|
| 311 |
+
raise HTTPException(
|
| 312 |
+
status_code=502,
|
| 313 |
+
detail={
|
| 314 |
+
"error": "inference_failed",
|
| 315 |
+
"message": f"AI model call failed: {exc}",
|
| 316 |
+
"type": type(exc).__name__,
|
| 317 |
+
},
|
| 318 |
+
)
|
| 319 |
|
| 320 |
+
# ── Step 4: Parse & validate response ────────────────────────────────────
|
| 321 |
+
try:
|
| 322 |
+
parsed_lesson = _strip_thinking_and_parse(raw_explanation)
|
| 323 |
+
parsed_lesson = _ensure_7_sections(parsed_lesson, payload.lessonTitle or payload.topic)
|
| 324 |
+
except Exception as exc:
|
| 325 |
+
logger.error(f"RAG parse error: {type(exc).__name__}: {exc}")
|
| 326 |
+
raise HTTPException(
|
| 327 |
+
status_code=500,
|
| 328 |
+
detail={
|
| 329 |
+
"error": "parse_failed",
|
| 330 |
+
"message": f"Failed to parse AI response: {exc}",
|
| 331 |
+
"type": type(exc).__name__,
|
| 332 |
+
},
|
| 333 |
+
)
|
| 334 |
|
| 335 |
+
# ── Step 5: Enrich with video ────────────────────────────────────────────
|
| 336 |
if parsed_lesson.get("sections"):
|
| 337 |
video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
|
| 338 |
if video_section:
|
| 339 |
+
try:
|
| 340 |
+
video_data = _fetch_youtube_video(
|
| 341 |
+
payload.lessonTitle or payload.topic,
|
| 342 |
+
payload.subject,
|
| 343 |
+
payload.learningCompetency or "",
|
| 344 |
+
payload.quarter,
|
| 345 |
+
)
|
| 346 |
+
if video_data:
|
| 347 |
+
video_section["videoId"] = video_data.get("videoId", "")
|
| 348 |
+
video_section["videoTitle"] = video_data.get("videoTitle", "")
|
| 349 |
+
video_section["videoChannel"] = video_data.get("videoChannel", "")
|
| 350 |
+
video_section["embedUrl"] = video_data.get("embedUrl", "")
|
| 351 |
+
video_section["thumbnailUrl"] = video_data.get("thumbnailUrl", "")
|
| 352 |
+
except Exception as exc:
|
| 353 |
+
logger.warning("YouTube enrichment skipped: %s", exc)
|
| 354 |
+
|
| 355 |
+
# ── Step 6: Assemble response ────────────────────────────────────────────
|
| 356 |
retrieval_summary = summarize_retrieval_confidence(chunks)
|
| 357 |
|
| 358 |
+
try:
|
| 359 |
+
_log_rag_usage(
|
| 360 |
+
request,
|
| 361 |
+
event_type="lesson",
|
| 362 |
+
topic=build_lesson_query(payload.topic, payload.subject, payload.quarter, lesson_title=payload.lessonTitle),
|
| 363 |
+
subject=payload.subject,
|
| 364 |
+
quarter=payload.quarter,
|
| 365 |
+
chunks=chunks,
|
| 366 |
+
)
|
| 367 |
+
except Exception as exc:
|
| 368 |
+
logger.warning("RAG usage logging skipped: %s", exc)
|
| 369 |
|
| 370 |
needs_review = parsed_lesson.get("needsReview", False)
|
| 371 |
if retrieval_summary.get("band") == "low":
|
test_full_rag.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
sys.path.insert(0, 'backend')
|
| 4 |
+
|
| 5 |
+
# Set required env vars
|
| 6 |
+
os.environ['DEEPSEEK_API_KEY'] = os.getenv('DEEPSEEK_API_KEY', '')
|
| 7 |
+
os.environ['DEEPSEEK_BASE_URL'] = os.getenv('DEEPSEEK_BASE_URL', 'https://api.deepseek.com')
|
| 8 |
+
|
| 9 |
+
from rag.curriculum_rag import retrieve_lesson_pdf_context, build_lesson_prompt
|
| 10 |
+
from services.inference_client import InferenceClient, InferenceRequest
|
| 11 |
+
|
| 12 |
+
# Test retrieval
|
| 13 |
+
print("Testing retrieval...")
|
| 14 |
+
try:
|
| 15 |
+
chunks, mode = retrieve_lesson_pdf_context(
|
| 16 |
+
topic="Represent real-life relationships as functions and interpret domain/range.",
|
| 17 |
+
subject="General Mathematics",
|
| 18 |
+
quarter=2,
|
| 19 |
+
lesson_title="Represent real-life relationships as functions and interpret domain/range.",
|
| 20 |
+
module_id="gen-math",
|
| 21 |
+
lesson_id="gm-q2-functions-graphs-l1",
|
| 22 |
+
competency_code="GM11-FG-1",
|
| 23 |
+
top_k=8,
|
| 24 |
+
)
|
| 25 |
+
print(f"Retrieved {len(chunks)} chunks, mode={mode}")
|
| 26 |
+
except Exception as e:
|
| 27 |
+
print(f"Retrieval ERROR: {type(e).__name__}: {e}")
|
| 28 |
+
import traceback
|
| 29 |
+
traceback.print_exc()
|
| 30 |
+
sys.exit(1)
|
| 31 |
+
|
| 32 |
+
# Test prompt building
|
| 33 |
+
print("\nTesting prompt building...")
|
| 34 |
+
try:
|
| 35 |
+
prompt = build_lesson_prompt(
|
| 36 |
+
lesson_title="Represent real-life relationships as functions and interpret domain/range.",
|
| 37 |
+
competency="Represent real-life relationships as functions and interpret domain/range.",
|
| 38 |
+
grade_level="Grade 11-12",
|
| 39 |
+
subject="General Mathematics",
|
| 40 |
+
quarter=2,
|
| 41 |
+
learner_level="Grade 11-12",
|
| 42 |
+
module_unit="n/a",
|
| 43 |
+
curriculum_chunks=chunks,
|
| 44 |
+
competency_code="GM11-FG-1",
|
| 45 |
+
)
|
| 46 |
+
print(f"Prompt length: {len(prompt)} chars")
|
| 47 |
+
print(f"Prompt preview: {prompt[:200]}...")
|
| 48 |
+
except Exception as e:
|
| 49 |
+
print(f"Prompt building ERROR: {type(e).__name__}: {e}")
|
| 50 |
+
import traceback
|
| 51 |
+
traceback.print_exc()
|
| 52 |
+
sys.exit(1)
|
| 53 |
+
|
| 54 |
+
# Test inference (optional - might cost money)
|
| 55 |
+
print("\nTesting inference...")
|
| 56 |
+
try:
|
| 57 |
+
client = InferenceClient()
|
| 58 |
+
req = InferenceRequest(
|
| 59 |
+
messages=[
|
| 60 |
+
{"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
|
| 61 |
+
{"role": "user", "content": prompt},
|
| 62 |
+
],
|
| 63 |
+
task_type="lesson_generation",
|
| 64 |
+
max_new_tokens=100, # Small for testing
|
| 65 |
+
temperature=0.2,
|
| 66 |
+
top_p=0.9,
|
| 67 |
+
enable_thinking=True,
|
| 68 |
+
)
|
| 69 |
+
result = client.generate_from_messages(req)
|
| 70 |
+
print(f"Inference result: {result[:200]}...")
|
| 71 |
+
print("SUCCESS!")
|
| 72 |
+
except Exception as e:
|
| 73 |
+
print(f"Inference ERROR: {type(e).__name__}: {e}")
|
| 74 |
+
import traceback
|
| 75 |
+
traceback.print_exc()
|
test_retrieval.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
sys.path.insert(0, '.')
|
| 3 |
+
|
| 4 |
+
from rag.curriculum_rag import retrieve_lesson_pdf_context, retrieve_curriculum_context
|
| 5 |
+
|
| 6 |
+
# Test retrieval with the same params as the frontend
|
| 7 |
+
try:
|
| 8 |
+
chunks, mode = retrieve_lesson_pdf_context(
|
| 9 |
+
topic="Represent real-life relationships as functions and interpret domain/range.",
|
| 10 |
+
subject="General Mathematics",
|
| 11 |
+
quarter=2,
|
| 12 |
+
lesson_title="Represent real-life relationships as functions and interpret domain/range.",
|
| 13 |
+
module_id="gen-math",
|
| 14 |
+
lesson_id="gm-q2-functions-graphs-l1",
|
| 15 |
+
competency_code="GM11-FG-1",
|
| 16 |
+
top_k=8,
|
| 17 |
+
)
|
| 18 |
+
print(f"Retrieved {len(chunks)} chunks, mode={mode}")
|
| 19 |
+
for i, chunk in enumerate(chunks[:3]):
|
| 20 |
+
print(f" Chunk {i}: score={chunk.get('score')}, domain={chunk.get('content_domain')}, source={chunk.get('source_file')}")
|
| 21 |
+
print(f" Content: {chunk.get('content', '')[:100]}...")
|
| 22 |
+
except Exception as e:
|
| 23 |
+
print(f"ERROR: {type(e).__name__}: {e}")
|
| 24 |
+
import traceback
|
| 25 |
+
traceback.print_exc()
|
| 26 |
+
|
| 27 |
+
# Also test without module/lesson filters
|
| 28 |
+
try:
|
| 29 |
+
chunks2 = retrieve_curriculum_context(
|
| 30 |
+
query="Represent real-life relationships as functions and interpret domain/range.",
|
| 31 |
+
subject="General Mathematics",
|
| 32 |
+
quarter=2,
|
| 33 |
+
top_k=8,
|
| 34 |
+
)
|
| 35 |
+
print(f"\nGeneral retrieval: {len(chunks2)} chunks")
|
| 36 |
+
except Exception as e:
|
| 37 |
+
print(f"\nGeneral ERROR: {type(e).__name__}: {e}")
|
| 38 |
+
import traceback
|
| 39 |
+
traceback.print_exc()
|
tests/test_rag_pipeline.py
CHANGED
|
@@ -23,13 +23,18 @@ def _mock_vectorstore_components(collection_mock, embedder_mock):
|
|
| 23 |
class TestRetrieveCurriculumContext:
|
| 24 |
def test_empty_collection_returns_empty_list(self):
|
| 25 |
collection = MagicMock()
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
embedder = MagicMock()
|
|
|
|
|
|
|
| 30 |
|
| 31 |
with patch(
|
| 32 |
-
"rag.
|
| 33 |
return_value=(MagicMock(), collection, embedder),
|
| 34 |
):
|
| 35 |
result = retrieve_curriculum_context(
|
|
@@ -73,14 +78,12 @@ class TestBuildLessonPrompt:
|
|
| 73 |
],
|
| 74 |
)
|
| 75 |
assert "JSON" in prompt
|
| 76 |
-
assert "
|
| 77 |
assert "needsReview" in prompt
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
]
|
| 81 |
-
assert any(term in prompt for term in ph_context_terms)
|
| 82 |
|
| 83 |
-
def
|
| 84 |
prompt = build_lesson_prompt(
|
| 85 |
lesson_title="Functions",
|
| 86 |
competency="M11GM-Ia-1",
|
|
@@ -91,7 +94,10 @@ class TestBuildLessonPrompt:
|
|
| 91 |
module_unit=None,
|
| 92 |
curriculum_chunks=[],
|
| 93 |
)
|
| 94 |
-
assert "
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
|
| 97 |
class TestSummarizeRetrievalConfidence:
|
|
|
|
| 23 |
class TestRetrieveCurriculumContext:
|
| 24 |
def test_empty_collection_returns_empty_list(self):
|
| 25 |
collection = MagicMock()
|
| 26 |
+
collection.query.return_value = {
|
| 27 |
+
"documents": [[]],
|
| 28 |
+
"metadatas": [[]],
|
| 29 |
+
"distances": [[]],
|
| 30 |
+
}
|
| 31 |
|
| 32 |
embedder = MagicMock()
|
| 33 |
+
embedder.encode.return_value = MagicMock()
|
| 34 |
+
embedder.encode.return_value.tolist.return_value = [0.0] * 768
|
| 35 |
|
| 36 |
with patch(
|
| 37 |
+
"rag.vectorstore_loader.get_vectorstore_components",
|
| 38 |
return_value=(MagicMock(), collection, embedder),
|
| 39 |
):
|
| 40 |
result = retrieve_curriculum_context(
|
|
|
|
| 78 |
],
|
| 79 |
)
|
| 80 |
assert "JSON" in prompt
|
| 81 |
+
assert "Lesson title:" in prompt
|
| 82 |
assert "needsReview" in prompt
|
| 83 |
+
assert "DepEd-aligned" in prompt
|
| 84 |
+
assert "7 sections" in prompt
|
|
|
|
|
|
|
| 85 |
|
| 86 |
+
def test_contains_required_sections_in_prompt(self):
|
| 87 |
prompt = build_lesson_prompt(
|
| 88 |
lesson_title="Functions",
|
| 89 |
competency="M11GM-Ia-1",
|
|
|
|
| 94 |
module_unit=None,
|
| 95 |
curriculum_chunks=[],
|
| 96 |
)
|
| 97 |
+
assert "introduction" in prompt
|
| 98 |
+
assert "key_concepts" in prompt
|
| 99 |
+
assert "worked_examples" in prompt
|
| 100 |
+
assert "try_it_yourself" in prompt
|
| 101 |
|
| 102 |
|
| 103 |
class TestSummarizeRetrievalConfidence:
|