github-actions[bot] committed on
Commit
f717a11
·
1 Parent(s): ac19778

🚀 Auto-deploy backend from GitHub (46778ac)

Browse files
backend/main.py DELETED
The diff for this file is too large to render. See raw diff
 
backend/rag/curriculum_rag.py DELETED
@@ -1,318 +0,0 @@
1
- """
2
- Updated curriculum RAG with exact match retrieval and 7-section notebook output.
3
- """
4
-
5
- from __future__ import annotations
6
-
7
- from typing import Dict, List, Optional, Tuple
8
-
9
-
10
- def _to_where(
11
- subject: Optional[str] = None,
12
- quarter: Optional[int] = None,
13
- content_domain: Optional[str] = None,
14
- chunk_type: Optional[str] = None,
15
- module_id: Optional[str] = None,
16
- lesson_id: Optional[str] = None,
17
- competency_code: Optional[str] = None,
18
- storage_path: Optional[str] = None,
19
- ) -> Optional[Dict[str, object]]:
20
- clauses = []
21
- if subject:
22
- clauses.append({"subject": {"$eq": subject}})
23
- if quarter is not None:
24
- clauses.append({"quarter": {"$eq": int(quarter)}})
25
- if content_domain:
26
- clauses.append({"content_domain": {"$eq": content_domain}})
27
- if chunk_type:
28
- clauses.append({"chunk_type": {"$eq": chunk_type}})
29
- if module_id:
30
- clauses.append({"module_id": {"$eq": module_id}})
31
- if lesson_id:
32
- clauses.append({"lesson_id": {"$eq": lesson_id}})
33
- if competency_code:
34
- clauses.append({"competency_code": {"$eq": competency_code}})
35
- if storage_path:
36
- clauses.append({"storage_path": {"$eq": storage_path}})
37
- if not clauses:
38
- return None
39
- if len(clauses) == 1:
40
- return clauses[0]
41
- return {"$and": clauses}
42
-
43
-
44
- def _distance_to_score(distance: float) -> float:
45
- return round(1.0 / (1.0 + max(distance, 0.0)), 4)
46
-
47
-
48
def retrieve_curriculum_context(
    query: str,
    subject: str | None = None,
    quarter: int | None = None,
    content_domain: str | None = None,
    chunk_type: str | None = None,
    module_id: str | None = None,
    lesson_id: str | None = None,
    competency_code: str | None = None,
    storage_path: str | None = None,
    top_k: int = 8,
) -> list[dict]:
    """Run a filtered similarity search and return normalized result rows.

    The query is embedded (with a retrieval instruction prefix and normalized
    embeddings) and sent to the collection together with the metadata filter
    built by ``_to_where``. At least one result is always requested.

    Returns:
        One dict per returned document with string/int-coerced metadata
        fields and a ``score`` derived from the reported distance. Missing
        metadata falls back to defaults; a missing distance is treated as 1.0.
    """
    from rag.vectorstore_loader import get_vectorstore_components

    _, collection, embedder = get_vectorstore_components()
    metadata_filter = _to_where(
        subject,
        quarter,
        content_domain,
        chunk_type,
        module_id,
        lesson_id,
        competency_code,
        storage_path,
    )

    embedding = embedder.encode(
        f"Represent this sentence for searching relevant passages: {query}",
        normalize_embeddings=True,
    ).tolist()

    hits = collection.query(
        query_embeddings=[embedding],
        n_results=max(1, top_k),
        where=metadata_filter,
        include=["documents", "metadatas", "distances"],
    )

    # Results come back batched per query; only one query was sent.
    documents = (hits.get("documents") or [[]])[0]
    metadatas = (hits.get("metadatas") or [[]])[0]
    distances = (hits.get("distances") or [[]])[0]

    rows: List[dict] = []
    for position, text in enumerate(documents):
        meta = {}
        if position < len(metadatas) and isinstance(metadatas[position], dict):
            meta = metadatas[position]
        distance = float(distances[position]) if position < len(distances) else 1.0

        def as_text(field: str, fallback: str = "") -> str:
            return str(meta.get(field) or fallback)

        rows.append(
            {
                "content": str(text or ""),
                "subject": as_text("subject", "unknown"),
                "quarter": int(meta.get("quarter") or 0),
                "content_domain": as_text("content_domain", "general"),
                "chunk_type": as_text("chunk_type", "concept"),
                "source_file": as_text("source_file"),
                "storage_path": as_text("storage_path"),
                "module_id": as_text("module_id"),
                "lesson_id": as_text("lesson_id"),
                "competency_code": as_text("competency_code"),
                "page": int(meta.get("page") or 0),
                "score": _distance_to_score(distance),
            }
        )
    return rows
101
-
102
-
103
- def build_exact_lesson_query(
104
- topic: str,
105
- subject: str,
106
- quarter: int,
107
- lesson_title: str | None = None,
108
- competency: str | None = None,
109
- module_unit: str | None = None,
110
- learner_level: str | None = None,
111
- competency_code: str | None = None,
112
- ) -> str:
113
- parts = [topic, subject, f"Quarter {quarter}"]
114
- for value in (lesson_title, competency, module_unit, learner_level, competency_code):
115
- clean = str(value or "").strip()
116
- if clean:
117
- parts.append(clean)
118
- return " | ".join(parts)
119
-
120
-
121
def build_lesson_query(
    topic: str,
    subject: str,
    quarter: int,
    *,
    lesson_title: Optional[str] = None,
    competency: Optional[str] = None,
    module_unit: Optional[str] = None,
    learner_level: Optional[str] = None,
) -> str:
    """Compose a ``" | "``-separated retrieval query for a lesson.

    The topic, subject and ``Quarter <n>`` come first, followed by whichever
    keyword-only descriptors are non-empty once whitespace is stripped.
    """
    cleaned = [
        str(item or "").strip()
        for item in (lesson_title, competency, module_unit, learner_level)
    ]
    return " | ".join([topic, subject, f"Quarter {quarter}", *filter(None, cleaned)])
137
-
138
-
139
def retrieve_lesson_pdf_context(
    topic: str,
    subject: str,
    quarter: int,
    lesson_title: str | None = None,
    competency: str | None = None,
    module_id: str | None = None,
    lesson_id: str | None = None,
    competency_code: str | None = None,
    storage_path: str | None = None,
    top_k: int = 8,
) -> Tuple[list[dict], str]:
    """Retrieve lesson chunks, preferring those pinned to ``storage_path``.

    Returns a ``(chunks, mode)`` pair where ``mode`` is:

    * ``"exact"``   - the ``storage_path``-filtered search produced at least
      one chunk scoring 0.65 or higher; only those results are returned.
    * ``"hybrid"``  - the filtered search returned only weaker chunks; they
      are merged with an unfiltered search, de-duplicated, ranked by score
      and cut to ``top_k``.
    * ``"general"`` - no ``storage_path`` was given or it matched nothing.

    ``lesson_title``, ``competency``, ``module_id``, ``lesson_id`` and
    ``competency_code`` are accepted but not used by this function.
    """
    pinned: list[dict] = []
    if storage_path:
        pinned = retrieve_curriculum_context(
            query=topic,
            subject=subject,
            quarter=quarter,
            storage_path=storage_path,
            top_k=top_k,
        )
        if any(hit["score"] >= 0.65 for hit in pinned):
            return pinned, "exact"

    broad = retrieve_curriculum_context(
        query=topic,
        subject=subject,
        quarter=quarter,
        top_k=top_k,
    )
    if not pinned:
        return broad, "general"

    # First occurrence wins, so pinned chunks take precedence over duplicates.
    unique: dict = {}
    for hit in pinned + broad:
        fingerprint = f"{hit.get('source_file')}:{hit.get('page')}:{hit.get('content', '')[:60]}"
        unique.setdefault(fingerprint, hit)

    ranked = sorted(unique.values(), key=lambda hit: hit.get("score", 0), reverse=True)
    return ranked[:top_k], "hybrid"
183
-
184
-
185
def format_retrieved_chunks(curriculum_chunks: list[dict]) -> str:
    """Render retrieved chunks as a numbered reference list for a prompt.

    Each entry shows the source file and page, the domain/chunk type, the
    score, and the excerpt on a second line. Returns a fixed placeholder
    sentence when there are no chunks.
    """
    entries = [
        f"{number}. [{item.get('source_file')} p.{item.get('page')}] "
        f"({item.get('content_domain')}/{item.get('chunk_type')}) score={item.get('score')}\n"
        f" Excerpt: {item.get('content', '')}"
        for number, item in enumerate(curriculum_chunks, start=1)
    ]
    if not entries:
        return "No curriculum context retrieved."
    return "\n".join(entries)
194
-
195
-
196
def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, object]:
    """Summarize retrieval quality from the first five chunks' scores.

    The confidence is the mean ``score`` of up to five leading chunks (a
    missing or falsy score counts as 0.0), rounded to three decimals.

    Returns:
        ``{"confidence": float, "band": str}`` where ``band`` is ``"high"``
        for a mean of at least 0.72, ``"medium"`` for at least 0.5, and
        ``"low"`` otherwise (including when no chunks were supplied).
    """
    if not curriculum_chunks:
        return {"confidence": 0.0, "band": "low"}

    leading = [float(chunk.get("score") or 0.0) for chunk in curriculum_chunks[:5]]
    # ``leading`` is non-empty here, so the division is safe.
    mean_score = sum(leading) / len(leading)

    if mean_score >= 0.72:
        band = "high"
    elif mean_score >= 0.5:
        band = "medium"
    else:
        band = "low"
    return {"confidence": round(mean_score, 3), "band": band}
204
-
205
-
206
def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
    """Group retrieved chunks by lesson section.

    A chunk is filed under its ``content_domain`` when that value is one of
    the known section names; anything else (or a missing domain) goes into
    ``"general"``. Every section key is always present in the result, and
    chunks keep their input order within each section.
    """
    section_names = (
        "introduction",
        "key_concepts",
        "worked_examples",
        "important_notes",
        "practice",
        "summary",
        "assessment",
        "general",
    )
    sections: Dict[str, List[dict]] = {name: [] for name in section_names}

    for chunk in chunks:
        domain = chunk.get("content_domain", "general")
        target = domain if domain in sections else "general"
        sections[target].append(chunk)
    return sections
230
-
231
-
232
def build_lesson_prompt(
    *,
    lesson_title: str,
    competency: str,
    grade_level: str,
    subject: str,
    quarter: int,
    learner_level: Optional[str],
    module_unit: Optional[str],
    curriculum_chunks: list[dict],
    competency_code: Optional[str] = None,
) -> str:
    """Build the LLM prompt that requests a 7-section lesson as JSON.

    The prompt contains the lesson metadata, the retrieved chunks rendered
    by ``format_retrieved_chunks``, the required JSON structure, and the
    grounding rules. Missing optional values are shown as ``'n/a'`` (or
    ``'Grade 11-12'`` for the learner level).
    """
    # The per-section grouping previously computed here was never used, so
    # only the flat reference list is rendered.
    evidence = format_retrieved_chunks(curriculum_chunks)

    # NOTE(review): the key_concepts callouts example below looks truncated
    # ("important|ti...") and its closing brace lands on the following prompt
    # line. It is kept byte-for-byte; confirm against the intended template.
    return (
        "You are a DepEd-aligned Grade 11-12 mathematics instructional designer.\n"
        "Generate a lesson in JSON format. Use ONLY the retrieved curriculum evidence below.\n"
        "Do NOT invent content. Do NOT add generic motivational text. All content must be grounded in the retrieved excerpts.\n\n"
        f"Lesson title: {lesson_title}\n"
        f"Competency code: {competency_code or 'n/a'}\n"
        f"Curriculum competency: {competency}\n"
        f"Grade level: {grade_level}\n"
        f"Subject: {subject}\n"
        f"Quarter: Q{quarter}\n"
        f"Learner level: {learner_level or 'Grade 11-12'}\n"
        f"Module/unit: {module_unit or 'n/a'}\n\n"
        "[CURRICULUM CONTEXT]\n"
        f"{evidence}\n\n"
        "Return ONLY valid JSON with this exact structure. All 7 sections are required:\n"
        "{\n"
        ' "sections": [\n'
        ' {"type": "introduction", "title": "Introduction", "content": "..."},\n'
        ' {"type": "key_concepts", "title": "Key Concepts", "content": "...", "callouts": [{"type":"important|ti..."}]\n},'
        ' {"type": "video", "title": "Video Lesson", "content": "...", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},\n'
        ' {"type": "worked_examples", "title": "Worked Examples", "examples": [{"problem":"...","steps":["Step 1: ...","Step 2: ..."],"answer":"..."}]},\n'
        ' {"type": "important_notes", "title": "Important Notes", "bulletPoints": ["...","..."]},\n'
        ' {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": [{"question":"...","solution":"..."}]},\n'
        ' {"type": "summary", "title": "Summary", "content": "..."}\n'
        " ],\n"
        ' "needsReview": false\n'
        "}\n\n"
        "Rules:\n"
        "- content in introduction, key_concepts, important_notes, summary: use paragraph/bullet text grounded in retrieved chunks\n"
        "- examples must reflect actual content from the retrieved curriculum (real formulas, real contexts)\n"
        "- practiceProblems should be derivable from worked examples\n"
        "- callouts: type is 'important', 'tip', or 'warning'\n"
        "- video section: content is a brief sentence, leave videoId empty (will be filled by backend)\n"
        "- Do not use placeholder text like 'placeholder' or 'example text'\n"
        "- Do not fabricate worked examples - use actual curriculum content\n"
    )
283
-
284
-
285
def build_problem_generation_prompt(topic: str, difficulty: str, curriculum_chunks: list[dict]) -> str:
    """Build the LLM prompt for one curriculum-aligned practice problem.

    Retrieved chunks are listed as numbered references (source, page,
    domain/type, content); a placeholder sentence is used when there are
    none. The model is asked to answer with JSON containing ``problem``,
    ``solution`` and ``competencyReference``.
    """
    context_lines = [
        f"{position}. [{entry.get('source_file')} p.{entry.get('page')}] "
        f"({entry.get('content_domain')}/{entry.get('chunk_type')}) {entry.get('content', '')}"
        for position, entry in enumerate(curriculum_chunks, start=1)
    ]
    if context_lines:
        context_block = "\n".join(context_lines)
    else:
        context_block = "No curriculum context retrieved."

    pieces = [
        "Generate one practice problem strictly aligned to the retrieved DepEd competency scope.\n",
        "Do not include topics outside the competency context.\n\n",
        f"Topic: {topic}\n",
        f"Difficulty: {difficulty}\n\n",
        "[CURRICULUM CONTEXT]\n",
        f"{context_block}\n\n",
        "Return JSON with keys: problem, solution, competencyReference",
    ]
    return "".join(pieces)
303
-
304
-
305
def build_analysis_curriculum_context(weak_topics: list[str], subject: str) -> list[dict]:
    """Collect learning-competency chunks for each weak topic.

    For every topic the two best ``learning_competency`` chunks in
    ``subject`` are retrieved. Rows that share source file, page and the
    first 80 characters of content are returned only once, in first-seen
    order.
    """
    unique_rows: Dict[str, dict] = {}
    for topic_name in weak_topics:
        hits = retrieve_curriculum_context(
            query=f"DepEd learning competency for {topic_name}",
            subject=subject,
            chunk_type="learning_competency",
            top_k=2,
        )
        for hit in hits:
            fingerprint = f"{hit.get('source_file')}::{hit.get('page')}::{hit.get('content', '')[:80]}"
            unique_rows.setdefault(fingerprint, hit)
    return list(unique_rows.values())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/routes/rag_routes.py DELETED
@@ -1,427 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import logging
5
- import os
6
- import re
7
- from datetime import datetime, timezone
8
- from threading import Lock
9
- from typing import Any, Dict, List, Optional
10
-
11
- from fastapi import APIRouter, HTTPException, Request
12
- from pydantic import BaseModel, Field
13
-
14
- from services.inference_client import (
15
- InferenceRequest,
16
- create_default_client,
17
- is_sequential_model,
18
- get_model_for_task,
19
- )
20
- from rag.curriculum_rag import (
21
- build_analysis_curriculum_context,
22
- build_lesson_prompt,
23
- build_lesson_query,
24
- build_problem_generation_prompt,
25
- format_retrieved_chunks,
26
- retrieve_curriculum_context,
27
- retrieve_lesson_pdf_context,
28
- summarize_retrieval_confidence,
29
- )
30
- from rag.vectorstore_loader import get_vectorstore_health, reset_vectorstore_singleton
31
-
32
- try:
33
- from firebase_admin import firestore as firebase_firestore
34
- except Exception:
35
- firebase_firestore = None
36
-
37
- logger = logging.getLogger("mathpulse.rag")
38
- router = APIRouter(prefix="/api/rag", tags=["rag"])
39
-
40
# Shared inference client, created on first use by _get_inference_client().
_inference_client = None
# Serializes the first-time construction of the shared client.
_inference_lock = Lock()


def _get_inference_client():
    """Return the module-wide inference client, creating it on first call.

    Uses double-checked locking so that concurrent first calls construct the
    client only once via ``create_default_client()``.
    """
    global _inference_client
    if _inference_client is not None:
        return _inference_client

    with _inference_lock:
        # Re-check under the lock: another thread may have won the race.
        if _inference_client is None:
            _inference_client = create_default_client()
        return _inference_client
51
-
52
-
53
async def _generate_text(
    prompt: str,
    task_type: str,
    max_new_tokens: int = 900,
    enable_thinking: bool = False,
) -> str:
    """Generate text for ``prompt`` with the shared inference client.

    The prompt is sent as the user message after a fixed system message,
    with temperature 0.2 and top_p 0.9.

    The client call is synchronous, so it runs in the event loop's default
    executor instead of directly inside this coroutine; otherwise one slow
    inference would stall every other request on the loop.

    Returns:
        Whatever ``generate_from_messages`` returns for the request.
    """
    import asyncio  # local import keeps this block self-contained

    request = InferenceRequest(
        messages=[
            {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
            {"role": "user", "content": prompt},
        ],
        task_type=task_type,
        max_new_tokens=max_new_tokens,
        temperature=0.2,
        top_p=0.9,
        enable_thinking=enable_thinking,
    )
    client = _get_inference_client()
    # NOTE(review): assumes generate_from_messages is a blocking call that is
    # safe to invoke from a worker thread - confirm in services.inference_client.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, client.generate_from_messages, request)
71
-
72
-
73
def _log_rag_usage(
    request: Request,
    *,
    event_type: str,
    topic: str,
    subject: str,
    quarter: Optional[int],
    chunks: List[Dict[str, Any]],
) -> None:
    """Record one RAG usage event in the ``rag_usage`` Firestore collection.

    Best-effort: does nothing when the Firestore module could not be
    imported, and any failure while building or writing the record is logged
    as a warning instead of being raised.
    """
    if firebase_firestore is None:
        return

    try:
        uid = getattr(getattr(request.state, "user", None), "uid", None)

        domains_hit = set()
        for chunk in chunks:
            if chunk.get("content_domain"):
                domains_hit.add(str(chunk.get("content_domain") or "").strip())

        best_score = max((float(chunk.get("score") or 0.0) for chunk in chunks), default=0.0)

        record = {
            "userId": uid,
            "type": event_type,
            "topic": topic,
            "subject": subject,
            "quarter": quarter,
            "retrievedChunks": len(chunks),
            "topScore": best_score,
            "curriculumDomainsHit": sorted(domains_hit),
            "timestamp": firebase_firestore.SERVER_TIMESTAMP,
            "createdAtIso": datetime.now(timezone.utc).isoformat(),
        }
        firebase_firestore.client().collection("rag_usage").add(record)
    except Exception as exc:
        logger.warning("rag_usage logging skipped: %s", exc)
104
-
105
-
106
- def _strip_thinking_and_parse(text: str) -> dict:
107
- cleaned = text.strip()
108
- cleaned = re.sub(r" </think>", "", cleaned, flags=re.DOTALL).strip()
109
- if "{" in cleaned and "}" in cleaned:
110
- try:
111
- start = cleaned.find("{")
112
- end = cleaned.rfind("}") + 1
113
- parsed = json.loads(cleaned[start:end])
114
- if isinstance(parsed, dict):
115
- return parsed
116
- except Exception:
117
- pass
118
- return {"explanation": text}
119
-
120
-
121
class RagLessonRequest(BaseModel):
    # Request body for POST /api/rag/lesson.

    # Required lesson coordinates.
    topic: str
    subject: str
    quarter: int

    # Optional descriptors folded into the retrieval query and the prompt.
    lessonTitle: Optional[str] = None
    learningCompetency: Optional[str] = None
    moduleUnit: Optional[str] = None
    learnerLevel: Optional[str] = None

    # Optional caller identity; not read by the lesson route itself.
    userId: Optional[str] = None

    # Optional identifiers forwarded to lesson retrieval.
    moduleId: Optional[str] = None
    lessonId: Optional[str] = None
    competencyCode: Optional[str] = None
    storagePath: Optional[str] = None
134
-
135
-
136
class RagProblemRequest(BaseModel):
    # Request body for POST /api/rag/generate-problem.

    # Topic, subject and quarter select the curriculum context.
    topic: str
    subject: str
    quarter: int

    # Difficulty label inserted verbatim into the generation prompt.
    difficulty: str = Field(default="medium")

    # Optional caller identity; not read by the route itself.
    userId: Optional[str] = None
142
-
143
-
144
class RagAnalysisContextRequest(BaseModel):
    # Request body for POST /api/rag/analysis-context.

    # Topics to look up learning competencies for; the route rejects an empty list.
    weakTopics: List[str]
    subject: str

    # Optional caller identity; not read by the route itself.
    userId: Optional[str] = None
148
-
149
-
150
@router.get("/health")
async def rag_health():
    """Report vector-store health and the model used for RAG lessons.

    Returns an ``"ok"`` payload with the chunk count and subjects from the
    vector store, or a ``"degraded"`` payload (zero chunks, a ``warning``
    with the error text) when reading the store's health fails.
    """
    active_model = get_model_for_task("rag_lesson")
    model_info = {
        "activeModel": active_model,
        "isSequentialModel": is_sequential_model(active_model),
    }

    try:
        health = get_vectorstore_health()
        return {
            "status": "ok",
            "chunkCount": health["chunkCount"],
            "subjects": health["subjects"],
            # NOTE(review): this is the time of the health check, not a
            # stored ingestion timestamp - confirm that this is intended.
            "lastIngested": datetime.now(timezone.utc).isoformat(),
            **model_info,
        }
    except Exception as exc:
        return {
            "status": "degraded",
            "chunkCount": 0,
            "subjects": {},
            "lastIngested": None,
            **model_info,
            "warning": str(exc),
        }
174
-
175
-
176
- def _fetch_youtube_video(lesson_title: str, subject: str, competency: str, quarter: int) -> dict:
177
- try:
178
- from backend.services.youtube_service import get_video_for_lesson
179
- except ImportError:
180
- return {}
181
- try:
182
- video = get_video_for_lesson(lesson_title, subject, competency, quarter)
183
- return video or {}
184
- except Exception as e:
185
- logger.warning("YouTube search failed: %s", e)
186
- return {}
187
-
188
-
189
- def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
190
- sections = lesson_data.get("sections", [])
191
- section_types = {s.get("type") for s in sections}
192
- required = ["introduction", "key_concepts", "video", "worked_examples", "important_notes", "try_it_yourself", "summary"]
193
-
194
- default_content = {
195
- "introduction": {"type": "introduction", "title": "Introduction", "content": f"Welcome to the lesson on {lesson_title}."},
196
- "key_concepts": {"type": "key_concepts", "title": "Key Concepts", "content": "Below are the key concepts covered in this lesson.", "callouts": []},
197
- "video": {"type": "video", "title": "Video Lesson", "content": "Watch this explanation to understand the concepts visually.", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},
198
- "worked_examples": {"type": "worked_examples", "title": "Worked Examples", "examples": []},
199
- "important_notes": {"type": "important_notes", "title": "Important Notes", "bulletPoints": []},
200
- "try_it_yourself": {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": []},
201
- "summary": {"type": "summary", "title": "Summary", "content": f"Great job completing the lesson on {lesson_title}!"},
202
- }
203
-
204
- filled = {}
205
- for req_type in required:
206
- for existing in sections:
207
- if existing.get("type") == req_type:
208
- filled[req_type] = existing
209
- break
210
- else:
211
- filled[req_type] = default_content[req_type]
212
-
213
- ordered = [filled[t] for t in required]
214
-
215
- for i, section in enumerate(ordered):
216
- s_type = section.get("type")
217
- if s_type == "key_concepts" and not section.get("callouts"):
218
- section["callouts"] = []
219
- if s_type == "worked_examples" and not section.get("examples"):
220
- section["examples"] = []
221
- if s_type == "important_notes" and not section.get("bulletPoints"):
222
- section["bulletPoints"] = []
223
- if s_type == "try_it_yourself" and not section.get("practiceProblems"):
224
- section["practiceProblems"] = []
225
- ordered[i] = section
226
-
227
- return {**lesson_data, "sections": ordered}
228
-
229
-
230
@router.post("/lesson")
async def rag_lesson(request: Request, payload: RagLessonRequest):
    """Generate a 7-section lesson grounded in retrieved curriculum chunks.

    Steps: retrieve chunks for the lesson, build the prompt, generate and
    parse the model output, normalize it to seven sections, attach a video
    when one is found, log the usage event, and return the lesson together
    with retrieval metadata and the source chunks.

    Raises:
        HTTPException: 404 when no curriculum chunks are retrieved; 500 with
            the exception type and message for any other failure.
    """
    lesson_title = payload.lessonTitle or payload.topic
    try:
        # The retrieval helper takes the query text as ``topic``; it has no
        # ``query`` parameter, so passing ``query=`` raised TypeError.
        chunks, retrieval_mode = retrieve_lesson_pdf_context(
            topic=build_lesson_query(
                payload.topic,
                payload.subject,
                payload.quarter,
                lesson_title=payload.lessonTitle,
                competency=payload.learningCompetency,
                module_unit=payload.moduleUnit,
                learner_level=payload.learnerLevel,
            ),
            subject=payload.subject,
            quarter=payload.quarter,
            lesson_title=payload.lessonTitle,
            competency=payload.learningCompetency,
            module_id=payload.moduleId,
            lesson_id=payload.lessonId,
            competency_code=payload.competencyCode,
            storage_path=payload.storagePath,
            top_k=8,
        )

        if not chunks:
            raise HTTPException(
                status_code=404,
                detail={
                    "error": "no_curriculum_context",
                    "message": f"No curriculum content found for lesson '{payload.lessonTitle}' ({payload.subject} Q{payload.quarter}). Please ensure the PDF has been ingested.",
                    "retrievalBand": "low",
                    "sources": [],
                },
            )

        prompt = build_lesson_prompt(
            lesson_title=lesson_title,
            competency=payload.learningCompetency or payload.topic,
            grade_level="Grade 11-12",
            subject=payload.subject,
            quarter=payload.quarter,
            learner_level=payload.learnerLevel,
            module_unit=payload.moduleUnit,
            curriculum_chunks=chunks,
            competency_code=payload.competencyCode,
        )

        raw_explanation = await _generate_text(
            prompt,
            task_type="lesson_generation",
            max_new_tokens=1800,
            enable_thinking=True,
        )

        parsed_lesson = _strip_thinking_and_parse(raw_explanation)
        parsed_lesson = _ensure_7_sections(parsed_lesson, lesson_title)

        if parsed_lesson.get("sections"):
            video_section = next(
                (s for s in parsed_lesson["sections"] if s.get("type") == "video"),
                None,
            )
            if video_section:
                video_data = _fetch_youtube_video(
                    lesson_title,
                    payload.subject,
                    payload.learningCompetency or "",
                    payload.quarter,
                )
                if video_data:
                    for field in ("videoId", "videoTitle", "videoChannel", "embedUrl", "thumbnailUrl"):
                        video_section[field] = video_data.get(field, "")

        retrieval_summary = summarize_retrieval_confidence(chunks)

        _log_rag_usage(
            request,
            event_type="lesson",
            topic=build_lesson_query(
                payload.topic,
                payload.subject,
                payload.quarter,
                lesson_title=payload.lessonTitle,
            ),
            subject=payload.subject,
            quarter=payload.quarter,
            chunks=chunks,
        )

        # Weak retrieval always forces review, whatever the model reported.
        needs_review = parsed_lesson.get("needsReview", False)
        if retrieval_summary.get("band") == "low":
            needs_review = True

        source_fields = (
            "subject",
            "quarter",
            "source_file",
            "storage_path",
            "page",
            "score",
            "content_domain",
            "chunk_type",
            "content",
        )
        return {
            **parsed_lesson,
            "retrievalConfidence": retrieval_summary.get("confidence", 0.0),
            "retrievalBand": retrieval_summary.get("band", "low"),
            "retrievalMode": retrieval_mode,
            "needsReview": needs_review,
            "sources": [{field: row.get(field) for field in source_fields} for row in chunks],
            "activeModel": get_model_for_task("rag_lesson"),
        }
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 404 above) must reach the client
        # unchanged rather than being rewrapped as a 500.
        raise
    except Exception as exc:
        # The stack trace is logged server-side only; returning it in the
        # response body would disclose internals to any caller.
        logger.exception("RAG lesson error: %s: %s", type(exc).__name__, exc)
        raise HTTPException(
            status_code=500,
            detail={
                "error": type(exc).__name__,
                "message": str(exc),
            },
        ) from exc
351
-
352
-
353
@router.post("/generate-problem")
async def rag_generate_problem(request: Request, payload: RagProblemRequest):
    """Generate one practice problem grounded in retrieved curriculum chunks.

    Retrieves up to five chunks for the topic, asks the model for a JSON
    problem, and falls back to the raw model text when no usable
    ``problem`` string can be extracted. The usage event is logged and the
    sources are returned without their content.
    """
    chunks = retrieve_curriculum_context(
        query=payload.topic,
        subject=payload.subject,
        quarter=payload.quarter,
        top_k=5,
    )
    raw = await _generate_text(
        build_problem_generation_prompt(payload.topic, payload.difficulty, chunks),
        task_type="quiz_generation",
        max_new_tokens=600,
        enable_thinking=False,
    )
    parsed = _strip_thinking_and_parse(raw)

    # Prefer the parsed problem; text that is empty or still looks like JSON
    # is replaced by "content" / the stringified object, then by the raw text.
    problem = str(parsed.get("problem") or raw)
    looks_unusable = (not problem) or problem.startswith("{")
    if looks_unusable:
        problem = str(parsed.get("content") or str(parsed))
    if len(problem) < 3 or problem.startswith("{"):
        problem = raw

    solution = str(parsed.get("solution") or "")
    competency_ref = str(parsed.get("competencyReference") or "DepEd competency-aligned")

    _log_rag_usage(
        request,
        event_type="problem_generation",
        topic=payload.topic,
        subject=payload.subject,
        quarter=payload.quarter,
        chunks=chunks,
    )

    source_fields = ("subject", "quarter", "source_file", "page", "score")
    return {
        "problem": problem,
        "solution": solution,
        "competencyReference": competency_ref,
        "sources": [{field: row.get(field) for field in source_fields} for row in chunks],
    }
403
-
404
-
405
@router.post("/analysis-context")
async def rag_analysis_context(request: Request, payload: RagAnalysisContextRequest):
    """Return learning-competency context text for a list of weak topics.

    Raises:
        HTTPException: 400 when ``weakTopics`` is empty.
    """
    if not payload.weakTopics:
        raise HTTPException(status_code=400, detail="weakTopics must be a non-empty list")

    chunks = build_analysis_curriculum_context(payload.weakTopics, payload.subject)
    entries = [
        f"{index}. {row.get('content')} (Source: {row.get('source_file')} p.{row.get('page')}, "
        f"Q{row.get('quarter')}, {row.get('content_domain')})"
        for index, row in enumerate(chunks, start=1)
    ]

    _log_rag_usage(
        request,
        event_type="analysis_context",
        topic=", ".join(payload.weakTopics),
        subject=payload.subject,
        quarter=None,
        chunks=chunks,
    )

    return {"curriculumContext": "\n".join(["LEARNING COMPETENCIES:", *entries])}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main.py CHANGED
@@ -1000,6 +1000,8 @@ class RequestMiddleware(BaseHTTPMiddleware):
1000
  status_code=500,
1001
  content={
1002
  "detail": "Internal server error",
 
 
1003
  "requestId": request_id,
1004
  },
1005
  headers={"X-Request-ID": request_id},
 
1000
  status_code=500,
1001
  content={
1002
  "detail": "Internal server error",
1003
+ "error": type(exc).__name__,
1004
+ "message": str(exc),
1005
  "requestId": request_id,
1006
  },
1007
  headers={"X-Request-ID": request_id},
rag/curriculum_rag.py CHANGED
@@ -57,7 +57,7 @@ def retrieve_curriculum_context(
57
  storage_path: str | None = None,
58
  top_k: int = 8,
59
  ) -> list[dict]:
60
- from backend.rag.vectorstore_loader import get_vectorstore_components
61
 
62
  _, collection, embedder = get_vectorstore_components()
63
  where = _to_where(subject, quarter, content_domain, chunk_type, module_id, lesson_id, competency_code, storage_path)
@@ -195,12 +195,12 @@ def format_retrieved_chunks(curriculum_chunks: list[dict]) -> str:
195
 
196
  def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, any]:
197
  if not curriculum_chunks:
198
- return {"confidence": 0.0, "band": "low"}
199
 
200
  top_scores = [float(c.get("score") or 0.0) for c in curriculum_chunks[:5]]
201
  score = sum(top_scores) / max(1, len(top_scores))
202
  band = "high" if score >= 0.72 else "medium" if score >= 0.5 else "low"
203
- return {"confidence": round(score, 3), "band": band}
204
 
205
 
206
  def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
 
57
  storage_path: str | None = None,
58
  top_k: int = 8,
59
  ) -> list[dict]:
60
+ from rag.vectorstore_loader import get_vectorstore_components
61
 
62
  _, collection, embedder = get_vectorstore_components()
63
  where = _to_where(subject, quarter, content_domain, chunk_type, module_id, lesson_id, competency_code, storage_path)
 
195
 
196
  def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, any]:
197
  if not curriculum_chunks:
198
+ return {"confidence": 0.0, "band": "low", "chunkCount": 0}
199
 
200
  top_scores = [float(c.get("score") or 0.0) for c in curriculum_chunks[:5]]
201
  score = sum(top_scores) / max(1, len(top_scores))
202
  band = "high" if score >= 0.72 else "medium" if score >= 0.5 else "low"
203
+ return {"confidence": round(score, 3), "band": band, "chunkCount": len(curriculum_chunks)}
204
 
205
 
206
  def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
routes/rag_routes.py CHANGED
@@ -229,26 +229,39 @@ def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
229
 
230
  @router.post("/lesson")
231
  async def rag_lesson(request: Request, payload: RagLessonRequest):
232
- chunks, retrieval_mode = retrieve_lesson_pdf_context(
233
- query=build_lesson_query(
234
- payload.topic,
235
- payload.subject,
236
- payload.quarter,
 
 
 
 
 
 
 
 
 
237
  lesson_title=payload.lessonTitle,
238
  competency=payload.learningCompetency,
239
- module_unit=payload.moduleUnit,
240
- learner_level=payload.learnerLevel,
241
- ),
242
- subject=payload.subject,
243
- quarter=payload.quarter,
244
- lesson_title=payload.lessonTitle,
245
- competency=payload.learningCompetency,
246
- module_id=payload.moduleId,
247
- lesson_id=payload.lessonId,
248
- competency_code=payload.competencyCode,
249
- storage_path=payload.storagePath,
250
- top_k=8,
251
- )
 
 
 
 
252
 
253
  if not chunks:
254
  raise HTTPException(
@@ -261,54 +274,98 @@ async def rag_lesson(request: Request, payload: RagLessonRequest):
261
  },
262
  )
263
 
264
- prompt = build_lesson_prompt(
265
- lesson_title=payload.lessonTitle or payload.topic,
266
- competency=payload.learningCompetency or payload.topic,
267
- grade_level="Grade 11-12",
268
- subject=payload.subject,
269
- quarter=payload.quarter,
270
- learner_level=payload.learnerLevel,
271
- module_unit=payload.moduleUnit,
272
- curriculum_chunks=chunks,
273
- competency_code=payload.competencyCode,
274
- )
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
- raw_explanation = await _generate_text(
277
- prompt,
278
- task_type="lesson_generation",
279
- max_new_tokens=1800,
280
- enable_thinking=True,
281
- )
 
 
 
 
 
 
 
 
 
 
 
 
282
 
283
- parsed_lesson = _strip_thinking_and_parse(raw_explanation)
284
- parsed_lesson = _ensure_7_sections(parsed_lesson, payload.lessonTitle or payload.topic)
 
 
 
 
 
 
 
 
 
 
 
 
285
 
 
286
  if parsed_lesson.get("sections"):
287
  video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
288
  if video_section:
289
- video_data = _fetch_youtube_video(
290
- payload.lessonTitle or payload.topic,
291
- payload.subject,
292
- payload.learningCompetency or "",
293
- payload.quarter,
294
- )
295
- if video_data:
296
- video_section["videoId"] = video_data.get("videoId", "")
297
- video_section["videoTitle"] = video_data.get("videoTitle", "")
298
- video_section["videoChannel"] = video_data.get("videoChannel", "")
299
- video_section["embedUrl"] = video_data.get("embedUrl", "")
300
- video_section["thumbnailUrl"] = video_data.get("thumbnailUrl", "")
301
-
 
 
 
 
302
  retrieval_summary = summarize_retrieval_confidence(chunks)
303
 
304
- _log_rag_usage(
305
- request,
306
- event_type="lesson",
307
- topic=build_lesson_query(payload.topic, payload.subject, payload.quarter, lesson_title=payload.lessonTitle),
308
- subject=payload.subject,
309
- quarter=payload.quarter,
310
- chunks=chunks,
311
- )
 
 
 
312
 
313
  needs_review = parsed_lesson.get("needsReview", False)
314
  if retrieval_summary.get("band") == "low":
 
229
 
230
  @router.post("/lesson")
231
  async def rag_lesson(request: Request, payload: RagLessonRequest):
232
+ # ── Step 1: Retrieve curriculum chunks ───────────────────────────────────
233
+ try:
234
+ chunks, retrieval_mode = retrieve_lesson_pdf_context(
235
+ topic=build_lesson_query(
236
+ payload.topic,
237
+ payload.subject,
238
+ payload.quarter,
239
+ lesson_title=payload.lessonTitle,
240
+ competency=payload.learningCompetency,
241
+ module_unit=payload.moduleUnit,
242
+ learner_level=payload.learnerLevel,
243
+ ),
244
+ subject=payload.subject,
245
+ quarter=payload.quarter,
246
  lesson_title=payload.lessonTitle,
247
  competency=payload.learningCompetency,
248
+ module_id=payload.moduleId,
249
+ lesson_id=payload.lessonId,
250
+ competency_code=payload.competencyCode,
251
+ storage_path=payload.storagePath,
252
+ top_k=8,
253
+ )
254
+ except Exception as exc:
255
+ import traceback
256
+ logger.error(f"RAG retrieval error: {type(exc).__name__}: {exc}\n{traceback.format_exc()}")
257
+ raise HTTPException(
258
+ status_code=503,
259
+ detail={
260
+ "error": "retrieval_failed",
261
+ "message": f"Curriculum retrieval failed: {exc}",
262
+ "type": type(exc).__name__,
263
+ },
264
+ )
265
 
266
  if not chunks:
267
  raise HTTPException(
 
274
  },
275
  )
276
 
277
+ # ── Step 2: Build prompt ─────────────────────────────────────────────────
278
+ try:
279
+ prompt = build_lesson_prompt(
280
+ lesson_title=payload.lessonTitle or payload.topic,
281
+ competency=payload.learningCompetency or payload.topic,
282
+ grade_level="Grade 11-12",
283
+ subject=payload.subject,
284
+ quarter=payload.quarter,
285
+ learner_level=payload.learnerLevel,
286
+ module_unit=payload.moduleUnit,
287
+ curriculum_chunks=chunks,
288
+ competency_code=payload.competencyCode,
289
+ )
290
+ except Exception as exc:
291
+ logger.error(f"RAG prompt build error: {type(exc).__name__}: {exc}")
292
+ raise HTTPException(
293
+ status_code=500,
294
+ detail={
295
+ "error": "prompt_build_failed",
296
+ "message": f"Failed to build lesson prompt: {exc}",
297
+ "type": type(exc).__name__,
298
+ },
299
+ )
300
 
301
+ # ── Step 3: AI inference ─────────────────────────────────────────────────
302
+ try:
303
+ raw_explanation = await _generate_text(
304
+ prompt,
305
+ task_type="rag_lesson",
306
+ max_new_tokens=1800,
307
+ enable_thinking=True,
308
+ )
309
+ except Exception as exc:
310
+ logger.error(f"RAG inference error: {type(exc).__name__}: {exc}")
311
+ raise HTTPException(
312
+ status_code=502,
313
+ detail={
314
+ "error": "inference_failed",
315
+ "message": f"AI model call failed: {exc}",
316
+ "type": type(exc).__name__,
317
+ },
318
+ )
319
 
320
+ # ── Step 4: Parse & validate response ────────────────────────────────────
321
+ try:
322
+ parsed_lesson = _strip_thinking_and_parse(raw_explanation)
323
+ parsed_lesson = _ensure_7_sections(parsed_lesson, payload.lessonTitle or payload.topic)
324
+ except Exception as exc:
325
+ logger.error(f"RAG parse error: {type(exc).__name__}: {exc}")
326
+ raise HTTPException(
327
+ status_code=500,
328
+ detail={
329
+ "error": "parse_failed",
330
+ "message": f"Failed to parse AI response: {exc}",
331
+ "type": type(exc).__name__,
332
+ },
333
+ )
334
 
335
+ # ── Step 5: Enrich with video ────────────────────────────────────────────
336
  if parsed_lesson.get("sections"):
337
  video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
338
  if video_section:
339
+ try:
340
+ video_data = _fetch_youtube_video(
341
+ payload.lessonTitle or payload.topic,
342
+ payload.subject,
343
+ payload.learningCompetency or "",
344
+ payload.quarter,
345
+ )
346
+ if video_data:
347
+ video_section["videoId"] = video_data.get("videoId", "")
348
+ video_section["videoTitle"] = video_data.get("videoTitle", "")
349
+ video_section["videoChannel"] = video_data.get("videoChannel", "")
350
+ video_section["embedUrl"] = video_data.get("embedUrl", "")
351
+ video_section["thumbnailUrl"] = video_data.get("thumbnailUrl", "")
352
+ except Exception as exc:
353
+ logger.warning("YouTube enrichment skipped: %s", exc)
354
+
355
+ # ── Step 6: Assemble response ────────────────────────────────────────────
356
  retrieval_summary = summarize_retrieval_confidence(chunks)
357
 
358
+ try:
359
+ _log_rag_usage(
360
+ request,
361
+ event_type="lesson",
362
+ topic=build_lesson_query(payload.topic, payload.subject, payload.quarter, lesson_title=payload.lessonTitle),
363
+ subject=payload.subject,
364
+ quarter=payload.quarter,
365
+ chunks=chunks,
366
+ )
367
+ except Exception as exc:
368
+ logger.warning("RAG usage logging skipped: %s", exc)
369
 
370
  needs_review = parsed_lesson.get("needsReview", False)
371
  if retrieval_summary.get("band") == "low":
test_full_rag.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ sys.path.insert(0, 'backend')
4
+
5
+ # Set required env vars
6
+ os.environ['DEEPSEEK_API_KEY'] = os.getenv('DEEPSEEK_API_KEY', '')
7
+ os.environ['DEEPSEEK_BASE_URL'] = os.getenv('DEEPSEEK_BASE_URL', 'https://api.deepseek.com')
8
+
9
+ from rag.curriculum_rag import retrieve_lesson_pdf_context, build_lesson_prompt
10
+ from services.inference_client import InferenceClient, InferenceRequest
11
+
12
+ # Test retrieval
13
+ print("Testing retrieval...")
14
+ try:
15
+ chunks, mode = retrieve_lesson_pdf_context(
16
+ topic="Represent real-life relationships as functions and interpret domain/range.",
17
+ subject="General Mathematics",
18
+ quarter=2,
19
+ lesson_title="Represent real-life relationships as functions and interpret domain/range.",
20
+ module_id="gen-math",
21
+ lesson_id="gm-q2-functions-graphs-l1",
22
+ competency_code="GM11-FG-1",
23
+ top_k=8,
24
+ )
25
+ print(f"Retrieved {len(chunks)} chunks, mode={mode}")
26
+ except Exception as e:
27
+ print(f"Retrieval ERROR: {type(e).__name__}: {e}")
28
+ import traceback
29
+ traceback.print_exc()
30
+ sys.exit(1)
31
+
32
+ # Test prompt building
33
+ print("\nTesting prompt building...")
34
+ try:
35
+ prompt = build_lesson_prompt(
36
+ lesson_title="Represent real-life relationships as functions and interpret domain/range.",
37
+ competency="Represent real-life relationships as functions and interpret domain/range.",
38
+ grade_level="Grade 11-12",
39
+ subject="General Mathematics",
40
+ quarter=2,
41
+ learner_level="Grade 11-12",
42
+ module_unit="n/a",
43
+ curriculum_chunks=chunks,
44
+ competency_code="GM11-FG-1",
45
+ )
46
+ print(f"Prompt length: {len(prompt)} chars")
47
+ print(f"Prompt preview: {prompt[:200]}...")
48
+ except Exception as e:
49
+ print(f"Prompt building ERROR: {type(e).__name__}: {e}")
50
+ import traceback
51
+ traceback.print_exc()
52
+ sys.exit(1)
53
+
54
+ # Test inference (optional - might cost money)
55
+ print("\nTesting inference...")
56
+ try:
57
+ client = InferenceClient()
58
+ req = InferenceRequest(
59
+ messages=[
60
+ {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
61
+ {"role": "user", "content": prompt},
62
+ ],
63
+ task_type="lesson_generation",
64
+ max_new_tokens=100, # Small for testing
65
+ temperature=0.2,
66
+ top_p=0.9,
67
+ enable_thinking=True,
68
+ )
69
+ result = client.generate_from_messages(req)
70
+ print(f"Inference result: {result[:200]}...")
71
+ print("SUCCESS!")
72
+ except Exception as e:
73
+ print(f"Inference ERROR: {type(e).__name__}: {e}")
74
+ import traceback
75
+ traceback.print_exc()
test_retrieval.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.insert(0, '.')
3
+
4
+ from rag.curriculum_rag import retrieve_lesson_pdf_context, retrieve_curriculum_context
5
+
6
+ # Test retrieval with the same params as the frontend
7
+ try:
8
+ chunks, mode = retrieve_lesson_pdf_context(
9
+ topic="Represent real-life relationships as functions and interpret domain/range.",
10
+ subject="General Mathematics",
11
+ quarter=2,
12
+ lesson_title="Represent real-life relationships as functions and interpret domain/range.",
13
+ module_id="gen-math",
14
+ lesson_id="gm-q2-functions-graphs-l1",
15
+ competency_code="GM11-FG-1",
16
+ top_k=8,
17
+ )
18
+ print(f"Retrieved {len(chunks)} chunks, mode={mode}")
19
+ for i, chunk in enumerate(chunks[:3]):
20
+ print(f" Chunk {i}: score={chunk.get('score')}, domain={chunk.get('content_domain')}, source={chunk.get('source_file')}")
21
+ print(f" Content: {chunk.get('content', '')[:100]}...")
22
+ except Exception as e:
23
+ print(f"ERROR: {type(e).__name__}: {e}")
24
+ import traceback
25
+ traceback.print_exc()
26
+
27
+ # Also test without module/lesson filters
28
+ try:
29
+ chunks2 = retrieve_curriculum_context(
30
+ query="Represent real-life relationships as functions and interpret domain/range.",
31
+ subject="General Mathematics",
32
+ quarter=2,
33
+ top_k=8,
34
+ )
35
+ print(f"\nGeneral retrieval: {len(chunks2)} chunks")
36
+ except Exception as e:
37
+ print(f"\nGeneral ERROR: {type(e).__name__}: {e}")
38
+ import traceback
39
+ traceback.print_exc()
tests/test_rag_pipeline.py CHANGED
@@ -23,13 +23,18 @@ def _mock_vectorstore_components(collection_mock, embedder_mock):
23
  class TestRetrieveCurriculumContext:
24
  def test_empty_collection_returns_empty_list(self):
25
  collection = MagicMock()
26
- collection_get_result = collection.get.return_value
27
- collection_get_result.__getitem__.return_value = []
 
 
 
28
 
29
  embedder = MagicMock()
 
 
30
 
31
  with patch(
32
- "rag.curriculum_rag.get_vectorstore_components",
33
  return_value=(MagicMock(), collection, embedder),
34
  ):
35
  result = retrieve_curriculum_context(
@@ -73,14 +78,12 @@ class TestBuildLessonPrompt:
73
  ],
74
  )
75
  assert "JSON" in prompt
76
- assert "lessonTitle" in prompt
77
  assert "needsReview" in prompt
78
- ph_context_terms = [
79
- "payroll", "VAT", "discounts", "loans", "Pag-IBIG", "school",
80
- ]
81
- assert any(term in prompt for term in ph_context_terms)
82
 
83
- def test_contains_thinking_hint(self):
84
  prompt = build_lesson_prompt(
85
  lesson_title="Functions",
86
  competency="M11GM-Ia-1",
@@ -91,7 +94,10 @@ class TestBuildLessonPrompt:
91
  module_unit=None,
92
  curriculum_chunks=[],
93
  )
94
- assert "Think step by step" in prompt
 
 
 
95
 
96
 
97
  class TestSummarizeRetrievalConfidence:
 
23
  class TestRetrieveCurriculumContext:
24
  def test_empty_collection_returns_empty_list(self):
25
  collection = MagicMock()
26
+ collection.query.return_value = {
27
+ "documents": [[]],
28
+ "metadatas": [[]],
29
+ "distances": [[]],
30
+ }
31
 
32
  embedder = MagicMock()
33
+ embedder.encode.return_value = MagicMock()
34
+ embedder.encode.return_value.tolist.return_value = [0.0] * 768
35
 
36
  with patch(
37
+ "rag.vectorstore_loader.get_vectorstore_components",
38
  return_value=(MagicMock(), collection, embedder),
39
  ):
40
  result = retrieve_curriculum_context(
 
78
  ],
79
  )
80
  assert "JSON" in prompt
81
+ assert "Lesson title:" in prompt
82
  assert "needsReview" in prompt
83
+ assert "DepEd-aligned" in prompt
84
+ assert "7 sections" in prompt
 
 
85
 
86
+ def test_contains_required_sections_in_prompt(self):
87
  prompt = build_lesson_prompt(
88
  lesson_title="Functions",
89
  competency="M11GM-Ia-1",
 
94
  module_unit=None,
95
  curriculum_chunks=[],
96
  )
97
+ assert "introduction" in prompt
98
+ assert "key_concepts" in prompt
99
+ assert "worked_examples" in prompt
100
+ assert "try_it_yourself" in prompt
101
 
102
 
103
  class TestSummarizeRetrievalConfidence: