github-actions[bot] committed on
Commit
01d4b3f
·
1 Parent(s): f717a11

🚀 Auto-deploy backend from GitHub (23fbade)

Browse files
main.py CHANGED
@@ -80,6 +80,7 @@ from services.user_provisioning_service import (
80
  from routes.rag_routes import router as rag_router
81
  from routes.admin_model_routes import router as admin_model_router
82
  from routes.diagnostic import router as diagnostic_router
 
83
  from rag.curriculum_rag import (
84
  build_analysis_curriculum_context,
85
  build_lesson_prompt,
@@ -363,6 +364,7 @@ ROLE_POLICIES: Dict[str, Set[str]] = {
363
  "/api/admin/model-config/profile": ADMIN_ONLY,
364
  "/api/admin/model-config/override": ADMIN_ONLY,
365
  "/api/admin/model-config/reset": ADMIN_ONLY,
 
366
  }
367
 
368
  if not os.getenv("DEEPSEEK_API_KEY"):
@@ -1013,6 +1015,7 @@ app.add_middleware(AuthMiddleware)
1013
  app.include_router(rag_router)
1014
  app.include_router(admin_model_router)
1015
  app.include_router(diagnostic_router)
 
1016
 
1017
 
1018
  # ─── Global Exception Handler ─────────────────────────────────
 
80
  from routes.rag_routes import router as rag_router
81
  from routes.admin_model_routes import router as admin_model_router
82
  from routes.diagnostic import router as diagnostic_router
83
+ from routes.video_routes import router as video_router
84
  from rag.curriculum_rag import (
85
  build_analysis_curriculum_context,
86
  build_lesson_prompt,
 
364
  "/api/admin/model-config/profile": ADMIN_ONLY,
365
  "/api/admin/model-config/override": ADMIN_ONLY,
366
  "/api/admin/model-config/reset": ADMIN_ONLY,
367
+ "/api/lessons/videos/search": ALL_APP_ROLES,
368
  }
369
 
370
  if not os.getenv("DEEPSEEK_API_KEY"):
 
1015
  app.include_router(rag_router)
1016
  app.include_router(admin_model_router)
1017
  app.include_router(diagnostic_router)
1018
+ app.include_router(video_router)
1019
 
1020
 
1021
  # ─── Global Exception Handler ─────────────────────────────────
requirements.txt CHANGED
@@ -22,3 +22,4 @@ redis[hiredis]>=5.0.0
22
  PyYAML>=6.0.0
23
  mypy>=1.20.0
24
  pytest>=9.0.0
 
 
22
  PyYAML>=6.0.0
23
  mypy>=1.20.0
24
  pytest>=9.0.0
25
+ google-api-python-client>=2.0.0
routes/rag_routes.py CHANGED
@@ -173,17 +173,31 @@ async def rag_health():
173
  }
174
 
175
 
176
- def _fetch_youtube_video(lesson_title: str, subject: str, competency: str, quarter: int) -> dict:
 
 
 
 
 
 
 
177
  try:
178
- from backend.services.youtube_service import get_video_for_lesson
179
  except ImportError:
180
- return {}
181
  try:
182
- video = get_video_for_lesson(lesson_title, subject, competency, quarter)
183
- return video or {}
 
 
 
 
 
 
 
184
  except Exception as e:
185
- logger.warning("YouTube search failed: %s", e)
186
- return {}
187
 
188
 
189
  def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
@@ -332,23 +346,28 @@ async def rag_lesson(request: Request, payload: RagLessonRequest):
332
  },
333
  )
334
 
335
- # ── Step 5: Enrich with video ────────────────────────────────────────────
336
  if parsed_lesson.get("sections"):
337
  video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
338
  if video_section:
339
  try:
340
- video_data = _fetch_youtube_video(
341
  payload.lessonTitle or payload.topic,
342
  payload.subject,
343
  payload.learningCompetency or "",
344
  payload.quarter,
 
345
  )
346
- if video_data:
347
- video_section["videoId"] = video_data.get("videoId", "")
348
- video_section["videoTitle"] = video_data.get("videoTitle", "")
349
- video_section["videoChannel"] = video_data.get("videoChannel", "")
350
- video_section["embedUrl"] = video_data.get("embedUrl", "")
351
- video_section["thumbnailUrl"] = video_data.get("thumbnailUrl", "")
 
 
 
 
352
  except Exception as exc:
353
  logger.warning("YouTube enrichment skipped: %s", exc)
354
 
 
173
  }
174
 
175
 
176
+ def _fetch_youtube_videos(
177
+ lesson_title: str,
178
+ subject: str,
179
+ competency: str,
180
+ quarter: int,
181
+ lesson_id: Optional[str] = None,
182
+ ) -> List[Dict]:
183
+ """Fetch up to 3 relevant YouTube videos for a lesson."""
184
  try:
185
+ from services.youtube_service import get_video_search_results
186
  except ImportError:
187
+ return []
188
  try:
189
+ result = get_video_search_results(
190
+ topic=lesson_title,
191
+ subject=subject,
192
+ lesson_context=competency,
193
+ grade_level=f"Grade {quarter + 10}",
194
+ lesson_id=lesson_id,
195
+ max_results=3,
196
+ )
197
+ return result.get("videos", [])
198
  except Exception as e:
199
+ logger.warning("YouTube video search failed: %s", e)
200
+ return []
201
 
202
 
203
  def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
 
346
  },
347
  )
348
 
349
+ # ── Step 5: Enrich with videos ───────────────────────────────────────────
350
  if parsed_lesson.get("sections"):
351
  video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
352
  if video_section:
353
  try:
354
+ videos = _fetch_youtube_videos(
355
  payload.lessonTitle or payload.topic,
356
  payload.subject,
357
  payload.learningCompetency or "",
358
  payload.quarter,
359
+ lesson_id=payload.lessonId,
360
  )
361
+ if videos:
362
+ # Primary video for backwards compatibility
363
+ primary = videos[0]
364
+ video_section["videoId"] = primary.get("videoId", "")
365
+ video_section["videoTitle"] = primary.get("title", "")
366
+ video_section["videoChannel"] = primary.get("channelTitle", "")
367
+ video_section["embedUrl"] = f"https://www.youtube.com/embed/{primary.get('videoId', '')}"
368
+ video_section["thumbnailUrl"] = primary.get("thumbnailUrl", "")
369
+ # New: full videos array for Smart Video Integration
370
+ video_section["videos"] = videos
371
  except Exception as exc:
372
  logger.warning("YouTube enrichment skipped: %s", exc)
373
 
routes/video_routes.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Video lesson search routes for MathPulse AI.
3
+ POST /api/lessons/videos/search — smart YouTube video search with RAG enrichment.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from typing import List, Optional
10
+
11
+ from fastapi import APIRouter, HTTPException, Request
12
+ from pydantic import BaseModel, Field
13
+
14
+ from services.youtube_service import (
15
+ get_video_search_results,
16
+ YOUTUBE_API_KEY,
17
+ )
18
+
19
+ logger = logging.getLogger("mathpulse.videos")
20
+ router = APIRouter(prefix="/api/lessons/videos", tags=["videos"])
21
+
22
+
23
+ class VideoSearchRequest(BaseModel):
24
+ topic: str = Field(..., min_length=1, max_length=200)
25
+ grade_level: str = Field(default="Grade 11", max_length=50)
26
+ subject: str = Field(default="General Mathematics", max_length=100)
27
+ lesson_context: str = Field(default="", max_length=1000)
28
+ lesson_id: Optional[str] = Field(default=None, max_length=100)
29
+
30
+
31
+ class VideoResult(BaseModel):
32
+ videoId: str
33
+ title: str
34
+ channelTitle: str
35
+ thumbnailUrl: str
36
+ durationSeconds: int
37
+
38
+
39
+ class VideoSearchResponse(BaseModel):
40
+ videos: List[VideoResult]
41
+ cached: bool = False
42
+
43
+
44
+ @router.post("/search", response_model=VideoSearchResponse)
45
+ async def search_videos(request: Request, payload: VideoSearchRequest):
46
+ """
47
+ Search for relevant educational YouTube videos for a lesson topic.
48
+
49
+ - Checks Firestore video_cache first (7-day TTL)
50
+ - Enriches the search query with RAG curriculum keywords
51
+ - Filters for educational channels, medium/long duration, HD quality
52
+ - Returns up to 3 video results
53
+ """
54
+ # Graceful degradation: if YouTube API key is not configured, return 503
55
+ # so the frontend can hide the video section silently
56
+ if not YOUTUBE_API_KEY:
57
+ logger.warning("YouTube API key not configured")
58
+ raise HTTPException(
59
+ status_code=503,
60
+ detail={
61
+ "error": "youtube_api_not_configured",
62
+ "message": "YouTube API key is not configured on the server.",
63
+ },
64
+ )
65
+
66
+ try:
67
+ result = get_video_search_results(
68
+ topic=payload.topic,
69
+ subject=payload.subject,
70
+ lesson_context=payload.lesson_context,
71
+ grade_level=payload.grade_level,
72
+ lesson_id=payload.lesson_id,
73
+ max_results=3,
74
+ )
75
+
76
+ videos = [
77
+ VideoResult(
78
+ videoId=v["videoId"],
79
+ title=v["title"],
80
+ channelTitle=v["channelTitle"],
81
+ thumbnailUrl=v["thumbnailUrl"],
82
+ durationSeconds=v["durationSeconds"],
83
+ )
84
+ for v in result.get("videos", [])
85
+ ]
86
+
87
+ return VideoSearchResponse(
88
+ videos=videos,
89
+ cached=result.get("cached", False),
90
+ )
91
+
92
+ except HTTPException:
93
+ raise
94
+ except Exception as exc:
95
+ logger.error("Video search endpoint error: %s", exc)
96
+ raise HTTPException(
97
+ status_code=500,
98
+ detail={
99
+ "error": "video_search_failed",
100
+ "message": f"Failed to search videos: {exc}",
101
+ },
102
+ )
services/youtube_service.py CHANGED
@@ -1,170 +1,384 @@
1
  """
2
- YouTube search service for lesson video embeddings.
3
- Uses YouTube Data API v3 to find relevant educational videos.
 
 
4
  """
5
 
6
  from __future__ import annotations
7
 
8
- import os
9
  import logging
10
- from typing import Optional
 
 
 
11
 
12
  logger = logging.getLogger("mathpulse.youtube")
13
 
14
- YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  def _parse_iso8601_duration(duration: str) -> int:
18
  """Parse ISO 8601 duration string like 'PT5M30S' to seconds."""
19
- import re
20
  if not duration:
21
  return 0
22
- hours_match = re.search(r'(\d+)H', duration)
23
- minutes_match = re.search(r'(\d+)M', duration)
24
- seconds_match = re.search(r'(\d+)S', duration)
25
  hours = int(hours_match.group(1)) if hours_match else 0
26
  minutes = int(minutes_match.group(1)) if minutes_match else 0
27
  seconds = int(seconds_match.group(1)) if seconds_match else 0
28
  return hours * 3600 + minutes * 60 + seconds
29
 
30
 
31
- def search_youtube_video(
32
- query: str,
33
- max_results: int = 5,
34
- min_duration_seconds: int = 180,
35
- language: str = "en",
36
- ) -> Optional[dict]:
37
- """
38
- Search YouTube Data API v3 for relevant educational videos.
39
 
40
- Args:
41
- query: Search query combining lesson title, subject, and competency
42
- max_results: Maximum number of results to return
43
- min_duration_seconds: Minimum video duration (filter out shorts)
44
- language: Preferred video language
 
 
45
 
46
- Returns:
47
- Best video match with videoId, title, channel, embedUrl, thumbnailUrl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  """
49
- if not YOUTUBE_API_KEY:
50
- logger.warning("YOUTUBE_API_KEY not set. Video search disabled.")
51
- return None
 
 
 
 
 
52
 
53
- import urllib.parse
54
- import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- search_query = f"{query} DepEd Philippines Grade 11 Grade 12 mathematics"
57
- encoded_query = urllib.parse.quote(search_query)
 
 
 
58
 
59
- search_url = (
60
- f"https://www.googleapis.com/youtube/v3/search"
61
- f"?part=snippet&type=video&q={encoded_query}"
62
- f"&maxResults={max_results}&relevanceLanguage={language}"
63
- f"&key={YOUTUBE_API_KEY}"
64
- )
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  try:
67
- import urllib.request
68
- with urllib.request.urlopen(search_url, timeout=10) as response:
69
- data = json.loads(response.read().decode())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- video_results = []
72
- for item in data.get("items", []):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  video_id = item.get("id", {}).get("videoId", "")
74
  if not video_id:
75
  continue
76
 
77
- title = item.get("snippet", {}).get("title", "")
78
- channel = item.get("snippet", {}).get("channelTitle", "")
79
- description = item.get("snippet", {}).get("description", "")
80
-
81
- video_details_url = (
82
- f"https://www.googleapis.com/youtube/v3/videos"
83
- f"?part=contentDetails,statistics&id={video_id}&key={YOUTUBE_API_KEY}"
84
- )
85
-
86
- try:
87
- with urllib.request.urlopen(video_details_url, timeout=10) as vd_response:
88
- vd_data = json.loads(vd_response.read().decode())
89
- vd_item = vd_data.get("items", [{}])[0]
90
- content_details = vd_item.get("contentDetails", {})
91
- duration = content_details.get("duration", "")
92
 
93
- duration_secs = _parse_iso8601_duration(duration)
94
- except Exception:
95
- duration_secs = 600
 
 
96
 
97
- if duration_secs < min_duration_seconds:
 
98
  continue
99
 
100
- embed_url = f"https://www.youtube.com/embed/{video_id}"
101
- thumbnail_url = f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg"
 
 
 
 
102
 
103
- video_results.append({
 
 
 
 
 
 
 
 
104
  "videoId": video_id,
105
- "videoTitle": title,
106
- "videoChannel": channel,
107
- "embedUrl": embed_url,
108
  "thumbnailUrl": thumbnail_url,
109
  "durationSeconds": duration_secs,
110
- "description": description[:200],
111
  })
112
 
113
- if not video_results:
114
- return None
 
115
 
116
- for vr in video_results:
117
- if any(term in vr["videoTitle"].lower() or term in vr["description"].lower()
118
- for term in ["tutorial", "lesson", "explain", "math", "solution"]):
119
- return vr
120
 
121
- return video_results[0] if video_results else None
 
 
122
 
123
- except Exception as e:
124
- logger.error("YouTube search failed: %s", e)
125
- return None
126
 
127
 
128
- def get_video_for_lesson(
129
- lesson_title: str,
130
- subject: str,
131
- competency: str = "",
132
- quarter: int = 1,
133
- ) -> Optional[dict]:
134
- """Get the best YouTube video for a lesson."""
135
- query = " ".join(filter(None, [lesson_title, subject, competency]))[:200]
136
- return search_youtube_video(query)
137
-
138
 
139
- def store_video_in_firestore(lesson_id: str, video_data: dict):
140
- """Persist chosen video to Firestore for caching."""
141
- try:
142
- import firebase_admin
143
- from firebase_admin import firestore
144
- if not firebase_admin._apps:
145
- return
146
- db = firestore.client()
147
- doc_ref = db.collection("curriculumDocuments").document(lesson_id)
148
- doc_ref.collection("videoEmbed").document("primary").set({
149
- **video_data,
150
- "storedAt": firestore.SERVER_TIMESTAMP,
151
- })
152
- except Exception as e:
153
- logger.warning("Could not store video in Firestore: %s", e)
 
 
154
 
 
 
155
 
156
- def get_cached_video(lesson_id: str) -> Optional[dict]:
157
- """Retrieve cached video from Firestore."""
158
- try:
159
- import firebase_admin
160
- from firebase_admin import firestore
161
- if not firebase_admin._apps:
162
- return None
163
- db = firestore.client()
164
- doc = db.collection("curriculumDocuments").document(lesson_id)
165
- video_doc = doc.collection("videoEmbed").document("primary").get()
166
- if video_doc.exists:
167
- return video_doc.to_dict()
168
- except Exception:
169
- pass
170
- return None
 
1
  """
2
+ Smart YouTube Video Search Service for MathPulse AI.
3
+ Uses YouTube Data API v3 (googleapiclient.discovery) to find relevant
4
+ educational math videos, enriched with RAG curriculum context.
5
+ Results are cached in Firestore video_cache/{lessonId} with 7-day TTL.
6
  """
7
 
8
  from __future__ import annotations
9
 
10
+ import hashlib
11
  import logging
12
+ import os
13
+ import re
14
+ from datetime import datetime, timezone
15
+ from typing import Dict, List, Optional
16
 
17
  logger = logging.getLogger("mathpulse.youtube")
18
 
19
+ YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "").strip()
20
+
21
+ # Known educational channel keywords and exact names for post-filtering
22
+ _EDUCATIONAL_CHANNEL_KEYWORDS = [
23
+ "khan", "math", "academy", "education", "teacher", "professor",
24
+ "tutorial", "lesson", "school", "university", "college", "deped",
25
+ "philippines", "filipino", "pinoy", "stem", "learning", "study",
26
+ "organic chemistry tutor", "patrickjmt", "3blue1brown", "numberphile",
27
+ "math antics", "bright side", "crashcourse", "ted-ed", "ted ed",
28
+ "nancy pi", "professor leonard", "mit", "stanford", "harvard",
29
+ ]
30
+
31
+ _EDUCATIONAL_CHANNEL_EXACT = {
32
+ "khan academy", "patrickjmt", "3blue1brown", "numberphile",
33
+ "math antics", "the organic chemistry tutor", "professor leonard",
34
+ "nancy pi", "ted-ed", "crashcourse", "bright side",
35
+ "mit opencourseware", "stanford", "harvard",
36
+ }
37
+
38
+ # Minimum duration in seconds to filter out shorts (3 minutes)
39
+ _MIN_DURATION_SECONDS = 180
40
+ # Maximum duration in seconds to avoid extremely long videos (60 minutes)
41
+ _MAX_DURATION_SECONDS = 3600
42
+ # Cache TTL in seconds (7 days)
43
+ _CACHE_TTL_SECONDS = 7 * 24 * 60 * 60
44
+
45
+
46
+ def _build_youtube_client():
47
+ """Lazy-init googleapiclient YouTube client. Returns None if no API key."""
48
+ if not YOUTUBE_API_KEY:
49
+ return None
50
+ try:
51
+ from googleapiclient.discovery import build
52
+ return build("youtube", "v3", developerKey=YOUTUBE_API_KEY, cache_discovery=False)
53
+ except Exception as exc:
54
+ logger.warning("Failed to build YouTube client: %s", exc)
55
+ return None
56
 
57
 
58
  def _parse_iso8601_duration(duration: str) -> int:
59
  """Parse ISO 8601 duration string like 'PT5M30S' to seconds."""
 
60
  if not duration:
61
  return 0
62
+ hours_match = re.search(r"(\d+)H", duration)
63
+ minutes_match = re.search(r"(\d+)M", duration)
64
+ seconds_match = re.search(r"(\d+)S", duration)
65
  hours = int(hours_match.group(1)) if hours_match else 0
66
  minutes = int(minutes_match.group(1)) if minutes_match else 0
67
  seconds = int(seconds_match.group(1)) if seconds_match else 0
68
  return hours * 3600 + minutes * 60 + seconds
69
 
70
 
71
+ def _is_educational_channel(channel_title: str) -> bool:
72
+ """Check if a channel appears to be educational."""
73
+ lowered = channel_title.lower().strip()
74
+ if lowered in _EDUCATIONAL_CHANNEL_EXACT:
75
+ return True
76
+ return any(kw in lowered for kw in _EDUCATIONAL_CHANNEL_KEYWORDS)
77
+
 
78
 
79
+ def _score_video_result(item: dict, query: str) -> float:
80
+ """Score a video result for relevance. Higher is better."""
81
+ score = 0.0
82
+ title = (item.get("title") or "").lower()
83
+ description = (item.get("description") or "").lower()
84
+ channel = (item.get("channelTitle") or "").lower()
85
+ query_lower = query.lower()
86
 
87
+ # Title contains key math/education terms
88
+ math_terms = ["tutorial", "lesson", "explain", "math", "mathematics",
89
+ "solution", "problem", "example", "learn", "how to"]
90
+ for term in math_terms:
91
+ if term in title:
92
+ score += 2.0
93
+
94
+ # Query terms appear in title
95
+ for word in query_lower.split():
96
+ if len(word) > 2 and word in title:
97
+ score += 1.5
98
+
99
+ # Educational channel bonus
100
+ if _is_educational_channel(channel):
101
+ score += 5.0
102
+
103
+ # Description relevance
104
+ for word in query_lower.split():
105
+ if len(word) > 2 and word in description:
106
+ score += 0.5
107
+
108
+ # Duration sweet spot: 5-20 minutes
109
+ duration = item.get("durationSeconds", 0)
110
+ if 300 <= duration <= 1200:
111
+ score += 2.0
112
+ elif duration >= _MIN_DURATION_SECONDS:
113
+ score += 1.0
114
+
115
+ return score
116
+
117
+
118
+ def _enrich_query_with_rag(topic: str, subject: str, lesson_context: str = "") -> str:
119
  """
120
+ Query the RAG vectorstore to extract curriculum keywords and enrich
121
+ the YouTube search query for higher relevance.
122
+ """
123
+ enriched = topic
124
+ if subject:
125
+ enriched = f"{enriched} {subject}"
126
+ if lesson_context:
127
+ enriched = f"{enriched} {lesson_context}"
128
 
129
+ try:
130
+ from rag.curriculum_rag import retrieve_curriculum_context
131
+ chunks = retrieve_curriculum_context(
132
+ query=topic,
133
+ subject=subject if subject else None,
134
+ top_k=5,
135
+ )
136
+ if chunks:
137
+ # Extract key terms from top chunk contents
138
+ keywords: List[str] = []
139
+ for chunk in chunks[:3]:
140
+ content = str(chunk.get("content", "")).strip()
141
+ # Extract meaningful words (skip math symbols, numbers, stop words)
142
+ if content:
143
+ # Clean content: remove special chars, keep only alphabetic words
144
+ cleaned = re.sub(r'[^\w\s]', ' ', content)
145
+ words = [w for w in cleaned.split() if len(w) > 3 and w.isalpha()]
146
+ # Take up to 5 key words per chunk
147
+ keywords.extend(words[:5])
148
+ if keywords:
149
+ # Deduplicate and limit
150
+ unique_keywords = list(dict.fromkeys(keywords))[:8]
151
+ keyword_str = " ".join(unique_keywords)
152
+ enriched = f"{enriched} {keyword_str}"
153
+ except Exception as exc:
154
+ logger.debug("RAG enrichment skipped: %s", exc)
155
+
156
+ # Append standard DepEd/Philippines math context
157
+ enriched = f"{enriched} DepEd Philippines mathematics tutorial"
158
+ return enriched[:300]
159
+
160
+
161
+ def _get_cache_key(topic: str, subject: str, grade_level: str) -> str:
162
+ """Generate a deterministic Firestore document ID for caching."""
163
+ raw = f"{subject}|{topic}|{grade_level}"
164
+ return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32]
165
+
166
+
167
+ def get_cached_videos(lesson_id: str) -> Optional[List[Dict]]:
168
+ """Check Firestore video_cache/{lessonId} for cached results (TTL 7 days)."""
169
+ try:
170
+ import firebase_admin
171
+ from firebase_admin import firestore
172
+ if not firebase_admin._apps:
173
+ return None
174
 
175
+ db = firestore.client()
176
+ doc_ref = db.collection("video_cache").document(lesson_id)
177
+ doc = doc_ref.get()
178
+ if not doc.exists:
179
+ return None
180
 
181
+ data = doc.to_dict()
182
+ if not data:
183
+ return None
 
 
 
184
 
185
+ cached_at = data.get("cachedAt")
186
+ if cached_at:
187
+ # Firestore timestamps have a .timestamp() method or are datetime objects
188
+ if hasattr(cached_at, "timestamp"):
189
+ cached_epoch = cached_at.timestamp()
190
+ elif isinstance(cached_at, datetime):
191
+ cached_epoch = cached_at.timestamp()
192
+ else:
193
+ cached_epoch = float(cached_at)
194
+ now_epoch = datetime.now(timezone.utc).timestamp()
195
+ if (now_epoch - cached_epoch) > _CACHE_TTL_SECONDS:
196
+ logger.info("Video cache expired for lesson %s", lesson_id)
197
+ return None
198
+
199
+ videos = data.get("videos")
200
+ if isinstance(videos, list) and len(videos) > 0:
201
+ logger.info("Video cache hit for lesson %s (%d videos)", lesson_id, len(videos))
202
+ return videos
203
+ except Exception as exc:
204
+ logger.debug("Could not read video cache: %s", exc)
205
+ return None
206
+
207
+
208
+ def cache_videos(lesson_id: str, videos: List[Dict], topic: str) -> None:
209
+ """Store search results in Firestore video_cache/{lessonId}."""
210
  try:
211
+ import firebase_admin
212
+ from firebase_admin import firestore
213
+ if not firebase_admin._apps:
214
+ return
215
+
216
+ db = firestore.client()
217
+ db.collection("video_cache").document(lesson_id).set({
218
+ "videos": videos,
219
+ "cachedAt": firestore.SERVER_TIMESTAMP,
220
+ "topic": topic,
221
+ })
222
+ logger.info("Cached %d videos for lesson %s", len(videos), lesson_id)
223
+ except Exception as exc:
224
+ logger.warning("Could not cache videos in Firestore: %s", exc)
225
+
226
+
227
+ def search_youtube_videos(
228
+ topic: str,
229
+ subject: str = "",
230
+ lesson_context: str = "",
231
+ grade_level: str = "",
232
+ max_results: int = 3,
233
+ language: str = "en",
234
+ ) -> List[Dict]:
235
+ """
236
+ Search YouTube Data API v3 for relevant educational math videos.
237
+
238
+ Returns up to `max_results` videos after applying filters:
239
+ - Educational channels (post-filter by channel name)
240
+ - Medium/long duration (>= 3 minutes, <= 60 minutes)
241
+ - HD quality preferred (videoDefinition = high)
242
+ - English or Filipino language
243
+
244
+ Each result contains: videoId, title, channelTitle, thumbnailUrl, durationSeconds.
245
+ """
246
+ client = _build_youtube_client()
247
+ if client is None:
248
+ logger.warning("YOUTUBE_API_KEY not set. Video search disabled.")
249
+ return []
250
+
251
+ # Step 1: Enrich query with RAG curriculum context
252
+ enriched_query = _enrich_query_with_rag(topic, subject, lesson_context)
253
+ logger.info("YouTube search query (enriched): %s", enriched_query)
254
 
255
+ try:
256
+ # Step 2: Search for videos
257
+ search_response = client.search().list(
258
+ part="snippet",
259
+ q=enriched_query,
260
+ type="video",
261
+ maxResults=15, # Fetch more to allow post-filtering
262
+ relevanceLanguage=language,
263
+ videoDefinition="high",
264
+ videoDuration="medium", # 4-20 minutes
265
+ safeSearch="strict",
266
+ order="relevance",
267
+ ).execute()
268
+
269
+ items = search_response.get("items", [])
270
+ if not items:
271
+ logger.info("No YouTube results for query: %s", enriched_query)
272
+ return []
273
+
274
+ # Step 3: Get video details (duration, etc.)
275
+ video_ids = [item["id"]["videoId"] for item in items if item.get("id", {}).get("videoId")]
276
+ if not video_ids:
277
+ return []
278
+
279
+ details_response = client.videos().list(
280
+ part="contentDetails,statistics,snippet",
281
+ id=",".join(video_ids),
282
+ ).execute()
283
+
284
+ details_map = {}
285
+ for detail in details_response.get("items", []):
286
+ vid = detail.get("id")
287
+ if vid:
288
+ details_map[vid] = detail
289
+
290
+ # Step 4: Build results with filtering
291
+ results = []
292
+ for item in items:
293
  video_id = item.get("id", {}).get("videoId", "")
294
  if not video_id:
295
  continue
296
 
297
+ detail = details_map.get(video_id, {})
298
+ snippet = detail.get("snippet", item.get("snippet", {}))
299
+ content_details = detail.get("contentDetails", {})
 
 
 
 
 
 
 
 
 
 
 
 
300
 
301
+ title = snippet.get("title", "")
302
+ channel_title = snippet.get("channelTitle", "")
303
+ description = snippet.get("description", "")
304
+ duration = content_details.get("duration", "")
305
+ duration_secs = _parse_iso8601_duration(duration)
306
 
307
+ # Filter: duration
308
+ if duration_secs < _MIN_DURATION_SECONDS or duration_secs > _MAX_DURATION_SECONDS:
309
  continue
310
 
311
+ # Filter: educational channels
312
+ if not _is_educational_channel(channel_title):
313
+ # Still allow if title strongly suggests math tutorial
314
+ lowered_title = title.lower()
315
+ if not any(term in lowered_title for term in ["tutorial", "lesson", "math", "explain"]):
316
+ continue
317
 
318
+ thumbnail_url = f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg"
319
+ # Prefer hqdefault if available
320
+ thumbs = snippet.get("thumbnails", {})
321
+ if "high" in thumbs:
322
+ thumbnail_url = thumbs["high"]["url"]
323
+ elif "medium" in thumbs:
324
+ thumbnail_url = thumbs["medium"]["url"]
325
+
326
+ results.append({
327
  "videoId": video_id,
328
+ "title": title,
329
+ "channelTitle": channel_title,
 
330
  "thumbnailUrl": thumbnail_url,
331
  "durationSeconds": duration_secs,
332
+ "description": description[:200] if description else "",
333
  })
334
 
335
+ # Step 5: Score and sort, return top N
336
+ for r in results:
337
+ r["_score"] = _score_video_result(r, enriched_query)
338
 
339
+ results.sort(key=lambda x: x["_score"], reverse=True)
340
+ for r in results:
341
+ r.pop("_score", None)
 
342
 
343
+ top_results = results[:max_results]
344
+ logger.info("YouTube search returned %d results (top %d)", len(results), len(top_results))
345
+ return top_results
346
 
347
+ except Exception as exc:
348
+ logger.error("YouTube search failed: %s", exc)
349
+ return []
350
 
351
 
352
+ def get_video_search_results(
353
+ topic: str,
354
+ subject: str = "",
355
+ lesson_context: str = "",
356
+ grade_level: str = "",
357
+ lesson_id: Optional[str] = None,
358
+ max_results: int = 3,
359
+ ) -> Dict:
360
+ """
361
+ High-level wrapper: check cache first, then search YouTube, then cache results.
362
 
363
+ Returns {"videos": [...], "cached": bool}.
364
+ """
365
+ cache_key = lesson_id or _get_cache_key(topic, subject, grade_level)
366
+
367
+ # Check cache first
368
+ cached = get_cached_videos(cache_key)
369
+ if cached is not None:
370
+ return {"videos": cached, "cached": True}
371
+
372
+ # Search YouTube
373
+ videos = search_youtube_videos(
374
+ topic=topic,
375
+ subject=subject,
376
+ lesson_context=lesson_context,
377
+ grade_level=grade_level,
378
+ max_results=max_results,
379
+ )
380
 
381
+ if videos:
382
+ cache_videos(cache_key, videos, topic)
383
 
384
+ return {"videos": videos, "cached": False}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_video_routes.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for the video search endpoint and YouTube service.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ import sys
9
+ from unittest.mock import MagicMock, patch
10
+
11
+ import pytest
12
+ from fastapi.testclient import TestClient
13
+
14
+ # Add backend directory to path
15
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
16
+
17
+ # Import the app first, then stub Firebase token verification on the loaded module
18
+ from main import app as _app_import
19
+ import main as main_module
20
+
21
# If main exposes no firebase_auth object (attribute missing or None),
# install a MagicMock so there is something to stub.
# NOTE(review): indentation was lost in this view; the verify_id_token stub is
# assumed to sit outside the `if` so it applies in both cases. Confirm.
if getattr(main_module, "firebase_auth", None) is None:
    main_module.firebase_auth = MagicMock()
# Every token verification returns the same fixed student claims.
main_module.firebase_auth.verify_id_token = MagicMock(
    return_value={
        "uid": "test-student-uid",
        "email": "student@example.com",
        "role": "student",
    }
)

# Shared client; each request carries a bearer token header by default.
client = TestClient(_app_import, headers={"Authorization": "Bearer test-auth-token"})
32
+
33
+
34
+ # ─── Fixtures ───────────────────────────────────────────────────
35
+
36
@pytest.fixture
def mock_youtube_api_key(monkeypatch):
    """Set ``YOUTUBE_API_KEY`` to a dummy value for the duration of a test."""
    env_var, dummy_value = "YOUTUBE_API_KEY", "test_youtube_api_key"
    monkeypatch.setenv(env_var, dummy_value)
39
+
40
+
41
@pytest.fixture
def no_youtube_api_key(monkeypatch):
    """Set ``YOUTUBE_API_KEY`` to an empty string for the duration of a test."""
    env_var, blank_value = "YOUTUBE_API_KEY", ""
    monkeypatch.setenv(env_var, blank_value)
44
+
45
+
46
+ # ─── YouTube Service Tests ──────────────────────────────────────
47
+
48
def test_parse_iso8601_duration():
    """Durations parse to whole seconds; an empty string parses to 0."""
    from services.youtube_service import _parse_iso8601_duration

    expected_seconds = {
        "PT5M30S": 330,
        "PT1H2M3S": 3723,
        "PT0S": 0,
        "": 0,
    }
    for raw_duration, seconds in expected_seconds.items():
        assert _parse_iso8601_duration(raw_duration) == seconds
54
+
55
+
56
def test_is_educational_channel():
    """Three educational channel names return True; two unrelated names return False."""
    from services.youtube_service import _is_educational_channel

    for channel_name in ("Khan Academy", "Math Antics", "3Blue1Brown"):
        assert _is_educational_channel(channel_name) is True

    for channel_name in ("Gaming Channel", "Random Vlogs"):
        assert _is_educational_channel(channel_name) is False
63
+
64
+
65
def test_enrich_query_with_rag_fallback(monkeypatch):
    """When RAG is unavailable, enrichment falls back to topic + subject."""
    from services.youtube_service import _enrich_query_with_rag

    # Simulate an unavailable RAG backend by making curriculum retrieval raise.
    rag_failure = patch(
        "rag.curriculum_rag.retrieve_curriculum_context",
        side_effect=Exception("RAG unavailable"),
    )
    with rag_failure:
        enriched = _enrich_query_with_rag("quadratic equations", "General Mathematics")

    required_fragments = (
        "quadratic equations",
        "General Mathematics",
        "DepEd Philippines mathematics",
    )
    for fragment in required_fragments:
        assert fragment in enriched
74
+
75
+
76
def test_get_cache_key():
    """Equal inputs give equal keys, a different topic gives a different key, keys have 32 characters."""
    from services.youtube_service import _get_cache_key

    subject, grade = "General Mathematics", "Grade 11"
    quadratic = _get_cache_key("quadratic equations", subject, grade)
    quadratic_again = _get_cache_key("quadratic equations", subject, grade)
    linear = _get_cache_key("linear equations", subject, grade)

    assert quadratic == quadratic_again
    assert quadratic != linear
    assert len(quadratic) == 32
84
+
85
+
86
def test_cache_and_retrieve(mock_youtube_api_key, monkeypatch):
    """Caching calls ``set`` once on the document; a non-existent document reads back as None."""
    from services.youtube_service import cache_videos, get_cached_videos

    lesson_id = "test-lesson-123"
    videos = [
        {
            "videoId": "abc123",
            "title": "Test Video",
            "channelTitle": "Test Channel",
            "thumbnailUrl": "http://example.com/thumb.jpg",
            "durationSeconds": 300,
        }
    ]

    # Fake Firestore client: collection(...).document(...) always yields the
    # same document mock, whose fetched snapshot reports exists == False.
    doc_ref = MagicMock()
    doc_ref.get.return_value.exists = False
    firestore_db = MagicMock()
    firestore_db.collection.return_value.document.return_value = doc_ref

    with patch("firebase_admin.firestore.client", return_value=firestore_db), \
            patch("firebase_admin._apps", {"default": MagicMock()}):
        # Writing to the cache must call set() on the document exactly once.
        cache_videos(lesson_id, videos, "quadratic equations")
        doc_ref.set.assert_called_once()

        # The mocked document does not exist, so the read is a miss.
        assert get_cached_videos(lesson_id) is None
110
+
111
+
112
def test_search_youtube_videos_no_api_key(no_youtube_api_key):
    """With an empty ``YOUTUBE_API_KEY`` the search returns an empty list."""
    from services.youtube_service import search_youtube_videos

    assert search_youtube_videos("quadratic equations") == []
116
+
117
+
118
+ # ─── Route Tests ────────────────────────────────────────────────
119
+
120
def test_video_search_endpoint_no_api_key(no_youtube_api_key):
    """The endpoint answers 503 with a specific error code when no API key is configured."""
    payload = {
        "topic": "quadratic equations",
        "subject": "General Mathematics",
        "grade_level": "Grade 11",
    }
    response = client.post("/api/lessons/videos/search", json=payload)

    assert response.status_code == 503
    body = response.json()
    assert body["detail"]["error"] == "youtube_api_not_configured"
130
+
131
+
132
def test_video_search_endpoint_success(mock_youtube_api_key):
    """The endpoint relays the videos and cached flag produced by the search service."""
    mock_videos = [
        {
            "videoId": "vid1",
            "title": "Video 1",
            "channelTitle": "Channel 1",
            "thumbnailUrl": "http://example.com/1.jpg",
            "durationSeconds": 300,
        },
        {
            "videoId": "vid2",
            "title": "Video 2",
            "channelTitle": "Channel 2",
            "thumbnailUrl": "http://example.com/2.jpg",
            "durationSeconds": 450,
        },
    ]
    payload = {
        "topic": "quadratic equations",
        "subject": "General Mathematics",
        "grade_level": "Grade 11",
        "lesson_id": "lesson-123",
    }

    # Pretend a key is configured and replace the search service with a stub.
    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"), \
            patch(
                "routes.video_routes.get_video_search_results",
                return_value={"videos": mock_videos, "cached": False},
            ):
        response = client.post("/api/lessons/videos/search", json=payload)

    assert response.status_code == 200
    body = response.json()
    assert len(body["videos"]) == 2
    assert body["cached"] is False
    assert body["videos"][0]["videoId"] == "vid1"
156
+
157
+
158
def test_video_search_endpoint_empty_results(mock_youtube_api_key):
    """An empty search result still yields 200 with an empty ``videos`` list."""
    payload = {
        "topic": "very obscure topic xyz123",
        "subject": "General Mathematics",
    }

    # Pretend a key is configured and make the stubbed search find nothing.
    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"), \
            patch(
                "routes.video_routes.get_video_search_results",
                return_value={"videos": [], "cached": False},
            ):
        response = client.post("/api/lessons/videos/search", json=payload)

    assert response.status_code == 200
    body = response.json()
    assert body["videos"] == []
    assert body["cached"] is False
172
+
173
+
174
def test_video_search_endpoint_cached(mock_youtube_api_key):
    """A cached service result is passed through with ``cached`` set to True."""
    mock_videos = [
        {
            "videoId": "vid1",
            "title": "Cached Video",
            "channelTitle": "Channel 1",
            "thumbnailUrl": "http://example.com/1.jpg",
            "durationSeconds": 300,
        },
    ]
    payload = {
        "topic": "linear equations",
        "lesson_id": "lesson-456",
    }

    # Pretend a key is configured and make the stubbed search report a cache hit.
    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"), \
            patch(
                "routes.video_routes.get_video_search_results",
                return_value={"videos": mock_videos, "cached": True},
            ):
        response = client.post("/api/lessons/videos/search", json=payload)

    assert response.status_code == 200
    body = response.json()
    assert body["cached"] is True
    assert len(body["videos"]) == 1
193
+
194
+
195
def test_video_search_endpoint_validation_error(mock_youtube_api_key):
    """Should return 422 when topic is empty or too long (201 characters)."""
    rejected_topics = ("", "x" * 201)

    for bad_topic in rejected_topics:
        with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"):
            response = client.post(
                "/api/lessons/videos/search",
                json={
                    "topic": bad_topic,
                    "subject": "General Mathematics",
                },
            )
        assert response.status_code == 422