Spaces:
Sleeping
Sleeping
github-actions[bot] committed on
Commit ·
01d4b3f
1
Parent(s): f717a11
🚀 Auto-deploy backend from GitHub (23fbade)
Browse files- main.py +3 -0
- requirements.txt +1 -0
- routes/rag_routes.py +34 -15
- routes/video_routes.py +102 -0
- services/youtube_service.py +332 -118
- tests/test_video_routes.py +209 -0
main.py
CHANGED
|
@@ -80,6 +80,7 @@ from services.user_provisioning_service import (
|
|
| 80 |
from routes.rag_routes import router as rag_router
|
| 81 |
from routes.admin_model_routes import router as admin_model_router
|
| 82 |
from routes.diagnostic import router as diagnostic_router
|
|
|
|
| 83 |
from rag.curriculum_rag import (
|
| 84 |
build_analysis_curriculum_context,
|
| 85 |
build_lesson_prompt,
|
|
@@ -363,6 +364,7 @@ ROLE_POLICIES: Dict[str, Set[str]] = {
|
|
| 363 |
"/api/admin/model-config/profile": ADMIN_ONLY,
|
| 364 |
"/api/admin/model-config/override": ADMIN_ONLY,
|
| 365 |
"/api/admin/model-config/reset": ADMIN_ONLY,
|
|
|
|
| 366 |
}
|
| 367 |
|
| 368 |
if not os.getenv("DEEPSEEK_API_KEY"):
|
|
@@ -1013,6 +1015,7 @@ app.add_middleware(AuthMiddleware)
|
|
| 1013 |
app.include_router(rag_router)
|
| 1014 |
app.include_router(admin_model_router)
|
| 1015 |
app.include_router(diagnostic_router)
|
|
|
|
| 1016 |
|
| 1017 |
|
| 1018 |
# ─── Global Exception Handler ─────────────────────────────────
|
|
|
|
| 80 |
from routes.rag_routes import router as rag_router
|
| 81 |
from routes.admin_model_routes import router as admin_model_router
|
| 82 |
from routes.diagnostic import router as diagnostic_router
|
| 83 |
+
from routes.video_routes import router as video_router
|
| 84 |
from rag.curriculum_rag import (
|
| 85 |
build_analysis_curriculum_context,
|
| 86 |
build_lesson_prompt,
|
|
|
|
| 364 |
"/api/admin/model-config/profile": ADMIN_ONLY,
|
| 365 |
"/api/admin/model-config/override": ADMIN_ONLY,
|
| 366 |
"/api/admin/model-config/reset": ADMIN_ONLY,
|
| 367 |
+
"/api/lessons/videos/search": ALL_APP_ROLES,
|
| 368 |
}
|
| 369 |
|
| 370 |
if not os.getenv("DEEPSEEK_API_KEY"):
|
|
|
|
| 1015 |
app.include_router(rag_router)
|
| 1016 |
app.include_router(admin_model_router)
|
| 1017 |
app.include_router(diagnostic_router)
|
| 1018 |
+
app.include_router(video_router)
|
| 1019 |
|
| 1020 |
|
| 1021 |
# ─── Global Exception Handler ─────────────────────────────────
|
requirements.txt
CHANGED
|
@@ -22,3 +22,4 @@ redis[hiredis]>=5.0.0
|
|
| 22 |
PyYAML>=6.0.0
|
| 23 |
mypy>=1.20.0
|
| 24 |
pytest>=9.0.0
|
|
|
|
|
|
| 22 |
PyYAML>=6.0.0
|
| 23 |
mypy>=1.20.0
|
| 24 |
pytest>=9.0.0
|
| 25 |
+
google-api-python-client>=2.0.0
|
routes/rag_routes.py
CHANGED
|
@@ -173,17 +173,31 @@ async def rag_health():
|
|
| 173 |
}
|
| 174 |
|
| 175 |
|
| 176 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
try:
|
| 178 |
-
from
|
| 179 |
except ImportError:
|
| 180 |
-
return
|
| 181 |
try:
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
except Exception as e:
|
| 185 |
-
logger.warning("YouTube search failed: %s", e)
|
| 186 |
-
return
|
| 187 |
|
| 188 |
|
| 189 |
def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
|
|
@@ -332,23 +346,28 @@ async def rag_lesson(request: Request, payload: RagLessonRequest):
|
|
| 332 |
},
|
| 333 |
)
|
| 334 |
|
| 335 |
-
# ── Step 5: Enrich with
|
| 336 |
if parsed_lesson.get("sections"):
|
| 337 |
video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
|
| 338 |
if video_section:
|
| 339 |
try:
|
| 340 |
-
|
| 341 |
payload.lessonTitle or payload.topic,
|
| 342 |
payload.subject,
|
| 343 |
payload.learningCompetency or "",
|
| 344 |
payload.quarter,
|
|
|
|
| 345 |
)
|
| 346 |
-
if
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
video_section["
|
| 350 |
-
video_section["
|
| 351 |
-
video_section["
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
except Exception as exc:
|
| 353 |
logger.warning("YouTube enrichment skipped: %s", exc)
|
| 354 |
|
|
|
|
| 173 |
}
|
| 174 |
|
| 175 |
|
| 176 |
+
def _fetch_youtube_videos(
    lesson_title: str,
    subject: str,
    competency: str,
    quarter: int,
    lesson_id: Optional[str] = None,
) -> List[Dict]:
    """Fetch up to 3 relevant YouTube videos for a lesson.

    Best-effort helper: any failure (missing service module, search error,
    malformed result) yields an empty list instead of raising, so lesson
    generation can continue without videos.

    Args:
        lesson_title: Passed to the video service as the search ``topic``.
        subject: Subject name forwarded to the video service.
        competency: Forwarded as ``lesson_context``.
        quarter: Used to build the ``grade_level`` label (see note below).
        lesson_id: Optional lesson identifier forwarded to the video service.

    Returns:
        The ``"videos"`` list from the service result, or ``[]`` on failure.
    """
    try:
        # Imported lazily so this module still loads when the video
        # service (or one of its dependencies) is unavailable.
        from services.youtube_service import get_video_search_results
    except ImportError:
        return []

    try:
        result = get_video_search_results(
            topic=lesson_title,
            subject=subject,
            lesson_context=competency,
            # NOTE(review): this derives the grade label from the *quarter*
            # (quarter 3 -> "Grade 13"). That looks unintended; confirm what
            # grade should be sent before changing it.
            grade_level=f"Grade {quarter + 10}",
            lesson_id=lesson_id,
            max_results=3,
        )
    except Exception as e:
        logger.warning("YouTube video search failed: %s", e)
        return []

    # Tolerate a non-dict result or a missing/None "videos" entry.
    videos = result.get("videos") if isinstance(result, dict) else None
    return videos or []
|
| 201 |
|
| 202 |
|
| 203 |
def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
|
|
|
|
| 346 |
},
|
| 347 |
)
|
| 348 |
|
| 349 |
+
# ── Step 5: Enrich with videos ───────────────────────────────────────────
|
| 350 |
if parsed_lesson.get("sections"):
|
| 351 |
video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
|
| 352 |
if video_section:
|
| 353 |
try:
|
| 354 |
+
videos = _fetch_youtube_videos(
|
| 355 |
payload.lessonTitle or payload.topic,
|
| 356 |
payload.subject,
|
| 357 |
payload.learningCompetency or "",
|
| 358 |
payload.quarter,
|
| 359 |
+
lesson_id=payload.lessonId,
|
| 360 |
)
|
| 361 |
+
if videos:
|
| 362 |
+
# Primary video for backwards compatibility
|
| 363 |
+
primary = videos[0]
|
| 364 |
+
video_section["videoId"] = primary.get("videoId", "")
|
| 365 |
+
video_section["videoTitle"] = primary.get("title", "")
|
| 366 |
+
video_section["videoChannel"] = primary.get("channelTitle", "")
|
| 367 |
+
video_section["embedUrl"] = f"https://www.youtube.com/embed/{primary.get('videoId', '')}"
|
| 368 |
+
video_section["thumbnailUrl"] = primary.get("thumbnailUrl", "")
|
| 369 |
+
# New: full videos array for Smart Video Integration
|
| 370 |
+
video_section["videos"] = videos
|
| 371 |
except Exception as exc:
|
| 372 |
logger.warning("YouTube enrichment skipped: %s", exc)
|
| 373 |
|
routes/video_routes.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Video lesson search routes for MathPulse AI.
|
| 3 |
+
POST /api/lessons/videos/search — smart YouTube video search with RAG enrichment.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
from typing import List, Optional
|
| 10 |
+
|
| 11 |
+
from fastapi import APIRouter, HTTPException, Request
|
| 12 |
+
from pydantic import BaseModel, Field
|
| 13 |
+
|
| 14 |
+
from services.youtube_service import (
|
| 15 |
+
get_video_search_results,
|
| 16 |
+
YOUTUBE_API_KEY,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger("mathpulse.videos")
|
| 20 |
+
router = APIRouter(prefix="/api/lessons/videos", tags=["videos"])
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class VideoSearchRequest(BaseModel):
    """Request body for ``POST /api/lessons/videos/search``."""

    # Lesson topic to search for; required, 1-200 characters.
    topic: str = Field(..., min_length=1, max_length=200)
    # Grade label forwarded to the video service; defaults to "Grade 11".
    grade_level: str = Field(default="Grade 11", max_length=50)
    # Subject name forwarded to the video service.
    subject: str = Field(default="General Mathematics", max_length=100)
    # Optional free-text lesson context (up to 1000 characters).
    lesson_context: str = Field(default="", max_length=1000)
    # Optional lesson identifier forwarded to the video service.
    lesson_id: Optional[str] = Field(default=None, max_length=100)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class VideoResult(BaseModel):
    """A single video entry returned by the video search endpoint."""

    # YouTube video identifier.
    videoId: str
    # Video title.
    title: str
    # Name of the channel that published the video.
    channelTitle: str
    # URL of the thumbnail image.
    thumbnailUrl: str
    # Video length in whole seconds.
    durationSeconds: int
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class VideoSearchResponse(BaseModel):
    """Response body for ``POST /api/lessons/videos/search``."""

    # Matching videos (the endpoint requests at most 3).
    videos: List[VideoResult]
    # True when the service reported that the result came from its cache.
    cached: bool = False
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@router.post("/search", response_model=VideoSearchResponse)
async def search_videos(request: Request, payload: VideoSearchRequest):
    """
    Search for relevant educational YouTube videos for a lesson topic.

    Delegates to ``get_video_search_results`` (requesting up to 3 videos) and
    converts the returned dicts into ``VideoResult`` models.

    Responses:
        200: ``VideoSearchResponse`` with the videos and the ``cached`` flag.
        503: ``youtube_api_not_configured`` when no YouTube API key is set,
             so the frontend can hide the video section silently.
        500: ``video_search_failed`` on any unexpected error.
    """
    # Ships with FastAPI; imported locally to keep this block self-contained.
    from fastapi.concurrency import run_in_threadpool

    # Graceful degradation: if YouTube API key is not configured, return 503
    # so the frontend can hide the video section silently
    if not YOUTUBE_API_KEY:
        logger.warning("YouTube API key not configured")
        raise HTTPException(
            status_code=503,
            detail={
                "error": "youtube_api_not_configured",
                "message": "YouTube API key is not configured on the server.",
            },
        )

    try:
        # The search helper is synchronous; run it in a worker thread so it
        # does not block the event loop while this coroutine waits.
        result = await run_in_threadpool(
            get_video_search_results,
            topic=payload.topic,
            subject=payload.subject,
            lesson_context=payload.lesson_context,
            grade_level=payload.grade_level,
            lesson_id=payload.lesson_id,
            max_results=3,
        )

        videos = []
        for v in result.get("videos", []):
            # Skip malformed entries instead of failing the whole request.
            if not isinstance(v, dict) or not v.get("videoId"):
                continue
            videos.append(
                VideoResult(
                    videoId=v["videoId"],
                    title=v.get("title") or "",
                    channelTitle=v.get("channelTitle") or "",
                    thumbnailUrl=v.get("thumbnailUrl") or "",
                    durationSeconds=int(v.get("durationSeconds") or 0),
                )
            )

        return VideoSearchResponse(
            videos=videos,
            cached=result.get("cached", False),
        )

    except HTTPException:
        raise
    except Exception as exc:
        # logger.exception records the traceback along with the message.
        logger.exception("Video search endpoint error: %s", exc)
        # NOTE(review): the exception text is echoed to the client; consider
        # a generic message if internal details should not be exposed.
        raise HTTPException(
            status_code=500,
            detail={
                "error": "video_search_failed",
                "message": f"Failed to search videos: {exc}",
            },
        )
|
services/youtube_service.py
CHANGED
|
@@ -1,170 +1,384 @@
|
|
| 1 |
"""
|
| 2 |
-
YouTube
|
| 3 |
-
Uses YouTube Data API v3 to find relevant
|
|
|
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
from __future__ import annotations
|
| 7 |
|
| 8 |
-
import
|
| 9 |
import logging
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
logger = logging.getLogger("mathpulse.youtube")
|
| 13 |
|
| 14 |
-
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def _parse_iso8601_duration(duration: str) -> int:
|
| 18 |
"""Parse ISO 8601 duration string like 'PT5M30S' to seconds."""
|
| 19 |
-
import re
|
| 20 |
if not duration:
|
| 21 |
return 0
|
| 22 |
-
hours_match = re.search(r
|
| 23 |
-
minutes_match = re.search(r
|
| 24 |
-
seconds_match = re.search(r
|
| 25 |
hours = int(hours_match.group(1)) if hours_match else 0
|
| 26 |
minutes = int(minutes_match.group(1)) if minutes_match else 0
|
| 27 |
seconds = int(seconds_match.group(1)) if seconds_match else 0
|
| 28 |
return hours * 3600 + minutes * 60 + seconds
|
| 29 |
|
| 30 |
|
| 31 |
-
def
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
Search YouTube Data API v3 for relevant educational videos.
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
"""
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
f"&maxResults={max_results}&relevanceLanguage={language}"
|
| 63 |
-
f"&key={YOUTUBE_API_KEY}"
|
| 64 |
-
)
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
try:
|
| 67 |
-
import
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
video_id = item.get("id", {}).get("videoId", "")
|
| 74 |
if not video_id:
|
| 75 |
continue
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
video_details_url = (
|
| 82 |
-
f"https://www.googleapis.com/youtube/v3/videos"
|
| 83 |
-
f"?part=contentDetails,statistics&id={video_id}&key={YOUTUBE_API_KEY}"
|
| 84 |
-
)
|
| 85 |
-
|
| 86 |
-
try:
|
| 87 |
-
with urllib.request.urlopen(video_details_url, timeout=10) as vd_response:
|
| 88 |
-
vd_data = json.loads(vd_response.read().decode())
|
| 89 |
-
vd_item = vd_data.get("items", [{}])[0]
|
| 90 |
-
content_details = vd_item.get("contentDetails", {})
|
| 91 |
-
duration = content_details.get("duration", "")
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
| 96 |
|
| 97 |
-
|
|
|
|
| 98 |
continue
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
"videoId": video_id,
|
| 105 |
-
"
|
| 106 |
-
"
|
| 107 |
-
"embedUrl": embed_url,
|
| 108 |
"thumbnailUrl": thumbnail_url,
|
| 109 |
"durationSeconds": duration_secs,
|
| 110 |
-
"description": description[:200],
|
| 111 |
})
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
|
|
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
return vr
|
| 120 |
|
| 121 |
-
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
except Exception as
|
| 124 |
-
logger.error("YouTube search failed: %s",
|
| 125 |
-
return
|
| 126 |
|
| 127 |
|
| 128 |
-
def
|
| 129 |
-
|
| 130 |
-
subject: str,
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
|
| 139 |
-
|
| 140 |
-
"""
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
| 154 |
|
|
|
|
|
|
|
| 155 |
|
| 156 |
-
|
| 157 |
-
"""Retrieve cached video from Firestore."""
|
| 158 |
-
try:
|
| 159 |
-
import firebase_admin
|
| 160 |
-
from firebase_admin import firestore
|
| 161 |
-
if not firebase_admin._apps:
|
| 162 |
-
return None
|
| 163 |
-
db = firestore.client()
|
| 164 |
-
doc = db.collection("curriculumDocuments").document(lesson_id)
|
| 165 |
-
video_doc = doc.collection("videoEmbed").document("primary").get()
|
| 166 |
-
if video_doc.exists:
|
| 167 |
-
return video_doc.to_dict()
|
| 168 |
-
except Exception:
|
| 169 |
-
pass
|
| 170 |
-
return None
|
|
|
|
| 1 |
"""
|
| 2 |
+
Smart YouTube Video Search Service for MathPulse AI.
|
| 3 |
+
Uses YouTube Data API v3 (googleapiclient.discovery) to find relevant
|
| 4 |
+
educational math videos, enriched with RAG curriculum context.
|
| 5 |
+
Results are cached in Firestore video_cache/{lessonId} with 7-day TTL.
|
| 6 |
"""
|
| 7 |
|
| 8 |
from __future__ import annotations
|
| 9 |
|
| 10 |
+
import hashlib
|
| 11 |
import logging
|
| 12 |
+
import os
|
| 13 |
+
import re
|
| 14 |
+
from datetime import datetime, timezone
|
| 15 |
+
from typing import Dict, List, Optional
|
| 16 |
|
| 17 |
logger = logging.getLogger("mathpulse.youtube")
|
| 18 |
|
| 19 |
+
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "").strip()
|
| 20 |
+
|
| 21 |
+
# Known educational channel keywords and exact names for post-filtering
|
| 22 |
+
_EDUCATIONAL_CHANNEL_KEYWORDS = [
|
| 23 |
+
"khan", "math", "academy", "education", "teacher", "professor",
|
| 24 |
+
"tutorial", "lesson", "school", "university", "college", "deped",
|
| 25 |
+
"philippines", "filipino", "pinoy", "stem", "learning", "study",
|
| 26 |
+
"organic chemistry tutor", "patrickjmt", "3blue1brown", "numberphile",
|
| 27 |
+
"math antics", "bright side", "crashcourse", "ted-ed", "ted ed",
|
| 28 |
+
"nancy pi", "professor leonard", "mit", "stanford", "harvard",
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
_EDUCATIONAL_CHANNEL_EXACT = {
|
| 32 |
+
"khan academy", "patrickjmt", "3blue1brown", "numberphile",
|
| 33 |
+
"math antics", "the organic chemistry tutor", "professor leonard",
|
| 34 |
+
"nancy pi", "ted-ed", "crashcourse", "bright side",
|
| 35 |
+
"mit opencourseware", "stanford", "harvard",
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
# Minimum duration in seconds to filter out shorts (3 minutes)
|
| 39 |
+
_MIN_DURATION_SECONDS = 180
|
| 40 |
+
# Maximum duration in seconds to avoid extremely long videos (60 minutes)
|
| 41 |
+
_MAX_DURATION_SECONDS = 3600
|
| 42 |
+
# Cache TTL in seconds (7 days)
|
| 43 |
+
_CACHE_TTL_SECONDS = 7 * 24 * 60 * 60
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _build_youtube_client():
    """Build a googleapiclient YouTube Data API v3 client.

    Returns:
        The client built with ``YOUTUBE_API_KEY``, or ``None`` when no API key
        is configured or the client cannot be built (for example when
        ``googleapiclient`` is not installed).
    """
    if not YOUTUBE_API_KEY:
        return None
    try:
        # Imported lazily so this module loads even without the dependency.
        from googleapiclient.discovery import build
        return build("youtube", "v3", developerKey=YOUTUBE_API_KEY, cache_discovery=False)
    except Exception as exc:
        logger.warning("Failed to build YouTube client: %s", exc)
        return None
|
| 56 |
|
| 57 |
|
| 58 |
def _parse_iso8601_duration(duration: str) -> int:
    """Parse ISO 8601 duration string like 'PT5M30S' to seconds.

    Week (``W``), day (``D``), hour (``H``), minute (``M``) and second (``S``)
    components are summed; any component may be absent. An empty or ``None``
    value, or a string with no recognised component, yields 0.

    Month/year designators are not supported: an ``M`` is always counted as
    minutes.
    """
    if not duration:
        return 0

    # (designator, seconds per unit). Days and weeks are included so that
    # long durations such as 'P1DT2H' are not under-counted.
    unit_seconds = (
        ("W", 7 * 24 * 3600),
        ("D", 24 * 3600),
        ("H", 3600),
        ("M", 60),
        ("S", 1),
    )
    total = 0
    for unit, factor in unit_seconds:
        match = re.search(rf"(\d+){unit}", duration)
        if match:
            total += int(match.group(1)) * factor
    return total
|
| 69 |
|
| 70 |
|
| 71 |
+
def _is_educational_channel(channel_title: str) -> bool:
    """Check if a channel appears to be educational.

    The title is lower-cased and stripped, then accepted when it equals an
    entry of ``_EDUCATIONAL_CHANNEL_EXACT`` or contains any entry of
    ``_EDUCATIONAL_CHANNEL_KEYWORDS`` as a substring. A missing (``None``)
    or empty title is never educational.
    """
    lowered = (channel_title or "").lower().strip()
    if lowered in _EDUCATIONAL_CHANNEL_EXACT:
        return True
    # NOTE(review): substring matching is loose -- a short keyword can match
    # inside an unrelated word. Left as-is to keep filtering unchanged.
    return any(kw in lowered for kw in _EDUCATIONAL_CHANNEL_KEYWORDS)
|
| 77 |
+
|
|
|
|
| 78 |
|
| 79 |
+
def _score_video_result(item: dict, query: str) -> float:
    """Score a video result for relevance. Higher is better.

    Args:
        item: Result dict; the keys read are ``title``, ``description``,
            ``channelTitle`` and ``durationSeconds``.
        query: The search query the result is scored against.

    Scoring:
        +2.0 per math/education term found in the title,
        +1.5 per query word (longer than 2 characters) found in the title,
        +5.0 when the channel looks educational,
        +0.5 per query word (longer than 2 characters) found in the description,
        +2.0 for a 5-20 minute video, otherwise +1.0 when the video is at
        least ``_MIN_DURATION_SECONDS`` long.
    """
    title = (item.get("title") or "").lower()
    description = (item.get("description") or "").lower()
    channel = (item.get("channelTitle") or "").lower()

    # Split once and reuse. Repeated words are intentionally kept (not
    # de-duplicated) so each occurrence still contributes to the score.
    query_words = [word for word in (query or "").lower().split() if len(word) > 2]

    score = 0.0

    # Title contains key math/education terms
    math_terms = ("tutorial", "lesson", "explain", "math", "mathematics",
                  "solution", "problem", "example", "learn", "how to")
    score += 2.0 * sum(1 for term in math_terms if term in title)

    # Query terms appear in title
    score += 1.5 * sum(1 for word in query_words if word in title)

    # Educational channel bonus
    if _is_educational_channel(channel):
        score += 5.0

    # Description relevance
    score += 0.5 * sum(1 for word in query_words if word in description)

    # Duration sweet spot: 5-20 minutes (a missing/None duration counts as 0)
    duration = item.get("durationSeconds") or 0
    if 300 <= duration <= 1200:
        score += 2.0
    elif duration >= _MIN_DURATION_SECONDS:
        score += 1.0

    return score
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _enrich_query_with_rag(topic: str, subject: str, lesson_context: str = "") -> str:
    """
    Build the YouTube search query, enriched with curriculum keywords.

    The query starts as ``topic``, followed by ``subject`` and
    ``lesson_context`` when non-empty. Keywords taken from retrieved
    curriculum chunks are appended when retrieval succeeds, then a fixed
    DepEd/Philippines suffix is added. Any retrieval failure is logged at
    debug level and ignored.

    Returns:
        The enriched query, truncated to 300 characters.
    """
    enriched = topic
    if subject:
        enriched = f"{enriched} {subject}"
    if lesson_context:
        enriched = f"{enriched} {lesson_context}"

    try:
        # Imported lazily; a missing RAG module simply skips enrichment.
        from rag.curriculum_rag import retrieve_curriculum_context
        chunks = retrieve_curriculum_context(
            query=topic,
            subject=subject if subject else None,
            top_k=5,
        )
        if chunks:
            keywords: List[str] = []
            # Only the first three chunks contribute keywords.
            # Assumes each chunk is a dict with a "content" entry -- TODO confirm.
            for chunk in chunks[:3]:
                content = str(chunk.get("content", "")).strip()
                if not content:
                    continue
                # Replace punctuation/symbols with spaces, then keep purely
                # alphabetic words longer than 3 characters.
                cleaned = re.sub(r'[^\w\s]', ' ', content)
                words = [w for w in cleaned.split() if len(w) > 3 and w.isalpha()]
                # Take up to 5 words per chunk
                keywords.extend(words[:5])
            if keywords:
                # Deduplicate (order-preserving) and limit to 8 keywords
                unique_keywords = list(dict.fromkeys(keywords))[:8]
                keyword_str = " ".join(unique_keywords)
                enriched = f"{enriched} {keyword_str}"
    except Exception as exc:
        logger.debug("RAG enrichment skipped: %s", exc)

    # Append standard DepEd/Philippines math context
    enriched = f"{enriched} DepEd Philippines mathematics tutorial"
    # NOTE(review): with a long lesson_context this cut can drop the keywords
    # and suffix appended above, and may end mid-word.
    return enriched[:300]
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def _get_cache_key(topic: str, subject: str, grade_level: str) -> str:
    """Generate a deterministic Firestore document ID for caching.

    Returns the first 32 hex characters of the SHA-256 digest of
    ``"{subject}|{topic}|{grade_level}"``. Inputs are used verbatim, so
    differences in case or whitespace produce different keys.
    """
    raw = f"{subject}|{topic}|{grade_level}"
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32]
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def get_cached_videos(lesson_id: str) -> Optional[List[Dict]]:
    """Check Firestore video_cache/{lessonId} for cached results (TTL 7 days).

    Args:
        lesson_id: Document ID inside the ``video_cache`` collection.

    Returns:
        The cached non-empty ``videos`` list, or ``None`` when Firebase is not
        initialised, the document is missing/empty, the entry is older than
        ``_CACHE_TTL_SECONDS``, or reading fails for any reason (failures are
        logged at debug level, never raised).
    """
    try:
        import firebase_admin
        from firebase_admin import firestore

        # No initialised Firebase app means there is no cache to read.
        if not firebase_admin._apps:
            return None

        db = firestore.client()
        doc = db.collection("video_cache").document(lesson_id).get()
        if not doc.exists:
            return None

        data = doc.to_dict()
        if not data:
            return None

        # NOTE(review): an entry without "cachedAt" skips the age check and
        # therefore never expires -- confirm this is intended.
        cached_at = data.get("cachedAt")
        if cached_at:
            # Datetime-like values expose .timestamp(); anything else is
            # assumed to already be an epoch number.
            if hasattr(cached_at, "timestamp"):
                cached_epoch = cached_at.timestamp()
            else:
                cached_epoch = float(cached_at)
            now_epoch = datetime.now(timezone.utc).timestamp()
            if (now_epoch - cached_epoch) > _CACHE_TTL_SECONDS:
                logger.info("Video cache expired for lesson %s", lesson_id)
                return None

        videos = data.get("videos")
        if isinstance(videos, list) and len(videos) > 0:
            logger.info("Video cache hit for lesson %s (%d videos)", lesson_id, len(videos))
            return videos
    except Exception as exc:
        logger.debug("Could not read video cache: %s", exc)
    return None
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def cache_videos(lesson_id: str, videos: List[Dict], topic: str) -> None:
    """Store search results in Firestore video_cache/{lessonId}.

    Writes ``videos``, ``topic`` and a server-side ``cachedAt`` timestamp,
    replacing any existing document. Does nothing when Firebase is not
    initialised; write failures are logged as warnings and never raised.

    Args:
        lesson_id: Document ID inside the ``video_cache`` collection.
        videos: Video dicts to store.
        topic: Search topic stored alongside the videos.
    """
    try:
        import firebase_admin
        from firebase_admin import firestore

        # No initialised Firebase app means there is nowhere to write.
        if not firebase_admin._apps:
            return

        db = firestore.client()
        db.collection("video_cache").document(lesson_id).set({
            "videos": videos,
            "cachedAt": firestore.SERVER_TIMESTAMP,
            "topic": topic,
        })
        logger.info("Cached %d videos for lesson %s", len(videos), lesson_id)
    except Exception as exc:
        logger.warning("Could not cache videos in Firestore: %s", exc)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def search_youtube_videos(
    topic: str,
    subject: str = "",
    lesson_context: str = "",
    grade_level: str = "",
    max_results: int = 3,
    language: str = "en",
) -> List[Dict]:
    """
    Search YouTube Data API v3 for relevant educational math videos.

    Steps: enrich the query, run a video search (15 candidates, HD,
    "medium" duration, strict safe search), fetch per-video details, filter,
    score and return the best ``max_results`` entries.

    Filters applied to each candidate:
    - Duration between ``_MIN_DURATION_SECONDS`` and ``_MAX_DURATION_SECONDS``
      (the search itself already requests videoDuration="medium").
    - Educational channel, or a title containing "tutorial", "lesson",
      "math" or "explain".

    Args:
        topic: Lesson topic.
        subject: Subject name, added to the query when non-empty.
        lesson_context: Extra context, added to the query when non-empty.
        grade_level: Accepted for interface compatibility; not used here.
        max_results: Maximum number of videos returned (negative values are
            treated as 0).
        language: Value sent as ``relevanceLanguage``.

    Returns:
        A list of dicts with videoId, title, channelTitle, thumbnailUrl,
        durationSeconds and description (first 200 characters). Returns ``[]``
        when no client is available, nothing matches, or the API call fails.
    """
    client = _build_youtube_client()
    if client is None:
        logger.warning("YOUTUBE_API_KEY not set. Video search disabled.")
        return []

    # Step 1: Enrich query with RAG curriculum context
    enriched_query = _enrich_query_with_rag(topic, subject, lesson_context)
    logger.info("YouTube search query (enriched): %s", enriched_query)

    try:
        # Step 2: Search for videos
        search_response = client.search().list(
            part="snippet",
            q=enriched_query,
            type="video",
            maxResults=15,  # Fetch more to allow post-filtering
            relevanceLanguage=language,
            videoDefinition="high",
            videoDuration="medium",  # 4-20 minutes
            safeSearch="strict",
            order="relevance",
        ).execute()

        items = search_response.get("items", [])
        if not items:
            logger.info("No YouTube results for query: %s", enriched_query)
            return []

        # Step 3: Get video details (duration, etc.) in one batched call
        video_ids = [item["id"]["videoId"] for item in items if item.get("id", {}).get("videoId")]
        if not video_ids:
            return []

        details_response = client.videos().list(
            part="contentDetails,statistics,snippet",
            id=",".join(video_ids),
        ).execute()

        details_map = {
            detail["id"]: detail
            for detail in details_response.get("items", [])
            if detail.get("id")
        }

        # Step 4: Build results with filtering
        results = []
        for item in items:
            video_id = item.get("id", {}).get("videoId", "")
            if not video_id:
                continue

            detail = details_map.get(video_id, {})
            # Prefer the snippet from the details call; fall back to the
            # search snippet when details are missing.
            snippet = detail.get("snippet", item.get("snippet", {}))
            content_details = detail.get("contentDetails", {})

            title = snippet.get("title", "")
            channel_title = snippet.get("channelTitle", "")
            description = snippet.get("description", "")
            duration_secs = _parse_iso8601_duration(content_details.get("duration", ""))

            # Filter: duration (videos without details parse to 0 and are dropped)
            if duration_secs < _MIN_DURATION_SECONDS or duration_secs > _MAX_DURATION_SECONDS:
                continue

            # Filter: educational channels
            if not _is_educational_channel(channel_title):
                # Still allow if title strongly suggests math tutorial
                lowered_title = title.lower()
                if not any(term in lowered_title for term in ["tutorial", "lesson", "math", "explain"]):
                    continue

            # Default thumbnail, overridden by the API's "high" then "medium"
            # thumbnail when one with a URL is present.
            thumbnail_url = f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg"
            thumbs = snippet.get("thumbnails", {})
            if "high" in thumbs:
                thumbnail_url = thumbs["high"].get("url") or thumbnail_url
            elif "medium" in thumbs:
                thumbnail_url = thumbs["medium"].get("url") or thumbnail_url

            results.append({
                "videoId": video_id,
                "title": title,
                "channelTitle": channel_title,
                "thumbnailUrl": thumbnail_url,
                "durationSeconds": duration_secs,
                "description": description[:200] if description else "",
            })

        # Step 5: Score and sort, return top N. sorted() is stable, so ties
        # keep the API's relevance order; result dicts are not modified.
        ranked = sorted(
            results,
            key=lambda r: _score_video_result(r, enriched_query),
            reverse=True,
        )
        top_results = ranked[:max(max_results, 0)]
        logger.info("YouTube search returned %d results (top %d)", len(results), len(top_results))
        return top_results

    except Exception as exc:
        logger.error("YouTube search failed: %s", exc)
        return []
|
| 350 |
|
| 351 |
|
| 352 |
+
def get_video_search_results(
    topic: str,
    subject: str = "",
    lesson_context: str = "",
    grade_level: str = "",
    lesson_id: Optional[str] = None,
    max_results: int = 3,
) -> Dict:
    """
    High-level wrapper: check cache first, then search YouTube, then cache results.

    The cache key is ``lesson_id`` when given, otherwise a hash of
    subject/topic/grade_level. Only non-empty search results are cached.

    Args:
        topic: Lesson topic to search for.
        subject: Subject name forwarded to the search.
        lesson_context: Extra context forwarded to the search.
        grade_level: Grade label; used for the fallback cache key and
            forwarded to the search.
        lesson_id: Optional lesson identifier used as the cache key.
        max_results: Maximum number of videos requested from the search.

    Returns:
        {"videos": [...], "cached": bool}.
    """
    cache_key = lesson_id or _get_cache_key(topic, subject, grade_level)

    # Check cache first
    cached = get_cached_videos(cache_key)
    if cached is not None:
        return {"videos": cached, "cached": True}

    # Search YouTube
    videos = search_youtube_videos(
        topic=topic,
        subject=subject,
        lesson_context=lesson_context,
        grade_level=grade_level,
        max_results=max_results,
    )

    # Empty results are not cached, so a later request searches again.
    if videos:
        cache_videos(cache_key, videos, topic)

    return {"videos": videos, "cached": False}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_video_routes.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests for the video search endpoint and YouTube service.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
from unittest.mock import MagicMock, patch
|
| 10 |
+
|
| 11 |
+
import pytest
|
| 12 |
+
from fastapi.testclient import TestClient
|
| 13 |
+
|
| 14 |
+
# Add backend directory to path
|
| 15 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
| 16 |
+
|
| 17 |
+
# Import the app first, then stub Firebase auth on the loaded module if it has none
|
| 18 |
+
from main import app as _app_import
|
| 19 |
+
import main as main_module
|
| 20 |
+
|
| 21 |
+
# Only install a stub when the imported module has no usable firebase_auth.
if getattr(main_module, "firebase_auth", None) is None:
    main_module.firebase_auth = MagicMock(
        # Every token verifies as the same student account.
        verify_id_token=MagicMock(
            return_value={
                "uid": "test-student-uid",
                "email": "student@example.com",
                "role": "student",
            }
        )
    )
|
| 30 |
+
|
| 31 |
+
client = TestClient(_app_import, headers={"Authorization": "Bearer test-auth-token"})
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ─── Fixtures ───────────────────────────────────────────────────
|
| 35 |
+
|
| 36 |
+
@pytest.fixture
def mock_youtube_api_key(monkeypatch):
    """Set a dummy ``YOUTUBE_API_KEY`` environment variable for one test."""
    monkeypatch.setenv(name="YOUTUBE_API_KEY", value="test_youtube_api_key")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@pytest.fixture
def no_youtube_api_key(monkeypatch):
    """Set ``YOUTUBE_API_KEY`` to the empty string in the environment for one test."""
    monkeypatch.setenv(name="YOUTUBE_API_KEY", value="")
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# ─── YouTube Service Tests ──────────────────────────────────────
|
| 47 |
+
|
| 48 |
+
def test_parse_iso8601_duration():
    """ISO-8601 duration strings are converted to whole seconds; empty input gives 0."""
    from services.youtube_service import _parse_iso8601_duration

    expected_seconds = {
        "PT5M30S": 330,
        "PT1H2M3S": 3723,
        "PT0S": 0,
        "": 0,
    }
    for raw, seconds in expected_seconds.items():
        assert _parse_iso8601_duration(raw) == seconds
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def test_is_educational_channel():
    """Known educational channels are recognised; unrelated channel names are not."""
    from services.youtube_service import _is_educational_channel

    cases = (
        ("Khan Academy", True),
        ("Math Antics", True),
        ("3Blue1Brown", True),
        ("Gaming Channel", False),
        ("Random Vlogs", False),
    )
    for channel, expected in cases:
        # Identity check keeps the original strictness: a real bool is required.
        assert _is_educational_channel(channel) is expected
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def test_enrich_query_with_rag_fallback(monkeypatch):
    """When RAG is unavailable, enrichment falls back to topic + subject."""
    from services.youtube_service import _enrich_query_with_rag

    # Make the curriculum lookup raise so the fallback path is the one exercised.
    rag_failure = patch(
        "rag.curriculum_rag.retrieve_curriculum_context",
        side_effect=Exception("RAG unavailable"),
    )
    with rag_failure:
        enriched = _enrich_query_with_rag("quadratic equations", "General Mathematics")

    for fragment in (
        "quadratic equations",
        "General Mathematics",
        "DepEd Philippines mathematics",
    ):
        assert fragment in enriched
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def test_get_cache_key():
    """Cache keys are stable for equal inputs, differ across topics, and are 32 chars long."""
    from services.youtube_service import _get_cache_key

    subject, grade = "General Mathematics", "Grade 11"
    quadratic = _get_cache_key("quadratic equations", subject, grade)
    quadratic_again = _get_cache_key("quadratic equations", subject, grade)
    linear = _get_cache_key("linear equations", subject, grade)

    assert quadratic == quadratic_again
    assert quadratic != linear
    assert len(quadratic) == 32
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def test_cache_and_retrieve(mock_youtube_api_key, monkeypatch):
    """Caching writes one document; reading a non-existent document yields None."""
    from services.youtube_service import cache_videos, get_cached_videos

    lesson_key = "test-lesson-123"
    payload = [
        {
            "videoId": "abc123",
            "title": "Test Video",
            "channelTitle": "Test Channel",
            "thumbnailUrl": "http://example.com/thumb.jpg",
            "durationSeconds": 300,
        }
    ]

    # Fake Firestore: any collection(...).document(...) resolves to one document
    # reference whose fetched snapshot reports exists = False.
    doc_ref = MagicMock()
    doc_ref.get.return_value.exists = False
    fake_db = MagicMock()
    fake_db.collection.return_value.document.return_value = doc_ref

    with patch("firebase_admin.firestore.client", return_value=fake_db), \
            patch("firebase_admin._apps", {"default": MagicMock()}):
        # Writing to the cache must hit set() exactly once.
        cache_videos(lesson_key, payload, "quadratic equations")
        doc_ref.set.assert_called_once()

        # The snapshot is mocked as missing, so the read is a cache miss.
        fetched = get_cached_videos(lesson_key)
        assert fetched is None
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def test_search_youtube_videos_no_api_key(no_youtube_api_key):
    """Without an API key the search returns an empty list."""
    from services.youtube_service import search_youtube_videos

    found = search_youtube_videos("quadratic equations")

    assert found == []
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# ─── Route Tests ────────────────────────────────────────────────
|
| 119 |
+
|
| 120 |
+
def test_video_search_endpoint_no_api_key(no_youtube_api_key):
    """Should return 503 when YouTube API key is not configured."""
    # The sibling route tests patch routes.video_routes.YOUTUBE_API_KEY, so the
    # route module holds its own binding of the key. Blank that binding too;
    # otherwise the outcome depends on whether a real key happened to be
    # exported when the module was first imported.
    with patch("routes.video_routes.YOUTUBE_API_KEY", ""):
        response = client.post("/api/lessons/videos/search", json={
            "topic": "quadratic equations",
            "subject": "General Mathematics",
            "grade_level": "Grade 11",
        })

    assert response.status_code == 503
    data = response.json()
    assert data["detail"]["error"] == "youtube_api_not_configured"
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def test_video_search_endpoint_success(mock_youtube_api_key):
    """Should return video results when search succeeds."""
    stub_videos = [
        {
            "videoId": "vid1",
            "title": "Video 1",
            "channelTitle": "Channel 1",
            "thumbnailUrl": "http://example.com/1.jpg",
            "durationSeconds": 300,
        },
        {
            "videoId": "vid2",
            "title": "Video 2",
            "channelTitle": "Channel 2",
            "thumbnailUrl": "http://example.com/2.jpg",
            "durationSeconds": 450,
        },
    ]
    request_body = {
        "topic": "quadratic equations",
        "subject": "General Mathematics",
        "grade_level": "Grade 11",
        "lesson_id": "lesson-123",
    }

    # Replace both the key and the search wrapper as seen by the route module.
    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"), \
            patch(
                "routes.video_routes.get_video_search_results",
                return_value={"videos": stub_videos, "cached": False},
            ):
        response = client.post("/api/lessons/videos/search", json=request_body)

    assert response.status_code == 200
    body = response.json()
    assert len(body["videos"]) == 2
    assert body["cached"] is False
    assert body["videos"][0]["videoId"] == "vid1"
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def test_video_search_endpoint_empty_results(mock_youtube_api_key):
    """Should return empty list when no videos found."""
    request_body = {
        "topic": "very obscure topic xyz123",
        "subject": "General Mathematics",
    }

    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"), \
            patch(
                "routes.video_routes.get_video_search_results",
                return_value={"videos": [], "cached": False},
            ):
        response = client.post("/api/lessons/videos/search", json=request_body)

    assert response.status_code == 200
    body = response.json()
    assert body["videos"] == []
    assert body["cached"] is False
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def test_video_search_endpoint_cached(mock_youtube_api_key):
    """Should return cached results."""
    stub_videos = [
        {
            "videoId": "vid1",
            "title": "Cached Video",
            "channelTitle": "Channel 1",
            "thumbnailUrl": "http://example.com/1.jpg",
            "durationSeconds": 300,
        },
    ]
    request_body = {"topic": "linear equations", "lesson_id": "lesson-456"}

    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"), \
            patch(
                "routes.video_routes.get_video_search_results",
                return_value={"videos": stub_videos, "cached": True},
            ):
        response = client.post("/api/lessons/videos/search", json=request_body)

    assert response.status_code == 200
    body = response.json()
    assert body["cached"] is True
    assert len(body["videos"]) == 1
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def test_video_search_endpoint_validation_error(mock_youtube_api_key):
    """Should return 422 when topic is missing or too long."""
    # First an empty topic, then one of 201 characters; each must be rejected.
    for bad_topic in ("", "x" * 201):
        with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"):
            response = client.post("/api/lessons/videos/search", json={
                "topic": bad_topic,
                "subject": "General Mathematics",
            })
        assert response.status_code == 422
|