| """YouTube comment fetch and suggested-video metadata.""" |
|
|
| from __future__ import annotations |
|
|
| import os |
| import re |
| from pathlib import Path |
| from typing import Any |
|
|
| import yaml |
|
|
| from src.utils.logger import get_logger |
|
|
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)


# parents[2] is the directory two levels above the one that contains this
# file; presumably the repository root -- confirm against the project layout.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
# YAML file read by load_suggested_config().
SUGGESTED_CONFIG = PROJECT_ROOT / "configs" / "suggested_videos.yaml"


# Regexes tried in order by extract_video_id(). Each captures an 11-character
# id (letters, digits, "_" or "-") from a watch, youtu.be, or embed URL.
# The watch pattern only matches when "v" is the first query parameter.
_VIDEO_ID_PATTERNS = (
    r"youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})",
    r"youtu\.be/([a-zA-Z0-9_-]{11})",
    r"youtube\.com/embed/([a-zA-Z0-9_-]{11})",
)
|
|
|
|
class CommentsFetchError(Exception):
    """Signal that fetching comments failed and demo data must not be substituted.

    The exception message carries the human-readable reason for the failure.
    """
|
|
|
|
def extract_video_id(url: str) -> str | None:
    """Return the 11-character YouTube video id found in *url*, or ``None``.

    The module-level ``_VIDEO_ID_PATTERNS`` are tried first, in order, so any
    URL that matched before still yields the same id. If none of them match,
    two further URL shapes are recognised:

    * watch URLs whose ``v`` parameter is not the first query parameter,
      e.g. ``youtube.com/watch?feature=share&v=<id>``;
    * Shorts URLs, ``youtube.com/shorts/<id>``.

    An empty *url* yields ``None``.
    """
    if not url:
        return None

    # Kept local so this function works against the existing shared tuple
    # without requiring it to change.
    fallback_patterns = (
        # "v" anywhere in the query string (stops at a fragment or whitespace).
        r"youtube\.com/watch\?(?:[^#\s]*&)?v=([a-zA-Z0-9_-]{11})",
        r"youtube\.com/shorts/([a-zA-Z0-9_-]{11})",
    )
    for pattern in (*_VIDEO_ID_PATTERNS, *fallback_patterns):
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
|
|
|
|
def load_suggested_config() -> dict[str, Any]:
    """Load the suggested-videos configuration from ``SUGGESTED_CONFIG``.

    Returns:
        The parsed YAML document. If the file does not exist, a built-in
        default with one video is returned; if the file parses to a falsy
        value (for example an empty file), an empty dict is returned.
    """
    if SUGGESTED_CONFIG.exists():
        with SUGGESTED_CONFIG.open(encoding="utf-8") as handle:
            parsed = yaml.safe_load(handle)
        return parsed if parsed else {}

    # No config file on disk: fall back to the built-in default.
    return {"max_comments": 15, "videos": [{"id": "jNQXAC9IVRw"}]}
|
|
|
|
| def _parse_youtube_error(exc: Exception) -> str: |
| err_text = str(exc) |
| if "commentsDisabled" in err_text: |
| return "Comments are disabled on this video" |
| if "disabled comments" in err_text.lower(): |
| return "Comments are disabled on this video" |
| if "quota" in err_text.lower(): |
| return "YouTube API quota exceeded" |
| try: |
| from googleapiclient.errors import HttpError |
|
|
| if isinstance(exc, HttpError): |
| for detail in getattr(exc, "error_details", []) or []: |
| reason = detail.get("reason") if isinstance(detail, dict) else None |
| if reason == "commentsDisabled": |
| return "Comments are disabled on this video" |
| except ImportError: |
| pass |
| return err_text |
|
|
|
|
def fetch_comments(url: str, max_comments: int) -> tuple[list[str], str]:
    """Fetch comments for the video at *url*.

    When the ``YOUTUBE_API_KEY`` environment variable is set to a non-blank
    value the YouTube API is used; otherwise canned demo comments are
    returned.

    Returns:
        A ``(comments, source)`` pair. ``source`` is ``"demo"`` for the
        fallback, or whatever ``_fetch_via_api`` reports for the API path.

    Raises:
        CommentsFetchError: propagated from ``_fetch_via_api`` on the API path.
    """
    # "unknown" is the sentinel _fetch_via_api checks for an unparseable URL.
    video_id = extract_video_id(url) or "unknown"
    api_key = os.getenv("YOUTUBE_API_KEY", "").strip()

    if not api_key:
        return _demo_comments(video_id, max_comments), "demo"
    return _fetch_via_api(url, api_key, max_comments, video_id)
|
|
|
|
def _fetch_via_api(
    url: str, api_key: str, max_comments: int, video_id: str
) -> tuple[list[str], str]:
    """Collect up to *max_comments* top-level comments through the YouTube API.

    Pages of ``commentThreads`` are requested until enough comments have been
    gathered or the API reports no further page.

    Returns:
        ``(comments, "youtube")`` with at most *max_comments* entries.

    Raises:
        CommentsFetchError: if *video_id* is the ``"unknown"`` sentinel, if
            any error occurs while paging through results, or if no comments
            were collected. Errors raised while building the client are not
            wrapped and propagate as-is.
    """
    from googleapiclient.discovery import build

    if video_id == "unknown":
        raise CommentsFetchError(f"Could not parse video id from: {url}")

    client = build("youtube", "v3", developerKey=api_key)
    collected: list[str] = []
    next_token = None

    try:
        while len(collected) < max_comments:
            still_needed = max_comments - len(collected)
            request = client.commentThreads().list(
                part="snippet",
                videoId=video_id,
                # Never ask for more than 100 per page, nor more than needed.
                maxResults=min(100, still_needed),
                pageToken=next_token,
                textFormat="plainText",
            )
            page = request.execute()
            collected.extend(
                thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                for thread in page.get("items", [])
            )
            next_token = page.get("nextPageToken")
            if not next_token:
                break
    except Exception as exc:
        message = _parse_youtube_error(exc)
        logger.warning("YouTube API failed for %s: %s", video_id, message)
        raise CommentsFetchError(message) from exc

    if not collected:
        raise CommentsFetchError("No comments found for this video")

    logger.info("YouTube API: fetched %s comments for %s", len(collected), video_id)
    return collected[:max_comments], "youtube"
|
|
|
|
def _video_item_to_meta(item: dict[str, Any]) -> dict[str, Any]:
    """Convert one ``videos.list`` response item into this module's metadata dict."""
    vid = item["id"]
    snip = item["snippet"]
    status = item.get("status", {})
    thumbs = snip.get("thumbnails", {})
    # Prefer the medium thumbnail, then the default one.
    thumb = thumbs.get("medium") or thumbs.get("default") or {}
    return {
        "id": vid,
        "title": snip.get("title", vid),
        "channel_title": snip.get("channelTitle", "Unknown"),
        "thumbnail_url": thumb.get("url", f"https://i.ytimg.com/vi/{vid}/mqdefault.jpg"),
        "watch_url": f"https://www.youtube.com/watch?v={vid}",
        "embeddable": bool(status.get("embeddable", True)),
    }


def fetch_video_metadata(video_ids: list[str]) -> list[dict[str, Any]]:
    """Return display metadata for each id in *video_ids*, in the same order.

    With a non-blank ``YOUTUBE_API_KEY`` environment variable the data comes
    from the YouTube ``videos.list`` endpoint; ids the API does not return
    get placeholder metadata. Without a key, or with an empty id list, only
    placeholders are produced.

    This is best-effort: any error during the API calls is logged as a
    warning and placeholders are returned for every id.

    Returns:
        One dict per input id with the keys ``id``, ``title``,
        ``channel_title``, ``thumbnail_url``, ``watch_url`` and
        ``embeddable``.
    """
    api_key = os.getenv("YOUTUBE_API_KEY", "").strip()
    if not api_key or not video_ids:
        return [_placeholder_meta(vid) for vid in video_ids]

    # videos.list limits how many ids a single call may carry (50 at the time
    # of writing), so longer lists are requested in batches instead of failing
    # as one oversized request.
    batch_size = 50

    try:
        from googleapiclient.discovery import build

        youtube = build("youtube", "v3", developerKey=api_key)
        by_id: dict[str, dict[str, Any]] = {}
        for start in range(0, len(video_ids), batch_size):
            batch = video_ids[start : start + batch_size]
            response = (
                youtube.videos()
                .list(part="snippet,status", id=",".join(batch))
                .execute()
            )
            for item in response.get("items", []):
                meta = _video_item_to_meta(item)
                by_id[meta["id"]] = meta
        # Build a placeholder only for ids the API did not return.
        return [by_id.get(vid) or _placeholder_meta(vid) for vid in video_ids]
    except Exception as exc:
        logger.warning("YouTube metadata failed: %s", exc)
        return [_placeholder_meta(vid) for vid in video_ids]
|
|
|
|
| def _placeholder_meta(video_id: str) -> dict[str, Any]: |
| return { |
| "id": video_id, |
| "title": f"Video {video_id}", |
| "channel_title": "YouTube", |
| "thumbnail_url": f"https://i.ytimg.com/vi/{video_id}/mqdefault.jpg", |
| "watch_url": f"https://www.youtube.com/watch?v={video_id}", |
| "embeddable": True, |
| } |
|
|
|
|
def _demo_comments(video_id: str, max_comments: int) -> list[str]:
    """Return up to *max_comments* canned comments for use without an API key.

    The fixed sample list mixes benign and abusive comments. It is shuffled
    with a random generator seeded by *video_id*, so the same id always
    produces the same selection and order.

    A zero or negative *max_comments* yields an empty list. The count is
    clamped because a negative slice bound would otherwise drop items from
    the end of the list rather than return nothing.
    """
    import random

    logger.info("Using demo comments for %s (no YOUTUBE_API_KEY)", video_id)
    examples = [
        "This video is really informative, thanks for sharing!",
        "You are all stupid idiots, get out of here!",
        "Great content, I learned a lot from this.",
        "These people should be eliminated from society.",
        "I agree with the presenter's point of view.",
        "What a bunch of racist criminals!",
        "Thank you for this analysis, very helpful.",
        "Kill them all, they don't deserve to live.",
        "Interesting perspective on the topic.",
        "This is absolute bullshit propaganda!",
        "I think we need to look at both sides.",
        "Well researched video, good job.",
        "Go back to where you came from!",
        "The data presented here is compelling.",
    ]
    rng = random.Random(video_id)
    # Shuffle a copy so the literal list above stays in its written order.
    shuffled = examples[:]
    rng.shuffle(shuffled)
    return shuffled[: max(0, max_comments)]
|
|