SignalMod / src /api /youtube.py
Mirae Kang
feat: implement new models and improve UI, #23
46cc63a
raw
history blame
6.6 kB
"""YouTube comment fetch and suggested-video metadata."""
from __future__ import annotations
import os
import re
from pathlib import Path
from typing import Any
import yaml
from src.utils.logger import get_logger
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Third ancestor directory of this file (parents[0] is the file's own folder).
# NOTE(review): presumably the repository root given a src/api/ location; confirm
# if this file is ever moved.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
# Location of the suggested-videos YAML read by load_suggested_config().
SUGGESTED_CONFIG = PROJECT_ROOT / "configs" / "suggested_videos.yaml"
# Regexes tried in order by extract_video_id(); each one captures the
# 11-character video id in group 1.
_VIDEO_ID_PATTERNS = (
    r"youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})",
    r"youtu\.be/([a-zA-Z0-9_-]{11})",
    r"youtube\.com/embed/([a-zA-Z0-9_-]{11})",
    # `v` is not always the first query parameter (e.g. ?feature=share&v=...).
    r"youtube\.com/watch\?[^#\s]*?&v=([a-zA-Z0-9_-]{11})",
    # Shorts and live links carry the id in the path instead of the query.
    r"youtube\.com/(?:shorts|live)/([a-zA-Z0-9_-]{11})",
)


class CommentsFetchError(Exception):
    """Raised when comments cannot be fetched and demo fallback must not be used."""


def extract_video_id(url: str) -> str | None:
    """Extract the 11-character YouTube video id from ``url``.

    Recognised shapes are the ones listed in ``_VIDEO_ID_PATTERNS``: watch
    URLs (with ``v`` anywhere in the query string), ``youtu.be`` short links,
    ``/embed/`` URLs, and ``/shorts/`` or ``/live/`` paths.

    Args:
        url: The URL (or any text containing one) to inspect.

    Returns:
        The video id, or ``None`` when ``url`` is empty/``None`` or matches
        none of the known patterns.
    """
    # Guard first: re.search() raises TypeError on None, and an empty string
    # can never match.
    if not url:
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
def load_suggested_config() -> dict[str, Any]:
    """Load the suggested-videos configuration from ``SUGGESTED_CONFIG``.

    Returns:
        The mapping parsed from the YAML file. When the file does not exist,
        a built-in default (15 comments, one video) is returned. When the
        file is empty, or its top-level value is not a mapping, ``{}`` is
        returned so callers always receive a dict.
    """
    # Open directly and handle the missing file, rather than checking
    # exists() first: this avoids a race between the check and the open.
    try:
        with SUGGESTED_CONFIG.open(encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        return {"max_comments": 15, "videos": [{"id": "jNQXAC9IVRw"}]}

    # safe_load yields None for an empty document; keep the original
    # "falsy means empty config" behaviour.
    if not data:
        return {}
    # safe_load may also yield a list or scalar; returning that would break
    # the declared dict contract for callers.
    if not isinstance(data, dict):
        logger.warning(
            "Ignoring %s: expected a mapping at the top level, got %s",
            SUGGESTED_CONFIG,
            type(data).__name__,
        )
        return {}
    return data
def _parse_youtube_error(exc: Exception) -> str:
    """Turn an exception from the YouTube client into a short message.

    The exception text is checked for known markers first (disabled comments,
    then quota). If none is present and ``googleapiclient`` is importable,
    the structured ``error_details`` of an ``HttpError`` are checked for a
    ``commentsDisabled`` reason.

    Args:
        exc: The exception raised while calling the API.

    Returns:
        A canned message for the recognised cases, otherwise ``str(exc)``.
    """
    disabled_msg = "Comments are disabled on this video"
    raw = str(exc)
    lowered = raw.lower()

    if "commentsDisabled" in raw or "disabled comments" in lowered:
        return disabled_msg
    if "quota" in lowered:
        return "YouTube API quota exceeded"

    try:
        from googleapiclient.errors import HttpError

        if isinstance(exc, HttpError):
            details = getattr(exc, "error_details", []) or []
            # Only dict entries can carry a "reason"; anything else is skipped.
            if any(
                isinstance(entry, dict) and entry.get("reason") == "commentsDisabled"
                for entry in details
            ):
                return disabled_msg
    except ImportError:
        # Client library not installed: fall through to the raw text.
        pass
    return raw
def fetch_comments(url: str, max_comments: int) -> tuple[list[str], str]:
    """Return comments for the video at ``url`` together with a source tag.

    When the ``YOUTUBE_API_KEY`` environment variable holds a non-blank
    value, the work is delegated to ``_fetch_via_api``. Otherwise demo
    comments are returned and the source tag is ``"demo"``.

    Args:
        url: Video URL; the id is parsed with ``extract_video_id``.
        max_comments: Upper bound passed on to the chosen backend.

    Returns:
        A ``(comments, source)`` tuple.
    """
    parsed_id = extract_video_id(url)
    # "unknown" is the sentinel the API path checks to reject unparsable URLs.
    video_id = parsed_id if parsed_id else "unknown"

    key = os.getenv("YOUTUBE_API_KEY", "").strip()
    if not key:
        return _demo_comments(video_id, max_comments), "demo"
    return _fetch_via_api(url, key, max_comments, video_id)
def _fetch_via_api(
    url: str, api_key: str, max_comments: int, video_id: str
) -> tuple[list[str], str]:
    """Fetch top-level comment texts through the YouTube Data API client.

    Pages through ``commentThreads().list`` until ``max_comments`` texts have
    been collected or no further page token is returned.

    Args:
        url: Original URL, used only in the error message.
        api_key: Developer key handed to the client builder.
        max_comments: Maximum number of comments to return.
        video_id: Parsed video id, or the sentinel ``"unknown"``.

    Returns:
        ``(comments, "youtube")`` with at most ``max_comments`` entries.

    Raises:
        CommentsFetchError: If the id is ``"unknown"``, if any exception is
            raised while paging, or if no comments were collected.
    """
    from googleapiclient.discovery import build

    if video_id == "unknown":
        raise CommentsFetchError(f"Could not parse video id from: {url}")

    client = build("youtube", "v3", developerKey=api_key)
    collected: list[str] = []
    next_token = None
    try:
        while True:
            remaining = max_comments - len(collected)
            if remaining <= 0:
                break
            request = client.commentThreads().list(
                part="snippet",
                videoId=video_id,
                # Never ask for more than 100 per page, nor more than is still needed.
                maxResults=min(100, remaining),
                pageToken=next_token,
                textFormat="plainText",
            )
            page = request.execute()
            collected.extend(
                thread["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                for thread in page.get("items", [])
            )
            next_token = page.get("nextPageToken")
            if not next_token:
                break
    except Exception as exc:
        # Any failure while paging is converted to the module's own error
        # type, keeping the original exception as the cause.
        message = _parse_youtube_error(exc)
        logger.warning("YouTube API failed for %s: %s", video_id, message)
        raise CommentsFetchError(message) from exc

    if not collected:
        raise CommentsFetchError("No comments found for this video")

    logger.info("YouTube API: fetched %s comments for %s", len(collected), video_id)
    return collected[:max_comments], "youtube"
def fetch_video_metadata(video_ids: list[str]) -> list[dict[str, Any]]:
    """Return display metadata for each id in ``video_ids``, in input order.

    With a non-blank ``YOUTUBE_API_KEY`` a single ``videos().list`` request is
    made for all ids. Ids missing from the response get placeholder
    metadata. Without a key, with an empty id list, or when anything in the
    API path raises, every id gets placeholder metadata.

    Args:
        video_ids: Video ids to describe.

    Returns:
        One dict per input id with the keys ``id``, ``title``,
        ``channel_title``, ``thumbnail_url``, ``watch_url`` and ``embeddable``.
    """
    key = os.getenv("YOUTUBE_API_KEY", "").strip()
    if not (key and video_ids):
        return [_placeholder_meta(video_id) for video_id in video_ids]

    try:
        from googleapiclient.discovery import build

        client = build("youtube", "v3", developerKey=key)
        request = client.videos().list(
            part="snippet,status", id=",".join(video_ids)
        )
        payload = request.execute()

        resolved: dict[str, dict[str, Any]] = {}
        for entry in payload.get("items", []):
            video_id = entry["id"]
            snippet = entry["snippet"]
            status_info = entry.get("status", {})
            thumbnails = snippet.get("thumbnails", {})
            # Prefer the medium thumbnail, then the default one.
            chosen = thumbnails.get("medium") or thumbnails.get("default") or {}
            is_embeddable = status_info.get("embeddable", True)
            resolved[video_id] = {
                "id": video_id,
                "title": snippet.get("title", video_id),
                "channel_title": snippet.get("channelTitle", "Unknown"),
                "thumbnail_url": chosen.get(
                    "url", f"https://i.ytimg.com/vi/{video_id}/mqdefault.jpg"
                ),
                "watch_url": f"https://www.youtube.com/watch?v={video_id}",
                "embeddable": bool(is_embeddable),
            }

        return [
            resolved[video_id] if video_id in resolved else _placeholder_meta(video_id)
            for video_id in video_ids
        ]
    except Exception as exc:
        # Best effort: metadata is cosmetic, so degrade to placeholders.
        logger.warning("YouTube metadata failed: %s", exc)
        return [_placeholder_meta(video_id) for video_id in video_ids]
def _placeholder_meta(video_id: str) -> dict[str, Any]:
    """Build stand-in metadata for ``video_id`` without calling any API.

    Args:
        video_id: The video id to embed in the title and URLs.

    Returns:
        A dict with a generic title, ``"YouTube"`` as channel title, thumbnail
        and watch URLs derived from the id, and ``embeddable`` set to ``True``.
    """
    thumbnail = f"https://i.ytimg.com/vi/{video_id}/mqdefault.jpg"
    watch = f"https://www.youtube.com/watch?v={video_id}"
    return dict(
        id=video_id,
        title=f"Video {video_id}",
        channel_title="YouTube",
        thumbnail_url=thumbnail,
        watch_url=watch,
        embeddable=True,
    )
def _demo_comments(video_id: str, max_comments: int) -> list[str]:
    """Return canned sample comments, used when no API key is configured.

    The fixed pool mixes benign and abusive examples. It is shuffled with a
    random generator seeded by ``video_id``, so the same id always yields the
    same order.

    Args:
        video_id: Seed for the shuffle; also included in the log line.
        max_comments: Slice bound applied to the shuffled pool.

    Returns:
        The first ``max_comments`` entries of the shuffled pool.
    """
    from random import Random

    logger.info("Using demo comments for %s (no YOUTUBE_API_KEY)", video_id)
    pool = [
        "This video is really informative, thanks for sharing!",
        "You are all stupid idiots, get out of here!",
        "Great content, I learned a lot from this.",
        "These people should be eliminated from society.",
        "I agree with the presenter's point of view.",
        "What a bunch of racist criminals!",
        "Thank you for this analysis, very helpful.",
        "Kill them all, they don't deserve to live.",
        "Interesting perspective on the topic.",
        "This is absolute bullshit propaganda!",
        "I think we need to look at both sides.",
        "Well researched video, good job.",
        "Go back to where you came from!",
        "The data presented here is compelling.",
    ]
    # The pool is rebuilt on every call, so shuffling it in place is safe.
    Random(video_id).shuffle(pool)
    return pool[:max_comments]