Spaces:
Sleeping
Sleeping
| """YouTube video transcript extraction.""" | |
| from urllib.parse import urlparse, parse_qs | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
def extract(url: str) -> str:
    """Return the transcript text of the YouTube video referenced by *url*.

    The video ID is parsed out of *url*, the transcript is fetched through
    ``YouTubeTranscriptApi``, and the text of every snippet is joined with
    single spaces into one string.

    Raises:
        ValueError: If no video ID can be parsed from *url*.
    """
    vid = _parse_video_id(url)
    if vid:
        snippets = YouTubeTranscriptApi().fetch(vid)
        pieces = [entry.text for entry in snippets]
        return " ".join(pieces)
    # Falsy ID (None or empty string): the URL is not usable.
    raise ValueError(f"Could not parse YouTube video ID from: {url}")
| def _parse_video_id(url: str) -> str | None: | |
| """Extract video ID from youtube.com/watch?v=... or youtu.be/... URLs.""" | |
| parsed = urlparse(url) | |
| hostname = parsed.hostname or "" | |
| if "youtu.be" in hostname: | |
| return parsed.path.lstrip("/") | |
| if "youtube.com" in hostname: | |
| return parse_qs(parsed.query).get("v", [None])[0] | |
| return None | |