Spaces:
Build error
Build error
| """ | |
| YouTube transcript extraction. | |
| """ | |
| import re | |
| from fastapi import HTTPException | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from youtube_transcript_api._errors import ( | |
| TranscriptsDisabled, | |
| NoTranscriptFound, | |
| VideoUnavailable, | |
| ) | |
# Captures the 11-character video ID that follows a `v=` query parameter or a
# `youtu.be/`, `embed/`, `shorts/`, or `live/` path segment.
# The trailing negative lookahead rejects tokens longer than 11 ID characters
# instead of silently truncating them (a truncated ID may name another video).
YT_ID_RE = re.compile(
    r"(?:v=|youtu\.be/|embed/|shorts/|live/)"
    r"([A-Za-z0-9_-]{11})"
    r"(?![A-Za-z0-9_-])"
)
def extract_video_id(url: str) -> str:
    """Return the video ID captured from *url* by the module-level ``YT_ID_RE``.

    Args:
        url: Text to search; the pattern may match anywhere inside it.

    Returns:
        The first capture group of the first ``YT_ID_RE`` match.

    Raises:
        HTTPException: status 400 when the pattern does not match *url*.
    """
    found = YT_ID_RE.search(url)
    if found is not None:
        return found[1]
    raise HTTPException(status_code=400, detail="Could not extract a video ID from that URL.")
def fetch_transcript(
    video_id: str,
    *,
    languages: tuple[str, ...] = ("en", "en-US", "en-GB"),
) -> str:
    """Fetch a video's transcript and return it as a single string.

    Synchronous transcript fetch — call via ``asyncio.to_thread`` from async code.

    Args:
        video_id: Video identifier handed to ``YouTubeTranscriptApi.fetch``.
        languages: Language codes forwarded to ``fetch`` as a list. The
            default is the English set this function previously hard-coded.

    Returns:
        The ``text`` of every snippet in the fetched transcript, joined with
        single spaces.

    Raises:
        HTTPException: 422 if transcripts are disabled or none is found,
            404 if the video is unavailable, 502 for any other failure
            raised by the fetch call.
    """
    ytt = YouTubeTranscriptApi()
    try:
        transcript = ytt.fetch(video_id, languages=list(languages))
    # Each handler chains the original error with `from exc` so the root cause
    # stays attached to the HTTPException in tracebacks and logs.
    except TranscriptsDisabled as exc:
        raise HTTPException(
            status_code=422, detail="This video has transcripts disabled."
        ) from exc
    except NoTranscriptFound as exc:
        raise HTTPException(
            status_code=422, detail="No transcript found for this video."
        ) from exc
    except VideoUnavailable as exc:
        raise HTTPException(
            status_code=404, detail="Video is unavailable or does not exist."
        ) from exc
    except Exception as exc:
        # Catch-all boundary: anything else from the fetch is reported as an
        # upstream failure rather than surfacing as an unhandled 500.
        raise HTTPException(
            status_code=502, detail=f"Transcript fetch failed: {exc}"
        ) from exc
    return " ".join(snippet.text for snippet in transcript)