Spaces:
Sleeping
Sleeping
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from youtube_transcript_api.formatters import TextFormatter | |
| from deep_translator import GoogleTranslator | |
| from yt_dlp import YoutubeDL | |
| import os | |
| import re | |
def extract_video_id(url):
    """
    Extracts the 11-character video ID from a YouTube URL.

    The ID is looked for right after ``v=`` (as in ``watch?v=<id>``) or
    right after a ``/`` (as in ``youtu.be/<id>``).

    Args:
        url: URL text to search.

    Returns:
        The video ID string, or None when ``url`` is empty, is not a
        string, or contains no token that is exactly 11 ID characters long.
    """
    # Callers treat None as "invalid URL", so bad input maps to None
    # instead of letting re.search raise TypeError.
    if not url or not isinstance(url, str):
        return None
    # The negative lookahead pins the token to exactly 11 characters.
    # Without it, the first 11 characters of any longer path segment
    # (e.g. "/feed/subscriptions") would be returned as a bogus ID.
    match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    return match.group(1) if match else None
def get_subtitles(video_url):
    """
    Gets English subtitles using youtube-transcript-api.
    Tries manually created subtitles first, then auto-generated ones.

    Args:
        video_url: YouTube URL; its video ID is obtained with
            extract_video_id().

    Returns:
        Whatever ``Transcript.fetch()`` returns for the selected transcript.
        NOTE(review): assumes the pre-1.0 youtube-transcript-api, where
        fetch() yields the same list of entries as get_transcript() --
        confirm against the pinned library version.

    Raises:
        ValueError: If no video ID can be extracted from ``video_url``.
        RuntimeError: If neither a manual nor an auto-generated English
            transcript could be fetched; the underlying error is chained.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL")

    try:
        # Try manually added subtitles in English
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        return available.find_manually_created_transcript(['en']).fetch()
    except Exception as e:
        print("π« Error getting manual transcript:", e)

    try:
        # Try auto-generated English subtitles. The listing is requested
        # again so this attempt is independent of whatever failed above.
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        return available.find_generated_transcript(['en']).fetch()
    except Exception as fallback_error:
        print("β Fallback also failed:", fallback_error)
        # Chain the cause so the real failure stays visible in tracebacks.
        raise RuntimeError("Subtitles not available.") from fallback_error
def format_transcript(transcript):
    """
    Renders the given subtitles as plain text through TextFormatter.
    """
    return TextFormatter().format_transcript(transcript)
def split_text(text, max_chars=5000):
    """
    Splits text into chunks of at most ``max_chars`` characters.

    Each cut is made, in order of preference, just after the last "." in
    the current window, at the last whitespace character in the window, or
    (when the window contains neither) exactly at ``max_chars``.

    Args:
        text: The text to split.
        max_chars: Maximum length of each chunk; must be at least 1.

    Returns:
        List of chunks in original order. Chunks produced by a cut are
        stripped of surrounding whitespace; text that already fits is
        returned as a single unmodified chunk. Empty text gives [].

    Raises:
        ValueError: If ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")

    chunks = []
    while len(text) > max_chars:
        window = text[:max_chars]
        split_at = window.rfind(".") + 1  # split at sentence boundary
        if split_at == 0:
            # No sentence end in the window. Cutting at index 0 would append
            # an empty chunk and never shrink the text (an endless loop), so
            # fall back to a word boundary, then to a hard cut.
            split_at = max(window.rfind(ws) for ws in (" ", "\n", "\t"))
            if split_at <= 0:
                split_at = max_chars
        piece = text[:split_at].strip()
        if piece:
            chunks.append(piece)
        text = text[split_at:].strip()
    if text:
        chunks.append(text)
    return chunks
def translate_text(text, target_lang):
    """
    Translates text into the target language with deep-translator.

    The text is cut into chunks by split_text() and each chunk is sent to
    GoogleTranslator separately. A chunk that raises is reported on stdout
    and replaced by the "[Translation Failed]" placeholder. The results
    are joined with a blank line between them.
    """
    def _translate_piece(piece):
        # One translator per chunk, so a construction error is also
        # turned into the placeholder.
        try:
            return GoogleTranslator(source='auto', target=target_lang.lower()).translate(piece)
        except Exception as e:
            print(f"β Error translating chunk: {e}")
            return "[Translation Failed]"

    pieces = split_text(text)
    return "\n\n".join([_translate_piece(piece) for piece in pieces])
def VideoToSubtitle(video_url, language):
    """
    Entry point: fetches a video's subtitles and translates them.

    Returns a tuple of (plain-text subtitles, translated text).
    """
    print(f"π Processing Video: {video_url}, Language: {language}")
    source_text = format_transcript(get_subtitles(video_url))
    return source_text, translate_text(source_text, language)