import os
import re

from deep_translator import GoogleTranslator
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from yt_dlp import YoutubeDL

# An ID is 11 URL-safe characters introduced by "v=" or "/". The negative
# lookahead rejects longer tokens, so "/attribution_link" or a
# "youtube-nocookie.com" host is not truncated into a bogus 11-character ID.
_VIDEO_ID_PATTERN = re.compile(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])")

# Chunk size handed to the translator. Kept strictly below the 5000-character
# ceiling so that a chunk landing exactly on the ceiling is not rejected.
# NOTE(review): deep-translator appears to require len(text) < 5000 - confirm.
_TRANSLATE_CHUNK_CHARS = 4999


def extract_video_id(url):
    """
    Extracts the video ID from a YouTube URL.

    Args:
        url: URL string such as ``https://www.youtube.com/watch?v=<id>`` or
            ``https://youtu.be/<id>``.

    Returns:
        The 11-character video ID, or None when the URL contains no token
        that looks like one.
    """
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None


def get_subtitles(video_url):
    """
    Gets English subtitles using youtube-transcript-api.
    Tries manual subtitles first, then auto-generated.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The transcript data produced by the library's ``fetch()`` call, in the
        form expected by ``format_transcript``.

    Raises:
        ValueError: If no video ID can be extracted from ``video_url``.
        RuntimeError: If neither a manual nor an auto-generated English
            transcript can be retrieved; the underlying error is chained.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL")

    try:
        # Try manually added subtitles in English
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        return available.find_manually_created_transcript(["en"]).fetch()
    except Exception as e:
        print("🚫 Error getting manual transcript:", e)
        try:
            # Try auto-generated English subtitles. The listing is requested
            # again on purpose: the first attempt may have failed before the
            # listing was obtained.
            available = YouTubeTranscriptApi.list_transcripts(video_id)
            return available.find_generated_transcript(["en"]).fetch()
        except Exception as fallback_error:
            print("❌ Fallback also failed:", fallback_error)
            raise RuntimeError("Subtitles not available.") from fallback_error


def format_transcript(transcript):
    """
    Returns a plain text version of subtitles.

    Args:
        transcript: Transcript data as returned by ``get_subtitles``.

    Returns:
        The transcript rendered by ``TextFormatter`` as a single string.
    """
    formatter = TextFormatter()
    return formatter.format_transcript(transcript)


def split_text(text, max_chars=5000):
    """
    Splits text into chunks within API limit.

    Each chunk is at most ``max_chars`` characters long. A chunk ends after
    the last period inside the limit when there is one; otherwise at the last
    space or newline inside the limit; otherwise the text is cut at exactly
    ``max_chars`` characters. Text that already fits is returned unchanged as
    a single chunk.

    Args:
        text: The text to split.
        max_chars: Maximum length of a chunk; must be at least 1.

    Returns:
        A list of non-empty chunks, in their original order.

    Raises:
        ValueError: If ``max_chars`` is less than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    chunks = []
    while len(text) > max_chars:
        window = text[:max_chars]
        split_at = window.rfind(".") + 1  # keep the period with its sentence
        if split_at == 0:
            # No sentence boundary in the window (common for unpunctuated
            # auto-generated captions). Fall back to a word boundary, then to
            # a hard cut, so every pass consumes at least one character.
            split_at = max(window.rfind(" "), window.rfind("\n"))
            if split_at <= 0:
                split_at = max_chars
        piece = text[:split_at].strip()
        if piece:
            chunks.append(piece)
        text = text[split_at:].strip()
    if text:
        chunks.append(text)
    return chunks


def translate_text(text, target_lang):
    """
    Translate text into target language using deep-translator.

    The text is translated chunk by chunk. A chunk that fails to translate is
    reported on stdout and replaced by the marker ``[Translation Failed]`` so
    the remaining chunks are still processed.

    Args:
        text: Source text; the source language is auto-detected.
        target_lang: Target language name or code; lower-cased before use.

    Returns:
        The translated chunks joined by blank lines.
    """
    translated = []
    for chunk in split_text(text, max_chars=_TRANSLATE_CHUNK_CHARS):
        try:
            # Built inside the try block so that an unusable target language
            # degrades to failure markers instead of aborting the whole run.
            translator = GoogleTranslator(source="auto", target=target_lang.lower())
            translated.append(translator.translate(chunk))
        except Exception as e:
            print(f"❌ Error translating chunk: {e}")
            translated.append("[Translation Failed]")
    return "\n\n".join(translated)


def VideoToSubtitle(video_url, language):
    """
    Main function to fetch and translate subtitles.

    Args:
        video_url: URL of the YouTube video.
        language: Target language for the translation.

    Returns:
        A tuple ``(english_text, translated_text)``.

    Raises:
        ValueError: If ``video_url`` contains no video ID.
        RuntimeError: If no English subtitles can be retrieved.
    """
    print(f"🔍 Processing Video: {video_url}, Language: {language}")

    subtitles = get_subtitles(video_url)
    english_text = format_transcript(subtitles)
    translated_text = translate_text(english_text, language)
    return english_text, translated_text