# Language_Translator / Translator.py
# nayyabzahra148's picture
# Update Translator.py
# 17e22ac verified
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from deep_translator import GoogleTranslator
from yt_dlp import YoutubeDL
import os
import re
def extract_video_id(url):
    """
    Pull the 11-character video ID out of a YouTube URL.

    The first run of exactly-11-or-more ID characters (letters, digits,
    "_" or "-") that directly follows either "v=" or a "/" is used, and
    its first 11 characters are returned.

    Args:
        url: The URL string to search.

    Returns:
        The 11-character ID, or None when the pattern does not occur.
    """
    found = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
    if found is None:
        return None
    return found.group(1)
def get_subtitles(video_url):
    """
    Gets English subtitles using youtube-transcript-api.

    Manually created English subtitles are requested first; if that fails
    for any reason, the auto-generated English track is requested instead.

    Args:
        video_url: A YouTube URL from which a video ID can be extracted.

    Returns:
        The fetched transcript as returned by the library's ``fetch()``.
        NOTE(review): with the library versions that expose
        ``list_transcripts`` this is expected to be the same structure
        ``get_transcript`` returned — confirm against the pinned version.

    Raises:
        ValueError: If no video ID can be extracted from ``video_url``.
        RuntimeError: If neither a manual nor an auto-generated English
            transcript could be fetched.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL")

    try:
        # Ask explicitly for a human-written English track.
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        return available.find_manually_created_transcript(['en']).fetch()
    except Exception as e:
        print("🚫 Error getting manual transcript:", e)

    try:
        # Fall back to the auto-generated English track. The listing is
        # requested again so a transient failure above gets a second chance.
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        return available.find_generated_transcript(['en']).fetch()
    except Exception as fallback_error:
        print("❌ Fallback also failed:", fallback_error)
        # Chain the cause so the underlying library error stays visible.
        raise RuntimeError("Subtitles not available.") from fallback_error
def format_transcript(transcript):
    """
    Render a fetched transcript as plain text.

    Args:
        transcript: The transcript object handed to
            ``TextFormatter.format_transcript``.

    Returns:
        Whatever ``TextFormatter.format_transcript`` produces for it.
    """
    return TextFormatter().format_transcript(transcript)
def split_text(text, max_chars=5000):
    """
    Splits text into chunks of at most ``max_chars`` characters.

    Each chunk is cut at the last "." inside the current window when one
    exists. If the window holds no ".", the cut falls back to the last
    whitespace character, and finally to a hard cut at ``max_chars``, so
    the loop always makes progress.

    Args:
        text: The text to split.
        max_chars: Maximum length of any returned chunk; must be positive.

    Returns:
        A list of non-empty chunks in original order. Chunks produced by
        a cut are stripped of surrounding whitespace; text that already
        fits is returned as a single unmodified chunk. An empty ``text``
        yields an empty list.

    Raises:
        ValueError: If ``max_chars`` is not positive.
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")

    chunks = []
    while len(text) > max_chars:
        window = text[:max_chars]
        split_at = window.rfind(".") + 1  # split at sentence boundary
        if split_at <= 0:
            # No sentence end in this window: cut after the last whitespace.
            split_at = max(window.rfind(ws) for ws in (" ", "\n", "\t", "\r")) + 1
        if split_at <= 0:
            # One unbroken run of characters: hard cut to guarantee progress.
            split_at = max_chars
        piece = text[:split_at].strip()
        if piece:
            chunks.append(piece)
        text = text[split_at:].strip()
    if text:
        chunks.append(text)
    return chunks
def translate_text(text, target_lang):
    """
    Translate text into the target language with deep-translator.

    The text is cut into chunks by ``split_text`` and each chunk is sent
    to ``GoogleTranslator`` separately. A chunk whose translation raises
    is reported on stdout and replaced by a "[Translation Failed]" marker
    so the remaining chunks are still processed.

    Args:
        text: The text to translate.
        target_lang: Target language; lower-cased before being passed on.

    Returns:
        The translated chunks joined by blank lines.
    """
    pieces = []
    for segment in split_text(text):
        try:
            # Built inside the try so a rejected language is handled per
            # chunk in the same way as a failed request.
            engine = GoogleTranslator(source='auto', target=target_lang.lower())
            pieces.append(engine.translate(segment))
        except Exception as e:
            print(f"❌ Error translating chunk: {e}")
            pieces.append("[Translation Failed]")
    return "\n\n".join(pieces)
def VideoToSubtitle(video_url, language):
    """
    Main function to fetch and translate subtitles.

    Logs the request, fetches the subtitles with ``get_subtitles``,
    flattens them with ``format_transcript`` and translates the result
    with ``translate_text``.

    Args:
        video_url: URL of the YouTube video.
        language: Target language passed through to ``translate_text``.

    Returns:
        A tuple ``(english_text, translated_text)``.
    """
    print(f"🔍 Processing Video: {video_url}, Language: {language}")
    subtitles = get_subtitles(video_url)
    english_text = format_transcript(subtitles)
    translated_text = translate_text(english_text, language)
    return english_text, translated_text