Spaces:

Nick021402
/

SubGen

Sleeping

App Files Files Community

SubGen / app.py

Nick021402

Rename App.py to app.py

6ef73b2 verified 8 months ago

raw

history blame contribute delete

10.5 kB

	# app.py - Main Gradio application
	import gradio as gr
	import whisper
	import torch
	from transformers import MarianMTModel, MarianTokenizer
	import yt_dlp
	import os
	import tempfile
	import subprocess
	from pathlib import Path
	import re

	class SubtitleTranslator:
	    """Generate English SRT subtitles from a video file or a YouTube URL.

	    Pipeline: obtain an audio track (yt-dlp download or ffmpeg extraction),
	    transcribe it with Whisper, translate each transcript segment with a
	    MarianMT model, and write the result to an SRT file.
	    """

	    # Multilingual-to-English model used when no dedicated language pair loads.
	    _FALLBACK_MODEL = "Helsinki-NLP/opus-mt-mul-en"

	    # Maps full language names to the codes used in opus-mt model names.
	    # NOTE(review): Whisper appears to report a language code already, in
	    # which case the lookup simply passes the value through -- confirm.
	    _LANG_NAME_TO_CODE = {
	        'spanish': 'es', 'french': 'fr', 'german': 'de', 'italian': 'it',
	        'portuguese': 'pt', 'russian': 'ru', 'chinese': 'zh', 'japanese': 'ja',
	        'korean': 'ko', 'arabic': 'ar', 'hindi': 'hi', 'dutch': 'nl',
	        'swedish': 'sv', 'norwegian': 'no', 'danish': 'da', 'finnish': 'fi',
	    }

	    def __init__(self):
	        # Use the smallest Whisper model for speed
	        self.whisper_model = whisper.load_model("tiny")

	        # Translation model cache, keyed by Hugging Face model name
	        self.translation_models = {}
	        self.tokenizers = {}

	    @staticmethod
	    def _temp_path(output_dir, filename):
	        """Return *filename* inside *output_dir*, or bare when no directory is given."""
	        if output_dir is None:
	            return filename
	        return os.path.join(output_dir, filename)

	    def download_youtube_audio(self, url, output_dir=None):
	        """Download the audio track of a YouTube video as MP3.

	        Args:
	            url: Video URL handed to yt-dlp.
	            output_dir: Directory to write into. ``None`` keeps the historical
	                behaviour of writing ``temp_audio.mp3`` to the working
	                directory; pass a private directory to avoid collisions
	                between concurrent requests.

	        Returns:
	            Path of the MP3 file, or ``None`` if the download failed.
	        """
	        ydl_opts = {
	            'format': 'bestaudio/best',
	            'outtmpl': self._temp_path(output_dir, 'temp_audio.%(ext)s'),
	            'postprocessors': [{
	                'key': 'FFmpegExtractAudio',
	                'preferredcodec': 'mp3',
	                'preferredquality': '192',
	            }],
	        }

	        try:
	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                ydl.download([url])
	        except Exception:
	            # Best-effort boundary: any downloader failure is reported to the
	            # caller as "no audio" rather than propagated.
	            return None

	        # The post-processor converts the download to MP3 under the template name.
	        audio_path = self._temp_path(output_dir, 'temp_audio.mp3')
	        return audio_path if os.path.isfile(audio_path) else None

	    def extract_audio_from_video(self, video_path, output_dir=None):
	        """Extract a mono 16 kHz 16-bit PCM WAV track from a video file via ffmpeg.

	        Args:
	            video_path: Path of the input video.
	            output_dir: Directory to write into. ``None`` keeps the historical
	                behaviour of writing to the working directory.

	        Returns:
	            Path of the WAV file, or ``None`` if extraction failed.
	        """
	        if not video_path:
	            return None

	        audio_path = self._temp_path(output_dir, "temp_extracted_audio.wav")
	        cmd = [
	            'ffmpeg', '-y',  # overwrite a stale output file without prompting
	            '-i', video_path,
	            '-acodec', 'pcm_s16le',
	            '-ac', '1',
	            '-ar', '16000',
	            audio_path,
	        ]
	        try:
	            subprocess.run(cmd, check=True, capture_output=True)
	        except (subprocess.SubprocessError, OSError, ValueError):
	            # Covers a non-zero ffmpeg exit, a missing ffmpeg binary and
	            # malformed path arguments.
	            return None
	        return audio_path

	    def transcribe_audio(self, audio_path):
	        """Transcribe *audio_path* with the loaded Whisper model and return its result."""
	        return self.whisper_model.transcribe(audio_path)

	    def _load_marian(self, model_name):
	        """Return the cached ``(model, tokenizer)`` for *model_name*, loading on first use."""
	        if model_name not in self.translation_models:
	            # Load both parts before caching so a failure never leaves a
	            # tokenizer cached without its model.
	            tokenizer = MarianTokenizer.from_pretrained(model_name)
	            model = MarianMTModel.from_pretrained(model_name)
	            self.tokenizers[model_name] = tokenizer
	            self.translation_models[model_name] = model
	        return self.translation_models[model_name], self.tokenizers[model_name]

	    def get_translation_model(self, source_lang, target_lang="en"):
	        """Load the MarianMT model and tokenizer for a language pair.

	        Falls back to the multilingual-to-English model when the dedicated
	        pair cannot be loaded. The fallback always targets English,
	        whatever *target_lang* is.

	        Returns:
	            ``(model, tokenizer)`` tuple.

	        Raises:
	            Exception: whatever the loader raises if the fallback model
	                cannot be loaded either.
	        """
	        model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"

	        try:
	            return self._load_marian(model_name)
	        except Exception:
	            model, tokenizer = self._load_marian(self._FALLBACK_MODEL)
	            # Remember the substitution so later segments do not retry the
	            # failing lookup once per subtitle line.
	            self.translation_models[model_name] = model
	            self.tokenizers[model_name] = tokenizer
	            return model, tokenizer

	    def translate_text(self, text, source_lang, target_lang="en"):
	        """Translate *text* with MarianMT.

	        Returns *text* unchanged when the languages match, when the text is
	        empty or whitespace, or when translation fails.
	        """
	        if source_lang == target_lang or not text or not text.strip():
	            return text

	        try:
	            model, tokenizer = self.get_translation_model(source_lang, target_lang)
	            inputs = tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=512)
	            translated = model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
	            return tokenizer.decode(translated[0], skip_special_tokens=True)
	        except Exception:
	            return text  # Return original if translation fails

	    def format_timestamp(self, seconds):
	        """Convert seconds to the SRT timestamp format ``HH:MM:SS,mmm``.

	        Rounds to the nearest millisecond once, then splits with integer
	        arithmetic, so values such as 1.001 or 2.3 are not truncated to the
	        previous millisecond. Negative inputs are clamped to zero.
	        """
	        total_ms = max(0, int(round(seconds * 1000)))
	        hours, remainder = divmod(total_ms, 3_600_000)
	        minutes, remainder = divmod(remainder, 60_000)
	        secs, millisecs = divmod(remainder, 1000)
	        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"

	    def create_srt(self, segments, source_lang):
	        """Build SRT content from transcript segments, translated to English.

	        Args:
	            segments: Iterable of mappings with ``'start'``, ``'end'``
	                (seconds) and ``'text'`` keys.
	            source_lang: Language code of the segment text.

	        Returns:
	            The SRT document as one string (empty for no segments).
	        """
	        entries = []
	        for index, segment in enumerate(segments, 1):
	            start_time = self.format_timestamp(segment['start'])
	            end_time = self.format_timestamp(segment['end'])
	            translated_text = self.translate_text(segment['text'].strip(), source_lang, "en")
	            entries.append(f"{index}\n{start_time} --> {end_time}\n{translated_text}\n\n")
	        return "".join(entries)

	    def process_video(self, video_input, youtube_url):
	        """Run the full pipeline for one request.

	        Args:
	            video_input: Path of an uploaded video file, or a falsy value.
	            youtube_url: YouTube URL, or a falsy/blank value. Takes
	                precedence over *video_input* when both are given.

	        Returns:
	            ``(status_message, srt_path)``; *srt_path* is ``None`` on failure.
	        """
	        try:
	            url = youtube_url.strip() if youtube_url else ""
	            if not url and not video_input:
	                return "Please provide either a video file or YouTube URL", None

	            # A private scratch directory keeps concurrent requests from
	            # overwriting each other's audio and is removed even on errors.
	            with tempfile.TemporaryDirectory(prefix="subgen_") as work_dir:
	                if url:
	                    audio_path = self.download_youtube_audio(url, output_dir=work_dir)
	                    if not audio_path:
	                        return "Error: Could not download YouTube video", None
	                else:
	                    audio_path = self.extract_audio_from_video(video_input, output_dir=work_dir)
	                    if not audio_path:
	                        return "Error: Could not extract audio from video", None

	                # Transcribe while the audio file still exists
	                result = self.transcribe_audio(audio_path)

	            detected_lang = result.get('language', 'unknown')
	            source_lang_code = self._LANG_NAME_TO_CODE.get(detected_lang, detected_lang)

	            srt_content = self.create_srt(result['segments'], source_lang_code)

	            # delete=False: the file must outlive this call so it can be
	            # offered for download; the unique name avoids cross-request
	            # overwrites.
	            with tempfile.NamedTemporaryFile(
	                'w', encoding='utf-8', suffix='.srt',
	                prefix='translated_subtitles_', delete=False,
	            ) as srt_file:
	                srt_file.write(srt_content)
	                srt_filename = srt_file.name

	            status_msg = "✅ Processing complete!\n"
	            status_msg += f"🔍 Detected language: {detected_lang}\n"
	            status_msg += f"📝 Generated {len(result['segments'])} subtitle segments\n"
	            status_msg += "🌍 Translated to English"

	            return status_msg, srt_filename

	        except Exception as e:
	            # Top-level boundary: surface the failure in the status box.
	            return f"Error during processing: {str(e)}", None

	# Initialize the translator once at import time: the constructor loads the
	# Whisper "tiny" model, and this single instance is the one used by
	# process_video_interface below.
	translator = SubtitleTranslator()

	# Gradio click handler
	def process_video_interface(video_file, youtube_url, progress=gr.Progress()):
	    """Run the subtitle pipeline for the UI and report coarse progress.

	    Forwards both inputs to ``translator.process_video`` and returns its
	    result unchanged. The work happens inside that single call, so the
	    0.7 and 1.0 updates are only emitted after it has returned.
	    """
	    before_work = (
	        (0.1, "Starting processing..."),
	        (0.3, "Extracting audio..."),
	    )
	    after_work = (
	        (0.7, "Transcribing and translating..."),
	        (1.0, "Complete!"),
	    )

	    for fraction, label in before_work:
	        progress(fraction, desc=label)

	    outcome = translator.process_video(video_file, youtube_url)

	    for fraction, label in after_work:
	        progress(fraction, desc=label)

	    return outcome

	# Custom CSS passed to gr.Blocks(css=...): caps the page width and styles the
	# .title, .subtitle and .feature-box classes used by the gr.HTML snippets
	# in the layout below.
	css = """
	.gradio-container {
	max-width: 900px !important;
	}
	.title {
	text-align: center;
	color: #2563eb;
	font-size: 2.5rem;
	font-weight: bold;
	margin-bottom: 1rem;
	}
	.subtitle {
	text-align: center;
	color: #64748b;
	font-size: 1.2rem;
	margin-bottom: 2rem;
	}
	.feature-box {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 1rem;
	border-radius: 10px;
	margin: 1rem 0;
	}
	"""

	# Create the Gradio app: header, feature list, a two-column input/output
	# row, footer, and the button wiring.
	with gr.Blocks(css=css, title="Video Subtitle Translator") as app:
	    # Page header
	    gr.HTML("""
	<div class="title">🎬 Video Subtitle Translator</div>
	<div class="subtitle">Generate English subtitles from any language video using AI</div>
	""")

	    # Feature overview
	    with gr.Row():
	        with gr.Column():
	            gr.HTML("""
	<div class="feature-box">
	<h3>🚀 Features:</h3>
	<ul>
	<li>📹 Upload video files or paste YouTube links</li>
	<li>🎯 Automatic speech recognition with Whisper AI</li>
	<li>🌍 Auto-detect source language</li>
	<li>📝 Generate accurate English subtitles</li>
	<li>⏱️ Perfect timing synchronization</li>
	<li>💾 Download ready-to-use SRT files</li>
	</ul>
	</div>
	""")

	    with gr.Row():
	        # Left column: inputs and the trigger button
	        with gr.Column(scale=1):
	            video_input = gr.File(
	                label="📁 Upload Video File",
	                file_types=[".mp4", ".avi", ".mov", ".mkv", ".webm", ".m4v"],
	                type="filepath"
	            )

	            youtube_input = gr.Textbox(
	                label="🔗 Or paste YouTube URL",
	                placeholder="https://www.youtube.com/watch?v=...",
	                lines=1
	            )

	            process_btn = gr.Button(
	                "🚀 Generate Subtitles",
	                variant="primary",
	                size="lg"
	            )

	        # Right column: status text and the generated SRT file
	        with gr.Column(scale=1):
	            status_output = gr.Textbox(
	                label="📊 Processing Status",
	                lines=6,
	                interactive=False
	            )

	            srt_output = gr.File(
	                label="💾 Download SRT File",
	                interactive=False
	            )

	    # Footer. The separator is a plain "|": the previous "\|" was an invalid
	    # string escape that also rendered a stray backslash on the page.
	    gr.HTML("""
	<div style="text-align: center; margin-top: 2rem; color: #64748b;">
	<p>⚡ Powered by Whisper AI & MarianMT | 🤗 Running on Hugging Face Spaces</p>
	<p>💡 Tip: For best results, use videos with clear audio and minimal background noise</p>
	</div>
	""")

	    # Connect the processing function: the two inputs map to its positional
	    # arguments and its (status, srt_path) return value fills the two outputs.
	    process_btn.click(
	        fn=process_video_interface,
	        inputs=[video_input, youtube_input],
	        outputs=[status_output, srt_output],
	        show_progress=True
	    )

	# Launch the server only when the file is executed as a script.
	if __name__ == "__main__":
	    app.launch()