Spaces:
Sleeping
Sleeping
| # app.py - Main Gradio application | |
| import gradio as gr | |
| import whisper | |
| import torch | |
| from transformers import MarianMTModel, MarianTokenizer | |
| import yt_dlp | |
| import os | |
| import tempfile | |
| import subprocess | |
| from pathlib import Path | |
| import re | |
class SubtitleTranslator:
    """Turn a video (uploaded file or YouTube URL) into English SRT subtitles.

    Pipeline: obtain an audio file, transcribe it with Whisper, translate each
    transcribed segment with a MarianMT model, and render the result as SRT.
    """

    # Prefix of the scratch directories created per request; also used to
    # recognise those directories again when cleaning up.
    _WORKDIR_PREFIX = "subtitle_translator_"

    # Multilingual-to-English model used when no pair-specific model loads.
    _FALLBACK_MODEL = "Helsinki-NLP/opus-mt-mul-en"

    # Language code mapping for translation models (full name -> short code).
    # Values that are not in the table are passed through unchanged.
    _LANGUAGE_CODES = {
        'spanish': 'es', 'french': 'fr', 'german': 'de', 'italian': 'it',
        'portuguese': 'pt', 'russian': 'ru', 'chinese': 'zh', 'japanese': 'ja',
        'korean': 'ko', 'arabic': 'ar', 'hindi': 'hi', 'dutch': 'nl',
        'swedish': 'sv', 'norwegian': 'no', 'danish': 'da', 'finnish': 'fi',
    }

    def __init__(self):
        """Load the Whisper model and create the empty translation caches."""
        # Use the smallest Whisper model for speed
        self.whisper_model = whisper.load_model("tiny")
        # Translation model cache, keyed by Hugging Face model name
        self.translation_models = {}
        self.tokenizers = {}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _log_exception(message):
        """Log *message* with the active traceback; call only from ``except``."""
        import logging  # local import keeps this block self-contained

        logging.getLogger(__name__).exception(message)

    @classmethod
    def _new_workdir(cls):
        """Create and return a fresh, uniquely named scratch directory."""
        return tempfile.mkdtemp(prefix=cls._WORKDIR_PREFIX)

    @staticmethod
    def _remove_workdir(workdir):
        """Delete a scratch directory and everything in it, ignoring errors."""
        import shutil  # local import keeps this block self-contained

        shutil.rmtree(workdir, ignore_errors=True)

    @classmethod
    def _discard_audio(cls, audio_path):
        """Remove a temporary audio file once it is no longer needed.

        When the file lives in one of our scratch directories the whole
        directory is removed; otherwise only the file itself is deleted.
        Never raises, so it is safe to call from a ``finally`` clause.
        """
        workdir = os.path.dirname(os.path.abspath(audio_path))
        if os.path.basename(workdir).startswith(cls._WORKDIR_PREFIX):
            cls._remove_workdir(workdir)
            return
        try:
            if os.path.exists(audio_path):
                os.remove(audio_path)
        except OSError:
            cls._log_exception(f"Could not remove temporary file {audio_path}")

    def _load_translation_pair(self, model_name):
        """Return ``(model, tokenizer)`` for *model_name*, loading on first use.

        Both objects are loaded before either is cached, so a failure halfway
        through never leaves a tokenizer in the cache without its model.
        """
        if model_name not in self.translation_models or model_name not in self.tokenizers:
            tokenizer = MarianTokenizer.from_pretrained(model_name)
            model = MarianMTModel.from_pretrained(model_name)
            self.tokenizers[model_name] = tokenizer
            self.translation_models[model_name] = model
        return self.translation_models[model_name], self.tokenizers[model_name]

    # ------------------------------------------------------------------
    # Audio acquisition
    # ------------------------------------------------------------------
    def download_youtube_audio(self, url):
        """Download audio from YouTube video.

        Args:
            url: Address of the video, handed to yt-dlp as-is.

        Returns:
            Path of the extracted ``.mp3`` file inside a fresh scratch
            directory, or ``None`` when the download or conversion failed.
        """
        workdir = self._new_workdir()
        ydl_opts = {
            'format': 'bestaudio/best',
            # A private directory per call: no collision between concurrent
            # requests and no chance of picking up a stale file from an
            # earlier run. '%' is escaped because it is the template marker.
            'outtmpl': os.path.join(workdir.replace('%', '%%'), 'temp_audio.%(ext)s'),
            # A watch URL that also carries a playlist id must fetch one video.
            'noplaylist': True,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            # Find the downloaded file
            for file_name in sorted(os.listdir(workdir)):
                if file_name.startswith('temp_audio') and file_name.endswith('.mp3'):
                    return os.path.join(workdir, file_name)
        except Exception:
            # Boundary with a third-party downloader: report failure as None,
            # as callers expect, but keep the cause in the log.
            self._log_exception(f"Could not download audio from {url!r}")
        self._remove_workdir(workdir)
        return None

    def extract_audio_from_video(self, video_path):
        """Extract audio from uploaded video file.

        Runs ``ffmpeg`` to produce a mono, 16 kHz, 16-bit PCM WAV file.

        Args:
            video_path: Path of the video file to read.

        Returns:
            Path of the WAV file inside a fresh scratch directory, or ``None``
            when ffmpeg is unavailable or exits with an error.
        """
        workdir = self._new_workdir()
        audio_path = os.path.join(workdir, "temp_extracted_audio.wav")
        cmd = [
            'ffmpeg', '-y',  # overwrite without prompting
            '-i', video_path,
            '-acodec', 'pcm_s16le',
            '-ac', '1',
            '-ar', '16000',
            audio_path,
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True)
        except Exception:
            self._log_exception(f"Could not extract audio from {video_path!r}")
            self._remove_workdir(workdir)
            return None
        return audio_path

    # ------------------------------------------------------------------
    # Transcription and translation
    # ------------------------------------------------------------------
    def transcribe_audio(self, audio_path):
        """Transcribe audio using Whisper and return its result unchanged."""
        return self.whisper_model.transcribe(audio_path)

    def get_translation_model(self, source_lang, target_lang="en"):
        """Load translation model for language pair.

        Tries ``Helsinki-NLP/opus-mt-<source>-<target>`` first. If that cannot
        be loaded, the multilingual-to-English model is used instead and is
        also cached under the requested name, so the failing lookup is not
        repeated for every subtitle segment.

        NOTE(review): the fallback model always translates into English, even
        when *target_lang* is something else.

        Returns:
            A ``(model, tokenizer)`` tuple.

        Raises:
            Exception: whatever the fallback model raises if it cannot be
                loaded either.
        """
        model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
        try:
            return self._load_translation_pair(model_name)
        except Exception:
            self._log_exception(
                f"Could not load {model_name}; falling back to {self._FALLBACK_MODEL}"
            )
        # Fallback to multilingual model
        model, tokenizer = self._load_translation_pair(self._FALLBACK_MODEL)
        self.translation_models[model_name] = model
        self.tokenizers[model_name] = tokenizer
        return model, tokenizer

    def translate_text(self, text, source_lang, target_lang="en"):
        """Translate text using MarianMT.

        Returns *text* unchanged when it is empty, when source and target
        language are the same, or when translation fails for any reason.
        """
        if source_lang == target_lang or not text:
            return text
        try:
            model, tokenizer = self.get_translation_model(source_lang, target_lang)
            inputs = tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=512)
            translated = model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
            return tokenizer.decode(translated[0], skip_special_tokens=True)
        except Exception:
            # Best effort: an untranslated subtitle beats no subtitle.
            self._log_exception("Translation failed; keeping the original text")
            return text

    # ------------------------------------------------------------------
    # SRT rendering
    # ------------------------------------------------------------------
    def format_timestamp(self, seconds):
        """Convert seconds to SRT timestamp format (``HH:MM:SS,mmm``).

        The value is rounded to whole milliseconds first and then split with
        integer arithmetic. Truncating the float fraction instead loses a
        millisecond on values such as 2.3 (which would render as ``,299``).
        Negative input is clamped to zero.
        """
        total_ms = max(0, int(round(seconds * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millisecs = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"

    def create_srt(self, segments, source_lang):
        """Create SRT subtitle content.

        Args:
            segments: Iterable of mappings with ``'start'``, ``'end'``
                (seconds) and ``'text'`` keys.
            source_lang: Language code of the segment text; each segment is
                translated from it into English.

        Returns:
            The SRT document as one string, with cues numbered from 1. An
            empty *segments* yields an empty string.
        """
        cues = []
        for index, segment in enumerate(segments, 1):
            start_time = self.format_timestamp(segment['start'])
            end_time = self.format_timestamp(segment['end'])
            translated_text = self.translate_text(segment['text'].strip(), source_lang, "en")
            cues.append(f"{index}\n{start_time} --> {end_time}\n{translated_text}\n\n")
        return "".join(cues)

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------
    def process_video(self, video_input, youtube_url):
        """Main processing function.

        A non-blank *youtube_url* takes precedence over *video_input*.

        Args:
            video_input: Path of an uploaded video file, or a falsy value.
            youtube_url: URL of a video to download, or a blank/falsy value.

        Returns:
            ``(status_message, srt_path)``. ``srt_path`` is ``None`` whenever
            no subtitle file was produced; the message then explains why.
        """
        audio_path = None
        try:
            # Determine input source
            if youtube_url and youtube_url.strip():
                audio_path = self.download_youtube_audio(youtube_url.strip())
                if not audio_path:
                    return "Error: Could not download YouTube video", None
            elif video_input:
                audio_path = self.extract_audio_from_video(video_input)
                if not audio_path:
                    return "Error: Could not extract audio from video", None
            else:
                return "Please provide either a video file or YouTube URL", None

            # Transcribe audio
            result = self.transcribe_audio(audio_path)

            # Detect language
            detected_lang = result.get('language', 'unknown')
            source_lang_code = self._LANGUAGE_CODES.get(detected_lang, detected_lang)

            # Create SRT content
            srt_content = self.create_srt(result['segments'], source_lang_code)

            # Save SRT file. Each run gets its own directory so that two
            # requests never overwrite each other's output, while the file
            # keeps its friendly name for the download.
            srt_dir = tempfile.mkdtemp(prefix=self._WORKDIR_PREFIX + "srt_")
            srt_filename = os.path.join(srt_dir, "translated_subtitles.srt")
            with open(srt_filename, 'w', encoding='utf-8') as f:
                f.write(srt_content)

            status_msg = f"β Processing complete!\n"
            status_msg += f"π Detected language: {detected_lang}\n"
            status_msg += f"π Generated {len(result['segments'])} subtitle segments\n"
            status_msg += f"π Translated to English"
            return status_msg, srt_filename
        except Exception as e:
            self._log_exception("Subtitle processing failed")
            return f"Error during processing: {str(e)}", None
        finally:
            # Clean up temporary files, also when a step above failed
            if audio_path:
                self._discard_audio(audio_path)
# Initialize the translator.
# Constructing SubtitleTranslator loads the Whisper "tiny" model (see its
# __init__), so this happens once when the module is imported; the resulting
# instance is the one process_video_interface calls for every request.
translator = SubtitleTranslator()
# Gradio click handler
def process_video_interface(video_file, youtube_url, progress=gr.Progress()):
    """Run the module-level translator for one request and report progress.

    The progress updates are coarse markers rather than measurements: two are
    emitted before ``translator.process_video`` is called and two after it
    has returned.

    Args:
        video_file: Uploaded video path, forwarded to ``process_video``.
        youtube_url: YouTube URL text, forwarded to ``process_video``.
        progress: Gradio progress tracker, called as ``progress(fraction, desc=...)``.

    Returns:
        Whatever ``translator.process_video`` returns.
    """
    steps_before = (
        (0.1, "Starting processing..."),
        (0.3, "Extracting audio..."),
    )
    steps_after = (
        (0.7, "Transcribing and translating..."),
        (1.0, "Complete!"),
    )

    for fraction, label in steps_before:
        progress(fraction, desc=label)

    outcome = translator.process_video(video_file, youtube_url)

    for fraction, label in steps_after:
        progress(fraction, desc=label)

    return outcome
# Custom CSS for better UI.
# Passed to gr.Blocks(css=...) when the app is built. It caps the container
# width and styles the "title", "subtitle" and "feature-box" classes that the
# gr.HTML snippets of the page use.
css = """
.gradio-container {
max-width: 900px !important;
}
.title {
text-align: center;
color: #2563eb;
font-size: 2.5rem;
font-weight: bold;
margin-bottom: 1rem;
}
.subtitle {
text-align: center;
color: #64748b;
font-size: 1.2rem;
margin-bottom: 2rem;
}
.feature-box {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 1rem;
border-radius: 10px;
margin: 1rem 0;
}
"""
# Create the Gradio app.
# Layout, top to bottom: page header, a feature list, a two-column row
# (inputs on the left, results on the right) and a footer. The button is wired
# to process_video_interface at the end of the block.
with gr.Blocks(css=css, title="Video Subtitle Translator") as app:
    # Page header; the "title" and "subtitle" classes come from the css string.
    gr.HTML("""
<div class="title">π¬ Video Subtitle Translator</div>
<div class="subtitle">Generate English subtitles from any language video using AI</div>
""")
    # Static feature list, styled by the "feature-box" class.
    with gr.Row():
        with gr.Column():
            gr.HTML("""
<div class="feature-box">
<h3>π Features:</h3>
<ul>
<li>πΉ Upload video files or paste YouTube links</li>
<li>π― Automatic speech recognition with Whisper AI</li>
<li>π Auto-detect source language</li>
<li>π Generate accurate English subtitles</li>
<li>β±οΈ Perfect timing synchronization</li>
<li>πΎ Download ready-to-use SRT files</li>
</ul>
</div>
""")
    with gr.Row():
        # Left column: the two alternative inputs and the trigger button.
        with gr.Column(scale=1):
            # type="filepath" makes the handler receive a path string.
            video_input = gr.File(
                label="π Upload Video File",
                file_types=[".mp4", ".avi", ".mov", ".mkv", ".webm", ".m4v"],
                type="filepath"
            )
            youtube_input = gr.Textbox(
                label="π Or paste YouTube URL",
                placeholder="https://www.youtube.com/watch?v=...",
                lines=1
            )
            process_btn = gr.Button(
                "π Generate Subtitles",
                variant="primary",
                size="lg"
            )
        # Right column: read-only status text and the generated SRT file.
        with gr.Column(scale=1):
            status_output = gr.Textbox(
                label="π Processing Status",
                lines=6,
                interactive=False
            )
            srt_output = gr.File(
                label="πΎ Download SRT File",
                interactive=False
            )
    # Footer
    gr.HTML("""
<div style="text-align: center; margin-top: 2rem; color: #64748b;">
<p>β‘ Powered by Whisper AI & MarianMT | π€ Running on Hugging Face Spaces</p>
<p>π‘ Tip: For best results, use videos with clear audio and minimal background noise</p>
</div>
""")
    # Connect the processing function: the handler receives the uploaded file
    # and the URL text, and its two return values fill the status box and the
    # download component, in that order.
    process_btn.click(
        fn=process_video_interface,
        inputs=[video_input, youtube_input],
        outputs=[status_output, srt_output],
        show_progress=True
    )
# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    app.launch()