Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import whisper | |
| import os | |
| from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer | |
| from docx import Document | |
| from fpdf import FPDF | |
| from pptx import Presentation | |
| import subprocess | |
| import shlex | |
| import yt_dlp | |
# Load the Whisper model (smaller model for faster transcription)
# "tiny" trades transcription accuracy for speed and memory; loaded once at
# import time so every request reuses the same model instance.
model = whisper.load_model("tiny")
# Load M2M100 translation model for different languages
def load_translation_model(target_language):
    """Load an M2M100 tokenizer/model pair configured to translate English
    into *target_language*.

    Parameters
    ----------
    target_language : str
        ISO 639-1 code of the desired output language.

    Returns
    -------
    tuple
        (tokenizer, translation_model) ready for use with translate_text().

    Raises
    ------
    ValueError
        If *target_language* is not in the supported set.
    """
    # The original code used a dict mapping every code to itself; it only
    # ever acted as a whitelist, so a set states that intent directly.
    supported_languages = {
        "fa",  # Persian (Farsi)
        "es",  # Spanish
        "fr",  # French
        "de",  # German
        "it",  # Italian
        "pt",  # Portuguese
        "ar",  # Arabic
        "zh",  # Chinese
        "hi",  # Hindi
        "ja",  # Japanese
        "ko",  # Korean
        "ru",  # Russian
    }
    if target_language not in supported_languages:
        raise ValueError(f"Translation model for {target_language} not supported")
    tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
    translation_model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
    # Source is always English (the UI transcribes, then translates from "en").
    tokenizer.src_lang = "en"
    tokenizer.tgt_lang = target_language
    return tokenizer, translation_model
def translate_text(text, tokenizer, model):
    """Translate *text* using a pre-configured M2M100 tokenizer/model pair.

    The tokenizer must already carry src_lang/tgt_lang (set by
    load_translation_model); the target language id is forced as the first
    generated token. Wraps any failure in RuntimeError.
    """
    try:
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        target_token_id = tokenizer.get_lang_id(tokenizer.tgt_lang)
        generated = model.generate(**encoded, forced_bos_token_id=target_token_id)
        return tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as e:
        raise RuntimeError(f"Error during translation: {e}")
# Helper function to format timestamps in SRT format
def format_timestamp(seconds):
    """Convert a float second count into an SRT timestamp: HH:MM:SS,mmm."""
    whole, fraction = divmod(seconds, 1)
    millis = int(fraction * 1000)
    total = int(whole)
    minutes, secs = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
# Corrected write_srt function
def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write Whisper transcription segments to *output_file* in SRT format.

    Parameters
    ----------
    transcription : dict
        Whisper result; each entry of transcription['segments'] provides
        'start', 'end' (seconds) and 'text'.
    output_file : str
        Path of the .srt file to create/overwrite.
    tokenizer, translation_model : optional
        When translation_model is truthy, each segment's text is first
        translated via translate_text().
    """
    # encoding="utf-8" is required: translated subtitles (Persian, Chinese,
    # Arabic, ...) are not representable in some platform default codecs
    # (e.g. cp1252 on Windows), which previously raised UnicodeEncodeError.
    with open(output_file, "w", encoding="utf-8") as f:
        for i, segment in enumerate(transcription['segments']):
            text = segment['text']
            if translation_model:
                text = translate_text(text, tokenizer, translation_model)
            start_time = format_timestamp(segment['start'])
            end_time = format_timestamp(segment['end'])
            f.write(f"{i + 1}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text.strip()}\n\n")
# Embedding subtitles into video (hardsub)
def embed_hardsub_in_video(video_file, srt_file, output_video):
    """Burn the subtitles from *srt_file* into *video_file* with ffmpeg,
    writing the result to *output_video*.

    Raises
    ------
    RuntimeError
        If ffmpeg exits non-zero, times out (300 s), or cannot be run.
    """
    # Build the argv list directly instead of formatting a shell-style string
    # and re-splitting it with shlex: paths containing spaces or quotes no
    # longer break tokenization. The subtitles filter value keeps the same
    # single-quote wrapping the original command produced.
    command = [
        "ffmpeg",
        "-i", video_file,
        "-vf", f"subtitles='{srt_file}'",
        "-c:v", "libx264",
        "-crf", "23",
        "-preset", "medium",
        output_video,
    ]
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=300)
        if process.returncode != 0:
            raise RuntimeError(f"ffmpeg error: {process.stderr}")
    except subprocess.TimeoutExpired:
        raise RuntimeError("ffmpeg process timed out.")
    except Exception as e:
        # Note: this also re-wraps the returncode RuntimeError above,
        # matching the original control flow.
        raise RuntimeError(f"Error running ffmpeg: {e}")
# Helper function to write Word documents
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Write transcription segments to a .docx file, one numbered paragraph
    per segment; translates each segment first when translation_model is set.
    """
    doc = Document()
    # Only Persian output is treated as right-to-left here.
    rtl = target_language == "fa"
    for i, segment in enumerate(transcription['segments']):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        para = doc.add_paragraph(f"{i + 1}. {text.strip()}")
        if rtl:
            # NOTE(review): python-docx's ParagraphFormat does not document a
            # `right_to_left` property — this assignment may be a silent no-op
            # (true bidi needs the w:bidi element). Verify against the
            # python-docx API before relying on RTL layout.
            para.paragraph_format.right_to_left = True
    doc.save(output_file)
# Helper function to reverse text for RTL
def reverse_text_for_rtl(text):
    """Reverse the characters of each word while preserving word order.

    A naive visual-RTL workaround for renderers (FPDF) that draw glyphs
    strictly left-to-right. Whitespace runs are collapsed to single spaces
    (a side effect of str.split()).
    """
    mirrored_words = (word[::-1] for word in text.split())
    return ' '.join(mirrored_words)
# Helper function to write PDF documents
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None,
              font_path="/home/user/app/B-NAZANIN.TTF"):
    """Write transcription segments to a PDF, one numbered line per segment.

    Parameters
    ----------
    transcription : dict
        Whisper result with a 'segments' list of dicts carrying 'text'.
    output_file : str
        Destination PDF path.
    tokenizer, translation_model : optional
        When translation_model is truthy, each segment is translated first.
    font_path : str
        TrueType font used for rendering. Defaults to the original
        hard-coded B-NAZANIN location, but is now overridable so the
        function works outside that specific deployment.
    """
    pdf = FPDF()
    pdf.add_page()
    # A Unicode (uni=True) font is required for non-Latin scripts.
    pdf.add_font('B-NAZANIN', '', font_path, uni=True)
    pdf.set_font('B-NAZANIN', size=12)
    for i, segment in enumerate(transcription['segments']):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        # FPDF draws left-to-right; mirror each word's glyphs as a
        # visual-RTL workaround (see reverse_text_for_rtl).
        reversed_text = reverse_text_for_rtl(text)
        pdf.multi_cell(0, 10, f"{i + 1}. {reversed_text.strip()}", align='L')
    pdf.output(output_file)
# Helper function to write PowerPoint slides
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Create a .pptx with one slide per transcription segment.

    Each segment's (optionally translated) text becomes the slide title,
    using slide layout 5 (title-only).
    """
    presentation = Presentation()
    title_only_layout = presentation.slide_layouts[5]
    for index, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        slide = presentation.slides.add_slide(title_only_layout)
        slide.shapes.title.text = f"{index}. {text.strip()}"
    presentation.save(output_file)
# Function to download YouTube video
def download_youtube_video(url):
    """Download *url* as MP4 to a fixed local filename and return that path.

    NOTE: the fixed 'downloaded_video.mp4' output name means concurrent
    calls overwrite each other's downloads.
    """
    output_path = 'downloaded_video.mp4'
    downloader_options = {
        'format': 'mp4',
        'outtmpl': output_path,
    }
    with yt_dlp.YoutubeDL(downloader_options) as downloader:
        downloader.download([url])
    return output_path
# Transcribing video and generating output
def transcribe_video(video_file, video_url, language, target_language, output_format):
    """Transcribe a local or YouTube video and export subtitles.

    Parameters
    ----------
    video_file : file-like or None
        Uploaded file object (must expose .name); ignored when video_url
        is provided.
    video_url : str
        Optional YouTube URL; takes precedence over video_file.
    language : str
        Language code passed to Whisper for transcription.
    target_language : str
        Desired subtitle language; anything other than "en" loads the
        M2M100 translation model.
    output_format : str
        One of "SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint".

    Returns
    -------
    str
        Path to the generated output file.

    Raises
    ------
    ValueError
        If no input source is given or output_format is unknown.
    RuntimeError
        On translation-model or ffmpeg failures.
    """
    if video_url:
        video_file_path = download_youtube_video(video_url)
    elif video_file is not None:
        video_file_path = video_file.name
    else:
        # Previously this crashed with AttributeError on video_file.name.
        raise ValueError("Provide either an uploaded video file or a YouTube URL.")
    result = model.transcribe(video_file_path, language=language)
    video_name = os.path.splitext(video_file_path)[0]
    if target_language != "en":
        try:
            tokenizer, translation_model = load_translation_model(target_language)
        except Exception as e:
            raise RuntimeError(f"Error loading translation model: {e}")
    else:
        tokenizer, translation_model = None, None
    # The SRT is always produced: it is both a selectable output format and
    # the input for hardsub burning.
    srt_file = f"{video_name}.srt"
    write_srt(result, srt_file, tokenizer, translation_model)
    if output_format == "SRT":
        return srt_file
    if output_format == "Video with Hardsub":
        output_video = f"{video_name}_with_subtitles.mp4"
        try:
            embed_hardsub_in_video(video_file_path, srt_file, output_video)
            return output_video
        except Exception as e:
            raise RuntimeError(f"Error embedding subtitles in video: {e}")
    if output_format == "Word":
        word_file = f"{video_name}.docx"
        write_word(result, word_file, tokenizer, translation_model, target_language)
        return word_file
    if output_format == "PDF":
        pdf_file = f"{video_name}.pdf"
        write_pdf(result, pdf_file, tokenizer, translation_model)
        return pdf_file
    if output_format == "PowerPoint":
        ppt_file = f"{video_name}.pptx"
        write_ppt(result, ppt_file, tokenizer, translation_model)
        return ppt_file
    # Previously an unknown format fell through and silently returned None.
    raise ValueError(f"Unsupported output format: {output_format}")
# Gradio interface with YouTube URL
# Input widgets, in the positional order transcribe_video expects.
_input_components = [
    gr.File(label="Upload Video File (or leave empty for YouTube link)"),
    gr.Textbox(label="YouTube Video URL (optional)", placeholder="https://www.youtube.com/watch?v=..."),
    gr.Dropdown(label="Select Original Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
    gr.Dropdown(label="Select Subtitle Translation Language", choices=["en", "fa", "es", "de", "fr", "it", "pt"], value="fa"),
    gr.Radio(label="Choose Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub"),
]
_app_description = (
    "This tool allows you to generate subtitles from a video file or YouTube link using Whisper, "
    "translate the subtitles into multiple languages using M2M100, and export them "
    "in various formats including SRT, hardcoded subtitles in video, Word, PDF, or PowerPoint."
)
iface = gr.Interface(
    fn=transcribe_video,
    inputs=_input_components,
    outputs=gr.File(label="Download File"),
    title="Video Subtitle Generator with Translation & Multi-Format Output (Supports YouTube)",
    description=_app_description,
    theme="compact",
    live=False,
)
# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    iface.launch()