# NOTE(review): removed non-source extraction residue that preceded the code
# (a "File size" banner plus git-blame hash and line-number gutters) — it was
# not Python and made the file unparseable.
import os
import shutil
import subprocess

import gradio as gr
import torch
import whisper
from moviepy.editor import VideoFileClip
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

# Whisper ASR model used to transcribe the (English) audio track.
model = whisper.load_model("base")

# M2M-100 model & tokenizer used to translate the transcript out of English.
m2m_model_name = "facebook/m2m100_418M"
tokenizer = M2M100Tokenizer.from_pretrained(m2m_model_name)
translator_model = M2M100ForConditionalGeneration.from_pretrained(m2m_model_name)

# Run translation on GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
translator_model.to(device)

# UI label shown in the dropdown -> M2M-100 language code.
TRANSLATION_LANGUAGES = {
    "English (No Translation)": "en",
    "Urdu": "ur",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Chinese": "zh",
    "Arabic": "ar",
    "Hindi": "hi"
}
def translate_text_m2m(text_list, target_lang):
    """Translate a batch of English sentences with M2M-100.

    Args:
        text_list: list of English source strings.
        target_lang: M2M-100 language code (e.g. "ur", "fr").

    Returns:
        The translated strings; ``text_list`` itself when ``target_lang``
        is "en" (no translation needed).
    """
    if target_lang == "en":
        return text_list

    tokenizer.src_lang = "en"
    encoded = tokenizer(
        text_list, return_tensors="pt", padding=True, truncation=True
    ).to(device)
    # Force the decoder to begin generation in the requested language.
    generated = translator_model.generate(
        **encoded, forced_bos_token_id=tokenizer.get_lang_id(target_lang)
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)
def _format_srt_time(seconds):
    """Render a time offset in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = seconds % 60
    milliseconds = int((secs - int(secs)) * 1000)
    return f"{hours:02}:{minutes:02}:{int(secs):02},{milliseconds:03}"

def generate_translated_subtitles(video_path, target_language):
    """Extract audio, transcribe it with Whisper, translate the segments,
    and write them out as an SRT file.

    Args:
        video_path: path of the source video (assumed English speech).
        target_language: UI label; a key of TRANSLATION_LANGUAGES.

    Returns:
        Path of the written .srt file.
    """
    video = VideoFileClip(video_path)
    audio_path = "temp_audio.wav"
    try:
        video.audio.write_audiofile(audio_path)
    finally:
        # Fix: the clip was never closed before, leaking the ffmpeg reader.
        video.close()

    # Transcribe with Whisper, then delete the temporary WAV.
    result = model.transcribe(audio_path, language="en")
    os.remove(audio_path)

    texts = [segment['text'] for segment in result['segments']]
    translated_texts = translate_text_m2m(texts, TRANSLATION_LANGUAGES[target_language])

    srt_filename = f"subtitles_{TRANSLATION_LANGUAGES.get(target_language, 'en')}.srt"
    # "utf-8-sig" writes a BOM so players reliably detect Unicode subtitles.
    with open(srt_filename, "w", encoding="utf-8-sig") as srt_file:
        for index, (segment, translated_text) in enumerate(zip(result['segments'], translated_texts)):
            srt_file.write(f"{index + 1}\n")
            srt_file.write(f"{_format_srt_time(segment['start'])} --> {_format_srt_time(segment['end'])}\n")
            srt_file.write(f"{translated_text}\n\n")
    return srt_filename
def burn_subtitles_on_video(video_path, srt_path):
    """Burn the subtitles in *srt_path* into *video_path* using ffmpeg.

    Returns:
        Path of the subtitled video, or None when ffmpeg fails.
    """
    # Copy inputs to fixed, quote-free filenames so the subtitles filter
    # string needs no path escaping.
    new_video_path = "input_video.mp4"
    new_srt_path = "subtitles.srt"
    output_video = "video_with_subtitles.mp4"
    shutil.copy(video_path, new_video_path)
    shutil.copy(srt_path, new_srt_path)
    command = [
        "ffmpeg",
        "-y",  # overwrite a stale output file from a previous run
        "-i", new_video_path,
        # Fix: "Fontfile" is not an ASS style field and was silently ignored;
        # force_style takes "Fontname" (font files are resolved via fontconfig).
        "-vf", f"subtitles={new_srt_path}:force_style='Fontname=DejaVu Sans,Fontsize=24,PrimaryColour=&HFFFFFF&'",
        "-c:a", "copy",  # keep the audio stream untouched
        output_video,
    ]
    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        print("FFmpeg Output:", result.stdout)
        print("FFmpeg Error:", result.stderr)
        return output_video
    except subprocess.CalledProcessError as e:
        print("FFmpeg Error:", e.stderr)
        return None
def video_to_translated_subtitles(video, target_language, output_type):
    """Gradio callback: produce an SRT file or a subtitled video.

    Args:
        video: path of the uploaded video.
        target_language: UI label; a key of TRANSLATION_LANGUAGES.
        output_type: "SRT File" or "Burned-in Subtitles".

    Returns:
        The result path twice — once for the preview component and once
        for the download button.
    """
    srt_path = generate_translated_subtitles(video, target_language)
    result = (
        srt_path
        if output_type == "SRT File"
        else burn_subtitles_on_video(video, srt_path)
    )
    return result, result
# Gradio UI: one video in; a translated subtitle file (or subtitled video) out.
iface = gr.Interface(
    fn=video_to_translated_subtitles,
    inputs=[
        gr.Video(label="Upload English Video"),
        gr.Dropdown(
            choices=list(TRANSLATION_LANGUAGES.keys()),
            label="Translate to",
            value="English (No Translation)",
        ),
        gr.Radio(["SRT File", "Burned-in Subtitles"], label="Select Output Type"),
    ],
    outputs=[
        gr.File(label="Output"),
        gr.DownloadButton(label="Download File"),
    ],
    title="Video to Subtitles (With Translation)",
    description="Upload an English video, and get subtitles in your desired language",
)
# Fix: the original line ended with a stray " |" (extraction artifact) that
# made the file a syntax error.
iface.launch(share=True)