import logging
import os
import shutil
import subprocess
import tempfile

import gradio as gr
import whisper
import yt_dlp
from moviepy.editor import VideoFileClip

logger = logging.getLogger(__name__)

# Load the Whisper model once at import time so every request reuses it.
model = whisper.load_model("small")


def _resolve_video_path(video, temp_dir):
    """Return a local file path for *video*, downloading or copying if needed.

    Accepted inputs, checked in this order:
    - an ``http://`` / ``https://`` URL string (downloaded with yt-dlp into
      *temp_dir*),
    - a dict containing a ``"name"`` key (n8n-style file input; the value is
      used as the path as-is),
    - any object with a ``name`` attribute (Gradio upload; the file is copied
      into *temp_dir*),
    - a string naming an existing local file.

    Raises:
        ValueError: if *video* matches none of the shapes above.
    """
    if isinstance(video, str):
        # Text pasted into the UI often carries stray whitespace/newlines.
        video = video.strip()

    if isinstance(video, str) and video.startswith(("http://", "https://")):
        video_path = os.path.join(temp_dir, "downloaded_video.mp4")
        ydl_opts = {
            "format": "bestvideo+bestaudio/best",
            "outtmpl": video_path,
            "quiet": True,
            "merge_output_format": "mp4",
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video])
        return video_path

    if isinstance(video, dict) and "name" in video:
        return video["name"]

    if hasattr(video, "name"):
        video_path = os.path.join(temp_dir, os.path.basename(video.name))
        shutil.copy(video.name, video_path)
        return video_path

    if isinstance(video, str) and os.path.exists(video):
        return video

    raise ValueError("Invalid video input type")


def _write_srt(segments, srt_path):
    """Write Whisper *segments* to *srt_path* as numbered SRT cues."""
    with open(srt_path, "w", encoding="utf-8") as f:
        for index, seg in enumerate(segments, start=1):
            text = seg["text"].strip()
            f.write(
                f"{index}\n"
                f"{format_time(seg['start'])} --> {format_time(seg['end'])}\n"
                f"{text}\n\n"
            )


def process_video(video):
    """Transcribe a video with Whisper and burn the subtitles into it.

    Handles:
    - Gradio uploads
    - n8n binary uploads (multipart/form-data)
    - YouTube or web URLs

    Returns:
        ``(output_path, transcript)`` on success, where ``output_path`` is the
        subtitled MP4 inside a fresh temporary directory. On any failure,
        ``(None, "❌ Error: <message>")``.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        video_path = _resolve_video_path(video, temp_dir)

        # 🎧 Extract audio
        audio_path = os.path.join(temp_dir, "audio.wav")
        video_clip = VideoFileClip(video_path)
        try:
            if video_clip.audio is None:
                raise ValueError("Video has no audio track to transcribe")
            video_clip.audio.write_audiofile(audio_path, codec="pcm_s16le")
        finally:
            # Always release the reader, even if extraction fails.
            video_clip.close()

        # 🗣 Transcribe using Whisper
        result = model.transcribe(audio_path, language="en")
        transcript = result["text"]

        # 💬 Save transcript to SRT
        srt_path = os.path.join(temp_dir, "transcript.srt")
        _write_srt(result["segments"], srt_path)

        # 🔥 Burn subtitles into video
        output_path = os.path.join(temp_dir, "output_with_subs.mp4")
        burn_subtitles(video_path, srt_path, output_path)

        return output_path, transcript

    except Exception as e:
        # Top-level boundary: report the failure to the UI, keep the traceback
        # in the logs, and do not leave a half-filled temp directory behind.
        logger.exception("Video processing failed")
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None, f"❌ Error: {str(e)}"


def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
    # Round to whole milliseconds first: truncating the fractional part of a
    # float drops a millisecond for values such as 1.001.
    total_ms = int(round(seconds * 1000))
    hrs, rem = divmod(total_ms, 3_600_000)
    mins, rem = divmod(rem, 60_000)
    secs, millis = divmod(rem, 1000)
    return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"


def _escape_ffmpeg_filter_value(value):
    """Escape *value* for use as an option value inside an FFmpeg filtergraph.

    Two escaping levels apply: the filter's option parser, then the
    filtergraph parser. Backslash is handled first at each level so escapes
    added for other characters are not doubled within that level.
    """
    for ch in ("\\", "'", ":"):
        value = value.replace(ch, "\\" + ch)
    for ch in ("\\", "'", "[", "]", ",", ";"):
        value = value.replace(ch, "\\" + ch)
    return value


def burn_subtitles(video_path, srt_path, output_path):
    """Use FFmpeg to burn subtitles permanently into the video.

    Raises:
        subprocess.CalledProcessError: if FFmpeg exits with a non-zero status.
    """
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vf", f"subtitles={_escape_ffmpeg_filter_value(srt_path)}",
        "-c:a", "copy",
        output_path,
    ]
    subprocess.run(cmd, check=True)


# 🎛️ Gradio Interface
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Textbox(label="Enter YouTube URL or Upload Video File"),
    outputs=[
        gr.Video(label="Video with Subtitles"),
        gr.Textbox(label="Transcript", lines=8),
    ],
    title="🎬 Whisper Subtitle Burner",
    description="Upload a video or paste a YouTube link. This app generates English subtitles using Whisper and burns them into the video.",
)

iface.launch()