import os
import shutil
import subprocess

import gradio as gr
import yt_dlp
from faster_whisper import WhisperModel

# ===============================
# 1. Whisper Model (Lazy Load)
# ===============================
# Shared model instance; stays None until load_model() is first called.
model = None


def load_model() -> WhisperModel:
    """Return the shared Whisper model, creating it on first use.

    The model is built once ("base" size, CPU, int8 compute type) and cached
    in the module-level ``model`` variable so later calls reuse it.

    Returns:
        The cached ``WhisperModel`` instance.
    """
    global model
    if model is None:
        print("📥 Loading Whisper Model...")
        model = WhisperModel("base", device="cpu", compute_type="int8")
        print("✅ Model Loaded")
    return model


# ===============================
# 2. FFmpeg Path
# ===============================
def get_ffmpeg_path() -> str:
    """Return the ffmpeg executable path.

    Uses whatever ``shutil.which`` finds on PATH and falls back to
    ``/usr/bin/ffmpeg`` when nothing is found.
    """
    return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"


def _remove_if_exists(path: str) -> None:
    """Delete *path* if present so a stale file is never taken for new output."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


# ===============================
# 3. Convert Video → Audio
# ===============================
def extract_audio(video_path: str) -> str:
    """Convert a media file to a mono 16 kHz WAV using ffmpeg.

    Args:
        video_path: Path of the input media file.

    Returns:
        The fixed output path ``"uploaded_audio.wav"``. ffmpeg's exit status
        is not inspected here, so the file may be missing if the conversion
        failed; callers should check that it exists.
    """
    audio_path = "uploaded_audio.wav"
    # Clear any earlier result so a failed run cannot leave an old file behind.
    _remove_if_exists(audio_path)

    cmd = [
        get_ffmpeg_path(),
        "-y",  # overwrite flag goes before the files, not as a trailing option
        "-i", video_path,
        "-vn",  # drop the video stream
        "-ac", "1",  # mono
        "-ar", "16000",  # 16 kHz sample rate
        audio_path,
    ]
    subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return audio_path


# ===============================
# 4. Download Audio from ANY URL
# ===============================
def download_audio_from_url(url: str) -> str:
    """Download the audio track of *url* with yt-dlp and convert it to WAV.

    Args:
        url: Media URL to download; surrounding whitespace is ignored.

    Returns:
        The fixed output path ``"url_audio.wav"``.

    Exceptions raised by yt-dlp while downloading propagate to the caller.
    """
    output = "url_audio.%(ext)s"
    wav_path = "url_audio.wav"
    # Remove the previous download first; otherwise an old file could be
    # returned (and transcribed) as if it belonged to this URL.
    _remove_if_exists(wav_path)

    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": output,
        "ffmpeg_location": os.path.dirname(get_ffmpeg_path()),
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
            "preferredquality": "192",
        }],
        "quiet": True,
        # NOTE(review): this turns off TLS certificate verification for
        # downloads. Kept for compatibility; confirm it is really required.
        "nocheckcertificate": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url.strip()])

    return wav_path


# ===============================
# 5.
# Main Transcribe Logic
# ===============================
# Uploads with these extensions are passed to Whisper unchanged; every other
# upload is converted to WAV first.
_DIRECT_AUDIO_EXTENSIONS = (".mp3", ".wav", ".m4a")


def transcribe_media(url_input, file_input) -> str:
    """Transcribe an uploaded file or a media URL and return the text.

    An uploaded file takes priority over the URL when both are given.

    Args:
        url_input: Media URL typed by the user, or None / empty.
        file_input: Uploaded file, either a path string or an object that
            exposes the path as ``.name``, or None.

    Returns:
        The transcript, or a user-facing message starting with ⚠️ / ❌ when
        there is no input, no speech, or processing failed. Exceptions are
        not propagated; they are reported in the returned string.
    """
    try:
        # ---------- FILE UPLOAD ----------
        if file_input:
            # Accept both a plain path and a temp-file wrapper with a .name path.
            file_path = getattr(file_input, "name", file_input)
            ext = os.path.splitext(file_path)[1].lower()
            if ext in _DIRECT_AUDIO_EXTENSIONS:
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)

        # ---------- URL ----------
        elif url_input and url_input.strip():
            audio_path = download_audio_from_url(url_input.strip())

        else:
            return "⚠️ Please paste a link or upload a file."

        if not os.path.exists(audio_path):
            return "❌ Audio processing failed."

        whisper = load_model()
        segments, _ = whisper.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
        )

        # Strip each segment so joining never yields doubled spaces, and so a
        # whitespace-only result is reported as "no speech" rather than "".
        pieces = (seg.text.strip() for seg in segments)
        text = " ".join(piece for piece in pieces if piece)
        return text or "⚠️ No speech detected."

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"❌ Error: {e}"


# ===============================
# 6. UI
# ===============================
css = """
.container {max-width: 900px; margin: auto;}
.gr-button-primary {
    background: linear-gradient(90deg,#ff416c,#ff4b2b);
    border: none;
    color: white;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("## 🚀 Universal Video Transcript Tool")
        gr.Markdown(
            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
            "**OR** upload video/audio file."
        )

        with gr.Tabs():
            with gr.TabItem("🔗 Paste Link"):
                url_in = gr.Textbox(
                    label="Video URL",
                    placeholder="https://youtube.com / tiktok.com / instagram.com"
                )
                btn_url = gr.Button("🎧 Transcribe Link", variant="primary")

            with gr.TabItem("📂 Upload File"):
                file_in = gr.File(
                    label="Upload Video / Audio",
                    # ".m4a" is listed because transcribe_media accepts it directly.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ]
                )
                btn_file = gr.Button("📂 Transcribe File", variant="primary")

        output = gr.Code(label="Transcript Output", language="markdown", lines=15)

    # Each button fills only its own argument; the other one is a constant None.
    btn_url.click(transcribe_media, [url_in, gr.State(None)], output)
    btn_file.click(transcribe_media, [gr.State(None), file_in], output)

demo.launch()