Spaces:
Running
Running
| import gradio as gr | |
| import yt_dlp | |
| import os | |
| import shutil | |
| import subprocess | |
| from faster_whisper import WhisperModel | |
| # =============================== | |
| # 1. Whisper Model (Lazy Load) | |
| # =============================== | |
# Module-level cache for the Whisper model. It stays None until the first
# call to load_model(), so importing this file does not load the model.
model = None


def load_model():
    """Return the shared Whisper model, building it on the first call.

    The instance is kept in the module-level ``model`` variable and handed
    back unchanged by every later call. It is created as the "base" model
    on the CPU with int8 computation.
    """
    global model
    if model is not None:
        return model

    print("π₯ Loading Whisper Model...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    print("β Model Loaded")
    return model
| # =============================== | |
| # 2. FFmpeg Path | |
| # =============================== | |
def get_ffmpeg_path():
    """Return the ffmpeg executable found on PATH.

    Falls back to ``/usr/bin/ffmpeg`` when no ``ffmpeg`` is found on PATH.
    """
    return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
| # =============================== | |
| # 3. Convert Video → Audio | |
| # =============================== | |
def extract_audio(video_path):
    """Convert a media file to a mono, 16 kHz WAV file with ffmpeg.

    Args:
        video_path: Path of the media file to read.

    Returns:
        The output path ``"uploaded_audio.wav"`` (relative to the current
        working directory). The file only exists when ffmpeg finished
        successfully, so callers should check for it before using it.
    """
    # NOTE: the output name is fixed, so calls running at the same time
    # share (and overwrite) the same file.
    audio_path = "uploaded_audio.wav"

    # Drop the result of an earlier run so it can never be mistaken for the
    # output of this one.
    if os.path.exists(audio_path):
        os.remove(audio_path)

    cmd = [
        get_ffmpeg_path(),
        "-nostdin",          # never wait for interactive input
        "-y",                # global option: belongs before the output file
        "-i", video_path,
        "-vn",               # ignore any video stream
        "-ac", "1",          # one audio channel
        "-ar", "16000",      # 16 kHz sample rate
        audio_path,
    ]
    result = subprocess.run(
        cmd,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )

    # A failed run can leave a truncated file behind; remove it so that the
    # caller's existence check reports the failure instead of using it.
    if result.returncode != 0 and os.path.exists(audio_path):
        os.remove(audio_path)

    return audio_path
| # =============================== | |
| # 4. Download Audio from ANY URL | |
| # =============================== | |
def download_audio_from_url(url):
    """Download the audio of a media URL with yt-dlp and convert it to WAV.

    Args:
        url: Address of the media to fetch; surrounding whitespace is
            ignored.

    Returns:
        The path ``"url_audio.wav"`` (relative to the current working
        directory) that the WAV post-processor is configured to produce.

    Errors raised by yt-dlp are not caught here and propagate to the caller.
    """
    # NOTE: the output name is fixed, so calls running at the same time
    # share (and overwrite) the same file.
    base_name = "url_audio"
    audio_path = f"{base_name}.wav"

    # Same clean-up as extract_audio(): a file left by an earlier download
    # must not be picked up as the result of this one.
    if os.path.exists(audio_path):
        os.remove(audio_path)

    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": f"{base_name}.%(ext)s",
        "ffmpeg_location": os.path.dirname(get_ffmpeg_path()),
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
            "preferredquality": "192",
        }],
        # Every entry of a playlist would be written to the same output
        # name, so only fetch the single video a link points at.
        "noplaylist": True,
        "quiet": True,
        # NOTE(review): this turns off TLS certificate verification for
        # downloads. Kept as in the original; confirm it is really wanted.
        "nocheckcertificate": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url.strip()])
    return audio_path
| # =============================== | |
| # 5. Main Transcribe Logic | |
| # =============================== | |
def transcribe_media(url_input, file_input):
    """Transcribe an uploaded file or a media URL and return the text.

    An uploaded file takes precedence over the URL. Files ending in
    ``.mp3``, ``.wav`` or ``.m4a`` are passed to the model directly; any
    other file is first converted with ``extract_audio``.

    Args:
        url_input: Media URL, or ``None``/blank when no link was given.
        file_input: Path of the uploaded file, an object exposing that path
            as ``.name``, or ``None`` when nothing was uploaded.

    Returns:
        The transcript, or a user-facing message when no input was given,
        the audio could not be prepared, no speech was found, or an error
        occurred. Exceptions are reported as text and never raised.
    """
    try:
        # ---------- FILE UPLOAD ----------
        if file_input:
            # The upload may arrive as a plain path or as a file-like
            # wrapper carrying the path in ``.name``; accept both.
            file_path = getattr(file_input, "name", file_input)
            ext = os.path.splitext(file_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)
        # ---------- URL ----------
        elif url_input and url_input.strip():
            audio_path = download_audio_from_url(url_input.strip())
        else:
            return "β οΈ Please paste a link or upload a file."

        if not os.path.exists(audio_path):
            return "β Audio processing failed."

        whisper = load_model()
        segments, _ = whisper.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
        )

        # Trim each segment before joining so the transcript has single
        # spaces, and so whitespace-only output counts as "no speech".
        pieces = (seg.text.strip() for seg in segments)
        text = " ".join(piece for piece in pieces if piece)
        return text or "β οΈ No speech detected."
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"β Error: {str(e)}"
| # =============================== | |
| # 6. UI | |
| # =============================== | |
# Page styling: a centred column capped at 900px and a red gradient for
# primary buttons.
css = """
.container {max-width: 900px; margin: auto;}
.gr-button-primary {
background: linear-gradient(90deg,#ff416c,#ff4b2b);
border: none;
color: white;
}
"""

# Two tabs (link / upload) share one output box. Each button calls
# transcribe_media() with a None placeholder for the input it does not use.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("## π Universal Video Transcript Tool")
        gr.Markdown(
            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
            "**OR** upload video/audio file."
        )
        with gr.Tabs():
            with gr.TabItem("π Paste Link"):
                url_in = gr.Textbox(
                    label="Video URL",
                    placeholder="https://youtube.com / tiktok.com / instagram.com",
                )
                btn_url = gr.Button("π§ Transcribe Link", variant="primary")
            with gr.TabItem("π Upload File"):
                file_in = gr.File(
                    label="Upload Video / Audio",
                    # ".m4a" is listed because transcribe_media() accepts it
                    # as a ready-to-use audio format.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ],
                )
                btn_file = gr.Button("π Transcribe File", variant="primary")
        output = gr.Code(label="Transcript Output", language="markdown", lines=15)
    btn_url.click(transcribe_media, [url_in, gr.State(None)], output)
    btn_file.click(transcribe_media, [gr.State(None), file_in], output)

demo.launch()