# Offex's picture
# Update app.py
# ef9a67d verified
# raw
# history blame
# 4.38 kB
import gradio as gr
import yt_dlp
import os
import shutil
import subprocess
from faster_whisper import WhisperModel
# ===============================
# 1. Whisper Model (Lazy Load)
# ===============================
# Process-wide Whisper model; stays None until the first call to load_model().
model = None


def load_model():
    """Return the shared Whisper model, creating it on first use.

    The model is built lazily so that importing this module does not pay the
    load cost; subsequent calls return the already-created instance.

    Returns:
        The ``WhisperModel`` stored in the module-level ``model`` global
        ("base" weights, CPU device, int8 compute type).
    """
    global model
    if model is None:
        print("📥 Loading Whisper Model...")
        model = WhisperModel("base", device="cpu", compute_type="int8")
        print("✅ Model Loaded")
    return model
# ===============================
# 2. FFmpeg Path
# ===============================
def get_ffmpeg_path() -> str:
    """Return the path of the ffmpeg executable to use.

    Looks ffmpeg up on ``PATH`` via ``shutil.which``; when it is not found
    there, falls back to ``/usr/bin/ffmpeg`` (without checking that the
    fallback actually exists).
    """
    return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
# ===============================
# 3. Convert Video → Audio
# ===============================
def extract_audio(video_path):
    """Extract the audio track of *video_path* into a mono 16 kHz WAV file.

    Args:
        video_path: Path of the media file handed to ffmpeg as input.

    Returns:
        The fixed output path ``"uploaded_audio.wav"``. ffmpeg's exit status
        is not checked here, so the file may be missing if the conversion
        failed; callers should verify that it exists.
    """
    audio_path = "uploaded_audio.wav"

    # Drop any output left over from an earlier run so that a failed
    # conversion cannot be mistaken for a successful one.
    try:
        os.remove(audio_path)
    except FileNotFoundError:
        pass

    cmd = [
        get_ffmpeg_path(),
        "-y",  # global option: placed first, as the ffmpeg synopsis documents
        "-i", video_path,
        "-vn",  # no video stream in the output
        "-ac", "1",  # mono
        "-ar", "16000",  # 16 kHz sample rate
        audio_path,
    ]
    subprocess.run(
        cmd,
        # ffmpeg reads interactive commands from stdin by default; detach it
        # so it never consumes or waits on the server process's stdin.
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return audio_path
# ===============================
# 4. Download Audio from ANY URL
# ===============================
def download_audio_from_url(url):
    """Download the audio of *url* with yt-dlp and convert it to WAV.

    Args:
        url: Media page or file URL; surrounding whitespace is ignored.

    Returns:
        The fixed output path ``"url_audio.wav"``.

    Raises:
        Whatever ``yt_dlp.YoutubeDL.download`` raises when the download or
        the audio-extraction post-processing step fails.
    """
    wav_path = "url_audio.wav"

    # A WAV left behind by a previous request must not be returned as the
    # result for this URL (yt-dlp skips downloads whose target already
    # exists), so clear it before starting.
    try:
        os.remove(wav_path)
    except FileNotFoundError:
        pass

    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": "url_audio.%(ext)s",
        "ffmpeg_location": os.path.dirname(get_ffmpeg_path()),
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
            "preferredquality": "192",
        }],
        # The output name is fixed, so only a single item can be kept:
        # for "video inside a playlist" URLs fetch just the video.
        "noplaylist": True,
        "quiet": True,
        "nocheckcertificate": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url.strip()])
    return wav_path
# ===============================
# 5. Main Transcribe Logic
# ===============================
def transcribe_media(url_input, file_input):
    """Transcribe an uploaded file or a media URL and return the text.

    An uploaded file takes precedence over the URL when both are given.

    Args:
        url_input: URL string from the link tab, or ``None``/empty.
        file_input: Uploaded file from the upload tab, or ``None``. Either a
            filesystem path string or an object exposing the path as
            ``.name``.

    Returns:
        The transcript text, or a user-facing message (prefixed with an
        emoji) when no input was given, audio preparation failed, no speech
        was detected, or any exception occurred. Exceptions are never
        propagated to the caller.
    """
    try:
        # ---------- FILE UPLOAD ----------
        if file_input:
            # Accept both a plain path and a file-like wrapper with `.name`.
            file_path = getattr(file_input, "name", file_input)
            ext = os.path.splitext(file_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                # Already audio: hand it to the model as-is.
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)
        # ---------- URL ----------
        elif url_input and url_input.strip():
            audio_path = download_audio_from_url(url_input.strip())
        else:
            return "⚠️ Please paste a link or upload a file."

        if not os.path.exists(audio_path):
            return "❌ Audio processing failed."

        whisper = load_model()
        segments, _ = whisper.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
        )
        # Strip each segment before joining so the separator does not stack
        # with whitespace already present at the segment edges.
        text = " ".join(seg.text.strip() for seg in segments).strip()
        # Decide on the stripped text so whitespace-only output is reported
        # as "no speech" instead of being returned as an empty string.
        return text or "⚠️ No speech detected."
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing.
        return f"❌ Error: {e}"
# ===============================
# 6. UI
# ===============================
# Custom styling: centre the page content and give primary buttons a gradient.
css = """
.container {max-width: 900px; margin: auto;}
.gr-button-primary {
background: linear-gradient(90deg,#ff416c,#ff4b2b);
border: none;
color: white;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("## 🚀 Universal Video Transcript Tool")
        gr.Markdown(
            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
            "**OR** upload video/audio file."
        )
        with gr.Tabs():
            with gr.TabItem("🔗 Paste Link"):
                url_in = gr.Textbox(
                    label="Video URL",
                    placeholder="https://youtube.com / tiktok.com / instagram.com"
                )
                btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
            with gr.TabItem("📂 Upload File"):
                file_in = gr.File(
                    label="Upload Video / Audio",
                    # Includes .m4a, which transcribe_media already treats
                    # as ready-to-use audio.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ]
                )
                btn_file = gr.Button("📂 Transcribe File", variant="primary")
        output = gr.Code(label="Transcript Output", language="markdown", lines=15)

    # Each button feeds only its own input; the other argument of
    # transcribe_media is pinned to None through a constant State.
    btn_url.click(transcribe_media, [url_in, gr.State(None)], output)
    btn_file.click(transcribe_media, [gr.State(None), file_in], output)

demo.launch()