# Spaces: Running
# File size: 4,378 Bytes
# (Web-viewer residue removed: per-line commit hashes and the 1-153 line-number gutter.)
import gradio as gr
import yt_dlp
import os
import shutil
import subprocess
from faster_whisper import WhisperModel
# ===============================
# 1. Whisper Model (Lazy Load)
# ===============================
# Process-wide Whisper model cache; stays None until load_model() is first
# called, so the model is only built when a transcription is requested.
model = None


def load_model():
    """Return the shared Whisper model, creating it on first use.

    The model is built once as ``WhisperModel("base", device="cpu",
    compute_type="int8")`` and stored in the module-level ``model`` variable;
    later calls return that cached instance.

    Returns:
        The cached ``WhisperModel`` instance.
    """
    global model
    if model is None:
        print("📥 Loading Whisper Model...")
        model = WhisperModel("base", device="cpu", compute_type="int8")
        print("✅ Model Loaded")
    return model
# ===============================
# 2. FFmpeg Path
# ===============================
def get_ffmpeg_path():
    """Locate the ffmpeg executable.

    Returns the path found on ``PATH`` via ``shutil.which``; when nothing is
    found, falls back to ``/usr/bin/ffmpeg`` without checking that it exists.
    """
    return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
# ===============================
# 3. Convert Video → Audio
# ===============================
def extract_audio(video_path):
    """Extract a mono 16 kHz WAV track from ``video_path`` using ffmpeg.

    The result is always written to ``uploaded_audio.wav`` in the current
    working directory, replacing any earlier extraction.

    Args:
        video_path: Path of the media file to read.

    Returns:
        The output path, ``"uploaded_audio.wav"``. ffmpeg's exit status is not
        inspected, so callers must check that the file actually exists.
    """
    audio_path = "uploaded_audio.wav"
    # Drop the previous result first so a failed conversion cannot be mistaken
    # for success by a caller that only checks whether the file exists.
    try:
        os.remove(audio_path)
    except FileNotFoundError:
        pass
    cmd = [
        get_ffmpeg_path(),
        # Overwrite flag goes before the output path: options placed after the
        # last output are trailing options that ffmpeg may ignore.
        "-y",
        "-i", video_path,
        "-vn",  # skip video streams
        "-ac", "1",  # mono
        "-ar", "16000",  # 16 kHz sample rate
        audio_path,
    ]
    subprocess.run(
        cmd,
        stdin=subprocess.DEVNULL,  # never let ffmpeg wait on interactive input
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    return audio_path
# ===============================
# 4. Download Audio from ANY URL
# ===============================
def download_audio_from_url(url):
    """Download the audio of a single media URL and convert it to WAV.

    yt-dlp writes the download to ``url_audio.<ext>`` in the current working
    directory and its ``FFmpegExtractAudio`` post-processor converts it to WAV.

    Args:
        url: Page or media URL to hand to yt-dlp; surrounding whitespace is
            stripped.

    Returns:
        ``"url_audio.wav"``, the path the post-processor is expected to
        produce.
    """
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": "url_audio.%(ext)s",
        # Directory that holds the ffmpeg binary used for post-processing.
        "ffmpeg_location": os.path.dirname(get_ffmpeg_path()),
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
            "preferredquality": "192",
        }],
        "quiet": True,
        "nocheckcertificate": True,
        # A video URL that also carries a playlist id would otherwise fetch
        # every playlist entry into the same fixed output name.
        "noplaylist": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url.strip()])
    return "url_audio.wav"
# ===============================
# 5. Main Transcribe Logic
# ===============================
def transcribe_media(url_input, file_input):
    """Transcribe an uploaded file or a remote URL and return the text.

    An uploaded file takes precedence over a URL. Uploads ending in ``.mp3``,
    ``.wav`` or ``.m4a`` are passed to the model as-is; any other upload is
    first converted with ``extract_audio``. URLs are fetched with
    ``download_audio_from_url``.

    Args:
        url_input: Media URL, or ``None``/blank when a file is used instead.
        file_input: Path of the uploaded file (or an upload object exposing
            the path as ``.name``), or ``None`` when a URL is used instead.

    Returns:
        The transcript text, or a user-facing warning/error message. Any
        exception raised while processing is reported as an error message
        rather than propagated.
    """
    try:
        # ---------- FILE UPLOAD ----------
        if file_input:
            # gr.File can deliver either a path or a file-like object whose
            # ``.name`` is the path, depending on its ``type`` setting.
            if isinstance(file_input, (str, os.PathLike)):
                file_path = file_input
            else:
                file_path = getattr(file_input, "name", file_input)
            ext = os.path.splitext(file_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)
        # ---------- URL ----------
        elif url_input and url_input.strip():
            audio_path = download_audio_from_url(url_input.strip())
        else:
            return "⚠️ Please paste a link or upload a file."

        if not os.path.exists(audio_path):
            return "❌ Audio processing failed."

        # Named ``whisper`` so the module-level ``model`` cache is not shadowed.
        whisper = load_model()
        segments, _ = whisper.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
        )
        # Strip before testing so a whitespace-only result counts as "no speech".
        text = " ".join(seg.text for seg in segments).strip()
        return text or "⚠️ No speech detected."
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"❌ Error: {e}"
# ===============================
# 6. UI
# ===============================
# Custom styling: centred fixed-width column and a gradient primary button.
css = """
.container {max-width: 900px; margin: auto;}
.gr-button-primary {
background: linear-gradient(90deg,#ff416c,#ff4b2b);
border: none;
color: white;
}
"""

# NOTE(review): the leading symbols in the UI labels below look like
# mis-decoded emoji; they are kept as found. Restore them from the original
# file if it is available.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("## π Universal Video Transcript Tool")
        gr.Markdown(
            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
            "**OR** upload video/audio file."
        )
        with gr.Tabs():
            with gr.TabItem("π Paste Link"):
                url_in = gr.Textbox(
                    label="Video URL",
                    placeholder="https://youtube.com / tiktok.com / instagram.com"
                )
                btn_url = gr.Button("π§ Transcribe Link", variant="primary")
            with gr.TabItem("π Upload File"):
                file_in = gr.File(
                    label="Upload Video / Audio",
                    # ".m4a" is listed because transcribe_media accepts it
                    # directly as an audio upload.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ]
                )
                btn_file = gr.Button("π Transcribe File", variant="primary")
        output = gr.Code(label="Transcript Output", language="markdown", lines=15)

    # Each button feeds only its own input; the other argument is a constant
    # None so transcribe_media takes the matching branch.
    btn_url.click(transcribe_media, [url_in, gr.State(None)], output)
    btn_file.click(transcribe_media, [gr.State(None), file_in], output)

demo.launch()