# Offex's picture
# Update app.py
# fbfb3b5 verified
import gradio as gr
import yt_dlp
import os
import shutil
import subprocess
from faster_whisper import WhisperModel
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
# ===============================
# Whisper Model (lazy load)
# ===============================
model = None  # process-wide cache for the Whisper model; filled on first use


def load_model():
    """Return the shared WhisperModel, constructing it on the first call.

    The model ("base", CPU, int8 compute) is stored in the module-level
    ``model`` variable so later calls reuse the same instance.
    """
    global model
    if model is not None:
        return model
    model = WhisperModel("base", device="cpu", compute_type="int8")
    return model
# ===============================
# FFmpeg path
# ===============================
def get_ffmpeg():
    """Return the path of the ffmpeg executable.

    Uses the first ``ffmpeg`` found on PATH; when none is found, falls back
    to the fixed location ``/usr/bin/ffmpeg``.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    return "/usr/bin/ffmpeg"
# ===============================
# SAFE: Download video only (NO postprocessing)
# ===============================
def download_video_only(url):
    """Download *url* with yt-dlp and return the local file path.

    The result is always written to ``downloaded_video.mp4`` in the current
    working directory; a file left over from an earlier run is deleted first.
    No yt-dlp post-processing is requested.
    """
    video_path = "downloaded_video.mp4"

    # Start from a clean slate so an old download is never mistaken for a new one.
    if os.path.exists(video_path):
        os.remove(video_path)

    # NOTE: "nocheckcertificate" turns off TLS certificate verification.
    options = dict(
        format="best",
        outtmpl=video_path,
        quiet=True,
        nocheckcertificate=True,
    )
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])

    return video_path
# ===============================
# SAFE: Extract audio manually (NO ffprobe)
# ===============================
def extract_audio_safe(video_path):
    """Extract the audio track of *video_path* into ``extracted_audio.wav``.

    ffmpeg is invoked directly to write a mono, 16 kHz WAV file with the video
    stream dropped. Its output is discarded and its exit status is not
    inspected, so callers must verify that the returned file exists and is
    non-trivial before using it.

    Returns:
        The output path, ``"extracted_audio.wav"``.
    """
    audio_path = "extracted_audio.wav"

    # Remove any previous result so a failed run cannot leave stale audio behind.
    if os.path.exists(audio_path):
        os.remove(audio_path)

    command = [get_ffmpeg(), "-y", "-i", video_path]
    command += ["-vn", "-ac", "1", "-ar", "16000", audio_path]

    subprocess.run(
        command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return audio_path
# ===============================
# Hindi script normalizer
# ===============================
def normalize_script(text, lang):
    """Return *text* with Hindi output converted to Devanagari when possible.

    Args:
        text: Transcript text to normalise.
        lang: Language code reported for the transcript.

    Returns:
        For ``lang == "hi"``, the result of transliterating *text* from
        ``sanscript.ARABIC`` to ``sanscript.DEVANAGARI``; for any other
        language, or when transliteration fails, *text* unchanged.
    """
    if lang != "hi":
        return text
    try:
        # NOTE(review): confirm that the installed indic_transliteration
        # actually exposes `sanscript.ARABIC`; if it does not, the resulting
        # AttributeError is handled below and this function is a no-op.
        return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
    except Exception:
        # Best-effort: keep the raw transcript rather than fail the request.
        # `Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate.
        return text
# ===============================
# Transcription logic (STABLE)
# ===============================
def transcribe(url, file, lang_choice):
    """Transcribe an uploaded file or a downloadable video URL.

    An uploaded file takes precedence over the URL. Files ending in
    ``.mp3``, ``.wav`` or ``.m4a`` are passed to the model as-is; anything
    else goes through ``extract_audio_safe`` first. URLs are downloaded with
    ``download_video_only`` and then converted the same way.

    Args:
        url: Video URL, or ``None``/empty/whitespace when unused.
        file: Uploaded file, or ``None``. Accepts a path string, an
            ``os.PathLike``, or a tempfile-like object exposing ``.name``.
        lang_choice: ``"Auto Detect"`` to let the model detect the language,
            otherwise a language code forwarded to the model.

    Returns:
        A user-facing string: the detected language followed by the
        transcript on success, or a message starting with a warning/error
        emoji otherwise. Exceptions are caught and reported as a message.
    """
    try:
        # -------- FILE MODE --------
        if file:
            # Depending on the Gradio version/config the upload arrives as a
            # path or as a tempfile wrapper; normalise to a path string.
            if isinstance(file, (str, os.PathLike)):
                file_path = os.fspath(file)
            else:
                file_path = file.name
            ext = os.path.splitext(file_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                audio = file_path
            else:
                audio = extract_audio_safe(file_path)
        # -------- URL MODE --------
        elif url and url.strip():
            # Whitespace-only input is treated as "no URL" instead of being
            # handed to the downloader.
            video = download_video_only(url.strip())
            audio = extract_audio_safe(video)
        else:
            return "⚠️ Please paste a URL or upload a file."

        # Safety check: ffmpeg's exit status is not inspected, so a missing
        # or tiny output file is the only failure signal.
        if not os.path.exists(audio) or os.path.getsize(audio) < 10000:
            return "❌ Audio extraction failed. Please try again."

        whisper = load_model()
        language = None if lang_choice == "Auto Detect" else lang_choice
        segments, info = whisper.transcribe(
            audio,
            beam_size=1,
            vad_filter=True,
            language=language,
        )

        # Segment texts may carry their own leading/trailing whitespace;
        # strip each one so the join produces single spaces.
        raw_text = " ".join(s.text.strip() for s in segments)
        final_text = normalize_script(raw_text, info.language)
        return f"🌍 Detected Language: {info.language}\n\n{final_text.strip()}"
    except Exception as e:
        if "instagram" in str(e).lower():
            return "❌ Instagram URL is blocked on Hugging Face. Please upload the video file instead."
        return f"❌ Error: {str(e)}"
# ===============================
# MODERN UI
# ===============================
# Custom stylesheet handed to gr.Blocks(css=...): gradient page background,
# a translucent ".glass" panel (applied to the main column via elem_classes),
# restyled primary buttons and inputs, white headings/labels, hidden footer.
css = """
body {
background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
}
.glass {
background: rgba(255,255,255,0.08);
backdrop-filter: blur(18px);
border-radius: 18px;
padding: 25px;
box-shadow: 0 20px 40px rgba(0,0,0,0.4);
}
.gr-button-primary {
background: linear-gradient(135deg,#00c6ff,#0072ff);
border: none;
color: white;
font-weight: 600;
}
.gr-input, .gr-textarea {
background: rgba(255,255,255,0.12) !important;
color: white !important;
}
h1, h2, label, .markdown-text {
color: #ffffff !important;
}
footer {display:none;}
"""
# Build the interface: one tab for pasting a link, one for uploading a file,
# plus a shared language selector and a shared output area.
with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
    with gr.Column(elem_classes="glass"):
        gr.Markdown("## 🚀 Universal Transcript Tool (STABLE)")
        gr.Markdown(
            "✔ YouTube ✔ TikTok ✔ Facebook ✔ Twitter/X\n\n"
            "⚠️ Instagram URL blocked on Hugging Face → **Upload video instead**\n\n"
            "**No random ffprobe errors. Ever.**"
        )

        with gr.Tabs():
            with gr.TabItem("🔗 Paste Link"):
                url = gr.Textbox(label="Video URL")
                btn_url = gr.Button("🎧 Transcribe Link", variant="primary")

            with gr.TabItem("📂 Upload File"):
                file = gr.File(
                    label="Upload Video / Audio",
                    # ".m4a" is listed because transcribe() already accepts
                    # it as a ready-to-use audio format.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ],
                )
                btn_file = gr.Button("📂 Transcribe File", variant="primary")

        lang = gr.Dropdown(
            label="🌍 Transcript Language",
            choices=[
                "Auto Detect",
                "hi",
                "ur",
                "en",
                "ar",
                "fr",
                "de",
                "es",
                "ru",
                "ja",
                "zh",
            ],
            value="Auto Detect",
        )
        output = gr.Code(label="Transcript Output", lines=14)

    # Each button feeds transcribe(url, file, lang_choice); the input that
    # does not belong to the button's tab is pinned to None via gr.State.
    btn_url.click(transcribe, [url, gr.State(None), lang], output)
    btn_file.click(transcribe, [gr.State(None), file, lang], output)

demo.launch()