Offex's picture
Update app.py
b292d46 verified
raw
history blame
5.32 kB
import gradio as gr
import yt_dlp
import os
import shutil
import subprocess
from faster_whisper import WhisperModel
# 🔤 Hindi Script Fix
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
# ===============================
# 1. Whisper Model (Lazy Load)
# ===============================
model = None


def load_model():
    """Return the shared Whisper model, building it on the first call.

    The instance is cached in the module-level ``model`` variable. While
    that variable is ``None`` a "base" model is constructed for CPU with
    int8 compute and stored there; every later call returns the cached
    object without rebuilding it.
    """
    global model
    # Fast path: the model was already built by an earlier call.
    if model is not None:
        return model
    print("๐Ÿ“ฅ Loading Whisper Model...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    print("โœ… Model Loaded")
    return model
# ===============================
# 2. FFmpeg Path
# ===============================
def get_ffmpeg():
    """Return the path of the ffmpeg executable to invoke.

    The executable is looked up on ``PATH`` first; when it is not found
    there, the conventional location ``/usr/bin/ffmpeg`` is returned
    without checking that it exists.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    return "/usr/bin/ffmpeg"
# ===============================
# 3. Video → Audio
# ===============================
def extract_audio(video_path):
    """Extract a mono, 16 kHz WAV audio track from a media file with ffmpeg.

    Args:
        video_path: Path of the media file to read.

    Returns:
        The fixed output path ``"uploaded_audio.wav"``. The file is only
        present when ffmpeg exited successfully, so callers should check
        that it exists before using it.
    """
    audio_path = "uploaded_audio.wav"
    # Remove output left over from an earlier call so a failure in this run
    # can never be mistaken for a fresh result.
    if os.path.exists(audio_path):
        os.remove(audio_path)
    cmd = [
        get_ffmpeg(),
        "-y",  # global option: placed before the input, as in the ffmpeg synopsis
        "-i", video_path,
        "-vn",  # drop the video stream
        "-ac", "1",  # mono
        "-ar", "16000",  # 16 kHz sample rate
        audio_path,
    ]
    result = subprocess.run(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    # A failed run may leave a truncated file behind; delete it so the
    # caller's existence check reports the failure.
    if result.returncode != 0 and os.path.exists(audio_path):
        os.remove(audio_path)
    return audio_path
# ===============================
# 4. Download Audio from URL
# ===============================
def download_audio_from_url(url):
    """Download the best available audio for ``url`` and convert it to WAV.

    Args:
        url: Address of the media page or file, handed to yt-dlp as-is.

    Returns:
        The path of the converted file, ``"url_audio.wav"``.

    Raises:
        Whatever yt-dlp raises when the download or conversion fails; no
        exception is caught here.
    """
    output = "url_audio"
    wav_path = f"{output}.wav"
    # Clear the result of an earlier call so a stale file is never reused
    # or mistaken for the output of this download.
    if os.path.exists(wav_path):
        os.remove(wav_path)
    ydl_opts = {
        "format": "bestaudio/best",
        # Keep the extension placeholder in the template so the audio
        # post-processor can swap it for ".wav" and the final path is
        # predictable.
        "outtmpl": f"{output}.%(ext)s",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
        }],
        "quiet": True,
        # NOTE(review): this turns off TLS certificate verification for the
        # download — confirm that this is intentional.
        "nocheckcertificate": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return wav_path
# ===============================
# 5. Hindi Script Normalizer
# ===============================
def normalize_script(text, lang):
    """Convert Hindi transcripts to Devanagari; pass other text through.

    Args:
        text: Transcript text to normalize.
        lang: Language code of the transcript; only ``"hi"`` triggers the
            transliteration attempt.

    Returns:
        The transliterated text for Hindi, or ``text`` unchanged for any
        other language or when transliteration fails.
    """
    if lang != "hi":
        return text
    try:
        # NOTE(review): confirm that ``sanscript`` really defines ``ARABIC``;
        # if it does not, the lookup fails and the text is returned as-is.
        return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
    except Exception:
        # Best effort: an untransliterated transcript is better than none.
        # Catching ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return text
# ===============================
# 6. Main Transcribe Logic
# ===============================
def transcribe_media(url_input, file_input, language_choice):
    """Transcribe an uploaded file or a remote URL and return display text.

    Args:
        url_input: Media URL entered by the user, or ``None``/blank when
            unused.
        file_input: The uploaded file, given either as a filesystem path or
            as an object that exposes its path through a ``name``
            attribute; ``None`` when unused. Takes precedence over
            ``url_input``.
        language_choice: ``"Auto Detect"`` to let the model detect the
            language, otherwise the language code passed to the model.

    Returns:
        The detected language followed by the transcript, or a
        human-readable warning/error message. Exceptions are not
        propagated; they are rendered into the returned string.
    """
    try:
        # ---------- FILE ----------
        if file_input:
            # Accept plain path strings as well as objects that carry their
            # path in ``.name`` (e.g. temp-file wrappers).
            file_path = getattr(file_input, "name", file_input)
            ext = os.path.splitext(file_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                # Already audio: hand it to the model directly.
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)
        # ---------- URL ----------
        elif url_input and url_input.strip():
            # Pass on the trimmed URL so stray whitespace from pasting does
            # not reach the downloader.
            audio_path = download_audio_from_url(url_input.strip())
        else:
            return "โš ๏ธ Please paste a link or upload a file."

        if not os.path.exists(audio_path):
            return "โŒ Audio processing failed."

        whisper = load_model()
        # ``None`` asks the model to detect the language itself.
        language = None if language_choice == "Auto Detect" else language_choice
        segments, info = whisper.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
            language=language,
        )
        detected_lang = info.language
        raw_text = " ".join(seg.text for seg in segments)
        final_text = normalize_script(raw_text, detected_lang)
        return f"๐ŸŒ Detected Language: {detected_lang}\n\n{final_text.strip()}"
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"โŒ Error: {e}"
# ===============================
# 7. UI
# ===============================
# Custom styling: centre the page content and give primary buttons a gradient.
css = """
.container {max-width: 900px; margin: auto;}
.gr-button-primary {
background: linear-gradient(90deg,#667eea,#764ba2);
border: none;
color: white;
}
"""

# Build the interface: one tab for pasting a link, one for uploading a file,
# plus a shared language selector and a shared transcript output.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("## ๐Ÿš€ Universal Transcript Tool")
        gr.Markdown(
            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
            "Hindi output is always **Devanagari** ๐Ÿ‡ฎ๐Ÿ‡ณ"
        )
        with gr.Tabs():
            with gr.TabItem("๐Ÿ”— Paste Link"):
                url_in = gr.Textbox(label="Video URL")
                btn_url = gr.Button("๐ŸŽง Transcribe Link", variant="primary")
            with gr.TabItem("๐Ÿ“‚ Upload File"):
                file_in = gr.File(
                    label="Upload Video / Audio",
                    # ".m4a" is listed because transcribe_media accepts it
                    # as ready-to-use audio; without it that branch could
                    # not be reached from this widget.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ],
                )
                btn_file = gr.Button("๐Ÿ“‚ Transcribe File", variant="primary")
        # Language selector shared by both tabs.
        language_selector = gr.Dropdown(
            choices=[
                "Auto Detect",
                "hi",  # Hindi (Devanagari)
                "ur",  # Urdu
                "en",  # English
                "ar",
                "fr",
                "de",
                "es",
                "ru",
                "ja",
                "zh",
            ],
            value="Auto Detect",
            label="๐ŸŒ Select Transcript Language",
        )
        output = gr.Code(label="Transcript Output", lines=15)
    # Each button fills only its own input; the other slot receives None so
    # transcribe_media takes the matching branch.
    btn_url.click(transcribe_media, [url_in, gr.State(None), language_selector], output)
    btn_file.click(transcribe_media, [gr.State(None), file_in, language_selector], output)

demo.launch()