import gradio as gr
import yt_dlp
import os
import shutil
import subprocess
from faster_whisper import WhisperModel
# Hindi script fix
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
# ===============================
# 1. Whisper Model (Lazy Load)
# ===============================
model = None


def load_model():
    """Return the shared Whisper model, building it on the first call.

    The instance is cached in the module-level ``model`` variable, so the
    model is constructed at most once; later calls return the cached object.

    Returns:
        The cached ``WhisperModel`` (size ``"base"``, ``device="cpu"``,
        ``compute_type="int8"``).
    """
    global model
    if model is None:
        print("๐ฅ Loading Whisper Model...")
        model = WhisperModel("base", device="cpu", compute_type="int8")
        # Kept on one physical line: a plain quoted string literal cannot
        # contain a raw line break.
        print("โ Model Loaded")
    return model
# ===============================
# 2. FFmpeg Path
# ===============================
def get_ffmpeg():
    """Locate the ffmpeg executable.

    Returns:
        The path reported by ``shutil.which("ffmpeg")``, or the fixed
        fallback ``"/usr/bin/ffmpeg"`` when that lookup yields nothing.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    return "/usr/bin/ffmpeg"
# ===============================
# 3. Video to Audio
# ===============================
def extract_audio(video_path):
    """Extract a mono, 16 kHz WAV audio track from a media file via ffmpeg.

    Args:
        video_path: Path of the input media file.

    Returns:
        The fixed output path ``"uploaded_audio.wav"``. ffmpeg's exit status
        is not checked, so the file may be absent if the conversion failed;
        callers should verify that it exists before using it.
    """
    audio_path = "uploaded_audio.wav"
    # Remove any earlier result first, so a failed conversion cannot leave a
    # stale file behind that looks like fresh output.
    if os.path.exists(audio_path):
        os.remove(audio_path)
    cmd = [
        get_ffmpeg(),
        # Overwrite flag goes before the input/output files; an option that
        # trails the last output file is not applied to it.
        "-y",
        "-i", video_path,
        "-vn",           # no video stream in the output
        "-ac", "1",      # one audio channel
        "-ar", "16000",  # 16000 Hz sample rate
        audio_path,
    ]
    # Best effort: ffmpeg's console output is discarded and a non-zero exit
    # status does not raise.
    subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return audio_path
# ===============================
# 4. Download Audio from URL
# ===============================
def download_audio_from_url(url):
    """Download the best audio stream of a URL with yt-dlp and convert to WAV.

    Args:
        url: Media URL. Leading and trailing whitespace is stripped before
            the URL is handed to yt-dlp.

    Returns:
        ``"url_audio.wav"``: the output template ``"url_audio"`` plus the
        ``wav`` extension requested from the ``FFmpegExtractAudio``
        post-processor.
    """
    output = "url_audio"
    wav_path = f"{output}.wav"
    # Remove the result of any earlier download so that a previous file can
    # never be mistaken for the output of this call.
    if os.path.exists(wav_path):
        os.remove(wav_path)
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": output,
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
        }],
        "quiet": True,
        # NOTE(review): this turns off TLS certificate verification for the
        # download; confirm that this is intended.
        "nocheckcertificate": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url.strip()])
    return wav_path
# ===============================
# 5. Hindi Script Normalizer
# ===============================
def normalize_script(text, lang):
    """Best-effort conversion of a Hindi transcript to Devanagari script.

    Args:
        text: Transcript text.
        lang: Language code; only ``"hi"`` triggers transliteration.

    Returns:
        The transliterated text when ``lang`` is ``"hi"``; otherwise, or when
        transliteration raises, ``text`` unchanged.
    """
    if lang != "hi":
        return text
    try:
        # NOTE(review): confirm that ``sanscript`` really defines an
        # ``ARABIC`` scheme. If it does not, the attribute lookup raises and
        # this function silently returns the input unchanged.
        return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
    except Exception:
        # Deliberate fallback: an untransliterated transcript is preferred
        # over a failure. ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        return text
# ===============================
# 6. Main Transcribe Logic
# ===============================
def transcribe_media(url_input, file_input, language_choice):
    """Transcribe an uploaded file or a media URL and report the language.

    Args:
        url_input: Media URL, or ``None``/blank when a file is supplied.
        file_input: Uploaded file, given either as a path string or as an
            object exposing its path as ``.name``. Takes precedence over
            ``url_input`` when truthy.
        language_choice: ``"Auto Detect"`` to let the model pick the
            language, otherwise a language code passed to the model as is.

    Returns:
        A string: the detected language followed by the transcript, or a
        message describing what went wrong. Exceptions are reported in the
        returned string rather than raised.
    """
    try:
        if file_input:
            # Accept both a plain path and a file-like wrapper with ``.name``.
            file_path = getattr(file_input, "name", file_input)
            ext = os.path.splitext(file_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                # Already an audio file: hand it to the model directly.
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)
        elif url_input and url_input.strip():
            # Pass on the same stripped value that was just validated.
            audio_path = download_audio_from_url(url_input.strip())
        else:
            return "โ ๏ธ Please paste a link or upload a file."

        if not os.path.exists(audio_path):
            return "โ Audio processing failed."

        whisper = load_model()
        # ``None`` asks the model to detect the language itself.
        language = None if language_choice == "Auto Detect" else language_choice
        segments, info = whisper.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
            language=language,
        )
        detected_lang = info.language
        raw_text = " ".join(seg.text for seg in segments)
        final_text = normalize_script(raw_text, detected_lang)
        return f"๐ Detected Language: {detected_lang}\n\n{final_text.strip()}"
    except Exception as e:
        # UI boundary: show the failure in the output box instead of raising.
        return f"โ Error: {e}"
# ===============================
# 7. UI
# ===============================
# Page styling: centre the main column and give primary buttons a gradient.
css = """
.container {max-width: 900px; margin: auto;}
.gr-button-primary {
background: linear-gradient(90deg,#667eea,#764ba2);
border: none;
color: white;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("## ๐ Universal Transcript Tool")
        gr.Markdown(
            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
            "Hindi output is always **Devanagari** ๐ฎ๐ณ"
        )
        with gr.Tabs():
            with gr.TabItem("๐ Paste Link"):
                url_in = gr.Textbox(label="Video URL")
                btn_url = gr.Button("๐ง Transcribe Link", variant="primary")
            with gr.TabItem("๐ Upload File"):
                file_in = gr.File(
                    label="Upload Video / Audio",
                    # ".m4a" is listed because transcribe_media accepts it
                    # as a ready-to-use audio file.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ],
                )
                btn_file = gr.Button("๐ Transcribe File", variant="primary")
        # Language selector shared by both tabs; "Auto Detect" lets the
        # model choose the language.
        language_selector = gr.Dropdown(
            choices=[
                "Auto Detect",
                "hi",  # Hindi (Devanagari)
                "ur",  # Urdu
                "en",  # English
                "ar",
                "fr",
                "de",
                "es",
                "ru",
                "ja",
                "zh",
            ],
            value="Auto Detect",
            label="๐ Select Transcript Language",
        )
        output = gr.Code(label="Transcript Output", lines=15)
    # Each button fills only its own input; the other one is a constant None.
    btn_url.click(transcribe_media, [url_in, gr.State(None), language_selector], output)
    btn_file.click(transcribe_media, [gr.State(None), file_in, language_selector], output)

demo.launch()