"""Gradio app that transcribes speech from a video URL or an uploaded file.

Pipeline: yt-dlp downloads the video (URL mode), ffmpeg extracts a mono
16 kHz WAV track, and faster-whisper produces the transcript.
"""

import os
import shutil
import subprocess

import gradio as gr
import yt_dlp
from faster_whisper import WhisperModel
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

# ===============================
# Whisper Model (lazy load)
# ===============================
model = None


def load_model() -> WhisperModel:
    """Return the shared Whisper model, creating it on first use.

    The instance is cached in the module-level ``model`` so it is only
    constructed once per process.
    """
    global model
    if model is None:
        model = WhisperModel("base", device="cpu", compute_type="int8")
    return model


# ===============================
# FFmpeg path
# ===============================
def get_ffmpeg() -> str:
    """Return the ffmpeg executable found on PATH, else ``/usr/bin/ffmpeg``."""
    return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"


# ===============================
# SAFE: Download video only (NO postprocessing)
# ===============================
def download_video_only(url: str) -> str:
    """Download ``url`` with yt-dlp and return the local file path.

    No yt-dlp post-processors are configured; audio extraction is done
    separately by :func:`extract_audio_safe`.

    NOTE: the output path is a fixed file name in the working directory and
    any previous download is deleted first, so overlapping calls would
    overwrite each other's file.
    """
    video_path = "downloaded_video.mp4"
    if os.path.exists(video_path):
        os.remove(video_path)

    ydl_opts = {
        "format": "best",
        "outtmpl": video_path,
        "quiet": True,
        "nocheckcertificate": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return video_path


# ===============================
# SAFE: Extract audio manually (NO ffprobe)
# ===============================
def extract_audio_safe(video_path: str) -> str:
    """Extract a mono 16 kHz WAV track from ``video_path`` using ffmpeg.

    ffmpeg's exit status is deliberately not checked and its output is
    discarded; callers are expected to verify that the returned file exists
    and is non-trivial in size.

    Returns:
        The path of the WAV file ffmpeg was asked to write (a fixed file
        name in the working directory; any previous file is removed first).
    """
    audio_path = "extracted_audio.wav"
    if os.path.exists(audio_path):
        os.remove(audio_path)

    command = [
        get_ffmpeg(),
        "-y",            # overwrite the output without prompting
        "-i", video_path,
        "-vn",           # drop the video stream
        "-ac", "1",      # mono
        "-ar", "16000",  # 16 kHz sample rate
        audio_path,
    ]
    subprocess.run(
        command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    return audio_path


# ===============================
# Hindi script normalizer
# ===============================
def normalize_script(text: str, lang: str) -> str:
    """Return ``text`` transliterated to Devanagari when ``lang`` is ``"hi"``.

    For any other language the text is returned unchanged. Transliteration
    is best-effort: if it fails for any reason the original text is returned.
    """
    if lang != "hi":
        return text
    try:
        # NOTE(review): confirm that ``sanscript.ARABIC`` is defined by the
        # installed indic_transliteration release; if the lookup fails the
        # handler below simply returns the text unchanged.
        return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
    except Exception:
        # Narrower than a bare ``except`` so KeyboardInterrupt / SystemExit
        # still propagate.
        return text


# ===============================
# Transcription logic (STABLE)
# ===============================
# Uploads with these extensions are passed to Whisper as-is; every other
# upload is run through ffmpeg first.
_DIRECT_AUDIO_EXTENSIONS = (".mp3", ".wav", ".m4a")

# Audio files smaller than this many bytes are treated as a failed extraction.
_MIN_AUDIO_BYTES = 10000


def _as_path(file):
    """Return a filesystem path for an upload value.

    Accepts a plain path, or an object exposing the path as ``.name``
    (presumably what some Gradio versions pass for uploads - TODO confirm).
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return getattr(file, "name", file)


def _resolve_audio(url, file):
    """Return the audio path for the request, or ``None`` if no input was given.

    An uploaded file takes precedence over a URL.
    """
    if file:
        path = _as_path(file)
        extension = os.path.splitext(path)[1].lower()
        if extension in _DIRECT_AUDIO_EXTENSIONS:
            return path
        return extract_audio_safe(path)

    url = (url or "").strip()
    if url:
        return extract_audio_safe(download_video_only(url))
    return None


def transcribe(url, file, lang_choice):
    """Transcribe an uploaded file or a video URL and return display text.

    Args:
        url: Video URL, or ``None``/empty when a file is supplied.
        file: Uploaded file (path, or object with a ``.name`` path), or
            ``None`` when a URL is supplied.
        lang_choice: ``"Auto Detect"`` or a language code passed to Whisper.

    Returns:
        A string for the output widget: the detected language followed by
        the transcript, or a user-facing warning/error message. This
        function does not raise; failures are reported in the returned text.
    """
    try:
        audio = _resolve_audio(url, file)
        if audio is None:
            return "⚠️ Please paste a URL or upload a file."

        # Safety check
        if not os.path.exists(audio) or os.path.getsize(audio) < _MIN_AUDIO_BYTES:
            return "❌ Audio extraction failed. Please try again."

        whisper = load_model()
        language = None if lang_choice == "Auto Detect" else lang_choice

        segments, info = whisper.transcribe(
            audio,
            beam_size=1,
            vad_filter=True,
            language=language,
        )

        raw_text = " ".join(segment.text for segment in segments)
        final_text = normalize_script(raw_text, info.language)

        return f"🌍 Detected Language: {info.language}\n\n{final_text.strip()}"

    except Exception as exc:
        # UI boundary: turn any failure into a message instead of a traceback.
        if "instagram" in str(exc).lower():
            return "❌ Instagram URL is blocked on Hugging Face. Please upload the video file instead."
        return f"❌ Error: {str(exc)}"


# ===============================
# MODERN UI
# ===============================
css = """
body {
    background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
}
.glass {
    background: rgba(255,255,255,0.08);
    backdrop-filter: blur(18px);
    border-radius: 18px;
    padding: 25px;
    box-shadow: 0 20px 40px rgba(0,0,0,0.4);
}
.gr-button-primary {
    background: linear-gradient(135deg,#00c6ff,#0072ff);
    border: none;
    color: white;
    font-weight: 600;
}
.gr-input, .gr-textarea {
    background: rgba(255,255,255,0.12) !important;
    color: white !important;
}
h1, h2, label, .markdown-text {
    color: #ffffff !important;
}
footer {display:none;}
"""

with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
    with gr.Column(elem_classes="glass"):
        gr.Markdown("## 🚀 Universal Transcript Tool (STABLE)")
        gr.Markdown(
            "✔ YouTube ✔ TikTok ✔ Facebook ✔ Twitter/X\n\n"
            "⚠️ Instagram URL blocked on Hugging Face → **Upload video instead**\n\n"
            "**No random ffprobe errors. Ever.**"
        )

        with gr.Tabs():
            with gr.TabItem("🔗 Paste Link"):
                url = gr.Textbox(label="Video URL")
                btn_url = gr.Button("🎧 Transcribe Link", variant="primary")

            with gr.TabItem("📂 Upload File"):
                file = gr.File(
                    label="Upload Video / Audio",
                    file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
                )
                btn_file = gr.Button("📂 Transcribe File", variant="primary")

        lang = gr.Dropdown(
            label="🌍 Transcript Language",
            choices=[
                "Auto Detect", "hi", "ur", "en", "ar",
                "fr", "de", "es", "ru", "ja", "zh"
            ],
            value="Auto Detect"
        )

        output = gr.Code(label="Transcript Output", lines=14)

        # Each button feeds only its own input; the unused slot gets a
        # constant None so transcribe() sees exactly one source.
        btn_url.click(transcribe, [url, gr.State(None), lang], output)
        btn_file.click(transcribe, [gr.State(None), file, lang], output)

demo.launch()