import os
import shutil
import subprocess

import gradio as gr
import yt_dlp
from faster_whisper import WhisperModel

# 🔤 Hindi Script Fix
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

# ===============================
# 1. Whisper Model (Lazy Load)
# ===============================
model = None


def load_model():
    """Return the shared Whisper model, creating it on first use.

    The model is stored in the module-level ``model`` variable so that the
    (slow) construction happens only once per process.

    Returns:
        The ``WhisperModel`` instance ("base" size, CPU, int8 compute type).
    """
    global model
    if model is None:
        print("📥 Loading Whisper Model...")
        model = WhisperModel("base", device="cpu", compute_type="int8")
        print("✅ Model Loaded")
    return model


# ===============================
# 2. FFmpeg Path
# ===============================
def get_ffmpeg():
    """Return the ffmpeg executable found on PATH, else ``/usr/bin/ffmpeg``."""
    return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"


# ===============================
# 3. Video → Audio
# ===============================
def extract_audio(video_path):
    """Convert a media file to mono 16 kHz WAV using ffmpeg.

    Any previous ``uploaded_audio.wav`` in the working directory is removed
    first, so a leftover file from an earlier run cannot be mistaken for the
    result of this one.

    Args:
        video_path: Path of the input media file.

    Returns:
        The output path ``"uploaded_audio.wav"``. ffmpeg's exit status is not
        inspected here; callers should check that the file exists.
    """
    audio_path = "uploaded_audio.wav"

    if os.path.exists(audio_path):
        os.remove(audio_path)

    cmd = [
        get_ffmpeg(),
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        audio_path,
        "-y",
    ]

    # ffmpeg output is discarded on purpose to keep the app log quiet.
    subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return audio_path


# ===============================
# 4. Download Audio from URL
# ===============================
def download_audio_from_url(url):
    """Download the best audio stream of ``url`` and convert it to WAV.

    Args:
        url: Media page URL handed to yt-dlp.

    Returns:
        The path ``"url_audio.wav"`` (the output template plus the extension
        of the requested ``wav`` codec).

    Raises:
        Whatever yt-dlp raises when the download or post-processing fails.
    """
    output = "url_audio"

    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": output,
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
        }],
        "quiet": True,
        "nocheckcertificate": True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    return "url_audio.wav"


# ===============================
# 5. Hindi Script Normalizer
# ===============================
def normalize_script(text, lang):
    """Best-effort conversion of Hindi text to Devanagari.

    Args:
        text: Transcribed text.
        lang: Language code of the transcript; only ``"hi"`` is converted.

    Returns:
        The transliterated text for Hindi, or ``text`` unchanged for any
        other language or when transliteration fails.
    """
    if lang == "hi":
        try:
            # NOTE(review): confirm the installed indic_transliteration
            # exposes `sanscript.ARABIC`; if it does not, the lookup raises
            # and the text is returned unchanged by the handler below.
            return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
        except Exception:
            # Deliberate best-effort: keep the raw transcript rather than
            # fail the whole request. `Exception` (not a bare except) so
            # KeyboardInterrupt / SystemExit still propagate.
            return text
    return text
# ===============================
# 6. Main Transcribe Logic
# ===============================
def transcribe_media(url_input, file_input, language_choice):
    """Transcribe an uploaded file or a media URL and return display text.

    An uploaded file takes precedence over the URL. Files ending in
    ``.mp3``, ``.wav`` or ``.m4a`` are fed to the model directly; anything
    else is first converted with ``extract_audio``.

    Args:
        url_input: Media URL, or ``None``/blank when unused.
        file_input: Uploaded file, or ``None`` when unused. May be a path
            (``str`` / ``os.PathLike``) or an object exposing the path via a
            ``name`` attribute.
        language_choice: Language code to force, or ``"Auto Detect"`` (also
            ``None``/empty) to let the model detect the language.

    Returns:
        A user-facing string: the detected language followed by the
        transcript, or a message starting with ``⚠️``/``❌`` on missing
        input or failure. Exceptions are never propagated to the caller.
    """
    try:
        audio_path = None

        # ---------- FILE ----------
        if file_input:
            # The upload may arrive as a plain path or as a file-like wrapper
            # whose `.name` holds the path; accept both. Path-likes are
            # handled first because `Path.name` is only the basename.
            if isinstance(file_input, (str, os.PathLike)):
                file_path = os.fspath(file_input)
            else:
                file_path = getattr(file_input, "name", file_input)

            ext = os.path.splitext(file_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)

        # ---------- URL ----------
        elif url_input and url_input.strip():
            # Pass the cleaned URL on, not the raw textbox content.
            audio_path = download_audio_from_url(url_input.strip())

        else:
            return "⚠️ Please paste a link or upload a file."

        if not audio_path or not os.path.exists(audio_path):
            return "❌ Audio processing failed."

        whisper = load_model()

        # Language handling: None asks the model to auto-detect.
        if language_choice in (None, "", "Auto Detect"):
            language = None
        else:
            language = language_choice

        segments, info = whisper.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
            language=language,
        )

        detected_lang = info.language
        raw_text = " ".join(seg.text for seg in segments)
        final_text = normalize_script(raw_text, detected_lang)

        return f"🌍 Detected Language: {detected_lang}\n\n{final_text.strip()}"

    except Exception as e:
        # Top-level UI boundary: surface the problem as text, don't crash.
        return f"❌ Error: {str(e)}"
# ===============================
# 7. UI
# ===============================
css = """
.container {max-width: 900px; margin: auto;}
.gr-button-primary {
    background: linear-gradient(90deg,#667eea,#764ba2);
    border: none;
    color: white;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("## 🚀 Universal Transcript Tool")
        gr.Markdown(
            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
            "Hindi output is always **Devanagari** 🇮🇳"
        )

        with gr.Tabs():
            with gr.TabItem("🔗 Paste Link"):
                url_in = gr.Textbox(label="Video URL")
                btn_url = gr.Button("🎧 Transcribe Link", variant="primary")

            with gr.TabItem("📂 Upload File"):
                file_in = gr.File(
                    label="Upload Video / Audio",
                    # ".m4a" is listed because transcribe_media already
                    # treats it as a directly usable audio format.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ],
                )
                btn_file = gr.Button("📂 Transcribe File", variant="primary")

        # 🌍 Language Selector
        language_selector = gr.Dropdown(
            choices=[
                "Auto Detect",
                "hi",  # Hindi (Devanagari)
                "ur",  # Urdu
                "en",  # English
                "ar",
                "fr",
                "de",
                "es",
                "ru",
                "ja",
                "zh",
            ],
            value="Auto Detect",
            label="🌍 Select Transcript Language",
        )

        output = gr.Code(label="Transcript Output", lines=15)

    # Event wiring stays inside the Blocks context. Each button feeds a
    # constant None for the input it does not use, so transcribe_media only
    # ever sees the source belonging to the clicked tab.
    btn_url.click(
        transcribe_media,
        [url_in, gr.State(None), language_selector],
        output,
    )
    btn_file.click(
        transcribe_media,
        [gr.State(None), file_in, language_selector],
        output,
    )

demo.launch()