"""Gradio app: download or upload media, transcribe with Whisper, emit Devanagari text."""

import os
import re
import shutil
import subprocess
import tempfile

import gradio as gr
import torch  # noqa: F401  (unused in this file's visible code; kept from the original)
import yt_dlp
from faster_whisper import WhisperModel
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

# ===============================
# 🔒 GLOBALS & CONFIG
# ===============================
MODEL_CACHE_DIR = "/tmp/qwen_whisper_cache"
os.makedirs(MODEL_CACHE_DIR, exist_ok=True)

# Files smaller than this many bytes are treated as empty/invalid audio.
MIN_AUDIO_BYTES = 5000

# Uploads with these extensions go straight to Whisper; anything else is
# run through ffmpeg first.
AUDIO_EXTENSIONS = (".mp3", ".wav", ".m4a", ".ogg")

# Everything outside the Devanagari block and basic punctuation is dropped.
_NON_HINDI_CHARS_RE = re.compile(
    r'[^\u0900-\u097F\u0020\u002E\u002C\u003F\u0021\u003B\u003A\u002D\u0028\u0029]'
)
_WHITESPACE_RE = re.compile(r'\s+')
# A run of the same sentence terminator (optionally space-separated).
_REPEATED_PUNCT_RE = re.compile(r'([.?!])(?:\s*\1)+')

# Lazy-loaded model (shared across calls)
_model = None


def load_whisper_model():
    """Return the shared Whisper model, creating it on first use.

    The model is the "base" checkpoint on CPU with int8 compute, downloaded
    into ``MODEL_CACHE_DIR``.
    """
    global _model
    if _model is None:
        print("📥 Loading Whisper 'base' model (CPU/int8)...")
        _model = WhisperModel(
            "base",
            device="cpu",
            compute_type="int8",
            download_root=MODEL_CACHE_DIR,
        )
        print("✅ Model loaded.")
    return _model


def get_ffmpeg():
    """Return the ffmpeg executable found on PATH, else ``/usr/bin/ffmpeg``."""
    return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"


# ===============================
# 📥 SAFE DOWNLOAD (YouTube, TikTok, etc.)
# ===============================
def download_video(url, work_dir=None):
    """Download ``url`` with yt-dlp and return ``(video_path, title)``.

    Args:
        url: Page URL handed to yt-dlp.
        work_dir: Directory to download into. Defaults to the system temp
            directory. Pass a per-request directory when calls may run
            concurrently, because the output filename is fixed.

    Raises:
        RuntimeError: If yt-dlp raises or no output file was produced.
    """
    target_dir = work_dir or tempfile.gettempdir()
    video_path = os.path.join(target_dir, "downloaded_video.mp4")
    if os.path.exists(video_path):
        os.remove(video_path)

    ydl_opts = {
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "outtmpl": video_path,
        "quiet": True,
        "nocheckcertificate": True,
        "noplaylist": True,
        "retries": 10,
        "fragment_retries": 10,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
    except Exception as exc:
        raise RuntimeError(f"Download failed: {exc}") from exc

    # Ensure file exists
    if not os.path.exists(video_path):
        raise RuntimeError("Download failed: no file created")

    # The title may be absent or None; the caller slices it, so always
    # hand back a string.
    title = (info or {}).get("title") or "Untitled"
    return video_path, title


# ===============================
# 🎧 EXTRACT AUDIO (robust)
# ===============================
def extract_audio(video_path, work_dir=None, timeout=60):
    """Extract 16 kHz mono 16-bit PCM WAV audio from ``video_path`` via ffmpeg.

    Args:
        video_path: Input media file.
        work_dir: Directory for the output file. Defaults to the system temp
            directory. Pass a per-request directory when calls may run
            concurrently, because the output filename is fixed.
        timeout: Seconds to wait for ffmpeg before giving up.

    Returns:
        Path of the extracted WAV file.

    Raises:
        RuntimeError: If ffmpeg exits non-zero, times out, or produces a
            missing/too-small file.
    """
    target_dir = work_dir or tempfile.gettempdir()
    audio_path = os.path.join(target_dir, "extracted_audio.wav")
    if os.path.exists(audio_path):
        os.remove(audio_path)

    cmd = [
        get_ffmpeg(), "-y",
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        "-c:a", "pcm_s16le",
        audio_path,
    ]

    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
            f"Audio extraction timed out (>{timeout}s)"
        ) from exc

    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg failed: {result.stderr}")
    if (
        not os.path.exists(audio_path)
        or os.path.getsize(audio_path) < MIN_AUDIO_BYTES
    ):
        raise RuntimeError("Audio extraction produced empty/invalid file")
    return audio_path


# ===============================
# 🌐 LANGUAGE-AWARE TRANSLITERATION & NORMALIZATION
# ===============================
def normalize_to_hindi(text):
    """Convert ``text`` to Devanagari and tidy punctuation and spacing.

    Steps: transliterate to Devanagari, drop every character outside the
    Devanagari block and basic punctuation, collapse whitespace and repeated
    terminators, then append a danda if the text has no final terminator.

    Returns:
        The cleaned text, or ``""`` for empty/whitespace-only/None input.
    """
    if not text or not text.strip():
        return ""

    # Step 1: Transliterate non-Devanagari scripts to Devanagari.
    # Each pass is attempted independently so that a failure in the first
    # (Arabic script, for Urdu) no longer skips the second (Roman, for
    # Hindi/English mixed).
    # NOTE(review): the scheme is looked up with getattr because it is not
    # confirmed that every indic_transliteration release defines
    # `sanscript.ARABIC` — verify against the installed version.
    for scheme_name in ("ARABIC", "ITRANS"):
        source_scheme = getattr(sanscript, scheme_name, None)
        if source_scheme is None:
            print(f"⚠️ Transliteration scheme {scheme_name} unavailable; skipping.")
            continue
        try:
            text = transliterate(text, source_scheme, sanscript.DEVANAGARI)
        except Exception as exc:
            # Best effort: keep the text as it was before this pass.
            print(f"⚠️ Transliteration from {scheme_name} failed: {exc}")

    # Step 2: Clean punctuation & spacing
    text = _NON_HINDI_CHARS_RE.sub('', text)
    text = _WHITESPACE_RE.sub(' ', text).strip()
    # Collapse whole runs ("..." → "."), not just pairs.
    text = _REPEATED_PUNCT_RE.sub(r'\1', text)

    # Step 3: Add proper full stop at end if missing
    if text and text[-1] not in "।.!?":
        text += "।"

    return text


# ===============================
# 🎯 CORE TRANSCRIBE FUNCTION (ALWAYS OUTPUT HINDI)
# ===============================
def transcribe_to_hindi(url=None, file=None, lang_choice="Auto Detect"):
    """Transcribe a URL or uploaded file and return Devanagari text.

    Args:
        url: Video URL to download; used only when ``file`` is falsy.
        file: Uploaded file, either a path string or an object exposing the
            path as ``.name``.
        lang_choice: Accepted for UI compatibility; not used, because the
            spoken language is auto-detected and the output is always
            converted to Devanagari.

    Returns:
        A header (title and detected language) followed by the transcript,
        or a user-facing warning/error message. Exceptions are converted to
        messages rather than propagated.
    """
    # Per-request scratch directory so concurrent requests do not overwrite
    # each other's intermediate files.
    work_dir = tempfile.mkdtemp(prefix="hindi_transcript_")
    try:
        # ======== INPUT HANDLING ========
        if file:
            # gr.File may pass a tempfile wrapper rather than a plain path.
            file_path = getattr(file, "name", file)
            ext = os.path.splitext(file_path)[1].lower()
            title = os.path.basename(file_path)
            if ext in AUDIO_EXTENSIONS:
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path, work_dir=work_dir)
        elif url:
            video_path, title = download_video(url, work_dir=work_dir)
            audio_path = extract_audio(video_path, work_dir=work_dir)
        else:
            return "⚠️ Please paste a URL or upload a file."

        # Safety check
        if (
            not os.path.exists(audio_path)
            or os.path.getsize(audio_path) < MIN_AUDIO_BYTES
        ):
            return "❌ Audio file too small or missing. Try again."

        # ======== TRANSCRIPTION ========
        model = load_whisper_model()
        segments, info = model.transcribe(
            audio_path,
            beam_size=5,
            best_of=3,
            patience=1.0,
            temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
            vad_filter=True,
            word_timestamps=False,
            language=None,  # Auto-detect
        )

        # Consume the segments here, before the scratch directory is
        # removed in `finally`.
        raw_text = " ".join(seg.text for seg in segments).strip()

        # ======== FORCE HINDI OUTPUT ========
        # Even if detected language is en/ur/tam, convert to Hindi script
        final_text = normalize_to_hindi(raw_text)

        # Optional: Add title & metadata
        header = f"🎬 {title[:50]}{'...' if len(title) > 50 else ''}\n"
        header += f"🌍 Detected: {info.language or 'Unknown'} → 🇮🇳 Output: Hindi (Devanagari)\n\n"

        return header + final_text

    except Exception as e:
        err_msg = str(e).lower()
        if "instagram" in err_msg:
            return (
                "❌ Instagram URLs are blocked on Hugging Face.\n\n"
                "✅ Solution: Download the video manually (e.g., via online downloader), then upload it here."
            )
        elif "timeout" in err_msg or "network" in err_msg:
            return "⚠️ Network timeout. Try again or upload file directly."
        else:
            return f"❌ Error: {str(e)[:200]}..."
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)


# ===============================
# 🎨 MODERN UI (HUGGING FACE OPTIMIZED)
# ===============================
# NOTE(review): three declarations were garbled in the source and have been
# repaired: the backdrop-filter value (blur radius is a guess), the hover
# box-shadow colour (now 37, 117, 252, i.e. #2575fc), and the badge padding
# unit.
CSS = """
/* Glassmorphism + Dark Gradient */
body {
    background: radial-gradient(circle at top, #0c1445, #1a2a6c, #2c3e50);
    font-family: 'Inter', system-ui, sans-serif;
}
.glass-card {
    background: rgba(255, 255, 255, 0.07);
    backdrop-filter: blur(12px);
    -webkit-backdrop-filter: blur(12px);
    border-radius: 20px;
    padding: 28px;
    box-shadow: 0 12px 32px rgba(0, 0, 0, 0.4);
    border: 1px solid rgba(255, 255, 255, 0.1);
}
.gr-button-primary {
    background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
    border: none;
    color: white;
    font-weight: 600;
    padding: 12px 24px;
    border-radius: 12px;
    transition: all 0.3s ease;
}
.gr-button-primary:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 15px rgba(37, 117, 252, 0.4);
}
.gr-input, .gr-textarea, .gr-dropdown {
    background: rgba(255, 255, 255, 0.08) !important;
    color: #e0e0ff !important;
    border: 1px solid rgba(255, 255, 255, 0.15) !important;
    border-radius: 10px;
}
.gr-markdown p, .gr-markdown h2 {
    color: #f0f4ff !important;
}
footer {
    display: none !important;
}
.title {
    font-size: 2.2rem;
    font-weight: 800;
    background: linear-gradient(90deg, #ffd700, #ff8c00);
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent;
    margin-bottom: 12px;
}
.subtitle {
    color: #a0d2eb;
    font-size: 1.1rem;
    margin-bottom: 24px;
}
.feature-badge {
    display: inline-block;
    background: rgba(106, 17, 203, 0.3);
    color: #ffd700;
    padding: 3px 10px;
    border-radius: 20px;
    font-size: 0.85rem;
    margin: 0 4px;
}
"""

with gr.Blocks(
    css=CSS,
    # Built-in hue names are used because gr.themes.Color needs every shade
    # from c50 to c950; the previous partial palettes raised TypeError at
    # import time. The custom CSS above supplies the actual colours.
    theme=gr.themes.Default(
        primary_hue="purple",
        secondary_hue="orange",
        neutral_hue="slate",
    ),
    title="🗣️ AI Hindi Transcript Studio",
) as demo:
    with gr.Column(elem_classes=["glass-card"]):
        # NOTE(review): the wrapper tags in the HTML/Markdown strings below
        # were missing from the source and have been reconstructed from the
        # .title/.subtitle CSS classes — confirm against the deployed app.
        gr.HTML("<div class='title'>AI Hindi Transcript Studio</div>")
        gr.HTML(
            "<div class='subtitle'>Upload or paste any video → Get clean "
            "Devanagari Hindi transcript instantly</div>"
        )
        gr.Markdown(
            "✨ Supports: YouTube, TikTok, Facebook, Twitter/X, Instagram (via upload), local files<br>"
            "⚡ Zero ffprobe errors • Auto-script conversion • Real-time cleanup"
        )

        with gr.Tabs():
            with gr.TabItem("🔗 URL"):
                url_input = gr.Textbox(
                    label="🎥 Video URL",
                    placeholder="https://youtu.be/...",
                    info="Instagram? Upload file instead (HF restriction)",
                )
                btn_url = gr.Button("🔊 Transcribe to Hindi", variant="primary", size="lg")

            with gr.TabItem("📂 File"):
                file_input = gr.File(
                    label="📁 Upload Video/Audio",
                    file_types=["video", "audio"],
                    info="MP4, MOV, MP3, WAV, M4A, etc.",
                )
                btn_file = gr.Button("📖 Convert to Hindi", variant="primary", size="lg")

        lang_dummy = gr.Dropdown(
            choices=["Auto (→ Hindi)"],
            value="Auto (→ Hindi)",
            interactive=False,
            visible=False,
        )  # Hidden — we force Hindi output

        output_box = gr.Textbox(
            label="📝 Hindi Transcript (Devanagari)",
            lines=16,
            max_lines=25,
            show_copy_button=True,
            interactive=False,
            elem_classes=["gr-textarea"],
        )

        gr.Markdown(
            "<div style='text-align: center;'>"
            "🚀 Powered by Faster-Whisper + Indic Transliteration | Deployed on Hugging Face Spaces"
            "</div>"
        )

    # Event bindings: each button fills the unused url/file slot with None.
    btn_url.click(
        fn=transcribe_to_hindi,
        inputs=[url_input, gr.State(None), lang_dummy],
        outputs=output_box,
    )
    btn_file.click(
        fn=transcribe_to_hindi,
        inputs=[gr.State(None), file_input, lang_dummy],
        outputs=output_box,
    )

# Optional: Enable queue for HF Spaces
demo.queue(concurrency_count=2, max_size=10)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)