# Spaces: Runtime error
# (Appears to be the status banner captured from the hosting page; it is not part of the program.)
import os
import re
import shutil
import subprocess
import tempfile

import gradio as gr
import torch
import yt_dlp
from faster_whisper import WhisperModel
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
# ===============================
# 🔒 GLOBALS & CONFIG
# ===============================
# Directory passed to faster-whisper as ``download_root`` for the model files.
# It is created at import time so the first model load can write into it.
MODEL_CACHE_DIR = "/tmp/qwen_whisper_cache"
os.makedirs(MODEL_CACHE_DIR, exist_ok=True)

# Lazy-loaded model (shared across calls); filled in by load_whisper_model().
_model = None
def load_whisper_model():
    """Return the shared Whisper model, creating it on first use.

    The first call builds a faster-whisper ``WhisperModel`` ("base" weights,
    CPU device, int8 compute type, files stored under ``MODEL_CACHE_DIR``)
    and keeps it in the module-level ``_model``. Every later call returns
    that same object without reloading.
    """
    global _model
    if _model is not None:
        return _model

    print("📥 Loading Whisper 'base' model (CPU/int8)...")
    _model = WhisperModel(
        "base",
        device="cpu",
        compute_type="int8",
        download_root=MODEL_CACHE_DIR,
    )
    print("✅ Model loaded.")
    return _model
def get_ffmpeg():
    """Return the path of the ffmpeg executable.

    Uses the first ``ffmpeg`` found on ``PATH``; when none is found, falls
    back to ``/usr/bin/ffmpeg`` (which is returned without checking that it
    exists).
    """
    located = shutil.which("ffmpeg")
    return located or "/usr/bin/ffmpeg"
# ===============================
# 📥 SAFE DOWNLOAD (YouTube, TikTok, etc.)
# ===============================
def download_video(url):
    """Download the media behind *url* with yt-dlp.

    Args:
        url: Address of the page or media to download.

    Returns:
        A ``(video_path, title)`` tuple. ``video_path`` is the downloaded
        file inside the system temp directory; ``title`` is the title
        reported by yt-dlp, or ``"Untitled"`` when none is available.

    Raises:
        RuntimeError: If yt-dlp fails or no output file was produced. The
            message always starts with ``"Download failed: "``.
    """
    # NOTE(review): the output name is fixed, so two downloads running at the
    # same time would write to (and delete) the same file.
    video_path = os.path.join(tempfile.gettempdir(), "downloaded_video.mp4")
    try:
        os.remove(video_path)  # drop a stale file left by a previous call
    except FileNotFoundError:
        pass

    ydl_opts = {
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "outtmpl": video_path,
        "quiet": True,
        # NOTE(review): this disables TLS certificate verification.
        "nocheckcertificate": True,
        "noplaylist": True,
        "retries": 10,
        "fragment_retries": 10,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
    except Exception as exc:
        # Keep the original cause attached for server-side debugging.
        raise RuntimeError(f"Download failed: {exc}") from exc

    # Checked outside the try block so the message is not wrapped twice.
    if not os.path.exists(video_path):
        raise RuntimeError("Download failed: no file created")

    # Guard against a missing info dict or an explicit ``None`` title.
    title = (info or {}).get("title") or "Untitled"
    return video_path, title
# ===============================
# 🎧 EXTRACT AUDIO (robust)
# ===============================
def extract_audio(video_path, timeout=60):
    """Convert *video_path* to a mono 16 kHz 16-bit PCM WAV file using ffmpeg.

    Args:
        video_path: Path of the input media file.
        timeout: Maximum number of seconds ffmpeg may run (default 60).

    Returns:
        Path of the extracted WAV file inside the system temp directory.

    Raises:
        RuntimeError: If ffmpeg is missing, exits with an error, exceeds
            *timeout*, or produces a file smaller than 5000 bytes.
    """
    # NOTE(review): the output name is fixed, so concurrent calls share it.
    audio_path = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
    try:
        os.remove(audio_path)  # drop a stale file left by a previous call
    except FileNotFoundError:
        pass

    cmd = [
        get_ffmpeg(),
        # Keep stderr limited to real errors so that callers who truncate the
        # message still see the cause instead of the version banner.
        "-hide_banner",
        "-loglevel", "error",
        "-nostdin",  # never wait for interactive input
        "-y",
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        "-c:a", "pcm_s16le",
        audio_path,
    ]

    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
            f"Audio extraction timed out (>{timeout}s)"
        ) from exc
    except FileNotFoundError as exc:
        raise RuntimeError(f"FFmpeg executable not found: {cmd[0]}") from exc

    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg failed: {result.stderr.strip()}")
    if not os.path.exists(audio_path) or os.path.getsize(audio_path) < 5000:
        raise RuntimeError("Audio extraction produced empty/invalid file")
    return audio_path
# ===============================
# 🌐 LANGUAGE-AWARE TRANSLITERATION & NORMALIZATION
# ===============================
# Anything that is not Devanagari (U+0900-U+097F), a space, or one of
# . , ? ! ; : - ( ) is removed from the final text.
_NON_HINDI_CHARS_RE = re.compile(
    r'[^\u0900-\u097F\u0020\u002E\u002C\u003F\u0021\u003B\u003A\u002D\u0028\u0029]'
)
_WHITESPACE_RUN_RE = re.compile(r'\s+')
# A '.', '?' or '!' repeated any number of times, optionally space-separated.
_REPEATED_MARK_RE = re.compile(r'([.?!])(?:\s*\1)+')
_LATIN_LETTER_RE = re.compile(r'[A-Za-z]')
_ARABIC_SCRIPT_RE = re.compile(
    r'[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFF]'
)


def _to_devanagari(text, scheme_name):
    """Best-effort transliteration of *text* from a sanscript scheme.

    *scheme_name* is the name of a constant on ``sanscript`` (for example
    ``"ITRANS"``). The input is returned unchanged when that constant does
    not exist or when the conversion raises, so one failing step never
    prevents the following ones from running.
    """
    try:
        source_scheme = getattr(sanscript, scheme_name, None)
        if source_scheme is None:
            return text
        return transliterate(text, source_scheme, sanscript.DEVANAGARI)
    except Exception as exc:  # deliberate best effort: keep the raw text
        print(f"⚠️ Transliteration from {scheme_name} skipped: {exc}")
        return text


def normalize_to_hindi(text):
    """Convert any script to Devanagari + clean up.

    Args:
        text: Raw transcript text; ``None`` or blank input yields ``""``.

    Returns:
        The cleaned text. Characters outside the Devanagari block and a small
        punctuation set are removed, whitespace runs and repeated ``.``/``?``
        /``!`` are collapsed, and a danda (``।``) is appended when the text
        does not already end with ``।``, ``.``, ``!`` or ``?``.
    """
    if not text or not text.strip():
        return ""

    # Step 1: Transliterate non-Devanagari scripts to Devanagari. Each step is
    # independent and only runs when the text contains that script.
    # NOTE(review): the installed indic_transliteration may not define an
    # ``ARABIC`` scheme; in that case the Urdu step is a no-op - TODO confirm.
    if _ARABIC_SCRIPT_RE.search(text):
        text = _to_devanagari(text, "ARABIC")
    if _LATIN_LETTER_RE.search(text):
        text = _to_devanagari(text, "ITRANS")

    # Step 2: Clean punctuation & spacing
    text = _NON_HINDI_CHARS_RE.sub('', text)
    text = _WHITESPACE_RUN_RE.sub(' ', text).strip()
    text = _REPEATED_MARK_RE.sub(r'\1', text)  # "..." -> ".", "? ?" -> "?"

    # Step 3: Add proper full stop at end if missing
    if text and text[-1] not in "।.!?":
        text += "।"
    return text
# ===============================
# 🎯 CORE TRANSCRIBE FUNCTION (ALWAYS OUTPUT HINDI)
# ===============================
def transcribe_to_hindi(url=None, file=None, lang_choice="Auto Detect"):
    """Transcribe a URL or an uploaded file and return Devanagari text.

    Args:
        url: Address of a video to download; ignored when *file* is given.
            Blank or whitespace-only values count as "no URL".
        file: Uploaded media, either as a path string or as an object that
            exposes the path through a ``name`` attribute.
        lang_choice: Accepted for interface compatibility; not used, because
            the spoken language is always auto-detected.

    Returns:
        A header (title and detected language) followed by the normalized
        transcript, or a user-facing message starting with ``⚠️``/``❌`` when
        no input was given or something failed. This function never raises.
    """
    try:
        # ======== INPUT HANDLING ========
        # Accept both a plain path and a tempfile-like upload object.
        file_path = getattr(file, "name", file) if file else None
        if isinstance(url, str):
            url = url.strip()

        if file_path:
            file_path = os.fspath(file_path)
            title = os.path.basename(file_path)
            ext = os.path.splitext(file_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a", ".ogg"):
                # Already audio: hand it to Whisper as-is.
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)
        elif url:
            video_path, title = download_video(url)
            audio_path = extract_audio(video_path)
        else:
            return "⚠️ Please paste a URL or upload a file."

        # Safety check
        if not os.path.exists(audio_path) or os.path.getsize(audio_path) < 5000:
            return "❌ Audio file too small or missing. Try again."

        # ======== TRANSCRIPTION ========
        model = load_whisper_model()
        segments, info = model.transcribe(
            audio_path,
            beam_size=5,
            best_of=3,
            patience=1.0,
            temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
            vad_filter=True,
            word_timestamps=False,
            language=None,  # Auto-detect
        )
        raw_text = " ".join(seg.text for seg in segments).strip()

        # ======== FORCE HINDI OUTPUT ========
        # Even if detected language is en/ur/tam, convert to Hindi script
        final_text = normalize_to_hindi(raw_text)

        # Title & metadata header; a missing title must not break slicing.
        title = str(title or "Untitled")
        header = f"🎬 {title[:50]}{'...' if len(title) > 50 else ''}\n"
        header += f"🌍 Detected: {info.language or 'Unknown'} → 🇮🇳 Output: Hindi (Devanagari)\n\n"
        return header + final_text
    except Exception as e:
        detail = str(e)
        err_msg = detail.lower()
        if "instagram" in err_msg:
            return (
                "❌ Instagram URLs are blocked on Hugging Face.\n\n"
                "✅ Solution: Download the video manually (e.g., via online downloader), then upload it here."
            )
        if "timeout" in err_msg or "network" in err_msg:
            return "⚠️ Network timeout. Try again or upload file directly."
        # Only show an ellipsis when the message was actually cut, matching
        # how the title is shortened above.
        ellipsis = "..." if len(detail) > 200 else ""
        return f"❌ Error: {detail[:200]}{ellipsis}"
# ===============================
# 🎨 MODERN UI (HUGGING FACE OPTIMIZED)
# ===============================
# Custom stylesheet handed to gr.Blocks(css=...).
# NOTE(review): three declarations were malformed and have been repaired:
#   * .glass-card backdrop-filter - the blur radius (12px) is an assumed
#     value, TODO confirm the intended one;
#   * the hover box-shadow colour - 37, 117, 252 is #2575fc, the end colour of
#     the button gradient;
#   * .feature-badge padding - the horizontal value was missing its unit.
CSS = """
/* Glassmorphism + Dark Gradient */
body {
background: radial-gradient(circle at top, #0c1445, #1a2a6c, #2c3e50);
font-family: 'Inter', system-ui, sans-serif;
}
.glass-card {
background: rgba(255, 255, 255, 0.07);
backdrop-filter: blur(12px);
border-radius: 20px; padding: 28px;
box-shadow: 0 12px 32px rgba(0, 0, 0, 0.4);
border: 1px solid rgba(255, 255, 255, 0.1);
}
.gr-button-primary {
background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
border: none;
color: white;
font-weight: 600;
padding: 12px 24px;
border-radius: 12px;
transition: all 0.3s ease;
}
.gr-button-primary:hover {
transform: translateY(-2px);
box-shadow: 0 6px 15px rgba(37, 117, 252, 0.4);
}
.gr-input, .gr-textarea, .gr-dropdown {
background: rgba(255, 255, 255, 0.08) !important;
color: #e0e0ff !important;
border: 1px solid rgba(255, 255, 255, 0.15) !important;
border-radius: 10px;
}
.gr-markdown p, .gr-markdown h2 {
color: #f0f4ff !important;
}
footer { display: none !important; }
.title {
font-size: 2.2rem;
font-weight: 800;
background: linear-gradient(90deg, #ffd700, #ff8c00);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
margin-bottom: 12px;
}
.subtitle {
color: #a0d2eb;
font-size: 1.1rem;
margin-bottom: 24px;
}
.feature-badge {
display: inline-block;
background: rgba(106, 17, 203, 0.3);
color: #ffd700;
padding: 3px 10px;
border-radius: 20px;
font-size: 0.85rem;
margin: 0 4px;
}"""
# Build the Gradio interface: a single "glass card" column holding a URL tab,
# a file-upload tab and a read-only output box. Both buttons call
# transcribe_to_hindi with the input of their own tab.
with gr.Blocks(
    css=CSS,
    # gr.themes.Color needs every shade from c50 to c950, so building one from
    # two or three shades fails at import time. Named built-in hues are used
    # instead; the custom gradient colours are still applied through CSS.
    theme=gr.themes.Default(
        primary_hue="violet",
        secondary_hue="orange",
        neutral_hue="slate",
    ),
    title="🗣️ AI Hindi Transcript Studio",
) as demo:
    with gr.Column(elem_classes=["glass-card"]):
        gr.HTML("<div class='title'>AI Hindi Transcript Studio</div>")
        gr.HTML("<div class='subtitle'>Upload or paste any video → Get clean Devanagari Hindi transcript instantly</div>")
        gr.Markdown(
            "✨ Supports: YouTube, TikTok, Facebook, Twitter/X, Instagram (via upload), local files<br>"
            "⚡ Zero ffprobe errors • Auto-script conversion • Real-time cleanup"
        )

        with gr.Tabs():
            with gr.TabItem("🔗 URL"):
                url_input = gr.Textbox(
                    label="🎥 Video URL",
                    placeholder="https://youtu.be/...",
                    info="Instagram? Upload file instead (HF restriction)",
                )
                btn_url = gr.Button("🔊 Transcribe to Hindi", variant="primary", size="lg")
            with gr.TabItem("📂 File"):
                # gr.File takes no ``info`` argument, so the format hint is
                # carried by the label instead.
                file_input = gr.File(
                    label="📁 Upload Video/Audio (MP4, MOV, MP3, WAV, M4A, etc.)",
                    file_types=["video", "audio"],
                )
                btn_file = gr.Button("📖 Convert to Hindi", variant="primary", size="lg")

        lang_dummy = gr.Dropdown(
            choices=["Auto (→ Hindi)"],
            value="Auto (→ Hindi)",
            interactive=False,
            visible=False,
        )  # Hidden — we force Hindi output

        output_box = gr.Textbox(
            label="📝 Hindi Transcript (Devanagari)",
            lines=16,
            max_lines=25,
            show_copy_button=True,
            interactive=False,
            elem_classes=["gr-textarea"],
        )
        gr.Markdown(
            "<div style='text-align:center; margin-top:20px; color:#a0d2eb; font-size:0.9rem;'>"
            "🚀 Powered by Faster-Whisper + Indic Transliteration | Deployed on Hugging Face Spaces"
            "</div>"
        )

    # Event bindings (kept inside the Blocks context). Each button fills the
    # argument of the other tab with a constant ``None`` state.
    no_url = gr.State(None)
    no_file = gr.State(None)
    btn_url.click(
        fn=transcribe_to_hindi,
        inputs=[url_input, no_file, lang_dummy],
        outputs=output_box,
    )
    btn_file.click(
        fn=transcribe_to_hindi,
        inputs=[no_url, file_input, lang_dummy],
        outputs=output_box,
    )
# Enable request queuing for HF Spaces: at most 10 waiting requests.
# Newer Gradio releases take ``default_concurrency_limit``; older ones only
# know ``concurrency_count`` and reject the new keyword with a TypeError.
# NOTE(review): the two settings are close but not identical in meaning
# (per-event limit vs. number of workers) - confirm against the pinned version.
try:
    demo.queue(default_concurrency_limit=2, max_size=10)
except TypeError:
    demo.queue(concurrency_count=2, max_size=10)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)