Spaces:
Running
Running
| import gradio as gr | |
| import yt_dlp | |
| import os | |
| import shutil | |
| import subprocess | |
| from faster_whisper import WhisperModel | |
| from indic_transliteration import sanscript | |
| from indic_transliteration.sanscript import transliterate | |
| # =============================== | |
| # Whisper Model (lazy load) | |
| # =============================== | |
model = None


def load_model():
    """Return the shared Whisper model, constructing it on the first call.

    The instance is stored in the module-level ``model`` variable so later
    calls reuse it instead of building a new one.
    """
    global model
    if model is not None:
        return model
    # "base" checkpoint, run on CPU with int8 compute.
    model = WhisperModel("base", device="cpu", compute_type="int8")
    return model
| # =============================== | |
| # FFmpeg path | |
| # =============================== | |
def get_ffmpeg():
    """Return the ffmpeg executable to invoke.

    Uses the binary found on ``PATH`` when there is one; otherwise falls
    back to the fixed location ``/usr/bin/ffmpeg``.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    return "/usr/bin/ffmpeg"
| # =============================== | |
| # SAFE: Download video only (NO postprocessing) | |
| # =============================== | |
def download_video_only(url):
    """Download the video at ``url`` with yt-dlp and return the local path.

    The download always lands in ``downloaded_video.mp4`` in the current
    working directory. No yt-dlp post-processors are configured.
    """
    target = "downloaded_video.mp4"

    # Drop any file left over from a previous call before downloading.
    if os.path.exists(target):
        os.remove(target)

    # NOTE(review): "nocheckcertificate" turns off TLS certificate checks;
    # confirm this is still required in the deployment environment.
    options = dict(
        format="best",
        outtmpl=target,
        quiet=True,
        nocheckcertificate=True,
    )
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])

    return target
| # =============================== | |
| # SAFE: Extract audio manually (NO ffprobe) | |
| # =============================== | |
def extract_audio_safe(video_path):
    """Extract the audio track of ``video_path`` into a WAV file via ffmpeg.

    The output is always written to ``extracted_audio.wav`` in the current
    working directory and that path is returned. ffmpeg's exit status is not
    inspected and its output is discarded, so the caller has to verify that
    the returned file actually exists.
    """
    wav_path = "extracted_audio.wav"

    # Remove the result of an earlier call so it cannot be mistaken for this one.
    if os.path.exists(wav_path):
        os.remove(wav_path)

    command = [
        get_ffmpeg(),
        "-y",             # overwrite the output without prompting
        "-i", video_path,
        "-vn",            # ignore the video stream
        "-ac", "1",       # single audio channel
        "-ar", "16000",   # 16 kHz sample rate
        wav_path,
    ]
    subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    return wav_path
| # =============================== | |
| # Hindi script normalizer | |
| # =============================== | |
def normalize_script(text, lang):
    """Best-effort conversion of Hindi transcripts to Devanagari script.

    Args:
        text: Transcript text to normalize.
        lang: Language code reported for the transcript.

    Returns:
        The transliterated text when ``lang`` is ``"hi"`` and the conversion
        succeeds; otherwise ``text`` unchanged.
    """
    if lang != "hi":
        return text

    try:
        # NOTE(review): confirm that the installed indic_transliteration
        # exposes a ``sanscript.ARABIC`` scheme; if it does not, the lookup
        # fails and the handler below returns the text unchanged.
        return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
    except Exception:
        # Transliteration is optional polish: keep the raw transcript on any
        # failure, but do not swallow KeyboardInterrupt/SystemExit the way a
        # bare ``except:`` would.
        return text
| # =============================== | |
| # Transcription logic (STABLE) | |
| # =============================== | |
def transcribe(url, file, lang_choice):
    """Transcribe an uploaded media file or a video URL.

    Args:
        url: Video URL to download, or a falsy value when no link was given.
            Surrounding whitespace is ignored.
        file: Uploaded media, given either as a filesystem path or as an
            object that exposes the path through a ``name`` attribute; falsy
            when nothing was uploaded. Takes precedence over ``url``.
        lang_choice: ``"Auto Detect"`` to let the model detect the language,
            otherwise the language code passed to the model.

    Returns:
        A string for display: the detected language followed by the
        transcript, or a user-facing message when input is missing, audio
        extraction failed, or any exception was raised along the way.
    """
    try:
        # An upload may arrive as a file-like wrapper rather than a plain
        # path; os.path functions need the path itself.
        if file and not isinstance(file, (str, os.PathLike)):
            file = getattr(file, "name", file)
        # A URL made only of whitespace is the same as no URL at all.
        if isinstance(url, str):
            url = url.strip()

        # -------- FILE MODE --------
        if file:
            ext = os.path.splitext(file)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                # Already audio: hand it to the model as-is.
                audio = file
            else:
                audio = extract_audio_safe(file)
        # -------- URL MODE --------
        elif url:
            video = download_video_only(url)
            audio = extract_audio_safe(video)
        else:
            return "β οΈ Please paste a URL or upload a file."

        # Safety check: extraction does not report failures itself, so a
        # missing or tiny (< 10000 bytes) file is treated as a failure.
        if not os.path.exists(audio) or os.path.getsize(audio) < 10000:
            return "β Audio extraction failed. Please try again."

        whisper = load_model()
        language = None if lang_choice == "Auto Detect" else lang_choice
        segments, info = whisper.transcribe(
            audio,
            beam_size=1,
            vad_filter=True,
            language=language,
        )

        # Strip each segment before joining so that segment texts carrying
        # their own leading/trailing whitespace do not yield doubled spaces.
        raw_text = " ".join(s.text.strip() for s in segments)
        final_text = normalize_script(raw_text, info.language)
        return f"π Detected Language: {info.language}\n\n{final_text.strip()}"
    except Exception as e:
        # UI boundary: always answer with a message instead of raising.
        if "instagram" in str(e).lower():
            return "β Instagram URL is blocked on Hugging Face. Please upload the video file instead."
        return f"β Error: {str(e)}"
| # =============================== | |
| # MODERN UI | |
| # =============================== | |
# Custom stylesheet passed to gr.Blocks: gradient page background, a
# translucent ".glass" card, styled primary buttons/inputs, hidden footer.
css = """
body {
background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
}
.glass {
background: rgba(255,255,255,0.08);
backdrop-filter: blur(18px);
border-radius: 18px;
padding: 25px;
box-shadow: 0 20px 40px rgba(0,0,0,0.4);
}
.gr-button-primary {
background: linear-gradient(135deg,#00c6ff,#0072ff);
border: none;
color: white;
font-weight: 600;
}
.gr-input, .gr-textarea {
background: rgba(255,255,255,0.12) !important;
color: white !important;
}
h1, h2, label, .markdown-text {
color: #ffffff !important;
}
footer {display:none;}
"""

# Page layout: one "glass" column holding the intro text, a tab per input
# mode (link / upload), the language selector and the transcript output.
with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
    with gr.Column(elem_classes="glass"):
        gr.Markdown("## π Universal Transcript Tool (STABLE)")
        gr.Markdown(
            "β YouTube β TikTok β Facebook β Twitter/X\n\n"
            "β οΈ Instagram URL blocked on Hugging Face β **Upload video instead**\n\n"
            "**No random ffprobe errors. Ever.**"
        )

        with gr.Tabs():
            with gr.TabItem("π Paste Link"):
                url = gr.Textbox(label="Video URL")
                btn_url = gr.Button("π§ Transcribe Link", variant="primary")

            with gr.TabItem("π Upload File"):
                file = gr.File(
                    label="Upload Video / Audio",
                    # ".m4a" is listed because transcribe() treats it as a
                    # ready-to-use audio format alongside .mp3 and .wav.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ],
                )
                btn_file = gr.Button("π Transcribe File", variant="primary")

        lang = gr.Dropdown(
            label="π Transcript Language",
            choices=[
                "Auto Detect",
                "hi",
                "ur",
                "en",
                "ar",
                "fr",
                "de",
                "es",
                "ru",
                "ja",
                "zh",
            ],
            value="Auto Detect",
        )
        output = gr.Code(label="Transcript Output", lines=14)

    # Each button feeds transcribe() only its own input; the other slot is a
    # constant None so the unused mode is never triggered.
    btn_url.click(transcribe, [url, gr.State(None), lang], output)
    btn_file.click(transcribe, [gr.State(None), file, lang], output)

demo.launch()