Spaces:
Running
Running
| import gradio as gr | |
| import yt_dlp | |
| import os | |
| import shutil | |
| import subprocess | |
| from faster_whisper import WhisperModel | |
| # 🔤 Hindi Script Fix | |
| from indic_transliteration import sanscript | |
| from indic_transliteration.sanscript import transliterate | |
| # =============================== | |
| # 1. Whisper Model (Lazy Load) | |
| # =============================== | |
# Process-wide Whisper instance; stays None until the first call to load_model().
model = None


def load_model():
    """Return the shared Whisper model, building it on the first call.

    The model is stored in the module-level ``model`` variable so later
    calls reuse the same instance instead of constructing a new one.

    Returns:
        The ``WhisperModel`` instance ("base" model, CPU, int8 compute type).
    """
    global model
    # Fast path: the model was already built by an earlier call.
    if model is not None:
        return model

    print("๐ฅ Loading Whisper Model...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    print("โ Model Loaded")
    return model
| # =============================== | |
| # 2. FFmpeg Path | |
| # =============================== | |
def get_ffmpeg():
    """Return the path of the ffmpeg executable.

    Returns:
        The location reported by ``shutil.which("ffmpeg")``, or the
        hard-coded fallback ``"/usr/bin/ffmpeg"`` when nothing is found
        on PATH.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    return "/usr/bin/ffmpeg"
| # =============================== | |
| # 3. Video → Audio | |
| # =============================== | |
def extract_audio(video_path):
    """Convert a media file to a mono 16 kHz WAV file using ffmpeg.

    Args:
        video_path: Path of the input media file.

    Returns:
        The fixed output path ``"uploaded_audio.wav"``. The path is returned
        even when the conversion fails; in that case the file does not
        exist, so callers can detect failure with ``os.path.exists``.
    """
    audio_path = "uploaded_audio.wav"

    # Start from a clean slate so a leftover file is never mistaken for
    # the result of this conversion.
    if os.path.exists(audio_path):
        os.remove(audio_path)

    cmd = [
        get_ffmpeg(),
        "-nostdin",        # never wait for interactive input
        "-y",              # global option: placed before inputs/outputs
        "-i", video_path,
        "-vn",             # drop any video stream
        "-ac", "1",        # mono
        "-ar", "16000",    # 16 kHz sample rate
        audio_path,
    ]
    result = subprocess.run(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # A non-zero exit status can leave a truncated file behind; remove it
    # so an existence check on the returned path reports the failure.
    if result.returncode != 0 and os.path.exists(audio_path):
        os.remove(audio_path)

    return audio_path
| # =============================== | |
| # 4. Download Audio from URL | |
| # =============================== | |
def download_audio_from_url(url):
    """Download the audio track of a URL with yt-dlp and convert it to WAV.

    Args:
        url: Address of the media page; surrounding whitespace is ignored.

    Returns:
        The fixed output path ``"url_audio.wav"``.

    Raises:
        Whatever ``yt_dlp.YoutubeDL.download`` raises when the download or
        the audio extraction fails; nothing is caught here.
    """
    import glob  # local import: only this function needs it

    base = "url_audio"

    # Remove leftovers from earlier runs (final WAV, intermediate downloads,
    # partial files) so a stale file can never be reused for a new URL.
    for stale in glob.glob(base + ".*") + [base]:
        if os.path.isfile(stale):
            os.remove(stale)

    ydl_opts = {
        "format": "bestaudio/best",
        # Explicit extension placeholder: the download keeps its real
        # extension and the post-processor swaps it for ".wav".
        "outtmpl": base + ".%(ext)s",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
        }],
        # Only one output file is returned, so do not expand a video URL
        # that also carries a playlist reference.
        "noplaylist": True,
        "quiet": True,
        # NOTE(review): this disables TLS certificate verification for
        # downloads. Kept to preserve existing behaviour; confirm it is
        # really required before relying on it.
        "nocheckcertificate": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url.strip()])

    return base + ".wav"
| # =============================== | |
| # 5. Hindi Script Normalizer | |
| # =============================== | |
def normalize_script(text, lang):
    """Transliterate Hindi text to Devanagari; pass other languages through.

    Args:
        text: Transcript text.
        lang: Language code of the transcript (for example ``"hi"``).

    Returns:
        For ``lang == "hi"``, the result of transliterating ``text`` from
        ``sanscript.ARABIC`` to ``sanscript.DEVANAGARI``. In every other
        case, and whenever the transliteration raises, ``text`` unchanged.
    """
    if lang != "hi" or not text:
        return text
    try:
        return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
    except Exception:
        # Best effort: an untransliterated transcript is better than none.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return text
| # =============================== | |
| # 6. Main Transcribe Logic | |
| # =============================== | |
def transcribe_media(url_input, file_input, language_choice):
    """Transcribe an uploaded file or a media URL and return display text.

    An uploaded file takes precedence over a URL. Files ending in .mp3,
    .wav or .m4a are passed to the model directly; any other file is first
    converted with ``extract_audio``. URLs go through
    ``download_audio_from_url``.

    Args:
        url_input: Media URL, or None/blank when unused.
        file_input: Uploaded file as a path (str or os.PathLike), an object
            exposing the path as ``.name``, or a falsy value when unused.
        language_choice: ``"Auto Detect"`` or a language code passed to the
            model as ``language``.

    Returns:
        A string: the detected language followed by the transcript, or a
        warning/error message. Exceptions are reported in the returned
        string rather than raised.
    """
    # Path of an audio file created by this call (never the user's upload);
    # removed again once transcription has finished.
    temp_audio = None
    try:
        # ---------- FILE ----------
        if file_input:
            if isinstance(file_input, (str, os.PathLike)):
                source_path = os.fspath(file_input)
            else:
                # presumably a tempfile-like wrapper from an older Gradio
                # release; its path is read from ``.name`` -- TODO confirm
                source_path = file_input.name
            ext = os.path.splitext(source_path)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                audio_path = source_path
            else:
                audio_path = temp_audio = extract_audio(source_path)
        # ---------- URL ----------
        elif url_input and url_input.strip():
            audio_path = temp_audio = download_audio_from_url(url_input.strip())
        else:
            return "โ ๏ธ Please paste a link or upload a file."

        if not os.path.exists(audio_path):
            return "โ Audio processing failed."

        whisper = load_model()

        # None tells the model to detect the language itself.
        language = None if language_choice == "Auto Detect" else language_choice
        segments, info = whisper.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
            language=language,
        )
        detected_lang = info.language

        # Strip each segment before joining so segments that carry their
        # own leading space do not produce doubled spaces. The audio file
        # must still exist while ``segments`` is being consumed here.
        raw_text = " ".join(seg.text.strip() for seg in segments)
        final_text = normalize_script(raw_text, detected_lang)
        return f"๐ Detected Language: {detected_lang}\n\n{final_text.strip()}"
    except Exception as e:
        # Top-level UI boundary: show the problem instead of crashing.
        return f"โ Error: {e}"
    finally:
        if temp_audio and os.path.exists(temp_audio):
            try:
                os.remove(temp_audio)
            except OSError:
                # Cleanup is best effort; a leftover file is harmless.
                pass
| # =============================== | |
| # 7. UI | |
| # =============================== | |
# Page styling: centred fixed-width container and gradient primary buttons.
css = """
.container {max-width: 900px; margin: auto;}
.gr-button-primary {
background: linear-gradient(90deg,#667eea,#764ba2);
border: none;
color: white;
}
"""

# Layout: two input tabs (URL / file upload) sharing one language selector
# and one output box. Each button calls transcribe_media with the unused
# input replaced by a None-valued State.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("## ๐ Universal Transcript Tool")
        gr.Markdown(
            "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
            "Hindi output is always **Devanagari** ๐ฎ๐ณ"
        )

        with gr.Tabs():
            with gr.TabItem("๐ Paste Link"):
                url_in = gr.Textbox(label="Video URL")
                btn_url = gr.Button("๐ง Transcribe Link", variant="primary")

            with gr.TabItem("๐ Upload File"):
                file_in = gr.File(
                    label="Upload Video / Audio",
                    # ".m4a" is listed because transcribe_media accepts it
                    # as a direct audio input.
                    file_types=[
                        ".mp4", ".mkv", ".mov", ".webm", ".avi",
                        ".mp3", ".wav", ".m4a",
                    ],
                )
                btn_file = gr.Button("๐ Transcribe File", variant="primary")

        # Language selector shared by both tabs.
        language_selector = gr.Dropdown(
            choices=[
                "Auto Detect",
                "hi",  # Hindi (Devanagari)
                "ur",  # Urdu
                "en",  # English
                "ar",
                "fr",
                "de",
                "es",
                "ru",
                "ja",
                "zh",
            ],
            value="Auto Detect",
            label="๐ Select Transcript Language",
        )

        output = gr.Code(label="Transcript Output", lines=15)

    # Event wiring must happen inside the Blocks context.
    btn_url.click(transcribe_media, [url_in, gr.State(None), language_selector], output)
    btn_file.click(transcribe_media, [gr.State(None), file_in, language_selector], output)

demo.launch()