Spaces:
Running
Running
import os
import tempfile
import unicodedata

import gradio as gr
import whisper
import yt_dlp
# Whisper models that have already been loaded, keyed by model name.
models = {}


def load_model(model_name):
    """Return the Whisper model named *model_name*, loading it on first use.

    A model that is already present in the module-level ``models`` dict is
    returned as-is; otherwise it is loaded with ``whisper.load_model``,
    stored in ``models`` and then returned.
    """
    try:
        return models[model_name]
    except KeyError:
        pass
    # Cache miss: load outside the ``except`` so a loader failure is not
    # reported as chained to the KeyError.
    loaded = whisper.load_model(model_name)
    models[model_name] = loaded
    return loaded
def format_time(seconds):
    """Format a non-negative duration in seconds as ``MM:SS.t``.

    ``t`` is a single digit of tenths of a second. The value is rounded to
    the nearest tenth *before* it is split into fields, so binary float
    error cannot drop a digit (``2.3`` renders as ``00:02.3``, not
    ``00:02.2``) and a value such as ``59.96`` carries cleanly into
    ``01:00.0``. Minutes are not folded into hours; durations of an hour or
    more simply show 60+ minutes.

    Args:
        seconds: Duration in seconds (int or float, expected to be >= 0).

    Returns:
        The formatted timestamp string.
    """
    total_tenths = int(round(seconds * 10))
    minutes, remainder = divmod(total_tenths, 600)  # 600 tenths per minute
    secs, tenths = divmod(remainder, 10)
    return f"{minutes:02d}:{secs:02d}.{tenths}"
# Simple Devanagari-to-Roman fallback map. This is a rough, lossy
# romanisation (no inherent-vowel handling), not a formal transliteration
# scheme. Keys may span several code points (conjuncts, nukta letters).
DEVA_MAP = {
    # Independent vowels
    'अ': 'a', 'आ': 'aa', 'इ': 'i', 'ई': 'ii', 'उ': 'u', 'ऊ': 'uu',
    'ऋ': 'ri', 'ॠ': 'ri', 'ए': 'e', 'ऐ': 'ai', 'ऍ': 'e',
    'ओ': 'o', 'औ': 'au', 'ऑ': 'o',
    # Consonants
    'क': 'k', 'ख': 'kh', 'ग': 'g', 'घ': 'gh', 'ङ': 'n',
    'च': 'ch', 'छ': 'chh', 'ज': 'j', 'झ': 'jh', 'ञ': 'n',
    'ट': 't', 'ठ': 'th', 'ड': 'd', 'ढ': 'dh', 'ण': 'n',
    'त': 't', 'थ': 'th', 'द': 'd', 'ध': 'dh', 'न': 'n',
    'प': 'p', 'फ': 'ph', 'ब': 'b', 'भ': 'bh', 'म': 'm',
    'य': 'y', 'र': 'r', 'ल': 'l', 'ळ': 'l', 'व': 'v',
    'श': 'sh', 'ष': 'sh', 'स': 's', 'ह': 'h',
    # Nukta letters, spelled as base letter + U+093C so the encoding is
    # unambiguous in source.
    '\u0917\u093c': 'g',    # ग़
    '\u091c\u093c': 'z',    # ज़
    '\u092b\u093c': 'f',    # फ़
    '\u0921\u093c': 'r',    # ड़
    '\u0922\u093c': 'rh',   # ढ़
    # Conjuncts (consonant + virama + consonant)
    '\u0915\u094d\u0937': 'ksh',   # क्ष
    '\u091c\u094d\u091e': 'gya',   # ज्ञ
    # Dependent vowel signs
    'ा': 'a', 'ि': 'i', 'ी': 'i', 'ु': 'u', 'ू': 'u', 'ृ': 'ri',
    'े': 'e', 'ै': 'ai', 'ो': 'o', 'ौ': 'au', 'ॉ': 'o',
    # Nasalisation, visarga, virama
    'ं': 'n', 'ँ': 'n', 'ः': 'h', '्': '',
    # Punctuation
    '।': '.', '॥': '.',
}

# Lookup table with NFC-normalised keys, so a key written in precomposed form
# still matches text after the same normalisation.
_DEVA_LOOKUP = {
    unicodedata.normalize("NFC", key): roman for key, roman in DEVA_MAP.items()
}
_DEVA_MAX_KEY_LEN = max(map(len, _DEVA_LOOKUP))


def devanagari_to_roman(text):
    """Romanise Devanagari *text* using ``DEVA_MAP``.

    The text is NFC-normalised and then scanned left to right; at each
    position the longest key of ``DEVA_MAP`` that matches is replaced by its
    Roman value. Longest-match is what lets multi-code-point keys (conjuncts
    such as ``ज्ञ`` and nukta letters such as ``ज़``) take effect; a plain
    per-character lookup can never hit them. Characters with no entry are
    copied through unchanged.

    Args:
        text: Input string, possibly mixing Devanagari with other scripts.

    Returns:
        The romanised string (in NFC form).
    """
    text = unicodedata.normalize("NFC", text)
    pieces = []
    pos = 0
    length = len(text)
    while pos < length:
        widest = min(_DEVA_MAX_KEY_LEN, length - pos)
        for width in range(widest, 0, -1):
            roman = _DEVA_LOOKUP.get(text[pos:pos + width])
            # Compare against None: the virama maps to the empty string.
            if roman is not None:
                pieces.append(roman)
                pos += width
                break
        else:
            # No key matches here; keep the character as-is.
            pieces.append(text[pos])
            pos += 1
    return ''.join(pieces)
# Stylesheet for the Gradio UI: dark palette, Geist / Instrument Serif web
# fonts, and overrides for the header, blocks, file drop zone, dropdowns,
# textarea, buttons, tabs, progress bar and scrollbar. The built-in footer is
# hidden. The string is used verbatim at runtime, so its content is data.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
*, *::before, *::after { box-sizing: border-box; }
body, .gradio-container {
background: #0a0a0a !important;
font-family: 'Geist', sans-serif !important;
color: #ededed !important;
}
.gradio-container {
max-width: 1080px !important;
margin: 0 auto !important;
padding: 0 !important;
}
/* NAV / Header */
.prose {
padding: 0 40px !important;
height: 56px !important;
display: flex !important;
align-items: center !important;
justify-content: space-between !important;
border-bottom: 1px solid #1a1a1a !important;
margin-bottom: 0 !important;
}
.prose h1 {
font-family: 'Geist', sans-serif !important;
font-size: 13px !important;
font-weight: 600 !important;
color: #ededed !important;
letter-spacing: -0.02em !important;
line-height: 1 !important;
margin: 0 !important;
}
.prose h1 em {
font-family: 'Instrument Serif', serif !important;
font-style: italic !important;
font-weight: 400 !important;
color: #58B8FF !important;
font-size: 14px !important;
}
.prose p {
font-size: 10px !important;
color: #2a2a2a !important;
letter-spacing: 0.14em !important;
margin: 0 !important;
}
/* Layout */
.contain, .gap { background: transparent !important; border: none !important; }
.block {
background: #0f0f0f !important;
border: 1px solid #1a1a1a !important;
border-radius: 10px !important;
}
.block label > span, label > span {
font-family: 'Geist', sans-serif !important;
font-size: 10px !important;
font-weight: 500 !important;
color: #333 !important;
text-transform: uppercase !important;
letter-spacing: 0.16em !important;
}
/* File upload */
[data-testid="file"], .file {
background: #0a0a0a !important;
border: 1px dashed #1a2d3a !important;
border-radius: 12px !important;
min-height: 160px !important;
transition: all 0.2s !important;
}
[data-testid="file"]:hover {
border-color: #3066BE !important;
background: #060d18 !important;
}
/* Dropdowns */
.wrap-inner, select {
background: #0a0a0a !important;
border: 1px solid #1a1a1a !important;
border-radius: 8px !important;
color: #ededed !important;
font-family: 'Geist', sans-serif !important;
font-size: 12px !important;
}
/* Radio */
input[type="radio"] { accent-color: #3066BE !important; }
input[type="checkbox"] { accent-color: #3066BE !important; }
/* Textarea */
textarea {
background: transparent !important;
color: #c8c8c8 !important;
font-family: 'Geist', sans-serif !important;
font-size: 14px !important;
line-height: 1.9 !important;
font-weight: 300 !important;
border: none !important;
}
textarea::placeholder { color: #1a1a1a !important; font-style: italic !important; }
/* Primary button */
button.primary {
background: #ededed !important;
border: none !important;
border-radius: 8px !important;
color: #000 !important;
font-family: 'Geist', sans-serif !important;
font-size: 12px !important;
font-weight: 600 !important;
letter-spacing: 0.04em !important;
padding: 12px 28px !important;
transition: all 0.18s ease !important;
width: 100% !important;
}
button.primary:hover {
background: #58B8FF !important;
color: #000 !important;
}
/* Secondary button */
button.secondary {
background: transparent !important;
border: 1px solid #1a1a1a !important;
border-radius: 8px !important;
color: #333 !important;
font-family: 'Geist', sans-serif !important;
font-size: 12px !important;
font-weight: 600 !important;
letter-spacing: 0.04em !important;
padding: 12px 28px !important;
transition: all 0.18s ease !important;
width: 100% !important;
}
button.secondary:hover {
border-color: #3066BE !important;
color: #58B8FF !important;
}
/* Tabs */
.tab-nav { border-bottom: 1px solid #141414 !important; }
.tab-nav button {
font-family: 'Geist', sans-serif !important;
font-size: 11px !important;
font-weight: 500 !important;
letter-spacing: 0.12em !important;
text-transform: uppercase !important;
color: #333 !important;
background: transparent !important;
border: none !important;
border-bottom: 1.5px solid transparent !important;
padding: 12px 20px !important;
transition: all 0.15s !important;
}
.tab-nav button.selected {
color: #ededed !important;
border-bottom-color: #3066BE !important;
}
/* Progress bar */
.progress-bar { background: #3066BE !important; }
.progress-bar-wrap { background: #111 !important; border-radius: 0 !important; }
/* Scrollbar */
::-webkit-scrollbar { width: 2px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: #1a2030; }
footer { display: none !important; }
"""
# Choices offered by the "Language" dropdown, in display order.
LANGUAGES = [
    # Special entries
    "Auto Detect",
    "English",
    "Hinglish (Roman)",
    # Remaining languages
    "Hindi",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Chinese",
    "Japanese",
    "Korean",
    "Arabic",
    "Russian",
    "Dutch",
    "Turkish",
]
# One-line description for each Whisper model size, keyed by model name.
MODEL_INFO = dict(
    tiny="Fastest — best for short clips",
    base="Fast — good everyday accuracy",
    small="Balanced — recommended",
    medium="Best accuracy — slower processing",
)
# UI layout. Components are created in display order inside nested Gradio
# context managers, so statement order and nesting define the page structure.
# NOTE(review): the nesting below is reconstructed from the statement order —
# confirm it against the deployed layout.
with gr.Blocks(title="Kalp Transcript — Kalpi Edition") as demo:
    # Header: title and byline.
    gr.Markdown("""
# Kalp *Transcript*
by Kalpi Edition
""")
    with gr.Row():
        # Input column: file upload, transcription options, action buttons.
        with gr.Column(scale=5):
            file_input = gr.File(
                label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
            )
            with gr.Row():
                # Each label starts with the Whisper model name; run() keeps
                # only that first token.
                model_choice = gr.Dropdown(
                    choices=[
                        "tiny — Fastest",
                        "base — Fast",
                        "small — Balanced",
                        "medium — Best accuracy",
                        "large-v3 — Most accurate (very slow)"
                    ],
                    value="tiny — Fastest",
                    label="Model"
                )
                language = gr.Dropdown(
                    choices=LANGUAGES,
                    value="Auto Detect",
                    label="Language"
                )
            with gr.Row():
                translate = gr.Dropdown(
                    choices=["Off", "Translate to English"],
                    value="Off",
                    label="Translate"
                )
                timestamps = gr.Checkbox(
                    label="Show timestamps",
                    value=False
                )
            # Small vertical spacer above the buttons.
            gr.Markdown("<div style='height:4px'></div>")
            submit_btn = gr.Button("Transcribe →", variant="primary")
            clear_btn = gr.ClearButton(value="Clear", variant="secondary")
        # Output column: transcript view and download tab.
        with gr.Column(scale=6):
            with gr.Tabs():
                with gr.Tab("Transcript"):
                    output = gr.Textbox(
                        label="",
                        lines=18,
                        placeholder="Your transcript will appear here..."
                    )
                with gr.Tab("Download .txt"):
                    # Hidden textbox that receives a second copy of the
                    # transcript from the submit handler.
                    plain_output = gr.Textbox(label="", lines=12, visible=False)
                    gr.Markdown("<div style='height:6px'></div>")
                    download_btn = gr.Button("Save transcript", variant="secondary")
                    download_file = gr.File(label="")
def transcribe(file, model_name, language, show_timestamps, translate):
    """Transcribe an uploaded media file with Whisper.

    Args:
        file: The uploaded file as delivered by the file input: either an
            object exposing the path as ``.name`` or a plain path string.
            ``None`` means nothing was uploaded.
        model_name: Whisper model name passed to ``load_model``.
        language: UI language choice. ``"Auto Detect"`` lets Whisper detect
            the language; ``"Hinglish (Roman)"`` transcribes as Hindi and
            romanises the result; any other value is passed through as-is.
        show_timestamps: When true, emit one ``[start → end] text`` line per
            segment instead of the plain text.
        translate: ``"Translate to English"`` selects Whisper's ``translate``
            task; any other value selects ``transcribe``.

    Returns:
        A ``(transcript, transcript)`` pair, the same text for the visible
        and the hidden output, or a warning message and an empty string
        when no file was given.
    """
    if file is None:
        return "⚠️ Please upload a file first.", ""

    # Accept both a file-like object with ``.name`` and a bare path string.
    audio_path = getattr(file, "name", file)

    romanize = language == "Hinglish (Roman)"
    if romanize:
        # Hinglish: transcribe in Hindi, then romanise the output below.
        lang = "hi"
    elif language == "Auto Detect":
        lang = None
    else:
        lang = language
    task = "translate" if translate == "Translate to English" else "transcribe"

    model = load_model(model_name)
    result = model.transcribe(audio_path, language=lang, task=task)

    if romanize:
        for seg in result["segments"]:
            seg["text"] = devanagari_to_roman(seg["text"])
        result["text"] = devanagari_to_roman(result["text"])

    if show_timestamps:
        transcript = "\n".join(
            f"[{format_time(seg['start'])} → {format_time(seg['end'])}] "
            f"{seg['text'].strip()}"
            for seg in result["segments"]
        )
    else:
        transcript = result["text"].strip()
    return transcript, transcript
def run(file, model_raw, language, timestamps, translate):
    """Click handler: extract the model name from its label and transcribe.

    ``model_raw`` is a dropdown label such as ``"tiny — Fastest"``; its first
    whitespace-delimited token is taken as the model name. All other
    arguments are forwarded to ``transcribe`` unchanged, and its result is
    returned as-is.
    """
    model_name = model_raw.split(maxsplit=1)[0]
    return transcribe(file, model_name, language, timestamps, translate)
def _save_transcript(text):
    """Write *text* to a temporary UTF-8 ``.txt`` file and return its path.

    Returns ``None`` when there is nothing to save, so no file is offered
    for download.
    """
    if not text or not text.strip():
        return None
    # delete=False: the file must still exist after this function returns so
    # it can be served for download.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".txt", prefix="transcript_", delete=False, encoding="utf-8"
    ) as handle:
        handle.write(text)
    return handle.name


# Event wiring. Listeners must be registered inside the Blocks context, so it
# is re-entered here.
with demo:
    # Transcribe: fill the visible transcript and the hidden plain-text copy.
    submit_btn.click(
        fn=run,
        inputs=[file_input, model_choice, language, timestamps, translate],
        outputs=[output, plain_output],
    )
    # "Save transcript": turn the hidden copy into a downloadable file.
    download_btn.click(
        fn=_save_transcript,
        inputs=plain_output,
        outputs=download_file,
    )
    # The ClearButton is created without components, so attach the ones it
    # should reset.
    clear_btn.add([file_input, output, plain_output, download_file])
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    # NOTE(review): the stylesheet is passed to launch() rather than to
    # gr.Blocks(); whether launch() accepts `css` depends on the installed
    # Gradio version — confirm.
    demo.launch(css=custom_css)