"""Gradio front end for transcribing uploaded audio/video files with Whisper."""

import os
import tempfile
import unicodedata

import gradio as gr
import whisper
import yt_dlp

# Cache of loaded Whisper models keyed by model name, so each model is
# loaded at most once per process.
models = {}


def load_model(model_name):
    """Return the Whisper model called *model_name*, loading it on first use."""
    if model_name not in models:
        models[model_name] = whisper.load_model(model_name)
    return models[model_name]


def format_time(seconds: float) -> str:
    """Format a time offset in seconds as ``MM:SS.t`` (``t`` = tenths of a second).

    Minutes are not wrapped into hours, so offsets of an hour or more show
    a minute count above 59.
    """
    minutes = int(seconds // 60)
    whole_seconds = int(seconds % 60)
    tenths = int((seconds % 1) * 10)
    return f"{minutes:02d}:{whole_seconds:02d}.{tenths}"


# Simple Devanagari to Roman fallback map.
# Keys may span several code points (conjuncts such as 'ज्ञ', nukta letters
# such as 'ज़'); devanagari_to_roman() matches the longest key first.
DEVA_MAP = {
    # Independent vowels
    'अ': 'a', 'आ': 'aa', 'इ': 'i', 'ई': 'ii', 'उ': 'u', 'ऊ': 'uu',
    'ए': 'e', 'ऐ': 'ai', 'ओ': 'o', 'औ': 'au', 'ऑ': 'o', 'ऍ': 'e',
    'ऋ': 'ri', 'ॠ': 'ri',
    # Consonants
    'क': 'k', 'ख': 'kh', 'ग': 'g', 'घ': 'gh', 'ङ': 'n',
    'च': 'ch', 'छ': 'chh', 'ज': 'j', 'झ': 'jh', 'ञ': 'n',
    'ट': 't', 'ठ': 'th', 'ड': 'd', 'ढ': 'dh', 'ण': 'n',
    'त': 't', 'थ': 'th', 'द': 'd', 'ध': 'dh', 'न': 'n',
    'प': 'p', 'फ': 'ph', 'ब': 'b', 'भ': 'bh', 'म': 'm',
    'य': 'y', 'र': 'r', 'ल': 'l', 'व': 'v', 'ळ': 'l',
    'श': 'sh', 'ष': 'sh', 'स': 's', 'ह': 'h',
    # Conjuncts and nukta letters (multi-code-point keys)
    'क्ष': 'ksh', 'ज्ञ': 'gya',
    'ड़': 'r', 'ढ़': 'rh', 'ग़': 'g', 'ज़': 'z', 'फ़': 'f',
    # Dependent vowel signs and other marks
    'ा': 'a', 'ि': 'i', 'ी': 'i', 'ु': 'u', 'ू': 'u', 'े': 'e',
    'ै': 'ai', 'ो': 'o', 'ौ': 'au', 'ॉ': 'o',
    'ं': 'n', 'ः': 'h', '्': '',
    # Punctuation
    '।': '.', '॥': '.',
}

# NFC turns the precomposed nukta letters (U+0958..U+095F) into
# base + U+093C, so normalising both the keys and the input makes either
# spelling of e.g. 'ड़' hit the same entry.
_DEVA_LOOKUP = {
    unicodedata.normalize("NFC", key): roman for key, roman in DEVA_MAP.items()
}
_DEVA_MAX_KEY_LEN = max(len(key) for key in _DEVA_LOOKUP)


def devanagari_to_roman(text: str) -> str:
    """Transliterate Devanagari in *text* to Roman letters using ``DEVA_MAP``.

    The text is NFC-normalised and scanned left to right; at each position
    the longest matching ``DEVA_MAP`` key wins, so multi-code-point entries
    (conjuncts, nukta letters) take precedence over their individual
    characters. Characters with no entry are copied through unchanged.
    This is a plain table lookup: no inherent vowel is inserted after
    consonants.
    """
    text = unicodedata.normalize("NFC", text)
    pieces = []
    pos = 0
    length = len(text)
    while pos < length:
        # Try the longest candidate first so 'ज्ञ' beats 'ज' + '्' + 'ञ'.
        for size in range(min(_DEVA_MAX_KEY_LEN, length - pos), 0, -1):
            roman = _DEVA_LOOKUP.get(text[pos:pos + size])
            if roman is not None:
                pieces.append(roman)
                pos += size
                break
        else:
            pieces.append(text[pos])
            pos += 1
    return ''.join(pieces)


custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');

*, *::before, *::after { box-sizing: border-box; }

body, .gradio-container {
    background: #0a0a0a !important;
    font-family: 'Geist', sans-serif !important;
    color: #ededed !important;
}

.gradio-container {
    max-width: 1080px !important;
    margin: 0 auto !important;
    padding: 0 !important;
}

/* NAV / Header */
.prose {
    padding: 0 40px !important;
    height: 56px !important;
    display: flex !important;
    align-items: center !important;
    justify-content: space-between !important;
    border-bottom: 1px solid #1a1a1a !important;
    margin-bottom: 0 !important;
}

.prose h1 {
    font-family: 'Geist', sans-serif !important;
    font-size: 13px !important;
    font-weight: 600 !important;
    color: #ededed !important;
    letter-spacing: -0.02em !important;
    line-height: 1 !important;
    margin: 0 !important;
}

.prose h1 em {
    font-family: 'Instrument Serif', serif !important;
    font-style: italic !important;
    font-weight: 400 !important;
    color: #58B8FF !important;
    font-size: 14px !important;
}

.prose p {
    font-size: 10px !important;
    color: #2a2a2a !important;
    letter-spacing: 0.14em !important;
    margin: 0 !important;
}

/* Layout */
.contain, .gap {
    background: transparent !important;
    border: none !important;
}

.block {
    background: #0f0f0f !important;
    border: 1px solid #1a1a1a !important;
    border-radius: 10px !important;
}

.block label > span, label > span {
    font-family: 'Geist', sans-serif !important;
    font-size: 10px !important;
    font-weight: 500 !important;
    color: #333 !important;
    text-transform: uppercase !important;
    letter-spacing: 0.16em !important;
}

/* File upload */
[data-testid="file"], .file {
    background: #0a0a0a !important;
    border: 1px dashed #1a2d3a !important;
    border-radius: 12px !important;
    min-height: 160px !important;
    transition: all 0.2s !important;
}

[data-testid="file"]:hover {
    border-color: #3066BE !important;
    background: #060d18 !important;
}

/* Dropdowns */
.wrap-inner, select {
    background: #0a0a0a !important;
    border: 1px solid #1a1a1a !important;
    border-radius: 8px !important;
    color: #ededed !important;
    font-family: 'Geist', sans-serif !important;
    font-size: 12px !important;
}

/* Radio */
input[type="radio"] { accent-color: #3066BE !important; }
input[type="checkbox"] { accent-color: #3066BE !important; }

/* Textarea */
textarea {
    background: transparent !important;
    color: #c8c8c8 !important;
    font-family: 'Geist', sans-serif !important;
    font-size: 14px !important;
    line-height: 1.9 !important;
    font-weight: 300 !important;
    border: none !important;
}

textarea::placeholder {
    color: #1a1a1a !important;
    font-style: italic !important;
}

/* Primary button */
button.primary {
    background: #ededed !important;
    border: none !important;
    border-radius: 8px !important;
    color: #000 !important;
    font-family: 'Geist', sans-serif !important;
    font-size: 12px !important;
    font-weight: 600 !important;
    letter-spacing: 0.04em !important;
    padding: 12px 28px !important;
    transition: all 0.18s ease !important;
    width: 100% !important;
}

button.primary:hover {
    background: #58B8FF !important;
    color: #000 !important;
}

/* Secondary button */
button.secondary {
    background: transparent !important;
    border: 1px solid #1a1a1a !important;
    border-radius: 8px !important;
    color: #333 !important;
    font-family: 'Geist', sans-serif !important;
    font-size: 12px !important;
    font-weight: 600 !important;
    letter-spacing: 0.04em !important;
    padding: 12px 28px !important;
    transition: all 0.18s ease !important;
    width: 100% !important;
}

button.secondary:hover {
    border-color: #3066BE !important;
    color: #58B8FF !important;
}

/* Tabs */
.tab-nav { border-bottom: 1px solid #141414 !important; }

.tab-nav button {
    font-family: 'Geist', sans-serif !important;
    font-size: 11px !important;
    font-weight: 500 !important;
    letter-spacing: 0.12em !important;
    text-transform: uppercase !important;
    color: #333 !important;
    background: transparent !important;
    border: none !important;
    border-bottom: 1.5px solid transparent !important;
    padding: 12px 20px !important;
    transition: all 0.15s !important;
}

.tab-nav button.selected {
    color: #ededed !important;
    border-bottom-color: #3066BE !important;
}

/* Progress bar */
.progress-bar { background: #3066BE !important; }
.progress-bar-wrap { background: #111 !important; border-radius: 0 !important; }

/* Scrollbar */
::-webkit-scrollbar { width: 2px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: #1a2030; }

footer { display: none !important; }
"""

# Choices for the "Language" dropdown. "Auto Detect" and "Hinglish (Roman)"
# are handled specially in transcribe(); every other entry is passed to
# Whisper as the language name.
LANGUAGES = [
    "Auto Detect", "English", "Hinglish (Roman)", "Hindi", "Spanish",
    "French", "German", "Italian", "Portuguese", "Chinese", "Japanese",
    "Korean", "Arabic", "Russian", "Dutch", "Turkish",
]

# Short descriptions of the Whisper model sizes.
MODEL_INFO = {
    "tiny": "Fastest — best for short clips",
    "base": "Fast — good everyday accuracy",
    "small": "Balanced — recommended",
    "medium": "Best accuracy — slower processing",
}


def transcribe(file, model_name, language, show_timestamps, translate):
    """Transcribe an uploaded media file and return the text twice.

    Args:
        file: The upload from the file input, either an object exposing the
            path as ``.name`` or a plain path string; ``None`` when nothing
            was uploaded.
        model_name: Whisper model name passed to ``load_model``.
        language: A ``LANGUAGES`` entry. "Auto Detect" lets Whisper detect
            the language; "Hinglish (Roman)" transcribes as Hindi and then
            romanises the text with ``devanagari_to_roman``.
        show_timestamps: When true, emit one ``[start → end] text`` line per
            segment instead of the single running text.
        translate: "Translate to English" selects Whisper's ``translate``
            task; any other value selects ``transcribe``.

    Returns:
        ``(transcript, transcript)`` — the same string for the visible and
        the hidden textbox — or a warning message and ``""`` when *file* is
        ``None``.
    """
    if file is None:
        return "⚠️ Please upload a file first.", ""

    # Accept both upload objects (path in ``.name``) and bare path strings.
    audio_path = getattr(file, "name", file)
    model = load_model(model_name)

    romanize = language == "Hinglish (Roman)"
    if romanize:
        lang = "hi"
    elif language == "Auto Detect":
        lang = None
    else:
        lang = language
    task = "translate" if translate == "Translate to English" else "transcribe"

    result = model.transcribe(audio_path, language=lang, task=task)

    def clean(text):
        """Romanise *text* if requested, then trim surrounding whitespace."""
        if romanize:
            text = devanagari_to_roman(text)
        return text.strip()

    if show_timestamps:
        transcript = "\n".join(
            f"[{format_time(seg['start'])} → {format_time(seg['end'])}] "
            f"{clean(seg['text'])}"
            for seg in result["segments"]
        )
    else:
        transcript = clean(result["text"])

    return transcript, transcript


def run(file, model_raw, language, timestamps, translate):
    """Click handler: reduce the model dropdown label to its name and transcribe.

    Dropdown labels look like ``"tiny — Fastest"``; the first
    whitespace-separated token is the Whisper model name.
    """
    model_name = model_raw.split()[0].strip()
    return transcribe(file, model_name, language, timestamps, translate)


def save_transcript(text):
    """Write *text* to a temporary ``.txt`` file and return its path.

    Returns ``None`` when there is no transcript to save. The file is
    created with ``delete=False`` because it has to outlive this call so it
    can still be read after the handler returns.
    """
    if not text or not text.strip():
        return None
    with tempfile.NamedTemporaryFile(
        "w", suffix=".txt", prefix="transcript_", delete=False, encoding="utf-8"
    ) as handle:
        handle.write(text)
    return handle.name


with gr.Blocks(title="Kalp Transcript — Kalpi Edition") as demo:
    # NOTE(review): the `.prose p` CSS rule suggests the tagline was meant to
    # be its own paragraph — confirm whether line breaks were lost here.
    gr.Markdown(" # Kalp *Transcript* by Kalpi Edition ")

    with gr.Row():
        with gr.Column(scale=5):
            file_input = gr.File(
                label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
            )
            with gr.Row():
                model_choice = gr.Dropdown(
                    choices=[
                        "tiny — Fastest",
                        "base — Fast",
                        "small — Balanced",
                        "medium — Best accuracy",
                        "large-v3 — Most accurate (very slow)",
                    ],
                    value="tiny — Fastest",
                    label="Model",
                )
                language = gr.Dropdown(
                    choices=LANGUAGES,
                    value="Auto Detect",
                    label="Language",
                )
            with gr.Row():
                translate = gr.Dropdown(
                    choices=["Off", "Translate to English"],
                    value="Off",
                    label="Translate",
                )
                timestamps = gr.Checkbox(
                    label="Show timestamps",
                    value=False,
                )
            gr.Markdown("\n")  # presumably a vertical spacer
            submit_btn = gr.Button("Transcribe →", variant="primary")
            clear_btn = gr.ClearButton(value="Clear", variant="secondary")

        with gr.Column(scale=6):
            with gr.Tabs():
                with gr.Tab("Transcript"):
                    output = gr.Textbox(
                        label="",
                        lines=18,
                        placeholder="Your transcript will appear here...",
                    )
                with gr.Tab("Download .txt"):
                    # Hidden copy of the transcript that feeds the download.
                    plain_output = gr.Textbox(label="", lines=12, visible=False)
                    gr.Markdown("\n")  # presumably a vertical spacer
                    download_btn = gr.Button(
                        "Save transcript", variant="secondary"
                    )
                    download_file = gr.File(label="")

    # The Clear button is created before the outputs exist, so its targets
    # are attached here once every component is defined.
    clear_btn.add([file_input, output, plain_output, download_file])

    submit_btn.click(
        fn=run,
        inputs=[file_input, model_choice, language, timestamps, translate],
        outputs=[output, plain_output],
    )
    download_btn.click(
        fn=save_transcript,
        inputs=plain_output,
        outputs=download_file,
    )


if __name__ == "__main__":
    demo.launch(css=custom_css)