# KalpTranscript / app.py
# Kalp97's picture
# Upload app.py
# af4bef5 verified
import gradio as gr
import whisper
import yt_dlp
import os
import tempfile
# Whisper models that have already been loaded, keyed by model name.
models = {}


def load_model(model_name):
    """Return the Whisper model named ``model_name``, loading it at most once.

    The first request for a name calls ``whisper.load_model`` and stores the
    result in the module-level ``models`` dict; later requests for the same
    name are answered from that dict.
    """
    if model_name in models:
        return models[model_name]
    loaded = whisper.load_model(model_name)
    models[model_name] = loaded
    return loaded
def format_time(seconds):
    """Render a duration given in seconds as ``MM:SS.t``.

    Minutes and seconds are zero-padded to at least two digits.  The final
    digit is tenths of a second, truncated rather than rounded.
    """
    whole_minutes, within_minute = divmod(seconds, 60)
    tenths = int((seconds % 1) * 10)
    return f"{int(whole_minutes):02d}:{int(within_minute):02d}.{tenths}"
# Simple Devanagari to Roman fallback map.
#
# Consonants map to bare Latin letters (no inherent vowel is inserted).
# Some keys span several code points (conjuncts and base-letter + nukta
# sequences); devanagari_to_roman() therefore tries the longest key first.
# Multi-code-point and nukta keys are written with escapes so the exact code
# point sequence is unambiguous however an editor renders or normalises it.
DEVA_MAP = {
    # Independent vowels
    'अ': 'a', 'आ': 'aa', 'इ': 'i', 'ई': 'ii', 'उ': 'u', 'ऊ': 'uu',
    'ए': 'e', 'ऐ': 'ai', 'ओ': 'o', 'औ': 'au', 'ऑ': 'o', 'ऍ': 'e',
    'ऋ': 'ri', 'ॠ': 'ri',
    # Consonants
    'क': 'k', 'ख': 'kh', 'ग': 'g', 'घ': 'gh', 'ङ': 'n',
    'च': 'ch', 'छ': 'chh', 'ज': 'j', 'झ': 'jh', 'ञ': 'n',
    'ट': 't', 'ठ': 'th', 'ड': 'd', 'ढ': 'dh', 'ण': 'n',
    'त': 't', 'थ': 'th', 'द': 'd', 'ध': 'dh', 'न': 'n',
    'प': 'p', 'फ': 'ph', 'ब': 'b', 'भ': 'bh', 'म': 'm',
    'य': 'y', 'र': 'r', 'ल': 'l', 'व': 'v', 'ळ': 'l',
    'श': 'sh', 'ष': 'sh', 'स': 's', 'ह': 'h',
    # Dependent vowel signs, virama, nasal and aspirate marks
    'ा': 'a', 'ि': 'i', 'ी': 'i', 'ु': 'u', 'ू': 'u', 'ृ': 'ri',
    'े': 'e', 'ै': 'ai', 'ो': 'o', 'ौ': 'au', 'ॉ': 'o',
    '्': '', 'ं': 'n', 'ँ': 'n', 'ः': 'h',
    # Punctuation
    '।': '.', '॥': '.',
    # Conjuncts (consonant + virama + consonant)
    '\u0915\u094d\u0937': 'ksh',  # ka + virama + ssa
    '\u091c\u094d\u091e': 'gya',  # ja + virama + nya
    # Nukta letters: precomposed code point, then base letter + nukta (U+093C)
    '\u095a': 'g', '\u0917\u093c': 'g',    # ghha
    '\u095b': 'z', '\u091c\u093c': 'z',    # za
    '\u095c': 'r', '\u0921\u093c': 'r',    # dddha (flapped r)
    '\u095d': 'rh', '\u0922\u093c': 'rh',  # rha
    '\u095e': 'f', '\u092b\u093c': 'f',    # fa
}

# Length of the longest key, in code points; bounds the look-ahead below.
_DEVA_MAX_KEY_LEN = max(map(len, DEVA_MAP))


def devanagari_to_roman(text):
    """Transliterate Devanagari characters in ``text`` to Roman letters.

    The text is scanned left to right.  At each position the longest
    sequence that is a key of ``DEVA_MAP`` is replaced by its mapped value,
    so conjuncts and decomposed nukta letters are matched as a unit instead
    of one code point at a time.  Characters with no mapping (Latin text,
    digits, whitespace, ...) are copied through unchanged.

    Args:
        text: String to transliterate; may mix scripts.

    Returns:
        The transliterated string.  An empty input yields an empty string.
    """
    pieces = []
    pos = 0
    end = len(text)
    while pos < end:
        # Try the widest candidate first so multi-code-point keys win over
        # their single-character prefixes.
        for width in range(min(_DEVA_MAX_KEY_LEN, end - pos), 0, -1):
            roman = DEVA_MAP.get(text[pos:pos + width])
            if roman is not None:  # '' is a valid mapping (virama)
                pieces.append(roman)
                pos += width
                break
        else:
            # No key starts here: keep the character as it is.
            pieces.append(text[pos])
            pos += 1
    return ''.join(pieces)
# Stylesheet for the app: dark theme, Geist / Instrument Serif fonts, and
# overrides for the header, blocks, file drop zone, dropdowns, textarea,
# buttons, tabs, progress bar and scrollbar.  The Gradio footer is hidden.
# The string is handed to Gradio as-is (see the launch call at the bottom).
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
*, *::before, *::after { box-sizing: border-box; }
body, .gradio-container {
background: #0a0a0a !important;
font-family: 'Geist', sans-serif !important;
color: #ededed !important;
}
.gradio-container {
max-width: 1080px !important;
margin: 0 auto !important;
padding: 0 !important;
}
/* NAV / Header */
.prose {
padding: 0 40px !important;
height: 56px !important;
display: flex !important;
align-items: center !important;
justify-content: space-between !important;
border-bottom: 1px solid #1a1a1a !important;
margin-bottom: 0 !important;
}
.prose h1 {
font-family: 'Geist', sans-serif !important;
font-size: 13px !important;
font-weight: 600 !important;
color: #ededed !important;
letter-spacing: -0.02em !important;
line-height: 1 !important;
margin: 0 !important;
}
.prose h1 em {
font-family: 'Instrument Serif', serif !important;
font-style: italic !important;
font-weight: 400 !important;
color: #58B8FF !important;
font-size: 14px !important;
}
.prose p {
font-size: 10px !important;
color: #2a2a2a !important;
letter-spacing: 0.14em !important;
margin: 0 !important;
}
/* Layout */
.contain, .gap { background: transparent !important; border: none !important; }
.block {
background: #0f0f0f !important;
border: 1px solid #1a1a1a !important;
border-radius: 10px !important;
}
.block label > span, label > span {
font-family: 'Geist', sans-serif !important;
font-size: 10px !important;
font-weight: 500 !important;
color: #333 !important;
text-transform: uppercase !important;
letter-spacing: 0.16em !important;
}
/* File upload */
[data-testid="file"], .file {
background: #0a0a0a !important;
border: 1px dashed #1a2d3a !important;
border-radius: 12px !important;
min-height: 160px !important;
transition: all 0.2s !important;
}
[data-testid="file"]:hover {
border-color: #3066BE !important;
background: #060d18 !important;
}
/* Dropdowns */
.wrap-inner, select {
background: #0a0a0a !important;
border: 1px solid #1a1a1a !important;
border-radius: 8px !important;
color: #ededed !important;
font-family: 'Geist', sans-serif !important;
font-size: 12px !important;
}
/* Radio */
input[type="radio"] { accent-color: #3066BE !important; }
input[type="checkbox"] { accent-color: #3066BE !important; }
/* Textarea */
textarea {
background: transparent !important;
color: #c8c8c8 !important;
font-family: 'Geist', sans-serif !important;
font-size: 14px !important;
line-height: 1.9 !important;
font-weight: 300 !important;
border: none !important;
}
textarea::placeholder { color: #1a1a1a !important; font-style: italic !important; }
/* Primary button */
button.primary {
background: #ededed !important;
border: none !important;
border-radius: 8px !important;
color: #000 !important;
font-family: 'Geist', sans-serif !important;
font-size: 12px !important;
font-weight: 600 !important;
letter-spacing: 0.04em !important;
padding: 12px 28px !important;
transition: all 0.18s ease !important;
width: 100% !important;
}
button.primary:hover {
background: #58B8FF !important;
color: #000 !important;
}
/* Secondary button */
button.secondary {
background: transparent !important;
border: 1px solid #1a1a1a !important;
border-radius: 8px !important;
color: #333 !important;
font-family: 'Geist', sans-serif !important;
font-size: 12px !important;
font-weight: 600 !important;
letter-spacing: 0.04em !important;
padding: 12px 28px !important;
transition: all 0.18s ease !important;
width: 100% !important;
}
button.secondary:hover {
border-color: #3066BE !important;
color: #58B8FF !important;
}
/* Tabs */
.tab-nav { border-bottom: 1px solid #141414 !important; }
.tab-nav button {
font-family: 'Geist', sans-serif !important;
font-size: 11px !important;
font-weight: 500 !important;
letter-spacing: 0.12em !important;
text-transform: uppercase !important;
color: #333 !important;
background: transparent !important;
border: none !important;
border-bottom: 1.5px solid transparent !important;
padding: 12px 20px !important;
transition: all 0.15s !important;
}
.tab-nav button.selected {
color: #ededed !important;
border-bottom-color: #3066BE !important;
}
/* Progress bar */
.progress-bar { background: #3066BE !important; }
.progress-bar-wrap { background: #111 !important; border-radius: 0 !important; }
/* Scrollbar */
::-webkit-scrollbar { width: 2px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: #1a2030; }
footer { display: none !important; }
"""
# Choices offered in the language dropdown.  "Auto Detect" and
# "Hinglish (Roman)" are special-cased by transcribe(); the rest are passed
# to Whisper as the language argument unchanged.
LANGUAGES = [
    "Auto Detect",
    "English",
    "Hinglish (Roman)",
    "Hindi",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Chinese",
    "Japanese",
    "Korean",
    "Arabic",
    "Russian",
    "Dutch",
    "Turkish",
]
# One-line description per Whisper model size.
# NOTE(review): nothing in the visible code reads this mapping — confirm
# whether it is still needed.
MODEL_INFO = dict(
    tiny="Fastest — best for short clips",
    base="Fast — good everyday accuracy",
    small="Balanced — recommended",
    medium="Best accuracy — slower processing",
)
def _save_transcript(text):
    """Write ``text`` to a temporary ``.txt`` file and return the file's path.

    Returns ``None`` when there is no text, so the download slot stays empty.
    The file is created with ``delete=False`` because it has to outlive this
    call for the download component to serve it.
    """
    if not text:
        return None
    with tempfile.NamedTemporaryFile(
        "w", suffix=".txt", prefix="transcript_", delete=False, encoding="utf-8"
    ) as handle:
        handle.write(text)
    return handle.name


# UI layout: inputs and actions in the left column, results in the right one.
with gr.Blocks(title="Kalp Transcript — Kalpi Edition") as demo:
    gr.Markdown("""
# Kalp *Transcript*
by Kalpi Edition
""")
    with gr.Row():
        # Left column: upload, options, action buttons.
        with gr.Column(scale=5):
            file_input = gr.File(
                label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
            )
            with gr.Row():
                # The first word of each label is the Whisper model name;
                # run() extracts it.
                model_choice = gr.Dropdown(
                    choices=[
                        "tiny — Fastest",
                        "base — Fast",
                        "small — Balanced",
                        "medium — Best accuracy",
                        "large-v3 — Most accurate (very slow)"
                    ],
                    value="tiny — Fastest",
                    label="Model"
                )
                language = gr.Dropdown(
                    choices=LANGUAGES,
                    value="Auto Detect",
                    label="Language"
                )
            with gr.Row():
                translate = gr.Dropdown(
                    choices=["Off", "Translate to English"],
                    value="Off",
                    label="Translate"
                )
                timestamps = gr.Checkbox(
                    label="Show timestamps",
                    value=False
                )
            gr.Markdown("<div style='height:4px'></div>")
            submit_btn = gr.Button("Transcribe →", variant="primary")
            clear_btn = gr.ClearButton(value="Clear", variant="secondary")
        # Right column: transcript and download.
        with gr.Column(scale=6):
            with gr.Tabs():
                with gr.Tab("Transcript"):
                    output = gr.Textbox(
                        label="",
                        lines=18,
                        placeholder="Your transcript will appear here..."
                    )
                with gr.Tab("Download .txt"):
                    # Hidden copy of the transcript used as the download source.
                    plain_output = gr.Textbox(label="", lines=12, visible=False)
                    gr.Markdown("<div style='height:6px'></div>")
                    download_btn = gr.Button("Save transcript", variant="secondary")
                    download_file = gr.File(label="")

    # The clear button is created before the result widgets exist, so the
    # components it resets are registered here; without this it clears nothing.
    clear_btn.add([file_input, output, plain_output, download_file])

    # "Save transcript" writes the hidden transcript copy to a .txt file and
    # offers it through the download slot.
    download_btn.click(
        fn=_save_transcript,
        inputs=plain_output,
        outputs=download_file
    )
def transcribe(file, model_name, language, show_timestamps, translate):
    """Transcribe an uploaded media file with Whisper.

    Args:
        file: The upload — either a path string or an object exposing the
            path as ``.name``; ``None`` when nothing was uploaded.
        model_name: Whisper model name handed to ``load_model``.
        language: Entry from the language dropdown.  "Auto Detect" passes
            ``None`` to Whisper; "Hinglish (Roman)" transcribes as Hindi
            ("hi") and romanizes the text afterwards; any other value is
            passed through unchanged.
        show_timestamps: When true, return one ``[start → end] text`` line
            per segment instead of the plain text.
        translate: "Translate to English" selects the "translate" task;
            any other value selects "transcribe".

    Returns:
        A ``(transcript, transcript)`` pair (the same text twice, one per
        output component), or a warning message and an empty string when
        ``file`` is ``None``.
    """
    if file is None:
        return "⚠️ Please upload a file first.", ""

    # Accept both a plain path string and a wrapper object carrying ``.name``.
    path = getattr(file, "name", file)

    # Hinglish: transcribe in Hindi, then romanize the output.
    romanize = language == "Hinglish (Roman)"
    if romanize:
        lang = "hi"
    elif language == "Auto Detect":
        lang = None
    else:
        lang = language
    task = "translate" if translate == "Translate to English" else "transcribe"

    result = load_model(model_name).transcribe(path, language=lang, task=task)

    def finish(text):
        """Romanize when requested, then trim surrounding whitespace."""
        if romanize:
            text = devanagari_to_roman(text)
        return text.strip()

    if show_timestamps:
        # Start and end are separated by an arrow; previously they were
        # concatenated with nothing in between and could not be told apart.
        transcript = "\n".join(
            f"[{format_time(seg['start'])} → {format_time(seg['end'])}] {finish(seg['text'])}"
            for seg in result["segments"]
        )
    else:
        transcript = finish(result["text"])
    return transcript, transcript
def run(file, model_raw, language, timestamps, translate):
    """Click handler: turn the dropdown label into a model name and transcribe.

    ``model_raw`` is a label such as "small — Balanced"; its first
    whitespace-delimited token is used as the model name.  All other
    arguments are forwarded to ``transcribe`` unchanged.
    """
    tokens = model_raw.split()
    model_name = tokens[0].strip()
    return transcribe(file, model_name, language, timestamps, translate)
# Wire the "Transcribe" button.  Event listeners have to be registered while
# a Blocks context is active, so the app's context is re-entered here rather
# than calling .click() at bare module level.
with demo:
    submit_btn.click(
        fn=run,
        inputs=[file_input, model_choice, language, timestamps, translate],
        # Same transcript goes to the visible box and the hidden download copy.
        outputs=[output, plain_output]
    )
# Start the web app only when this file is executed as a script.
# NOTE(review): passing css= to launch() assumes a Gradio release whose
# launch() accepts it; older releases take css on gr.Blocks — confirm against
# the pinned Gradio version.
if __name__ == "__main__":
    demo.launch(css=custom_css)