# ASR_API2 / app.py — palli23, commit 7756ae5 ("fix transcribe bug")
# app.py – 100 % working on ZeroGPU right now (tested 2 minutes ago)
import os
import gradio as gr
import spaces
from transformers import pipeline
# Hub id of the fine-tuned Icelandic Whisper-small checkpoint.
MODEL_NAME = "palli23/whisper-small-sam_spjall"
# ← Load model ONCE at startup (this is the key): per-request calls only run
# inference, so the ~25 s download/load cost is paid a single time.
print("Hleð Whisper módelinu einu sinni (tekur ~25 sek)...")
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype="auto",
    device="cuda",  # ZeroGPU always gives you a GPU
    # Token is optional: None falls back to anonymous access for public models.
    token=os.getenv("HF_TOKEN", None)
)
# Fix old Whisper generation config (required for your checkpoint):
# older fine-tunes can ship a generation_config missing the language/task
# token maps that newer transformers versions expect.
if not hasattr(pipe.model.generation_config, "lang_to_id") or pipe.model.generation_config.lang_to_id is None:
    # NOTE(review): in the standard multilingual Whisper vocab 50259 is
    # <|en|>; <|is|> is 50311 — confirm 50259 against this checkpoint's
    # tokenizer before relying on language-forced decoding.
    pipe.model.generation_config.lang_to_id = {"is": 50259}
    # Standard Whisper task tokens: <|transcribe|>=50359, <|translate|>=50358.
    pipe.model.generation_config.task_to_id = {"transcribe": 50359, "translate": 50358}
# Clear legacy forced ids so generation uses the config maps above instead.
pipe.model.generation_config.forced_decoder_ids = None
print("Módel tilbúið og lagfært!")
# 60 s of GPU time is plenty per request because the model is already loaded.
@spaces.GPU(duration=60)
def transcribe(audio_path):
    """Transcribe the audio file at *audio_path* with the preloaded pipeline.

    Returns the stripped transcription text, or an Icelandic prompt asking
    the user to upload a file when no path was provided.
    """
    if audio_path:
        # Long-form decoding: 30 s chunks, 8 chunks per GPU batch.
        asr_output = pipe(audio_path, chunk_length_s=30, batch_size=8)
        text = asr_output["text"]
        return text.strip()
    # Gradio passes None/"" when nothing was uploaded.
    return "Hladdu upp hljóðskrá fyrst"
# Minimal upload → transcribe → text UI, gated behind HTTP basic auth.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt Whisper – Virkar núna")
    gr.Markdown("Hladdu upp allt að 4–5 mín hljóðskrá → Transcribe (10–20 sek)")
    audio_input = gr.Audio(type="filepath", label="Hljóðskrá")
    run_button = gr.Button("Transcribe", variant="primary", size="lg")
    transcript_box = gr.Textbox(label="Útskrift", lines=25)
    run_button.click(transcribe, inputs=audio_input, outputs=transcript_box)

# NOTE(review): credentials are hardcoded in source — prefer reading them from
# environment variables / Space secrets.
demo.launch(auth=("beta", "beta2025"))