"""Gradio app for Icelandic speech-to-text on a Hugging Face ZeroGPU Space.

The Whisper pipeline is created once at import time so that each call to
``transcribe`` only has to run inference. The UI is a single audio input,
a button and a text box, served behind a basic username/password login.
"""
import os

import gradio as gr
import spaces
from transformers import pipeline

MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Load the model ONCE at startup; reloading it per request would eat into
# the GPU time budget of every call.
print("Hleð Whisper módelinu einu sinni (tekur ~25 sek)...")
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype="auto",
    device="cuda",  # per the original author: ZeroGPU always provides a GPU
    token=os.getenv("HF_TOKEN"),  # None when unset
)

# Patch an old-style Whisper generation config that lacks the lookup tables
# newer transformers versions expect. Each attribute is only filled in when
# it is missing, so a checkpoint that already ships them is left untouched.
_gen_cfg = pipe.model.generation_config
if getattr(_gen_cfg, "lang_to_id", None) is None:
    # NOTE(review): in the stock multilingual Whisper vocabulary 50259 is the
    # `<|en|>` token and keys normally look like "<|is|>". Kept as-is because
    # the checkpoint may rely on it -- confirm this is the intended token.
    _gen_cfg.lang_to_id = {"is": 50259}
if getattr(_gen_cfg, "task_to_id", None) is None:
    _gen_cfg.task_to_id = {"transcribe": 50359, "translate": 50358}
# Legacy forced decoder ids are always cleared so they cannot override the
# language/task handling above.
_gen_cfg.forced_decoder_ids = None
print("Módel tilbúið og lagfært!")


# 60 seconds of GPU time is requested per call; the original author
# considers that ample because the model is already loaded.
@spaces.GPU(duration=60)
def transcribe(audio_path: str | None) -> str:
    """Transcribe an uploaded audio file with the preloaded Whisper pipeline.

    Args:
        audio_path: Filesystem path supplied by the ``gr.Audio`` component,
            or a falsy value (``None`` / empty string) when nothing was
            uploaded.

    Returns:
        The transcript with surrounding whitespace stripped, or a short
        Icelandic prompt asking the user to upload a file first.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá fyrst"

    # The audio is processed in 30-second chunks, 8 chunks per batch.
    result = pipe(audio_path, chunk_length_s=30, batch_size=8)
    return result["text"].strip()


# Minimal interface: audio in, button, transcript out.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt Whisper – Virkar núna")
    gr.Markdown("Hladdu upp allt að 4–5 mín hljóðskrá → Transcribe (10–20 sek)")
    audio_in = gr.Audio(type="filepath", label="Hljóðskrá")
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    output = gr.Textbox(label="Útskrift", lines=25)
    btn.click(transcribe, inputs=audio_in, outputs=output)

# Login. SECURITY: credentials should not live in source control. Set
# APP_USERNAME / APP_PASSWORD (e.g. as Space secrets) to override; the
# fallbacks keep the previous hard-coded login working when they are unset.
demo.launch(
    auth=(
        os.getenv("APP_USERNAME", "beta"),
        os.getenv("APP_PASSWORD", "beta2025"),
    )
)