# app.py – ZeroGPU SAFE – 3 min of audio without "GPU task aborted"
"""Gradio front end for an Icelandic Whisper ASR model running on ZeroGPU.

An uploaded audio file is resampled to 16 kHz, cut into overlapping
fixed-length chunks and transcribed chunk by chunk inside a single
``@spaces.GPU`` call; the chunk transcripts are joined with spaces.
"""

import os

# Third-party import order is kept exactly as in the original file.
import gradio as gr
import spaces
from transformers import pipeline
import numpy as np
import librosa

MODEL_NAME = "palli23/whisper-small-sam_spjall"

SAMPLE_RATE = 16000                  # audio is resampled to this rate on load
CHUNK_SAMPLES = SAMPLE_RATE * 20     # 20 s per chunk
OVERLAP_SAMPLES = SAMPLE_RATE * 2    # 2 s overlap between consecutive chunks
MIN_CHUNK_SAMPLES = SAMPLE_RATE      # chunks shorter than 1 s are dropped
BATCH_SIZE = 8


def _split_into_chunks(
    samples: np.ndarray,
    chunk_len: int = CHUNK_SAMPLES,
    overlap: int = OVERLAP_SAMPLES,
    min_len: int = MIN_CHUNK_SAMPLES,
) -> list:
    """Cut *samples* into chunks of ``chunk_len`` that overlap by ``overlap``.

    Chunking stops as soon as a chunk reaches the end of the signal, so no
    trailing chunk consisting purely of already-covered overlap is produced.
    A chunk shorter than ``min_len`` (only possible for the very first chunk
    of a very short recording) is dropped.
    """
    step = chunk_len - overlap
    total = len(samples)
    chunks = []
    for start in range(0, total, step):
        chunk = samples[start:start + chunk_len]
        if len(chunk) < min_len:
            break
        chunks.append(chunk)
        if start + chunk_len >= total:
            # This chunk already covers the tail; another one would only
            # re-transcribe the overlap and duplicate the final words.
            break
    return chunks


@spaces.GPU(duration=60)  # 60 s GPU budget; the original author notes this as the ZeroGPU maximum
def transcribe_safe(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    Args:
        audio_path: Path of the uploaded file as delivered by ``gr.Audio``
            with ``type="filepath"``; falsy when nothing was uploaded.

    Returns:
        The joined transcript, a prompt to upload a file when *audio_path*
        is empty, or a "nothing heard" message when no text was produced.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Load the audio at 16 kHz and cut it into 20 s chunks.
    audio, _ = librosa.load(audio_path, sr=SAMPLE_RATE)
    chunks = _split_into_chunks(audio)
    if not chunks:
        return "Ekkert heyrt"

    # The pipeline is built inside the GPU-decorated call, as before.
    # NOTE(review): the pipeline object is rebuilt on every request; it is
    # not cached in this file. Consider the module-level loading pattern
    # from the ZeroGPU documentation — TODO confirm before changing.
    pipe = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=os.getenv("HF_TOKEN"),
    )

    # Hand over all chunks at once so that batch_size actually batches them.
    results = pipe(chunks, batch_size=BATCH_SIZE)
    full_text = " ".join(result["text"] for result in results)
    return full_text.strip() or "Ekkert heyrt"


# Gradio UI, set up for recordings of up to 3 minutes.
with gr.Blocks(title="Íslenskt ASR – 3 mín ZeroGPU") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
    gr.Markdown("**~4 % WER · 25–45 sek · ZeroGPU (PRO)**")
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe (25–45 sek)", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")
    btn.click(transcribe_safe, inputs=audio, outputs=out)

if __name__ == "__main__":
    # SECURITY NOTE: the login used to be hard-coded. It can now be overridden
    # through environment variables; the previous values remain the defaults
    # so existing deployments keep working. Prefer setting both as secrets.
    demo.launch(
        auth=(
            os.getenv("ASR_AUTH_USER", "beta"),
            os.getenv("ASR_AUTH_PASSWORD", "beta2025"),
        )
    )