# app.py – FIXED: no_timestamps_token_id added (no more ValueError)
"""Gradio demo that transcribes Icelandic audio with a fine-tuned Whisper model.

The ASR pipeline is created once at import time, its generation config is
patched with the Whisper fields it may be missing, and a small Gradio UI is
launched behind basic auth.
"""
import os
import gradio as gr
import spaces
from transformers import pipeline
import torch

MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Audio is resampled to this rate before it is handed to the pipeline.
SAMPLE_RATE = 16000
# Window length used when the pipeline splits long audio.
CHUNK_LENGTH_S = 20
# Overlap kept on *each* side of a window; the pipeline uses it to stitch
# neighbouring transcripts together without repeating the overlapped speech.
STRIDE_LENGTH_S = 2
# Number of windows decoded per forward pass.
BATCH_SIZE = 16

print("Loading model once at startup...")
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype=torch.float16,
    device=0,
    token=os.getenv("HF_TOKEN"),
)

# Fix old Whisper config completely (including timestamps token).
# Each field is checked on its own so that a config which has some of them
# (or has them set to None) still ends up fully populated.
generation_config = pipe.model.generation_config
if getattr(generation_config, "lang_to_id", None) is None:
    # NOTE(review): 50259 looks like the `<|en|>` id in the stock multilingual
    # Whisper vocabulary, not `<|is|>` - confirm this is what the fine-tune
    # was trained with before changing it.
    generation_config.lang_to_id = {"is": 50259}
if getattr(generation_config, "task_to_id", None) is None:
    generation_config.task_to_id = {"transcribe": 50359}
# Language/task are set explicitly below instead of via forced decoder ids.
generation_config.forced_decoder_ids = None

# ←←← THIS FIXES THE TIMESTAMP ERROR
if getattr(generation_config, "no_timestamps_token_id", None) is None:
    generation_config.no_timestamps_token_id = 50363

generation_config.language = "is"
generation_config.task = "transcribe"
print("Model ready – fully fixed for timestamps!")


@spaces.GPU(duration=120)
def transcribe_safe(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    Args:
        audio_path: Path to an audio file, or a falsy value when nothing
            was uploaded.

    Returns:
        The transcript; a prompt to upload a file when *audio_path* is
        falsy; or "Ekkert heyrt" when the clip is shorter than one second
        or the transcript is empty.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Imported lazily so the module can start without loading librosa.
    import librosa

    audio, _ = librosa.load(audio_path, sr=SAMPLE_RATE)

    # Clips shorter than one second are treated as silence.
    if len(audio) < SAMPLE_RATE:
        return "Ekkert heyrt"

    # Let the pipeline do the windowing: it batches the windows (so
    # batch_size actually applies) and merges the overlapping regions instead
    # of emitting the overlapped words twice.
    result = pipe(
        {"raw": audio, "sampling_rate": SAMPLE_RATE},
        chunk_length_s=CHUNK_LENGTH_S,
        stride_length_s=STRIDE_LENGTH_S,
        batch_size=BATCH_SIZE,
    )
    return result["text"].strip() or "Ekkert heyrt"


with gr.Blocks(title="Íslenskt ASR – 3 mín T4 Paid") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
    gr.Markdown("**~4 % WER · 15–25 sek · T4 Paid**")
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe (15–25 sek)", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")
    btn.click(transcribe_safe, inputs=audio, outputs=out)

# Credentials come from the environment when provided. The literal fallbacks
# keep existing deployments working but should be overridden: they are
# checked into source and therefore not secret.
demo.launch(
    auth=(
        os.getenv("APP_USERNAME", "beta"),
        os.getenv("APP_PASSWORD", "beta2025"),
    )
)