# app.py – FIXED: no_timestamps_token_id added (no more ValueError)
import os
import gradio as gr
import spaces
from transformers import pipeline
import torch
# Hub id of the fine-tuned Whisper checkpoint served by this app.
MODEL_NAME = "palli23/whisper-small-sam_spjall"

print("Loading model once at startup...")
# Built once at import time so every request reuses the same loaded model.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype=torch.float16,
    device=0,
    token=os.getenv("HF_TOKEN"),
)

_gen_config = pipe.model.generation_config

# Fix old Whisper config completely (including timestamps token).
if not hasattr(_gen_config, "lang_to_id"):
    # Whisper generation configs key languages by their special token
    # ("<|is|>"), and the id must be the Icelandic token. The previous
    # hard-coded {"is": 50259} pointed at the English token (<|en|>) in the
    # multilingual vocabulary, so the id is now read from the tokenizer.
    _gen_config.lang_to_id = {
        "<|is|>": pipe.tokenizer.convert_tokens_to_ids("<|is|>"),
    }
    _gen_config.task_to_id = {"transcribe": 50359}
    # Drop any legacy forced prompt; language/task below are set explicitly.
    _gen_config.forced_decoder_ids = None

# Older configs also lack the <|notimestamps|> id, which made generation
# fail with a ValueError (see the header comment of this file).
if not hasattr(_gen_config, "no_timestamps_token_id"):
    _gen_config.no_timestamps_token_id = 50363

# Always transcribe Icelandic.
_gen_config.language = "is"
_gen_config.task = "transcribe"

print("Model ready – fully fixed for timestamps!")
@spaces.GPU(duration=120)
def transcribe_safe(audio_path):
    """Transcribe an audio file in overlapping 20-second windows.

    Args:
        audio_path: Path of the audio file to transcribe (the UI passes the
            value of its ``Audio`` component). A falsy value means no file.

    Returns:
        The window transcripts joined by single spaces. Returns
        "Hladdu upp hljóðskrá" when no path was given and "Ekkert heyrt"
        when nothing was transcribed (including audio shorter than 1 s).
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Imported here rather than at module level, so it is only loaded when a
    # transcription is actually requested.
    import librosa

    sample_rate = 16000
    audio, _ = librosa.load(audio_path, sr=sample_rate)

    chunk_len = sample_rate * 20   # window length: 20 s of samples
    stride = sample_rate * 2       # overlap between consecutive windows: 2 s
    min_len = sample_rate          # windows shorter than 1 s are not decoded

    chunks = []
    for start in range(0, len(audio), chunk_len - stride):
        chunk = audio[start:start + chunk_len]
        if len(chunk) < min_len:
            break
        chunks.append(chunk)
        if start + chunk_len >= len(audio):
            # This window already reaches the end of the audio. The next one
            # would lie entirely inside it and only repeat the final words.
            break

    # NOTE: neighbouring windows still share 2 s of audio and their texts are
    # joined as-is, so words spoken inside an overlap can appear twice.
    texts = [pipe(chunk, batch_size=16)["text"] for chunk in chunks]
    return " ".join(texts).strip() or "Ekkert heyrt"
# Single-page UI: upload a file, press the button, read the transcript.
with gr.Blocks(title="Íslenskt ASR – 3 mín T4 Paid") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
    gr.Markdown("**~4 % WER · 15–25 sek · T4 Paid**")
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe (15–25 sek)", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")
    # The click handler receives the uploaded file's path and fills the textbox.
    btn.click(transcribe_safe, inputs=audio, outputs=out)

# SECURITY: credentials committed to source are readable by anyone who can see
# the repository. They can be overridden through the APP_AUTH_USER and
# APP_AUTH_PASSWORD environment variables; the original values remain only as
# a fallback so existing deployments keep working.
_auth_user = os.getenv("APP_AUTH_USER") or "beta"
_auth_password = os.getenv("APP_AUTH_PASSWORD") or "beta2025"
demo.launch(auth=(_auth_user, _auth_password))