import os
import gradio as gr
import spaces
from transformers import pipeline
# Model identifier passed as `model=` to the transformers ASR pipeline below.
MODEL_NAME = "palli23/whisper-small-sam_spjall"
@spaces.GPU(duration=60)  # enough for 3 minutes of audio
def transcribe_3min(audio_path):
    """Transcribe an audio file with the chunked Whisper ASR pipeline.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (e.g. nothing uploaded) short-circuits with a prompt message.

    Returns:
        The "text" field of the pipeline result, or the upload prompt
        string when no audio path was supplied.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Whisper pipeline with chunking; it is created inside the
    # GPU-decorated call (noted as ZeroGPU-safe by the original author).
    hub_token = os.getenv("HF_TOKEN")
    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=hub_token,
    )

    inference_options = {
        "chunk_length_s": 30,  # 30-second chunks
        "stride_length_s": (6, 0),  # 6-second overlap
        "return_timestamps": False,
        "batch_size": 8,
    }
    transcription = asr(audio_path, **inference_options)
    return transcription["text"]
# Interface: upload widget, trigger button and transcript box, wired to
# transcribe_3min.
with gr.Blocks(title="Íslenskt ASR – 3 mín") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**Whisper · Very low WER · 0.5-5minute audio transcribe á ZeroGPU**")
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")
    # The audio component yields a file path (type="filepath"), which is
    # passed straight to the transcription function.
    btn.click(transcribe_3min, inputs=audio, outputs=out)

# Basic-auth credentials can be overridden through the environment so they
# need not live in source; the fallbacks keep the previous default login.
demo.launch(
    auth=(
        os.getenv("ASR_AUTH_USER", "beta"),
        os.getenv("ASR_AUTH_PASSWORD", "beta2025"),
    )
)