File size: 1,252 Bytes
2bc8f15
f0e9bad
0918b24
c27f348
c871a9c
1170a88
c871a9c
ea1ab79
 
6161422
cc6ae2a
6161422
ea1ab79
 
 
 
 
 
 
6161422
ea1ab79
 
 
 
 
 
 
cc6ae2a
ea1ab79
6161422
ea1ab79
 
 
 
cc6ae2a
 
ea1ab79
cc6ae2a
 
ea1ab79
ca5b750
cc6ae2a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
import gradio as gr
import spaces
from transformers import pipeline

# Model identifier handed to the transformers ASR pipeline as its `model` argument.
MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Lazily-built ASR pipeline. Kept at module level so that repeated requests
# served by the same process reuse the loaded model instead of reloading the
# weights on every call.
_ASR_PIPELINE = None


def _get_asr_pipeline():
    """Return the cached ASR pipeline, building it on first use.

    This is only called from ``transcribe_3min``, i.e. from inside the
    GPU-decorated call path, so the model is still created on device 0 at the
    same point in the request as before; it is simply not rebuilt afterwards.
    """
    global _ASR_PIPELINE
    if _ASR_PIPELINE is None:
        _ASR_PIPELINE = pipeline(
            "automatic-speech-recognition",
            model=MODEL_NAME,
            device=0,
            token=os.getenv("HF_TOKEN"),
        )
    return _ASR_PIPELINE


@spaces.GPU(duration=60)   # original author's note: enough for 3 min of audio
def transcribe_3min(audio_path):
    """Transcribe an uploaded audio file and return the recognised text.

    Args:
        audio_path: Filesystem path of the audio file to transcribe. Any
            falsy value (``None``, ``""``) is treated as "nothing uploaded".

    Returns:
        The ``"text"`` field of the pipeline result, or the Icelandic prompt
        "Hladdu upp hljóðskrá" ("Upload an audio file") when no path is given.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Whisper pipeline with chunking; built lazily inside the GPU call.
    asr = _get_asr_pipeline()

    result = asr(
        audio_path,
        chunk_length_s=30,           # 30 s chunks
        stride_length_s=(6, 0),      # 6 s stride on the left, none on the right
        return_timestamps=False,
        batch_size=8,
    )

    return result["text"]

# Interface: a single-page Gradio app with an audio input, a button and a
# text box that shows the transcript returned by transcribe_3min.
with gr.Blocks(title="Íslenskt ASR – 3 mín") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**Whisper · Very low WER · 0.5-5minute audio transcribe á ZeroGPU**")

    # type="filepath" makes Gradio pass the upload to the handler as a path.
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")

    btn.click(transcribe_3min, inputs=audio, outputs=out)

# Basic-auth credentials can be overridden through the environment so they can
# be rotated without a code change.
# NOTE(review): the fallbacks below keep the previous hard-coded login working
# when the variables are unset; they are still visible in source, so set both
# variables in any deployment where the login is meant to be private.
demo.launch(
    auth=(
        os.getenv("APP_AUTH_USER", "beta"),
        os.getenv("APP_AUTH_PASSWORD", "beta2025"),
    )
)