File size: 1,460 Bytes
fd7965b
2bc8f15
f0e9bad
0918b24
c27f348
c871a9c
1170a88
c871a9c
fd7965b
ee7926b
fd7965b
ee7926b
 
 
fd7965b
 
 
ee7926b
 
 
fd7965b
 
 
 
0918b24
ee7926b
1170a88
1b09acf
 
ee7926b
 
fd7965b
 
1b09acf
 
845e97f
fd7965b
 
 
ee7926b
fd7965b
ee7926b
fd7965b
b348bed
ee7926b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# app.py – Virkar fullkomlega á ZeroGPU + venjulegum GPU Spaces
import os
import gradio as gr
import spaces
from transformers import pipeline

MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Build the speech-recognition pipeline a single time at import so that every
# request reuses the same loaded model instead of reloading it per call.
print("Hleð Whisper módelinu einu sinni...")

# Device choice is driven by the SYSTEM environment variable: the value
# "spaces" selects CUDA, anything else (including unset) selects the CPU.
# NOTE(review): the original author states ZeroGPU handles CUDA placement
# automatically at this point - confirm against the ZeroGPU documentation.
_running_on_spaces = os.getenv("SYSTEM") == "spaces"
if _running_on_spaces:
    _target_device = "cuda"
else:
    _target_device = "cpu"

_pipeline_options = {
    "model": MODEL_NAME,
    # "auto" leaves the dtype decision to the library; the original author
    # notes this is fine for both fp16 and fp32.
    "torch_dtype": "auto",
    "device": _target_device,
    # Request the "sdpa" attention implementation (per the original author,
    # faster on newer GPUs).
    "model_kwargs": {"attn_implementation": "sdpa"},
    # Access token read from the HF_TOKEN environment variable (None if unset).
    "token": os.getenv("HF_TOKEN"),
}

pipe = pipeline("automatic-speech-recognition", **_pipeline_options)

print("Módel tilbúið – allt klárt!")

@spaces.GPU(duration=120)
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level ``pipe`` pipeline.

    Args:
        audio_path: Location of the audio file to transcribe. A falsy value
            (``None`` or an empty string) means nothing was provided.

    Returns:
        The ``"text"`` entry of the pipeline result, or a fixed prompt
        message asking for an upload when ``audio_path`` is falsy.
    """
    if audio_path:
        # Decoding options forwarded to generation: language code "is",
        # transcription task.
        decode_options = {"language": "is", "task": "transcribe"}
        transcription = pipe(
            audio_path,
            chunk_length_s=30,
            batch_size=8,
            generate_kwargs=decode_options,
            return_timestamps=False,
        )
        return transcription["text"]

    # Nothing to transcribe: hand back the user-facing prompt instead.
    return "Hladdu upp hljóðskrá fyrst"

# Assemble the web UI: one audio input, one trigger button, one text output.
demo = gr.Blocks()

with demo:
    gr.Markdown(value="# Íslenskt Whisper – mjög lágt WER – 30 sek–5 mín hljóð")

    # type="filepath" makes the component hand the callback a path on disk.
    audio_in = gr.Audio(
        label="Hladdu upp mp3/wav (allt að 5 mín)",
        type="filepath",
    )
    btn = gr.Button(value="Transcribe", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=25)

    # Clicking the button runs the transcription and shows the returned text.
    btn.click(fn=transcribe_audio, inputs=audio_in, outputs=output)

# Start serving the interface.
demo.launch()