File size: 1,765 Bytes
faa307f
2bc8f15
f0e9bad
0918b24
c27f348
c871a9c
1170a88
c871a9c
faa307f
9d663d7
 
 
 
 
86ce37e
9d663d7
 
 
faa307f
86ce37e
 
1d313ab
 
ca5b750
86ce37e
ca5b750
4f32c1f
86ce37e
 
faa307f
ca5b750
86ce37e
 
faa307f
ca5b750
faa307f
86ce37e
faa307f
 
 
 
 
 
 
 
 
ca5b750
 
86ce37e
 
 
ca5b750
 
faa307f
86ce37e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
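# app.py – intended to work on all Spaces (even with older Gradio releases).
# NOTE(review): the UI below uses gr.Timer, which only exists in newer Gradio
# releases – confirm the minimum supported Gradio version.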
import os
import gradio as gr
import spaces
from transformers import pipeline

MODEL_NAME = "palli23/whisper-small-sam_spjall"

print("Hleð Whisper módelinu einu sinni...")

# Build the speech-recognition pipeline once at import time so that every
# request reuses the same loaded model. HF_TOKEN (if set) is passed as the
# Hub access token.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype="auto",
    device="cuda",
    token=os.getenv("HF_TOKEN"),
)

# Compatibility patch for checkpoints whose generation config has no
# language/task maps (older Whisper exports).
#
# The ids are looked up in the checkpoint's own tokenizer instead of being
# hard-coded: the previous hard-coded value 50259 is the "<|en|>" token in the
# multilingual Whisper vocabulary, not Icelandic, and transformers keys
# `lang_to_id` by the full token string ("<|is|>"), not the bare code ("is").
# NOTE(review): this changes the language token used in the decoder prompt;
# re-check WER on a known sample after deploying.
_generation_config = pipe.model.generation_config
if getattr(_generation_config, "lang_to_id", None) is None:
    _tokenizer = pipe.tokenizer
    _generation_config.lang_to_id = {
        "<|is|>": _tokenizer.convert_tokens_to_ids("<|is|>"),
    }
    _generation_config.task_to_id = {
        "transcribe": _tokenizer.convert_tokens_to_ids("<|transcribe|>"),
        "translate": _tokenizer.convert_tokens_to_ids("<|translate|>"),
    }
    # Clear any legacy forced prompt so the maps above are what generation uses.
    _generation_config.forced_decoder_ids = None

print("Módel tilbúið!")

@spaces.GPU(duration=180)
def transcribe_single(audio_path):
    """Transcribe one uploaded audio file with the module-level pipeline.

    Args:
        audio_path: Filesystem path of the uploaded audio, or a falsy value
            (``None`` / empty string) when nothing has been uploaded.

    Returns:
        A 3-tuple in the order the click handler's outputs expect
        (audio component, transcript textbox, timer):

        * the audio value to display (``None`` when there was no upload),
        * the stripped transcript, or a prompt asking for an upload,
        * a Gradio property update that deactivates the timer.
    """
    # The third output is a gr.Timer. Its value is a tick interval in seconds,
    # so neither a "00:00" string nor None is meaningful there; an explicit
    # `active=False` update is what actually switches the timer off. A plain
    # gr.update() dict is used rather than a component instance.
    timer_off = gr.update(active=False)

    if not audio_path:
        return None, "Hladdu upp hljóðskrá fyrst", timer_off

    # Long recordings are processed in 30-second chunks, 8 chunks per batch.
    result = pipe(audio_path, chunk_length_s=30, batch_size=8)
    transcript = result["text"].strip()
    return audio_path, transcript, timer_off

with gr.Blocks() as demo:
    # Page heading and a one-line usage hint (user-facing text is Icelandic).
    gr.Markdown("# Íslenskt Whisper – Mjög lágt WER")
    gr.Markdown("Hladdu upp einni skrá (allt að 5 mín) → Transcribe")

    # Upload widget configured with type="filepath".
    # NOTE(review): the original comment says this works on every Gradio
    # version – not verified here.
    audio_in = gr.Audio(type="filepath", label="Hljóðskrá")
    btn = gr.Button("Transcribe", size="lg", variant="primary")

    # Plain timer created with only an interval value (no label/active/visible).
    # NOTE(review): the original comment claims Gradio 3.x compatibility;
    # confirm that gr.Timer is available on the targeted Gradio version.
    timer = gr.Timer(value=180)

    output = gr.Textbox(lines=20, label="Útskrift")

    # Clicking the button runs the transcription; its three return values are
    # routed to the audio widget, the transcript box and the timer, in order.
    btn.click(
        fn=transcribe_single,
        inputs=audio_in,
        outputs=[audio_in, output, timer],
    )

# Basic-auth login for the demo. The credentials can be overridden with the
# APP_AUTH_USER / APP_AUTH_PASSWORD environment variables (e.g. Space secrets);
# the fallbacks keep the previous beta login working when they are unset.
# NOTE(review): the fallback password is still committed to source control –
# set the environment variables and remove the defaults once deployed.
demo.launch(
    auth=(
        os.getenv("APP_AUTH_USER", "beta"),
        os.getenv("APP_AUTH_PASSWORD", "beta2025"),
    )
)