File size: 2,413 Bytes
e37e472
2bc8f15
e37e472
 
3325b0c
f0e9bad
0918b24
c27f348
c871a9c
e37e472
 
 
2055ff2
c871a9c
e37e472
 
 
3325b0c
 
e37e472
 
 
3325b0c
e37e472
 
 
 
 
 
 
 
 
 
38b1c82
ea1ab79
 
ac10614
3325b0c
e37e472
ea1ab79
 
 
6161422
3325b0c
e37e472
 
 
ea1ab79
3325b0c
e37e472
3325b0c
e37e472
 
 
 
3325b0c
e37e472
 
 
ac10614
ca5b750
e37e472
 
 
3325b0c
 
e37e472
 
3325b0c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# app.py
import os
# Process-wide runtime knobs, set ahead of the third-party imports that follow
# (presumably so the numeric/GPU libraries pick them up when they initialise):
#   * OMP_NUM_THREADS          - OpenMP thread count, pinned to one.
#   * PYTORCH_CUDA_ALLOC_CONF  - PyTorch CUDA allocator option string.
os.environ.update(
    OMP_NUM_THREADS="1",
    PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:128",
)

import gradio as gr
import spaces
from transformers import pipeline

# ——————————————————————————————
# Model loaded ONCE at startup (global)
# ——————————————————————————————
# Model identifier handed to `pipeline(model=...)` when the ASR pipeline is built.
MODEL_NAME = "palli23/whisper-large-v3-is-samromur-20-40s-3x-final"

def get_pipe():
    """Build the automatic-speech-recognition pipeline for ``MODEL_NAME``.

    The pipeline is created in half precision on CUDA device 0. ``HF_TOKEN``
    is read from the environment and forwarded as the Hub token; it is
    ``None`` when the variable is unset.

    This loader is intentionally not wrapped in ``@spaces.GPU``. On ZeroGPU
    hardware that decorator attaches a GPU only for the duration of the
    decorated call, so it belongs on the per-request inference function,
    while the model itself is created once at module import.

    Returns:
        The pipeline object produced by ``transformers.pipeline``.
    """
    return pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        torch_dtype="float16",
        device=0,                       # first CUDA device
        token=os.getenv("HF_TOKEN"),    # Remove line if model is public
    )


pipe = get_pipe()  # Built once at import time and reused by every request


# ——————————————————————————————
# Transcription function
# ——————————————————————————————
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an audio file with the shared ASR pipeline.

    Decorated with ``@spaces.GPU`` so that a GPU is attached while inference
    runs on ZeroGPU hardware (up to 180 seconds per call); on dedicated GPU
    hardware the decorator is documented as having no effect.

    Args:
        audio_path: Filesystem path of the uploaded audio, or a falsy value
            (``None`` / empty string) when nothing was uploaded.

    Returns:
        The transcribed text, or the prompt ``"Hladdu upp hljóðskrá"`` when
        no audio was provided.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Long-form decoding: 30 s windows with 6 s of left-side overlap,
    # processed eight windows per batch, plain text only (no timestamps).
    result = pipe(
        audio_path,
        chunk_length_s=30,
        stride_length_s=(6, 0),
        batch_size=8,
        return_timestamps=False,
    )
    return result["text"]


# ——————————————————————————————
# Gradio UI
# ——————————————————————————————
with gr.Blocks(title="Íslenskt ASR – 3 mín") as demo:
    # Page header.
    # NOTE(review): the title says 3 minutes while the description and the
    # upload label say 5 minutes — confirm which limit is intended.
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**Whisper · mjög lágur WER · allt að 5 mín hljóð**")

    # Upload widget; "filepath" makes Gradio pass the handler a path string.
    audio_in = gr.Audio(label="Hladdu upp .mp3 / .wav (max 5 mín)", type="filepath")

    # Trigger button and the text area that receives the transcript.
    btn = gr.Button("Transcribe", size="lg", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=30)

    # Clicking the button sends the uploaded file through transcribe_3min
    # and writes the returned text into the textbox.
    btn.click(transcribe_3min, inputs=audio_in, outputs=output)


# ——————————————————————————————
# Stable launch (no more infinite Building)
# ——————————————————————————————
# Start the Gradio server behind basic auth.
#
# The login is read from the APP_AUTH_USER / APP_AUTH_PASSWORD environment
# variables so real credentials can be supplied as deployment secrets instead
# of being committed. The previous hard-coded values remain as fallbacks so an
# unconfigured deployment behaves exactly as before.
# NOTE(review): the fallback password is visible in source control — set the
# environment variables and rotate it.
demo.launch(
    auth=(
        os.getenv("APP_AUTH_USER", "beta"),
        os.getenv("APP_AUTH_PASSWORD", "beta2025"),
    ),
    ssr_mode=False,          # server-side rendering disabled
    show_error=True,         # surface handler exceptions in the UI
    server_name="0.0.0.0",   # listen on all interfaces
    server_port=7860,
    quiet=False,
)