File size: 2,747 Bytes
33f0766
2102ae8
6adf5a9
 
3b102fc
6adf5a9
 
 
a0182fe
45c12a4
 
e37e472
3b102fc
33f0766
3b102fc
2102ae8
9648db0
 
 
2102ae8
9648db0
0096536
86cb813
e313a56
163d70f
94523af
2b18034
163d70f
9648db0
33f0766
3b102fc
2102ae8
9648db0
 
 
 
33f0766
2102ae8
5b74b20
33f0766
 
2102ae8
 
9648db0
2102ae8
33f0766
9648db0
 
 
2102ae8
9648db0
c675e00
9a5eb7a
33f0766
9a5eb7a
2102ae8
3b102fc
2102ae8
a0182fe
2102ae8
33f0766
3b102fc
33f0766
2102ae8
3b102fc
ac10614
9a5eb7a
33f0766
9a5eb7a
c675e00
2102ae8
c675e00
a0182fe
2102ae8
a0182fe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# app.py — Your original working version + repetition_penalty=1.2 + ngram=3

import os
# NOTE: these must be set BEFORE torch is imported / CUDA initializes so
# they are picked up: cap OpenMP at one thread and limit the CUDA caching
# allocator's split size to reduce fragmentation on the shared GPU.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import gradio as gr
import spaces
from transformers import pipeline
import torch
import gc

# ——————————————————————————————
# ZeroGPU worker – model loaded inside
# ——————————————————————————————
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an uploaded Icelandic audio file on a ZeroGPU worker.

    The Whisper pipeline is built inside this function so the model only
    lives on the GPU for the duration of one @spaces.GPU invocation.

    Args:
        audio_path: Filesystem path to the uploaded audio file (comes from
            ``gr.Audio(type="filepath")``); falsy when nothing was uploaded.

    Returns:
        The transcribed text, or an Icelandic "please upload a file"
        message when no file was provided.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,  # GPU index 0 inside the @spaces.GPU context
    )

    try:
        result = pipe(
            audio_path,
            chunk_length_s=30,        # split long audio into 30 s chunks
            batch_size=8,
            return_timestamps=False,  # plain text only, no timestamps
            generate_kwargs={
                "num_beams": 5,                # beam search (was beam size 1)
                "repetition_penalty": 1.2,     # discourage repeated tokens
                "no_repeat_ngram_size": 3,     # forbid repeated 3-grams
                "temperature": 0.0,
            },
        )
    finally:
        # Release GPU memory even if transcription raises, so the shared
        # ZeroGPU worker never leaks the model across invocations.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    return result["text"]

# ------------------------------------------------------------------
# User interface: one upload box, one button, one transcript field.
# ------------------------------------------------------------------
with gr.Blocks() as demo:
    # Static header and contact information shown above the controls.
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð")
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    uploaded_audio = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
    transcribe_button = gr.Button("Transcribe", variant="primary", size="lg")
    transcript_box = gr.Textbox(lines=25, label="Útskrift")

    # Wire the button to the ZeroGPU transcription worker.
    transcribe_button.click(
        fn=transcribe_3min,
        inputs=uploaded_audio,
        outputs=transcript_box,
    )

# ------------------------------------------------------------------
# Public launch: bind on all interfaces, port 7860, no auth, shareable.
# ------------------------------------------------------------------
demo.launch(share=True, server_name="0.0.0.0", server_port=7860, auth=None)