# app.py — Gradio ZeroGPU Space: Icelandic automatic speech recognition
# using a fine-tuned Whisper model, with repetition_penalty=1.2 and
# no_repeat_ngram_size=3 during decoding.
import os
# Must be set BEFORE `import torch` below: pin OpenMP to one CPU thread and
# cap the CUDA caching-allocator split size to reduce fragmentation.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
import gradio as gr
import spaces
from transformers import pipeline
import torch
import gc
# ——————————————————————————————
# ZeroGPU worker – model loaded inside
# ——————————————————————————————
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an uploaded audio file to Icelandic text.

    Runs inside a ZeroGPU worker (GPU allocated for up to 180 s), so the
    ASR pipeline is built fresh on every call and torn down afterwards to
    release GPU memory back to the shared pool.

    Args:
        audio_path: Filesystem path to the uploaded audio file, or a falsy
            value when nothing was uploaded.

    Returns:
        The transcribed text, or an Icelandic "upload a file" prompt when
        no path was given.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"
    pipe = pipeline(
        "automatic-speech-recognition",
        model="palli23/whisper-small-sam_spjall",
        torch_dtype=torch.float16,
        device=0,  # GPU index inside the @spaces.GPU context
    )
    try:
        result = pipe(
            audio_path,
            chunk_length_s=30,       # process long audio in 30 s chunks
            batch_size=8,
            return_timestamps=False, # plain text only, no timestamps
            generate_kwargs={
                "num_beams": 5,               # beam search (was greedy, size 1)
                "repetition_penalty": 1.2,    # discourage repeated tokens
                "no_repeat_ngram_size": 3,    # forbid repeated trigrams
                "temperature": 0.0,           # deterministic decoding
            },
        )
    finally:
        # Always release the pipeline and CUDA cache — even when inference
        # raises — so the ZeroGPU worker does not leak GPU memory.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()
    return result["text"]
# ——————————————————————————————
# UI – clean and simple
# ——————————————————————————————
with gr.Blocks() as demo:
    # Header, model blurb, and contact line.
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt að 5 mín hljóð")
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    # Audio upload in, transcription text out, triggered by one button.
    upload = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
    run_btn = gr.Button("Transcribe", variant="primary", size="lg")
    transcript = gr.Textbox(lines=25, label="Útskrift")

    # Wire the button to the ZeroGPU transcription worker.
    run_btn.click(fn=transcribe_3min, inputs=upload, outputs=transcript)
# ——————————————————————————————
# Public launch
# ——————————————————————————————
# Listen on every interface on port 7860, request a public share link,
# and require no authentication.
demo.launch(share=True, server_name="0.0.0.0", server_port=7860, auth=None)