"""app.py — Gradio demo for Icelandic speech recognition on a ZeroGPU Space.

An uploaded audio file is transcribed with the checkpoint named by
``MODEL_ID``. Decoding uses beam search with ``repetition_penalty=1.2`` and
``no_repeat_ngram_size=3``.
"""

import gc
import os
from typing import Optional

# Set before the torch/transformers imports below (same ordering as the
# original script) so the libraries see these values when they initialise.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import gradio as gr
import spaces
from transformers import pipeline
import torch

# Checkpoint used for transcription; also shown in the UI caption.
# Earlier checkpoints that were tried (kept for reference):
#   palli23/whisper-tiny-icelandic-distilled-v3
#   palli23/whisper-tiny-distilled-spjallromur-polish-v3
#   palli23/whisper-tiny-distilled-spjallromur-polish-v5
#   palli23/whisper-tiny-distilled-samromur-spjallromur-polish
#   palli23/whisper-tiny-samromur-spjallromur
MODEL_ID = "palli23/whisper-small-sam_spjall"


# ——————————————————————————————
# ZeroGPU worker – model loaded inside
# ——————————————————————————————
@spaces.GPU(duration=180)
def transcribe_3min(audio_path: Optional[str]) -> str:
    """Transcribe one audio file and return the recognised text.

    The pipeline is built inside the call, i.e. inside the ``@spaces.GPU``
    section, and dropped again before returning.

    Args:
        audio_path: Path to the audio file, as supplied by the
            ``gr.Audio(type="filepath")`` input. Falsy when nothing
            was uploaded.

    Returns:
        The ``"text"`` field of the pipeline result, or an Icelandic
        "please upload an audio file" message when ``audio_path`` is empty.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    pipe = None
    try:
        pipe = pipeline(
            "automatic-speech-recognition",
            model=MODEL_ID,
            torch_dtype=torch.float16,
            device=0,  # GPU inside @spaces.GPU
        )
        result = pipe(
            audio_path,
            chunk_length_s=30,
            batch_size=8,
            return_timestamps=False,  # plain text only, no timestamps
            generate_kwargs={
                "num_beams": 5,  # previously 1
                "repetition_penalty": 1.2,
                "no_repeat_ngram_size": 3,
                "temperature": 0.0,
            },
        )
        return result["text"]
    finally:
        # Best-effort cleanup that also runs when loading or inference
        # raises, so a failed request does not skip the memory release.
        pipe = None
        gc.collect()
        torch.cuda.empty_cache()


# ——————————————————————————————
# UI – clean and simple
# ——————————————————————————————
with gr.Blocks() as demo:
    # NOTE(review): the title says 3 minutes while the caption says up to
    # 5 minutes of audio — confirm which limit is intended.
    gr.Markdown("# Íslenskt ASR – 3 mínútur")
    gr.Markdown(f"**{MODEL_ID}** · mjög lágur WER · allt að 5 mín hljóð")
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    audio_in = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
    btn = gr.Button("Transcribe", variant="primary", size="lg")
    output = gr.Textbox(lines=25, label="Útskrift")

    btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)

# ——————————————————————————————
# Public launch
# ——————————————————————————————
# Start the Gradio server for the `demo` app defined above.
demo.launch(
    share=True,             # request a public share link
    server_name="0.0.0.0",  # listen on all network interfaces
    server_port=7860,
    auth=None,              # no login required
)