# app.py – Works on both ZeroGPU and regular GPU Spaces
import os
import gradio as gr
import spaces
from transformers import pipeline
# Hub identifier of the fine-tuned Whisper checkpoint served by this app.
MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Build the pipeline exactly once, at import time, so that every request
# reuses the same object (the original author flags this as important).
print("Hleð Whisper módelinu einu sinni...")

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    # Let the library choose the dtype; original note: fine for fp16 and fp32.
    torch_dtype="auto",
    # Use CUDA only when the SYSTEM environment variable equals "spaces";
    # anywhere else fall back to the CPU.
    # Original note: ZeroGPU detects CUDA automatically.
    device="cpu" if os.environ.get("SYSTEM") != "spaces" else "cuda",
    # Original note: SDPA attention is faster on newer GPUs.
    model_kwargs={"attn_implementation": "sdpa"},
    # Access token read from the HF_TOKEN environment variable (None if unset).
    token=os.environ.get("HF_TOKEN"),
)

print("Módel tilbúið – allt klárt!")
@spaces.GPU(duration=120)
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level ``pipe`` pipeline.

    Args:
        audio_path: Value handed to the pipeline as its input. The UI in this
            file supplies a file path; a falsy value (``None`` or an empty
            string) means nothing was uploaded.

    Returns:
        The ``"text"`` entry of the pipeline result, or a fixed prompt
        message asking the user to upload audio when ``audio_path`` is falsy.
    """
    # NOTE(review): the decorator presumably requests a GPU for up to 120 s
    # per call on ZeroGPU -- confirm against the `spaces` documentation.
    if audio_path:
        decode_options = {"language": "is", "task": "transcribe"}
        transcription = pipe(
            audio_path,
            chunk_length_s=30,   # process the audio in 30-second chunks
            batch_size=8,
            generate_kwargs=decode_options,
            return_timestamps=False,
        )
        return transcription["text"]

    # Nothing to transcribe: return the user-facing prompt instead.
    return "Hladdu upp hljóðskrá fyrst"
# Assemble the web UI: a heading, an audio input, a trigger button and a
# text box that receives the transcription.
with gr.Blocks() as demo:
    gr.Markdown(value="# Íslenskt Whisper – mjög lágt WER – 30 sek–5 mín hljóð")

    # type="filepath" makes the component pass a path string to the callback.
    audio_in = gr.Audio(
        label="Hladdu upp mp3/wav (allt að 5 mín)",
        type="filepath",
    )
    btn = gr.Button(value="Transcribe", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=25)

    # Clicking the button runs transcribe_audio on the uploaded audio and
    # writes the returned string into the text box.
    btn.click(fn=transcribe_audio, inputs=audio_in, outputs=output)

# Start serving the app as soon as the module is executed.
demo.launch()