File size: 1,751 Bytes
cc6ae2a 2bc8f15 f0e9bad 0918b24 c27f348 cc6ae2a c871a9c 1170a88 c871a9c cc6ae2a 6161422 cc6ae2a 6161422 cc6ae2a 6161422 cc6ae2a 6161422 cc6ae2a ca5b750 cc6ae2a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# app.py – ZeroGPU-safe: transcribes up to 3 min of audio without hitting "GPU task aborted"
import os
import gradio as gr
import spaces
from transformers import pipeline
import numpy as np
import librosa
# Model identifier handed to the "automatic-speech-recognition" pipeline below.
MODEL_NAME = "palli23/whisper-small-sam_spjall"
@spaces.GPU(duration=60)  # at most 60 s of GPU time per call (original note: the most ZeroGPU allows)
def transcribe_safe(audio_path):
    """Transcribe an audio file by running ASR over short overlapping chunks.

    The audio is loaded at 16 kHz and cut into 20-second windows that start
    every 18 seconds (2 seconds of overlap). All windows are sent to the
    ``MODEL_NAME`` speech-recognition pipeline on GPU device 0 in one batched
    call, and the per-chunk texts are joined with single spaces.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (nothing uploaded) short-circuits with a prompt message.

    Returns:
        The joined transcript, ``"Hladdu upp hljóðskrá"`` when no path was
        given, or ``"Ekkert heyrt"`` when there is nothing to transcribe or
        the model produced no text.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    sample_rate = 16000
    chunk_len = sample_rate * 20  # 20-second window
    stride = sample_rate * 2  # 2-second overlap between neighbouring windows

    # Load the audio resampled to 16 kHz and slice it into short windows.
    audio, _ = librosa.load(audio_path, sr=sample_rate)
    chunks = []
    for start in range(0, len(audio), chunk_len - stride):
        chunk = audio[start:start + chunk_len]
        if len(chunk) < sample_rate:  # under 1 second left: stop
            break
        chunks.append(chunk)

    # Nothing usable (e.g. a clip shorter than 1 s): skip loading the model.
    if not chunks:
        return "Ekkert heyrt"

    # Build the ASR pipeline on GPU device 0.
    pipe = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=os.getenv("HF_TOKEN"),
    )

    # Pass every chunk in one call so batch_size actually batches them;
    # calling the pipeline once per chunk made batch_size a no-op.
    results = pipe(chunks, batch_size=8)

    # NOTE(review): neighbouring windows share 2 s of audio and the texts are
    # simply concatenated, so words near a boundary may appear twice.
    transcript = " ".join(result["text"] for result in results)
    return transcript.strip() or "Ekkert heyrt"
# Gradio UI: upload an audio file, press the button, read the transcript.
with gr.Blocks(title="Íslenskt ASR – 3 mín ZeroGPU") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
    gr.Markdown("**~4 % WER · 25–45 sek · ZeroGPU (PRO)**")
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe (25–45 sek)", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")
    # Run the transcription on the uploaded file path and show the text.
    btn.click(transcribe_safe, inputs=audio, outputs=out)

# Basic-auth login for the app. The credentials can be overridden through
# the APP_AUTH_USER / APP_AUTH_PASSWORD environment variables so they need
# not live in source; the defaults keep the previous login working.
demo.launch(
    auth=(
        os.getenv("APP_AUTH_USER", "beta"),
        os.getenv("APP_AUTH_PASSWORD", "beta2025"),
    )
)