Spaces:
Sleeping
Sleeping
| # app.py – ZeroGPU-safe – 3 min of audio without "GPU task aborted" | |
| import os | |
| import gradio as gr | |
| import spaces | |
| from transformers import pipeline | |
| import numpy as np | |
| import librosa | |
# Hugging Face Hub id of the checkpoint loaded by the ASR pipeline.
MODEL_NAME = "palli23/whisper-small-sam_spjall"

SAMPLE_RATE = 16000    # Hz; audio is resampled to this rate when loaded
CHUNK_LENGTH_S = 20    # seconds of audio per inference window
STRIDE_LENGTH_S = 2    # seconds of overlap kept on each side of a window
BATCH_SIZE = 8         # inference windows sent through the model at once

# Lazily created pipeline, reused by later calls in the same process.
_asr_pipeline = None


def _get_asr_pipeline():
    """Return the ASR pipeline, building it on first use."""
    global _asr_pipeline
    if _asr_pipeline is None:
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=MODEL_NAME,
            device=0,
            token=os.getenv("HF_TOKEN"),
        )
    return _asr_pipeline


# ZeroGPU only attaches a GPU inside functions decorated with @spaces.GPU;
# duration=60 requests up to 60 seconds of GPU time per call.
@spaces.GPU(duration=60)
def transcribe_safe(audio_path):
    """Transcribe an audio file and return the recognised text.

    The file is decoded and resampled to ``SAMPLE_RATE`` with librosa, then
    handed to the ASR pipeline, which splits it into overlapping windows of
    ``CHUNK_LENGTH_S`` seconds and merges the overlapping parts of the
    per-window transcripts.  Merging (rather than concatenating separately
    transcribed overlapping chunks) keeps speech near a window boundary from
    appearing twice in the output.

    Args:
        audio_path: Path to the uploaded audio file, or a falsy value when
            nothing was uploaded.

    Returns:
        The transcript, ``"Hladdu upp hljóðskrá"`` when no file was given,
        or ``"Ekkert heyrt"`` when the audio is shorter than one second or
        the model produced no text.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE)

    # Less than one second of audio: nothing worth sending to the model.
    if len(audio) < SAMPLE_RATE:
        return "Ekkert heyrt"

    result = _get_asr_pipeline()(
        {"raw": audio, "sampling_rate": sr},
        chunk_length_s=CHUNK_LENGTH_S,
        stride_length_s=STRIDE_LENGTH_S,
        batch_size=BATCH_SIZE,
    )
    return result["text"].strip() or "Ekkert heyrt"
# Gradio UI: upload one audio file (up to 3 minutes) and show its transcript.
with gr.Blocks(title="Íslenskt ASR – 3 mín ZeroGPU") as demo:
    gr.Markdown("# Íslenskt ASR – 3 mín hljóð")
    gr.Markdown("**~4 % WER · 25–45 sek · ZeroGPU (PRO)**")
    audio = gr.Audio(type="filepath", label="Hladdu upp .mp3 / .wav (allt að 3 mín)")
    btn = gr.Button("Transcribe (25–45 sek)", variant="primary", size="lg")
    out = gr.Textbox(lines=30, label="Útskrift")
    btn.click(transcribe_safe, inputs=audio, outputs=out)

# Login credentials can be overridden through the APP_AUTH_USER and
# APP_AUTH_PASSWORD environment variables so the real values do not have to
# live in the source; the defaults keep the previous behaviour when unset.
# NOTE(review): the default password is still committed here - set the
# variables in the deployment and rotate the password.
demo.launch(
    auth=(
        os.getenv("APP_AUTH_USER", "beta"),
        os.getenv("APP_AUTH_PASSWORD", "beta2025"),
    )
)