# app.py – Icelandic ASR + speaker diarization, works on ZeroGPU (2025 fix)
import os

import gradio as gr
import spaces
import torch
from pyannote.audio import Audio, Pipeline
from transformers import pipeline

MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Module-level caches so repeated requests don't re-download/rebuild the models.
_diarizer = None
_asr = None


def _load_models():
    """Load (once) and return the (diarization, ASR) pipelines.

    Loading happens lazily on first request and is cached for the process
    lifetime; the GPU move is done per-call inside the @spaces.GPU function.
    """
    global _diarizer, _asr
    if _diarizer is None:
        token = os.getenv("HF_TOKEN")
        # pyannote 3.x keyword is `token` (the old `use_auth_token` is deprecated).
        _diarizer = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1", token=token
        )
        _asr = pipeline(
            "automatic-speech-recognition",
            model=MODEL_NAME,
            device=0,
            token=token,
        )
    return _diarizer, _asr


@spaces.GPU(duration=120)
def transcribe_with_diarization(audio_path):
    """Transcribe an audio file and label each segment with its speaker.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path supplied by the gr.Audio component
        (None when nothing was uploaded).

    Returns
    -------
    str
        One line per speaker turn, formatted "[MÆLENDI <speaker>] <text>",
        or a short Icelandic status message.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    diarizer, asr = _load_models()
    # pyannote 3.x requires a torch.device here; a bare "cuda" string raises.
    diarizer.to(torch.device("cuda"))
    dia_result = diarizer(audio_path)

    # Crop each speaker turn directly in memory. (Annotation.crop() returns an
    # Annotation, not audio, so the old export-to-tempfile approach could not
    # work; Audio.crop() yields a (channels, samples) waveform tensor instead.)
    cropper = Audio(sample_rate=16000, mode="downmix")  # Whisper expects 16 kHz mono
    lines = []
    for turn, _, speaker in dia_result.itertracks(yield_label=True):
        waveform, sample_rate = cropper.crop(audio_path, turn)
        text = asr(
            {"raw": waveform.squeeze(0).numpy(), "sampling_rate": sample_rate}
        )["text"].strip()
        lines.append(f"[MÆLENDI {speaker}] {text}")

    return "\n".join(lines) or "Ekkert heyrt"


with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR + Mælendagreining")
    gr.Markdown("**Whisper-small + pyannote 3.1 · 2025 fix**")
    audio = gr.Audio(type="filepath")
    btn = gr.Button("Transcribe með mælendum", variant="primary")
    out = gr.Textbox(lines=35)
    btn.click(transcribe_with_diarization, audio, out)

# NOTE(security): hard-coded demo credentials — move to environment variables
# (e.g. os.environ["APP_USER"], os.environ["APP_PASS"]) for a real deployment.
demo.launch(auth=("beta", "beta2025"))