# app.py – Mælendagreining VIRKAR á ZeroGPU (2025 fix)
import os
import gradio as gr
import spaces
from transformers import pipeline
from pyannote.audio import Pipeline
import tempfile
# Fine-tuned Icelandic Whisper-small checkpoint on the Hugging Face Hub.
MODEL_NAME = "palli23/whisper-small-sam_spjall"
@spaces.GPU(duration=120)
def transcribe_with_diarization(audio_path):
    """Run speaker diarization, then transcribe each speaker turn.

    Parameters
    ----------
    audio_path : str | None
        Path to the uploaded audio file (Gradio ``type="filepath"``).

    Returns
    -------
    str
        One line per speaker turn, formatted ``[MÆLENDI <speaker>] <text>``,
        or a short Icelandic status message when there is no input/output.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # Models are loaded inside the function: ZeroGPU only grants CUDA
    # access while a @spaces.GPU-decorated call is running.
    diarization = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        token=os.getenv("HF_TOKEN"),
    ).to("cuda")
    dia_result = diarization(audio_path)

    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=os.getenv("HF_TOKEN"),
    )

    # FIX: `dia_result` is a pyannote Annotation — its `.crop()` crops by
    # segment, not by audio path, and it has no `.export()`, so the old
    # `dia_result.crop(audio_path, turn).export(...)` failed at runtime.
    # Use pyannote's Audio I/O helper to read exactly the
    # [turn.start, turn.end] window instead; no temp WAV files needed
    # (the old delete=False temp files also leaked on exception).
    from pyannote.audio import Audio
    audio_io = Audio(mono="downmix")  # force single channel for Whisper

    lines = []
    for turn, _, speaker in dia_result.itertracks(yield_label=True):
        waveform, sample_rate = audio_io.crop(audio_path, turn)
        # The ASR pipeline accepts a raw mono float array plus its rate
        # and resamples internally to the model's expected rate.
        result = asr({
            "raw": waveform.squeeze(0).numpy(),
            "sampling_rate": sample_rate,
        })
        text = result["text"].strip()
        lines.append(f"[MÆLENDI {speaker}] {text}")

    # Match the original output shape: one trailing newline per line.
    full_text = "".join(line + "\n" for line in lines)
    return full_text or "Ekkert heyrt"
# Minimal Gradio UI: audio upload -> button -> transcript textbox.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR + Mælendagreining")
    gr.Markdown("**Whisper-small + pyannote 3.1 · 2025 fix**")
    audio = gr.Audio(type="filepath")  # filepath mode: handler gets a path
    btn = gr.Button("Transcribe með mælendum", variant="primary")
    out = gr.Textbox(lines=35)
    btn.click(transcribe_with_diarization, audio, out)

# SECURITY: the credentials were hard-coded. Allow overriding them via
# environment variables; the original values remain as defaults so
# existing deployments keep working unchanged.
demo.launch(
    auth=(
        os.getenv("GRADIO_AUTH_USER", "beta"),
        os.getenv("GRADIO_AUTH_PASS", "beta2025"),
    )
)