| |
import os
import tempfile

import gradio as gr
import spaces
import torch
from pyannote.audio import Audio, Pipeline
from torch.serialization import safe_globals
from transformers import pipeline
|
|
# Hugging Face Hub id of the Whisper-small ASR model used for transcription
# (Icelandic speech, per the UI text below).
MODEL_NAME = "palli23/whisper-small-sam_spjall"
|
|
@spaces.GPU(duration=120)
def transcribe_with_diarization(audio_path):
    """Diarize *audio_path* with pyannote, then transcribe each speaker turn.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path supplied by ``gr.Audio(type="filepath")``.

    Returns
    -------
    str
        One line per speaker turn, ``[MÆLENDI <label>] <text>``, or an
        Icelandic notice when there is no input / no recognized speech.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # PyTorch 2.6+ made torch.load(weights_only=True) the default; the
    # pyannote checkpoint pickles these globals, so allow-list them while
    # the diarization pipeline loads.
    with safe_globals([
        torch.torch_version.TorchVersion,
        'pyannote.audio.core.task.Specifications',
    ]):
        diarization = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            token=os.getenv("HF_TOKEN"),
        ).to("cuda")

    dia = diarization(audio_path)

    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=os.getenv("HF_TOKEN"),
    )

    # BUG FIX: the original did `dia.crop(audio_path, turn).export(...)`.
    # `dia` is a pyannote Annotation; Annotation.crop() returns another
    # Annotation (timeline metadata) with no .export(), so every iteration
    # raised AttributeError. Crop the actual waveform with pyannote's Audio
    # loader and hand it to the ASR pipeline in memory — this also removes
    # the per-segment temp files (and the leak when asr() raised before
    # os.unlink ran).
    loader = Audio(mono="downmix")  # force mono: Whisper expects 1 channel
    result = []
    for turn, _, speaker in dia.itertracks(yield_label=True):
        waveform, sample_rate = loader.crop(audio_path, turn)
        text = asr(
            {"raw": waveform.squeeze(0).numpy(), "sampling_rate": sample_rate}
        )["text"].strip()
        result.append(f"[MÆLENDI {speaker}] {text}")

    return "\n".join(result) or "Ekkert heyrt"
|
|
| |
# --- Gradio UI ------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR + Mælendagreining")
    gr.Markdown("**Whisper-small + pyannote 3.1 · Fixed PyTorch 2.6+**")

    # Components render in definition order inside the Blocks context.
    audio_input = gr.Audio(type="filepath")
    run_button = gr.Button("Transcribe með mælendum", variant="primary")
    transcript_box = gr.Textbox(lines=35)

    run_button.click(transcribe_with_diarization, audio_input, transcript_box)


# NOTE(review): credentials are hardcoded in source — consider reading them
# from environment variables / Space secrets instead.
demo.launch(auth=("beta", "beta2025"))