| |
| |
| |
|
|
| import os |
| import gradio as gr |
| import spaces |
| import tempfile |
| import torch |
| from transformers import pipeline |
| from pyannote.audio import Pipeline |
| from torch.serialization import safe_globals |
|
|
| |
| |
| |
| ASR_MODEL = "palli23/whisper-small-sam_spjall" |
| DIAR_MODEL = "pyannote/speaker-diarization-3.1" |
|
|
| |
| |
| |
| @spaces.GPU(duration=120) |
| def transcribe_with_diarization(audio_path): |
|
|
| if not audio_path: |
| return "Hladdu upp hljóðskrá." |
|
|
| |
| |
| |
| |
| with safe_globals([ |
| torch.torch_version.TorchVersion, |
| "pyannote.audio.core.task.Specifications", |
| "pyannote.audio.core.model.Model", |
| "pyannote.audio.pipelines.speaker_diarization.SpeakerDiarization" |
| ]): |
|
|
| |
| |
| |
| diarization = Pipeline.from_pretrained( |
| DIAR_MODEL, |
| token=os.getenv("HF_TOKEN") |
| ).to("cuda") |
|
|
| |
| diar = diarization(audio_path) |
|
|
| |
| |
| |
| asr = pipeline( |
| task="automatic-speech-recognition", |
| model=ASR_MODEL, |
| device=0, |
| token=os.getenv("HF_TOKEN") |
| ) |
|
|
| |
| |
| |
| final_output = [] |
|
|
| for turn, _, speaker in diar.itertracks(yield_label=True): |
|
|
| |
| with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: |
| diar.crop(audio_path, turn).export(tmp.name, format="wav") |
| seg_path = tmp.name |
|
|
| |
| text = asr(seg_path)["text"].strip() |
|
|
| |
| final_output.append(f"[MÆLENDI {speaker}] {text}") |
|
|
| |
| os.unlink(seg_path) |
|
|
| return "\n".join(final_output) if final_output else "Ekkert heyrt í hljóðinu." |
|
|
|
|
| |
| |
| |
| with gr.Blocks() as demo: |
| gr.Markdown("# 🎙️ Íslenskt tal → texti + mælendagreining") |
| gr.Markdown("Whisper-small + pyannote 3.1 • Virkar á ZeroGPU • 5 mín hljóð max") |
|
|
| audio_input = gr.Audio(type="filepath", label="Hladdu upp hljóðskrá (.wav / .mp3)") |
| out_box = gr.Textbox(lines=30, label="Útskrift + mælendur") |
|
|
| run_button = gr.Button("Transcribe með mælendum", variant="primary") |
| run_button.click(transcribe_with_diarization, inputs=audio_input, outputs=out_box) |
|
|
| |
| demo.launch(auth=("beta", "beta2025")) |
|
|