| |
| |
| |
|
|
import os
import tempfile

import gradio as gr
import spaces
from pyannote.audio import Audio, Pipeline
from transformers import pipeline
|
|
|
|
# Hugging Face Hub model IDs.
# Whisper-small based ASR model used for transcription (per the UI text,
# an Icelandic fine-tune).
ASR_MODEL = "palli23/whisper-small-sam_spjall"
# NOTE(review): "pyannote/speaker-diarization" is an unpinned alias —
# presumably resolves to the 2.x pipeline mentioned in the UI; consider
# pinning an explicit version (e.g. with a revision) — TODO confirm.
DIAR_MODEL = "pyannote/speaker-diarization"
|
|
|
|
@spaces.GPU(duration=120)
def transcribe_with_diarization(audio_path):
    """Transcribe an audio file with per-speaker attribution.

    Runs pyannote speaker diarization to find speaker turns, then
    transcribes each turn with the Whisper ASR model.

    Args:
        audio_path: Filesystem path to the uploaded audio file (as
            delivered by ``gr.Audio(type="filepath")``), or None/"" when
            nothing was uploaded.

    Returns:
        str: One line per speaker turn, formatted
        ``[MÆLENDI <speaker>] <text>``, or an Icelandic notice string
        when there is no input or no recognized speech.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá."

    # Models are loaded inside the handler on purpose: under ZeroGPU the
    # CUDA device only exists for the duration of this decorated call.
    diarization = Pipeline.from_pretrained(
        DIAR_MODEL,
        use_auth_token=os.getenv("HF_TOKEN"),
    ).to("cuda")

    diar = diarization(audio_path)

    asr = pipeline(
        task="automatic-speech-recognition",
        model=ASR_MODEL,
        device=0,
    )

    # BUGFIX: the previous code called diar.crop(audio_path, turn).export(...),
    # but Annotation.crop() crops *annotations* (and has no .export()), so it
    # crashed at runtime. Use pyannote's Audio loader to crop the waveform in
    # memory instead — this also removes the temp-file round trip (and the
    # temp-file leak when ASR raised). 16 kHz mono matches Whisper's input.
    loader = Audio(sample_rate=16_000, mono="downmix")

    output_lines = []
    for turn, _, speaker in diar.itertracks(yield_label=True):
        waveform, sample_rate = loader.crop(audio_path, turn)
        # transformers' ASR pipeline accepts raw audio as a dict with
        # "raw" (1-D float array) and "sampling_rate" keys.
        result = asr({
            "raw": waveform.squeeze().numpy(),
            "sampling_rate": sample_rate,
        })
        text = result["text"].strip()
        if text:  # skip turns where ASR produced nothing
            output_lines.append(f"[MÆLENDI {speaker}] {text}")

    return "\n".join(output_lines) or "Enginn texti fannst."
|
|
|
|
| |
| |
| |
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Íslenskt ASR + mælendagreining")
    gr.Markdown("Whisper-small + pyannote 2.1.1 (ZeroGPU örugg útgáfa)")

    # Input: file path is passed straight to the handler.
    audio = gr.Audio(type="filepath", label="Hlaða inn hljóði (.wav or .mp3)")
    # Output: multi-line transcript, one line per speaker turn.
    out = gr.Textbox(lines=30, label="Útskrift með mælendum")

    btn = gr.Button("Transcribe með mælendum", variant="primary")
    btn.click(transcribe_with_diarization, inputs=audio, outputs=out)

# SECURITY: credentials were previously hard-coded in source. They are now
# read from the environment; the fallbacks preserve the old behaviour but
# should be overridden in deployment via GRADIO_USER / GRADIO_PASS.
demo.launch(
    auth=(
        os.getenv("GRADIO_USER", "beta"),
        os.getenv("GRADIO_PASS", "beta2025"),
    )
)
|
|