|
|
|
|
|
import os |
|
|
import gradio as gr |
|
|
import spaces |
|
|
import tempfile |
|
|
import torch |
|
|
|
|
|
from torch.serialization import safe_globals |
|
|
from pyannote.audio.core.model import Model |
|
|
from pyannote.audio.core.task import Task, Specifications |
|
|
from pyannote.audio.pipelines.speaker_diarization import SpeakerDiarization |
|
|
from typing import OrderedDict |
|
|
|
|
|
from transformers import pipeline |
|
|
from pyannote.audio import Pipeline |
|
|
|
|
|
|
|
|
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" |
|
|
torch.serialization.add_safe_globals({ |
|
|
"OrderedDict": OrderedDict, |
|
|
}) |
|
|
|
|
|
MODEL_NAME = "palli23/whisper-small-sam_spjall" |
|
|
|
|
|
@spaces.GPU(duration=120) |
|
|
def transcribe_with_diarization(audio_path): |
|
|
if not audio_path: |
|
|
return "Hladdu upp hljóðskrá" |
|
|
|
|
|
|
|
|
with safe_globals([ |
|
|
torch.torch_version.TorchVersion, |
|
|
Model, |
|
|
Task, |
|
|
Specifications, |
|
|
SpeakerDiarization, |
|
|
OrderedDict, |
|
|
]): |
|
|
diarization = Pipeline.from_pretrained( |
|
|
"pyannote/speaker-diarization-3.1", |
|
|
use_auth_token=os.getenv("HF_TOKEN") |
|
|
).to("cuda") |
|
|
|
|
|
|
|
|
dia = diarization(audio_path) |
|
|
|
|
|
|
|
|
asr = pipeline( |
|
|
"automatic-speech-recognition", |
|
|
model=MODEL_NAME, |
|
|
device=0, |
|
|
use_auth_token=os.getenv("HF_TOKEN"), |
|
|
) |
|
|
|
|
|
|
|
|
result = [] |
|
|
for turn, _, speaker in dia.itertracks(yield_label=True): |
|
|
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f: |
|
|
diarization.crop(audio_path, turn).export(f.name, format="wav") |
|
|
chunk = f.name |
|
|
|
|
|
text = asr(chunk)["text"].strip() |
|
|
os.unlink(chunk) |
|
|
result.append(f"[MÆLENDI {speaker}] {text}") |
|
|
|
|
|
return "\n".join(result) or "Enginn texti heyrðist." |
|
|
|
|
|
with gr.Blocks() as demo: |
|
|
gr.Markdown("# Íslenskt ASR + Mælendagreining") |
|
|
gr.Markdown("Whisper-small + pyannote 3.1 (ZeroGPU örugg útgáfa)") |
|
|
|
|
|
audio = gr.Audio(type="filepath", label="Hljóðskrá") |
|
|
btn = gr.Button("Transcribe með mælendum") |
|
|
out = gr.Textbox(lines=35, label="Úttak") |
|
|
|
|
|
btn.click(transcribe_with_diarization, inputs=audio, outputs=out) |
|
|
|
|
|
demo.launch(auth=("beta", "beta2025")) |
|
|
|