# Source: Hugging Face Space "ASR_API" — app.py (commit 5be18fb, "diarization", ~1.89 kB)
# app.py – FIXED Pyannote UnpicklingError (PyTorch 2.6+ Compatible)
import os
import gradio as gr
import spaces
from transformers import pipeline
from pyannote.audio import Pipeline
import torch
import tempfile
from torch.serialization import safe_globals # ← KEY FIX
MODEL_NAME = "palli23/whisper-small-sam_spjall"
@spaces.GPU(duration=120)
def transcribe_with_diarization(audio_path):
    """Diarize *audio_path* with pyannote 3.1, then transcribe each speaker
    turn with the fine-tuned Whisper-small model.

    Parameters
    ----------
    audio_path : str | None
        Path to the uploaded audio file (Gradio ``type="filepath"``).

    Returns
    -------
    str
        One line per speaker turn, ``[MÆLENDI <speaker>] <text>``, or an
        Icelandic notice when no audio was uploaded / no speech was found.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    # PyTorch 2.6 changed torch.load to weights_only=True, which breaks
    # unpickling of pyannote checkpoints.  safe_globals() must be given the
    # actual classes to allowlist — a dotted-path *string* (as the original
    # code passed) is rejected, so import the class and pass it directly.
    from pyannote.audio.core.task import Specifications
    with safe_globals([torch.torch_version.TorchVersion, Specifications]):
        diarization = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            token=os.getenv("HF_TOKEN"),
        ).to(torch.device("cuda"))  # pyannote 3.x requires a torch.device, not "cuda"

    dia = diarization(audio_path)

    # Whisper-small ASR pipeline (rebuilt per call; acceptable for a demo Space).
    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        device=0,
        token=os.getenv("HF_TOKEN"),
    )

    # BUG FIX: Annotation.crop() returns another Annotation, not audio, so the
    # original `dia.crop(...).export(...)` could never run.  Use pyannote's
    # Audio helper to slice the waveform per turn and feed the raw samples to
    # the ASR pipeline — no temp files needed.
    from pyannote.audio import Audio
    audio_io = Audio(sample_rate=16_000, mono="downmix")  # Whisper expects 16 kHz mono

    result = []
    for turn, _, speaker in dia.itertracks(yield_label=True):
        waveform, sample_rate = audio_io.crop(audio_path, turn)
        text = asr(
            {"array": waveform.squeeze().numpy(), "sampling_rate": sample_rate}
        )["text"].strip()
        result.append(f"[MÆLENDI {speaker}] {text}")
    return "\n".join(result) or "Ekkert heyrt"
# Interface
# Gradio UI: one audio upload → one button → one text box showing the
# per-speaker transcript produced by transcribe_with_diarization().
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR + Mælendagreining")
    gr.Markdown("**Whisper-small + pyannote 3.1 · Fixed PyTorch 2.6+**")
    # type="filepath" hands the handler a path string (matches audio_path param).
    audio = gr.Audio(type="filepath")
    btn = gr.Button("Transcribe með mælendum", variant="primary")
    out = gr.Textbox(lines=35)
    btn.click(transcribe_with_diarization, audio, out)
# NOTE(review): hardcoded credentials in source — move to env vars / Space secrets.
demo.launch(auth=("beta", "beta2025"))