# app.py – Icelandic ASR + speaker diarization, works on ZeroGPU (2025 fix)
import os

import gradio as gr
import spaces
import torch
from pyannote.audio import Audio, Pipeline
from transformers import pipeline

MODEL_NAME = "palli23/whisper-small-sam_spjall"

# Module-level caches so repeated requests don't re-download/rebuild the models.
_diarizer = None
_asr = None


def _load_models():
    """Load (once) and return the (diarization, ASR) pipelines.

    Loading happens lazily on first request and is cached for the process
    lifetime; the GPU move is done per-call inside the @spaces.GPU function.
    """
    global _diarizer, _asr
    if _diarizer is None:
        token = os.getenv("HF_TOKEN")
        # pyannote 3.x keyword is `token` (the old `use_auth_token` is deprecated).
        _diarizer = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1", token=token
        )
        _asr = pipeline(
            "automatic-speech-recognition",
            model=MODEL_NAME,
            device=0,
            token=token,
        )
    return _diarizer, _asr


@spaces.GPU(duration=120)
def transcribe_with_diarization(audio_path):
    """Transcribe an audio file and label each segment with its speaker.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path supplied by the gr.Audio component
        (None when nothing was uploaded).

    Returns
    -------
    str
        One line per speaker turn, formatted "[MÆLENDI <speaker>] <text>",
        or a short Icelandic status message.
    """
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    diarizer, asr = _load_models()
    # pyannote 3.x requires a torch.device here; a bare "cuda" string raises.
    diarizer.to(torch.device("cuda"))
    dia_result = diarizer(audio_path)

    # Crop each speaker turn directly in memory. (Annotation.crop() returns an
    # Annotation, not audio, so the old export-to-tempfile approach could not
    # work; Audio.crop() yields a (channels, samples) waveform tensor instead.)
    cropper = Audio(sample_rate=16000, mode="downmix")  # Whisper expects 16 kHz mono
    lines = []
    for turn, _, speaker in dia_result.itertracks(yield_label=True):
        waveform, sample_rate = cropper.crop(audio_path, turn)
        text = asr(
            {"raw": waveform.squeeze(0).numpy(), "sampling_rate": sample_rate}
        )["text"].strip()
        lines.append(f"[MÆLENDI {speaker}] {text}")

    return "\n".join(lines) or "Ekkert heyrt"


with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR + Mælendagreining")
    gr.Markdown("**Whisper-small + pyannote 3.1 · 2025 fix**")
    audio = gr.Audio(type="filepath")
    btn = gr.Button("Transcribe með mælendum", variant="primary")
    out = gr.Textbox(lines=35)
    btn.click(transcribe_with_diarization, audio, out)

# NOTE(security): hard-coded demo credentials — move to environment variables
# (e.g. os.environ["APP_USER"], os.environ["APP_PASS"]) for a real deployment.
demo.launch(auth=("beta", "beta2025"))