import gradio as gr
import torch
from pyannote.audio import Pipeline
import os


import spaces

@spaces.GPU
def analyze_audio(audio_path: str) -> dict:
    """Run speaker diarization on an audio file and summarise who spoke when.

    Args:
        audio_path: Filesystem path of the audio to analyse. A falsy value
            (``None`` or ``""``) produces an empty result that only carries
            a prompt in ``message``.

    Returns:
        A JSON-serialisable dict with the keys ``num_speakers``,
        ``segments`` (one entry per speaker turn with ``speaker``, ``start``,
        ``end`` and ``duration`` in seconds, rounded to 2 decimals),
        ``speaker_summary`` (per-speaker ``total_time`` and ``percentage``),
        ``total_segments`` and ``message`` (a human-readable summary).

    Raises:
        gr.Error: If the diarization pipeline could not be loaded.
    """
    if not audio_path:
        # Keep the same shape as the success path: the caller wires this
        # function to a single JSON output, so every path returns one dict.
        return {
            "num_speakers": 0,
            "segments": [],
            "speaker_summary": [],
            "total_segments": 0,
            "message": "Please upload an audio file.",
        }

    # NOTE(review): the pipeline is re-created on every call; consider
    # loading it once if the hosting environment allows it.
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        token=os.getenv("HF_TOKEN"),
    )
    if pipeline is None:
        # Some pyannote.audio versions return None instead of raising when
        # the checkpoint cannot be fetched (e.g. missing/unauthorised token).
        raise gr.Error(
            "Could not load the diarization pipeline. Check that HF_TOKEN is "
            "set and has access to pyannote/speaker-diarization-3.1."
        )
    if torch.cuda.is_available():
        pipeline.to(torch.device("cuda"))

    result = pipeline(audio_path)
    # Newer pyannote.audio releases wrap the annotation in an output object
    # exposing it as `speaker_diarization`; older ones return it directly.
    diarization = getattr(result, "speaker_diarization", result)

    segments = []
    speaker_times = {}

    for turn, _, speaker in diarization.itertracks(yield_label=True):
        duration = turn.end - turn.start
        segments.append({
            "speaker": speaker,
            "start": round(turn.start, 2),
            "end": round(turn.end, 2),
            "duration": round(duration, 2),
        })
        # Accumulate the unrounded duration so per-segment rounding error
        # does not compound in the per-speaker totals.
        speaker_times[speaker] = speaker_times.get(speaker, 0.0) + duration

    num_speakers = len(speaker_times)
    total_duration = sum(speaker_times.values())

    speaker_summary = [
        {
            "speaker": speaker,
            "total_time": round(seconds, 2),
            "percentage": round(seconds / total_duration * 100, 1) if total_duration > 0 else 0,
        }
        for speaker, seconds in sorted(speaker_times.items())
    ]

    summary_lines = [f"Detected {num_speakers} speaker{'s' if num_speakers != 1 else ''} in the audio:"]
    summary_lines.extend(
        f"- {info['speaker']}: {info['total_time']}s ({info['percentage']}%)"
        for info in speaker_summary
    )

    return {
        "num_speakers": num_speakers,
        "segments": segments,
        "speaker_summary": speaker_summary,
        "total_segments": len(segments),
        "message": "\n".join(summary_lines),
    }


# UI layout: audio upload and trigger button on the left, JSON results on
# the right. The page text no longer asks the user for a Hugging Face token,
# because there is no token field; analyze_audio reads it from HF_TOKEN.
with gr.Blocks(title="Speaker Diarization") as demo:
    gr.Markdown("# Speaker Diarization (pyannote.audio)")
    gr.Markdown("Upload an audio file to run speaker diarization.")

    with gr.Row():
        with gr.Column():
            # type="filepath" hands analyze_audio a path string, which is
            # what its `audio_path` parameter expects.
            audio_input = gr.Audio(type="filepath", label="Upload Audio")
            submit_btn = gr.Button("Analyze", variant="primary")
        with gr.Column():
            details = gr.JSON(label="Details")

    # Single input, single output: analyze_audio's return value is rendered
    # as-is in the JSON panel.
    submit_btn.click(fn=analyze_audio, inputs=[audio_input], outputs=[details])


if __name__ == "__main__":
    # Enable request queuing, then start the server without a public share
    # link and with server-side rendering turned off.
    queued_demo = demo.queue()
    queued_demo.launch(share=False, ssr_mode=False)