"""Gradio app that runs pyannote speaker diarization on an uploaded audio file."""

import os
from collections import defaultdict

import gradio as gr
import spaces
import torch
from pyannote.audio import Pipeline


def _build_report(turns) -> dict:
    """Aggregate speaker turns into the JSON-serializable report shown in the UI.

    Args:
        turns: Iterable of ``(start, end, speaker)`` triples, with ``start``
            and ``end`` as numbers and ``speaker`` as the speaker label.

    Returns:
        A dict with the keys ``num_speakers``, ``segments``,
        ``speaker_summary``, ``total_segments`` and ``message``.
    """
    segments = []
    speaker_times = defaultdict(float)

    for start, end, speaker in turns:
        segments.append({
            "speaker": speaker,
            "start": round(start, 2),
            "end": round(end, 2),
            "duration": round(end - start, 2),
        })
        # Accumulate the unrounded duration so rounding error does not build
        # up across many short segments; totals are rounded once, below.
        speaker_times[speaker] += end - start

    num_speakers = len(speaker_times)
    total_duration = sum(speaker_times.values())

    speaker_summary = [
        {
            "speaker": speaker,
            "total_time": round(seconds, 2),
            # Guard against division by zero when nothing was detected.
            "percentage": (
                round(seconds / total_duration * 100, 1)
                if total_duration > 0
                else 0
            ),
        }
        for speaker, seconds in sorted(speaker_times.items())
    ]

    plural = "s" if num_speakers != 1 else ""
    summary_lines = [f"Detected {num_speakers} speaker{plural} in the audio:"]
    summary_lines.extend(
        f"- {info['speaker']}: {info['total_time']}s ({info['percentage']}%)"
        for info in speaker_summary
    )

    return {
        "num_speakers": num_speakers,
        "segments": segments,
        "speaker_summary": speaker_summary,
        "total_segments": len(segments),
        "message": "\n".join(summary_lines),
    }


@spaces.GPU
def analyze_audio(audio_path: str) -> dict:
    """Run speaker diarization on an audio file and summarize the result.

    Args:
        audio_path: Path of the audio file to analyze. A falsy value (no
            upload) yields an empty report whose ``message`` asks for a file.

    Returns:
        A dict with the keys ``num_speakers``, ``segments``,
        ``speaker_summary``, ``total_segments`` and ``message``. A dict is
        returned on every path because the function feeds a single
        ``gr.JSON`` output component.
    """
    if not audio_path:
        report = _build_report([])
        report["message"] = "Please upload an audio file."
        return report

    # The access token is read from the HF_TOKEN environment variable.
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        token=os.getenv("HF_TOKEN"),
    )
    if torch.cuda.is_available():
        pipeline.to(torch.device("cuda"))

    result = pipeline(audio_path)
    # NOTE(review): newer pyannote.audio releases appear to wrap the result in
    # an object exposing `speaker_diarization`, while older ones return the
    # annotation directly -- confirm against the installed version. The
    # fallback keeps both shapes working.
    annotation = getattr(result, "speaker_diarization", result)

    return _build_report(
        (turn.start, turn.end, speaker)
        for turn, _, speaker in annotation.itertracks(yield_label=True)
    )


with gr.Blocks(title="Speaker Diarization") as demo:
    gr.Markdown("# Speaker Diarization (pyannote.audio)")
    gr.Markdown(
        "Provide an audio file to run diarization. The Hugging Face token is "
        "read from the `HF_TOKEN` environment variable."
    )

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Upload Audio")
            submit_btn = gr.Button("Analyze", variant="primary")
        with gr.Column():
            details = gr.JSON(label="Details")

    # Event listeners must be registered inside the Blocks context.
    submit_btn.click(analyze_audio, [audio_input], [details])
if __name__ == "__main__":
    # Script entry point: enable the request queue, then start the server
    # with public sharing and SSR mode both turned off.
    queued_app = demo.queue()
    queued_app.launch(share=False, ssr_mode=False)