# Hugging Face Hub page header (scrape residue, kept as a comment):
#   author avatar: gijs
#   commit: "Update app.py" (ae84922, verified)
import gradio as gr
import torch
from pyannote.audio import Pipeline
import os
import spaces
@spaces.GPU
def analyze_audio(audio_path: str) -> dict:
    """Run speaker diarization on an audio file and summarize the result.

    Args:
        audio_path: Filesystem path of the audio to analyze. A falsy value
            (``None`` or ``""``) means no audio was provided.

    Returns:
        A dict suitable for a JSON output component. When no audio is given
        it contains only ``"message"``. Otherwise it contains:

        * ``"num_speakers"``: number of distinct speaker labels.
        * ``"segments"``: one entry per speaker turn with ``"speaker"``,
          ``"start"``, ``"end"`` and ``"duration"`` (seconds, rounded to
          2 decimals).
        * ``"speaker_summary"``: per-speaker ``"total_time"`` and
          ``"percentage"`` of the summed speaking time, sorted by label.
        * ``"total_segments"``: number of entries in ``"segments"``.
        * ``"message"``: the same summary as human-readable text.
    """
    if not audio_path:
        # The click handler binds this function to a single JSON output, so
        # every path must return one dict rather than a (message, dict) tuple.
        return {"message": "Please upload an audio file."}

    # The pipeline is instantiated on every call; the access token is taken
    # from the HF_TOKEN environment variable.
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        token=os.getenv("HF_TOKEN"),
    )
    if torch.cuda.is_available():
        pipeline.to(torch.device("cuda"))

    diarization = pipeline(audio_path)
    # NOTE(review): newer pyannote.audio releases appear to wrap the
    # annotation in an output object exposing `speaker_diarization` — confirm
    # against the installed version. Falls back to the object itself.
    diarization = getattr(diarization, "speaker_diarization", diarization)

    segments = []
    speaker_times = {}
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        duration = round(turn.end - turn.start, 2)
        segments.append(
            {
                "speaker": speaker,
                "start": round(turn.start, 2),
                "end": round(turn.end, 2),
                "duration": duration,
            }
        )
        # Totals accumulate the rounded per-segment durations.
        speaker_times[speaker] = speaker_times.get(speaker, 0) + duration

    num_speakers = len(speaker_times)
    total_duration = sum(speaker_times.values())

    speaker_summary = [
        {
            "speaker": speaker,
            "total_time": round(spoken, 2),
            # Guard against division by zero when no speech was detected.
            "percentage": (
                round(spoken / total_duration * 100, 1)
                if total_duration > 0
                else 0
            ),
        }
        for speaker, spoken in sorted(speaker_times.items())
    ]

    summary_lines = [
        f"Detected {num_speakers} speaker{'s' if num_speakers != 1 else ''} in the audio:"
    ]
    summary_lines.extend(
        f"- {info['speaker']}: {info['total_time']}s ({info['percentage']}%)"
        for info in speaker_summary
    )

    return {
        "num_speakers": num_speakers,
        "segments": segments,
        "speaker_summary": speaker_summary,
        "total_segments": len(segments),
        "message": "\n".join(summary_lines),
    }
# Single-page UI: audio upload and trigger button on the left, JSON result on
# the right.
with gr.Blocks(title="Speaker Diarization") as demo:
    gr.Markdown("# Speaker Diarization (pyannote.audio)")
    # There is no token field in this UI; analyze_audio reads the token from
    # the HF_TOKEN environment variable, so the prompt only asks for audio.
    gr.Markdown("Provide an audio file to run diarization.")
    with gr.Row():
        with gr.Column():
            # type="filepath" hands analyze_audio a path string.
            audio_input = gr.Audio(type="filepath", label="Upload Audio")
            submit_btn = gr.Button("Analyze", variant="primary")
        with gr.Column():
            details = gr.JSON(label="Details")
    # One input, one output: analyze_audio must return a single dict.
    submit_btn.click(analyze_audio, [audio_input], [details])

if __name__ == "__main__":
    demo.queue().launch(share=False, ssr_mode=False)