marcosremar2 committed on
Commit
1689179
·
1 Parent(s): a050dff

Initial speaker diarization app with pyannote 3.1

Browse files
Files changed (3) hide show
  1. README.md +9 -5
  2. app.py +56 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,12 +1,16 @@
1
  ---
2
  title: Speaker Diarization Pyannote
3
- emoji: 💻
4
- colorFrom: gray
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 5.32.0
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
  title: Speaker Diarization Pyannote
3
+ emoji: 🎤
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.20.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ hardware: t4-small
12
  ---
13
 
14
+ # Speaker Diarization with Pyannote
15
+
16
+ This space performs speaker diarization using pyannote.audio 3.1
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pyannote.audio import Pipeline
3
+ import torch
4
+ import torchaudio
5
+ from huggingface_hub import login
6
+ import os
7
+
8
+ # Login to Hugging Face if token is available
9
+ hf_token = os.environ.get("HF_TOKEN")
10
+ if hf_token:
11
+ login(token=hf_token)
12
+
13
+ # Initialize the pipeline
14
+ pipeline = Pipeline.from_pretrained(
15
+ "pyannote/speaker-diarization-3.1",
16
+ use_auth_token=hf_token
17
+ )
18
+
19
+ # Send pipeline to GPU if available
20
+ if torch.cuda.is_available():
21
+ pipeline.to(torch.device("cuda"))
22
+
23
+ def diarize_audio(audio_file):
24
+ """Process audio file and return diarization results"""
25
+ try:
26
+ # Apply pretrained pipeline
27
+ diarization = pipeline(audio_file)
28
+
29
+ # Format results
30
+ results = []
31
+ for turn, _, speaker in diarization.itertracks(yield_label=True):
32
+ results.append(
33
+ f"Speaker {speaker}: {turn.start:.1f}s - {turn.end:.1f}s"
34
+ )
35
+
36
+ if not results:
37
+ return "No speakers detected in the audio."
38
+
39
+ return "\n".join(results)
40
+
41
+ except Exception as e:
42
+ return f"Error processing audio: {str(e)}"
43
+
44
+ # Create Gradio interface
45
+ demo = gr.Interface(
46
+ fn=diarize_audio,
47
+ inputs=gr.Audio(type="filepath", label="Upload Audio File"),
48
+ outputs=gr.Textbox(label="Diarization Results", lines=10),
49
+ title="Speaker Diarization with Pyannote 3.1",
50
+ description="Upload an audio file to identify different speakers and their speaking times.",
51
+ examples=[],
52
+ cache_examples=False
53
+ )
54
+
55
+ if __name__ == "__main__":
56
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pyannote.audio==3.1.1
2
+ torch==2.0.1
3
+ torchaudio==2.0.2
4
+ gradio==4.20.0
5
+ huggingface_hub