luckyhookin committed on
Commit
f7e0be6
·
0 Parent(s):
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. README.md +12 -0
  3. app.py +106 -0
  4. packages.txt +2 -0
  5. requirements.txt +8 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Speaker Diarization
3
+ emoji: 📚
4
+ colorFrom: red
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ import os
3
+ import gradio as gr
4
+ import spaces
5
+ import torch
6
+ from pyannote.audio import Pipeline
7
+ import torchaudio
8
+ from pydub import AudioSegment
9
+ from pyannote.audio import Pipeline
10
+ import json
11
+ import requests
12
+
13
+
14
# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when the variable is not set).
AUTH_TOKEN = os.getenv("HF_TOKEN")

# Build the pretrained diarization pipeline once at import time and move it
# onto the CUDA device so every request reuses the same loaded model.
device = torch.device("cuda")
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-community-1",
    token=AUTH_TOKEN,
).to(device)
22
+
23
def preprocess_audio(audio_path):
    """Convert audio to a mono, 16 kHz WAV file suitable for pyannote.

    Args:
        audio_path: Either a filesystem path (``str``) to an audio file, or
            the raw encoded audio content as a bytes-like object.

    Returns:
        Path of a newly created temporary WAV file. The caller owns the file
        and is responsible for deleting it.

    Raises:
        ValueError: If the audio cannot be loaded, converted or written. The
            underlying exception is attached as ``__cause__``.
    """
    import tempfile

    temp_wav = None
    try:
        # A str is treated as a path; anything else is wrapped as an
        # in-memory file so pydub can decode it.
        if isinstance(audio_path, str):
            source = audio_path
        else:
            source = BytesIO(audio_path)

        # Load audio with pydub, then convert to mono at 16 kHz.
        audio = AudioSegment.from_file(source)
        audio = audio.set_channels(1).set_frame_rate(16000)

        # Use a unique file per call: requests can run concurrently, and a
        # shared fixed filename would let one request overwrite or delete
        # another request's audio.
        fd, temp_wav = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        audio.export(temp_wav, format="wav")
        return temp_wav
    except Exception as e:
        # Do not leave a half-written temporary file behind on failure.
        if temp_wav is not None and os.path.exists(temp_wav):
            os.remove(temp_wav)
        raise ValueError(f"Error preprocessing audio: {str(e)}") from e
41
+
42
def handle_audio(url, audio_path, num_speakers):
    """Diarize audio fetched from a URL or taken from an uploaded file.

    Args:
        url: Optional address of an audio file. When non-empty it takes
            precedence over ``audio_path`` and is downloaded with a
            60-second timeout.
        audio_path: Path of the uploaded audio file; used only when ``url``
            is empty.
        num_speakers: Speaker count forwarded unchanged to ``diarize_audio``.

    Returns:
        Whatever ``diarize_audio`` returns for the preprocessed audio.

    Raises:
        ValueError: If neither a URL nor an audio file is supplied, or if
            preprocessing the audio fails.
        requests.RequestException: If the download fails or the server
            answers with an HTTP error status.
    """
    if not url and not audio_path:
        raise ValueError("No audio provided: enter a URL or upload an audio file.")

    if url:
        # NOTE(review): the URL is user-supplied and fetched server-side;
        # consider restricting schemes/hosts if this runs on a private network.
        response = requests.get(url, timeout=60)
        # Fail on 4xx/5xx instead of feeding an error page to the decoder.
        response.raise_for_status()
        audio_path = response.content

    audio_path = preprocess_audio(audio_path)
    try:
        return diarize_audio(audio_path, num_speakers)
    finally:
        # Clean up the temporary file even when diarization raises.
        if os.path.exists(audio_path):
            os.remove(audio_path)
54
+
55
+
56
@spaces.GPU(duration=120)
def diarize_audio(audio_path, num_speakers):
    """Perform speaker diarization and return the result as one string.

    Args:
        audio_path: Path of the audio file to load with ``torchaudio``.
        num_speakers: Exact number of speakers to request from the pipeline.
            A value of 0 (or less) instead asks the pipeline for between
            2 and 6 speakers.

    Returns:
        On success, a JSON array (indented by 2) of objects with the keys
        ``start``, ``end`` (both rounded to 3 decimals) and ``speaker_id``.
        On failure, the string ``"Error: <message>"``. Both paths return a
        single string because the UI wires exactly one output component.
    """
    try:
        # Load audio for pyannote
        waveform, sample_rate = torchaudio.load(audio_path)
        audio_dict = {"waveform": waveform, "sample_rate": sample_rate}

        # Configure pipeline with number of speakers
        if num_speakers > 0:
            pipeline_params = {"num_speakers": num_speakers}
        else:
            pipeline_params = {"min_speakers": 2, "max_speakers": 6}
        diarization = pipeline(audio_dict, **pipeline_params)

        # Format results: one entry per (turn, speaker) pair yielded by the
        # pipeline output's exclusive_speaker_diarization attribute.
        results = [
            {
                "start": round(turn.start, 3),
                "end": round(turn.end, 3),
                "speaker_id": speaker,
            }
            for turn, speaker in diarization.exclusive_speaker_diarization
        ]

        return json.dumps(results, indent=2)

    except Exception as e:
        # Return one value, matching the success path and the single output
        # component; the previous 2-tuple did not fit the interface.
        return f"Error: {str(e)}"
82
+
83
# Gradio interface: a URL box, a file upload and a speaker-count slider feed
# handle_audio, whose string result is shown in a single textbox.
with gr.Blocks() as demo:
    gr.Markdown("# Speaker Diarization with speaker-diarization-community-1")
    gr.Markdown("Upload an audio file and specify the number of speakers to diarize the audio.")

    # Input controls, laid out side by side.
    with gr.Row():
        url_input = gr.Textbox(label="URL")
        audio_input = gr.Audio(label="Upload Audio File", type="filepath")
        # 0 makes diarize_audio request a 2-6 speaker range instead of an
        # exact count.
        num_speakers = gr.Slider(
            minimum=0,
            maximum=10,
            step=1,
            label="Number of Speakers",
            value=2,
        )

    submit_btn = gr.Button("Diarize")

    # Output area.
    with gr.Row():
        json_output = gr.Textbox(label="Diarization Results (JSON)")

    # Wire the button to the handler; at most two runs execute concurrently.
    click_inputs = [url_input, audio_input, num_speakers]
    click_outputs = [json_output]
    submit_btn.click(
        fn=handle_audio,
        inputs=click_inputs,
        outputs=click_outputs,
        concurrency_limit=2,
    )

# Start serving the app
demo.launch()
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ libsndfile1
2
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pyannote.audio
2
+ torch
3
+ torchaudio
4
+ pydub
5
+ numpy
6
+ huggingface_hub
7
+ gradio
8
+ spaces