shethjenil committed on
Commit
d18d03b
·
verified ·
1 Parent(s): 2cdbf02

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -0
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Set the opt-out flag *before* pyannote is imported so it is visible no
# matter when the library reads it (at import time or later, when a pipeline
# is loaded).
# NOTE(review): when pyannote actually consults this variable is not visible
# from this file; setting it first is safe in either case.
os.environ["PYANNOTE_SKIP_DEPENDENCY_CHECK"] = "1"

import torchaudio  # noqa: E402  (must follow the environment setup above)
from pyannote.audio import Pipeline  # noqa: E402
from torchaudio.functional import resample  # noqa: E402
6
+
7
_DIARIZATION_MODEL = "shethjenil/speaker-diarization-community-1"
_TARGET_SAMPLE_RATE = 16000
_SPEAKER_PREFIX = "SPEAKER_"

# Loaded lazily on the first request and reused afterwards; re-creating the
# pipeline for every call repeats the (expensive) model load each time.
_pipeline = None


def _get_pipeline():
    """Return the diarization pipeline, loading it on first use only."""
    global _pipeline
    if _pipeline is None:
        _pipeline = Pipeline.from_pretrained(_DIARIZATION_MODEL)
    return _pipeline


def _speaker_index(label):
    """Convert a speaker label such as ``"SPEAKER_03"`` to the integer ``3``."""
    # str.lstrip() removes any leading characters from a *set*, not a prefix,
    # so the prefix is cut off explicitly instead.
    if label.startswith(_SPEAKER_PREFIX):
        label = label[len(_SPEAKER_PREFIX):]
    return int(label)


def _as_rows(segments):
    """Turn serialized segments into ``[start, end, speaker_index]`` rows."""
    return [
        [segment["start"], segment["end"], _speaker_index(segment["speaker"])]
        for segment in segments
    ]


def process(input_file):
    """Run speaker diarization on an audio file and return JSON-friendly data.

    The audio is loaded with torchaudio, averaged over its first dimension
    when it has more than one channel, resampled to 16 kHz if needed, and
    passed to the diarization pipeline as an in-memory waveform.

    Args:
        input_file: Path of the audio file to analyse.

    Returns:
        A dict with three keys:
        ``"diarization"`` and ``"exclusive_diarization"`` - lists of
        ``[start, end, speaker_index]`` rows built from the matching entries
        of the pipeline output's ``serialize()`` result;
        ``"embedding"`` - ``output.speaker_embeddings`` converted with
        ``tolist()``.

    Raises:
        ValueError: If ``input_file`` is ``None`` (nothing was submitted).
    """
    if input_file is None:
        raise ValueError("No audio file was provided.")

    audio, sr = torchaudio.load(input_file)

    # Downmix before resampling: both steps are linear, so the result is the
    # same up to float rounding, but only one channel has to be resampled.
    if audio.shape[0] > 1:
        audio = audio.mean(dim=0, keepdim=True)
    if sr != _TARGET_SAMPLE_RATE:
        audio = resample(audio, sr, _TARGET_SAMPLE_RATE)

    output = _get_pipeline()(
        {"waveform": audio, "sample_rate": _TARGET_SAMPLE_RATE}
    )

    # Serialize once and reuse the result for both segment lists.
    serialized = output.serialize()
    return {
        "diarization": _as_rows(serialized["diarization"]),
        "exclusive_diarization": _as_rows(serialized["exclusive_diarization"]),
        "embedding": output.speaker_embeddings.tolist(),
    }
21
+
22
import gradio as gr

# Build the interface as a named object, then start serving it: an audio
# upload is handed to `process` as a file path and the returned dict is
# rendered as JSON.
demo = gr.Interface(
    fn=process,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.JSON(),
)
demo.launch()