shethjenil committed on
Commit
a9ef30b
·
verified ·
1 Parent(s): 5ea087a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -1,23 +1,13 @@
 
1
  import torchaudio
2
- from torchaudio.functional import resample
3
- import os
4
- from pyannote.audio import Pipeline
5
- os.environ["PYANNOTE_SKIP_DEPENDENCY_CHECK"] = "1"
6
-
7
- def process(input_file):
8
- pipeline = Pipeline.from_pretrained("shethjenil/speaker-diarization-community-1")
9
- audio, sr = torchaudio.load(input_file)
10
- target_sr = 16000
11
- if sr != target_sr:
12
- audio = resample(audio, sr, target_sr)
13
- if audio.shape[0] > 1:
14
- audio = audio.mean(dim=0, keepdim=True)
15
- output = pipeline({"waveform":audio,"sample_rate":target_sr})
16
- return {
17
- "diarization":[[i['start'],i['end'],int(i['speaker'].lstrip("SPEAKER_"))] for i in output.serialize()['diarization']],
18
- "exclusive_diarization":[[i['start'],i['end'],int(i['speaker'].lstrip("SPEAKER_"))] for i in output.serialize()['exclusive_diarization']],
19
- "embedding":output.speaker_embeddings.tolist()
20
- }
21
-
22
  import gradio as gr
23
- gr.Interface(process, inputs=gr.Audio(type="filepath"), outputs=gr.JSON()).launch()
 
 
 
 
 
 
 
 
 
 
1
+ from pyannote_audio_fix import SpeakerDiarization
2
  import torchaudio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
def fn(audio, progress=gr.Progress(True)):
    """Load an audio file, normalise it, and run speaker diarization on it.

    Args:
        audio: Path of the audio file to load (passed straight to
            ``torchaudio.load``).
        progress: Gradio progress tracker. It is not referenced in the body;
            it stays in the signature so Gradio can attach progress
            reporting to the request.

    Returns:
        Whatever ``SpeakerDiarization()(wav)`` returns for the prepared
        waveform.
    """
    # Sample rate every input is converted to before inference
    # (presumably what the model expects -- confirm against the model docs).
    target_sr = 16000

    # NOTE(review): the model is rebuilt on every request; if construction is
    # expensive and the object is safe to reuse, consider creating it once.
    model = SpeakerDiarization()

    wav, sr = torchaudio.load(audio)
    if sr != target_sr:
        wav = torchaudio.functional.resample(wav, sr, target_sr)

    # Downmix *any* multi-channel input to mono. The earlier `== 2` test only
    # handled stereo, so files with 3+ channels were passed through un-mixed.
    if wav.shape[0] > 1:
        wav = wav.mean(dim=0, keepdim=True)

    return model(wav)
12
+
13
# Expose `fn` through a Gradio UI: the audio input hands the handler a file
# path, and the handler's return value is shown by the JSON output component.
gr.Interface(
    fn=fn,
    inputs=gr.Audio(type='filepath'),
    outputs="json",
).launch()