Draft: separate audio into individual speaker tracks
Browse files
app.py
CHANGED
|
@@ -1,4 +1,57 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
with gr.Blocks() as demo:
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from modelscope.pipelines import pipeline
|
| 3 |
+
from modelscope.utils.constant import Tasks
|
| 4 |
+
import soundfile as sf
|
| 5 |
+
import numpy as np
|
| 6 |
+
import os
|
| 7 |
+
# import torch
|
| 8 |
+
|
| 9 |
+
SAMPLE_RATE = 8000
|
| 10 |
+
|
| 11 |
+
def get_sample_rate(audio_file_path):
    """Return the sample rate (Hz) of an audio file.

    Uses soundfile's metadata reader instead of decoding the file:
    sf.read() loads every sample into memory only to discard the data,
    while sf.info() parses just the header.

    Args:
        audio_file_path: Path to the audio file.

    Returns:
        int: The file's sample rate in Hertz.
    """
    return sf.info(audio_file_path).samplerate
|
| 14 |
+
|
| 15 |
+
def change_sample_rate(input_audio_file_path, output_audio_file_path, sample_rate):
    """Resample an audio file to ``sample_rate`` Hz using ffmpeg.

    Equivalent command: ``ffmpeg -y -i <input> -ar <sample_rate> <output>``.

    Passing an argument list to subprocess.run avoids shell interpretation
    of the paths (os.system would break on spaces or shell metacharacters),
    ``-y`` overwrites an existing output instead of blocking on ffmpeg's
    interactive prompt, and ``check=True`` surfaces conversion failures
    that os.system silently ignored.

    Args:
        input_audio_file_path: Source audio file path.
        output_audio_file_path: Destination path for the resampled audio.
        sample_rate: Target sample rate in Hz.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a nonzero status.
    """
    import subprocess  # local import keeps this fix self-contained

    subprocess.run(
        ['ffmpeg', '-y', '-i', input_audio_file_path,
         '-ar', str(sample_rate), output_audio_file_path],
        check=True,
    )
|
| 18 |
+
|
| 19 |
+
def audio_is_stereo(audio_file_path):
    """Return True if the audio file has exactly two channels.

    Reads only the file header via sf.info() — the previous sf.read()
    decoded the entire file just to inspect its channel count.

    Args:
        audio_file_path: Path to the audio file.

    Returns:
        bool: True when the file has 2 channels, False otherwise.
    """
    return sf.info(audio_file_path).channels == 2
|
| 22 |
+
|
| 23 |
+
def set_mono(input_audio_file_path, output_audio_file_path):
    """Downmix an audio file to a single channel using ffmpeg.

    Equivalent command: ``ffmpeg -y -i <input> -ac 1 <output>``.

    Same rationale as change_sample_rate: an argument list avoids shell
    interpretation of the paths, ``-y`` prevents ffmpeg from blocking on
    its overwrite prompt, and ``check=True`` makes failures explicit
    instead of silently ignored.

    Args:
        input_audio_file_path: Source audio file path.
        output_audio_file_path: Destination path for the mono audio.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a nonzero status.
    """
    import subprocess  # local import keeps this fix self-contained

    subprocess.run(
        ['ffmpeg', '-y', '-i', input_audio_file_path,
         '-ac', '1', output_audio_file_path],
        check=True,
    )
|
| 25 |
+
|
| 26 |
+
# ---------------------------------------------------------------------------
# One-off preprocessing at import time: download a demo vocal track, convert
# it to the 8 kHz mono format the MossFormer separation model expects, run
# the separation once, and save each separated signal to its own WAV file.
# ---------------------------------------------------------------------------
import urllib.request

# Download with the stdlib instead of shelling out to wget: wget may not be
# installed on the host, and os.system() would silently ignore its failure.
urllib.request.urlretrieve(
    'https://maximofn.com/wp-content/uploads/2023/10/vocals.wav', 'vocals.wav')

# NOTE(review): `input` shadows the builtin of the same name; the names are
# kept as-is because later (unseen) module-level code may reference them.
input = "vocals.wav"
input_8k = "vocals_8k.wav"
input_8k_mono = "vocals_8k_mono.wav"

sr = get_sample_rate(input)

# The separation model works at SAMPLE_RATE (8 kHz) audio...
if sr != SAMPLE_RATE:
    change_sample_rate(input, input_8k, SAMPLE_RATE)
else:
    input_8k = input

# ...and on a single (mono) channel.
if audio_is_stereo(input_8k):
    set_mono(input_8k, input_8k_mono)
else:
    input_8k_mono = input_8k

# device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'
separation = pipeline(Tasks.speech_separation, model='damo/speech_mossformer_separation_temporal_8k', device=device)
print("Separating...")
result = separation(input_8k_mono)
print("Separated!")

print("Saving...")
# Each entry of 'output_pcm_list' appears to be a raw int16 PCM buffer
# (one per separated source, judging by the output_spk{i} naming) —
# TODO confirm against the modelscope pipeline docs.
for i, signal in enumerate(result['output_pcm_list']):
    save_file = f'output_spk{i}.wav'
    sf.write(save_file, np.frombuffer(signal, dtype=np.int16), SAMPLE_RATE)
print("Saved!")
|
| 55 |
|
| 56 |
|
| 57 |
with gr.Blocks() as demo:
|