Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,29 +1,29 @@
|
|
| 1 |
-
from torch import load as torch_load
|
| 2 |
-
from torch import save as torch_save
|
| 3 |
-
from torch.hub import load as torch_hub_load
|
| 4 |
-
from torchaudio import save as torchaudio_save
|
| 5 |
-
from pydub import AudioSegment
|
| 6 |
-
from torch import device as Device
|
| 7 |
-
from torch.cuda import is_available as cuda_is_available
|
| 8 |
-
import gradio as gr
|
| 9 |
-
knn_vc = torch_hub_load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device=Device("cuda" if cuda_is_available() else "cpu"))
|
| 10 |
-
|
| 11 |
-
def process_audio(path):
|
| 12 |
-
AudioSegment.from_file(path).set_frame_rate(16000).set_channels(1).export(path, format="wav")
|
| 13 |
-
|
| 14 |
-
def voice_conversion(src_path,ref_paths,src_vad_trigger_level,ref_vad_trigger_level,topk):
|
| 15 |
-
[process_audio(path) for path in ref_paths+[src_path]]
|
| 16 |
-
torchaudio_save('output.wav', knn_vc.match(knn_vc.get_features(src_path,vad_trigger_level=src_vad_trigger_level), knn_vc.get_matching_set(ref_paths,vad_trigger_level=ref_vad_trigger_level), topk=topk)[None], 16000)
|
| 17 |
-
return 'output.wav'
|
| 18 |
-
|
| 19 |
-
def voices2model(ref_paths,ref_vad_trigger_level):
|
| 20 |
-
[process_audio(path) for path in ref_paths]
|
| 21 |
-
torch_save(knn_vc.get_matching_set(ref_paths,vad_trigger_level=ref_vad_trigger_level), 'model.pt')
|
| 22 |
-
return 'model.pt'
|
| 23 |
-
|
| 24 |
-
def model2voice_conv(src_path,model_path,src_vad_trigger_level,topk):
|
| 25 |
-
process_audio(src_path)
|
| 26 |
-
torchaudio_save('output.wav', knn_vc.match(knn_vc.get_features(src_path,vad_trigger_level=src_vad_trigger_level), torch_load(model_path), topk=topk)[None], 16000)
|
| 27 |
-
return 'output.wav'
|
| 28 |
-
|
| 29 |
-
gr.TabbedInterface([gr.Interface(voice_conversion, [gr.Audio(type="filepath", label="Source Audio"),gr.File(file_count="multiple",file_types=["audio"]),gr.Number(7),gr.Number(7),gr.Number(4)],gr.Audio(type="filepath", label="Converted Audio")),gr.Interface(voices2model, [gr.File(file_count="multiple",file_types=["audio"]),gr.Number(7)],gr.File(type="filepath", label="Model")),gr.Interface(model2voice_conv, [gr.Audio(type="filepath", label="Source Audio"),gr.File(type="filepath", label="Model"),gr.Number(7),gr.Number(4)],gr.Audio(type="filepath", label="Converted Audio"))],["Voice Conversion","Model Creation","Voice Conversion By Model"]).launch()
|
|
|
|
| 1 |
+
from torch import load as torch_load
|
| 2 |
+
from torch import save as torch_save
|
| 3 |
+
from torch.hub import load as torch_hub_load
|
| 4 |
+
from torchaudio import save as torchaudio_save
|
| 5 |
+
from pydub import AudioSegment
|
| 6 |
+
from torch import device as Device
|
| 7 |
+
from torch.cuda import is_available as cuda_is_available
|
| 8 |
+
import gradio as gr
|
| 9 |
+
# Load the kNN-VC voice-conversion model from torch hub once at module import,
# placing it on GPU when CUDA is available, else CPU.
# NOTE(review): prematched=True presumably selects the checkpoint trained on
# prematched features — confirm against the bshall/knn-vc repo docs.
knn_vc = torch_hub_load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device=Device("cuda" if cuda_is_available() else "cpu"))
|
| 10 |
+
|
| 11 |
+
def process_audio(path):
    """Normalize the audio file at *path* in place to 16 kHz mono WAV.

    The file is overwritten at the same path, in the format kNN-VC expects.
    """
    audio = AudioSegment.from_file(path)
    mono_16k = audio.set_frame_rate(16000).set_channels(1)
    mono_16k.export(path, format="wav")
|
| 13 |
+
|
| 14 |
+
def voice_conversion(src_path, ref_paths, src_vad_trigger_level, ref_vad_trigger_level, topk):
    """Convert the voice in *src_path* to sound like the speaker(s) in *ref_paths*.

    All inputs are normalized in place to 16 kHz mono WAV first. Writes the
    converted audio to 'output.wav' and returns that path (for Gradio).

    Parameters:
        src_path: path to the source utterance.
        ref_paths: list of paths to reference (target-speaker) utterances.
        src_vad_trigger_level / ref_vad_trigger_level: VAD trigger levels
            forwarded to the kNN-VC feature extractors.
        topk: number of nearest neighbours used by knn_vc.match.
    """
    # Plain loop instead of a side-effect-only list comprehension.
    for path in ref_paths + [src_path]:
        process_audio(path)
    query_features = knn_vc.get_features(src_path, vad_trigger_level=src_vad_trigger_level)
    matching_set = knn_vc.get_matching_set(ref_paths, vad_trigger_level=ref_vad_trigger_level)
    # gr.Number delivers a float; top-k selection needs an integer count.
    converted = knn_vc.match(query_features, matching_set, topk=int(topk))
    # [None] adds a leading channel dimension for torchaudio.save.
    torchaudio_save('output.wav', converted[None], 16000)
    return 'output.wav'
|
| 18 |
+
|
| 19 |
+
def voices2model(ref_paths, ref_vad_trigger_level):
    """Build a reusable speaker "model" (kNN-VC matching set) from reference audio.

    Each reference file is normalized in place to 16 kHz mono WAV, the matching
    set is extracted, saved to 'model.pt', and that path is returned (for Gradio).

    Parameters:
        ref_paths: list of paths to reference (target-speaker) utterances.
        ref_vad_trigger_level: VAD trigger level for feature extraction.
    """
    # Plain loop instead of a side-effect-only list comprehension.
    for path in ref_paths:
        process_audio(path)
    matching_set = knn_vc.get_matching_set(ref_paths, vad_trigger_level=ref_vad_trigger_level)
    torch_save(matching_set, 'model.pt')
    return 'model.pt'
|
| 23 |
+
|
| 24 |
+
def model2voice_conv(src_path, model_path, src_vad_trigger_level, topk):
    """Convert *src_path* using a previously saved matching set ('model.pt').

    Writes the converted audio to 'output.wav' and returns that path (for Gradio).

    Parameters:
        src_path: path to the source utterance (normalized in place to 16 kHz mono).
        model_path: path to a matching set saved by voices2model.
        src_vad_trigger_level: VAD trigger level for source feature extraction.
        topk: number of nearest neighbours used by knn_vc.match.
    """
    process_audio(src_path)
    query_features = knn_vc.get_features(src_path, vad_trigger_level=src_vad_trigger_level)
    # SECURITY: torch.load unpickles arbitrary user-uploaded data; consider
    # torch.load(model_path, weights_only=True) if the torch version allows it.
    matching_set = torch_load(model_path)
    # gr.Number delivers a float; top-k selection needs an integer count.
    converted = knn_vc.match(query_features, matching_set, topk=int(topk))
    torchaudio_save('output.wav', converted[None], 16000)
    return 'output.wav'
|
| 28 |
+
|
| 29 |
+
# Build the three-tab UI. The multi-file tabs receive gr.File objects, so a
# lambda maps them to their .name paths before calling the worker functions.
_vc_tab = gr.Interface(
    lambda src_path, ref_paths, src_vad_trigger_level, ref_vad_trigger_level, topk: voice_conversion(
        src_path,
        [i.name for i in ref_paths],
        src_vad_trigger_level,
        ref_vad_trigger_level,
        topk,
    ),
    [
        gr.Audio(type="filepath", label="Source Audio"),
        gr.File(file_count="multiple", file_types=["audio"]),
        gr.Number(7),
        gr.Number(7),
        gr.Number(4),
    ],
    gr.Audio(type="filepath", label="Converted Audio"),
)
_model_tab = gr.Interface(
    lambda ref_paths, ref_vad_trigger_level: voices2model(
        [i.name for i in ref_paths],
        ref_vad_trigger_level,
    ),
    [
        gr.File(file_count="multiple", file_types=["audio"]),
        gr.Number(7),
    ],
    gr.File(type="filepath", label="Model"),
)
_model_vc_tab = gr.Interface(
    model2voice_conv,
    [
        gr.Audio(type="filepath", label="Source Audio"),
        gr.File(type="filepath", label="Model"),
        gr.Number(7),
        gr.Number(4),
    ],
    gr.Audio(type="filepath", label="Converted Audio"),
)
gr.TabbedInterface(
    [_vc_tab, _model_tab, _model_vc_tab],
    ["Voice Conversion", "Model Creation", "Voice Conversion By Model"],
).launch()
|