shethjenil commited on
Commit
4062fed
·
verified ·
1 Parent(s): 87b6e66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -1,29 +1,30 @@
1
- from torch import load as torch_load
2
- from torch import save as torch_save
3
- from torch.hub import load as torch_hub_load
4
- from torchaudio import save as torchaudio_save
5
- from pydub import AudioSegment
6
- from torch import device as Device
7
- from torch.cuda import is_available as cuda_is_available
8
  import gradio as gr
9
- knn_vc = torch_hub_load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True, device=Device("cuda" if cuda_is_available() else "cpu"))
 
 
 
10
 
11
  def process_audio(path):
12
- AudioSegment.from_file(path).set_frame_rate(16000).set_channels(1).export(path, format="wav")
 
 
 
 
13
 
14
  def voice_conversion(src_path,ref_paths,src_vad_trigger_level,ref_vad_trigger_level,topk):
15
  [process_audio(path) for path in ref_paths+[src_path]]
16
- torchaudio_save('output.wav', knn_vc.match(knn_vc.get_features(src_path,vad_trigger_level=src_vad_trigger_level), knn_vc.get_matching_set(ref_paths,vad_trigger_level=ref_vad_trigger_level), topk=topk)[None], 16000)
17
  return 'output.wav'
18
 
19
  def voices2model(ref_paths,ref_vad_trigger_level):
20
  [process_audio(path) for path in ref_paths]
21
- torch_save(knn_vc.get_matching_set(ref_paths,vad_trigger_level=ref_vad_trigger_level), 'model.pt')
22
  return 'model.pt'
23
 
24
  def model2voice_conv(src_path,model_path,src_vad_trigger_level,topk):
25
  process_audio(src_path)
26
- torchaudio_save('output.wav', knn_vc.match(knn_vc.get_features(src_path,vad_trigger_level=src_vad_trigger_level), torch_load(model_path), topk=topk)[None], 16000)
27
  return 'output.wav'
28
 
29
- gr.TabbedInterface([gr.Interface(lambda src_path,ref_paths,src_vad_trigger_level,ref_vad_trigger_level,topk :voice_conversion(src_path,[i.name for i in ref_paths],src_vad_trigger_level,ref_vad_trigger_level,topk), [gr.Audio(type="filepath", label="Source Audio"),gr.File(file_count="multiple",file_types=["audio"]),gr.Number(7),gr.Number(7),gr.Number(4)],gr.Audio(type="filepath", label="Converted Audio")),gr.Interface(lambda ref_paths,ref_vad_trigger_level : voices2model([i.name for i in ref_paths],ref_vad_trigger_level), [gr.File(file_count="multiple",file_types=["audio"]),gr.Number(7)],gr.File(type="filepath", label="Model")),gr.Interface(model2voice_conv, [gr.Audio(type="filepath", label="Source Audio"),gr.File(type="filepath", label="Model"),gr.Number(7),gr.Number(4)],gr.Audio(type="filepath", label="Converted Audio"))],["Voice Conversion","Model Creation","Voice Conversion By Model"]).launch()
 
1
import gradio as gr
import torch
import torchaudio

# Load the pretrained kNN-VC voice-conversion model (WavLM feature extractor +
# HiFi-GAN vocoder) from torch.hub. prematched=True selects the checkpoint
# trained on prematched features, per the bshall/knn-vc repo.
# NOTE: the original block imported torchaudio twice; the duplicate is removed
# and imports are grouped alphabetically.
knn_vc = torch.hub.load('bshall/knn-vc', 'knn_vc', prematched=True, trust_repo=True, pretrained=True)
 
8
def process_audio(path):
    """Normalize the audio file at *path* in place to 16 kHz mono WAV."""
    audio, rate = torchaudio.load(path)
    if rate != 16000:
        audio = torchaudio.functional.resample(audio, rate, 16000)
    # Average all channels down to one; keepdim=True preserves the
    # (channels, samples) layout that torchaudio.save expects.
    mono = audio.mean(dim=0, keepdim=True)
    torchaudio.save(path, mono, 16000)
14
 
15
def voice_conversion(src_path, ref_paths, src_vad_trigger_level, ref_vad_trigger_level, topk):
    """Convert the voice in *src_path* to sound like the reference speakers.

    All input files are normalized to 16 kHz mono in place, then kNN-VC
    matches source features against the reference matching set.

    Returns the path of the converted audio file ('output.wav').
    """
    # Plain loop, not a list comprehension: process_audio is called purely
    # for its side effect (the original built and discarded a list).
    for path in ref_paths + [src_path]:
        process_audio(path)
    query = knn_vc.get_features(src_path, vad_trigger_level=src_vad_trigger_level)
    matching_set = knn_vc.get_matching_set(ref_paths, vad_trigger_level=ref_vad_trigger_level)
    converted = knn_vc.match(query, matching_set, topk=topk)
    # match() returns a 1-D waveform; [None] adds the channel dim save() expects.
    torchaudio.save('output.wav', converted[None], 16000)
    return 'output.wav'
19
 
20
def voices2model(ref_paths, ref_vad_trigger_level):
    """Build and serialize a reusable matching set ("model") from reference audio.

    Returns the path of the saved tensor file ('model.pt').
    """
    # Plain loop, not a list comprehension: process_audio is called purely
    # for its side effect (the original built and discarded a list).
    for path in ref_paths:
        process_audio(path)
    matching_set = knn_vc.get_matching_set(ref_paths, vad_trigger_level=ref_vad_trigger_level)
    torch.save(matching_set, 'model.pt')
    return 'model.pt'
24
 
25
def model2voice_conv(src_path, model_path, src_vad_trigger_level, topk):
    """Convert *src_path* using a previously saved matching set (model.pt).

    Returns the path of the converted audio file ('output.wav').
    """
    process_audio(src_path)
    query = knn_vc.get_features(src_path, vad_trigger_level=src_vad_trigger_level)
    # NOTE(review): torch.load unpickles the file — loading an untrusted
    # model file can execute arbitrary code; only load files you created.
    matching_set = torch.load(model_path)
    converted = knn_vc.match(query, matching_set, topk=topk)
    # BUG FIX: the original called torch.save('output.wav', tensor, 16000),
    # which is the wrong function (torch.save pickles an object and takes no
    # sample rate) with swapped arguments. Audio must be written with
    # torchaudio.save(path, waveform, sample_rate), as the other conversion
    # path already does.
    torchaudio.save('output.wav', converted[None], 16000)
    return 'output.wav'
29
 
30
# Build each tab as a named Interface instead of one giant expression.
_conversion_tab = gr.Interface(
    lambda src_path, ref_paths, src_vad_trigger_level, ref_vad_trigger_level, topk: voice_conversion(
        src_path, [i.name for i in ref_paths], src_vad_trigger_level, ref_vad_trigger_level, topk
    ),
    [
        gr.Audio(type="filepath", label="Source Audio"),
        gr.File(file_count="multiple", file_types=["audio"]),
        gr.Number(7),
        gr.Number(7),
        gr.Number(4),
    ],
    gr.Audio(type="filepath", label="Converted Audio"),
)
_model_tab = gr.Interface(
    lambda ref_paths, ref_vad_trigger_level: voices2model(
        [i.name for i in ref_paths], ref_vad_trigger_level
    ),
    [gr.File(file_count="multiple", file_types=["audio"]), gr.Number(7)],
    gr.File(type="filepath", label="Model"),
)
_model_conversion_tab = gr.Interface(
    model2voice_conv,
    [
        gr.Audio(type="filepath", label="Source Audio"),
        gr.File(type="filepath", label="Model"),
        gr.Number(7),
        gr.Number(4),
    ],
    gr.Audio(type="filepath", label="Converted Audio"),
)
gr.TabbedInterface(
    [_conversion_tab, _model_tab, _model_conversion_tab],
    ["Voice Conversion", "Model Creation", "Voice Conversion By Model"],
).launch()