cjayic committed on
Commit
5be9b92
·
1 Parent(s): a9aac41

layout update, examples, update models

Browse files
app.py CHANGED
@@ -4,14 +4,13 @@ from hifigan.generator import HifiganGenerator
4
 
5
  from acoustic import AcousticModel
6
 
7
- #from hifigan.generator import HifiganGenerator
8
  from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
9
 
10
  hubert = torch.hub.load("bshall/hubert:main", "hubert_soft").cpu()
11
 
12
  acoustic = AcousticModel(False, True)
13
 
14
- checkpoint = torch.load("models/acoustic-model-100000.pt", map_location=torch.device('cpu'))
15
 
16
  consume_prefix_in_state_dict_if_present(checkpoint["acoustic-model"], "module.")
17
  acoustic.load_state_dict(checkpoint["acoustic-model"])
@@ -27,7 +26,6 @@ hifigan.eval()
27
 
28
  def run_conversion(audio_in):
29
  sr, source = audio_in
30
-
31
  source = torch.Tensor(source)
32
 
33
  if source.dim() == 1:
@@ -56,21 +54,52 @@ def run_conversion(audio_in):
56
 
57
 
58
  with gr.Blocks() as demo:
59
- with gr.Column(variant="panel"):
60
- with gr.Row(variant="compact"):
61
- input_audio = gr.Audio(
62
- label="Audio to be converted",
63
- ).style(
64
- container=False,
65
- )
66
- btn = gr.Button("Widowify").style(full_width=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  output_audio = gr.Audio(
68
  label="Converted Audio",
69
  elem_id="output_audio",
70
  interactive=False
71
  ).style(height="auto")
72
 
73
- btn.click(run_conversion, input_audio, output_audio)
74
- gr.Examples(["examples/jermacraft.wav","examples/meatgrinder.wav"], inputs=[input_audio])
 
 
 
 
 
 
 
 
75
 
 
76
  demo.launch()
 
4
 
5
  from acoustic import AcousticModel
6
 
 
7
  from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
8
 
9
  hubert = torch.hub.load("bshall/hubert:main", "hubert_soft").cpu()
10
 
11
  acoustic = AcousticModel(False, True)
12
 
13
+ checkpoint = torch.load("models/acoustic-model-best.pt", map_location=torch.device('cpu'))
14
 
15
  consume_prefix_in_state_dict_if_present(checkpoint["acoustic-model"], "module.")
16
  acoustic.load_state_dict(checkpoint["acoustic-model"])
 
26
 
27
  def run_conversion(audio_in):
28
  sr, source = audio_in
 
29
  source = torch.Tensor(source)
30
 
31
  if source.dim() == 1:
 
54
 
55
 
56
  with gr.Blocks() as demo:
57
+ with gr.Column():
58
+ gr.Markdown(
59
+ """
60
+ # Soft-VC | Widowmaker
61
+ This is a [Soft-VC model](https://github.com/bshall/soft-vc) trained on Widowmaker from Overwatch, allowing the conversion of any voice to Widowmaker's voice. While lower quality (16kHz), it captures the character fairly well, imo.
62
+
63
+ For a multi-speaker model, check out my [sovits-overwatch2](https://huggingface.co/spaces/cjayic/sovits-overwatch2) space!
64
+
65
+ The acoustic model has been trained for around 100k iterations, the HiFiGAN-Model for around 150k iterations. Quality could likely be improved by training the HiFiGAN further.
66
+ """),
67
+ with gr.Column():
68
+ with gr.Tab("Upload Audio File"):
69
+ with gr.Column():
70
+ input_audio = gr.Audio(
71
+ label="Audio to be converted",
72
+ ).style(
73
+ container=False,
74
+ )
75
+ btn_upload = gr.Button("Widowify", variant="primary").style(full_width=True)
76
+ with gr.Tab("Record Audio"):
77
+ with gr.Column():
78
+ input_audio_record = gr.Audio(
79
+ label="Audio to be converted",
80
+ source="microphone"
81
+ ).style(
82
+ container=False,
83
+ )
84
+ btn_rec = gr.Button("Widowify", variant="primary").style(full_width=True)
85
+
86
+ with gr.Row():
87
  output_audio = gr.Audio(
88
  label="Converted Audio",
89
  elem_id="output_audio",
90
  interactive=False
91
  ).style(height="auto")
92
 
93
+ btn_upload.click(run_conversion, [input_audio], output_audio)
94
+ btn_rec.click(run_conversion, [input_audio_record], output_audio)
95
+
96
+ gr.Examples(
97
+ ["examples/jermacraft.wav","examples/Mercy_0000000B0F5.wav","examples/weartie.wav","examples/gman_02.wav"], inputs=[input_audio],
98
+ outputs=[output_audio],
99
+ fn=run_conversion,
100
+ cache_examples=True,
101
+ run_on_click=True
102
+ )
103
 
104
+ demo.queue()
105
  demo.launch()
examples/{meatgrinder.wav → Mercy_0000000B0F5.wav} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a29324c4e5909f7eff663b3f3a17100fdf36fef1c6707ba16b4175bb21b3cb84
3
- size 1460740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d82da53b88f1169a4e12eec15506d8b85029293b2a0db42e28e4a7a31a4914a
3
+ size 162958
examples/gman_02.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cffbf4dda570a1c1c7a92e807f5a3f1b1418d77c5e4fad8da590fbc23c4ff07
3
+ size 1850254
examples/weartie.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d843afa411d7e975299397e2435e892b277abda45cc64f0771da66cfb3490514
3
+ size 110982
models/acoustic-model-best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdd7921e3b44db3204008cb2d0428c5917c924dcd1f7b0285ab7e1d48e51e24c
3
+ size 225997291
models/hifigan-model-best.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2c4c04b6a829854ccd9eb5eac3b0f7a434fc1e94809e6662e2be79e6f930c49
3
  size 1021686329
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d71c85ed4c2ae2285222330a467c38be2b9a33d29f225d8c1568ee558d98694
3
  size 1021686329