Spaces:

cjayic
/

soft-vc-widowmaker

Build error

App Files Files Community

cjayic committed on Dec 31, 2022

Commit

5be9b92

1 Parent(s): a9aac41

layout update, examples, update models

Browse files

Files changed (6) hide show

app.py +42 -13
examples/{meatgrinder.wav → Mercy_0000000B0F5.wav} +2 -2
examples/gman_02.wav +3 -0
examples/weartie.wav +3 -0
models/acoustic-model-best.pt +3 -0
models/hifigan-model-best.pt +1 -1

app.py CHANGED Viewed

@@ -4,14 +4,13 @@ from hifigan.generator import HifiganGenerator
 from acoustic import AcousticModel
-#from hifigan.generator import HifiganGenerator
 from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
 hubert = torch.hub.load("bshall/hubert:main", "hubert_soft").cpu()
 acoustic = AcousticModel(False, True)
-checkpoint = torch.load("models/acoustic-model-100000.pt", map_location=torch.device('cpu'))
 consume_prefix_in_state_dict_if_present(checkpoint["acoustic-model"], "module.")
 acoustic.load_state_dict(checkpoint["acoustic-model"])
@@ -27,7 +26,6 @@ hifigan.eval()
 def run_conversion(audio_in):
     sr, source = audio_in
     source = torch.Tensor(source)
     if source.dim() == 1:
@@ -56,21 +54,52 @@ def run_conversion(audio_in):
 with gr.Blocks() as demo:
-    with gr.Column(variant="panel"):
-        with gr.Row(variant="compact"):
-            input_audio = gr.Audio(
-                label="Audio to be converted",
-            ).style(
-                container=False,
-            )
-            btn = gr.Button("Widowify").style(full_width=False)
             output_audio = gr.Audio(
                 label="Converted Audio",
                 elem_id="output_audio",
                 interactive=False
             ).style(height="auto")
-    btn.click(run_conversion, input_audio, output_audio)
-    gr.Examples(["examples/jermacraft.wav","examples/meatgrinder.wav"], inputs=[input_audio])
 demo.launch()

 from acoustic import AcousticModel
 from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
 hubert = torch.hub.load("bshall/hubert:main", "hubert_soft").cpu()
 acoustic = AcousticModel(False, True)
+checkpoint = torch.load("models/acoustic-model-best.pt", map_location=torch.device('cpu'))
 consume_prefix_in_state_dict_if_present(checkpoint["acoustic-model"], "module.")
 acoustic.load_state_dict(checkpoint["acoustic-model"])
 def run_conversion(audio_in):
     sr, source = audio_in
     source = torch.Tensor(source)
     if source.dim() == 1:
 with gr.Blocks() as demo:
+    with gr.Column():
+        gr.Markdown(
+        """
+        # Soft-VC | Widowmaker
+        This is a [Soft-VC model](https://github.com/bshall/soft-vc) trained on Widowmaker from Overwatch, allowing the conversion of any voice to Widowmaker's voice. While lower quality (16kHz), it captures the character fairly well, imo.
+        For a multi-speaker model, check out my [sovits-overwatch2](https://huggingface.co/spaces/cjayic/sovits-overwatch2) space!
+        The acoustic model has been trained for around 100k iterations, the HiFiGAN-Model for around 150k iterations. Quality could likely be improved by training the HiFiGAN further.
+        """),
+        with gr.Column():
+            with gr.Tab("Upload Audio File"):
+                with gr.Column():
+                    input_audio = gr.Audio(
+                        label="Audio to be converted",
+                    ).style(
+                        container=False,
+                    )
+                    btn_upload = gr.Button("Widowify", variant="primary").style(full_width=True)
+            with gr.Tab("Record Audio"):
+                with gr.Column():
+                    input_audio_record = gr.Audio(
+                        label="Audio to be converted",
+                        source="microphone"
+                    ).style(
+                        container=False,
+                    )
+                    btn_rec = gr.Button("Widowify", variant="primary").style(full_width=True)
+        with gr.Row():
             output_audio = gr.Audio(
                 label="Converted Audio",
                 elem_id="output_audio",
                 interactive=False
             ).style(height="auto")
+    btn_upload.click(run_conversion, [input_audio], output_audio)
+    btn_rec.click(run_conversion, [input_audio_record], output_audio)
+    gr.Examples(
+        ["examples/jermacraft.wav","examples/Mercy_0000000B0F5.wav","examples/weartie.wav","examples/gman_02.wav"], inputs=[input_audio],
+        outputs=[output_audio],
+        fn=run_conversion,
+        cache_examples=True,
+        run_on_click=True
+    )
+demo.queue()
 demo.launch()

examples/{meatgrinder.wav → Mercy_0000000B0F5.wav} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a29324c4e5909f7eff663b3f3a17100fdf36fef1c6707ba16b4175bb21b3cb84
-size 1460740

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d82da53b88f1169a4e12eec15506d8b85029293b2a0db42e28e4a7a31a4914a
+size 162958

examples/gman_02.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cffbf4dda570a1c1c7a92e807f5a3f1b1418d77c5e4fad8da590fbc23c4ff07
+size 1850254

examples/weartie.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d843afa411d7e975299397e2435e892b277abda45cc64f0771da66cfb3490514
+size 110982

models/acoustic-model-best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdd7921e3b44db3204008cb2d0428c5917c924dcd1f7b0285ab7e1d48e51e24c
+size 225997291

models/hifigan-model-best.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2c4c04b6a829854ccd9eb5eac3b0f7a434fc1e94809e6662e2be79e6f930c49
 size 1021686329

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d71c85ed4c2ae2285222330a467c38be2b9a33d29f225d8c1568ee558d98694
 size 1021686329