Spaces:
Build error
Build error
added some descriptions
Browse files
README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 3.15.0
|
|
@@ -10,4 +10,7 @@ pinned: false
|
|
| 10 |
python_version: 3.7
|
| 11 |
---
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: SOVITS | Overwatch 2
|
| 3 |
+
emoji: 🗣️
|
| 4 |
+
colorFrom: orange
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 3.15.0
|
|
|
|
| 10 |
python_version: 3.7
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# SOVITS OW2 - Voice Conversion Model
|
| 14 |
+
|
| 15 |
+
This is a [SOVITS model](https://github.com/Francis-Komizu/Sovits) trained on every Overwatch 2 hero up to Kiriko (except Bastion, please forgive me). The model was trained for 195000 iterations.
|
| 16 |
+
It's not too great, to be honest: unlike Soft-VC, it doesn't appear to adjust the voice pitch to the target speaker. I added a pitch shift option, but it's pretty slow and doesn't really improve things most of the time; use at your own risk.
|
app.py
CHANGED
|
@@ -28,7 +28,7 @@ _ = net_g.eval()
|
|
| 28 |
_ = utils.load_checkpoint("logs/ow2/G_195000.pth", net_g, None)
|
| 29 |
|
| 30 |
|
| 31 |
-
def infer(audio, speaker_id, pitch_shift, length_scale, noise_scale=.667, noise_scale_w=0.8):
|
| 32 |
fname = audio
|
| 33 |
source, sr = torchaudio.load(fname)
|
| 34 |
|
|
@@ -53,14 +53,24 @@ def infer(audio, speaker_id, pitch_shift, length_scale, noise_scale=.667, noise_
|
|
| 53 |
demo = gradio.Interface(
|
| 54 |
fn=infer,
|
| 55 |
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
gradio.Audio(label="Input Audio", type="filepath"),
|
| 57 |
gradio.Dropdown(label="Target Voice", choices=["Ana", "Ashe", "Baptiste", "Brigitte", "Cassidy", "Doomfist", "D.Va", "Echo", "Genji", "Hanzo", "Junker Queen", "Junkrat", "Kiriko", "Lúcio", "Mei", "Mercy", "Moira", "Orisa", "Pharah", "Reaper", "Reinhardt", "Roadhog", "Sigma", "Sojourn", "Soldier_ 76", "Sombra", "Symmetra", "Torbjörn", "Tracer", "Widowmaker", "Winston", "Zarya", "Zenyatta"], type="index", value="Ana"),
|
| 58 |
-
gradio.Slider(label="Pitch Shift Input (+12 = up one octave)", minimum=-12.0, maximum=12.0, value=0, step=1),
|
| 59 |
-
gradio.Slider(label="Length Factor", minimum=0.1, maximum=2.0, value=1.0),
|
| 60 |
gradio.Slider(label="Noise Scale (higher = more expressive and erratic)", minimum=0.0, maximum=2.0, value=.667),
|
| 61 |
gradio.Slider(label="Noise Scale W (higher = more variation in cadence)", minimum=0.0, maximum=2.0, value=.8)
|
| 62 |
],
|
| 63 |
outputs=[gradio.Audio(label="Audio as Target Voice")],
|
| 64 |
)
|
| 65 |
#demo.launch(share=True)
|
| 66 |
-
demo.launch(server_name="0.0.0.0")
|
|
|
|
| 28 |
_ = utils.load_checkpoint("logs/ow2/G_195000.pth", net_g, None)
|
| 29 |
|
| 30 |
|
| 31 |
+
def infer(md, audio, speaker_id, pitch_shift, length_scale, noise_scale=.667, noise_scale_w=0.8):
|
| 32 |
fname = audio
|
| 33 |
source, sr = torchaudio.load(fname)
|
| 34 |
|
|
|
|
| 53 |
demo = gradio.Interface(
|
| 54 |
fn=infer,
|
| 55 |
inputs=[
|
| 56 |
+
gradio.Markdown(
|
| 57 |
+
"""
|
| 58 |
+
# SOVITS | Overwatch 2
|
| 59 |
+
Upload any voice recording and turn it into a mangled approximation of any* Overwatch 2 Hero!
|
| 60 |
+
|
| 61 |
+
SOVITS doesn't really appear to adjust the pitch to the target speaker, so it helps to have your input voice at a similar pitch to the target voice.
|
| 62 |
+
I added a pitch shift option to preprocess the input voice, but it's slow and sometimes outright broken, use at your own risk.
|
| 63 |
+
|
| 64 |
+
( * up to Kiriko and without Bastion. Please forgive. )
|
| 65 |
+
"""),
|
| 66 |
gradio.Audio(label="Input Audio", type="filepath"),
|
| 67 |
gradio.Dropdown(label="Target Voice", choices=["Ana", "Ashe", "Baptiste", "Brigitte", "Cassidy", "Doomfist", "D.Va", "Echo", "Genji", "Hanzo", "Junker Queen", "Junkrat", "Kiriko", "Lúcio", "Mei", "Mercy", "Moira", "Orisa", "Pharah", "Reaper", "Reinhardt", "Roadhog", "Sigma", "Sojourn", "Soldier_ 76", "Sombra", "Symmetra", "Torbjörn", "Tracer", "Widowmaker", "Winston", "Zarya", "Zenyatta"], type="index", value="Ana"),
|
| 68 |
+
gradio.Slider(label="Pitch Shift Input (+12 = up one octave, ⚠️ broken AF ⚠️)", minimum=-12.0, maximum=12.0, value=0, step=1),
|
| 69 |
+
gradio.Slider(label="Length Factor (higher = slower speech)", minimum=0.1, maximum=2.0, value=1.0),
|
| 70 |
gradio.Slider(label="Noise Scale (higher = more expressive and erratic)", minimum=0.0, maximum=2.0, value=.667),
|
| 71 |
gradio.Slider(label="Noise Scale W (higher = more variation in cadence)", minimum=0.0, maximum=2.0, value=.8)
|
| 72 |
],
|
| 73 |
outputs=[gradio.Audio(label="Audio as Target Voice")],
|
| 74 |
)
|
| 75 |
#demo.launch(share=True)
|
| 76 |
+
demo.launch(server_name="0.0.0.0")
|