Spaces:
Running
Running
speed improvements and documentation
Browse files
InferenceInterfaces/ControllableInterface.py
CHANGED
|
@@ -59,31 +59,31 @@ class ControllableInterface:
|
|
| 59 |
0.0], dtype=torch.float32)
|
| 60 |
embedding = self.wgan.modify_embed(controllability_vector)
|
| 61 |
self.model.set_utterance_embedding(embedding=embedding)
|
| 62 |
-
wav, sr,
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
wavs.append(wav)
|
| 75 |
-
wav = sum(wavs)/len(wavs)
|
| 76 |
else:
|
| 77 |
self.model.set_utterance_embedding(reference_audio)
|
| 78 |
|
| 79 |
if not voice_seed and reference_audio is not None:
|
| 80 |
-
wav, sr,
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
return sr, wav
|
|
|
|
| 59 |
0.0], dtype=torch.float32)
|
| 60 |
embedding = self.wgan.modify_embed(controllability_vector)
|
| 61 |
self.model.set_utterance_embedding(embedding=embedding)
|
| 62 |
+
wav, sr, pitch, energy, durations = self.model(prompt,
|
| 63 |
+
input_is_phones=True,
|
| 64 |
+
duration_scaling_factor=1.0,
|
| 65 |
+
pitch_variance_scale=1.0,
|
| 66 |
+
energy_variance_scale=1.0,
|
| 67 |
+
pause_duration_scaling_factor=1.0,
|
| 68 |
+
return_plot_as_filepath=False,
|
| 69 |
+
prosody_creativity=prosody_creativity,
|
| 70 |
+
loudness_in_db=loudness_in_db,
|
| 71 |
+
pitch=pitch,
|
| 72 |
+
energy=energy,
|
| 73 |
+
durations=durations)
|
| 74 |
wavs.append(wav)
|
| 75 |
+
wav = sum(wavs) / len(wavs)
|
| 76 |
else:
|
| 77 |
self.model.set_utterance_embedding(reference_audio)
|
| 78 |
|
| 79 |
if not voice_seed and reference_audio is not None:
|
| 80 |
+
wav, sr, pitch, energy, durations = self.model(prompt,
|
| 81 |
+
input_is_phones=True,
|
| 82 |
+
duration_scaling_factor=1.0,
|
| 83 |
+
pitch_variance_scale=1.0,
|
| 84 |
+
energy_variance_scale=1.0,
|
| 85 |
+
pause_duration_scaling_factor=1.0,
|
| 86 |
+
return_plot_as_filepath=False,
|
| 87 |
+
prosody_creativity=prosody_creativity,
|
| 88 |
+
loudness_in_db=loudness_in_db)
|
| 89 |
+
return sr, wav
|
InferenceInterfaces/ToucanTTSInterface.py
CHANGED
|
@@ -232,7 +232,7 @@ class ToucanTTSInterface(torch.nn.Module):
|
|
| 232 |
plt.savefig("tmp.png")
|
| 233 |
plt.close()
|
| 234 |
return wave, sr, "tmp.png", pitch, energy, durations
|
| 235 |
-
return wave, sr
|
| 236 |
|
| 237 |
def read_to_file(self,
|
| 238 |
text_list,
|
|
|
|
| 232 |
plt.savefig("tmp.png")
|
| 233 |
plt.close()
|
| 234 |
return wave, sr, "tmp.png", pitch, energy, durations
|
| 235 |
+
return wave, sr, pitch, energy, durations
|
| 236 |
|
| 237 |
def read_to_file(self,
|
| 238 |
text_list,
|
app.py
CHANGED
|
@@ -43,8 +43,7 @@ class TTSWebUI:
|
|
| 43 |
# gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Energy Variance Scale"),
|
| 44 |
# gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
|
| 45 |
],
|
| 46 |
-
outputs=[gr.Audio(type="numpy", label="Speech"),
|
| 47 |
-
gr.Image(label="Visualization")],
|
| 48 |
title=title,
|
| 49 |
allow_flagging="never",
|
| 50 |
description=article,
|
|
@@ -57,12 +56,12 @@ class TTSWebUI:
|
|
| 57 |
voice_seed,
|
| 58 |
reference_audio,
|
| 59 |
):
|
| 60 |
-
sr, wav
|
| 61 |
reference_audio,
|
| 62 |
voice_seed,
|
| 63 |
prosody_creativity,
|
| 64 |
-24.)
|
| 65 |
-
return (sr, float2pcm(wav))
|
| 66 |
|
| 67 |
|
| 68 |
if __name__ == '__main__':
|
|
|
|
| 43 |
# gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Energy Variance Scale"),
|
| 44 |
# gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
|
| 45 |
],
|
| 46 |
+
outputs=[gr.Audio(type="numpy", label="Speech")],
|
|
|
|
| 47 |
title=title,
|
| 48 |
allow_flagging="never",
|
| 49 |
description=article,
|
|
|
|
| 56 |
voice_seed,
|
| 57 |
reference_audio,
|
| 58 |
):
|
| 59 |
+
sr, wav = self.controllable_ui.read(prompt,
|
| 60 |
reference_audio,
|
| 61 |
voice_seed,
|
| 62 |
prosody_creativity,
|
| 63 |
-24.)
|
| 64 |
+
return (sr, float2pcm(wav))
|
| 65 |
|
| 66 |
|
| 67 |
if __name__ == '__main__':
|