Spaces:
Runtime error
Runtime error
ButterCream
committed on
Commit
·
b07d516
1
Parent(s):
417a076
upgrade to VoPho - resolved all issues
Browse files
- app.py +5 -18
- requirements.txt +4 -3
app.py
CHANGED
|
@@ -5,14 +5,10 @@ import re
|
|
| 5 |
import numpy as np
|
| 6 |
from scipy.io.wavfile import write
|
| 7 |
import nltk
|
| 8 |
-
|
| 9 |
-
nltk.download('punkt')
|
| 10 |
-
from nltk.tokenize import word_tokenize
|
| 11 |
|
| 12 |
import torch
|
| 13 |
|
| 14 |
-
import phonemizer # en-us
|
| 15 |
-
|
| 16 |
INTRO = """
|
| 17 |
<style>
|
| 18 |
|
|
@@ -94,13 +90,6 @@ theme = gr.themes.Soft(
|
|
| 94 |
block_background_fill='*neutral_50'
|
| 95 |
)
|
| 96 |
|
| 97 |
-
# eventually swap to something else
|
| 98 |
-
global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us',
|
| 99 |
-
preserve_punctuation=True,
|
| 100 |
-
with_stress=True,
|
| 101 |
-
language_switch="remove-flags",
|
| 102 |
-
tie=False)
|
| 103 |
-
|
| 104 |
|
| 105 |
def split_and_recombine_text(text, desired_length=200, max_length=300):
|
| 106 |
"""Split text it into chunks of a desired length trying to keep sentences intact."""
|
|
@@ -173,15 +162,13 @@ def split_and_recombine_text(text, desired_length=200, max_length=300):
|
|
| 173 |
|
| 174 |
return rv
|
| 175 |
|
|
|
|
| 176 |
|
| 177 |
def text_to_phonemes(text):
|
| 178 |
text = text.strip()
|
| 179 |
print("Text before phonemization: ", text)
|
| 180 |
-
ps = global_phonemizer.phonemize([text])
|
| 181 |
print("Text after phonemization: ", ps)
|
| 182 |
-
ps = word_tokenize(ps[0])
|
| 183 |
-
ps = ' '.join(ps)
|
| 184 |
-
print("Final text after tokenization: ", ps)
|
| 185 |
return ps
|
| 186 |
|
| 187 |
|
|
@@ -218,7 +205,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
|
|
| 218 |
thresh = np.percentile(np.abs(synthaud), 95)
|
| 219 |
CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
|
| 220 |
lead_percent = 0.008
|
| 221 |
-
trail_percent = 0.
|
| 222 |
|
| 223 |
|
| 224 |
# Leading artefact removal
|
|
@@ -295,4 +282,4 @@ with gr.Blocks(theme=theme, js=js_func) as clone:
|
|
| 295 |
|
| 296 |
if __name__ == "__main__":
|
| 297 |
# demo.queue(api_open=False, max_size=15).launch(show_api=False)
|
| 298 |
-
clone.queue(api_open=False, max_size=15).launch(show_api=False)
|
|
|
|
| 5 |
import numpy as np
|
| 6 |
from scipy.io.wavfile import write
|
| 7 |
import nltk
|
| 8 |
+
from VoPho.engine import Phonemizer
|
|
|
|
|
|
|
| 9 |
|
| 10 |
import torch
|
| 11 |
|
|
|
|
|
|
|
| 12 |
INTRO = """
|
| 13 |
<style>
|
| 14 |
|
|
|
|
| 90 |
block_background_fill='*neutral_50'
|
| 91 |
)
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
def split_and_recombine_text(text, desired_length=200, max_length=300):
|
| 95 |
"""Split text it into chunks of a desired length trying to keep sentences intact."""
|
|
|
|
| 162 |
|
| 163 |
return rv
|
| 164 |
|
| 165 |
+
engine = Phonemizer()
|
| 166 |
|
| 167 |
def text_to_phonemes(text):
|
| 168 |
text = text.strip()
|
| 169 |
print("Text before phonemization: ", text)
|
| 170 |
+
ps = engine.phonemize(text)
|
| 171 |
print("Text after phonemization: ", ps)
|
|
|
|
|
|
|
|
|
|
| 172 |
return ps
|
| 173 |
|
| 174 |
|
|
|
|
| 205 |
thresh = np.percentile(np.abs(synthaud), 95)
|
| 206 |
CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
|
| 207 |
lead_percent = 0.008
|
| 208 |
+
trail_percent = 0.0085
|
| 209 |
|
| 210 |
|
| 211 |
# Leading artefact removal
|
|
|
|
| 282 |
|
| 283 |
if __name__ == "__main__":
|
| 284 |
# demo.queue(api_open=False, max_size=15).launch(show_api=False)
|
| 285 |
+
clone.queue(api_open=False, max_size=15).launch(show_api=False)
|
requirements.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
SoundFile
|
| 2 |
-
torchaudio==2.
|
| 3 |
munch
|
| 4 |
-
torch==2.
|
| 5 |
pydub
|
| 6 |
pyyaml
|
| 7 |
librosa
|
|
@@ -23,4 +23,5 @@ gradio
|
|
| 23 |
spaces
|
| 24 |
gruut
|
| 25 |
txtsplit
|
| 26 |
-
scipy
|
|
|
|
|
|
| 1 |
SoundFile
|
| 2 |
+
torchaudio==2.2.0
|
| 3 |
munch
|
| 4 |
+
torch==2.2.0
|
| 5 |
pydub
|
| 6 |
pyyaml
|
| 7 |
librosa
|
|
|
|
| 23 |
spaces
|
| 24 |
gruut
|
| 25 |
txtsplit
|
| 26 |
+
scipy
|
| 27 |
+
VoPho==0.0.8
|