Spaces:
Runtime error
Runtime error
ButterCream
committed on
Commit
·
417a076
1
Parent(s):
a835dc1
readd lain fix
Browse files
- app.py +12 -5
- requirements.txt +1 -2
app.py
CHANGED
|
@@ -5,13 +5,14 @@ import re
|
|
| 5 |
import numpy as np
|
| 6 |
from scipy.io.wavfile import write
|
| 7 |
import nltk
|
| 8 |
-
from VoPho.engine import Phonemizer
|
| 9 |
|
| 10 |
nltk.download('punkt')
|
| 11 |
from nltk.tokenize import word_tokenize
|
| 12 |
|
| 13 |
import torch
|
| 14 |
|
|
|
|
|
|
|
| 15 |
INTRO = """
|
| 16 |
<style>
|
| 17 |
|
|
@@ -93,6 +94,13 @@ theme = gr.themes.Soft(
|
|
| 93 |
block_background_fill='*neutral_50'
|
| 94 |
)
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
def split_and_recombine_text(text, desired_length=200, max_length=300):
|
| 98 |
"""Split text it into chunks of a desired length trying to keep sentences intact."""
|
|
@@ -165,12 +173,11 @@ def split_and_recombine_text(text, desired_length=200, max_length=300):
|
|
| 165 |
|
| 166 |
return rv
|
| 167 |
|
| 168 |
-
engine = Phonemizer()
|
| 169 |
|
| 170 |
def text_to_phonemes(text):
|
| 171 |
text = text.strip()
|
| 172 |
print("Text before phonemization: ", text)
|
| 173 |
-
ps =
|
| 174 |
print("Text after phonemization: ", ps)
|
| 175 |
ps = word_tokenize(ps[0])
|
| 176 |
ps = ' '.join(ps)
|
|
@@ -211,7 +218,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
|
|
| 211 |
thresh = np.percentile(np.abs(synthaud), 95)
|
| 212 |
CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
|
| 213 |
lead_percent = 0.008
|
| 214 |
-
trail_percent = 0.
|
| 215 |
|
| 216 |
|
| 217 |
# Leading artefact removal
|
|
@@ -252,7 +259,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
|
|
| 252 |
|
| 253 |
other_tts = tts.StyleTTS2(model_checkpoint_path='./epoch_2nd_00012.pth', config_path="models/config_ft.yml")
|
| 254 |
if torch.cuda.is_available():
|
| 255 |
-
other_tts.
|
| 256 |
else:
|
| 257 |
other_tts.device = "cpu"
|
| 258 |
|
|
|
|
| 5 |
import numpy as np
|
| 6 |
from scipy.io.wavfile import write
|
| 7 |
import nltk
|
|
|
|
| 8 |
|
| 9 |
nltk.download('punkt')
|
| 10 |
from nltk.tokenize import word_tokenize
|
| 11 |
|
| 12 |
import torch
|
| 13 |
|
| 14 |
+
import phonemizer # en-us
|
| 15 |
+
|
| 16 |
INTRO = """
|
| 17 |
<style>
|
| 18 |
|
|
|
|
| 94 |
block_background_fill='*neutral_50'
|
| 95 |
)
|
| 96 |
|
| 97 |
+
# eventually swap to something else
|
| 98 |
+
global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us',
|
| 99 |
+
preserve_punctuation=True,
|
| 100 |
+
with_stress=True,
|
| 101 |
+
language_switch="remove-flags",
|
| 102 |
+
tie=False)
|
| 103 |
+
|
| 104 |
|
| 105 |
def split_and_recombine_text(text, desired_length=200, max_length=300):
|
| 106 |
"""Split text it into chunks of a desired length trying to keep sentences intact."""
|
|
|
|
| 173 |
|
| 174 |
return rv
|
| 175 |
|
|
|
|
| 176 |
|
| 177 |
def text_to_phonemes(text):
|
| 178 |
text = text.strip()
|
| 179 |
print("Text before phonemization: ", text)
|
| 180 |
+
ps = global_phonemizer.phonemize([text])
|
| 181 |
print("Text after phonemization: ", ps)
|
| 182 |
ps = word_tokenize(ps[0])
|
| 183 |
ps = ' '.join(ps)
|
|
|
|
| 218 |
thresh = np.percentile(np.abs(synthaud), 95)
|
| 219 |
CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
|
| 220 |
lead_percent = 0.008
|
| 221 |
+
trail_percent = 0.009
|
| 222 |
|
| 223 |
|
| 224 |
# Leading artefact removal
|
|
|
|
| 259 |
|
| 260 |
other_tts = tts.StyleTTS2(model_checkpoint_path='./epoch_2nd_00012.pth', config_path="models/config_ft.yml")
|
| 261 |
if torch.cuda.is_available():
|
| 262 |
+
other_tts.device = "cuda"
|
| 263 |
else:
|
| 264 |
other_tts.device = "cpu"
|
| 265 |
|
requirements.txt
CHANGED
|
@@ -23,5 +23,4 @@ gradio
|
|
| 23 |
spaces
|
| 24 |
gruut
|
| 25 |
txtsplit
|
| 26 |
-
scipy
|
| 27 |
-
VoPho
|
|
|
|
| 23 |
spaces
|
| 24 |
gruut
|
| 25 |
txtsplit
|
| 26 |
+
scipy
|
|
|