Spaces:
Running
Running
add wav/ogg option
Browse files
app.py
CHANGED
|
@@ -134,7 +134,7 @@ def load_checkpoints():
|
|
| 134 |
|
| 135 |
return model, hps, net_g_vctk, hps_vctk
|
| 136 |
|
| 137 |
-
def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
| 138 |
if len(text.strip())==0:
|
| 139 |
return []
|
| 140 |
language = language.split()[0]
|
|
@@ -145,7 +145,6 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
|
| 145 |
result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-v0.1")
|
| 146 |
elif voice == 'Nix-Stochastic' and language == 'jbo':
|
| 147 |
result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-sdp-v0.1")
|
| 148 |
-
result = [result[0], wav2ogg(result[1][1], result[1][0], text, language)]
|
| 149 |
elif voice == 'LJS':
|
| 150 |
ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
|
| 151 |
with torch.no_grad():
|
|
@@ -163,6 +162,9 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
|
| 163 |
audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
|
| 164 |
noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
|
| 165 |
result = [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
|
|
|
|
|
|
|
|
|
|
| 166 |
return result
|
| 167 |
|
| 168 |
# download_pretrained()
|
|
@@ -175,7 +177,7 @@ defaults = {
|
|
| 175 |
"noise_scale_w": .8,
|
| 176 |
"speed": 1.8,
|
| 177 |
"voice": "LJS",
|
| 178 |
-
"example": ["", "Lojban", 0.667, 0.8, 1.8,"LJS"]
|
| 179 |
}
|
| 180 |
|
| 181 |
inputs = []
|
|
@@ -227,6 +229,8 @@ with gr.Blocks(css=css) as demo:
|
|
| 227 |
ipa_block = gr.Textbox(label="International Phonetic Alphabet")
|
| 228 |
audio = gr.Audio(type="numpy", label="Output audio")
|
| 229 |
outputs = [ ipa_block, audio ]
|
|
|
|
|
|
|
| 230 |
btn = gr.Button("Vocalize")
|
| 231 |
btn.click(fn=inference, inputs=inputs, outputs=outputs, api_name="cupra")
|
| 232 |
|
|
|
|
| 134 |
|
| 135 |
return model, hps, net_g_vctk, hps_vctk
|
| 136 |
|
| 137 |
+
def inference(text, language, noise_scale, noise_scale_w, length_scale, voice, file_format):
|
| 138 |
if len(text.strip())==0:
|
| 139 |
return []
|
| 140 |
language = language.split()[0]
|
|
|
|
| 145 |
result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-v0.1")
|
| 146 |
elif voice == 'Nix-Stochastic' and language == 'jbo':
|
| 147 |
result = generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-sdp-v0.1")
|
|
|
|
| 148 |
elif voice == 'LJS':
|
| 149 |
ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
|
| 150 |
with torch.no_grad():
|
|
|
|
| 162 |
audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
|
| 163 |
noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
|
| 164 |
result = [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
|
| 165 |
+
if file_format == 'ogg':
|
| 166 |
+
result = [result[0], wav2ogg(result[1][1], result[1][0], text, language)]
|
| 167 |
+
|
| 168 |
return result
|
| 169 |
|
| 170 |
# download_pretrained()
|
|
|
|
| 177 |
"noise_scale_w": .8,
|
| 178 |
"speed": 1.8,
|
| 179 |
"voice": "LJS",
|
| 180 |
+
"example": ["", "Lojban", 0.667, 0.8, 1.8,"LJS","wav"]
|
| 181 |
}
|
| 182 |
|
| 183 |
inputs = []
|
|
|
|
| 229 |
ipa_block = gr.Textbox(label="International Phonetic Alphabet")
|
| 230 |
audio = gr.Audio(type="numpy", label="Output audio")
|
| 231 |
outputs = [ ipa_block, audio ]
|
| 232 |
+
file_format = gr.Radio(["wav", "ogg"], value="wav", label="File format")
|
| 233 |
+
inputs.append(file_format)
|
| 234 |
btn = gr.Button("Vocalize")
|
| 235 |
btn.click(fn=inference, inputs=inputs, outputs=outputs, api_name="cupra")
|
| 236 |
|
dev.sh
CHANGED
|
@@ -8,6 +8,7 @@ docker rm -f jboselvoha 2> /dev/null
|
|
| 8 |
# -p 7860:7860 \
|
| 9 |
# jboselvoha
|
| 10 |
docker run -d -it --name jboselvoha \
|
|
|
|
| 11 |
-v $(pwd)/libs:/home/user/app/libs:Z \
|
| 12 |
-v $(pwd)/assets:/home/user/app/assets:Z \
|
| 13 |
-v $(pwd)/pretrained/nix-tts:/home/user/app/pretrained/nix-tts/:Z \
|
|
|
|
| 8 |
# -p 7860:7860 \
|
| 9 |
# jboselvoha
|
| 10 |
docker run -d -it --name jboselvoha \
|
| 11 |
+
-v $(pwd)/lfs:/home/user/app/lfs:Z \
|
| 12 |
-v $(pwd)/libs:/home/user/app/libs:Z \
|
| 13 |
-v $(pwd)/assets:/home/user/app/assets:Z \
|
| 14 |
-v $(pwd)/pretrained/nix-tts:/home/user/app/pretrained/nix-tts/:Z \
|
prod.sh
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
docker kill jboselvoha 2> /dev/null
|
| 2 |
-
docker rm -f jboselvoha 2> /dev/null
|
| 3 |
-
docker run -it --name jboselvoha \
|
| 4 |
-
-v $(pwd)/lfs:/home/user/app/lfs/:Z \
|
| 5 |
-
-v $(pwd)/app.py:/home/user/app/app.py:Z \
|
| 6 |
-
-v $(pwd)/lojban/lojban.py:/home/user/app/lojban/lojban.py:Z \
|
| 7 |
-
-v $(pwd)/vits:/home/user/app/vits:Z \
|
| 8 |
-
-v $(pwd)/nix-tts:/home/user/app/nix-tts:Z \
|
| 9 |
-
-p 7860:7860 \
|
| 10 |
-
jboselvoha
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|