Spaces:
Running
Running
OpenVoice voice sample fix; print HF Space error
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ from datasets import load_dataset
|
|
| 5 |
import threading, time, uuid, sqlite3, shutil, os, random, asyncio, threading
|
| 6 |
from pathlib import Path
|
| 7 |
from huggingface_hub import CommitScheduler, delete_file, hf_hub_download
|
| 8 |
-
from gradio_client import Client
|
| 9 |
import pyloudnorm as pyln
|
| 10 |
import soundfile as sf
|
| 11 |
import librosa
|
|
@@ -54,7 +54,7 @@ AVAILABLE_MODELS = {
|
|
| 54 |
'Pendrokar/xVASynth': 'Pendrokar/xVASynth', # EN-GB 4.29.0 4.42.0
|
| 55 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 56 |
'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # 4.29.0
|
| 57 |
-
'mrfakename/MeloTTS': 'mrfakename/MeloTTS', #
|
| 58 |
|
| 59 |
# Parler
|
| 60 |
'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29.0 4.42.0
|
|
@@ -170,29 +170,30 @@ HF_SPACES = {
|
|
| 170 |
}
|
| 171 |
|
| 172 |
# for zero-shot TTS - voice sample of Scarlett Johansson
|
| 173 |
-
|
|
|
|
| 174 |
|
| 175 |
OVERRIDE_INPUTS = {
|
| 176 |
'coqui/xtts': {
|
| 177 |
1: 'en',
|
| 178 |
-
2:
|
| 179 |
-
3:
|
| 180 |
4: False, #use_mic
|
| 181 |
5: False, #cleanup_reference
|
| 182 |
6: False, #auto_detect
|
| 183 |
},
|
| 184 |
'collabora/WhisperSpeech': {
|
| 185 |
1: DEFAULT_VOICE_SAMPLE, # voice sample
|
| 186 |
-
2: DEFAULT_VOICE_SAMPLE, # voice sample
|
| 187 |
3: 14.0, #Tempo - Gradio Slider issue: takes min. rather than value
|
| 188 |
},
|
| 189 |
'myshell-ai/OpenVoice': {
|
| 190 |
1: 'default', # style
|
| 191 |
-
2:
|
| 192 |
},
|
| 193 |
'myshell-ai/OpenVoiceV2': {
|
| 194 |
1: 'en_default', # style
|
| 195 |
-
2:
|
| 196 |
},
|
| 197 |
'PolyAI/pheme': {
|
| 198 |
1: 'YOU1000000044_S0000798', # voice
|
|
@@ -866,13 +867,16 @@ def synthandreturn(text):
|
|
| 866 |
else:
|
| 867 |
result = router.predict(text, model.lower(), api_name="/synthesize")
|
| 868 |
break
|
| 869 |
-
except Exception:
|
| 870 |
-
raise Exception
|
| 871 |
attempt_count += 1
|
|
|
|
| 872 |
print(f"{model}: Unable to call API (attempt: {attempt_count})")
|
| 873 |
-
# sleep for one second
|
| 874 |
time.sleep(1)
|
| 875 |
|
|
|
|
|
|
|
|
|
|
| 876 |
if attempt_count > 2:
|
| 877 |
raise gr.Error(f"{model}: Failed to call model")
|
| 878 |
else:
|
|
@@ -925,7 +929,7 @@ def synthandreturn(text):
|
|
| 925 |
try:
|
| 926 |
for key,value in OVERRIDE_INPUTS[modelname].items():
|
| 927 |
inputs[key] = value
|
| 928 |
-
print(f"{modelname}: Default inputs overridden")
|
| 929 |
except:
|
| 930 |
pass
|
| 931 |
|
|
|
|
| 5 |
import threading, time, uuid, sqlite3, shutil, os, random, asyncio, threading
|
| 6 |
from pathlib import Path
|
| 7 |
from huggingface_hub import CommitScheduler, delete_file, hf_hub_download
|
| 8 |
+
from gradio_client import Client, file
|
| 9 |
import pyloudnorm as pyln
|
| 10 |
import soundfile as sf
|
| 11 |
import librosa
|
|
|
|
| 54 |
'Pendrokar/xVASynth': 'Pendrokar/xVASynth', # EN-GB 4.29.0 4.42.0
|
| 55 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
| 56 |
'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # 4.29.0
|
| 57 |
+
'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29.0
|
| 58 |
|
| 59 |
# Parler
|
| 60 |
'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29.0 4.42.0
|
|
|
|
| 170 |
}
|
| 171 |
|
| 172 |
# for zero-shot TTS - voice sample of Scarlett Johansson
|
| 173 |
+
DEFAULT_VOICE_SAMPLE_STR = 'https://cdn-uploads.huggingface.co/production/uploads/641de0213239b631552713e4/iKHHqWxWy6Zfmp6QP6CZZ.wav'
|
| 174 |
+
DEFAULT_VOICE_SAMPLE = file(DEFAULT_VOICE_SAMPLE_STR)
|
| 175 |
|
| 176 |
OVERRIDE_INPUTS = {
|
| 177 |
'coqui/xtts': {
|
| 178 |
1: 'en',
|
| 179 |
+
2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
|
| 180 |
+
3: DEFAULT_VOICE_SAMPLE_STR, # voice sample
|
| 181 |
4: False, #use_mic
|
| 182 |
5: False, #cleanup_reference
|
| 183 |
6: False, #auto_detect
|
| 184 |
},
|
| 185 |
'collabora/WhisperSpeech': {
|
| 186 |
1: DEFAULT_VOICE_SAMPLE, # voice sample
|
| 187 |
+
2: DEFAULT_VOICE_SAMPLE, # voice sample URL
|
| 188 |
3: 14.0, #Tempo - Gradio Slider issue: takes min. rather than value
|
| 189 |
},
|
| 190 |
'myshell-ai/OpenVoice': {
|
| 191 |
1: 'default', # style
|
| 192 |
+
2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
|
| 193 |
},
|
| 194 |
'myshell-ai/OpenVoiceV2': {
|
| 195 |
1: 'en_default', # style
|
| 196 |
+
2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
|
| 197 |
},
|
| 198 |
'PolyAI/pheme': {
|
| 199 |
1: 'YOU1000000044_S0000798', # voice
|
|
|
|
| 867 |
else:
|
| 868 |
result = router.predict(text, model.lower(), api_name="/synthesize")
|
| 869 |
break
|
| 870 |
+
except Exception as e:
|
|
|
|
| 871 |
attempt_count += 1
|
| 872 |
+
print(repr(e))
|
| 873 |
print(f"{model}: Unable to call API (attempt: {attempt_count})")
|
| 874 |
+
# sleep for one second
|
| 875 |
time.sleep(1)
|
| 876 |
|
| 877 |
+
# Fetch and store client again
|
| 878 |
+
hf_clients[model] = Client(model, hf_token=hf_token)
|
| 879 |
+
|
| 880 |
if attempt_count > 2:
|
| 881 |
raise gr.Error(f"{model}: Failed to call model")
|
| 882 |
else:
|
|
|
|
| 929 |
try:
|
| 930 |
for key,value in OVERRIDE_INPUTS[modelname].items():
|
| 931 |
inputs[key] = value
|
| 932 |
+
print(f"{modelname}: Default inputs overridden by Arena")
|
| 933 |
except:
|
| 934 |
pass
|
| 935 |
|