Spaces:
Running
Running
Enable MegaTTS; re-enable Llasa 3B; disable outclassed TTS models
Browse files- app/models.py +8 -5
app/models.py
CHANGED
|
@@ -86,7 +86,7 @@ AVAILABLE_MODELS = {
|
|
| 86 |
# llasa 1b TTS
|
| 87 |
# 'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers': 'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers',
|
| 88 |
# llasa 3b TTS
|
| 89 |
-
|
| 90 |
# llasa 8b TTS
|
| 91 |
# 'srinivasbilla/llasa-8b-tts': 'srinivasbilla/llasa-8b-tts', # ZeroGPU Pro account expired
|
| 92 |
|
|
@@ -101,16 +101,16 @@ AVAILABLE_MODELS = {
|
|
| 101 |
# 'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
|
| 102 |
|
| 103 |
# Spark
|
| 104 |
-
'thunnai/SparkTTS': 'thunnai/SparkTTS',
|
| 105 |
|
| 106 |
# Sesame
|
| 107 |
-
'sesame/csm-1b' : 'sesame/csm-1b',
|
| 108 |
|
| 109 |
# Orpheus
|
| 110 |
'MohamedRashad/Orpheus-TTS' : 'MohamedRashad/Orpheus-TTS',
|
| 111 |
|
| 112 |
# Index TTS
|
| 113 |
-
'IndexTeam/IndexTTS': 'IndexTeam/IndexTTS',
|
| 114 |
|
| 115 |
# Dia
|
| 116 |
# 'nari-labs/Dia-1.6B': 'nari-labs/Dia-1.6B', # single speaker hallucinates
|
|
@@ -121,6 +121,9 @@ AVAILABLE_MODELS = {
|
|
| 121 |
# OpenAudio S1 (Fish Audio)
|
| 122 |
'fishaudio/openaudio-s1-mini': 'fishaudio/openaudio-s1-mini',
|
| 123 |
|
|
|
|
|
|
|
|
|
|
| 124 |
# HF TTS w issues
|
| 125 |
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Discontinued for OpenAudio S1
|
| 126 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # unresponsive to exclamation marks # 4.29
|
|
@@ -973,7 +976,7 @@ OVERRIDE_INPUTS = {
|
|
| 973 |
|
| 974 |
# Chatterbox
|
| 975 |
'ResembleAI/Chatterbox': {
|
| 976 |
-
'audio_prompt_path_input':
|
| 977 |
'exaggeration_input': 0.5, # 1-2
|
| 978 |
'temperature_input': 0.8, # Lower values make the output more deterministic, higher values increase randomness.
|
| 979 |
'seed_num_input': 1, # Seed for random number generation, can be any integer.
|
|
|
|
| 86 |
# llasa 1b TTS
|
| 87 |
# 'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers': 'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers',
|
| 88 |
# llasa 3b TTS
|
| 89 |
+
'srinivasbilla/llasa-3b-tts': 'srinivasbilla/llasa-3b-tts',
|
| 90 |
# llasa 8b TTS
|
| 91 |
# 'srinivasbilla/llasa-8b-tts': 'srinivasbilla/llasa-8b-tts', # ZeroGPU Pro account expired
|
| 92 |
|
|
|
|
| 101 |
# 'Steveeeeeeen/Zonos/hybrid': 'Steveeeeeeen/Zonos',
|
| 102 |
|
| 103 |
# Spark
|
| 104 |
+
# 'thunnai/SparkTTS': 'thunnai/SparkTTS',
|
| 105 |
|
| 106 |
# Sesame
|
| 107 |
+
# 'sesame/csm-1b' : 'sesame/csm-1b',
|
| 108 |
|
| 109 |
# Orpheus
|
| 110 |
'MohamedRashad/Orpheus-TTS' : 'MohamedRashad/Orpheus-TTS',
|
| 111 |
|
| 112 |
# Index TTS
|
| 113 |
+
# 'IndexTeam/IndexTTS': 'IndexTeam/IndexTTS', # hallucinations on the endquotes
|
| 114 |
|
| 115 |
# Dia
|
| 116 |
# 'nari-labs/Dia-1.6B': 'nari-labs/Dia-1.6B', # single speaker hallucinates
|
|
|
|
| 121 |
# OpenAudio S1 (Fish Audio)
|
| 122 |
'fishaudio/openaudio-s1-mini': 'fishaudio/openaudio-s1-mini',
|
| 123 |
|
| 124 |
+
# MegaTTS
|
| 125 |
+
'ByteDance/MegaTTS3': 'ByteDance/MegaTTS3',
|
| 126 |
+
|
| 127 |
# HF TTS w issues
|
| 128 |
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # Discontinued for OpenAudio S1
|
| 129 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # unresponsive to exclamation marks # 4.29
|
|
|
|
| 976 |
|
| 977 |
# Chatterbox
|
| 978 |
'ResembleAI/Chatterbox': {
|
| 979 |
+
'audio_prompt_path_input': handle_file('https://cdn-uploads.huggingface.co/production/uploads/642c0b71eb6e214d4f8897a3/bJvzvKbiIAZ3lX0MJDU0r.wav'), # voice; chosen by Manmay of Resemble AI org - https://huggingface.co/spaces/ResembleAI/Chatterbox/discussions/14#686cd36e9479e00d8d3fc079
|
| 980 |
'exaggeration_input': 0.5, # 1-2
|
| 981 |
'temperature_input': 0.8, # Lower values make the output more deterministic, higher values increase randomness.
|
| 982 |
'seed_num_input': 1, # Seed for random number generation, can be any integer.
|