Pendrokar committed on
Commit
a6bd099
·
1 Parent(s): 2b726b0

MOSS unfavored due to high Zero-GPU requirements

Browse files
README.md CHANGED
@@ -24,6 +24,7 @@ models:
24
  - HKUSTAudio/Llasa-8B
25
  - IndexTeam/Index-TTS
26
  - kyutai/pocket-tts
 
27
  - lj1995/GPT-SoVITS
28
  - metavoiceio/metavoice-1B-v0.1
29
  - myshell-ai/MeloTTS-English-v2
 
24
  - HKUSTAudio/Llasa-8B
25
  - IndexTeam/Index-TTS
26
  - kyutai/pocket-tts
27
+ - kyutai/tts-voices
28
  - lj1995/GPT-SoVITS
29
  - metavoiceio/metavoice-1B-v0.1
30
  - myshell-ai/MeloTTS-English-v2
app/models.py CHANGED
@@ -169,8 +169,8 @@ AVAILABLE_MODELS = {
169
  'smallestai/smallest-ai-tts-lightningv3.1-demo': 'smallestai/smallest-ai-tts-lightningv3.1-demo',
170
 
171
  # Qwen3 TTS
172
- 'Qwen/Qwen3-TTS-Voice-Design': 'Qwen/Qwen3-TTS-Voice-Design',
173
  'Qwen/Qwen3-TTS': 'Qwen/Qwen3-TTS',
 
174
 
175
  # MOSS TTS
176
  'OpenMOSS-Team/MOSS-TTS': 'OpenMOSS-Team/MOSS-TTS',
@@ -279,7 +279,7 @@ top_five = [
279
  'NeuralFalcon/Pocket-TTS',
280
  'smallestai/smallest-ai-tts-lightningv3.1-demo',
281
  'Qwen/Qwen3-TTS-Voice-Design',
282
- 'OpenMOSS-Team/MOSS-TTS',
283
  ]
284
 
285
  # prioritize low vote models
 
169
  'smallestai/smallest-ai-tts-lightningv3.1-demo': 'smallestai/smallest-ai-tts-lightningv3.1-demo',
170
 
171
  # Qwen3 TTS
 
172
  'Qwen/Qwen3-TTS': 'Qwen/Qwen3-TTS',
173
+ 'Qwen/Qwen3-TTS-Voice-Design': 'Qwen/Qwen3-TTS-Voice-Design', # voice by prompt
174
 
175
  # MOSS TTS
176
  'OpenMOSS-Team/MOSS-TTS': 'OpenMOSS-Team/MOSS-TTS',
 
279
  'NeuralFalcon/Pocket-TTS',
280
  'smallestai/smallest-ai-tts-lightningv3.1-demo',
281
  'Qwen/Qwen3-TTS-Voice-Design',
282
+ 'Qwen/Qwen3-TTS',
283
  ]
284
 
285
  # prioritize low vote models
app/tts_spaces/hexgrad__kokoro.json CHANGED
@@ -6,5 +6,6 @@
6
  "is_zero_gpu_space": false,
7
  "series": "Kokoro",
8
  "space_link": "Remsky/Kokoro-TTS-Zero",
 
9
  "title": "Deprecated: Kokoro v1.0+ improves pronunciation"
10
  }
 
6
  "is_zero_gpu_space": false,
7
  "series": "Kokoro",
8
  "space_link": "Remsky/Kokoro-TTS-Zero",
9
+ "emoji": "😷",
10
  "title": "Deprecated: Kokoro v1.0+ improves pronunciation"
11
  }
create_space_files.py CHANGED
@@ -4,420 +4,7 @@ import os
4
 
5
  # HF_SPACES data from models.py
6
  hf_spaces = {
7
- 'coqui/xtts': {
8
- 'name': 'XTTS v2',
9
- 'function': '/predict',
10
- 'text_param_index': 0,
11
- 'return_audio_index': 0,
12
- 'series': 'XTTS',
13
- 'emoji': '😩',
14
- 'title': 'Gradio version too old',
15
- },
16
- 'collabora/WhisperSpeech': {
17
- 'name': 'WhisperSpeech',
18
- 'function': '/whisper_speech_demo',
19
- 'text_param_index': 0,
20
- 'return_audio_index': 0,
21
- 'series': 'WhisperSpeech',
22
- 'emoji': '😷',
23
- 'title': 'Broken space - runtime error',
24
- },
25
- 'myshell-ai/OpenVoice': {
26
- 'name': 'OpenVoice',
27
- 'function': '1',
28
- 'text_param_index': 0,
29
- 'return_audio_index': 1,
30
- 'series': 'OpenVoice',
31
- 'emoji': '😩',
32
- 'title': 'Gradio version too old',
33
- },
34
- 'myshell-ai/OpenVoiceV2': {
35
- 'name': 'OpenVoice v2',
36
- 'function': '1',
37
- 'text_param_index': 0,
38
- 'return_audio_index': 1,
39
- 'series': 'OpenVoice',
40
- 'emoji': '😩',
41
- 'title': 'Gradio version too old',
42
- },
43
- 'mrfakename/MetaVoice-1B-v0.1': {
44
- 'name': 'MetaVoice',
45
- 'function': '/tts',
46
- 'text_param_index': 0,
47
- 'return_audio_index': 0,
48
- 'series': 'MetaVoice',
49
- 'emoji': '😷',
50
- 'title': 'Broken space - runtime error',
51
- },
52
- 'Pendrokar/xVASynth-TTS': {
53
- 'name': 'xVASynth v3 DeepMoji',
54
- 'function': '/predict',
55
- 'text_param_index': 0,
56
- 'return_audio_index': 0,
57
- 'series': 'xVASynth',
58
- 'title': 'Outclassed',
59
- },
60
- 'Pendrokar/xVASynth-TTS/NoDeepMoji': {
61
- 'name': 'xVASynth v3',
62
- 'function': '/predict',
63
- 'text_param_index': 0,
64
- 'return_audio_index': 0,
65
- 'series': 'xVASynth',
66
- 'space_link': 'Pendrokar/xVASynth-TTS',
67
- 'title': 'Outclassed',
68
- },
69
- 'coqui/CoquiTTS': {
70
- 'name': 'CoquiTTS',
71
- 'function': '0',
72
- 'text_param_index': 0,
73
- 'return_audio_index': 0,
74
- 'series': 'CoquiTTS',
75
- 'title': 'Outclassed',
76
- },
77
- 'LeeSangHoon/HierSpeech_TTS': {
78
- 'name': 'HierSpeech++',
79
- 'function': '/predict',
80
- 'text_param_index': 0,
81
- 'return_audio_index': 0,
82
- 'series': 'HierSpeech++',
83
- 'emoji': '😒',
84
- 'title': 'Narration voice',
85
- },
86
- 'mrfakename/MeloTTS': {
87
- 'name': 'MeloTTS',
88
- 'function': '/synthesize',
89
- 'text_param_index': 'text',
90
- 'return_audio_index': 0,
91
- 'series': 'MeloTTS',
92
- 'emoji': '😷',
93
- 'title': 'Broken space / Outclassed narration voice',
94
- },
95
- 'parler-tts/parler_tts': {
96
- 'name': 'Parler Mini',
97
- 'function': '/gen_tts',
98
- 'text_param_index': 0,
99
- 'return_audio_index': 0,
100
- 'is_zero_gpu_space': True,
101
- 'series': 'Parler',
102
- 'emoji': '😷',
103
- 'title': 'Broken space - runtime error',
104
- },
105
- 'parler-tts/parler_tts/large': {
106
- 'name': 'Parler Large',
107
- 'function': '/gen_tts',
108
- 'text_param_index': 0,
109
- 'return_audio_index': 0,
110
- 'is_zero_gpu_space': True,
111
- 'series': 'Parler',
112
- 'emoji': '😷',
113
- 'title': 'Broken space - runtime error',
114
- 'space_link': 'parler-tts/parler_tts',
115
- },
116
- 'parler-tts/parler-tts-expresso': {
117
- 'name': 'Parler Mini Expresso',
118
- 'function': '/gen_tts',
119
- 'text_param_index': 0,
120
- 'return_audio_index': 0,
121
- 'is_zero_gpu_space': True,
122
- 'series': 'Parler',
123
- 'emoji': '😷',
124
- 'title': 'Broken space - runtime error',
125
- },
126
- 'PHBJT/multi_parler_tts': {
127
- 'name': 'Parler Mini Multi v1.1',
128
- 'function': '/gen_tts',
129
- 'text_param_index': 'text',
130
- 'return_audio_index': 1,
131
- 'is_zero_gpu_space': True,
132
- 'series': 'Parler',
133
- 'title': 'Unstable',
134
- },
135
- 'PHBJT/multi_parler_tts/reformatted': {
136
- 'name': 'Parler Mini Multi v1.1r',
137
- 'function': '/gen_tts',
138
- 'text_param_index': 'text',
139
- 'return_audio_index': 1,
140
- 'is_zero_gpu_space': True,
141
- 'series': 'Parler',
142
- 'title': 'Unstable',
143
- },
144
- 'innoai/Edge-TTS-Text-to-Speech': {
145
- 'name': 'Microsoft® Edge TTS',
146
- 'function': '/predict',
147
- 'text_param_index': 0,
148
- 'return_audio_index': 0,
149
- 'is_closed_source': True,
150
- 'series': 'Edge TTS',
151
- 'emoji': '',
152
- 'space_link': 'innoai/Edge-TTS-Text-to-Speech',
153
- },
154
- 'fishaudio/fish-speech-1': {
155
- 'name': 'Fish Speech',
156
- 'function': '/inference_wrapper',
157
- 'text_param_index': 'text',
158
- 'return_audio_index': 0,
159
- 'series': 'Fish Speech',
160
- 'emoji': '😵',
161
- 'title': 'Deprecated: Redirects to OpenAudio',
162
- },
163
- 'fishaudio/openaudio-s1-mini': {
164
- 'name': 'OpenAudio S1 Mini',
165
- 'function': '/partial',
166
- 'text_param_index': 'text',
167
- 'return_audio_index': 0,
168
- 'series': 'Fish Speech',
169
- },
170
- 'mrfakename/E2-F5-TTS': {
171
- 'name': 'F5 TTS',
172
- 'function': '/predict',
173
- 'text_param_index': 'gen_text',
174
- 'return_audio_index': 0,
175
- 'is_zero_gpu_space': True,
176
- 'series': 'E2/F5 TTS',
177
- 'title': 'Uses a voice sample from trained dataset',
178
- },
179
- 'mrfakename/E2-F5-TTS/E2': {
180
- 'name': 'E2 TTS',
181
- 'function': '/basic_tts',
182
- 'text_param_index': 'gen_text_input',
183
- 'return_audio_index': 0,
184
- 'is_zero_gpu_space': True,
185
- 'series': 'E2/F5 TTS',
186
- },
187
- 'Flux9665/MassivelyMultilingualTTS': {
188
- 'name': 'IMS-Toucan',
189
- 'function': '/predict',
190
- 'text_param_index': 0,
191
- 'return_audio_index': 0,
192
- 'series': 'IMS-Toucan',
193
- 'title': 'Changes voice pitch on each request',
194
- },
195
- 'Flux9665/EnglishToucan': {
196
- 'name': 'IMS-Toucan EN',
197
- 'function': '/predict',
198
- 'text_param_index': 0,
199
- 'return_audio_index': 0,
200
- 'series': 'IMS-Toucan',
201
- 'emoji': '😒',
202
- 'title': 'Narration voice',
203
- },
204
- 'Pendrokar/style-tts-2': {
205
- 'name': 'StyleTTS v2',
206
- 'function': '/synthesize',
207
- 'text_param_index': 'text',
208
- 'return_audio_index': 0,
209
- 'is_zero_gpu_space': True,
210
- 'series': 'StyleTTS',
211
- 'emoji': '😷',
212
- 'title': 'Outclassed by Kokoro',
213
- },
214
- 'hexgrad/kokoro': {
215
- 'name': 'Kokoro v0.19',
216
- 'function': '/generate',
217
- 'text_param_index': 'text',
218
- 'return_audio_index': 0,
219
- 'is_zero_gpu_space': False,
220
- 'series': 'Kokoro',
221
- 'space_link': 'Remsky/Kokoro-TTS-Zero',
222
- 'title': 'Deprecated: Kokoro v1.0+ improves pronunciation',
223
- },
224
- 'hexgrad/Kokoro-TTS/0.23': {
225
- 'name': 'StyleTTS Kokoro v23',
226
- 'function': '/multilingual',
227
- 'text_param_index': 'text',
228
- 'return_audio_index': 0,
229
- 'is_zero_gpu_space': True,
230
- 'series': 'Kokoro',
231
- 'title': 'Deprecated: Kokoro v1.0+ improves pronunciation',
232
- },
233
- 'hexgrad/Kokoro-API': {
234
- 'name': 'Kokoro v1.0',
235
- 'function': '/predict',
236
- 'text_param_index': 'text',
237
- 'return_audio_index': 0,
238
- 'is_zero_gpu_space': False,
239
- 'series': 'Kokoro',
240
- 'space_link': 'hexgrad/kokoro',
241
- },
242
- 'amphion/maskgct': {
243
- 'name': 'MaskGCT',
244
- 'function': '/predict',
245
- 'text_param_index': 1,
246
- 'return_audio_index': 0,
247
- 'is_zero_gpu_space': True,
248
- 'series': 'MaskGCT',
249
- 'emoji': '🥵',
250
- 'title': 'Requires 300s reserved ZeroGPU time! Cannot afford!',
251
- },
252
- 'Svngoku/maskgct-audio-lab': {
253
- 'name': 'MaskGCT',
254
- 'function': '/predict',
255
- 'text_param_index': 1,
256
- 'return_audio_index': 0,
257
- 'is_zero_gpu_space': True,
258
- 'series': 'MaskGCT',
259
- 'emoji': '🥵',
260
- 'title': 'Requires 300s reserved ZeroGPU time! Cannot afford!',
261
- },
262
- 'lj1995/GPT-SoVITS-v2': {
263
- 'name': 'GPT-SoVITS v2',
264
- 'function': '/get_tts_wav',
265
- 'text_param_index': 'text',
266
- 'return_audio_index': 0,
267
- 'is_zero_gpu_space': True,
268
- 'series': 'GPT-SoVITS',
269
- 'title': 'Outclassed',
270
- },
271
- 'lj1995/GPT-SoVITS-ProPlus': {
272
- 'name': 'GPT-SoVITS ProPlus',
273
- 'function': '/get_tts_wav',
274
- 'text_param_index': 'text',
275
- 'return_audio_index': 0,
276
- 'is_zero_gpu_space': True,
277
- 'series': 'GPT-SoVITS',
278
- 'title': 'Outclassed',
279
- },
280
- 'ameerazam08/OuteTTS-0.2-500M-Demo': {
281
- 'name': 'OuteTTS v0.2 500M',
282
- 'function': '/generate_tts',
283
- 'text_param_index': 0,
284
- 'return_audio_index': 0,
285
- 'is_zero_gpu_space': True,
286
- 'series': 'OuteTTS',
287
- 'emoji': '🥵',
288
- 'title': 'Requires 300s reserved ZeroGPU time! Cannot afford!',
289
- },
290
- 'OuteAI/OuteTTS-0.3-1B-Demo': {
291
- 'name': 'OuteTTS v0.3 1B',
292
- 'function': '/generate_tts',
293
- 'text_param_index': 'text',
294
- 'return_audio_index': 0,
295
- 'is_zero_gpu_space': True,
296
- 'series': 'OuteTTS',
297
- 'emoji': '🥵',
298
- 'title': 'Requires 300s reserved ZeroGPU time! Cannot afford!',
299
- },
300
- 'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers': {
301
- 'name': 'LLaSA 1B',
302
- 'function': '/predict',
303
- 'text_param_index': 'input_text',
304
- 'return_audio_index': 0,
305
- 'is_zero_gpu_space': True,
306
- 'series': 'LLaSA',
307
- 'title': 'Broken space - Runtime error',
308
- },
309
- 'srinivasbilla/llasa-3b-tts': {
310
- 'name': 'LLaSA 3B',
311
- 'function': '/infer',
312
- 'text_param_index': 'target_text',
313
- 'return_audio_index': 0,
314
- 'is_zero_gpu_space': True,
315
- 'series': 'LLaSA',
316
- },
317
- 'srinivasbilla/llasa-8b-tts': {
318
- 'name': 'LLaSA 8B',
319
- 'function': '/infer',
320
- 'text_param_index': 'target_text',
321
- 'return_audio_index': 0,
322
- 'is_zero_gpu_space': True,
323
- 'series': 'LLaSA',
324
- },
325
- 'CAMB-AI/mars5_space': {
326
- 'name': 'MARS 5',
327
- 'function': '/on_click',
328
- 'text_param_index': 'text',
329
- 'return_audio_index': 0,
330
- 'is_zero_gpu_space': False,
331
- 'series': 'MARS',
332
- },
333
- 'CAMB-AI/mars6-turbo-demo': {
334
- 'name': 'MARS 6',
335
- 'function': '/inference',
336
- 'text_param_index': 'text',
337
- 'return_audio_index': 0,
338
- 'is_zero_gpu_space': False,
339
- 'is_closed_source': True,
340
- 'series': 'MARS',
341
- 'title': 'Unstable',
342
- },
343
- 'Steveeeeeeen/Zonos': {
344
- 'name': 'Zonos T',
345
- 'function': '/generate_audio',
346
- 'text_param_index': 'text',
347
- 'return_audio_index': 0,
348
- 'is_zero_gpu_space': True,
349
- 'series': 'Zonos',
350
- },
351
- 'Steveeeeeeen/Zonos/hybrid': {
352
- 'name': 'Zonos H',
353
- 'function': '/generate_audio',
354
- 'text_param_index': 'text',
355
- 'return_audio_index': 0,
356
- 'is_zero_gpu_space': True,
357
- 'series': 'Zonos',
358
- 'title': 'Outclassed',
359
- 'space_link': 'Steveeeeeeen/Zonos',
360
- },
361
- 'thunnai/SparkTTS': {
362
- 'name': 'Spark-TTS',
363
- 'function': '/voice_clone',
364
- 'text_param_index': 'text',
365
- 'return_audio_index': 0,
366
- 'is_zero_gpu_space': True,
367
- 'series': 'Spark-TTS',
368
- 'title': 'Outclassed',
369
- },
370
- 'sesame/csm-1b': {
371
- 'name': 'CSM 1B',
372
- 'function': '/infer',
373
- 'text_param_index': 'gen_conversation_input',
374
- 'return_audio_index': 0,
375
- 'is_zero_gpu_space': True,
376
- 'series': 'CSM-1B',
377
- 'title': 'Outclassed',
378
- },
379
- 'MohamedRashad/Orpheus-TTS': {
380
- 'name': 'Orpheus 3B v0.1',
381
- 'function': '/generate_speech',
382
- 'text_param_index': 'text',
383
- 'return_audio_index': 0,
384
- 'is_zero_gpu_space': True,
385
- 'series': 'Orpheus',
386
- },
387
- 'IndexTeam/IndexTTS': {
388
- 'name': 'Index TTS',
389
- 'function': '/gen_single',
390
- 'text_param_index': 'text',
391
- 'return_audio_index': 0,
392
- 'is_zero_gpu_space': True,
393
- 'series': 'Index',
394
- 'title': 'Outclassed',
395
- },
396
- 'nari-labs/Dia-1.6B': {
397
- 'name': 'Dia',
398
- 'function': '/generate_audio',
399
- 'text_param_index': 'text_input',
400
- 'return_audio_index': 0,
401
- 'is_zero_gpu_space': True,
402
- 'series': 'Dia',
403
- 'title': 'Outclassed',
404
- },
405
- 'ResembleAI/Chatterbox': {
406
- 'name': 'Chatterbox',
407
- 'function': '/generate_tts_audio',
408
- 'text_param_index': 'text_input',
409
- 'return_audio_index': 0,
410
- 'is_zero_gpu_space': True,
411
- 'series': 'Chatterbox',
412
- },
413
- 'ByteDance/MegaTTS3': {
414
- 'name': 'MegaTTS',
415
- 'function': '/predict',
416
- 'text_param_index': 'inp_text',
417
- 'return_audio_index': 0,
418
- 'is_zero_gpu_space': True,
419
- 'series': 'MegaTTS',
420
- },
421
  }
422
 
423
  # Create files
 
4
 
5
  # HF_SPACES data from models.py
6
  hf_spaces = {
7
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  }
9
 
10
  # Create files
create_space_inputs_files.py CHANGED
@@ -5,365 +5,16 @@ import os
5
  from gradio_client import handle_file
6
  from app.models import DEFAULT_VOICE_SAMPLE, DEFAULT_VOICE_TRANSCRIPT, DEFAULT_VOICE_PROMPT
7
 
8
- # HF_SPACES data from models.py
9
- hf_spaces = {
10
- # tonyassi ZeroGPU space of XTTS:
11
- 'coqui/xtts': {
12
- 1: DEFAULT_VOICE_SAMPLE, # voice sample
13
- # 'audio': DEFAULT_VOICE_SAMPLE, # voice sample
14
- },
15
- 'collabora/WhisperSpeech': {
16
- 1: DEFAULT_VOICE_SAMPLE, # voice sample
17
- 2: DEFAULT_VOICE_SAMPLE, # voice sample URL
18
- 3: 14.0, #Tempo - Gradio Slider issue: takes min. rather than value
19
- },
20
- 'myshell-ai/OpenVoice': {
21
- 1: 'default', # style
22
- 2: 'https://huggingface.co/spaces/myshell-ai/OpenVoiceV2/resolve/main/examples/speaker0.mp3', # voice sample
23
- },
24
- 'myshell-ai/OpenVoiceV2': {
25
- 1: 'en_us', # style
26
- 2: 'https://huggingface.co/spaces/myshell-ai/OpenVoiceV2/resolve/main/examples/speaker0.mp3', # voice sample
27
- },
28
- 'PolyAI/pheme': {
29
- 1: 'YOU1000000044_S0000798', # voice
30
- 2: 210,
31
- 3: 0.7, #Tempo - Gradio Slider issue: takes min. rather than value
32
- },
33
- 'Pendrokar/xVASynth-TTS': {
34
- 1: 'x_ex04', #fine-tuned voice model name
35
- 3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
36
- },
37
- 'Pendrokar/xVASynth-TTS/NoDeepMoji': {
38
- 1: 'x_ex02', #fine-tuned voice model name
39
- 3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
40
- 10: False, #Use DeepMoji
41
- },
42
- 'suno/bark': {
43
- 1: 'Speaker 3 (en)', # voice
44
- },
45
- 'amphion/Text-to-Speech': {
46
- 1: 'LikeManyWaters', # voice
47
- },
48
- 'LeeSangHoon/HierSpeech_TTS': {
49
- 1: handle_file('https://huggingface.co/spaces/LeeSangHoon/HierSpeech_TTS/resolve/main/example/female.wav'), # voice sample
50
- 2: 0.333,
51
- 3: 0.333,
52
- 4: 1,
53
- 5: 1,
54
- 6: 0,
55
- 7: 1111,
56
- },
57
- 'Manmay/tortoise-tts': {
58
- 1: None, # text-from-file
59
- 2: 'angie', # voice
60
- 3: 'disabled', # second voice for a dialogue
61
- 4: 'No', # split by newline
62
- },
63
- 'mrfakename/MeloTTS': {
64
- 'speaker': 'EN-Default', # DEFAULT_VOICE_SAMPLE=EN-Default
65
- 'speed': 1.0,
66
- 'language': 'EN',
67
- },
68
- 'mrfakename/MetaVoice-1B-v0.1': {
69
- 1: 5, # float (numeric value between 0.0 and 10.0) in 'Speech Stability - improves text following for a challenging speaker' Slider component
70
- 2: 5, # float (numeric value between 1.0 and 5.0) in 'Speaker similarity - How closely to match speaker identity and speech style.' Slider component
71
- 3: "Preset voices", # Literal['Preset voices', 'Upload target voice'] in 'Choose voice' Radio component
72
- 4: "Bria", # Literal['Bria', 'Alex', 'Jacob'] in 'Preset voices' Dropdown component
73
- 5: None, # filepath in 'Upload a clean sample to clone. Sample should contain 1 speaker, be between 30-90 seconds and not contain background noise.' Audio component
74
- },
75
- 'parler-tts/parler_tts': { # mini
76
- 1: 'Laura; Laura\'s ' + DEFAULT_VOICE_PROMPT, #description / voice prompt
77
- 2: False, #use_large
78
- },
79
- 'parler-tts/parler_tts/large': {
80
- 1: 'Laura; Laura\'s ' + DEFAULT_VOICE_PROMPT, #description / voice prompt
81
- 2: True, #use_large
82
- },
83
- # multi-lang parler mini 1.1
84
- 'PHBJT/multi_parler_tts': {
85
- 'description': 'a ' + DEFAULT_VOICE_PROMPT, #description / voice prompt
86
- 'do_format': False, # Reformat description using Gemma 2b
87
- },
88
- 'parler-tts/parler-tts-expresso': {
89
- 1: 'Elisabeth; Elisabeth\'s ' + DEFAULT_VOICE_PROMPT, #description / voice prompt
90
- },
91
- 'innoai/Edge-TTS-Text-to-Speech': {
92
- 1: 'en-US-EmmaMultilingualNeural - en-US (Female)', # voice
93
- 2: 0, # pace rate
94
- 3: 0, # pitch
95
- },
96
-
97
- 'fishaudio/fish-speech-1': {
98
- 'normalize': False,
99
- 'reference_audio': handle_file('https://huggingface.co/spaces/fishaudio/fish-speech-1/resolve/main/examples/English.wav'),
100
- 'reference_text': 'In the ancient land of Eldoria, where the skies were painted with shades of mystic hues and the forests whispered secrets of old, there existed a dragon named Zephyros. Unlike the fearsome tales of dragons that plagued human hearts with terror, Zephyros was a creature of wonder and wisdom, revered by all who knew of his existence.', # reference_text
101
- 'max_new_tokens': 1024,
102
- 'chunk_length': 200,
103
- 'top_p': 0.7,
104
- 'repetition_penalty': 1.2,
105
- 'temperature': 0.7,
106
- 'seed': 0,
107
- 'use_memory_cache': "never",
108
- },
109
-
110
- # OpenAudio S1 (Fish Audio)
111
- 'fishaudio/openaudio-s1-mini': {
112
- 'reference_id': None,
113
- 'reference_audio': handle_file('voice_samples/English.wav'),
114
- 'reference_text': 'In the ancient land of Eldoria, where the skies were painted with shades of mystic hues and the forests whispered secrets of old, there existed a dragon named Zephyros. Unlike the fearsome tales of dragons that plagued human hearts with terror, Zephyros was a creature of wonder and wisdom, revered by all who knew of his existence.', # reference_text
115
- 'max_new_tokens': 0,
116
- 'chunk_length': 0,
117
- 'top_p': 0.9,
118
- 'repetition_penalty': 1.1,
119
- 'temperature': 0.9,
120
- 'seed': 1,
121
- 'use_memory_cache': "on",
122
- },
123
-
124
- # F5
125
- 'mrfakename/E2-F5-TTS': {
126
- 'ref_audio': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
127
- 'ref_text': 'Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we\'ll call Kruger Bern.',
128
- 'remove_silence': False,
129
- },
130
-
131
- # E2 TODO: call switch model
132
- 'mrfakename/E2-F5-TTS/E2': {
133
- 'ref_audio_input': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
134
- 'ref_text_input': 'Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we\'ll call Kruger Bern.',
135
- 'remove_silence': False,
136
- 'cross_fade_duration_slider': 0.15,
137
- 'nfe_slider': 32,
138
- 'speed_slider': 1,
139
- },
140
-
141
- # IMS-Toucan
142
- 'Flux9665/MassivelyMultilingualTTS': {
143
- 1: "English (eng)", #language
144
- 2: 0.6, #prosody_creativity
145
- 3: 1, #duration_scaling_factor
146
- 4: 41, #voice_seed
147
- 5: -7.5, #emb1
148
- 6: None, #reference_audio
149
- },
150
-
151
- # StyleTTS 2
152
- 'Pendrokar/style-tts-2': {
153
- 'voice': "f-us-2",
154
- 'lang': 'en-us',
155
- 'lngsteps': 8,
156
- },
157
-
158
- # StyleTTS 2 Kokoro v0.19
159
- 'hexgrad/kokoro': {
160
- 'voice': "af",
161
- 'ps': None,
162
- 'speed': 1,
163
- 'trim': 0.5,
164
- 'use_gpu': False, # fast enough with multithreaded CPU
165
- 'sk': os.getenv('KOKORO'),
166
- },
167
-
168
- # StyleTTS 2 Kokoro v0.23
169
- 'hexgrad/Kokoro-TTS/0.23': {
170
- 'voice': "af",
171
- 'speed': 1,
172
- 'trim': 0.5,
173
- 'sk': os.getenv('KOKORO'),
174
- },
175
-
176
- # StyleTTS 2 Kokoro v1.0
177
- 'hexgrad/Kokoro-API': {
178
- 'voice': "af_heart",
179
- 'speed': 1,
180
- },
181
-
182
- # maskGCT (by amphion)
183
- 'amphion/maskgct': {
184
- 0: DEFAULT_VOICE_SAMPLE, #prompt_wav
185
- 2: -1, #target_len
186
- 3: 25, #n_timesteps
187
- },
188
- 'Svngoku/maskgct-audio-lab': {
189
- 0: DEFAULT_VOICE_SAMPLE, #prompt_wav
190
- 2: -1, #target_len
191
- 3: 25, #n_timesteps
192
- },
193
- 'lj1995/GPT-SoVITS-v2': {
194
- 'ref_wav_path': handle_file('voice_samples/EN_B00004_S00051_W000213.wav'),
195
- 'prompt_text': "Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we'll call",
196
- 'prompt_language': "English",
197
- # text: "Please surprise me and speak in whatever voice you enjoy.",
198
- 'text_language': "English",
199
- 'how_to_cut': "No slice",
200
- 'top_k': 15,
201
- 'top_p': 1,
202
- 'temperature': 1,
203
- 'ref_free': False,
204
- 'speed': 1,
205
- 'if_freeze': False,
206
- 'inp_refs': None,
207
- },
208
- 'lj1995/GPT-SoVITS-ProPlus': {
209
- 'ref_wav_path': handle_file('voice_samples/EN_B00004_S00051_W000213.wav'),
210
- 'prompt_text': "Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we'll call",
211
- 'prompt_language': "英文", # "English" in Chinese
212
- # text: "Please surprise me and speak in whatever voice you enjoy.",
213
- 'text_language': "英文", # "English" in Chinese
214
- 'how_to_cut': "不切", # "No slice" in Chinese
215
- 'top_k': 15,
216
- 'top_p': 1,
217
- 'temperature': 1,
218
- 'ref_free': False,
219
- 'speed': 1,
220
- 'if_freeze': False,
221
- 'inp_refs': None,
222
- },
223
- 'ameerazam08/OuteTTS-0.2-500M-Demo': {
224
- 1: 0.1, # temperature
225
- 2: 1.1, # repetition_penalty
226
- 3: "en", # language
227
- 4: "female_1", # speaker_selection
228
- 5: None, # reference_audio
229
- 6: None, # reference_text
230
- },
231
- 'OuteAI/OuteTTS-0.3-1B-Demo': {
232
- 'temperature': 0.1,
233
- 'repetition_penalty': 1.1,
234
- 'speaker_selection': "en_female_1",
235
- 'reference_audio': None,
236
- },
237
- 'HKUST-Audio/Llasa-1B-finetuned-for-two-speakers': {
238
- 'speaker_choice': 'kore',
239
- },
240
- 'srinivasbilla/llasa-3b-tts': {
241
- 'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
242
- },
243
- 'srinivasbilla/llasa-8b-tts': {
244
- 'sample_audio_path': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
245
- },
246
-
247
- # MARS 5
248
- 'CAMB-AI/mars5_space': {
249
- 'audio_file': DEFAULT_VOICE_SAMPLE,
250
- 'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
251
- 'temperature': 0.8,
252
- 'top_k': -1,
253
- 'top_p': 0.2,
254
- 'typical_p': 1,
255
- 'freq_penalty': 2.6,
256
- 'presence_penalty': 0.4,
257
- 'rep_penalty_window': 100,
258
- 'nar_guidance_w': 3,
259
- 'deep_clone': True, # too slow for deep clone
260
- },
261
-
262
- # MARS 6
263
- 'CAMB-AI/mars6-turbo-demo': {
264
- 'reference_audio': DEFAULT_VOICE_SAMPLE,
265
- 'reference_text': DEFAULT_VOICE_TRANSCRIPT,
266
- 'ras_K': 10,
267
- 'ras_t_r': 0.09,
268
- 'top_p': 0.2,
269
- 'quality_prefix': "48000",
270
- 'clone_method': "deep-clone",
271
- },
272
-
273
- # Zonos
274
- 'Steveeeeeeen/Zonos': {
275
- 'model_choice':"Zyphra/Zonos-v0.1-transformer",
276
- 'language': "en-us",
277
- 'speaker_audio': None, # optional
278
- 'prefix_audio': handle_file('https://huggingface.co/spaces/Steveeeeeeen/Zonos/resolve/main/assets/silence_100ms.wav'),
279
- # 'e1': 1,
280
- # 'e2': 0.05,
281
- # 'e3': 0.05,
282
- # 'e4': 0.05,
283
- # 'e5': 0.05,
284
- # 'e6': 0.05,
285
- # 'e7': 0.1,
286
- # 'e8': 0.2,
287
- 'vq_single': 0.78,
288
- 'fmax': 24000,
289
- 'pitch_std': 45,
290
- 'speaking_rate': 15,
291
- 'dnsmos_ovrl': 4,
292
- 'speaker_noised': False,
293
- 'cfg_scale': 2,
294
- 'min_p': 0.15,
295
- 'seed': 420,
296
- 'randomize_seed': False, # Set to False to easily recreate the state
297
- 'unconditional_keys': ["emotion"], # makes it ignore e1-e8
298
- },
299
- # 'Steveeeeeeen/Zonos/hybrid': {
300
- # 'model_choice': 'Zyphra/Zonos-v0.1-hybrid',
301
- # },
302
-
303
- # Spark-TTS
304
- 'thunnai/SparkTTS' : {
305
- 'prompt_text': DEFAULT_VOICE_TRANSCRIPT,
306
- 'prompt_wav_upload': DEFAULT_VOICE_SAMPLE,
307
- 'prompt_wav_record': None,
308
- },
309
-
310
- # csm-1b
311
- 'sesame/csm-1b' : {
312
- 'text_prompt_speaker_a': 'And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.',
313
- 'text_prompt_speaker_b': 'And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.', #second speaker unused
314
- 'audio_prompt_speaker_a': handle_file('voice_samples/read_speech_a.wav'),
315
- 'audio_prompt_speaker_b': handle_file('voice_samples/read_speech_a.wav'), #second speaker unused
316
- },
317
-
318
- # Orpheus 3B 0.1
319
- 'MohamedRashad/Orpheus-TTS' : {
320
- 'voice': 'tara',
321
- 'temperature': 0.6,
322
- 'top_p': 0.95,
323
- 'repetition_penalty': 1.1,
324
- 'max_new_tokens': 1200,
325
- },
326
-
327
- # Index TTS
328
- 'IndexTeam/IndexTTS' : {
329
- 'prompt': DEFAULT_VOICE_SAMPLE, # voice
330
- },
331
-
332
- # Dia
333
- 'nari-labs/Dia-1.6B': {
334
- 'audio_prompt_input': None,
335
- 'max_new_tokens': 860, # min tokens as we use only a single speaker
336
- 'cfg_scale': 3, # 1-5 # Higher values increase adherence to the text prompt.
337
- 'temperature': 1.3, # Lower values make the output more deterministic, higher values increase randomness.
338
- 'top_p': 0.95, # Filters vocabulary to the most likely tokens cumulatively reaching probability P.
339
- 'cfg_filter_top_k': 35, # Top k filter for CFG guidance.
340
- 'speed_factor': 0.94, # Adjusts the speed of the generated audio (1.0 = original speed).
341
- },
342
-
343
- # Chatterbox
344
- 'ResembleAI/Chatterbox': {
345
- 'audio_prompt_path_input': handle_file('https://cdn-uploads.huggingface.co/production/uploads/642c0b71eb6e214d4f8897a3/H8qgQbv6e8bgGVCM-w4mq.wav'), # voice; chosen by Manmay of Resemble AI org - https://huggingface.co/spaces/ResembleAI/Chatterbox/discussions/14#686cd36e9479e00d8d3fc079
346
- 'exaggeration_input': 0.5, # 1-2
347
- 'temperature_input': 0.8, # Lower values make the output more deterministic, higher values increase randomness.
348
- 'seed_num_input': 1, # Seed for random number generation, can be any integer.
349
- 'cfgw_input': 0.5, # CFG/Pace weight, can be any float value.
350
- },
351
-
352
- # MegaTTS
353
- 'ByteDance/MegaTTS3': {
354
- 'inp_audio': handle_file('voice_samples/xtts_sample_megatts.wav'),
355
- 'inp_npy': handle_file('voice_samples/xtts_sample_megatts.npy'),
356
- 'infer_timestep': 32,
357
- 'p_w': 1.4,
358
- 't_w': 3,
359
- },
360
  }
361
 
362
  # Create files
363
  output_dir = 'app/inputs'
364
  os.makedirs(output_dir, exist_ok=True)
365
 
366
- for key, value in hf_spaces.items():
367
  # Create safe filename from key
368
  filename = key.replace('/', '__') + '.json'
369
  filepath = os.path.join(output_dir, filename)
@@ -373,4 +24,4 @@ for key, value in hf_spaces.items():
373
 
374
  print(f"Created: {filepath}")
375
 
376
- print(f"\nTotal files created: {len(hf_spaces)}")
 
5
  from gradio_client import handle_file
6
  from app.models import DEFAULT_VOICE_SAMPLE, DEFAULT_VOICE_TRANSCRIPT, DEFAULT_VOICE_PROMPT
7
 
8
+ # OVERRIDE_INPUTS data from models.py
9
+ hf_space_inputs = {
10
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  }
12
 
13
  # Create files
14
  output_dir = 'app/inputs'
15
  os.makedirs(output_dir, exist_ok=True)
16
 
17
+ for key, value in hf_space_inputs.items():
18
  # Create safe filename from key
19
  filename = key.replace('/', '__') + '.json'
20
  filepath = os.path.join(output_dir, filename)
 
24
 
25
  print(f"Created: {filepath}")
26
 
27
+ print(f"\nTotal files created: {len(hf_space_inputs)}")
test_gradio_endpoints.py CHANGED
@@ -295,8 +295,8 @@ def main():
295
  config['space_url'] = space_url
296
 
297
  try:
298
- # Create Gradio client with 15 second timeout
299
- client = create_client_with_timeout(space_url, hf_token, timeout_secs=15)
300
 
301
  # Validate the endpoint
302
  result = validate_endpoint(model_name, client, config)
 
295
  config['space_url'] = space_url
296
 
297
  try:
298
+ # Create Gradio client with 30 second timeout
299
+ client = create_client_with_timeout(space_url, hf_token, timeout_secs=30)
300
 
301
  # Validate the endpoint
302
  result = validate_endpoint(model_name, client, config)