Dionyssos committed on
Commit
6cbb003
·
1 Parent(s): e69f5ce
Files changed (2) hide show
  1. README.md +7 -11
  2. app.py +129 -370
README.md CHANGED
@@ -1,18 +1,14 @@
1
- ---
2
- title: Speech analysis
3
- emoji: 💤
4
  colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
7
  sdk_version: 5.41.1
8
  app_file: app.py
9
- short_description: TTS for CPU
10
  license: cc-by-nc-4.0
11
  tags:
12
- - non-AR
13
- - affective
14
- - shift
15
- - tts
16
- ---
17
-
18
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ title: heritage TTS
2
+ emoji: 🏛️
 
3
  colorFrom: gray
4
  colorTo: gray
5
  sdk: gradio
6
  sdk_version: 5.41.1
7
  app_file: app.py
8
+ short_description: https://shift-europe.eu/
9
  license: cc-by-nc-4.0
10
  tags:
11
+ - non-AR
12
+ - affective
13
+ - shift
14
+ - tts
 
 
 
app.py CHANGED
@@ -59,56 +59,56 @@ def audionar_tts(text=None,
59
 
60
 
61
  if text is None or text.strip() == '':
62
- text = 'No Audio or Txt Input'
63
-
64
-
65
 
 
66
 
67
- if lang not in language_names: # StyleTTS2
68
-
69
- text = only_greek_or_only_latin(text, lang='eng')
70
 
71
- x = _tts.inference(text,
72
- ref_s='wav/' + lang + '.wav')[0, 0, :].numpy() # 24 Khz
73
-
74
- if x.shape[0] > 10:
75
 
76
- x = audresample.resample(signal=x.astype(np.float32),
77
- original_rate=24000,
78
- target_rate=16000)[0, :] # 16 KHz
79
-
80
- else: # VITS
81
 
82
- lang_code = lang_map.get(lang.lower(), lang.lower().split()[0].strip())
 
 
83
 
84
- global cached_lang_code, cached_net_g, cached_tokenizer
85
 
86
- if 'cached_lang_code' not in globals() or cached_lang_code != lang_code:
87
- cached_lang_code = lang_code
88
- cached_net_g = VitsModel.from_pretrained(f'facebook/mms-tts-{lang_code}').eval()
89
- cached_tokenizer = VitsTokenizer.from_pretrained(f'facebook/mms-tts-{lang_code}')
90
 
91
- net_g = cached_net_g
92
- tokenizer = cached_tokenizer
93
- text = only_greek_or_only_latin(text, lang=lang_code)
94
- text = transliterate_number(text, lang=lang_code)
95
- text = fix_vocals(text, lang=lang_code)
96
 
97
 
98
- sentences = textwrap.wrap(text, width=439)
99
 
100
- total_audio_parts = []
101
- for sentence in sentences:
102
- inputs = cached_tokenizer(sentence, return_tensors="pt")
103
- with torch.no_grad():
104
- audio_part = cached_net_g(
105
- input_ids=inputs.input_ids,
106
- attention_mask=inputs.attention_mask,
107
- lang_code=lang_code,
108
- )[0, :]
109
- total_audio_parts.append(audio_part)
110
 
111
- x = torch.cat(total_audio_parts).cpu().numpy()
112
 
113
 
114
  if soundscape and soundscape.strip():
@@ -164,334 +164,93 @@ def audionar_tts(text=None,
164
 
165
 
166
  # TTS
167
- # VOICES = [f'wav/{vox}' for vox in os.listdir('wav')]
168
- # add unidecode (to parse non-roman characters for the StyleTTS2
169
- # # for the VITS it should better skip the unknown letters - dont use unidecode())
170
- # at generation fill the state of "last tts"
171
- # at record fill the state of "last record" and place in list of voice/langs for TTS
172
- VOICES = ['jv_ID_google-gmu_04982.wav',
173
- 'it_IT_mls_1595.wav',
174
- 'en_US_vctk_p303.wav',
175
- 'en_US_vctk_p306.wav',
176
- 'it_IT_mls_8842.wav',
177
- 'en_US_cmu_arctic_ksp.wav',
178
- 'jv_ID_google-gmu_05970.wav',
179
- 'en_US_vctk_p318.wav',
180
- 'ha_NE_openbible.wav',
181
- 'ne_NP_ne-google_0883.wav',
182
- 'en_US_vctk_p280.wav',
183
- 'bn_multi_1010.wav',
184
- 'en_US_vctk_p259.wav',
185
- 'it_IT_mls_844.wav',
186
- 'en_US_vctk_p269.wav',
187
- 'en_US_vctk_p285.wav',
188
- 'de_DE_m-ailabs_angela_merkel.wav',
189
- 'en_US_vctk_p316.wav',
190
- 'en_US_vctk_p362.wav',
191
- 'jv_ID_google-gmu_06207.wav',
192
- 'tn_ZA_google-nwu_9061.wav',
193
- 'fr_FR_tom.wav',
194
- 'en_US_vctk_p233.wav',
195
- 'it_IT_mls_4975.wav',
196
- 'en_US_vctk_p236.wav',
197
- 'bn_multi_01232.wav',
198
- 'bn_multi_5958.wav',
199
- 'it_IT_mls_9185.wav',
200
- 'en_US_vctk_p248.wav',
201
- 'en_US_vctk_p287.wav',
202
- 'it_IT_mls_9772.wav',
203
- 'te_IN_cmu-indic_sk.wav',
204
- 'tn_ZA_google-nwu_8333.wav',
205
- 'en_US_vctk_p260.wav',
206
- 'en_US_vctk_p247.wav',
207
- 'en_US_vctk_p329.wav',
208
- 'en_US_cmu_arctic_fem.wav',
209
- 'en_US_cmu_arctic_rms.wav',
210
- 'en_US_vctk_p308.wav',
211
- 'jv_ID_google-gmu_08736.wav',
212
- 'en_US_vctk_p245.wav',
213
- 'fr_FR_m-ailabs_nadine_eckert_boulet.wav',
214
- 'jv_ID_google-gmu_03314.wav',
215
- 'en_US_vctk_p239.wav',
216
- 'jv_ID_google-gmu_05540.wav',
217
- 'it_IT_mls_7440.wav',
218
- 'en_US_vctk_p310.wav',
219
- 'en_US_vctk_p237.wav',
220
- 'en_US_hifi-tts_92.wav',
221
- 'en_US_cmu_arctic_aew.wav',
222
- 'ne_NP_ne-google_2099.wav',
223
- 'en_US_vctk_p226.wav',
224
- 'af_ZA_google-nwu_1919.wav',
225
- 'jv_ID_google-gmu_03727.wav',
226
- 'en_US_vctk_p317.wav',
227
- 'tn_ZA_google-nwu_0378.wav',
228
- 'nl_pmk.wav',
229
- 'en_US_vctk_p286.wav',
230
- 'tn_ZA_google-nwu_3342.wav',
231
- # 'en_US_vctk_p343.wav',
232
- 'de_DE_m-ailabs_ramona_deininger.wav',
233
- 'jv_ID_google-gmu_03424.wav',
234
- 'en_US_vctk_p341.wav',
235
- 'jv_ID_google-gmu_03187.wav',
236
- 'ne_NP_ne-google_3960.wav',
237
- 'jv_ID_google-gmu_06080.wav',
238
- 'ne_NP_ne-google_3997.wav',
239
- # 'en_US_vctk_p267.wav',
240
- 'en_US_vctk_p240.wav',
241
- 'ne_NP_ne-google_5687.wav',
242
- 'ne_NP_ne-google_9407.wav',
243
- 'jv_ID_google-gmu_05667.wav',
244
- 'jv_ID_google-gmu_01519.wav',
245
- 'ne_NP_ne-google_7957.wav',
246
- 'it_IT_mls_4705.wav',
247
- 'ne_NP_ne-google_6329.wav',
248
- 'it_IT_mls_1725.wav',
249
- 'tn_ZA_google-nwu_8914.wav',
250
- 'en_US_ljspeech.wav',
251
- 'tn_ZA_google-nwu_4850.wav',
252
- 'en_US_vctk_p238.wav',
253
- 'en_US_vctk_p302.wav',
254
- 'jv_ID_google-gmu_08178.wav',
255
- 'en_US_vctk_p313.wav',
256
- 'af_ZA_google-nwu_2418.wav',
257
- 'bn_multi_00737.wav',
258
- 'en_US_vctk_p275.wav', # y
259
- 'af_ZA_google-nwu_0184.wav',
260
- 'jv_ID_google-gmu_07638.wav',
261
- 'ne_NP_ne-google_6587.wav',
262
- 'ne_NP_ne-google_0258.wav',
263
- 'en_US_vctk_p232.wav',
264
- 'en_US_vctk_p336.wav',
265
- 'jv_ID_google-gmu_09039.wav',
266
- 'en_US_vctk_p312.wav',
267
- 'af_ZA_google-nwu_8148.wav',
268
- 'en_US_vctk_p326.wav',
269
- 'en_US_vctk_p264.wav',
270
- 'en_US_vctk_p295.wav',
271
- # 'en_US_vctk_p298.wav',
272
- 'es_ES_m-ailabs_victor_villarraza.wav',
273
- 'pl_PL_m-ailabs_nina_brown.wav',
274
- 'tn_ZA_google-nwu_9365.wav',
275
- 'en_US_vctk_p294.wav',
276
- 'jv_ID_google-gmu_00658.wav',
277
- 'jv_ID_google-gmu_08305.wav',
278
- 'en_US_vctk_p330.wav',
279
- 'gu_IN_cmu-indic_cmu_indic_guj_dp.wav',
280
- 'jv_ID_google-gmu_05219.wav',
281
- 'en_US_vctk_p284.wav',
282
- 'de_DE_m-ailabs_eva_k.wav',
283
- # 'bn_multi_00779.wav',
284
- 'en_UK_apope.wav',
285
- 'en_US_vctk_p345.wav',
286
- 'it_IT_mls_6744.wav',
287
- 'en_US_vctk_p347.wav',
288
- 'en_US_m-ailabs_mary_ann.wav',
289
- 'en_US_m-ailabs_elliot_miller.wav',
290
- 'en_US_vctk_p279.wav',
291
- 'ru_RU_multi_nikolaev.wav',
292
- 'bn_multi_4811.wav',
293
- 'tn_ZA_google-nwu_7693.wav',
294
- 'bn_multi_01701.wav',
295
- 'en_US_vctk_p262.wav',
296
- # 'en_US_vctk_p266.wav',
297
- 'en_US_vctk_p243.wav',
298
- 'en_US_vctk_p297.wav',
299
- 'en_US_vctk_p278.wav',
300
- 'jv_ID_google-gmu_02059.wav',
301
- 'en_US_vctk_p231.wav',
302
- 'te_IN_cmu-indic_kpn.wav',
303
- 'en_US_vctk_p250.wav',
304
- 'it_IT_mls_4974.wav',
305
- 'en_US_cmu_arctic_awbrms.wav',
306
- # 'en_US_vctk_p263.wav',
307
- 'nl_femal.wav',
308
- 'tn_ZA_google-nwu_6116.wav',
309
- 'jv_ID_google-gmu_06383.wav',
310
- 'en_US_vctk_p225.wav',
311
- 'en_US_vctk_p228.wav',
312
- 'it_IT_mls_277.wav',
313
- 'tn_ZA_google-nwu_7866.wav',
314
- 'en_US_vctk_p300.wav',
315
- 'ne_NP_ne-google_0649.wav',
316
- 'es_ES_carlfm.wav',
317
- 'jv_ID_google-gmu_06510.wav',
318
- 'de_DE_m-ailabs_rebecca_braunert_plunkett.wav',
319
- 'en_US_vctk_p340.wav',
320
- 'en_US_cmu_arctic_gka.wav',
321
- 'ne_NP_ne-google_2027.wav',
322
- 'jv_ID_google-gmu_09724.wav',
323
- 'en_US_vctk_p361.wav',
324
- 'ne_NP_ne-google_6834.wav',
325
- 'jv_ID_google-gmu_02326.wav',
326
- 'fr_FR_m-ailabs_zeckou.wav',
327
- 'tn_ZA_google-nwu_1932.wav',
328
- # 'female-20-happy.wav',
329
- 'tn_ZA_google-nwu_1483.wav',
330
- 'de_DE_thorsten-emotion_amused.wav',
331
- 'ru_RU_multi_minaev.wav',
332
- 'sw_lanfrica.wav',
333
- 'en_US_vctk_p271.wav',
334
- 'tn_ZA_google-nwu_0441.wav',
335
- 'it_IT_mls_6001.wav',
336
- 'en_US_vctk_p305.wav',
337
- 'it_IT_mls_8828.wav',
338
- 'jv_ID_google-gmu_08002.wav',
339
- 'it_IT_mls_2033.wav',
340
- 'tn_ZA_google-nwu_3629.wav',
341
- 'it_IT_mls_6348.wav',
342
- 'en_US_cmu_arctic_axb.wav',
343
- 'it_IT_mls_8181.wav',
344
- 'en_US_vctk_p230.wav',
345
- 'af_ZA_google-nwu_7214.wav',
346
- 'nl_nathalie.wav',
347
- 'it_IT_mls_8207.wav',
348
- 'ko_KO_kss.wav',
349
- 'af_ZA_google-nwu_6590.wav',
350
- 'jv_ID_google-gmu_00264.wav',
351
- 'tn_ZA_google-nwu_6234.wav',
352
- 'jv_ID_google-gmu_05522.wav',
353
- 'en_US_cmu_arctic_lnh.wav',
354
- 'en_US_vctk_p272.wav',
355
- 'en_US_cmu_arctic_slp.wav',
356
- 'en_US_vctk_p299.wav',
357
- 'en_US_hifi-tts_9017.wav',
358
- 'it_IT_mls_4998.wav',
359
- 'it_IT_mls_6299.wav',
360
- 'en_US_cmu_arctic_rxr.wav',
361
- # 'female-46-neutral.wav',
362
- 'jv_ID_google-gmu_01392.wav',
363
- 'tn_ZA_google-nwu_8512.wav',
364
- 'en_US_vctk_p244.wav',
365
- # 'bn_multi_3108.wav',
366
- # 'it_IT_mls_7405.wav',
367
- # 'bn_multi_3713.wav',
368
- # 'yo_openbible.wav',
369
- # 'jv_ID_google-gmu_01932.wav',
370
- 'en_US_vctk_p270.wav',
371
- 'tn_ZA_google-nwu_6459.wav',
372
- 'bn_multi_4046.wav',
373
- 'en_US_vctk_p288.wav',
374
- 'en_US_vctk_p251.wav',
375
- 'es_ES_m-ailabs_tux.wav',
376
- 'tn_ZA_google-nwu_6206.wav',
377
- 'bn_multi_9169.wav',
378
- # 'en_US_vctk_p293.wav',
379
- # 'en_US_vctk_p255.wav',
380
- 'af_ZA_google-nwu_8963.wav',
381
- # 'en_US_vctk_p265.wav',
382
- 'gu_IN_cmu-indic_cmu_indic_guj_ad.wav',
383
- 'jv_ID_google-gmu_07335.wav',
384
- 'en_US_vctk_p323.wav',
385
- 'en_US_vctk_p281.wav',
386
- 'en_US_cmu_arctic_bdl.wav',
387
- 'en_US_m-ailabs_judy_bieber.wav',
388
- 'it_IT_mls_10446.wav',
389
- 'en_US_vctk_p261.wav',
390
- 'en_US_vctk_p292.wav',
391
- 'te_IN_cmu-indic_ss.wav',
392
- 'en_US_vctk_p311.wav',
393
- 'it_IT_mls_12428.wav',
394
- 'en_US_cmu_arctic_aup.wav',
395
- 'jv_ID_google-gmu_04679.wav',
396
- 'it_IT_mls_4971.wav',
397
- 'en_US_cmu_arctic_ljm.wav',
398
- 'fa_haaniye.wav',
399
- 'en_US_vctk_p339.wav',
400
- 'tn_ZA_google-nwu_7896.wav',
401
- 'en_US_vctk_p253.wav',
402
- 'it_IT_mls_5421.wav',
403
- # 'ne_NP_ne-google_0546.wav',
404
- 'vi_VN_vais1000.wav',
405
- 'en_US_vctk_p229.wav',
406
- 'en_US_vctk_p254.wav',
407
- 'en_US_vctk_p258.wav',
408
- 'it_IT_mls_7936.wav',
409
- 'en_US_vctk_p301.wav',
410
- 'tn_ZA_google-nwu_0045.wav',
411
- 'it_IT_mls_659.wav',
412
- 'tn_ZA_google-nwu_7674.wav',
413
- 'it_IT_mls_12804.wav',
414
- 'el_GR_rapunzelina.wav',
415
- 'en_US_hifi-tts_6097.wav',
416
- 'en_US_vctk_p257.wav',
417
- 'jv_ID_google-gmu_07875.wav',
418
- 'it_IT_mls_1157.wav',
419
- 'it_IT_mls_643.wav',
420
- 'en_US_vctk_p304.wav',
421
- 'ru_RU_multi_hajdurova.wav',
422
- 'it_IT_mls_8461.wav',
423
- 'bn_multi_3958.wav',
424
- 'it_IT_mls_1989.wav',
425
- 'en_US_vctk_p249.wav',
426
- # 'bn_multi_0834.wav',
427
- 'en_US_vctk_p307.wav',
428
- 'es_ES_m-ailabs_karen_savage.wav',
429
- 'fr_FR_m-ailabs_bernard.wav',
430
- 'en_US_vctk_p252.wav',
431
- 'en_US_cmu_arctic_jmk.wav',
432
- 'en_US_vctk_p333.wav',
433
- 'tn_ZA_google-nwu_4506.wav',
434
- 'ne_NP_ne-google_0283.wav',
435
- 'de_DE_m-ailabs_karlsson.wav',
436
- 'en_US_cmu_arctic_awb.wav',
437
- 'en_US_vctk_p246.wav',
438
- 'en_US_cmu_arctic_clb.wav',
439
- 'en_US_vctk_p364.wav',
440
- 'nl_flemishguy.wav',
441
- 'en_US_vctk_p276.wav', # y
442
- # 'en_US_vctk_p274.wav',
443
- 'fr_FR_m-ailabs_gilles_g_le_blanc.wav',
444
- 'it_IT_mls_7444.wav',
445
- 'style_o22050.wav',
446
- 'en_US_vctk_s5.wav',
447
- 'en_US_vctk_p268.wav',
448
- 'it_IT_mls_6807.wav',
449
- 'it_IT_mls_2019.wav',
450
- # 'male-60-angry.wav',
451
- 'af_ZA_google-nwu_8924.wav',
452
- 'en_US_vctk_p374.wav',
453
- 'en_US_vctk_p363.wav',
454
- 'it_IT_mls_644.wav',
455
- 'ne_NP_ne-google_3614.wav',
456
- 'en_US_vctk_p241.wav',
457
- 'ne_NP_ne-google_3154.wav',
458
- 'en_US_vctk_p234.wav',
459
- 'it_IT_mls_8384.wav',
460
- 'fr_FR_m-ailabs_ezwa.wav',
461
- 'it_IT_mls_5010.wav',
462
- 'en_US_vctk_p351.wav',
463
- 'en_US_cmu_arctic_eey.wav',
464
- 'jv_ID_google-gmu_04285.wav',
465
- 'jv_ID_google-gmu_06941.wav',
466
- 'hu_HU_diana-majlinger.wav',
467
- 'tn_ZA_google-nwu_2839.wav',
468
- 'bn_multi_03042.wav',
469
- 'tn_ZA_google-nwu_5628.wav',
470
- 'it_IT_mls_4649.wav',
471
- 'af_ZA_google-nwu_7130.wav',
472
- 'en_US_cmu_arctic_slt.wav',
473
- 'jv_ID_google-gmu_04175.wav',
474
- 'gu_IN_cmu-indic_cmu_indic_guj_kt.wav',
475
- 'jv_ID_google-gmu_00027.wav',
476
- 'jv_ID_google-gmu_02884.wav',
477
- 'en_US_vctk_p360.wav',
478
- 'en_US_vctk_p334.wav',
479
- # 'male-27-sad.wav',
480
- 'tn_ZA_google-nwu_1498.wav',
481
- 'fi_FI_harri-tapani-ylilammi.wav',
482
- 'bn_multi_rm.wav',
483
- 'ne_NP_ne-google_2139.wav',
484
- 'pl_PL_m-ailabs_piotr_nater.wav',
485
- 'fr_FR_siwis.wav',
486
- 'nl_bart-de-leeuw.wav',
487
- 'jv_ID_google-gmu_04715.wav',
488
- 'en_US_vctk_p283.wav',
489
- 'en_US_vctk_p314.wav',
490
- 'en_US_vctk_p335.wav',
491
- 'jv_ID_google-gmu_07765.wav',
492
- 'en_US_vctk_p273.wav'
493
- ]
494
- VOICES = [t[:-4] for t in VOICES] # crop .wav for visuals in gr.DropDown
495
 
496
  _tts = StyleTTS2().to('cpu')
497
 
@@ -506,16 +265,16 @@ with gr.Blocks(theme='huggingface') as demo:
506
  )
507
  choice_dropdown = gr.Dropdown(
508
  choices=language_names + VOICES,
509
- label="Select Voice or Language",
510
- value=VOICES[0]
511
  )
512
  soundscape_input = gr.Textbox(
513
  lines=1,
514
- value="frogs",
515
  label="AudioGen Txt"
516
  )
517
  kv_input = gr.Number(
518
- label="Sounds diversity",
519
  value=24,
520
  )
521
  generate_button = gr.Button("Generate Audio", variant="primary")
 
59
 
60
 
61
  if text is None or text.strip() == '':
62
+
63
+ x = np.zeros(4 * 16000, dtype=np.float32) # If no txt 4s of audiogen
 
64
 
65
+ else:
66
 
67
+ if lang not in language_names: # StyleTTS2
68
+
69
+ text = only_greek_or_only_latin(text, lang='eng')
70
 
71
+ x = _tts.inference(text,
72
+ ref_s='wav/' + lang + '.wav')[0, 0, :].numpy() # 24 Khz
73
+
74
+ if x.shape[0] > 10:
75
 
76
+ x = audresample.resample(signal=x.astype(np.float32),
77
+ original_rate=24000,
78
+ target_rate=16000)[0, :] # 16 KHz
 
 
79
 
80
+ else: # VITS
81
+
82
+ lang_code = lang_map.get(lang.lower(), lang.lower().split()[0].strip())
83
 
84
+ global cached_lang_code, cached_net_g, cached_tokenizer
85
 
86
+ if 'cached_lang_code' not in globals() or cached_lang_code != lang_code:
87
+ cached_lang_code = lang_code
88
+ cached_net_g = VitsModel.from_pretrained(f'facebook/mms-tts-{lang_code}').eval()
89
+ cached_tokenizer = VitsTokenizer.from_pretrained(f'facebook/mms-tts-{lang_code}')
90
 
91
+ net_g = cached_net_g
92
+ tokenizer = cached_tokenizer
93
+ text = only_greek_or_only_latin(text, lang=lang_code)
94
+ text = transliterate_number(text, lang=lang_code)
95
+ text = fix_vocals(text, lang=lang_code)
96
 
97
 
98
+ sentences = textwrap.wrap(text, width=439)
99
 
100
+ total_audio_parts = []
101
+ for sentence in sentences:
102
+ inputs = cached_tokenizer(sentence, return_tensors="pt")
103
+ with torch.no_grad():
104
+ audio_part = cached_net_g(
105
+ input_ids=inputs.input_ids,
106
+ attention_mask=inputs.attention_mask,
107
+ lang_code=lang_code,
108
+ )[0, :]
109
+ total_audio_parts.append(audio_part)
110
 
111
+ x = torch.cat(total_audio_parts).cpu().numpy()
112
 
113
 
114
  if soundscape and soundscape.strip():
 
164
 
165
 
166
  # TTS
167
+
168
+ VOICES = [
169
+ 'jv_ID_google-gmu_04982.wav',
170
+ 'en_US_vctk_p303.wav',
171
+ 'en_US_vctk_p306.wav',
172
+ 'en_US_vctk_p318.wav',
173
+ 'en_US_vctk_p269.wav',
174
+ 'en_US_vctk_p316.wav',
175
+ 'en_US_vctk_p362.wav', # cls
176
+ 'fr_FR_tom.wav',
177
+ 'bn_multi_5958.wav',
178
+ 'en_US_vctk_p287.wav',
179
+ 'en_US_vctk_p260.wav',
180
+ 'en_US_cmu_arctic_fem.wav',
181
+ 'en_US_cmu_arctic_rms.wav',
182
+ 'fr_FR_m-ailabs_nadine_eckert_boulet.wav',
183
+ 'en_US_vctk_p237.wav',
184
+ 'en_US_vctk_p317.wav',
185
+ 'tn_ZA_google-nwu_0378.wav',
186
+ 'nl_pmk.wav',
187
+ 'tn_ZA_google-nwu_3342.wav',
188
+ 'ne_NP_ne-google_3997.wav',
189
+ 'tn_ZA_google-nwu_8914.wav',
190
+ 'en_US_vctk_p238.wav',
191
+ 'en_US_vctk_p275.wav',
192
+ 'af_ZA_google-nwu_0184.wav',
193
+ 'af_ZA_google-nwu_8148.wav',
194
+ 'en_US_vctk_p326.wav',
195
+ 'en_US_vctk_p264.wav',
196
+ 'en_US_vctk_p295.wav',
197
+ 'en_US_vctk_p294.wav',
198
+ 'en_US_vctk_p330.wav',
199
+ 'gu_IN_cmu-indic_cmu_indic_guj_ad.wav',
200
+ 'jv_ID_google-gmu_05219.wav',
201
+ 'en_US_vctk_p284.wav',
202
+ 'en_US_m-ailabs_mary_ann.wav',
203
+ 'bn_multi_01701.wav',
204
+ 'en_US_vctk_p262.wav',
205
+ 'en_US_vctk_p243.wav',
206
+ 'en_US_vctk_p278.wav',
207
+ 'en_US_vctk_p250.wav',
208
+ 'nl_femal.wav',
209
+ 'en_US_vctk_p228.wav',
210
+ 'ne_NP_ne-google_0649.wav',
211
+ 'en_US_cmu_arctic_gka.wav',
212
+ 'en_US_vctk_p361.wav',
213
+ 'jv_ID_google-gmu_02326.wav',
214
+ 'tn_ZA_google-nwu_1932.wav',
215
+ 'de_DE_thorsten-emotion_amused.wav',
216
+ 'jv_ID_google-gmu_08002.wav',
217
+ 'tn_ZA_google-nwu_3629.wav',
218
+ 'en_US_vctk_p230.wav',
219
+ 'af_ZA_google-nwu_7214.wav',
220
+ 'nl_nathalie.wav',
221
+ 'en_US_cmu_arctic_lnh.wav',
222
+ 'tn_ZA_google-nwu_6459.wav',
223
+ 'tn_ZA_google-nwu_6206.wav',
224
+ 'en_US_vctk_p323.wav',
225
+ 'en_US_m-ailabs_judy_bieber.wav',
226
+ 'en_US_vctk_p261.wav',
227
+ 'fa_haaniye.wav',
228
+ # 'en_US_vctk_p339.wav',
229
+ 'tn_ZA_google-nwu_7896.wav',
230
+ 'en_US_vctk_p258.wav',
231
+ 'tn_ZA_google-nwu_7674.wav',
232
+ 'en_US_hifi-tts_6097.wav',
233
+ 'en_US_vctk_p304.wav',
234
+ 'en_US_vctk_p307.wav',
235
+ 'fr_FR_m-ailabs_bernard.wav',
236
+ 'en_US_cmu_arctic_jmk.wav',
237
+ 'ne_NP_ne-google_0283.wav',
238
+ 'en_US_vctk_p246.wav',
239
+ 'en_US_vctk_p276.wav',
240
+ 'style_o22050.wav',
241
+ 'en_US_vctk_s5.wav',
242
+ 'en_US_vctk_p268.wav', # reduce clip
243
+ 'af_ZA_google-nwu_8924.wav',
244
+ 'en_US_vctk_p363.wav',
245
+ 'ne_NP_ne-google_3614.wav',
246
+ 'ne_NP_ne-google_3154.wav',
247
+ 'en_US_cmu_arctic_eey.wav', # y fix styl
248
+ 'tn_ZA_google-nwu_2839.wav',
249
+ 'af_ZA_google-nwu_7130.wav',
250
+ 'ne_NP_ne-google_2139.wav',
251
+ 'jv_ID_google-gmu_04715.wav',
252
+ 'en_US_vctk_p273.wav'
253
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
  _tts = StyleTTS2().to('cpu')
256
 
 
265
  )
266
  choice_dropdown = gr.Dropdown(
267
  choices=language_names + VOICES,
268
+ label="Vox",
269
+ value=language_names[0]
270
  )
271
  soundscape_input = gr.Textbox(
272
  lines=1,
273
+ value="swims in lake frogs",
274
  label="AudioGen Txt"
275
  )
276
  kv_input = gr.Number(
277
+ label="Diversy",
278
  value=24,
279
  )
280
  generate_button = gr.Button("Generate Audio", variant="primary")