Files changed (1) hide show
  1. app.py +167 -203
app.py CHANGED
@@ -46,7 +46,8 @@ logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
46
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
47
  converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
48
 
49
- test_model = "https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.index?download=true"
 
50
  test_names = ["model.pth", "model.index"]
51
 
52
  for url, filename in zip(test_model.split(", "), test_names):
@@ -64,9 +65,9 @@ for url, filename in zip(test_model.split(", "), test_names):
64
  with open(filename, "wb") as f:
65
  pass
66
 
67
- title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
68
- description = "This demo is provided for educational and research purposes only. The authors and contributors of this project do not endorse or encourage any misuse or unethical use of this software. Any use of this software for purposes other than those intended is solely at the user's own risk. The authors and contributors shall not be held responsible for any damages or liabilities arising from the use of this demo inappropriately." if IS_ZERO_GPU else ""
69
- RESOURCES = "- You can also try `RVC⚡ZERO` in Colab’s free tier, which provides free GPU [link](https://github.com/R3gm/rvc_zero_ui?tab=readme-ov-file#rvczero)."
70
  theme = args.theme
71
  delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
72
 
@@ -83,15 +84,15 @@ async def get_voices_list(proxy=None):
83
  """Print all available voices."""
84
  from edge_tts import list_voices
85
  voices = await list_voices(proxy=proxy)
86
- voices = sorted(voices, key=lambda voice: voice["ShortName"])
87
 
88
  table = [
89
  {
90
- "ShortName": voice["ShortName"],
91
- "Gender": voice["Gender"],
92
- "ContentCategories": ", ".join(voice["VoiceTag"]["ContentCategories"]),
93
- "VoicePersonalities": ", ".join(voice["VoiceTag"]["VoicePersonalities"]),
94
- "FriendlyName": voice["FriendlyName"],
95
  }
96
  for voice in voices
97
  ]
@@ -102,11 +103,8 @@ async def get_voices_list(proxy=None):
102
  def find_files(directory):
103
  file_paths = []
104
  for filename in os.listdir(directory):
105
- # Check if the file has the desired extension
106
  if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
107
- # If yes, add the file path to the list
108
  file_paths.append(os.path.join(directory, filename))
109
-
110
  return file_paths
111
 
112
 
@@ -120,7 +118,6 @@ def unzip_in_folder(my_zip, my_dir):
120
 
121
 
122
  def find_my_model(a_, b_):
123
-
124
  if a_ is None or a_.endswith(".pth"):
125
  return a_, b_
126
 
@@ -179,7 +176,6 @@ def ensure_valid_file(url):
179
  raise ValueError("No Content-Length header found")
180
 
181
  file_size = int(content_length)
182
- # print("debug", url, file_size)
183
  if file_size > 900000000 and IS_ZERO_GPU:
184
  raise ValueError("The file is too large. Max allowed is 900 MB.")
185
 
@@ -196,11 +192,10 @@ def clear_files(directory):
196
 
197
 
198
  def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
199
-
200
- if not url_data:
201
  return None, None
202
 
203
- if "," in url_data:
204
  a_, b_ = url_data.split(",")
205
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
206
  else:
@@ -250,8 +245,6 @@ def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
250
  except Exception as e:
251
  raise e
252
  finally:
253
- # time.sleep(10)
254
- # shutil.rmtree(directory)
255
  t = threading.Thread(target=clear_files, args=(directory,))
256
  t.start()
257
 
@@ -264,7 +257,6 @@ def add_audio_effects(audio_list, type_output):
264
  try:
265
  output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
266
 
267
- # Initialize audio effects plugins
268
  board = Pedalboard(
269
  [
270
  HighpassFilter(),
@@ -273,7 +265,6 @@ def add_audio_effects(audio_list, type_output):
273
  ]
274
  )
275
 
276
- # Temporary WAV to hold processed data before exporting
277
  temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
278
 
279
  with AudioFile(audio_path) as f:
@@ -283,11 +274,9 @@ def add_audio_effects(audio_list, type_output):
283
  effected = board(chunk, f.samplerate, reset=False)
284
  o.write(effected)
285
 
286
- # Convert with pydub to desired output type
287
  audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
288
  audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
289
 
290
- # Clean up temp file
291
  os.remove(temp_wav)
292
 
293
  result.append(output_path)
@@ -300,7 +289,6 @@ def add_audio_effects(audio_list, type_output):
300
 
301
 
302
  def apply_noisereduce(audio_list, type_output):
303
- # https://github.com/sa-if/Audio-Denoiser
304
  print("Noice reduce")
305
 
306
  result = []
@@ -308,16 +296,10 @@ def apply_noisereduce(audio_list, type_output):
308
  out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
309
 
310
  try:
311
- # Load audio file
312
  audio = AudioSegment.from_file(audio_path)
313
-
314
- # Convert audio to numpy array
315
  samples = np.array(audio.get_array_of_samples())
316
-
317
- # Reduce noise
318
  reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
319
 
320
- # Convert reduced noise signal back to audio
321
  reduced_audio = AudioSegment(
322
  reduced_noise.tobytes(),
323
  frame_rate=audio.frame_rate,
@@ -325,7 +307,6 @@ def apply_noisereduce(audio_list, type_output):
325
  channels=audio.channels
326
  )
327
 
328
- # Save reduced audio to file
329
  reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
330
  result.append(out_path)
331
 
@@ -367,7 +348,7 @@ def run(
367
  steps,
368
  ):
369
  if not audio_files:
370
- raise ValueError("The audio pls")
371
 
372
  if isinstance(audio_files, str):
373
  audio_files = [audio_files]
@@ -411,17 +392,19 @@ def run(
411
 
412
  def audio_conf():
413
  return gr.File(
414
- label="Audio files",
415
  file_count="multiple",
416
  type="filepath",
 
417
  container=True,
418
  )
419
 
420
 
421
  def model_conf():
422
  return gr.File(
423
- label="Model file",
424
  type="filepath",
 
425
  height=130,
426
  )
427
 
@@ -450,8 +433,9 @@ def pitch_lvl_conf():
450
 
451
  def index_conf():
452
  return gr.File(
453
- label="Index file",
454
  type="filepath",
 
455
  height=130,
456
  )
457
 
@@ -498,14 +482,15 @@ def consonant_protec_conf():
498
 
499
  def button_conf():
500
  return gr.Button(
501
- "Inference",
502
  variant="primary",
 
503
  )
504
 
505
 
506
  def output_conf():
507
  return gr.File(
508
- label="Result",
509
  file_count="multiple",
510
  interactive=False,
511
  )
@@ -514,25 +499,24 @@ def output_conf():
514
  def active_tts_conf():
515
  return gr.Checkbox(
516
  False,
517
- label="TTS",
518
- # info="",
519
  container=False,
520
  )
521
 
522
 
523
  def tts_voice_conf():
524
  return gr.Dropdown(
525
- label="tts voice",
526
- choices=voices,
527
  visible=False,
528
- value="en-US-EmmaMultilingualNeural-Female",
529
  )
530
 
531
 
532
  def tts_text_conf():
533
  return gr.Textbox(
534
  value="",
535
- placeholder="Write the text here...",
536
  label="Text",
537
  visible=False,
538
  lines=3,
@@ -541,7 +525,7 @@ def tts_text_conf():
541
 
542
  def tts_button_conf():
543
  return gr.Button(
544
- "Process TTS",
545
  variant="secondary",
546
  visible=False,
547
  )
@@ -550,8 +534,7 @@ def tts_button_conf():
550
  def tts_play_conf():
551
  return gr.Checkbox(
552
  False,
553
- label="Play",
554
- # info="",
555
  container=False,
556
  visible=False,
557
  )
@@ -561,7 +544,6 @@ def sound_gui():
561
  return gr.Audio(
562
  value=None,
563
  type="filepath",
564
- # format="mp3",
565
  autoplay=True,
566
  visible=True,
567
  interactive=False,
@@ -582,16 +564,16 @@ def steps_conf():
582
 
583
  def format_output_gui():
584
  return gr.Dropdown(
585
- label="Format output:",
586
  choices=["wav", "mp3", "flac"],
587
  value="wav",
588
  )
589
 
 
590
  def denoise_conf():
591
  return gr.Checkbox(
592
  False,
593
- label="Denoise",
594
- # info="",
595
  container=False,
596
  visible=True,
597
  )
@@ -600,8 +582,7 @@ def denoise_conf():
600
  def effects_conf():
601
  return gr.Checkbox(
602
  False,
603
- label="Reverb",
604
- # info="",
605
  container=False,
606
  visible=True,
607
  )
@@ -615,7 +596,13 @@ def infer_tts_audio(tts_voice, tts_text, play_tts):
615
  os.makedirs(os.path.join(out_dir, folder_tts), exist_ok=True)
616
  out_path = os.path.join(out_dir, folder_tts, "tts.mp3")
617
 
618
- asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save(out_path))
 
 
 
 
 
 
619
  if play_tts:
620
  return [out_path], out_path
621
  return [out_path], None
@@ -636,8 +623,7 @@ def show_components_tts(value_active):
636
  def down_active_conf():
637
  return gr.Checkbox(
638
  False,
639
- label="URL-to-Model",
640
- # info="",
641
  container=False,
642
  )
643
 
@@ -645,8 +631,8 @@ def down_active_conf():
645
  def down_url_conf():
646
  return gr.Textbox(
647
  value="",
648
- placeholder="Write the url here...",
649
- label="Enter URL",
650
  visible=False,
651
  lines=1,
652
  )
@@ -654,7 +640,7 @@ def down_url_conf():
654
 
655
  def down_button_conf():
656
  return gr.Button(
657
- "Process",
658
  variant="secondary",
659
  visible=False,
660
  )
@@ -671,7 +657,7 @@ def show_components_down(value_active):
671
 
672
  CSS = """
673
  #audio_tts {
674
- visibility: hidden; /* invisible but still takes space */
675
  height: 0px;
676
  width: 0px;
677
  max-width: 0px;
@@ -684,168 +670,146 @@ def get_gui(theme):
684
  gr.Markdown(title)
685
  gr.Markdown(description)
686
 
687
- active_tts = active_tts_conf()
688
- with gr.Row():
689
- with gr.Column(scale=1):
690
- tts_text = tts_text_conf()
691
- with gr.Column(scale=2):
692
- with gr.Row():
693
- with gr.Column():
694
- with gr.Row():
695
- tts_voice = tts_voice_conf()
696
- tts_active_play = tts_play_conf()
697
-
698
- tts_button = tts_button_conf()
699
- tts_play = sound_gui()
700
-
701
- active_tts.change(
702
- fn=show_components_tts,
703
- inputs=[active_tts],
704
- outputs=[tts_voice, tts_text, tts_button, tts_active_play],
705
- )
706
-
707
- aud = audio_conf()
708
- # gr.HTML("<hr>")
709
-
710
- tts_button.click(
711
- fn=infer_tts_audio,
712
- inputs=[tts_voice, tts_text, tts_active_play],
713
- outputs=[aud, tts_play],
714
- )
715
 
716
- down_active_gui = down_active_conf()
717
- down_info = gr.Markdown(
718
- f"Provide a link to a zip file, like this one: `https://huggingface.co/MrDawg/ToothBrushing/resolve/main/ToothBrushing.zip?download=true`, or separate links with a comma for the .pth and .index files, like this: `{test_model}`",
719
- visible=False
720
- )
721
- with gr.Row():
722
- with gr.Column(scale=3):
723
- down_url_gui = down_url_conf()
724
- with gr.Column(scale=1):
725
- down_button_gui = down_button_conf()
726
 
727
- with gr.Column():
 
 
728
  with gr.Row():
729
- model = model_conf()
730
- indx = index_conf()
731
-
732
- down_active_gui.change(
733
- show_components_down,
734
- [down_active_gui],
735
- [down_info, down_url_gui, down_button_gui]
736
- )
737
-
738
- down_button_gui.click(
739
- get_my_model,
740
- [down_url_gui],
741
- [model, indx]
742
- )
 
 
 
 
 
 
 
 
 
 
743
 
744
- with gr.Accordion(label="Advanced settings", open=False):
745
- algo = pitch_algo_conf()
746
- algo_lvl = pitch_lvl_conf()
747
- indx_inf = index_inf_conf()
748
- res_fc = respiration_filter_conf()
749
- envel_r = envelope_ratio_conf()
750
- const = consonant_protec_conf()
751
- steps_gui = steps_conf()
752
- format_out = format_output_gui()
753
- with gr.Row():
754
- with gr.Column():
755
- with gr.Row():
756
- denoise_gui = denoise_conf()
757
- effects_gui = effects_conf()
758
- button_base = button_conf()
759
- output_base = output_conf()
760
-
761
- button_base.click(
762
- run,
763
- inputs=[
764
- aud,
765
- model,
766
- algo,
767
- algo_lvl,
768
- indx,
769
- indx_inf,
770
- res_fc,
771
- envel_r,
772
- const,
773
- denoise_gui,
774
- effects_gui,
775
- format_out,
776
- steps_gui,
777
- ],
778
- outputs=[output_base],
779
- )
780
 
781
- gr.Examples(
782
- examples=[
783
- [
784
- ["./test.ogg"],
785
- "./model.pth",
786
- "rmvpe+",
787
- 0,
788
- "./model.index",
789
- 0.75,
790
- 3,
791
- 0.25,
792
- 0.50,
793
- ],
794
- [
795
- ["./example2/test2.ogg"],
796
- "./example2/model_link.txt",
797
- "rmvpe+",
798
- 0,
799
- "./example2/index_link.txt",
800
- 0.75,
801
- 3,
802
- 0.25,
803
- 0.50,
804
- ],
805
- [
806
- ["./example3/test3.wav"],
807
- "./example3/zip_link.txt",
808
- "rmvpe+",
809
- 0,
810
- None,
811
- 0.75,
812
- 3,
813
- 0.25,
814
- 0.50,
 
 
 
 
 
 
 
 
 
815
  ],
 
 
816
 
817
- ],
818
- fn=run,
819
- inputs=[
820
- aud,
821
- model,
822
- algo,
823
- algo_lvl,
824
- indx,
825
- indx_inf,
826
- res_fc,
827
- envel_r,
828
- const,
829
- ],
830
- outputs=[output_base],
831
- cache_examples=False,
832
- )
833
  gr.Markdown(RESOURCES)
834
 
835
  return app
836
 
837
 
838
  if __name__ == "__main__":
 
839
  tts_voice_list = asyncio.new_event_loop().run_until_complete(get_voices_list(proxy=None))
 
 
840
  voices = sorted([
841
- (" - ".join(reversed(v["FriendlyName"].split("-"))).replace("Microsoft ", "").replace("Online (Natural)", f"({v['Gender']})").strip(), f"{v['ShortName']}-{v['Gender']}")
842
- for v in tts_voice_list
 
 
 
 
 
 
 
843
  ])
844
 
 
845
  app = get_gui(theme)
846
-
847
  app.queue(default_concurrency_limit=40)
848
 
 
849
  app.launch(
850
  max_threads=40,
851
  share=IS_COLAB,
@@ -853,4 +817,4 @@ if __name__ == "__main__":
853
  quiet=False,
854
  debug=IS_COLAB,
855
  ssr_mode=False,
856
- )
 
46
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
47
  converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
48
 
49
+ # Default model (optional)
50
+ test_model = "https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.index?download=true"
51
  test_names = ["model.pth", "model.index"]
52
 
53
  for url, filename in zip(test_model.split(", "), test_names):
 
65
  with open(filename, "wb") as f:
66
  pass
67
 
68
+ title = "<center><strong><font size='7'>RVC⚡ZERO - Local Upload</font></strong></center>"
69
+ description = "Upload your own model (.pth) and audio files for voice conversion." if IS_ZERO_GPU else ""
70
+ RESOURCES = "- You can upload your custom RVC models and audio files directly."
71
  theme = args.theme
72
  delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
73
 
 
84
  """Print all available voices."""
85
  from edge_tts import list_voices
86
  voices = await list_voices(proxy=proxy)
87
+ voices = sorted(voices, key=lambda voice: voice.get("ShortName", ""))
88
 
89
  table = [
90
  {
91
+ "ShortName": voice.get("ShortName", "Unknown"),
92
+ "Gender": voice.get("Gender", "Unknown"),
93
+ "ContentCategories": ", ".join(voice.get("VoiceTag", {}).get("ContentCategories", [])),
94
+ "VoicePersonalities": ", ".join(voice.get("VoiceTag", {}).get("VoicePersonalities", [])),
95
+ "FriendlyName": voice.get("FriendlyName", voice.get("Name", "Unknown Voice")),
96
  }
97
  for voice in voices
98
  ]
 
103
  def find_files(directory):
104
  file_paths = []
105
  for filename in os.listdir(directory):
 
106
  if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
 
107
  file_paths.append(os.path.join(directory, filename))
 
108
  return file_paths
109
 
110
 
 
118
 
119
 
120
  def find_my_model(a_, b_):
 
121
  if a_ is None or a_.endswith(".pth"):
122
  return a_, b_
123
 
 
176
  raise ValueError("No Content-Length header found")
177
 
178
  file_size = int(content_length)
 
179
  if file_size > 900000000 and IS_ZERO_GPU:
180
  raise ValueError("The file is too large. Max allowed is 900 MB.")
181
 
 
192
 
193
 
194
  def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
195
+ if not url_
 
196
  return None, None
197
 
198
+ if "," in url_
199
  a_, b_ = url_data.split(",")
200
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
201
  else:
 
245
  except Exception as e:
246
  raise e
247
  finally:
 
 
248
  t = threading.Thread(target=clear_files, args=(directory,))
249
  t.start()
250
 
 
257
  try:
258
  output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
259
 
 
260
  board = Pedalboard(
261
  [
262
  HighpassFilter(),
 
265
  ]
266
  )
267
 
 
268
  temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
269
 
270
  with AudioFile(audio_path) as f:
 
274
  effected = board(chunk, f.samplerate, reset=False)
275
  o.write(effected)
276
 
 
277
  audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
278
  audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
279
 
 
280
  os.remove(temp_wav)
281
 
282
  result.append(output_path)
 
289
 
290
 
291
  def apply_noisereduce(audio_list, type_output):
 
292
  print("Noice reduce")
293
 
294
  result = []
 
296
  out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
297
 
298
  try:
 
299
  audio = AudioSegment.from_file(audio_path)
 
 
300
  samples = np.array(audio.get_array_of_samples())
 
 
301
  reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
302
 
 
303
  reduced_audio = AudioSegment(
304
  reduced_noise.tobytes(),
305
  frame_rate=audio.frame_rate,
 
307
  channels=audio.channels
308
  )
309
 
 
310
  reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
311
  result.append(out_path)
312
 
 
348
  steps,
349
  ):
350
  if not audio_files:
351
+ raise ValueError("Please upload audio files")
352
 
353
  if isinstance(audio_files, str):
354
  audio_files = [audio_files]
 
392
 
393
  def audio_conf():
394
  return gr.File(
395
+ label="Upload Audio Files (wav, mp3, ogg, flac)",
396
  file_count="multiple",
397
  type="filepath",
398
+ file_types=[".wav", ".mp3", ".ogg", ".flac", ".m4a"],
399
  container=True,
400
  )
401
 
402
 
403
  def model_conf():
404
  return gr.File(
405
+ label="Upload Model File (.pth)",
406
  type="filepath",
407
+ file_types=[".pth"],
408
  height=130,
409
  )
410
 
 
433
 
434
  def index_conf():
435
  return gr.File(
436
+ label="Upload Index File (.index) - Optional",
437
  type="filepath",
438
+ file_types=[".index"],
439
  height=130,
440
  )
441
 
 
482
 
483
  def button_conf():
484
  return gr.Button(
485
+ "Convert Voice",
486
  variant="primary",
487
+ size="lg",
488
  )
489
 
490
 
491
  def output_conf():
492
  return gr.File(
493
+ label="Converted Audio",
494
  file_count="multiple",
495
  interactive=False,
496
  )
 
499
  def active_tts_conf():
500
  return gr.Checkbox(
501
  False,
502
+ label="Use Text-to-Speech",
 
503
  container=False,
504
  )
505
 
506
 
507
  def tts_voice_conf():
508
  return gr.Dropdown(
509
+ label="TTS Voice",
510
+ choices=[], # Will be populated later
511
  visible=False,
512
+ value=None,
513
  )
514
 
515
 
516
  def tts_text_conf():
517
  return gr.Textbox(
518
  value="",
519
+ placeholder="Enter text to convert to speech...",
520
  label="Text",
521
  visible=False,
522
  lines=3,
 
525
 
526
  def tts_button_conf():
527
  return gr.Button(
528
+ "Generate Speech",
529
  variant="secondary",
530
  visible=False,
531
  )
 
534
  def tts_play_conf():
535
  return gr.Checkbox(
536
  False,
537
+ label="Auto-play generated audio",
 
538
  container=False,
539
  visible=False,
540
  )
 
544
  return gr.Audio(
545
  value=None,
546
  type="filepath",
 
547
  autoplay=True,
548
  visible=True,
549
  interactive=False,
 
564
 
565
  def format_output_gui():
566
  return gr.Dropdown(
567
+ label="Output Format:",
568
  choices=["wav", "mp3", "flac"],
569
  value="wav",
570
  )
571
 
572
+
573
  def denoise_conf():
574
  return gr.Checkbox(
575
  False,
576
+ label="Apply Noise Reduction",
 
577
  container=False,
578
  visible=True,
579
  )
 
582
  def effects_conf():
583
  return gr.Checkbox(
584
  False,
585
+ label="Apply Audio Effects (Reverb)",
 
586
  container=False,
587
  visible=True,
588
  )
 
596
  os.makedirs(os.path.join(out_dir, folder_tts), exist_ok=True)
597
  out_path = os.path.join(out_dir, folder_tts, "tts.mp3")
598
 
599
+ # Extract ShortName from combined value (e.g., "en-US-EmmaMultilingualNeural-Female")
600
+ if tts_voice:
601
+ short_name = "-".join(tts_voice.split('-')[:-1])
602
+ else:
603
+ short_name = "en-US-EmmaMultilingualNeural"
604
+
605
+ asyncio.run(edge_tts.Communicate(tts_text, short_name).save(out_path))
606
  if play_tts:
607
  return [out_path], out_path
608
  return [out_path], None
 
623
  def down_active_conf():
624
  return gr.Checkbox(
625
  False,
626
+ label="Download from URL",
 
627
  container=False,
628
  )
629
 
 
631
  def down_url_conf():
632
  return gr.Textbox(
633
  value="",
634
+ placeholder="Hugging Face model URL...",
635
+ label="Model URL",
636
  visible=False,
637
  lines=1,
638
  )
 
640
 
641
  def down_button_conf():
642
  return gr.Button(
643
+ "Download Model",
644
  variant="secondary",
645
  visible=False,
646
  )
 
657
 
658
  CSS = """
659
  #audio_tts {
660
+ visibility: hidden;
661
  height: 0px;
662
  width: 0px;
663
  max-width: 0px;
 
670
  gr.Markdown(title)
671
  gr.Markdown(description)
672
 
673
+ with gr.Tab("Voice Conversion"):
674
+ # Audio file upload section
675
+ gr.Markdown("### 📤 Upload Audio Files")
676
+ aud = audio_conf()
677
+
678
+ # TTS section
679
+ active_tts = active_tts_conf()
680
+ with gr.Row(visible=False) as tts_row:
681
+ with gr.Column(scale=1):
682
+ tts_text = tts_text_conf()
683
+ with gr.Column(scale=2):
684
+ with gr.Row():
685
+ with gr.Column():
686
+ with gr.Row():
687
+ tts_voice = tts_voice_conf()
688
+ tts_active_play = tts_play_conf()
689
+ tts_button = tts_button_conf()
690
+ tts_play = sound_gui()
691
+
692
+ active_tts.change(
693
+ fn=show_components_tts,
694
+ inputs=[active_tts],
695
+ outputs=[tts_voice, tts_text, tts_button, tts_active_play],
696
+ )
 
 
 
 
697
 
698
+ tts_button.click(
699
+ fn=infer_tts_audio,
700
+ inputs=[tts_voice, tts_text, tts_active_play],
701
+ outputs=[aud, tts_play],
702
+ )
 
 
 
 
 
703
 
704
+ # Model section
705
+ gr.Markdown("### 🎯 Model Selection")
706
+
707
  with gr.Row():
708
+ with gr.Column(scale=1):
709
+ model = model_conf()
710
+ gr.Markdown("*Upload your .pth model file*")
711
+ with gr.Column(scale=1):
712
+ indx = index_conf()
713
+ gr.Markdown("*Upload .index file (optional)*")
714
+
715
+ # Download-from-URL section
716
+ down_active_gui = down_active_conf()
717
+ down_info = gr.Markdown(
718
+ f"Download models from Hugging Face URLs",
719
+ visible=False
720
+ )
721
+ with gr.Row(visible=False) as url_row:
722
+ with gr.Column(scale=3):
723
+ down_url_gui = down_url_conf()
724
+ with gr.Column(scale=1):
725
+ down_button_gui = down_button_conf()
726
+
727
+ down_active_gui.change(
728
+ show_components_down,
729
+ [down_active_gui],
730
+ [down_info, down_url_gui, down_button_gui]
731
+ )
732
 
733
+ down_button_gui.click(
734
+ get_my_model,
735
+ [down_url_gui],
736
+ [model, indx]
737
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
 
739
+ # Advanced settings
740
+ with gr.Accordion(label="⚙️ Advanced Settings", open=False):
741
+ with gr.Row():
742
+ algo = pitch_algo_conf()
743
+ algo_lvl = pitch_lvl_conf()
744
+
745
+ with gr.Row():
746
+ indx_inf = index_inf_conf()
747
+ steps_gui = steps_conf()
748
+
749
+ with gr.Row():
750
+ res_fc = respiration_filter_conf()
751
+ envel_r = envelope_ratio_conf()
752
+ const = consonant_protec_conf()
753
+
754
+ with gr.Row():
755
+ format_out = format_output_gui()
756
+ denoise_gui = denoise_conf()
757
+ effects_gui = effects_conf()
758
+
759
+ # Convert button
760
+ button_base = button_conf()
761
+
762
+ # Result
763
+ gr.Markdown("### 🎵 Output")
764
+ output_base = output_conf()
765
+
766
+ button_base.click(
767
+ run,
768
+ inputs=[
769
+ aud,
770
+ model,
771
+ algo,
772
+ algo_lvl,
773
+ indx,
774
+ indx_inf,
775
+ res_fc,
776
+ envel_r,
777
+ const,
778
+ denoise_gui,
779
+ effects_gui,
780
+ format_out,
781
+ steps_gui,
782
  ],
783
+ outputs=[output_base],
784
+ )
785
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
786
  gr.Markdown(RESOURCES)
787
 
788
  return app
789
 
790
 
791
  if __name__ == "__main__":
792
+ # Get voice list safely
793
  tts_voice_list = asyncio.new_event_loop().run_until_complete(get_voices_list(proxy=None))
794
+
795
+ # Build voice dropdown options with safe .get() access
796
  voices = sorted([
797
+ (
798
+ " - ".join(
799
+ reversed(
800
+ voice.get("FriendlyName", voice.get("Name", "Unknown Voice")).split("-")
801
+ )
802
+ ).replace("Microsoft ", "").replace("Online (Natural)", f"({voice.get('Gender', 'Unknown')})").strip(),
803
+ f"{voice.get('ShortName', 'Unknown')}-{voice.get('Gender', 'Unknown')}"
804
+ )
805
+ for voice in tts_voice_list
806
  ])
807
 
808
+ # Initialize GUI
809
  app = get_gui(theme)
 
810
  app.queue(default_concurrency_limit=40)
811
 
812
+ # Launch app
813
  app.launch(
814
  max_threads=40,
815
  share=IS_COLAB,
 
817
  quiet=False,
818
  debug=IS_COLAB,
819
  ssr_mode=False,
820
+ )