Tha456 committed on
Commit
57c14d7
·
verified ·
1 Parent(s): 9550e2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -356
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import gradio as gr
3
  import spaces
4
  from infer_rvc_python import BaseLoader
@@ -26,6 +27,40 @@ import sys
26
  import torch
27
  import fairseq
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  parser = argparse.ArgumentParser(description="Run the app with optional sharing")
30
  parser.add_argument(
31
  '--share',
@@ -67,27 +102,18 @@ for url, filename in zip(test_model.split(", "), test_names):
67
  with open(filename, "wb") as f:
68
  pass
69
 
70
- title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
71
- description = "This demo is provided for educational and research purposes only. The authors and contributors of this project do not endorse or encourage any misuse or unethical use of this software. Any use of this software for purposes other than those intended is solely at the user's own risk. The authors and contributors shall not be held responsible for any damages or liabilities arising from the use of this demo inappropriately." if IS_ZERO_GPU else ""
72
- RESOURCES = "- You can also try `RVC⚡ZERO` in Colab’s free tier, which provides free GPU [link](https://github.com/R3gm/rvc_zero_ui?tab=readme-ov-file#rvczero)."
73
  theme = args.theme
74
  delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
75
 
76
- PITCH_ALGO_OPT = [
77
- "pm",
78
- "harvest",
79
- "crepe",
80
- "rmvpe",
81
- "rmvpe+",
82
- ]
83
-
84
 
85
  async def get_voices_list(proxy=None):
86
- """Print all available voices."""
87
  from edge_tts import list_voices
88
  voices = await list_voices(proxy=proxy)
89
  voices = sorted(voices, key=lambda voice: voice["ShortName"])
90
-
91
  table = [
92
  {
93
  "ShortName": voice["ShortName"],
@@ -98,21 +124,15 @@ async def get_voices_list(proxy=None):
98
  }
99
  for voice in voices
100
  ]
101
-
102
  return table
103
 
104
-
105
  def find_files(directory):
106
  file_paths = []
107
  for filename in os.listdir(directory):
108
- # Check if the file has the desired extension
109
  if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
110
- # If yes, add the file path to the list
111
  file_paths.append(os.path.join(directory, filename))
112
-
113
  return file_paths
114
 
115
-
116
  def unzip_in_folder(my_zip, my_dir):
117
  with zipfile.ZipFile(my_zip) as zip:
118
  for zip_info in zip.infolist():
@@ -121,37 +141,24 @@ def unzip_in_folder(my_zip, my_dir):
121
  zip_info.filename = os.path.basename(zip_info.filename)
122
  zip.extract(zip_info, my_dir)
123
 
124
-
125
  def find_my_model(a_, b_):
126
-
127
  if a_ is None or a_.endswith(".pth"):
128
  return a_, b_
129
-
130
  txt_files = []
131
  for base_file in [a_, b_]:
132
  if base_file is not None and base_file.endswith(".txt"):
133
  txt_files.append(base_file)
134
-
135
  directory = os.path.dirname(a_)
136
-
137
  for txt in txt_files:
138
  with open(txt, 'r') as file:
139
  first_line = file.readline()
140
-
141
- download_manager(
142
- url=first_line.strip(),
143
- path=directory,
144
- extension="",
145
- )
146
-
147
  for f in find_files(directory):
148
  if f.endswith(".zip"):
149
  unzip_in_folder(f, directory)
150
-
151
  model = None
152
  index = None
153
  end_files = find_files(directory)
154
-
155
  for ff in end_files:
156
  if ff.endswith(".pth"):
157
  model = os.path.join(directory, ff)
@@ -159,79 +166,56 @@ def find_my_model(a_, b_):
159
  if ff.endswith(".index"):
160
  index = os.path.join(directory, ff)
161
  gr.Info(f"Index found: {ff}")
162
-
163
  if not model:
164
  gr.Error(f"Model not found in: {end_files}")
165
-
166
  if not index:
167
  gr.Warning("Index not found")
168
-
169
  return model, index
170
 
171
-
172
  def ensure_valid_file(url):
173
  if "huggingface" not in url:
174
  raise ValueError("Only downloads from Hugging Face are allowed")
175
-
176
  try:
177
  request = urllib.request.Request(url, method="HEAD")
178
  with urllib.request.urlopen(request) as response:
179
  content_length = response.headers.get("Content-Length")
180
-
181
  if content_length is None:
182
  raise ValueError("No Content-Length header found")
183
-
184
  file_size = int(content_length)
185
- # print("debug", url, file_size)
186
  if file_size > 900000000 and IS_ZERO_GPU:
187
  raise ValueError("The file is too large. Max allowed is 900 MB.")
188
-
189
  return file_size
190
-
191
  except Exception as e:
192
  raise e
193
 
194
-
195
  def clear_files(directory):
196
  time.sleep(15)
197
  print(f"Clearing files: {directory}.")
198
  shutil.rmtree(directory)
199
 
200
-
201
  def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
202
-
203
  if not url_data:
204
  return None, None
205
-
206
  if "," in url_data:
207
  a_, b_ = url_data.split(",")
208
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
209
  else:
210
  a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None
211
-
212
  out_dir = "downloads"
213
  folder_download = str(random.randint(1000, 9999))
214
  directory = os.path.join(out_dir, folder_download)
215
  os.makedirs(directory, exist_ok=True)
216
-
217
  try:
218
  valid_url = [a_] if not b_ else [a_, b_]
219
  for link in valid_url:
220
  ensure_valid_file(link)
221
- download_manager(
222
- url=link,
223
- path=directory,
224
- extension="",
225
- )
226
-
227
  for f in find_files(directory):
228
  if f.endswith(".zip"):
229
  unzip_in_folder(f, directory)
230
-
231
  model = None
232
  index = None
233
  end_files = find_files(directory)
234
-
235
  for ff in end_files:
236
  if ff.endswith(".pth"):
237
  model = ff
@@ -239,107 +223,68 @@ def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
239
  if ff.endswith(".index"):
240
  index = ff
241
  gr.Info(f"Index found: {ff}")
242
-
243
  if not model:
244
  raise ValueError(f"Model not found in: {end_files}")
245
-
246
  if not index:
247
  gr.Warning("Index not found")
248
  else:
249
  index = os.path.abspath(index)
250
-
251
  return os.path.abspath(model), index
252
-
253
  except Exception as e:
254
  raise e
255
  finally:
256
- # time.sleep(10)
257
- # shutil.rmtree(directory)
258
  t = threading.Thread(target=clear_files, args=(directory,))
259
  t.start()
260
 
261
-
262
  def add_audio_effects(audio_list, type_output):
263
- print("Audio effects")
264
-
265
  result = []
266
  for audio_path in audio_list:
267
  try:
268
  output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
269
-
270
- # Initialize audio effects plugins
271
  board = Pedalboard(
272
  [
273
  HighpassFilter(),
274
  Compressor(ratio=4, threshold_db=-15),
275
  Reverb(room_size=0.10, dry_level=0.8, wet_level=0.2, damping=0.7)
276
- ]
277
  )
278
-
279
- # Temporary WAV to hold processed data before exporting
280
  temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
281
-
282
  with AudioFile(audio_path) as f:
283
  with AudioFile(temp_wav, 'w', f.samplerate, f.num_channels) as o:
284
  while f.tell() < f.frames:
285
  chunk = f.read(int(f.samplerate))
286
  effected = board(chunk, f.samplerate, reset=False)
287
  o.write(effected)
288
-
289
- # Convert with pydub to desired output type
290
  audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
291
  audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
292
-
293
- # Clean up temp file
294
  os.remove(temp_wav)
295
-
296
  result.append(output_path)
297
  except Exception as e:
298
  traceback.print_exc()
299
- print(f"Error noisereduce: {str(e)}")
300
  result.append(audio_path)
301
-
302
  return result
303
 
304
-
305
  def apply_noisereduce(audio_list, type_output):
306
- # https://github.com/sa-if/Audio-Denoiser
307
- print("Noice reduce")
308
-
309
  result = []
310
  for audio_path in audio_list:
311
  out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
312
-
313
  try:
314
- # Load audio file
315
  audio = AudioSegment.from_file(audio_path)
316
-
317
- # Convert audio to numpy array
318
  samples = np.array(audio.get_array_of_samples())
319
-
320
- # Reduce noise
321
  reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
322
-
323
- # Convert reduced noise signal back to audio
324
  reduced_audio = AudioSegment(
325
  reduced_noise.tobytes(),
326
  frame_rate=audio.frame_rate,
327
  sample_width=audio.sample_width,
328
  channels=audio.channels
329
  )
330
-
331
- # Save reduced audio to file
332
  reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
333
  result.append(out_path)
334
-
335
  except Exception as e:
336
  traceback.print_exc()
337
- print(f"Error noisereduce: {str(e)}")
338
  result.append(audio_path)
339
-
340
  return result
341
 
342
-
343
  @spaces.GPU()
344
  def convert_now(audio_files, random_tag, converter, type_output, steps):
345
  for step in range(steps):
@@ -350,10 +295,9 @@ def convert_now(audio_files, random_tag, converter, type_output, steps):
350
  parallel_workers=(2 if IS_COLAB else 8),
351
  type_output=type_output,
352
  )
353
-
354
  return audio_files
355
 
356
-
357
  def run(
358
  audio_files,
359
  file_m,
@@ -368,7 +312,13 @@ def run(
368
  audio_effects,
369
  type_output,
370
  steps,
 
371
  ):
 
 
 
 
 
372
  if not audio_files:
373
  raise ValueError("The audio pls")
374
 
@@ -411,275 +361,103 @@ def run(
411
 
412
  return result
413
 
414
-
415
  def audio_conf():
416
- return gr.File(
417
- label="Audio files",
418
- file_count="multiple",
419
- type="filepath",
420
- container=True,
421
- )
422
-
423
 
424
  def model_conf():
425
- return gr.File(
426
- label="Model file",
427
- type="filepath",
428
- height=130,
429
- )
430
-
431
 
432
  def pitch_algo_conf():
433
- return gr.Dropdown(
434
- PITCH_ALGO_OPT,
435
- value=PITCH_ALGO_OPT[4],
436
- label="Pitch algorithm",
437
- visible=True,
438
- interactive=True,
439
- )
440
-
441
 
442
  def pitch_lvl_conf():
443
- return gr.Slider(
444
- label="Pitch level",
445
- minimum=-24,
446
- maximum=24,
447
- step=1,
448
- value=0,
449
- visible=True,
450
- interactive=True,
451
- )
452
-
453
 
454
  def index_conf():
455
- return gr.File(
456
- label="Index file",
457
- type="filepath",
458
- height=130,
459
- )
460
-
461
 
462
  def index_inf_conf():
463
- return gr.Slider(
464
- minimum=0,
465
- maximum=1,
466
- label="Index influence",
467
- value=0.75,
468
- )
469
-
470
 
471
  def respiration_filter_conf():
472
- return gr.Slider(
473
- minimum=0,
474
- maximum=7,
475
- label="Respiration median filtering",
476
- value=3,
477
- step=1,
478
- interactive=True,
479
- )
480
-
481
 
482
  def envelope_ratio_conf():
483
- return gr.Slider(
484
- minimum=0,
485
- maximum=1,
486
- label="Envelope ratio",
487
- value=0.25,
488
- interactive=True,
489
- )
490
-
491
 
492
  def consonant_protec_conf():
493
- return gr.Slider(
494
- minimum=0,
495
- maximum=0.5,
496
- label="Consonant breath protection",
497
- value=0.5,
498
- interactive=True,
499
- )
500
-
501
 
502
  def button_conf():
503
- return gr.Button(
504
- "Inference",
505
- variant="primary",
506
- )
507
-
508
 
509
  def output_conf():
510
- return gr.File(
511
- label="Result",
512
- file_count="multiple",
513
- interactive=False,
514
- )
515
-
516
 
517
  def active_tts_conf():
518
- return gr.Checkbox(
519
- False,
520
- label="TTS",
521
- # info="",
522
- container=False,
523
- )
524
-
525
 
526
  def tts_voice_conf():
527
- return gr.Dropdown(
528
- label="tts voice",
529
- choices=voices,
530
- visible=False,
531
- value="en-US-EmmaMultilingualNeural-Female",
532
- )
533
-
534
 
535
  def tts_text_conf():
536
- return gr.Textbox(
537
- value="",
538
- placeholder="Write the text here...",
539
- label="Text",
540
- visible=False,
541
- lines=3,
542
- )
543
-
544
 
545
  def tts_button_conf():
546
- return gr.Button(
547
- "Process TTS",
548
- variant="secondary",
549
- visible=False,
550
- )
551
-
552
 
553
  def tts_play_conf():
554
- return gr.Checkbox(
555
- False,
556
- label="Play",
557
- # info="",
558
- container=False,
559
- visible=False,
560
- )
561
-
562
 
563
  def sound_gui():
564
- return gr.Audio(
565
- value=None,
566
- type="filepath",
567
- # format="mp3",
568
- autoplay=True,
569
- visible=True,
570
- interactive=False,
571
- elem_id="audio_tts",
572
- )
573
-
574
 
575
  def steps_conf():
576
- return gr.Slider(
577
- minimum=1,
578
- maximum=3,
579
- label="Steps",
580
- value=1,
581
- step=1,
582
- interactive=True,
583
- )
584
-
585
 
586
  def format_output_gui():
587
- return gr.Dropdown(
588
- label="Format output:",
589
- choices=["wav", "mp3", "flac"],
590
- value="wav",
591
- )
592
 
593
  def denoise_conf():
594
- return gr.Checkbox(
595
- False,
596
- label="Denoise",
597
- # info="",
598
- container=False,
599
- visible=True,
600
- )
601
-
602
 
603
  def effects_conf():
604
- return gr.Checkbox(
605
- False,
606
- label="Reverb",
607
- # info="",
608
- container=False,
609
- visible=True,
610
- )
611
-
612
 
613
  def infer_tts_audio(tts_voice, tts_text, play_tts):
614
  out_dir = "output"
615
  folder_tts = "USER_"+str(random.randint(10000, 99999))
616
-
617
  os.makedirs(out_dir, exist_ok=True)
618
  os.makedirs(os.path.join(out_dir, folder_tts), exist_ok=True)
619
  out_path = os.path.join(out_dir, folder_tts, "tts.mp3")
620
-
621
  asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save(out_path))
622
  if play_tts:
623
  return [out_path], out_path
624
  return [out_path], None
625
 
626
-
627
  def show_components_tts(value_active):
628
- return gr.update(
629
- visible=value_active
630
- ), gr.update(
631
- visible=value_active
632
- ), gr.update(
633
- visible=value_active
634
- ), gr.update(
635
- visible=value_active
636
- )
637
-
638
 
639
  def down_active_conf():
640
- return gr.Checkbox(
641
- False,
642
- label="URL-to-Model",
643
- # info="",
644
- container=False,
645
- )
646
-
647
 
648
  def down_url_conf():
649
- return gr.Textbox(
650
- value="",
651
- placeholder="Write the url here...",
652
- label="Enter URL",
653
- visible=False,
654
- lines=1,
655
- )
656
-
657
 
658
  def down_button_conf():
659
- return gr.Button(
660
- "Process",
661
- variant="secondary",
662
- visible=False,
663
- )
664
-
665
 
666
  def show_components_down(value_active):
667
- return gr.update(
668
- visible=value_active
669
- ), gr.update(
670
- visible=value_active
671
- ), gr.update(
672
- visible=value_active
673
- )
674
 
675
  CSS = """
676
- #audio_tts {
677
- visibility: hidden; /* invisible but still takes space */
678
- height: 0px;
679
- width: 0px;
680
- max-width: 0px;
681
- max-height: 0px;
682
- }
683
  """
684
 
685
  def get_gui(theme):
@@ -697,7 +475,6 @@ def get_gui(theme):
697
  with gr.Row():
698
  tts_voice = tts_voice_conf()
699
  tts_active_play = tts_play_conf()
700
-
701
  tts_button = tts_button_conf()
702
  tts_play = sound_gui()
703
 
@@ -707,8 +484,14 @@ def get_gui(theme):
707
  outputs=[tts_voice, tts_text, tts_button, tts_active_play],
708
  )
709
 
 
 
 
 
 
 
 
710
  aud = audio_conf()
711
- # gr.HTML("<hr>")
712
 
713
  tts_button.click(
714
  fn=infer_tts_audio,
@@ -718,7 +501,7 @@ def get_gui(theme):
718
 
719
  down_active_gui = down_active_conf()
720
  down_info = gr.Markdown(
721
- f"Provide a link to a zip file, like this one: `https://huggingface.co/MrDawg/ToothBrushing/resolve/main/ToothBrushing.zip?download=true`, or separate links with a comma for the .pth and .index files, like this: `{test_model}`",
722
  visible=False
723
  )
724
  with gr.Row():
@@ -732,6 +515,13 @@ def get_gui(theme):
732
  model = model_conf()
733
  indx = index_conf()
734
 
 
 
 
 
 
 
 
735
  down_active_gui.change(
736
  show_components_down,
737
  [down_active_gui],
@@ -758,9 +548,11 @@ def get_gui(theme):
758
  with gr.Row():
759
  denoise_gui = denoise_conf()
760
  effects_gui = effects_conf()
 
761
  button_base = button_conf()
762
  output_base = output_conf()
763
 
 
764
  button_base.click(
765
  run,
766
  inputs=[
@@ -777,59 +569,17 @@ def get_gui(theme):
777
  effects_gui,
778
  format_out,
779
  steps_gui,
 
780
  ],
781
  outputs=[output_base],
782
  )
783
 
784
  gr.Examples(
785
  examples=[
786
- [
787
- ["./test.ogg"],
788
- "./model.pth",
789
- "rmvpe+",
790
- 0,
791
- "./model.index",
792
- 0.75,
793
- 3,
794
- 0.25,
795
- 0.50,
796
- ],
797
- [
798
- ["./example2/test2.ogg"],
799
- "./example2/model_link.txt",
800
- "rmvpe+",
801
- 0,
802
- "./example2/index_link.txt",
803
- 0.75,
804
- 3,
805
- 0.25,
806
- 0.50,
807
- ],
808
- [
809
- ["./example3/test3.wav"],
810
- "./example3/zip_link.txt",
811
- "rmvpe+",
812
- 0,
813
- None,
814
- 0.75,
815
- 3,
816
- 0.25,
817
- 0.50,
818
- ],
819
-
820
  ],
821
  fn=run,
822
- inputs=[
823
- aud,
824
- model,
825
- algo,
826
- algo_lvl,
827
- indx,
828
- indx_inf,
829
- res_fc,
830
- envel_r,
831
- const,
832
- ],
833
  outputs=[output_base],
834
  cache_examples=False,
835
  )
@@ -837,7 +587,6 @@ def get_gui(theme):
837
 
838
  return app
839
 
840
-
841
  if __name__ == "__main__":
842
  tts_voice_list = asyncio.new_event_loop().run_until_complete(get_voices_list(proxy=None))
843
  voices = sorted([
@@ -846,9 +595,7 @@ if __name__ == "__main__":
846
  ])
847
 
848
  app = get_gui(theme)
849
-
850
  app.queue(default_concurrency_limit=40)
851
-
852
  app.launch(
853
  max_threads=40,
854
  share=IS_COLAB,
@@ -856,4 +603,4 @@ if __name__ == "__main__":
856
  quiet=False,
857
  debug=IS_COLAB,
858
  ssr_mode=False,
859
- )
 
1
  import os
2
+ from huggingface_hub import snapshot_download
3
  import gradio as gr
4
  import spaces
5
  from infer_rvc_python import BaseLoader
 
27
  import torch
28
  import fairseq
29
 
30
# ---------------------------------------------------------------------
# Download the voice models from the Hugging Face repository.
# This runs at import time, before the rest of the module is set up.
# ---------------------------------------------------------------------
print("📥 កំពុងទាញយកម៉ូដែលសំឡេងទាំងអស់ពី Repository... សូមរង់ចាំ!")
REPO_ID = "Tha456/Khmer-Voice-Models"
LOCAL_MODELS_DIR = snapshot_download(repo_id=REPO_ID, local_dir="my_models")
print(f"✅ ទាញយកជោគជ័យ! ឯកសារទាំងអស់ស្ថិតនៅក្នុង Folder: {LOCAL_MODELS_DIR}")


def _voice_files(stem):
    """Return the ``(.pth, .index)`` paths for *stem* inside LOCAL_MODELS_DIR."""
    base = os.path.join(LOCAL_MODELS_DIR, stem)
    return f"{base}.pth", f"{base}.index"


# Path variables for the code further down in the module.
# NOTE(review): the stems "nimol_famale" and "sana_femal" look misspelled;
# they are kept as-is because they must match the file names in the repo.
nita_pth, nita_index = _voice_files("nita_female")
nimol_pth, nimol_index = _voice_files("nimol_famale")
saman_pth, saman_index = _voice_files("saman_male")
sana_pth, sana_index = _voice_files("sana_femal")
sovanna_pth, sovanna_index = _voice_files("sovanna_male")

# Registry of the bundled Khmer voices: display name -> model/index paths.
KHMER_VOICES = {
    "នីតា (Nita - Female)": dict(pth=nita_pth, index=nita_index),
    "និមល (Nimol - Female)": dict(pth=nimol_pth, index=nimol_index),
    "សាម៉ន (Saman - Male)": dict(pth=saman_pth, index=saman_index),
    "សាណា (Sana - Female)": dict(pth=sana_pth, index=sana_index),
    "សុវណ្ណា (Sovanna - Male)": dict(pth=sovanna_pth, index=sovanna_index),
}
# ---------------------------------------------------------------------
63
+
64
  parser = argparse.ArgumentParser(description="Run the app with optional sharing")
65
  parser.add_argument(
66
  '--share',
 
102
  with open(filename, "wb") as f:
103
  pass
104
 
105
# Page header markup and informational texts.
title = "<center><strong><font size='7'>RVC⚡ZERO Khmer Edition</font></strong></center>"
if IS_ZERO_GPU:
    description = "This demo is provided for educational and research purposes only."
else:
    # The disclaimer is only set when IS_ZERO_GPU is truthy.
    description = ""
RESOURCES = "- You can also try `RVC⚡ZERO` in Colab’s free tier."

# Theme name taken from the parsed command-line arguments.
theme = args.theme
# Pair of cache-deletion times; shorter when IS_ZERO_GPU is truthy.
delete_cache_time = (3200,) * 2 if IS_ZERO_GPU else (86400,) * 2

# Pitch extraction algorithm names offered in the UI.
PITCH_ALGO_OPT = [
    "pm",
    "harvest",
    "crepe",
    "rmvpe",
    "rmvpe+",
]
 
 
 
 
 
 
 
112
 
113
  async def get_voices_list(proxy=None):
 
114
  from edge_tts import list_voices
115
  voices = await list_voices(proxy=proxy)
116
  voices = sorted(voices, key=lambda voice: voice["ShortName"])
 
117
  table = [
118
  {
119
  "ShortName": voice["ShortName"],
 
124
  }
125
  for voice in voices
126
  ]
 
127
  return table
128
 
 
129
  def find_files(directory):
130
  file_paths = []
131
  for filename in os.listdir(directory):
 
132
  if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
 
133
  file_paths.append(os.path.join(directory, filename))
 
134
  return file_paths
135
 
 
136
  def unzip_in_folder(my_zip, my_dir):
137
  with zipfile.ZipFile(my_zip) as zip:
138
  for zip_info in zip.infolist():
 
141
  zip_info.filename = os.path.basename(zip_info.filename)
142
  zip.extract(zip_info, my_dir)
143
 
 
144
  def find_my_model(a_, b_):
 
145
  if a_ is None or a_.endswith(".pth"):
146
  return a_, b_
 
147
  txt_files = []
148
  for base_file in [a_, b_]:
149
  if base_file is not None and base_file.endswith(".txt"):
150
  txt_files.append(base_file)
 
151
  directory = os.path.dirname(a_)
 
152
  for txt in txt_files:
153
  with open(txt, 'r') as file:
154
  first_line = file.readline()
155
+ download_manager(url=first_line.strip(), path=directory, extension="")
 
 
 
 
 
 
156
  for f in find_files(directory):
157
  if f.endswith(".zip"):
158
  unzip_in_folder(f, directory)
 
159
  model = None
160
  index = None
161
  end_files = find_files(directory)
 
162
  for ff in end_files:
163
  if ff.endswith(".pth"):
164
  model = os.path.join(directory, ff)
 
166
  if ff.endswith(".index"):
167
  index = os.path.join(directory, ff)
168
  gr.Info(f"Index found: {ff}")
 
169
  if not model:
170
  gr.Error(f"Model not found in: {end_files}")
 
171
  if not index:
172
  gr.Warning("Index not found")
 
173
  return model, index
174
 
 
175
  def ensure_valid_file(url):
176
  if "huggingface" not in url:
177
  raise ValueError("Only downloads from Hugging Face are allowed")
 
178
  try:
179
  request = urllib.request.Request(url, method="HEAD")
180
  with urllib.request.urlopen(request) as response:
181
  content_length = response.headers.get("Content-Length")
 
182
  if content_length is None:
183
  raise ValueError("No Content-Length header found")
 
184
  file_size = int(content_length)
 
185
  if file_size > 900000000 and IS_ZERO_GPU:
186
  raise ValueError("The file is too large. Max allowed is 900 MB.")
 
187
  return file_size
 
188
  except Exception as e:
189
  raise e
190
 
 
191
  def clear_files(directory):
192
  time.sleep(15)
193
  print(f"Clearing files: {directory}.")
194
  shutil.rmtree(directory)
195
 
 
196
  def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
 
197
  if not url_data:
198
  return None, None
 
199
  if "," in url_data:
200
  a_, b_ = url_data.split(",")
201
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
202
  else:
203
  a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None
 
204
  out_dir = "downloads"
205
  folder_download = str(random.randint(1000, 9999))
206
  directory = os.path.join(out_dir, folder_download)
207
  os.makedirs(directory, exist_ok=True)
 
208
  try:
209
  valid_url = [a_] if not b_ else [a_, b_]
210
  for link in valid_url:
211
  ensure_valid_file(link)
212
+ download_manager(url=link, path=directory, extension="")
 
 
 
 
 
213
  for f in find_files(directory):
214
  if f.endswith(".zip"):
215
  unzip_in_folder(f, directory)
 
216
  model = None
217
  index = None
218
  end_files = find_files(directory)
 
219
  for ff in end_files:
220
  if ff.endswith(".pth"):
221
  model = ff
 
223
  if ff.endswith(".index"):
224
  index = ff
225
  gr.Info(f"Index found: {ff}")
 
226
  if not model:
227
  raise ValueError(f"Model not found in: {end_files}")
 
228
  if not index:
229
  gr.Warning("Index not found")
230
  else:
231
  index = os.path.abspath(index)
 
232
  return os.path.abspath(model), index
 
233
  except Exception as e:
234
  raise e
235
  finally:
 
 
236
  t = threading.Thread(target=clear_files, args=(directory,))
237
  t.start()
238
 
 
239
  def add_audio_effects(audio_list, type_output):
 
 
240
  result = []
241
  for audio_path in audio_list:
242
  try:
243
  output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
 
 
244
  board = Pedalboard(
245
  [
246
  HighpassFilter(),
247
  Compressor(ratio=4, threshold_db=-15),
248
  Reverb(room_size=0.10, dry_level=0.8, wet_level=0.2, damping=0.7)
249
+ ]
250
  )
 
 
251
  temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
 
252
  with AudioFile(audio_path) as f:
253
  with AudioFile(temp_wav, 'w', f.samplerate, f.num_channels) as o:
254
  while f.tell() < f.frames:
255
  chunk = f.read(int(f.samplerate))
256
  effected = board(chunk, f.samplerate, reset=False)
257
  o.write(effected)
 
 
258
  audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
259
  audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
 
 
260
  os.remove(temp_wav)
 
261
  result.append(output_path)
262
  except Exception as e:
263
  traceback.print_exc()
 
264
  result.append(audio_path)
 
265
  return result
266
 
 
267
  def apply_noisereduce(audio_list, type_output):
 
 
 
268
  result = []
269
  for audio_path in audio_list:
270
  out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
 
271
  try:
 
272
  audio = AudioSegment.from_file(audio_path)
 
 
273
  samples = np.array(audio.get_array_of_samples())
 
 
274
  reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
 
 
275
  reduced_audio = AudioSegment(
276
  reduced_noise.tobytes(),
277
  frame_rate=audio.frame_rate,
278
  sample_width=audio.sample_width,
279
  channels=audio.channels
280
  )
 
 
281
  reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
282
  result.append(out_path)
 
283
  except Exception as e:
284
  traceback.print_exc()
 
285
  result.append(audio_path)
 
286
  return result
287
 
 
288
  @spaces.GPU()
289
  def convert_now(audio_files, random_tag, converter, type_output, steps):
290
  for step in range(steps):
 
295
  parallel_workers=(2 if IS_COLAB else 8),
296
  type_output=type_output,
297
  )
 
298
  return audio_files
299
 
300
+ # --- Changed here: added the voice_select parameter so a bundled model can be picked automatically ---
301
  def run(
302
  audio_files,
303
  file_m,
 
312
  audio_effects,
313
  type_output,
314
  steps,
315
+ voice_select="Upload Custom Model",
316
  ):
317
+ # If the user picked one of the bundled Khmer voices, use its files from the repository right away
318
+ if voice_select in KHMER_VOICES:
319
+ file_m = KHMER_VOICES[voice_select]["pth"]
320
+ file_index = KHMER_VOICES[voice_select]["index"]
321
+
322
  if not audio_files:
323
  raise ValueError("The audio pls")
324
 
 
361
 
362
  return result
363
 
 
364
  def audio_conf():
365
+ return gr.File(label="Audio files", file_count="multiple", type="filepath", container=True)
 
 
 
 
 
 
366
 
367
  def model_conf():
368
+ return gr.File(label="Model file (.pth)", type="filepath", height=130, visible=False) # លាក់លំនាំដើម
 
 
 
 
 
369
 
370
  def pitch_algo_conf():
371
+ return gr.Dropdown(PITCH_ALGO_OPT, value=PITCH_ALGO_OPT[4], label="Pitch algorithm", visible=True, interactive=True)
 
 
 
 
 
 
 
372
 
373
  def pitch_lvl_conf():
374
+ return gr.Slider(label="Pitch level", minimum=-24, maximum=24, step=1, value=0, visible=True, interactive=True)
 
 
 
 
 
 
 
 
 
375
 
376
  def index_conf():
377
+ return gr.File(label="Index file (.index)", type="filepath", height=130, visible=False) # លាក់លំនាំដើម
 
 
 
 
 
378
 
379
  def index_inf_conf():
380
+ return gr.Slider(minimum=0, maximum=1, label="Index influence", value=0.75)
 
 
 
 
 
 
381
 
382
  def respiration_filter_conf():
383
+ return gr.Slider(minimum=0, maximum=7, label="Respiration median filtering", value=3, step=1, interactive=True)
 
 
 
 
 
 
 
 
384
 
385
  def envelope_ratio_conf():
386
+ return gr.Slider(minimum=0, maximum=1, label="Envelope ratio", value=0.25, interactive=True)
 
 
 
 
 
 
 
387
 
388
  def consonant_protec_conf():
389
+ return gr.Slider(minimum=0, maximum=0.5, label="Consonant breath protection", value=0.5, interactive=True)
 
 
 
 
 
 
 
390
 
391
  def button_conf():
392
+ return gr.Button("Inference", variant="primary")
 
 
 
 
393
 
394
  def output_conf():
395
+ return gr.File(label="Result", file_count="multiple", interactive=False)
 
 
 
 
 
396
 
397
  def active_tts_conf():
398
+ return gr.Checkbox(False, label="TTS", container=False)
 
 
 
 
 
 
399
 
400
  def tts_voice_conf():
401
+ return gr.Dropdown(label="tts voice", choices=voices, visible=False, value="en-US-EmmaMultilingualNeural-Female")
 
 
 
 
 
 
402
 
403
  def tts_text_conf():
404
+ return gr.Textbox(value="", placeholder="Write the text here...", label="Text", visible=False, lines=3)
 
 
 
 
 
 
 
405
 
406
  def tts_button_conf():
407
+ return gr.Button("Process TTS", variant="secondary", visible=False)
 
 
 
 
 
408
 
409
  def tts_play_conf():
410
+ return gr.Checkbox(False, label="Play", container=False, visible=False)
 
 
 
 
 
 
 
411
 
412
  def sound_gui():
413
+ return gr.Audio(value=None, type="filepath", autoplay=True, visible=True, interactive=False, elem_id="audio_tts")
 
 
 
 
 
 
 
 
 
414
 
415
  def steps_conf():
416
+ return gr.Slider(minimum=1, maximum=3, label="Steps", value=1, step=1, interactive=True)
 
 
 
 
 
 
 
 
417
 
418
  def format_output_gui():
419
+ return gr.Dropdown(label="Format output:", choices=["wav", "mp3", "flac"], value="wav")
 
 
 
 
420
 
421
  def denoise_conf():
422
+ return gr.Checkbox(False, label="Denoise", container=False, visible=True)
 
 
 
 
 
 
 
423
 
424
  def effects_conf():
425
+ return gr.Checkbox(False, label="Reverb", container=False, visible=True)
 
 
 
 
 
 
 
426
 
427
  def infer_tts_audio(tts_voice, tts_text, play_tts):
428
  out_dir = "output"
429
  folder_tts = "USER_"+str(random.randint(10000, 99999))
 
430
  os.makedirs(out_dir, exist_ok=True)
431
  os.makedirs(os.path.join(out_dir, folder_tts), exist_ok=True)
432
  out_path = os.path.join(out_dir, folder_tts, "tts.mp3")
 
433
  asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save(out_path))
434
  if play_tts:
435
  return [out_path], out_path
436
  return [out_path], None
437
 
 
438
  def show_components_tts(value_active):
439
+ return gr.update(visible=value_active), gr.update(visible=value_active), gr.update(visible=value_active), gr.update(visible=value_active)
 
 
 
 
 
 
 
 
 
440
 
441
  def down_active_conf():
442
+ return gr.Checkbox(False, label="URL-to-Model", container=False)
 
 
 
 
 
 
443
 
444
  def down_url_conf():
445
+ return gr.Textbox(value="", placeholder="Write the url here...", label="Enter URL", visible=False, lines=1)
 
 
 
 
 
 
 
446
 
447
  def down_button_conf():
448
+ return gr.Button("Process", variant="secondary", visible=False)
 
 
 
 
 
449
 
450
  def show_components_down(value_active):
451
+ return gr.update(visible=value_active), gr.update(visible=value_active), gr.update(visible=value_active)
452
+
453
# Toggle the visibility of the two file-upload widgets.
def update_model_visibility(voice_choice):
    """Return two ``gr.update`` objects for the selected voice.

    Both updates are visible only when *voice_choice* is exactly
    "Upload Custom Model"; for any other value both are hidden.
    """
    show_uploads = voice_choice == "Upload Custom Model"
    return gr.update(visible=show_uploads), gr.update(visible=show_uploads)
458
 
459
  CSS = """
460
+ #audio_tts { visibility: hidden; height: 0px; width: 0px; max-width: 0px; max-height: 0px; }
 
 
 
 
 
 
461
  """
462
 
463
  def get_gui(theme):
 
475
  with gr.Row():
476
  tts_voice = tts_voice_conf()
477
  tts_active_play = tts_play_conf()
 
478
  tts_button = tts_button_conf()
479
  tts_play = sound_gui()
480
 
 
484
  outputs=[tts_voice, tts_text, tts_button, tts_active_play],
485
  )
486
 
487
+ # --- Changed here: added a dropdown for choosing a Khmer voice model ---
488
+ voice_select = gr.Dropdown(
489
+ choices=list(KHMER_VOICES.keys()) + ["Upload Custom Model"],
490
+ value="នីតា (Nita - Female)",
491
+ label="🎙️ ជ្រើសរើសម៉ូដែលសំឡេងខ្មែរ (Select Khmer Voice Model)",
492
+ )
493
+
494
  aud = audio_conf()
 
495
 
496
  tts_button.click(
497
  fn=infer_tts_audio,
 
501
 
502
  down_active_gui = down_active_conf()
503
  down_info = gr.Markdown(
504
+ f"Provide a link to a zip file...",
505
  visible=False
506
  )
507
  with gr.Row():
 
515
  model = model_conf()
516
  indx = index_conf()
517
 
518
+ # Hide or show the upload widgets depending on the selected voice
519
+ voice_select.change(
520
+ fn=update_model_visibility,
521
+ inputs=[voice_select],
522
+ outputs=[model, indx]
523
+ )
524
+
525
  down_active_gui.change(
526
  show_components_down,
527
  [down_active_gui],
 
548
  with gr.Row():
549
  denoise_gui = denoise_conf()
550
  effects_gui = effects_conf()
551
+
552
  button_base = button_conf()
553
  output_base = output_conf()
554
 
555
+ # Pass voice_select into the inference call
556
  button_base.click(
557
  run,
558
  inputs=[
 
569
  effects_gui,
570
  format_out,
571
  steps_gui,
572
+ voice_select, # ថែមត្រង់នេះ
573
  ],
574
  outputs=[output_base],
575
  )
576
 
577
  gr.Examples(
578
  examples=[
579
+ [["./test.ogg"], "./model.pth", "rmvpe+", 0, "./model.index", 0.75, 3, 0.25, 0.50],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
580
  ],
581
  fn=run,
582
+ inputs=[aud, model, algo, algo_lvl, indx, indx_inf, res_fc, envel_r, const],
 
 
 
 
 
 
 
 
 
 
583
  outputs=[output_base],
584
  cache_examples=False,
585
  )
 
587
 
588
  return app
589
 
 
590
  if __name__ == "__main__":
591
  tts_voice_list = asyncio.new_event_loop().run_until_complete(get_voices_list(proxy=None))
592
  voices = sorted([
 
595
  ])
596
 
597
  app = get_gui(theme)
 
598
  app.queue(default_concurrency_limit=40)
 
599
  app.launch(
600
  max_threads=40,
601
  share=IS_COLAB,
 
603
  quiet=False,
604
  debug=IS_COLAB,
605
  ssr_mode=False,
606
+ )