Singhp08 committed on
Commit
c6c990a
·
verified ·
1 Parent(s): e66e4fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -474
app.py CHANGED
@@ -12,7 +12,6 @@ import edge_tts
12
  import asyncio
13
  import librosa
14
  import traceback
15
- import soundfile as sf
16
  from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
17
  from pedalboard.io import AudioFile
18
  from pydub import AudioSegment
@@ -25,17 +24,8 @@ import argparse
25
  import sys
26
 
27
  parser = argparse.ArgumentParser(description="Run the app with optional sharing")
28
- parser.add_argument(
29
- '--share',
30
- action='store_true',
31
- help='Enable sharing mode'
32
- )
33
- parser.add_argument(
34
- '--theme',
35
- type=str,
36
- default="aliabid94/new-theme",
37
- help='Set the theme (default: aliabid94/new-theme)'
38
- )
39
  args = parser.parse_args()
40
 
41
  IS_COLAB = True if ('google.colab' in sys.modules or args.share) else False
@@ -46,359 +36,211 @@ logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
46
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
47
  converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
48
 
49
- test_model = "https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.index?download=true"
50
- test_names = ["model.pth", "model.index"]
51
-
52
- for url, filename in zip(test_model.split(", "), test_names):
53
- try:
54
- download_manager(
55
- url=url,
56
- path=".",
57
- extension="",
58
- overwrite=False,
59
- progress=True,
60
- )
61
- if not os.path.isfile(filename):
62
- raise FileNotFoundError
63
- except Exception:
64
- with open(filename, "wb") as f:
65
- pass
66
 
67
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
68
- description = "This demo is provided for educational and research purposes only. The authors and contributors of this project do not endorse or encourage any misuse or unethical use of this software. Any use of this software for purposes other than those intended is solely at the user's own risk. The authors and contributors shall not be held responsible for any damages or liabilities arising from the use of this demo inappropriately." if IS_ZERO_GPU else ""
69
- RESOURCES = "- You can also try `RVC⚡ZERO` in Colab’s free tier, which provides free GPU [link](https://github.com/R3gm/rvc_zero_ui?tab=readme-ov-file#rvczero)."
70
  theme = args.theme
71
  delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
72
 
73
- PITCH_ALGO_OPT = [
74
- "pm",
75
- "harvest",
76
- "crepe",
77
- "rmvpe",
78
- "rmvpe+",
79
- ]
80
-
81
 
82
  async def get_voices_list(proxy=None):
83
- """Print all available voices."""
84
  from edge_tts import list_voices
85
  voices = await list_voices(proxy=proxy)
86
- voices = sorted(voices, key=lambda voice: voice["ShortName"])
87
-
88
- table = [
89
  {
90
- "ShortName": voice["ShortName"],
91
- "Gender": voice["Gender"],
92
- "ContentCategories": ", ".join(voice["VoiceTag"]["ContentCategories"]),
93
- "VoicePersonalities": ", ".join(voice["VoiceTag"]["VoicePersonalities"]),
94
- "FriendlyName": voice["FriendlyName"],
95
  }
96
- for voice in voices
97
  ]
98
 
99
- return table
100
-
101
-
102
  def find_files(directory):
103
  file_paths = []
104
- for filename in os.listdir(directory):
105
- if filename.endswith('.pth') or filename.endswith('.zip') or filename.endswith('.index'):
106
- file_paths.append(os.path.join(directory, filename))
107
  return file_paths
108
 
109
-
110
  def unzip_in_folder(my_zip, my_dir):
111
- with zipfile.ZipFile(my_zip) as zip:
112
- for zip_info in zip.infolist():
113
- if zip_info.is_dir():
114
  continue
115
- zip_info.filename = os.path.basename(zip_info.filename)
116
- zip.extract(zip_info, my_dir)
117
-
118
 
119
  def find_my_model(a_, b_):
120
  if a_ is None or a_.endswith(".pth"):
121
  return a_, b_
122
-
123
- txt_files = []
124
- for base_file in [a_, b_]:
125
- if base_file is not None and base_file.endswith(".txt"):
126
- txt_files.append(base_file)
127
-
128
  directory = os.path.dirname(a_)
129
-
130
  for txt in txt_files:
131
- with open(txt, 'r') as file:
132
- first_line = file.readline()
133
-
134
- download_manager(
135
- url=first_line.strip(),
136
- path=directory,
137
- extension="",
138
- )
139
-
140
  for f in find_files(directory):
141
  if f.endswith(".zip"):
142
  unzip_in_folder(f, directory)
143
-
144
- model = None
145
- index = None
146
- end_files = find_files(directory)
147
-
148
- for ff in end_files:
149
  if ff.endswith(".pth"):
150
- model = os.path.join(directory, ff)
151
  gr.Info(f"Model found: {ff}")
152
  if ff.endswith(".index"):
153
- index = os.path.join(directory, ff)
154
  gr.Info(f"Index found: {ff}")
155
-
156
  if not model:
157
- gr.Error(f"Model not found in: {end_files}")
158
-
159
  if not index:
160
  gr.Warning("Index not found")
161
-
162
  return model, index
163
 
164
-
165
  def ensure_valid_file(url):
166
  if "huggingface" not in url:
167
- raise ValueError("Only downloads from Hugging Face are allowed")
168
-
169
- try:
170
- request = urllib.request.Request(url, method="HEAD")
171
- with urllib.request.urlopen(request) as response:
172
- content_length = response.headers.get("Content-Length")
173
-
174
- if content_length is None:
175
- raise ValueError("No Content-Length header found")
176
-
177
- file_size = int(content_length)
178
- if file_size > 900000000 and IS_ZERO_GPU:
179
- raise ValueError("The file is too large. Max allowed is 900 MB.")
180
-
181
- return file_size
182
-
183
- except Exception as e:
184
- raise e
185
-
186
 
187
  def clear_files(directory):
188
  time.sleep(15)
189
- print(f"Clearing files: {directory}.")
190
- shutil.rmtree(directory)
191
-
192
 
193
  def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
194
  if not url_data:
195
  return None, None
196
-
197
  if "," in url_data:
198
  a_, b_ = url_data.split(",")
199
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
200
  else:
201
  a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None
202
-
203
  out_dir = "downloads"
204
- folder_download = str(random.randint(1000, 9999))
205
- directory = os.path.join(out_dir, folder_download)
206
  os.makedirs(directory, exist_ok=True)
207
-
208
  try:
209
- valid_url = [a_] if not b_ else [a_, b_]
210
- for link in valid_url:
211
  ensure_valid_file(link)
212
- download_manager(
213
- url=link,
214
- path=directory,
215
- extension="",
216
- )
217
-
218
  for f in find_files(directory):
219
  if f.endswith(".zip"):
220
  unzip_in_folder(f, directory)
221
-
222
- model = None
223
- index = None
224
- end_files = find_files(directory)
225
-
226
- for ff in end_files:
227
  if ff.endswith(".pth"):
228
  model = ff
229
- gr.Info(f"Model found: {ff}")
230
  if ff.endswith(".index"):
231
  index = ff
232
- gr.Info(f"Index found: {ff}")
233
-
234
  if not model:
235
- raise ValueError(f"Model not found in: {end_files}")
236
-
237
  if not index:
238
  gr.Warning("Index not found")
239
- else:
240
- index = os.path.abspath(index)
241
-
242
- return os.path.abspath(model), index
243
-
244
- except Exception as e:
245
- raise e
246
  finally:
247
- t = threading.Thread(target=clear_files, args=(directory,))
248
- t.start()
249
 
250
-
251
- # ========== नया फ़ंक्शन: logs/ फोल्डर से सभी मॉडल स्कैन करें ==========
252
  def scan_models():
253
- """logs/ फोल्डर के अंदर हर सबफोल्डर को एक मॉडल मानें और उसकी .pth व .index फाइलें ढूंढें।"""
254
  logs_dir = "logs"
255
  if not os.path.isdir(logs_dir):
256
  return []
257
-
258
  models = []
259
  for model_name in os.listdir(logs_dir):
260
  model_path = os.path.join(logs_dir, model_name)
261
  if not os.path.isdir(model_path):
262
  continue
263
-
264
- # फोल्डर के अंदर .pth और .index फाइलें देखें
265
  pth_files = [f for f in os.listdir(model_path) if f.endswith(".pth")]
266
- index_files = [f for f in os.listdir(model_path) if f.endswith(".index")]
267
-
268
- if pth_files and index_files:
269
- # पहली मिलने वाली फाइल ले लें (या आप नाम से मिलान कर सकते हैं)
270
  pth_path = os.path.join(model_path, pth_files[0])
271
- index_path = os.path.join(model_path, index_files[0])
272
- models.append((model_name, pth_path, index_path))
273
  return models
274
 
275
-
276
  def update_model_paths(model_name):
277
- """चुने गए मॉडल के अनुसार pth और index के पूरे पथ लौटाएँ।"""
278
- models = scan_models()
279
- for name, pth, idx in models:
280
  if name == model_name:
281
  return pth, idx
282
  return None, None
283
 
284
-
285
- # ========== ऑडियो इफेक्ट और कन्वर्जन फंक्शन (कोई बदलाव नहीं) ==========
286
-
287
  def add_audio_effects(audio_list, type_output):
288
- print("Audio effects")
289
-
290
  result = []
291
  for audio_path in audio_list:
292
  try:
293
- output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
294
-
295
- board = Pedalboard(
296
- [
297
- HighpassFilter(),
298
- Compressor(ratio=4, threshold_db=-15),
299
- Reverb(room_size=0.10, dry_level=0.8, wet_level=0.2, damping=0.7)
300
- ]
301
- )
302
-
303
  temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
304
-
305
  with AudioFile(audio_path) as f:
306
  with AudioFile(temp_wav, 'w', f.samplerate, f.num_channels) as o:
307
  while f.tell() < f.frames:
308
  chunk = f.read(int(f.samplerate))
309
- effected = board(chunk, f.samplerate, reset=False)
310
- o.write(effected)
311
-
312
- audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
313
- audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
314
-
315
  os.remove(temp_wav)
316
-
317
- result.append(output_path)
318
- except Exception as e:
319
- traceback.print_exc()
320
- print(f"Error audio effects: {str(e)}")
321
  result.append(audio_path)
322
-
323
  return result
324
 
325
-
326
  def apply_noisereduce(audio_list, type_output):
327
- print("Noise reduce")
328
-
329
  result = []
330
  for audio_path in audio_list:
331
  out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
332
-
333
  try:
334
  audio = AudioSegment.from_file(audio_path)
335
  samples = np.array(audio.get_array_of_samples())
336
- reduced_noise = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
337
-
338
  reduced_audio = AudioSegment(
339
- reduced_noise.tobytes(),
340
  frame_rate=audio.frame_rate,
341
  sample_width=audio.sample_width,
342
  channels=audio.channels
343
  )
344
-
345
- reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
346
  result.append(out_path)
347
-
348
- except Exception as e:
349
- traceback.print_exc()
350
- print(f"Error noisereduce: {str(e)}")
351
  result.append(audio_path)
352
-
353
  return result
354
 
355
-
356
  @spaces.GPU()
357
  def convert_now(audio_files, random_tag, converter, type_output, steps):
358
- for step in range(steps):
359
  audio_files = converter(
360
- audio_files,
361
- random_tag,
362
  overwrite=False,
363
  parallel_workers=(2 if IS_COLAB else 8),
364
- type_output=type_output,
365
  )
366
  return audio_files
367
 
368
-
369
  def run(
370
- audio_files,
371
- file_m,
372
- pitch_alg,
373
- pitch_lvl,
374
- file_index,
375
- index_inf,
376
- r_m_f,
377
- e_r,
378
- c_b_p,
379
- active_noise_reduce,
380
- audio_effects,
381
- type_output,
382
- steps,
383
  ):
384
  if not audio_files:
385
  raise ValueError("Please provide audio files")
386
-
387
  if isinstance(audio_files, str):
388
  audio_files = [audio_files]
389
-
390
- try:
391
- duration_base = librosa.get_duration(filename=audio_files[0])
392
- print("Duration:", duration_base)
393
- except Exception as e:
394
- print(e)
395
-
396
- if file_m is not None and file_m.endswith(".txt"):
397
  file_m, file_index = find_my_model(file_m, file_index)
398
- print(file_m, file_index)
399
-
400
  random_tag = "USER_" + str(random.randint(10000000, 99999999))
401
-
402
  converter.apply_conf(
403
  tag=random_tag,
404
  file_model=file_m,
@@ -412,341 +254,198 @@ def run(
412
  resample_sr=0,
413
  )
414
  time.sleep(0.1)
415
-
416
  result = convert_now(audio_files, random_tag, converter, type_output, steps)
417
-
418
  if active_noise_reduce:
419
  result = apply_noisereduce(result, type_output)
420
-
421
  if audio_effects:
422
  result = add_audio_effects(result, type_output)
423
-
424
  return result
425
 
426
-
427
- # ========== UI कॉन्फ़िगरेशन ==========
428
-
429
  def audio_conf():
430
- return gr.File(
431
- label="Audio files",
432
- file_count="multiple",
433
- type="filepath",
434
- container=True,
435
- )
436
-
437
 
438
  def model_dropdown_conf():
439
  models = scan_models()
440
  choices = [name for name, _, _ in models]
441
- return gr.Dropdown(
442
- label="Select Model",
443
- choices=choices,
444
- value=choices[0] if choices else None,
445
- interactive=True,
446
- )
447
-
448
 
449
  def hidden_model_path_conf():
450
  return gr.Textbox(visible=False)
451
 
452
-
453
  def hidden_index_path_conf():
454
  return gr.Textbox(visible=False)
455
 
456
-
457
  def pitch_algo_conf():
458
- return gr.Dropdown(
459
- PITCH_ALGO_OPT,
460
- value=PITCH_ALGO_OPT[4],
461
- label="Pitch algorithm",
462
- visible=True,
463
- interactive=True,
464
- )
465
-
466
 
467
  def pitch_lvl_conf():
468
- return gr.Slider(
469
- label="Pitch level",
470
- minimum=-24,
471
- maximum=24,
472
- step=1,
473
- value=0,
474
- visible=True,
475
- interactive=True,
476
- )
477
-
478
 
479
  def index_inf_conf():
480
- return gr.Slider(
481
- minimum=0,
482
- maximum=1,
483
- label="Index influence",
484
- value=0.75,
485
- )
486
-
487
 
488
  def respiration_filter_conf():
489
- return gr.Slider(
490
- minimum=0,
491
- maximum=7,
492
- label="Respiration median filtering",
493
- value=3,
494
- step=1,
495
- interactive=True,
496
- )
497
-
498
 
499
  def envelope_ratio_conf():
500
- return gr.Slider(
501
- minimum=0,
502
- maximum=1,
503
- label="Envelope ratio",
504
- value=0.25,
505
- interactive=True,
506
- )
507
-
508
 
509
  def consonant_protec_conf():
510
- return gr.Slider(
511
- minimum=0,
512
- maximum=0.5,
513
- label="Consonant breath protection",
514
- value=0.5,
515
- interactive=True,
516
- )
517
-
518
 
519
  def button_conf():
520
- return gr.Button(
521
- "Inference",
522
- variant="primary",
523
- )
524
-
525
 
526
  def output_conf():
527
- return gr.File(
528
- label="Result",
529
- file_count="multiple",
530
- interactive=False,
531
- )
532
-
533
 
534
  def active_tts_conf():
535
- return gr.Checkbox(
536
- False,
537
- label="TTS",
538
- container=False,
539
- )
540
-
541
-
542
- def tts_voice_conf():
543
- return gr.Dropdown(
544
- label="tts voice",
545
- choices=[],
546
- visible=False,
547
- value=None,
548
- )
549
 
 
 
550
 
551
  def tts_text_conf():
552
- return gr.Textbox(
553
- value="",
554
- placeholder="Write the text here...",
555
- label="Text",
556
- visible=False,
557
- lines=3,
558
- )
559
-
560
 
561
  def tts_button_conf():
562
- return gr.Button(
563
- "Process TTS",
564
- variant="secondary",
565
- visible=False,
566
- )
567
-
568
 
569
  def tts_play_conf():
570
- return gr.Checkbox(
571
- False,
572
- label="Play",
573
- container=False,
574
- visible=False,
575
- )
576
-
577
 
578
  def sound_gui():
579
- return gr.Audio(
580
- value=None,
581
- type="filepath",
582
- autoplay=True,
583
- visible=True,
584
- interactive=False,
585
- elem_id="audio_tts",
586
- )
587
-
588
 
589
  def steps_conf():
590
- return gr.Slider(
591
- minimum=1,
592
- maximum=3,
593
- label="Steps",
594
- value=1,
595
- step=1,
596
- interactive=True,
597
- )
598
-
599
 
600
  def format_output_gui():
601
- return gr.Dropdown(
602
- label="Format output:",
603
- choices=["wav", "mp3", "flac"],
604
- value="wav",
605
- )
606
-
607
 
608
  def denoise_conf():
609
- return gr.Checkbox(
610
- False,
611
- label="Denoise",
612
- container=False,
613
- visible=True,
614
- )
615
-
616
 
617
  def effects_conf():
618
- return gr.Checkbox(
619
- False,
620
- label="Reverb",
621
- container=False,
622
- visible=True,
623
- )
624
-
625
 
626
  def infer_tts_audio(tts_voice, tts_text, play_tts):
627
  out_dir = "output"
628
  folder_tts = "USER_" + str(random.randint(10000, 99999))
629
-
630
- os.makedirs(out_dir, exist_ok=True)
631
  os.makedirs(os.path.join(out_dir, folder_tts), exist_ok=True)
632
  out_path = os.path.join(out_dir, folder_tts, "tts.mp3")
633
-
634
  asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save(out_path))
635
  if play_tts:
636
  return [out_path], out_path
637
  return [out_path], None
638
 
639
-
640
- def show_components_tts(value_active):
641
- return (
642
- gr.update(visible=value_active),
643
- gr.update(visible=value_active),
644
- gr.update(visible=value_active),
645
- gr.update(visible=value_active),
646
- )
647
-
648
 
649
  def down_active_conf():
650
- return gr.Checkbox(
651
- False,
652
- label="URL-to-Model",
653
- container=False,
654
- )
655
-
656
 
657
  def down_url_conf():
658
- return gr.Textbox(
659
- value="",
660
- placeholder="Write the url here...",
661
- label="Enter URL",
662
- visible=False,
663
- lines=1,
664
- )
665
-
666
 
667
  def down_button_conf():
668
- return gr.Button(
669
- "Process",
670
- variant="secondary",
671
- visible=False,
672
- )
673
-
674
-
675
- def show_components_down(value_active):
676
- return (
677
- gr.update(visible=value_active),
678
- gr.update(visible=value_active),
679
- gr.update(visible=value_active),
680
- )
681
 
 
 
682
 
683
  CSS = """
684
  #audio_tts {
685
- visibility: hidden;
686
- height: 0px;
687
- width: 0px;
688
- max-width: 0px;
689
- max-height: 0px;
690
  }
691
  """
692
 
693
-
694
- def get_gui(theme):
695
- with gr.Blocks(theme=theme, css=CSS, fill_width=True, fill_height=False, delete_cache=delete_cache_time) as app:
696
  gr.Markdown(title)
697
  gr.Markdown(description)
698
 
699
- # ===== TTS सेक्शन =====
700
  active_tts = active_tts_conf()
701
  with gr.Row():
702
  with gr.Column(scale=1):
703
  tts_text = tts_text_conf()
704
  with gr.Column(scale=2):
705
  with gr.Row():
706
- with gr.Column():
707
- with gr.Row():
708
- tts_voice = tts_voice_conf()
709
- tts_active_play = tts_play_conf()
710
-
711
  tts_button = tts_button_conf()
712
  tts_play = sound_gui()
713
 
714
- active_tts.change(
715
- fn=show_components_tts,
716
- inputs=[active_tts],
717
- outputs=[tts_voice, tts_text, tts_button, tts_active_play],
718
- )
719
-
720
  aud = audio_conf()
 
721
 
722
- tts_button.click(
723
- fn=infer_tts_audio,
724
- inputs=[tts_voice, tts_text, tts_active_play],
725
- outputs=[aud, tts_play],
726
- )
727
-
728
- # ===== URL-to-Model सेक्शन =====
729
- down_active_gui = down_active_conf()
730
  down_info = gr.Markdown(
731
- f"Provide a link to a zip file, like this one: `https://huggingface.co/MrDawg/ToothBrushing/resolve/main/ToothBrushing.zip?download=true`, or separate links with a comma for the .pth and .index files, like this: `{test_model}`",
732
  visible=False
733
  )
734
  with gr.Row():
735
- with gr.Column(scale=3):
736
- down_url_gui = down_url_conf()
737
- with gr.Column(scale=1):
738
- down_button_gui = down_button_conf()
739
 
740
- # ये हैं असली मॉडल पथ वाले छुपे हुए टेक्स्टबॉक्स
741
- hidden_model_path = hidden_model_path_conf()
742
- hidden_index_path = hidden_index_path_conf()
743
 
744
- down_active_gui.change(
745
- show_components_down,
746
- [down_active_gui],
747
- [down_info, down_url_gui, down_button_gui]
748
- )
749
 
750
- # जब URL से मॉडल डाउनलोड हो, तो उसके पथ छुपे हुए टेक्स्टबॉक्स में डालें
751
  def update_from_url(url_data):
752
- model_path, index_path = get_my_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  import asyncio
13
  import librosa
14
  import traceback
 
15
  from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
16
  from pedalboard.io import AudioFile
17
  from pydub import AudioSegment
 
24
  import sys
25
 
26
  parser = argparse.ArgumentParser(description="Run the app with optional sharing")
27
+ parser.add_argument('--share', action='store_true', help='Enable sharing mode')
28
+ parser.add_argument('--theme', type=str, default="aliabid94/new-theme", help='Set the theme')
 
 
 
 
 
 
 
 
 
29
  args = parser.parse_args()
30
 
31
  IS_COLAB = True if ('google.colab' in sys.modules or args.share) else False
 
36
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
37
  converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
38
 
39
+ # ========== The default model download has been removed ==========
40
+ # Previously test_model was downloaded here, which caused the Space to hang. That no longer happens.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
43
+ description = "This demo is provided for educational and research purposes only." if IS_ZERO_GPU else ""
44
+ RESOURCES = "- You can also try `RVC⚡ZERO` in Colab’s free tier [link](https://github.com/R3gm/rvc_zero_ui?tab=readme-ov-file#rvczero)."
45
  theme = args.theme
46
  delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
47
 
48
+ PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
 
 
 
 
 
 
 
49
 
50
  async def get_voices_list(proxy=None):
 
51
  from edge_tts import list_voices
52
  voices = await list_voices(proxy=proxy)
53
+ voices = sorted(voices, key=lambda v: v["ShortName"])
54
+ return [
 
55
  {
56
+ "ShortName": v["ShortName"],
57
+ "Gender": v["Gender"],
58
+ "ContentCategories": ", ".join(v["VoiceTag"]["ContentCategories"]),
59
+ "VoicePersonalities": ", ".join(v["VoiceTag"]["VoicePersonalities"]),
60
+ "FriendlyName": v["FriendlyName"],
61
  }
62
+ for v in voices
63
  ]
64
 
 
 
 
65
  def find_files(directory):
66
  file_paths = []
67
+ for fname in os.listdir(directory):
68
+ if fname.endswith(('.pth', '.zip', '.index')):
69
+ file_paths.append(os.path.join(directory, fname))
70
  return file_paths
71
 
 
72
  def unzip_in_folder(my_zip, my_dir):
73
+ with zipfile.ZipFile(my_zip) as zf:
74
+ for info in zf.infolist():
75
+ if info.is_dir():
76
  continue
77
+ info.filename = os.path.basename(info.filename)
78
+ zf.extract(info, my_dir)
 
79
 
80
  def find_my_model(a_, b_):
81
  if a_ is None or a_.endswith(".pth"):
82
  return a_, b_
83
+ txt_files = [f for f in [a_, b_] if f and f.endswith(".txt")]
 
 
 
 
 
84
  directory = os.path.dirname(a_)
 
85
  for txt in txt_files:
86
+ with open(txt) as f:
87
+ url = f.readline().strip()
88
+ download_manager(url=url, path=directory, extension="")
 
 
 
 
 
 
89
  for f in find_files(directory):
90
  if f.endswith(".zip"):
91
  unzip_in_folder(f, directory)
92
+ model = index = None
93
+ for ff in find_files(directory):
 
 
 
 
94
  if ff.endswith(".pth"):
95
+ model = ff
96
  gr.Info(f"Model found: {ff}")
97
  if ff.endswith(".index"):
98
+ index = ff
99
  gr.Info(f"Index found: {ff}")
 
100
  if not model:
101
+ gr.Error("Model not found")
 
102
  if not index:
103
  gr.Warning("Index not found")
 
104
  return model, index
105
 
 
106
  def ensure_valid_file(url):
107
  if "huggingface" not in url:
108
+ raise ValueError("Only Hugging Face URLs allowed")
109
+ req = urllib.request.Request(url, method="HEAD")
110
+ with urllib.request.urlopen(req) as resp:
111
+ size = int(resp.headers.get("Content-Length", 0))
112
+ if size > 900_000_000 and IS_ZERO_GPU:
113
+ raise ValueError("File too large for Zero GPU")
114
+ return size
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  def clear_files(directory):
117
  time.sleep(15)
118
+ shutil.rmtree(directory, ignore_errors=True)
 
 
119
 
120
  def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
121
  if not url_data:
122
  return None, None
 
123
  if "," in url_data:
124
  a_, b_ = url_data.split(",")
125
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
126
  else:
127
  a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None
 
128
  out_dir = "downloads"
129
+ folder = str(random.randint(1000, 9999))
130
+ directory = os.path.join(out_dir, folder)
131
  os.makedirs(directory, exist_ok=True)
 
132
  try:
133
+ for link in [a_] if not b_ else [a_, b_]:
 
134
  ensure_valid_file(link)
135
+ download_manager(url=link, path=directory, extension="")
 
 
 
 
 
136
  for f in find_files(directory):
137
  if f.endswith(".zip"):
138
  unzip_in_folder(f, directory)
139
+ model = index = None
140
+ for ff in find_files(directory):
 
 
 
 
141
  if ff.endswith(".pth"):
142
  model = ff
 
143
  if ff.endswith(".index"):
144
  index = ff
 
 
145
  if not model:
146
+ raise ValueError("Model .pth not found")
 
147
  if not index:
148
  gr.Warning("Index not found")
149
+ return os.path.abspath(model), os.path.abspath(index) if index else None
 
 
 
 
 
 
150
  finally:
151
+ threading.Thread(target=clear_files, args=(directory,)).start()
 
152
 
153
+ # ========== Scan models from the logs/ folder ==========
 
154
  def scan_models():
 
155
  logs_dir = "logs"
156
  if not os.path.isdir(logs_dir):
157
  return []
 
158
  models = []
159
  for model_name in os.listdir(logs_dir):
160
  model_path = os.path.join(logs_dir, model_name)
161
  if not os.path.isdir(model_path):
162
  continue
 
 
163
  pth_files = [f for f in os.listdir(model_path) if f.endswith(".pth")]
164
+ idx_files = [f for f in os.listdir(model_path) if f.endswith(".index")]
165
+ if pth_files and idx_files:
 
 
166
  pth_path = os.path.join(model_path, pth_files[0])
167
+ idx_path = os.path.join(model_path, idx_files[0])
168
+ models.append((model_name, pth_path, idx_path))
169
  return models
170
 
 
171
  def update_model_paths(model_name):
172
+ for name, pth, idx in scan_models():
 
 
173
  if name == model_name:
174
  return pth, idx
175
  return None, None
176
 
177
+ # ========== Audio effects ==========
 
 
178
  def add_audio_effects(audio_list, type_output):
 
 
179
  result = []
180
  for audio_path in audio_list:
181
  try:
182
+ out_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
183
+ board = Pedalboard([
184
+ HighpassFilter(),
185
+ Compressor(ratio=4, threshold_db=-15),
186
+ Reverb(room_size=0.1, dry_level=0.8, wet_level=0.2, damping=0.7)
187
+ ])
 
 
 
 
188
  temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
 
189
  with AudioFile(audio_path) as f:
190
  with AudioFile(temp_wav, 'w', f.samplerate, f.num_channels) as o:
191
  while f.tell() < f.frames:
192
  chunk = f.read(int(f.samplerate))
193
+ o.write(board(chunk, f.samplerate, reset=False))
194
+ AudioSegment.from_file(temp_wav).export(out_path, format=type_output)
 
 
 
 
195
  os.remove(temp_wav)
196
+ result.append(out_path)
197
+ except Exception:
 
 
 
198
  result.append(audio_path)
 
199
  return result
200
 
 
201
  def apply_noisereduce(audio_list, type_output):
 
 
202
  result = []
203
  for audio_path in audio_list:
204
  out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
 
205
  try:
206
  audio = AudioSegment.from_file(audio_path)
207
  samples = np.array(audio.get_array_of_samples())
208
+ reduced = nr.reduce_noise(samples, sr=audio.frame_rate, prop_decrease=0.6)
 
209
  reduced_audio = AudioSegment(
210
+ reduced.tobytes(),
211
  frame_rate=audio.frame_rate,
212
  sample_width=audio.sample_width,
213
  channels=audio.channels
214
  )
215
+ reduced_audio.export(out_path, format=type_output)
 
216
  result.append(out_path)
217
+ except Exception:
 
 
 
218
  result.append(audio_path)
 
219
  return result
220
 
 
221
  @spaces.GPU()
222
  def convert_now(audio_files, random_tag, converter, type_output, steps):
223
+ for _ in range(steps):
224
  audio_files = converter(
225
+ audio_files, random_tag,
 
226
  overwrite=False,
227
  parallel_workers=(2 if IS_COLAB else 8),
228
+ type_output=type_output
229
  )
230
  return audio_files
231
 
 
232
  def run(
233
+ audio_files, file_m, pitch_alg, pitch_lvl, file_index,
234
+ index_inf, r_m_f, e_r, c_b_p, active_noise_reduce,
235
+ audio_effects, type_output, steps
 
 
 
 
 
 
 
 
 
 
236
  ):
237
  if not audio_files:
238
  raise ValueError("Please provide audio files")
 
239
  if isinstance(audio_files, str):
240
  audio_files = [audio_files]
241
+ if file_m and file_m.endswith(".txt"):
 
 
 
 
 
 
 
242
  file_m, file_index = find_my_model(file_m, file_index)
 
 
243
  random_tag = "USER_" + str(random.randint(10000000, 99999999))
 
244
  converter.apply_conf(
245
  tag=random_tag,
246
  file_model=file_m,
 
254
  resample_sr=0,
255
  )
256
  time.sleep(0.1)
 
257
  result = convert_now(audio_files, random_tag, converter, type_output, steps)
 
258
  if active_noise_reduce:
259
  result = apply_noisereduce(result, type_output)
 
260
  if audio_effects:
261
  result = add_audio_effects(result, type_output)
 
262
  return result
263
 
264
+ # ========== UI components ==========
 
 
265
  def audio_conf():
266
+ return gr.File(label="Audio files", file_count="multiple", type="filepath")
 
 
 
 
 
 
267
 
268
  def model_dropdown_conf():
269
  models = scan_models()
270
  choices = [name for name, _, _ in models]
271
+ return gr.Dropdown(label="Select Model", choices=choices, value=choices[0] if choices else None, interactive=True)
 
 
 
 
 
 
272
 
273
  def hidden_model_path_conf():
274
  return gr.Textbox(visible=False)
275
 
 
276
  def hidden_index_path_conf():
277
  return gr.Textbox(visible=False)
278
 
 
279
  def pitch_algo_conf():
280
+ return gr.Dropdown(PITCH_ALGO_OPT, value="rmvpe+", label="Pitch algorithm")
 
 
 
 
 
 
 
281
 
282
  def pitch_lvl_conf():
283
+ return gr.Slider(-24, 24, value=0, step=1, label="Pitch level")
 
 
 
 
 
 
 
 
 
284
 
285
  def index_inf_conf():
286
+ return gr.Slider(0, 1, value=0.75, label="Index influence")
 
 
 
 
 
 
287
 
288
  def respiration_filter_conf():
289
+ return gr.Slider(0, 7, value=3, step=1, label="Respiration median filtering")
 
 
 
 
 
 
 
 
290
 
291
  def envelope_ratio_conf():
292
+ return gr.Slider(0, 1, value=0.25, label="Envelope ratio")
 
 
 
 
 
 
 
293
 
294
  def consonant_protec_conf():
295
+ return gr.Slider(0, 0.5, value=0.5, label="Consonant breath protection")
 
 
 
 
 
 
 
296
 
297
  def button_conf():
298
+ return gr.Button("Inference", variant="primary")
 
 
 
 
299
 
300
  def output_conf():
301
+ return gr.File(label="Result", file_count="multiple", interactive=False)
 
 
 
 
 
302
 
303
  def active_tts_conf():
304
+ return gr.Checkbox(False, label="TTS", container=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
305
 
306
+ def tts_voice_conf(voices):
307
+ return gr.Dropdown(label="tts voice", choices=voices, visible=False)
308
 
309
  def tts_text_conf():
310
+ return gr.Textbox(placeholder="Write the text here...", label="Text", visible=False, lines=3)
 
 
 
 
 
 
 
311
 
312
  def tts_button_conf():
313
+ return gr.Button("Process TTS", variant="secondary", visible=False)
 
 
 
 
 
314
 
315
  def tts_play_conf():
316
+ return gr.Checkbox(False, label="Play", container=False, visible=False)
 
 
 
 
 
 
317
 
318
  def sound_gui():
319
+ return gr.Audio(type="filepath", autoplay=True, visible=True, interactive=False, elem_id="audio_tts")
 
 
 
 
 
 
 
 
320
 
321
  def steps_conf():
322
+ return gr.Slider(1, 3, value=1, step=1, label="Steps")
 
 
 
 
 
 
 
 
323
 
324
  def format_output_gui():
325
+ return gr.Dropdown(choices=["wav", "mp3", "flac"], value="wav", label="Format output")
 
 
 
 
 
326
 
327
  def denoise_conf():
328
+ return gr.Checkbox(False, label="Denoise", container=False)
 
 
 
 
 
 
329
 
330
  def effects_conf():
331
+ return gr.Checkbox(False, label="Reverb", container=False)
 
 
 
 
 
 
332
 
333
  def infer_tts_audio(tts_voice, tts_text, play_tts):
334
  out_dir = "output"
335
  folder_tts = "USER_" + str(random.randint(10000, 99999))
 
 
336
  os.makedirs(os.path.join(out_dir, folder_tts), exist_ok=True)
337
  out_path = os.path.join(out_dir, folder_tts, "tts.mp3")
 
338
  asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save(out_path))
339
  if play_tts:
340
  return [out_path], out_path
341
  return [out_path], None
342
 
343
def show_components_tts(val):
    """Return four identical visibility updates, one per TTS widget being toggled."""
    toggle = gr.update(visible=val)
    return (toggle, toggle, toggle, toggle)
 
 
 
 
 
 
 
345
 
346
  def down_active_conf():
347
+ return gr.Checkbox(False, label="URL-to-Model", container=False)
 
 
 
 
 
348
 
349
  def down_url_conf():
350
+ return gr.Textbox(placeholder="Write the url here...", label="Enter URL", visible=False)
 
 
 
 
 
 
 
351
 
352
  def down_button_conf():
353
+ return gr.Button("Process", variant="secondary", visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
354
 
355
def show_components_down(val):
    """Return three identical visibility updates, one per download widget being toggled."""
    toggle = gr.update(visible=val)
    return (toggle, toggle, toggle)
357
 
358
  # Stylesheet passed to gr.Blocks: collapses the element with id "audio_tts"
  # (the id given to the Audio component in sound_gui) to zero size and hides it.
  CSS = """
  #audio_tts {
  visibility: hidden; height: 0px; width: 0px; max-width: 0px; max-height: 0px;
  }
  """
363
 
364
def get_gui(theme, voices):
    """Assemble the Gradio Blocks interface and connect its event handlers.

    Args:
        theme: Forwarded to ``gr.Blocks`` as ``theme``.
        voices: Choices handed to the TTS voice dropdown.

    Returns:
        The constructed ``gr.Blocks`` application (not launched here).
    """
    # NOTE(review): the nesting of the layout containers is assumed here;
    # confirm it against the intended page layout.
    with gr.Blocks(theme=theme, css=CSS, delete_cache=delete_cache_time) as app:
        gr.Markdown(title)
        gr.Markdown(description)

        # Text-to-speech controls: the text, voice, play and button widgets
        # are created hidden and revealed by the "TTS" checkbox.
        tts_toggle = active_tts_conf()
        with gr.Row():
            with gr.Column(scale=1):
                text_box = tts_text_conf()
            with gr.Column(scale=2):
                with gr.Row():
                    voice_picker = tts_voice_conf(voices)
                    autoplay_box = tts_play_conf()
                tts_run = tts_button_conf()
                tts_player = sound_gui()

        tts_toggle.change(
            show_components_tts,
            inputs=[tts_toggle],
            outputs=[voice_picker, text_box, tts_run, autoplay_box],
        )

        source_audio = audio_conf()
        tts_run.click(
            infer_tts_audio,
            inputs=[voice_picker, text_box, autoplay_box],
            outputs=[source_audio, tts_player],
        )

        # Model download controls, revealed by the "URL-to-Model" checkbox.
        url_toggle = down_active_conf()
        url_help = gr.Markdown(
            "Provide a link to a zip file, or separate links with comma for .pth and .index files.",
            visible=False
        )
        with gr.Row():
            url_box = down_url_conf()
            url_run = down_button_conf()

        model_path = hidden_model_path_conf()
        index_path = hidden_index_path_conf()

        url_toggle.change(
            show_components_down,
            inputs=[url_toggle],
            outputs=[url_help, url_box, url_run],
        )

        def update_from_url(url_data):
            # Feed the pair returned by get_my_model into the two hidden path fields.
            model_p, index_p = get_my_model(url_data)
            return model_p, index_p

        url_run.click(
            update_from_url,
            inputs=[url_box],
            outputs=[model_path, index_path],
        )

        model_picker = model_dropdown_conf()

        def on_model_select(name):
            # Delegate to update_model_paths for the selected model name.
            return update_model_paths(name)

        model_picker.change(
            on_model_select,
            inputs=[model_picker],
            outputs=[model_path, index_path],
        )

        with gr.Accordion("Advanced settings", open=False):
            pitch_algo = pitch_algo_conf()
            pitch_level = pitch_lvl_conf()
            index_influence = index_inf_conf()
            respiration_filter = respiration_filter_conf()
            envelope_ratio = envelope_ratio_conf()
            consonant_protection = consonant_protec_conf()
            steps = steps_conf()
            output_format = format_output_gui()
            with gr.Row():
                denoise_box = denoise_conf()
                reverb_box = effects_conf()

        run_button = button_conf()
        result_files = output_conf()

        # The order of this list must match the positional parameters of run.
        run_inputs = [
            source_audio, model_path, pitch_algo, pitch_level, index_path,
            index_influence, respiration_filter, envelope_ratio, consonant_protection,
            denoise_box, reverb_box, output_format, steps,
        ]
        run_button.click(run, inputs=run_inputs, outputs=result_files)

        gr.Markdown(RESOURCES)

    return app
440
+
441
if __name__ == "__main__":
    # Fetch the voice list on a private event loop and close that loop when
    # done, so it is not leaked. The thread's current-loop state is left
    # untouched for whatever runs afterwards.
    voices_loop = asyncio.new_event_loop()
    try:
        tts_voice_list = voices_loop.run_until_complete(get_voices_list(proxy=None))
    finally:
        voices_loop.close()

    # Build (display label, value) pairs; the value is "<ShortName>-<Gender>",
    # the format infer_tts_audio strips the last field from.
    voices = sorted(
        (
            " - ".join(reversed(v["FriendlyName"].split("-")))
            .replace("Microsoft ", "")
            .replace("Online (Natural)", f"({v['Gender']})")
            .strip(),
            f"{v['ShortName']}-{v['Gender']}",
        )
        for v in tts_voice_list
    )

    app = get_gui(theme, voices)
    app.queue(default_concurrency_limit=40)
    app.launch(max_threads=40, share=IS_COLAB, show_error=True, quiet=False, debug=IS_COLAB, ssr_mode=False)