Files changed (4) hide show
  1. README.md +1 -1
  2. app.py +54 -172
  3. pre-requirements.txt +0 -2
  4. requirements.txt +4 -11
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: ⚡
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.43.1
8
  app_file: app.py
9
  license: mit
10
  pinned: true
 
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 4.28.3
8
  app_file: app.py
9
  license: mit
10
  pinned: true
app.py CHANGED
@@ -6,7 +6,7 @@ import random
6
  import logging
7
  import time
8
  import soundfile as sf
9
- from infer_rvc_python.main import download_manager, load_hu_bert, Config
10
  import zipfile
11
  import edge_tts
12
  import asyncio
@@ -21,57 +21,14 @@ import numpy as np
21
  import urllib.request
22
  import shutil
23
  import threading
24
- import argparse
25
- import sys
26
- import torch
27
- import fairseq
28
-
29
- parser = argparse.ArgumentParser(description="Run the app with optional sharing")
30
- parser.add_argument(
31
- '--share',
32
- action='store_true',
33
- help='Enable sharing mode'
34
- )
35
- parser.add_argument(
36
- '--theme',
37
- type=str,
38
- default="aliabid94/new-theme",
39
- help='Set the theme (default: aliabid94/new-theme)'
40
- )
41
- args = parser.parse_args()
42
-
43
- IS_COLAB = True if ('google.colab' in sys.modules or args.share) else False
44
- IS_ZERO_GPU = os.getenv("SPACES_ZERO_GPU")
45
 
46
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
47
 
48
- torch.serialization.add_safe_globals([fairseq.data.dictionary.Dictionary])
49
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
50
- converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
51
-
52
- test_model = "https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/Aldeano_Minecraft__RVC_V2_-_500_Epochs_/resolve/main/model.index?download=true"
53
- test_names = ["model.pth", "model.index"]
54
-
55
- for url, filename in zip(test_model.split(", "), test_names):
56
- try:
57
- download_manager(
58
- url=url,
59
- path=".",
60
- extension="",
61
- overwrite=False,
62
- progress=True,
63
- )
64
- if not os.path.isfile(filename):
65
- raise FileNotFoundError
66
- except Exception:
67
- with open(filename, "wb") as f:
68
- pass
69
 
70
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
71
- description = "This demo is provided for educational and research purposes only. The authors and contributors of this project do not endorse or encourage any misuse or unethical use of this software. Any use of this software for purposes other than those intended is solely at the user's own risk. The authors and contributors shall not be held responsible for any damages or liabilities arising from the use of this demo inappropriately." if IS_ZERO_GPU else ""
72
- RESOURCES = "- You can also try `RVC⚡ZERO` in Colab’s free tier, which provides free GPU [link](https://github.com/R3gm/rvc_zero_ui?tab=readme-ov-file#rvczero)."
73
- theme = args.theme
74
- delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
75
 
76
  PITCH_ALGO_OPT = [
77
  "pm",
@@ -82,26 +39,6 @@ PITCH_ALGO_OPT = [
82
  ]
83
 
84
 
85
- async def get_voices_list(proxy=None):
86
- """Print all available voices."""
87
- from edge_tts import list_voices
88
- voices = await list_voices(proxy=proxy)
89
- voices = sorted(voices, key=lambda voice: voice["ShortName"])
90
-
91
- table = [
92
- {
93
- "ShortName": voice["ShortName"],
94
- "Gender": voice["Gender"],
95
- "ContentCategories": ", ".join(voice["VoiceTag"]["ContentCategories"]),
96
- "VoicePersonalities": ", ".join(voice["VoiceTag"]["VoicePersonalities"]),
97
- "FriendlyName": voice["FriendlyName"],
98
- }
99
- for voice in voices
100
- ]
101
-
102
- return table
103
-
104
-
105
  def find_files(directory):
106
  file_paths = []
107
  for filename in os.listdir(directory):
@@ -169,24 +106,19 @@ def find_my_model(a_, b_):
169
  return model, index
170
 
171
 
172
- def ensure_valid_file(url):
 
173
  if "huggingface" not in url:
174
  raise ValueError("Only downloads from Hugging Face are allowed")
175
 
176
  try:
177
- request = urllib.request.Request(url, method="HEAD")
178
- with urllib.request.urlopen(request) as response:
179
- content_length = response.headers.get("Content-Length")
180
-
181
- if content_length is None:
182
- raise ValueError("No Content-Length header found")
183
 
184
  file_size = int(content_length)
185
- # print("debug", url, file_size)
186
- if file_size > 900000000 and IS_ZERO_GPU:
187
- raise ValueError("The file is too large. Max allowed is 900 MB.")
188
-
189
- return file_size
190
 
191
  except Exception as e:
192
  raise e
@@ -198,13 +130,13 @@ def clear_files(directory):
198
  shutil.rmtree(directory)
199
 
200
 
201
- def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
202
 
203
  if not url_data:
204
  return None, None
205
 
206
  if "," in url_data:
207
- a_, b_ = url_data.split(",")
208
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
209
  else:
210
  a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None
@@ -215,9 +147,12 @@ def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
215
  os.makedirs(directory, exist_ok=True)
216
 
217
  try:
 
 
 
 
218
  valid_url = [a_] if not b_ else [a_, b_]
219
  for link in valid_url:
220
- ensure_valid_file(link)
221
  download_manager(
222
  url=link,
223
  path=directory,
@@ -259,13 +194,13 @@ def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
259
  t.start()
260
 
261
 
262
- def add_audio_effects(audio_list, type_output):
263
  print("Audio effects")
264
 
265
  result = []
266
  for audio_path in audio_list:
267
  try:
268
- output_path = f'{os.path.splitext(audio_path)[0]}_effects.{type_output}'
269
 
270
  # Initialize audio effects plugins
271
  board = Pedalboard(
@@ -276,23 +211,13 @@ def add_audio_effects(audio_list, type_output):
276
  ]
277
  )
278
 
279
- # Temporary WAV to hold processed data before exporting
280
- temp_wav = f'{os.path.splitext(audio_path)[0]}_temp.wav'
281
-
282
  with AudioFile(audio_path) as f:
283
- with AudioFile(temp_wav, 'w', f.samplerate, f.num_channels) as o:
 
284
  while f.tell() < f.frames:
285
  chunk = f.read(int(f.samplerate))
286
  effected = board(chunk, f.samplerate, reset=False)
287
  o.write(effected)
288
-
289
- # Convert with pydub to desired output type
290
- audio_seg = AudioSegment.from_file(temp_wav, format=type_output)
291
- audio_seg.export(output_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
292
-
293
- # Clean up temp file
294
- os.remove(temp_wav)
295
-
296
  result.append(output_path)
297
  except Exception as e:
298
  traceback.print_exc()
@@ -302,13 +227,13 @@ def add_audio_effects(audio_list, type_output):
302
  return result
303
 
304
 
305
- def apply_noisereduce(audio_list, type_output):
306
  # https://github.com/sa-if/Audio-Denoiser
307
  print("Noice reduce")
308
 
309
  result = []
310
  for audio_path in audio_list:
311
- out_path = f"{os.path.splitext(audio_path)[0]}_noisereduce.{type_output}"
312
 
313
  try:
314
  # Load audio file
@@ -329,7 +254,7 @@ def apply_noisereduce(audio_list, type_output):
329
  )
330
 
331
  # Save reduced audio to file
332
- reduced_audio.export(out_path, format=type_output, bitrate=("320k" if type_output == "mp3" else None))
333
  result.append(out_path)
334
 
335
  except Exception as e:
@@ -341,17 +266,13 @@ def apply_noisereduce(audio_list, type_output):
341
 
342
 
343
  @spaces.GPU()
344
- def convert_now(audio_files, random_tag, converter, type_output, steps):
345
- for step in range(steps):
346
- audio_files = converter(
347
- audio_files,
348
- random_tag,
349
- overwrite=False,
350
- parallel_workers=(2 if IS_COLAB else 8),
351
- type_output=type_output,
352
- )
353
-
354
- return audio_files
355
 
356
 
357
  def run(
@@ -366,8 +287,6 @@ def run(
366
  c_b_p,
367
  active_noise_reduce,
368
  audio_effects,
369
- type_output,
370
- steps,
371
  ):
372
  if not audio_files:
373
  raise ValueError("The audio pls")
@@ -397,17 +316,17 @@ def run(
397
  respiration_median_filtering=r_m_f,
398
  envelope_ratio=e_r,
399
  consonant_breath_protection=c_b_p,
400
- resample_sr=0,
401
  )
402
  time.sleep(0.1)
403
 
404
- result = convert_now(audio_files, random_tag, converter, type_output, steps)
405
 
406
  if active_noise_reduce:
407
- result = apply_noisereduce(result, type_output)
408
 
409
  if audio_effects:
410
- result = add_audio_effects(result, type_output)
411
 
412
  return result
413
 
@@ -566,30 +485,10 @@ def sound_gui():
566
  type="filepath",
567
  # format="mp3",
568
  autoplay=True,
569
- visible=True,
570
- interactive=False,
571
- elem_id="audio_tts",
572
- )
573
-
574
-
575
- def steps_conf():
576
- return gr.Slider(
577
- minimum=1,
578
- maximum=3,
579
- label="Steps",
580
- value=1,
581
- step=1,
582
- interactive=True,
583
  )
584
 
585
 
586
- def format_output_gui():
587
- return gr.Dropdown(
588
- label="Format output:",
589
- choices=["wav", "mp3", "flac"],
590
- value="wav",
591
- )
592
-
593
  def denoise_conf():
594
  return gr.Checkbox(
595
  False,
@@ -672,18 +571,9 @@ def show_components_down(value_active):
672
  visible=value_active
673
  )
674
 
675
- CSS = """
676
- #audio_tts {
677
- visibility: hidden; /* invisible but still takes space */
678
- height: 0px;
679
- width: 0px;
680
- max-width: 0px;
681
- max-height: 0px;
682
- }
683
- """
684
 
685
  def get_gui(theme):
686
- with gr.Blocks(theme=theme, css=CSS, fill_width=True, fill_height=False, delete_cache=delete_cache_time) as app:
687
  gr.Markdown(title)
688
  gr.Markdown(description)
689
 
@@ -718,7 +608,7 @@ def get_gui(theme):
718
 
719
  down_active_gui = down_active_conf()
720
  down_info = gr.Markdown(
721
- f"Provide a link to a zip file, like this one: `https://huggingface.co/MrDawg/ToothBrushing/resolve/main/ToothBrushing.zip?download=true`, or separate links with a comma for the .pth and .index files, like this: `{test_model}`",
722
  visible=False
723
  )
724
  with gr.Row():
@@ -744,20 +634,17 @@ def get_gui(theme):
744
  [model, indx]
745
  )
746
 
747
- with gr.Accordion(label="Advanced settings", open=False):
748
- algo = pitch_algo_conf()
749
- algo_lvl = pitch_lvl_conf()
750
- indx_inf = index_inf_conf()
751
- res_fc = respiration_filter_conf()
752
- envel_r = envelope_ratio_conf()
753
- const = consonant_protec_conf()
754
- steps_gui = steps_conf()
755
- format_out = format_output_gui()
756
- with gr.Row():
757
- with gr.Column():
758
- with gr.Row():
759
- denoise_gui = denoise_conf()
760
- effects_gui = effects_conf()
761
  button_base = button_conf()
762
  output_base = output_conf()
763
 
@@ -775,8 +662,6 @@ def get_gui(theme):
775
  const,
776
  denoise_gui,
777
  effects_gui,
778
- format_out,
779
- steps_gui,
780
  ],
781
  outputs=[output_base],
782
  )
@@ -833,17 +718,14 @@ def get_gui(theme):
833
  outputs=[output_base],
834
  cache_examples=False,
835
  )
836
- gr.Markdown(RESOURCES)
837
 
838
  return app
839
 
840
 
841
  if __name__ == "__main__":
842
- tts_voice_list = asyncio.new_event_loop().run_until_complete(get_voices_list(proxy=None))
843
- voices = sorted([
844
- (" - ".join(reversed(v["FriendlyName"].split("-"))).replace("Microsoft ", "").replace("Online (Natural)", f"({v['Gender']})").strip(), f"{v['ShortName']}-{v['Gender']}")
845
- for v in tts_voice_list
846
- ])
847
 
848
  app = get_gui(theme)
849
 
@@ -851,9 +733,9 @@ if __name__ == "__main__":
851
 
852
  app.launch(
853
  max_threads=40,
854
- share=IS_COLAB,
855
  show_error=True,
856
  quiet=False,
857
- debug=IS_COLAB,
858
- ssr_mode=False,
859
  )
 
6
  import logging
7
  import time
8
  import soundfile as sf
9
+ from infer_rvc_python.main import download_manager
10
  import zipfile
11
  import edge_tts
12
  import asyncio
 
21
  import urllib.request
22
  import shutil
23
  import threading
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
26
 
 
27
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
30
+ description = "This demo is provided for educational and research purposes only. The authors and contributors of this project do not endorse or encourage any misuse or unethical use of this software. Any use of this software for purposes other than those intended is solely at the user's own risk. The authors and contributors shall not be held responsible for any damages or liabilities arising from the use of this demo inappropriately."
31
+ theme = "aliabid94/new-theme"
 
 
32
 
33
  PITCH_ALGO_OPT = [
34
  "pm",
 
39
  ]
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def find_files(directory):
43
  file_paths = []
44
  for filename in os.listdir(directory):
 
106
  return model, index
107
 
108
 
109
+ def get_file_size(url):
110
+
111
  if "huggingface" not in url:
112
  raise ValueError("Only downloads from Hugging Face are allowed")
113
 
114
  try:
115
+ with urllib.request.urlopen(url) as response:
116
+ info = response.info()
117
+ content_length = info.get("Content-Length")
 
 
 
118
 
119
  file_size = int(content_length)
120
+ if file_size > 500000000:
121
+ raise ValueError("The file is too large. You can only download files up to 500 MB in size.")
 
 
 
122
 
123
  except Exception as e:
124
  raise e
 
130
  shutil.rmtree(directory)
131
 
132
 
133
+ def get_my_model(url_data):
134
 
135
  if not url_data:
136
  return None, None
137
 
138
  if "," in url_data:
139
+ a_, b_ = url_data.split()
140
  a_, b_ = a_.strip().replace("/blob/", "/resolve/"), b_.strip().replace("/blob/", "/resolve/")
141
  else:
142
  a_, b_ = url_data.strip().replace("/blob/", "/resolve/"), None
 
147
  os.makedirs(directory, exist_ok=True)
148
 
149
  try:
150
+ get_file_size(a_)
151
+ if b_:
152
+ get_file_size(b_)
153
+
154
  valid_url = [a_] if not b_ else [a_, b_]
155
  for link in valid_url:
 
156
  download_manager(
157
  url=link,
158
  path=directory,
 
194
  t.start()
195
 
196
 
197
+ def add_audio_effects(audio_list):
198
  print("Audio effects")
199
 
200
  result = []
201
  for audio_path in audio_list:
202
  try:
203
+ output_path = f'{os.path.splitext(audio_path)[0]}_effects.wav'
204
 
205
  # Initialize audio effects plugins
206
  board = Pedalboard(
 
211
  ]
212
  )
213
 
 
 
 
214
  with AudioFile(audio_path) as f:
215
+ with AudioFile(output_path, 'w', f.samplerate, f.num_channels) as o:
216
+ # Read one second of audio at a time, until the file is empty:
217
  while f.tell() < f.frames:
218
  chunk = f.read(int(f.samplerate))
219
  effected = board(chunk, f.samplerate, reset=False)
220
  o.write(effected)
 
 
 
 
 
 
 
 
221
  result.append(output_path)
222
  except Exception as e:
223
  traceback.print_exc()
 
227
  return result
228
 
229
 
230
+ def apply_noisereduce(audio_list):
231
  # https://github.com/sa-if/Audio-Denoiser
232
  print("Noice reduce")
233
 
234
  result = []
235
  for audio_path in audio_list:
236
+ out_path = f'{os.path.splitext(audio_path)[0]}_noisereduce.wav'
237
 
238
  try:
239
  # Load audio file
 
254
  )
255
 
256
  # Save reduced audio to file
257
+ reduced_audio.export(out_path, format="wav")
258
  result.append(out_path)
259
 
260
  except Exception as e:
 
266
 
267
 
268
  @spaces.GPU()
269
+ def convert_now(audio_files, random_tag, converter):
270
+ return converter(
271
+ audio_files,
272
+ random_tag,
273
+ overwrite=False,
274
+ parallel_workers=8
275
+ )
 
 
 
 
276
 
277
 
278
  def run(
 
287
  c_b_p,
288
  active_noise_reduce,
289
  audio_effects,
 
 
290
  ):
291
  if not audio_files:
292
  raise ValueError("The audio pls")
 
316
  respiration_median_filtering=r_m_f,
317
  envelope_ratio=e_r,
318
  consonant_breath_protection=c_b_p,
319
+ resample_sr=44100 if audio_files[0].endswith('.mp3') else 0,
320
  )
321
  time.sleep(0.1)
322
 
323
+ result = convert_now(audio_files, random_tag, converter)
324
 
325
  if active_noise_reduce:
326
+ result = apply_noisereduce(result)
327
 
328
  if audio_effects:
329
+ result = add_audio_effects(result)
330
 
331
  return result
332
 
 
485
  type="filepath",
486
  # format="mp3",
487
  autoplay=True,
488
+ visible=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  )
490
 
491
 
 
 
 
 
 
 
 
492
  def denoise_conf():
493
  return gr.Checkbox(
494
  False,
 
571
  visible=value_active
572
  )
573
 
 
 
 
 
 
 
 
 
 
574
 
575
  def get_gui(theme):
576
+ with gr.Blocks(theme=theme, delete_cache=(3200, 3200)) as app:
577
  gr.Markdown(title)
578
  gr.Markdown(description)
579
 
 
608
 
609
  down_active_gui = down_active_conf()
610
  down_info = gr.Markdown(
611
+ "Provide a link to a zip file, like this one: `https://huggingface.co/mrmocciai/Models/resolve/main/Genshin%20Impact/ayaka-v2.zip?download=true`, or separate links with a comma for the .pth and .index files, like this: `https://huggingface.co/sail-rvc/ayaka-jp/resolve/main/model.pth?download=true, https://huggingface.co/sail-rvc/ayaka-jp/resolve/main/model.index?download=true`",
612
  visible=False
613
  )
614
  with gr.Row():
 
634
  [model, indx]
635
  )
636
 
637
+ algo = pitch_algo_conf()
638
+ algo_lvl = pitch_lvl_conf()
639
+ indx_inf = index_inf_conf()
640
+ res_fc = respiration_filter_conf()
641
+ envel_r = envelope_ratio_conf()
642
+ const = consonant_protec_conf()
643
+ with gr.Row():
644
+ with gr.Column():
645
+ with gr.Row():
646
+ denoise_gui = denoise_conf()
647
+ effects_gui = effects_conf()
 
 
 
648
  button_base = button_conf()
649
  output_base = output_conf()
650
 
 
662
  const,
663
  denoise_gui,
664
  effects_gui,
 
 
665
  ],
666
  outputs=[output_base],
667
  )
 
718
  outputs=[output_base],
719
  cache_examples=False,
720
  )
 
721
 
722
  return app
723
 
724
 
725
  if __name__ == "__main__":
726
+
727
+ tts_voice_list = asyncio.new_event_loop().run_until_complete(edge_tts.list_voices())
728
+ voices = sorted([f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list])
 
 
729
 
730
  app = get_gui(theme)
731
 
 
733
 
734
  app.launch(
735
  max_threads=40,
736
+ share=False,
737
  show_error=True,
738
  quiet=False,
739
+ debug=False,
740
+ allowed_paths=["./downloads/"],
741
  )
pre-requirements.txt DELETED
@@ -1,2 +0,0 @@
1
- pip==23.0.1
2
- Setuptools<=80.6.0
 
 
 
requirements.txt CHANGED
@@ -1,13 +1,6 @@
1
- torch==2.9.1
2
- torchvision==0.24.1
3
- torchaudio==2.9.1
4
- infer-rvc-python
5
- edge_tts==7.2.7
6
  pedalboard
7
  noisereduce
8
- numpy==1.23.5
9
- transformers<=4.48.3
10
- # pydantic==2.10.6
11
- gradio==5.43.1
12
- spaces
13
- matplotlib-inline
 
1
+ torch==2.2.0
2
+ infer-rvc-python==1.1.0
3
+ edge-tts
 
 
4
  pedalboard
5
  noisereduce
6
+ numpy==1.23.5