Singhp08 committed on
Commit
8ffa95c
·
verified ·
1 Parent(s): 1bf63c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -42
app.py CHANGED
@@ -23,6 +23,7 @@ import threading
23
  import argparse
24
  import sys
25
 
 
26
  parser = argparse.ArgumentParser(description="Run the app with optional sharing")
27
  parser.add_argument('--share', action='store_true', help='Enable sharing mode')
28
  parser.add_argument('--theme', type=str, default="aliabid94/new-theme", help='Set the theme')
@@ -33,12 +34,10 @@ IS_ZERO_GPU = os.getenv("SPACES_ZERO_GPU")
33
 
34
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
35
 
 
36
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
37
  converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
38
 
39
- # ========== डिफ़ॉल्ट मॉडल डाउनलोड को हटा दिया गया है ==========
40
- # पहले यहाँ test_model डाउनलोड होता था, जिससे स्पेस हैंग हो जाता था। अब नहीं होगा।
41
-
42
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
43
  description = "This demo is provided for educational and research purposes only." if IS_ZERO_GPU else ""
44
  RESOURCES = "- You can also try `RVC⚡ZERO` in Colab’s free tier [link](https://github.com/R3gm/rvc_zero_ui?tab=readme-ov-file#rvczero)."
@@ -47,6 +46,7 @@ delete_cache_time = (3200, 3200) if IS_ZERO_GPU else (86400, 86400)
47
 
48
  PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
49
 
 
50
  async def get_voices_list(proxy=None):
51
  from edge_tts import list_voices
52
  voices = await list_voices(proxy=proxy)
@@ -62,6 +62,7 @@ async def get_voices_list(proxy=None):
62
  for v in voices
63
  ]
64
 
 
65
  def find_files(directory):
66
  file_paths = []
67
  for fname in os.listdir(directory):
@@ -150,31 +151,56 @@ def get_my_model(url_data, progress=gr.Progress(track_tqdm=True)):
150
  finally:
151
  threading.Thread(target=clear_files, args=(directory,)).start()
152
 
153
- # ========== logs/ फोल्डर से मॉडल स्कैन ==========
154
  def scan_models():
 
 
 
 
 
155
  logs_dir = "logs"
156
  if not os.path.isdir(logs_dir):
157
  return []
 
158
  models = []
159
- for model_name in os.listdir(logs_dir):
160
- model_path = os.path.join(logs_dir, model_name)
161
- if not os.path.isdir(model_path):
162
- continue
163
- pth_files = [f for f in os.listdir(model_path) if f.endswith(".pth")]
164
- idx_files = [f for f in os.listdir(model_path) if f.endswith(".index")]
165
- if pth_files and idx_files:
166
- pth_path = os.path.join(model_path, pth_files[0])
167
- idx_path = os.path.join(model_path, idx_files[0])
168
- models.append((model_name, pth_path, idx_path))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  return models
170
 
171
- def update_model_paths(model_name):
172
- for name, pth, idx in scan_models():
173
- if name == model_name:
174
- return pth, idx
 
 
 
 
175
  return None, None
176
 
177
- # ========== ऑडियो इफेक्ट ==========
178
  def add_audio_effects(audio_list, type_output):
179
  result = []
180
  for audio_path in audio_list:
@@ -244,13 +270,13 @@ def run(
244
  type_output,
245
  steps,
246
  ):
247
- # डीबग जानकारी प्रिंट करें
248
  print("DEBUG: file_m =", file_m)
249
  print("DEBUG: file_index =", file_index)
250
 
251
  if not audio_files:
252
  raise ValueError("Please provide audio files")
253
 
 
254
  if isinstance(audio_files, str):
255
  audio_files = [audio_files]
256
 
@@ -290,14 +316,36 @@ def run(
290
 
291
  return result
292
 
293
- # ========== UI कम्पोनेंट ==========
294
- def audio_conf():
295
- return gr.File(label="Audio files", file_count="multiple", type="filepath")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
  def model_dropdown_conf():
298
  models = scan_models()
299
- choices = [name for name, _, _ in models]
300
- return gr.Dropdown(label="Select Model", choices=choices, value=choices[0] if choices else None, interactive=True)
 
 
 
 
 
301
 
302
  def hidden_model_path_conf():
303
  return gr.Textbox(visible=False)
@@ -324,16 +372,16 @@ def consonant_protec_conf():
324
  return gr.Slider(0, 0.5, value=0.5, label="Consonant breath protection")
325
 
326
  def button_conf():
327
- return gr.Button("Inference", variant="primary")
328
 
329
  def output_conf():
330
- return gr.File(label="Result", file_count="multiple", interactive=False)
331
 
332
  def active_tts_conf():
333
- return gr.Checkbox(False, label="TTS", container=False)
334
 
335
  def tts_voice_conf(voices):
336
- return gr.Dropdown(label="tts voice", choices=voices, visible=False)
337
 
338
  def tts_text_conf():
339
  return gr.Textbox(placeholder="Write the text here...", label="Text", visible=False, lines=3)
@@ -354,11 +402,12 @@ def format_output_gui():
354
  return gr.Dropdown(choices=["wav", "mp3", "flac"], value="wav", label="Format output")
355
 
356
  def denoise_conf():
357
- return gr.Checkbox(False, label="Denoise", container=False)
358
 
359
  def effects_conf():
360
- return gr.Checkbox(False, label="Reverb", container=False)
361
 
 
362
  def infer_tts_audio(tts_voice, tts_text, play_tts):
363
  out_dir = "output"
364
  folder_tts = "USER_" + str(random.randint(10000, 99999))
@@ -373,7 +422,7 @@ def show_components_tts(val):
373
  return (gr.update(visible=val),) * 4
374
 
375
  def down_active_conf():
376
- return gr.Checkbox(False, label="URL-to-Model", container=False)
377
 
378
  def down_url_conf():
379
  return gr.Textbox(placeholder="Write the url here...", label="Enter URL", visible=False)
@@ -384,6 +433,7 @@ def down_button_conf():
384
  def show_components_down(val):
385
  return (gr.update(visible=val),) * 3
386
 
 
387
  CSS = """
388
  #audio_tts {
389
  visibility: hidden; height: 0px; width: 0px; max-width: 0px; max-height: 0px;
@@ -395,6 +445,7 @@ def get_gui(theme, voices):
395
  gr.Markdown(title)
396
  gr.Markdown(description)
397
 
 
398
  active_tts = active_tts_conf()
399
  with gr.Row():
400
  with gr.Column(scale=1):
@@ -407,9 +458,17 @@ def get_gui(theme, voices):
407
  tts_play = sound_gui()
408
 
409
  active_tts.change(show_components_tts, [active_tts], [tts_voice, tts_text, tts_button, tts_active_play])
410
- aud = audio_conf()
411
- tts_button.click(infer_tts_audio, [tts_voice, tts_text, tts_active_play], [aud, tts_play])
412
 
 
 
 
 
 
 
 
 
 
 
413
  down_active = down_active_conf()
414
  down_info = gr.Markdown(
415
  "Provide a link to a zip file, or separate links with comma for .pth and .index files.",
@@ -430,14 +489,12 @@ def get_gui(theme, voices):
430
 
431
  down_button.click(update_from_url, [down_url], [hidden_model, hidden_index])
432
 
 
433
  model_dropdown = model_dropdown_conf()
 
434
 
435
- def on_model_select(name):
436
- return update_model_paths(name)
437
-
438
- model_dropdown.change(on_model_select, [model_dropdown], [hidden_model, hidden_index])
439
-
440
- with gr.Accordion("Advanced settings", open=False):
441
  algo = pitch_algo_conf()
442
  algo_lvl = pitch_lvl_conf()
443
  idx_inf = index_inf_conf()
@@ -453,10 +510,24 @@ def get_gui(theme, voices):
453
  btn = button_conf()
454
  out = output_conf()
455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
  btn.click(
457
- run,
458
  inputs=[
459
- aud, hidden_model, algo, algo_lvl, hidden_index,
 
460
  idx_inf, res_fc, env_r, cons,
461
  denoise_gui, effects_gui, fmt_out, steps_gui
462
  ],
 
23
  import argparse
24
  import sys
25
 
26
+ # ---------- कमांड लाइन आर्गुमेंट्स ----------
27
  parser = argparse.ArgumentParser(description="Run the app with optional sharing")
28
  parser.add_argument('--share', action='store_true', help='Enable sharing mode')
29
  parser.add_argument('--theme', type=str, default="aliabid94/new-theme", help='Set the theme')
 
34
 
35
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
36
 
37
+ # ---------- RVC कन्वर्टर इनिशियलाइज़ेशन ----------
38
  converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
39
  converter.hu_bert_model = load_hu_bert(Config(only_cpu=False), converter.hubert_path)
40
 
 
 
 
41
  title = "<center><strong><font size='7'>RVC⚡ZERO</font></strong></center>"
42
  description = "This demo is provided for educational and research purposes only." if IS_ZERO_GPU else ""
43
  RESOURCES = "- You can also try `RVC⚡ZERO` in Colab’s free tier [link](https://github.com/R3gm/rvc_zero_ui?tab=readme-ov-file#rvczero)."
 
46
 
47
  PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
48
 
49
+ # ========== एज TTS वॉइस लिस्ट ==========
50
  async def get_voices_list(proxy=None):
51
  from edge_tts import list_voices
52
  voices = await list_voices(proxy=proxy)
 
62
  for v in voices
63
  ]
64
 
65
+ # ========== फ़ाइल सर्च हेल्पर्स ==========
66
  def find_files(directory):
67
  file_paths = []
68
  for fname in os.listdir(directory):
 
151
  finally:
152
  threading.Thread(target=clear_files, args=(directory,)).start()
153
 
154
+ # ==================== नया मॉडल स्कैनिंग लॉजिक (फिक्स) ====================
155
def scan_models():
    """Find every model checkpoint (``.pth``) under the ``logs`` directory.

    The ``logs`` tree is walked exactly once. Each ``.pth`` file is paired
    with a ``.index`` file that has the same base name: one located in the
    same directory is preferred, otherwise the first one met anywhere under
    ``logs`` (in sorted walk order) is used.

    Returns:
        A list of ``(key, pth_path, index_path)`` tuples, ordered by sorted
        directory and file name. ``key`` is the checkpoint path relative to
        ``logs`` with path separators replaced by ``_``; ``index_path`` is
        ``""`` when no matching index exists. An empty list is returned when
        ``logs`` is not a directory.
    """
    logs_dir = "logs"
    if not os.path.isdir(logs_dir):
        return []

    # Snapshot the tree once. Sorting directories and files makes the result
    # order independent of the OS directory order, so callers that pick the
    # first entry as a default get the same model on every run.
    tree = []
    for root, dirs, files in os.walk(logs_dir):
        dirs.sort()  # in-place sort steers the order os.walk descends in
        tree.append((root, sorted(files)))

    # Index files keyed two ways: by (directory, stem) for the same-folder
    # preference, and by stem alone for the tree-wide fallback.
    index_in_dir = {}
    first_index = {}
    for root, files in tree:
        for name in files:
            if not name.endswith(".index"):
                continue
            stem = os.path.splitext(name)[0]
            path = os.path.join(root, name)
            index_in_dir[(root, stem)] = path
            first_index.setdefault(stem, path)

    models = []
    for root, files in tree:
        for name in files:
            if not name.endswith(".pth"):
                continue
            pth_path = os.path.join(root, name)
            stem = os.path.splitext(name)[0]
            idx_path = index_in_dir.get((root, stem)) or first_index.get(stem, "")
            # Folder + file name keeps keys distinct when two folders hold
            # checkpoints with the same file name.
            unique_key = os.path.relpath(pth_path, logs_dir).replace(os.sep, "_")
            models.append((unique_key, pth_path, idx_path))
    return models
192
 
193
def update_model_paths(selected_key):
    """Resolve a dropdown key to its checkpoint and index paths.

    Rescans the available models via ``scan_models()`` and returns
    ``(pth_path, index_path)`` for the entry whose key equals
    ``selected_key``. ``index_path`` is ``None`` when the entry has no index
    file. Returns ``(None, None)`` when no entry matches.
    """
    hit = next(
        ((pth, idx) for key, pth, idx in scan_models() if key == selected_key),
        None,
    )
    if hit is None:
        return None, None
    pth_path, idx_path = hit
    # scan_models() reports a missing index as an empty string; callers get None.
    return pth_path, (idx_path or None)
202
 
203
+ # ========== ऑडियो इफेक्ट्स ==========
204
  def add_audio_effects(audio_list, type_output):
205
  result = []
206
  for audio_path in audio_list:
 
270
  type_output,
271
  steps,
272
  ):
 
273
  print("DEBUG: file_m =", file_m)
274
  print("DEBUG: file_index =", file_index)
275
 
276
  if not audio_files:
277
  raise ValueError("Please provide audio files")
278
 
279
+ # यदि एकल ऑडियो फ़ाइल (gr.Audio से) आई है तो उसे लिस्ट में बदलें
280
  if isinstance(audio_files, str):
281
  audio_files = [audio_files]
282
 
 
316
 
317
  return result
318
 
319
+ # ========== UI कम्पोनेंट्स ==========
320
def audio_input_conf():
    """Build the single-clip audio input component.

    Returns a ``gr.Audio`` configured with the microphone and upload sources
    and ``type="filepath"``. The multi-file uploader is created separately
    by ``multi_audio_conf``.
    """
    settings = {
        "label": "🎤 Record or Upload Audio",
        "type": "filepath",
        "sources": ["microphone", "upload"],
    }
    return gr.Audio(**settings)
331
+
332
def multi_audio_conf():
    """Build the optional multi-file audio uploader.

    Returns a ``gr.File`` with ``file_count="multiple"`` and
    ``type="filepath"``, restricted to the listed audio file extensions.
    """
    accepted_extensions = [".wav", ".mp3", ".flac", ".m4a", ".ogg"]
    settings = {
        "label": "📁 Upload Multiple Audio Files (Optional)",
        "file_count": "multiple",
        "file_types": accepted_extensions,
        "type": "filepath",
    }
    return gr.File(**settings)
339
 
340
def model_dropdown_conf():
    """Build the model selection dropdown from the models found on disk.

    The choices are the keys returned by ``scan_models()``; the first key is
    preselected, or nothing when no model was found.
    """
    # NOTE(review): the preselected value is set at build time only; confirm
    # the hidden model/index paths are also initialised for it by the caller.
    available = [entry[0] for entry in scan_models()]
    initial = available[0] if available else None
    return gr.Dropdown(
        label="🤖 Select Model",
        choices=available,
        value=initial,
        interactive=True,
    )
349
 
350
  def hidden_model_path_conf():
351
  return gr.Textbox(visible=False)
 
372
  return gr.Slider(0, 0.5, value=0.5, label="Consonant breath protection")
373
 
374
  def button_conf():
375
+ return gr.Button("🚀 Inference", variant="primary")
376
 
377
  def output_conf():
378
+ return gr.File(label="Result", file_count="multiple", interactive=False)
379
 
380
  def active_tts_conf():
381
+ return gr.Checkbox(False, label="🔊 TTS", container=False)
382
 
383
  def tts_voice_conf(voices):
384
+ return gr.Dropdown(label="TTS Voice", choices=voices, visible=False)
385
 
386
  def tts_text_conf():
387
  return gr.Textbox(placeholder="Write the text here...", label="Text", visible=False, lines=3)
 
402
  return gr.Dropdown(choices=["wav", "mp3", "flac"], value="wav", label="Format output")
403
 
404
  def denoise_conf():
405
+ return gr.Checkbox(False, label="🧹 Denoise", container=False)
406
 
407
  def effects_conf():
408
+ return gr.Checkbox(False, label="🎚️ Reverb", container=False)
409
 
410
+ # ---------- TTS ----------
411
  def infer_tts_audio(tts_voice, tts_text, play_tts):
412
  out_dir = "output"
413
  folder_tts = "USER_" + str(random.randint(10000, 99999))
 
422
  return (gr.update(visible=val),) * 4
423
 
424
  def down_active_conf():
425
+ return gr.Checkbox(False, label="🌐 URL-to-Model", container=False)
426
 
427
  def down_url_conf():
428
  return gr.Textbox(placeholder="Write the url here...", label="Enter URL", visible=False)
 
433
  def show_components_down(val):
434
  return (gr.update(visible=val),) * 3
435
 
436
+ # ---------- मुख्य GUI ----------
437
  CSS = """
438
  #audio_tts {
439
  visibility: hidden; height: 0px; width: 0px; max-width: 0px; max-height: 0px;
 
445
  gr.Markdown(title)
446
  gr.Markdown(description)
447
 
448
+ # ---- TTS सेक्शन ----
449
  active_tts = active_tts_conf()
450
  with gr.Row():
451
  with gr.Column(scale=1):
 
458
  tts_play = sound_gui()
459
 
460
  active_tts.change(show_components_tts, [active_tts], [tts_voice, tts_text, tts_button, tts_active_play])
 
 
461
 
462
+ # ---- ऑडियो इनपुट (रिकॉर्ड + मल्टीपल) ----
463
+ gr.Markdown("## 📥 Input Audio")
464
+ with gr.Row():
465
+ audio_record = audio_input_conf()
466
+ audio_multi = multi_audio_conf()
467
+
468
+ # TTS आउटपुट को ऑडियो इनपुट में जोड़ें
469
+ tts_button.click(infer_tts_audio, [tts_voice, tts_text, tts_active_play], [audio_multi, tts_play])
470
+
471
+ # ---- URL से मॉडल लोडिंग ----
472
  down_active = down_active_conf()
473
  down_info = gr.Markdown(
474
  "Provide a link to a zip file, or separate links with comma for .pth and .index files.",
 
489
 
490
  down_button.click(update_from_url, [down_url], [hidden_model, hidden_index])
491
 
492
+ # ---- मॉडल चयन (ड्रॉपडाउन) ----
493
  model_dropdown = model_dropdown_conf()
494
+ model_dropdown.change(update_model_paths, [model_dropdown], [hidden_model, hidden_index])
495
 
496
+ # ---- एडवांस्ड सेटिंग्स ----
497
+ with gr.Accordion("⚙️ Advanced settings", open=False):
 
 
 
 
498
  algo = pitch_algo_conf()
499
  algo_lvl = pitch_lvl_conf()
500
  idx_inf = index_inf_conf()
 
510
  btn = button_conf()
511
  out = output_conf()
512
 
513
+ # ---- रन फ़ंक्शन: ऑडियो स्रोतों को मर्ज करना ----
514
def combined_audio_inputs(record_audio, multi_files):
    """Pick which audio source is passed on to inference.

    Uploaded files in ``multi_files`` take priority; ``record_audio`` is
    used only when no files were uploaded. Returns ``None`` when both are
    empty.
    """
    # `or` returns the first truthy operand, giving the same priority order
    # as an if / elif / else chain ending in None.
    return multi_files or record_audio or None
525
+
526
  btn.click(
527
+ lambda rec, multi, *rest: run(combined_audio_inputs(rec, multi), *rest),
528
  inputs=[
529
+ audio_record, audio_multi,
530
+ hidden_model, algo, algo_lvl, hidden_index,
531
  idx_inf, res_fc, env_r, cons,
532
  denoise_gui, effects_gui, fmt_out, steps_gui
533
  ],