lainlives committed on
Commit
88d335a
·
verified ·
1 Parent(s): 8fabd14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1008 -64
app.py CHANGED
@@ -28,7 +28,6 @@ from ultimate_rvc.core.manage.models import (
28
  get_voice_model_names,
29
  )
30
  from ultimate_rvc.web.common import initialize_dropdowns
31
- from ultimate_rvc.web.config.main import TotalConfig
32
  from ultimate_rvc.web.tabs.generate.song_cover.multi_step_generation import (
33
  render as render_song_cover_multi_step_tab,
34
  )
@@ -46,12 +45,284 @@ from ultimate_rvc.web.tabs.manage.models import render as render_models_tab
46
  from ultimate_rvc.web.tabs.manage.settings import render as render_settings_tab
47
 
48
  import sys
49
- from ultimate_rvc.web.main import start_app
50
  from huggingface_hub import snapshot_download
51
- from ultimate_rvc.core.manage.config import load_config
52
- from ultimate_rvc.web.config.main import SettingsManagementConfig, TotalConfig
53
  from enum import StrEnum
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  embedders_list = [
56
  ("embedders/contentvec/", ["pytorch_model.bin", "config.json"]),
57
  ("embedders/custom/Crusty/", ["model.safetensors", "config.json"]),
@@ -114,48 +385,8 @@ class SeparationModel2(StrEnum):
114
  UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
115
 
116
 
117
- class ModelController:
118
- """Manages the active model and provides a consistent interface."""
119
- def __init__(self, initial_model):
120
- self._model = initial_model
121
- self._lock = threading.RLock() # Use a lock for thread safety
122
 
123
- def get_model(self):
124
- """Atomically get the current model instance."""
125
- with self._lock:
126
- # Return a copy if the model is mutable, to prevent modification issues
127
- return copy.copy(self._model)
128
 
129
- def set_model(self, new_model):
130
- """Atomically set a new model instance."""
131
- with self._lock:
132
- self._model = new_model
133
- print(f"--- Model updated to {new_model.__class__.__name__} ---")
134
-
135
- def periodic_updater(controller, interval_seconds, new_model_class, stop_event):
136
- """
137
- A function to run in a separate thread that periodically updates the model.
138
- """
139
- while not stop_event.is_set():
140
- time.sleep(interval_seconds)
141
- if not stop_event.is_set():
142
- # Create a new instance of the desired class and set it
143
- new_model = new_model_class()
144
- controller.set_model(new_model)
145
-
146
- controller = ModelController(SeparationModel)
147
- stop_event = threading.Event()
148
-
149
- # Start the updater thread
150
- update_thread = threading.Thread(
151
- target=periodic_updater,
152
- args=(controller, 5, SeparationModel2, stop_event), # Update every 5 seconds to Model2
153
- daemon=True # Daemon threads exit when the main program exits
154
- )
155
- update_thread.start()
156
-
157
-
158
- load_config("default", TotalConfig)
159
 
160
  now_dir = os.getcwd()
161
 
@@ -177,11 +408,723 @@ snapshot_download(repo_id=repo_id, local_dir=dump_path, token=hf_token)
177
 
178
 
179
 
180
- config_name = os.environ.get("URVC_CONFIG")
181
  cookiefile = os.environ.get("YT_COOKIEFILE")
182
  total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
183
 
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  def render_app() -> gr.Blocks:
186
  """
187
  Render the Ultimate RVC web application.
@@ -204,12 +1147,11 @@ def render_app() -> gr.Blocks:
204
  cache_delete_cutoff = 86400 # and delete files older than 24 hours
205
 
206
  with gr.Blocks(
207
- title="Ultimate RVC",
208
  theme=gr.Theme.load(str(Path(__file__).parent / "config/theme.json")),
209
  css=css,
210
  delete_cache=(cache_delete_frequency, cache_delete_cutoff),
211
  ) as app:
212
- gr.HTML("<h1>Ultimate RVC 💙</h1>")
213
  for component_config in [
214
  total_config.song.one_click.voice_model,
215
  total_config.song.one_click.cached_song,
@@ -247,19 +1189,20 @@ def render_app() -> gr.Blocks:
247
  ]:
248
  component_config.instantiate()
249
  # main tab
250
- with gr.Tab("Generate", elem_id="generate-tab"):
251
- with gr.Tab("Song covers"):
252
- render_song_cover_one_click_tab(total_config, cookiefile)
253
- render_song_cover_multi_step_tab(total_config, cookiefile)
254
- with gr.Tab("Speech"):
255
- render_speech_one_click_tab(total_config)
256
- render_speech_multi_step_tab(total_config)
257
- with gr.Tab("Models", elem_id="manage-tab"):
258
- render_models_tab(total_config)
259
- with gr.Tab("Audio", elem_id="audio-tab"):
260
- render_audio_tab(total_config)
261
- with gr.Tab("Settings", elem_id="settings-tab"):
262
- render_settings_tab(total_config)
 
263
 
264
  app.load(
265
  _init_dropdowns,
@@ -392,7 +1335,7 @@ def start_app(
392
  "-h",
393
  help="The hostname that the server will use.",
394
  ),
395
- ] = None,
396
  listen_port: Annotated[
397
  int | None,
398
  typer.Option(
@@ -412,14 +1355,15 @@ def start_app(
412
  """Run the Ultimate RVC web application."""
413
  os.environ["GRADIO_TEMP_DIR"] = str(TEMP_DIR)
414
  gr.set_static_paths([MODELS_DIR, AUDIO_DIR])
415
- app.queue()
416
  app.launch(
417
- share=share,
418
- server_name=(None if not listen else (listen_host or "0.0.0.0")), # noqa: S104
419
  server_port=listen_port,
420
  ssr_mode=ssr_mode,
421
  )
422
 
423
 
 
 
424
  if __name__ == "__main__":
425
  app_wrapper()
 
28
  get_voice_model_names,
29
  )
30
  from ultimate_rvc.web.common import initialize_dropdowns
 
31
  from ultimate_rvc.web.tabs.generate.song_cover.multi_step_generation import (
32
  render as render_song_cover_multi_step_tab,
33
  )
 
45
  from ultimate_rvc.web.tabs.manage.settings import render as render_settings_tab
46
 
47
  import sys
 
48
  from huggingface_hub import snapshot_download
 
 
49
  from enum import StrEnum
50
 
51
+
52
+
53
+
54
+ from typing import TYPE_CHECKING, Any
55
+
56
+ from functools import cached_property
57
+
58
+ from pydantic import BaseModel
59
+
60
+ from ultimate_rvc.web.config.component import (
61
+ AnyComponentConfig,
62
+ AudioConfig,
63
+ CheckboxConfig,
64
+ ComponentConfig,
65
+ DropdownConfig,
66
+ RadioConfig,
67
+ SliderConfig,
68
+ )
69
+ from ultimate_rvc.web.config.tab import (
70
+ SongGenerationConfig,
71
+ SpeechGenerationConfig,
72
+ TrainingConfig,
73
+ )
74
+
75
+ from typing import Any, TypedDict
76
+
77
+ from collections.abc import Callable, Sequence
78
+ from enum import StrEnum, auto
79
+
80
+ if TYPE_CHECKING:
81
+ import gradio as gr
82
+
83
+
84
+
85
+ type StrPath = str | PathLike[str]
86
+
87
+ type Json = Mapping[str, Json] | Sequence[Json] | str | int | float | bool | None
88
+
89
+
90
+
91
+ class SegmentSize(IntEnum):
92
+ """Enumeration of segment sizes for audio separation."""
93
+
94
+ SEG_64 = 64
95
+ SEG_128 = 128
96
+ SEG_256 = 256
97
+ SEG_512 = 512
98
+ SEG_1024 = 1024
99
+ SEG_2048 = 2048
100
+ SEG_4096 = 4096
101
+
102
+
103
+ class F0Method(StrEnum):
104
+ """Enumeration of pitch extraction methods."""
105
+
106
+ RMVPE = "rmvpe"
107
+ CREPE = "crepe"
108
+ CREPE_TINY = "crepe-tiny"
109
+ FCPE = "fcpe"
110
+
111
+
112
+
113
+ class RVCContentType(StrEnum):
114
+ """Enumeration of valid content to convert with RVC."""
115
+
116
+ VOCALS = "vocals"
117
+ VOICE = "voice"
118
+ SPEECH = "speech"
119
+ AUDIO = "audio"
120
+
121
+
122
+ class SampleRate(IntEnum):
123
+ """Enumeration of supported audio sample rates."""
124
+
125
+ HZ_16000 = 16000
126
+ HZ_44100 = 44100
127
+ HZ_48000 = 48000
128
+ HZ_96000 = 96000
129
+ HZ_192000 = 192000
130
+
131
+
132
+ class AudioExt(StrEnum):
133
+ """Enumeration of supported audio file formats."""
134
+
135
+ MP3 = "mp3"
136
+ WAV = "wav"
137
+ FLAC = "flac"
138
+ OGG = "ogg"
139
+
140
+
141
+ class DeviceType(StrEnum):
142
+ """Enumeration of device types for training voice models."""
143
+
144
+ AUTOMATIC = "Automatic"
145
+ CPU = "CPU"
146
+ GPU = "GPU"
147
+
148
+
149
+ class TrainingSampleRate(StrEnum):
150
+ """Enumeration of sample rates for training voice models."""
151
+
152
+ HZ_32K = "32000"
153
+ HZ_40K = "40000"
154
+ HZ_48K = "48000"
155
+
156
+
157
+ class PretrainedSampleRate(StrEnum):
158
+ """Enumeration of valid sample rates for pretrained models."""
159
+
160
+ HZ_32K = "32k"
161
+ HZ_40K = "40k"
162
+ HZ_44K = "44k"
163
+ HZ_48K = "48k"
164
+
165
+
166
+ class TrainingF0Method(StrEnum):
167
+ """Enumeration of pitch extraction methods for training."""
168
+
169
+ RMVPE = "rmvpe"
170
+ CREPE = "crepe"
171
+ CREPE_TINY = "crepe-tiny"
172
+
173
+
174
+ class AudioSplitMethod(StrEnum):
175
+ """
176
+ Enumeration of methods to use for splitting audio files during
177
+ dataset preprocessing.
178
+ """
179
+
180
+ SKIP = "Skip"
181
+ SIMPLE = "Simple"
182
+ AUTOMATIC = "Automatic"
183
+
184
+
185
+ class Vocoder(StrEnum):
186
+ """Enumeration of vocoders for training voice models."""
187
+
188
+ HIFI_GAN = "HiFi-GAN"
189
+ MRF_HIFI_GAN = "MRF HiFi-GAN"
190
+ REFINE_GAN = "RefineGAN"
191
+
192
+
193
+ class IndexAlgorithm(StrEnum):
194
+ """Enumeration of indexing algorithms for training voice models."""
195
+
196
+ AUTO = "Auto"
197
+ FAISS = "Faiss"
198
+ KMEANS = "KMeans"
199
+
200
+
201
+ class PretrainedType(StrEnum):
202
+ """
203
+ Enumeration of the possible types of pretrained models to finetune
204
+ voice models on.
205
+ """
206
+
207
+ NONE = "None"
208
+ DEFAULT = "Default"
209
+ CUSTOM = "Custom"
210
+
211
+
212
+
213
+
214
+ class ConcurrencyId(StrEnum):
215
+ """Enumeration of possible concurrency identifiers."""
216
+
217
+ GPU = auto()
218
+
219
+
220
+ class SongSourceType(StrEnum):
221
+ """The type of source providing the song to generate a cover of."""
222
+
223
+ LOCAL_FILE = "Local file"
224
+ CACHED_SONG = "Cached song"
225
+
226
+
227
+ class SpeechSourceType(StrEnum):
228
+ """The type of source providing the text to generate speech from."""
229
+
230
+ TEXT = "Text"
231
+ LOCAL_FILE = "Local file"
232
+
233
+
234
+ class SongTransferOption(StrEnum):
235
+ """Enumeration of possible song transfer options."""
236
+
237
+ STEP_1_AUDIO = "Step 1: stem splitting"
238
+ STEP_2_VOCALS = "Step 2: vocal conversion"
239
+ STEP_3_VOCALS = "Step 3: vocal effect"
240
+ STEP_4_INSTRUMENTALS = "Step 4: instrumentals"
241
+ STEP_4_BACKUP_VOCALS = "Step 4: backup vocals"
242
+ STEP_5_MAIN_VOCALS = "Step 5: main vocals"
243
+ STEP_5_INSTRUMENTALS = "Step 5: instrumentals"
244
+ STEP_5_BACKUP_VOCALS = "Step 5: backup vocals"
245
+
246
+
247
+ class SpeechTransferOption(StrEnum):
248
+ """Enumeration of possible speech transfer options."""
249
+
250
+ STEP_2_SPEECH = "Step 2: vocal conversion"
251
+ STEP_3_SPEECH = "Step 3: vocal effect"
252
+
253
+
254
+ class ComponentVisibilityKwArgs(TypedDict, total=False):
255
+ """
256
+ Keyword arguments for setting component visibility.
257
+
258
+ Attributes
259
+ ----------
260
+ visible : bool
261
+ Whether the component should be visible.
262
+ value : Any
263
+ The value of the component.
264
+
265
+ """
266
+
267
+ visible: bool
268
+ value: Any
269
+
270
+
271
+ class UpdateDropdownKwArgs(TypedDict, total=False):
272
+ """
273
+ Keyword arguments for updating a dropdown component.
274
+
275
+ Attributes
276
+ ----------
277
+ choices : DropdownChoices
278
+ The updated choices for the dropdown component.
279
+ value : DropdownValue
280
+ The updated value for the dropdown component.
281
+
282
+ """
283
+
284
+ choices: DropdownChoices
285
+ value: DropdownValue
286
+
287
+
288
+ class TextBoxKwArgs(TypedDict, total=False):
289
+ """
290
+ Keyword arguments for updating a textbox component.
291
+
292
+ Attributes
293
+ ----------
294
+ value : str | None
295
+ The updated value for the textbox component.
296
+ placeholder : str | None
297
+ The updated placeholder for the textbox component.
298
+
299
+ """
300
+
301
+ value: str | None
302
+ placeholder: str | None
303
+
304
+
305
+ class UpdateAudioKwArgs(TypedDict, total=False):
306
+ """
307
+ Keyword arguments for updating an audio component.
308
+
309
+ Attributes
310
+ ----------
311
+ value : str | None
312
+ The updated value for the audio component.
313
+
314
+ """
315
+
316
+ value: str | None
317
+
318
+
319
+ class DatasetType(StrEnum):
320
+ """The type of dataset used to train a voice model."""
321
+
322
+ NEW_DATASET = "New dataset"
323
+ EXISTING_DATASET = "Existing dataset"
324
+
325
+
326
  embedders_list = [
327
  ("embedders/contentvec/", ["pytorch_model.bin", "config.json"]),
328
  ("embedders/custom/Crusty/", ["model.safetensors", "config.json"]),
 
385
  UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
386
 
387
 
 
 
 
 
 
388
 
 
 
 
 
 
389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
 
391
  now_dir = os.getcwd()
392
 
 
408
 
409
 
410
 
411
+ config_name = "default" #os.environ.get("URVC_CONFIG")
412
  cookiefile = os.environ.get("YT_COOKIEFILE")
413
  total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
414
 
415
 
416
+
417
+ """
418
+ Module defining models for representing configuration settings for
419
+ UI tabs.
420
+ """
421
+
422
+ from __future__ import annotations
423
+
424
+
425
+
426
+ class SongIntermediateAudioConfig(BaseModel):
427
+ """
428
+ Configuration settings for intermediate audio components in the
429
+ one-click song generation tab.
430
+
431
+ Attributes
432
+ ----------
433
+ song : AudioConfig
434
+ Configuration settings for the input song audio component.
435
+ vocals : AudioConfig
436
+ Configuration settings for the vocals audio component.
437
+ instrumentals : AudioConfig
438
+ Configuration settings for the instrumentals audio component.
439
+ main_vocals : AudioConfig
440
+ Configuration settings for the main vocals audio component.
441
+ backup_vocals : AudioConfig
442
+ Configuration settings for the backup vocals audio component.
443
+ main_vocals_dereverbed : AudioConfig
444
+ Configuration settings for the main vocals de-reverbed audio
445
+ component.
446
+ main_vocals_reverb : AudioConfig
447
+ Configuration settings for the main vocals reverb audio
448
+ component.
449
+ converted_vocals : AudioConfig
450
+ Configuration settings for the converted vocals audio
451
+ component.
452
+ postprocessed_vocals : AudioConfig
453
+ Configuration settings for the postprocessed vocals audio
454
+ component.
455
+ instrumentals_shifted : AudioConfig
456
+ Configuration settings for the shifted instrumentals audio
457
+ component.
458
+ backup_vocals_shifted : AudioConfig
459
+ Configuration settings for the shifted backup vocals audio
460
+ component.
461
+ all : list[gr.Audio]
462
+ List of instances of all intermediate audio components.
463
+
464
+ """
465
+
466
+ song: AudioConfig = AudioConfig.intermediate(label="Song")
467
+ vocals: AudioConfig = AudioConfig.intermediate(label="Vocals")
468
+ instrumentals: AudioConfig = AudioConfig.intermediate(
469
+ label="Instrumentals",
470
+ )
471
+ main_vocals: AudioConfig = AudioConfig.intermediate(
472
+ label="Main vocals",
473
+ )
474
+ backup_vocals: AudioConfig = AudioConfig.intermediate(
475
+ label="Backup vocals",
476
+ )
477
+ main_vocals_dereverbed: AudioConfig = AudioConfig.intermediate(
478
+ label="De-reverbed main vocals",
479
+ )
480
+ main_vocals_reverb: AudioConfig = AudioConfig.intermediate(
481
+ label="Main vocals with reverb",
482
+ )
483
+ converted_vocals: AudioConfig = AudioConfig.intermediate(
484
+ label="Converted vocals",
485
+ )
486
+ postprocessed_vocals: AudioConfig = AudioConfig.intermediate(
487
+ label="Postprocessed vocals",
488
+ )
489
+ instrumentals_shifted: AudioConfig = AudioConfig.intermediate(
490
+ label="Pitch-shifted instrumentals",
491
+ )
492
+ backup_vocals_shifted: AudioConfig = AudioConfig.intermediate(
493
+ label="Pitch-shifted backup vocals",
494
+ )
495
+
496
+ @property
497
+ def all(self) -> list[gr.Audio]:
498
+ """
499
+ Retrieve instances of all intermediate audio components
500
+ in the one-click song generation tab.
501
+
502
+ Returns
503
+ -------
504
+ list[gr.Audio]
505
+ List of instances of all intermediate audio components in
506
+ the one-click song generation tab.
507
+
508
+ """
509
+ # NOTE we are using self.__annotations__ to get the fields in
510
+ # the order they are defined in the class
511
+ return [getattr(self, field).instance for field in self.__annotations__]
512
+
513
+
514
+ class OneClickSongGenerationConfig(SongGenerationConfig):
515
+ """
516
+ Configuration settings for the one-click song generation tab.
517
+
518
+ Attributes
519
+ ----------
520
+ n_octaves : SliderConfig
521
+ Configuration settings for an octave pitch shift slider
522
+ component.
523
+ n_semitones : SliderConfig
524
+ Configuration settings for a semitone pitch shift slider
525
+ component.
526
+ show_intermediate_audio : CheckboxConfig
527
+ Configuration settings for a show intermediate audio checkbox
528
+ component.
529
+ intermediate_audio : SongIntermediateAudioConfig
530
+ Configuration settings for intermediate audio components.
531
+
532
+ See Also
533
+ --------
534
+ SongGenerationConfig
535
+ Parent model defining common component configuration settings
536
+ for song generation tabs.
537
+
538
+ """
539
+
540
+ n_octaves: SliderConfig = SliderConfig.octave_shift(
541
+ label="Vocal pitch shift",
542
+ info=(
543
+ "The number of octaves to shift the pitch of the converted vocals by. Use 1"
544
+ " for male-to-female and -1 for vice-versa."
545
+ ),
546
+ )
547
+
548
+ n_semitones: SliderConfig = SliderConfig.semitone_shift(
549
+ label="Overall pitch shift",
550
+ info=(
551
+ "The number of semi-tones to shift the pitch of the converted vocals,"
552
+ " instrumentals and backup vocals by."
553
+ ),
554
+ )
555
+ show_intermediate_audio: CheckboxConfig = CheckboxConfig(
556
+ label="Show intermediate audio",
557
+ info="Show intermediate audio tracks produced during song cover generation.",
558
+ value=False,
559
+ exclude_value=True,
560
+ )
561
+ intermediate_audio: SongIntermediateAudioConfig = SongIntermediateAudioConfig()
562
+
563
+
564
+ class SongInputAudioConfig(BaseModel):
565
+ """
566
+ Configuration settings for input audio components in the multi-step
567
+ song generation tab.
568
+
569
+ Attributes
570
+ ----------
571
+ audio : AudioConfig
572
+ Configuration settings for the input audio component.
573
+ vocals : AudioConfig
574
+ Configuration settings for the vocals audio component.
575
+ converted_vocals : AudioConfig
576
+ Configuration settings for the converted vocals audio
577
+ component.
578
+ instrumentals : AudioConfig
579
+ Configuration settings for the instrumentals audio
580
+ component.
581
+ backup_vocals : AudioConfig
582
+ Configuration settings for the backup vocals audio
583
+ component.
584
+ main_vocals : AudioConfig
585
+ Configuration settings for the main vocals audio
586
+ component.
587
+ shifted_instrumentals : AudioConfig
588
+ Configuration settings for the shifted instrumentals audio
589
+ component.
590
+ shifted_backup_vocals : AudioConfig
591
+ Configuration settings for the shifted backup vocals audio
592
+ component.
593
+ all : list[AudioConfig]
594
+ List of configuration settings for all input audio
595
+ components in the multi-step song generation tab.
596
+
597
+ """
598
+
599
+ audio: AudioConfig = AudioConfig.input(label="Audio")
600
+ vocals: AudioConfig = AudioConfig.input(label="Vocals")
601
+ converted_vocals: AudioConfig = AudioConfig.input(label="Vocals")
602
+ instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
603
+ backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")
604
+ main_vocals: AudioConfig = AudioConfig.input(label="Main vocals")
605
+ shifted_instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
606
+ shifted_backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")
607
+
608
+ @property
609
+ def all(self) -> list[AudioConfig]:
610
+ """
611
+ Retrieve configuration settings for all input audio components
612
+ in the multi-step song generation tab.
613
+
614
+ Returns
615
+ -------
616
+ list[AudioConfig]
617
+ List of configuration settings for all input audio
618
+ components in the multi-step song generation tab.
619
+
620
+ """
621
+ return [getattr(self, field) for field in self.__annotations__]
622
+
623
+
624
+ class SongDirsConfig(BaseModel):
625
+ """
626
+ Configuration settings for song directory components in the
627
+ multi-step song generation tab.
628
+
629
+ Attributes
630
+ ----------
631
+ separate_audio : DropdownConfig
632
+ Configuration settings for the song directory component
633
+ for separating audio.
634
+ convert_vocals : DropdownConfig
635
+ Configuration settings for the song directory component
636
+ for converting vocals.
637
+ postprocess_vocals : DropdownConfig
638
+ Configuration settings for the song directory component
639
+ for postprocessing vocals.
640
+ pitch_shift_background : DropdownConfig
641
+ Configuration settings for the song directory component
642
+ for pitch-shifting background audio.
643
+ mix : DropdownConfig
644
+ Configuration settings for the song directory component
645
+ for mixing audio.
646
+ all : list[gr.Dropdown]
647
+ List of instances of all song directory components in the
648
+ multi-step song generation tab.
649
+
650
+ """
651
+
652
+ separate_audio: DropdownConfig = DropdownConfig.song_dir()
653
+ convert_vocals: DropdownConfig = DropdownConfig.song_dir()
654
+ postprocess_vocals: DropdownConfig = DropdownConfig.song_dir()
655
+ pitch_shift_background: DropdownConfig = DropdownConfig.song_dir()
656
+ mix: DropdownConfig = DropdownConfig.song_dir()
657
+
658
+ @property
659
+ def all(self) -> list[gr.Dropdown]:
660
+ """
661
+ Retrieve instances of all song directory components in the
662
+ multi-step song generation tab.
663
+
664
+ Returns
665
+ -------
666
+ list[gr.Dropdown]
667
+ List of instances of all song directory components in
668
+ the multi-step song generation tab.
669
+
670
+ """
671
+ return [getattr(self, field).instance for field in self.__annotations__]
672
+
673
+
674
+ class MultiStepSongGenerationConfig(SongGenerationConfig):
675
+ """
676
+ Configuration settings for the multi-step song generation tab.
677
+
678
+ Attributes
679
+ ----------
680
+ separation_model : DropdownConfig
681
+ Configuration settings for a separation model dropdown
682
+ component.
683
+ segment_size : RadioConfig
684
+ Configuration settings for a segment size radio component.
685
+ n_octaves : SliderConfig
686
+ Configuration settings for an octave pitch shift slider
687
+ component.
688
+ n_semitones : SliderConfig
689
+ Configuration settings for a semitone pitch shift slider
690
+ component.
691
+ n_semitones_instrumentals : SliderConfig
692
+ Configuration settings for an instrumentals pitch shift slider
693
+ component.
694
+ n_semitones_backup_vocals : SliderConfig
695
+ Configuration settings for a backup vocals pitch shift slider
696
+ component.
697
+ input_audio : SongInputAudioConfig
698
+ Configuration settings for input audio components.
699
+ song_dirs : SongDirsConfig
700
+ Configuration settings for song directory components.
701
+
702
+ See Also
703
+ --------
704
+ SongGenerationConfig
705
+ Parent model defining common component configuration settings
706
+ for song generation tabs.
707
+
708
+ """
709
+
710
+ separation_model: DropdownConfig = DropdownConfig(
711
+ label="Separation model",
712
+ info="The model to use for audio separation.",
713
+ value=SeparationModel.UVR_MDX_NET_VOC_FT,
714
+ choices=list(SeparationModel2),
715
+ )
716
+ segment_size: RadioConfig = RadioConfig(
717
+ label="Segment size",
718
+ info=(
719
+ "The size of the segments into which the audio is split. Using a larger"
720
+ " size consumes more resources, but may give better results."
721
+ ),
722
+ value=SegmentSize.SEG_2048,
723
+ choices=list(SegmentSize),
724
+ )
725
+ n_octaves: SliderConfig = SliderConfig.octave_shift(
726
+ label="Pitch shift (octaves)",
727
+ info=(
728
+ "The number of octaves to pitch-shift the converted voice by. Use 1 for"
729
+ " male-to-female and -1 for vice-versa."
730
+ ),
731
+ )
732
+ n_semitones: SliderConfig = SliderConfig.semitone_shift(
733
+ label="Pitch shift (semi-tones)",
734
+ info=(
735
+ "The number of semi-tones to pitch-shift the converted vocals by. Altering"
736
+ " this slightly reduces sound quality."
737
+ ),
738
+ )
739
+ n_semitones_instrumentals: SliderConfig = SliderConfig.semitone_shift(
740
+ label="Instrumental pitch shift",
741
+ info="The number of semi-tones to pitch-shift the instrumentals by.",
742
+ )
743
+ n_semitones_backup_vocals: SliderConfig = SliderConfig.semitone_shift(
744
+ label="Backup vocal pitch shift",
745
+ info="The number of semi-tones to pitch-shift the backup vocals by.",
746
+ )
747
+ input_audio: SongInputAudioConfig = SongInputAudioConfig()
748
+ song_dirs: SongDirsConfig = SongDirsConfig()
749
+
750
+
751
+ class SpeechIntermediateAudioConfig(BaseModel):
752
+ """
753
+ Configuration settings for intermediate audio components in the
754
+ one-click speech generation tab.
755
+
756
+ Attributes
757
+ ----------
758
+ speech : AudioConfig
759
+ Configuration settings for the input speech audio component.
760
+ converted_speech : AudioConfig
761
+ Configuration settings for the converted speech audio component.
762
+ all : list[gr.Audio]
763
+ List of instances of all intermediate audio components in the
764
+ speech generation tab.
765
+
766
+ """
767
+
768
+ speech: AudioConfig = AudioConfig.intermediate(label="Speech")
769
+ converted_speech: AudioConfig = AudioConfig.intermediate(label="Converted speech")
770
+
771
+ @property
772
+ def all(self) -> list[gr.Audio]:
773
+ """
774
+ Retrieve instances of all intermediate audio components in the
775
+ speech generation tab.
776
+
777
+ Returns
778
+ -------
779
+ list[gr.Audio]
780
+ List of instances of all intermediate audio components in
781
+ the speech generation tab.
782
+
783
+ """
784
+ return [getattr(self, field).instance for field in self.__annotations__]
785
+
786
+
787
+ class OneClickSpeechGenerationConfig(SpeechGenerationConfig):
788
+ """
789
+ Configuration settings for the one-click speech generation tab.
790
+
791
+ Attributes
792
+ ----------
793
+ intermediate_audio : SpeechIntermediateAudioConfig
794
+ Configuration settings for intermediate audio components.
795
+ show_intermediate_audio : CheckboxConfig
796
+ Configuration settings for a show intermediate audio checkbox
797
+ component.
798
+
799
+ See Also
800
+ --------
801
+ SpeechGenerationConfig
802
+ Parent model defining common component configuration settings
803
+ for speech generation tabs.
804
+
805
+ """
806
+
807
+ intermediate_audio: SpeechIntermediateAudioConfig = SpeechIntermediateAudioConfig()
808
+
809
+ show_intermediate_audio: CheckboxConfig = CheckboxConfig(
810
+ label="Show intermediate audio",
811
+ info="Show intermediate audio tracks produced during speech generation.",
812
+ value=False,
813
+ exclude_value=True,
814
+ )
815
+
816
+
817
+ class SpeechInputAudioConfig(BaseModel):
818
+ """
819
+ Configuration settings for input audio components in the multi-step
820
+ speech generation tab.
821
+
822
+ Attributes
823
+ ----------
824
+ speech : AudioConfig
825
+ Configuration settings for the input speech audio component.
826
+ converted_speech : AudioConfig
827
+ Configuration settings for the converted speech audio component.
828
+
829
+ all : list[AudioConfig]
830
+ List of configuration settings for all input audio components in
831
+ the multi-step speech generation tab.
832
+
833
+ """
834
+
835
+ speech: AudioConfig = AudioConfig.input("Speech")
836
+ converted_speech: AudioConfig = AudioConfig.input("Converted speech")
837
+
838
+ @property
839
+ def all(self) -> list[AudioConfig]:
840
+ """
841
+ Retrieve configuration settings for all input audio components
842
+ in the multi-step speech generation tab.
843
+
844
+ Returns
845
+ -------
846
+ list[AudioConfig]
847
+ List of configuration settings for all input audio
848
+ components in the multi-step speech generation tab.
849
+
850
+ """
851
+ return [getattr(self, field) for field in self.__annotations__]
852
+
853
+
854
+ class MultiStepSpeechGenerationConfig(SpeechGenerationConfig):
855
+ """
856
+ Configuration settings for the multi-step speech generation tab.
857
+
858
+ Attributes
859
+ ----------
860
+ input_audio : SpeechInputAudioConfig
861
+ Configuration settings for input audio components.
862
+
863
+ See Also
864
+ --------
865
+ SpeechGenerationConfig
866
+ Parent model defining common component configuration settings
867
+ for speech generation tabs.
868
+
869
+ """
870
+
871
+ input_audio: SpeechInputAudioConfig = SpeechInputAudioConfig()
872
+
873
+
874
+ class MultiStepTrainingConfig(TrainingConfig):
875
+ """Configuration settings for the multi-step training tab."""
876
+
877
+
878
+ class ModelManagementConfig(BaseModel):
879
+ """
880
+
881
+ Configuration settings for the model management tab.
882
+
883
+ Attributes
884
+ ----------
885
+ voices : DropdownConfig
886
+ Configuration settings for delete voice models dropdown
887
+ component.
888
+ embedders : DropdownConfig
889
+ Configuration settings for delete embedder models dropdown
890
+ component.
891
+ pretraineds : DropdownConfig
892
+ Configuration settings for delete pretrained models dropdown
893
+ component.
894
+ traineds : DropdownConfig
895
+ Configuration settings for delete training models dropdown
896
+ component.
897
+ dummy_checkbox : CheckboxConfig
898
+ Configuration settings for a dummy checkbox component.
899
+
900
+ """
901
+
902
+ voices: DropdownConfig = DropdownConfig.multi_delete(
903
+ label="Voice models",
904
+ info="Select one or more voice models to delete.",
905
+ )
906
+ embedders: DropdownConfig = DropdownConfig.multi_delete(
907
+ label="Custom embedder models",
908
+ info="Select one or more embedder models to delete.",
909
+ )
910
+ pretraineds: DropdownConfig = DropdownConfig.multi_delete(
911
+ label="Custom pretrained models",
912
+ info="Select one or more pretrained models to delete.",
913
+ )
914
+ traineds: DropdownConfig = DropdownConfig.multi_delete(
915
+ label="Training models",
916
+ info="Select one or more training models to delete.",
917
+ )
918
+
919
+ dummy_checkbox: CheckboxConfig = CheckboxConfig(
920
+ value=False,
921
+ visible=False,
922
+ exclude_value=True,
923
+ )
924
+
925
+
926
class AudioManagementConfig(BaseModel):
    """
    Component configuration for the audio management tab.

    Attributes
    ----------
    intermediate : DropdownConfig
        Settings for the dropdown used to delete song directories
        containing intermediate audio files.
    speech : DropdownConfig
        Settings for the dropdown used to delete speech audio files.
    output : DropdownConfig
        Settings for the dropdown used to delete output audio files.
    dataset : DropdownConfig
        Settings for the dropdown used to delete datasets containing
        audio files.
    dummy_checkbox : CheckboxConfig
        Settings for a dummy checkbox component, created hidden and
        with ``exclude_value=True``.

    """

    intermediate: DropdownConfig = DropdownConfig.multi_delete(
        label="Song directories",
        info=(
            "Select one or more song directories containing intermediate audio"
            " files to delete."
        ),
    )
    speech: DropdownConfig = DropdownConfig.multi_delete(
        label="Speech audio files",
        info="Select one or more speech audio files to delete.",
    )
    output: DropdownConfig = DropdownConfig.multi_delete(
        label="Output audio files",
        info="Select one or more output audio files to delete.",
    )
    dataset: DropdownConfig = DropdownConfig.multi_delete(
        label="Dataset audio files",
        info="Select one or more datasets containing audio files to delete.",
    )

    dummy_checkbox: CheckboxConfig = CheckboxConfig(
        value=False, visible=False, exclude_value=True
    )
974
+
975
+
976
class SettingsManagementConfig(BaseModel):
    """
    Configuration settings for settings management tab.

    Attributes
    ----------
    load_config_name : DropdownConfig
        Configuration settings for the dropdown component used to
        select the name of a configuration to load UI settings from.
        It is created with ``render=False`` and ``exclude_value=True``.
    delete_config_names : DropdownConfig
        Configuration settings for the dropdown component used to
        select the names of one or more configurations to delete.
    dummy_checkbox : CheckboxConfig
        Configuration settings for a dummy checkbox component. It is
        created hidden and with ``exclude_value=True``.

    """

    load_config_name: DropdownConfig = DropdownConfig(
        label="Configuration name",
        info="The name of a configuration to load UI settings from",
        value=None,
        render=False,
        exclude_value=True,
    )
    delete_config_names: DropdownConfig = DropdownConfig.multi_delete(
        label="Configuration names",
        info="Select the name of one or more configurations to delete",
    )
    dummy_checkbox: CheckboxConfig = CheckboxConfig(
        value=False,
        visible=False,
        exclude_value=True,
    )
1003
+
1004
+
1005
class TotalSongGenerationConfig(BaseModel):
    """
    Container for the configuration of every song generation tab.

    Attributes
    ----------
    one_click : OneClickSongGenerationConfig
        Settings for the one-click song generation tab.
    multi_step : MultiStepSongGenerationConfig
        Settings for the multi-step song generation tab.

    """

    one_click: OneClickSongGenerationConfig = OneClickSongGenerationConfig()
    multi_step: MultiStepSongGenerationConfig = MultiStepSongGenerationConfig()
1020
+
1021
+
1022
class TotalSpeechGenerationConfig(BaseModel):
    """
    Container for the configuration of every speech generation tab.

    Attributes
    ----------
    one_click : OneClickSpeechGenerationConfig
        Settings for the one-click speech generation tab.
    multi_step : MultiStepSpeechGenerationConfig
        Settings for the multi-step speech generation tab.

    """

    one_click: OneClickSpeechGenerationConfig = OneClickSpeechGenerationConfig()
    multi_step: MultiStepSpeechGenerationConfig = MultiStepSpeechGenerationConfig()
1037
+
1038
+
1039
class TotalTrainingConfig(BaseModel):
    """
    All configuration settings for training tabs.

    Attributes
    ----------
    multi_step : MultiStepTrainingConfig
        Configuration settings for the multi-step training tab.

    """

    multi_step: MultiStepTrainingConfig = MultiStepTrainingConfig()
1051
+
1052
+
1053
class TotalManagementConfig(BaseModel):
    """
    Container for the configuration of every management tab.

    Attributes
    ----------
    model : ModelManagementConfig
        Settings for the model management tab.
    audio : AudioManagementConfig
        Settings for the audio management tab.
    settings : SettingsManagementConfig
        Settings for the settings management tab.

    """

    model: ModelManagementConfig = ModelManagementConfig()
    audio: AudioManagementConfig = AudioManagementConfig()
    settings: SettingsManagementConfig = SettingsManagementConfig()
1071
+
1072
+
1073
class TotalConfig(BaseModel):
    """
    Root model holding every configuration setting of the Ultimate RVC
    app.

    Attributes
    ----------
    song : TotalSongGenerationConfig
        Settings for the song generation tabs.
    speech : TotalSpeechGenerationConfig
        Settings for the speech generation tabs.
    training : TotalTrainingConfig
        Settings for the training tabs.
    management : TotalManagementConfig
        Settings for the management tabs.

    """

    song: TotalSongGenerationConfig = TotalSongGenerationConfig()
    speech: TotalSpeechGenerationConfig = TotalSpeechGenerationConfig()
    training: TotalTrainingConfig = TotalTrainingConfig()
    management: TotalManagementConfig = TotalManagementConfig()

    @cached_property
    def all(self) -> list[AnyComponentConfig]:
        """
        Gather the component configuration models nested anywhere
        inside this instance whose values are not excluded.

        Fields are visited in iteration order. A component
        configuration is kept unless its ``exclude_value`` flag is set;
        any other nested model is searched recursively.

        Returns
        -------
        list[AnyComponentConfig]
            The component configuration models found within this
            instance whose values are not excluded.

        """

        def _gather(node: BaseModel) -> list[AnyComponentConfig]:
            found: list[Any] = []
            for _name, child in node:
                if isinstance(child, ComponentConfig):
                    # Component configs are leaves: keep or drop them,
                    # but never descend into them.
                    if not child.exclude_value:
                        found.append(child)
                    continue
                if isinstance(child, BaseModel):
                    found.extend(_gather(child))
            return found

        return _gather(self)
1122
+
1123
+
1124
+
1125
+
1126
+
1127
+
1128
  def render_app() -> gr.Blocks:
1129
  """
1130
  Render the Ultimate RVC web application.
 
1147
  cache_delete_cutoff = 86400 # and delete files older than 24 hours
1148
 
1149
  with gr.Blocks(
1150
+ title="Redzone-6 Audio Playground",
1151
  theme=gr.Theme.load(str(Path(__file__).parent / "config/theme.json")),
1152
  css=css,
1153
  delete_cache=(cache_delete_frequency, cache_delete_cutoff),
1154
  ) as app:
 
1155
  for component_config in [
1156
  total_config.song.one_click.voice_model,
1157
  total_config.song.one_click.cached_song,
 
1189
  ]:
1190
  component_config.instantiate()
1191
  # main tab
1192
+ # with gr.Tab("Generate", elem_id="generate-tab"):
1193
# Tab holding the one-click and multi-step song cover UIs.
# NOTE(review): both render calls are assumed to sit inside this tab; the
# original indentation is not visible here — confirm.
with gr.Tab("RCV Suite", elem_id="generate-tab"):
    render_song_cover_one_click_tab(total_config, cookiefile)
    render_song_cover_multi_step_tab(total_config, cookiefile)
1196
# Tab holding the one-click and multi-step speech generation UIs.
# NOTE(review): both render calls are assumed to sit inside this tab; the
# original indentation is not visible here — confirm.
# NOTE(review): this elem_id is the same one the "RCV Suite" tab uses;
# confirm that sharing the id between two tabs is intended.
with gr.Tab("Speech", elem_id="generate-tab"):
    render_speech_one_click_tab(total_config)
    render_speech_multi_step_tab(total_config)
1199
+ with gr.Tab("Configuration", elem_id="settings-tab"):
1200
+ with gr.Tab("Models", elem_id="manage-tab"):
1201
+ render_models_tab(total_config)
1202
+ with gr.Tab("Settings", elem_id="settings-tab"):
1203
+ render_settings_tab(total_config)
1204
+ # with gr.Tab("Audio", elem_id="audio-tab"):
1205
+ render_audio_tab(total_config)
1206
 
1207
  app.load(
1208
  _init_dropdowns,
 
1335
  "-h",
1336
  help="The hostname that the server will use.",
1337
  ),
1338
+ ] = "0.0.0.0",
1339
  listen_port: Annotated[
1340
  int | None,
1341
  typer.Option(
 
1355
  """Run the Ultimate RVC web application."""
1356
  os.environ["GRADIO_TEMP_DIR"] = str(TEMP_DIR)
1357
  gr.set_static_paths([MODELS_DIR, AUDIO_DIR])
1358
+ # app.queue()
1359
  app.launch(
1360
+ server_name=listen_host,
 
1361
  server_port=listen_port,
1362
  ssr_mode=ssr_mode,
1363
  )
1364
 
1365
 
1366
# Both calls run at import time, before the entry-point guard below.
# NOTE(review): the return values are discarded, so the calls presumably
# matter only for validation or side effects — confirm, and confirm that
# "theme" is meant to be loaded as a TotalConfig.
load_config("default", TotalConfig)
load_config("theme", TotalConfig)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    app_wrapper()