Spaces:
Running
Running
app.py
CHANGED
|
@@ -34,8 +34,6 @@ from ultimate_rvc.web.config.tab import (SongGenerationConfig,
|
|
| 34 |
TrainingConfig)
|
| 35 |
from ultimate_rvc.web.tabs.generate.song_cover.multi_step_generation import \
|
| 36 |
render as render_song_cover_multi_step_tab
|
| 37 |
-
from ultimate_rvc.web.tabs.generate.song_cover.one_click_generation import \
|
| 38 |
-
render as render_song_cover_one_click_tab
|
| 39 |
from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import \
|
| 40 |
render as render_speech_multi_step_tab
|
| 41 |
from ultimate_rvc.web.tabs.generate.speech.one_click_generation import \
|
|
@@ -46,7 +44,44 @@ from ultimate_rvc.web.tabs.manage.settings import render as render_settings_tab
|
|
| 46 |
|
| 47 |
if TYPE_CHECKING:
|
| 48 |
import gradio as gr
|
|
|
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
type StrPath = str | PathLike[str]
|
| 52 |
|
|
@@ -306,32 +341,6 @@ class SeparationModel(StrEnum):
|
|
| 306 |
UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
|
| 307 |
REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
|
| 308 |
UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
|
| 309 |
-
UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
|
| 310 |
-
Kim_Vocal_1 = "Kim_Vocal_1.onnx"
|
| 311 |
-
Kim_Vocal_2 = "Kim_Vocal_2.onnx"
|
| 312 |
-
Kim_Inst = "Kim_Inst.onnx"
|
| 313 |
-
UVR_MDX_NET_Inst_HQ_4 = "UVR-MDX-NET-Inst_HQ_4.onnx"
|
| 314 |
-
kuielab_a_vocals = "kuielab_a_vocals.onnx"
|
| 315 |
-
kuielab_b_vocals = "kuielab_b_vocals.onnx"
|
| 316 |
-
kuielab_a_drums = "kuielab_a_drums.onnx"
|
| 317 |
-
kuielab_b_drums = "kuielab_b_drums.onnx"
|
| 318 |
-
kuielab_a_bass = "kuielab_a_bass.onnx"
|
| 319 |
-
kuielab_b_bass = "kuielab_b_bass.onnx"
|
| 320 |
-
kuielab_a_other = "kuielab_a_other.onnx"
|
| 321 |
-
kuielab_b_other = "kuielab_b_other.onnx"
|
| 322 |
-
MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
|
| 323 |
-
UVR_DeNoise = "UVR-DeNoise.pth"
|
| 324 |
-
UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
class SeparationModel2(StrEnum):
|
| 328 |
-
"""Enumeration of audio separation models."""
|
| 329 |
-
|
| 330 |
-
UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx"
|
| 331 |
-
UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
|
| 332 |
-
REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
|
| 333 |
-
UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
|
| 334 |
-
UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
|
| 335 |
Kim_Vocal_1 = "Kim_Vocal_1.onnx"
|
| 336 |
Kim_Vocal_2 = "Kim_Vocal_2.onnx"
|
| 337 |
Kim_Inst = "Kim_Inst.onnx"
|
|
@@ -662,8 +671,8 @@ class MultiStepSongGenerationConfig(SongGenerationConfig):
|
|
| 662 |
separation_model: DropdownConfig = DropdownConfig(
|
| 663 |
label="Separation model",
|
| 664 |
info="The model to use for audio separation.",
|
| 665 |
-
value=SeparationModel.
|
| 666 |
-
choices=list(
|
| 667 |
)
|
| 668 |
segment_size: RadioConfig = RadioConfig(
|
| 669 |
label="Segment size",
|
|
@@ -1915,6 +1924,231 @@ class SpeechGenerationConfig(GenerationConfig):
|
|
| 1915 |
total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
|
| 1916 |
|
| 1917 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1918 |
def render_app() -> gr.Blocks:
|
| 1919 |
"""
|
| 1920 |
Render the Ultimate RVC web application.
|
|
@@ -2099,6 +2333,360 @@ def _init_dropdowns() -> list[gr.Dropdown]:
|
|
| 2099 |
]
|
| 2100 |
|
| 2101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2102 |
app = render_app()
|
| 2103 |
app_wrapper = typer.Typer()
|
| 2104 |
|
|
|
|
| 34 |
TrainingConfig)
|
| 35 |
from ultimate_rvc.web.tabs.generate.song_cover.multi_step_generation import \
|
| 36 |
render as render_song_cover_multi_step_tab
|
|
|
|
|
|
|
| 37 |
from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import \
|
| 38 |
render as render_speech_multi_step_tab
|
| 39 |
from ultimate_rvc.web.tabs.generate.speech.one_click_generation import \
|
|
|
|
| 44 |
|
| 45 |
if TYPE_CHECKING:
|
| 46 |
import gradio as gr
|
| 47 |
+
from typing import TYPE_CHECKING
|
| 48 |
|
| 49 |
+
from functools import partial
|
| 50 |
+
|
| 51 |
+
import gradio as gr
|
| 52 |
+
from ultimate_rvc.core.common import (INTERMEDIATE_AUDIO_BASE_DIR,
|
| 53 |
+
OUTPUT_AUDIO_DIR, copy_file_safe,
|
| 54 |
+
display_progress, get_file_hash,
|
| 55 |
+
json_dump, json_load, validate_model,
|
| 56 |
+
validate_url)
|
| 57 |
+
from ultimate_rvc.core.exceptions import (Entity, InvalidLocationError,
|
| 58 |
+
Location, NotFoundError,
|
| 59 |
+
NotProvidedError, UIMessage,
|
| 60 |
+
YoutubeUrlError)
|
| 61 |
+
from ultimate_rvc.core.generate.common import (convert, get_unique_base_path,
|
| 62 |
+
mix_audio,
|
| 63 |
+
validate_audio_dir_exists,
|
| 64 |
+
validate_audio_file_exists,
|
| 65 |
+
wavify)
|
| 66 |
+
from ultimate_rvc.core.generate.song_cover import (get_named_song_dirs,
|
| 67 |
+
get_song_cover_name,
|
| 68 |
+
run_pipeline)
|
| 69 |
+
from ultimate_rvc.core.generate.typing_extra import (EffectedVocalsMetaData,
|
| 70 |
+
FileMetaData,
|
| 71 |
+
MixedAudioType,
|
| 72 |
+
PitchShiftMetaData,
|
| 73 |
+
RVCAudioMetaData,
|
| 74 |
+
SeparatedAudioMetaData,
|
| 75 |
+
SongSourceType)
|
| 76 |
+
from ultimate_rvc.core.manage.audio import get_saved_output_audio
|
| 77 |
+
from ultimate_rvc.typing_extra import EmbedderModel
|
| 78 |
+
from ultimate_rvc.web.common import (PROGRESS_BAR, exception_harness,
|
| 79 |
+
toggle_intermediate_audio,
|
| 80 |
+
toggle_visibility,
|
| 81 |
+
toggle_visible_component,
|
| 82 |
+
update_dropdowns, update_output_name,
|
| 83 |
+
update_value)
|
| 84 |
+
from ultimate_rvc.web.typing_extra import ConcurrencyId
|
| 85 |
|
| 86 |
type StrPath = str | PathLike[str]
|
| 87 |
|
|
|
|
| 341 |
UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
|
| 342 |
REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
|
| 343 |
UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
Kim_Vocal_1 = "Kim_Vocal_1.onnx"
|
| 345 |
Kim_Vocal_2 = "Kim_Vocal_2.onnx"
|
| 346 |
Kim_Inst = "Kim_Inst.onnx"
|
|
|
|
| 671 |
separation_model: DropdownConfig = DropdownConfig(
|
| 672 |
label="Separation model",
|
| 673 |
info="The model to use for audio separation.",
|
| 674 |
+
value=SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
|
| 675 |
+
choices=list(SeparationModel),
|
| 676 |
)
|
| 677 |
segment_size: RadioConfig = RadioConfig(
|
| 678 |
label="Segment size",
|
|
|
|
| 1924 |
total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
|
| 1925 |
|
| 1926 |
|
| 1927 |
+
def run_newpipeline(
    source: str,
    model_name: str,
    n_octaves: int = 0,
    n_semitones: int = 0,
    f0_methods: Sequence[F0Method] | None = None,
    index_rate: float = 0.3,
    rms_mix_rate: float = 1.0,
    protect_rate: float = 0.33,
    hop_length: int = 128,
    split_vocals: bool = False,
    autotune_vocals: bool = False,
    autotune_strength: float = 1.0,
    clean_vocals: bool = False,
    clean_strength: float = 0.7,
    embedder_model: EmbedderModel = EmbedderModel.CONTENTVEC,
    custom_embedder_model: str | None = None,
    sid: int = 0,
    room_size: float = 0.15,
    wet_level: float = 0.2,
    dry_level: float = 0.8,
    damping: float = 0.7,
    main_gain: int = 0,
    inst_gain: int = 0,
    backup_gain: int = 0,
    output_sr: int = 44100,
    output_format: AudioExt = AudioExt.MP3,
    output_name: str | None = None,
    cookiefile: StrPath | None = None,
    progress_bar: gr.Progress | None = None,
) -> tuple[Path, ...]:
    """
    Run the song cover generation pipeline.

    The pipeline retrieves the song, separates vocals from
    instrumentals, separates main vocals from backup vocals, de-noises
    and de-reverbs the main vocals, converts them with the given voice
    model, post-processes the converted vocals, pitch-shifts the
    background tracks and finally mixes everything into a song cover.

    Parameters
    ----------
    source : str
        A Youtube URL, the path to a local audio file or the path to a
        song directory.
    model_name : str
        The name of the voice model to use for vocal conversion.
    n_octaves : int, default=0
        The number of octaves to pitch-shift the converted vocals by.
    n_semitones : int, default=0
        The number of semi-tones to pitch-shift the converted vocals,
        instrumentals, and backup vocals by.
    f0_methods : Sequence[F0Method], optional
        The methods to use for pitch extraction during vocal
        conversion. If None, the method used is rmvpe.
    index_rate : float, default=0.3
        The influence of the index file on the vocal conversion.
    rms_mix_rate : float, default=1.0
        The blending rate of the volume envelope of the converted
        vocals.
    protect_rate : float, default=0.33
        The protect rate for consonants and breathing sounds during
        vocal conversion.
    hop_length : int, default=128
        The hop length to use for crepe-based pitch detection.
    split_vocals : bool, default=False
        Whether to perform audio splitting before converting the main
        vocals.
    autotune_vocals : bool, default=False
        Whether to apply autotune to the converted vocals.
    autotune_strength : float, default=1.0
        The strength of the autotune to apply to the converted vocals.
    clean_vocals : bool, default=False
        Whether to clean the converted vocals.
    clean_strength : float, default=0.7
        The intensity of the cleaning to apply to the converted vocals.
    embedder_model : EmbedderModel, default=EmbedderModel.CONTENTVEC
        The model to use for generating speaker embeddings during vocal
        conversion.
    custom_embedder_model : str, optional
        The name of a custom embedder model to use for generating
        speaker embeddings during vocal conversion. Only validated when
        ``embedder_model`` is ``EmbedderModel.CUSTOM``.
    sid : int, default=0
        The speaker id to use for multi-speaker models during vocal
        conversion.
    room_size : float, default=0.15
        The room size of the reverb effect to apply to the converted
        vocals.
    wet_level : float, default=0.2
        The wetness level of the reverb effect to apply to the converted
        vocals.
    dry_level : float, default=0.8
        The dryness level of the reverb effect to apply to the converted
        vocals.
    damping : float, default=0.7
        The damping of the reverb effect to apply to the converted
        vocals.
    main_gain : int, default=0
        The gain to apply to the post-processed vocals.
    inst_gain : int, default=0
        The gain to apply to the pitch-shifted instrumentals.
    backup_gain : int, default=0
        The gain to apply to the pitch-shifted backup vocals.
    output_sr : int, default=44100
        The sample rate of the song cover.
    output_format : AudioExt, default=AudioExt.MP3
        The audio format of the song cover.
    output_name : str, optional
        The name of the song cover.
    cookiefile : StrPath, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.

    Returns
    -------
    tuple[Path,...]
        The path to the generated song cover and the paths to any
        intermediate audio files that were generated.

    """
    # Progress is reported as ``step / total_steps`` before each step.
    total_steps = 9

    validate_model(model_name, Entity.VOICE_MODEL)
    if embedder_model == EmbedderModel.CUSTOM:
        validate_model(custom_embedder_model, Entity.CUSTOM_EMBEDDER_MODEL)

    display_progress("[~] Retrieving song...", 0 / total_steps, progress_bar)
    song, song_dir = retrieve_song(source, cookiefile=cookiefile)

    display_progress(
        "[~] Separating vocals from instrumentals...",
        1 / total_steps,
        progress_bar,
    )
    vocals_track, instrumentals_track = separate_audio(
        song,
        song_dir,
        SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
        SegmentSize.SEG_2048,
    )

    display_progress(
        "[~] Separating main vocals from backup vocals...",
        2 / total_steps,
        progress_bar,
    )
    backup_vocals_track, main_vocals_track = separate_audio(
        vocals_track,
        song_dir,
        SeparationModel.UVR_MDX_NET_KARA_2,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] De-noising vocals...", 3 / total_steps, progress_bar)
    # The de-noiser must be fed the main vocals. Previously it was handed
    # ``clean_track`` -- the very name this statement assigns -- which raised
    # UnboundLocalError before any audio was processed.
    _noise_track, clean_track = separate_audio(
        main_vocals_track,
        song_dir,
        SeparationModel.UVR_DeNoise,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] De-reverbing vocals...", 4 / total_steps, progress_bar)
    # Chain the de-noised vocals into de-reverb so that the de-noising step
    # actually contributes to the final result.
    reverb_track, vocals_dereverb_track = separate_audio(
        clean_track,
        song_dir,
        SeparationModel.UVR_DeEcho_DeReverb,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] Converting vocals...", 5 / total_steps, progress_bar)
    converted_vocals_track = convert(
        audio_track=vocals_dereverb_track,
        directory=song_dir,
        model_name=model_name,
        n_octaves=n_octaves,
        n_semitones=n_semitones,
        f0_methods=f0_methods,
        index_rate=index_rate,
        rms_mix_rate=rms_mix_rate,
        protect_rate=protect_rate,
        hop_length=hop_length,
        split_audio=split_vocals,
        autotune_audio=autotune_vocals,
        autotune_strength=autotune_strength,
        clean_audio=clean_vocals,
        clean_strength=clean_strength,
        embedder_model=embedder_model,
        custom_embedder_model=custom_embedder_model,
        sid=sid,
        content_type=RVCContentType.VOCALS,
    )

    display_progress(
        "[~] Post-processing vocals...",
        6 / total_steps,
        progress_bar,
    )
    effected_vocals_track = postprocess(
        converted_vocals_track,
        song_dir,
        room_size,
        wet_level,
        dry_level,
        damping,
    )

    display_progress(
        "[~] Pitch-shifting instrumentals...",
        7 / total_steps,
        progress_bar,
    )
    shifted_instrumentals_track = pitch_shift(
        instrumentals_track,
        song_dir,
        n_semitones,
    )

    display_progress(
        "[~] Pitch-shifting backup vocals...",
        8 / total_steps,
        progress_bar,
    )
    shifted_backup_vocals_track = pitch_shift(
        backup_vocals_track,
        song_dir,
        n_semitones,
    )

    song_cover = mix_song(
        [
            (effected_vocals_track, main_gain),
            (shifted_instrumentals_track, inst_gain),
            (shifted_backup_vocals_track, backup_gain),
        ],
        song_dir,
        output_sr,
        output_format,
        output_name,
    )

    # The tuple layout (cover first, then eleven intermediate tracks) is kept
    # unchanged so that callers mapping it onto UI outputs keep working.
    return (
        song_cover,
        song,
        vocals_track,
        instrumentals_track,
        main_vocals_track,
        backup_vocals_track,
        vocals_dereverb_track,
        reverb_track,
        converted_vocals_track,
        effected_vocals_track,
        shifted_instrumentals_track,
        shifted_backup_vocals_track,
    )
|
| 2150 |
+
|
| 2151 |
+
|
| 2152 |
def render_app() -> gr.Blocks:
|
| 2153 |
"""
|
| 2154 |
Render the Ultimate RVC web application.
|
|
|
|
| 2333 |
]
|
| 2334 |
|
| 2335 |
|
| 2336 |
+
def render_song_cover_one_click_tab(
    total_config: TotalConfig, cookiefile: str | None = None
) -> None:
    """
    Render "Generate song covers - One-click generation" tab.

    Builds the input section, the option accordions, the intermediate
    audio section and the reset/generate buttons, then wires the
    button click handlers.

    Parameters
    ----------
    total_config : TotalConfig
        Model containing all component configuration settings for the
        Ultimate RVC web UI.
    cookiefile : str, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube.

    """
    # NOTE(review): block nesting was reconstructed from a flattened diff
    # view; confirm the layout (which components sit inside the accordion and
    # the button row) against the real file.
    with gr.Tab("One-click"):
        tab_config = total_config.song.one_click
        _render_input(tab_config)
        with gr.Accordion("Options", open=False):
            _render_main_options(tab_config)
            _render_conversion_options(tab_config)
            _render_mixing_options(tab_config)
            _render_output_options(tab_config)
        _render_intermediate_audio(tab_config)

        with gr.Row(equal_height=True):
            reset_btn = gr.Button(value="Reset options", scale=2)
            generate_btn = gr.Button("Generate", scale=2, variant="primary")
        song_cover = gr.Audio(
            label="Song cover",
            scale=3,
            waveform_options=gr.WaveformOptions(show_recording_waveform=False),
        )
        song_dirs = total_config.song.multi_step.song_dirs.all

        # NOTE(review): the handler wraps the imported ``run_pipeline``;
        # confirm whether ``run_newpipeline`` defined in this file was meant.
        generate_fn = partial(
            exception_harness(
                run_pipeline,
                info_msg="Song cover generated successfully!",
            ),
            cookiefile=cookiefile,
            progress_bar=PROGRESS_BAR,
        )
        # Component configs whose current values are passed, in this order,
        # as positional arguments to the generation handler.
        generation_configs = (
            tab_config.source,
            tab_config.voice_model,
            tab_config.n_octaves,
            tab_config.n_semitones,
            tab_config.f0_methods,
            tab_config.index_rate,
            tab_config.rms_mix_rate,
            tab_config.protect_rate,
            tab_config.hop_length,
            tab_config.split_voice,
            tab_config.autotune_voice,
            tab_config.autotune_strength,
            tab_config.clean_voice,
            tab_config.clean_strength,
            tab_config.embedder_model,
            tab_config.custom_embedder_model,
            tab_config.sid,
            tab_config.room_size,
            tab_config.wet_level,
            tab_config.dry_level,
            tab_config.damping,
            tab_config.main_gain,
            tab_config.inst_gain,
            tab_config.backup_gain,
            tab_config.output_sr,
            tab_config.output_format,
            tab_config.output_name,
        )
        generation_event = generate_btn.click(
            generate_fn,
            inputs=[config.instance for config in generation_configs],
            outputs=[song_cover, *tab_config.intermediate_audio.all],
            concurrency_limit=4,
            concurrency_id=ConcurrencyId.GPU,
        )
        # After a successful run, refresh the song directory dropdowns ...
        refresh_event = generation_event.success(
            partial(
                update_dropdowns,
                get_named_song_dirs,
                3 + len(song_dirs),
                [],
                [2],
            ),
            outputs=[
                total_config.song.one_click.cached_song.instance,
                total_config.song.multi_step.cached_song.instance,
                total_config.management.audio.intermediate.instance,
                *song_dirs,
            ],
            show_progress="hidden",
        )
        # ... and then the saved output audio dropdown.
        refresh_event.then(
            partial(update_dropdowns, get_saved_output_audio, 1, [], [0]),
            outputs=total_config.management.audio.output.instance,
            show_progress="hidden",
        )

        # Option configs restored to their configured values by the reset
        # button; values are read when the button is clicked.
        resettable_configs = (
            tab_config.n_octaves,
            tab_config.n_semitones,
            tab_config.f0_methods,
            tab_config.index_rate,
            tab_config.rms_mix_rate,
            tab_config.protect_rate,
            tab_config.hop_length,
            tab_config.split_voice,
            tab_config.autotune_voice,
            tab_config.autotune_strength,
            tab_config.clean_voice,
            tab_config.clean_strength,
            tab_config.embedder_model,
            tab_config.sid,
            tab_config.room_size,
            tab_config.wet_level,
            tab_config.dry_level,
            tab_config.damping,
            tab_config.main_gain,
            tab_config.inst_gain,
            tab_config.backup_gain,
            tab_config.output_sr,
            tab_config.output_format,
            tab_config.show_intermediate_audio,
        )
        reset_btn.click(
            lambda: [config.value for config in resettable_configs],
            outputs=[config.instance for config in resettable_configs],
            show_progress="hidden",
        )
|
| 2481 |
+
|
| 2482 |
+
|
| 2483 |
+
def _render_input(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the song source and voice model inputs of the one-click tab.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    # NOTE(review): nesting reconstructed from a flattened diff view; confirm
    # the row/column layout against the real file.
    source_type = tab_config.source_type
    source = tab_config.source
    cached_song = tab_config.cached_song

    with gr.Row():
        with gr.Column():
            source_type.instantiate()
        with gr.Column():
            source.instantiate()
            uploaded_audio = gr.Audio(
                label="Source",
                type="filepath",
                visible=False,
                waveform_options=gr.WaveformOptions(
                    show_recording_waveform=False,
                ),
            )
            cached_song.instance.render()

    # Changing the source type toggles between the three source components.
    source_type.instance.input(
        partial(toggle_visible_component, 3),
        inputs=source_type.instance,
        outputs=[source.instance, uploaded_audio, cached_song.instance],
        show_progress="hidden",
    )

    # Both alternative source components forward their value to the
    # main source component.
    uploaded_audio.change(
        update_value,
        inputs=uploaded_audio,
        outputs=source.instance,
        show_progress="hidden",
    )
    cached_song.instance.input(
        update_value,
        inputs=cached_song.instance,
        outputs=source.instance,
        show_progress="hidden",
    )

    with gr.Row():
        tab_config.voice_model.instance.render()
|
| 2522 |
+
|
| 2523 |
+
|
| 2524 |
+
def _render_main_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the octave and semitone pitch-shift options in a single row.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    pitch_row = gr.Row()
    with pitch_row:
        tab_config.n_octaves.instantiate()
        tab_config.n_semitones.instantiate()
|
| 2528 |
+
|
| 2529 |
+
|
| 2530 |
+
def _render_conversion_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the vocal conversion options of the one-click tab.

    The options are grouped into voice synthesis, vocal enrichment and
    speaker embedding accordions. Strength sliders and the custom
    embedder dropdown are shown or hidden based on their controlling
    component.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    # NOTE(review): nesting reconstructed from a flattened diff view; confirm
    # the accordion/row/column layout against the real file.
    with gr.Accordion("Vocal conversion", open=True):
        gr.Markdown("")

        with gr.Accordion("Voice synthesis", open=True):
            with gr.Row():
                tab_config.f0_methods.instantiate()
                tab_config.index_rate.instantiate()
            with gr.Row():
                tab_config.rms_mix_rate.instantiate()
                tab_config.protect_rate.instantiate()
                tab_config.hop_length.instantiate()

        with gr.Accordion("Vocal enrichment", open=True):
            with gr.Row():
                with gr.Column():
                    tab_config.split_voice.instantiate()
                with gr.Column():
                    tab_config.autotune_voice.instantiate()
                    tab_config.autotune_strength.instantiate()
                with gr.Column():
                    tab_config.clean_voice.instantiate()
                    tab_config.clean_strength.instantiate()

            # Each strength slider follows the state of its checkbox.
            autotune_toggle = tab_config.autotune_voice.instance
            autotune_toggle.change(
                partial(toggle_visibility, targets={True}),
                inputs=autotune_toggle,
                outputs=tab_config.autotune_strength.instance,
                show_progress="hidden",
            )
            clean_toggle = tab_config.clean_voice.instance
            clean_toggle.change(
                partial(toggle_visibility, targets={True}),
                inputs=clean_toggle,
                outputs=tab_config.clean_strength.instance,
                show_progress="hidden",
            )

        with gr.Accordion("Speaker embedding", open=True):
            with gr.Row():
                with gr.Column():
                    tab_config.embedder_model.instantiate()
                    tab_config.custom_embedder_model.instance.render()
                tab_config.sid.instantiate()

            # The custom embedder component is tied to the CUSTOM choice.
            embedder_choice = tab_config.embedder_model.instance
            embedder_choice.change(
                partial(toggle_visibility, targets={EmbedderModel.CUSTOM}),
                inputs=embedder_choice,
                outputs=tab_config.custom_embedder_model.instance,
                show_progress="hidden",
            )
|
| 2575 |
+
|
| 2576 |
+
|
| 2577 |
+
def _render_mixing_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the reverb and volume options of the one-click tab.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    # NOTE(review): nesting reconstructed from a flattened diff view; confirm
    # the accordion/row layout against the real file.
    with gr.Accordion("Audio mixing", open=True):
        gr.Markdown("")

        with gr.Accordion("Reverb control on converted vocals", open=True):
            with gr.Row():
                tab_config.room_size.instantiate()
            with gr.Row():
                tab_config.wet_level.instantiate()
                tab_config.dry_level.instantiate()
                tab_config.damping.instantiate()

        with gr.Accordion("Volume controls (dB)", open=True):
            with gr.Row():
                tab_config.main_gain.instantiate()
                tab_config.inst_gain.instantiate()
                tab_config.backup_gain.instantiate()
|
| 2592 |
+
|
| 2593 |
+
|
| 2594 |
+
def _render_output_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the audio output options of the one-click tab.

    The output name component is instantiated with a callable value
    whose inputs are an empty state, the cached song component and the
    voice model component.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    # NOTE(review): nesting reconstructed from a flattened diff view; confirm
    # the accordion/row layout against the real file.
    with gr.Accordion("Audio output", open=True):
        with gr.Row():
            suggest_output_name = partial(
                update_output_name,
                get_song_cover_name,
                True,  # noqa: FBT003
            )
            tab_config.output_name.instantiate(
                value=suggest_output_name,
                inputs=[
                    gr.State(None),
                    tab_config.cached_song.instance,
                    tab_config.voice_model.instance,
                ],
            )
            tab_config.output_sr.instantiate()
            tab_config.output_format.instantiate()
        with gr.Row():
            tab_config.show_intermediate_audio.instantiate()
|
| 2613 |
+
|
| 2614 |
+
|
| 2615 |
+
def _render_intermediate_audio(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the intermediate audio tracks of the one-click tab.

    One accordion is created per pipeline step inside an initially
    hidden outer accordion. The "show intermediate audio" component
    updates the outer accordion and all seven step accordions.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    # NOTE(review): nesting reconstructed from a flattened diff view; confirm
    # that every step accordion sits inside the outer accordion.
    tracks = tab_config.intermediate_audio

    with gr.Accordion(
        "Intermediate audio tracks",
        open=False,
        visible=False,
    ) as outer_accordion:
        with gr.Accordion("Step 0: song retrieval", open=False) as retrieval_step:
            tracks.song.instantiate()

        with gr.Accordion(
            "Step 1a: vocals/instrumentals separation",
            open=False,
        ) as separation_step:
            with gr.Row():
                tracks.vocals.instantiate()
                tracks.instrumentals.instantiate()

        with gr.Accordion(
            "Step 1b: main vocals/ backup vocals separation",
            open=False,
        ) as main_separation_step:
            with gr.Row():
                tracks.main_vocals.instantiate()
                tracks.backup_vocals.instantiate()

        with gr.Accordion(
            "Step 1c: main vocals cleanup",
            open=False,
        ) as cleanup_step:
            with gr.Row():
                tracks.main_vocals_dereverbed.instantiate()
                tracks.main_vocals_reverb.instantiate()

        with gr.Accordion(
            "Step 2: conversion of main vocals",
            open=False,
        ) as conversion_step:
            tracks.converted_vocals.instantiate()

        with gr.Accordion(
            "Step 3: post-processing of converted vocals",
            open=False,
        ) as postprocessing_step:
            tracks.postprocessed_vocals.instantiate()

        with gr.Accordion(
            "Step 4: pitch shift of background tracks",
            open=False,
        ) as pitch_shift_step:
            with gr.Row():
                tracks.instrumentals_shifted.instantiate()
                tracks.backup_vocals_shifted.instantiate()

    # Outer accordion first, followed by the seven step accordions that
    # ``num_components=7`` refers to.
    toggled_accordions = [
        outer_accordion,
        retrieval_step,
        separation_step,
        main_separation_step,
        cleanup_step,
        conversion_step,
        postprocessing_step,
        pitch_shift_step,
    ]
    show_toggle = tab_config.show_intermediate_audio.instance
    show_toggle.change(
        partial(toggle_intermediate_audio, num_components=7),
        inputs=show_toggle,
        outputs=toggled_accordions,
        show_progress="hidden",
    )
|
| 2688 |
+
|
| 2689 |
+
|
| 2690 |
app = render_app()
|
| 2691 |
app_wrapper = typer.Typer()
|
| 2692 |
|