# NOTE(review): removed non-Python extraction artifact ("Spaces:" / "Running")
# that preceded the module; this file appears to be a concatenation of
# several modules joined during extraction.
| from __future__ import annotations | |
| import os | |
| import sys | |
| from enum import IntEnum, StrEnum, auto | |
| from functools import cached_property | |
| from pathlib import Path | |
| from typing import TYPE_CHECKING, Annotated, Any, TypedDict | |
| import gradio as gr | |
| import typer | |
| from huggingface_hub import snapshot_download | |
| from pydantic import BaseModel | |
| from ultimate_rvc.common import AUDIO_DIR, MODELS_DIR, TEMP_DIR | |
| from ultimate_rvc.core.generate.song_cover import get_named_song_dirs | |
| from ultimate_rvc.core.generate.speech import get_edge_tts_voice_names | |
| from ultimate_rvc.core.manage.audio import ( | |
| get_audio_datasets, | |
| get_named_audio_datasets, | |
| get_saved_output_audio, | |
| get_saved_speech_audio, | |
| ) | |
| from ultimate_rvc.core.manage.config import get_config_names, load_config | |
| from ultimate_rvc.core.manage.models import ( | |
| get_custom_embedder_model_names, | |
| get_custom_pretrained_model_names, | |
| get_training_model_names, | |
| get_voice_model_names, | |
| ) | |
| from ultimate_rvc.web.common import ( | |
| initialize_dropdowns, | |
| exception_harness, | |
| render_transfer_component, | |
| setup_transfer_event, | |
| toggle_visibility, | |
| toggle_visible_component, | |
| update_dropdowns, | |
| update_output_name, | |
| update_value, | |
| ) | |
| from ultimate_rvc.web.config.component import ( | |
| AnyComponentConfig, | |
| AudioConfig, | |
| CheckboxConfig, | |
| ComponentConfig, | |
| DropdownConfig, | |
| NumberConfig, | |
| RadioConfig, | |
| SliderConfig, | |
| TextboxConfig, | |
| ) | |
| from ultimate_rvc.web.config.tab import ( | |
| SongGenerationConfig, | |
| SpeechGenerationConfig, | |
| TrainingConfig, | |
| ) | |
| from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import ( | |
| render as render_speech_multi_step_tab, | |
| ) | |
| from ultimate_rvc.web.tabs.generate.speech.one_click_generation import ( | |
| render as render_speech_one_click_tab, | |
| ) | |
| from ultimate_rvc.web.tabs.manage.audio import render as render_audio_tab | |
| from ultimate_rvc.web.tabs.manage.models import render as render_models_tab | |
| from ultimate_rvc.web.tabs.manage.settings import render as render_settings_tab | |
| if TYPE_CHECKING: | |
| import gradio as gr | |
| from typing import TYPE_CHECKING | |
| from functools import partial | |
| import gradio as gr | |
| from ultimate_rvc.core.common import ( | |
| INTERMEDIATE_AUDIO_BASE_DIR, | |
| OUTPUT_AUDIO_DIR, | |
| copy_file_safe, | |
| display_progress, | |
| get_file_hash, | |
| json_dump, | |
| json_load, | |
| validate_model, | |
| validate_url, | |
| ) | |
| from ultimate_rvc.core.exceptions import ( | |
| Entity, | |
| InvalidLocationError, | |
| Location, | |
| NotFoundError, | |
| NotProvidedError, | |
| UIMessage, | |
| YoutubeUrlError, | |
| ) | |
| from ultimate_rvc.core.generate.common import ( | |
| convert, | |
| get_unique_base_path, | |
| mix_audio, | |
| validate_audio_dir_exists, | |
| validate_audio_file_exists, | |
| wavify, | |
| ) | |
| from ultimate_rvc.core.generate.song_cover import ( | |
| get_named_song_dirs, | |
| get_song_cover_name, | |
| mix_song, | |
| pitch_shift, | |
| postprocess, | |
| retrieve_song, | |
| separate_audio, | |
| get_named_song_dirs, | |
| get_song_cover_name, | |
| run_pipeline, | |
| ) | |
| from ultimate_rvc.core.generate.typing_extra import ( | |
| EffectedVocalsMetaData, | |
| FileMetaData, | |
| MixedAudioType, | |
| PitchShiftMetaData, | |
| RVCAudioMetaData, | |
| SeparatedAudioMetaData, | |
| ) | |
| from ultimate_rvc.core.manage.audio import get_saved_output_audio | |
| from ultimate_rvc.typing_extra import EmbedderModel | |
| from ultimate_rvc.web.common import ( | |
| PROGRESS_BAR, | |
| exception_harness, | |
| toggle_intermediate_audio, | |
| toggle_visibility, | |
| toggle_visible_component, | |
| update_dropdowns, | |
| update_output_name, | |
| update_value, | |
| ) | |
| from ultimate_rvc.web.typing_extra import ConcurrencyId | |
# PEP 695 type aliases (require Python 3.12+).
# NOTE(review): `PathLike`, `Mapping` and `Sequence` are not imported in the
# visible portion of this file (expected from `os` / `collections.abc`) —
# confirm they are in scope at this point; likely lost in concatenation.
type StrPath = str | PathLike[str]
# Recursive alias covering any JSON-serializable value.
type Json = Mapping[str, Json] | Sequence[Json] | str | int | float | bool | None
class SegmentSize(IntEnum):
    """Valid segment sizes (in frames) for the audio separation step."""

    # Powers of two from 64 up to 4096; larger sizes trade resource use
    # for potentially better separation quality.
    SEG_64 = 64
    SEG_128 = 128
    SEG_256 = 256
    SEG_512 = 512
    SEG_1024 = 1024
    SEG_2048 = 2048
    SEG_4096 = 4096
| class F0Method(StrEnum): | |
| """Enumeration of pitch extraction methods.""" | |
| RMVPE = "rmvpe" | |
| CREPE = "crepe" | |
| CREPE_TINY = "crepe-tiny" | |
| FCPE = "fcpe" | |
| class RVCContentType(StrEnum): | |
| """Enumeration of valid content to convert with RVC.""" | |
| VOCALS = "vocals" | |
| VOICE = "voice" | |
| SPEECH = "speech" | |
| AUDIO = "audio" | |
class SampleRate(IntEnum):
    """Audio sample rates (Hz) supported for output audio."""

    HZ_16000 = 16000
    HZ_44100 = 44100
    HZ_48000 = 48000
    HZ_96000 = 96000
    HZ_192000 = 192000
| class AudioExt(StrEnum): | |
| """Enumeration of supported audio file formats.""" | |
| MP3 = "mp3" | |
| WAV = "wav" | |
| FLAC = "flac" | |
| OGG = "ogg" | |
| class DeviceType(StrEnum): | |
| """Enumeration of device types for training voice models.""" | |
| AUTOMATIC = "Automatic" | |
| CPU = "CPU" | |
| GPU = "GPU" | |
| class TrainingSampleRate(StrEnum): | |
| """Enumeration of sample rates for training voice models.""" | |
| HZ_32K = "32000" | |
| HZ_40K = "40000" | |
| HZ_48K = "48000" | |
| class PretrainedSampleRate(StrEnum): | |
| """Enumeration of valid sample rates for pretrained models.""" | |
| HZ_32K = "32k" | |
| HZ_40K = "40k" | |
| HZ_44K = "44k" | |
| HZ_48K = "48k" | |
| class TrainingF0Method(StrEnum): | |
| """Enumeration of pitch extraction methods for training.""" | |
| RMVPE = "rmvpe" | |
| CREPE = "crepe" | |
| CREPE_TINY = "crepe-tiny" | |
| class AudioSplitMethod(StrEnum): | |
| """ | |
| Enumeration of methods to use for splitting audio files during | |
| dataset preprocessing. | |
| """ | |
| SKIP = "Skip" | |
| SIMPLE = "Simple" | |
| AUTOMATIC = "Automatic" | |
| class Vocoder(StrEnum): | |
| """Enumeration of vocoders for training voice models.""" | |
| HIFI_GAN = "HiFi-GAN" | |
| MRF_HIFI_GAN = "MRF HiFi-GAN" | |
| REFINE_GAN = "RefineGAN" | |
| class IndexAlgorithm(StrEnum): | |
| """Enumeration of indexing algorithms for training voice models.""" | |
| AUTO = "Auto" | |
| FAISS = "Faiss" | |
| KMEANS = "KMeans" | |
| class PretrainedType(StrEnum): | |
| """ | |
| Enumeration of the possible types of pretrained models to finetune | |
| voice models on. | |
| """ | |
| NONE = "None" | |
| DEFAULT = "Default" | |
| CUSTOM = "Custom" | |
| class ConcurrencyId(StrEnum): | |
| """Enumeration of possible concurrency identifiers.""" | |
| GPU = auto() | |
| class SongSourceType(StrEnum): | |
| """The type of source providing the song to generate a cover of.""" | |
| PATH = "Local or HTTP filepath" | |
| LOCAL_FILE = "Local file" | |
| CACHED_SONG = "Cached song" | |
| class SpeechSourceType(StrEnum): | |
| """The type of source providing the text to generate speech from.""" | |
| TEXT = "Text" | |
| LOCAL_FILE = "Local file" | |
| class SongTransferOption(StrEnum): | |
| """Enumeration of possible song transfer options.""" | |
| STEP_1_AUDIO = "Step 1: stem splitting" | |
| STEP_2_VOCALS = "Step 2: vocal conversion" | |
| STEP_3_VOCALS = "Step 3: vocal effect" | |
| STEP_4_INSTRUMENTALS = "Step 4: instrumentals" | |
| STEP_4_BACKUP_VOCALS = "Step 4: backup vocals" | |
| STEP_5_MAIN_VOCALS = "Step 5: main vocals" | |
| STEP_5_INSTRUMENTALS = "Step 5: instrumentals" | |
| STEP_5_BACKUP_VOCALS = "Step 5: backup vocals" | |
| class SpeechTransferOption(StrEnum): | |
| """Enumeration of possible speech transfer options.""" | |
| STEP_2_SPEECH = "Step 2: speech conversion" | |
| STEP_3_SPEECH = "Step 3: speech effect" | |
class ComponentVisibilityKwArgs(TypedDict, total=False):
    """
    Optional keyword arguments for toggling a component's visibility.

    All keys are optional (``total=False``).

    Attributes
    ----------
    visible : bool
        Whether the component should be shown.
    value : Any
        New value to assign to the component.
    """

    visible: bool
    value: Any
class UpdateDropdownKwArgs(TypedDict, total=False):
    """
    Keyword arguments for updating a dropdown component.

    All keys are optional (``total=False``).

    Attributes
    ----------
    choices : DropdownChoices
        The updated choices for the dropdown component.
    value : DropdownValue
        The updated value for the dropdown component.
    """

    # NOTE(review): DropdownChoices / DropdownValue are not defined or
    # imported in the visible portion of this file — presumably aliases from
    # ultimate_rvc.web.typing_extra; confirm they are in scope.
    choices: DropdownChoices
    value: DropdownValue
| class TextBoxKwArgs(TypedDict, total=False): | |
| """ | |
| Keyword arguments for updating a textbox component. | |
| Attributes | |
| ---------- | |
| value : str | None | |
| The updated value for the textbox component. | |
| placeholder : str | None | |
| The updated placeholder for the textbox component. | |
| """ | |
| value: str | None | |
| placeholder: str | None | |
| class UpdateAudioKwArgs(TypedDict, total=False): | |
| """ | |
| Keyword arguments for updating an audio component. | |
| Attributes | |
| ---------- | |
| value : str | None | |
| The updated value for the audio component. | |
| """ | |
| value: str | None | |
| class DatasetType(StrEnum): | |
| """The type of dataset to train a voice model.""" | |
| NEW_DATASET = "Create new dataset" | |
| EXISTING_DATASET = "Use existing dataset" | |
| class EmbedderModel(StrEnum): | |
| """Enumeration of audio embedding models.""" | |
| CONTENTVEC = "contentvec" | |
| CRUSTY = "Crusty" | |
| CUSTOM = "custom" | |
| class SeparationModel(StrEnum): | |
| """Enumeration of audio separation models.""" | |
| UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx" | |
| UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx" | |
| REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx" | |
| UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx" | |
| Kim_Vocal_1 = "Kim_Vocal_1.onnx" | |
| Kim_Vocal_2 = "Kim_Vocal_2.onnx" | |
| Kim_Inst = "Kim_Inst.onnx" | |
| UVR_MDX_NET_Inst_HQ_4 = "UVR-MDX-NET-Inst_HQ_4.onnx" | |
| kuielab_a_vocals = "kuielab_a_vocals.onnx" | |
| kuielab_b_vocals = "kuielab_b_vocals.onnx" | |
| kuielab_a_drums = "kuielab_a_drums.onnx" | |
| kuielab_b_drums = "kuielab_b_drums.onnx" | |
| kuielab_a_bass = "kuielab_a_bass.onnx" | |
| kuielab_b_bass = "kuielab_b_bass.onnx" | |
| kuielab_a_other = "kuielab_a_other.onnx" | |
| kuielab_b_other = "kuielab_b_other.onnx" | |
| MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt" | |
| UVR_DeNoise = "UVR-DeNoise.pth" | |
| UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth" | |
# ---------------------------------------------------------------------------
# Module-level setup — runs at import time and has side effects.
# ---------------------------------------------------------------------------
now_dir = os.getcwd()
# NOTE(review): mutating sys.path at import time so modules in the working
# directory resolve — confirm this is intentional rather than a packaging gap.
sys.path.append(now_dir)
models_dir = "models"
dump_path = os.path.join(now_dir, models_dir)  # <cwd>/models
repo_id = "lainlives/voice"  # Hugging Face repo the models are fetched from
hf_token = os.environ.get("HF_TOKEN")  # optional token (None for anonymous)
# Downloads the whole repository into <cwd>/models on import; requires
# network access and may take a long time on a cold cache.
snapshot_download(repo_id=repo_id, local_dir=dump_path, token=hf_token)
# if __name__ == "__main__":
#     start_app(share=False, ssr_mode = True)
config_name = "default"  # os.environ.get("URVC_CONFIG")
cookiefile = os.environ.get("YT_COOKIEFILE")  # presumably a cookies file for YouTube downloads — confirm
# NOTE(review): the stray module docstring below looks like the header of a
# second, concatenated module (UI tab configuration models follow).
"""
Module defining models for representing configuration settings for
UI tabs.
"""
class SongIntermediateAudioConfig(BaseModel):
    """
    Configuration settings for intermediate audio components in the
    one-click song generation tab.

    Attributes
    ----------
    song : AudioConfig
        Configuration settings for the input song audio component.
    vocals : AudioConfig
        Configuration settings for the vocals audio component.
    instrumentals : AudioConfig
        Configuration settings for the instrumentals audio component.
    main_vocals : AudioConfig
        Configuration settings for the main vocals audio component.
    backup_vocals : AudioConfig
        Configuration settings for the backup vocals audio component.
    main_vocals_dereverbed : AudioConfig
        Configuration settings for the main vocals de-reverbed audio
        component.
    main_vocals_reverb : AudioConfig
        Configuration settings for the main vocals reverb audio
        component.
    converted_vocals : AudioConfig
        Configuration settings for the converted vocals audio
        component.
    postprocessed_vocals : AudioConfig
        Configuration settings for the postprocessed vocals audio
        component.
    instrumentals_shifted : AudioConfig
        Configuration settings for the shifted instrumentals audio
        component.
    backup_vocals_shifted : AudioConfig
        Configuration settings for the shifted backup vocals audio
        component.
    """

    # Field definition order is significant: all() iterates
    # __annotations__, which preserves this order.
    song: AudioConfig = AudioConfig.intermediate(label="Song")
    vocals: AudioConfig = AudioConfig.intermediate(label="Vocals")
    instrumentals: AudioConfig = AudioConfig.intermediate(
        label="Instrumentals",
    )
    main_vocals: AudioConfig = AudioConfig.intermediate(
        label="Main vocals",
    )
    backup_vocals: AudioConfig = AudioConfig.intermediate(
        label="Backup vocals",
    )
    main_vocals_dereverbed: AudioConfig = AudioConfig.intermediate(
        label="De-reverbed main vocals",
    )
    main_vocals_reverb: AudioConfig = AudioConfig.intermediate(
        label="Main vocals with reverb",
    )
    converted_vocals: AudioConfig = AudioConfig.intermediate(
        label="Converted vocals",
    )
    postprocessed_vocals: AudioConfig = AudioConfig.intermediate(
        label="Postprocessed vocals",
    )
    instrumentals_shifted: AudioConfig = AudioConfig.intermediate(
        label="Pitch-shifted instrumentals",
    )
    backup_vocals_shifted: AudioConfig = AudioConfig.intermediate(
        label="Pitch-shifted backup vocals",
    )

    def all(self) -> list[gr.Audio]:
        """
        Retrieve instances of all intermediate audio components
        in the one-click song generation tab.

        Returns
        -------
        list[gr.Audio]
            List of instances of all intermediate audio components in
            the one-click song generation tab.
        """
        # NOTE we are using self.__annotations__ to get the fields in
        # the order they are defined in the class
        return [getattr(self, field).instance for field in self.__annotations__]
class OneClickSongGenerationConfig(SongGenerationConfig):
    """
    Configuration settings for the one-click song generation tab.

    Attributes
    ----------
    n_octaves : SliderConfig
        Configuration settings for an octave pitch shift slider
        component.
    n_semitones : SliderConfig
        Configuration settings for a semitone pitch shift slider
        component.
    show_intermediate_audio : CheckboxConfig
        Configuration settings for a show intermediate audio checkbox
        component.
    intermediate_audio : SongIntermediateAudioConfig
        Configuration settings for intermediate audio components.

    See Also
    --------
    SongGenerationConfig
        Parent model defining common component configuration settings
        for song generation tabs.
    """

    n_octaves: SliderConfig = SliderConfig.octave_shift(
        label="Vocal pitch shift",
        info=(
            "The number of octaves to shift the pitch of the converted vocals by. Use 1"
            " for male-to-female and -1 for vice-versa."
        ),
    )
    n_semitones: SliderConfig = SliderConfig.semitone_shift(
        label="Overall pitch shift",
        info=(
            "The number of semi-tones to shift the pitch of the converted vocals,"
            " instrumentals and backup vocals by."
        ),
    )
    show_intermediate_audio: CheckboxConfig = CheckboxConfig(
        label="Show intermediate audio",
        info="Show intermediate audio tracks produced during song cover generation.",
        value=False,
        # excluded from persisted configuration values (UI-only toggle)
        exclude_value=True,
    )
    intermediate_audio: SongIntermediateAudioConfig = SongIntermediateAudioConfig()
class SongInputAudioConfig(BaseModel):
    """
    Configuration settings for input audio components in the multi-step
    song generation tab.

    Attributes
    ----------
    audio : AudioConfig
        Configuration settings for the input audio component.
    vocals : AudioConfig
        Configuration settings for the vocals audio component.
    converted_vocals : AudioConfig
        Configuration settings for the converted vocals audio
        component.
    instrumentals : AudioConfig
        Configuration settings for the instrumentals audio
        component.
    backup_vocals : AudioConfig
        Configuration settings for the backup vocals audio
        component.
    main_vocals : AudioConfig
        Configuration settings for the main vocals audio
        component.
    shifted_instrumentals : AudioConfig
        Configuration settings for the shifted instrumentals audio
        component.
    shifted_backup_vocals : AudioConfig
        Configuration settings for the shifted backup vocals audio
        component.
    """

    audio: AudioConfig = AudioConfig.input(label="Audio")
    vocals: AudioConfig = AudioConfig.input(label="Vocals")
    # NOTE(review): converted_vocals, shifted_instrumentals and
    # shifted_backup_vocals reuse the labels of other fields ("Vocals",
    # "Instrumentals", "Backup vocals") — presumably because they appear in
    # different UI steps; confirm the duplication is intentional.
    converted_vocals: AudioConfig = AudioConfig.input(label="Vocals")
    instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
    backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")
    main_vocals: AudioConfig = AudioConfig.input(label="Main vocals")
    shifted_instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
    shifted_backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")

    def all(self) -> list[AudioConfig]:
        """
        Retrieve configuration settings for all input audio components
        in the multi-step song generation tab.

        Returns
        -------
        list[AudioConfig]
            List of configuration settings for all input audio
            components in the multi-step song generation tab.
        """
        # __annotations__ preserves field definition order.
        return [getattr(self, field) for field in self.__annotations__]
class SongDirsConfig(BaseModel):
    """
    Configuration settings for song directory components in the
    multi-step song generation tab.

    Attributes
    ----------
    separate_audio : DropdownConfig
        Configuration settings for the song directory component
        for separating audio.
    convert_vocals : DropdownConfig
        Configuration settings for the song directory component
        for converting vocals.
    postprocess_vocals : DropdownConfig
        Configuration settings for the song directory component
        for postprocessing vocals.
    pitch_shift_background : DropdownConfig
        Configuration settings for the song directory component
        for pitch-shifting background audio.
    mix : DropdownConfig
        Configuration settings for the song directory component
        for mixing audio.
    """

    # One song-directory dropdown per pipeline step.
    separate_audio: DropdownConfig = DropdownConfig.song_dir()
    convert_vocals: DropdownConfig = DropdownConfig.song_dir()
    postprocess_vocals: DropdownConfig = DropdownConfig.song_dir()
    pitch_shift_background: DropdownConfig = DropdownConfig.song_dir()
    mix: DropdownConfig = DropdownConfig.song_dir()

    def all(self) -> list[gr.Dropdown]:
        """
        Retrieve instances of all song directory components in the
        multi-step song generation tab.

        Returns
        -------
        list[gr.Dropdown]
            List of instances of all song directory components in
            the multi-step song generation tab.
        """
        # __annotations__ preserves field definition order.
        return [getattr(self, field).instance for field in self.__annotations__]
class MultiStepSongGenerationConfig(SongGenerationConfig):
    """
    Configuration settings for multi-step song generation tab.

    Attributes
    ----------
    separation_model : DropdownConfig
        Configuration settings for a separation model dropdown
        component.
    segment_size : RadioConfig
        Configuration settings for a segment size radio component.
    n_octaves : SliderConfig
        Configuration settings for an octave pitch shift slider
        component.
    n_semitones : SliderConfig
        Configuration settings for a semitone pitch shift slider
        component.
    n_semitones_instrumentals : SliderConfig
        Configuration settings for an instrumentals pitch shift slider
        component.
    n_semitones_backup_vocals : SliderConfig
        Configuration settings for a backup vocals pitch shift slider
        component.
    input_audio : SongInputAudioConfig
        Configuration settings for input audio components.
    song_dirs : SongDirsConfig
        Configuration settings for song directory components.

    See Also
    --------
    SongGenerationConfig
        Parent model defining common component configuration settings
        for song generation tabs.
    """

    separation_model: DropdownConfig = DropdownConfig(
        label="Separation model",
        info="The model to use for audio separation.",
        value=SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
        choices=list(SeparationModel),
    )
    segment_size: RadioConfig = RadioConfig(
        label="Segment size",
        info=(
            "The size of the segments into which the audio is split. Using a larger"
            " size consumes more resources, but may give better results."
        ),
        value=SegmentSize.SEG_2048,
        choices=list(SegmentSize),
    )
    n_octaves: SliderConfig = SliderConfig.octave_shift(
        label="Pitch shift (octaves)",
        info=(
            "The number of octaves to pitch-shift the converted voice by. Use 1 for"
            " male-to-female and -1 for vice-versa."
        ),
    )
    n_semitones: SliderConfig = SliderConfig.semitone_shift(
        label="Pitch shift (semi-tones)",
        info=(
            "The number of semi-tones to pitch-shift the converted vocals by. Altering"
            " this slightly reduces sound quality."
        ),
    )
    n_semitones_instrumentals: SliderConfig = SliderConfig.semitone_shift(
        label="Instrumental pitch shift",
        info="The number of semi-tones to pitch-shift the instrumentals by.",
    )
    n_semitones_backup_vocals: SliderConfig = SliderConfig.semitone_shift(
        label="Backup vocal pitch shift",
        info="The number of semi-tones to pitch-shift the backup vocals by.",
    )
    input_audio: SongInputAudioConfig = SongInputAudioConfig()
    song_dirs: SongDirsConfig = SongDirsConfig()
class SpeechIntermediateAudioConfig(BaseModel):
    """
    Configuration settings for intermediate audio components in the
    one-click speech generation tab.

    Attributes
    ----------
    speech : AudioConfig
        Configuration settings for the input speech audio component.
    converted_speech : AudioConfig
        Configuration settings for the converted speech audio component.
    """

    speech: AudioConfig = AudioConfig.intermediate(label="Speech")
    converted_speech: AudioConfig = AudioConfig.intermediate(label="Converted speech")

    def all(self) -> list[gr.Audio]:
        """
        Retrieve instances of all intermediate audio components in the
        speech generation tab.

        Returns
        -------
        list[gr.Audio]
            List of instances of all intermediate audio components in
            the speech generation tab.
        """
        # __annotations__ preserves field definition order.
        return [getattr(self, field).instance for field in self.__annotations__]
class OneClickSpeechGenerationConfig(SpeechGenerationConfig):
    """
    Configuration settings for one-click speech generation tab.

    Attributes
    ----------
    intermediate_audio : SpeechIntermediateAudioConfig
        Configuration settings for intermediate audio components.
    show_intermediate_audio : CheckboxConfig
        Configuration settings for a show intermediate audio checkbox
        component.

    See Also
    --------
    SpeechGenerationConfig
        Parent model defining common component configuration settings
        for speech generation tabs.
    """

    intermediate_audio: SpeechIntermediateAudioConfig = SpeechIntermediateAudioConfig()
    show_intermediate_audio: CheckboxConfig = CheckboxConfig(
        label="Show intermediate audio",
        info="Show intermediate audio tracks produced during speech generation.",
        value=False,
        # excluded from persisted configuration values (UI-only toggle)
        exclude_value=True,
    )
class SpeechInputAudioConfig(BaseModel):
    """
    Configuration settings for input audio components in the multi-step
    speech generation tab.

    Attributes
    ----------
    speech : AudioConfig
        Configuration settings for the input speech audio component.
    converted_speech : AudioConfig
        Configuration settings for the converted speech audio component.
    """

    # Use the keyword form `label=...` for consistency with every other
    # AudioConfig.input(...) call site in this file (the original passed
    # the label positionally).
    speech: AudioConfig = AudioConfig.input(label="Speech")
    converted_speech: AudioConfig = AudioConfig.input(label="Converted speech")

    def all(self) -> list[AudioConfig]:
        """
        Retrieve configuration settings for all input audio components
        in the multi-step speech generation tab.

        Returns
        -------
        list[AudioConfig]
            List of configuration settings for all input audio
            components in the multi-step speech generation tab.
        """
        # __annotations__ preserves field definition order.
        return [getattr(self, field) for field in self.__annotations__]
class MultiStepSpeechGenerationConfig(SpeechGenerationConfig):
    """
    Configuration settings for the multi-step speech generation tab.

    Attributes
    ----------
    input_audio : SpeechInputAudioConfig
        Configuration settings for input audio components.

    See Also
    --------
    SpeechGenerationConfig
        Parent model defining common component configuration settings
        for speech generation tabs.
    """

    input_audio: SpeechInputAudioConfig = SpeechInputAudioConfig()
class MultiStepTrainingConfig(TrainingConfig):
    """
    Configuration settings for multi-step training tab.

    Currently adds nothing beyond TrainingConfig; presumably kept as a
    distinct type so tab-specific settings can be added later.
    """
class ModelManagementConfig(BaseModel):
    """
    Configuration settings for model management tab.

    Attributes
    ----------
    voices : DropdownConfig
        Configuration settings for delete voice models dropdown
        component.
    embedders : DropdownConfig
        Configuration settings for delete embedder models dropdown
        component.
    pretraineds : DropdownConfig
        Configuration settings for delete pretrained models dropdown
        component.
    traineds : DropdownConfig
        Configuration settings for delete training models dropdown
        component.
    dummy_checkbox : CheckboxConfig
        Configuration settings for a dummy checkbox component.
    """

    voices: DropdownConfig = DropdownConfig.multi_delete(
        label="Voice models",
        info="Select one or more voice models to delete.",
    )
    embedders: DropdownConfig = DropdownConfig.multi_delete(
        label="Custom embedder models",
        info="Select one or more embedder models to delete.",
    )
    pretraineds: DropdownConfig = DropdownConfig.multi_delete(
        label="Custom pretrained models",
        info="Select one or more pretrained models to delete.",
    )
    traineds: DropdownConfig = DropdownConfig.multi_delete(
        label="Training models",
        info="Select one or more training models to delete.",
    )
    # Hidden placeholder component; its value is never persisted.
    dummy_checkbox: CheckboxConfig = CheckboxConfig(
        value=False,
        visible=False,
        exclude_value=True,
    )
class AudioManagementConfig(BaseModel):
    """
    Configuration settings for audio management tab.

    Attributes
    ----------
    intermediate : DropdownConfig
        Configuration settings for delete intermediate audio files
        dropdown component.
    speech : DropdownConfig
        Configuration settings for delete speech audio files dropdown
        component.
    output : DropdownConfig
        Configuration settings for delete output audio files dropdown
        component.
    dataset : DropdownConfig
        Configuration settings for delete dataset audio files dropdown
        component.
    dummy_checkbox : CheckboxConfig
        Configuration settings for a dummy checkbox component.
    """

    intermediate: DropdownConfig = DropdownConfig.multi_delete(
        label="Song directories",
        info=(
            "Select one or more song directories containing intermediate audio files to"
            " delete."
        ),
    )
    speech: DropdownConfig = DropdownConfig.multi_delete(
        label="Speech audio files",
        info="Select one or more speech audio files to delete.",
    )
    output: DropdownConfig = DropdownConfig.multi_delete(
        label="Output audio files",
        info="Select one or more output audio files to delete.",
    )
    dataset: DropdownConfig = DropdownConfig.multi_delete(
        label="Dataset audio files",
        info="Select one or more datasets containing audio files to delete.",
    )
    # Hidden placeholder component; its value is never persisted.
    dummy_checkbox: CheckboxConfig = CheckboxConfig(
        value=False,
        visible=False,
        exclude_value=True,
    )
class SettingsManagementConfig(BaseModel):
    """
    Configuration settings for settings management tab.

    Attributes
    ----------
    load_config_name : DropdownConfig
        Configuration settings for the load-configuration dropdown
        component.
    delete_config_names : DropdownConfig
        Configuration settings for the delete-configurations dropdown
        component.
    dummy_checkbox : CheckboxConfig
        Configuration settings for a dummy checkbox component.
    """

    load_config_name: DropdownConfig = DropdownConfig(
        label="Configuration name",
        info="The name of a configuration to load UI settings from",
        value=None,
        # rendered manually elsewhere, and excluded from persisted values
        render=False,
        exclude_value=True,
    )
    delete_config_names: DropdownConfig = DropdownConfig.multi_delete(
        label="Configuration names",
        info="Select the name of one or more configurations to delete",
    )
    # Hidden placeholder component; its value is never persisted.
    dummy_checkbox: CheckboxConfig = CheckboxConfig(
        value=False,
        visible=False,
        exclude_value=True,
    )
class TotalSongGenerationConfig(BaseModel):
    """
    All configuration settings for song generation tabs.

    Attributes
    ----------
    one_click : OneClickSongGenerationConfig
        Configuration settings for the one-click song generation tab.
    multi_step : MultiStepSongGenerationConfig
        Configuration settings for the multi-step song generation tab.
    """

    one_click: OneClickSongGenerationConfig = OneClickSongGenerationConfig()
    multi_step: MultiStepSongGenerationConfig = MultiStepSongGenerationConfig()
class TotalSpeechGenerationConfig(BaseModel):
    """
    All configuration settings for speech generation tabs.

    Attributes
    ----------
    one_click : OneClickSpeechGenerationConfig
        Configuration settings for the one-click speech generation tab.
    multi_step : MultiStepSpeechGenerationConfig
        Configuration settings for the multi-step speech generation tab.
    """

    one_click: OneClickSpeechGenerationConfig = OneClickSpeechGenerationConfig()
    multi_step: MultiStepSpeechGenerationConfig = MultiStepSpeechGenerationConfig()
class TotalTrainingConfig(BaseModel):
    """
    All configuration settings for training tabs.

    Attributes
    ----------
    multi_step : MultiStepTrainingConfig
        Configuration settings for the multi-step training tab.
    """

    # (The original docstring documented this field as "training";
    # the actual field name is multi_step.)
    multi_step: MultiStepTrainingConfig = MultiStepTrainingConfig()
class TotalManagementConfig(BaseModel):
    """
    All configuration settings for management tabs.

    Attributes
    ----------
    model : ModelManagementConfig
        Configuration settings for the model management tab.
    audio : AudioManagementConfig
        Configuration settings for the audio management tab.
    settings : SettingsManagementConfig
        Configuration settings for the settings management tab.
    """

    model: ModelManagementConfig = ModelManagementConfig()
    audio: AudioManagementConfig = AudioManagementConfig()
    settings: SettingsManagementConfig = SettingsManagementConfig()
class TotalConfig(BaseModel):
    """
    All configuration settings for the Ultimate RVC app.

    Attributes
    ----------
    song : TotalSongGenerationConfig
        Configuration settings for song generation tabs.
    speech : TotalSpeechGenerationConfig
        Configuration settings for speech generation tabs.
    training : TotalTrainingConfig
        Configuration settings for training tabs.
    management : TotalManagementConfig
        Configuration settings for management tabs.
    """

    song: TotalSongGenerationConfig = TotalSongGenerationConfig()
    speech: TotalSpeechGenerationConfig = TotalSpeechGenerationConfig()
    training: TotalTrainingConfig = TotalTrainingConfig()
    management: TotalManagementConfig = TotalManagementConfig()

    def all(self) -> list[AnyComponentConfig]:
        """
        Recursively collect those component configuration models nested
        within the current model instance, which have values that are
        not excluded.

        Returns
        -------
        list[AnyComponentConfig]
            A list of component configuration models found within the
            current model instance, which have values that are not
            excluded.
        """

        def _walk(model: BaseModel) -> list[AnyComponentConfig]:
            # Iterating a pydantic model yields (field_name, field_value)
            # pairs, so nested sub-models can be traversed recursively.
            collected: list[AnyComponentConfig] = []
            for _name, field_value in model:
                if isinstance(field_value, ComponentConfig):
                    # Leaf: keep it unless its value is explicitly excluded.
                    if not field_value.exclude_value:
                        collected.append(field_value)
                    continue
                if isinstance(field_value, BaseModel):
                    collected.extend(_walk(field_value))
            return collected

        return _walk(self)
class BaseTabConfig(BaseModel):
    """
    Base model defining common component configuration settings for
    UI tabs.

    Attributes
    ----------
    embedder_model : DropdownConfig
        Configuration settings for an embedder model dropdown component.
    custom_embedder_model : DropdownConfig
        Configuration settings for a custom embedder model dropdown
        component.
    """

    embedder_model: DropdownConfig = DropdownConfig(
        label="Embedder model",
        info="The model to use for generating speaker embeddings.",
        value=EmbedderModel.CONTENTVEC,
        choices=list(EmbedderModel),
        exclude_value=True,
    )
    # Hidden and deferred (render=False) by default; presumably shown only
    # when a custom embedder model type is selected elsewhere in the UI —
    # confirm against the tab render functions.
    custom_embedder_model: DropdownConfig = DropdownConfig(
        label="Custom embedder model",
        info="Select a custom embedder model from the dropdown.",
        value=None,
        visible=False,
        render=False,
        exclude_value=True,
    )
class TrainingConfig(BaseTabConfig):
    """
    Common component configuration settings for training tabs.

    Attributes
    ----------
    dataset_type : DropdownConfig
        Configuration settings for a dataset type dropdown component.
    dataset : DropdownConfig
        Configuration settings for a dataset dropdown component.
    dataset_name : TextboxConfig
        Configuration settings for a dataset name textbox component.
    preprocess_model : DropdownConfig
        Configuration settings for a model name dropdown component
        for audio preprocessing.
    sample_rate : DropdownConfig
        Configuration settings for a sample rate dropdown component.
    filter_audio : CheckboxConfig
        Configuration settings for a filter audio checkbox component.
    clean_audio : CheckboxConfig
        Configuration settings for a clean audio checkbox component.
    clean_strength : SliderConfig
        Configuration settings for a clean strength slider component.
    split_method : DropdownConfig
        Configuration settings for an audio splitting method dropdown
        component.
    chunk_len : SliderConfig
        Configuration settings for a chunk length slider component.
    overlap_len : SliderConfig
        Configuration settings for an overlap length slider component.
    preprocess_cores : SliderConfig
        Configuration settings for a CPU cores slider component for
        preprocessing.
    extract_model : DropdownConfig
        Configuration settings for a model name dropdown component for
        feature extraction.
    f0_method : DropdownConfig
        Configuration settings for an F0 method dropdown component.
    hop_length : SliderConfig
        Configuration settings for a hop length slider component.
    include_mutes : SliderConfig
        Configuration settings for an include mutes slider component.
    extraction_cores : SliderConfig
        Configuration settings for a CPU cores slider component for
        feature extraction.
    extraction_acceleration : DropdownConfig
        Configuration settings for a hardware acceleration component for
        feature extraction.
    extraction_gpus : DropdownConfig
        Configuration settings for a GPU dropdown component for feature
        extraction.
    train_model : DropdownConfig
        Configuration settings for a model name dropdown component for
        training.
    num_epochs : SliderConfig
        Configuration settings for a number of epochs slider component.
    batch_size : SliderConfig
        Configuration settings for a batch size slider component.
    detect_overtraining : CheckboxConfig
        Configuration settings for a detect overtraining checkbox
        component.
    overtraining_threshold : SliderConfig
        Configuration settings for an overtraining threshold slider
        component.
    vocoder : DropdownConfig
        Configuration settings for a vocoder dropdown component.
    index_algorithm : DropdownConfig
        Configuration settings for an index algorithm dropdown
        component.
    pretrained_type : DropdownConfig
        Configuration settings for a pretrained model type dropdown
        component.
    custom_pretrained_model : DropdownConfig
        Configuration settings for a custom pretrained model dropdown
        component.
    save_interval : SliderConfig
        Configuration settings for a save-interval slider component.
    save_all_checkpoints : CheckboxConfig
        Configuration settings for a save-all-checkpoints checkbox
        component.
    save_all_weights : CheckboxConfig
        Configuration settings for a save-all-weights checkbox
        component.
    clear_saved_data : CheckboxConfig
        Configuration settings for a clear-saved-data checkbox
        component.
    upload_model : CheckboxConfig
        Configuration settings for an upload voice model checkbox
        component.
    upload_name : TextboxConfig
        Configuration settings for an upload name textbox component.
    training_acceleration : DropdownConfig
        Configuration settings for a hardware acceleration component for
        training.
    training_gpus : DropdownConfig
        Configuration settings for a GPU dropdown component for
        training.
    preload_dataset : CheckboxConfig
        Configuration settings for a preload dataset checkbox component.
    reduce_memory_usage : CheckboxConfig
        Configuration settings for a reduce-memory-usage checkbox
        component.

    See Also
    --------
    BaseTabConfig
        Parent model defining common component configuration settings
        for UI tabs.
    """

    # NOTE(review): this class shadows the ``TrainingConfig`` imported from
    # ``ultimate_rvc.web.config.tab`` near the top of the file — confirm the
    # redefinition is intentional.
    dataset_type: DropdownConfig = DropdownConfig(
        label="Dataset type",
        info="Select the type of dataset to preprocess.",
        value=DatasetType.NEW_DATASET,
        choices=list(DatasetType),
        exclude_value=True,
    )
    dataset: DropdownConfig = DropdownConfig(
        label="Dataset path",
        info=(
            "The path to an existing dataset. Either select a path to a previously"
            " created dataset or provide a path to an external dataset."
        ),
        value=None,
        allow_custom_value=True,
        visible=False,
        render=False,
        exclude_value=True,
    )
    dataset_name: TextboxConfig = TextboxConfig(
        label="Dataset name",
        info=(
            "The name of the new dataset. If the dataset already exists, the provided"
            " audio files will be added to it."
        ),
        value="My dataset",
        exclude_value=True,
    )
    preprocess_model: DropdownConfig = DropdownConfig(
        label="Model name",
        info=(
            "Name of the model to preprocess the given dataset for. Either select an"
            " existing model from the dropdown or provide the name of a new model."
        ),
        value="My model",
        allow_custom_value=True,
        render=False,
        exclude_value=True,
    )
    sample_rate: DropdownConfig = DropdownConfig(
        label="Sample rate",
        info="Target sample rate for the audio files in the provided dataset.",
        value=TrainingSampleRate.HZ_40K,
        choices=list(TrainingSampleRate),
    )
    filter_audio: CheckboxConfig = CheckboxConfig(
        label="Filter audio",
        info=(
            "Whether to remove low-frequency sounds from the audio files in the"
            " provided dataset by applying a high-pass butterworth filter.<br><br>"
        ),
        value=True,
    )
    clean_audio: CheckboxConfig = CheckboxConfig(
        label="Clean audio",
        info=(
            "Whether to clean the audio files in the provided dataset using noise"
            " reduction algorithms.<br><br><br>"
        ),
        value=False,
        exclude_value=True,
    )
    clean_strength: SliderConfig = SliderConfig.clean_strength(visible=False)
    split_method: DropdownConfig = DropdownConfig(
        label="Audio splitting method",
        info=(
            "The method to use for splitting the audio files in the provided dataset."
            " Use the `Skip` method to skip splitting if the audio files are already"
            " split. Use the `Simple` method if excessive silence has already been"
            " removed from the audio files. Use the `Automatic` method for automatic"
            " silence detection and splitting around it."
        ),
        value=AudioSplitMethod.AUTOMATIC,
        choices=list(AudioSplitMethod),
        exclude_value=True,
    )
    chunk_len: SliderConfig = SliderConfig(
        label="Chunk length",
        info="Length of split audio chunks.",
        value=3.0,
        minimum=0.5,
        maximum=5.0,
        step=0.1,
        visible=False,
    )
    overlap_len: SliderConfig = SliderConfig(
        label="Overlap length",
        info="Length of overlap between split audio chunks.",
        value=0.3,
        minimum=0.0,
        maximum=0.4,
        step=0.1,
        visible=False,
    )
    preprocess_cores: SliderConfig = SliderConfig.cpu_cores()
    extract_model: DropdownConfig = DropdownConfig(
        label="Model name",
        info=(
            "Name of the model with an associated preprocessed dataset to extract"
            " training features from. When a new dataset is preprocessed, its"
            " associated model is selected by default."
        ),
        value=None,
        render=False,
        exclude_value=True,
    )
    f0_method: DropdownConfig = DropdownConfig(
        label="F0 method",
        info="The method to use for extracting pitch features.",
        value=TrainingF0Method.RMVPE,
        choices=list(TrainingF0Method),
        exclude_value=True,
    )
    hop_length: SliderConfig = SliderConfig.hop_length(
        label="Hop length",
        info="The hop length to use for extracting pitch features.<br><br>",
        visible=False,
    )
    include_mutes: SliderConfig = SliderConfig(
        label="Include mutes",
        info=(
            "The number of mute audio files to include in the generated training file"
            " list. Adding silent files enables the training model to handle pure"
            " silence in inferred audio files. If the preprocessed audio dataset"
            " already contains segments of pure silence, set this to 0."
        ),
        value=0,
        minimum=0,
        maximum=10,
        step=1,
    )
    extraction_cores: SliderConfig = SliderConfig.cpu_cores()
    extraction_acceleration: DropdownConfig = DropdownConfig.hardware_acceleration()
    extraction_gpus: DropdownConfig = DropdownConfig.gpu()
    train_model: DropdownConfig = DropdownConfig(
        label="Model name",
        info=(
            "Name of the model to train. When training features are extracted for a new"
            " model, its name is selected by default."
        ),
        value=None,
        render=False,
        exclude_value=True,
    )
    num_epochs: SliderConfig = SliderConfig(
        label="Number of epochs",
        info=(
            "The number of epochs to train the voice model. A higher number can improve"
            " voice model performance but may lead to overtraining."
        ),
        value=500,
        minimum=1,
        maximum=5000,
        step=1,
    )
    batch_size: SliderConfig = SliderConfig(
        label="Batch size",
        info=(
            "The number of samples in each training batch. It is advisable to align"
            " this value with the available VRAM of your GPU."
        ),
        value=16,
        minimum=1,
        maximum=128,
        step=1,
    )
    detect_overtraining: CheckboxConfig = CheckboxConfig(
        label="Detect overtraining",
        info=(
            "Whether to detect overtraining to prevent the voice model from learning"
            " the training data too well and losing the ability to generalize to new"
            " data."
        ),
        value=True,
        exclude_value=True,
    )
    overtraining_threshold: SliderConfig = SliderConfig(
        label="Overtraining threshold",
        info=(
            "The maximum number of epochs to continue training without any observed"
            " improvement in voice model performance."
        ),
        value=500,
        minimum=1,
        maximum=1000,
        visible=False,
    )
    vocoder: DropdownConfig = DropdownConfig(
        label="Vocoder",
        info=(
            "The vocoder to use for audio synthesis during training. HiFi-GAN provides"
            " basic audio fidelity, while RefineGAN provides the highest audio"
            " fidelity."
        ),
        value=Vocoder.HIFI_GAN,
        choices=list(Vocoder),
    )
    index_algorithm: DropdownConfig = DropdownConfig(
        label="Index algorithm",
        info=(
            "The method to use for generating an index file for the trained voice"
            " model. `KMeans` is particularly useful for large datasets."
        ),
        value=IndexAlgorithm.AUTO,
        choices=list(IndexAlgorithm),
    )
    pretrained_type: DropdownConfig = DropdownConfig(
        label="Pretrained model type",
        info=(
            "The type of pretrained model to finetune the voice model on. `None` will"
            " train the voice model from scratch, while `Default` will use a pretrained"
            " model tailored to the specific voice model architecture. `Custom` will"
            " use a custom pretrained model that you provide."
        ),
        value=PretrainedType.DEFAULT,
        choices=list(PretrainedType),
        exclude_value=True,
    )
    custom_pretrained_model: DropdownConfig = DropdownConfig(
        label="Custom pretrained model",
        info="Select a custom pretrained model to finetune from the dropdown.",
        value=None,
        visible=False,
        render=False,
        exclude_value=True,
    )
    save_interval: SliderConfig = SliderConfig(
        label="Save interval",
        info=(
            "The epoch interval at which to save voice model weights and"
            " checkpoints. The best model weights are always saved regardless of this"
            " setting."
        ),
        value=10,
        minimum=1,
        maximum=100,
        step=1,
    )
    save_all_checkpoints: CheckboxConfig = CheckboxConfig(
        label="Save all checkpoints",
        info=(
            "Whether to save a unique checkpoint at each save interval. If not enabled,"
            " only the latest checkpoint will be saved at each interval."
        ),
        value=True,
    )
    save_all_weights: CheckboxConfig = CheckboxConfig(
        label="Save all weights",
        info=(
            "Whether to save unique voice model weights at each save interval. If not"
            " enabled, only the best voice model weights will be saved."
        ),
        value=True,
    )
    clear_saved_data: CheckboxConfig = CheckboxConfig(
        label="Clear saved data",
        info=(
            "Whether to delete any existing training data associated with the voice"
            " model before training commences. Enable this setting only if you are"
            " training a new voice model from scratch or restarting training."
        ),
        value=False,
    )
    upload_model: CheckboxConfig = CheckboxConfig(
        label="Upload voice model",
        info=(
            "Whether to automatically upload the trained voice model so that it can be"
            " used for generation tasks within the Ultimate RVC app."
        ),
        value=False,
        exclude_value=True,
    )
    upload_name: TextboxConfig = TextboxConfig(
        label="Upload name",
        info="The name to give the uploaded voice model.",
        value=None,
        visible=False,
        exclude_value=True,
    )
    training_acceleration: DropdownConfig = DropdownConfig.hardware_acceleration()
    training_gpus: DropdownConfig = DropdownConfig.gpu()
    preload_dataset: CheckboxConfig = CheckboxConfig(
        label="Preload dataset",
        info=(
            "Whether to preload all training data into GPU memory. This can improve"
            " training speed but requires a lot of VRAM.<br><br>"
        ),
        value=True,
    )
    reduce_memory_usage: CheckboxConfig = CheckboxConfig(
        label="Reduce memory usage",
        info=(
            "Whether to reduce VRAM usage at the cost of slower training speed by"
            " enabling activation checkpointing. This is useful for GPUs with limited"
            " memory (e.g., <6GB VRAM) or when training with a batch size larger than"
            " what your GPU can normally accommodate."
        ),
        value=False,
    )
class GenerationConfig(BaseTabConfig):
    """
    Common component configuration settings for generation tabs.

    Attributes
    ----------
    voice_model : DropdownConfig
        Configuration settings for a voice model dropdown component.
    f0_methods : DropdownConfig
        Configuration settings for a pitch extraction algorithms
        dropdown component.
    index_rate : SliderConfig
        Configuration settings for an index rate slider component.
    rms_mix_rate : SliderConfig
        Configuration settings for a RMS mix rate slider component.
    protect_rate : SliderConfig
        Configuration settings for a protect rate slider component.
    hop_length : SliderConfig
        Configuration settings for a hop length slider component.
    split_voice : CheckboxConfig
        Configuration settings for a split voice checkbox component.
    autotune_voice: CheckboxConfig
        Configuration settings for an autotune voice checkbox component.
    autotune_strength: SliderConfig
        Configuration settings for an autotune strength slider
        component.
    sid : NumberConfig
        Configuration settings for a speaker ID number component.
    output_sr : DropdownConfig
        Configuration settings for an output sample rate dropdown
        component.
    output_format : DropdownConfig
        Configuration settings for an output format dropdown
        component.
    output_name : TextboxConfig
        Configuration settings for an output name textbox component.

    See Also
    --------
    BaseTabConfig
        Parent model defining common component configuration settings
        for UI tabs.
    """

    voice_model: DropdownConfig = DropdownConfig(
        label="Voice model",
        info="Select a model to use for voice conversion.",
        value=None,
        render=False,
        exclude_value=True,
    )
    f0_methods: DropdownConfig = DropdownConfig(
        label="Pitch extraction algorithm(s)",
        info=(
            "If more than one method is selected, then the median of the pitch values"
            " extracted by each method is used. RMVPE is recommended for most cases and"
            " is the default when no method is selected."
        ),
        value=[F0Method.RMVPE],
        choices=list(F0Method),
        multiselect=True,
    )
    index_rate: SliderConfig = SliderConfig(
        label="Index rate",
        info=(
            "Increase to bias the conversion towards the accent of the voice model."
            " Decrease to potentially reduce artifacts coming from the voice"
            " model.<br><br><br>"
        ),
        value=0.3,
        minimum=0.0,
        maximum=1.0,
    )
    rms_mix_rate: SliderConfig = SliderConfig(
        label="RMS mix rate",
        info=(
            "How much to mimic the loudness (0) of the input voice or a fixed loudness"
            " (1). A value of 1 is recommended for most cases.<br><br>"
        ),
        value=1.0,
        minimum=0.0,
        maximum=1.0,
    )
    protect_rate: SliderConfig = SliderConfig(
        label="Protect rate",
        info=(
            "Controls the extent to which consonants and breathing sounds are protected"
            " from artifacts. A higher value offers more protection but may worsen the"
            " indexing effect.<br><br>"
        ),
        value=0.33,
        minimum=0.0,
        maximum=0.5,
    )
    hop_length: SliderConfig = SliderConfig.hop_length(
        label="Hop length",
        info=(
            "How often the CREPE-based pitch extraction method checks for pitch changes"
            " measured in milliseconds. Lower values lead to longer conversion times"
            " and a higher risk of voice cracks, but better pitch accuracy."
        ),
        visible=True,
    )
    split_voice: CheckboxConfig = CheckboxConfig(
        label="Split input voice",
        info=(
            "Whether to split the input voice track into smaller segments before"
            " converting it. This can improve output quality for longer voice tracks."
        ),
        value=False,
    )
    autotune_voice: CheckboxConfig = CheckboxConfig(
        label="Autotune converted voice",
        info="Whether to apply autotune to the converted voice.<br><br>",
        value=False,
        exclude_value=True,
    )
    autotune_strength: SliderConfig = SliderConfig(
        label="Autotune intensity",
        info=(
            "Higher values result in stronger snapping to the chromatic grid and"
            " artifacting."
        ),
        value=1.0,
        minimum=0.0,
        maximum=1.0,
        visible=False,
    )
    sid: NumberConfig = NumberConfig(
        label="Speaker ID",
        info="Speaker ID for multi-speaker-models.",
        value=0,
        precision=0,
    )
    output_sr: DropdownConfig = DropdownConfig(
        label="Output sample rate",
        info="The sample rate of the mixed output track.",
        value=SampleRate.HZ_44100,
        choices=list(SampleRate),
    )
    output_format: DropdownConfig = DropdownConfig(
        label="Output format",
        info="The audio format of the mixed output track.",
        value=AudioExt.MP3,
        choices=list(AudioExt),
    )
    output_name: TextboxConfig = TextboxConfig(
        label="Output name",
        info="If no name is provided, a suitable name will be generated automatically.",
        value=None,
        placeholder="Ultimate RVC output",
        exclude_value=True,
    )
class SongGenerationConfig(GenerationConfig):
    """
    Common component configuration settings for song generation tabs.

    Attributes
    ----------
    source_type : DropdownConfig
        Configuration settings for a source type dropdown component.
    source : TextboxConfig
        Configuration settings for an input source textbox component.
    cached_song : DropdownConfig
        Configuration settings for a cached song dropdown component.
    clean_voice : CheckboxConfig
        Configuration settings for a clean voice checkbox component.
    clean_strength : SliderConfig
        Configuration settings for a clean strength slider component.
    room_size : SliderConfig
        Configuration settings for a room size slider component.
    wet_level : SliderConfig
        Configuration settings for a wetness level slider component.
    dry_level : SliderConfig
        Configuration settings for a dryness level slider component.
    damping : SliderConfig
        Configuration settings for a damping level slider component.
    main_gain : SliderConfig
        Configuration settings for a main gain slider component.
    inst_gain : SliderConfig
        Configuration settings for an instrumentals gain slider
        component.
    backup_gain : SliderConfig
        Configuration settings for a backup vocals gain slider
        component.

    See Also
    --------
    GenerationConfig
        Parent model defining common component configuration settings
        for song generation tabs.
    """

    # NOTE(review): this class shadows the ``SongGenerationConfig`` imported
    # from ``ultimate_rvc.web.config.tab`` near the top of the file — confirm
    # the redefinition is intentional.
    source_type: DropdownConfig = DropdownConfig(
        label="Source type",
        info="The type of source to retrieve a song from.",
        value=SongSourceType.LOCAL_FILE,
        choices=list(SongSourceType),
        type="index",
        exclude_value=True,
    )
    source: TextboxConfig = TextboxConfig(
        label="Source",
        info=(
            "Local (to the server) filepath or http link. Youtube probably"
            " won't work but most other sites still do."
        ),
        value=None,
        exclude_value=True,
    )
    cached_song: DropdownConfig = DropdownConfig(
        label="Source",
        info="Select a song from the list of cached songs.",
        value=None,
        visible=False,
        render=False,
        exclude_value=True,
    )
    clean_voice: CheckboxConfig = CheckboxConfig(
        label="Clean converted voice",
        info=(
            "Whether to clean the converted voice using noise reduction"
            " algorithms.<br><br>"
        ),
        value=False,
        exclude_value=True,
    )
    clean_strength: SliderConfig = SliderConfig.clean_strength(visible=False)
    room_size: SliderConfig = SliderConfig(
        label="Room size",
        info=(
            "Size of the room which reverb effect simulates. Increase for longer reverb"
            " time."
        ),
        value=0.15,
        minimum=0.0,
        maximum=1.0,
    )
    wet_level: SliderConfig = SliderConfig(
        label="Wetness level",
        info="Loudness of converted vocals with reverb effect applied.",
        value=0.2,
        minimum=0.0,
        maximum=1.0,
    )
    dry_level: SliderConfig = SliderConfig(
        label="Dryness level",
        info="Loudness of converted vocals without reverb effect applied.",
        value=0.8,
        minimum=0.0,
        maximum=1.0,
    )
    damping: SliderConfig = SliderConfig(
        label="Damping level",
        info="Absorption of high frequencies in reverb effect.",
        value=0.7,
        minimum=0.0,
        maximum=1.0,
    )
    main_gain: SliderConfig = SliderConfig.gain(
        label="Main gain",
        info="The gain to apply to the main vocals.",
    )
    inst_gain: SliderConfig = SliderConfig.gain(
        label="Instrumentals gain",
        info="The gain to apply to the instrumentals.",
    )
    backup_gain: SliderConfig = SliderConfig.gain(
        label="Backup gain",
        info="The gain to apply to the backup vocals.",
    )
class SpeechGenerationConfig(GenerationConfig):
    """
    Common component configuration settings for speech generation tabs.

    Attributes
    ----------
    source_type : DropdownConfig
        Configuration settings for a source type dropdown component.
    source : TextboxConfig
        Configuration settings for an input source textbox component.
    edge_tts_voice : DropdownConfig
        Configuration settings for an Edge TTS voice dropdown
        component.
    n_octaves : SliderConfig
        Configuration settings for an octave pitch shift slider
        component.
    n_semitones : SliderConfig
        Configuration settings for a semitone pitch shift slider
        component.
    tts_pitch_shift : SliderConfig
        Configuration settings for a TTS pitch shift slider
        component.
    tts_speed_change : SliderConfig
        Configuration settings for a TTS speed change slider
        component.
    tts_volume_change : SliderConfig
        Configuration settings for a TTS volume change slider
        component.
    clean_voice : CheckboxConfig
        Configuration settings for a clean voice checkbox
        component.
    clean_strength : SliderConfig
        Configuration settings for a clean strength slider
        component.
    output_gain : SliderConfig
        Configuration settings for an output gain slider component.

    See Also
    --------
    GenerationConfig
        Parent model defining common component configuration settings
        for generation tabs.
    """

    source_type: DropdownConfig = DropdownConfig(
        label="Source type",
        info="The type of source to generate speech from.",
        value=SpeechSourceType.TEXT,
        choices=list(SpeechSourceType),
        type="index",
        exclude_value=True,
    )
    source: TextboxConfig = TextboxConfig(
        label="Source",
        info="Text to generate speech from",
        value=None,
        exclude_value=True,
    )
    edge_tts_voice: DropdownConfig = DropdownConfig(
        label="Edge TTS voice",
        info="Select a voice to use for text to speech conversion.",
        value=None,
        render=False,
        exclude_value=True,
    )
    n_octaves: SliderConfig = SliderConfig.octave_shift(
        label="Octave shift",
        info=(
            "The number of octaves to pitch-shift the converted speech by. Use 1 for"
            " male-to-female and -1 for vice-versa."
        ),
    )
    n_semitones: SliderConfig = SliderConfig.semitone_shift(
        label="Semitone shift",
        info="The number of semi-tones to pitch-shift the converted speech by.",
    )
    tts_pitch_shift: SliderConfig = SliderConfig(
        label="Edge TTS pitch shift",
        info=(
            "The number of hertz to shift the pitch of the speech generated by Edge"
            " TTS."
        ),
        value=0,
        minimum=-100,
        maximum=100,
        step=1,
    )
    tts_speed_change: SliderConfig = SliderConfig(
        label="TTS speed change",
        info="The percentual change to the speed of the speech generated by Edge TTS.",
        value=0,
        minimum=-50,
        maximum=100,
        step=1,
    )
    tts_volume_change: SliderConfig = SliderConfig(
        label="TTS volume change",
        info="The percentual change to the volume of the speech generated by Edge TTS.",
        value=0,
        minimum=-100,
        maximum=100,
        step=1,
    )
    clean_voice: CheckboxConfig = CheckboxConfig(
        label="Clean converted voice",
        info=(
            "Whether to clean the converted voice using noise reduction"
            " algorithms.<br><br>"
        ),
        value=True,
        exclude_value=True,
    )
    clean_strength: SliderConfig = SliderConfig.clean_strength(visible=True)
    output_gain: SliderConfig = SliderConfig.gain(
        label="Output gain",
        info="The gain to apply to the converted speech.<br><br>",
    )
# Build the app-wide configuration: load a previously saved named
# configuration when one was requested, otherwise fall back to defaults.
# NOTE(review): `config_name` is not defined in this view — presumably set
# earlier (e.g. via a CLI option); confirm it is in scope here.
total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
def render_song_cover_multi_step_tab(
    total_config: TotalConfig, cookiefile: str | None = None
) -> None:
    """
    Render "Generate song cover - multi-step generation" tab.

    Parameters
    ----------
    total_config : TotalConfig
        Model containing all component configuration settings for the
        Ultimate RVC web UI.
    cookiefile : str, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube.
    """
    multi_step_config = total_config.song.multi_step
    # Instantiate every input-audio component up front, before any step
    # of the tab is rendered.
    # NOTE(review): `.all` is accessed as an attribute here, while
    # TotalConfig.all is a method — confirm `input_audio.all` is a
    # property (or list) rather than a missing call.
    for audio_component in multi_step_config.input_audio.all:
        audio_component.instantiate()
    with gr.Tab("Multi-step"):
        # Step 0 needs the full config (and the cookie file); steps 1-4
        # only need the multi-step tab config; step 5 needs both.
        _render_step_0(total_config, cookiefile=cookiefile)
        for render_step in (
            _render_step_1,
            _render_step_2,
            _render_step_3,
            _render_step_4,
        ):
            render_step(multi_step_config)
        _render_step_5(total_config, multi_step_config)
def _render_step_0(total_config: TotalConfig, cookiefile: str | None) -> None:
    """
    Render the song retrieval step (step 0) of the multi-step song
    generation tab.

    Parameters
    ----------
    total_config : TotalConfig
        Model containing all component configuration settings for the
        Ultimate RVC web UI.
    cookiefile : str | None
        The path to a file containing cookies to use when downloading
        audio from Youtube.

    """
    tab_config = total_config.song.multi_step
    # Holds the directory of the most recently retrieved song; consumed by
    # the dropdown-update chain attached to the retrieve button below.
    current_song_dir = gr.State(None)
    with gr.Accordion("Step 0: song retrieval", open=True):
        gr.Markdown("")
        with gr.Row():
            with gr.Column():
                tab_config.source_type.instantiate()
            with gr.Column():
                tab_config.source.instantiate()
                local_file = gr.Audio(
                    label="Source",
                    type="filepath",
                    visible=False,
                    waveform_options=gr.WaveformOptions(show_recording_waveform=True),
                )
                tab_config.cached_song.instance.render()
        # Show exactly one of the three source inputs (text source, local
        # file, cached song) depending on the selected source type.
        tab_config.source_type.instance.input(
            partial(toggle_visible_component, 3),
            inputs=tab_config.source_type.instance,
            outputs=[
                tab_config.source.instance,
                local_file,
                tab_config.cached_song.instance,
            ],
            show_progress="hidden",
        )
        # Mirror the local-file and cached-song selections into the shared
        # source component, which is the single input to song retrieval.
        local_file.change(
            update_value,
            inputs=local_file,
            outputs=tab_config.source.instance,
            show_progress="hidden",
        )
        tab_config.cached_song.instance.input(
            update_value,
            inputs=tab_config.cached_song.instance,
            outputs=tab_config.source.instance,
            show_progress="hidden",
        )
        with gr.Accordion("Options", open=False):
            song_transfer = _render_song_transfer(
                [SongTransferOption.STEP_1_AUDIO],
                "Song",
            )
        with gr.Row():
            retrieve_song_reset_btn = gr.Button("Reset options")
            retrieve_song_btn = gr.Button("Retrieve song", variant="primary")
            song_transfer_btn = gr.Button("Transfer song")
        song_output = gr.Audio(
            label="Song",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )
        retrieve_song_reset_btn.click(
            lambda: gr.Dropdown(value=[SongTransferOption.STEP_1_AUDIO]),
            outputs=song_transfer,
            show_progress="hidden",
        )
        # Retrieve the song, then refresh every dropdown that lists song
        # directories (this tab's step dropdowns plus both cached-song
        # dropdowns), then refresh the intermediate-audio management dropdown.
        retrieve_song_btn.click(
            partial(
                exception_harness(
                    retrieve_song,
                    info_msg="Song retrieved successfully!",
                ),
                cookiefile=cookiefile,
            ),
            inputs=tab_config.source.instance,
            outputs=[song_output, current_song_dir],
        ).then(
            partial(
                update_dropdowns,
                get_named_song_dirs,
                len(tab_config.song_dirs.all) + 2,
                value_indices=range(len(tab_config.song_dirs.all)),
            ),
            inputs=current_song_dir,
            outputs=[
                *tab_config.song_dirs.all,
                tab_config.cached_song.instance,
                total_config.song.one_click.cached_song.instance,
            ],
            show_progress="hidden",
        ).then(
            partial(update_dropdowns, get_named_song_dirs, 1, [], [0]),
            outputs=total_config.management.audio.intermediate.instance,
            show_progress="hidden",
        )
        setup_transfer_event(
            song_transfer_btn,
            song_transfer,
            song_output,
            tab_config.input_audio.all,
        )
def _render_step_1(tab_config: MultiStepSongGenerationConfig) -> None:
    """
    Render the vocal separation step (step 1) of the multi-step song
    generation tab.

    Parameters
    ----------
    tab_config : MultiStepSongGenerationConfig
        Component configuration settings for the multi-step song
        generation tab.

    """
    with gr.Accordion("Step 1: vocal separation", open=False):
        tab_config.input_audio.audio.instance.render()
        tab_config.song_dirs.separate_audio.instance.render()
        with gr.Accordion("Options", open=False):
            with gr.Row():
                tab_config.separation_model.instantiate()
                tab_config.segment_size.instantiate()
            with gr.Row():
                primary_stem_transfer = _render_song_transfer(
                    [SongTransferOption.STEP_2_VOCALS],
                    "Primary stem",
                )
                secondary_stem_transfer = _render_song_transfer(
                    [SongTransferOption.STEP_4_INSTRUMENTALS],
                    "Secondary stem",
                )
        with gr.Row():
            separate_audio_reset_btn = gr.Button("Reset options")
            separate_vocals_btn = gr.Button("Separate vocals", variant="primary")
        with gr.Row():
            primary_stem_transfer_btn = gr.Button("Transfer primary stem")
            secondary_stem_transfer_btn = gr.Button("Transfer secondary stem")
        with gr.Row():
            primary_stem_output = gr.Audio(
                label="Primary stem",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )
            secondary_stem_output = gr.Audio(
                label="Secondary stem",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )
        # Restore the default separation options and transfer selections.
        separate_audio_reset_btn.click(
            lambda: [
                tab_config.separation_model.value,
                tab_config.segment_size.value,
                gr.Dropdown(value=[SongTransferOption.STEP_2_VOCALS]),
                gr.Dropdown(value=[SongTransferOption.STEP_4_INSTRUMENTALS]),
            ],
            outputs=[
                tab_config.separation_model.instance,
                tab_config.segment_size.instance,
                primary_stem_transfer,
                secondary_stem_transfer,
            ],
            show_progress="hidden",
        )
        # Separation is GPU work: serialized via a shared concurrency id.
        separate_vocals_btn.click(
            exception_harness(
                separate_audio,
                info_msg="Vocals separated successfully!",
            ),
            inputs=[
                tab_config.input_audio.audio.instance,
                tab_config.song_dirs.separate_audio.instance,
                tab_config.separation_model.instance,
                tab_config.segment_size.instance,
            ],
            outputs=[primary_stem_output, secondary_stem_output],
            concurrency_limit=1,
            concurrency_id=ConcurrencyId.GPU,
        )
        # Wire both stem outputs to their transfer buttons.
        for btn, transfer, output in [
            (primary_stem_transfer_btn, primary_stem_transfer, primary_stem_output),
            (
                secondary_stem_transfer_btn,
                secondary_stem_transfer,
                secondary_stem_output,
            ),
        ]:
            setup_transfer_event(
                btn,
                transfer,
                output,
                tab_config.input_audio.all,
            )
def _render_step_2(tab_config: MultiStepSongGenerationConfig) -> None:
    """
    Render the vocal conversion step (step 2) of the multi-step song
    generation tab.

    Parameters
    ----------
    tab_config : MultiStepSongGenerationConfig
        Component configuration settings for the multi-step song
        generation tab.

    """
    with gr.Accordion("Step 2: vocal conversion", open=False):
        tab_config.input_audio.vocals.instance.render()
        tab_config.voice_model.instance.render()
        tab_config.song_dirs.convert_vocals.instance.render()
        with gr.Accordion("Options", open=False):
            with gr.Row():
                tab_config.n_octaves.instantiate()
                tab_config.n_semitones.instantiate()
            converted_vocals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_3_VOCALS],
                "Converted vocals",
            )
            with gr.Accordion("Advanced", open=False):
                with gr.Accordion("Voice synthesis", open=False):
                    with gr.Row():
                        tab_config.f0_methods.instantiate()
                        tab_config.index_rate.instantiate()
                    with gr.Row():
                        tab_config.rms_mix_rate.instantiate()
                        tab_config.protect_rate.instantiate()
                        tab_config.hop_length.instantiate()
                with gr.Accordion("Vocal enrichment", open=False), gr.Row():
                    with gr.Column():
                        tab_config.split_voice.instantiate()
                    with gr.Column():
                        tab_config.autotune_voice.instantiate()
                        tab_config.autotune_strength.instantiate()
                    with gr.Column():
                        tab_config.clean_voice.instantiate()
                        tab_config.clean_strength.instantiate()
                    # Only show each strength slider while its checkbox
                    # is ticked.
                    tab_config.autotune_voice.instance.change(
                        partial(toggle_visibility, targets={True}),
                        inputs=tab_config.autotune_voice.instance,
                        outputs=tab_config.autotune_strength.instance,
                        show_progress="hidden",
                    )
                    tab_config.clean_voice.instance.change(
                        partial(toggle_visibility, targets={True}),
                        inputs=tab_config.clean_voice.instance,
                        outputs=tab_config.clean_strength.instance,
                        show_progress="hidden",
                    )
                with gr.Accordion("Speaker embeddings", open=False), gr.Row():
                    with gr.Column():
                        tab_config.embedder_model.instantiate()
                        tab_config.custom_embedder_model.instance.render()
                    tab_config.sid.instantiate()
                    # The custom embedder dropdown is only relevant when
                    # the embedder model is EmbedderModel.CUSTOM.
                    tab_config.embedder_model.instance.change(
                        partial(toggle_visibility, targets={EmbedderModel.CUSTOM}),
                        inputs=tab_config.embedder_model.instance,
                        outputs=tab_config.custom_embedder_model.instance,
                        show_progress="hidden",
                    )
        with gr.Row():
            convert_vocals_reset_btn = gr.Button("Reset options")
            convert_vocals_btn = gr.Button("Convert vocals", variant="primary")
            converted_vocals_transfer_btn = gr.Button("Transfer converted vocals")
        converted_vocals_track_output = gr.Audio(
            label="Converted vocals",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )
        # Restore every conversion option (and the transfer selection) to
        # its configured default. Output order must match the lambda's
        # return order exactly.
        convert_vocals_reset_btn.click(
            lambda: [
                tab_config.n_octaves.value,
                tab_config.n_semitones.value,
                tab_config.f0_methods.value,
                tab_config.index_rate.value,
                tab_config.rms_mix_rate.value,
                tab_config.protect_rate.value,
                tab_config.hop_length.value,
                tab_config.split_voice.value,
                tab_config.autotune_voice.value,
                tab_config.autotune_strength.value,
                tab_config.clean_voice.value,
                tab_config.clean_strength.value,
                tab_config.embedder_model.value,
                tab_config.sid.value,
                gr.Dropdown(value=[SongTransferOption.STEP_3_VOCALS]),
            ],
            outputs=[
                tab_config.n_octaves.instance,
                tab_config.n_semitones.instance,
                tab_config.f0_methods.instance,
                tab_config.index_rate.instance,
                tab_config.rms_mix_rate.instance,
                tab_config.protect_rate.instance,
                tab_config.hop_length.instance,
                tab_config.split_voice.instance,
                tab_config.autotune_voice.instance,
                tab_config.autotune_strength.instance,
                tab_config.clean_voice.instance,
                tab_config.clean_strength.instance,
                tab_config.embedder_model.instance,
                tab_config.sid.instance,
                converted_vocals_transfer,
            ],
            show_progress="hidden",
        )
        # Conversion is GPU work: serialized via the shared concurrency id.
        convert_vocals_btn.click(
            partial(
                exception_harness(convert, info_msg="Vocals converted successfully!"),
                content_type=RVCContentType.VOCALS,
            ),
            inputs=[
                tab_config.input_audio.vocals.instance,
                tab_config.song_dirs.convert_vocals.instance,
                tab_config.voice_model.instance,
                tab_config.n_octaves.instance,
                tab_config.n_semitones.instance,
                tab_config.f0_methods.instance,
                tab_config.index_rate.instance,
                tab_config.rms_mix_rate.instance,
                tab_config.protect_rate.instance,
                tab_config.hop_length.instance,
                tab_config.split_voice.instance,
                tab_config.autotune_voice.instance,
                tab_config.autotune_strength.instance,
                tab_config.clean_voice.instance,
                tab_config.clean_strength.instance,
                tab_config.embedder_model.instance,
                tab_config.custom_embedder_model.instance,
                tab_config.sid.instance,
            ],
            outputs=converted_vocals_track_output,
            concurrency_id=ConcurrencyId.GPU,
            concurrency_limit=1,
        )
        setup_transfer_event(
            converted_vocals_transfer_btn,
            converted_vocals_transfer,
            converted_vocals_track_output,
            tab_config.input_audio.all,
        )
def _render_step_3(tab_config: MultiStepSongGenerationConfig) -> None:
    """
    Render the vocal post-processing step (step 3) of the multi-step
    song generation tab.

    Parameters
    ----------
    tab_config : MultiStepSongGenerationConfig
        Component configuration settings for the multi-step song
        generation tab.

    """
    with gr.Accordion("Step 3: vocal post-processing", open=False):
        tab_config.input_audio.converted_vocals.instance.render()
        tab_config.song_dirs.postprocess_vocals.instance.render()
        with gr.Accordion("Options", open=False):
            tab_config.room_size.instantiate()
            with gr.Row():
                tab_config.wet_level.instantiate()
                tab_config.dry_level.instantiate()
                tab_config.damping.instantiate()
            transfer_dropdown = _render_song_transfer(
                [SongTransferOption.STEP_5_MAIN_VOCALS],
                "Effected vocals",
            )
        with gr.Row():
            reset_button = gr.Button("Reset options")
            process_button = gr.Button("Post-process vocals", variant="primary")
            transfer_button = gr.Button("Transfer effected vocals")
        effected_vocals_output = gr.Audio(
            label="Effected vocals",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )
        # The four reverb options share identical reset/input wiring;
        # keep them together so the orderings below stay in sync.
        reverb_configs = (
            tab_config.room_size,
            tab_config.wet_level,
            tab_config.dry_level,
            tab_config.damping,
        )
        reset_button.click(
            lambda: [
                *[config.value for config in reverb_configs],
                gr.Dropdown(value=[SongTransferOption.STEP_5_MAIN_VOCALS]),
            ],
            outputs=[
                *[config.instance for config in reverb_configs],
                transfer_dropdown,
            ],
            show_progress="hidden",
        )
        process_button.click(
            exception_harness(
                postprocess,
                info_msg="Vocals post-processed successfully!",
            ),
            inputs=[
                tab_config.input_audio.converted_vocals.instance,
                tab_config.song_dirs.postprocess_vocals.instance,
                *[config.instance for config in reverb_configs],
            ],
            outputs=effected_vocals_output,
        )
        setup_transfer_event(
            transfer_button,
            transfer_dropdown,
            effected_vocals_output,
            tab_config.input_audio.all,
        )
def _render_step_4(tab_config: MultiStepSongGenerationConfig) -> None:
    """
    Render the background-audio pitch-shift step (step 4) of the
    multi-step song generation tab.

    Parameters
    ----------
    tab_config : MultiStepSongGenerationConfig
        Component configuration settings for the multi-step song
        generation tab.

    """
    with gr.Accordion("Step 4: pitch shift of background audio", open=False):
        with gr.Row():
            tab_config.input_audio.instrumentals.instance.render()
            tab_config.input_audio.backup_vocals.instance.render()
        with gr.Row():
            tab_config.n_semitones_instrumentals.instantiate()
            tab_config.n_semitones_backup_vocals.instantiate()
        tab_config.song_dirs.pitch_shift_background.instance.render()
        with gr.Accordion("Options", open=False), gr.Row():
            shifted_instrumentals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_5_INSTRUMENTALS],
                "Pitch-shifted instrumentals",
            )
            shifted_backup_vocals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_5_BACKUP_VOCALS],
                "Pitch-shifted backup vocals",
            )
        with gr.Row():
            pitch_shift_instrumentals_btn = gr.Button(
                "Pitch shift instrumentals",
                variant="primary",
            )
            pitch_shift_backup_vocals_btn = gr.Button(
                "Pitch shift backup vocals",
                variant="primary",
            )
        with gr.Row():
            shifted_instrumentals_transfer_btn = gr.Button(
                "Transfer shifted instrumentals",
            )
            shifted_backup_vocals_transfer_btn = gr.Button(
                "Transfer shifted backup vocals",
            )
            pitch_shift_background_reset_btn = gr.Button("Reset options")
        with gr.Row():
            shifted_instrumentals_track_output = gr.Audio(
                label="Pitch-shifted instrumentals",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )
            shifted_backup_vocals_track_output = gr.Audio(
                label="Pitch-shifted backup vocals",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )
        # Restore the default semitone shifts and transfer selections.
        pitch_shift_background_reset_btn.click(
            lambda: [
                tab_config.n_semitones_instrumentals.value,
                tab_config.n_semitones_backup_vocals.value,
                gr.Dropdown(value=[SongTransferOption.STEP_5_INSTRUMENTALS]),
                gr.Dropdown(value=[SongTransferOption.STEP_5_BACKUP_VOCALS]),
            ],
            outputs=[
                tab_config.n_semitones_instrumentals.instance,
                tab_config.n_semitones_backup_vocals.instance,
                shifted_instrumentals_transfer,
                shifted_backup_vocals_transfer,
            ],
            show_progress="hidden",
        )
        # Instrumentals and backup vocals are shifted independently via
        # the same pitch_shift function, differing only in inputs.
        pitch_shift_instrumentals_btn.click(
            exception_harness(
                pitch_shift,
                info_msg="Instrumentals pitch-shifted successfully!",
            ),
            inputs=[
                tab_config.input_audio.instrumentals.instance,
                tab_config.song_dirs.pitch_shift_background.instance,
                tab_config.n_semitones_instrumentals.instance,
            ],
            outputs=shifted_instrumentals_track_output,
        )
        pitch_shift_backup_vocals_btn.click(
            exception_harness(
                pitch_shift,
                info_msg="Backup vocals pitch-shifted successfully!",
            ),
            inputs=[
                tab_config.input_audio.backup_vocals.instance,
                tab_config.song_dirs.pitch_shift_background.instance,
                tab_config.n_semitones_backup_vocals.instance,
            ],
            outputs=shifted_backup_vocals_track_output,
        )
        # Wire both shifted tracks to their transfer buttons.
        for btn, transfer, output in [
            (
                shifted_instrumentals_transfer_btn,
                shifted_instrumentals_transfer,
                shifted_instrumentals_track_output,
            ),
            (
                shifted_backup_vocals_transfer_btn,
                shifted_backup_vocals_transfer,
                shifted_backup_vocals_track_output,
            ),
        ]:
            setup_transfer_event(
                btn,
                transfer,
                output,
                tab_config.input_audio.all,
            )
def _render_step_5(
    total_config: TotalConfig,
    tab_config: MultiStepSongGenerationConfig,
) -> None:
    """
    Render the song mixing step (step 5) of the multi-step song
    generation tab.

    Parameters
    ----------
    total_config : TotalConfig
        Model containing all component configuration settings for the
        Ultimate RVC web UI.
    tab_config : MultiStepSongGenerationConfig
        Component configuration settings for the multi-step song
        generation tab.

    """
    with gr.Accordion("Step 5: song mixing", open=False):
        with gr.Row():
            tab_config.input_audio.main_vocals.instance.render()
            tab_config.input_audio.shifted_instrumentals.instance.render()
            tab_config.input_audio.shifted_backup_vocals.instance.render()
        tab_config.song_dirs.mix.instance.render()
        with gr.Accordion("Options", open=False):
            with gr.Row():
                tab_config.main_gain.instantiate()
                tab_config.inst_gain.instantiate()
                tab_config.backup_gain.instantiate()
            with gr.Row():
                # Default output name is derived from the selected main
                # vocals track and song directory.
                tab_config.output_name.instantiate(
                    value=partial(
                        update_output_name,
                        get_song_cover_name,
                        False,  # noqa: FBT003
                    ),
                    inputs=[
                        tab_config.input_audio.main_vocals.instance,
                        tab_config.song_dirs.mix.instance,
                    ],
                )
                tab_config.output_sr.instantiate()
                tab_config.output_format.instantiate()
            song_cover_transfer = _render_song_transfer([], "Song cover")
        with gr.Row():
            mix_reset_btn = gr.Button("Reset options")
            mix_btn = gr.Button("Mix song cover", variant="primary")
            song_cover_transfer_btn = gr.Button("Transfer song cover")
        song_cover_output = gr.Audio(
            label="Song cover",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )
        # Restore the default mixing options and transfer selection.
        mix_reset_btn.click(
            lambda: [
                tab_config.main_gain.value,
                tab_config.inst_gain.value,
                tab_config.backup_gain.value,
                tab_config.output_sr.value,
                tab_config.output_format.value,
                gr.Dropdown(value=[]),
            ],
            outputs=[
                tab_config.main_gain.instance,
                tab_config.inst_gain.instance,
                tab_config.backup_gain.instance,
                tab_config.output_sr.instance,
                tab_config.output_format.instance,
                song_cover_transfer,
            ],
            show_progress="hidden",
        )
        # Intermediate state holding (track, gain) pairs produced by
        # _pair_audio_tracks_and_gain before mixing.
        temp_audio_gains = gr.State()
        # Mixing pipeline: pair tracks with gains, mix, then refresh the
        # saved-output-audio management dropdown.
        mix_btn.click(
            partial(
                _pair_audio_tracks_and_gain,
                [
                    tab_config.input_audio.main_vocals.instance,
                    tab_config.input_audio.shifted_instrumentals.instance,
                    tab_config.input_audio.shifted_backup_vocals.instance,
                ],
                [
                    tab_config.main_gain.instance,
                    tab_config.inst_gain.instance,
                    tab_config.backup_gain.instance,
                ],
            ),
            inputs={
                tab_config.input_audio.main_vocals.instance,
                tab_config.input_audio.shifted_instrumentals.instance,
                tab_config.input_audio.shifted_backup_vocals.instance,
                tab_config.main_gain.instance,
                tab_config.inst_gain.instance,
                tab_config.backup_gain.instance,
            },
            outputs=temp_audio_gains,
        ).then(
            # FIX: corrected "succesfully" typo in user-facing message.
            exception_harness(mix_song, info_msg="Song cover successfully generated."),
            inputs=[
                temp_audio_gains,
                tab_config.song_dirs.mix.instance,
                tab_config.output_sr.instance,
                tab_config.output_format.instance,
                tab_config.output_name.instance,
            ],
            outputs=song_cover_output,
        ).then(
            partial(update_dropdowns, get_saved_output_audio, 1, [], [0]),
            outputs=total_config.management.audio.output.instance,
            show_progress="hidden",
        )
        setup_transfer_event(
            song_cover_transfer_btn,
            song_cover_transfer,
            song_cover_output,
            tab_config.input_audio.all,
        )
def _render_song_transfer(
    value: list[SongTransferOption],
    label_prefix: str,
) -> gr.Dropdown:
    """
    Render a transfer dropdown pre-populated with song transfer options.

    Parameters
    ----------
    value : list[SongTransferOption]
        The initially selected transfer options.
    label_prefix : str
        Prefix for the dropdown label.

    Returns
    -------
    gr.Dropdown
        The rendered transfer dropdown.

    """
    option_enum = SongTransferOption
    return render_transfer_component(value, label_prefix, option_enum)
| def _pair_audio_tracks_and_gain( | |
| audio_components: Sequence[gr.Audio], | |
| gain_components: Sequence[gr.Slider], | |
| data: dict[gr.Audio | gr.Slider, Any], | |
| ) -> list[tuple[str, int]]: | |
| """ | |
| Pair audio tracks and gain levels stored in separate gradio | |
| components. | |
| This function is meant to first be partially applied to the sequence | |
| of audio components and the sequence of slider components containing | |
| the values that should be combined. The resulting function can then | |
| be called by an event listener whose inputs is a set containing | |
| those audio and slider components. The `data` parameter in that case | |
| will contain a mapping from each of those components to the value | |
| that the component stores. | |
| Parameters | |
| ---------- | |
| audio_components : Sequence[gr.Audio] | |
| Audio components to pair with gain levels. | |
| gain_components : Sequence[gr.Slider] | |
| Gain level components to pair with audio tracks. | |
| data : dict[gr.Audio | gr.Slider, Any] | |
| Data from the audio and gain components. | |
| Returns | |
| ------- | |
| list[tuple[str, int]] | |
| Paired audio tracks and gain levels. | |
| Raises | |
| ------ | |
| ValueError | |
| If the number of audio tracks and gain levels are not the same. | |
| """ | |
| audio_tracks = [data[component] for component in audio_components] | |
| gain_levels = [data[component] for component in gain_components] | |
| if len(audio_tracks) != len(gain_levels): | |
| err_msg = "Number of audio tracks and gain levels must be the same." | |
| raise ValueError(err_msg) | |
| return [ | |
| (audio_track, gain_level) | |
| for audio_track, gain_level in zip(audio_tracks, gain_levels, strict=True) | |
| if audio_track | |
| ] | |
def render_app() -> gr.Blocks:
    """
    Render the Ultimate RVC web application.

    Returns
    -------
    gr.Blocks
        The rendered web application.

    """
    # NOTE(review): this function reads the module-level `total_config`
    # and `cookiefile` — presumably bound earlier in the file; confirm.
    css = """
    h1 { text-align: center; margin-top: 20px; margin-bottom: 20px; }
    #generate-tab-button { font-weight: bold !important;}
    #manage-tab-button { font-weight: bold !important;}
    #audio-tab-button { font-weight: bold !important;}
    #settings-tab-button { font-weight: bold !important;}
    """
    cache_delete_frequency = 86400  # every 24 hours check for files to delete
    cache_delete_cutoff = 86400  # and delete files older than 24 hours
    with gr.Blocks(
        title="Redzone-6 Audio Playground",
        theme=gr.Theme.load(str(Path(__file__).parent / "config/theme.json")),
        css=css,
        delete_cache=(cache_delete_frequency, cache_delete_cutoff),
    ) as app:
        # Eagerly instantiate every dropdown that is shared across tabs so
        # it exists before the tab renderers below wire events to it.
        for component_config in [
            total_config.song.one_click.voice_model,
            total_config.song.one_click.cached_song,
            total_config.song.one_click.custom_embedder_model,
            total_config.song.multi_step.voice_model,
            total_config.song.multi_step.cached_song,
            total_config.song.multi_step.custom_embedder_model,
            total_config.song.multi_step.song_dirs.separate_audio,
            total_config.song.multi_step.song_dirs.convert_vocals,
            total_config.song.multi_step.song_dirs.postprocess_vocals,
            total_config.song.multi_step.song_dirs.pitch_shift_background,
            total_config.song.multi_step.song_dirs.mix,
            total_config.speech.one_click.edge_tts_voice,
            total_config.speech.one_click.voice_model,
            total_config.speech.one_click.custom_embedder_model,
            total_config.speech.multi_step.edge_tts_voice,
            total_config.speech.multi_step.voice_model,
            total_config.speech.multi_step.custom_embedder_model,
            total_config.training.multi_step.dataset,
            total_config.training.multi_step.preprocess_model,
            total_config.training.multi_step.extract_model,
            total_config.training.multi_step.train_model,
            total_config.training.multi_step.custom_embedder_model,
            total_config.training.multi_step.custom_pretrained_model,
            total_config.management.audio.intermediate,
            total_config.management.audio.speech,
            total_config.management.audio.output,
            total_config.management.audio.dataset,
            total_config.management.model.voices,
            total_config.management.model.embedders,
            total_config.management.model.pretraineds,
            total_config.management.model.traineds,
            total_config.management.settings.load_config_name,
            total_config.management.settings.delete_config_names,
        ]:
            component_config.instantiate()

        # main tab
        # with gr.Tab("Generate", elem_id="generate-tab"):
        with gr.Tab("Music", elem_id="generate-tab"):
            render_song_cover_one_click_tab(total_config, cookiefile)
            render_song_cover_multi_step_tab(total_config, cookiefile)
        with gr.Tab("Speech", elem_id="generate-tab"):
            render_speech_one_click_tab(total_config)
            render_speech_multi_step_tab(total_config)
        with gr.Tab("Configuration", elem_id="settings-tab"):
            with gr.Tab("Models"):
                render_models_tab(total_config)
            with gr.Tab("Settings"):
                render_settings_tab(total_config)
        render_audio_tab(total_config)

        # Populate all non-static dropdowns on page load; the output order
        # must match the return order of _init_dropdowns exactly.
        app.load(
            _init_dropdowns,
            outputs=[
                total_config.speech.one_click.edge_tts_voice.instance,
                total_config.speech.multi_step.edge_tts_voice.instance,
                total_config.song.one_click.voice_model.instance,
                total_config.song.multi_step.voice_model.instance,
                total_config.speech.one_click.voice_model.instance,
                total_config.speech.multi_step.voice_model.instance,
                total_config.management.model.voices.instance,
                total_config.song.one_click.custom_embedder_model.instance,
                total_config.song.multi_step.custom_embedder_model.instance,
                total_config.speech.one_click.custom_embedder_model.instance,
                total_config.speech.multi_step.custom_embedder_model.instance,
                total_config.training.multi_step.custom_embedder_model.instance,
                total_config.management.model.embedders.instance,
                total_config.training.multi_step.custom_pretrained_model.instance,
                total_config.management.model.pretraineds.instance,
                total_config.training.multi_step.extract_model.instance,
                total_config.training.multi_step.train_model.instance,
                total_config.training.multi_step.preprocess_model.instance,
                total_config.management.model.traineds.instance,
                total_config.song.one_click.cached_song.instance,
                total_config.song.multi_step.cached_song.instance,
                total_config.song.multi_step.song_dirs.separate_audio.instance,
                total_config.song.multi_step.song_dirs.convert_vocals.instance,
                total_config.song.multi_step.song_dirs.postprocess_vocals.instance,
                total_config.song.multi_step.song_dirs.pitch_shift_background.instance,
                total_config.song.multi_step.song_dirs.mix.instance,
                total_config.management.audio.intermediate.instance,
                total_config.training.multi_step.dataset.instance,
                total_config.management.audio.speech.instance,
                total_config.management.audio.output.instance,
                total_config.management.audio.dataset.instance,
                total_config.management.settings.load_config_name.instance,
                total_config.management.settings.delete_config_names.instance,
            ],
            show_progress="hidden",
        )
    return app
def _init_dropdowns() -> list[gr.Dropdown]:
    """
    Initialize the Ultimate RVC web application by updating the choices
    and default values of non-static dropdown components.

    Returns
    -------
    list[gr.Dropdown]
        A list of gr.Dropdown components with updated choices and
        default values, in the order expected by the `app.load` outputs
        in `render_app`.

    """
    # Initialize model dropdowns.
    # NOTE(review): in each call the count is presumably the number of
    # dropdown instances fed to `app.load`, and `value_indices` selects
    # which of them also get a default value — confirm against
    # `initialize_dropdowns`.
    edge_tts_models = initialize_dropdowns(
        get_edge_tts_voice_names,
        2,
        "en-US-ChristopherNeural",
        range(2),
    )
    voice_models = initialize_dropdowns(
        get_voice_model_names,
        5,
        value_indices=range(4),
    )
    custom_embedder_models = initialize_dropdowns(
        get_custom_embedder_model_names,
        6,
        value_indices=range(5),
    )
    custom_pretrained_models = initialize_dropdowns(
        get_custom_pretrained_model_names,
        2,
        value_indices=range(1),
    )
    training_models = initialize_dropdowns(
        get_training_model_names,
        4,
        value_indices=range(2),
    )
    song_dirs = initialize_dropdowns(
        get_named_song_dirs,
        8,
        value_indices=range(7),
    )
    # Audio management dropdowns: refresh choices only, no default value.
    dataset = gr.Dropdown(get_audio_datasets())
    speech_delete = gr.Dropdown(get_saved_speech_audio())
    output_delete = gr.Dropdown(get_saved_output_audio())
    dataset_delete = gr.Dropdown(get_named_audio_datasets())
    configs = initialize_dropdowns(get_config_names, 2, value_indices=range(1))
    # Order here must mirror the `outputs` list of `app.load`.
    return [
        *edge_tts_models,
        *voice_models,
        *custom_embedder_models,
        *custom_pretrained_models,
        *training_models,
        *song_dirs,
        dataset,
        speech_delete,
        output_delete,
        dataset_delete,
        *configs,
    ]
| def render_song_cover_one_click_tab( | |
| total_config: TotalConfig, cookiefile: str | None = None | |
| ) -> None: | |
| """ | |
| Render "Generate song covers - One-click generation" tab. | |
| Parameters | |
| ---------- | |
| total_config : TotalConfig | |
| Model containing all component configuration settings for the | |
| Ultimate RVC web UI. | |
| cookiefile : str, optional | |
| The path to a file containing cookies to use when downloading | |
| audio from Youtube. | |
| """ | |
| with gr.Tab("One-click"): | |
| tab_config = total_config.song.one_click | |
| _render_input(tab_config) | |
| with gr.Accordion("Options", open=False): | |
| _render_main_options(tab_config) | |
| _render_conversion_options(tab_config) | |
| _render_mixing_options(tab_config) | |
| _render_output_options(tab_config) | |
| _render_intermediate_audio(tab_config) | |
| with gr.Row(equal_height=True): | |
| reset_btn = gr.Button(value="Reset options", scale=2) | |
| generate_btn = gr.Button("Generate", scale=2, variant="primary") | |
| song_cover = gr.Audio( | |
| label="Song cover", | |
| scale=3, | |
| waveform_options=gr.WaveformOptions(show_recording_waveform=False), | |
| ) | |
| song_dirs = total_config.song.multi_step.song_dirs.all | |
| generate_btn.click( | |
| partial( | |
| exception_harness( | |
| run_pipeline, | |
| info_msg="Song cover generated successfully!", | |
| ), | |
| cookiefile=cookiefile, | |
| progress_bar=PROGRESS_BAR, | |
| ), | |
| inputs=[ | |
| tab_config.source.instance, | |
| tab_config.voice_model.instance, | |
| tab_config.n_octaves.instance, | |
| tab_config.n_semitones.instance, | |
| tab_config.f0_methods.instance, | |
| tab_config.index_rate.instance, | |
| tab_config.rms_mix_rate.instance, | |
| tab_config.protect_rate.instance, | |
| tab_config.hop_length.instance, | |
| tab_config.split_voice.instance, | |
| tab_config.autotune_voice.instance, | |
| tab_config.autotune_strength.instance, | |
| tab_config.clean_voice.instance, | |
| tab_config.clean_strength.instance, | |
| tab_config.embedder_model.instance, | |
| tab_config.custom_embedder_model.instance, | |
| tab_config.sid.instance, | |
| tab_config.room_size.instance, | |
| tab_config.wet_level.instance, | |
| tab_config.dry_level.instance, | |
| tab_config.damping.instance, | |
| tab_config.main_gain.instance, | |
| tab_config.inst_gain.instance, | |
| tab_config.backup_gain.instance, | |
| tab_config.output_sr.instance, | |
| tab_config.output_format.instance, | |
| tab_config.output_name.instance, | |
| ], | |
| outputs=[song_cover, *tab_config.intermediate_audio.all], | |
| concurrency_limit=4, | |
| concurrency_id=ConcurrencyId.GPU, | |
| ).success( | |
| partial(update_dropdowns, get_named_song_dirs, 3 + len(song_dirs), [], [2]), | |
| outputs=[ | |
| total_config.song.one_click.cached_song.instance, | |
| total_config.song.multi_step.cached_song.instance, | |
| total_config.management.audio.intermediate.instance, | |
| *song_dirs, | |
| ], | |
| show_progress="hidden", | |
| ).then( | |
| partial(update_dropdowns, get_saved_output_audio, 1, [], [0]), | |
| outputs=total_config.management.audio.output.instance, | |
| show_progress="hidden", | |
| ) | |
| reset_btn.click( | |
| lambda: [ | |
| tab_config.n_octaves.value, | |
| tab_config.n_semitones.value, | |
| tab_config.f0_methods.value, | |
| tab_config.index_rate.value, | |
| tab_config.rms_mix_rate.value, | |
| tab_config.protect_rate.value, | |
| tab_config.hop_length.value, | |
| tab_config.split_voice.value, | |
| tab_config.autotune_voice.value, | |
| tab_config.autotune_strength.value, | |
| tab_config.clean_voice.value, | |
| tab_config.clean_strength.value, | |
| tab_config.embedder_model.value, | |
| tab_config.sid.value, | |
| tab_config.room_size.value, | |
| tab_config.wet_level.value, | |
| tab_config.dry_level.value, | |
| tab_config.damping.value, | |
| tab_config.main_gain.value, | |
| tab_config.inst_gain.value, | |
| tab_config.backup_gain.value, | |
| tab_config.output_sr.value, | |
| tab_config.output_format.value, | |
| tab_config.show_intermediate_audio.value, | |
| ], | |
| outputs=[ | |
| tab_config.n_octaves.instance, | |
| tab_config.n_semitones.instance, | |
| tab_config.f0_methods.instance, | |
| tab_config.index_rate.instance, | |
| tab_config.rms_mix_rate.instance, | |
| tab_config.protect_rate.instance, | |
| tab_config.hop_length.instance, | |
| tab_config.split_voice.instance, | |
| tab_config.autotune_voice.instance, | |
| tab_config.autotune_strength.instance, | |
| tab_config.clean_voice.instance, | |
| tab_config.clean_strength.instance, | |
| tab_config.embedder_model.instance, | |
| tab_config.sid.instance, | |
| tab_config.room_size.instance, | |
| tab_config.wet_level.instance, | |
| tab_config.dry_level.instance, | |
| tab_config.damping.instance, | |
| tab_config.main_gain.instance, | |
| tab_config.inst_gain.instance, | |
| tab_config.backup_gain.instance, | |
| tab_config.output_sr.instance, | |
| tab_config.output_format.instance, | |
| tab_config.show_intermediate_audio.instance, | |
| ], | |
| show_progress="hidden", | |
| ) | |
def _render_input(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the source-selection input area of the one-click song
    generation tab.

    Instantiates the source-type selector, the free-text source field,
    a hidden local-file audio picker, the cached-song dropdown and the
    voice-model dropdown, and wires the events that keep the single
    `source` value in sync with whichever input widget is active.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Component configuration settings for the one-click song
        generation tab.
    """
    with gr.Row():
        with gr.Column():
            tab_config.source_type.instantiate()
        with gr.Column():
            tab_config.source.instantiate()
            # Hidden until the user picks the local-file source type
            # (toggled by the source_type input event below).
            local_file = gr.Audio(
                label="Source",
                type="filepath",
                visible=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=False),
            )
            # Pre-built component; only its placement is decided here.
            tab_config.cached_song.instance.render()
    # Show exactly one of the three source widgets, matching the
    # selected source type.
    tab_config.source_type.instance.input(
        partial(toggle_visible_component, 3),
        inputs=tab_config.source_type.instance,
        outputs=[
            tab_config.source.instance,
            local_file,
            tab_config.cached_song.instance,
        ],
        show_progress="hidden",
    )
    # Mirror a picked local file into the canonical `source` component,
    # which is what the generation pipeline actually reads.
    local_file.change(
        update_value,
        inputs=local_file,
        outputs=tab_config.source.instance,
        show_progress="hidden",
    )
    # Likewise mirror a cached-song selection into `source`.
    tab_config.cached_song.instance.input(
        update_value,
        inputs=tab_config.cached_song.instance,
        outputs=tab_config.source.instance,
        show_progress="hidden",
    )
    with gr.Row():
        tab_config.voice_model.instance.render()
def _render_main_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the main pitch-shift options of the one-click song
    generation tab.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Component configuration settings for the one-click song
        generation tab.
    """
    with gr.Row():
        tab_config.n_octaves.instantiate()
        tab_config.n_semitones.instantiate()
def _render_conversion_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the vocal-conversion options of the one-click song
    generation tab.

    Groups voice-synthesis, vocal-enrichment and speaker-embedding
    controls into nested accordions and wires the visibility toggles
    for the conditional sliders/dropdowns.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Component configuration settings for the one-click song
        generation tab.
    """
    with gr.Accordion("Vocal conversion", open=True):
        # Empty markdown acts as a spacer between the accordion header
        # and its first nested accordion.
        gr.Markdown("")
        with gr.Accordion("Voice synthesis", open=True):
            with gr.Row():
                tab_config.f0_methods.instantiate()
                tab_config.index_rate.instantiate()
            with gr.Row():
                tab_config.rms_mix_rate.instantiate()
                tab_config.protect_rate.instantiate()
                tab_config.hop_length.instantiate()
        with gr.Accordion("Vocal enrichment", open=True):
            with gr.Row():
                with gr.Column():
                    tab_config.split_voice.instantiate()
                with gr.Column():
                    tab_config.autotune_voice.instantiate()
                    tab_config.autotune_strength.instantiate()
                with gr.Column():
                    tab_config.clean_voice.instantiate()
                    tab_config.clean_strength.instantiate()
            # Strength sliders are only meaningful when their feature
            # checkbox is ticked, so tie visibility to the checkbox.
            tab_config.autotune_voice.instance.change(
                partial(toggle_visibility, targets={True}),
                inputs=tab_config.autotune_voice.instance,
                outputs=tab_config.autotune_strength.instance,
                show_progress="hidden",
            )
            tab_config.clean_voice.instance.change(
                partial(toggle_visibility, targets={True}),
                inputs=tab_config.clean_voice.instance,
                outputs=tab_config.clean_strength.instance,
                show_progress="hidden",
            )
        with gr.Accordion("Speaker embedding", open=True):
            with gr.Row():
                with gr.Column():
                    tab_config.embedder_model.instantiate()
                    tab_config.custom_embedder_model.instance.render()
                tab_config.sid.instantiate()
            # The custom-embedder dropdown is only shown when the
            # "custom" embedder model is selected.
            tab_config.embedder_model.instance.change(
                partial(toggle_visibility, targets={EmbedderModel.CUSTOM}),
                inputs=tab_config.embedder_model.instance,
                outputs=tab_config.custom_embedder_model.instance,
                show_progress="hidden",
            )
def _render_mixing_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the audio-mixing options of the one-click song generation
    tab: reverb controls for the converted vocals and per-track volume
    gains.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Component configuration settings for the one-click song
        generation tab.
    """
    with gr.Accordion("Audio mixing", open=True):
        # Empty markdown acts as a spacer below the accordion header.
        gr.Markdown("")
        with gr.Accordion("Reverb control on converted vocals", open=True):
            with gr.Row():
                tab_config.room_size.instantiate()
            with gr.Row():
                tab_config.wet_level.instantiate()
                tab_config.dry_level.instantiate()
                tab_config.damping.instantiate()
        with gr.Accordion("Volume controls (dB)", open=True), gr.Row():
            tab_config.main_gain.instantiate()
            tab_config.inst_gain.instantiate()
            tab_config.backup_gain.instantiate()
def _render_output_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the audio-output options of the one-click song generation
    tab: output name, sample rate, format and the intermediate-audio
    visibility toggle.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Component configuration settings for the one-click song
        generation tab.
    """
    with gr.Accordion("Audio output", open=True):
        with gr.Row():
            # Default output name is derived from the selected cached
            # song and voice model (first input is a placeholder for a
            # source path, hence the gr.State(None)).
            tab_config.output_name.instantiate(
                value=partial(
                    update_output_name,
                    get_song_cover_name,
                    True,  # noqa: FBT003
                ),
                inputs=[
                    gr.State(None),
                    tab_config.cached_song.instance,
                    tab_config.voice_model.instance,
                ],
            )
            tab_config.output_sr.instantiate()
            tab_config.output_format.instantiate()
        with gr.Row():
            tab_config.show_intermediate_audio.instantiate()
def _render_intermediate_audio(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the (initially hidden) accordions holding the intermediate
    audio tracks produced by each step of the song-cover pipeline, and
    wire their visibility to the "show intermediate audio" checkbox.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Component configuration settings for the one-click song
        generation tab.
    """
    with gr.Accordion(
        "Intermediate audio tracks",
        open=False,
        visible=False,
    ) as intermediate_audio_accordion:
        with gr.Accordion(
            "Step 0: song retrieval",
            open=False,
        ) as song_retrieval_accordion:
            tab_config.intermediate_audio.song.instantiate()
        with (
            gr.Accordion(
                "Step 1a: vocals/instrumentals separation",
                open=False,
            ) as vocals_separation_accordion,
            gr.Row(),
        ):
            tab_config.intermediate_audio.vocals.instantiate()
            tab_config.intermediate_audio.instrumentals.instantiate()
        with (
            gr.Accordion(
                "Step 1b: main vocals/ backup vocals separation",
                open=False,
            ) as main_vocals_separation_accordion,
            gr.Row(),
        ):
            tab_config.intermediate_audio.main_vocals.instantiate()
            tab_config.intermediate_audio.backup_vocals.instantiate()
        with (
            gr.Accordion(
                "Step 1c: main vocals cleanup",
                open=False,
            ) as vocal_cleanup_accordion,
            gr.Row(),
        ):
            tab_config.intermediate_audio.main_vocals_dereverbed.instantiate()
            tab_config.intermediate_audio.main_vocals_reverb.instantiate()
        with gr.Accordion(
            "Step 2: conversion of main vocals",
            open=False,
        ) as vocal_conversion_accordion:
            tab_config.intermediate_audio.converted_vocals.instantiate()
        with gr.Accordion(
            "Step 3: post-processing of converted vocals",
            open=False,
        ) as vocals_postprocessing_accordion:
            tab_config.intermediate_audio.postprocessed_vocals.instantiate()
        with (
            gr.Accordion(
                "Step 4: pitch shift of background tracks",
                open=False,
            ) as pitch_shift_accordion,
            gr.Row(),
        ):
            tab_config.intermediate_audio.instrumentals_shifted.instantiate()
            tab_config.intermediate_audio.backup_vocals_shifted.instantiate()
    # Toggle the outer accordion plus the 7 per-step accordions
    # together when the checkbox changes.
    tab_config.show_intermediate_audio.instance.change(
        partial(toggle_intermediate_audio, num_components=7),
        inputs=tab_config.show_intermediate_audio.instance,
        outputs=[
            intermediate_audio_accordion,
            song_retrieval_accordion,
            vocals_separation_accordion,
            main_vocals_separation_accordion,
            vocal_cleanup_accordion,
            vocal_conversion_accordion,
            vocals_postprocessing_accordion,
            pitch_shift_accordion,
        ],
        show_progress="hidden",
    )
# Build the full Gradio Blocks application once at import time so it is
# available both to the Typer CLI entry point below and to external
# launchers that import this module.
app = render_app()

# Typer CLI wrapper; `start_app` below is its command (presumably
# registered via an @app_wrapper.command() decorator — not visible in
# this chunk).
app_wrapper = typer.Typer()
def start_app(
    share: Annotated[
        bool,
        typer.Option("--share", "-s", help="Enable sharing"),
    ] = False,
    listen: Annotated[
        bool,
        typer.Option(
            "--listen",
            "-l",
            help="Make the web application reachable from your local network.",
        ),
    ] = False,
    listen_host: Annotated[
        str | None,
        typer.Option(
            "--listen-host",
            "-h",
            help="The hostname that the server will use.",
        ),
    ] = "0.0.0.0",
    listen_port: Annotated[
        int | None,
        typer.Option(
            "--listen-port",
            "-p",
            help="The listening port that the server will use.",
        ),
    ] = None,
    ssr_mode: Annotated[
        bool,
        typer.Option(
            "--ssr-mode",
            help="Enable server-side rendering mode.",
        ),
    ] = False,
) -> None:
    """
    Run the Ultimate RVC web application.

    Parameters
    ----------
    share : bool, default=False
        Whether to generate a public Gradio share link.
    listen : bool, default=False
        Whether to make the application reachable from the local
        network by binding to `listen_host`.
    listen_host : str, optional
        The hostname the server binds to when `listen` is enabled.
    listen_port : int, optional
        The port the server listens on; Gradio picks a default when
        omitted.
    ssr_mode : bool, default=False
        Whether to enable server-side rendering mode.
    """
    # Redirect Gradio's temporary files into the project's temp dir.
    os.environ["GRADIO_TEMP_DIR"] = str(TEMP_DIR)
    gr.set_static_paths([MODELS_DIR, AUDIO_DIR])
    app.launch(
        # BUGFIX: `share` and `listen` were previously declared as CLI
        # options but never used — the server always bound to
        # `listen_host` ("0.0.0.0" by default, i.e. all interfaces)
        # and sharing could never be enabled. Bind to the configured
        # host only when --listen is given; otherwise let Gradio fall
        # back to its localhost default.
        share=share,
        server_name=listen_host if listen else None,
        server_port=listen_port,
        ssr_mode=ssr_mode,
    )
# Load the saved "default" configuration into the TotalConfig model.
# NOTE(review): with indentation stripped in this view, it is unclear
# whether this runs at module level or inside start_app — confirm
# placement against the original file.
load_config("default", TotalConfig)
# Dispatch to the Typer CLI when this module is executed as a script.
if __name__ == "__main__":
    app_wrapper()