diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -90,1880 +90,1882 @@ type StrPath = str | PathLike[str]
type Json = Mapping[str, Json] | Sequence[Json] | str | int | float | bool | None
class SegmentSize(IntEnum):
    """Enumeration of segment sizes for audio separation."""

    # Power-of-two segment lengths forwarded to the separation backend.
    # Exact units (frames vs. samples) are backend-defined — confirm
    # against the separator that consumes these values.
    SEG_64 = 64
    SEG_128 = 128
    SEG_256 = 256
    SEG_512 = 512
    SEG_1024 = 1024
    SEG_2048 = 2048
    SEG_4096 = 4096
-class BaseTabConfig(BaseModel):
- """
- Base model defining common component configuration settings for
- UI tabs.
- Attributes
- ----------
- embedder_model : DropdownConfig
- Configuration settings for an embedder model dropdown component.
- custom_embedder_model : DropdownConfig
- Configuration settings for a custom embedder model dropdown
- component.
class F0Method(StrEnum):
    """Enumeration of pitch extraction methods."""

    RMVPE = "rmvpe"  # recommended default for most cases (per UI help text)
    CREPE = "crepe"
    CREPE_TINY = "crepe-tiny"  # lighter-weight CREPE variant
    FCPE = "fcpe"
- embedder_model: DropdownConfig = DropdownConfig(
- label="Embedder model",
- info="The model to use for generating speaker embeddings.",
- value=EmbedderModel.CONTENTVEC,
- choices=list(EmbedderModel),
- exclude_value=True,
- )
- custom_embedder_model: DropdownConfig = DropdownConfig(
- label="Custom embedder model",
- info="Select a custom embedder model from the dropdown.",
- value=None,
- visible=False,
- render=False,
- exclude_value=True,
- )
-class GenerationConfig(BaseTabConfig):
- """
- Common component configuration settings for generation tabs.
class RVCContentType(StrEnum):
    """Enumeration of valid content to convert with RVC."""

    VOCALS = "vocals"  # separated vocal stems
    VOICE = "voice"    # standalone voice tracks
    SPEECH = "speech"  # generated/TTS speech
    AUDIO = "audio"    # arbitrary audio input
- See Also
- --------
- BaseTabConfig
- Parent model defining common component configuration settings
- for UI tabs.
- """
class SampleRate(IntEnum):
    """Enumeration of supported audio sample rates."""

    # Output sample rates in hertz (integer values, unlike the
    # string-valued TrainingSampleRate enum).
    HZ_16000 = 16000
    HZ_44100 = 44100
    HZ_48000 = 48000
    HZ_96000 = 96000
    HZ_192000 = 192000
- hop_length: SliderConfig = SliderConfig.hop_length(
- label="Hop length",
- info=(
- "How often the CREPE-based pitch extraction method checks for pitch changes"
- " measured in milliseconds. Lower values lead to longer conversion times"
- " and a higher risk of voice cracks, but better pitch accuracy."
- ),
- visible=True,
- )
- split_voice: CheckboxConfig = CheckboxConfig(
- label="Split input voice",
- info=(
- "Whether to split the input voice track into smaller segments before"
- " converting it. This can improve output quality for longer voice tracks."
- ),
- value=False,
- )
- autotune_voice: CheckboxConfig = CheckboxConfig(
- label="Autotune converted voice",
- info="Whether to apply autotune to the converted voice.
",
- value=False,
- exclude_value=True,
- )
- autotune_strength: SliderConfig = SliderConfig(
- label="Autotune intensity",
- info=(
- "Higher values result in stronger snapping to the chromatic grid and"
- " artifacting."
- ),
- value=1.0,
- minimum=0.0,
- maximum=1.0,
- visible=False,
- )
- sid: NumberConfig = NumberConfig(
- label="Speaker ID",
- info="Speaker ID for multi-speaker-models.",
- value=0,
- precision=0,
- )
- output_sr: DropdownConfig = DropdownConfig(
- label="Output sample rate",
- info="The sample rate of the mixed output track.",
- value=SampleRate.HZ_44100,
- choices=list(SampleRate),
- )
- output_format: DropdownConfig = DropdownConfig(
- label="Output format",
- info="The audio format of the mixed output track.",
- value=AudioExt.MP3,
- choices=list(AudioExt),
- )
- output_name: TextboxConfig = TextboxConfig(
- label="Output name",
- info="If no name is provided, a suitable name will be generated automatically.",
- value=None,
- placeholder="Ultimate RVC output",
- exclude_value=True,
- )
class AudioExt(StrEnum):
    """Enumeration of supported audio file formats."""

    # Values are bare file extensions (no leading dot).
    MP3 = "mp3"
    WAV = "wav"
    FLAC = "flac"
    OGG = "ogg"
-class SongGenerationConfig(GenerationConfig):
- """
- Common component configuration settings for song generation tabs.
- Attributes
- ----------
- source_type : DropdownConfig
- Configuration settings for a source type dropdown component.
- source : TextboxConfig
- Configuration settings for an input source textbox component.
- cached_song : DropdownConfig
- Configuration settings for a cached song dropdown component.
- clean_strength : SliderConfig
- Configuration settings for a clean strength slider component.
- clean_voice : CheckboxConfig
- Configuration settings for a clean voice checkbox component.
- room_size : SliderConfig
- Configuration settings for a room size slider component.
- wet_level : SliderConfig
- Configuration settings for a wetness level slider component.
- dry_level : SliderConfig
- Configuration settings for a dryness level slider component.
- damping : SliderConfig
- Configuration settings for a damping level slider component.
- main_gain : SliderConfig
- Configuration settings for a main gain slider component.
- inst_gain : SliderConfig
- Configuration settings for an instrumentals gain slider
- component.
- backup_gain : SliderConfig
- Configuration settings for a backup vocals gain slider
- component.
class DeviceType(StrEnum):
    """Enumeration of device types for training voice models."""

    # Title-cased values; presumably shown verbatim in the UI — confirm
    # against the dropdown components that consume this enum.
    AUTOMATIC = "Automatic"
    CPU = "CPU"
    GPU = "GPU"
- """
- source_type: DropdownConfig = DropdownConfig(
- label="Source type",
- info="The type of source to retrieve a song from.",
- value=SongSourceType.LOCAL_FILE,
- choices=list(SongSourceType),
- type="index",
- exclude_value=True,
- )
- source: TextboxConfig = TextboxConfig(
- label="Source",
- info="Link to a song on YouTube or the full path of a local audio file.",
- value=None,
- exclude_value=True,
- )
- cached_song: DropdownConfig = DropdownConfig(
- label="Source",
- info="Select a song from the list of cached songs.",
- value=None,
- visible=False,
- render=False,
- exclude_value=True,
- )
- clean_voice: CheckboxConfig = CheckboxConfig(
- label="Clean converted voice",
- info=(
- "Whether to clean the converted voice using noise reduction"
- " algorithms.
"
- ),
- value=False,
- exclude_value=True,
- )
- clean_strength: SliderConfig = SliderConfig.clean_strength(visible=False)
- room_size: SliderConfig = SliderConfig(
- label="Room size",
- info=(
- "Size of the room which reverb effect simulates. Increase for longer reverb"
- " time."
- ),
- value=0.15,
- minimum=0.0,
- maximum=1.0,
- )
- wet_level: SliderConfig = SliderConfig(
- label="Wetness level",
- info="Loudness of converted vocals with reverb effect applied.",
- value=0.2,
- minimum=0.0,
- maximum=1.0,
- )
- dry_level: SliderConfig = SliderConfig(
- label="Dryness level",
- info="Loudness of converted vocals without reverb effect applied.",
- value=0.8,
- minimum=0.0,
- maximum=1.0,
- )
- damping: SliderConfig = SliderConfig(
- label="Damping level",
- info="Absorption of high frequencies in reverb effect.",
- value=0.7,
- minimum=0.0,
- maximum=1.0,
- )
- main_gain: SliderConfig = SliderConfig.gain(
- label="Main gain",
- info="The gain to apply to the main vocals.",
- )
- inst_gain: SliderConfig = SliderConfig.gain(
- label="Instrumentals gain",
- info="The gain to apply to the instrumentals.",
- )
- backup_gain: SliderConfig = SliderConfig.gain(
- label="Backup gain",
- info="The gain to apply to the backup vocals.",
- )
class TrainingSampleRate(StrEnum):
    """Enumeration of sample rates for training voice models."""

    # NOTE: values are *strings* of the rate in Hz (unlike the integer
    # SampleRate enum); callers needing a number must convert.
    HZ_32K = "32000"
    HZ_40K = "40000"
    HZ_48K = "48000"
-class SpeechGenerationConfig(GenerationConfig):
- """
- Common component configuration settings for speech generation tabs.
- Attributes
- ----------
- source_type : DropdownConfig
- Configuration settings for a source type dropdown component.
- source : TextboxConfig
- Configuration settings for an input source textbox component.
- edge_tts_voice : DropdownConfig
- Configuration settings for an Edge TTS voice dropdown
- component.
- n_octaves : SliderConfig
- Configuration settings for an octave pitch shift slider
- component.
- n_semitones : SliderConfig
- Configuration settings for a semitone pitch shift slider
- component.
- tts_pitch_shift : SliderConfig
- Configuration settings for a TTS pitch shift slider
- component.
- tts_speed_change : SliderConfig
- Configuration settings for a TTS speed change slider
- component.
- tts_volume_change : SliderConfig
- Configuration settings for a TTS volume change slider
- component.
- clean_voice : CheckboxConfig
- Configuration settings for a clean voice checkbox
- component.
- clean_strength : SliderConfig
- Configuration settings for a clean strength slider
- component.
- output_gain : GainSliderConfig
- Configuration settings for an output gain slider component.
class PretrainedSampleRate(StrEnum):
    """Enumeration of valid sample rates for pretrained models."""

    # Shorthand "Nk" labels; note this set includes 44k, which the
    # TrainingSampleRate enum does not offer.
    HZ_32K = "32k"
    HZ_40K = "40k"
    HZ_44K = "44k"
    HZ_48K = "48k"
- """
- source_type: DropdownConfig = DropdownConfig(
- label="Source type",
- info="The type of source to generate speech from.",
- value=SpeechSourceType.TEXT,
- choices=list(SpeechSourceType),
- type="index",
- exclude_value=True,
- )
- source: TextboxConfig = TextboxConfig(
- label="Source",
- info="Text to generate speech from",
- value=None,
- exclude_value=True,
- )
- edge_tts_voice: DropdownConfig = DropdownConfig(
- label="Edge TTS voice",
- info="Select a voice to use for text to speech conversion.",
- value=None,
- render=False,
- exclude_value=True,
- )
- n_octaves: SliderConfig = SliderConfig.octave_shift(
- label="Octave shift",
- info=(
- "The number of octaves to pitch-shift the converted speech by. Use 1 for"
- " male-to-female and -1 for vice-versa."
- ),
- )
- n_semitones: SliderConfig = SliderConfig.semitone_shift(
- label="Semitone shift",
- info="The number of semi-tones to pitch-shift the converted speech by.",
- )
- tts_pitch_shift: SliderConfig = SliderConfig(
- label="Edge TTS pitch shift",
- info=(
- "The number of hertz to shift the pitch of the speech generated by Edge"
- " TTS."
- ),
- value=0,
- minimum=-100,
- maximum=100,
- step=1,
- )
- tts_speed_change: SliderConfig = SliderConfig(
- label="TTS speed change",
- info="The percentual change to the speed of the speech generated by Edge TTS.",
- value=0,
- minimum=-50,
- maximum=100,
- step=1,
- )
- tts_volume_change: SliderConfig = SliderConfig(
- label="TTS volume change",
- info="The percentual change to the volume of the speech generated by Edge TTS.",
- value=0,
- minimum=-100,
- maximum=100,
- step=1,
- )
- clean_voice: CheckboxConfig = CheckboxConfig(
- label="Clean converted voice",
- info=(
- "Whether to clean the converted voice using noise reduction"
- " algorithms.
"
- ),
- value=True,
- exclude_value=True,
- )
- clean_strength: SliderConfig = SliderConfig.clean_strength(visible=True)
- output_gain: SliderConfig = SliderConfig.gain(
- label="Output gain",
- info="The gain to apply to the converted speech.
",
- )
class TrainingF0Method(StrEnum):
    """Enumeration of pitch extraction methods for training."""

    # Subset of F0Method: FCPE is not offered for training.
    RMVPE = "rmvpe"
    CREPE = "crepe"
    CREPE_TINY = "crepe-tiny"
-class TrainingConfig(BaseTabConfig):
- """
- Common component configuration settings for training tabs.
- Attributes
- ----------
- dataset_type : DropdownConfig
- Configuration settings for a dataset type dropdown component.
- dataset : DropdownConfig
- Configuration settings for a dataset dropdown component.
- dataset_name : TextboxConfig
- Configuration settings for a dataset name textbox component.
- preprocess_model : DropdownConfig
- Configuration settings for a model name dropdown component
- for audio preprocessing.
- sample_rate : DropdownConfig
- Configuration settings for a sample rate dropdown component.
- filter_audio : CheckboxConfig
- Configuration settings for a filter audio checkbox component.
- clean_audio : CheckboxConfig
- Configuration settings for a clean audio checkbox component.
- clean_strength : SliderConfig
- Configuration settings for a clean strength slider component.
- split_method : DropdownConfig
- Configuration settings for an audio splitting method dropdown
- component.
- chunk_len : SliderConfig
- Configuration settings for a chunk length slider component.
- overlap_len : SliderConfig
- Configuration settings for an overlap length slider component.
- preprocess_cores : SliderConfig
- Configuration settings for a CPU cores slider component for
- preprocessing.
- extract_model : DropdownConfig
- Configuration settings for a model name dropdown component for
- feature extraction.
- f0_method : DropdownConfig
- Configuration settings for an F0 method dropdown component.
- hop_length : SliderConfig
- Configuration settings for a hop length slider component.
- include_mutes : SliderConfig
- Configuration settings for an include mutes slider component.
- extract_cores : SliderConfig
- Configuration settings for a CPU cores slider component for
- feature extraction.
- extraction_acceleration : HardwareAccelerationConfig
- Configuration settings for a hardware acceleration component for
- feature extraction.
- extraction_gpus : DropdownConfig
- Configuration settings for a GPU dropdown compoennt for feature
- extraction.
- train_model : DropdownConfig
- Configuration settings for a model name dropdown component for
- training.
- num_epochs : SliderConfig
- Configuration settings for a number of epochs slider component.
- batch_size : SliderConfig
- Configuration settings for a batch size slider component.
- detect_overtraining : CheckboxConfig
- Configuration settings for a detect overtraining checkbox
- component.
- overtraining_threshold : SliderConfig
- Configuration settings for an overtraining threshold slider
- component.
- vocoder : DropdownConfig
- Configuration settings for a vocoder dropdown component.
- index_algorithm : DropdownConfig
- Configuration settings for an index algorithm dropdown
- component.
- pretrained_type : DropdownConfig
- Configuration settings for a pretrained model type dropdown
- component.
- custom_pretrained_model : DropdownConfig
- Configuration settings for a custom pretrained model dropdown
- component.
- save_interval : SliderConfig
- Configuration settings for a save-interval slider component.
- save_all_checkpoints : CheckboxConfig
- Configuration settings for a save-all-checkpoints checkbox
- component.
- save_all_weights : CheckboxConfig
- Configuration settings for a save-all-weights checkbox
- component.
- clear_saved_data : CheckboxConfig
- Configuration settings for a clear-saved-data checkbox
- component.
- upload_model : CheckboxConfig
- Configuration settings for an upload voice model checkbox
- component.
- upload_name : TextboxConfig
- Configuration settings for an upload name textbox component.
- training_acceleration : HardwareAccelerationConfig
- Configuration settings for a hardware acceleration component for
- training.
- training_gpus : DropdownConfig
- Configuration settings for a GPU dropdown component for
- training.
- preload_dataset : CheckboxConfig
- Configuration settings for a preload dataset checkbox component.
- reduce_memory_usage : CheckboxConfig
- Configuration settings for a reduce-memory-usage checkbox
- component.
-
- See Also
- --------
- BaseTabConfig
- Parent model defining common component configuration settings
- for UI tabs.
-
- """
-
- dataset_type: DropdownConfig = DropdownConfig(
- label="Dataset type",
- info="Select the type of dataset to preprocess.",
- value=DatasetType.NEW_DATASET,
- choices=list(DatasetType),
- exclude_value=True,
- )
- dataset: DropdownConfig = DropdownConfig(
- label="Dataset path",
- info=(
- "The path to an existing dataset. Either select a path to a previously"
- " created dataset or provide a path to an external dataset."
- ),
- value=None,
- allow_custom_value=True,
- visible=False,
- render=False,
- exclude_value=True,
- )
- dataset_name: TextboxConfig = TextboxConfig(
- label="Dataset name",
- info=(
- "The name of the new dataset. If the dataset already exists, the provided"
- " audio files will be added to it."
- ),
- value="My dataset",
- exclude_value=True,
- )
- preprocess_model: DropdownConfig = DropdownConfig(
- label="Model name",
- info=(
- "Name of the model to preprocess the given dataset for. Either select an"
- " existing model from the dropdown or provide the name of a new model."
- ),
- value="My model",
- allow_custom_value=True,
- render=False,
- exclude_value=True,
- )
- sample_rate: DropdownConfig = DropdownConfig(
- label="Sample rate",
- info="Target sample rate for the audio files in the provided dataset.",
- value=TrainingSampleRate.HZ_40K,
- choices=list(TrainingSampleRate),
- )
- filter_audio: CheckboxConfig = CheckboxConfig(
- label="Filter audio",
- info=(
- "Whether to remove low-frequency sounds from the audio files in the"
- " provided dataset by applying a high-pass butterworth filter.
"
- ),
- value=True,
- )
- clean_audio: CheckboxConfig = CheckboxConfig(
- label="Clean audio",
- info=(
- "Whether to clean the audio files in the provided dataset using noise"
- " reduction algorithms.
"
- ),
- value=False,
- exclude_value=True,
- )
- clean_strength: SliderConfig = SliderConfig.clean_strength(visible=False)
- split_method: DropdownConfig = DropdownConfig(
- label="Audio splitting method",
- info=(
- "The method to use for splitting the audio files in the provided dataset."
- " Use the `Skip` method to skip splitting if the audio files are already"
- " split. Use the `Simple` method if excessive silence has already been"
- " removed from the audio files. Use the `Automatic` method for automatic"
- " silence detection and splitting around it."
- ),
- value=AudioSplitMethod.AUTOMATIC,
- choices=list(AudioSplitMethod),
- exclude_value=True,
- )
- chunk_len: SliderConfig = SliderConfig(
- label="Chunk length",
- info="Length of split audio chunks.",
- value=3.0,
- minimum=0.5,
- maximum=5.0,
- step=0.1,
- visible=False,
- )
- overlap_len: SliderConfig = SliderConfig(
- label="Overlap length",
- info="Length of overlap between split audio chunks.",
- value=0.3,
- minimum=0.0,
- maximum=0.4,
- step=0.1,
- visible=False,
- )
- preprocess_cores: SliderConfig = SliderConfig.cpu_cores()
-
- extract_model: DropdownConfig = DropdownConfig(
- label="Model name",
- info=(
- "Name of the model with an associated preprocessed dataset to extract"
- " training features from. When a new dataset is preprocessed, its"
- " associated model is selected by default."
- ),
- value=None,
- render=False,
- exclude_value=True,
- )
- f0_method: DropdownConfig = DropdownConfig(
- label="F0 method",
- info="The method to use for extracting pitch features.",
- value=TrainingF0Method.RMVPE,
- choices=list(TrainingF0Method),
- exclude_value=True,
- )
-
- hop_length: SliderConfig = SliderConfig.hop_length(
- label="Hop length",
- info="The hop length to use for extracting pitch features.
",
- visible=False,
- )
- include_mutes: SliderConfig = SliderConfig(
- label="Include mutes",
- info=(
- "The number of mute audio files to include in the generated training file"
- " list. Adding silent files enables the training model to handle pure"
- " silence in inferred audio files. If the preprocessed audio dataset"
- " already contains segments of pure silence, set this to 0."
- ),
- value=0,
- minimum=0,
- maximum=10,
- step=1,
- )
- extraction_cores: SliderConfig = SliderConfig.cpu_cores()
- extraction_acceleration: DropdownConfig = DropdownConfig.hardware_acceleration()
- extraction_gpus: DropdownConfig = DropdownConfig.gpu()
-
- train_model: DropdownConfig = DropdownConfig(
- label="Model name",
- info=(
- "Name of the model to train. When training features are extracted for a new"
- " model, its name is selected by default."
- ),
- value=None,
- render=False,
- exclude_value=True,
- )
- num_epochs: SliderConfig = SliderConfig(
- label="Number of epochs",
- info=(
- "The number of epochs to train the voice model. A higher number can improve"
- " voice model performance but may lead to overtraining."
- ),
- value=500,
- minimum=1,
- maximum=5000,
- step=1,
- )
- batch_size: SliderConfig = SliderConfig(
- label="Batch size",
- info=(
- "The number of samples in each training batch. It is advisable to align"
- " this value with the available VRAM of your GPU."
- ),
- value=16,
- minimum=1,
- maximum=128,
- step=1,
- )
- detect_overtraining: CheckboxConfig = CheckboxConfig(
- label="Detect overtraining",
- info=(
- "Whether to detect overtraining to prevent the voice model from learning"
- " the training data too well and losing the ability to generalize to new"
- " data."
- ),
- value=True,
- exclude_value=True,
- )
- overtraining_threshold: SliderConfig = SliderConfig(
- label="Overtraining threshold",
- info=(
- "The maximum number of epochs to continue training without any observed"
- " improvement in voice model performance."
- ),
- value=500,
- minimum=1,
- maximum=1000,
- visible=False,
- )
- vocoder: DropdownConfig = DropdownConfig(
- label="Vocoder",
- info=(
- "The vocoder to use for audio synthesis during training. HiFi-GAN provides"
- " basic audio fidelity, while RefineGAN provides the highest audio"
- " fidelity."
- ),
- value=Vocoder.HIFI_GAN,
- choices=list(Vocoder),
- )
- index_algorithm: DropdownConfig = DropdownConfig(
- label="Index algorithm",
- info=(
- "The method to use for generating an index file for the trained voice"
- " model. `KMeans` is particularly useful for large datasets."
- ),
- value=IndexAlgorithm.AUTO,
- choices=list(IndexAlgorithm),
- )
- pretrained_type: DropdownConfig = DropdownConfig(
- label="Pretrained model type",
- info=(
- "The type of pretrained model to finetune the voice model on. `None` will"
- " train the voice model from scratch, while `Default` will use a pretrained"
- " model tailored to the specific voice model architecture. `Custom` will"
- " use a custom pretrained that you provide."
- ),
- value=PretrainedType.DEFAULT,
- choices=list(PretrainedType),
- exclude_value=True,
- )
- custom_pretrained_model: DropdownConfig = DropdownConfig(
- label="Custom pretrained model",
- info="Select a custom pretrained model to finetune from the dropdown.",
- value=None,
- visible=False,
- render=False,
- exclude_value=True,
- )
- save_interval: SliderConfig = SliderConfig(
- label="Save interval",
- info=(
- "The epoch interval at which to to save voice model weights and"
- " checkpoints. The best model weights are always saved regardless of this"
- " setting."
- ),
- value=10,
- minimum=1,
- maximum=100,
- step=1,
- )
- save_all_checkpoints: CheckboxConfig = CheckboxConfig(
- label="Save all checkpoints",
- info=(
- "Whether to save a unique checkpoint at each save interval. If not enabled,"
- " only the latest checkpoint will be saved at each interval."
- ),
- value=True,
- )
- save_all_weights: CheckboxConfig = CheckboxConfig(
- label="Save all weights",
- info=(
- "Whether to save unique voice model weights at each save interval. If not"
- " enabled, only the best voice model weights will be saved."
- ),
- value=True,
- )
- clear_saved_data: CheckboxConfig = CheckboxConfig(
- label="Clear saved data",
- info=(
- "Whether to delete any existing training data associated with the voice"
- " model before training commences. Enable this setting only if you are"
- " training a new voice model from scratch or restarting training."
- ),
- value=False,
- )
- upload_model: CheckboxConfig = CheckboxConfig(
- label="Upload voice model",
- info=(
- "Whether to automatically upload the trained voice model so that it can be"
- " used for generation tasks within the Ultimate RVC app."
- ),
- value=False,
- exclude_value=True,
- )
- upload_name: TextboxConfig = TextboxConfig(
- label="Upload name",
- info="The name to give the uploaded voice model.",
- value=None,
- visible=False,
- exclude_value=True,
- )
- training_acceleration: DropdownConfig = DropdownConfig.hardware_acceleration()
- training_gpus: DropdownConfig = DropdownConfig.gpu()
- preload_dataset: CheckboxConfig = CheckboxConfig(
- label="Preload dataset",
- info=(
- "Whether to preload all training data into GPU memory. This can improve"
- " training speed but requires a lot of VRAM.
"
- ),
- value=True,
- )
- reduce_memory_usage: CheckboxConfig = CheckboxConfig(
- label="Reduce memory usage",
- info=(
- "Whether to reduce VRAM usage at the cost of slower training speed by"
- " enabling activation checkpointing. This is useful for GPUs with limited"
- " memory (e.g., <6GB VRAM) or when training with a batch size larger than"
- " what your GPU can normally accommodate."
- ),
- value=False,
- )
class AudioSplitMethod(StrEnum):
    """
    Enumeration of methods to use for splitting audio files during
    dataset preprocessing.
    """

    SKIP = "Skip"            # no splitting (files are already split)
    SIMPLE = "Simple"        # plain splitting; assumes excess silence removed
    AUTOMATIC = "Automatic"  # silence detection and splitting around it
-class SegmentSize(IntEnum):
- """Enumeration of segment sizes for audio separation."""
class Vocoder(StrEnum):
    """Enumeration of vocoders for training voice models."""

    HIFI_GAN = "HiFi-GAN"          # basic audio fidelity
    MRF_HIFI_GAN = "MRF HiFi-GAN"
    REFINE_GAN = "RefineGAN"       # highest audio fidelity
-class F0Method(StrEnum):
- """Enumeration of pitch extraction methods."""
class IndexAlgorithm(StrEnum):
    """Enumeration of indexing algorithms for training voice models."""

    AUTO = "Auto"
    FAISS = "Faiss"
    KMEANS = "KMeans"  # particularly useful for large datasets
class PretrainedType(StrEnum):
    """
    Enumeration of the possible types of pretrained models to finetune
    voice models on.
    """

    NONE = "None"        # train from scratch
    DEFAULT = "Default"  # pretrained model matching the architecture
    CUSTOM = "Custom"    # user-provided pretrained model
- VOCALS = "vocals"
- VOICE = "voice"
- SPEECH = "speech"
- AUDIO = "audio"
-class SampleRate(IntEnum):
- """Enumeration of supported audio sample rates."""
- HZ_16000 = 16000
- HZ_44100 = 44100
- HZ_48000 = 48000
- HZ_96000 = 96000
- HZ_192000 = 192000
+class ConcurrencyId(StrEnum):
+ """Enumeration of possible concurrency identifiers."""
+ GPU = auto()
-class AudioExt(StrEnum):
- """Enumeration of supported audio file formats."""
- MP3 = "mp3"
- WAV = "wav"
- FLAC = "flac"
- OGG = "ogg"
class SongSourceType(StrEnum):
    """The type of source providing the song to generate a cover of."""

    LOCAL_FILE = "Local file"
    CACHED_SONG = "Cached song"  # previously processed song in the cache
+
+
class SpeechSourceType(StrEnum):
    """The type of source providing the text to generate speech from."""

    TEXT = "Text"              # raw text input
    LOCAL_FILE = "Local file"  # text read from a local file
+
+
class SongTransferOption(StrEnum):
    """Enumeration of possible song transfer options."""

    # Values name the pipeline step (and track) a result is sent to;
    # presumably rendered verbatim in the UI — confirm with callers.
    STEP_1_AUDIO = "Step 1: stem splitting"
    STEP_2_VOCALS = "Step 2: vocal conversion"
    STEP_3_VOCALS = "Step 3: vocal effect"
    STEP_4_INSTRUMENTALS = "Step 4: instrumentals"
    STEP_4_BACKUP_VOCALS = "Step 4: backup vocals"
    STEP_5_MAIN_VOCALS = "Step 5: main vocals"
    STEP_5_INSTRUMENTALS = "Step 5: instrumentals"
    STEP_5_BACKUP_VOCALS = "Step 5: backup vocals"
+
+
class SpeechTransferOption(StrEnum):
    """Enumeration of possible speech transfer options."""

    # NOTE(review): member names say SPEECH but values say "vocal ..." —
    # they mirror SongTransferOption's step labels; confirm intended.
    STEP_2_SPEECH = "Step 2: vocal conversion"
    STEP_3_SPEECH = "Step 3: vocal effect"
+
+
class ComponentVisibilityKwArgs(TypedDict, total=False):
    """
    Keyword arguments for setting component visibility.

    ``total=False`` makes every key optional, so callers may supply any
    subset of these keyword arguments.

    Attributes
    ----------
    visible : bool
        Whether the component should be visible.
    value : Any
        The value of the component.

    """

    visible: bool
    value: Any
+
+
class UpdateDropdownKwArgs(TypedDict, total=False):
    """
    Keyword arguments for updating a dropdown component.

    ``total=False`` makes every key optional, so callers may supply any
    subset of these keyword arguments.

    Attributes
    ----------
    choices : DropdownChoices
        The updated choices for the dropdown component.
    value : DropdownValue
        The updated value for the dropdown component.

    """

    choices: DropdownChoices
    value: DropdownValue
+
+
class TextBoxKwArgs(TypedDict, total=False):
    """
    Keyword arguments for updating a textbox component.

    ``total=False`` makes every key optional, so callers may supply any
    subset of these keyword arguments.

    Attributes
    ----------
    value : str | None
        The updated value for the textbox component.
    placeholder : str | None
        The updated placeholder for the textbox component.

    """

    value: str | None
    placeholder: str | None
+
+
class UpdateAudioKwArgs(TypedDict, total=False):
    """
    Keyword arguments for updating an audio component.

    ``total=False`` makes the key optional.

    Attributes
    ----------
    value : str | None
        The updated value for the audio component.

    """

    value: str | None
+
+
class DatasetType(StrEnum):
    """The type of dataset to train a voice model."""

    NEW_DATASET = "New dataset"            # create a dataset from scratch
    EXISTING_DATASET = "Existing dataset"  # reuse a previously created dataset
+
+
# (relative directory, required file names) pairs for bundled embedder
# models. NOTE(review): presumably describes the expected on-disk layout
# under the models directory; the consumer is not visible in this chunk —
# confirm before relying on the exact structure.
embedders_list = [
    ("embedders/contentvec/", ["pytorch_model.bin", "config.json"]),
    ("embedders/custom/Crusty/", ["model.safetensors", "config.json"]),
]
+
+
class EmbedderModel(StrEnum):
    """Enumeration of audio embedding models."""

    CONTENTVEC = "contentvec"
    # Capitalized value matches the "embedders/custom/Crusty/" path in
    # embedders_list above; the casing appears deliberate.
    CRUSTY = "Crusty"
    CUSTOM = "custom"
+
+
+
+
+
class SeparationModel(StrEnum):
    """Enumeration of audio separation models."""

    # Member values are the on-disk file names of the separation models
    # (.onnx / .ckpt / .pth files).
    UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx"
    UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
    REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
    UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
    # NOTE(review): same value as UVR_MDX_NET_VOC_FT above, so this member
    # is an enum alias of it and is skipped by iteration / list(...).
    # Consider removing one of the two names.
    UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
    Kim_Vocal_1 = "Kim_Vocal_1.onnx"
    Kim_Vocal_2 = "Kim_Vocal_2.onnx"
    Kim_Inst = "Kim_Inst.onnx"
    UVR_MDX_NET_Inst_HQ_4 = "UVR-MDX-NET-Inst_HQ_4.onnx"
    # NOTE(review): member naming mixes UPPER_SNAKE_CASE and lower-case
    # styles; PEP 8 prefers UPPER_SNAKE_CASE for enum constants.
    kuielab_a_vocals = "kuielab_a_vocals.onnx"
    kuielab_b_vocals = "kuielab_b_vocals.onnx"
    kuielab_a_drums = "kuielab_a_drums.onnx"
    kuielab_b_drums = "kuielab_b_drums.onnx"
    kuielab_a_bass = "kuielab_a_bass.onnx"
    kuielab_b_bass = "kuielab_b_bass.onnx"
    kuielab_a_other = "kuielab_a_other.onnx"
    kuielab_b_other = "kuielab_b_other.onnx"
    MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
    UVR_DeNoise = "UVR-DeNoise.pth"
    UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
class SeparationModel2(StrEnum):
    """Enumeration of audio separation models."""

    # NOTE(review): this enum is a verbatim duplicate of SeparationModel
    # (identical members and values); consider consolidating the two —
    # MultiStepSongGenerationConfig mixes both for value vs. choices.
    UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx"
    UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
    REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
    UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
    # NOTE(review): same value as UVR_MDX_NET_VOC_FT above, so this member
    # is an enum alias of it and is skipped by iteration / list(...).
    UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
    Kim_Vocal_1 = "Kim_Vocal_1.onnx"
    Kim_Vocal_2 = "Kim_Vocal_2.onnx"
    Kim_Inst = "Kim_Inst.onnx"
    UVR_MDX_NET_Inst_HQ_4 = "UVR-MDX-NET-Inst_HQ_4.onnx"
    kuielab_a_vocals = "kuielab_a_vocals.onnx"
    kuielab_b_vocals = "kuielab_b_vocals.onnx"
    kuielab_a_drums = "kuielab_a_drums.onnx"
    kuielab_b_drums = "kuielab_b_drums.onnx"
    kuielab_a_bass = "kuielab_a_bass.onnx"
    kuielab_b_bass = "kuielab_b_bass.onnx"
    kuielab_a_other = "kuielab_a_other.onnx"
    kuielab_b_other = "kuielab_b_other.onnx"
    MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
    UVR_DeNoise = "UVR-DeNoise.pth"
    UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
+
+
+
+
+
# Working directory of the process; appended to sys.path so sibling
# modules resolve when the app is launched from this directory.
now_dir = os.getcwd()

sys.path.append(now_dir)
models_dir = "models"

# Local directory the model snapshot is downloaded into.
dump_path = os.path.join(now_dir, models_dir)

# Hugging Face repository holding the model files.
repo_id = "lainlives/voice"

# NOTE(review): snapshot_download runs unconditionally at import time and
# performs network I/O with no error handling; HF_TOKEN may be None, in
# which case the download is presumably anonymous — confirm the repo is
# public or the token is always set in deployment.
hf_token = os.environ.get("HF_TOKEN")
snapshot_download(repo_id=repo_id, local_dir=dump_path, token=hf_token)

# NOTE(review): dead commented-out entry point; delete or restore.
#if __name__ == "__main__":
# start_app(share=False, ssr_mode = True)
-class DeviceType(StrEnum):
- """Enumeration of device types for training voice models."""
- AUTOMATIC = "Automatic"
- CPU = "CPU"
- GPU = "GPU"
-class TrainingSampleRate(StrEnum):
- """Enumeration of sample rates for training voice models."""
- HZ_32K = "32000"
- HZ_40K = "40000"
- HZ_48K = "48000"
# Name of the UI configuration to load; hard-coded to "default" for now
# (the commented-out expression shows the intended URVC_CONFIG env-var
# override).
config_name = "default" #os.environ.get("URVC_CONFIG")
# Optional cookie file path — presumably for YouTube downloads given the
# YT_ prefix; None when the env var is unset. TODO confirm consumer.
cookiefile = os.environ.get("YT_COOKIEFILE")
-class PretrainedSampleRate(StrEnum):
- """Enumeration of valid sample rates for pretrained models."""
- HZ_32K = "32k"
- HZ_40K = "40k"
- HZ_44K = "44k"
- HZ_48K = "48k"
+"""
+Module defining models for representing configuration settings for
+UI tabs.
+"""
-class TrainingF0Method(StrEnum):
- """Enumeration of pitch extraction methods for training."""
- RMVPE = "rmvpe"
- CREPE = "crepe"
- CREPE_TINY = "crepe-tiny"
-class AudioSplitMethod(StrEnum):
+class SongIntermediateAudioConfig(BaseModel):
"""
- Enumeration of methods to use for splitting audio files during
- dataset preprocessing.
+ Configuration settings for intermediate audio components in the
+ one-click song generation tab.
+
+ Attributes
+ ----------
+ song : AudioConfig
+ Configuration settings for the input song audio component.
+ vocals : AudioConfig
+ Configuration settings for the vocals audio component.
+ instrumentals : AudioConfig
+ Configuration settings for the instrumentals audio component.
+ main_vocals : AudioConfig
+ Configuration settings for the main vocals audio component.
+ backup_vocals : AudioConfig
+ Configuration settings for the backup vocals audio component.
+ main_vocals_dereverbed : AudioConfig
+ Configuration settings for the main vocals de-reverbed audio
+ component.
+ main_vocals_reverb : AudioConfig
+ Configuration settings for the main vocals reverb audio
+ component.
+ converted_vocals : AudioConfig
+ Configuration settings for the converted vocals audio
+ component.
+ postprocessed_vocals : AudioConfig
+ Configuration settings for the postprocessed vocals audio
+ component.
+ instrumentals_shifted : AudioConfig
+ Configuration settings for the shifted instrumentals audio
+ component.
+ backup_vocals_shifted : AudioConfig
+ Configuration settings for the shifted backup vocals audio
+ component.
+ all : list[gr.Audio]
+ List of instances of all intermediate audio components.
+
"""
- SKIP = "Skip"
- SIMPLE = "Simple"
- AUTOMATIC = "Automatic"
+ song: AudioConfig = AudioConfig.intermediate(label="Song")
+ vocals: AudioConfig = AudioConfig.intermediate(label="Vocals")
+ instrumentals: AudioConfig = AudioConfig.intermediate(
+ label="Instrumentals",
+ )
+ main_vocals: AudioConfig = AudioConfig.intermediate(
+ label="Main vocals",
+ )
+ backup_vocals: AudioConfig = AudioConfig.intermediate(
+ label="Backup vocals",
+ )
+ main_vocals_dereverbed: AudioConfig = AudioConfig.intermediate(
+ label="De-reverbed main vocals",
+ )
+ main_vocals_reverb: AudioConfig = AudioConfig.intermediate(
+ label="Main vocals with reverb",
+ )
+ converted_vocals: AudioConfig = AudioConfig.intermediate(
+ label="Converted vocals",
+ )
+ postprocessed_vocals: AudioConfig = AudioConfig.intermediate(
+ label="Postprocessed vocals",
+ )
+ instrumentals_shifted: AudioConfig = AudioConfig.intermediate(
+ label="Pitch-shifted instrumentals",
+ )
+ backup_vocals_shifted: AudioConfig = AudioConfig.intermediate(
+ label="Pitch-shifted backup vocals",
+ )
+ @property
+ def all(self) -> list[gr.Audio]:
+ """
+ Retrieve instances of all intermediate audio components
+ in the one-click song generation tab.
-class Vocoder(StrEnum):
- """Enumeration of vocoders for training voice models."""
+ Returns
+ -------
+ list[gr.Audio]
+ List of instances of all intermediate audio components in
+ the one-click song generation tab.
- HIFI_GAN = "HiFi-GAN"
- MRF_HIFI_GAN = "MRF HiFi-GAN"
- REFINE_GAN = "RefineGAN"
+ """
+ # NOTE we are using self.__annotations__ to get the fields in
+ # the order they are defined in the class
+ return [getattr(self, field).instance for field in self.__annotations__]
-class IndexAlgorithm(StrEnum):
- """Enumeration of indexing algorithms for training voice models."""
+class OneClickSongGenerationConfig(SongGenerationConfig):
+ """
+ Configuration settings for the one-click song generation tab.
- AUTO = "Auto"
- FAISS = "Faiss"
- KMEANS = "KMeans"
+ Attributes
+ ----------
+ n_octaves : SliderConfig
+ Configuration settings for an octave pitch shift slider
+ component.
+ n_semitones : SliderConfig
+ Configuration settings for a semitone pitch shift slider
+ component.
+ show_intermediate_audio : CheckboxConfig
+ Configuration settings for a show intermediate audio checkbox
+ component.
+ intermediate_audio : SongIntermediateAudioConfig
+ Configuration settings for intermediate audio components.
+ See Also
+ --------
+ SongGenerationConfig
+ Parent model defining common component configuration settings
+ for song generation tabs.
-class PretrainedType(StrEnum):
- """
- Enumeration of the possible types of pretrained models to finetune
- voice models on.
"""
- NONE = "None"
- DEFAULT = "Default"
- CUSTOM = "Custom"
+ n_octaves: SliderConfig = SliderConfig.octave_shift(
+ label="Vocal pitch shift",
+ info=(
+ "The number of octaves to shift the pitch of the converted vocals by. Use 1"
+ " for male-to-female and -1 for vice-versa."
+ ),
+ )
+ n_semitones: SliderConfig = SliderConfig.semitone_shift(
+ label="Overall pitch shift",
+ info=(
+ "The number of semi-tones to shift the pitch of the converted vocals,"
+ " instrumentals and backup vocals by."
+ ),
+ )
+ show_intermediate_audio: CheckboxConfig = CheckboxConfig(
+ label="Show intermediate audio",
+ info="Show intermediate audio tracks produced during song cover generation.",
+ value=False,
+ exclude_value=True,
+ )
+ intermediate_audio: SongIntermediateAudioConfig = SongIntermediateAudioConfig()
+class SongInputAudioConfig(BaseModel):
+ """
+ Configuration settings for input audio components in the multi-step
+ song generation tab.
-class ConcurrencyId(StrEnum):
- """Enumeration of possible concurrency identifiers."""
+ Attributes
+ ----------
+ audio : AudioConfig
+ Configuration settings for the input audio component.
+ vocals : AudioConfig
+ Configuration settings for the vocals audio component.
+ converted_vocals : AudioConfig
+ Configuration settings for the converted vocals audio
+ component.
+ instrumentals : AudioConfig
+ Configuration settings for the instrumentals audio
+ component.
+ backup_vocals : AudioConfig
+ Configuration settings for the backup vocals audio
+ component.
+ main_vocals : AudioConfig
+ Configuration settings for the main vocals audio
+ component.
+ shifted_instrumentals : AudioConfig
+ Configuration settings for the shifted instrumentals audio
+ component.
+ shifted_backup_vocals : AudioConfig
+ Configuration settings for the shifted backup vocals audio
+ component.
+ all : list[AudioConfig]
+ List of configuration settings for all input audio
+ components in the multi-step song generation tab.
- GPU = auto()
+ """
+ audio: AudioConfig = AudioConfig.input(label="Audio")
+ vocals: AudioConfig = AudioConfig.input(label="Vocals")
+ converted_vocals: AudioConfig = AudioConfig.input(label="Vocals")
+ instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
+ backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")
+ main_vocals: AudioConfig = AudioConfig.input(label="Main vocals")
+ shifted_instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
+ shifted_backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")
-class SongSourceType(StrEnum):
- """The type of source providing the song to generate a cover of."""
+ @property
+ def all(self) -> list[AudioConfig]:
+ """
+ Retrieve configuration settings for all input audio components
+ in the multi-step song generation tab.
- LOCAL_FILE = "Local file"
- CACHED_SONG = "Cached song"
+ Returns
+ -------
+ list[AudioConfig]
+ List of configuration settings for all input audio
+ components in the multi-step song generation tab.
+ """
+ return [getattr(self, field) for field in self.__annotations__]
-class SpeechSourceType(StrEnum):
- """The type of source providing the text to generate speech from."""
- TEXT = "Text"
- LOCAL_FILE = "Local file"
+class SongDirsConfig(BaseModel):
+ """
+ Configuration settings for song directory components in the
+ multi-step song generation tab.
+ Attributes
+ ----------
+ separate_audio : DropdownConfig
+ Configuration settings for the song directory component
+ for separating audio.
+ convert_vocals : DropdownConfig
+ Configuration settings for the song directory component
+ for converting vocals.
+ postprocess_vocals : DropdownConfig
+ Configuration settings for the song directory component
+ for postprocessing vocals.
+ pitch_shift_background : DropdownConfig
+ Configuration settings for the song directory component
+ for pitch-shifting background audio.
+ mix : DropdownConfig
+ Configuration settings for the song directory component
+ for mixing audio.
+ all : list[gr.Dropdown]
+ List of instances of all song directory components in the
+ multi-step song generation tab.
-class SongTransferOption(StrEnum):
- """Enumeration of possible song transfer options."""
+ """
- STEP_1_AUDIO = "Step 1: stem splitting"
- STEP_2_VOCALS = "Step 2: vocal conversion"
- STEP_3_VOCALS = "Step 3: vocal effect"
- STEP_4_INSTRUMENTALS = "Step 4: instrumentals"
- STEP_4_BACKUP_VOCALS = "Step 4: backup vocals"
- STEP_5_MAIN_VOCALS = "Step 5: main vocals"
- STEP_5_INSTRUMENTALS = "Step 5: instrumentals"
- STEP_5_BACKUP_VOCALS = "Step 5: backup vocals"
+ separate_audio: DropdownConfig = DropdownConfig.song_dir()
+ convert_vocals: DropdownConfig = DropdownConfig.song_dir()
+ postprocess_vocals: DropdownConfig = DropdownConfig.song_dir()
+ pitch_shift_background: DropdownConfig = DropdownConfig.song_dir()
+ mix: DropdownConfig = DropdownConfig.song_dir()
+ @property
+ def all(self) -> list[gr.Dropdown]:
+ """
+ Retrieve instances of all song directory components in the
+ multi-step song generation tab.
-class SpeechTransferOption(StrEnum):
- """Enumeration of possible speech transfer options."""
+ Returns
+ -------
+ list[gr.Dropdown]
+ List of instances of all song directory components in
+ the multi-step song generation tab.
- STEP_2_SPEECH = "Step 2: vocal conversion"
- STEP_3_SPEECH = "Step 3: vocal effect"
+ """
+ return [getattr(self, field).instance for field in self.__annotations__]
-class ComponentVisibilityKwArgs(TypedDict, total=False):
+class MultiStepSongGenerationConfig(SongGenerationConfig):
"""
- Keyword arguments for setting component visibility.
+ Configuration settings for multi-step song generation tab.
Attributes
----------
- visible : bool
- Whether the component should be visible.
- value : Any
- The value of the component.
+ separation_model : DropdownConfig
+ Configuration settings for a separation model dropdown
+ component.
+ segment_size : RadioConfig
+ Configuration settings for a segment size radio component.
+ n_octaves : SliderConfig
+ Configuration settings for an octave pitch shift slider
+ component.
+ n_semitones : SliderConfig
+ Configuration settings for a semitone pitch shift slider
+ component.
+ n_semitones_instrumentals : SliderConfig
+ Configuration settings for an instrumentals pitch shift slider
+ component.
+ n_semitones_backup_vocals : SliderConfig
+ Configuration settings for a backup vocals pitch shift slider
+ component.
+ input_audio : SongInputAudioConfig
+ Configuration settings for input audio components.
+ song_dirs : SongDirsConfig
+ Configuration settings for song directory components.
+
+ See Also
+ --------
+ SongGenerationConfig
+ Parent model defining common component configuration settings
+ for song generation tabs.
"""
- visible: bool
- value: Any
+ separation_model: DropdownConfig = DropdownConfig(
+ label="Separation model",
+ info="The model to use for audio separation.",
+ value=SeparationModel.UVR_MDX_NET_VOC_FT,
+ choices=list(SeparationModel2),
+ )
+ segment_size: RadioConfig = RadioConfig(
+ label="Segment size",
+ info=(
+ "The size of the segments into which the audio is split. Using a larger"
+ " size consumes more resources, but may give better results."
+ ),
+ value=SegmentSize.SEG_2048,
+ choices=list(SegmentSize),
+ )
+ n_octaves: SliderConfig = SliderConfig.octave_shift(
+ label="Pitch shift (octaves)",
+ info=(
+ "The number of octaves to pitch-shift the converted voice by. Use 1 for"
+ " male-to-female and -1 for vice-versa."
+ ),
+ )
+ n_semitones: SliderConfig = SliderConfig.semitone_shift(
+ label="Pitch shift (semi-tones)",
+ info=(
+ "The number of semi-tones to pitch-shift the converted vocals by. Altering"
+ " this slightly reduces sound quality."
+ ),
+ )
+ n_semitones_instrumentals: SliderConfig = SliderConfig.semitone_shift(
+ label="Instrumental pitch shift",
+ info="The number of semi-tones to pitch-shift the instrumentals by.",
+ )
+ n_semitones_backup_vocals: SliderConfig = SliderConfig.semitone_shift(
+ label="Backup vocal pitch shift",
+ info="The number of semi-tones to pitch-shift the backup vocals by.",
+ )
+ input_audio: SongInputAudioConfig = SongInputAudioConfig()
+ song_dirs: SongDirsConfig = SongDirsConfig()
-class UpdateDropdownKwArgs(TypedDict, total=False):
+class SpeechIntermediateAudioConfig(BaseModel):
"""
- Keyword arguments for updating a dropdown component.
+ Configuration settings for intermediate audio components in the
+ one-click speech generation tab.
Attributes
----------
- choices : DropdownChoices
- The updated choices for the dropdown component.
- value : DropdownValue
- The updated value for the dropdown component.
+ speech : AudioConfig
+ Configuration settings for the input speech audio component.
+ converted_speech : AudioConfig
+ Configuration settings for the converted speech audio component.
+ all : list[gr.Audio]
+ List of instances of all intermediate audio components in the
+ speech generation tab.
"""
- choices: DropdownChoices
- value: DropdownValue
-
-
-class TextBoxKwArgs(TypedDict, total=False):
- """
- Keyword arguments for updating a textbox component.
+ speech: AudioConfig = AudioConfig.intermediate(label="Speech")
+ converted_speech: AudioConfig = AudioConfig.intermediate(label="Converted speech")
- Attributes
- ----------
- value : str | None
- The updated value for the textbox component.
- placeholder : str | None
- The updated placeholder for the textbox component.
+ @property
+ def all(self) -> list[gr.Audio]:
+ """
+ Retrieve instances of all intermediate audio components in the
+ speech generation tab.
- """
+ Returns
+ -------
+ list[gr.Audio]
+ List of instances of all intermediate audio components in
+ the speech generation tab.
- value: str | None
- placeholder: str | None
+ """
+ return [getattr(self, field).instance for field in self.__annotations__]
-class UpdateAudioKwArgs(TypedDict, total=False):
+class OneClickSpeechGenerationConfig(SpeechGenerationConfig):
"""
- Keyword arguments for updating an audio component.
+ Configuration settings for one-click speech generation tab.
Attributes
----------
- value : str | None
- The updated value for the audio component.
-
- """
-
- value: str | None
-
-
-class DatasetType(StrEnum):
- """The type of dataset to train a voice model."""
-
- NEW_DATASET = "New dataset"
- EXISTING_DATASET = "Existing dataset"
+ intermediate_audio : SpeechIntermediateAudioConfig
+ Configuration settings for intermediate audio components.
+ show_intermediate_audio : CheckboxConfig
+ Configuration settings for a show intermediate audio checkbox
+ component.
+ See Also
+ --------
+ SpeechGenerationConfig
+ Parent model defining common component configuration settings
+ for speech generation tabs.
-embedders_list = [
- ("embedders/contentvec/", ["pytorch_model.bin", "config.json"]),
- ("embedders/custom/Crusty/", ["model.safetensors", "config.json"]),
-]
+ """
+ intermediate_audio: SpeechIntermediateAudioConfig = SpeechIntermediateAudioConfig()
-class EmbedderModel(StrEnum):
- """Enumeration of audio embedding models."""
+ show_intermediate_audio: CheckboxConfig = CheckboxConfig(
+ label="Show intermediate audio",
+ info="Show intermediate audio tracks produced during speech generation.",
+ value=False,
+ exclude_value=True,
+ )
- CONTENTVEC = "contentvec"
- CRUSTY = "Crusty"
- CUSTOM = "custom"
-
-class SeparationModel(StrEnum):
- """Enumeration of audio separation models."""
- UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx"
- UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
- REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
- UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
- UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
- Kim_Vocal_1 = "Kim_Vocal_1.onnx"
- Kim_Vocal_2 = "Kim_Vocal_2.onnx"
- Kim_Inst = "Kim_Inst.onnx"
- UVR_MDX_NET_Inst_HQ_4 = "UVR-MDX-NET-Inst_HQ_4.onnx"
- kuielab_a_vocals = "kuielab_a_vocals.onnx"
- kuielab_b_vocals = "kuielab_b_vocals.onnx"
- kuielab_a_drums = "kuielab_a_drums.onnx"
- kuielab_b_drums = "kuielab_b_drums.onnx"
- kuielab_a_bass = "kuielab_a_bass.onnx"
- kuielab_b_bass = "kuielab_b_bass.onnx"
- kuielab_a_other = "kuielab_a_other.onnx"
- kuielab_b_other = "kuielab_b_other.onnx"
- MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
- UVR_DeNoise = "UVR-DeNoise.pth"
- UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
-
-class SeparationModel2(StrEnum):
- """Enumeration of audio separation models."""
+class SpeechInputAudioConfig(BaseModel):
+ """
+ Configuration settings for input audio components in the multi-step
+ speech generation tab.
- UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx"
- UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
- REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
- UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
- UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
- Kim_Vocal_1 = "Kim_Vocal_1.onnx"
- Kim_Vocal_2 = "Kim_Vocal_2.onnx"
- Kim_Inst = "Kim_Inst.onnx"
- UVR_MDX_NET_Inst_HQ_4 = "UVR-MDX-NET-Inst_HQ_4.onnx"
- kuielab_a_vocals = "kuielab_a_vocals.onnx"
- kuielab_b_vocals = "kuielab_b_vocals.onnx"
- kuielab_a_drums = "kuielab_a_drums.onnx"
- kuielab_b_drums = "kuielab_b_drums.onnx"
- kuielab_a_bass = "kuielab_a_bass.onnx"
- kuielab_b_bass = "kuielab_b_bass.onnx"
- kuielab_a_other = "kuielab_a_other.onnx"
- kuielab_b_other = "kuielab_b_other.onnx"
- MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
- UVR_DeNoise = "UVR-DeNoise.pth"
- UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
+ Attributes
+ ----------
+ speech : AudioConfig
+ Configuration settings for the input speech audio component.
+ converted_speech : AudioConfig
+ Configuration settings for the converted speech audio component.
+ all : list[AudioConfig]
+ List of configuration settings for all input audio components in
+ the multi-step speech generation tab.
+ """
+ speech: AudioConfig = AudioConfig.input("Speech")
+ converted_speech: AudioConfig = AudioConfig.input("Converted speech")
+ @property
+ def all(self) -> list[AudioConfig]:
+ """
+ Retrieve configuration settings for all input audio components
+ in the multi-step speech generation tab.
-now_dir = os.getcwd()
+ Returns
+ -------
+ list[AudioConfig]
+ List of configuration settings for all input audio
+ components in the multi-step speech generation tab.
-sys.path.append(now_dir)
-models_dir = "models"
+ """
+ return [getattr(self, field) for field in self.__annotations__]
-dump_path = os.path.join(now_dir, models_dir)
-repo_id = "lainlives/voice"
+class MultiStepSpeechGenerationConfig(SpeechGenerationConfig):
+ """
+ Configuration settings for the multi-step speech generation tab.
-hf_token = os.environ.get("HF_TOKEN")
-snapshot_download(repo_id=repo_id, local_dir=dump_path, token=hf_token)
+ Attributes
+ ----------
+ input_audio : SpeechInputAudioConfig
+ Configuration settings for input audio components.
-#if __name__ == "__main__":
-# start_app(share=False, ssr_mode = True)
+ See Also
+ --------
+ SpeechGenerationConfig
+ Parent model defining common component configuration settings
+ for speech generation tabs.
+ """
+ input_audio: SpeechInputAudioConfig = SpeechInputAudioConfig()
+class MultiStepTrainingConfig(TrainingConfig):
+ """Configuration settings for multi-step training tab."""
-config_name = "default" #os.environ.get("URVC_CONFIG")
-cookiefile = os.environ.get("YT_COOKIEFILE")
+class ModelManagementConfig(BaseModel):
+ """
+ Configuration settings for model management tab.
+ Attributes
+ ----------
+ voices : DropdownConfig
+ Configuration settings for delete voice models dropdown
+ component.
+ embedders : DropdownConfig
+ Configuration settings for delete embedder models dropdown
+ component.
+ pretraineds : DropdownConfig
+ Configuration settings for delete pretrained models dropdown
+ component.
+ traineds : DropdownConfig
+ Configuration settings for delete training models dropdown
+ component.
+ dummy_checkbox : CheckboxConfig
+ Configuration settings for a dummy checkbox component.
-"""
-Module defining models for representing configuration settings for
-UI tabs.
-"""
+ """
+ voices: DropdownConfig = DropdownConfig.multi_delete(
+ label="Voice models",
+ info="Select one or more voice models to delete.",
+ )
+ embedders: DropdownConfig = DropdownConfig.multi_delete(
+ label="Custom embedder models",
+ info="Select one or more embedder models to delete.",
+ )
+ pretraineds: DropdownConfig = DropdownConfig.multi_delete(
+ label="Custom pretrained models",
+ info="Select one or more pretrained models to delete.",
+ )
+ traineds: DropdownConfig = DropdownConfig.multi_delete(
+ label="Training models",
+ info="Select one or more training models to delete.",
+ )
+ dummy_checkbox: CheckboxConfig = CheckboxConfig(
+ value=False,
+ visible=False,
+ exclude_value=True,
+ )
-class SongIntermediateAudioConfig(BaseModel):
+class AudioManagementConfig(BaseModel):
"""
- Configuration settings for intermediate audio components in the
- one-click song generation tab.
+ Configuration settings for audio management tab.
Attributes
----------
- song : AudioConfig
- Configuration settings for the input song audio component.
- vocals : AudioConfig
- Configuration settings for the vocals audio component.
- instrumentals : AudioConfig
- Configuration settings for the instrumentals audio component.
- main_vocals : AudioConfig
- Configuration settings for the main vocals audio component.
- backup_vocals : AudioConfig
- Configuration settings for the backup vocals audio component.
- main_vocals_dereverbed : AudioConfig
- Configuration settings for the main vocals de-reverbed audio
- component.
- main_vocals_reverb : AudioConfig
- Configuration settings for the main vocals reverb audio
- component.
- converted_vocals : AudioConfig
- Configuration settings for the converted vocals audio
- component.
- postprocessed_vocals : AudioConfig
- Configuration settings for the postprocessed vocals audio
+ intermediate : DropdownConfig
+ Configuration settings for delete intermediate audio files
+ dropdown component
+ speech : DropdownConfig
+ Configuration settings for delete speech audio files dropdown
component.
- instrumentals_shifted : AudioConfig
- Configuration settings for the shifted instrumentals audio
+ output : DropdownConfig
+ Configuration settings for delete output audio files dropdown
component.
- backup_vocals_shifted : AudioConfig
- Configuration settings for the shifted backup vocals audio
+ dataset : DropdownConfig
+ Configuration settings for delete dataset audio files dropdown
component.
- all : list[gr.Audio]
- List of instances of all intermediate audio components.
+ dummy_checkbox : CheckboxConfig
+ Configuration settings for a dummy checkbox component.
"""
- song: AudioConfig = AudioConfig.intermediate(label="Song")
- vocals: AudioConfig = AudioConfig.intermediate(label="Vocals")
- instrumentals: AudioConfig = AudioConfig.intermediate(
- label="Instrumentals",
- )
- main_vocals: AudioConfig = AudioConfig.intermediate(
- label="Main vocals",
+ intermediate: DropdownConfig = DropdownConfig.multi_delete(
+ label="Song directories",
+ info=(
+ "Select one or more song directories containing intermediate audio files to"
+ " delete."
+ ),
)
- backup_vocals: AudioConfig = AudioConfig.intermediate(
- label="Backup vocals",
+ speech: DropdownConfig = DropdownConfig.multi_delete(
+ label="Speech audio files",
+ info="Select one or more speech audio files to delete.",
)
- main_vocals_dereverbed: AudioConfig = AudioConfig.intermediate(
- label="De-reverbed main vocals",
+ output: DropdownConfig = DropdownConfig.multi_delete(
+ label="Output audio files",
+ info="Select one or more output audio files to delete.",
)
- main_vocals_reverb: AudioConfig = AudioConfig.intermediate(
- label="Main vocals with reverb",
+ dataset: DropdownConfig = DropdownConfig.multi_delete(
+ label="Dataset audio files",
+ info="Select one or more datasets containing audio files to delete.",
)
- converted_vocals: AudioConfig = AudioConfig.intermediate(
- label="Converted vocals",
+
+ dummy_checkbox: CheckboxConfig = CheckboxConfig(
+ value=False,
+ visible=False,
+ exclude_value=True,
)
- postprocessed_vocals: AudioConfig = AudioConfig.intermediate(
- label="Postprocessed vocals",
+
+
+class SettingsManagementConfig(BaseModel):
+ """
+ Configuration settings for settings management tab.
+
+ Attributes
+ ----------
+ dummy_checkbox : CheckboxConfig
+ Configuration settings for a dummy checkbox component.
+
+ """
+
+ load_config_name: DropdownConfig = DropdownConfig(
+ label="Configuration name",
+ info="The name of a configuration to load UI settings from",
+ value=None,
+ render=False,
+ exclude_value=True,
)
- instrumentals_shifted: AudioConfig = AudioConfig.intermediate(
- label="Pitch-shifted instrumentals",
+ delete_config_names: DropdownConfig = DropdownConfig.multi_delete(
+ label="Configuration names",
+ info="Select the name of one or more configurations to delete",
)
- backup_vocals_shifted: AudioConfig = AudioConfig.intermediate(
- label="Pitch-shifted backup vocals",
+ dummy_checkbox: CheckboxConfig = CheckboxConfig(
+ value=False,
+ visible=False,
+ exclude_value=True,
)
- @property
- def all(self) -> list[gr.Audio]:
- """
- Retrieve instances of all intermediate audio components
- in the one-click song generation tab.
- Returns
- -------
- list[gr.Audio]
- List of instances of all intermediate audio components in
- the one-click song generation tab.
+class TotalSongGenerationConfig(BaseModel):
+ """
+ All configuration settings for song generation tabs.
- """
- # NOTE we are using self.__annotations__ to get the fields in
- # the order they are defined in the class
- return [getattr(self, field).instance for field in self.__annotations__]
+ Attributes
+ ----------
+ one_click : OneClickSongGenerationConfig
+ Configuration settings for the one-click song generation tab.
+ multi_step : MultiStepSongGenerationConfig
+ Configuration settings for the multi-step song generation tab.
+
+ """
+ one_click: OneClickSongGenerationConfig = OneClickSongGenerationConfig()
+ multi_step: MultiStepSongGenerationConfig = MultiStepSongGenerationConfig()
-class OneClickSongGenerationConfig(SongGenerationConfig):
+
+class TotalSpeechGenerationConfig(BaseModel):
"""
- Configuration settings for the one-click song generation tab.
+ All configuration settings for speech generation tabs.
Attributes
----------
- n_octaves : SliderConfig
- Configuration settings for an octave pitch shift slider
- component.
- n_semitones : SliderConfig
- Configuration settings for a semitone pitch shift slider
- component.
- show_intermediate_audio : CheckboxConfig
- Configuration settings for a show intermediate audio checkbox
- component.
- intermediate_audio : SongIntermediateAudioConfig
- Configuration settings for intermediate audio components.
+ one_click : OneClickSpeechGenerationConfig
+ Configuration settings for the one-click speech generation tab.
+ multi_step : MultiStepSpeechGenerationConfig
+ Configuration settings for the multi-step speech generation tab.
+
+ """
+
+ one_click: OneClickSpeechGenerationConfig = OneClickSpeechGenerationConfig()
+ multi_step: MultiStepSpeechGenerationConfig = MultiStepSpeechGenerationConfig()
+
+
+class TotalTrainingConfig(BaseModel):
+ """
+ All configuration settings for training tabs.
+
+ Attributes
+ ----------
+ training : TrainingConfig
+ Configuration settings for the multi-step training tab.
- See Also
- --------
- SongGenerationConfig
- Parent model defining common component configuration settings
- for song generation tabs.
+ """
+ multi_step: MultiStepTrainingConfig = MultiStepTrainingConfig()
+
+
+class TotalManagementConfig(BaseModel):
"""
+ All configuration settings for management tabs.
- n_octaves: SliderConfig = SliderConfig.octave_shift(
- label="Vocal pitch shift",
- info=(
- "The number of octaves to shift the pitch of the converted vocals by. Use 1"
- " for male-to-female and -1 for vice-versa."
- ),
- )
+ Attributes
+ ----------
+ model : ModelManagementConfig
+ Configuration settings for the model management tab.
+ audio : AudioManagementConfig
+ Configuration settings for the audio management tab.
+ settings : SettingsManagementConfig
+ Configuration settings for the settings management tab.
- n_semitones: SliderConfig = SliderConfig.semitone_shift(
- label="Overall pitch shift",
- info=(
- "The number of semi-tones to shift the pitch of the converted vocals,"
- " instrumentals and backup vocals by."
- ),
- )
- show_intermediate_audio: CheckboxConfig = CheckboxConfig(
- label="Show intermediate audio",
- info="Show intermediate audio tracks produced during song cover generation.",
- value=False,
- exclude_value=True,
- )
- intermediate_audio: SongIntermediateAudioConfig = SongIntermediateAudioConfig()
+ """
+
+ model: ModelManagementConfig = ModelManagementConfig()
+ audio: AudioManagementConfig = AudioManagementConfig()
+ settings: SettingsManagementConfig = SettingsManagementConfig()
-class SongInputAudioConfig(BaseModel):
+class TotalConfig(BaseModel):
"""
- Configuration settings for input audio components in the multi-step
- song generation tab.
+ All configuration settings for the Ultimate RVC app.
Attributes
----------
- audio : AudioConfig
- Configuration settings for the input audio component.
- vocals : AudioConfig
- Configuration settings for the vocals audio component.
- converted_vocals : AudioConfig
- Configuration settings for the converted vocals audio
- component.
- instrumentals : AudioConfig
- Configuration settings for the instrumentals audio
- component.
- backup_vocals : AudioConfig
- Configuration settings for the backup vocals audio
- component.
- main_vocals : AudioConfig
- Configuration settings for the main vocals audio
- component.
- shifted_instrumentals : AudioConfig
- Configuration settings for the shifted instrumentals audio
- component.
- shifted_backup_vocals : AudioConfig
- Configuration settings for the shifted backup vocals audio
- component.
- all : list[AudioConfig]
- List of configuration settings for all input audio
- components in the multi-step song generation tab.
+ song : TotalSongGenerationConfig
+ Configuration settings for song generation tabs.
+ speech : TotalSpeechGenerationConfig
+ Configuration settings for speech generation tabs.
+ training : TotalTrainingConfig
+ Configuration settings for training tabs.
+ management : TotalManagementConfig
+ Configuration settings for management tabs.
"""
- audio: AudioConfig = AudioConfig.input(label="Audio")
- vocals: AudioConfig = AudioConfig.input(label="Vocals")
- converted_vocals: AudioConfig = AudioConfig.input(label="Vocals")
- instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
- backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")
- main_vocals: AudioConfig = AudioConfig.input(label="Main vocals")
- shifted_instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
- shifted_backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")
+ song: TotalSongGenerationConfig = TotalSongGenerationConfig()
+ speech: TotalSpeechGenerationConfig = TotalSpeechGenerationConfig()
+ training: TotalTrainingConfig = TotalTrainingConfig()
+ management: TotalManagementConfig = TotalManagementConfig()
- @property
- def all(self) -> list[AudioConfig]:
+ @cached_property
+ def all(self) -> list[AnyComponentConfig]:
"""
- Retrieve configuration settings for all input audio components
- in the multi-step song generation tab.
+ Recursively collect those component configuration models nested
+ within the current model instance, which have values that are
+ not excluded.
Returns
-------
- list[AudioConfig]
- List of configuration settings for all input audio
- components in the multi-step song generation tab.
+ list[AnyComponentConfig]
+ A list of component configuration models found within the
+ current model instance, which have values that are not
+ excluded.
"""
- return [getattr(self, field) for field in self.__annotations__]
+ def _collect(model: BaseModel) -> list[AnyComponentConfig]:
+ component_configs: list[Any] = []
+ for _, value in model:
+ if isinstance(value, ComponentConfig):
+ if not value.exclude_value:
+ component_configs.append(value)
+ elif isinstance(value, BaseModel):
+ component_configs.extend(_collect(value))
+ return component_configs
+
+ return _collect(self)
-class SongDirsConfig(BaseModel):
+
+
+
+
+class BaseTabConfig(BaseModel):
"""
- Configuration settings for song directory components in the
- multi-step song generation tab.
+ Base model defining common component configuration settings for
+ UI tabs.
Attributes
----------
- separate_audio : DropdownConfig
- Configuration settings for the song directory component
- for separating audio.
- convert_vocals : DropdownConfig
- Configuration settings for the song directory component
- for converting vocals.
- postprocess_vocals : DropdownConfig
- Configuration settings for the song directory component
- for postprocessing vocals.
- pitch_shift_background : DropdownConfig
- Configuration settings for the song directory component
- for pitch-shifting background audio.
- mix : DropdownConfig
- Configuration settings for the song directory component
- for mixing audio.
- all : list[gr.Dropdown]
- List of instances of all song directory components in the
- multi-step song generation tab.
+ embedder_model : DropdownConfig
+ Configuration settings for an embedder model dropdown component.
+ custom_embedder_model : DropdownConfig
+ Configuration settings for a custom embedder model dropdown
+ component.
"""
- separate_audio: DropdownConfig = DropdownConfig.song_dir()
- convert_vocals: DropdownConfig = DropdownConfig.song_dir()
- postprocess_vocals: DropdownConfig = DropdownConfig.song_dir()
- pitch_shift_background: DropdownConfig = DropdownConfig.song_dir()
- mix: DropdownConfig = DropdownConfig.song_dir()
-
- @property
- def all(self) -> list[gr.Dropdown]:
- """
- Retrieve instances of all song directory components in the
- multi-step song generation tab.
-
- Returns
- -------
- list[gr.Dropdown]
- List of instances of all song directory components in
- the multi-step song generation tab.
+ embedder_model: DropdownConfig = DropdownConfig(
+ label="Embedder model",
+ info="The model to use for generating speaker embeddings.",
+ value=EmbedderModel.CONTENTVEC,
+ choices=list(EmbedderModel),
+ exclude_value=True,
+ )
+ custom_embedder_model: DropdownConfig = DropdownConfig(
+ label="Custom embedder model",
+ info="Select a custom embedder model from the dropdown.",
+ value=None,
+ visible=False,
+ render=False,
+ exclude_value=True,
+ )
- """
- return [getattr(self, field).instance for field in self.__annotations__]
-class MultiStepSongGenerationConfig(SongGenerationConfig):
+class TrainingConfig(BaseTabConfig):
"""
- Configuration settings for multi-step song generation tab.
+ Common component configuration settings for training tabs.
Attributes
----------
- separation_model : DropdownConfig
- Configuration settings for a separation model dropdown
+ dataset_type : DropdownConfig
+ Configuration settings for a dataset type dropdown component.
+ dataset : DropdownConfig
+ Configuration settings for a dataset dropdown component.
+ dataset_name : TextboxConfig
+ Configuration settings for a dataset name textbox component.
+ preprocess_model : DropdownConfig
+ Configuration settings for a model name dropdown component
+ for audio preprocessing.
+ sample_rate : DropdownConfig
+ Configuration settings for a sample rate dropdown component.
+ filter_audio : CheckboxConfig
+ Configuration settings for a filter audio checkbox component.
+ clean_audio : CheckboxConfig
+ Configuration settings for a clean audio checkbox component.
+ clean_strength : SliderConfig
+ Configuration settings for a clean strength slider component.
+ split_method : DropdownConfig
+ Configuration settings for an audio splitting method dropdown
+ component.
+ chunk_len : SliderConfig
+ Configuration settings for a chunk length slider component.
+ overlap_len : SliderConfig
+ Configuration settings for an overlap length slider component.
+ preprocess_cores : SliderConfig
+ Configuration settings for a CPU cores slider component for
+ preprocessing.
+ extract_model : DropdownConfig
+ Configuration settings for a model name dropdown component for
+ feature extraction.
+ f0_method : DropdownConfig
+ Configuration settings for an F0 method dropdown component.
+ hop_length : SliderConfig
+ Configuration settings for a hop length slider component.
+ include_mutes : SliderConfig
+ Configuration settings for an include mutes slider component.
+    extraction_cores : SliderConfig
+ Configuration settings for a CPU cores slider component for
+ feature extraction.
+    extraction_acceleration : DropdownConfig
+ Configuration settings for a hardware acceleration component for
+ feature extraction.
+ extraction_gpus : DropdownConfig
+        Configuration settings for a GPU dropdown component for feature
+ extraction.
+ train_model : DropdownConfig
+ Configuration settings for a model name dropdown component for
+ training.
+ num_epochs : SliderConfig
+ Configuration settings for a number of epochs slider component.
+ batch_size : SliderConfig
+ Configuration settings for a batch size slider component.
+ detect_overtraining : CheckboxConfig
+ Configuration settings for a detect overtraining checkbox
+ component.
+ overtraining_threshold : SliderConfig
+ Configuration settings for an overtraining threshold slider
+ component.
+ vocoder : DropdownConfig
+ Configuration settings for a vocoder dropdown component.
+ index_algorithm : DropdownConfig
+ Configuration settings for an index algorithm dropdown
component.
- segment_size : RadioConfig
- Configuration settings for a segment size radio component.
- n_octaves : SliderConfig
- Configuration settings for an octave pitch shift slider
+ pretrained_type : DropdownConfig
+ Configuration settings for a pretrained model type dropdown
component.
- n_semitones : SliderConfig
- Configuration settings for a semitone pitch shift slider
+ custom_pretrained_model : DropdownConfig
+ Configuration settings for a custom pretrained model dropdown
component.
- n_semitones_instrumentals : SliderConfig
- Configuration settings for an instrumentals pitch shift slider
+ save_interval : SliderConfig
+ Configuration settings for a save-interval slider component.
+ save_all_checkpoints : CheckboxConfig
+ Configuration settings for a save-all-checkpoints checkbox
component.
- n_semitones_backup_vocals : SliderConfig
- Configuration settings for a backup vocals pitch shift slider
+ save_all_weights : CheckboxConfig
+ Configuration settings for a save-all-weights checkbox
+ component.
+ clear_saved_data : CheckboxConfig
+ Configuration settings for a clear-saved-data checkbox
+ component.
+ upload_model : CheckboxConfig
+ Configuration settings for an upload voice model checkbox
+ component.
+ upload_name : TextboxConfig
+ Configuration settings for an upload name textbox component.
+    training_acceleration : DropdownConfig
+ Configuration settings for a hardware acceleration component for
+ training.
+ training_gpus : DropdownConfig
+ Configuration settings for a GPU dropdown component for
+ training.
+ preload_dataset : CheckboxConfig
+ Configuration settings for a preload dataset checkbox component.
+ reduce_memory_usage : CheckboxConfig
+ Configuration settings for a reduce-memory-usage checkbox
component.
- input_audio : SongInputAudioConfig
- Configuration settings for input audio components.
- song_dirs : SongDirsConfig
- Configuration settings for song directory components.
See Also
--------
- SongGenerationConfig
+ BaseTabConfig
Parent model defining common component configuration settings
- for song generation tabs.
+ for UI tabs.
"""
- separation_model: DropdownConfig = DropdownConfig(
- label="Separation model",
- info="The model to use for audio separation.",
- value=SeparationModel.UVR_MDX_NET_VOC_FT,
- choices=list(SeparationModel2),
+ dataset_type: DropdownConfig = DropdownConfig(
+ label="Dataset type",
+ info="Select the type of dataset to preprocess.",
+ value=DatasetType.NEW_DATASET,
+ choices=list(DatasetType),
+ exclude_value=True,
+ )
+ dataset: DropdownConfig = DropdownConfig(
+ label="Dataset path",
+ info=(
+ "The path to an existing dataset. Either select a path to a previously"
+ " created dataset or provide a path to an external dataset."
+ ),
+ value=None,
+ allow_custom_value=True,
+ visible=False,
+ render=False,
+ exclude_value=True,
+ )
+ dataset_name: TextboxConfig = TextboxConfig(
+ label="Dataset name",
+ info=(
+ "The name of the new dataset. If the dataset already exists, the provided"
+ " audio files will be added to it."
+ ),
+ value="My dataset",
+ exclude_value=True,
+ )
+ preprocess_model: DropdownConfig = DropdownConfig(
+ label="Model name",
+ info=(
+ "Name of the model to preprocess the given dataset for. Either select an"
+ " existing model from the dropdown or provide the name of a new model."
+ ),
+ value="My model",
+ allow_custom_value=True,
+ render=False,
+ exclude_value=True,
+ )
+ sample_rate: DropdownConfig = DropdownConfig(
+ label="Sample rate",
+ info="Target sample rate for the audio files in the provided dataset.",
+ value=TrainingSampleRate.HZ_40K,
+ choices=list(TrainingSampleRate),
+ )
+ filter_audio: CheckboxConfig = CheckboxConfig(
+ label="Filter audio",
+ info=(
+ "Whether to remove low-frequency sounds from the audio files in the"
+            " provided dataset by applying a high-pass butterworth filter."
+ ),
+ value=True,
+ )
+ clean_audio: CheckboxConfig = CheckboxConfig(
+ label="Clean audio",
+ info=(
+ "Whether to clean the audio files in the provided dataset using noise"
+            " reduction algorithms."
+ ),
+ value=False,
+ exclude_value=True,
+ )
+ clean_strength: SliderConfig = SliderConfig.clean_strength(visible=False)
+ split_method: DropdownConfig = DropdownConfig(
+ label="Audio splitting method",
+ info=(
+ "The method to use for splitting the audio files in the provided dataset."
+ " Use the `Skip` method to skip splitting if the audio files are already"
+ " split. Use the `Simple` method if excessive silence has already been"
+ " removed from the audio files. Use the `Automatic` method for automatic"
+ " silence detection and splitting around it."
+ ),
+ value=AudioSplitMethod.AUTOMATIC,
+ choices=list(AudioSplitMethod),
+ exclude_value=True,
+ )
+ chunk_len: SliderConfig = SliderConfig(
+ label="Chunk length",
+ info="Length of split audio chunks.",
+ value=3.0,
+ minimum=0.5,
+ maximum=5.0,
+ step=0.1,
+ visible=False,
+ )
+ overlap_len: SliderConfig = SliderConfig(
+ label="Overlap length",
+ info="Length of overlap between split audio chunks.",
+ value=0.3,
+ minimum=0.0,
+ maximum=0.4,
+ step=0.1,
+ visible=False,
+ )
+ preprocess_cores: SliderConfig = SliderConfig.cpu_cores()
+
+ extract_model: DropdownConfig = DropdownConfig(
+ label="Model name",
+ info=(
+ "Name of the model with an associated preprocessed dataset to extract"
+ " training features from. When a new dataset is preprocessed, its"
+ " associated model is selected by default."
+ ),
+ value=None,
+ render=False,
+ exclude_value=True,
+ )
+ f0_method: DropdownConfig = DropdownConfig(
+ label="F0 method",
+ info="The method to use for extracting pitch features.",
+ value=TrainingF0Method.RMVPE,
+ choices=list(TrainingF0Method),
+ exclude_value=True,
+ )
+
+ hop_length: SliderConfig = SliderConfig.hop_length(
+ label="Hop length",
+        info="The hop length to use for extracting pitch features.",
+ visible=False,
+ )
+ include_mutes: SliderConfig = SliderConfig(
+ label="Include mutes",
+ info=(
+ "The number of mute audio files to include in the generated training file"
+ " list. Adding silent files enables the training model to handle pure"
+ " silence in inferred audio files. If the preprocessed audio dataset"
+ " already contains segments of pure silence, set this to 0."
+ ),
+ value=0,
+ minimum=0,
+ maximum=10,
+ step=1,
+ )
+ extraction_cores: SliderConfig = SliderConfig.cpu_cores()
+ extraction_acceleration: DropdownConfig = DropdownConfig.hardware_acceleration()
+ extraction_gpus: DropdownConfig = DropdownConfig.gpu()
+
+ train_model: DropdownConfig = DropdownConfig(
+ label="Model name",
+ info=(
+ "Name of the model to train. When training features are extracted for a new"
+ " model, its name is selected by default."
+ ),
+ value=None,
+ render=False,
+ exclude_value=True,
+ )
+ num_epochs: SliderConfig = SliderConfig(
+ label="Number of epochs",
+ info=(
+ "The number of epochs to train the voice model. A higher number can improve"
+ " voice model performance but may lead to overtraining."
+ ),
+ value=500,
+ minimum=1,
+ maximum=5000,
+ step=1,
+ )
+ batch_size: SliderConfig = SliderConfig(
+ label="Batch size",
+ info=(
+ "The number of samples in each training batch. It is advisable to align"
+ " this value with the available VRAM of your GPU."
+ ),
+ value=16,
+ minimum=1,
+ maximum=128,
+ step=1,
+ )
+ detect_overtraining: CheckboxConfig = CheckboxConfig(
+ label="Detect overtraining",
+ info=(
+ "Whether to detect overtraining to prevent the voice model from learning"
+ " the training data too well and losing the ability to generalize to new"
+ " data."
+ ),
+ value=True,
+ exclude_value=True,
+ )
+ overtraining_threshold: SliderConfig = SliderConfig(
+ label="Overtraining threshold",
+ info=(
+ "The maximum number of epochs to continue training without any observed"
+ " improvement in voice model performance."
+ ),
+ value=500,
+ minimum=1,
+ maximum=1000,
+ visible=False,
+ )
+ vocoder: DropdownConfig = DropdownConfig(
+ label="Vocoder",
+ info=(
+ "The vocoder to use for audio synthesis during training. HiFi-GAN provides"
+ " basic audio fidelity, while RefineGAN provides the highest audio"
+ " fidelity."
+ ),
+ value=Vocoder.HIFI_GAN,
+ choices=list(Vocoder),
+ )
+ index_algorithm: DropdownConfig = DropdownConfig(
+ label="Index algorithm",
+ info=(
+ "The method to use for generating an index file for the trained voice"
+ " model. `KMeans` is particularly useful for large datasets."
+ ),
+ value=IndexAlgorithm.AUTO,
+ choices=list(IndexAlgorithm),
)
- segment_size: RadioConfig = RadioConfig(
- label="Segment size",
+ pretrained_type: DropdownConfig = DropdownConfig(
+ label="Pretrained model type",
info=(
- "The size of the segments into which the audio is split. Using a larger"
- " size consumes more resources, but may give better results."
+ "The type of pretrained model to finetune the voice model on. `None` will"
+ " train the voice model from scratch, while `Default` will use a pretrained"
+ " model tailored to the specific voice model architecture. `Custom` will"
+            " use a custom pretrained model that you provide."
),
- value=SegmentSize.SEG_2048,
- choices=list(SegmentSize),
+ value=PretrainedType.DEFAULT,
+ choices=list(PretrainedType),
+ exclude_value=True,
)
- n_octaves: SliderConfig = SliderConfig.octave_shift(
- label="Pitch shift (octaves)",
+ custom_pretrained_model: DropdownConfig = DropdownConfig(
+ label="Custom pretrained model",
+ info="Select a custom pretrained model to finetune from the dropdown.",
+ value=None,
+ visible=False,
+ render=False,
+ exclude_value=True,
+ )
+ save_interval: SliderConfig = SliderConfig(
+ label="Save interval",
info=(
- "The number of octaves to pitch-shift the converted voice by. Use 1 for"
- " male-to-female and -1 for vice-versa."
+            "The epoch interval at which to save voice model weights and"
+ " checkpoints. The best model weights are always saved regardless of this"
+ " setting."
),
+ value=10,
+ minimum=1,
+ maximum=100,
+ step=1,
)
- n_semitones: SliderConfig = SliderConfig.semitone_shift(
- label="Pitch shift (semi-tones)",
+ save_all_checkpoints: CheckboxConfig = CheckboxConfig(
+ label="Save all checkpoints",
info=(
- "The number of semi-tones to pitch-shift the converted vocals by. Altering"
- " this slightly reduces sound quality."
+ "Whether to save a unique checkpoint at each save interval. If not enabled,"
+ " only the latest checkpoint will be saved at each interval."
),
+ value=True,
)
- n_semitones_instrumentals: SliderConfig = SliderConfig.semitone_shift(
- label="Instrumental pitch shift",
- info="The number of semi-tones to pitch-shift the instrumentals by.",
+ save_all_weights: CheckboxConfig = CheckboxConfig(
+ label="Save all weights",
+ info=(
+ "Whether to save unique voice model weights at each save interval. If not"
+ " enabled, only the best voice model weights will be saved."
+ ),
+ value=True,
)
- n_semitones_backup_vocals: SliderConfig = SliderConfig.semitone_shift(
- label="Backup vocal pitch shift",
- info="The number of semi-tones to pitch-shift the backup vocals by.",
+ clear_saved_data: CheckboxConfig = CheckboxConfig(
+ label="Clear saved data",
+ info=(
+ "Whether to delete any existing training data associated with the voice"
+ " model before training commences. Enable this setting only if you are"
+ " training a new voice model from scratch or restarting training."
+ ),
+ value=False,
)
- input_audio: SongInputAudioConfig = SongInputAudioConfig()
- song_dirs: SongDirsConfig = SongDirsConfig()
-
-
-class SpeechIntermediateAudioConfig(BaseModel):
- """
- Configuration settings for intermediate audio components in the
- one-click speech generation tab.
-
- Attributes
- ----------
- speech : AudioConfig
- Configuration settings for the input speech audio component.
- converted_speech : AudioConfig
- Configuration settings for the converted speech audio component.
- all : list[gr.Audio]
- List of instances of all intermediate audio components in the
- speech generation tab.
-
- """
-
- speech: AudioConfig = AudioConfig.intermediate(label="Speech")
- converted_speech: AudioConfig = AudioConfig.intermediate(label="Converted speech")
-
- @property
- def all(self) -> list[gr.Audio]:
- """
- Retrieve instances of all intermediate audio components in the
- speech generation tab.
-
- Returns
- -------
- list[gr.Audio]
- List of instances of all intermediate audio components in
- the speech generation tab.
-
- """
- return [getattr(self, field).instance for field in self.__annotations__]
-
-
-class OneClickSpeechGenerationConfig(SpeechGenerationConfig):
- """
- Configuration settings for one-click speech generation tab.
-
- Attributes
- ----------
- intermediate_audio : SpeechIntermediateAudioConfig
- Configuration settings for intermediate audio components.
- show_intermediate_audio : CheckboxConfig
- Configuration settings for a show intermediate audio checkbox
- component.
-
- See Also
- --------
- SpeechGenerationConfig
- Parent model defining common component configuration settings
- for speech generation tabs.
-
- """
-
- intermediate_audio: SpeechIntermediateAudioConfig = SpeechIntermediateAudioConfig()
-
- show_intermediate_audio: CheckboxConfig = CheckboxConfig(
- label="Show intermediate audio",
- info="Show intermediate audio tracks produced during speech generation.",
+ upload_model: CheckboxConfig = CheckboxConfig(
+ label="Upload voice model",
+ info=(
+ "Whether to automatically upload the trained voice model so that it can be"
+ " used for generation tasks within the Ultimate RVC app."
+ ),
value=False,
exclude_value=True,
)
+ upload_name: TextboxConfig = TextboxConfig(
+ label="Upload name",
+ info="The name to give the uploaded voice model.",
+ value=None,
+ visible=False,
+ exclude_value=True,
+ )
+ training_acceleration: DropdownConfig = DropdownConfig.hardware_acceleration()
+ training_gpus: DropdownConfig = DropdownConfig.gpu()
+ preload_dataset: CheckboxConfig = CheckboxConfig(
+ label="Preload dataset",
+ info=(
+ "Whether to preload all training data into GPU memory. This can improve"
+            " training speed but requires a lot of VRAM."
+ ),
+ value=True,
+ )
+ reduce_memory_usage: CheckboxConfig = CheckboxConfig(
+ label="Reduce memory usage",
+ info=(
+ "Whether to reduce VRAM usage at the cost of slower training speed by"
+ " enabling activation checkpointing. This is useful for GPUs with limited"
+ " memory (e.g., <6GB VRAM) or when training with a batch size larger than"
+ " what your GPU can normally accommodate."
+ ),
+ value=False,
+ )
-class SpeechInputAudioConfig(BaseModel):
- """
- Configuration settings for input audio components in the multi-step
- speech generation tab.
-
- Attributes
- ----------
- speech : AudioConfig
- Configuration settings for the input speech audio component.
- converted_speech : AudioConfig
- Configuration settings for the converted speech audio component.
-
- all : list[AudioConfig]
- List of configuration settings for all input audio components in
- the multi-step speech generation tab.
-
- """
-
- speech: AudioConfig = AudioConfig.input("Speech")
- converted_speech: AudioConfig = AudioConfig.input("Converted speech")
-
- @property
- def all(self) -> list[AudioConfig]:
- """
- Retrieve configuration settings for all input audio components
- in the multi-step speech generation tab.
-
- Returns
- -------
- list[AudioConfig]
- List of configuration settings for all input audio
- components in the multi-step speech generation tab.
-
- """
- return [getattr(self, field) for field in self.__annotations__]
-
-
-class MultiStepSpeechGenerationConfig(SpeechGenerationConfig):
- """
- Configuration settings for the multi-step speech generation tab.
-
- Attributes
- ----------
- input_audio : SpeechInputAudioConfig
- Configuration settings for input audio components.
-
- See Also
- --------
- SpeechGenerationConfig
- Parent model defining common component configuration settings
- for speech generation tabs.
-
- """
-
- input_audio: SpeechInputAudioConfig = SpeechInputAudioConfig()
-
-
-class MultiStepTrainingConfig(TrainingConfig):
- """Configuration settings for multi-step training tab."""
-
-
-class ModelManagementConfig(BaseModel):
+class GenerationConfig(BaseTabConfig):
"""
+ Common component configuration settings for generation tabs.
- Configuration settings for model management tab.
-
- Attributes
- ----------
- voices : DropdownConfig
- Configuration settings for delete voice models dropdown
- component.
- embedders : DropdownConfig
- Configuration settings for delete embedder models dropdown
+ voice_model : DropdownConfig
+ Configuration settings for a voice model dropdown component.
+ f0_methods : DropdownConfig
+ Configuration settings for a pitch extraction algorithms
+ dropdown component.
+ index_rate : SliderConfig
+ Configuration settings for an index rate slider component.
+ rms_mix_rate : SliderConfig
+ Configuration settings for a RMS mix rate slider component.
+ protect_rate : SliderConfig
+ Configuration settings for a protect rate slider component.
+ split_voice : CheckboxConfig
+ Configuration settings for a split voice checkbox component.
+ autotune_voice: CheckboxConfig
+ Configuration settings for an autotune voice checkbox component.
+ autotune_strength: SliderConfig
+ Configuration settings for an autotune strength slider
component.
- pretraineds : DropdownConfig
- Configuration settings for delete pretrained models dropdown
+ sid : NumberConfig
+ Configuration settings for a speaker ID number component.
+ output_sr : DropdownConfig
+ Configuration settings for an output sample rate dropdown
component.
- traineds : DropdownConfig
- Configuration settings for delete training models dropdown
+ output_format : DropdownConfig
+ Configuration settings for an output format dropdown
component.
- dummy_checkbox : CheckboxConfig
- Configuration settings for a dummy checkbox component.
+ output_name : TextboxConfig
+ Configuration settings for an output name textbox component.
+
+ See Also
+ --------
+ BaseTabConfig
+ Parent model defining common component configuration settings
+ for UI tabs.
"""
- voices: DropdownConfig = DropdownConfig.multi_delete(
- label="Voice models",
- info="Select one or more voice models to delete.",
+ voice_model: DropdownConfig = DropdownConfig(
+ label="Voice model",
+ info="Select a model to use for voice conversion.",
+ value=None,
+ render=False,
+ exclude_value=True,
)
- embedders: DropdownConfig = DropdownConfig.multi_delete(
- label="Custom embedder models",
- info="Select one or more embedder models to delete.",
+ f0_methods: DropdownConfig = DropdownConfig(
+ label="Pitch extraction algorithm(s)",
+ info=(
+ "If more than one method is selected, then the median of the pitch values"
+ " extracted by each method is used. RMVPE is recommended for most cases and"
+ " is the default when no method is selected."
+ ),
+ value=[F0Method.RMVPE],
+ choices=list(F0Method),
+ multiselect=True,
)
- pretraineds: DropdownConfig = DropdownConfig.multi_delete(
- label="Custom pretrained models",
- info="Select one or more pretrained models to delete.",
+ index_rate: SliderConfig = SliderConfig(
+ label="Index rate",
+ info=(
+ "Increase to bias the conversion towards the accent of the voice model."
+ " Decrease to potentially reduce artifacts coming from the voice"
+            " model."
+ ),
+ value=0.3,
+ minimum=0.0,
+ maximum=1.0,
)
- traineds: DropdownConfig = DropdownConfig.multi_delete(
- label="Training models",
- info="Select one or more training models to delete.",
+ rms_mix_rate: SliderConfig = SliderConfig(
+ label="RMS mix rate",
+ info=(
+ "How much to mimic the loudness (0) of the input voice or a fixed loudness"
+            " (1). A value of 1 is recommended for most cases."
+ ),
+ value=1.0,
+ minimum=0.0,
+ maximum=1.0,
+ )
+ protect_rate: SliderConfig = SliderConfig(
+ label="Protect rate",
+ info=(
+ "Controls the extent to which consonants and breathing sounds are protected"
+ " from artifacts. A higher value offers more protection but may worsen the"
+            " indexing effect."
+ ),
+ value=0.33,
+ minimum=0.0,
+ maximum=0.5,
)
- dummy_checkbox: CheckboxConfig = CheckboxConfig(
+ hop_length: SliderConfig = SliderConfig.hop_length(
+ label="Hop length",
+ info=(
+ "How often the CREPE-based pitch extraction method checks for pitch changes"
+ " measured in milliseconds. Lower values lead to longer conversion times"
+ " and a higher risk of voice cracks, but better pitch accuracy."
+ ),
+ visible=True,
+ )
+
+ split_voice: CheckboxConfig = CheckboxConfig(
+ label="Split input voice",
+ info=(
+ "Whether to split the input voice track into smaller segments before"
+ " converting it. This can improve output quality for longer voice tracks."
+ ),
+ value=False,
+ )
+ autotune_voice: CheckboxConfig = CheckboxConfig(
+ label="Autotune converted voice",
+        info="Whether to apply autotune to the converted voice.",
value=False,
+ exclude_value=True,
+ )
+ autotune_strength: SliderConfig = SliderConfig(
+ label="Autotune intensity",
+ info=(
+ "Higher values result in stronger snapping to the chromatic grid and"
+ " artifacting."
+ ),
+ value=1.0,
+ minimum=0.0,
+ maximum=1.0,
visible=False,
+ )
+ sid: NumberConfig = NumberConfig(
+ label="Speaker ID",
+ info="Speaker ID for multi-speaker-models.",
+ value=0,
+ precision=0,
+ )
+ output_sr: DropdownConfig = DropdownConfig(
+ label="Output sample rate",
+ info="The sample rate of the mixed output track.",
+ value=SampleRate.HZ_44100,
+ choices=list(SampleRate),
+ )
+ output_format: DropdownConfig = DropdownConfig(
+ label="Output format",
+ info="The audio format of the mixed output track.",
+ value=AudioExt.MP3,
+ choices=list(AudioExt),
+ )
+ output_name: TextboxConfig = TextboxConfig(
+ label="Output name",
+ info="If no name is provided, a suitable name will be generated automatically.",
+ value=None,
+ placeholder="Ultimate RVC output",
exclude_value=True,
)
-class AudioManagementConfig(BaseModel):
+class SongGenerationConfig(GenerationConfig):
"""
- Configuration settings for audio management tab.
+ Common component configuration settings for song generation tabs.
Attributes
----------
- intermediate : DropdownConfig
- Configuration settings for delete intermediate audio files
- dropdown component
- speech : DropdownConfig
- Configuration settings for delete speech audio files dropdown
- component.
- output : DropdownConfig
- Configuration settings for delete output audio files dropdown
+ source_type : DropdownConfig
+ Configuration settings for a source type dropdown component.
+ source : TextboxConfig
+ Configuration settings for an input source textbox component.
+ cached_song : DropdownConfig
+ Configuration settings for a cached song dropdown component.
+ clean_strength : SliderConfig
+ Configuration settings for a clean strength slider component.
+ clean_voice : CheckboxConfig
+ Configuration settings for a clean voice checkbox component.
+ room_size : SliderConfig
+ Configuration settings for a room size slider component.
+ wet_level : SliderConfig
+ Configuration settings for a wetness level slider component.
+ dry_level : SliderConfig
+ Configuration settings for a dryness level slider component.
+ damping : SliderConfig
+ Configuration settings for a damping level slider component.
+ main_gain : SliderConfig
+ Configuration settings for a main gain slider component.
+ inst_gain : SliderConfig
+ Configuration settings for an instrumentals gain slider
component.
- dataset : DropdownConfig
- Configuration settings for delete dataset audio files dropdown
+ backup_gain : SliderConfig
+ Configuration settings for a backup vocals gain slider
component.
- dummy_checkbox : CheckboxConfig
- Configuration settings for a dummy checkbox component.
+
+ See Also
+ --------
+ GenerationConfig
+ Parent model defining common component configuration settings
+ for generation tabs.
"""
- intermediate: DropdownConfig = DropdownConfig.multi_delete(
- label="Song directories",
+ source_type: DropdownConfig = DropdownConfig(
+ label="Source type",
+ info="The type of source to retrieve a song from.",
+ value=SongSourceType.LOCAL_FILE,
+ choices=list(SongSourceType),
+ type="index",
+ exclude_value=True,
+ )
+ source: TextboxConfig = TextboxConfig(
+ label="Source",
+ info="Link to a song on YouTube or the full path of a local audio file.",
+ value=None,
+ exclude_value=True,
+ )
+ cached_song: DropdownConfig = DropdownConfig(
+ label="Source",
+ info="Select a song from the list of cached songs.",
+ value=None,
+ visible=False,
+ render=False,
+ exclude_value=True,
+ )
+ clean_voice: CheckboxConfig = CheckboxConfig(
+ label="Clean converted voice",
info=(
- "Select one or more song directories containing intermediate audio files to"
- " delete."
+ "Whether to clean the converted voice using noise reduction"
+ " algorithms.
"
),
+ value=False,
+ exclude_value=True,
)
- speech: DropdownConfig = DropdownConfig.multi_delete(
- label="Speech audio files",
- info="Select one or more speech audio files to delete.",
+ clean_strength: SliderConfig = SliderConfig.clean_strength(visible=False)
+ room_size: SliderConfig = SliderConfig(
+ label="Room size",
+ info=(
+ "Size of the room which reverb effect simulates. Increase for longer reverb"
+ " time."
+ ),
+ value=0.15,
+ minimum=0.0,
+ maximum=1.0,
)
- output: DropdownConfig = DropdownConfig.multi_delete(
- label="Output audio files",
- info="Select one or more output audio files to delete.",
+ wet_level: SliderConfig = SliderConfig(
+ label="Wetness level",
+ info="Loudness of converted vocals with reverb effect applied.",
+ value=0.2,
+ minimum=0.0,
+ maximum=1.0,
+ )
+ dry_level: SliderConfig = SliderConfig(
+ label="Dryness level",
+ info="Loudness of converted vocals without reverb effect applied.",
+ value=0.8,
+ minimum=0.0,
+ maximum=1.0,
+ )
+ damping: SliderConfig = SliderConfig(
+ label="Damping level",
+ info="Absorption of high frequencies in reverb effect.",
+ value=0.7,
+ minimum=0.0,
+ maximum=1.0,
+ )
+ main_gain: SliderConfig = SliderConfig.gain(
+ label="Main gain",
+ info="The gain to apply to the main vocals.",
)
- dataset: DropdownConfig = DropdownConfig.multi_delete(
- label="Dataset audio files",
- info="Select one or more datasets containing audio files to delete.",
+ inst_gain: SliderConfig = SliderConfig.gain(
+ label="Instrumentals gain",
+ info="The gain to apply to the instrumentals.",
)
-
- dummy_checkbox: CheckboxConfig = CheckboxConfig(
- value=False,
- visible=False,
- exclude_value=True,
+ backup_gain: SliderConfig = SliderConfig.gain(
+ label="Backup gain",
+ info="The gain to apply to the backup vocals.",
)
-class SettingsManagementConfig(BaseModel):
+class SpeechGenerationConfig(GenerationConfig):
"""
- Configuration settings for settings management tab.
+ Common component configuration settings for speech generation tabs.
Attributes
----------
- dummy_checkbox : CheckboxConfig
- Configuration settings for a dummy checkbox component.
+ source_type : DropdownConfig
+ Configuration settings for a source type dropdown component.
+ source : TextboxConfig
+ Configuration settings for an input source textbox component.
+ edge_tts_voice : DropdownConfig
+ Configuration settings for an Edge TTS voice dropdown
+ component.
+ n_octaves : SliderConfig
+ Configuration settings for an octave pitch shift slider
+ component.
+ n_semitones : SliderConfig
+ Configuration settings for a semitone pitch shift slider
+ component.
+ tts_pitch_shift : SliderConfig
+ Configuration settings for a TTS pitch shift slider
+ component.
+ tts_speed_change : SliderConfig
+ Configuration settings for a TTS speed change slider
+ component.
+ tts_volume_change : SliderConfig
+ Configuration settings for a TTS volume change slider
+ component.
+ clean_voice : CheckboxConfig
+ Configuration settings for a clean voice checkbox
+ component.
+ clean_strength : SliderConfig
+ Configuration settings for a clean strength slider
+ component.
+ output_gain : SliderConfig
+ Configuration settings for an output gain slider component.
+
+ See Also
+ --------
+ GenerationConfig
+ Parent model defining common component configuration settings
+ for generation tabs.
"""
- load_config_name: DropdownConfig = DropdownConfig(
- label="Configuration name",
- info="The name of a configuration to load UI settings from",
+ source_type: DropdownConfig = DropdownConfig(
+ label="Source type",
+ info="The type of source to generate speech from.",
+ value=SpeechSourceType.TEXT,
+ choices=list(SpeechSourceType),
+ type="index",
+ exclude_value=True,
+ )
+ source: TextboxConfig = TextboxConfig(
+ label="Source",
+ info="Text to generate speech from",
+ value=None,
+ exclude_value=True,
+ )
+ edge_tts_voice: DropdownConfig = DropdownConfig(
+ label="Edge TTS voice",
+ info="Select a voice to use for text to speech conversion.",
value=None,
render=False,
exclude_value=True,
)
- delete_config_names: DropdownConfig = DropdownConfig.multi_delete(
- label="Configuration names",
- info="Select the name of one or more configurations to delete",
+ n_octaves: SliderConfig = SliderConfig.octave_shift(
+ label="Octave shift",
+ info=(
+ "The number of octaves to pitch-shift the converted speech by. Use 1 for"
+ " male-to-female and -1 for vice-versa."
+ ),
)
- dummy_checkbox: CheckboxConfig = CheckboxConfig(
- value=False,
- visible=False,
+ n_semitones: SliderConfig = SliderConfig.semitone_shift(
+ label="Semitone shift",
+ info="The number of semi-tones to pitch-shift the converted speech by.",
+ )
+ tts_pitch_shift: SliderConfig = SliderConfig(
+ label="Edge TTS pitch shift",
+ info=(
+ "The number of hertz to shift the pitch of the speech generated by Edge"
+ " TTS."
+ ),
+ value=0,
+ minimum=-100,
+ maximum=100,
+ step=1,
+ )
+ tts_speed_change: SliderConfig = SliderConfig(
+ label="TTS speed change",
+ info="The percentual change to the speed of the speech generated by Edge TTS.",
+ value=0,
+ minimum=-50,
+ maximum=100,
+ step=1,
+ )
+ tts_volume_change: SliderConfig = SliderConfig(
+ label="TTS volume change",
+ info="The percentual change to the volume of the speech generated by Edge TTS.",
+ value=0,
+ minimum=-100,
+ maximum=100,
+ step=1,
+ )
+ clean_voice: CheckboxConfig = CheckboxConfig(
+ label="Clean converted voice",
+ info=(
+ "Whether to clean the converted voice using noise reduction"
+ " algorithms.
"
+ ),
+ value=True,
exclude_value=True,
)
-
-
-class TotalSongGenerationConfig(BaseModel):
- """
- All configuration settings for song generation tabs.
-
- Attributes
- ----------
- one_click : OneClickSongGenerationConfig
- Configuration settings for the one-click song generation tab.
- multi_step : MultiStepSongGenerationConfig
- Configuration settings for the multi-step song generation tab.
-
- """
-
- one_click: OneClickSongGenerationConfig = OneClickSongGenerationConfig()
- multi_step: MultiStepSongGenerationConfig = MultiStepSongGenerationConfig()
-
-
-class TotalSpeechGenerationConfig(BaseModel):
- """
- All configuration settings for speech generation tabs.
-
- Attributes
- ----------
- one_click : OneClickSpeechGenerationConfig
- Configuration settings for the one-click speech generation tab.
- multi_step : MultiStepSpeechGenerationConfig
- Configuration settings for the multi-step speech generation tab.
-
- """
-
- one_click: OneClickSpeechGenerationConfig = OneClickSpeechGenerationConfig()
- multi_step: MultiStepSpeechGenerationConfig = MultiStepSpeechGenerationConfig()
-
-
-class TotalTrainingConfig(BaseModel):
- """
- All configuration settings for training tabs.
-
- Attributes
- ----------
- training : TrainingConfig
- Configuration settings for the multi-step training tab.
-
- """
-
- multi_step: MultiStepTrainingConfig = MultiStepTrainingConfig()
-
-
-class TotalManagementConfig(BaseModel):
- """
- All configuration settings for management tabs.
-
- Attributes
- ----------
- model : ModelManagementConfig
- Configuration settings for the model management tab.
- audio : AudioManagementConfig
- Configuration settings for the audio management tab.
- settings : SettingsManagementConfig
- Configuration settings for the settings management tab.
-
- """
-
- model: ModelManagementConfig = ModelManagementConfig()
- audio: AudioManagementConfig = AudioManagementConfig()
- settings: SettingsManagementConfig = SettingsManagementConfig()
-
-
-class TotalConfig(BaseModel):
- """
- All configuration settings for the Ultimate RVC app.
-
- Attributes
- ----------
- song : TotalSongGenerationConfig
- Configuration settings for song generation tabs.
- speech : TotalSpeechGenerationConfig
- Configuration settings for speech generation tabs.
- training : TotalTrainingConfig
- Configuration settings for training tabs.
- management : TotalManagementConfig
- Configuration settings for management tabs.
-
- """
-
- song: TotalSongGenerationConfig = TotalSongGenerationConfig()
- speech: TotalSpeechGenerationConfig = TotalSpeechGenerationConfig()
- training: TotalTrainingConfig = TotalTrainingConfig()
- management: TotalManagementConfig = TotalManagementConfig()
-
- @cached_property
- def all(self) -> list[AnyComponentConfig]:
- """
- Recursively collect those component configuration models nested
- within the current model instance, which have values that are
- not excluded.
-
- Returns
- -------
- list[AnyComponentConfig]
- A list of component configuration models found within the
- current model instance, which have values that are not
- excluded.
-
- """
-
- def _collect(model: BaseModel) -> list[AnyComponentConfig]:
- component_configs: list[Any] = []
- for _, value in model:
- if isinstance(value, ComponentConfig):
- if not value.exclude_value:
- component_configs.append(value)
- elif isinstance(value, BaseModel):
- component_configs.extend(_collect(value))
- return component_configs
-
- return _collect(self)
-
-
-
+ clean_strength: SliderConfig = SliderConfig.clean_strength(visible=True)
+ output_gain: SliderConfig = SliderConfig.gain(
+ label="Output gain",
+ info="The gain to apply to the converted speech.
",
+ )