from __future__ import annotations

import os
import sys
from enum import IntEnum, StrEnum, auto
from functools import cached_property
from pathlib import Path
from typing import TYPE_CHECKING, Annotated, Any, TypedDict

import gradio as gr
import typer
from huggingface_hub import snapshot_download
from pydantic import BaseModel
from ultimate_rvc.common import AUDIO_DIR, MODELS_DIR, TEMP_DIR
from ultimate_rvc.core.generate.song_cover import get_named_song_dirs
from ultimate_rvc.core.generate.speech import get_edge_tts_voice_names
from ultimate_rvc.core.manage.audio import (
    get_audio_datasets,
    get_named_audio_datasets,
    get_saved_output_audio,
    get_saved_speech_audio,
)
from ultimate_rvc.core.manage.config import get_config_names, load_config
from ultimate_rvc.core.manage.models import (
    get_custom_embedder_model_names,
    get_custom_pretrained_model_names,
    get_training_model_names,
    get_voice_model_names,
)
from ultimate_rvc.web.common import (
    initialize_dropdowns,
    exception_harness,
    render_transfer_component,
    setup_transfer_event,
    toggle_visibility,
    toggle_visible_component,
    update_dropdowns,
    update_output_name,
    update_value,
)

from ultimate_rvc.web.config.component import (
    AnyComponentConfig,
    AudioConfig,
    CheckboxConfig,
    ComponentConfig,
    DropdownConfig,
    NumberConfig,
    RadioConfig,
    SliderConfig,
    TextboxConfig,
)
from ultimate_rvc.web.config.tab import (
    SongGenerationConfig,
    SpeechGenerationConfig,
    TrainingConfig,
)
from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import (
    render as render_speech_multi_step_tab,
)
from ultimate_rvc.web.tabs.generate.speech.one_click_generation import (
    render as render_speech_one_click_tab,
)
from ultimate_rvc.web.tabs.manage.audio import render as render_audio_tab
from ultimate_rvc.web.tabs.manage.models import render as render_models_tab
from ultimate_rvc.web.tabs.manage.settings import render as render_settings_tab

if TYPE_CHECKING:
    import gradio as gr
    from typing import TYPE_CHECKING

from functools import partial

import gradio as gr
from ultimate_rvc.core.common import (
    INTERMEDIATE_AUDIO_BASE_DIR,
    OUTPUT_AUDIO_DIR,
    copy_file_safe,
    display_progress,
    get_file_hash,
    json_dump,
    json_load,
    validate_model,
    validate_url,
)
from ultimate_rvc.core.exceptions import (
    Entity,
    InvalidLocationError,
    Location,
    NotFoundError,
    NotProvidedError,
    UIMessage,
    YoutubeUrlError,
)
from ultimate_rvc.core.generate.common import (
    convert,
    get_unique_base_path,
    mix_audio,
    validate_audio_dir_exists,
    validate_audio_file_exists,
    wavify,
)
from ultimate_rvc.core.generate.song_cover import (
    get_named_song_dirs,
    get_song_cover_name,
    mix_song,
    pitch_shift,
    postprocess,
    retrieve_song,
    separate_audio,
    get_named_song_dirs,
    get_song_cover_name,
    run_pipeline,
)
from ultimate_rvc.core.generate.typing_extra import (
    EffectedVocalsMetaData,
    FileMetaData,
    MixedAudioType,
    PitchShiftMetaData,
    RVCAudioMetaData,
    SeparatedAudioMetaData,
)
from ultimate_rvc.core.manage.audio import get_saved_output_audio
from ultimate_rvc.typing_extra import EmbedderModel
from ultimate_rvc.web.common import (
    PROGRESS_BAR,
    exception_harness,
    toggle_intermediate_audio,
    toggle_visibility,
    toggle_visible_component,
    update_dropdowns,
    update_output_name,
    update_value,
)
from ultimate_rvc.web.typing_extra import ConcurrencyId

type StrPath = str | PathLike[str]

type Json = Mapping[str, Json] | Sequence[Json] | str | int | float | bool | None


class SegmentSize(IntEnum):
    """Enumeration of segment sizes for audio separation."""

    SEG_64 = 64
    SEG_128 = 128
    SEG_256 = 256
    SEG_512 = 512
    SEG_1024 = 1024
    SEG_2048 = 2048
    SEG_4096 = 4096


class F0Method(StrEnum):
    """Enumeration of pitch extraction methods."""

    RMVPE = "rmvpe"
    CREPE = "crepe"
    CREPE_TINY = "crepe-tiny"
    FCPE = "fcpe"


class RVCContentType(StrEnum):
    """Enumeration of valid content to convert with RVC."""

    VOCALS = "vocals"
    VOICE = "voice"
    SPEECH = "speech"
    AUDIO = "audio"


class SampleRate(IntEnum):
    """Enumeration of supported audio sample rates."""

    HZ_16000 = 16000
    HZ_44100 = 44100
    HZ_48000 = 48000
    HZ_96000 = 96000
    HZ_192000 = 192000


class AudioExt(StrEnum):
    """Enumeration of supported audio file formats."""

    MP3 = "mp3"
    WAV = "wav"
    FLAC = "flac"
    OGG = "ogg"


class DeviceType(StrEnum):
    """Enumeration of device types for training voice models."""

    AUTOMATIC = "Automatic"
    CPU = "CPU"
    GPU = "GPU"


class TrainingSampleRate(StrEnum):
    """Enumeration of sample rates for training voice models."""

    HZ_32K = "32000"
    HZ_40K = "40000"
    HZ_48K = "48000"


class PretrainedSampleRate(StrEnum):
    """Enumeration of valid sample rates for pretrained models."""

    HZ_32K = "32k"
    HZ_40K = "40k"
    HZ_44K = "44k"
    HZ_48K = "48k"


class TrainingF0Method(StrEnum):
    """Enumeration of pitch extraction methods for training."""

    RMVPE = "rmvpe"
    CREPE = "crepe"
    CREPE_TINY = "crepe-tiny"


class AudioSplitMethod(StrEnum):
    """
    Enumeration of methods to use for splitting audio files during
    dataset preprocessing.
    """

    SKIP = "Skip"
    SIMPLE = "Simple"
    AUTOMATIC = "Automatic"


class Vocoder(StrEnum):
    """Enumeration of vocoders for training voice models."""

    HIFI_GAN = "HiFi-GAN"
    MRF_HIFI_GAN = "MRF HiFi-GAN"
    REFINE_GAN = "RefineGAN"


class IndexAlgorithm(StrEnum):
    """Enumeration of indexing algorithms for training voice models."""

    AUTO = "Auto"
    FAISS = "Faiss"
    KMEANS = "KMeans"


class PretrainedType(StrEnum):
    """
    Enumeration of the possible types of pretrained models to finetune
    voice models on.
    """

    NONE = "None"
    DEFAULT = "Default"
    CUSTOM = "Custom"


class ConcurrencyId(StrEnum):
    """Enumeration of possible concurrency identifiers."""

    GPU = auto()


class SongSourceType(StrEnum):
    """The type of source providing the song to generate a cover of."""

    PATH = "Local or HTTP filepath"
    LOCAL_FILE = "Local file"
    CACHED_SONG = "Cached song"


class SpeechSourceType(StrEnum):
    """The type of source providing the text to generate speech from."""

    TEXT = "Text"
    LOCAL_FILE = "Local file"


class SongTransferOption(StrEnum):
    """Enumeration of possible song transfer options."""

    STEP_1_AUDIO = "Step 1: stem splitting"
    STEP_2_VOCALS = "Step 2: vocal conversion"
    STEP_3_VOCALS = "Step 3: vocal effect"
    STEP_4_INSTRUMENTALS = "Step 4: instrumentals"
    STEP_4_BACKUP_VOCALS = "Step 4: backup vocals"
    STEP_5_MAIN_VOCALS = "Step 5: main vocals"
    STEP_5_INSTRUMENTALS = "Step 5: instrumentals"
    STEP_5_BACKUP_VOCALS = "Step 5: backup vocals"


class SpeechTransferOption(StrEnum):
    """Enumeration of possible speech transfer options."""

    STEP_2_SPEECH = "Step 2: speech conversion"
    STEP_3_SPEECH = "Step 3: speech effect"


class ComponentVisibilityKwArgs(TypedDict, total=False):
    """
    Keyword arguments for setting component visibility.

    Attributes
    ----------
    visible : bool
        Whether the component should be visible.
    value : Any
        The value of the component.

    """

    visible: bool
    value: Any


class UpdateDropdownKwArgs(TypedDict, total=False):
    """
    Keyword arguments for updating a dropdown component.

    Attributes
    ----------
    choices : DropdownChoices
        The updated choices for the dropdown component.
    value : DropdownValue
        The updated value for the dropdown component.

    """

    choices: DropdownChoices
    value: DropdownValue


class TextBoxKwArgs(TypedDict, total=False):
    """
    Keyword arguments for updating a textbox component.

    Attributes
    ----------
    value : str | None
        The updated value for the textbox component.
    placeholder : str | None
        The updated placeholder for the textbox component.

    """

    value: str | None
    placeholder: str | None


class UpdateAudioKwArgs(TypedDict, total=False):
    """
    Keyword arguments for updating an audio component.

    Attributes
    ----------
    value : str | None
        The updated value for the audio component.

    """

    value: str | None


class DatasetType(StrEnum):
    """The type of dataset to train a voice model."""

    NEW_DATASET = "Create new dataset"
    EXISTING_DATASET = "Use existing dataset"


class EmbedderModel(StrEnum):
    """Enumeration of audio embedding models."""

    CONTENTVEC = "contentvec"
    CRUSTY = "Crusty"
    CUSTOM = "custom"


class SeparationModel(StrEnum):
    """Enumeration of audio separation models."""

    UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx"
    UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
    REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
    UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
    Kim_Vocal_1 = "Kim_Vocal_1.onnx"
    Kim_Vocal_2 = "Kim_Vocal_2.onnx"
    Kim_Inst = "Kim_Inst.onnx"
    UVR_MDX_NET_Inst_HQ_4 = "UVR-MDX-NET-Inst_HQ_4.onnx"
    kuielab_a_vocals = "kuielab_a_vocals.onnx"
    kuielab_b_vocals = "kuielab_b_vocals.onnx"
    kuielab_a_drums = "kuielab_a_drums.onnx"
    kuielab_b_drums = "kuielab_b_drums.onnx"
    kuielab_a_bass = "kuielab_a_bass.onnx"
    kuielab_b_bass = "kuielab_b_bass.onnx"
    kuielab_a_other = "kuielab_a_other.onnx"
    kuielab_b_other = "kuielab_b_other.onnx"
    MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
    UVR_DeNoise = "UVR-DeNoise.pth"
    UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"


now_dir = os.getcwd()

sys.path.append(now_dir)
models_dir = "models"

dump_path = os.path.join(now_dir, models_dir)

repo_id = "lainlives/voice"

hf_token = os.environ.get("HF_TOKEN")
snapshot_download(repo_id=repo_id, local_dir=dump_path, token=hf_token)

# if __name__ == "__main__":
#    start_app(share=False, ssr_mode = True)


config_name = "default"  # os.environ.get("URVC_CONFIG")
cookiefile = os.environ.get("YT_COOKIEFILE")


"""
Module defining models for representing configuration settings for
UI tabs.
"""


class SongIntermediateAudioConfig(BaseModel):
    """
    Configuration settings for intermediate audio components in the
    one-click song generation tab.

    Attributes
    ----------
    song : AudioConfig
        Configuration settings for the input song audio component.
    vocals : AudioConfig
        Configuration settings for the vocals audio component.
    instrumentals : AudioConfig
        Configuration settings for the instrumentals audio component.
    main_vocals : AudioConfig
        Configuration settings for the main vocals audio component.
    backup_vocals : AudioConfig
        Configuration settings for the backup vocals audio component.
    main_vocals_dereverbed : AudioConfig
        Configuration settings for the main vocals de-reverbed audio
        component.
    main_vocals_reverb : AudioConfig
        Configuration settings for the main vocals reverb audio
        component.
    converted_vocals : AudioConfig
        Configuration settings for the converted vocals audio
        component.
    postprocessed_vocals : AudioConfig
        Configuration settings for the postprocessed vocals audio
        component.
    instrumentals_shifted : AudioConfig
        Configuration settings for the shifted instrumentals audio
        component.
    backup_vocals_shifted : AudioConfig
        Configuration settings for the shifted backup vocals audio
        component.
    all : list[gr.Audio]
        List of instances of all intermediate audio components.

    """

    song: AudioConfig = AudioConfig.intermediate(label="Song")
    vocals: AudioConfig = AudioConfig.intermediate(label="Vocals")
    instrumentals: AudioConfig = AudioConfig.intermediate(
        label="Instrumentals",
    )
    main_vocals: AudioConfig = AudioConfig.intermediate(
        label="Main vocals",
    )
    backup_vocals: AudioConfig = AudioConfig.intermediate(
        label="Backup vocals",
    )
    main_vocals_dereverbed: AudioConfig = AudioConfig.intermediate(
        label="De-reverbed main vocals",
    )
    main_vocals_reverb: AudioConfig = AudioConfig.intermediate(
        label="Main vocals with reverb",
    )
    converted_vocals: AudioConfig = AudioConfig.intermediate(
        label="Converted vocals",
    )
    postprocessed_vocals: AudioConfig = AudioConfig.intermediate(
        label="Postprocessed vocals",
    )
    instrumentals_shifted: AudioConfig = AudioConfig.intermediate(
        label="Pitch-shifted instrumentals",
    )
    backup_vocals_shifted: AudioConfig = AudioConfig.intermediate(
        label="Pitch-shifted backup vocals",
    )

    @property
    def all(self) -> list[gr.Audio]:
        """
        Retrieve instances of all intermediate audio components
        in the one-click song generation tab.

        Returns
        -------
        list[gr.Audio]
            List of instances of all intermediate audio components in
            the one-click song generation tab.

        """
        # NOTE we are using self.__annotations__ to get the fields in
        # the order they are defined in the class
        return [getattr(self, field).instance for field in self.__annotations__]


class OneClickSongGenerationConfig(SongGenerationConfig):
    """
    Configuration settings for the one-click song generation tab.

    Attributes
    ----------
    n_octaves : SliderConfig
        Configuration settings for an octave pitch shift slider
        component.
    n_semitones : SliderConfig
        Configuration settings for a semitone pitch shift slider
        component.
    show_intermediate_audio : CheckboxConfig
        Configuration settings for a show intermediate audio checkbox
        component.
    intermediate_audio : SongIntermediateAudioConfig
        Configuration settings for intermediate audio components.

    See Also
    --------
    SongGenerationConfig
        Parent model defining common component configuration settings
        for song generation tabs.

    """

    n_octaves: SliderConfig = SliderConfig.octave_shift(
        label="Vocal pitch shift",
        info=(
            "The number of octaves to shift the pitch of the converted vocals by. Use 1"
            " for male-to-female and -1 for vice-versa."
        ),
    )

    n_semitones: SliderConfig = SliderConfig.semitone_shift(
        label="Overall pitch shift",
        info=(
            "The number of semi-tones to shift the pitch of the converted vocals,"
            " instrumentals and backup vocals by."
        ),
    )
    show_intermediate_audio: CheckboxConfig = CheckboxConfig(
        label="Show intermediate audio",
        info="Show intermediate audio tracks produced during song cover generation.",
        value=False,
        exclude_value=True,
    )
    intermediate_audio: SongIntermediateAudioConfig = SongIntermediateAudioConfig()


class SongInputAudioConfig(BaseModel):
    """
    Configuration settings for input audio components in the multi-step
    song generation tab.

    Attributes
    ----------
    audio : AudioConfig
        Configuration settings for the input audio component.
    vocals : AudioConfig
        Configuration settings for the vocals audio component.
    converted_vocals : AudioConfig
        Configuration settings for the converted vocals audio
        component.
    instrumentals : AudioConfig
        Configuration settings for the instrumentals audio
        component.
    backup_vocals : AudioConfig
        Configuration settings for the backup vocals audio
        component.
    main_vocals : AudioConfig
        Configuration settings for the main vocals audio
        component.
    shifted_instrumentals : AudioConfig
        Configuration settings for the shifted instrumentals audio
        component.
    shifted_backup_vocals : AudioConfig
        Configuration settings for the shifted backup vocals audio
        component.
    all : list[AudioConfig]
        List of configuration settings for all input audio
        components in the multi-step song generation tab.

    """

    audio: AudioConfig = AudioConfig.input(label="Audio")
    vocals: AudioConfig = AudioConfig.input(label="Vocals")
    converted_vocals: AudioConfig = AudioConfig.input(label="Vocals")
    instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
    backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")
    main_vocals: AudioConfig = AudioConfig.input(label="Main vocals")
    shifted_instrumentals: AudioConfig = AudioConfig.input(label="Instrumentals")
    shifted_backup_vocals: AudioConfig = AudioConfig.input(label="Backup vocals")

    @property
    def all(self) -> list[AudioConfig]:
        """
        Retrieve configuration settings for all input audio components
        in the multi-step song generation tab.

        Returns
        -------
        list[AudioConfig]
            List of configuration settings for all input audio
            components in the multi-step song generation tab.

        """
        return [getattr(self, field) for field in self.__annotations__]


class SongDirsConfig(BaseModel):
    """
    Configuration settings for song directory components in the
    multi-step song generation tab.

    Attributes
    ----------
    separate_audio : DropdownConfig
        Configuration settings for the song directory component
        for separating audio.
    convert_vocals : DropdownConfig
        Configuration settings for the song directory component
        for converting vocals.
    postprocess_vocals : DropdownConfig
        Configuration settings for the song directory component
        for postprocessing vocals.
    pitch_shift_background : DropdownConfig
        Configuration settings for the song directory component
        for pitch-shifting background audio.
    mix : DropdownConfig
        Configuration settings for the song directory component
        for mixing audio.
    all : list[gr.Dropdown]
        List of instances of all song directory components in the
        multi-step song generation tab.

    """

    separate_audio: DropdownConfig = DropdownConfig.song_dir()
    convert_vocals: DropdownConfig = DropdownConfig.song_dir()
    postprocess_vocals: DropdownConfig = DropdownConfig.song_dir()
    pitch_shift_background: DropdownConfig = DropdownConfig.song_dir()
    mix: DropdownConfig = DropdownConfig.song_dir()

    @property
    def all(self) -> list[gr.Dropdown]:
        """
        Retrieve instances of all song directory components in the
        multi-step song generation tab.

        Returns
        -------
        list[gr.Dropdown]
            List of instances of all song directory components in
            the multi-step song generation tab.

        """
        return [getattr(self, field).instance for field in self.__annotations__]


class MultiStepSongGenerationConfig(SongGenerationConfig):
    """
    Configuration settings for multi-step song generation tab.

    Attributes
    ----------
    separation_model : DropdownConfig
        Configuration settings for a separation model dropdown
        component.
    segment_size : RadioConfig
        Configuration settings for a segment size radio component.
    n_octaves : SliderConfig
        Configuration settings for an octave pitch shift slider
        component.
    n_semitones : SliderConfig
        Configuration settings for a semitone pitch shift slider
        component.
    n_semitones_instrumentals : SliderConfig
        Configuration settings for an instrumentals pitch shift slider
        component.
    n_semitones_backup_vocals : SliderConfig
        Configuration settings for a backup vocals pitch shift slider
        component.
    input_audio : SongInputAudioConfig
        Configuration settings for input audio components.
    song_dirs : SongDirsConfig
        Configuration settings for song directory components.

    See Also
    --------
    SongGenerationConfig
        Parent model defining common component configuration settings
        for song generation tabs.

    """

    separation_model: DropdownConfig = DropdownConfig(
        label="Separation model",
        info="The model to use for audio separation.",
        value=SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
        choices=list(SeparationModel),
    )
    segment_size: RadioConfig = RadioConfig(
        label="Segment size",
        info=(
            "The size of the segments into which the audio is split. Using a larger"
            " size consumes more resources, but may give better results."
        ),
        value=SegmentSize.SEG_2048,
        choices=list(SegmentSize),
    )
    n_octaves: SliderConfig = SliderConfig.octave_shift(
        label="Pitch shift (octaves)",
        info=(
            "The number of octaves to pitch-shift the converted voice by. Use 1 for"
            " male-to-female and -1 for vice-versa."
        ),
    )
    n_semitones: SliderConfig = SliderConfig.semitone_shift(
        label="Pitch shift (semi-tones)",
        info=(
            "The number of semi-tones to pitch-shift the converted vocals by. Altering"
            " this slightly reduces sound quality."
        ),
    )
    n_semitones_instrumentals: SliderConfig = SliderConfig.semitone_shift(
        label="Instrumental pitch shift",
        info="The number of semi-tones to pitch-shift the instrumentals by.",
    )
    n_semitones_backup_vocals: SliderConfig = SliderConfig.semitone_shift(
        label="Backup vocal pitch shift",
        info="The number of semi-tones to pitch-shift the backup vocals by.",
    )
    input_audio: SongInputAudioConfig = SongInputAudioConfig()
    song_dirs: SongDirsConfig = SongDirsConfig()


class SpeechIntermediateAudioConfig(BaseModel):
    """
    Configuration settings for intermediate audio components in the
    one-click speech generation tab.

    Attributes
    ----------
    speech : AudioConfig
        Configuration settings for the input speech audio component.
    converted_speech : AudioConfig
        Configuration settings for the converted speech audio component.
    all : list[gr.Audio]
        List of instances of all intermediate audio components in the
        speech generation tab.

    """

    speech: AudioConfig = AudioConfig.intermediate(label="Speech")
    converted_speech: AudioConfig = AudioConfig.intermediate(label="Converted speech")

    @property
    def all(self) -> list[gr.Audio]:
        """
        Retrieve instances of all intermediate audio components in the
        speech generation tab.

        Returns
        -------
        list[gr.Audio]
            List of instances of all intermediate audio components in
            the speech generation tab.

        """
        return [getattr(self, field).instance for field in self.__annotations__]


class OneClickSpeechGenerationConfig(SpeechGenerationConfig):
    """
    Configuration settings for one-click speech generation tab.

    Attributes
    ----------
    intermediate_audio : SpeechIntermediateAudioConfig
        Configuration settings for intermediate audio components.
    show_intermediate_audio : CheckboxConfig
        Configuration settings for a show intermediate audio checkbox
        component.

    See Also
    --------
    SpeechGenerationConfig
        Parent model defining common component configuration settings
        for speech generation tabs.

    """

    intermediate_audio: SpeechIntermediateAudioConfig = SpeechIntermediateAudioConfig()

    show_intermediate_audio: CheckboxConfig = CheckboxConfig(
        label="Show intermediate audio",
        info="Show intermediate audio tracks produced during speech generation.",
        value=False,
        exclude_value=True,
    )


class SpeechInputAudioConfig(BaseModel):
    """
    Configuration settings for input audio components in the multi-step
    speech generation tab.

    Attributes
    ----------
    speech : AudioConfig
        Configuration settings for the input speech audio component.
    converted_speech : AudioConfig
        Configuration settings for the converted speech audio component.

    all : list[AudioConfig]
        List of configuration settings for all input audio components in
        the multi-step speech generation tab.

    """

    speech: AudioConfig = AudioConfig.input("Speech")
    converted_speech: AudioConfig = AudioConfig.input("Converted speech")

    @property
    def all(self) -> list[AudioConfig]:
        """
        Retrieve configuration settings for all input audio components
        in the multi-step speech generation tab.

        Returns
        -------
        list[AudioConfig]
            List of configuration settings for all input audio
            components in the multi-step speech generation tab.

        """
        return [getattr(self, field) for field in self.__annotations__]


class MultiStepSpeechGenerationConfig(SpeechGenerationConfig):
    """
    Configuration settings for the multi-step speech generation tab.

    Attributes
    ----------
    input_audio : SpeechInputAudioConfig
        Configuration settings for input audio components.

    See Also
    --------
    SpeechGenerationConfig
        Parent model defining common component configuration settings
        for speech generation tabs.

    """

    input_audio: SpeechInputAudioConfig = SpeechInputAudioConfig()


class MultiStepTrainingConfig(TrainingConfig):
    """Configuration settings for multi-step training tab."""


class ModelManagementConfig(BaseModel):
    """

    Configuration settings for model management tab.

    Attributes
    ----------
    voices : DropdownConfig
        Configuration settings for delete voice models dropdown
        component.
    embedders : DropdownConfig
        Configuration settings for delete embedder models dropdown
        component.
    pretraineds : DropdownConfig
        Configuration settings for delete pretrained models dropdown
        component.
    traineds : DropdownConfig
        Configuration settings for delete training models dropdown
        component.
    dummy_checkbox : CheckboxConfig
        Configuration settings for a dummy checkbox component.

    """

    voices: DropdownConfig = DropdownConfig.multi_delete(
        label="Voice models",
        info="Select one or more voice models to delete.",
    )
    embedders: DropdownConfig = DropdownConfig.multi_delete(
        label="Custom embedder models",
        info="Select one or more embedder models to delete.",
    )
    pretraineds: DropdownConfig = DropdownConfig.multi_delete(
        label="Custom pretrained models",
        info="Select one or more pretrained models to delete.",
    )
    traineds: DropdownConfig = DropdownConfig.multi_delete(
        label="Training models",
        info="Select one or more training models to delete.",
    )

    dummy_checkbox: CheckboxConfig = CheckboxConfig(
        value=False,
        visible=False,
        exclude_value=True,
    )


class AudioManagementConfig(BaseModel):
    """
    Configuration settings for audio management tab.

    Attributes
    ----------
    intermediate : DropdownConfig
        Configuration settings for delete intermediate audio files
        dropdown component
    speech : DropdownConfig
        Configuration settings for delete speech audio files dropdown
        component.
    output : DropdownConfig
        Configuration settings for delete output audio files dropdown
        component.
    dataset : DropdownConfig
        Configuration settings for delete dataset audio files dropdown
        component.
    dummy_checkbox : CheckboxConfig
        Configuration settings for a dummy checkbox component.

    """

    intermediate: DropdownConfig = DropdownConfig.multi_delete(
        label="Song directories",
        info=(
            "Select one or more song directories containing intermediate audio files to"
            " delete."
        ),
    )
    speech: DropdownConfig = DropdownConfig.multi_delete(
        label="Speech audio files",
        info="Select one or more speech audio files to delete.",
    )
    output: DropdownConfig = DropdownConfig.multi_delete(
        label="Output audio files",
        info="Select one or more output audio files to delete.",
    )
    dataset: DropdownConfig = DropdownConfig.multi_delete(
        label="Dataset audio files",
        info="Select one or more datasets containing audio files to delete.",
    )

    dummy_checkbox: CheckboxConfig = CheckboxConfig(
        value=False,
        visible=False,
        exclude_value=True,
    )


class SettingsManagementConfig(BaseModel):
    """
    Configuration settings for settings management tab.

    Attributes
    ----------
    dummy_checkbox : CheckboxConfig
        Configuration settings for a dummy checkbox component.

    """

    load_config_name: DropdownConfig = DropdownConfig(
        label="Configuration name",
        info="The name of a configuration to load UI settings from",
        value=None,
        render=False,
        exclude_value=True,
    )
    delete_config_names: DropdownConfig = DropdownConfig.multi_delete(
        label="Configuration names",
        info="Select the name of one or more configurations to delete",
    )
    dummy_checkbox: CheckboxConfig = CheckboxConfig(
        value=False,
        visible=False,
        exclude_value=True,
    )


class TotalSongGenerationConfig(BaseModel):
    """
    All configuration settings for song generation tabs.

    Attributes
    ----------
    one_click : OneClickSongGenerationConfig
        Configuration settings for the one-click song generation tab.
    multi_step : MultiStepSongGenerationConfig
        Configuration settings for the multi-step song generation tab.

    """

    one_click: OneClickSongGenerationConfig = OneClickSongGenerationConfig()
    multi_step: MultiStepSongGenerationConfig = MultiStepSongGenerationConfig()


class TotalSpeechGenerationConfig(BaseModel):
    """
    All configuration settings for speech generation tabs.

    Attributes
    ----------
    one_click : OneClickSpeechGenerationConfig
        Configuration settings for the one-click speech generation tab.
    multi_step : MultiStepSpeechGenerationConfig
        Configuration settings for the multi-step speech generation tab.

    """

    one_click: OneClickSpeechGenerationConfig = OneClickSpeechGenerationConfig()
    multi_step: MultiStepSpeechGenerationConfig = MultiStepSpeechGenerationConfig()


class TotalTrainingConfig(BaseModel):
    """
    All configuration settings for training tabs.

    Attributes
    ----------
    training : TrainingConfig
        Configuration settings for the multi-step training tab.

    """

    multi_step: MultiStepTrainingConfig = MultiStepTrainingConfig()


class TotalManagementConfig(BaseModel):
    """
    All configuration settings for management tabs.

    Attributes
    ----------
    model : ModelManagementConfig
        Configuration settings for the model management tab.
    audio : AudioManagementConfig
        Configuration settings for the audio management tab.
    settings : SettingsManagementConfig
        Configuration settings for the settings management tab.

    """

    model: ModelManagementConfig = ModelManagementConfig()
    audio: AudioManagementConfig = AudioManagementConfig()
    settings: SettingsManagementConfig = SettingsManagementConfig()


class TotalConfig(BaseModel):
    """
    All configuration settings for the Ultimate RVC app.

    Attributes
    ----------
    song : TotalSongGenerationConfig
        Configuration settings for song generation tabs.
    speech : TotalSpeechGenerationConfig
        Configuration settings for speech generation tabs.
    training : TotalTrainingConfig
        Configuration settings for training tabs.
    management : TotalManagementConfig
        Configuration settings for management tabs.

    """

    song: TotalSongGenerationConfig = TotalSongGenerationConfig()
    speech: TotalSpeechGenerationConfig = TotalSpeechGenerationConfig()
    training: TotalTrainingConfig = TotalTrainingConfig()
    management: TotalManagementConfig = TotalManagementConfig()

    @cached_property
    def all(self) -> list[AnyComponentConfig]:
        """
        Recursively collect those component configuration models nested
        within the current model instance, which have values that are
        not excluded.

        Returns
        -------
        list[AnyComponentConfig]
            A list of component configuration models found within the
            current model instance, which have values that are not
            excluded.

        """

        def _collect(model: BaseModel) -> list[AnyComponentConfig]:
            component_configs: list[Any] = []
            for _, value in model:
                if isinstance(value, ComponentConfig):
                    if not value.exclude_value:
                        component_configs.append(value)
                elif isinstance(value, BaseModel):
                    component_configs.extend(_collect(value))
            return component_configs

        return _collect(self)


class BaseTabConfig(BaseModel):
    """
    Base model defining common component configuration settings for
    UI tabs.

    Attributes
    ----------
    embedder_model : DropdownConfig
        Configuration settings for an embedder model dropdown component.
    custom_embedder_model : DropdownConfig
        Configuration settings for a custom embedder model dropdown
        component.

    """

    embedder_model: DropdownConfig = DropdownConfig(
        label="Embedder model",
        info="The model to use for generating speaker embeddings.",
        value=EmbedderModel.CONTENTVEC,
        choices=list(EmbedderModel),
        exclude_value=True,
    )
    custom_embedder_model: DropdownConfig = DropdownConfig(
        label="Custom embedder model",
        info="Select a custom embedder model from the dropdown.",
        value=None,
        visible=False,
        render=False,
        exclude_value=True,
    )


class TrainingConfig(BaseTabConfig):
    """
    Common component configuration settings for training tabs.

    Attributes
    ----------
    dataset_type : DropdownConfig
        Configuration settings for a dataset type dropdown component.
    dataset : DropdownConfig
        Configuration settings for a dataset dropdown component.
    dataset_name : TextboxConfig
        Configuration settings for a dataset name textbox component.
    preprocess_model : DropdownConfig
        Configuration settings for a model name dropdown component
        for audio preprocessing.
    sample_rate : DropdownConfig
        Configuration settings for a sample rate dropdown component.
    filter_audio : CheckboxConfig
        Configuration settings for a filter audio checkbox component.
    clean_audio : CheckboxConfig
        Configuration settings for a clean audio checkbox component.
    clean_strength : SliderConfig
        Configuration settings for a clean strength slider component.
    split_method : DropdownConfig
        Configuration settings for an audio splitting method dropdown
        component.
    chunk_len : SliderConfig
        Configuration settings for a chunk length slider component.
    overlap_len : SliderConfig
        Configuration settings for an overlap length slider component.
    preprocess_cores : SliderConfig
        Configuration settings for a CPU cores slider component for
        preprocessing.
    extract_model : DropdownConfig
        Configuration settings for a model name dropdown component for
        feature extraction.
    f0_method : DropdownConfig
        Configuration settings for an F0 method dropdown component.
    hop_length : SliderConfig
        Configuration settings for a hop length slider component.
    include_mutes : SliderConfig
        Configuration settings for an include mutes slider component.
    extract_cores : SliderConfig
        Configuration settings for a CPU cores slider component for
        feature extraction.
    extraction_acceleration : HardwareAccelerationConfig
        Configuration settings for a hardware acceleration component for
        feature extraction.
    extraction_gpus : DropdownConfig
        Configuration settings for a GPU dropdown compoennt for feature
        extraction.
    train_model : DropdownConfig
        Configuration settings for a model name dropdown component for
        training.
    num_epochs : SliderConfig
        Configuration settings for a number of epochs slider component.
    batch_size : SliderConfig
        Configuration settings for a batch size slider component.
    detect_overtraining : CheckboxConfig
        Configuration settings for a detect overtraining checkbox
        component.
    overtraining_threshold : SliderConfig
        Configuration settings for an overtraining threshold slider
        component.
    vocoder : DropdownConfig
        Configuration settings for a vocoder dropdown component.
    index_algorithm : DropdownConfig
        Configuration settings for an index algorithm dropdown
        component.
    pretrained_type : DropdownConfig
        Configuration settings for a pretrained model type dropdown
        component.
    custom_pretrained_model : DropdownConfig
        Configuration settings for a custom pretrained model dropdown
        component.
    save_interval : SliderConfig
        Configuration settings for a save-interval slider component.
    save_all_checkpoints : CheckboxConfig
        Configuration settings for a save-all-checkpoints checkbox
        component.
    save_all_weights : CheckboxConfig
        Configuration settings for a save-all-weights checkbox
        component.
    clear_saved_data : CheckboxConfig
        Configuration settings for a clear-saved-data checkbox
        component.
    upload_model : CheckboxConfig
        Configuration settings for an upload voice model checkbox
        component.
    upload_name : TextboxConfig
        Configuration settings for an upload name textbox component.
    training_acceleration : HardwareAccelerationConfig
        Configuration settings for a hardware acceleration component for
        training.
    training_gpus : DropdownConfig
        Configuration settings for a GPU dropdown component for
        training.
    preload_dataset : CheckboxConfig
        Configuration settings for a preload dataset checkbox component.
    reduce_memory_usage : CheckboxConfig
        Configuration settings for a reduce-memory-usage checkbox
        component.

    See Also
    --------
    BaseTabConfig
        Parent model defining common component configuration settings
        for UI tabs.

    """

    dataset_type: DropdownConfig = DropdownConfig(
        label="Dataset type",
        info="Select the type of dataset to preprocess.",
        value=DatasetType.NEW_DATASET,
        choices=list(DatasetType),
        exclude_value=True,
    )
    dataset: DropdownConfig = DropdownConfig(
        label="Dataset path",
        info=(
            "The path to an existing dataset. Either select a path to a previously"
            " created dataset or provide a path to an external dataset."
        ),
        value=None,
        allow_custom_value=True,
        visible=False,
        render=False,
        exclude_value=True,
    )
    dataset_name: TextboxConfig = TextboxConfig(
        label="Dataset name",
        info=(
            "The name of the new dataset. If the dataset already exists, the provided"
            " audio files will be added to it."
        ),
        value="My dataset",
        exclude_value=True,
    )
    preprocess_model: DropdownConfig = DropdownConfig(
        label="Model name",
        info=(
            "Name of the model to preprocess the given dataset for. Either select an"
            " existing model from the dropdown or provide the name of a new model."
        ),
        value="My model",
        allow_custom_value=True,
        render=False,
        exclude_value=True,
    )
    sample_rate: DropdownConfig = DropdownConfig(
        label="Sample rate",
        info="Target sample rate for the audio files in the provided dataset.",
        value=TrainingSampleRate.HZ_40K,
        choices=list(TrainingSampleRate),
    )
    filter_audio: CheckboxConfig = CheckboxConfig(
        label="Filter audio",
        info=(
            "Whether to remove low-frequency sounds from the audio files in the"
            " provided dataset by applying a high-pass butterworth filter.<br><br>"
        ),
        value=True,
    )
    clean_audio: CheckboxConfig = CheckboxConfig(
        label="Clean audio",
        info=(
            "Whether to clean the audio files in the provided dataset using noise"
            " reduction algorithms.<br><br><br>"
        ),
        value=False,
        exclude_value=True,
    )
    clean_strength: SliderConfig = SliderConfig.clean_strength(visible=False)
    split_method: DropdownConfig = DropdownConfig(
        label="Audio splitting method",
        info=(
            "The method to use for splitting the audio files in the provided dataset."
            " Use the `Skip` method to skip splitting if the audio files are already"
            " split. Use the `Simple` method if excessive silence has already been"
            " removed from the audio files. Use the `Automatic` method for automatic"
            " silence detection and splitting around it."
        ),
        value=AudioSplitMethod.AUTOMATIC,
        choices=list(AudioSplitMethod),
        exclude_value=True,
    )
    chunk_len: SliderConfig = SliderConfig(
        label="Chunk length",
        info="Length of split audio chunks.",
        value=3.0,
        minimum=0.5,
        maximum=5.0,
        step=0.1,
        visible=False,
    )
    overlap_len: SliderConfig = SliderConfig(
        label="Overlap length",
        info="Length of overlap between split audio chunks.",
        value=0.3,
        minimum=0.0,
        maximum=0.4,
        step=0.1,
        visible=False,
    )
    preprocess_cores: SliderConfig = SliderConfig.cpu_cores()

    extract_model: DropdownConfig = DropdownConfig(
        label="Model name",
        info=(
            "Name of the model with an associated preprocessed dataset to extract"
            " training features from. When a new dataset is preprocessed, its"
            " associated model is selected by default."
        ),
        value=None,
        render=False,
        exclude_value=True,
    )
    f0_method: DropdownConfig = DropdownConfig(
        label="F0 method",
        info="The method to use for extracting pitch features.",
        value=TrainingF0Method.RMVPE,
        choices=list(TrainingF0Method),
        exclude_value=True,
    )

    hop_length: SliderConfig = SliderConfig.hop_length(
        label="Hop length",
        info="The hop length to use for extracting pitch features.<br><br>",
        visible=False,
    )
    include_mutes: SliderConfig = SliderConfig(
        label="Include mutes",
        info=(
            "The number of mute audio files to include in the generated training file"
            " list. Adding silent files enables the training model to handle pure"
            " silence in inferred audio files. If the preprocessed audio dataset"
            " already contains segments of pure silence, set this to 0."
        ),
        value=0,
        minimum=0,
        maximum=10,
        step=1,
    )
    extraction_cores: SliderConfig = SliderConfig.cpu_cores()
    extraction_acceleration: DropdownConfig = DropdownConfig.hardware_acceleration()
    extraction_gpus: DropdownConfig = DropdownConfig.gpu()

    train_model: DropdownConfig = DropdownConfig(
        label="Model name",
        info=(
            "Name of the model to train. When training features are extracted for a new"
            " model, its name is selected by default."
        ),
        value=None,
        render=False,
        exclude_value=True,
    )
    num_epochs: SliderConfig = SliderConfig(
        label="Number of epochs",
        info=(
            "The number of epochs to train the voice model. A higher number can improve"
            " voice model performance but may lead to overtraining."
        ),
        value=500,
        minimum=1,
        maximum=5000,
        step=1,
    )
    batch_size: SliderConfig = SliderConfig(
        label="Batch size",
        info=(
            "The number of samples in each training batch. It is advisable to align"
            " this value with the available VRAM of your GPU."
        ),
        value=16,
        minimum=1,
        maximum=128,
        step=1,
    )
    detect_overtraining: CheckboxConfig = CheckboxConfig(
        label="Detect overtraining",
        info=(
            "Whether to detect overtraining to prevent the voice model from learning"
            " the training data too well and losing the ability to generalize to new"
            " data."
        ),
        value=True,
        exclude_value=True,
    )
    overtraining_threshold: SliderConfig = SliderConfig(
        label="Overtraining threshold",
        info=(
            "The maximum number of epochs to continue training without any observed"
            " improvement in voice model performance."
        ),
        value=500,
        minimum=1,
        maximum=1000,
        visible=False,
    )
    vocoder: DropdownConfig = DropdownConfig(
        label="Vocoder",
        info=(
            "The vocoder to use for audio synthesis during training. HiFi-GAN provides"
            " basic audio fidelity, while RefineGAN provides the highest audio"
            " fidelity."
        ),
        value=Vocoder.HIFI_GAN,
        choices=list(Vocoder),
    )
    index_algorithm: DropdownConfig = DropdownConfig(
        label="Index algorithm",
        info=(
            "The method to use for generating an index file for the trained voice"
            " model. `KMeans` is particularly useful for large datasets."
        ),
        value=IndexAlgorithm.AUTO,
        choices=list(IndexAlgorithm),
    )
    pretrained_type: DropdownConfig = DropdownConfig(
        label="Pretrained model type",
        info=(
            "The type of pretrained model to finetune the voice model on. `None` will"
            " train the voice model from scratch, while `Default` will use a pretrained"
            " model tailored to the specific voice model architecture. `Custom` will"
            " use a custom pretrained that you provide."
        ),
        value=PretrainedType.DEFAULT,
        choices=list(PretrainedType),
        exclude_value=True,
    )
    custom_pretrained_model: DropdownConfig = DropdownConfig(
        label="Custom pretrained model",
        info="Select a custom pretrained model to finetune from the dropdown.",
        value=None,
        visible=False,
        render=False,
        exclude_value=True,
    )
    save_interval: SliderConfig = SliderConfig(
        label="Save interval",
        info=(
            "The epoch interval at which to to save voice model weights and"
            " checkpoints. The best model weights are always saved regardless of this"
            " setting."
        ),
        value=10,
        minimum=1,
        maximum=100,
        step=1,
    )
    save_all_checkpoints: CheckboxConfig = CheckboxConfig(
        label="Save all checkpoints",
        info=(
            "Whether to save a unique checkpoint at each save interval. If not enabled,"
            " only the latest checkpoint will be saved at each interval."
        ),
        value=True,
    )
    save_all_weights: CheckboxConfig = CheckboxConfig(
        label="Save all weights",
        info=(
            "Whether to save unique voice model weights at each save interval. If not"
            " enabled, only the best voice model weights will be saved."
        ),
        value=True,
    )
    clear_saved_data: CheckboxConfig = CheckboxConfig(
        label="Clear saved data",
        info=(
            "Whether to delete any existing training data associated with the voice"
            " model before training commences. Enable this setting only if you are"
            " training a new voice model from scratch or restarting training."
        ),
        value=False,
    )
    upload_model: CheckboxConfig = CheckboxConfig(
        label="Upload voice model",
        info=(
            "Whether to automatically upload the trained voice model so that it can be"
            " used for generation tasks within the Ultimate RVC app."
        ),
        value=False,
        exclude_value=True,
    )
    upload_name: TextboxConfig = TextboxConfig(
        label="Upload name",
        info="The name to give the uploaded voice model.",
        value=None,
        visible=False,
        exclude_value=True,
    )
    training_acceleration: DropdownConfig = DropdownConfig.hardware_acceleration()
    training_gpus: DropdownConfig = DropdownConfig.gpu()
    preload_dataset: CheckboxConfig = CheckboxConfig(
        label="Preload dataset",
        info=(
            "Whether to preload all training data into GPU memory. This can improve"
            " training speed but requires a lot of VRAM.<br><br>"
        ),
        value=True,
    )
    reduce_memory_usage: CheckboxConfig = CheckboxConfig(
        label="Reduce memory usage",
        info=(
            "Whether to reduce VRAM usage at the cost of slower training speed by"
            " enabling activation checkpointing. This is useful for GPUs with limited"
            " memory (e.g., <6GB VRAM) or when training with a batch size larger than"
            " what your GPU can normally accommodate."
        ),
        value=False,
    )


class GenerationConfig(BaseTabConfig):
    """
    Common component configuration settings for generation tabs.

    voice_model : DropdownConfig
        Configuration settings for a voice model dropdown component.
    f0_methods : DropdownConfig
        Configuration settings for a pitch extraction algorithms
        dropdown component.
    index_rate : SliderConfig
        Configuration settings for an index rate slider component.
    rms_mix_rate : SliderConfig
        Configuration settings for a RMS mix rate slider component.
    protect_rate : SliderConfig
        Configuration settings for a protect rate slider component.
    split_voice : CheckboxConfig
        Configuration settings for a split voice checkbox component.
    autotune_voice: CheckboxConfig
        Configuration settings for an autotune voice checkbox component.
    autotune_strength: SliderConfig
        Configuration settings for an autotune strength slider
        component.
    sid : NumberConfig
        Configuration settings for a speaker ID number component.
    output_sr : DropdownConfig
        Configuration settings for an output sample rate dropdown
        component.
    output_format : DropdownConfig
        Configuration settings for an output format dropdown
        component.
    output_name : TextboxConfig
        Configuration settings for an output name textbox component.

    See Also
    --------
    BaseTabConfig
        Parent model defining common component configuration settings
        for UI tabs.

    """

    voice_model: DropdownConfig = DropdownConfig(
        label="Voice model",
        info="Select a model to use for voice conversion.",
        value=None,
        render=False,
        exclude_value=True,
    )
    f0_methods: DropdownConfig = DropdownConfig(
        label="Pitch extraction algorithm(s)",
        info=(
            "If more than one method is selected, then the median of the pitch values"
            " extracted by each method is used. RMVPE is recommended for most cases and"
            " is the default when no method is selected."
        ),
        value=[F0Method.RMVPE],
        choices=list(F0Method),
        multiselect=True,
    )
    index_rate: SliderConfig = SliderConfig(
        label="Index rate",
        info=(
            "Increase to bias the conversion towards the accent of the voice model."
            " Decrease to potentially reduce artifacts coming from the voice"
            " model.<br><br><br>"
        ),
        value=0.3,
        minimum=0.0,
        maximum=1.0,
    )
    rms_mix_rate: SliderConfig = SliderConfig(
        label="RMS mix rate",
        info=(
            "How much to mimic the loudness (0) of the input voice or a fixed loudness"
            " (1). A value of 1 is recommended for most cases.<br><br>"
        ),
        value=1.0,
        minimum=0.0,
        maximum=1.0,
    )
    protect_rate: SliderConfig = SliderConfig(
        label="Protect rate",
        info=(
            "Controls the extent to which consonants and breathing sounds are protected"
            " from artifacts. A higher value offers more protection but may worsen the"
            " indexing effect.<br><br>"
        ),
        value=0.33,
        minimum=0.0,
        maximum=0.5,
    )

    hop_length: SliderConfig = SliderConfig.hop_length(
        label="Hop length",
        info=(
            "How often the CREPE-based pitch extraction method checks for pitch changes"
            " measured in milliseconds. Lower values lead to longer conversion times"
            " and a higher risk of voice cracks, but better pitch accuracy."
        ),
        visible=True,
    )

    split_voice: CheckboxConfig = CheckboxConfig(
        label="Split input voice",
        info=(
            "Whether to split the input voice track into smaller segments before"
            " converting it. This can improve output quality for longer voice tracks."
        ),
        value=False,
    )
    autotune_voice: CheckboxConfig = CheckboxConfig(
        label="Autotune converted voice",
        info="Whether to apply autotune to the converted voice.<br><br>",
        value=False,
        exclude_value=True,
    )
    autotune_strength: SliderConfig = SliderConfig(
        label="Autotune intensity",
        info=(
            "Higher values result in stronger snapping to the chromatic grid and"
            " artifacting."
        ),
        value=1.0,
        minimum=0.0,
        maximum=1.0,
        visible=False,
    )
    sid: NumberConfig = NumberConfig(
        label="Speaker ID",
        info="Speaker ID for multi-speaker-models.",
        value=0,
        precision=0,
    )
    output_sr: DropdownConfig = DropdownConfig(
        label="Output sample rate",
        info="The sample rate of the mixed output track.",
        value=SampleRate.HZ_44100,
        choices=list(SampleRate),
    )
    output_format: DropdownConfig = DropdownConfig(
        label="Output format",
        info="The audio format of the mixed output track.",
        value=AudioExt.MP3,
        choices=list(AudioExt),
    )
    output_name: TextboxConfig = TextboxConfig(
        label="Output name",
        info="If no name is provided, a suitable name will be generated automatically.",
        value=None,
        placeholder="Ultimate RVC output",
        exclude_value=True,
    )


class SongGenerationConfig(GenerationConfig):
    """
    Common component configuration settings for song generation tabs.

    Attributes
    ----------
    source_type : DropdownConfig
        Configuration settings for a source type dropdown component.
    source : TextboxConfig
        Configuration settings for an input source textbox component.
    cached_song : DropdownConfig
        Configuration settings for a cached song dropdown component.
    clean_strength : SliderConfig
        Configuration settings for a clean strength slider component.
    clean_voice : CheckboxConfig
        Configuration settings for a clean voice checkbox component.
    room_size : SliderConfig
        Configuration settings for a room size slider component.
    wet_level : SliderConfig
        Configuration settings for a wetness level slider component.
    dry_level : SliderConfig
        Configuration settings for a dryness level slider component.
    damping : SliderConfig
        Configuration settings for a damping level slider component.
    main_gain : SliderConfig
        Configuration settings for a main gain slider component.
    inst_gain : SliderConfig
        Configuration settings for an instrumentals gain slider
        component.
    backup_gain : SliderConfig
        Configuration settings for a backup vocals gain slider
        component.

    See Also
    --------
    GenerationConfig
        Parent model defining common component configuration settings
        for song generation tabs.

    """

    source_type: DropdownConfig = DropdownConfig(
        label="Source type",
        info="The type of source to retrieve a song from.",
        value=SongSourceType.LOCAL_FILE,
        choices=list(SongSourceType),
        type="index",
        exclude_value=True,
    )
    source: TextboxConfig = TextboxConfig(
        label="Source",
        info="Local (to the server) filepath or http link.  Youtube probably wont work but most other sites still do.",
        value=None,
        exclude_value=True,
    )
    cached_song: DropdownConfig = DropdownConfig(
        label="Source",
        info="Select a song from the list of cached songs.",
        value=None,
        visible=False,
        render=False,
        exclude_value=True,
    )
    clean_voice: CheckboxConfig = CheckboxConfig(
        label="Clean converted voice",
        info=(
            "Whether to clean the converted voice using noise reduction"
            " algorithms.<br><br>"
        ),
        value=False,
        exclude_value=True,
    )
    clean_strength: SliderConfig = SliderConfig.clean_strength(visible=False)
    room_size: SliderConfig = SliderConfig(
        label="Room size",
        info=(
            "Size of the room which reverb effect simulates. Increase for longer reverb"
            " time."
        ),
        value=0.15,
        minimum=0.0,
        maximum=1.0,
    )
    wet_level: SliderConfig = SliderConfig(
        label="Wetness level",
        info="Loudness of converted vocals with reverb effect applied.",
        value=0.2,
        minimum=0.0,
        maximum=1.0,
    )
    dry_level: SliderConfig = SliderConfig(
        label="Dryness level",
        info="Loudness of converted vocals without reverb effect applied.",
        value=0.8,
        minimum=0.0,
        maximum=1.0,
    )
    damping: SliderConfig = SliderConfig(
        label="Damping level",
        info="Absorption of high frequencies in reverb effect.",
        value=0.7,
        minimum=0.0,
        maximum=1.0,
    )
    main_gain: SliderConfig = SliderConfig.gain(
        label="Main gain",
        info="The gain to apply to the main vocals.",
    )
    inst_gain: SliderConfig = SliderConfig.gain(
        label="Instrumentals gain",
        info="The gain to apply to the instrumentals.",
    )
    backup_gain: SliderConfig = SliderConfig.gain(
        label="Backup gain",
        info="The gain to apply to the backup vocals.",
    )


class SpeechGenerationConfig(GenerationConfig):
    """
    Common component configuration settings for speech generation tabs.

    Attributes
    ----------
    source_type : DropdownConfig
        Configuration settings for a source type dropdown component.
    source : TextboxConfig
        Configuration settings for an input source textbox component.
    edge_tts_voice : DropdownConfig
        Configuration settings for an Edge TTS voice dropdown
        component.
    n_octaves : SliderConfig
        Configuration settings for an octave pitch shift slider
        component.
    n_semitones : SliderConfig
        Configuration settings for a semitone pitch shift slider
        component.
    tts_pitch_shift : SliderConfig
        Configuration settings for a TTS pitch shift slider
        component.
    tts_speed_change : SliderConfig
        Configuration settings for a TTS speed change slider
        component.
    tts_volume_change : SliderConfig
        Configuration settings for a TTS volume change slider
        component.
    clean_voice : CheckboxConfig
        Configuration settings for a clean voice checkbox
        component.
    clean_strength : SliderConfig
        Configuration settings for a clean strength slider
        component.
    output_gain : GainSliderConfig
        Configuration settings for an output gain slider component.

    See Also
    --------
    GenerationConfig
        Parent model defining common component configuration settings
        for generation tabs.

    """

    source_type: DropdownConfig = DropdownConfig(
        label="Source type",
        info="The type of source to generate speech from.",
        value=SpeechSourceType.TEXT,
        choices=list(SpeechSourceType),
        type="index",
        exclude_value=True,
    )
    source: TextboxConfig = TextboxConfig(
        label="Source",
        info="Text to generate speech from",
        value=None,
        exclude_value=True,
    )
    edge_tts_voice: DropdownConfig = DropdownConfig(
        label="Edge TTS voice",
        info="Select a voice to use for text to speech conversion.",
        value=None,
        render=False,
        exclude_value=True,
    )
    n_octaves: SliderConfig = SliderConfig.octave_shift(
        label="Octave shift",
        info=(
            "The number of octaves to pitch-shift the converted speech by. Use 1 for"
            " male-to-female and -1 for vice-versa."
        ),
    )
    n_semitones: SliderConfig = SliderConfig.semitone_shift(
        label="Semitone shift",
        info="The number of semi-tones to pitch-shift the converted speech by.",
    )
    tts_pitch_shift: SliderConfig = SliderConfig(
        label="Edge TTS pitch shift",
        info=(
            "The number of hertz to shift the pitch of the speech generated by Edge"
            " TTS."
        ),
        value=0,
        minimum=-100,
        maximum=100,
        step=1,
    )
    tts_speed_change: SliderConfig = SliderConfig(
        label="TTS speed change",
        info="The percentual change to the speed of the speech generated by Edge TTS.",
        value=0,
        minimum=-50,
        maximum=100,
        step=1,
    )
    tts_volume_change: SliderConfig = SliderConfig(
        label="TTS volume change",
        info="The percentual change to the volume of the speech generated by Edge TTS.",
        value=0,
        minimum=-100,
        maximum=100,
        step=1,
    )
    clean_voice: CheckboxConfig = CheckboxConfig(
        label="Clean converted voice",
        info=(
            "Whether to clean the converted voice using noise reduction"
            " algorithms.<br><br>"
        ),
        value=True,
        exclude_value=True,
    )
    clean_strength: SliderConfig = SliderConfig.clean_strength(visible=True)
    output_gain: SliderConfig = SliderConfig.gain(
        label="Output gain",
        info="The gain to apply to the converted speech.<br><br>",
    )


total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()


def render_song_cover_multi_step_tab(
    total_config: TotalConfig, cookiefile: str | None = None
) -> None:
    """
    Render "Generate song cover - multi-step generation" tab.

    Parameters
    ----------
    total_config : TotalConfig
        Model containing all component configuration settings for the
        Ultimate RVC web UI.
    cookiefile : str, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube.

    """
    tab_config = total_config.song.multi_step
    for input_track in tab_config.input_audio.all:
        input_track.instantiate()
    with gr.Tab("Multi-step"):
        _render_step_0(total_config, cookiefile=cookiefile)
        _render_step_1(tab_config)
        _render_step_2(tab_config)
        _render_step_3(tab_config)
        _render_step_4(tab_config)
        _render_step_5(total_config, tab_config)


def _render_step_0(total_config: TotalConfig, cookiefile: str | None) -> None:
    tab_config = total_config.song.multi_step

    current_song_dir = gr.State(None)
    with gr.Accordion("Step 0: song retrieval", open=True):
        gr.Markdown("")
        with gr.Row():
            with gr.Column():
                tab_config.source_type.instantiate()
            with gr.Column():
                tab_config.source.instantiate()
                local_file = gr.Audio(
                    label="Source",
                    type="filepath",
                    visible=False,
                    waveform_options=gr.WaveformOptions(show_recording_waveform=True),
                )
                tab_config.cached_song.instance.render()

            tab_config.source_type.instance.input(
                partial(toggle_visible_component, 3),
                inputs=tab_config.source_type.instance,
                outputs=[
                    tab_config.source.instance,
                    local_file,
                    tab_config.cached_song.instance,
                ],
                show_progress="hidden",
            )

            local_file.change(
                update_value,
                inputs=local_file,
                outputs=tab_config.source.instance,
                show_progress="hidden",
            )
            tab_config.cached_song.instance.input(
                update_value,
                inputs=tab_config.cached_song.instance,
                outputs=tab_config.source.instance,
                show_progress="hidden",
            )
        with gr.Accordion("Options", open=False):
            song_transfer = _render_song_transfer(
                [SongTransferOption.STEP_1_AUDIO],
                "Song",
            )
        with gr.Row():
            retrieve_song_reset_btn = gr.Button("Reset options")
            retrieve_song_btn = gr.Button("Retrieve song", variant="primary")
        song_transfer_btn = gr.Button("Transfer song")
        song_output = gr.Audio(
            label="Song",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )

        retrieve_song_reset_btn.click(
            lambda: gr.Dropdown(value=[SongTransferOption.STEP_1_AUDIO]),
            outputs=song_transfer,
            show_progress="hidden",
        )

        retrieve_song_btn.click(
            partial(
                exception_harness(
                    retrieve_song,
                    info_msg="Song retrieved successfully!",
                ),
                cookiefile=cookiefile,
            ),
            inputs=tab_config.source.instance,
            outputs=[song_output, current_song_dir],
        ).then(
            partial(
                update_dropdowns,
                get_named_song_dirs,
                len(tab_config.song_dirs.all) + 2,
                value_indices=range(len(tab_config.song_dirs.all)),
            ),
            inputs=current_song_dir,
            outputs=[
                *tab_config.song_dirs.all,
                tab_config.cached_song.instance,
                total_config.song.one_click.cached_song.instance,
            ],
            show_progress="hidden",
        ).then(
            partial(update_dropdowns, get_named_song_dirs, 1, [], [0]),
            outputs=total_config.management.audio.intermediate.instance,
            show_progress="hidden",
        )
        setup_transfer_event(
            song_transfer_btn,
            song_transfer,
            song_output,
            tab_config.input_audio.all,
        )


def _render_step_1(tab_config: MultiStepSongGenerationConfig) -> None:
    with gr.Accordion("Step 1: vocal separation", open=False):
        tab_config.input_audio.audio.instance.render()
        tab_config.song_dirs.separate_audio.instance.render()
        with gr.Accordion("Options", open=False):
            with gr.Row():
                tab_config.separation_model.instantiate()
                tab_config.segment_size.instantiate()
            with gr.Row():
                primary_stem_transfer = _render_song_transfer(
                    [SongTransferOption.STEP_2_VOCALS],
                    "Primary stem",
                )
                secondary_stem_transfer = _render_song_transfer(
                    [SongTransferOption.STEP_4_INSTRUMENTALS],
                    "Secondary stem",
                )
        with gr.Row():
            separate_audio_reset_btn = gr.Button("Reset options")
            separate_vocals_btn = gr.Button("Separate vocals", variant="primary")
        with gr.Row():
            primary_stem_transfer_btn = gr.Button("Transfer primary stem")
            secondary_stem_transfer_btn = gr.Button("Transfer secondary stem")

        with gr.Row():
            primary_stem_output = gr.Audio(
                label="Primary stem",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )
            secondary_stem_output = gr.Audio(
                label="Secondary stem",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )

        separate_audio_reset_btn.click(
            lambda: [
                tab_config.separation_model.value,
                tab_config.segment_size.value,
                gr.Dropdown(value=[SongTransferOption.STEP_2_VOCALS]),
                gr.Dropdown(value=[SongTransferOption.STEP_4_INSTRUMENTALS]),
            ],
            outputs=[
                tab_config.separation_model.instance,
                tab_config.segment_size.instance,
                primary_stem_transfer,
                secondary_stem_transfer,
            ],
            show_progress="hidden",
        )
        separate_vocals_btn.click(
            exception_harness(
                separate_audio,
                info_msg="Vocals separated successfully!",
            ),
            inputs=[
                tab_config.input_audio.audio.instance,
                tab_config.song_dirs.separate_audio.instance,
                tab_config.separation_model.instance,
                tab_config.segment_size.instance,
            ],
            outputs=[primary_stem_output, secondary_stem_output],
            concurrency_limit=1,
            concurrency_id=ConcurrencyId.GPU,
        )
        for btn, transfer, output in [
            (primary_stem_transfer_btn, primary_stem_transfer, primary_stem_output),
            (
                secondary_stem_transfer_btn,
                secondary_stem_transfer,
                secondary_stem_output,
            ),
        ]:
            setup_transfer_event(
                btn,
                transfer,
                output,
                tab_config.input_audio.all,
            )


def _render_step_2(tab_config: MultiStepSongGenerationConfig) -> None:
    with gr.Accordion("Step 2: vocal conversion", open=False):
        tab_config.input_audio.vocals.instance.render()
        tab_config.voice_model.instance.render()
        tab_config.song_dirs.convert_vocals.instance.render()
        with gr.Accordion("Options", open=False):
            with gr.Row():
                tab_config.n_octaves.instantiate()
                tab_config.n_semitones.instantiate()

            converted_vocals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_3_VOCALS],
                "Converted vocals",
            )
            with gr.Accordion("Advanced", open=False):
                with gr.Accordion("Voice synthesis", open=False):
                    with gr.Row():
                        tab_config.f0_methods.instantiate()
                        tab_config.index_rate.instantiate()
                    with gr.Row():
                        tab_config.rms_mix_rate.instantiate()
                        tab_config.protect_rate.instantiate()
                        tab_config.hop_length.instantiate()
                with gr.Accordion("Vocal enrichment", open=False), gr.Row():
                    with gr.Column():
                        tab_config.split_voice.instantiate()
                    with gr.Column():
                        tab_config.autotune_voice.instantiate()
                        tab_config.autotune_strength.instantiate()
                    with gr.Column():
                        tab_config.clean_voice.instantiate()
                        tab_config.clean_strength.instantiate()
                tab_config.autotune_voice.instance.change(
                    partial(toggle_visibility, targets={True}),
                    inputs=tab_config.autotune_voice.instance,
                    outputs=tab_config.autotune_strength.instance,
                    show_progress="hidden",
                )
                tab_config.clean_voice.instance.change(
                    partial(toggle_visibility, targets={True}),
                    inputs=tab_config.clean_voice.instance,
                    outputs=tab_config.clean_strength.instance,
                    show_progress="hidden",
                )
                with gr.Accordion("Speaker embeddings", open=False), gr.Row():
                    with gr.Column():
                        tab_config.embedder_model.instantiate()
                        tab_config.custom_embedder_model.instance.render()
                    tab_config.sid.instantiate()
                tab_config.embedder_model.instance.change(
                    partial(toggle_visibility, targets={EmbedderModel.CUSTOM}),
                    inputs=tab_config.embedder_model.instance,
                    outputs=tab_config.custom_embedder_model.instance,
                    show_progress="hidden",
                )
        with gr.Row():
            convert_vocals_reset_btn = gr.Button("Reset options")
            convert_vocals_btn = gr.Button("Convert vocals", variant="primary")
        converted_vocals_transfer_btn = gr.Button("Transfer converted vocals")
        converted_vocals_track_output = gr.Audio(
            label="Converted vocals",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )

        convert_vocals_reset_btn.click(
            lambda: [
                tab_config.n_octaves.value,
                tab_config.n_semitones.value,
                tab_config.f0_methods.value,
                tab_config.index_rate.value,
                tab_config.rms_mix_rate.value,
                tab_config.protect_rate.value,
                tab_config.hop_length.value,
                tab_config.split_voice.value,
                tab_config.autotune_voice.value,
                tab_config.autotune_strength.value,
                tab_config.clean_voice.value,
                tab_config.clean_strength.value,
                tab_config.embedder_model.value,
                tab_config.sid.value,
                gr.Dropdown(value=[SongTransferOption.STEP_3_VOCALS]),
            ],
            outputs=[
                tab_config.n_octaves.instance,
                tab_config.n_semitones.instance,
                tab_config.f0_methods.instance,
                tab_config.index_rate.instance,
                tab_config.rms_mix_rate.instance,
                tab_config.protect_rate.instance,
                tab_config.hop_length.instance,
                tab_config.split_voice.instance,
                tab_config.autotune_voice.instance,
                tab_config.autotune_strength.instance,
                tab_config.clean_voice.instance,
                tab_config.clean_strength.instance,
                tab_config.embedder_model.instance,
                tab_config.sid.instance,
                converted_vocals_transfer,
            ],
            show_progress="hidden",
        )
        convert_vocals_btn.click(
            partial(
                exception_harness(convert, info_msg="Vocals converted successfully!"),
                content_type=RVCContentType.VOCALS,
            ),
            inputs=[
                tab_config.input_audio.vocals.instance,
                tab_config.song_dirs.convert_vocals.instance,
                tab_config.voice_model.instance,
                tab_config.n_octaves.instance,
                tab_config.n_semitones.instance,
                tab_config.f0_methods.instance,
                tab_config.index_rate.instance,
                tab_config.rms_mix_rate.instance,
                tab_config.protect_rate.instance,
                tab_config.hop_length.instance,
                tab_config.split_voice.instance,
                tab_config.autotune_voice.instance,
                tab_config.autotune_strength.instance,
                tab_config.clean_voice.instance,
                tab_config.clean_strength.instance,
                tab_config.embedder_model.instance,
                tab_config.custom_embedder_model.instance,
                tab_config.sid.instance,
            ],
            outputs=converted_vocals_track_output,
            concurrency_id=ConcurrencyId.GPU,
            concurrency_limit=1,
        )
        setup_transfer_event(
            converted_vocals_transfer_btn,
            converted_vocals_transfer,
            converted_vocals_track_output,
            tab_config.input_audio.all,
        )


def _render_step_3(tab_config: MultiStepSongGenerationConfig) -> None:
    with gr.Accordion("Step 3: vocal post-processing", open=False):
        tab_config.input_audio.converted_vocals.instance.render()
        tab_config.song_dirs.postprocess_vocals.instance.render()
        with gr.Accordion("Options", open=False):
            tab_config.room_size.instantiate()
            with gr.Row():
                tab_config.wet_level.instantiate()
                tab_config.dry_level.instantiate()
                tab_config.damping.instantiate()
            effected_vocals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_5_MAIN_VOCALS],
                "Effected vocals",
            )
        with gr.Row():
            postprocess_vocals_reset_btn = gr.Button("Reset options")
            postprocess_vocals_btn = gr.Button("Post-process vocals", variant="primary")
        effected_vocals_transfer_btn = gr.Button("Transfer effected vocals")

        effected_vocals_track_output = gr.Audio(
            label="Effected vocals",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )

        postprocess_vocals_reset_btn.click(
            lambda: [
                tab_config.room_size.value,
                tab_config.wet_level.value,
                tab_config.dry_level.value,
                tab_config.damping.value,
                gr.Dropdown(value=[SongTransferOption.STEP_5_MAIN_VOCALS]),
            ],
            outputs=[
                tab_config.room_size.instance,
                tab_config.wet_level.instance,
                tab_config.dry_level.instance,
                tab_config.damping.instance,
                effected_vocals_transfer,
            ],
            show_progress="hidden",
        )
        postprocess_vocals_btn.click(
            exception_harness(
                postprocess,
                info_msg="Vocals post-processed successfully!",
            ),
            inputs=[
                tab_config.input_audio.converted_vocals.instance,
                tab_config.song_dirs.postprocess_vocals.instance,
                tab_config.room_size.instance,
                tab_config.wet_level.instance,
                tab_config.dry_level.instance,
                tab_config.damping.instance,
            ],
            outputs=effected_vocals_track_output,
        )
        setup_transfer_event(
            effected_vocals_transfer_btn,
            effected_vocals_transfer,
            effected_vocals_track_output,
            tab_config.input_audio.all,
        )


def _render_step_4(tab_config: MultiStepSongGenerationConfig) -> None:
    with gr.Accordion("Step 4: pitch shift of background audio", open=False):
        with gr.Row():
            tab_config.input_audio.instrumentals.instance.render()
            tab_config.input_audio.backup_vocals.instance.render()
        with gr.Row():
            tab_config.n_semitones_instrumentals.instantiate()
            tab_config.n_semitones_backup_vocals.instantiate()
        tab_config.song_dirs.pitch_shift_background.instance.render()
        with gr.Accordion("Options", open=False), gr.Row():
            shifted_instrumentals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_5_INSTRUMENTALS],
                "Pitch-shifted instrumentals",
            )
            shifted_backup_vocals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_5_BACKUP_VOCALS],
                "Pitch-shifted backup vocals",
            )
        with gr.Row():
            pitch_shift_instrumentals_btn = gr.Button(
                "Pitch shift instrumentals",
                variant="primary",
            )
            pitch_shift_backup_vocals_btn = gr.Button(
                "Pitch shift backup vocals",
                variant="primary",
            )
        with gr.Row():
            shifted_instrumentals_transfer_btn = gr.Button(
                "Transfer shifted instrumentals",
            )
            shifted_backup_vocals_transfer_btn = gr.Button(
                "Transfer shifted backup vocals",
            )
        pitch_shift_background_reset_btn = gr.Button("Reset options")
        with gr.Row():
            shifted_instrumentals_track_output = gr.Audio(
                label="Pitch-shifted instrumentals",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )
            shifted_backup_vocals_track_output = gr.Audio(
                label="Pitch-shifted backup vocals",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )

        pitch_shift_background_reset_btn.click(
            lambda: [
                tab_config.n_semitones_instrumentals.value,
                tab_config.n_semitones_backup_vocals.value,
                gr.Dropdown(value=[SongTransferOption.STEP_5_INSTRUMENTALS]),
                gr.Dropdown(value=[SongTransferOption.STEP_5_BACKUP_VOCALS]),
            ],
            outputs=[
                tab_config.n_semitones_instrumentals.instance,
                tab_config.n_semitones_backup_vocals.instance,
                shifted_instrumentals_transfer,
                shifted_backup_vocals_transfer,
            ],
            show_progress="hidden",
        )
        pitch_shift_instrumentals_btn.click(
            exception_harness(
                pitch_shift,
                info_msg="Instrumentals pitch-shifted successfully!",
            ),
            inputs=[
                tab_config.input_audio.instrumentals.instance,
                tab_config.song_dirs.pitch_shift_background.instance,
                tab_config.n_semitones_instrumentals.instance,
            ],
            outputs=shifted_instrumentals_track_output,
        )
        pitch_shift_backup_vocals_btn.click(
            exception_harness(
                pitch_shift,
                info_msg="Backup vocals pitch-shifted successfully!",
            ),
            inputs=[
                tab_config.input_audio.backup_vocals.instance,
                tab_config.song_dirs.pitch_shift_background.instance,
                tab_config.n_semitones_backup_vocals.instance,
            ],
            outputs=shifted_backup_vocals_track_output,
        )
        for btn, transfer, output in [
            (
                shifted_instrumentals_transfer_btn,
                shifted_instrumentals_transfer,
                shifted_instrumentals_track_output,
            ),
            (
                shifted_backup_vocals_transfer_btn,
                shifted_backup_vocals_transfer,
                shifted_backup_vocals_track_output,
            ),
        ]:
            setup_transfer_event(
                btn,
                transfer,
                output,
                tab_config.input_audio.all,
            )


def _render_step_5(
    total_config: TotalConfig,
    tab_config: MultiStepSongGenerationConfig,
) -> None:
    with gr.Accordion("Step 5: song mixing", open=False):
        with gr.Row():
            tab_config.input_audio.main_vocals.instance.render()
            tab_config.input_audio.shifted_instrumentals.instance.render()
            tab_config.input_audio.shifted_backup_vocals.instance.render()
        tab_config.song_dirs.mix.instance.render()
        with gr.Accordion("Options", open=False):
            with gr.Row():
                tab_config.main_gain.instantiate()
                tab_config.inst_gain.instantiate()
                tab_config.backup_gain.instantiate()
            with gr.Row():
                tab_config.output_name.instantiate(
                    value=partial(
                        update_output_name,
                        get_song_cover_name,
                        False,  # noqa: FBT003,
                    ),
                    inputs=[
                        tab_config.input_audio.main_vocals.instance,
                        tab_config.song_dirs.mix.instance,
                    ],
                )
                tab_config.output_sr.instantiate()
                tab_config.output_format.instantiate()
            song_cover_transfer = _render_song_transfer([], "Song cover")
        with gr.Row():
            mix_reset_btn = gr.Button("Reset options")
            mix_btn = gr.Button("Mix song cover", variant="primary")
        song_cover_transfer_btn = gr.Button("Transfer song cover")
        song_cover_output = gr.Audio(
            label="Song cover",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )
        mix_reset_btn.click(
            lambda: [
                tab_config.main_gain.value,
                tab_config.inst_gain.value,
                tab_config.backup_gain.value,
                tab_config.output_sr.value,
                tab_config.output_format.value,
                gr.Dropdown(value=[]),
            ],
            outputs=[
                tab_config.main_gain.instance,
                tab_config.inst_gain.instance,
                tab_config.backup_gain.instance,
                tab_config.output_sr.instance,
                tab_config.output_format.instance,
                song_cover_transfer,
            ],
            show_progress="hidden",
        )
        temp_audio_gains = gr.State()
        mix_btn.click(
            partial(
                _pair_audio_tracks_and_gain,
                [
                    tab_config.input_audio.main_vocals.instance,
                    tab_config.input_audio.shifted_instrumentals.instance,
                    tab_config.input_audio.shifted_backup_vocals.instance,
                ],
                [
                    tab_config.main_gain.instance,
                    tab_config.inst_gain.instance,
                    tab_config.backup_gain.instance,
                ],
            ),
            inputs={
                tab_config.input_audio.main_vocals.instance,
                tab_config.input_audio.shifted_instrumentals.instance,
                tab_config.input_audio.shifted_backup_vocals.instance,
                tab_config.main_gain.instance,
                tab_config.inst_gain.instance,
                tab_config.backup_gain.instance,
            },
            outputs=temp_audio_gains,
        ).then(
            exception_harness(mix_song, info_msg="Song cover succesfully generated."),
            inputs=[
                temp_audio_gains,
                tab_config.song_dirs.mix.instance,
                tab_config.output_sr.instance,
                tab_config.output_format.instance,
                tab_config.output_name.instance,
            ],
            outputs=song_cover_output,
        ).then(
            partial(update_dropdowns, get_saved_output_audio, 1, [], [0]),
            outputs=total_config.management.audio.output.instance,
            show_progress="hidden",
        )
        setup_transfer_event(
            song_cover_transfer_btn,
            song_cover_transfer,
            song_cover_output,
            tab_config.input_audio.all,
        )


def _render_song_transfer(
    value: list[SongTransferOption],
    label_prefix: str,
) -> gr.Dropdown:
    return render_transfer_component(value, label_prefix, SongTransferOption)


def _pair_audio_tracks_and_gain(
    audio_components: Sequence[gr.Audio],
    gain_components: Sequence[gr.Slider],
    data: dict[gr.Audio | gr.Slider, Any],
) -> list[tuple[str, int]]:
    """
    Pair audio tracks and gain levels stored in separate gradio
    components.

    This function is meant to first be partially applied to the sequence
    of audio components and the sequence of slider components containing
    the values that should be combined. The resulting function can then
    be called by an event listener whose inputs is a set containing
    those audio and slider components. The `data` parameter in that case
    will contain a mapping from each of those components to the value
    that the component stores.

    Parameters
    ----------
    audio_components : Sequence[gr.Audio]
        Audio components to pair with gain levels.
    gain_components : Sequence[gr.Slider]
        Gain level components to pair with audio tracks.
    data : dict[gr.Audio | gr.Slider, Any]
        Data from the audio and gain components.

    Returns
    -------
    list[tuple[str, int]]
        Paired audio tracks and gain levels.

    Raises
    ------
    ValueError
        If the number of audio tracks and gain levels are not the same.

    """
    audio_tracks = [data[component] for component in audio_components]
    gain_levels = [data[component] for component in gain_components]
    if len(audio_tracks) != len(gain_levels):
        err_msg = "Number of audio tracks and gain levels must be the same."
        raise ValueError(err_msg)
    return [
        (audio_track, gain_level)
        for audio_track, gain_level in zip(audio_tracks, gain_levels, strict=True)
        if audio_track
    ]


def run_newpipeline(
    source: str,
    model_name: str,
    n_octaves: int = 0,
    n_semitones: int = 0,
    f0_methods: Sequence[F0Method] | None = None,
    index_rate: float = 0.3,
    rms_mix_rate: float = 1.0,
    protect_rate: float = 0.33,
    hop_length: int = 128,
    split_vocals: bool = False,
    autotune_vocals: bool = False,
    autotune_strength: float = 1.0,
    clean_vocals: bool = False,
    clean_strength: float = 0.7,
    embedder_model: EmbedderModel = EmbedderModel.CONTENTVEC,
    custom_embedder_model: str | None = None,
    sid: int = 0,
    room_size: float = 0.15,
    wet_level: float = 0.2,
    dry_level: float = 0.8,
    damping: float = 0.7,
    main_gain: int = 0,
    inst_gain: int = 0,
    backup_gain: int = 0,
    output_sr: int = 44100,
    output_format: AudioExt = AudioExt.MP3,
    output_name: str | None = None,
    cookiefile: StrPath | None = None,
    progress_bar: gr.Progress | None = None,
) -> tuple[Path, ...]:
    """
    Run the song cover generation pipeline.

    Parameters
    ----------
    source : str
        A Youtube URL, the path to a local audio file or the path to a
        song directory.
    model_name : str
        The name of the voice model to use for vocal conversion.
    n_octaves : int, default=0
        The number of octaves to pitch-shift the converted vocals by.
    n_semitones : int, default=0
        The number of semi-tones to pitch-shift the converted vocals,
        instrumentals, and backup vocals by.
    f0_methods : Sequence[F0Method], optional
        The methods to use for pitch extraction during vocal
        conversion. If None, the method used is rmvpe.
    index_rate : float, default=0.3
        The influence of the index file on the vocal conversion.
    rms_mix_rate : float, default=1.0
        The blending rate of the volume envelope of the converted
        vocals.
    protect_rate : float, default=0.33
        The protect rate for consonants and breathing sounds during
        vocal conversion.
    hop_length : int, default=128
        The hop length to use for crepe-based pitch detection.
    split_vocals : bool, default=False
        Whether to perform audio splitting before converting the main
        vocals.
    autotune_vocals : bool, default=False
        Whether to apply autotune to the converted vocals.
    autotune_strength : float, default=1.0
        The strength of the autotune to apply to the converted vocals.
    clean_vocals : bool, default=False
        Whether to clean the converted vocals.
    clean_strength : float, default=0.7
        The intensity of the cleaning to apply to the converted vocals.
    embedder_model : EmbedderModel, default=EmbedderModel.CONTENTVEC
        The model to use for generating speaker embeddings during vocal
        conversion.
    custom_embedder_model : StrPath, optional
        The name of a custom embedder model to use for generating
        speaker embeddings during vocal conversion.
    sid : int, default=0
        The speaker id to use for multi-speaker models during vocal
        conversion.
    room_size : float, default=0.15
        The room size of the reverb effect to apply to the converted
        vocals.
    wet_level : float, default=0.2
        The wetness level of the reverb effect to apply to the converted
        vocals.
    dry_level : float, default=0.8
        The dryness level of the reverb effect to apply to the converted
        vocals.
    damping : float, default=0.7
        The damping of the reverb effect to apply to the converted
        vocals.
    main_gain : int, default=0
        The gain to apply to the post-processed vocals.
    inst_gain : int, default=0
        The gain to apply to the pitch-shifted instrumentals.
    backup_gain : int, default=0
        The gain to apply to the pitch-shifted backup vocals.
    output_sr : int, default=44100
        The sample rate of the song cover.
    output_format : AudioExt, default=AudioExt.MP3
        The audio format of the song cover.
    output_name : str, optional
        The name of the song cover.
    cookiefile : StrPath, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.

    Returns
    -------
    tuple[Path,...]
        The path to the generated song cover and the paths to any
        intermediate audio files that were generated.

    """
    validate_model(model_name, Entity.VOICE_MODEL)
    if embedder_model == EmbedderModel.CUSTOM:
        validate_model(custom_embedder_model, Entity.CUSTOM_EMBEDDER_MODEL)
    display_progress("[~] Retrieving song...", 0 / 9, progress_bar)
    song, song_dir = retrieve_song(source, cookiefile=cookiefile)
    display_progress(
        "[~] newpipeline: Separating vocals from instrumentals...", 1 / 9, progress_bar
    )
    vocals_track, instrumentals_track = separate_audio(
        song,
        song_dir,
        SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
        SegmentSize.SEG_2048,
    )
    display_progress(
        "[~] newpipeline: Separating main vocals from backup vocals...",
        2 / 9,
        progress_bar,
    )
    backup_vocals_track, main_vocals_track = separate_audio(
        vocals_track,
        song_dir,
        SeparationModel.UVR_MDX_NET_KARA_2,
        SegmentSize.SEG_2048,
    )
    display_progress("[~] newpipeline: De-noising vocals...", 3 / 9, progress_bar)
    noise_track, clean_track = separate_audio(
        clean_track,
        song_dir,
        SeparationModel.UVR_DeNoise,
        SegmentSize.SEG_2048,
    )
    display_progress("[~] newpipeline: De-reverbing vocals...", 4 / 9, progress_bar)
    reverb_track, vocals_dereverb_track = separate_audio(
        main_vocals_track,
        song_dir,
        SeparationModel.UVR_DeEcho_DeReverb,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] newpipeline: Converting vocals...", 5 / 9, progress_bar)
    converted_vocals_track = convert(
        audio_track=vocals_dereverb_track,
        directory=song_dir,
        model_name=model_name,
        n_octaves=n_octaves,
        n_semitones=n_semitones,
        f0_methods=f0_methods,
        index_rate=index_rate,
        rms_mix_rate=rms_mix_rate,
        protect_rate=protect_rate,
        hop_length=hop_length,
        split_audio=split_vocals,
        autotune_audio=autotune_vocals,
        autotune_strength=autotune_strength,
        clean_audio=clean_vocals,
        clean_strength=clean_strength,
        embedder_model=embedder_model,
        custom_embedder_model=custom_embedder_model,
        sid=sid,
        content_type=RVCContentType.VOCALS,
    )
    display_progress("[~] newpipeline: Post-processing vocals...", 6 / 9, progress_bar)
    effected_vocals_track = postprocess(
        converted_vocals_track,
        song_dir,
        room_size,
        wet_level,
        dry_level,
        damping,
    )
    display_progress(
        "[~] newpipeline: Pitch-shifting instrumentals...", 7 / 9, progress_bar
    )
    shifted_instrumentals_track = pitch_shift(
        instrumentals_track,
        song_dir,
        n_semitones,
    )
    display_progress(
        "[~] newpipeline: Pitch-shifting backup vocals...", 8 / 9, progress_bar
    )
    shifted_backup_vocals_track = pitch_shift(
        backup_vocals_track,
        song_dir,
        n_semitones,
    )

    song_cover = mix_song(
        [
            (effected_vocals_track, main_gain),
            (shifted_instrumentals_track, inst_gain),
            (shifted_backup_vocals_track, backup_gain),
        ],
        song_dir,
        output_sr,
        output_format,
        output_name,
    )
    return (
        song_cover,
        song,
        vocals_track,
        instrumentals_track,
        main_vocals_track,
        backup_vocals_track,
        vocals_dereverb_track,
        reverb_track,
        converted_vocals_track,
        effected_vocals_track,
        shifted_instrumentals_track,
        shifted_backup_vocals_track,
    )


def render_app() -> gr.Blocks:
    """
    Render the Ultimate RVC web application.

    Returns
    -------
    gr.Blocks
        The rendered web application.

    """
    css = """
    h1 { text-align: center; margin-top: 20px; margin-bottom: 20px; }

    #generate-tab-button { font-weight: bold !important;}
    #manage-tab-button { font-weight: bold !important;}
    #audio-tab-button { font-weight: bold !important;}
    #settings-tab-button { font-weight: bold !important;}
    """
    cache_delete_frequency = 86400  # every 24 hours check for files to delete
    cache_delete_cutoff = 86400  # and delete files older than 24 hours

    with gr.Blocks(
        title="Redzone-6 Audio Playground",
        theme=gr.Theme.load(str(Path(__file__).parent / "config/theme.json")),
        css=css,
        delete_cache=(cache_delete_frequency, cache_delete_cutoff),
    ) as app:
        for component_config in [
            total_config.song.one_click.voice_model,
            total_config.song.one_click.cached_song,
            total_config.song.one_click.custom_embedder_model,
            total_config.song.multi_step.voice_model,
            total_config.song.multi_step.cached_song,
            total_config.song.multi_step.custom_embedder_model,
            total_config.song.multi_step.song_dirs.separate_audio,
            total_config.song.multi_step.song_dirs.convert_vocals,
            total_config.song.multi_step.song_dirs.postprocess_vocals,
            total_config.song.multi_step.song_dirs.pitch_shift_background,
            total_config.song.multi_step.song_dirs.mix,
            total_config.speech.one_click.edge_tts_voice,
            total_config.speech.one_click.voice_model,
            total_config.speech.one_click.custom_embedder_model,
            total_config.speech.multi_step.edge_tts_voice,
            total_config.speech.multi_step.voice_model,
            total_config.speech.multi_step.custom_embedder_model,
            total_config.training.multi_step.dataset,
            total_config.training.multi_step.preprocess_model,
            total_config.training.multi_step.extract_model,
            total_config.training.multi_step.train_model,
            total_config.training.multi_step.custom_embedder_model,
            total_config.training.multi_step.custom_pretrained_model,
            total_config.management.audio.intermediate,
            total_config.management.audio.speech,
            total_config.management.audio.output,
            total_config.management.audio.dataset,
            total_config.management.model.voices,
            total_config.management.model.embedders,
            total_config.management.model.pretraineds,
            total_config.management.model.traineds,
            total_config.management.settings.load_config_name,
            total_config.management.settings.delete_config_names,
        ]:
            component_config.instantiate()
        # main tab
        # with gr.Tab("Generate", elem_id="generate-tab"):
        with gr.Tab("Music", elem_id="generate-tab"):
            render_song_cover_one_click_tab(total_config, cookiefile)
            render_song_cover_multi_step_tab(total_config, cookiefile)
        with gr.Tab("Speech", elem_id="generate-tab"):
            render_speech_one_click_tab(total_config)
            render_speech_multi_step_tab(total_config)
        with gr.Tab("Configuration", elem_id="settings-tab"):
            with gr.Tab("Models"):
                render_models_tab(total_config)
            with gr.Tab("Settings"):
                render_settings_tab(total_config)
                render_audio_tab(total_config)

        app.load(
            _init_dropdowns,
            outputs=[
                total_config.speech.one_click.edge_tts_voice.instance,
                total_config.speech.multi_step.edge_tts_voice.instance,
                total_config.song.one_click.voice_model.instance,
                total_config.song.multi_step.voice_model.instance,
                total_config.speech.one_click.voice_model.instance,
                total_config.speech.multi_step.voice_model.instance,
                total_config.management.model.voices.instance,
                total_config.song.one_click.custom_embedder_model.instance,
                total_config.song.multi_step.custom_embedder_model.instance,
                total_config.speech.one_click.custom_embedder_model.instance,
                total_config.speech.multi_step.custom_embedder_model.instance,
                total_config.training.multi_step.custom_embedder_model.instance,
                total_config.management.model.embedders.instance,
                total_config.training.multi_step.custom_pretrained_model.instance,
                total_config.management.model.pretraineds.instance,
                total_config.training.multi_step.extract_model.instance,
                total_config.training.multi_step.train_model.instance,
                total_config.training.multi_step.preprocess_model.instance,
                total_config.management.model.traineds.instance,
                total_config.song.one_click.cached_song.instance,
                total_config.song.multi_step.cached_song.instance,
                total_config.song.multi_step.song_dirs.separate_audio.instance,
                total_config.song.multi_step.song_dirs.convert_vocals.instance,
                total_config.song.multi_step.song_dirs.postprocess_vocals.instance,
                total_config.song.multi_step.song_dirs.pitch_shift_background.instance,
                total_config.song.multi_step.song_dirs.mix.instance,
                total_config.management.audio.intermediate.instance,
                total_config.training.multi_step.dataset.instance,
                total_config.management.audio.speech.instance,
                total_config.management.audio.output.instance,
                total_config.management.audio.dataset.instance,
                total_config.management.settings.load_config_name.instance,
                total_config.management.settings.delete_config_names.instance,
            ],
            show_progress="hidden",
        )
    return app


def _init_dropdowns() -> list[gr.Dropdown]:
    """
    Initialize the Ultimate RVC web application by updating the choices
    and default values of non-static dropdown components.

    Returns
    -------
    tuple[gr.Dropdown, ...]
        A tuple of gr.Dropdown components with updated choices and
        default values.

    """
    # Initialize model dropdowns
    edge_tts_models = initialize_dropdowns(
        get_edge_tts_voice_names,
        2,
        "en-US-ChristopherNeural",
        range(2),
    )
    voice_models = initialize_dropdowns(
        get_voice_model_names,
        5,
        value_indices=range(4),
    )
    custom_embedder_models = initialize_dropdowns(
        get_custom_embedder_model_names,
        6,
        value_indices=range(5),
    )
    custom_pretrained_models = initialize_dropdowns(
        get_custom_pretrained_model_names,
        2,
        value_indices=range(1),
    )
    training_models = initialize_dropdowns(
        get_training_model_names,
        4,
        value_indices=range(2),
    )
    song_dirs = initialize_dropdowns(
        get_named_song_dirs,
        8,
        value_indices=range(7),
    )
    dataset = gr.Dropdown(get_audio_datasets())
    speech_delete = gr.Dropdown(get_saved_speech_audio())
    output_delete = gr.Dropdown(get_saved_output_audio())
    dataset_delete = gr.Dropdown(get_named_audio_datasets())
    configs = initialize_dropdowns(get_config_names, 2, value_indices=range(1))
    return [
        *edge_tts_models,
        *voice_models,
        *custom_embedder_models,
        *custom_pretrained_models,
        *training_models,
        *song_dirs,
        dataset,
        speech_delete,
        output_delete,
        dataset_delete,
        *configs,
    ]


def render_song_cover_one_click_tab(
    total_config: TotalConfig, cookiefile: str | None = None
) -> None:
    """
    Render "Generate song covers - One-click generation" tab.

    Parameters
    ----------
    total_config : TotalConfig
        Model containing all component configuration settings for the
        Ultimate RVC web UI.
    cookiefile : str, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube.

    """
    with gr.Tab("One-click"):
        tab_config = total_config.song.one_click
        _render_input(tab_config)
        with gr.Accordion("Options", open=False):
            _render_main_options(tab_config)
            _render_conversion_options(tab_config)
            _render_mixing_options(tab_config)
            _render_output_options(tab_config)
            _render_intermediate_audio(tab_config)

        with gr.Row(equal_height=True):
            reset_btn = gr.Button(value="Reset options", scale=2)
            generate_btn = gr.Button("Generate", scale=2, variant="primary")
        song_cover = gr.Audio(
            label="Song cover",
            scale=3,
            waveform_options=gr.WaveformOptions(show_recording_waveform=False),
        )
        song_dirs = total_config.song.multi_step.song_dirs.all
        generate_btn.click(
            partial(
                exception_harness(
                    run_pipeline,
                    info_msg="Song cover generated successfully!",
                ),
                cookiefile=cookiefile,
                progress_bar=PROGRESS_BAR,
            ),
            inputs=[
                tab_config.source.instance,
                tab_config.voice_model.instance,
                tab_config.n_octaves.instance,
                tab_config.n_semitones.instance,
                tab_config.f0_methods.instance,
                tab_config.index_rate.instance,
                tab_config.rms_mix_rate.instance,
                tab_config.protect_rate.instance,
                tab_config.hop_length.instance,
                tab_config.split_voice.instance,
                tab_config.autotune_voice.instance,
                tab_config.autotune_strength.instance,
                tab_config.clean_voice.instance,
                tab_config.clean_strength.instance,
                tab_config.embedder_model.instance,
                tab_config.custom_embedder_model.instance,
                tab_config.sid.instance,
                tab_config.room_size.instance,
                tab_config.wet_level.instance,
                tab_config.dry_level.instance,
                tab_config.damping.instance,
                tab_config.main_gain.instance,
                tab_config.inst_gain.instance,
                tab_config.backup_gain.instance,
                tab_config.output_sr.instance,
                tab_config.output_format.instance,
                tab_config.output_name.instance,
            ],
            outputs=[song_cover, *tab_config.intermediate_audio.all],
            concurrency_limit=4,
            concurrency_id=ConcurrencyId.GPU,
        ).success(
            partial(update_dropdowns, get_named_song_dirs, 3 + len(song_dirs), [], [2]),
            outputs=[
                total_config.song.one_click.cached_song.instance,
                total_config.song.multi_step.cached_song.instance,
                total_config.management.audio.intermediate.instance,
                *song_dirs,
            ],
            show_progress="hidden",
        ).then(
            partial(update_dropdowns, get_saved_output_audio, 1, [], [0]),
            outputs=total_config.management.audio.output.instance,
            show_progress="hidden",
        )
        reset_btn.click(
            lambda: [
                tab_config.n_octaves.value,
                tab_config.n_semitones.value,
                tab_config.f0_methods.value,
                tab_config.index_rate.value,
                tab_config.rms_mix_rate.value,
                tab_config.protect_rate.value,
                tab_config.hop_length.value,
                tab_config.split_voice.value,
                tab_config.autotune_voice.value,
                tab_config.autotune_strength.value,
                tab_config.clean_voice.value,
                tab_config.clean_strength.value,
                tab_config.embedder_model.value,
                tab_config.sid.value,
                tab_config.room_size.value,
                tab_config.wet_level.value,
                tab_config.dry_level.value,
                tab_config.damping.value,
                tab_config.main_gain.value,
                tab_config.inst_gain.value,
                tab_config.backup_gain.value,
                tab_config.output_sr.value,
                tab_config.output_format.value,
                tab_config.show_intermediate_audio.value,
            ],
            outputs=[
                tab_config.n_octaves.instance,
                tab_config.n_semitones.instance,
                tab_config.f0_methods.instance,
                tab_config.index_rate.instance,
                tab_config.rms_mix_rate.instance,
                tab_config.protect_rate.instance,
                tab_config.hop_length.instance,
                tab_config.split_voice.instance,
                tab_config.autotune_voice.instance,
                tab_config.autotune_strength.instance,
                tab_config.clean_voice.instance,
                tab_config.clean_strength.instance,
                tab_config.embedder_model.instance,
                tab_config.sid.instance,
                tab_config.room_size.instance,
                tab_config.wet_level.instance,
                tab_config.dry_level.instance,
                tab_config.damping.instance,
                tab_config.main_gain.instance,
                tab_config.inst_gain.instance,
                tab_config.backup_gain.instance,
                tab_config.output_sr.instance,
                tab_config.output_format.instance,
                tab_config.show_intermediate_audio.instance,
            ],
            show_progress="hidden",
        )


def _render_input(tab_config: OneClickSongGenerationConfig) -> None:
    with gr.Row():
        with gr.Column():
            tab_config.source_type.instantiate()
        with gr.Column():
            tab_config.source.instantiate()
            local_file = gr.Audio(
                label="Source",
                type="filepath",
                visible=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=False),
            )
            tab_config.cached_song.instance.render()
        tab_config.source_type.instance.input(
            partial(toggle_visible_component, 3),
            inputs=tab_config.source_type.instance,
            outputs=[
                tab_config.source.instance,
                local_file,
                tab_config.cached_song.instance,
            ],
            show_progress="hidden",
        )

        local_file.change(
            update_value,
            inputs=local_file,
            outputs=tab_config.source.instance,
            show_progress="hidden",
        )
        tab_config.cached_song.instance.input(
            update_value,
            inputs=tab_config.cached_song.instance,
            outputs=tab_config.source.instance,
            show_progress="hidden",
        )

    with gr.Row():
        tab_config.voice_model.instance.render()


def _render_main_options(tab_config: OneClickSongGenerationConfig) -> None:
    with gr.Row():
        tab_config.n_octaves.instantiate()
        tab_config.n_semitones.instantiate()


def _render_conversion_options(tab_config: OneClickSongGenerationConfig) -> None:
    with gr.Accordion("Vocal conversion", open=True):
        gr.Markdown("")
        with gr.Accordion("Voice synthesis", open=True):
            with gr.Row():
                tab_config.f0_methods.instantiate()
                tab_config.index_rate.instantiate()
            with gr.Row():
                tab_config.rms_mix_rate.instantiate()
                tab_config.protect_rate.instantiate()
                tab_config.hop_length.instantiate()
        with gr.Accordion("Vocal enrichment", open=True):
            with gr.Row():
                with gr.Column():
                    tab_config.split_voice.instantiate()
                with gr.Column():
                    tab_config.autotune_voice.instantiate()
                    tab_config.autotune_strength.instantiate()
                with gr.Column():
                    tab_config.clean_voice.instantiate()
                    tab_config.clean_strength.instantiate()
            tab_config.autotune_voice.instance.change(
                partial(toggle_visibility, targets={True}),
                inputs=tab_config.autotune_voice.instance,
                outputs=tab_config.autotune_strength.instance,
                show_progress="hidden",
            )
            tab_config.clean_voice.instance.change(
                partial(toggle_visibility, targets={True}),
                inputs=tab_config.clean_voice.instance,
                outputs=tab_config.clean_strength.instance,
                show_progress="hidden",
            )
        with gr.Accordion("Speaker embedding", open=True):
            with gr.Row():
                with gr.Column():
                    tab_config.embedder_model.instantiate()
                    tab_config.custom_embedder_model.instance.render()
                tab_config.sid.instantiate()
            tab_config.embedder_model.instance.change(
                partial(toggle_visibility, targets={EmbedderModel.CUSTOM}),
                inputs=tab_config.embedder_model.instance,
                outputs=tab_config.custom_embedder_model.instance,
                show_progress="hidden",
            )


def _render_mixing_options(tab_config: OneClickSongGenerationConfig) -> None:
    with gr.Accordion("Audio mixing", open=True):
        gr.Markdown("")
        with gr.Accordion("Reverb control on converted vocals", open=True):
            with gr.Row():
                tab_config.room_size.instantiate()
            with gr.Row():
                tab_config.wet_level.instantiate()
                tab_config.dry_level.instantiate()
                tab_config.damping.instantiate()

        with gr.Accordion("Volume controls (dB)", open=True), gr.Row():
            tab_config.main_gain.instantiate()
            tab_config.inst_gain.instantiate()
            tab_config.backup_gain.instantiate()


def _render_output_options(tab_config: OneClickSongGenerationConfig) -> None:
    with gr.Accordion("Audio output", open=True):
        with gr.Row():
            tab_config.output_name.instantiate(
                value=partial(
                    update_output_name,
                    get_song_cover_name,
                    True,  # noqa: FBT003
                ),
                inputs=[
                    gr.State(None),
                    tab_config.cached_song.instance,
                    tab_config.voice_model.instance,
                ],
            )
            tab_config.output_sr.instantiate()
            tab_config.output_format.instantiate()
        with gr.Row():
            tab_config.show_intermediate_audio.instantiate()


def _render_intermediate_audio(tab_config: OneClickSongGenerationConfig) -> None:
    with gr.Accordion(
        "Intermediate audio tracks",
        open=False,
        visible=False,
    ) as intermediate_audio_accordion:
        with gr.Accordion(
            "Step 0: song retrieval",
            open=False,
        ) as song_retrieval_accordion:
            tab_config.intermediate_audio.song.instantiate()
        with (
            gr.Accordion(
                "Step 1a: vocals/instrumentals separation",
                open=False,
            ) as vocals_separation_accordion,
            gr.Row(),
        ):
            tab_config.intermediate_audio.vocals.instantiate()
            tab_config.intermediate_audio.instrumentals.instantiate()
        with (
            gr.Accordion(
                "Step 1b: main vocals/ backup vocals separation",
                open=False,
            ) as main_vocals_separation_accordion,
            gr.Row(),
        ):
            tab_config.intermediate_audio.main_vocals.instantiate()
            tab_config.intermediate_audio.backup_vocals.instantiate()
        with (
            gr.Accordion(
                "Step 1c: main vocals cleanup",
                open=False,
            ) as vocal_cleanup_accordion,
            gr.Row(),
        ):
            tab_config.intermediate_audio.main_vocals_dereverbed.instantiate()
            tab_config.intermediate_audio.main_vocals_reverb.instantiate()
        with gr.Accordion(
            "Step 2: conversion of main vocals",
            open=False,
        ) as vocal_conversion_accordion:
            tab_config.intermediate_audio.converted_vocals.instantiate()
        with gr.Accordion(
            "Step 3: post-processing of converted vocals",
            open=False,
        ) as vocals_postprocessing_accordion:
            tab_config.intermediate_audio.postprocessed_vocals.instantiate()
        with (
            gr.Accordion(
                "Step 4: pitch shift of background tracks",
                open=False,
            ) as pitch_shift_accordion,
            gr.Row(),
        ):
            tab_config.intermediate_audio.instrumentals_shifted.instantiate()
            tab_config.intermediate_audio.backup_vocals_shifted.instantiate()

    tab_config.show_intermediate_audio.instance.change(
        partial(toggle_intermediate_audio, num_components=7),
        inputs=tab_config.show_intermediate_audio.instance,
        outputs=[
            intermediate_audio_accordion,
            song_retrieval_accordion,
            vocals_separation_accordion,
            main_vocals_separation_accordion,
            vocal_cleanup_accordion,
            vocal_conversion_accordion,
            vocals_postprocessing_accordion,
            pitch_shift_accordion,
        ],
        show_progress="hidden",
    )


app = render_app()
app_wrapper = typer.Typer()


@app_wrapper.command()
def start_app(
    share: Annotated[
        bool,
        typer.Option("--share", "-s", help="Enable sharing"),
    ] = False,
    listen: Annotated[
        bool,
        typer.Option(
            "--listen",
            "-l",
            help="Make the web application reachable from your local network.",
        ),
    ] = False,
    listen_host: Annotated[
        str | None,
        typer.Option(
            "--listen-host",
            "-h",
            help="The hostname that the server will use.",
        ),
    ] = "0.0.0.0",
    listen_port: Annotated[
        int | None,
        typer.Option(
            "--listen-port",
            "-p",
            help="The listening port that the server will use.",
        ),
    ] = None,
    ssr_mode: Annotated[
        bool,
        typer.Option(
            "--ssr-mode",
            help="Enable server-side rendering mode.",
        ),
    ] = False,
) -> None:
    """Run the Ultimate RVC web application."""
    os.environ["GRADIO_TEMP_DIR"] = str(TEMP_DIR)
    gr.set_static_paths([MODELS_DIR, AUDIO_DIR])
    #    app.queue()
    app.launch(
        server_name=listen_host,
        server_port=listen_port,
        ssr_mode=ssr_mode,
    )


load_config("default", TotalConfig)
if __name__ == "__main__":
    app_wrapper()