wubby

Running

App Files Files

lainlives committed on Dec 10, 2025

Commit

92a1819

1 Parent(s): a9240b9

~

Browse files

Files changed (1) hide show

app.py +618 -30

app.py CHANGED Viewed

@@ -34,8 +34,6 @@ from ultimate_rvc.web.config.tab import (SongGenerationConfig,
                                          TrainingConfig)
 from ultimate_rvc.web.tabs.generate.song_cover.multi_step_generation import \
     render as render_song_cover_multi_step_tab
-from ultimate_rvc.web.tabs.generate.song_cover.one_click_generation import \
-    render as render_song_cover_one_click_tab
 from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import \
     render as render_speech_multi_step_tab
 from ultimate_rvc.web.tabs.generate.speech.one_click_generation import \
@@ -46,7 +44,44 @@ from ultimate_rvc.web.tabs.manage.settings import render as render_settings_tab
 if TYPE_CHECKING:
     import gradio as gr
 type StrPath = str | PathLike[str]
@@ -306,32 +341,6 @@ class SeparationModel(StrEnum):
     UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
     REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
     UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
-    UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
-    Kim_Vocal_1 = "Kim_Vocal_1.onnx"
-    Kim_Vocal_2 = "Kim_Vocal_2.onnx"
-    Kim_Inst = "Kim_Inst.onnx"
-    UVR_MDX_NET_Inst_HQ_4 = "UVR-MDX-NET-Inst_HQ_4.onnx"
-    kuielab_a_vocals = "kuielab_a_vocals.onnx"
-    kuielab_b_vocals = "kuielab_b_vocals.onnx"
-    kuielab_a_drums = "kuielab_a_drums.onnx"
-    kuielab_b_drums = "kuielab_b_drums.onnx"
-    kuielab_a_bass = "kuielab_a_bass.onnx"
-    kuielab_b_bass = "kuielab_b_bass.onnx"
-    kuielab_a_other = "kuielab_a_other.onnx"
-    kuielab_b_other = "kuielab_b_other.onnx"
-    MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
-    UVR_DeNoise = "UVR-DeNoise.pth"
-    UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
-class SeparationModel2(StrEnum):
-    """Enumeration of audio separation models."""
-    UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx"
-    UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
-    REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
-    UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
-    UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
     Kim_Vocal_1 = "Kim_Vocal_1.onnx"
     Kim_Vocal_2 = "Kim_Vocal_2.onnx"
     Kim_Inst = "Kim_Inst.onnx"
@@ -662,8 +671,8 @@ class MultiStepSongGenerationConfig(SongGenerationConfig):
     separation_model: DropdownConfig = DropdownConfig(
         label="Separation model",
         info="The model to use for audio separation.",
-        value=SeparationModel.UVR_MDX_NET_VOC_FT,
-        choices=list(SeparationModel2),
     )
     segment_size: RadioConfig = RadioConfig(
         label="Segment size",
@@ -1915,6 +1924,231 @@ class SpeechGenerationConfig(GenerationConfig):
 total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
 def render_app() -> gr.Blocks:
     """
     Render the Ultimate RVC web application.
@@ -2099,6 +2333,360 @@ def _init_dropdowns() -> list[gr.Dropdown]:
     ]
 app = render_app()
 app_wrapper = typer.Typer()

                                          TrainingConfig)
 from ultimate_rvc.web.tabs.generate.song_cover.multi_step_generation import \
     render as render_song_cover_multi_step_tab
 from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import \
     render as render_speech_multi_step_tab
 from ultimate_rvc.web.tabs.generate.speech.one_click_generation import \
 if TYPE_CHECKING:
     import gradio as gr
+    from typing import TYPE_CHECKING
+from functools import partial
+import gradio as gr
+from ultimate_rvc.core.common import (INTERMEDIATE_AUDIO_BASE_DIR,
+                                      OUTPUT_AUDIO_DIR, copy_file_safe,
+                                      display_progress, get_file_hash,
+                                      json_dump, json_load, validate_model,
+                                      validate_url)
+from ultimate_rvc.core.exceptions import (Entity, InvalidLocationError,
+                                          Location, NotFoundError,
+                                          NotProvidedError, UIMessage,
+                                          YoutubeUrlError)
+from ultimate_rvc.core.generate.common import (convert, get_unique_base_path,
+                                               mix_audio,
+                                               validate_audio_dir_exists,
+                                               validate_audio_file_exists,
+                                               wavify)
+from ultimate_rvc.core.generate.song_cover import (get_named_song_dirs,
+                                                   get_song_cover_name,
+                                                   run_pipeline)
+from ultimate_rvc.core.generate.typing_extra import (EffectedVocalsMetaData,
+                                                     FileMetaData,
+                                                     MixedAudioType,
+                                                     PitchShiftMetaData,
+                                                     RVCAudioMetaData,
+                                                     SeparatedAudioMetaData,
+                                                     SongSourceType)
+from ultimate_rvc.core.manage.audio import get_saved_output_audio
+from ultimate_rvc.typing_extra import EmbedderModel
+from ultimate_rvc.web.common import (PROGRESS_BAR, exception_harness,
+                                     toggle_intermediate_audio,
+                                     toggle_visibility,
+                                     toggle_visible_component,
+                                     update_dropdowns, update_output_name,
+                                     update_value)
+from ultimate_rvc.web.typing_extra import ConcurrencyId
 type StrPath = str | PathLike[str]
     UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
     REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
     UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
     Kim_Vocal_1 = "Kim_Vocal_1.onnx"
     Kim_Vocal_2 = "Kim_Vocal_2.onnx"
     Kim_Inst = "Kim_Inst.onnx"
     separation_model: DropdownConfig = DropdownConfig(
         label="Separation model",
         info="The model to use for audio separation.",
+        value=SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
+        choices=list(SeparationModel),
     )
     segment_size: RadioConfig = RadioConfig(
         label="Segment size",
 total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
+def run_newpipeline(
+    source: str,
+    model_name: str,
+    n_octaves: int = 0,
+    n_semitones: int = 0,
+    f0_methods: Sequence[F0Method] | None = None,
+    index_rate: float = 0.3,
+    rms_mix_rate: float = 1.0,
+    protect_rate: float = 0.33,
+    hop_length: int = 128,
+    split_vocals: bool = False,
+    autotune_vocals: bool = False,
+    autotune_strength: float = 1.0,
+    clean_vocals: bool = False,
+    clean_strength: float = 0.7,
+    embedder_model: EmbedderModel = EmbedderModel.CONTENTVEC,
+    custom_embedder_model: str | None = None,
+    sid: int = 0,
+    room_size: float = 0.15,
+    wet_level: float = 0.2,
+    dry_level: float = 0.8,
+    damping: float = 0.7,
+    main_gain: int = 0,
+    inst_gain: int = 0,
+    backup_gain: int = 0,
+    output_sr: int = 44100,
+    output_format: AudioExt = AudioExt.MP3,
+    output_name: str | None = None,
+    cookiefile: StrPath | None = None,
+    progress_bar: gr.Progress | None = None,
+) -> tuple[Path, ...]:
+    """
+    Run the song cover generation pipeline.
+    The pipeline retrieves the song, separates vocals from
+    instrumentals, separates main vocals from backup vocals, de-noises
+    and de-reverbs the main vocals, converts them with the given voice
+    model, post-processes the converted vocals, pitch-shifts the
+    instrumentals and backup vocals, and finally mixes all tracks.
+    Parameters
+    ----------
+    source : str
+        A Youtube URL, the path to a local audio file or the path to a
+        song directory.
+    model_name : str
+        The name of the voice model to use for vocal conversion.
+    n_octaves : int, default=0
+        The number of octaves to pitch-shift the converted vocals by.
+    n_semitones : int, default=0
+        The number of semi-tones to pitch-shift the converted vocals,
+        instrumentals, and backup vocals by.
+    f0_methods : Sequence[F0Method], optional
+        The methods to use for pitch extraction during vocal
+        conversion. If None, the method used is rmvpe.
+    index_rate : float, default=0.3
+        The influence of the index file on the vocal conversion.
+    rms_mix_rate : float, default=1.0
+        The blending rate of the volume envelope of the converted
+        vocals.
+    protect_rate : float, default=0.33
+        The protect rate for consonants and breathing sounds during
+        vocal conversion.
+    hop_length : int, default=128
+        The hop length to use for crepe-based pitch detection.
+    split_vocals : bool, default=False
+        Whether to perform audio splitting before converting the main
+        vocals.
+    autotune_vocals : bool, default=False
+        Whether to apply autotune to the converted vocals.
+    autotune_strength : float, default=1.0
+        The strength of the autotune to apply to the converted vocals.
+    clean_vocals : bool, default=False
+        Whether to clean the converted vocals.
+    clean_strength : float, default=0.7
+        The intensity of the cleaning to apply to the converted vocals.
+    embedder_model : EmbedderModel, default=EmbedderModel.CONTENTVEC
+        The model to use for generating speaker embeddings during vocal
+        conversion.
+    custom_embedder_model : str, optional
+        The name of a custom embedder model to use for generating
+        speaker embeddings during vocal conversion. Only validated when
+        `embedder_model` is `EmbedderModel.CUSTOM`.
+    sid : int, default=0
+        The speaker id to use for multi-speaker models during vocal
+        conversion.
+    room_size : float, default=0.15
+        The room size of the reverb effect to apply to the converted
+        vocals.
+    wet_level : float, default=0.2
+        The wetness level of the reverb effect to apply to the converted
+        vocals.
+    dry_level : float, default=0.8
+        The dryness level of the reverb effect to apply to the converted
+        vocals.
+    damping : float, default=0.7
+        The damping of the reverb effect to apply to the converted
+        vocals.
+    main_gain : int, default=0
+        The gain to apply to the post-processed vocals.
+    inst_gain : int, default=0
+        The gain to apply to the pitch-shifted instrumentals.
+    backup_gain : int, default=0
+        The gain to apply to the pitch-shifted backup vocals.
+    output_sr : int, default=44100
+        The sample rate of the song cover.
+    output_format : AudioExt, default=AudioExt.MP3
+        The audio format of the song cover.
+    output_name : str, optional
+        The name of the song cover.
+    cookiefile : StrPath, optional
+        The path to a file containing cookies to use when downloading
+        audio from Youtube.
+    progress_bar : gr.Progress, optional
+        Gradio progress bar to update.
+    Returns
+    -------
+    tuple[Path,...]
+        The path to the generated song cover and the paths to any
+        intermediate audio files that were generated. The de-noising
+        outputs are not part of the returned tuple.
+    """
+    validate_model(model_name, Entity.VOICE_MODEL)
+    if embedder_model == EmbedderModel.CUSTOM:
+        validate_model(custom_embedder_model, Entity.CUSTOM_EMBEDDER_MODEL)
+    display_progress("[~] Retrieving song...", 0 / 9, progress_bar)
+    song, song_dir = retrieve_song(source, cookiefile=cookiefile)
+    display_progress("[~] Separating vocals from instrumentals...", 1 / 9, progress_bar)
+    vocals_track, instrumentals_track = separate_audio(
+        song,
+        song_dir,
+        SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
+        SegmentSize.SEG_2048,
+    )
+    display_progress(
+        "[~] Separating main vocals from backup vocals...",
+        2 / 9,
+        progress_bar,
+    )
+    backup_vocals_track, main_vocals_track = separate_audio(
+        vocals_track,
+        song_dir,
+        SeparationModel.UVR_MDX_NET_KARA_2,
+        SegmentSize.SEG_2048,
+    )
+    display_progress("[~] De-noising vocals...", 3 / 9, progress_bar)
+    # The de-noiser must be fed the main vocals: passing `clean_track` here
+    # (as before) read the variable before it was ever assigned.
+    # NOTE(review): the (noise, clean) stem order is kept as originally
+    # written; confirm it against what separate_audio returns for this model.
+    _noise_track, clean_track = separate_audio(
+        main_vocals_track,
+        song_dir,
+        SeparationModel.UVR_DeNoise,
+        SegmentSize.SEG_2048,
+    )
+    display_progress("[~] De-reverbing vocals...", 4 / 9, progress_bar)
+    # Chain on the de-noised vocals so the previous step is not discarded.
+    reverb_track, vocals_dereverb_track = separate_audio(
+        clean_track,
+        song_dir,
+        SeparationModel.UVR_DeEcho_DeReverb,
+        SegmentSize.SEG_2048,
+    )
+    display_progress("[~] Converting vocals...", 5 / 9, progress_bar)
+    converted_vocals_track = convert(
+        audio_track=vocals_dereverb_track,
+        directory=song_dir,
+        model_name=model_name,
+        n_octaves=n_octaves,
+        n_semitones=n_semitones,
+        f0_methods=f0_methods,
+        index_rate=index_rate,
+        rms_mix_rate=rms_mix_rate,
+        protect_rate=protect_rate,
+        hop_length=hop_length,
+        split_audio=split_vocals,
+        autotune_audio=autotune_vocals,
+        autotune_strength=autotune_strength,
+        clean_audio=clean_vocals,
+        clean_strength=clean_strength,
+        embedder_model=embedder_model,
+        custom_embedder_model=custom_embedder_model,
+        sid=sid,
+        content_type=RVCContentType.VOCALS,
+    )
+    display_progress("[~] Post-processing vocals...", 6 / 9, progress_bar)
+    effected_vocals_track = postprocess(
+        converted_vocals_track,
+        song_dir,
+        room_size,
+        wet_level,
+        dry_level,
+        damping,
+    )
+    display_progress("[~] Pitch-shifting instrumentals...", 7 / 9, progress_bar)
+    shifted_instrumentals_track = pitch_shift(
+        instrumentals_track,
+        song_dir,
+        n_semitones,
+    )
+    display_progress("[~] Pitch-shifting backup vocals...", 8 / 9, progress_bar)
+    shifted_backup_vocals_track = pitch_shift(
+        backup_vocals_track,
+        song_dir,
+        n_semitones,
+    )
+    # Final mix: each track is paired with the gain to apply to it.
+    song_cover = mix_song(
+        [
+            (effected_vocals_track, main_gain),
+            (shifted_instrumentals_track, inst_gain),
+            (shifted_backup_vocals_track, backup_gain),
+        ],
+        song_dir,
+        output_sr,
+        output_format,
+        output_name,
+    )
+    # The tuple layout is left untouched so that any caller mapping the
+    # elements onto UI components by position keeps working.
+    return (
+        song_cover,
+        song,
+        vocals_track,
+        instrumentals_track,
+        main_vocals_track,
+        backup_vocals_track,
+        vocals_dereverb_track,
+        reverb_track,
+        converted_vocals_track,
+        effected_vocals_track,
+        shifted_instrumentals_track,
+        shifted_backup_vocals_track,
+    )
 def render_app() -> gr.Blocks:
     """
     Render the Ultimate RVC web application.
     ]
+def render_song_cover_one_click_tab(
+    total_config: TotalConfig, cookiefile: str | None = None
+) -> None:
+    """
+    Render "Generate song covers - One-click generation" tab.
+    The tab contains the source/voice model inputs, a collapsed
+    "Options" accordion, "Reset options" and "Generate" buttons and an
+    audio player for the resulting song cover.
+    Parameters
+    ----------
+    total_config : TotalConfig
+        Model containing all component configuration settings for the
+        Ultimate RVC web UI.
+    cookiefile : str, optional
+        The path to a file containing cookies to use when downloading
+        audio from Youtube.
+    """
+    with gr.Tab("One-click"):
+        tab_config = total_config.song.one_click
+        _render_input(tab_config)
+        with gr.Accordion("Options", open=False):
+            _render_main_options(tab_config)
+            _render_conversion_options(tab_config)
+            _render_mixing_options(tab_config)
+            _render_output_options(tab_config)
+            _render_intermediate_audio(tab_config)
+        with gr.Row(equal_height=True):
+            reset_btn = gr.Button(value="Reset options", scale=2)
+            generate_btn = gr.Button("Generate", scale=2, variant="primary")
+        song_cover = gr.Audio(
+            label="Song cover",
+            scale=3,
+            waveform_options=gr.WaveformOptions(show_recording_waveform=False),
+        )
+        song_dirs = total_config.song.multi_step.song_dirs.all
+        # NOTE(review): the handler wraps `run_pipeline`, not the
+        # `run_newpipeline` function defined in this file - confirm which one
+        # is intended.
+        generate_btn.click(
+            partial(
+                exception_harness(
+                    run_pipeline,
+                    info_msg="Song cover generated successfully!",
+                ),
+                cookiefile=cookiefile,
+                progress_bar=PROGRESS_BAR,
+            ),
+            # Component values are passed positionally, so this order
+            # presumably has to match the pipeline's parameter order - verify
+            # before reordering.
+            inputs=[
+                tab_config.source.instance,
+                tab_config.voice_model.instance,
+                tab_config.n_octaves.instance,
+                tab_config.n_semitones.instance,
+                tab_config.f0_methods.instance,
+                tab_config.index_rate.instance,
+                tab_config.rms_mix_rate.instance,
+                tab_config.protect_rate.instance,
+                tab_config.hop_length.instance,
+                tab_config.split_voice.instance,
+                tab_config.autotune_voice.instance,
+                tab_config.autotune_strength.instance,
+                tab_config.clean_voice.instance,
+                tab_config.clean_strength.instance,
+                tab_config.embedder_model.instance,
+                tab_config.custom_embedder_model.instance,
+                tab_config.sid.instance,
+                tab_config.room_size.instance,
+                tab_config.wet_level.instance,
+                tab_config.dry_level.instance,
+                tab_config.damping.instance,
+                tab_config.main_gain.instance,
+                tab_config.inst_gain.instance,
+                tab_config.backup_gain.instance,
+                tab_config.output_sr.instance,
+                tab_config.output_format.instance,
+                tab_config.output_name.instance,
+            ],
+            # First output is the song cover player, followed by every
+            # intermediate audio component of this tab.
+            outputs=[song_cover, *tab_config.intermediate_audio.all],
+            concurrency_limit=4,
+            concurrency_id=ConcurrencyId.GPU,
+        ).success(
+            # The count 3 + len(song_dirs) equals the number of components in
+            # `outputs` below. NOTE(review): meaning of the trailing `[]` and
+            # `[2]` arguments depends on `update_dropdowns` - confirm there.
+            partial(update_dropdowns, get_named_song_dirs, 3 + len(song_dirs), [], [2]),
+            outputs=[
+                total_config.song.one_click.cached_song.instance,
+                total_config.song.multi_step.cached_song.instance,
+                total_config.management.audio.intermediate.instance,
+                *song_dirs,
+            ],
+            show_progress="hidden",
+        ).then(
+            partial(update_dropdowns, get_saved_output_audio, 1, [], [0]),
+            outputs=total_config.management.audio.output.instance,
+            show_progress="hidden",
+        )
+        # The two lists below are parallel: the i-th configured default is
+        # written to the i-th component. Source, voice model, custom embedder
+        # model and output name are not part of the reset.
+        reset_btn.click(
+            lambda: [
+                tab_config.n_octaves.value,
+                tab_config.n_semitones.value,
+                tab_config.f0_methods.value,
+                tab_config.index_rate.value,
+                tab_config.rms_mix_rate.value,
+                tab_config.protect_rate.value,
+                tab_config.hop_length.value,
+                tab_config.split_voice.value,
+                tab_config.autotune_voice.value,
+                tab_config.autotune_strength.value,
+                tab_config.clean_voice.value,
+                tab_config.clean_strength.value,
+                tab_config.embedder_model.value,
+                tab_config.sid.value,
+                tab_config.room_size.value,
+                tab_config.wet_level.value,
+                tab_config.dry_level.value,
+                tab_config.damping.value,
+                tab_config.main_gain.value,
+                tab_config.inst_gain.value,
+                tab_config.backup_gain.value,
+                tab_config.output_sr.value,
+                tab_config.output_format.value,
+                tab_config.show_intermediate_audio.value,
+            ],
+            outputs=[
+                tab_config.n_octaves.instance,
+                tab_config.n_semitones.instance,
+                tab_config.f0_methods.instance,
+                tab_config.index_rate.instance,
+                tab_config.rms_mix_rate.instance,
+                tab_config.protect_rate.instance,
+                tab_config.hop_length.instance,
+                tab_config.split_voice.instance,
+                tab_config.autotune_voice.instance,
+                tab_config.autotune_strength.instance,
+                tab_config.clean_voice.instance,
+                tab_config.clean_strength.instance,
+                tab_config.embedder_model.instance,
+                tab_config.sid.instance,
+                tab_config.room_size.instance,
+                tab_config.wet_level.instance,
+                tab_config.dry_level.instance,
+                tab_config.damping.instance,
+                tab_config.main_gain.instance,
+                tab_config.inst_gain.instance,
+                tab_config.backup_gain.instance,
+                tab_config.output_sr.instance,
+                tab_config.output_format.instance,
+                tab_config.show_intermediate_audio.instance,
+            ],
+            show_progress="hidden",
+        )
+def _render_input(tab_config: OneClickSongGenerationConfig) -> None:
+    # Lays out the source selection row (source type, source textbox, a
+    # hidden local-file audio input and the cached-song dropdown) followed by
+    # a row holding the voice model component.
+    with gr.Row():
+        with gr.Column():
+            tab_config.source_type.instantiate()
+        with gr.Column():
+            tab_config.source.instantiate()
+            local_file = gr.Audio(
+                label="Source",
+                type="filepath",
+                visible=False,
+                waveform_options=gr.WaveformOptions(show_recording_waveform=False),
+            )
+            # NOTE(review): `.instance.render()` (instead of `.instantiate()`)
+            # suggests this component is created elsewhere - confirm.
+            tab_config.cached_song.instance.render()
+        # The source type selection drives the three alternative source
+        # inputs listed in `outputs`.
+        # NOTE(review): presumably exactly one of them is made visible -
+        # see `toggle_visible_component`.
+        tab_config.source_type.instance.input(
+            partial(toggle_visible_component, 3),
+            inputs=tab_config.source_type.instance,
+            outputs=[
+                tab_config.source.instance,
+                local_file,
+                tab_config.cached_song.instance,
+            ],
+            show_progress="hidden",
+        )
+        # Both alternative inputs forward their value into the source
+        # component, which is the one read by the generate handler.
+        local_file.change(
+            update_value,
+            inputs=local_file,
+            outputs=tab_config.source.instance,
+            show_progress="hidden",
+        )
+        tab_config.cached_song.instance.input(
+            update_value,
+            inputs=tab_config.cached_song.instance,
+            outputs=tab_config.source.instance,
+            show_progress="hidden",
+        )
+    with gr.Row():
+        tab_config.voice_model.instance.render()
+def _render_main_options(tab_config: OneClickSongGenerationConfig) -> None:
+    # Pitch-shift controls (octaves, then semitones) share a single row.
+    with gr.Row():
+        for pitch_option in (tab_config.n_octaves, tab_config.n_semitones):
+            pitch_option.instantiate()
+def _render_conversion_options(tab_config: OneClickSongGenerationConfig) -> None:
+    # Lays out the "Vocal conversion" accordion with three nested accordions:
+    # voice synthesis, vocal enrichment and speaker embedding.
+    with gr.Accordion("Vocal conversion", open=True):
+        gr.Markdown("")
+        with gr.Accordion("Voice synthesis", open=True):
+            with gr.Row():
+                tab_config.f0_methods.instantiate()
+                tab_config.index_rate.instantiate()
+            with gr.Row():
+                tab_config.rms_mix_rate.instantiate()
+                tab_config.protect_rate.instantiate()
+                tab_config.hop_length.instantiate()
+        with gr.Accordion("Vocal enrichment", open=True):
+            with gr.Row():
+                with gr.Column():
+                    tab_config.split_voice.instantiate()
+                with gr.Column():
+                    tab_config.autotune_voice.instantiate()
+                    tab_config.autotune_strength.instantiate()
+                with gr.Column():
+                    tab_config.clean_voice.instantiate()
+                    tab_config.clean_strength.instantiate()
+            # Each toggle is wired to its strength component.
+            # NOTE(review): presumably the strength control is shown only
+            # while the toggle value is in `targets` - see `toggle_visibility`.
+            tab_config.autotune_voice.instance.change(
+                partial(toggle_visibility, targets={True}),
+                inputs=tab_config.autotune_voice.instance,
+                outputs=tab_config.autotune_strength.instance,
+                show_progress="hidden",
+            )
+            tab_config.clean_voice.instance.change(
+                partial(toggle_visibility, targets={True}),
+                inputs=tab_config.clean_voice.instance,
+                outputs=tab_config.clean_strength.instance,
+                show_progress="hidden",
+            )
+        with gr.Accordion("Speaker embedding", open=True):
+            with gr.Row():
+                with gr.Column():
+                    tab_config.embedder_model.instantiate()
+                    tab_config.custom_embedder_model.instance.render()
+                tab_config.sid.instantiate()
+            # The embedder selection drives the custom embedder component,
+            # with EmbedderModel.CUSTOM as the only target value.
+            tab_config.embedder_model.instance.change(
+                partial(toggle_visibility, targets={EmbedderModel.CUSTOM}),
+                inputs=tab_config.embedder_model.instance,
+                outputs=tab_config.custom_embedder_model.instance,
+                show_progress="hidden",
+            )
+def _render_mixing_options(tab_config: OneClickSongGenerationConfig) -> None:
+    # "Audio mixing" accordion: reverb controls for the converted vocals
+    # followed by the per-track volume controls.
+    with gr.Accordion("Audio mixing", open=True):
+        gr.Markdown("")
+        with gr.Accordion("Reverb control on converted vocals", open=True):
+            # Room size gets a row of its own.
+            with gr.Row():
+                tab_config.room_size.instantiate()
+            # Wet level, dry level and damping share the second row.
+            with gr.Row():
+                tab_config.wet_level.instantiate()
+                tab_config.dry_level.instantiate()
+                tab_config.damping.instantiate()
+        with gr.Accordion("Volume controls (dB)", open=True):
+            with gr.Row():
+                tab_config.main_gain.instantiate()
+                tab_config.inst_gain.instantiate()
+                tab_config.backup_gain.instantiate()
+def _render_output_options(tab_config: OneClickSongGenerationConfig) -> None:
+    # "Audio output" accordion: output name, sample rate and format in the
+    # first row, the intermediate-audio toggle in the second.
+    with gr.Accordion("Audio output", open=True):
+        with gr.Row():
+            # The output name value is computed from the cached song and
+            # voice model components (plus a constant None state).
+            output_name_value = partial(
+                update_output_name,
+                get_song_cover_name,
+                True,  # noqa: FBT003
+            )
+            output_name_inputs = [
+                gr.State(None),
+                tab_config.cached_song.instance,
+                tab_config.voice_model.instance,
+            ]
+            tab_config.output_name.instantiate(
+                value=output_name_value,
+                inputs=output_name_inputs,
+            )
+            tab_config.output_sr.instantiate()
+            tab_config.output_format.instantiate()
+        with gr.Row():
+            tab_config.show_intermediate_audio.instantiate()
+def _render_intermediate_audio(tab_config: OneClickSongGenerationConfig) -> None:
+    # Builds the initially hidden "Intermediate audio tracks" accordion with
+    # one nested accordion per pipeline step, then wires the
+    # show-intermediate-audio toggle to all of those accordions.
+    audio = tab_config.intermediate_audio
+    with gr.Accordion(
+        "Intermediate audio tracks",
+        open=False,
+        visible=False,
+    ) as root_accordion:
+        with gr.Accordion("Step 0: song retrieval", open=False) as step_0:
+            audio.song.instantiate()
+        with gr.Accordion(
+            "Step 1a: vocals/instrumentals separation",
+            open=False,
+        ) as step_1a:
+            with gr.Row():
+                audio.vocals.instantiate()
+                audio.instrumentals.instantiate()
+        with gr.Accordion(
+            "Step 1b: main vocals/ backup vocals separation",
+            open=False,
+        ) as step_1b:
+            with gr.Row():
+                audio.main_vocals.instantiate()
+                audio.backup_vocals.instantiate()
+        with gr.Accordion("Step 1c: main vocals cleanup", open=False) as step_1c:
+            with gr.Row():
+                audio.main_vocals_dereverbed.instantiate()
+                audio.main_vocals_reverb.instantiate()
+        with gr.Accordion(
+            "Step 2: conversion of main vocals",
+            open=False,
+        ) as step_2:
+            audio.converted_vocals.instantiate()
+        with gr.Accordion(
+            "Step 3: post-processing of converted vocals",
+            open=False,
+        ) as step_3:
+            audio.postprocessed_vocals.instantiate()
+        with gr.Accordion(
+            "Step 4: pitch shift of background tracks",
+            open=False,
+        ) as step_4:
+            with gr.Row():
+                audio.instrumentals_shifted.instantiate()
+                audio.backup_vocals_shifted.instantiate()
+    # Root accordion first, then the seven per-step accordions in order.
+    toggled_accordions = [
+        root_accordion,
+        step_0,
+        step_1a,
+        step_1b,
+        step_1c,
+        step_2,
+        step_3,
+        step_4,
+    ]
+    show_toggle = tab_config.show_intermediate_audio.instance
+    show_toggle.change(
+        partial(toggle_intermediate_audio, num_components=7),
+        inputs=show_toggle,
+        outputs=toggled_accordions,
+        show_progress="hidden",
+    )
 app = render_app()
 app_wrapper = typer.Typer()