lainlives committed on
Commit
92a1819
·
1 Parent(s): a9240b9
Files changed (1) hide show
  1. app.py +618 -30
app.py CHANGED
@@ -34,8 +34,6 @@ from ultimate_rvc.web.config.tab import (SongGenerationConfig,
34
  TrainingConfig)
35
  from ultimate_rvc.web.tabs.generate.song_cover.multi_step_generation import \
36
  render as render_song_cover_multi_step_tab
37
- from ultimate_rvc.web.tabs.generate.song_cover.one_click_generation import \
38
- render as render_song_cover_one_click_tab
39
  from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import \
40
  render as render_speech_multi_step_tab
41
  from ultimate_rvc.web.tabs.generate.speech.one_click_generation import \
@@ -46,7 +44,44 @@ from ultimate_rvc.web.tabs.manage.settings import render as render_settings_tab
46
 
47
  if TYPE_CHECKING:
48
  import gradio as gr
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  type StrPath = str | PathLike[str]
52
 
@@ -306,32 +341,6 @@ class SeparationModel(StrEnum):
306
  UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
307
  REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
308
  UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
309
- UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
310
- Kim_Vocal_1 = "Kim_Vocal_1.onnx"
311
- Kim_Vocal_2 = "Kim_Vocal_2.onnx"
312
- Kim_Inst = "Kim_Inst.onnx"
313
- UVR_MDX_NET_Inst_HQ_4 = "UVR-MDX-NET-Inst_HQ_4.onnx"
314
- kuielab_a_vocals = "kuielab_a_vocals.onnx"
315
- kuielab_b_vocals = "kuielab_b_vocals.onnx"
316
- kuielab_a_drums = "kuielab_a_drums.onnx"
317
- kuielab_b_drums = "kuielab_b_drums.onnx"
318
- kuielab_a_bass = "kuielab_a_bass.onnx"
319
- kuielab_b_bass = "kuielab_b_bass.onnx"
320
- kuielab_a_other = "kuielab_a_other.onnx"
321
- kuielab_b_other = "kuielab_b_other.onnx"
322
- MDX23C_8KFFT_InstVoc_HQ_2 = "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"
323
- UVR_DeNoise = "UVR-DeNoise.pth"
324
- UVR_DeEcho_DeReverb = "UVR-DeEcho-DeReverb.pth"
325
-
326
-
327
- class SeparationModel2(StrEnum):
328
- """Enumeration of audio separation models."""
329
-
330
- UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx"
331
- UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
332
- REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
333
- UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
334
- UVR_MDX_NET_Voc_FT = "UVR-MDX-NET-Voc_FT.onnx"
335
  Kim_Vocal_1 = "Kim_Vocal_1.onnx"
336
  Kim_Vocal_2 = "Kim_Vocal_2.onnx"
337
  Kim_Inst = "Kim_Inst.onnx"
@@ -662,8 +671,8 @@ class MultiStepSongGenerationConfig(SongGenerationConfig):
662
  separation_model: DropdownConfig = DropdownConfig(
663
  label="Separation model",
664
  info="The model to use for audio separation.",
665
- value=SeparationModel.UVR_MDX_NET_VOC_FT,
666
- choices=list(SeparationModel2),
667
  )
668
  segment_size: RadioConfig = RadioConfig(
669
  label="Segment size",
@@ -1915,6 +1924,231 @@ class SpeechGenerationConfig(GenerationConfig):
1915
  total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
1916
 
1917
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1918
  def render_app() -> gr.Blocks:
1919
  """
1920
  Render the Ultimate RVC web application.
@@ -2099,6 +2333,360 @@ def _init_dropdowns() -> list[gr.Dropdown]:
2099
  ]
2100
 
2101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2102
  app = render_app()
2103
  app_wrapper = typer.Typer()
2104
 
 
34
  TrainingConfig)
35
  from ultimate_rvc.web.tabs.generate.song_cover.multi_step_generation import \
36
  render as render_song_cover_multi_step_tab
 
 
37
  from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import \
38
  render as render_speech_multi_step_tab
39
  from ultimate_rvc.web.tabs.generate.speech.one_click_generation import \
 
44
 
45
  if TYPE_CHECKING:
46
  import gradio as gr
47
+ from typing import TYPE_CHECKING
48
 
49
+ from functools import partial
50
+
51
+ import gradio as gr
52
+ from ultimate_rvc.core.common import (INTERMEDIATE_AUDIO_BASE_DIR,
53
+ OUTPUT_AUDIO_DIR, copy_file_safe,
54
+ display_progress, get_file_hash,
55
+ json_dump, json_load, validate_model,
56
+ validate_url)
57
+ from ultimate_rvc.core.exceptions import (Entity, InvalidLocationError,
58
+ Location, NotFoundError,
59
+ NotProvidedError, UIMessage,
60
+ YoutubeUrlError)
61
+ from ultimate_rvc.core.generate.common import (convert, get_unique_base_path,
62
+ mix_audio,
63
+ validate_audio_dir_exists,
64
+ validate_audio_file_exists,
65
+ wavify)
66
+ from ultimate_rvc.core.generate.song_cover import (get_named_song_dirs,
67
+ get_song_cover_name,
68
+ run_pipeline)
69
+ from ultimate_rvc.core.generate.typing_extra import (EffectedVocalsMetaData,
70
+ FileMetaData,
71
+ MixedAudioType,
72
+ PitchShiftMetaData,
73
+ RVCAudioMetaData,
74
+ SeparatedAudioMetaData,
75
+ SongSourceType)
76
+ from ultimate_rvc.core.manage.audio import get_saved_output_audio
77
+ from ultimate_rvc.typing_extra import EmbedderModel
78
+ from ultimate_rvc.web.common import (PROGRESS_BAR, exception_harness,
79
+ toggle_intermediate_audio,
80
+ toggle_visibility,
81
+ toggle_visible_component,
82
+ update_dropdowns, update_output_name,
83
+ update_value)
84
+ from ultimate_rvc.web.typing_extra import ConcurrencyId
85
 
86
  type StrPath = str | PathLike[str]
87
 
 
341
  UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
342
  REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
343
  UVR_MDX_NET_Inst_HQ_3 = "UVR-MDX-NET-Inst_HQ_3.onnx"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  Kim_Vocal_1 = "Kim_Vocal_1.onnx"
345
  Kim_Vocal_2 = "Kim_Vocal_2.onnx"
346
  Kim_Inst = "Kim_Inst.onnx"
 
671
  separation_model: DropdownConfig = DropdownConfig(
672
  label="Separation model",
673
  info="The model to use for audio separation.",
674
+ value=SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
675
+ choices=list(SeparationModel),
676
  )
677
  segment_size: RadioConfig = RadioConfig(
678
  label="Segment size",
 
1924
  total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
1925
 
1926
 
1927
def run_newpipeline(
    source: str,
    model_name: str,
    n_octaves: int = 0,
    n_semitones: int = 0,
    f0_methods: Sequence[F0Method] | None = None,
    index_rate: float = 0.3,
    rms_mix_rate: float = 1.0,
    protect_rate: float = 0.33,
    hop_length: int = 128,
    split_vocals: bool = False,
    autotune_vocals: bool = False,
    autotune_strength: float = 1.0,
    clean_vocals: bool = False,
    clean_strength: float = 0.7,
    embedder_model: EmbedderModel = EmbedderModel.CONTENTVEC,
    custom_embedder_model: str | None = None,
    sid: int = 0,
    room_size: float = 0.15,
    wet_level: float = 0.2,
    dry_level: float = 0.8,
    damping: float = 0.7,
    main_gain: int = 0,
    inst_gain: int = 0,
    backup_gain: int = 0,
    output_sr: int = 44100,
    output_format: AudioExt = AudioExt.MP3,
    output_name: str | None = None,
    cookiefile: StrPath | None = None,
    progress_bar: gr.Progress | None = None,
) -> tuple[Path, ...]:
    """
    Run the song cover generation pipeline with extra vocal cleanup.

    The pipeline retrieves the song, splits it into vocals and
    instrumentals (``MDX23C_8KFFT_InstVoc_HQ_2``), splits the vocals
    into main and backup vocals (``UVR_MDX_NET_KARA_2``), de-noises the
    main vocals (``UVR_DeNoise``), de-reverbs the de-noised vocals
    (``UVR_DeEcho_DeReverb``), converts them with the given voice
    model, post-processes the result, pitch-shifts the background
    tracks and finally mixes everything into a song cover. All
    separation steps use ``SegmentSize.SEG_2048``.

    Parameters
    ----------
    source : str
        A Youtube URL, the path to a local audio file or the path to a
        song directory.
    model_name : str
        The name of the voice model to use for vocal conversion.
    n_octaves : int, default=0
        The number of octaves to pitch-shift the converted vocals by.
    n_semitones : int, default=0
        The number of semi-tones to pitch-shift the converted vocals,
        instrumentals, and backup vocals by.
    f0_methods : Sequence[F0Method], optional
        The methods to use for pitch extraction during vocal
        conversion. If None, the method used is rmvpe.
    index_rate : float, default=0.3
        The influence of the index file on the vocal conversion.
    rms_mix_rate : float, default=1.0
        The blending rate of the volume envelope of the converted
        vocals.
    protect_rate : float, default=0.33
        The protect rate for consonants and breathing sounds during
        vocal conversion.
    hop_length : int, default=128
        The hop length to use for crepe-based pitch detection.
    split_vocals : bool, default=False
        Whether to perform audio splitting before converting the main
        vocals.
    autotune_vocals : bool, default=False
        Whether to apply autotune to the converted vocals.
    autotune_strength : float, default=1.0
        The strength of the autotune to apply to the converted vocals.
    clean_vocals : bool, default=False
        Whether to clean the converted vocals.
    clean_strength : float, default=0.7
        The intensity of the cleaning to apply to the converted vocals.
    embedder_model : EmbedderModel, default=EmbedderModel.CONTENTVEC
        The model to use for generating speaker embeddings during vocal
        conversion.
    custom_embedder_model : str, optional
        The name of a custom embedder model to use for generating
        speaker embeddings during vocal conversion. Only validated when
        ``embedder_model`` is ``EmbedderModel.CUSTOM``.
    sid : int, default=0
        The speaker id to use for multi-speaker models during vocal
        conversion.
    room_size : float, default=0.15
        The room size of the reverb effect to apply to the converted
        vocals.
    wet_level : float, default=0.2
        The wetness level of the reverb effect to apply to the converted
        vocals.
    dry_level : float, default=0.8
        The dryness level of the reverb effect to apply to the converted
        vocals.
    damping : float, default=0.7
        The damping of the reverb effect to apply to the converted
        vocals.
    main_gain : int, default=0
        The gain to apply to the post-processed vocals.
    inst_gain : int, default=0
        The gain to apply to the pitch-shifted instrumentals.
    backup_gain : int, default=0
        The gain to apply to the pitch-shifted backup vocals.
    output_sr : int, default=44100
        The sample rate of the song cover.
    output_format : AudioExt, default=AudioExt.MP3
        The audio format of the song cover.
    output_name : str, optional
        The name of the song cover.
    cookiefile : StrPath, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.

    Returns
    -------
    tuple[Path, ...]
        In order: the song cover, the retrieved song, the vocals, the
        instrumentals, the main vocals, the backup vocals, the
        de-reverbed main vocals, the extracted reverb, the converted
        vocals, the post-processed vocals, the pitch-shifted
        instrumentals and the pitch-shifted backup vocals.

    """
    validate_model(model_name, Entity.VOICE_MODEL)
    if embedder_model == EmbedderModel.CUSTOM:
        validate_model(custom_embedder_model, Entity.CUSTOM_EMBEDDER_MODEL)

    display_progress("[~] Retrieving song...", 0 / 9, progress_bar)
    song, song_dir = retrieve_song(source, cookiefile=cookiefile)

    display_progress("[~] Separating vocals from instrumentals...", 1 / 9, progress_bar)
    vocals_track, instrumentals_track = separate_audio(
        song,
        song_dir,
        SeparationModel.MDX23C_8KFFT_InstVoc_HQ_2,
        SegmentSize.SEG_2048,
    )

    display_progress(
        "[~] Separating main vocals from backup vocals...",
        2 / 9,
        progress_bar,
    )
    backup_vocals_track, main_vocals_track = separate_audio(
        vocals_track,
        song_dir,
        SeparationModel.UVR_MDX_NET_KARA_2,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] De-noising vocals...", 3 / 9, progress_bar)
    # The de-noiser must be fed the main vocals. The previous code passed
    # ``clean_track`` here, i.e. the very variable this call assigns, which
    # raised UnboundLocalError before any audio was processed.
    # NOTE(review): the (noise, clean) unpacking order is kept from the
    # original code -- confirm it matches the stem order that
    # separate_audio returns for UVR_DeNoise.
    _noise_track, clean_track = separate_audio(
        main_vocals_track,
        song_dir,
        SeparationModel.UVR_DeNoise,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] De-reverbing vocals...", 4 / 9, progress_bar)
    # Chain on the de-noised vocals so that the previous step is not wasted.
    reverb_track, vocals_dereverb_track = separate_audio(
        clean_track,
        song_dir,
        SeparationModel.UVR_DeEcho_DeReverb,
        SegmentSize.SEG_2048,
    )

    display_progress("[~] Converting vocals...", 5 / 9, progress_bar)
    converted_vocals_track = convert(
        audio_track=vocals_dereverb_track,
        directory=song_dir,
        model_name=model_name,
        n_octaves=n_octaves,
        n_semitones=n_semitones,
        f0_methods=f0_methods,
        index_rate=index_rate,
        rms_mix_rate=rms_mix_rate,
        protect_rate=protect_rate,
        hop_length=hop_length,
        split_audio=split_vocals,
        autotune_audio=autotune_vocals,
        autotune_strength=autotune_strength,
        clean_audio=clean_vocals,
        clean_strength=clean_strength,
        embedder_model=embedder_model,
        custom_embedder_model=custom_embedder_model,
        sid=sid,
        content_type=RVCContentType.VOCALS,
    )

    display_progress("[~] Post-processing vocals...", 6 / 9, progress_bar)
    effected_vocals_track = postprocess(
        converted_vocals_track,
        song_dir,
        room_size,
        wet_level,
        dry_level,
        damping,
    )

    display_progress("[~] Pitch-shifting instrumentals...", 7 / 9, progress_bar)
    shifted_instrumentals_track = pitch_shift(
        instrumentals_track,
        song_dir,
        n_semitones,
    )

    display_progress("[~] Pitch-shifting backup vocals...", 8 / 9, progress_bar)
    shifted_backup_vocals_track = pitch_shift(
        backup_vocals_track,
        song_dir,
        n_semitones,
    )

    song_cover = mix_song(
        [
            (effected_vocals_track, main_gain),
            (shifted_instrumentals_track, inst_gain),
            (shifted_backup_vocals_track, backup_gain),
        ],
        song_dir,
        output_sr,
        output_format,
        output_name,
    )
    # The tuple layout is part of the interface: the noise and de-noised
    # tracks are intentionally not returned, exactly as before.
    return (
        song_cover,
        song,
        vocals_track,
        instrumentals_track,
        main_vocals_track,
        backup_vocals_track,
        vocals_dereverb_track,
        reverb_track,
        converted_vocals_track,
        effected_vocals_track,
        shifted_instrumentals_track,
        shifted_backup_vocals_track,
    )
2150
+
2151
+
2152
  def render_app() -> gr.Blocks:
2153
  """
2154
  Render the Ultimate RVC web application.
 
2333
  ]
2334
 
2335
 
2336
def render_song_cover_one_click_tab(
    total_config: TotalConfig, cookiefile: str | None = None
) -> None:
    """
    Render the "Generate song covers - One-click generation" tab.

    Builds the input section, the option accordions, the intermediate
    audio section and the reset/generate controls, then wires the
    button events.

    Parameters
    ----------
    total_config : TotalConfig
        Model containing all component configuration settings for the
        Ultimate RVC web UI.
    cookiefile : str, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube.

    """
    # NOTE(review): the nesting of the ``with`` blocks below was
    # reconstructed from a diff view that had lost its indentation --
    # confirm the layout (in particular whether the intermediate audio
    # section belongs inside the "Options" accordion) against the repo.
    with gr.Tab("One-click"):
        tab_config = total_config.song.one_click
        _render_input(tab_config)
        with gr.Accordion("Options", open=False):
            _render_main_options(tab_config)
            _render_conversion_options(tab_config)
            _render_mixing_options(tab_config)
            _render_output_options(tab_config)
        _render_intermediate_audio(tab_config)

        with gr.Row(equal_height=True):
            reset_btn = gr.Button(value="Reset options", scale=2)
            generate_btn = gr.Button("Generate", scale=2, variant="primary")
            song_cover = gr.Audio(
                label="Song cover",
                scale=3,
                waveform_options=gr.WaveformOptions(show_recording_waveform=False),
            )

        song_dirs = total_config.song.multi_step.song_dirs.all

        # NOTE(review): the click handler wraps ``run_pipeline``;
        # ``run_newpipeline`` is not referenced here -- confirm which
        # pipeline this tab is meant to run.
        generate_fn = partial(
            exception_harness(
                run_pipeline,
                info_msg="Song cover generated successfully!",
            ),
            cookiefile=cookiefile,
            progress_bar=PROGRESS_BAR,
        )
        # Order matters: the components are passed positionally to the
        # pipeline function.
        generation_inputs = [
            tab_config.source.instance,
            tab_config.voice_model.instance,
            tab_config.n_octaves.instance,
            tab_config.n_semitones.instance,
            tab_config.f0_methods.instance,
            tab_config.index_rate.instance,
            tab_config.rms_mix_rate.instance,
            tab_config.protect_rate.instance,
            tab_config.hop_length.instance,
            tab_config.split_voice.instance,
            tab_config.autotune_voice.instance,
            tab_config.autotune_strength.instance,
            tab_config.clean_voice.instance,
            tab_config.clean_strength.instance,
            tab_config.embedder_model.instance,
            tab_config.custom_embedder_model.instance,
            tab_config.sid.instance,
            tab_config.room_size.instance,
            tab_config.wet_level.instance,
            tab_config.dry_level.instance,
            tab_config.damping.instance,
            tab_config.main_gain.instance,
            tab_config.inst_gain.instance,
            tab_config.backup_gain.instance,
            tab_config.output_sr.instance,
            tab_config.output_format.instance,
            tab_config.output_name.instance,
        ]
        generation_event = generate_btn.click(
            generate_fn,
            inputs=generation_inputs,
            outputs=[song_cover, *tab_config.intermediate_audio.all],
            concurrency_limit=4,
            concurrency_id=ConcurrencyId.GPU,
        )

        # After a successful run, refresh the song directory dropdowns ...
        song_dirs_event = generation_event.success(
            partial(update_dropdowns, get_named_song_dirs, 3 + len(song_dirs), [], [2]),
            outputs=[
                total_config.song.one_click.cached_song.instance,
                total_config.song.multi_step.cached_song.instance,
                total_config.management.audio.intermediate.instance,
                *song_dirs,
            ],
            show_progress="hidden",
        )
        # ... and then the saved output audio dropdown.
        song_dirs_event.then(
            partial(update_dropdowns, get_saved_output_audio, 1, [], [0]),
            outputs=total_config.management.audio.output.instance,
            show_progress="hidden",
        )

        # Component configs whose values are restored by the reset button.
        resettable = [
            tab_config.n_octaves,
            tab_config.n_semitones,
            tab_config.f0_methods,
            tab_config.index_rate,
            tab_config.rms_mix_rate,
            tab_config.protect_rate,
            tab_config.hop_length,
            tab_config.split_voice,
            tab_config.autotune_voice,
            tab_config.autotune_strength,
            tab_config.clean_voice,
            tab_config.clean_strength,
            tab_config.embedder_model,
            tab_config.sid,
            tab_config.room_size,
            tab_config.wet_level,
            tab_config.dry_level,
            tab_config.damping,
            tab_config.main_gain,
            tab_config.inst_gain,
            tab_config.backup_gain,
            tab_config.output_sr,
            tab_config.output_format,
            tab_config.show_intermediate_audio,
        ]
        reset_btn.click(
            # Configured values are read when the button is clicked.
            lambda: [component.value for component in resettable],
            outputs=[component.instance for component in resettable],
            show_progress="hidden",
        )
2481
+
2482
+
2483
def _render_input(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the song source and voice model inputs of the one-click tab.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    # NOTE(review): the row/column nesting was reconstructed from a diff
    # view without indentation -- confirm the layout against the repo.
    with gr.Row():
        with gr.Column():
            tab_config.source_type.instantiate()
        with gr.Column():
            tab_config.source.instantiate()
            local_file = gr.Audio(
                label="Source",
                type="filepath",
                visible=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=False),
            )
            tab_config.cached_song.instance.render()

    source_type = tab_config.source_type.instance
    source = tab_config.source.instance
    cached_song = tab_config.cached_song.instance

    # Switch between the three alternative source components.
    source_type.input(
        partial(toggle_visible_component, 3),
        inputs=source_type,
        outputs=[source, local_file, cached_song],
        show_progress="hidden",
    )

    # The uploaded file and the cached song both feed the source component.
    local_file.change(
        update_value,
        inputs=local_file,
        outputs=source,
        show_progress="hidden",
    )
    cached_song.input(
        update_value,
        inputs=cached_song,
        outputs=source,
        show_progress="hidden",
    )

    with gr.Row():
        tab_config.voice_model.instance.render()
2522
+
2523
+
2524
def _render_main_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the octave and semitone pitch-shift options in a single row.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    pitch_options = (tab_config.n_octaves, tab_config.n_semitones)
    with gr.Row():
        for option in pitch_options:
            option.instantiate()
2528
+
2529
+
2530
def _render_conversion_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the vocal conversion options of the one-click tab.

    The options are grouped into "Voice synthesis", "Vocal enrichment"
    and "Speaker embedding" accordions. The autotune strength, clean
    strength and custom embedder model components are wired to
    ``toggle_visibility`` on changes of their controlling component.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    # NOTE(review): the accordion/row/column nesting was reconstructed from
    # a diff view without indentation -- confirm the layout against the repo.
    with gr.Accordion("Vocal conversion", open=True):
        gr.Markdown("")

        with gr.Accordion("Voice synthesis", open=True):
            with gr.Row():
                tab_config.f0_methods.instantiate()
                tab_config.index_rate.instantiate()
            with gr.Row():
                tab_config.rms_mix_rate.instantiate()
                tab_config.protect_rate.instantiate()
                tab_config.hop_length.instantiate()

        with gr.Accordion("Vocal enrichment", open=True), gr.Row():
            with gr.Column():
                tab_config.split_voice.instantiate()
            with gr.Column():
                tab_config.autotune_voice.instantiate()
                tab_config.autotune_strength.instantiate()
            with gr.Column():
                tab_config.clean_voice.instantiate()
                tab_config.clean_strength.instantiate()

        show_when_enabled = partial(toggle_visibility, targets={True})
        autotune_voice = tab_config.autotune_voice.instance
        autotune_voice.change(
            show_when_enabled,
            inputs=autotune_voice,
            outputs=tab_config.autotune_strength.instance,
            show_progress="hidden",
        )
        clean_voice = tab_config.clean_voice.instance
        clean_voice.change(
            partial(toggle_visibility, targets={True}),
            inputs=clean_voice,
            outputs=tab_config.clean_strength.instance,
            show_progress="hidden",
        )

        with gr.Accordion("Speaker embedding", open=True), gr.Row():
            with gr.Column():
                tab_config.embedder_model.instantiate()
                tab_config.custom_embedder_model.instance.render()
            tab_config.sid.instantiate()

        embedder_model = tab_config.embedder_model.instance
        embedder_model.change(
            partial(toggle_visibility, targets={EmbedderModel.CUSTOM}),
            inputs=embedder_model,
            outputs=tab_config.custom_embedder_model.instance,
            show_progress="hidden",
        )
2575
+
2576
+
2577
def _render_mixing_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the reverb and volume options of the one-click tab.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    # NOTE(review): the accordion/row nesting was reconstructed from a diff
    # view without indentation -- confirm the layout against the repo.
    with gr.Accordion("Audio mixing", open=True):
        gr.Markdown("")

        with gr.Accordion("Reverb control on converted vocals", open=True):
            with gr.Row():
                tab_config.room_size.instantiate()
            with gr.Row():
                tab_config.wet_level.instantiate()
                tab_config.dry_level.instantiate()
                tab_config.damping.instantiate()

        with gr.Accordion("Volume controls (dB)", open=True):
            with gr.Row():
                tab_config.main_gain.instantiate()
                tab_config.inst_gain.instantiate()
                tab_config.backup_gain.instantiate()
2592
+
2593
+
2594
def _render_output_options(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the audio output options of the one-click tab.

    The output name component takes its value from ``update_output_name``
    (bound to ``get_song_cover_name``), with the cached song and voice
    model components as inputs.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    # NOTE(review): the accordion/row nesting was reconstructed from a diff
    # view without indentation -- confirm the layout against the repo.
    output_name_fn = partial(
        update_output_name,
        get_song_cover_name,
        True,  # noqa: FBT003
    )
    with gr.Accordion("Audio output", open=True):
        with gr.Row():
            tab_config.output_name.instantiate(
                value=output_name_fn,
                inputs=[
                    gr.State(None),
                    tab_config.cached_song.instance,
                    tab_config.voice_model.instance,
                ],
            )
            tab_config.output_sr.instantiate()
            tab_config.output_format.instantiate()
        with gr.Row():
            tab_config.show_intermediate_audio.instantiate()
2613
+
2614
+
2615
def _render_intermediate_audio(tab_config: OneClickSongGenerationConfig) -> None:
    """
    Render the intermediate audio tracks section of the one-click tab.

    One accordion is created per pipeline step inside an initially
    hidden outer accordion. All eight accordions are passed as outputs
    to ``toggle_intermediate_audio`` whenever the "show intermediate
    audio" component changes.

    Parameters
    ----------
    tab_config : OneClickSongGenerationConfig
        Configuration holding the components of the one-click tab.

    """
    tracks = tab_config.intermediate_audio

    # NOTE(review): the nesting of the step accordions inside the outer
    # accordion was reconstructed from a diff view without indentation --
    # confirm the layout against the repo.
    with gr.Accordion(
        "Intermediate audio tracks",
        open=False,
        visible=False,
    ) as intermediate_audio_accordion:
        with gr.Accordion(
            "Step 0: song retrieval",
            open=False,
        ) as song_retrieval_accordion:
            tracks.song.instantiate()

        with gr.Accordion(
            "Step 1a: vocals/instrumentals separation",
            open=False,
        ) as vocals_separation_accordion:
            with gr.Row():
                tracks.vocals.instantiate()
                tracks.instrumentals.instantiate()

        with gr.Accordion(
            "Step 1b: main vocals/ backup vocals separation",
            open=False,
        ) as main_vocals_separation_accordion:
            with gr.Row():
                tracks.main_vocals.instantiate()
                tracks.backup_vocals.instantiate()

        with gr.Accordion(
            "Step 1c: main vocals cleanup",
            open=False,
        ) as vocal_cleanup_accordion:
            with gr.Row():
                tracks.main_vocals_dereverbed.instantiate()
                tracks.main_vocals_reverb.instantiate()

        with gr.Accordion(
            "Step 2: conversion of main vocals",
            open=False,
        ) as vocal_conversion_accordion:
            tracks.converted_vocals.instantiate()

        with gr.Accordion(
            "Step 3: post-processing of converted vocals",
            open=False,
        ) as vocals_postprocessing_accordion:
            tracks.postprocessed_vocals.instantiate()

        with gr.Accordion(
            "Step 4: pitch shift of background tracks",
            open=False,
        ) as pitch_shift_accordion:
            with gr.Row():
                tracks.instrumentals_shifted.instantiate()
                tracks.backup_vocals_shifted.instantiate()

    # Outer accordion first, followed by the seven step accordions.
    toggled_accordions = [
        intermediate_audio_accordion,
        song_retrieval_accordion,
        vocals_separation_accordion,
        main_vocals_separation_accordion,
        vocal_cleanup_accordion,
        vocal_conversion_accordion,
        vocals_postprocessing_accordion,
        pitch_shift_accordion,
    ]
    show_intermediate_audio = tab_config.show_intermediate_audio.instance
    show_intermediate_audio.change(
        partial(toggle_intermediate_audio, num_components=7),
        inputs=show_intermediate_audio,
        outputs=toggled_accordions,
        show_progress="hidden",
    )
2688
+
2689
+
2690
  app = render_app()
2691
  app_wrapper = typer.Typer()
2692