lainlives committed on
Commit
bbed340
·
1 Parent(s): 92a1819

Overhaul that probably will break everything

Browse files
Files changed (2) hide show
  1. .gitignore +2 -0
  2. app.py +695 -12
.gitignore CHANGED
@@ -1,2 +1,4 @@
1
  models/
2
  logs/
 
 
 
1
  models/
2
  logs/
3
+ audio/
4
+ temp/
app.py CHANGED
@@ -32,8 +32,6 @@ from ultimate_rvc.web.config.component import (AnyComponentConfig, AudioConfig,
32
  from ultimate_rvc.web.config.tab import (SongGenerationConfig,
33
  SpeechGenerationConfig,
34
  TrainingConfig)
35
- from ultimate_rvc.web.tabs.generate.song_cover.multi_step_generation import \
36
- render as render_song_cover_multi_step_tab
37
  from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import \
38
  render as render_speech_multi_step_tab
39
  from ultimate_rvc.web.tabs.generate.speech.one_click_generation import \
@@ -217,6 +215,7 @@ class ConcurrencyId(StrEnum):
217
  class SongSourceType(StrEnum):
218
  """The type of source providing the song to generate a cover of."""
219
 
 
220
  LOCAL_FILE = "Local file"
221
  CACHED_SONG = "Cached song"
222
 
@@ -244,8 +243,8 @@ class SongTransferOption(StrEnum):
244
  class SpeechTransferOption(StrEnum):
245
  """Enumeration of possible speech transfer options."""
246
 
247
- STEP_2_SPEECH = "Step 2: vocal conversion"
248
- STEP_3_SPEECH = "Step 3: vocal effect"
249
 
250
 
251
  class ComponentVisibilityKwArgs(TypedDict, total=False):
@@ -316,14 +315,10 @@ class UpdateAudioKwArgs(TypedDict, total=False):
316
  class DatasetType(StrEnum):
317
  """The type of dataset to train a voice model."""
318
 
319
- NEW_DATASET = "New dataset"
320
- EXISTING_DATASET = "Existing dataset"
321
 
322
 
323
- embedders_list = [
324
- ("embedders/contentvec/", ["pytorch_model.bin", "config.json"]),
325
- ("embedders/custom/Crusty/", ["model.safetensors", "config.json"]),
326
- ]
327
 
328
 
329
  class EmbedderModel(StrEnum):
@@ -1734,7 +1729,7 @@ class SongGenerationConfig(GenerationConfig):
1734
  )
1735
  source: TextboxConfig = TextboxConfig(
1736
  label="Source",
1737
- info="Link to a song on YouTube or the full path of a local audio file.",
1738
  value=None,
1739
  exclude_value=True,
1740
  )
@@ -1924,6 +1919,693 @@ class SpeechGenerationConfig(GenerationConfig):
1924
  total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
1925
 
1926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1927
  def run_newpipeline(
1928
  source: str,
1929
  model_name: str,
@@ -2371,7 +3053,8 @@ def render_song_cover_one_click_tab(
2371
  generate_btn.click(
2372
  partial(
2373
  exception_harness(
2374
- run_pipeline,
 
2375
  info_msg="Song cover generated successfully!",
2376
  ),
2377
  cookiefile=cookiefile,
 
32
  from ultimate_rvc.web.config.tab import (SongGenerationConfig,
33
  SpeechGenerationConfig,
34
  TrainingConfig)
 
 
35
  from ultimate_rvc.web.tabs.generate.speech.multi_step_generation import \
36
  render as render_speech_multi_step_tab
37
  from ultimate_rvc.web.tabs.generate.speech.one_click_generation import \
 
215
  class SongSourceType(StrEnum):
216
  """The type of source providing the song to generate a cover of."""
217
 
218
+ PATH = "Local or HTTP filepath"
219
  LOCAL_FILE = "Local file"
220
  CACHED_SONG = "Cached song"
221
 
 
243
class SpeechTransferOption(StrEnum):
    """
    Enumeration of possible speech transfer options.

    Each member's value is a human-readable string naming a numbered
    step ("Step 2: ...", "Step 3: ...").
    """

    # NOTE(review): presumably these strings are shown as dropdown choices
    # for transferring audio between steps - confirm against the callers.
    STEP_2_SPEECH = "Step 2: speech conversion"
    STEP_3_SPEECH = "Step 3: speech effect"
248
 
249
 
250
  class ComponentVisibilityKwArgs(TypedDict, total=False):
 
315
class DatasetType(StrEnum):
    """
    The type of dataset to train a voice model.

    The two members distinguish creating a new dataset from using an
    existing one; their values are human-readable strings.
    """

    NEW_DATASET = "Create new dataset"
    EXISTING_DATASET = "Use existing dataset"
320
 
321
 
 
 
 
 
322
 
323
 
324
  class EmbedderModel(StrEnum):
 
1729
  )
1730
  source: TextboxConfig = TextboxConfig(
1731
  label="Source",
1732
+ info="Local (to the server) filepath or http link. Youtube probably wont work but most other sites still do.",
1733
  value=None,
1734
  exclude_value=True,
1735
  )
 
1919
  total_config = load_config(config_name, TotalConfig) if config_name else TotalConfig()
1920
 
1921
 
1922
def render_song_cover_multi_step_tab(total_config: TotalConfig, cookiefile: str | None = None) -> None:
    """
    Build the "Generate song cover - multi-step generation" tab.

    All input audio components of the multi-step song configuration are
    instantiated first; the six step sections (0 through 5) are then
    rendered, in order, inside a single "Multi-step" tab.

    Parameters
    ----------
    total_config : TotalConfig
        Model containing all component configuration settings for the
        Ultimate RVC web UI.
    cookiefile : str, optional
        The path to a file containing cookies to use when downloading
        audio from Youtube. Forwarded to the step 0 renderer.

    """
    multi_step_config = total_config.song.multi_step

    # Instantiate the shared input tracks before any step is rendered.
    for track in multi_step_config.input_audio.all:
        track.instantiate()

    with gr.Tab("Multi-step"):
        # Step 0 needs the full configuration and the cookie file.
        _render_step_0(total_config, cookiefile=cookiefile)
        # Steps 1 to 4 only need the tab-level configuration.
        for render_step in (
            _render_step_1,
            _render_step_2,
            _render_step_3,
            _render_step_4,
        ):
            render_step(multi_step_config)
        # Step 5 needs both the full and the tab-level configuration.
        _render_step_5(total_config, multi_step_config)
1946
+
1947
+
1948
def _render_step_0(total_config: TotalConfig, cookiefile: str | None) -> None:
    """
    Render the "Step 0: song retrieval" section and wire its events.

    NOTE(review): this listing was reconstructed from a diff view that
    dropped leading whitespace; the nesting of the `with` layout blocks
    below is inferred from statement order - confirm against app.py.

    Parameters
    ----------
    total_config : TotalConfig
        Configuration from which the multi-step song tab settings, the
        one-click cached-song dropdown and the intermediate-audio
        management dropdown are read.
    cookiefile : str | None
        Forwarded as the `cookiefile` keyword argument to
        `retrieve_song`.

    """
    tab_config = total_config.song.multi_step

    # Receives the second output of `retrieve_song` and is fed into the
    # dropdown refresh that follows the retrieval.
    current_song_dir = gr.State(None)
    with gr.Accordion("Step 0: song retrieval", open=True):
        gr.Markdown("")
        with gr.Row():
            with gr.Column():
                tab_config.source_type.instantiate()
            with gr.Column():
                tab_config.source.instantiate()
                local_file = gr.Audio(
                    label="Source",
                    type="filepath",
                    visible=False,
                    waveform_options=gr.WaveformOptions(show_recording_waveform=True),
                )
                tab_config.cached_song.instance.render()

        # The selected source type drives the three alternative source
        # inputs (presumably showing exactly one of them - verify against
        # `toggle_visible_component`).
        tab_config.source_type.instance.input(
            partial(toggle_visible_component, 3),
            inputs=tab_config.source_type.instance,
            outputs=[
                tab_config.source.instance,
                local_file,
                tab_config.cached_song.instance,
            ],
            show_progress="hidden",
        )

        # Both alternative inputs write their value into the source textbox,
        # which is the only input passed to `retrieve_song` below.
        local_file.change(
            update_value,
            inputs=local_file,
            outputs=tab_config.source.instance,
            show_progress="hidden",
        )
        tab_config.cached_song.instance.input(
            update_value,
            inputs=tab_config.cached_song.instance,
            outputs=tab_config.source.instance,
            show_progress="hidden",
        )
        with gr.Accordion("Options", open=False):
            song_transfer = _render_song_transfer(
                [SongTransferOption.STEP_1_AUDIO],
                "Song",
            )
        with gr.Row():
            retrieve_song_reset_btn = gr.Button("Reset options")
            retrieve_song_btn = gr.Button("Retrieve song", variant="primary")
        song_transfer_btn = gr.Button("Transfer song")
        song_output = gr.Audio(
            label="Song",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )

        # Reset restores the transfer dropdown to its initial selection.
        retrieve_song_reset_btn.click(
            lambda: gr.Dropdown(value=[SongTransferOption.STEP_1_AUDIO]),
            outputs=song_transfer,
            show_progress="hidden",
        )

        retrieve_song_btn.click(
            partial(
                exception_harness(
                    retrieve_song,
                    info_msg="Song retrieved successfully!",
                ),
                cookiefile=cookiefile,
            ),
            inputs=tab_config.source.instance,
            outputs=[song_output, current_song_dir],
        ).then(
            partial(
                update_dropdowns,
                get_named_song_dirs,
                # Matches the number of outputs: every song-dir dropdown
                # plus the two cached-song dropdowns.
                len(tab_config.song_dirs.all) + 2,
                value_indices=range(len(tab_config.song_dirs.all)),
            ),
            inputs=current_song_dir,
            outputs=[
                *tab_config.song_dirs.all,
                tab_config.cached_song.instance,
                total_config.song.one_click.cached_song.instance,
            ],
            show_progress="hidden",
        ).then(
            partial(update_dropdowns, get_named_song_dirs, 1, [], [0]),
            outputs=total_config.management.audio.intermediate.instance,
            show_progress="hidden",
        )
        setup_transfer_event(
            song_transfer_btn,
            song_transfer,
            song_output,
            tab_config.input_audio.all,
        )
2047
+
2048
+
2049
def _render_step_1(tab_config: MultiStepSongGenerationConfig) -> None:
    """
    Render the "Step 1: vocal separation" section and wire its events.

    NOTE(review): this listing was reconstructed from a diff view that
    dropped leading whitespace; the nesting of the `with` layout blocks
    below is inferred from statement order - confirm against app.py.

    Parameters
    ----------
    tab_config : MultiStepSongGenerationConfig
        Configuration providing the input audio component, the song
        directory dropdown and the separation options used here.

    """
    with gr.Accordion("Step 1: vocal separation", open=False):
        tab_config.input_audio.audio.instance.render()
        tab_config.song_dirs.separate_audio.instance.render()
        with gr.Accordion("Options", open=False):
            with gr.Row():
                tab_config.separation_model.instantiate()
                tab_config.segment_size.instantiate()
            with gr.Row():
                primary_stem_transfer = _render_song_transfer(
                    [SongTransferOption.STEP_2_VOCALS],
                    "Primary stem",
                )
                secondary_stem_transfer = _render_song_transfer(
                    [SongTransferOption.STEP_4_INSTRUMENTALS],
                    "Secondary stem",
                )
        with gr.Row():
            separate_audio_reset_btn = gr.Button("Reset options")
            separate_vocals_btn = gr.Button("Separate vocals", variant="primary")
        with gr.Row():
            primary_stem_transfer_btn = gr.Button("Transfer primary stem")
            secondary_stem_transfer_btn = gr.Button("Transfer secondary stem")

        with gr.Row():
            primary_stem_output = gr.Audio(
                label="Primary stem",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )
            secondary_stem_output = gr.Audio(
                label="Secondary stem",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )

        # Reset writes the configured values back into the option
        # components and restores both transfer dropdowns.
        separate_audio_reset_btn.click(
            lambda: [
                tab_config.separation_model.value,
                tab_config.segment_size.value,
                gr.Dropdown(value=[SongTransferOption.STEP_2_VOCALS]),
                gr.Dropdown(value=[SongTransferOption.STEP_4_INSTRUMENTALS]),
            ],
            outputs=[
                tab_config.separation_model.instance,
                tab_config.segment_size.instance,
                primary_stem_transfer,
                secondary_stem_transfer,
            ],
            show_progress="hidden",
        )
        # Registered under the GPU concurrency id with a limit of 1.
        separate_vocals_btn.click(
            exception_harness(
                separate_audio,
                info_msg="Vocals separated successfully!",
            ),
            inputs=[
                tab_config.input_audio.audio.instance,
                tab_config.song_dirs.separate_audio.instance,
                tab_config.separation_model.instance,
                tab_config.segment_size.instance,
            ],
            outputs=[primary_stem_output, secondary_stem_output],
            concurrency_limit=1,
            concurrency_id=ConcurrencyId.GPU,
        )
        # Each stem gets its own transfer button/dropdown/output triple.
        for btn, transfer, output in [
            (primary_stem_transfer_btn, primary_stem_transfer, primary_stem_output),
            (
                secondary_stem_transfer_btn,
                secondary_stem_transfer,
                secondary_stem_output,
            ),
        ]:
            setup_transfer_event(
                btn,
                transfer,
                output,
                tab_config.input_audio.all,
            )
2131
+
2132
+
2133
def _render_step_2(tab_config: MultiStepSongGenerationConfig) -> None:
    """
    Render the "Step 2: vocal conversion" section and wire its events.

    NOTE(review): this listing was reconstructed from a diff view that
    dropped leading whitespace; the nesting of the `with` layout blocks
    below is inferred from statement order - confirm against app.py.

    Parameters
    ----------
    tab_config : MultiStepSongGenerationConfig
        Configuration providing the vocals input, voice model, song
        directory dropdown and conversion options used here.

    """
    with gr.Accordion("Step 2: vocal conversion", open=False):
        tab_config.input_audio.vocals.instance.render()
        tab_config.voice_model.instance.render()
        tab_config.song_dirs.convert_vocals.instance.render()
        with gr.Accordion("Options", open=False):
            with gr.Row():
                tab_config.n_octaves.instantiate()
                tab_config.n_semitones.instantiate()

            converted_vocals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_3_VOCALS],
                "Converted vocals",
            )
            with gr.Accordion("Advanced", open=False):
                with gr.Accordion("Voice synthesis", open=False):
                    with gr.Row():
                        tab_config.f0_methods.instantiate()
                        tab_config.index_rate.instantiate()
                    with gr.Row():
                        tab_config.rms_mix_rate.instantiate()
                        tab_config.protect_rate.instantiate()
                        tab_config.hop_length.instantiate()
                with gr.Accordion("Vocal enrichment", open=False), gr.Row():
                    with gr.Column():
                        tab_config.split_voice.instantiate()
                    with gr.Column():
                        tab_config.autotune_voice.instantiate()
                        tab_config.autotune_strength.instantiate()
                    with gr.Column():
                        tab_config.clean_voice.instantiate()
                        tab_config.clean_strength.instantiate()
                # Each strength component is driven by its matching toggle
                # (presumably shown only when the toggle is True - verify
                # against `toggle_visibility`).
                tab_config.autotune_voice.instance.change(
                    partial(toggle_visibility, targets={True}),
                    inputs=tab_config.autotune_voice.instance,
                    outputs=tab_config.autotune_strength.instance,
                    show_progress="hidden",
                )
                tab_config.clean_voice.instance.change(
                    partial(toggle_visibility, targets={True}),
                    inputs=tab_config.clean_voice.instance,
                    outputs=tab_config.clean_strength.instance,
                    show_progress="hidden",
                )
                with gr.Accordion("Speaker embeddings", open=False), gr.Row():
                    with gr.Column():
                        tab_config.embedder_model.instantiate()
                        tab_config.custom_embedder_model.instance.render()
                    tab_config.sid.instantiate()
                # The custom embedder component is driven by the embedder
                # selection, with EmbedderModel.CUSTOM as the target value.
                tab_config.embedder_model.instance.change(
                    partial(toggle_visibility, targets={EmbedderModel.CUSTOM}),
                    inputs=tab_config.embedder_model.instance,
                    outputs=tab_config.custom_embedder_model.instance,
                    show_progress="hidden",
                )
        with gr.Row():
            convert_vocals_reset_btn = gr.Button("Reset options")
            convert_vocals_btn = gr.Button("Convert vocals", variant="primary")
        converted_vocals_transfer_btn = gr.Button("Transfer converted vocals")
        converted_vocals_track_output = gr.Audio(
            label="Converted vocals",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )

        # Reset writes the configured values back into the option
        # components; the returned list is positionally matched to `outputs`.
        # The custom embedder component is not part of the reset.
        convert_vocals_reset_btn.click(
            lambda: [
                tab_config.n_octaves.value,
                tab_config.n_semitones.value,
                tab_config.f0_methods.value,
                tab_config.index_rate.value,
                tab_config.rms_mix_rate.value,
                tab_config.protect_rate.value,
                tab_config.hop_length.value,
                tab_config.split_voice.value,
                tab_config.autotune_voice.value,
                tab_config.autotune_strength.value,
                tab_config.clean_voice.value,
                tab_config.clean_strength.value,
                tab_config.embedder_model.value,
                tab_config.sid.value,
                gr.Dropdown(value=[SongTransferOption.STEP_3_VOCALS]),
            ],
            outputs=[
                tab_config.n_octaves.instance,
                tab_config.n_semitones.instance,
                tab_config.f0_methods.instance,
                tab_config.index_rate.instance,
                tab_config.rms_mix_rate.instance,
                tab_config.protect_rate.instance,
                tab_config.hop_length.instance,
                tab_config.split_voice.instance,
                tab_config.autotune_voice.instance,
                tab_config.autotune_strength.instance,
                tab_config.clean_voice.instance,
                tab_config.clean_strength.instance,
                tab_config.embedder_model.instance,
                tab_config.sid.instance,
                converted_vocals_transfer,
            ],
            show_progress="hidden",
        )
        # `convert` is called with content_type fixed to vocals and is
        # registered under the GPU concurrency id with a limit of 1.
        convert_vocals_btn.click(
            partial(
                exception_harness(convert, info_msg="Vocals converted successfully!"),
                content_type=RVCContentType.VOCALS,
            ),
            inputs=[
                tab_config.input_audio.vocals.instance,
                tab_config.song_dirs.convert_vocals.instance,
                tab_config.voice_model.instance,
                tab_config.n_octaves.instance,
                tab_config.n_semitones.instance,
                tab_config.f0_methods.instance,
                tab_config.index_rate.instance,
                tab_config.rms_mix_rate.instance,
                tab_config.protect_rate.instance,
                tab_config.hop_length.instance,
                tab_config.split_voice.instance,
                tab_config.autotune_voice.instance,
                tab_config.autotune_strength.instance,
                tab_config.clean_voice.instance,
                tab_config.clean_strength.instance,
                tab_config.embedder_model.instance,
                tab_config.custom_embedder_model.instance,
                tab_config.sid.instance,
            ],
            outputs=converted_vocals_track_output,
            concurrency_id=ConcurrencyId.GPU,
            concurrency_limit=1,
        )
        setup_transfer_event(
            converted_vocals_transfer_btn,
            converted_vocals_transfer,
            converted_vocals_track_output,
            tab_config.input_audio.all,
        )
2271
+
2272
+
2273
def _render_step_3(tab_config: MultiStepSongGenerationConfig) -> None:
    """
    Render the "Step 3: vocal post-processing" section and wire its
    events.

    NOTE(review): this listing was reconstructed from a diff view that
    dropped leading whitespace; the nesting of the `with` layout blocks
    below is inferred from statement order - confirm against app.py.

    Parameters
    ----------
    tab_config : MultiStepSongGenerationConfig
        Configuration providing the converted vocals input, the song
        directory dropdown and the post-processing options used here.

    """
    with gr.Accordion("Step 3: vocal post-processing", open=False):
        tab_config.input_audio.converted_vocals.instance.render()
        tab_config.song_dirs.postprocess_vocals.instance.render()
        with gr.Accordion("Options", open=False):
            tab_config.room_size.instantiate()
            with gr.Row():
                tab_config.wet_level.instantiate()
                tab_config.dry_level.instantiate()
                tab_config.damping.instantiate()
            effected_vocals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_5_MAIN_VOCALS],
                "Effected vocals",
            )
        with gr.Row():
            postprocess_vocals_reset_btn = gr.Button("Reset options")
            postprocess_vocals_btn = gr.Button("Post-process vocals", variant="primary")
        effected_vocals_transfer_btn = gr.Button("Transfer effected vocals")

        effected_vocals_track_output = gr.Audio(
            label="Effected vocals",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )

        # Reset writes the configured values back into the option
        # components and restores the transfer dropdown.
        postprocess_vocals_reset_btn.click(
            lambda: [
                tab_config.room_size.value,
                tab_config.wet_level.value,
                tab_config.dry_level.value,
                tab_config.damping.value,
                gr.Dropdown(value=[SongTransferOption.STEP_5_MAIN_VOCALS]),
            ],
            outputs=[
                tab_config.room_size.instance,
                tab_config.wet_level.instance,
                tab_config.dry_level.instance,
                tab_config.damping.instance,
                effected_vocals_transfer,
            ],
            show_progress="hidden",
        )
        # Unlike steps 1 and 2, no concurrency id or limit is set here.
        postprocess_vocals_btn.click(
            exception_harness(
                postprocess,
                info_msg="Vocals post-processed successfully!",
            ),
            inputs=[
                tab_config.input_audio.converted_vocals.instance,
                tab_config.song_dirs.postprocess_vocals.instance,
                tab_config.room_size.instance,
                tab_config.wet_level.instance,
                tab_config.dry_level.instance,
                tab_config.damping.instance,
            ],
            outputs=effected_vocals_track_output,
        )
        setup_transfer_event(
            effected_vocals_transfer_btn,
            effected_vocals_transfer,
            effected_vocals_track_output,
            tab_config.input_audio.all,
        )
2337
+
2338
+
2339
def _render_step_4(tab_config: MultiStepSongGenerationConfig) -> None:
    """
    Render the "Step 4: pitch shift of background audio" section and
    wire its events.

    NOTE(review): this listing was reconstructed from a diff view that
    dropped leading whitespace; the nesting of the `with` layout blocks
    below is inferred from statement order - confirm against app.py.

    Parameters
    ----------
    tab_config : MultiStepSongGenerationConfig
        Configuration providing the instrumentals and backup vocals
        inputs, the song directory dropdown and the semitone options
        used here.

    """
    with gr.Accordion("Step 4: pitch shift of background audio", open=False):
        with gr.Row():
            tab_config.input_audio.instrumentals.instance.render()
            tab_config.input_audio.backup_vocals.instance.render()
        with gr.Row():
            tab_config.n_semitones_instrumentals.instantiate()
            tab_config.n_semitones_backup_vocals.instantiate()
        tab_config.song_dirs.pitch_shift_background.instance.render()
        with gr.Accordion("Options", open=False), gr.Row():
            shifted_instrumentals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_5_INSTRUMENTALS],
                "Pitch-shifted instrumentals",
            )
            shifted_backup_vocals_transfer = _render_song_transfer(
                [SongTransferOption.STEP_5_BACKUP_VOCALS],
                "Pitch-shifted backup vocals",
            )
        with gr.Row():
            pitch_shift_instrumentals_btn = gr.Button(
                "Pitch shift instrumentals",
                variant="primary",
            )
            pitch_shift_backup_vocals_btn = gr.Button(
                "Pitch shift backup vocals",
                variant="primary",
            )
        with gr.Row():
            shifted_instrumentals_transfer_btn = gr.Button(
                "Transfer shifted instrumentals",
            )
            shifted_backup_vocals_transfer_btn = gr.Button(
                "Transfer shifted backup vocals",
            )
        pitch_shift_background_reset_btn = gr.Button("Reset options")
        with gr.Row():
            shifted_instrumentals_track_output = gr.Audio(
                label="Pitch-shifted instrumentals",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )
            shifted_backup_vocals_track_output = gr.Audio(
                label="Pitch-shifted backup vocals",
                type="filepath",
                interactive=False,
                waveform_options=gr.WaveformOptions(show_recording_waveform=True),
            )

        # Reset writes the configured semitone values back and restores
        # both transfer dropdowns.
        pitch_shift_background_reset_btn.click(
            lambda: [
                tab_config.n_semitones_instrumentals.value,
                tab_config.n_semitones_backup_vocals.value,
                gr.Dropdown(value=[SongTransferOption.STEP_5_INSTRUMENTALS]),
                gr.Dropdown(value=[SongTransferOption.STEP_5_BACKUP_VOCALS]),
            ],
            outputs=[
                tab_config.n_semitones_instrumentals.instance,
                tab_config.n_semitones_backup_vocals.instance,
                shifted_instrumentals_transfer,
                shifted_backup_vocals_transfer,
            ],
            show_progress="hidden",
        )
        # Both buttons call the same `pitch_shift` function and share the
        # same song directory dropdown; only the input track, semitone
        # component and output differ.
        pitch_shift_instrumentals_btn.click(
            exception_harness(
                pitch_shift,
                info_msg="Instrumentals pitch-shifted successfully!",
            ),
            inputs=[
                tab_config.input_audio.instrumentals.instance,
                tab_config.song_dirs.pitch_shift_background.instance,
                tab_config.n_semitones_instrumentals.instance,
            ],
            outputs=shifted_instrumentals_track_output,
        )
        pitch_shift_backup_vocals_btn.click(
            exception_harness(
                pitch_shift,
                info_msg="Backup vocals pitch-shifted successfully!",
            ),
            inputs=[
                tab_config.input_audio.backup_vocals.instance,
                tab_config.song_dirs.pitch_shift_background.instance,
                tab_config.n_semitones_backup_vocals.instance,
            ],
            outputs=shifted_backup_vocals_track_output,
        )
        # Each shifted track gets its own transfer button/dropdown/output
        # triple.
        for btn, transfer, output in [
            (
                shifted_instrumentals_transfer_btn,
                shifted_instrumentals_transfer,
                shifted_instrumentals_track_output,
            ),
            (
                shifted_backup_vocals_transfer_btn,
                shifted_backup_vocals_transfer,
                shifted_backup_vocals_track_output,
            ),
        ]:
            setup_transfer_event(
                btn,
                transfer,
                output,
                tab_config.input_audio.all,
            )
2445
+
2446
+
2447
def _render_step_5(
    total_config: TotalConfig,
    tab_config: MultiStepSongGenerationConfig,
) -> None:
    """
    Render the "Step 5: song mixing" section and wire its events.

    NOTE(review): this listing was reconstructed from a diff view that
    dropped leading whitespace; the nesting of the `with` layout blocks
    below is inferred from statement order - confirm against app.py.

    Parameters
    ----------
    total_config : TotalConfig
        Configuration from which the output-audio management dropdown
        is read.
    tab_config : MultiStepSongGenerationConfig
        Configuration providing the three input tracks, the song
        directory dropdown and the mixing options used here.

    """
    with gr.Accordion("Step 5: song mixing", open=False):
        with gr.Row():
            tab_config.input_audio.main_vocals.instance.render()
            tab_config.input_audio.shifted_instrumentals.instance.render()
            tab_config.input_audio.shifted_backup_vocals.instance.render()
        tab_config.song_dirs.mix.instance.render()
        with gr.Accordion("Options", open=False):
            with gr.Row():
                tab_config.main_gain.instantiate()
                tab_config.inst_gain.instantiate()
                tab_config.backup_gain.instantiate()
            with gr.Row():
                # The output name is given as a callable with the main
                # vocals track and the mix song directory as its inputs.
                tab_config.output_name.instantiate(
                    value=partial(
                        update_output_name,
                        get_song_cover_name,
                        False,  # noqa: FBT003,
                    ),
                    inputs=[
                        tab_config.input_audio.main_vocals.instance,
                        tab_config.song_dirs.mix.instance,
                    ],
                )
                tab_config.output_sr.instantiate()
                tab_config.output_format.instantiate()
            song_cover_transfer = _render_song_transfer([], "Song cover")
        with gr.Row():
            mix_reset_btn = gr.Button("Reset options")
            mix_btn = gr.Button("Mix song cover", variant="primary")
        song_cover_transfer_btn = gr.Button("Transfer song cover")
        song_cover_output = gr.Audio(
            label="Song cover",
            type="filepath",
            interactive=False,
            waveform_options=gr.WaveformOptions(show_recording_waveform=True),
        )
        # Reset writes the configured values back and clears the transfer
        # dropdown; the output name is not part of the reset.
        mix_reset_btn.click(
            lambda: [
                tab_config.main_gain.value,
                tab_config.inst_gain.value,
                tab_config.backup_gain.value,
                tab_config.output_sr.value,
                tab_config.output_format.value,
                gr.Dropdown(value=[]),
            ],
            outputs=[
                tab_config.main_gain.instance,
                tab_config.inst_gain.instance,
                tab_config.backup_gain.instance,
                tab_config.output_sr.instance,
                tab_config.output_format.instance,
                song_cover_transfer,
            ],
            show_progress="hidden",
        )
        # Holds the (track, gain) pairs produced by the first event below
        # so they can be passed to `mix_song` as a single input.
        temp_audio_gains = gr.State()
        mix_btn.click(
            partial(
                _pair_audio_tracks_and_gain,
                [
                    tab_config.input_audio.main_vocals.instance,
                    tab_config.input_audio.shifted_instrumentals.instance,
                    tab_config.input_audio.shifted_backup_vocals.instance,
                ],
                [
                    tab_config.main_gain.instance,
                    tab_config.inst_gain.instance,
                    tab_config.backup_gain.instance,
                ],
            ),
            # `inputs` is a set, not a list: `_pair_audio_tracks_and_gain`
            # expects one mapping from component to value.
            inputs={
                tab_config.input_audio.main_vocals.instance,
                tab_config.input_audio.shifted_instrumentals.instance,
                tab_config.input_audio.shifted_backup_vocals.instance,
                tab_config.main_gain.instance,
                tab_config.inst_gain.instance,
                tab_config.backup_gain.instance,
            },
            outputs=temp_audio_gains,
        ).then(
            exception_harness(mix_song, info_msg="Song cover succesfully generated."),
            inputs=[
                temp_audio_gains,
                tab_config.song_dirs.mix.instance,
                tab_config.output_sr.instance,
                tab_config.output_format.instance,
                tab_config.output_name.instance,
            ],
            outputs=song_cover_output,
        ).then(
            partial(update_dropdowns, get_saved_output_audio, 1, [], [0]),
            outputs=total_config.management.audio.output.instance,
            show_progress="hidden",
        )
        setup_transfer_event(
            song_cover_transfer_btn,
            song_cover_transfer,
            song_cover_output,
            tab_config.input_audio.all,
        )
2551
+
2552
+
2553
def _render_song_transfer(
    value: list[SongTransferOption],
    label_prefix: str,
) -> gr.Dropdown:
    """
    Render a transfer dropdown for song generation.

    Thin wrapper that calls `render_transfer_component` with
    `SongTransferOption` as the third argument.

    Parameters
    ----------
    value : list[SongTransferOption]
        Passed through as the first argument.
    label_prefix : str
        Passed through as the second argument.

    Returns
    -------
    gr.Dropdown
        Whatever `render_transfer_component` returns.

    """
    transfer_dropdown = render_transfer_component(
        value,
        label_prefix,
        SongTransferOption,
    )
    return transfer_dropdown
2558
+
2559
+
2560
+ def _pair_audio_tracks_and_gain(
2561
+ audio_components: Sequence[gr.Audio],
2562
+ gain_components: Sequence[gr.Slider],
2563
+ data: dict[gr.Audio | gr.Slider, Any],
2564
+ ) -> list[tuple[str, int]]:
2565
+ """
2566
+ Pair audio tracks and gain levels stored in separate gradio
2567
+ components.
2568
+
2569
+ This function is meant to first be partially applied to the sequence
2570
+ of audio components and the sequence of slider components containing
2571
+ the values that should be combined. The resulting function can then
2572
+ be called by an event listener whose inputs is a set containing
2573
+ those audio and slider components. The `data` parameter in that case
2574
+ will contain a mapping from each of those components to the value
2575
+ that the component stores.
2576
+
2577
+ Parameters
2578
+ ----------
2579
+ audio_components : Sequence[gr.Audio]
2580
+ Audio components to pair with gain levels.
2581
+ gain_components : Sequence[gr.Slider]
2582
+ Gain level components to pair with audio tracks.
2583
+ data : dict[gr.Audio | gr.Slider, Any]
2584
+ Data from the audio and gain components.
2585
+
2586
+ Returns
2587
+ -------
2588
+ list[tuple[str, int]]
2589
+ Paired audio tracks and gain levels.
2590
+
2591
+ Raises
2592
+ ------
2593
+ ValueError
2594
+ If the number of audio tracks and gain levels are not the same.
2595
+
2596
+ """
2597
+ audio_tracks = [data[component] for component in audio_components]
2598
+ gain_levels = [data[component] for component in gain_components]
2599
+ if len(audio_tracks) != len(gain_levels):
2600
+ err_msg = "Number of audio tracks and gain levels must be the same."
2601
+ raise ValueError(err_msg)
2602
+ return [
2603
+ (audio_track, gain_level)
2604
+ for audio_track, gain_level in zip(audio_tracks, gain_levels, strict=True)
2605
+ if audio_track
2606
+ ]
2607
+
2608
+
2609
  def run_newpipeline(
2610
  source: str,
2611
  model_name: str,
 
3053
  generate_btn.click(
3054
  partial(
3055
  exception_harness(
3056
+ info_msg="New pipeline engaged expect bugs.",
3057
+ run_newpipeline,
3058
  info_msg="Song cover generated successfully!",
3059
  ),
3060
  cookiefile=cookiefile,