Spaces:

lainlives
/

mangler

Paused

App Files Files Community

lainlives committed on Dec 12, 2025

Commit

2e759b6

1 Parent(s): 337635d

~

Browse files

Files changed (13) hide show

main/app/app.py +0 -1
main/app/tabs/downloads/downloads.py +2 -2
main/app/tabs/editing/child/audio_effects.py +10 -10
main/app/tabs/editing/child/quirk.py +2 -2
main/app/tabs/extra/child/create_srt.py +1 -1
main/app/tabs/extra/child/f0_extract.py +2 -2
main/app/tabs/extra/child/settings.py +3 -3
main/app/tabs/inference/child/convert.py +662 -164
main/app/tabs/inference/child/convert_tts.py +9 -9
main/app/tabs/inference/child/convert_with_whisper.py +5 -5
main/app/tabs/inference/child/separate.py +407 -138
main/app/tabs/training/child/create_reference.py +4 -4
main/app/tabs/training/child/training.py +7 -7

main/app/app.py CHANGED Viewed

@@ -469,7 +469,6 @@ with gr.Blocks(
     if __name__ == "__main__":
         logger.info(config.device.replace("privateuseone", "dml"))
         logger.info(translations["start_app"])
-        logger.info(translations["set_lang"].format(lang=en - US))
         port = configs.get("app_port", 7860)
         server_name = configs.get("server_name", "0.0.0.0")

     if __name__ == "__main__":
         logger.info(config.device.replace("privateuseone", "dml"))
         logger.info(translations["start_app"])
         port = configs.get("app_port", 7860)
         server_name = configs.get("server_name", "0.0.0.0")

main/app/tabs/downloads/downloads.py CHANGED Viewed

@@ -37,7 +37,7 @@ def download_tab():
                 with gr.Column():
                     model_upload = gr.Files(label=translations["drop_model"], file_types=[".pth", ".onnx", ".index", ".zip"], visible=False)
         with gr.Row():
-            with gr.Accordion(translations["download_pretrained_2"], open=False):
                 with gr.Row():
                     pretrain_download_choices = gr.Radio(label=translations["model_download_select"], choices=[translations["download_url"], translations["list_model"], translations["upload"]], value=translations["download_url"], interactive=True)
                 with gr.Row():
@@ -109,4 +109,4 @@ def download_tab():
                 inputs=[pretrain_upload],
                 outputs=[],
                 api_name="upload_pretrain"
-            )

                 with gr.Column():
                     model_upload = gr.Files(label=translations["drop_model"], file_types=[".pth", ".onnx", ".index", ".zip"], visible=False)
         with gr.Row():
+            with gr.Accordion(translations["download_pretrained_2"], open=True):
                 with gr.Row():
                     pretrain_download_choices = gr.Radio(label=translations["model_download_select"], choices=[translations["download_url"], translations["list_model"], translations["upload"]], value=translations["download_url"], interactive=True)
                 with gr.Row():
                 inputs=[pretrain_upload],
                 outputs=[],
                 api_name="upload_pretrain"
+            )

main/app/tabs/editing/child/audio_effects.py CHANGED Viewed

@@ -23,7 +23,7 @@ def audio_effects_tab():
                 compressor_check_box = gr.Checkbox(label=translations["compressor"], value=False, interactive=True)
                 more_options = gr.Checkbox(label=translations["more_option"], value=False, interactive=True)
     with gr.Row():
-        with gr.Accordion(translations["input_output"], open=False):
             with gr.Row():
                 upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
             with gr.Row():
@@ -41,13 +41,13 @@ def audio_effects_tab():
             with gr.Row():
                 audio_output_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
     with gr.Row():
-        with gr.Accordion(translations["use_presets"], open=False):
             with gr.Row():
                 presets_name = gr.Dropdown(label=translations["file_preset"], choices=audio_effect_presets_file, value=audio_effect_presets_file[0] if len(audio_effect_presets_file) > 0 else '', interactive=True, allow_custom_value=True)
             with gr.Row():
                 load_click = gr.Button(translations["load_file"], variant="primary")
                 refresh_click = gr.Button(translations["refresh"])
-            with gr.Accordion(translations["export_file"], open=False):
                 with gr.Row():
                     with gr.Column():
                         name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
@@ -59,7 +59,7 @@ def audio_effects_tab():
     with gr.Row():
         with gr.Column():
             with gr.Row():
-                with gr.Accordion(translations["reverb"], open=False, visible=reverb_check_box.value) as reverb_accordion:
                     reverb_freeze_mode = gr.Checkbox(label=translations["reverb_freeze"], info=translations["reverb_freeze_info"], value=False, interactive=True)
                     reverb_room_size = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.15, label=translations["room_size"], info=translations["room_size_info"], interactive=True)
                     reverb_damping = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label=translations["damping"], info=translations["damping_info"], interactive=True)
@@ -67,20 +67,20 @@ def audio_effects_tab():
                     reverb_dry_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label=translations["dry_level"], info=translations["dry_level_info"], interactive=True)
                     reverb_width = gr.Slider(minimum=0, maximum=1, step=0.01, value=1, label=translations["width"], info=translations["width_info"], interactive=True)
             with gr.Row():
-                with gr.Accordion(translations["chorus"], open=False, visible=chorus_check_box.value) as chorus_accordion:
                     chorus_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_depth"], info=translations["chorus_depth_info"], interactive=True)
                     chorus_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1.5, label=translations["chorus_rate_hz"], info=translations["chorus_rate_hz_info"], interactive=True)
                     chorus_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_mix"], info=translations["chorus_mix_info"], interactive=True)
                     chorus_centre_delay_ms = gr.Slider(minimum=0, maximum=50, step=1, value=10, label=translations["chorus_centre_delay_ms"], info=translations["chorus_centre_delay_ms_info"], interactive=True)
                     chorus_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["chorus_feedback"], info=translations["chorus_feedback_info"], interactive=True)
             with gr.Row():
-                with gr.Accordion(translations["delay"], open=False, visible=delay_check_box.value) as delay_accordion:
                     delay_second = gr.Slider(minimum=0, maximum=5, step=0.01, value=0.5, label=translations["delay_seconds"], info=translations["delay_seconds_info"], interactive=True)
                     delay_feedback = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_feedback"], info=translations["delay_feedback_info"], interactive=True)
                     delay_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_mix"], info=translations["delay_mix_info"], interactive=True)
         with gr.Column():
             with gr.Row():
-                with gr.Accordion(translations["more_option"], open=False, visible=more_options.value) as more_accordion:
                     with gr.Row():
                         fade = gr.Checkbox(label=translations["fade"], value=False, interactive=True)
                         bass_or_treble = gr.Checkbox(label=translations["bass_or_treble"], value=False, interactive=True)
@@ -114,14 +114,14 @@ def audio_effects_tab():
                         clipping_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["clipping_threshold_db"], info=translations["clipping_threshold_db_info"], interactive=True, visible=clipping_checkbox.value)
                         bitcrush_bit_depth = gr.Slider(minimum=1, maximum=24, step=1, value=16, label=translations["bitcrush_bit_depth"], info=translations["bitcrush_bit_depth_info"], interactive=True, visible=bitcrush_checkbox.value)
             with gr.Row():
-                with gr.Accordion(translations["phaser"], open=False, visible=phaser_check_box.value) as phaser_accordion:
                     phaser_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_depth"], info=translations["phaser_depth_info"], interactive=True)
                     phaser_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1, label=translations["phaser_rate_hz"], info=translations["phaser_rate_hz_info"], interactive=True)
                     phaser_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_mix"], info=translations["phaser_mix_info"], interactive=True)
                     phaser_centre_frequency_hz = gr.Slider(minimum=50, maximum=5000, step=10, value=1000, label=translations["phaser_centre_frequency_hz"], info=translations["phaser_centre_frequency_hz_info"], interactive=True)
                     phaser_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["phaser_feedback"], info=translations["phaser_feedback_info"], interactive=True)
             with gr.Row():
-                with gr.Accordion(translations["compressor"], open=False, visible=compressor_check_box.value) as compressor_accordion:
                     compressor_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-20, label=translations["compressor_threshold_db"], info=translations["compressor_threshold_db_info"], interactive=True)
                     compressor_ratio = gr.Slider(minimum=1, maximum=20, step=0.1, value=1, label=translations["compressor_ratio"], info=translations["compressor_ratio_info"], interactive=True)
                     compressor_attack_ms = gr.Slider(minimum=0.1, maximum=100, step=0.1, value=10, label=translations["compressor_attack_ms"], info=translations["compressor_attack_ms_info"], interactive=True)
@@ -390,4 +390,4 @@ def audio_effects_tab():
             ],
             outputs=[audio_play_output],
             api_name="audio_effects"
-        )

                 compressor_check_box = gr.Checkbox(label=translations["compressor"], value=False, interactive=True)
                 more_options = gr.Checkbox(label=translations["more_option"], value=False, interactive=True)
     with gr.Row():
+        with gr.Accordion(translations["input_output"], open=True):
             with gr.Row():
                 upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
             with gr.Row():
             with gr.Row():
                 audio_output_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
     with gr.Row():
+        with gr.Accordion(translations["use_presets"], open=True):
             with gr.Row():
                 presets_name = gr.Dropdown(label=translations["file_preset"], choices=audio_effect_presets_file, value=audio_effect_presets_file[0] if len(audio_effect_presets_file) > 0 else '', interactive=True, allow_custom_value=True)
             with gr.Row():
                 load_click = gr.Button(translations["load_file"], variant="primary")
                 refresh_click = gr.Button(translations["refresh"])
+            with gr.Accordion(translations["export_file"], open=True):
                 with gr.Row():
                     with gr.Column():
                         name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
     with gr.Row():
         with gr.Column():
             with gr.Row():
+                with gr.Accordion(translations["reverb"], open=True, visible=reverb_check_box.value) as reverb_accordion:
                     reverb_freeze_mode = gr.Checkbox(label=translations["reverb_freeze"], info=translations["reverb_freeze_info"], value=False, interactive=True)
                     reverb_room_size = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.15, label=translations["room_size"], info=translations["room_size_info"], interactive=True)
                     reverb_damping = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label=translations["damping"], info=translations["damping_info"], interactive=True)
                     reverb_dry_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label=translations["dry_level"], info=translations["dry_level_info"], interactive=True)
                     reverb_width = gr.Slider(minimum=0, maximum=1, step=0.01, value=1, label=translations["width"], info=translations["width_info"], interactive=True)
             with gr.Row():
+                with gr.Accordion(translations["chorus"], open=True, visible=chorus_check_box.value) as chorus_accordion:
                     chorus_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_depth"], info=translations["chorus_depth_info"], interactive=True)
                     chorus_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1.5, label=translations["chorus_rate_hz"], info=translations["chorus_rate_hz_info"], interactive=True)
                     chorus_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_mix"], info=translations["chorus_mix_info"], interactive=True)
                     chorus_centre_delay_ms = gr.Slider(minimum=0, maximum=50, step=1, value=10, label=translations["chorus_centre_delay_ms"], info=translations["chorus_centre_delay_ms_info"], interactive=True)
                     chorus_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["chorus_feedback"], info=translations["chorus_feedback_info"], interactive=True)
             with gr.Row():
+                with gr.Accordion(translations["delay"], open=True, visible=delay_check_box.value) as delay_accordion:
                     delay_second = gr.Slider(minimum=0, maximum=5, step=0.01, value=0.5, label=translations["delay_seconds"], info=translations["delay_seconds_info"], interactive=True)
                     delay_feedback = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_feedback"], info=translations["delay_feedback_info"], interactive=True)
                     delay_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_mix"], info=translations["delay_mix_info"], interactive=True)
         with gr.Column():
             with gr.Row():
+                with gr.Accordion(translations["more_option"], open=True, visible=more_options.value) as more_accordion:
                     with gr.Row():
                         fade = gr.Checkbox(label=translations["fade"], value=False, interactive=True)
                         bass_or_treble = gr.Checkbox(label=translations["bass_or_treble"], value=False, interactive=True)
                         clipping_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["clipping_threshold_db"], info=translations["clipping_threshold_db_info"], interactive=True, visible=clipping_checkbox.value)
                         bitcrush_bit_depth = gr.Slider(minimum=1, maximum=24, step=1, value=16, label=translations["bitcrush_bit_depth"], info=translations["bitcrush_bit_depth_info"], interactive=True, visible=bitcrush_checkbox.value)
             with gr.Row():
+                with gr.Accordion(translations["phaser"], open=True, visible=phaser_check_box.value) as phaser_accordion:
                     phaser_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_depth"], info=translations["phaser_depth_info"], interactive=True)
                     phaser_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1, label=translations["phaser_rate_hz"], info=translations["phaser_rate_hz_info"], interactive=True)
                     phaser_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_mix"], info=translations["phaser_mix_info"], interactive=True)
                     phaser_centre_frequency_hz = gr.Slider(minimum=50, maximum=5000, step=10, value=1000, label=translations["phaser_centre_frequency_hz"], info=translations["phaser_centre_frequency_hz_info"], interactive=True)
                     phaser_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["phaser_feedback"], info=translations["phaser_feedback_info"], interactive=True)
             with gr.Row():
+                with gr.Accordion(translations["compressor"], open=True, visible=compressor_check_box.value) as compressor_accordion:
                     compressor_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-20, label=translations["compressor_threshold_db"], info=translations["compressor_threshold_db_info"], interactive=True)
                     compressor_ratio = gr.Slider(minimum=1, maximum=20, step=0.1, value=1, label=translations["compressor_ratio"], info=translations["compressor_ratio_info"], interactive=True)
                     compressor_attack_ms = gr.Slider(minimum=0.1, maximum=100, step=0.1, value=10, label=translations["compressor_attack_ms"], info=translations["compressor_attack_ms_info"], interactive=True)
             ],
             outputs=[audio_play_output],
             api_name="audio_effects"
+        )

main/app/tabs/editing/child/quirk.py CHANGED Viewed

@@ -19,7 +19,7 @@ def quirk_tab():
     with gr.Row():
         apply_quirk_button = gr.Button(translations["apply"], variant="primary")
     with gr.Row():
-        with gr.Accordion(translations["input_output"], open=False):
             with gr.Row():
                 quirk_upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
             with gr.Column():
@@ -45,4 +45,4 @@ def quirk_tab():
             ],
             outputs=[output_audio_play],
             api_name="quirk"
-        )

     with gr.Row():
         apply_quirk_button = gr.Button(translations["apply"], variant="primary")
     with gr.Row():
+        with gr.Accordion(translations["input_output"], open=True):
             with gr.Row():
                 quirk_upload_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
             with gr.Column():
             ],
             outputs=[output_audio_play],
             api_name="quirk"
+        )

main/app/tabs/extra/child/create_srt.py CHANGED Viewed

@@ -21,7 +21,7 @@ def create_srt_tab():
     with gr.Row():
         convert_button = gr.Button(translations["convert_audio"], variant="primary")
     with gr.Row():
-        with gr.Accordion(translations["input_output"], open=False):
             with gr.Column():
                 input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
                 output_file = gr.Textbox(label=translations["srt_output_file"], value="srt/output.srt", placeholder="srt/output.srt", interactive=True)

     with gr.Row():
         convert_button = gr.Button(translations["convert_audio"], variant="primary")
     with gr.Row():
+        with gr.Accordion(translations["input_output"], open=True):
             with gr.Column():
                 input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
                 output_file = gr.Textbox(label=translations["srt_output_file"], value="srt/output.srt", placeholder="srt/output.srt", interactive=True)

main/app/tabs/extra/child/f0_extract.py CHANGED Viewed

@@ -19,7 +19,7 @@ def f0_extract_tab():
             upload_audio_file = gr.Files(label=translations["drop_audio"], file_types=file_types)
             audioplay = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
         with gr.Column():
-            with gr.Accordion(translations["f0_method"], open=False):
                 with gr.Group():
                     with gr.Row():
                         onnx_f0_mode3 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
@@ -48,4 +48,4 @@ def f0_extract_tab():
             ],
             outputs=[file_output, image_output],
             api_name="f0_extract"
-        )

             upload_audio_file = gr.Files(label=translations["drop_audio"], file_types=file_types)
             audioplay = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
         with gr.Column():
+            with gr.Accordion(translations["f0_method"], open=True):
                 with gr.Group():
                     with gr.Row():
                         onnx_f0_mode3 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
             ],
             outputs=[file_output, image_output],
             api_name="f0_extract"
+        )

main/app/tabs/extra/child/settings.py CHANGED Viewed

@@ -31,11 +31,11 @@ def settings_tab(app):
             font_button = gr.Button(translations["change_font"])
     with gr.Row():
         with gr.Column():
-            with gr.Accordion(translations["stop"], open=False, visible=True):
                 separate_stop = gr.Button(translations["stop_separate"])
                 convert_stop = gr.Button(translations["stop_convert"])
                 create_dataset_stop = gr.Button(translations["stop_create_dataset"])
-                with gr.Accordion(translations["stop_training"], open=False):
                     model_name_stop = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
                     preprocess_stop = gr.Button(translations["stop_preprocess"])
                     extract_stop = gr.Button(translations["stop_extract"])
@@ -58,4 +58,4 @@ def settings_tab(app):
     with gr.Row():
         preprocess_stop.click(fn=lambda model_name_stop: stop_pid("preprocess_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
         extract_stop.click(fn=lambda model_name_stop: stop_pid("extract_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
-        train_stop.click(fn=lambda model_name_stop: stop_pid("train_pid", model_name_stop, True), inputs=[model_name_stop], outputs=[])

             font_button = gr.Button(translations["change_font"])
     with gr.Row():
         with gr.Column():
+            with gr.Accordion(translations["stop"], open=True, visible=True):
                 separate_stop = gr.Button(translations["stop_separate"])
                 convert_stop = gr.Button(translations["stop_convert"])
                 create_dataset_stop = gr.Button(translations["stop_create_dataset"])
+                with gr.Accordion(translations["stop_training"], open=True):
                     model_name_stop = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
                     preprocess_stop = gr.Button(translations["stop_preprocess"])
                     extract_stop = gr.Button(translations["stop_extract"])
     with gr.Row():
         preprocess_stop.click(fn=lambda model_name_stop: stop_pid("preprocess_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
         extract_stop.click(fn=lambda model_name_stop: stop_pid("extract_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
+        train_stop.click(fn=lambda model_name_stop: stop_pid("train_pid", model_name_stop, True), inputs=[model_name_stop], outputs=[])

main/app/tabs/inference/child/convert.py CHANGED Viewed

@@ -7,8 +7,38 @@ sys.path.append(os.getcwd())
 from main.app.core.presets import load_presets, save_presets
 from main.app.core.inference import convert_audio, convert_selection
-from main.app.variables import translations, paths_for_files, sample_rate_choice, model_name, index_path, method_f0, f0_file, embedders_mode, embedders_model, presets_file, configs, file_types, export_format_choices, hybrid_f0_method
-from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, change_f0_choices, unlock_f0, change_preset_choices, change_backing_choices, hoplength_show, change_models_choices, get_index, index_strength_show, change_embedders_mode, shutil_move
 def convert_tab():
     with gr.Row():
@@ -17,225 +47,677 @@ def convert_tab():
         with gr.Column():
             with gr.Group():
                 with gr.Row():
-                    cleaner0 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
-                    autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
-                    use_audio = gr.Checkbox(label=translations["use_audio"], value=False, interactive=True)
-                    checkpointing = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
                 with gr.Row():
-                    use_original = gr.Checkbox(label=translations["convert_original"], value=False, interactive=True, visible=use_audio.value)
-                    convert_backing = gr.Checkbox(label=translations["convert_backing"], value=False, interactive=True, visible=use_audio.value)
-                    not_merge_backing = gr.Checkbox(label=translations["not_merge_backing"], value=False, interactive=True, visible=use_audio.value)
-                    merge_instrument = gr.Checkbox(label=translations["merge_instruments"], value=False, interactive=True, visible=use_audio.value)
             with gr.Row():
-                pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
-                clean_strength0 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner0.value)
-            with gr.Row():
                 with gr.Column():
-                    audio_select = gr.Dropdown(label=translations["select_separate"], choices=[], value="", interactive=True, allow_custom_value=True, visible=False)
-                    convert_button_2 = gr.Button(translations["convert_audio"], visible=False)
     with gr.Row():
         with gr.Column():
             convert_button = gr.Button(translations["convert_audio"], variant="primary")
     with gr.Row():
         with gr.Column():
-            input0 = gr.Files(label=translations["drop_audio"], file_types=file_types)
-            play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
         with gr.Column():
             with gr.Accordion(translations["model_accordion"], open=True):
                 with gr.Row():
-                    model_pth = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
-                    model_index = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
                 with gr.Row():
                     refresh = gr.Button(translations["refresh"])
                 with gr.Row():
-                    index_strength = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index.value != "")
-            with gr.Accordion(translations["input_output"], open=False):
                 with gr.Column():
-                    export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
-                    input_audio0 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
-                    output_audio = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
                 with gr.Column():
                     refresh0 = gr.Button(translations["refresh"])
-            with gr.Accordion(translations["setting"], open=False):
-                with gr.Accordion(translations["f0_method"], open=False):
                     with gr.Group():
                         with gr.Row():
-                            onnx_f0_mode = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
-                            unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
-                        method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
-                        hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method.value == "hybrid")
-                    hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
-                    alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                with gr.Accordion(translations["f0_file"], open=False):
-                    upload_f0_file = gr.File(label=translations["upload_f0"], file_types=[".txt"])
-                    f0_file_dropdown = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
                     refresh_f0_file = gr.Button(translations["refresh"])
-                with gr.Accordion(translations["hubert_model"], open=False):
-                    embed_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
-                    embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
-                    custom_embedders = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
-                with gr.Accordion(translations["use_presets"], open=False):
                     with gr.Row():
-                        presets_name = gr.Dropdown(label=translations["file_preset"], choices=presets_file, value=presets_file[0] if len(presets_file) > 0 else '', interactive=True, allow_custom_value=True)
                     with gr.Row():
-                        load_click = gr.Button(translations["load_file"], variant="primary")
                         refresh_click = gr.Button(translations["refresh"])
                     with gr.Accordion(translations["export_file"], open=False):
                         with gr.Row():
                             with gr.Column():
                                 with gr.Group():
                                     with gr.Row():
-                                        cleaner_chbox = gr.Checkbox(label=translations["save_clean"], value=True, interactive=True)
-                                        autotune_chbox = gr.Checkbox(label=translations["save_autotune"], value=True, interactive=True)
-                                        pitch_chbox = gr.Checkbox(label=translations["save_pitch"], value=True, interactive=True)
-                                        index_strength_chbox = gr.Checkbox(label=translations["save_index_2"], value=True, interactive=True)
-                                        resample_sr_chbox = gr.Checkbox(label=translations["save_resample"], value=True, interactive=True)
-                                        filter_radius_chbox = gr.Checkbox(label=translations["save_filter"], value=True, interactive=True)
-                                        rms_mix_rate_chbox = gr.Checkbox(label=translations["save_envelope"], value=True, interactive=True)
-                                        protect_chbox = gr.Checkbox(label=translations["save_protect"], value=True, interactive=True)
-                                        split_audio_chbox = gr.Checkbox(label=translations["save_split"], value=True, interactive=True)
-                                        formant_shifting_chbox = gr.Checkbox(label=translations["formantshift"], value=True, interactive=True)
                         with gr.Row():
                             with gr.Column():
-                                name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
-                                save_file_button = gr.Button(translations["export_file"])
                     with gr.Row():
-                        upload_presets = gr.Files(label=translations["upload_presets"], file_types=[".conversion.json"])
                 with gr.Column():
                     with gr.Group():
                         with gr.Row():
-                            split_audio = gr.Checkbox(label=translations["split_audio"], value=False, interactive=True)
-                            formant_shifting = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
                         with gr.Row():
-                            proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
-                            audio_processing = gr.Checkbox(label=translations["audio_processing"], value=False, interactive=True)
-                    resample_sr = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
-                    proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
-                    f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
-                    filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
-                    rms_mix_rate = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
-                    protect = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
                 with gr.Row():
-                    formant_qfrency = gr.Slider(value=1.0, label=translations["formant_qfrency"], info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
-                    formant_timbre = gr.Slider(value=1.0, label=translations["formant_timbre"], info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
     with gr.Row():
         gr.Markdown(translations["output_convert"])
     with gr.Row():
-        main_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["main_convert"])
-        backing_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_backing"], visible=convert_backing.value)
-        main_backing = gr.Audio(show_download_button=True, interactive=False, label=translations["main_or_backing"], visible=convert_backing.value)
     with gr.Row():
-        original_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_original"], visible=use_original.value)
-        vocal_instrument = gr.Audio(show_download_button=True, interactive=False, label=translations["voice_or_instruments"], visible=merge_instrument.value)
     with gr.Row():
-        upload_f0_file.upload(fn=lambda inp: shutil_move(inp.name, configs["f0_path"]), inputs=[upload_f0_file], outputs=[f0_file_dropdown])
-        refresh_f0_file.click(fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown])
-        unlock_full_method.change(fn=unlock_f0, inputs=[unlock_full_method], outputs=[method])
     with gr.Row():
         load_click.click(
-            fn=load_presets,
             inputs=[
-                presets_name,
-                cleaner0,
-                autotune,
-                pitch,
-                clean_strength0,
-                index_strength,
-                resample_sr,
-                filter_radius,
-                rms_mix_rate,
-                protect,
-                split_audio,
                 f0_autotune_strength,
-                formant_shifting,
-                formant_qfrency,
                 formant_timbre,
                 proposal_pitch,
-                proposal_pitch_threshold
-            ],
             outputs=[
-                cleaner0,
-                autotune,
-                pitch,
-                clean_strength0,
-                index_strength,
-                resample_sr,
-                filter_radius,
-                rms_mix_rate,
-                protect,
-                split_audio,
-                f0_autotune_strength,
-                formant_shifting,
-                formant_qfrency,
                 formant_timbre,
                 proposal_pitch,
-                proposal_pitch_threshold
-            ]
         )
         refresh_click.click(fn=change_preset_choices, inputs=[], outputs=[presets_name])
         save_file_button.click(
-            fn=save_presets,
             inputs=[
-                name_to_save_file,
-                cleaner0,
-                autotune,
-                pitch,
-                clean_strength0,
-                index_strength,
-                resample_sr,
-                filter_radius,
-                rms_mix_rate,
-                protect,
-                split_audio,
-                f0_autotune_strength,
-                cleaner_chbox,
-                autotune_chbox,
-                pitch_chbox,
-                index_strength_chbox,
-                resample_sr_chbox,
-                filter_radius_chbox,
-                rms_mix_rate_chbox,
-                protect_chbox,
-                split_audio_chbox,
-                formant_shifting_chbox,
-                formant_shifting,
-                formant_qfrency,
                 formant_timbre,
                 proposal_pitch,
-                proposal_pitch_threshold
-            ],
-            outputs=[presets_name]
         )
     with gr.Row():
-        upload_presets.upload(fn=lambda presets_in: [shutil_move(preset.name, configs["presets_path"]) for preset in presets_in][0], inputs=[upload_presets], outputs=[presets_name])
         autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
-        use_audio.change(fn=lambda a: [visible(a), visible(a), visible(a), visible(a), visible(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), visible(not a), visible(not a), visible(not a), visible(not a)], inputs=[use_audio], outputs=[main_backing, use_original, convert_backing, not_merge_backing, merge_instrument, use_original, convert_backing, not_merge_backing, merge_instrument, input_audio0, output_audio, input0, play_audio])
     with gr.Row():
-        convert_backing.change(fn=lambda a,b: [change_backing_choices(a, b), visible(a)], inputs=[convert_backing, not_merge_backing], outputs=[use_original, backing_convert])
-        use_original.change(fn=lambda audio, original: [visible(original), visible(not original), visible(audio and not original), valueFalse_interactive(not original), valueFalse_interactive(not original)], inputs=[use_audio, use_original], outputs=[original_convert, main_convert, main_backing, convert_backing, not_merge_backing])
         cleaner0.change(fn=visible, inputs=[cleaner0], outputs=[clean_strength0])
     with gr.Row():
-        merge_instrument.change(fn=visible, inputs=[merge_instrument], outputs=[vocal_instrument])
-        not_merge_backing.change(fn=lambda audio, merge, cvb: [visible(audio and not merge), change_backing_choices(cvb, merge)], inputs=[use_audio, not_merge_backing, convert_backing], outputs=[main_backing, use_original])
-        method.change(fn=lambda method, hybrid: [visible(method == "hybrid"), visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method, hybrid_method], outputs=[hybrid_method, alpha, hop_length])
     with gr.Row():
-        hybrid_method.change(fn=hoplength_show, inputs=[method, hybrid_method], outputs=[hop_length])
-        refresh.click(fn=change_models_choices, inputs=[], outputs=[model_pth, model_index])
         model_pth.change(fn=get_index, inputs=[model_pth], outputs=[model_index])
     with gr.Row():
-        input0.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[input0], outputs=[input_audio0])
-        input_audio0.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio0], outputs=[play_audio])
-        formant_shifting.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[formant_shifting], outputs=[formant_qfrency, formant_timbre])
     with gr.Row():
-        embedders.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders], outputs=[custom_embedders])
-        refresh0.click(fn=change_audios_choices, inputs=[input_audio0], outputs=[input_audio0])
-        model_index.change(fn=index_strength_show, inputs=[model_index], outputs=[index_strength])
     with gr.Row():
-        convert_button.click(fn=lambda: visible(False), inputs=[], outputs=[convert_button])
-        convert_button_2.click(fn=lambda: [visible(False), visible(False)], inputs=[], outputs=[audio_select, convert_button_2])
     with gr.Row():
-        proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
-        embed_mode.change(fn=change_embedders_mode, inputs=[embed_mode], outputs=[embedders])
     with gr.Row():
         convert_button.click(
             fn=convert_selection,
@@ -268,18 +750,27 @@ def convert_tab():
                 f0_autotune_strength,
                 checkpointing,
                 onnx_f0_mode,
-                formant_shifting,
-                formant_qfrency,
                 formant_timbre,
                 f0_file_dropdown,
                 embed_mode,
                 proposal_pitch,
                 proposal_pitch_threshold,
                 audio_processing,
-                alpha
             ],
-            outputs=[audio_select, main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button, convert_button_2],
-            api_name="convert_selection"
         )
         convert_button_2.click(
             fn=convert_audio,
@@ -313,16 +804,23 @@ def convert_tab():
                 audio_select,
                 checkpointing,
                 onnx_f0_mode,
-                formant_shifting,
-                formant_qfrency,
                 formant_timbre,
                 f0_file_dropdown,
                 embed_mode,
                 proposal_pitch,
                 proposal_pitch_threshold,
                 audio_processing,
-                alpha
             ],
-            outputs=[main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button],
-            api_name="convert_audio"
-        )

 from main.app.core.presets import load_presets, save_presets
 from main.app.core.inference import convert_audio, convert_selection
+from main.app.variables import (
+    translations,
+    paths_for_files,
+    sample_rate_choice,
+    model_name,
+    index_path,
+    method_f0,
+    f0_file,
+    embedders_mode,
+    embedders_model,
+    presets_file,
+    configs,
+    file_types,
+    export_format_choices,
+    hybrid_f0_method,
+)
+from main.app.core.ui import (
+    visible,
+    valueFalse_interactive,
+    change_audios_choices,
+    change_f0_choices,
+    unlock_f0,
+    change_preset_choices,
+    change_backing_choices,
+    hoplength_show,
+    change_models_choices,
+    get_index,
+    index_strength_show,
+    change_embedders_mode,
+    shutil_move,
+)
 def convert_tab():
     with gr.Row():
         with gr.Column():
             with gr.Group():
                 with gr.Row():
+                    cleaner0 = gr.Checkbox(
+                        label=translations["clear_audio"], value=False, interactive=True
+                    )
+                    autotune = gr.Checkbox(
+                        label=translations["autotune"], value=False, interactive=True
+                    )
+                    use_audio = gr.Checkbox(
+                        label=translations["use_audio"], value=False, interactive=True
+                    )
+                    checkpointing = gr.Checkbox(
+                        label=translations["memory_efficient_training"],
+                        value=False,
+                        interactive=True,
+                    )
                 with gr.Row():
+                    use_original = gr.Checkbox(
+                        label=translations["convert_original"],
+                        value=False,
+                        interactive=True,
+                        visible=use_audio.value,
+                    )
+                    convert_backing = gr.Checkbox(
+                        label=translations["convert_backing"],
+                        value=False,
+                        interactive=True,
+                        visible=use_audio.value,
+                    )
+                    not_merge_backing = gr.Checkbox(
+                        label=translations["not_merge_backing"],
+                        value=False,
+                        interactive=True,
+                        visible=use_audio.value,
+                    )
+                    merge_instrument = gr.Checkbox(
+                        label=translations["merge_instruments"],
+                        value=False,
+                        interactive=True,
+                        visible=use_audio.value,
+                    )
+            with gr.Row():
+                pitch = gr.Slider(
+                    minimum=-20,
+                    maximum=20,
+                    step=1,
+                    info=translations["pitch_info"],
+                    label=translations["pitch"],
+                    value=0,
+                    interactive=True,
+                )
+                clean_strength0 = gr.Slider(
+                    label=translations["clean_strength"],
+                    info=translations["clean_strength_info"],
+                    minimum=0,
+                    maximum=1,
+                    value=0.5,
+                    step=0.1,
+                    interactive=True,
+                    visible=cleaner0.value,
+                )
             with gr.Row():
                 with gr.Column():
+                    audio_select = gr.Dropdown(
+                        label=translations["select_separate"],
+                        choices=[],
+                        value="",
+                        interactive=True,
+                        allow_custom_value=True,
+                        visible=False,
+                    )
+                    convert_button_2 = gr.Button(
+                        translations["convert_audio"], visible=False
+                    )
     with gr.Row():
         with gr.Column():
             convert_button = gr.Button(translations["convert_audio"], variant="primary")
     with gr.Row():
         with gr.Column():
+            input0 = gr.Files(label=translations["drop_audio"], file_types=file_types)
+            play_audio = gr.Audio(
+                show_download_button=True,
+                interactive=False,
+                label=translations["input_audio"],
+            )
         with gr.Column():
             with gr.Accordion(translations["model_accordion"], open=True):
                 with gr.Row():
+                    model_pth = gr.Dropdown(
+                        label=translations["model_name"],
+                        choices=model_name,
+                        value=model_name[0] if len(model_name) >= 1 else "",
+                        interactive=True,
+                        allow_custom_value=True,
+                    )
+                    model_index = gr.Dropdown(
+                        label=translations["index_path"],
+                        choices=index_path,
+                        value=index_path[0] if len(index_path) >= 1 else "",
+                        interactive=True,
+                        allow_custom_value=True,
+                    )
                 with gr.Row():
                     refresh = gr.Button(translations["refresh"])
                 with gr.Row():
+                    index_strength = gr.Slider(
+                        label=translations["index_strength"],
+                        info=translations["index_strength_info"],
+                        minimum=0,
+                        maximum=1,
+                        value=0.5,
+                        step=0.01,
+                        interactive=True,
+                        visible=model_index.value != "",
+                    )
+            with gr.Accordion(translations["input_output"], open=True):
                 with gr.Column():
+                    export_format = gr.Radio(
+                        label=translations["export_format"],
+                        info=translations["export_info"],
+                        choices=export_format_choices,
+                        value="wav",
+                        interactive=True,
+                    )
+                    input_audio0 = gr.Dropdown(
+                        label=translations["audio_path"],
+                        value="",
+                        choices=paths_for_files,
+                        info=translations["provide_audio"],
+                        allow_custom_value=True,
+                        interactive=True,
+                    )
+                    output_audio = gr.Textbox(
+                        label=translations["output_path"],
+                        value="audios/output.wav",
+                        placeholder="audios/output.wav",
+                        info=translations["output_path_info"],
+                        interactive=True,
+                        allow_custom_value=False,
+                    )
                 with gr.Column():
                     refresh0 = gr.Button(translations["refresh"])
+            with gr.Accordion(translations["setting"], open=True):
+                with gr.Accordion(translations["f0_method"], open=True):
                     with gr.Group():
                         with gr.Row():
+                            onnx_f0_mode = gr.Checkbox(
+                                label=translations["f0_onnx_mode"],
+                                info=translations["f0_onnx_mode_info"],
+                                value=False,
+                                interactive=True,
+                            )
+                            unlock_full_method = gr.Checkbox(
+                                label=translations["f0_unlock"],
+                                info=translations["f0_unlock_info"],
+                                value=False,
+                                interactive=True,
+                            )
+                        method = gr.Radio(
+                            label=translations["f0_method"],
+                            info=translations["f0_method_info"],
+                            choices=method_f0,
+                            value="rmvpe",
+                            interactive=True,
+                        )
+                        hybrid_method = gr.Dropdown(
+                            label=translations["f0_method_hybrid"],
+                            info=translations["f0_method_hybrid_info"],
+                            choices=hybrid_f0_method,
+                            value=hybrid_f0_method[0],
+                            interactive=True,
+                            allow_custom_value=True,
+                            visible=method.value == "hybrid",
+                        )
+                    hop_length = gr.Slider(
+                        label=translations["hop_length"],
+                        info=translations["hop_length_info"],
+                        minimum=64,
+                        maximum=512,
+                        value=160,
+                        step=1,
+                        interactive=True,
+                        visible=False,
+                    )
+                    alpha = gr.Slider(
+                        label=translations["alpha_label"],
+                        info=translations["alpha_info"],
+                        minimum=0.1,
+                        maximum=1,
+                        value=0.5,
+                        step=0.1,
+                        interactive=True,
+                        visible=False,
+                    )
+                with gr.Accordion(translations["f0_file"], open=True):
+                    upload_f0_file = gr.File(
+                        label=translations["upload_f0"], file_types=[".txt"]
+                    )
+                    f0_file_dropdown = gr.Dropdown(
+                        label=translations["f0_file_2"],
+                        value="",
+                        choices=f0_file,
+                        allow_custom_value=True,
+                        interactive=True,
+                    )
                     refresh_f0_file = gr.Button(translations["refresh"])
+                with gr.Accordion(translations["hubert_model"], open=True):
+                    embed_mode = gr.Radio(
+                        label=translations["embed_mode"],
+                        info=translations["embed_mode_info"],
+                        value="fairseq",
+                        choices=embedders_mode,
+                        interactive=True,
+                        visible=True,
+                    )
+                    embedders = gr.Radio(
+                        label=translations["hubert_model"],
+                        info=translations["hubert_info"],
+                        choices=embedders_model,
+                        value="hubert_base",
+                        interactive=True,
+                    )
+                    custom_embedders = gr.Textbox(
+                        label=translations["modelname"],
+                        info=translations["modelname_info"],
+                        value="",
+                        placeholder="hubert_base",
+                        interactive=True,
+                        visible=embedders.value == "custom",
+                    )
+                with gr.Accordion(translations["use_presets"], open=True):
                     with gr.Row():
+                        presets_name = gr.Dropdown(
+                            label=translations["file_preset"],
+                            choices=presets_file,
+                            value=presets_file[0] if len(presets_file) > 0 else "",
+                            interactive=True,
+                            allow_custom_value=True,
+                        )
                     with gr.Row():
+                        load_click = gr.Button(
+                            translations["load_file"], variant="primary"
+                        )
                         refresh_click = gr.Button(translations["refresh"])
                     with gr.Accordion(translations["export_file"], open=False):
                         with gr.Row():
                             with gr.Column():
                                 with gr.Group():
                                     with gr.Row():
+                                        cleaner_chbox = gr.Checkbox(
+                                            label=translations["save_clean"],
+                                            value=True,
+                                            interactive=True,
+                                        )
+                                        autotune_chbox = gr.Checkbox(
+                                            label=translations["save_autotune"],
+                                            value=True,
+                                            interactive=True,
+                                        )
+                                        pitch_chbox = gr.Checkbox(
+                                            label=translations["save_pitch"],
+                                            value=True,
+                                            interactive=True,
+                                        )
+                                        index_strength_chbox = gr.Checkbox(
+                                            label=translations["save_index_2"],
+                                            value=True,
+                                            interactive=True,
+                                        )
+                                        resample_sr_chbox = gr.Checkbox(
+                                            label=translations["save_resample"],
+                                            value=True,
+                                            interactive=True,
+                                        )
+                                        filter_radius_chbox = gr.Checkbox(
+                                            label=translations["save_filter"],
+                                            value=True,
+                                            interactive=True,
+                                        )
+                                        rms_mix_rate_chbox = gr.Checkbox(
+                                            label=translations["save_envelope"],
+                                            value=True,
+                                            interactive=True,
+                                        )
+                                        protect_chbox = gr.Checkbox(
+                                            label=translations["save_protect"],
+                                            value=True,
+                                            interactive=True,
+                                        )
+                                        split_audio_chbox = gr.Checkbox(
+                                            label=translations["save_split"],
+                                            value=True,
+                                            interactive=True,
+                                        )
+                                        formant_shifting_chbox = gr.Checkbox(
+                                            label=translations["formantshift"],
+                                            value=True,
+                                            interactive=True,
+                                        )
                         with gr.Row():
                             with gr.Column():
+                                name_to_save_file = gr.Textbox(
+                                    label=translations["filename_to_save"]
+                                )
+                                save_file_button = gr.Button(
+                                    translations["export_file"]
+                                )
                     with gr.Row():
+                        upload_presets = gr.Files(
+                            label=translations["upload_presets"],
+                            file_types=[".conversion.json"],
+                        )
                 with gr.Column():
                     with gr.Group():
                         with gr.Row():
+                            split_audio = gr.Checkbox(
+                                label=translations["split_audio"],
+                                value=False,
+                                interactive=True,
+                            )
+                            formant_shifting = gr.Checkbox(
+                                label=translations["formantshift"],
+                                value=False,
+                                interactive=True,
+                            )
                         with gr.Row():
+                            proposal_pitch = gr.Checkbox(
+                                label=translations["proposal_pitch"],
+                                value=False,
+                                interactive=True,
+                            )
+                            audio_processing = gr.Checkbox(
+                                label=translations["audio_processing"],
+                                value=False,
+                                interactive=True,
+                            )
+                    resample_sr = gr.Radio(
+                        choices=[0] + sample_rate_choice,
+                        label=translations["resample"],
+                        info=translations["resample_info"],
+                        value=0,
+                        interactive=True,
+                    )
+                    proposal_pitch_threshold = gr.Slider(
+                        minimum=50.0,
+                        maximum=1200.0,
+                        label=translations["proposal_pitch_threshold"],
+                        info=translations["proposal_pitch_threshold_info"],
+                        value=255.0,
+                        step=0.1,
+                        interactive=True,
+                        visible=proposal_pitch.value,
+                    )
+                    f0_autotune_strength = gr.Slider(
+                        minimum=0,
+                        maximum=1,
+                        label=translations["autotune_rate"],
+                        info=translations["autotune_rate_info"],
+                        value=1,
+                        step=0.1,
+                        interactive=True,
+                        visible=autotune.value,
+                    )
+                    filter_radius = gr.Slider(
+                        minimum=0,
+                        maximum=7,
+                        label=translations["filter_radius"],
+                        info=translations["filter_radius_info"],
+                        value=3,
+                        step=1,
+                        interactive=True,
+                    )
+                    rms_mix_rate = gr.Slider(
+                        minimum=0,
+                        maximum=1,
+                        label=translations["rms_mix_rate"],
+                        info=translations["rms_mix_rate_info"],
+                        value=1,
+                        step=0.1,
+                        interactive=True,
+                    )
+                    protect = gr.Slider(
+                        minimum=0,
+                        maximum=1,
+                        label=translations["protect"],
+                        info=translations["protect_info"],
+                        value=0.5,
+                        step=0.01,
+                        interactive=True,
+                    )
                 with gr.Row():
+                    formant_qfrency = gr.Slider(
+                        value=1.0,
+                        label=translations["formant_qfrency"],
+                        info=translations["formant_qfrency"],
+                        minimum=0.0,
+                        maximum=16.0,
+                        step=0.1,
+                        interactive=True,
+                        visible=False,
+                    )
+                    formant_timbre = gr.Slider(
+                        value=1.0,
+                        label=translations["formant_timbre"],
+                        info=translations["formant_timbre"],
+                        minimum=0.0,
+                        maximum=16.0,
+                        step=0.1,
+                        interactive=True,
+                        visible=False,
+                    )
     with gr.Row():
         gr.Markdown(translations["output_convert"])
     with gr.Row():
+        main_convert = gr.Audio(
+            show_download_button=True,
+            interactive=False,
+            label=translations["main_convert"],
+        )
+        backing_convert = gr.Audio(
+            show_download_button=True,
+            interactive=False,
+            label=translations["convert_backing"],
+            visible=convert_backing.value,
+        )
+        main_backing = gr.Audio(
+            show_download_button=True,
+            interactive=False,
+            label=translations["main_or_backing"],
+            visible=convert_backing.value,
+        )
     with gr.Row():
+        original_convert = gr.Audio(
+            show_download_button=True,
+            interactive=False,
+            label=translations["convert_original"],
+            visible=use_original.value,
+        )
+        vocal_instrument = gr.Audio(
+            show_download_button=True,
+            interactive=False,
+            label=translations["voice_or_instruments"],
+            visible=merge_instrument.value,
+        )
     with gr.Row():
+        upload_f0_file.upload(
+            fn=lambda inp: shutil_move(inp.name, configs["f0_path"]),
+            inputs=[upload_f0_file],
+            outputs=[f0_file_dropdown],
+        )
+        refresh_f0_file.click(
+            fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown]
+        )
+        unlock_full_method.change(
+            fn=unlock_f0, inputs=[unlock_full_method], outputs=[method]
+        )
     with gr.Row():
         load_click.click(
+            fn=load_presets,
             inputs=[
+                presets_name,
+                cleaner0,
+                autotune,
+                pitch,
+                clean_strength0,
+                index_strength,
+                resample_sr,
+                filter_radius,
+                rms_mix_rate,
+                protect,
+                split_audio,
                 f0_autotune_strength,
+                formant_shifting,
+                formant_qfrency,
                 formant_timbre,
                 proposal_pitch,
+                proposal_pitch_threshold,
+            ],
             outputs=[
+                cleaner0,
+                autotune,
+                pitch,
+                clean_strength0,
+                index_strength,
+                resample_sr,
+                filter_radius,
+                rms_mix_rate,
+                protect,
+                split_audio,
+                f0_autotune_strength,
+                formant_shifting,
+                formant_qfrency,
                 formant_timbre,
                 proposal_pitch,
+                proposal_pitch_threshold,
+            ],
         )
         refresh_click.click(fn=change_preset_choices, inputs=[], outputs=[presets_name])
         save_file_button.click(
+            fn=save_presets,
             inputs=[
+                name_to_save_file,
+                cleaner0,
+                autotune,
+                pitch,
+                clean_strength0,
+                index_strength,
+                resample_sr,
+                filter_radius,
+                rms_mix_rate,
+                protect,
+                split_audio,
+                f0_autotune_strength,
+                cleaner_chbox,
+                autotune_chbox,
+                pitch_chbox,
+                index_strength_chbox,
+                resample_sr_chbox,
+                filter_radius_chbox,
+                rms_mix_rate_chbox,
+                protect_chbox,
+                split_audio_chbox,
+                formant_shifting_chbox,
+                formant_shifting,
+                formant_qfrency,
                 formant_timbre,
                 proposal_pitch,
+                proposal_pitch_threshold,
+            ],
+            outputs=[presets_name],
         )
     with gr.Row():
+        upload_presets.upload(
+            fn=lambda presets_in: [
+                shutil_move(preset.name, configs["presets_path"])
+                for preset in presets_in
+            ][0],
+            inputs=[upload_presets],
+            outputs=[presets_name],
+        )
         autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
+        use_audio.change(
+            fn=lambda a: [
+                visible(a),
+                visible(a),
+                visible(a),
+                visible(a),
+                visible(a),
+                valueFalse_interactive(a),
+                valueFalse_interactive(a),
+                valueFalse_interactive(a),
+                valueFalse_interactive(a),
+                visible(not a),
+                visible(not a),
+                visible(not a),
+                visible(not a),
+            ],
+            inputs=[use_audio],
+            outputs=[
+                main_backing,
+                use_original,
+                convert_backing,
+                not_merge_backing,
+                merge_instrument,
+                use_original,
+                convert_backing,
+                not_merge_backing,
+                merge_instrument,
+                input_audio0,
+                output_audio,
+                input0,
+                play_audio,
+            ],
+        )
     with gr.Row():
+        convert_backing.change(
+            fn=lambda a, b: [change_backing_choices(a, b), visible(a)],
+            inputs=[convert_backing, not_merge_backing],
+            outputs=[use_original, backing_convert],
+        )
+        use_original.change(
+            fn=lambda audio, original: [
+                visible(original),
+                visible(not original),
+                visible(audio and not original),
+                valueFalse_interactive(not original),
+                valueFalse_interactive(not original),
+            ],
+            inputs=[use_audio, use_original],
+            outputs=[
+                original_convert,
+                main_convert,
+                main_backing,
+                convert_backing,
+                not_merge_backing,
+            ],
+        )
         cleaner0.change(fn=visible, inputs=[cleaner0], outputs=[clean_strength0])
     with gr.Row():
+        merge_instrument.change(
+            fn=visible, inputs=[merge_instrument], outputs=[vocal_instrument]
+        )
+        not_merge_backing.change(
+            fn=lambda audio, merge, cvb: [
+                visible(audio and not merge),
+                change_backing_choices(cvb, merge),
+            ],
+            inputs=[use_audio, not_merge_backing, convert_backing],
+            outputs=[main_backing, use_original],
+        )
+        method.change(
+            fn=lambda method, hybrid: [
+                visible(method == "hybrid"),
+                visible(method == "hybrid"),
+                hoplength_show(method, hybrid),
+            ],
+            inputs=[method, hybrid_method],
+            outputs=[hybrid_method, alpha, hop_length],
+        )
     with gr.Row():
+        hybrid_method.change(
+            fn=hoplength_show, inputs=[method, hybrid_method], outputs=[hop_length]
+        )
+        refresh.click(
+            fn=change_models_choices, inputs=[], outputs=[model_pth, model_index]
+        )
         model_pth.change(fn=get_index, inputs=[model_pth], outputs=[model_index])
     with gr.Row():
+        input0.upload(
+            fn=lambda audio_in: [
+                shutil_move(audio.name, configs["audios_path"]) for audio in audio_in
+            ][0],
+            inputs=[input0],
+            outputs=[input_audio0],
+        )
+        input_audio0.change(
+            fn=lambda audio: audio if os.path.isfile(audio) else None,
+            inputs=[input_audio0],
+            outputs=[play_audio],
+        )
+        formant_shifting.change(
+            fn=lambda a: [visible(a) for _ in range(2)],
+            inputs=[formant_shifting],
+            outputs=[formant_qfrency, formant_timbre],
+        )
     with gr.Row():
+        embedders.change(
+            fn=lambda embedders: visible(embedders == "custom"),
+            inputs=[embedders],
+            outputs=[custom_embedders],
+        )
+        refresh0.click(
+            fn=change_audios_choices, inputs=[input_audio0], outputs=[input_audio0]
+        )
+        model_index.change(
+            fn=index_strength_show, inputs=[model_index], outputs=[index_strength]
+        )
     with gr.Row():
+        convert_button.click(
+            fn=lambda: visible(False), inputs=[], outputs=[convert_button]
+        )
+        convert_button_2.click(
+            fn=lambda: [visible(False), visible(False)],
+            inputs=[],
+            outputs=[audio_select, convert_button_2],
+        )
     with gr.Row():
+        proposal_pitch.change(
+            fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold]
+        )
+        embed_mode.change(
+            fn=change_embedders_mode, inputs=[embed_mode], outputs=[embedders]
+        )
     with gr.Row():
         convert_button.click(
             fn=convert_selection,
                 f0_autotune_strength,
                 checkpointing,
                 onnx_f0_mode,
+                formant_shifting,
+                formant_qfrency,
                 formant_timbre,
                 f0_file_dropdown,
                 embed_mode,
                 proposal_pitch,
                 proposal_pitch_threshold,
                 audio_processing,
+                alpha,
             ],
+            outputs=[
+                audio_select,
+                main_convert,
+                backing_convert,
+                main_backing,
+                original_convert,
+                vocal_instrument,
+                convert_button,
+                convert_button_2,
+            ],
+            api_name="convert_selection",
         )
         convert_button_2.click(
             fn=convert_audio,
                 audio_select,
                 checkpointing,
                 onnx_f0_mode,
+                formant_shifting,
+                formant_qfrency,
                 formant_timbre,
                 f0_file_dropdown,
                 embed_mode,
                 proposal_pitch,
                 proposal_pitch_threshold,
                 audio_processing,
+                alpha,
             ],
+            outputs=[
+                main_convert,
+                backing_convert,
+                main_backing,
+                original_convert,
+                vocal_instrument,
+                convert_button,
+            ],
+            api_name="convert_audio",
+        )

main/app/tabs/inference/child/convert_tts.py CHANGED Viewed

@@ -34,7 +34,7 @@ def convert_tts_tab():
             txt_input = gr.File(label=translations["drop_text"], file_types=[".txt", ".srt", ".docx"], visible=use_txt.value)
             tts_voice = gr.Dropdown(label=translations["voice"], choices=edgetts, interactive=True, value="vi-VN-NamMinhNeural")
             tts_pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info_2"], label=translations["pitch"], value=0, interactive=True)
-            with gr.Accordion(translations["translate"], open=False):
                 with gr.Row():
                     source_lang = gr.Dropdown(label=translations["source_lang"], choices=["auto"]+google_tts_voice, interactive=True, value="auto")
                     target_lang = gr.Dropdown(label=translations["target_lang"], choices=google_tts_voice, interactive=True, value="en")
@@ -48,12 +48,12 @@ def convert_tts_tab():
                     refresh1 = gr.Button(translations["refresh"])
                 with gr.Row():
                     index_strength0 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index0.value != "")
-            with gr.Accordion(translations["output_path"], open=False):
                 export_format0 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
                 output_audio0 = gr.Textbox(label=translations["output_tts"], value="audios/tts.wav", placeholder="audios/tts.wav", info=translations["tts_output"], interactive=True)
                 output_audio1 = gr.Textbox(label=translations["output_tts_convert"], value="audios/tts-convert.wav", placeholder="audios/tts-convert.wav", info=translations["tts_output"], interactive=True)
-            with gr.Accordion(translations["setting"], open=False):
-                with gr.Accordion(translations["f0_method"], open=False):
                     with gr.Group():
                         with gr.Row():
                             onnx_f0_mode1 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
@@ -62,21 +62,21 @@ def convert_tts_tab():
                         hybrid_method0 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method0.value == "hybrid")
                     hop_length0 = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
                     alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                with gr.Accordion(translations["f0_file"], open=False):
                     upload_f0_file0 = gr.File(label=translations["upload_f0"], file_types=[".txt"])
                     f0_file_dropdown0 = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
                     refresh_f0_file0 = gr.Button(translations["refresh"])
-                with gr.Accordion(translations["hubert_model"], open=False):
                     embed_mode1 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
                     embedders0 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
                     custom_embedders0 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders0.value == "custom")
-                with gr.Accordion(translations["use_presets"], open=False):
                     with gr.Row():
                         presets_name = gr.Dropdown(label=translations["file_preset"], choices=presets_file, value=presets_file[0] if len(presets_file) > 0 else '', interactive=True, allow_custom_value=True)
                     with gr.Row():
                         load_click = gr.Button(translations["load_file"], variant="primary")
                         refresh_click = gr.Button(translations["refresh"])
-                    with gr.Accordion(translations["export_file"], open=False):
                         with gr.Row():
                             with gr.Column():
                                 with gr.Group():
@@ -277,4 +277,4 @@ def convert_tts_tab():
             ],
             outputs=[tts_voice_convert],
             api_name="convert_tts"
-        )

             txt_input = gr.File(label=translations["drop_text"], file_types=[".txt", ".srt", ".docx"], visible=use_txt.value)
             tts_voice = gr.Dropdown(label=translations["voice"], choices=edgetts, interactive=True, value="vi-VN-NamMinhNeural")
             tts_pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info_2"], label=translations["pitch"], value=0, interactive=True)
+            with gr.Accordion(translations["translate"], open=True):
                 with gr.Row():
                     source_lang = gr.Dropdown(label=translations["source_lang"], choices=["auto"]+google_tts_voice, interactive=True, value="auto")
                     target_lang = gr.Dropdown(label=translations["target_lang"], choices=google_tts_voice, interactive=True, value="en")
                     refresh1 = gr.Button(translations["refresh"])
                 with gr.Row():
                     index_strength0 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index0.value != "")
+            with gr.Accordion(translations["output_path"], open=True):
                 export_format0 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
                 output_audio0 = gr.Textbox(label=translations["output_tts"], value="audios/tts.wav", placeholder="audios/tts.wav", info=translations["tts_output"], interactive=True)
                 output_audio1 = gr.Textbox(label=translations["output_tts_convert"], value="audios/tts-convert.wav", placeholder="audios/tts-convert.wav", info=translations["tts_output"], interactive=True)
+            with gr.Accordion(translations["setting"], open=True):
+                with gr.Accordion(translations["f0_method"], open=True):
                     with gr.Group():
                         with gr.Row():
                             onnx_f0_mode1 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
                         hybrid_method0 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method0.value == "hybrid")
                     hop_length0 = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
                     alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
+                with gr.Accordion(translations["f0_file"], open=True):
                     upload_f0_file0 = gr.File(label=translations["upload_f0"], file_types=[".txt"])
                     f0_file_dropdown0 = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
                     refresh_f0_file0 = gr.Button(translations["refresh"])
+                with gr.Accordion(translations["hubert_model"], open=True):
                     embed_mode1 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
                     embedders0 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
                     custom_embedders0 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders0.value == "custom")
+                with gr.Accordion(translations["use_presets"], open=True):
                     with gr.Row():
                         presets_name = gr.Dropdown(label=translations["file_preset"], choices=presets_file, value=presets_file[0] if len(presets_file) > 0 else '', interactive=True, allow_custom_value=True)
                     with gr.Row():
                         load_click = gr.Button(translations["load_file"], variant="primary")
                         refresh_click = gr.Button(translations["refresh"])
+                    with gr.Accordion(translations["export_file"], open=True):
                         with gr.Row():
                             with gr.Column():
                                 with gr.Group():
             ],
             outputs=[tts_voice_convert],
             api_name="convert_tts"
+        )

main/app/tabs/inference/child/convert_with_whisper.py CHANGED Viewed

@@ -38,7 +38,7 @@ def convert_with_whisper_tab():
                 with gr.Row():
                     pitch3 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
                     index_strength2 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index2.value != "")
-            with gr.Accordion(translations["input_output"], open=False):
                 with gr.Column():
                     export_format2 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
                     input_audio1 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
@@ -57,10 +57,10 @@ def convert_with_whisper_tab():
                 with gr.Row():
                     pitch4 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
                     index_strength3 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index3.value != "")
-            with gr.Accordion(translations["setting"], open=False):
                 with gr.Row():
                     model_size = gr.Radio(label=translations["model_size"], info=translations["model_size_info"], choices=whisper_model, value="medium", interactive=True)
-                with gr.Accordion(translations["f0_method"], open=False):
                     with gr.Group():
                         with gr.Row():
                             onnx_f0_mode4 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
@@ -69,7 +69,7 @@ def convert_with_whisper_tab():
                         hybrid_method3 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method3.value == "hybrid")
                     hop_length3 = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
                     alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                with gr.Accordion(translations["hubert_model"], open=False):
                     embed_mode3 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
                     embedders3 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
                     custom_embedders3 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders3.value == "custom")
@@ -161,4 +161,4 @@ def convert_with_whisper_tab():
             ],
             outputs=[play_audio3],
             api_name="convert_with_whisper"
-        )

                 with gr.Row():
                     pitch3 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
                     index_strength2 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index2.value != "")
+            with gr.Accordion(translations["input_output"], open=True):
                 with gr.Column():
                     export_format2 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
                     input_audio1 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
                 with gr.Row():
                     pitch4 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
                     index_strength3 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index3.value != "")
+            with gr.Accordion(translations["setting"], open=True):
                 with gr.Row():
                     model_size = gr.Radio(label=translations["model_size"], info=translations["model_size_info"], choices=whisper_model, value="medium", interactive=True)
+                with gr.Accordion(translations["f0_method"], open=True):
                     with gr.Group():
                         with gr.Row():
                             onnx_f0_mode4 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
                         hybrid_method3 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=hybrid_f0_method, value=hybrid_f0_method[0], interactive=True, allow_custom_value=True, visible=method3.value == "hybrid")
                     hop_length3 = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
                     alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
+                with gr.Accordion(translations["hubert_model"], open=True):
                     embed_mode3 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
                     embedders3 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
                     custom_embedders3 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders3.value == "custom")
             ],
             outputs=[play_audio3],
             api_name="convert_with_whisper"
+        )

main/app/tabs/inference/child/separate.py CHANGED Viewed

@@ -7,224 +7,498 @@ sys.path.append(os.getcwd())
 from main.app.core.downloads import download_url
 from main.app.core.separate import separate_music
-from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, shutil_move, separate_change
-from main.app.variables import translations, uvr_model, karaoke_models, reverb_models, vr_models, denoise_models, mdx_models, paths_for_files, sample_rate_choice, configs, file_types, export_format_choices
 def separate_tab():
-    with gr.Row():
         gr.Markdown(translations["4_part"])
     with gr.Row():
         with gr.Column():
             with gr.Group():
                 with gr.Row():
-                    enable_denoise = gr.Checkbox(label=translations["denoise_mdx"], value=False, interactive=False)
-                    separate_backing = gr.Checkbox(label=translations["separator_backing"], value=False, interactive=True)
-                    separate_reverb = gr.Checkbox(label=translations["dereveb_audio"], value=False, interactive=True)
-                    enable_tta = gr.Checkbox(label=translations["enable_tta"], value=False, interactive=False)
-                    high_end_process = gr.Checkbox(label=translations["high_end_process"], value=False, interactive=False)
-                    enable_post_process = gr.Checkbox(label=translations["enable_post_process"], value=False, interactive=False)
                 with gr.Row():
-                    model_name = gr.Dropdown(label=translations["separator_model"], value=uvr_model[0], choices=uvr_model, interactive=True)
-                    karaoke_model = gr.Dropdown(label=translations["separator_backing_model"], value=list(karaoke_models.keys())[0], choices=list(karaoke_models.keys()), interactive=True, visible=separate_backing.value)
-                    reverb_model = gr.Dropdown(label=translations["dereveb_model"], value=list(reverb_models.keys())[0], choices=list(reverb_models.keys()), interactive=True, visible=separate_reverb.value)
-                    denoise_model = gr.Dropdown(label=translations["denoise_model"], value=list(denoise_models.keys())[0], choices=list(denoise_models.keys()), interactive=True, visible=enable_denoise.value and model_name.value in list(vr_models.keys()))
     with gr.Row():
         with gr.Column():
-            separate_button = gr.Button(translations["separator_tab"], variant="primary")
     with gr.Row():
         with gr.Column():
             with gr.Group():
                 with gr.Row():
-                    shifts = gr.Slider(label=translations["shift"], info=translations["shift_info"], minimum=1, maximum=20, value=2, step=1, interactive=True)
-                    batch_size = gr.Slider(label=translations["batch_size"], info=translations["mdx_batch_size_info"], minimum=1, maximum=64, value=1, step=1, interactive=True, visible=False)
                 with gr.Row():
-                    segments_size = gr.Slider(label=translations["segments_size"], info=translations["segments_size_info"], minimum=32, maximum=3072, value=256, step=32, interactive=True)
-                    aggression = gr.Slider(label=translations['aggression'], info=translations["aggression_info"], minimum=1, maximum=50, value=5, step=1, interactive=True, visible=False)
-            drop_audio = gr.Files(label=translations["drop_audio"], file_types=file_types)
-            with gr.Accordion(translations["use_url"], open=False):
-                url = gr.Textbox(label=translations["url_audio"], value="", placeholder="https://www.youtube.com/...", scale=6)
                 download_button = gr.Button(translations["downloads"])
         with gr.Column():
             with gr.Group():
                 with gr.Row():
-                    overlap = gr.Radio(label=translations["overlap"], info=translations["overlap_info"], choices=["0.25", "0.5", "0.75", "0.99"], value="0.25", interactive=True)
                 with gr.Row():
-                    window_size = gr.Slider(label=translations["window_size"], info=translations["window_size_info"], minimum=320, maximum=1024, value=512, step=32, interactive=True, visible=False)
-                    hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=8192, value=1024, step=1, interactive=True, visible=False)
-                    post_process_threshold = gr.Slider(label=translations['post_process_threshold'], info=translations["post_process_threshold_info"], minimum=0.1, maximum=0.3, value=0.2, step=0.1, interactive=True, visible=False)
-            sample_rate = gr.Radio(choices=sample_rate_choice, value=44100, label=translations["sr"], info=translations["sr_info"], interactive=True)
-            with gr.Accordion(translations["input_output"], open=False):
-                export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=export_format_choices, value="wav", interactive=True)
-                input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, allow_custom_value=True, interactive=True)
                 refresh_audio = gr.Button(translations["refresh"])
-                output_dirs = gr.Textbox(label=translations["output_folder"], value="audios", placeholder="audios", info=translations["output_folder_info"], interactive=True)
-            audio_input = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
     with gr.Row():
         gr.Markdown(translations["output_separator"])
     with gr.Row():
-        instruments_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["instruments"])
-        original_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["original_vocal"])
-        main_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["main_vocal"], visible=separate_backing.value)
-        backing_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["backing_vocal"], visible=separate_backing.value)
     with gr.Row():
-        model_name.change(fn=lambda a: valueFalse_interactive(a in list(mdx_models.keys()) + list(vr_models.keys())), inputs=[model_name], outputs=[enable_denoise])
-        separate_backing.change(fn=lambda a, b: valueFalse_interactive(a or b), inputs=[separate_backing, separate_reverb], outputs=[enable_denoise])
-        separate_reverb.change(fn=lambda a, b: valueFalse_interactive(a or b), inputs=[separate_backing, separate_reverb], outputs=[enable_denoise])
     with gr.Row():
-        input_audio.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio], outputs=[audio_input])
-        drop_audio.upload(fn=lambda audio_in: [shutil_move(audio.name, configs["audios_path"]) for audio in audio_in][0], inputs=[drop_audio], outputs=[input_audio])
-        refresh_audio.click(fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio])
     with gr.Row():
-        separate_backing.change(fn=lambda a: [visible(a) for _ in range(2)], inputs=[separate_backing], outputs=[main_vocals, backing_vocals])
         download_button.click(
-            fn=download_url,
-            inputs=[url],
             outputs=[input_audio, audio_input, url],
-            api_name='download_url'
         )
     with gr.Row():
         model_name.change(
             fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
             outputs=[
                 karaoke_model,
                 reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
                 batch_size,
-                shifts,
-                window_size,
-                aggression,
                 post_process_threshold,
                 denoise_model,
-                enable_tta,
-                high_end_process,
                 enable_post_process,
-            ]
         )
     with gr.Row():
         karaoke_model.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
             outputs=[
                 karaoke_model,
                 reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
                 batch_size,
-                shifts,
-                window_size,
-                aggression,
                 post_process_threshold,
                 denoise_model,
-                enable_tta,
-                high_end_process,
                 enable_post_process,
-            ]
         )
         separate_backing.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
             outputs=[
                 karaoke_model,
                 reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
                 batch_size,
-                shifts,
-                window_size,
-                aggression,
                 post_process_threshold,
                 denoise_model,
-                enable_tta,
-                high_end_process,
                 enable_post_process,
-            ]
         )
     with gr.Row():
         reverb_model.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
             outputs=[
                 karaoke_model,
                 reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
                 batch_size,
-                shifts,
-                window_size,
-                aggression,
                 post_process_threshold,
                 denoise_model,
-                enable_tta,
-                high_end_process,
                 enable_post_process,
-            ]
         )
         separate_reverb.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
             outputs=[
                 karaoke_model,
                 reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
                 batch_size,
-                shifts,
-                window_size,
-                aggression,
                 post_process_threshold,
                 denoise_model,
-                enable_tta,
-                high_end_process,
                 enable_post_process,
-            ]
         )
     with gr.Row():
         enable_denoise.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
             outputs=[
                 karaoke_model,
                 reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
                 batch_size,
-                shifts,
-                window_size,
-                aggression,
                 post_process_threshold,
                 denoise_model,
-                enable_tta,
-                high_end_process,
                 enable_post_process,
-            ]
         )
         enable_post_process.change(
-            fn=separate_change,
-            inputs=[model_name, karaoke_model, reverb_model, enable_post_process, separate_backing, separate_reverb, enable_denoise],
             outputs=[
                 karaoke_model,
                 reverb_model,
-                overlap,
-                segments_size,
-                hop_length,
                 batch_size,
-                shifts,
-                window_size,
-                aggression,
                 post_process_threshold,
                 denoise_model,
-                enable_tta,
-                high_end_process,
                 enable_post_process,
-            ]
         )
     with gr.Row():
         separate_button.click(
@@ -232,32 +506,27 @@ def separate_tab():
             inputs=[
                 input_audio,
                 output_dirs,
-                export_format,
-                model_name,
                 karaoke_model,
                 reverb_model,
                 denoise_model,
                 sample_rate,
-                shifts,
-                batch_size,
-                overlap,
                 aggression,
-                hop_length,
                 window_size,
-                segments_size,
                 post_process_threshold,
                 enable_tta,
-                enable_denoise,
                 high_end_process,
                 enable_post_process,
                 separate_backing,
-                separate_reverb
             ],
-            outputs=[
-                original_vocals,
-                instruments_audio,
-                main_vocals,
-                backing_vocals
-            ],
-            api_name="separate_music"
-        )

 from main.app.core.downloads import download_url
 from main.app.core.separate import separate_music
+from main.app.core.ui import (
+    visible,
+    valueFalse_interactive,
+    change_audios_choices,
+    shutil_move,
+    separate_change,
+)
+from main.app.variables import (
+    translations,
+    uvr_model,
+    karaoke_models,
+    reverb_models,
+    vr_models,
+    denoise_models,
+    mdx_models,
+    paths_for_files,
+    sample_rate_choice,
+    configs,
+    file_types,
+    export_format_choices,
+)
 def separate_tab():
+    with gr.Row():
         gr.Markdown(translations["4_part"])
     with gr.Row():
         with gr.Column():
             with gr.Group():
                 with gr.Row():
+                    enable_denoise = gr.Checkbox(
+                        label=translations["denoise_mdx"],
+                        value=False,
+                        interactive=False,
+                    )
+                    separate_backing = gr.Checkbox(
+                        label=translations["separator_backing"],
+                        value=False,
+                        interactive=True,
+                    )
+                    separate_reverb = gr.Checkbox(
+                        label=translations["dereveb_audio"],
+                        value=False,
+                        interactive=True,
+                    )
+                    enable_tta = gr.Checkbox(
+                        label=translations["enable_tta"], value=False, interactive=False
+                    )
+                    high_end_process = gr.Checkbox(
+                        label=translations["high_end_process"],
+                        value=False,
+                        interactive=False,
+                    )
+                    enable_post_process = gr.Checkbox(
+                        label=translations["enable_post_process"],
+                        value=False,
+                        interactive=False,
+                    )
                 with gr.Row():
+                    model_name = gr.Dropdown(
+                        label=translations["separator_model"],
+                        value=uvr_model[0],
+                        choices=uvr_model,
+                        interactive=True,
+                    )
+                    karaoke_model = gr.Dropdown(
+                        label=translations["separator_backing_model"],
+                        value=list(karaoke_models.keys())[0],
+                        choices=list(karaoke_models.keys()),
+                        interactive=True,
+                        visible=separate_backing.value,
+                    )
+                    reverb_model = gr.Dropdown(
+                        label=translations["dereveb_model"],
+                        value=list(reverb_models.keys())[0],
+                        choices=list(reverb_models.keys()),
+                        interactive=True,
+                        visible=separate_reverb.value,
+                    )
+                    denoise_model = gr.Dropdown(
+                        label=translations["denoise_model"],
+                        value=list(denoise_models.keys())[0],
+                        choices=list(denoise_models.keys()),
+                        interactive=True,
+                        visible=enable_denoise.value
+                        and model_name.value in list(vr_models.keys()),
+                    )
     with gr.Row():
         with gr.Column():
+            separate_button = gr.Button(
+                translations["separator_tab"], variant="primary"
+            )
     with gr.Row():
         with gr.Column():
             with gr.Group():
                 with gr.Row():
+                    shifts = gr.Slider(
+                        label=translations["shift"],
+                        info=translations["shift_info"],
+                        minimum=1,
+                        maximum=20,
+                        value=2,
+                        step=1,
+                        interactive=True,
+                    )
+                    batch_size = gr.Slider(
+                        label=translations["batch_size"],
+                        info=translations["mdx_batch_size_info"],
+                        minimum=1,
+                        maximum=64,
+                        value=1,
+                        step=1,
+                        interactive=True,
+                        visible=False,
+                    )
                 with gr.Row():
+                    segments_size = gr.Slider(
+                        label=translations["segments_size"],
+                        info=translations["segments_size_info"],
+                        minimum=32,
+                        maximum=3072,
+                        value=256,
+                        step=32,
+                        interactive=True,
+                    )
+                    aggression = gr.Slider(
+                        label=translations["aggression"],
+                        info=translations["aggression_info"],
+                        minimum=1,
+                        maximum=50,
+                        value=5,
+                        step=1,
+                        interactive=True,
+                        visible=False,
+                    )
+            drop_audio = gr.Files(
+                label=translations["drop_audio"], file_types=file_types
+            )
+            with gr.Accordion(translations["use_url"], open=True):
+                url = gr.Textbox(
+                    label=translations["url_audio"],
+                    value="",
+                    placeholder="https://www.youtube.com/...",
+                    scale=6,
+                )
                 download_button = gr.Button(translations["downloads"])
         with gr.Column():
             with gr.Group():
                 with gr.Row():
+                    overlap = gr.Radio(
+                        label=translations["overlap"],
+                        info=translations["overlap_info"],
+                        choices=["0.25", "0.5", "0.75", "0.99"],
+                        value="0.25",
+                        interactive=True,
+                    )
                 with gr.Row():
+                    window_size = gr.Slider(
+                        label=translations["window_size"],
+                        info=translations["window_size_info"],
+                        minimum=320,
+                        maximum=1024,
+                        value=512,
+                        step=32,
+                        interactive=True,
+                        visible=False,
+                    )
+                    hop_length = gr.Slider(
+                        label=translations["hop_length"],
+                        info=translations["hop_length_info"],
+                        minimum=64,
+                        maximum=8192,
+                        value=1024,
+                        step=1,
+                        interactive=True,
+                        visible=False,
+                    )
+                    post_process_threshold = gr.Slider(
+                        label=translations["post_process_threshold"],
+                        info=translations["post_process_threshold_info"],
+                        minimum=0.1,
+                        maximum=0.3,
+                        value=0.2,
+                        step=0.1,
+                        interactive=True,
+                        visible=False,
+                    )
+            sample_rate = gr.Radio(
+                choices=sample_rate_choice,
+                value=44100,
+                label=translations["sr"],
+                info=translations["sr_info"],
+                interactive=True,
+            )
+            with gr.Accordion(translations["input_output"], open=True):
+                export_format = gr.Radio(
+                    label=translations["export_format"],
+                    info=translations["export_info"],
+                    choices=export_format_choices,
+                    value="wav",
+                    interactive=True,
+                )
+                input_audio = gr.Dropdown(
+                    label=translations["audio_path"],
+                    value="",
+                    choices=paths_for_files,
+                    allow_custom_value=True,
+                    interactive=True,
+                )
                 refresh_audio = gr.Button(translations["refresh"])
+                output_dirs = gr.Textbox(
+                    label=translations["output_folder"],
+                    value="audios",
+                    placeholder="audios",
+                    info=translations["output_folder_info"],
+                    interactive=True,
+                )
+            audio_input = gr.Audio(
+                show_download_button=True,
+                interactive=False,
+                label=translations["input_audio"],
+            )
     with gr.Row():
         gr.Markdown(translations["output_separator"])
     with gr.Row():
+        instruments_audio = gr.Audio(
+            show_download_button=True,
+            interactive=False,
+            label=translations["instruments"],
+        )
+        original_vocals = gr.Audio(
+            show_download_button=True,
+            interactive=False,
+            label=translations["original_vocal"],
+        )
+        main_vocals = gr.Audio(
+            show_download_button=True,
+            interactive=False,
+            label=translations["main_vocal"],
+            visible=separate_backing.value,
+        )
+        backing_vocals = gr.Audio(
+            show_download_button=True,
+            interactive=False,
+            label=translations["backing_vocal"],
+            visible=separate_backing.value,
+        )
     with gr.Row():
+        model_name.change(
+            fn=lambda a: valueFalse_interactive(
+                a in list(mdx_models.keys()) + list(vr_models.keys())
+            ),
+            inputs=[model_name],
+            outputs=[enable_denoise],
+        )
+        separate_backing.change(
+            fn=lambda a, b: valueFalse_interactive(a or b),
+            inputs=[separate_backing, separate_reverb],
+            outputs=[enable_denoise],
+        )
+        separate_reverb.change(
+            fn=lambda a, b: valueFalse_interactive(a or b),
+            inputs=[separate_backing, separate_reverb],
+            outputs=[enable_denoise],
+        )
     with gr.Row():
+        input_audio.change(
+            fn=lambda audio: audio if os.path.isfile(audio) else None,
+            inputs=[input_audio],
+            outputs=[audio_input],
+        )
+        drop_audio.upload(
+            fn=lambda audio_in: [
+                shutil_move(audio.name, configs["audios_path"]) for audio in audio_in
+            ][0],
+            inputs=[drop_audio],
+            outputs=[input_audio],
+        )
+        refresh_audio.click(
+            fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio]
+        )
     with gr.Row():
+        separate_backing.change(
+            fn=lambda a: [visible(a) for _ in range(2)],
+            inputs=[separate_backing],
+            outputs=[main_vocals, backing_vocals],
+        )
         download_button.click(
+            fn=download_url,
+            inputs=[url],
             outputs=[input_audio, audio_input, url],
+            api_name="download_url",
         )
     with gr.Row():
         model_name.change(
             fn=separate_change,
+            inputs=[
+                model_name,
+                karaoke_model,
+                reverb_model,
+                enable_post_process,
+                separate_backing,
+                separate_reverb,
+                enable_denoise,
+            ],
             outputs=[
                 karaoke_model,
                 reverb_model,
+                overlap,
+                segments_size,
+                hop_length,
                 batch_size,
+                shifts,
+                window_size,
+                aggression,
                 post_process_threshold,
                 denoise_model,
+                enable_tta,
+                high_end_process,
                 enable_post_process,
+            ],
         )
     with gr.Row():
         karaoke_model.change(
+            fn=separate_change,
+            inputs=[
+                model_name,
+                karaoke_model,
+                reverb_model,
+                enable_post_process,
+                separate_backing,
+                separate_reverb,
+                enable_denoise,
+            ],
             outputs=[
                 karaoke_model,
                 reverb_model,
+                overlap,
+                segments_size,
+                hop_length,
                 batch_size,
+                shifts,
+                window_size,
+                aggression,
                 post_process_threshold,
                 denoise_model,
+                enable_tta,
+                high_end_process,
                 enable_post_process,
+            ],
         )
         separate_backing.change(
+            fn=separate_change,
+            inputs=[
+                model_name,
+                karaoke_model,
+                reverb_model,
+                enable_post_process,
+                separate_backing,
+                separate_reverb,
+                enable_denoise,
+            ],
             outputs=[
                 karaoke_model,
                 reverb_model,
+                overlap,
+                segments_size,
+                hop_length,
                 batch_size,
+                shifts,
+                window_size,
+                aggression,
                 post_process_threshold,
                 denoise_model,
+                enable_tta,
+                high_end_process,
                 enable_post_process,
+            ],
         )
     with gr.Row():
         reverb_model.change(
+            fn=separate_change,
+            inputs=[
+                model_name,
+                karaoke_model,
+                reverb_model,
+                enable_post_process,
+                separate_backing,
+                separate_reverb,
+                enable_denoise,
+            ],
             outputs=[
                 karaoke_model,
                 reverb_model,
+                overlap,
+                segments_size,
+                hop_length,
                 batch_size,
+                shifts,
+                window_size,
+                aggression,
                 post_process_threshold,
                 denoise_model,
+                enable_tta,
+                high_end_process,
                 enable_post_process,
+            ],
         )
         separate_reverb.change(
+            fn=separate_change,
+            inputs=[
+                model_name,
+                karaoke_model,
+                reverb_model,
+                enable_post_process,
+                separate_backing,
+                separate_reverb,
+                enable_denoise,
+            ],
             outputs=[
                 karaoke_model,
                 reverb_model,
+                overlap,
+                segments_size,
+                hop_length,
                 batch_size,
+                shifts,
+                window_size,
+                aggression,
                 post_process_threshold,
                 denoise_model,
+                enable_tta,
+                high_end_process,
                 enable_post_process,
+            ],
         )
     with gr.Row():
         enable_denoise.change(
+            fn=separate_change,
+            inputs=[
+                model_name,
+                karaoke_model,
+                reverb_model,
+                enable_post_process,
+                separate_backing,
+                separate_reverb,
+                enable_denoise,
+            ],
             outputs=[
                 karaoke_model,
                 reverb_model,
+                overlap,
+                segments_size,
+                hop_length,
                 batch_size,
+                shifts,
+                window_size,
+                aggression,
                 post_process_threshold,
                 denoise_model,
+                enable_tta,
+                high_end_process,
                 enable_post_process,
+            ],
         )
         enable_post_process.change(
+            fn=separate_change,
+            inputs=[
+                model_name,
+                karaoke_model,
+                reverb_model,
+                enable_post_process,
+                separate_backing,
+                separate_reverb,
+                enable_denoise,
+            ],
             outputs=[
                 karaoke_model,
                 reverb_model,
+                overlap,
+                segments_size,
+                hop_length,
                 batch_size,
+                shifts,
+                window_size,
+                aggression,
                 post_process_threshold,
                 denoise_model,
+                enable_tta,
+                high_end_process,
                 enable_post_process,
+            ],
         )
     with gr.Row():
         separate_button.click(
             inputs=[
                 input_audio,
                 output_dirs,
+                export_format,
+                model_name,
                 karaoke_model,
                 reverb_model,
                 denoise_model,
                 sample_rate,
+                shifts,
+                batch_size,
+                overlap,
                 aggression,
+                hop_length,
                 window_size,
+                segments_size,
                 post_process_threshold,
                 enable_tta,
+                enable_denoise,
                 high_end_process,
                 enable_post_process,
                 separate_backing,
+                separate_reverb,
             ],
+            outputs=[original_vocals, instruments_audio, main_vocals, backing_vocals],
+            api_name="separate_music",
+        )

main/app/tabs/training/child/create_reference.py CHANGED Viewed

@@ -27,7 +27,7 @@ def create_reference_tab():
         f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=f0_autotune.value)
     with gr.Row():
         with gr.Column():
-            with gr.Accordion(translations["input_output"], open=False):
                 with gr.Column():
                     input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
                     reference_name = gr.Textbox(label=translations["reference_name"], value="reference", placeholder="reference", info=translations["reference_name_info"], interactive=True)
@@ -38,7 +38,7 @@ def create_reference_tab():
                 with gr.Column():
                     play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
         with gr.Column() as f0_method_column:
-            with gr.Accordion(label=translations["f0_method"], open=False):
                 with gr.Group():
                     with gr.Row():
                         onnx_f0 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
@@ -48,7 +48,7 @@ def create_reference_tab():
                     with gr.Row():
                         alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
         with gr.Column():
-            with gr.Accordion(label=translations["hubert_model"], open=False):
                 with gr.Row():
                     version = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
                 with gr.Group():
@@ -94,4 +94,4 @@ def create_reference_tab():
             ],
             outputs=[create_reference_info],
             api_name="create_reference"
-        )

         f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=f0_autotune.value)
     with gr.Row():
         with gr.Column():
+            with gr.Accordion(translations["input_output"], open=True):
                 with gr.Column():
                     input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
                     reference_name = gr.Textbox(label=translations["reference_name"], value="reference", placeholder="reference", info=translations["reference_name_info"], interactive=True)
                 with gr.Column():
                     play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
         with gr.Column() as f0_method_column:
+            with gr.Accordion(label=translations["f0_method"], open=True):
                 with gr.Group():
                     with gr.Row():
                         onnx_f0 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
                     with gr.Row():
                         alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
         with gr.Column():
+            with gr.Accordion(label=translations["hubert_model"], open=True):
                 with gr.Row():
                     version = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
                 with gr.Group():
             ],
             outputs=[create_reference_info],
             api_name="create_reference"
+        )

main/app/tabs/training/child/training.py CHANGED Viewed

@@ -43,7 +43,7 @@ def training_model_tab():
         with gr.Column():
             with gr.Row():
                 with gr.Column():
-                    with gr.Accordion(label=translations["f0_method"], open=False):
                         with gr.Group():
                             with gr.Row():
                                 onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
@@ -54,7 +54,7 @@ def training_model_tab():
                         extract_hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
                         f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
                         alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
-                    with gr.Accordion(label=translations["hubert_model"], open=False):
                         with gr.Group():
                             embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
                             extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
@@ -72,7 +72,7 @@ def training_model_tab():
                     index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
                     training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
             with gr.Row():
-                with gr.Accordion(label=translations["setting"], open=False):
                     with gr.Row():
                         index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
                     with gr.Row():
@@ -94,7 +94,7 @@ def training_model_tab():
                             chunk_len = gr.Slider(minimum=0.5, maximum=5.0, value=3.0, step=0.1, label=translations["chunk_length"], info=translations["chunk_length_info"], interactive=True)
                             overlap_len = gr.Slider(minimum=0.0, maximum=0.4, value=0.3, step=0.1, label=translations["overlap_length"], info=translations["overlap_length_info"], interactive=True)
                         threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
-                        with gr.Accordion(translations["setting_cpu_gpu"], open=False):
                             with gr.Column():
                                 gpu_number = gr.Textbox(label=translations["gpu_number"], value=gpu_number_str(), info=translations["gpu_number_info"], interactive=True)
                                 gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
@@ -112,7 +112,7 @@ def training_model_tab():
                         model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
                     with gr.Row():
                         with gr.Column():
-                            with gr.Accordion(translations["custom_pretrain_info"], open=False, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
                                 pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
                                 pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
                                 refresh_pretrain = gr.Button(translations["refresh"], scale=2)
@@ -120,7 +120,7 @@ def training_model_tab():
                 training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False, lines=3)
             with gr.Row():
                 with gr.Column():
-                    with gr.Accordion(translations["export_model"], open=False):
                         with gr.Row():
                             model_file = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
                             index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
@@ -256,4 +256,4 @@ def training_model_tab():
             ],
             outputs=[training_info],
             api_name="training_model"
-        )

         with gr.Column():
             with gr.Row():
                 with gr.Column():
+                    with gr.Accordion(label=translations["f0_method"], open=True):
                         with gr.Group():
                             with gr.Row():
                                 onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
                         extract_hop_length = gr.Slider(label=translations['hop_length'], info=translations["hop_length_info"], minimum=64, maximum=512, value=160, step=1, interactive=True, visible=False)
                         f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
                         alpha = gr.Slider(label=translations["alpha_label"], info=translations["alpha_info"], minimum=0.1, maximum=1, value=0.5, step=0.1, interactive=True, visible=False)
+                    with gr.Accordion(label=translations["hubert_model"], open=True):
                         with gr.Group():
                             embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
                             extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
                     index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
                     training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
             with gr.Row():
+                with gr.Accordion(label=translations["setting"], open=True):
                     with gr.Row():
                         index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
                     with gr.Row():
                             chunk_len = gr.Slider(minimum=0.5, maximum=5.0, value=3.0, step=0.1, label=translations["chunk_length"], info=translations["chunk_length_info"], interactive=True)
                             overlap_len = gr.Slider(minimum=0.0, maximum=0.4, value=0.3, step=0.1, label=translations["overlap_length"], info=translations["overlap_length_info"], interactive=True)
                         threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
+                        with gr.Accordion(translations["setting_cpu_gpu"], open=True):
                             with gr.Column():
                                 gpu_number = gr.Textbox(label=translations["gpu_number"], value=gpu_number_str(), info=translations["gpu_number_info"], interactive=True)
                                 gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
                         model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
                     with gr.Row():
                         with gr.Column():
+                            with gr.Accordion(translations["custom_pretrain_info"], open=True, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
                                 pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
                                 pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
                                 refresh_pretrain = gr.Button(translations["refresh"], scale=2)
                 training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False, lines=3)
             with gr.Row():
                 with gr.Column():
+                    with gr.Accordion(translations["export_model"], open=True):
                         with gr.Row():
                             model_file = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
                             index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
             ],
             outputs=[training_info],
             api_name="training_model"
+        )