wubby

Running

App Files Files

lainlives committed on Nov 27, 2025

Commit

0a0c4f3

1 Parent(s): 83bd5b3

~

Browse files

Files changed (1) hide show

config/Default Configuration.json +2461 -0

config/Default Configuration.json ADDED Viewed

	@@ -0,0 +1,2461 @@

+{
+    "song": {
+        "one_click": {
+            "embedder_model": {
+                "label": "Embedder model",
+                "info": "The model to use for generating speaker embeddings.",
+                "value": "contentvec",
+                "choices": [
+                    "contentvec",
+                    "chinese-hubert-base",
+                    "japanese-hubert-base",
+                    "korean-hubert-base",
+                    "custom"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "custom_embedder_model": {
+                "label": "Custom embedder model",
+                "info": "Select a custom embedder model from the dropdown.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "voice_model": {
+                "label": "Voice model",
+                "info": "Select a model to use for voice conversion.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "f0_methods": {
+                "label": "Pitch extraction algorithm(s)",
+                "info": "If more than one method is selected, then the median of the pitch values extracted by each method is used. RMVPE is recommended for most cases and is the default when no method is selected.",
+                "value": [
+                    "rmvpe",
+                    "crepe",
+                    "fcpe"
+                ],
+                "choices": [
+                    "rmvpe",
+                    "crepe",
+                    "crepe-tiny",
+                    "fcpe"
+                ],
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "index_rate": {
+                "label": "Index rate",
+                "info": "Increase to bias the conversion towards the accent of the voice model. Decrease to potentially reduce artifacts coming from the voice model.<br><br><br>",
+                "value": 0.2,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "rms_mix_rate": {
+                "label": "RMS mix rate",
+                "info": "How much to mimic the loudness of the input voice (0) or a fixed loudness (1). A value of 1 is recommended for most cases.<br><br>",
+                "value": 1,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "protect_rate": {
+                "label": "Protect rate",
+                "info": "Controls the extent to which consonants and breathing sounds are protected from artifacts. A higher value offers more protection but may worsen the indexing effect.<br><br>",
+                "value": 0.149,
+                "minimum": 0.0,
+                "maximum": 0.5,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "hop_length": {
+                "label": "Hop length",
+                "info": "How often the CREPE-based pitch extraction method checks for pitch changes, measured in milliseconds. Lower values lead to longer conversion times and a higher risk of voice cracks, but better pitch accuracy.",
+                "value": 128,
+                "minimum": 1.0,
+                "maximum": 512.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "split_voice": {
+                "label": "Split input voice",
+                "info": "Whether to split the input voice track into smaller segments before converting it. This can improve output quality for longer voice tracks.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "autotune_voice": {
+                "label": "Autotune converted voice",
+                "info": "Whether to apply autotune to the converted voice.<br><br>",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "autotune_strength": {
+                "label": "Autotune intensity",
+                "info": "Higher values result in stronger snapping to the chromatic grid and artifacting.",
+                "value": 0.69,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "sid": {
+                "label": "Speaker ID",
+                "info": "Speaker ID for multi-speaker models.",
+                "value": 0,
+                "precision": 0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_sr": {
+                "label": "Output sample rate",
+                "info": "The sample rate of the mixed output track.",
+                "value": 44100,
+                "choices": [
+                    16000,
+                    44100,
+                    48000,
+                    96000,
+                    192000
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_format": {
+                "label": "Output format",
+                "info": "The audio format of the mixed output track.",
+                "value": "mp3",
+                "choices": [
+                    "mp3",
+                    "wav",
+                    "flac",
+                    "ogg",
+                    "m4a",
+                    "aac"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_name": {
+                "label": "Output name",
+                "info": "If no name is provided, a suitable name will be generated automatically.",
+                "value": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": "Ultimate RVC output"
+            },
+            "source_type": {
+                "label": "Source type",
+                "info": "The type of source to retrieve a song from.",
+                "value": "YouTube link/local path",
+                "choices": [
+                    "YouTube link/local path",
+                    "Local file",
+                    "Cached song"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "index",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "source": {
+                "label": "Source",
+                "info": "Link to a song on YouTube or the full path of a local audio file.",
+                "value": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": null
+            },
+            "cached_song": {
+                "label": "Source",
+                "info": "Select a song from the list of cached songs.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "clean_voice": {
+                "label": "Clean converted voice",
+                "info": "Whether to clean the converted voice using noise reduction algorithms.<br><br>",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "clean_strength": {
+                "label": "Cleaning intensity",
+                "info": "Higher values result in stronger cleaning, but may lead to a more compressed sound.",
+                "value": 0.7,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": 0.1,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "room_size": {
+                "label": "Room size",
+                "info": "Size of the room that the reverb effect simulates. Increase for longer reverb time.",
+                "value": 0.15,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "wet_level": {
+                "label": "Wetness level",
+                "info": "Loudness of converted vocals with reverb effect applied.",
+                "value": 0.2,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "dry_level": {
+                "label": "Dryness level",
+                "info": "Loudness of converted vocals without reverb effect applied.",
+                "value": 0.8,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "damping": {
+                "label": "Damping level",
+                "info": "Absorption of high frequencies in reverb effect.",
+                "value": 0.7,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "main_gain": {
+                "label": "Main gain",
+                "info": "The gain to apply to the main vocals.",
+                "value": 0,
+                "minimum": -20.0,
+                "maximum": 20.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "inst_gain": {
+                "label": "Instrumentals gain",
+                "info": "The gain to apply to the instrumentals.",
+                "value": 0,
+                "minimum": -20.0,
+                "maximum": 20.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "backup_gain": {
+                "label": "Backup gain",
+                "info": "The gain to apply to the backup vocals.",
+                "value": 0,
+                "minimum": -20.0,
+                "maximum": 20.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "n_octaves": {
+                "label": "Vocal pitch shift",
+                "info": "The number of octaves to shift the pitch of the converted vocals by. Use 1 for male-to-female and -1 for vice-versa.",
+                "value": 0,
+                "minimum": -3.0,
+                "maximum": 3.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "n_semitones": {
+                "label": "Overall pitch shift",
+                "info": "The number of semitones to shift the pitch of the converted vocals, instrumentals and backup vocals by.",
+                "value": 0,
+                "minimum": -12.0,
+                "maximum": 12.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "show_intermediate_audio": {
+                "label": "Show intermediate audio",
+                "info": "Show intermediate audio tracks produced during song cover generation.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "intermediate_audio": {
+                "song": {
+                    "label": "Song",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "vocals": {
+                    "label": "Vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "instrumentals": {
+                    "label": "Instrumentals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "main_vocals": {
+                    "label": "Main vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "backup_vocals": {
+                    "label": "Backup vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "main_vocals_dereverbed": {
+                    "label": "De-reverbed main vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "main_vocals_reverb": {
+                    "label": "Main vocals with reverb",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "converted_vocals": {
+                    "label": "Converted vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "postprocessed_vocals": {
+                    "label": "Postprocessed vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "instrumentals_shifted": {
+                    "label": "Pitch-shifted instrumentals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "backup_vocals_shifted": {
+                    "label": "Pitch-shifted backup vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                }
+            }
+        },
+        "multi_step": {
+            "embedder_model": {
+                "label": "Embedder model",
+                "info": "The model to use for generating speaker embeddings.",
+                "value": "contentvec",
+                "choices": [
+                    "contentvec",
+                    "chinese-hubert-base",
+                    "japanese-hubert-base",
+                    "korean-hubert-base",
+                    "custom"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "custom_embedder_model": {
+                "label": "Custom embedder model",
+                "info": "Select a custom embedder model from the dropdown.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "voice_model": {
+                "label": "Voice model",
+                "info": "Select a model to use for voice conversion.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "f0_methods": {
+                "label": "Pitch extraction algorithm(s)",
+                "info": "If more than one method is selected, then the median of the pitch values extracted by each method is used. RMVPE is recommended for most cases and is the default when no method is selected.",
+                "value": [
+                    "rmvpe"
+                ],
+                "choices": [
+                    "rmvpe",
+                    "crepe",
+                    "crepe-tiny",
+                    "fcpe"
+                ],
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "index_rate": {
+                "label": "Index rate",
+                "info": "Increase to bias the conversion towards the accent of the voice model. Decrease to potentially reduce artifacts coming from the voice model.<br><br><br>",
+                "value": 0.3,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "rms_mix_rate": {
+                "label": "RMS mix rate",
+                "info": "How much to mimic the loudness of the input voice (0) or a fixed loudness (1). A value of 1 is recommended for most cases.<br><br>",
+                "value": 1,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "protect_rate": {
+                "label": "Protect rate",
+                "info": "Controls the extent to which consonants and breathing sounds are protected from artifacts. A higher value offers more protection but may worsen the indexing effect.<br><br>",
+                "value": 0.33,
+                "minimum": 0.0,
+                "maximum": 0.5,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "hop_length": {
+                "label": "Hop length",
+                "info": "How often the CREPE-based pitch extraction method checks for pitch changes, measured in milliseconds. Lower values lead to longer conversion times and a higher risk of voice cracks, but better pitch accuracy.",
+                "value": 128,
+                "minimum": 1.0,
+                "maximum": 512.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "split_voice": {
+                "label": "Split input voice",
+                "info": "Whether to split the input voice track into smaller segments before converting it. This can improve output quality for longer voice tracks.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "autotune_voice": {
+                "label": "Autotune converted voice",
+                "info": "Whether to apply autotune to the converted voice.<br><br>",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "autotune_strength": {
+                "label": "Autotune intensity",
+                "info": "Higher values result in stronger snapping to the chromatic grid and artifacting.",
+                "value": 1,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "sid": {
+                "label": "Speaker ID",
+                "info": "Speaker ID for multi-speaker models.",
+                "value": 0,
+                "precision": 0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_sr": {
+                "label": "Output sample rate",
+                "info": "The sample rate of the mixed output track.",
+                "value": 44100,
+                "choices": [
+                    16000,
+                    44100,
+                    48000,
+                    96000,
+                    192000
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_format": {
+                "label": "Output format",
+                "info": "The audio format of the mixed output track.",
+                "value": "mp3",
+                "choices": [
+                    "mp3",
+                    "wav",
+                    "flac",
+                    "ogg",
+                    "m4a",
+                    "aac"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_name": {
+                "label": "Output name",
+                "info": "If no name is provided, a suitable name will be generated automatically.",
+                "value": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": "Ultimate RVC output"
+            },
+            "source_type": {
+                "label": "Source type",
+                "info": "The type of source to retrieve a song from.",
+                "value": "YouTube link/local path",
+                "choices": [
+                    "YouTube link/local path",
+                    "Local file",
+                    "Cached song"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "index",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "source": {
+                "label": "Source",
+                "info": "Link to a song on YouTube or the full path of a local audio file.",
+                "value": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": null
+            },
+            "cached_song": {
+                "label": "Source",
+                "info": "Select a song from the list of cached songs.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "clean_voice": {
+                "label": "Clean converted voice",
+                "info": "Whether to clean the converted voice using noise reduction algorithms.<br><br>",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "clean_strength": {
+                "label": "Cleaning intensity",
+                "info": "Higher values result in stronger cleaning, but may lead to a more compressed sound.",
+                "value": 0.7,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": 0.1,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "room_size": {
+                "label": "Room size",
+                "info": "Size of the room that the reverb effect simulates. Increase for longer reverb time.",
+                "value": 0.15,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "wet_level": {
+                "label": "Wetness level",
+                "info": "Loudness of converted vocals with reverb effect applied.",
+                "value": 0.2,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "dry_level": {
+                "label": "Dryness level",
+                "info": "Loudness of converted vocals without reverb effect applied.",
+                "value": 0.8,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "damping": {
+                "label": "Damping level",
+                "info": "Absorption of high frequencies in reverb effect.",
+                "value": 0.7,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "main_gain": {
+                "label": "Main gain",
+                "info": "The gain to apply to the main vocals.",
+                "value": 0,
+                "minimum": -20.0,
+                "maximum": 20.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "inst_gain": {
+                "label": "Instrumentals gain",
+                "info": "The gain to apply to the instrumentals.",
+                "value": 0,
+                "minimum": -20.0,
+                "maximum": 20.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "backup_gain": {
+                "label": "Backup gain",
+                "info": "The gain to apply to the backup vocals.",
+                "value": 0,
+                "minimum": -20.0,
+                "maximum": 20.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "separation_model": {
+                "label": "Separation model",
+                "info": "The model to use for audio separation.",
+                "value": "UVR-MDX-NET-Voc_FT.onnx",
+                "choices": [
+                    "UVR-MDX-NET-Voc_FT.onnx",
+                    "UVR_MDXNET_KARA_2.onnx",
+                    "Reverb_HQ_By_FoxJoy.onnx"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "segment_size": {
+                "label": "Segment size",
+                "info": "The size of the segments into which the audio is split. Using a larger size consumes more resources, but may give better results.",
+                "value": 512,
+                "choices": [
+                    64,
+                    128,
+                    256,
+                    512,
+                    1024,
+                    2048
+                ],
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "n_octaves": {
+                "label": "Pitch shift (octaves)",
+                "info": "The number of octaves to pitch-shift the converted voice by. Use 1 for male-to-female and -1 for vice-versa.",
+                "value": 0,
+                "minimum": -3.0,
+                "maximum": 3.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "n_semitones": {
+                "label": "Pitch shift (semi-tones)",
+                "info": "The number of semi-tones to pitch-shift the converted vocals by. Altering this slightly reduces sound quality.",
+                "value": 0,
+                "minimum": -12.0,
+                "maximum": 12.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "n_semitones_instrumentals": {
+                "label": "Instrumental pitch shift",
+                "info": "The number of semi-tones to pitch-shift the instrumentals by.",
+                "value": 0,
+                "minimum": -12.0,
+                "maximum": 12.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "n_semitones_backup_vocals": {
+                "label": "Backup vocal pitch shift",
+                "info": "The number of semi-tones to pitch-shift the backup vocals by.",
+                "value": 0,
+                "minimum": -12.0,
+                "maximum": 12.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "input_audio": {
+                "audio": {
+                    "label": "Audio",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "vocals": {
+                    "label": "Vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "converted_vocals": {
+                    "label": "Vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "instrumentals": {
+                    "label": "Instrumentals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "backup_vocals": {
+                    "label": "Backup vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "main_vocals": {
+                    "label": "Main vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "shifted_instrumentals": {
+                    "label": "Instrumentals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "shifted_backup_vocals": {
+                    "label": "Backup vocals",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                }
+            },
+            "song_dirs": {
+                "separate_audio": {
+                    "label": "Song directory",
+                    "info": "Directory where intermediate audio files are stored and loaded from locally. When a new song is retrieved, its directory is chosen by default.",
+                    "value": null,
+                    "choices": null,
+                    "multiselect": null,
+                    "allow_custom_value": false,
+                    "type": "value",
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true
+                },
+                "convert_vocals": {
+                    "label": "Song directory",
+                    "info": "Directory where intermediate audio files are stored and loaded from locally. When a new song is retrieved, its directory is chosen by default.",
+                    "value": null,
+                    "choices": null,
+                    "multiselect": null,
+                    "allow_custom_value": false,
+                    "type": "value",
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true
+                },
+                "postprocess_vocals": {
+                    "label": "Song directory",
+                    "info": "Directory where intermediate audio files are stored and loaded from locally. When a new song is retrieved, its directory is chosen by default.",
+                    "value": null,
+                    "choices": null,
+                    "multiselect": null,
+                    "allow_custom_value": false,
+                    "type": "value",
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true
+                },
+                "pitch_shift_background": {
+                    "label": "Song directory",
+                    "info": "Directory where intermediate audio files are stored and loaded from locally. When a new song is retrieved, its directory is chosen by default.",
+                    "value": null,
+                    "choices": null,
+                    "multiselect": null,
+                    "allow_custom_value": false,
+                    "type": "value",
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true
+                },
+                "mix": {
+                    "label": "Song directory",
+                    "info": "Directory where intermediate audio files are stored and loaded from locally. When a new song is retrieved, its directory is chosen by default.",
+                    "value": null,
+                    "choices": null,
+                    "multiselect": null,
+                    "allow_custom_value": false,
+                    "type": "value",
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true
+                }
+            }
+        }
+    },
+    "speech": {
+        "one_click": {
+            "embedder_model": {
+                "label": "Embedder model",
+                "info": "The model to use for generating speaker embeddings.",
+                "value": "contentvec",
+                "choices": [
+                    "contentvec",
+                    "chinese-hubert-base",
+                    "japanese-hubert-base",
+                    "korean-hubert-base",
+                    "custom"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "custom_embedder_model": {
+                "label": "Custom embedder model",
+                "info": "Select a custom embedder model from the dropdown.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "voice_model": {
+                "label": "Voice model",
+                "info": "Select a model to use for voice conversion.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "f0_methods": {
+                "label": "Pitch extraction algorithm(s)",
+                "info": "If more than one method is selected, then the median of the pitch values extracted by each method is used. RMVPE is recommended for most cases and is the default when no method is selected.",
+                "value": [
+                    "rmvpe"
+                ],
+                "choices": [
+                    "rmvpe",
+                    "crepe",
+                    "crepe-tiny",
+                    "fcpe"
+                ],
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "index_rate": {
+                "label": "Index rate",
+                "info": "Increase to bias the conversion towards the accent of the voice model. Decrease to potentially reduce artifacts coming from the voice model.<br><br><br>",
+                "value": 0.3,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "rms_mix_rate": {
+                "label": "RMS mix rate",
+                "info": "How much to mimic the loudness of the input voice (0) or a fixed loudness (1). A value of 1 is recommended for most cases.<br><br>",
+                "value": 1,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "protect_rate": {
+                "label": "Protect rate",
+                "info": "Controls the extent to which consonants and breathing sounds are protected from artifacts. A higher value offers more protection but may worsen the indexing effect.<br><br>",
+                "value": 0.33,
+                "minimum": 0.0,
+                "maximum": 0.5,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "hop_length": {
+                "label": "Hop length",
+                "info": "How often, measured in milliseconds, the CREPE-based pitch extraction method checks for pitch changes. Lower values lead to longer conversion times and a higher risk of voice cracks, but better pitch accuracy.",
+                "value": 128,
+                "minimum": 1.0,
+                "maximum": 512.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "split_voice": {
+                "label": "Split input voice",
+                "info": "Whether to split the input voice track into smaller segments before converting it. This can improve output quality for longer voice tracks.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "autotune_voice": {
+                "label": "Autotune converted voice",
+                "info": "Whether to apply autotune to the converted voice.<br><br>",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "autotune_strength": {
+                "label": "Autotune intensity",
+                "info": "Higher values result in stronger snapping to the chromatic grid and artifacting.",
+                "value": 1,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "sid": {
+                "label": "Speaker ID",
+                "info": "Speaker ID for multi-speaker models.",
+                "value": 0,
+                "precision": 0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_sr": {
+                "label": "Output sample rate",
+                "info": "The sample rate of the mixed output track.",
+                "value": 44100,
+                "choices": [
+                    16000,
+                    44100,
+                    48000,
+                    96000,
+                    192000
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_format": {
+                "label": "Output format",
+                "info": "The audio format of the mixed output track.",
+                "value": "mp3",
+                "choices": [
+                    "mp3",
+                    "wav",
+                    "flac",
+                    "ogg",
+                    "m4a",
+                    "aac"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_name": {
+                "label": "Output name",
+                "info": "If no name is provided, a suitable name will be generated automatically.",
+                "value": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": "Ultimate RVC output"
+            },
+            "source_type": {
+                "label": "Source type",
+                "info": "The type of source to generate speech from.",
+                "value": "Text",
+                "choices": [
+                    "Text",
+                    "Local file"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "index",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "source": {
+                "label": "Source",
+                "info": "Text to generate speech from.",
+                "value": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": null
+            },
+            "edge_tts_voice": {
+                "label": "Edge TTS voice",
+                "info": "Select a voice to use for text to speech conversion.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "n_octaves": {
+                "label": "Octave shift",
+                "info": "The number of octaves to pitch-shift the converted speech by. Use 1 for male-to-female and -1 for vice-versa.",
+                "value": 0,
+                "minimum": -3.0,
+                "maximum": 3.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "n_semitones": {
+                "label": "Semitone shift",
+                "info": "The number of semi-tones to pitch-shift the converted speech by.",
+                "value": 0,
+                "minimum": -12.0,
+                "maximum": 12.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "tts_pitch_shift": {
+                "label": "Edge TTS pitch shift",
+                "info": "The number of hertz to shift the pitch of the speech generated by Edge TTS.",
+                "value": 0,
+                "minimum": -100.0,
+                "maximum": 100.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "tts_speed_change": {
+                "label": "TTS speed change",
+                "info": "The percentage change to the speed of the speech generated by Edge TTS.",
+                "value": 0,
+                "minimum": -50.0,
+                "maximum": 100.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "tts_volume_change": {
+                "label": "TTS volume change",
+                "info": "The percentage change to the volume of the speech generated by Edge TTS.",
+                "value": 0,
+                "minimum": -100.0,
+                "maximum": 100.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "clean_voice": {
+                "label": "Clean converted voice",
+                "info": "Whether to clean the converted voice using noise reduction algorithms.<br><br>",
+                "value": true,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "clean_strength": {
+                "label": "Cleaning intensity",
+                "info": "Higher values result in stronger cleaning, but may lead to a more compressed sound.",
+                "value": 0.7,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": 0.1,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_gain": {
+                "label": "Output gain",
+                "info": "The gain to apply to the converted speech.<br><br>",
+                "value": 0,
+                "minimum": -20.0,
+                "maximum": 20.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "intermediate_audio": {
+                "speech": {
+                    "label": "Speech",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "converted_speech": {
+                    "label": "Converted speech",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": true,
+                    "exclude_value": true,
+                    "interactive": null
+                }
+            },
+            "show_intermediate_audio": {
+                "label": "Show intermediate audio",
+                "info": "Show intermediate audio tracks produced during speech generation.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            }
+        },
+        "multi_step": {
+            "embedder_model": {
+                "label": "Embedder model",
+                "info": "The model to use for generating speaker embeddings.",
+                "value": "contentvec",
+                "choices": [
+                    "contentvec",
+                    "chinese-hubert-base",
+                    "japanese-hubert-base",
+                    "korean-hubert-base",
+                    "custom"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "custom_embedder_model": {
+                "label": "Custom embedder model",
+                "info": "Select a custom embedder model from the dropdown.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "voice_model": {
+                "label": "Voice model",
+                "info": "Select a model to use for voice conversion.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "f0_methods": {
+                "label": "Pitch extraction algorithm(s)",
+                "info": "If more than one method is selected, then the median of the pitch values extracted by each method is used. RMVPE is recommended for most cases and is the default when no method is selected.",
+                "value": [
+                    "rmvpe"
+                ],
+                "choices": [
+                    "rmvpe",
+                    "crepe",
+                    "crepe-tiny",
+                    "fcpe"
+                ],
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "index_rate": {
+                "label": "Index rate",
+                "info": "Increase to bias the conversion towards the accent of the voice model. Decrease to potentially reduce artifacts coming from the voice model.<br><br><br>",
+                "value": 0.3,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "rms_mix_rate": {
+                "label": "RMS mix rate",
+                "info": "How much to mimic the loudness of the input voice (0) or a fixed loudness (1). A value of 1 is recommended for most cases.<br><br>",
+                "value": 1,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "protect_rate": {
+                "label": "Protect rate",
+                "info": "Controls the extent to which consonants and breathing sounds are protected from artifacts. A higher value offers more protection but may worsen the indexing effect.<br><br>",
+                "value": 0.33,
+                "minimum": 0.0,
+                "maximum": 0.5,
+                "step": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "hop_length": {
+                "label": "Hop length",
+                "info": "How often, measured in milliseconds, the CREPE-based pitch extraction method checks for pitch changes. Lower values lead to longer conversion times and a higher risk of voice cracks, but better pitch accuracy.",
+                "value": 128,
+                "minimum": 1.0,
+                "maximum": 512.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "split_voice": {
+                "label": "Split input voice",
+                "info": "Whether to split the input voice track into smaller segments before converting it. This can improve output quality for longer voice tracks.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "autotune_voice": {
+                "label": "Autotune converted voice",
+                "info": "Whether to apply autotune to the converted voice.<br><br>",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "autotune_strength": {
+                "label": "Autotune intensity",
+                "info": "Higher values result in stronger snapping to the chromatic grid and artifacting.",
+                "value": 1,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": null,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "sid": {
+                "label": "Speaker ID",
+                "info": "Speaker ID for multi-speaker models.",
+                "value": 0,
+                "precision": 0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_sr": {
+                "label": "Output sample rate",
+                "info": "The sample rate of the mixed output track.",
+                "value": 44100,
+                "choices": [
+                    16000,
+                    44100,
+                    48000,
+                    96000,
+                    192000
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_format": {
+                "label": "Output format",
+                "info": "The audio format of the mixed output track.",
+                "value": "mp3",
+                "choices": [
+                    "mp3",
+                    "wav",
+                    "flac",
+                    "ogg",
+                    "m4a",
+                    "aac"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_name": {
+                "label": "Output name",
+                "info": "If no name is provided, a suitable name will be generated automatically.",
+                "value": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": "Ultimate RVC output"
+            },
+            "source_type": {
+                "label": "Source type",
+                "info": "The type of source to generate speech from.",
+                "value": "Text",
+                "choices": [
+                    "Text",
+                    "Local file"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "index",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "source": {
+                "label": "Source",
+                "info": "Text to generate speech from.",
+                "value": null,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": null
+            },
+            "edge_tts_voice": {
+                "label": "Edge TTS voice",
+                "info": "Select a voice to use for text to speech conversion.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "n_octaves": {
+                "label": "Octave shift",
+                "info": "The number of octaves to pitch-shift the converted speech by. Use 1 for male-to-female and -1 for vice versa.",
+                "value": 0,
+                "minimum": -3.0,
+                "maximum": 3.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "n_semitones": {
+                "label": "Semitone shift",
+                "info": "The number of semitones to pitch-shift the converted speech by.",
+                "value": 0,
+                "minimum": -12.0,
+                "maximum": 12.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "tts_pitch_shift": {
+                "label": "Edge TTS pitch shift",
+                "info": "The number of hertz to shift the pitch of the speech generated by Edge TTS.",
+                "value": 0,
+                "minimum": -100.0,
+                "maximum": 100.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "tts_speed_change": {
+                "label": "TTS speed change",
+                "info": "The percentage change to the speed of the speech generated by Edge TTS.",
+                "value": 0,
+                "minimum": -50.0,
+                "maximum": 100.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "tts_volume_change": {
+                "label": "TTS volume change",
+                "info": "The percentage change to the volume of the speech generated by Edge TTS.",
+                "value": 0,
+                "minimum": -100.0,
+                "maximum": 100.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "clean_voice": {
+                "label": "Clean converted voice",
+                "info": "Whether to clean the converted voice using noise reduction algorithms.<br><br>",
+                "value": true,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "clean_strength": {
+                "label": "Cleaning intensity",
+                "info": "Higher values result in stronger cleaning, but may lead to a more compressed sound.",
+                "value": 0.7,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": 0.1,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "output_gain": {
+                "label": "Output gain",
+                "info": "The gain to apply to the converted speech.<br><br>",
+                "value": 0,
+                "minimum": -20.0,
+                "maximum": 20.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "input_audio": {
+                "speech": {
+                    "label": "Speech",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                },
+                "converted_speech": {
+                    "label": "Converted speech",
+                    "value": null,
+                    "visible": true,
+                    "scale": null,
+                    "render": false,
+                    "exclude_value": true,
+                    "interactive": null
+                }
+            }
+        }
+    },
+    "training": {
+        "multi_step": {
+            "embedder_model": {
+                "label": "Embedder model",
+                "info": "The model to use for generating speaker embeddings.",
+                "value": "contentvec",
+                "choices": [
+                    "contentvec",
+                    "chinese-hubert-base",
+                    "japanese-hubert-base",
+                    "korean-hubert-base",
+                    "custom"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "custom_embedder_model": {
+                "label": "Custom embedder model",
+                "info": "Select a custom embedder model from the dropdown.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "dataset_type": {
+                "label": "Dataset type",
+                "info": "Select the type of dataset to preprocess.",
+                "value": "New dataset",
+                "choices": [
+                    "New dataset",
+                    "Existing dataset"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "dataset": {
+                "label": "Dataset path",
+                "info": "The path to an existing dataset. Either select a path to a previously created dataset or provide a path to an external dataset.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": true,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "dataset_name": {
+                "label": "Dataset name",
+                "info": "The name of the new dataset. If the dataset already exists, the provided audio files will be added to it.",
+                "value": "My dataset",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": null
+            },
+            "preprocess_model": {
+                "label": "Model name",
+                "info": "Name of the model to preprocess the given dataset for. Either select an existing model from the dropdown or provide the name of a new model.",
+                "value": "My model",
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": true,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "sample_rate": {
+                "label": "Sample rate",
+                "info": "Target sample rate for the audio files in the provided dataset.",
+                "value": "40000",
+                "choices": [
+                    "32000",
+                    "40000",
+                    "48000"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "filter_audio": {
+                "label": "Filter audio",
+                "info": "Whether to remove low-frequency sounds from the audio files in the provided dataset by applying a high-pass Butterworth filter.<br><br>",
+                "value": true,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "clean_audio": {
+                "label": "Clean audio",
+                "info": "Whether to clean the audio files in the provided dataset using noise reduction algorithms.<br><br><br>",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "clean_strength": {
+                "label": "Cleaning intensity",
+                "info": "Higher values result in stronger cleaning, but may lead to a more compressed sound.",
+                "value": 0.7,
+                "minimum": 0.0,
+                "maximum": 1.0,
+                "step": 0.1,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "split_method": {
+                "label": "Audio splitting method",
+                "info": "The method to use for splitting the audio files in the provided dataset. Use the `Skip` method to skip splitting if the audio files are already split. Use the `Simple` method if excessive silence has already been removed from the audio files. Use the `Automatic` method for automatic silence detection and splitting around it.",
+                "value": "Automatic",
+                "choices": [
+                    "Skip",
+                    "Simple",
+                    "Automatic"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "chunk_len": {
+                "label": "Chunk length",
+                "info": "Length of split audio chunks.",
+                "value": 3,
+                "minimum": 0.5,
+                "maximum": 5.0,
+                "step": 0.1,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "overlap_len": {
+                "label": "Overlap length",
+                "info": "Length of overlap between split audio chunks.",
+                "value": 0.3,
+                "minimum": 0.0,
+                "maximum": 0.4,
+                "step": 0.1,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "preprocess_cores": {
+                "label": "CPU cores",
+                "info": "The number of CPU cores to use for multi-threading.",
+                "value": null,
+                "minimum": 1.0,
+                "maximum": 1.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "extract_model": {
+                "label": "Model name",
+                "info": "Name of the model with an associated preprocessed dataset to extract training features from. When a new dataset is preprocessed, its associated model is selected by default.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "f0_method": {
+                "label": "F0 method",
+                "info": "The method to use for extracting pitch features.",
+                "value": "rmvpe",
+                "choices": [
+                    "rmvpe",
+                    "crepe",
+                    "crepe-tiny"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "hop_length": {
+                "label": "Hop length",
+                "info": "The hop length to use for extracting pitch features.<br><br>",
+                "value": 128,
+                "minimum": 1.0,
+                "maximum": 512.0,
+                "step": 1.0,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "include_mutes": {
+                "label": "Include mutes",
+                "info": "The number of mute audio files to include in the generated training file list. Adding silent files enables the training model to handle pure silence in inferred audio files. If the preprocessed audio dataset already contains segments of pure silence, set this to 0.",
+                "value": 2,
+                "minimum": 0.0,
+                "maximum": 10.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "extraction_cores": {
+                "label": "CPU cores",
+                "info": "The number of CPU cores to use for multi-threading.",
+                "value": null,
+                "minimum": 1.0,
+                "maximum": 1.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "extraction_acceleration": {
+                "label": "Hardware acceleration",
+                "info": "The type of hardware acceleration to use. 'Automatic' will automatically select the first available GPU and fall back to CPU if no GPUs are available.",
+                "value": "Automatic",
+                "choices": [
+                    "Automatic",
+                    "CPU",
+                    "GPU"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "extraction_gpus": {
+                "label": "GPU(s)",
+                "info": "The GPU(s) to use for hardware acceleration.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "train_model": {
+                "label": "Model name",
+                "info": "Name of the model to train. When training features are extracted for a new model, its name is selected by default.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "num_epochs": {
+                "label": "Number of epochs",
+                "info": "The number of epochs to train the voice model. A higher number can improve voice model performance but may lead to overtraining.",
+                "value": 500,
+                "minimum": 1.0,
+                "maximum": 1000.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "batch_size": {
+                "label": "Batch size",
+                "info": "The number of samples in each training batch. It is advisable to align this value with the available VRAM of your GPU.",
+                "value": 8,
+                "minimum": 1.0,
+                "maximum": 64.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "detect_overtraining": {
+                "label": "Detect overtraining",
+                "info": "Whether to detect overtraining to prevent the voice model from learning the training data too well and losing the ability to generalize to new data.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "overtraining_threshold": {
+                "label": "Overtraining threshold",
+                "info": "The maximum number of epochs to continue training without any observed improvement in voice model performance.",
+                "value": 50,
+                "minimum": 1.0,
+                "maximum": 100.0,
+                "step": null,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "vocoder": {
+                "label": "Vocoder",
+                "info": "The vocoder to use for audio synthesis during training. HiFi-GAN provides basic audio fidelity, while RefineGAN provides the highest audio fidelity.",
+                "value": "HiFi-GAN",
+                "choices": [
+                    "HiFi-GAN",
+                    "MRF HiFi-GAN",
+                    "RefineGAN"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "index_algorithm": {
+                "label": "Index algorithm",
+                "info": "The method to use for generating an index file for the trained voice model. `KMeans` is particularly useful for large datasets.",
+                "value": "Auto",
+                "choices": [
+                    "Auto",
+                    "Faiss",
+                    "KMeans"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "pretrained_type": {
+                "label": "Pretrained model type",
+                "info": "The type of pretrained model to finetune the voice model on. `None` will train the voice model from scratch, while `Default` will use a pretrained model tailored to the specific voice model architecture. `Custom` will use a custom pretrained model that you provide.",
+                "value": "Default",
+                "choices": [
+                    "None",
+                    "Default",
+                    "Custom"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "custom_pretrained_model": {
+                "label": "Custom pretrained model",
+                "info": "Select a custom pretrained model to finetune from the dropdown.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "save_interval": {
+                "label": "Save interval",
+                "info": "The epoch interval at which to save voice model weights and checkpoints. The best model weights are always saved regardless of this setting.",
+                "value": 10,
+                "minimum": 1.0,
+                "maximum": 100.0,
+                "step": 1.0,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "save_all_checkpoints": {
+                "label": "Save all checkpoints",
+                "info": "Whether to save a unique checkpoint at each save interval. If not enabled, only the latest checkpoint will be saved at each interval.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "save_all_weights": {
+                "label": "Save all weights",
+                "info": "Whether to save unique voice model weights at each save interval. If not enabled, only the best voice model weights will be saved.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "clear_saved_data": {
+                "label": "Clear saved data",
+                "info": "Whether to delete any existing training data associated with the voice model before training commences. Enable this setting only if you are training a new voice model from scratch or restarting training.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "upload_model": {
+                "label": "Upload voice model",
+                "info": "Whether to automatically upload the trained voice model so that it can be used for generation tasks within the Ultimate RVC app.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "upload_name": {
+                "label": "Upload name",
+                "info": "The name to give the uploaded voice model.",
+                "value": null,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": true,
+                "placeholder": null
+            },
+            "training_acceleration": {
+                "label": "Hardware acceleration",
+                "info": "The type of hardware acceleration to use. 'Automatic' will automatically select the first available GPU and fall back to CPU if no GPUs are available.",
+                "value": "Automatic",
+                "choices": [
+                    "Automatic",
+                    "CPU",
+                    "GPU"
+                ],
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "training_gpus": {
+                "label": "GPU(s)",
+                "info": "The GPU(s) to use for hardware acceleration.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            },
+            "preload_dataset": {
+                "label": "Preload dataset",
+                "info": "Whether to preload all training data into GPU memory. This can improve training speed but requires a lot of VRAM.<br><br>",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            },
+            "reduce_memory_usage": {
+                "label": "Reduce memory usage",
+                "info": "Whether to reduce VRAM usage at the cost of slower training speed by enabling activation checkpointing. This is useful for GPUs with limited memory (e.g., <6GB VRAM) or when training with a batch size larger than what your GPU can normally accommodate.",
+                "value": false,
+                "visible": true,
+                "scale": null,
+                "render": true,
+                "exclude_value": false
+            }
+        }
+    },
+    "management": {
+        "model": {
+            "voices": {
+                "label": "Voice models",
+                "info": "Select one or more voice models to delete.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "embedders": {
+                "label": "Custom embedder models",
+                "info": "Select one or more embedder models to delete.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "pretraineds": {
+                "label": "Custom pretrained models",
+                "info": "Select one or more pretrained models to delete.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "traineds": {
+                "label": "Training models",
+                "info": "Select one or more training models to delete.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "dummy_checkbox": {
+                "label": null,
+                "info": null,
+                "value": false,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            }
+        },
+        "audio": {
+            "intermediate": {
+                "label": "Song directories",
+                "info": "Select one or more song directories containing intermediate audio files to delete.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "speech": {
+                "label": "Speech audio files",
+                "info": "Select one or more speech audio files to delete.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "output": {
+                "label": "Output audio files",
+                "info": "Select one or more output audio files to delete.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "dataset": {
+                "label": "Dataset audio files",
+                "info": "Select one or more datasets containing audio files to delete.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "dummy_checkbox": {
+                "label": null,
+                "info": null,
+                "value": false,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            }
+        },
+        "settings": {
+            "load_config_name": {
+                "label": "Configuration name",
+                "info": "The name of a configuration to load UI settings from.",
+                "value": null,
+                "choices": null,
+                "multiselect": null,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "delete_config_names": {
+                "label": "Configuration names",
+                "info": "Select the name of one or more configurations to delete.",
+                "value": null,
+                "choices": null,
+                "multiselect": true,
+                "allow_custom_value": false,
+                "type": "value",
+                "visible": true,
+                "scale": null,
+                "render": false,
+                "exclude_value": true
+            },
+            "dummy_checkbox": {
+                "label": null,
+                "info": null,
+                "value": false,
+                "visible": false,
+                "scale": null,
+                "render": true,
+                "exclude_value": true
+            }
+        }
+    }
+}