Elesh Vaishnav committed on
Commit
5682687
·
verified ·
1 Parent(s): f3d5970

Upload 26 files

Browse files
tabs/download/download.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import shutil
5
+ import requests
6
+ import tempfile
7
+ import gradio as gr
8
+ import pandas as pd
9
+
10
+ from concurrent.futures import ThreadPoolExecutor
11
+ from tqdm import tqdm
12
+
13
+
14
+ now_dir = os.getcwd()
15
+ sys.path.append(now_dir)
16
+
17
+ from core import run_download_script
18
+ from rvc.lib.utils import format_title
19
+
20
+ from assets.i18n.i18n import I18nAuto
21
+
22
+ i18n = I18nAuto()
23
+
24
+ gradio_temp_dir = os.path.join(tempfile.gettempdir(), "gradio")
25
+
26
+ if os.path.exists(gradio_temp_dir):
27
+ shutil.rmtree(gradio_temp_dir)
28
+
29
+
30
def save_drop_model(dropbox):
    """Save a dropped .pth or .index model file into logs/<model_name>/.

    Args:
        dropbox: Filesystem path of the uploaded file (gradio File value).

    Raises:
        gr.Error: If the uploaded file is not a .pth or .index file.

    Returns:
        None, which clears the gradio File component after the move.
    """
    file_name = format_title(os.path.basename(dropbox))
    # Validate by file extension instead of a substring test on the whole
    # path, so e.g. any file inside a folder named "pth" is not accepted.
    if not file_name.endswith((".pth", ".index")):
        raise gr.Error(
            message="The file you dropped is not a valid model file. Please try again."
        )

    model_name = file_name
    if file_name.endswith(".pth"):
        model_name = model_name.split(".pth")[0]
    else:  # .index
        # Strip common index-file decorations to recover the model name.
        for rep in ("nprobe_1_", "_v1", "_v2", "added_"):
            model_name = model_name.replace(rep, "")
        model_name = model_name.split(".index")[0]

    model_path = os.path.join(now_dir, "logs", model_name)
    os.makedirs(model_path, exist_ok=True)
    # Replace any previous copy of the same file.
    if os.path.exists(os.path.join(model_path, file_name)):
        os.remove(os.path.join(model_path, file_name))
    shutil.move(dropbox, os.path.join(model_path, file_name))
    print(f"{file_name} saved in {model_path}")
    gr.Info(f"{file_name} saved in {model_path}")

    return None
57
+
58
+
59
+ json_url = "https://huggingface.co/IAHispano/Applio/raw/main/pretrains.json"
60
+
61
+
62
def fetch_pretrained_data():
    """Return the pretrained-model catalog as a dict.

    Resolution order:
      1. A locally cached copy of pretrains.json.
      2. A fresh download from Hugging Face (which is then cached).
      3. A minimal hard-coded fallback so the UI still renders offline.
    """
    pretraineds_custom_path = os.path.join("rvc", "models", "pretraineds", "custom")
    os.makedirs(pretraineds_custom_path, exist_ok=True)
    cache_file = os.path.join(pretraineds_custom_path, json_url.split("/")[-1])
    try:
        with open(cache_file, "r") as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError):
        # Cache missing or corrupt: refresh it from the network.
        try:
            response = requests.get(json_url, timeout=10)
            response.raise_for_status()
            data = response.json()
            with open(cache_file, "w", encoding="utf-8") as f:
                json.dump(
                    data,
                    f,
                    indent=2,
                    separators=(",", ": "),
                    ensure_ascii=False,
                )
        except Exception:
            # Offline or request failed: fall back to a minimal known entry
            # so the dropdowns still have something to show.
            data = {
                "Titan": {
                    "32k": {"D": "null", "G": "null"},
                },
            }
    return data
94
+
95
+
96
def get_pretrained_list():
    """Return the names of all pretrained models in the catalog."""
    return list(fetch_pretrained_data())
99
+
100
+
101
def get_pretrained_sample_rates(model):
    """Return the sample rates available for *model* in the catalog."""
    catalog = fetch_pretrained_data()
    return [rate for rate in catalog[model]]
104
+
105
+
106
def get_file_size(url):
    """Return the Content-Length of *url* in bytes (0 if not reported).

    Follows redirects (Hugging Face serves files via a CDN redirect, and
    requests.head does not follow them by default) and applies a timeout so
    a hung connection cannot block the UI indefinitely.
    """
    response = requests.head(url, allow_redirects=True, timeout=10)
    return int(response.headers.get("content-length", 0))
109
+
110
+
111
def download_file(url, destination_path, progress_bar):
    """Stream *url* to *destination_path*, updating *progress_bar* per chunk.

    Args:
        url: Direct download URL.
        destination_path: Target file path; parent dirs are created.
        progress_bar: A tqdm instance shared between concurrent downloads.

    Raises:
        requests.HTTPError: On a non-2xx response — instead of silently
            writing the error page to disk as the previous version did.
    """
    os.makedirs(os.path.dirname(destination_path), exist_ok=True)
    block_size = 1024
    # The context manager guarantees the connection is released on error.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(destination_path, "wb") as file:
            for data in response.iter_content(block_size):
                file.write(data)
                progress_bar.update(len(data))
119
+
120
+
121
def download_pretrained_model(model, sample_rate):
    """Download the D/G pretrained checkpoint pair for *model* at *sample_rate*.

    Looks up the Hugging Face repo paths in the catalog, then downloads both
    files concurrently into rvc/models/pretraineds/custom, sharing a single
    tqdm progress bar sized to the combined file size.
    """
    data = fetch_pretrained_data()
    paths = data[model][sample_rate]
    pretraineds_custom_path = os.path.join("rvc", "models", "pretraineds", "custom")
    os.makedirs(pretraineds_custom_path, exist_ok=True)

    # Catalog entries are repo-relative paths appended to the HF host.
    d_url = f"https://huggingface.co/{paths['D']}"
    g_url = f"https://huggingface.co/{paths['G']}"

    # HEAD both files up front so the progress bar total is accurate.
    total_size = get_file_size(d_url) + get_file_size(g_url)

    gr.Info("Downloading pretrained model...")

    with tqdm(
        total=total_size, unit="iB", unit_scale=True, desc="Downloading files"
    ) as progress_bar:
        # Two workers: one per file; both update the same shared bar.
        with ThreadPoolExecutor(max_workers=2) as executor:
            futures = [
                executor.submit(
                    download_file,
                    d_url,
                    os.path.join(pretraineds_custom_path, os.path.basename(paths["D"])),
                    progress_bar,
                ),
                executor.submit(
                    download_file,
                    g_url,
                    os.path.join(pretraineds_custom_path, os.path.basename(paths["G"])),
                    progress_bar,
                ),
            ]
            # Re-raise any download exception in the caller's thread.
            for future in futures:
                future.result()

    gr.Info("Pretrained model downloaded successfully!")
    print("Pretrained model downloaded successfully!")
157
+
158
+
159
def update_sample_rate_dropdown(model):
    """Return a gradio update dict refreshing the sample-rate choices for *model*.

    Fetches the rate list once — the previous version called
    get_pretrained_sample_rates twice, re-reading the catalog each time.
    """
    rates = get_pretrained_sample_rates(model)
    return {
        "choices": rates,
        "value": rates[0],
        "__type__": "update",
    }
165
+
166
+
167
def download_tab():
    """Build the "Download" tab UI: link download, drag-and-drop, pretraineds."""
    with gr.Column():
        gr.Markdown(value=i18n("## Download Model"))
        model_link = gr.Textbox(
            label=i18n("Model Link"),
            placeholder=i18n("Introduce the model link"),
            interactive=True,
        )
        model_download_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )
        model_download_button = gr.Button(i18n("Download Model"))
        # Download a model from a pasted URL via the core script.
        model_download_button.click(
            fn=run_download_script,
            inputs=[model_link],
            outputs=[model_download_output_info],
        )
        gr.Markdown(value=i18n("## Drop files"))
        dropbox = gr.File(
            label=i18n(
                "Drag your .pth file and .index file into this space. Drag one and then the other."
            ),
            type="filepath",
        )

        # save_drop_model returns None, which clears the drop area afterwards.
        dropbox.upload(
            fn=save_drop_model,
            inputs=[dropbox],
            outputs=[dropbox],
        )
        gr.Markdown(value=i18n("## Download Pretrained Models"))
        pretrained_model = gr.Dropdown(
            label=i18n("Pretrained"),
            info=i18n("Select the pretrained model you want to download."),
            choices=get_pretrained_list(),
            value="Titan",
            interactive=True,
        )
        pretrained_sample_rate = gr.Dropdown(
            label=i18n("Sampling Rate"),
            info=i18n("And select the sampling rate."),
            choices=get_pretrained_sample_rates(pretrained_model.value),
            value="40k",
            interactive=True,
            allow_custom_value=True,
        )
        # Changing the model refreshes the available sampling rates.
        pretrained_model.change(
            update_sample_rate_dropdown,
            inputs=[pretrained_model],
            outputs=[pretrained_sample_rate],
        )
        download_pretrained = gr.Button(i18n("Download"))
        download_pretrained.click(
            fn=download_pretrained_model,
            inputs=[pretrained_model, pretrained_sample_rate],
            outputs=[],
        )
tabs/extra/extra.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ from tabs.extra.sections.processing import processing_tab
9
+ from tabs.extra.sections.analyzer import analyzer_tab
10
+ from tabs.extra.sections.f0_extractor import f0_extractor_tab
11
+
12
+ from assets.i18n.i18n import I18nAuto
13
+
14
+ i18n = I18nAuto()
15
+
16
+
17
def extra_tab():
    """Build the "Extra" tab: model info, F0 curve, and audio analyzer sub-tabs."""
    with gr.TabItem(i18n("Model information")):
        processing_tab()

    with gr.TabItem(i18n("F0 Curve")):
        f0_extractor_tab()

    with gr.TabItem(i18n("Audio Analyzer")):
        analyzer_tab()
tabs/extra/model_information.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from core import run_model_information_script
3
+
4
+ from assets.i18n.i18n import I18nAuto
5
+
6
+ i18n = I18nAuto()
7
+
8
+
9
def model_information_tab():
    """Build the model-information UI: a path textbox, an output box, a button."""
    with gr.Column():
        model_name = gr.Textbox(
            label=i18n("Path to Model"),
            info=i18n("Introduce the model pth path"),
            placeholder=i18n("Introduce the model pth path"),
            interactive=True,
        )
        model_information_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=12,
            interactive=False,
        )
        model_information_button = gr.Button(i18n("See Model Information"))
        # Run the core inspection script on the given checkpoint path.
        model_information_button.click(
            fn=run_model_information_script,
            inputs=[model_name],
            outputs=[model_information_output_info],
        )
tabs/extra/sections/analyzer.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+
4
+ now_dir = os.getcwd()
5
+ sys.path.append(now_dir)
6
+
7
+ from core import run_audio_analyzer_script
8
+ from assets.i18n.i18n import I18nAuto
9
+
10
+ i18n = I18nAuto()
11
+
12
+
13
def analyzer_tab():
    """Build the audio-analyzer UI: upload audio, show text info and a plot."""
    with gr.Column():
        audio_input = gr.Audio(type="filepath")
        output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )
        get_info_button = gr.Button(value=i18n("Get information about the audio"))
        image_output = gr.Image(type="filepath", interactive=False)

        # The core script returns (text summary, plot image path).
        get_info_button.click(
            fn=run_audio_analyzer_script,
            inputs=[audio_input],
            outputs=[output_info, image_output],
        )
tabs/extra/sections/f0_extractor.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import librosa
3
+ import gradio as gr
4
+ from matplotlib import pyplot as plt
5
+
6
+ from rvc.lib.predictors.F0Extractor import F0Extractor
7
+
8
+ from assets.i18n.i18n import I18nAuto
9
+
10
+ i18n = I18nAuto()
11
+
12
+
13
def extract_f0_curve(audio_path: str, method: str):
    """Extract the F0 (pitch) curve from an audio file and save plot + text.

    Args:
        audio_path: Path to the input audio file.
        method: Pitch extraction algorithm name ("crepe", "fcpe" or "rmvpe").

    Returns:
        Tuple of (plot image path, text file path), both under logs/.
    """
    print("Extracting F0 Curve...")
    image_path = os.path.join("logs", "f0_plot.png")
    txt_path = os.path.join("logs", "f0_curve.txt")
    # Load only to obtain the native sample rate for the extractor.
    y, sr = librosa.load(audio_path, sr=None)
    hop_length = 160

    f0_extractor = F0Extractor(audio_path, sample_rate=sr, method=method)
    f0 = f0_extractor.extract_f0()

    plt.figure(figsize=(10, 4))
    plt.plot(f0)
    plt.title(method)
    plt.xlabel("Time (frames)")
    plt.ylabel("Frequency (Hz)")
    plt.savefig(image_path)
    plt.close()

    with open(txt_path, "w") as txtfile:
        for i, f0_value in enumerate(f0):
            # NOTE(review): this writes i * sr / hop_length as the first
            # column; the timestamp of frame i would normally be
            # i * hop_length / sr. Kept as-is to preserve the existing
            # output format — confirm intended meaning of this column.
            frequency = i * sr / hop_length
            txtfile.write(f"{frequency},{f0_value}\n")

    print("F0 Curve extracted successfully!")
    return image_path, txt_path
41
+
42
+
43
def f0_extractor_tab():
    """Build the F0-curve UI: audio upload, algorithm choice, plot + txt output."""
    audio = gr.Audio(label=i18n("Upload Audio"), type="filepath")
    f0_method = gr.Radio(
        label=i18n("Pitch extraction algorithm"),
        info=i18n(
            "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
        ),
        choices=["crepe", "fcpe", "rmvpe"],
        value="rmvpe",
    )
    button = gr.Button(i18n("Extract F0 Curve"))

    with gr.Row():
        txt_output = gr.File(label=i18n("F0 Curve"), type="filepath")
        image_output = gr.Image(type="filepath", interactive=False)

    # extract_f0_curve returns (image_path, txt_path) in that order.
    button.click(
        fn=extract_f0_curve,
        inputs=[
            audio,
            f0_method,
        ],
        outputs=[image_output, txt_output],
    )
tabs/extra/sections/processing.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ from core import run_model_information_script
9
+ from assets.i18n.i18n import I18nAuto
10
+
11
+ i18n = I18nAuto()
12
+
13
+
14
+ def processing_tab():
15
+ model_view_model_path = gr.Textbox(
16
+ label=i18n("Path to Model"),
17
+ info=i18n("Introduce the model pth path"),
18
+ value="",
19
+ interactive=True,
20
+ placeholder=i18n("Enter path to model"),
21
+ )
22
+
23
+ model_view_output_info = gr.Textbox(
24
+ label=i18n("Output Information"),
25
+ info=i18n("The output information will be displayed here."),
26
+ value="",
27
+ max_lines=11,
28
+ )
29
+ model_view_button = gr.Button(i18n("View"))
30
+ model_view_button.click(
31
+ fn=run_model_information_script,
32
+ inputs=[model_view_model_path],
33
+ outputs=[model_view_output_info],
34
+ )
tabs/inference/inference.py ADDED
@@ -0,0 +1,2334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+ import regex as re
4
+ import shutil
5
+ import datetime
6
+ import json
7
+ import torch
8
+
9
+ from core import (
10
+ run_infer_script,
11
+ run_batch_infer_script,
12
+ )
13
+
14
+ from assets.i18n.i18n import I18nAuto
15
+
16
+ from rvc.lib.utils import format_title
17
+ from tabs.settings.sections.restart import stop_infer
18
+ from tabs.settings.sections.filter import get_filter_trigger, load_config_filter
19
+
20
+ i18n = I18nAuto()
21
+
22
+ now_dir = os.getcwd()
23
+ sys.path.append(now_dir)
24
+
25
+ model_root = os.path.join(now_dir, "logs")
26
+ audio_root = os.path.join(now_dir, "assets", "audios")
27
+ custom_embedder_root = os.path.join(
28
+ now_dir, "rvc", "models", "embedders", "embedders_custom"
29
+ )
30
+
31
+ PRESETS_DIR = os.path.join(now_dir, "assets", "presets")
32
+ FORMANTSHIFT_DIR = os.path.join(now_dir, "assets", "formant_shift")
33
+
34
+ os.makedirs(custom_embedder_root, exist_ok=True)
35
+
36
+ custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir)
37
+ model_root_relative = os.path.relpath(model_root, now_dir)
38
+ audio_root_relative = os.path.relpath(audio_root, now_dir)
39
+
40
+ sup_audioext = {
41
+ "wav",
42
+ "mp3",
43
+ "flac",
44
+ "ogg",
45
+ "opus",
46
+ "m4a",
47
+ "mp4",
48
+ "aac",
49
+ "alac",
50
+ "wma",
51
+ "aiff",
52
+ "webm",
53
+ "ac3",
54
+ }
55
+
56
+
57
def normalize_path(p):
    """Normalize *p* for comparison: collapse separators, use '/', lowercase."""
    normalized = os.path.normpath(p)
    return normalized.replace("\\", "/").lower()
59
+
60
+
61
# BASE model/index folder names for many latin languages (legacy: zips = models)
# Matches e.g. "models"/"modelos"/"mdl"/"weights"/"zips" as model folders and
# "index"/"indices"/"idx" as index folders (whole folder name only).
MODEL_FOLDER = re.compile(r"^(?:model.{0,4}|mdl(?:s)?|weight.{0,4}|zip(?:s)?)$")
INDEX_FOLDER = re.compile(r"^(?:ind.{0,4}|idx(?:s)?)$")
64
+
65
+
66
def is_mdl_alias(name: str) -> bool:
    """True if *name* is a recognized model-folder alias (see MODEL_FOLDER)."""
    return MODEL_FOLDER.match(name) is not None
68
+
69
+
70
def is_idx_alias(name: str) -> bool:
    """True if *name* is a recognized index-folder alias (see INDEX_FOLDER)."""
    return INDEX_FOLDER.match(name) is not None
72
+
73
+
74
def alias_score(path: str, want_model: bool) -> int:
    """
    Handles duplicate files, compare file type to path and assign a score:
    2 = Path contains correct alias (e.g., model file in 'modelos/' folder)
    1 = Path contains opposite alias (e.g., model file in 'index/' folder)
    0 = Path contains no recognized aliases
    """
    segments = normalize_path(os.path.dirname(path)).split("/")
    found_model = any(is_mdl_alias(seg) for seg in segments)
    found_index = any(is_idx_alias(seg) for seg in segments)
    if want_model:
        correct, opposite = found_model, found_index
    else:
        correct, opposite = found_index, found_model
    if correct:
        return 2
    if opposite:
        return 1
    return 0
88
+
89
+
90
+ def get_files(type="model"):
91
+ assert type in ("model", "index"), "Invalid type for get_files (models or index)"
92
+ is_model = type == "model"
93
+ exts = (".pth", ".onnx") if is_model else (".index",)
94
+ exclude_prefixes = ("G_", "D_") if is_model else ()
95
+ exclude_substr = None if is_model else "trained"
96
+
97
+ best = {}
98
+ order = 0
99
+
100
+ for root, _, files in os.walk(model_root_relative, followlinks=True):
101
+ for file in files:
102
+ if not file.endswith(exts):
103
+ continue
104
+ if any(file.startswith(p) for p in exclude_prefixes):
105
+ continue
106
+ if exclude_substr and exclude_substr in file:
107
+ continue
108
+
109
+ full = os.path.join(root, file)
110
+ real = os.path.realpath(full)
111
+ score = alias_score(full, is_model)
112
+
113
+ prev = best.get(real)
114
+ if (
115
+ prev is None
116
+ ): # Prefer higher score; if equal score, use first encountered
117
+ best[real] = (score, order, full)
118
+ else:
119
+ prev_score, prev_order, _ = prev
120
+ if score > prev_score:
121
+ best[real] = (score, prev_order, full)
122
+ order += 1
123
+
124
+ return [t[2] for t in sorted(best.values(), key=lambda x: x[1])]
125
+
126
+
127
+ default_weight = next(iter(get_files("model")), None)
128
+
129
+ audio_paths = [
130
+ os.path.join(root, name)
131
+ for root, _, files in os.walk(audio_root_relative, topdown=False)
132
+ for name in files
133
+ if name.endswith(tuple(sup_audioext))
134
+ and root == audio_root_relative
135
+ and "_output" not in name
136
+ ]
137
+
138
+ custom_embedders = [
139
+ os.path.join(dirpath, dirname)
140
+ for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative)
141
+ for dirname in dirnames
142
+ ]
143
+
144
+
145
def update_sliders(preset):
    """Load the named inference preset and return its four slider values."""
    preset_file = os.path.join(PRESETS_DIR, f"{preset}.json")
    with open(preset_file, "r", encoding="utf-8") as json_file:
        values = json.load(json_file)
    return (
        values["pitch"],
        values["index_rate"],
        values["rms_mix_rate"],
        values["protect"],
    )
156
+
157
+
158
def update_sliders_formant(preset):
    """Load the named formant-shift preset and return (qfrency, timbre)."""
    preset_file = os.path.join(FORMANTSHIFT_DIR, f"{preset}.json")
    with open(preset_file, "r", encoding="utf-8") as json_file:
        values = json.load(json_file)
    return (
        values["formant_qfrency"],
        values["formant_timbre"],
    )
167
+
168
+
169
def export_presets(presets, file_path):
    """Serialize *presets* to *file_path* as pretty-printed UTF-8 JSON."""
    serialized = json.dumps(presets, ensure_ascii=False, indent=4)
    with open(file_path, "w", encoding="utf-8") as json_file:
        json_file.write(serialized)
172
+
173
+
174
def import_presets(file_path):
    """Load and return a presets dict from a UTF-8 JSON file."""
    with open(file_path, "r", encoding="utf-8") as json_file:
        return json.load(json_file)
178
+
179
+
180
def get_presets_data(pitch, index_rate, rms_mix_rate, protect):
    """Bundle the four inference slider values into a preset dict."""
    keys = ("pitch", "index_rate", "rms_mix_rate", "protect")
    return dict(zip(keys, (pitch, index_rate, rms_mix_rate, protect)))
187
+
188
+
189
def export_presets_button(preset_name, pitch, index_rate, rms_mix_rate, protect):
    """Save the current slider values as a named preset; return a status string."""
    if not preset_name:
        return "Export cancelled"
    file_path = os.path.join(PRESETS_DIR, f"{preset_name}.json")
    presets_data = get_presets_data(pitch, index_rate, rms_mix_rate, protect)
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(presets_data, json_file, ensure_ascii=False, indent=4)
    return "Export successful"
197
+
198
+
199
def import_presets_button(file_path):
    """Load presets from an uploaded file; return (names, presets, status)."""
    if not file_path:
        return [], {}, "No file selected for import."
    imported_presets = import_presets(file_path.name)
    return (
        list(imported_presets.keys()),
        imported_presets,
        "Presets imported successfully!",
    )
208
+
209
+
210
def list_json_files(directory):
    """Return the stem (name without extension) of each .json in *directory*."""
    return [
        name.rsplit(".", 1)[0]
        for name in os.listdir(directory)
        if name.endswith(".json")
    ]
212
+
213
+
214
def refresh_presets():
    """Re-scan the presets directory and refresh the dropdown choices."""
    return gr.update(choices=list_json_files(PRESETS_DIR))
217
+
218
+
219
def output_path_fn(input_audio_path):
    """Derive the default output path: '<input stem>_output.wav' next to the input."""
    directory = os.path.dirname(input_audio_path)
    stem = os.path.basename(input_audio_path).rsplit(".", 1)[0]
    return os.path.join(directory, stem + "_output.wav")
226
+
227
+
228
def change_choices(model):
    """Refresh the model, index, audio, and two speaker-id dropdowns.

    Args:
        model: Currently selected model path; used to re-read speaker ids.

    Returns:
        Five gradio update dicts in UI wiring order:
        (models, indexes, audios, speaker ids, speaker ids).
    """
    if model:
        speakers = get_speakers_id(model)
    else:
        speakers = [0]

    models_list = get_files("model")
    indexes_list = sorted(get_files("index"))

    # Only audio files directly in the audio root, excluding generated outputs.
    audio_paths = [
        os.path.join(root, name)
        for root, _, files in os.walk(audio_root_relative, topdown=False)
        for name in files
        if name.endswith(tuple(sup_audioext))
        and root == audio_root_relative
        and "_output" not in name
    ]

    return (
        {"choices": sorted(models_list), "__type__": "update"},
        {"choices": sorted(indexes_list), "__type__": "update"},
        {"choices": sorted(audio_paths), "__type__": "update"},
        # Guard: fall back to [0] if speakers is not a list/tuple.
        {
            "choices": (
                sorted(speakers)
                if speakers is not None and isinstance(speakers, (list, tuple))
                else [0]
            ),
            "__type__": "update",
        },
        {
            "choices": (
                sorted(speakers)
                if speakers is not None and isinstance(speakers, (list, tuple))
                else [0]
            ),
            "__type__": "update",
        },
    )
267
+
268
+
269
def extract_model_and_epoch(path):
    """Parse '<model>_<epoch>e_...' from a checkpoint filename.

    Returns (model_name, epoch), or ("", 0) when the name doesn't match —
    which sorts unparsable entries first when used as a sort key.
    """
    match = re.match(r"(.+?)_(\d+)e_", os.path.basename(path))
    if not match:
        return "", 0
    return match.group(1), int(match.group(2))
276
+
277
+
278
def save_to_wav(record_button):
    """Move a just-recorded temp file into the audio folder under a timestamp name.

    Returns (saved path, default output path), or None when nothing was recorded.
    """
    if record_button is None:
        return None
    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".wav"
    target_path = os.path.join(audio_root_relative, os.path.basename(new_name))
    shutil.move(record_button, target_path)
    return target_path, output_path_fn(target_path)
288
+
289
+
290
def save_to_wav2(upload_audio):
    """Copy an uploaded audio file into the audio folder under a sanitized name.

    Returns (saved path, default output path).
    """
    formated_name = format_title(os.path.basename(upload_audio))
    target_path = os.path.join(audio_root_relative, formated_name)
    # Overwrite any previous upload with the same sanitized name.
    if os.path.exists(target_path):
        os.remove(target_path)
    shutil.copy(upload_audio, target_path)
    return target_path, output_path_fn(target_path)
300
+
301
+
302
def delete_outputs():
    """Delete every generated '_output' audio file under the audio folder."""
    gr.Info("Outputs cleared!")
    for root, _, files in os.walk(audio_root_relative, topdown=False):
        for name in files:
            # Only touch files this app generated (marked with "_output").
            if name.endswith(tuple(sup_audioext)) and "_output" in name:
                os.remove(os.path.join(root, name))
308
+
309
+
310
def folders_same(
    a: str, b: str
) -> bool:  # Used to "pair" index and model folders based on path names
    """
    True if:
    1) The two normalized paths are totally identical..OR
    2) One lives under a MODEL_FOLDER and the other lives
    under an INDEX_FOLDER, at the same relative subpath
    i.e. logs/models/miku and logs/index/miku = "SAME FOLDER"
    """
    a = normalize_path(a)
    b = normalize_path(b)
    if a == b:
        return True

    def split_after_alias(p):
        # Return (alias folder name, remainder of path after it), or
        # (None, None) when the path contains no recognized alias folder.
        parts = p.split("/")
        for i, part in enumerate(parts):
            if is_mdl_alias(part) or is_idx_alias(part):
                base = part
                rel = "/".join(parts[i + 1 :])
                return base, rel
        return None, None

    base_a, rel_a = split_after_alias(a)
    base_b, rel_b = split_after_alias(b)

    if rel_a is None or rel_b is None:
        return False

    # Same subpath below the alias, and the two aliases are opposite kinds.
    if rel_a == rel_b and (
        (is_mdl_alias(base_a) and is_idx_alias(base_b))
        or (is_idx_alias(base_a) and is_mdl_alias(base_b))
    ):
        return True
    return False
346
+
347
+
348
def match_index(model_file_value):
    """Find the .index file that best matches the selected model file.

    Priority: exact name match in the same/paired folder; then the single
    index in that folder; then substring/prefix matches there; then exact,
    substring, and prefix matches in external folders. Returns "" when
    nothing plausible is found.
    """
    if not model_file_value:
        return ""

    # Derive the information about the model's name and path for index matching
    model_folder = normalize_path(os.path.dirname(model_file_value))
    model_name = os.path.basename(model_file_value)
    base_name = os.path.splitext(model_name)[0]
    # "common" strips training suffixes like _100e_..., _v2, etc.
    common = re.sub(r"[_\-\.\+](?:e|s|v|V)\d.*$", "", base_name)
    prefix_match = re.match(r"^(.*?)[_\-\.\+]", base_name)
    prefix = prefix_match.group(1) if prefix_match else None

    same_count = 0
    last_same = None
    same_substr = None
    same_prefixed = None
    external_exact = None
    external_substr = None
    external_pref = None

    for idx in get_files("index"):
        idx_folder = os.path.dirname(idx)
        idx_folder_n = normalize_path(idx_folder)
        idx_name = os.path.basename(idx)
        idx_base = os.path.splitext(idx_name)[0]

        in_same = folders_same(model_folder, idx_folder_n)
        if in_same:
            same_count += 1
            last_same = idx

            # 1) EXACT match to loaded model name and folders_same = True
            if idx_base == base_name:
                return idx

            # 2) Substring match to model name and folders_same
            if common in idx_base and same_substr is None:
                same_substr = idx

            # 3) Prefix match to model name and folders_same
            if prefix and idx_base.startswith(prefix) and same_prefixed is None:
                same_prefixed = idx

        # If it's NOT in a paired folder (folders_same = False) we look elseware:
        else:
            # 4) EXACT match to model name in external directory
            if idx_base == base_name and external_exact is None:
                external_exact = idx

            # 5) Substring match to model name in ED
            if common in idx_base and external_substr is None:
                external_substr = idx

            # 6) Prefix match to model name in ED
            if prefix and idx_base.startswith(prefix) and external_pref is None:
                external_pref = idx

    # Fallback: If there is exactly one index file in the same (or paired) folder,
    # we should assume that's the intended index file even if the name doesnt match
    if same_count == 1:
        return last_same

    # Then by remaining priority queue:
    if same_substr:
        return same_substr
    if same_prefixed:
        return same_prefixed
    if external_exact:
        return external_exact
    if external_substr:
        return external_substr
    if external_pref:
        return external_pref

    return ""
423
+
424
+
425
def create_folder_and_move_files(folder_name, bin_file, config_file):
    """Create a custom-embedder folder and copy the uploaded files into it.

    Args:
        folder_name: Target folder name (only the basename is kept).
        bin_file: Optional path to the embedder weights file.
        config_file: Optional path to the embedder config file.

    Returns:
        A human-readable status string.
    """
    if not folder_name:
        return "Folder name must not be empty."

    folder_name = os.path.basename(folder_name)
    target_folder = os.path.join(custom_embedder_root, folder_name)

    # Containment check via commonpath: a plain startswith() test can be
    # fooled by a sibling directory whose name shares the root as a prefix
    # (e.g. ".../custom_evil" startswith ".../custom").
    abs_target = os.path.abspath(target_folder)
    abs_root = os.path.abspath(custom_embedder_root)
    if os.path.commonpath([abs_target, abs_root]) != abs_root:
        return "Invalid folder name. Folder must be within the custom embedder root directory."

    os.makedirs(target_folder, exist_ok=True)

    if bin_file:
        shutil.copy(bin_file, os.path.join(target_folder, os.path.basename(bin_file)))
    if config_file:
        shutil.copy(
            config_file, os.path.join(target_folder, os.path.basename(config_file))
        )

    return f"Files moved to folder {target_folder}"
450
+
451
+
452
def refresh_formant():
    """Re-scan the formant-shift presets directory and refresh the dropdown."""
    return gr.update(choices=list_json_files(FORMANTSHIFT_DIR))
455
+
456
+
457
def refresh_embedders_folders():
    """Return every sub-folder under the custom embedders root."""
    found = []
    for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative):
        for dirname in dirnames:
            found.append(os.path.join(dirpath, dirname))
    return found
464
+
465
+
466
def get_speakers_id(model):
    """Return the list of speaker IDs stored in a model checkpoint.

    Args:
        model: Path to the model file relative to the project root. A falsy
            value (empty string / None) means no model is selected.

    Returns:
        list[int]: ``list(range(speakers_id))`` when the checkpoint declares
        a positive speaker count, otherwise the single-speaker fallback
        ``[0]``.
    """
    if not model:
        return [0]
    try:
        model_data = torch.load(
            os.path.join(now_dir, model), map_location="cpu", weights_only=True
        )
        speaker_count = model_data.get("speakers_id")
        # A missing or zero speaker count falls back to the default speaker.
        return list(range(speaker_count)) if speaker_count else [0]
    except Exception:
        # Unreadable or incompatible checkpoints degrade to a single speaker.
        return [0]
481
+
482
+
483
def filter_dropdowns(filter_text):
    """Filter the model and index dropdowns by a case-insensitive substring.

    Args:
        filter_text: Substring that each entry's path must contain.

    Returns:
        tuple: ``gr.update`` objects for the model and index dropdowns with
        their filtered choice lists.
    """
    needle = filter_text.lower()
    models = [
        name
        for name in sorted(get_files("model"), key=extract_model_and_epoch)
        if needle in name.lower()
    ]
    indexes = [name for name in sorted(get_files("index")) if needle in name.lower()]
    return (gr.update(choices=models), gr.update(choices=indexes))
490
+
491
+
492
def update_filter_visibility(_):
    """Show or hide the filter textbox based on the saved config flag.

    When filtering is disabled, the textbox is hidden and cleared and both
    dropdowns are reset to their unfiltered contents; when enabled, the
    textbox is shown and the dropdowns are left untouched.
    """
    if load_config_filter():
        return gr.update(visible=True), gr.skip(), gr.skip()
    models_update, indexes_update = filter_dropdowns("")
    return gr.update(visible=False, value=""), models_update, indexes_update
499
+
500
+
501
+ # Inference tab
502
+ def inference_tab():
503
+ trigger = get_filter_trigger()
504
+ with gr.Column():
505
+ with gr.Row():
506
+ model_file = gr.Dropdown(
507
+ label=i18n("Voice Model"),
508
+ info=i18n("Select the voice model to use for the conversion."),
509
+ choices=sorted(get_files("model"), key=extract_model_and_epoch),
510
+ value=default_weight,
511
+ interactive=True,
512
+ allow_custom_value=True,
513
+ )
514
+ filter_box_inf = gr.Textbox(
515
+ label=i18n("Filter"),
516
+ info=i18n("Path must contain:"),
517
+ placeholder=i18n("Type to filter..."),
518
+ interactive=True,
519
+ scale=0.1,
520
+ visible=load_config_filter(),
521
+ )
522
+ index_file = gr.Dropdown(
523
+ label=i18n("Index File"),
524
+ info=i18n("Select the index file to use for the conversion."),
525
+ choices=sorted(get_files("index")),
526
+ value=match_index(default_weight),
527
+ interactive=True,
528
+ allow_custom_value=True,
529
+ )
530
+ filter_box_inf.blur(
531
+ fn=filter_dropdowns,
532
+ inputs=[filter_box_inf],
533
+ outputs=[model_file, index_file],
534
+ )
535
+ trigger.change(
536
+ fn=update_filter_visibility,
537
+ inputs=[trigger],
538
+ outputs=[filter_box_inf, model_file, index_file],
539
+ show_progress=False,
540
+ )
541
+ with gr.Row():
542
+ unload_button = gr.Button(i18n("Unload Voice"))
543
+ refresh_button = gr.Button(i18n("Refresh"))
544
+
545
+ unload_button.click(
546
+ fn=lambda: (
547
+ {"value": "", "__type__": "update"},
548
+ {"value": "", "__type__": "update"},
549
+ ),
550
+ inputs=[],
551
+ outputs=[model_file, index_file],
552
+ )
553
+ model_file.select(
554
+ fn=lambda model_file_value: match_index(model_file_value),
555
+ inputs=[model_file],
556
+ outputs=[index_file],
557
+ )
558
+
559
+ # Single inference tab
560
+ with gr.Tab(i18n("Single")):
561
+ with gr.Column():
562
+ upload_audio = gr.Audio(
563
+ label=i18n("Upload Audio"), type="filepath", editable=False
564
+ )
565
+ with gr.Row():
566
+ audio = gr.Dropdown(
567
+ label=i18n("Select Audio"),
568
+ info=i18n("Select the audio to convert."),
569
+ choices=sorted(audio_paths),
570
+ value=audio_paths[0] if audio_paths else "",
571
+ interactive=True,
572
+ allow_custom_value=True,
573
+ )
574
+
575
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
576
+ with gr.Column():
577
+ clear_outputs_infer = gr.Button(
578
+ i18n("Clear Outputs (Deletes all audios in assets/audios)")
579
+ )
580
+ output_path = gr.Textbox(
581
+ label=i18n("Output Path"),
582
+ placeholder=i18n("Enter output path"),
583
+ info=i18n(
584
+ "The path where the output audio will be saved, by default in assets/audios/output.wav"
585
+ ),
586
+ value=(
587
+ output_path_fn(audio_paths[0])
588
+ if audio_paths
589
+ else os.path.join(now_dir, "assets", "audios", "output.wav")
590
+ ),
591
+ interactive=True,
592
+ )
593
+ export_format = gr.Radio(
594
+ label=i18n("Export Format"),
595
+ info=i18n("Select the format to export the audio."),
596
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
597
+ value="WAV",
598
+ interactive=True,
599
+ )
600
+ sid = gr.Dropdown(
601
+ label=i18n("Speaker ID"),
602
+ info=i18n("Select the speaker ID to use for the conversion."),
603
+ choices=get_speakers_id(model_file.value),
604
+ value=0,
605
+ interactive=True,
606
+ )
607
+ split_audio = gr.Checkbox(
608
+ label=i18n("Split Audio"),
609
+ info=i18n(
610
+ "Split the audio into chunks for inference to obtain better results in some cases."
611
+ ),
612
+ visible=True,
613
+ value=False,
614
+ interactive=True,
615
+ )
616
+ autotune = gr.Checkbox(
617
+ label=i18n("Autotune"),
618
+ info=i18n(
619
+ "Apply a soft autotune to your inferences, recommended for singing conversions."
620
+ ),
621
+ visible=True,
622
+ value=False,
623
+ interactive=True,
624
+ )
625
+ autotune_strength = gr.Slider(
626
+ minimum=0,
627
+ maximum=1,
628
+ label=i18n("Autotune Strength"),
629
+ info=i18n(
630
+ "Set the autotune strength - the more you increase it the more it will snap to the chromatic grid."
631
+ ),
632
+ visible=False,
633
+ value=1,
634
+ interactive=True,
635
+ )
636
+ proposed_pitch = gr.Checkbox(
637
+ label=i18n("Proposed Pitch"),
638
+ info=i18n(
639
+ "Adjust the input audio pitch to match the voice model range."
640
+ ),
641
+ visible=True,
642
+ value=False,
643
+ interactive=True,
644
+ )
645
+ proposed_pitch_threshold = gr.Slider(
646
+ minimum=50.0,
647
+ maximum=1200.0,
648
+ label=i18n("Proposed Pitch Threshold"),
649
+ info=i18n(
650
+ "Male voice models typically use 155.0 and female voice models typically use 255.0."
651
+ ),
652
+ visible=False,
653
+ value=155.0,
654
+ interactive=True,
655
+ )
656
+ clean_audio = gr.Checkbox(
657
+ label=i18n("Clean Audio"),
658
+ info=i18n(
659
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios."
660
+ ),
661
+ visible=True,
662
+ value=False,
663
+ interactive=True,
664
+ )
665
+ clean_strength = gr.Slider(
666
+ minimum=0,
667
+ maximum=1,
668
+ label=i18n("Clean Strength"),
669
+ info=i18n(
670
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
671
+ ),
672
+ visible=False,
673
+ value=0.5,
674
+ interactive=True,
675
+ )
676
+ formant_shifting = gr.Checkbox(
677
+ label=i18n("Formant Shifting"),
678
+ info=i18n(
679
+ "Enable formant shifting. Used for male to female and vice-versa convertions."
680
+ ),
681
+ value=False,
682
+ visible=True,
683
+ interactive=True,
684
+ )
685
+ post_process = gr.Checkbox(
686
+ label=i18n("Post-Process"),
687
+ info=i18n("Post-process the audio to apply effects to the output."),
688
+ value=False,
689
+ interactive=True,
690
+ )
691
+ with gr.Row(visible=False) as formant_row:
692
+ formant_preset = gr.Dropdown(
693
+ label=i18n("Browse presets for formanting"),
694
+ info=i18n(
695
+ "Presets are located in /assets/formant_shift folder"
696
+ ),
697
+ choices=list_json_files(FORMANTSHIFT_DIR),
698
+ visible=False,
699
+ interactive=True,
700
+ )
701
+ formant_refresh_button = gr.Button(
702
+ value="Refresh",
703
+ visible=False,
704
+ )
705
+ formant_qfrency = gr.Slider(
706
+ value=1.0,
707
+ info=i18n("Default value is 1.0"),
708
+ label=i18n("Quefrency for formant shifting"),
709
+ minimum=0.0,
710
+ maximum=16.0,
711
+ step=0.1,
712
+ visible=False,
713
+ interactive=True,
714
+ )
715
+ formant_timbre = gr.Slider(
716
+ value=1.0,
717
+ info=i18n("Default value is 1.0"),
718
+ label=i18n("Timbre for formant shifting"),
719
+ minimum=0.0,
720
+ maximum=16.0,
721
+ step=0.1,
722
+ visible=False,
723
+ interactive=True,
724
+ )
725
+ reverb = gr.Checkbox(
726
+ label=i18n("Reverb"),
727
+ info=i18n("Apply reverb to the audio."),
728
+ value=False,
729
+ interactive=True,
730
+ visible=False,
731
+ )
732
+ reverb_room_size = gr.Slider(
733
+ minimum=0,
734
+ maximum=1,
735
+ label=i18n("Reverb Room Size"),
736
+ info=i18n("Set the room size of the reverb."),
737
+ value=0.5,
738
+ interactive=True,
739
+ visible=False,
740
+ )
741
+ reverb_damping = gr.Slider(
742
+ minimum=0,
743
+ maximum=1,
744
+ label=i18n("Reverb Damping"),
745
+ info=i18n("Set the damping of the reverb."),
746
+ value=0.5,
747
+ interactive=True,
748
+ visible=False,
749
+ )
750
+ reverb_wet_gain = gr.Slider(
751
+ minimum=0,
752
+ maximum=1,
753
+ label=i18n("Reverb Wet Gain"),
754
+ info=i18n("Set the wet gain of the reverb."),
755
+ value=0.33,
756
+ interactive=True,
757
+ visible=False,
758
+ )
759
+ reverb_dry_gain = gr.Slider(
760
+ minimum=0,
761
+ maximum=1,
762
+ label=i18n("Reverb Dry Gain"),
763
+ info=i18n("Set the dry gain of the reverb."),
764
+ value=0.4,
765
+ interactive=True,
766
+ visible=False,
767
+ )
768
+ reverb_width = gr.Slider(
769
+ minimum=0,
770
+ maximum=1,
771
+ label=i18n("Reverb Width"),
772
+ info=i18n("Set the width of the reverb."),
773
+ value=1.0,
774
+ interactive=True,
775
+ visible=False,
776
+ )
777
+ reverb_freeze_mode = gr.Slider(
778
+ minimum=0,
779
+ maximum=1,
780
+ label=i18n("Reverb Freeze Mode"),
781
+ info=i18n("Set the freeze mode of the reverb."),
782
+ value=0.0,
783
+ interactive=True,
784
+ visible=False,
785
+ )
786
+ pitch_shift = gr.Checkbox(
787
+ label=i18n("Pitch Shift"),
788
+ info=i18n("Apply pitch shift to the audio."),
789
+ value=False,
790
+ interactive=True,
791
+ visible=False,
792
+ )
793
+ pitch_shift_semitones = gr.Slider(
794
+ minimum=-12,
795
+ maximum=12,
796
+ label=i18n("Pitch Shift Semitones"),
797
+ info=i18n("Set the pitch shift semitones."),
798
+ value=0,
799
+ interactive=True,
800
+ visible=False,
801
+ )
802
+ limiter = gr.Checkbox(
803
+ label=i18n("Limiter"),
804
+ info=i18n("Apply limiter to the audio."),
805
+ value=False,
806
+ interactive=True,
807
+ visible=False,
808
+ )
809
+ limiter_threshold = gr.Slider(
810
+ minimum=-60,
811
+ maximum=0,
812
+ label=i18n("Limiter Threshold dB"),
813
+ info=i18n("Set the limiter threshold dB."),
814
+ value=-6,
815
+ interactive=True,
816
+ visible=False,
817
+ )
818
+ limiter_release_time = gr.Slider(
819
+ minimum=0.01,
820
+ maximum=1,
821
+ label=i18n("Limiter Release Time"),
822
+ info=i18n("Set the limiter release time."),
823
+ value=0.05,
824
+ interactive=True,
825
+ visible=False,
826
+ )
827
+ gain = gr.Checkbox(
828
+ label=i18n("Gain"),
829
+ info=i18n("Apply gain to the audio."),
830
+ value=False,
831
+ interactive=True,
832
+ visible=False,
833
+ )
834
+ gain_db = gr.Slider(
835
+ minimum=-60,
836
+ maximum=60,
837
+ label=i18n("Gain dB"),
838
+ info=i18n("Set the gain dB."),
839
+ value=0,
840
+ interactive=True,
841
+ visible=False,
842
+ )
843
+ distortion = gr.Checkbox(
844
+ label=i18n("Distortion"),
845
+ info=i18n("Apply distortion to the audio."),
846
+ value=False,
847
+ interactive=True,
848
+ visible=False,
849
+ )
850
+ distortion_gain = gr.Slider(
851
+ minimum=-60,
852
+ maximum=60,
853
+ label=i18n("Distortion Gain"),
854
+ info=i18n("Set the distortion gain."),
855
+ value=25,
856
+ interactive=True,
857
+ visible=False,
858
+ )
859
+ chorus = gr.Checkbox(
860
+ label=i18n("Chorus"),
861
+ info=i18n("Apply chorus to the audio."),
862
+ value=False,
863
+ interactive=True,
864
+ visible=False,
865
+ )
866
+ chorus_rate = gr.Slider(
867
+ minimum=0,
868
+ maximum=100,
869
+ label=i18n("Chorus Rate Hz"),
870
+ info=i18n("Set the chorus rate Hz."),
871
+ value=1.0,
872
+ interactive=True,
873
+ visible=False,
874
+ )
875
+ chorus_depth = gr.Slider(
876
+ minimum=0,
877
+ maximum=1,
878
+ label=i18n("Chorus Depth"),
879
+ info=i18n("Set the chorus depth."),
880
+ value=0.25,
881
+ interactive=True,
882
+ visible=False,
883
+ )
884
+ chorus_center_delay = gr.Slider(
885
+ minimum=7,
886
+ maximum=8,
887
+ label=i18n("Chorus Center Delay ms"),
888
+ info=i18n("Set the chorus center delay ms."),
889
+ value=7,
890
+ interactive=True,
891
+ visible=False,
892
+ )
893
+ chorus_feedback = gr.Slider(
894
+ minimum=0,
895
+ maximum=1,
896
+ label=i18n("Chorus Feedback"),
897
+ info=i18n("Set the chorus feedback."),
898
+ value=0.0,
899
+ interactive=True,
900
+ visible=False,
901
+ )
902
+ chorus_mix = gr.Slider(
903
+ minimum=0,
904
+ maximum=1,
905
+ label=i18n("Chorus Mix"),
906
+ info=i18n("Set the chorus mix."),
907
+ value=0.5,
908
+ interactive=True,
909
+ visible=False,
910
+ )
911
+ bitcrush = gr.Checkbox(
912
+ label=i18n("Bitcrush"),
913
+ info=i18n("Apply bitcrush to the audio."),
914
+ value=False,
915
+ interactive=True,
916
+ visible=False,
917
+ )
918
+ bitcrush_bit_depth = gr.Slider(
919
+ minimum=1,
920
+ maximum=32,
921
+ label=i18n("Bitcrush Bit Depth"),
922
+ info=i18n("Set the bitcrush bit depth."),
923
+ value=8,
924
+ interactive=True,
925
+ visible=False,
926
+ )
927
+ clipping = gr.Checkbox(
928
+ label=i18n("Clipping"),
929
+ info=i18n("Apply clipping to the audio."),
930
+ value=False,
931
+ interactive=True,
932
+ visible=False,
933
+ )
934
+ clipping_threshold = gr.Slider(
935
+ minimum=-60,
936
+ maximum=0,
937
+ label=i18n("Clipping Threshold"),
938
+ info=i18n("Set the clipping threshold."),
939
+ value=-6,
940
+ interactive=True,
941
+ visible=False,
942
+ )
943
+ compressor = gr.Checkbox(
944
+ label=i18n("Compressor"),
945
+ info=i18n("Apply compressor to the audio."),
946
+ value=False,
947
+ interactive=True,
948
+ visible=False,
949
+ )
950
+ compressor_threshold = gr.Slider(
951
+ minimum=-60,
952
+ maximum=0,
953
+ label=i18n("Compressor Threshold dB"),
954
+ info=i18n("Set the compressor threshold dB."),
955
+ value=0,
956
+ interactive=True,
957
+ visible=False,
958
+ )
959
+ compressor_ratio = gr.Slider(
960
+ minimum=1,
961
+ maximum=20,
962
+ label=i18n("Compressor Ratio"),
963
+ info=i18n("Set the compressor ratio."),
964
+ value=1,
965
+ interactive=True,
966
+ visible=False,
967
+ )
968
+ compressor_attack = gr.Slider(
969
+ minimum=0.0,
970
+ maximum=100,
971
+ label=i18n("Compressor Attack ms"),
972
+ info=i18n("Set the compressor attack ms."),
973
+ value=1.0,
974
+ interactive=True,
975
+ visible=False,
976
+ )
977
+ compressor_release = gr.Slider(
978
+ minimum=0.01,
979
+ maximum=100,
980
+ label=i18n("Compressor Release ms"),
981
+ info=i18n("Set the compressor release ms."),
982
+ value=100,
983
+ interactive=True,
984
+ visible=False,
985
+ )
986
+ delay = gr.Checkbox(
987
+ label=i18n("Delay"),
988
+ info=i18n("Apply delay to the audio."),
989
+ value=False,
990
+ interactive=True,
991
+ visible=False,
992
+ )
993
+ delay_seconds = gr.Slider(
994
+ minimum=0.0,
995
+ maximum=5.0,
996
+ label=i18n("Delay Seconds"),
997
+ info=i18n("Set the delay seconds."),
998
+ value=0.5,
999
+ interactive=True,
1000
+ visible=False,
1001
+ )
1002
+ delay_feedback = gr.Slider(
1003
+ minimum=0.0,
1004
+ maximum=1.0,
1005
+ label=i18n("Delay Feedback"),
1006
+ info=i18n("Set the delay feedback."),
1007
+ value=0.0,
1008
+ interactive=True,
1009
+ visible=False,
1010
+ )
1011
+ delay_mix = gr.Slider(
1012
+ minimum=0.0,
1013
+ maximum=1.0,
1014
+ label=i18n("Delay Mix"),
1015
+ info=i18n("Set the delay mix."),
1016
+ value=0.5,
1017
+ interactive=True,
1018
+ visible=False,
1019
+ )
1020
+ with gr.Accordion(i18n("Preset Settings"), open=False):
1021
+ with gr.Row():
1022
+ preset_dropdown = gr.Dropdown(
1023
+ label=i18n("Select Custom Preset"),
1024
+ choices=list_json_files(PRESETS_DIR),
1025
+ interactive=True,
1026
+ )
1027
+ presets_refresh_button = gr.Button(i18n("Refresh Presets"))
1028
+ import_file = gr.File(
1029
+ label=i18n("Select file to import"),
1030
+ file_count="single",
1031
+ type="filepath",
1032
+ interactive=True,
1033
+ )
1034
+ import_file.change(
1035
+ import_presets_button,
1036
+ inputs=import_file,
1037
+ outputs=[preset_dropdown],
1038
+ )
1039
+ presets_refresh_button.click(
1040
+ refresh_presets, outputs=preset_dropdown
1041
+ )
1042
+ with gr.Row():
1043
+ preset_name_input = gr.Textbox(
1044
+ label=i18n("Preset Name"),
1045
+ placeholder=i18n("Enter preset name"),
1046
+ )
1047
+ export_button = gr.Button(i18n("Export Preset"))
1048
+ pitch = gr.Slider(
1049
+ minimum=-24,
1050
+ maximum=24,
1051
+ step=1,
1052
+ label=i18n("Pitch"),
1053
+ info=i18n(
1054
+ "Set the pitch of the audio, the higher the value, the higher the pitch."
1055
+ ),
1056
+ value=0,
1057
+ interactive=True,
1058
+ )
1059
+ index_rate = gr.Slider(
1060
+ minimum=0,
1061
+ maximum=1,
1062
+ label=i18n("Search Feature Ratio"),
1063
+ info=i18n(
1064
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
1065
+ ),
1066
+ value=0.75,
1067
+ interactive=True,
1068
+ )
1069
+ rms_mix_rate = gr.Slider(
1070
+ minimum=0,
1071
+ maximum=1,
1072
+ label=i18n("Volume Envelope"),
1073
+ info=i18n(
1074
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
1075
+ ),
1076
+ value=1,
1077
+ interactive=True,
1078
+ )
1079
+ protect = gr.Slider(
1080
+ minimum=0,
1081
+ maximum=0.5,
1082
+ label=i18n("Protect Voiceless Consonants"),
1083
+ info=i18n(
1084
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
1085
+ ),
1086
+ value=0.5,
1087
+ interactive=True,
1088
+ )
1089
+ preset_dropdown.change(
1090
+ update_sliders,
1091
+ inputs=preset_dropdown,
1092
+ outputs=[
1093
+ pitch,
1094
+ index_rate,
1095
+ rms_mix_rate,
1096
+ protect,
1097
+ ],
1098
+ )
1099
+ export_button.click(
1100
+ export_presets_button,
1101
+ inputs=[
1102
+ preset_name_input,
1103
+ pitch,
1104
+ index_rate,
1105
+ rms_mix_rate,
1106
+ protect,
1107
+ ],
1108
+ )
1109
+ f0_method = gr.Radio(
1110
+ label=i18n("Pitch extraction algorithm"),
1111
+ info=i18n(
1112
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
1113
+ ),
1114
+ choices=[
1115
+ "crepe",
1116
+ "crepe-tiny",
1117
+ "rmvpe",
1118
+ "fcpe",
1119
+ "swift",
1120
+ ],
1121
+ value="rmvpe",
1122
+ interactive=True,
1123
+ )
1124
+ embedder_model = gr.Radio(
1125
+ label=i18n("Embedder Model"),
1126
+ info=i18n("Model used for learning speaker embedding."),
1127
+ choices=[
1128
+ "contentvec",
1129
+ "spin",
1130
+ "spin-v2",
1131
+ "chinese-hubert-base",
1132
+ "japanese-hubert-base",
1133
+ "korean-hubert-base",
1134
+ "custom",
1135
+ ],
1136
+ value="contentvec",
1137
+ interactive=True,
1138
+ )
1139
+ with gr.Column(visible=False) as embedder_custom:
1140
+ with gr.Accordion(i18n("Custom Embedder"), open=True):
1141
+ with gr.Row():
1142
+ embedder_model_custom = gr.Dropdown(
1143
+ label=i18n("Select Custom Embedder"),
1144
+ choices=refresh_embedders_folders(),
1145
+ interactive=True,
1146
+ allow_custom_value=True,
1147
+ )
1148
+ refresh_embedders_button = gr.Button(
1149
+ i18n("Refresh embedders")
1150
+ )
1151
+ folder_name_input = gr.Textbox(
1152
+ label=i18n("Folder Name"), interactive=True
1153
+ )
1154
+ with gr.Row():
1155
+ bin_file_upload = gr.File(
1156
+ label=i18n("Upload .bin"),
1157
+ type="filepath",
1158
+ interactive=True,
1159
+ )
1160
+ config_file_upload = gr.File(
1161
+ label=i18n("Upload .json"),
1162
+ type="filepath",
1163
+ interactive=True,
1164
+ )
1165
+ move_files_button = gr.Button(
1166
+ i18n("Move files to custom embedder folder")
1167
+ )
1168
+
1169
+ def enforce_terms(terms_accepted, *args):
1170
+ if not terms_accepted:
1171
+ message = "You must agree to the Terms of Use to proceed."
1172
+ gr.Info(message)
1173
+ return message, None
1174
+ return run_infer_script(*args)
1175
+
1176
+ def enforce_terms_batch(terms_accepted, *args):
1177
+ if not terms_accepted:
1178
+ message = "You must agree to the Terms of Use to proceed."
1179
+ gr.Info(message)
1180
+ return message, None
1181
+ return run_batch_infer_script(*args)
1182
+
1183
+ terms_checkbox = gr.Checkbox(
1184
+ label=i18n("I agree to the terms of use"),
1185
+ info=i18n(
1186
+ "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your inference."
1187
+ ),
1188
+ value=False,
1189
+ interactive=True,
1190
+ )
1191
+
1192
+ convert_button1 = gr.Button(i18n("Convert"))
1193
+
1194
+ with gr.Row():
1195
+ vc_output1 = gr.Textbox(
1196
+ label=i18n("Output Information"),
1197
+ info=i18n("The output information will be displayed here."),
1198
+ )
1199
+ vc_output2 = gr.Audio(label=i18n("Export Audio"))
1200
+
1201
+ # Batch inference tab
1202
+ with gr.Tab(i18n("Batch")):
1203
+ with gr.Row():
1204
+ with gr.Column():
1205
+ input_folder_batch = gr.Textbox(
1206
+ label=i18n("Input Folder"),
1207
+ info=i18n("Select the folder containing the audios to convert."),
1208
+ placeholder=i18n("Enter input path"),
1209
+ value=os.path.join(now_dir, "assets", "audios"),
1210
+ interactive=True,
1211
+ )
1212
+ output_folder_batch = gr.Textbox(
1213
+ label=i18n("Output Folder"),
1214
+ info=i18n(
1215
+ "Select the folder where the output audios will be saved."
1216
+ ),
1217
+ placeholder=i18n("Enter output path"),
1218
+ value=os.path.join(now_dir, "assets", "audios"),
1219
+ interactive=True,
1220
+ )
1221
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
1222
+ with gr.Column():
1223
+ clear_outputs_batch = gr.Button(
1224
+ i18n("Clear Outputs (Deletes all audios in assets/audios)")
1225
+ )
1226
+ export_format_batch = gr.Radio(
1227
+ label=i18n("Export Format"),
1228
+ info=i18n("Select the format to export the audio."),
1229
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
1230
+ value="WAV",
1231
+ interactive=True,
1232
+ )
1233
+ sid_batch = gr.Dropdown(
1234
+ label=i18n("Speaker ID"),
1235
+ info=i18n("Select the speaker ID to use for the conversion."),
1236
+ choices=get_speakers_id(model_file.value),
1237
+ value=0,
1238
+ interactive=True,
1239
+ )
1240
+ split_audio_batch = gr.Checkbox(
1241
+ label=i18n("Split Audio"),
1242
+ info=i18n(
1243
+ "Split the audio into chunks for inference to obtain better results in some cases."
1244
+ ),
1245
+ visible=True,
1246
+ value=False,
1247
+ interactive=True,
1248
+ )
1249
+ autotune_batch = gr.Checkbox(
1250
+ label=i18n("Autotune"),
1251
+ info=i18n(
1252
+ "Apply a soft autotune to your inferences, recommended for singing conversions."
1253
+ ),
1254
+ visible=True,
1255
+ value=False,
1256
+ interactive=True,
1257
+ )
1258
+ autotune_strength_batch = gr.Slider(
1259
+ minimum=0,
1260
+ maximum=1,
1261
+ label=i18n("Autotune Strength"),
1262
+ info=i18n(
1263
+ "Set the autotune strength - the more you increase it the more it will snap to the chromatic grid."
1264
+ ),
1265
+ visible=False,
1266
+ value=1,
1267
+ interactive=True,
1268
+ )
1269
+ proposed_pitch_batch = gr.Checkbox(
1270
+ label=i18n("Proposed Pitch"),
1271
+ info=i18n(
1272
+ "Adjust the input audio pitch to match the voice model range."
1273
+ ),
1274
+ visible=True,
1275
+ value=False,
1276
+ interactive=True,
1277
+ )
1278
+ proposed_pitch_threshold_batch = gr.Slider(
1279
+ minimum=50.0,
1280
+ maximum=1200.0,
1281
+ label=i18n("Proposed Pitch Threshold"),
1282
+ info=i18n(
1283
+ "Male voice models typically use 155.0 and female voice models typically use 255.0."
1284
+ ),
1285
+ visible=False,
1286
+ value=155.0,
1287
+ interactive=True,
1288
+ )
1289
+ clean_audio_batch = gr.Checkbox(
1290
+ label=i18n("Clean Audio"),
1291
+ info=i18n(
1292
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios."
1293
+ ),
1294
+ visible=True,
1295
+ value=False,
1296
+ interactive=True,
1297
+ )
1298
+ clean_strength_batch = gr.Slider(
1299
+ minimum=0,
1300
+ maximum=1,
1301
+ label=i18n("Clean Strength"),
1302
+ info=i18n(
1303
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
1304
+ ),
1305
+ visible=False,
1306
+ value=0.5,
1307
+ interactive=True,
1308
+ )
1309
+ formant_shifting_batch = gr.Checkbox(
1310
+ label=i18n("Formant Shifting"),
1311
+ info=i18n(
1312
+ "Enable formant shifting. Used for male to female and vice-versa convertions."
1313
+ ),
1314
+ value=False,
1315
+ visible=True,
1316
+ interactive=True,
1317
+ )
1318
+ post_process_batch = gr.Checkbox(
1319
+ label=i18n("Post-Process"),
1320
+ info=i18n("Post-process the audio to apply effects to the output."),
1321
+ value=False,
1322
+ interactive=True,
1323
+ )
1324
+ with gr.Row(visible=False) as formant_row_batch:
1325
+ formant_preset_batch = gr.Dropdown(
1326
+ label=i18n("Browse presets for formanting"),
1327
+ info=i18n(
1328
+ "Presets are located in /assets/formant_shift folder"
1329
+ ),
1330
+ choices=list_json_files(FORMANTSHIFT_DIR),
1331
+ visible=False,
1332
+ interactive=True,
1333
+ )
1334
+ formant_refresh_button_batch = gr.Button(
1335
+ value="Refresh",
1336
+ visible=False,
1337
+ )
1338
+ formant_qfrency_batch = gr.Slider(
1339
+ value=1.0,
1340
+ info=i18n("Default value is 1.0"),
1341
+ label=i18n("Quefrency for formant shifting"),
1342
+ minimum=0.0,
1343
+ maximum=16.0,
1344
+ step=0.1,
1345
+ visible=False,
1346
+ interactive=True,
1347
+ )
1348
+ formant_timbre_batch = gr.Slider(
1349
+ value=1.0,
1350
+ info=i18n("Default value is 1.0"),
1351
+ label=i18n("Timbre for formant shifting"),
1352
+ minimum=0.0,
1353
+ maximum=16.0,
1354
+ step=0.1,
1355
+ visible=False,
1356
+ interactive=True,
1357
+ )
1358
+ reverb_batch = gr.Checkbox(
1359
+ label=i18n("Reverb"),
1360
+ info=i18n("Apply reverb to the audio."),
1361
+ value=False,
1362
+ interactive=True,
1363
+ visible=False,
1364
+ )
1365
+ reverb_room_size_batch = gr.Slider(
1366
+ minimum=0,
1367
+ maximum=1,
1368
+ label=i18n("Reverb Room Size"),
1369
+ info=i18n("Set the room size of the reverb."),
1370
+ value=0.5,
1371
+ interactive=True,
1372
+ visible=False,
1373
+ )
1374
+ reverb_damping_batch = gr.Slider(
1375
+ minimum=0,
1376
+ maximum=1,
1377
+ label=i18n("Reverb Damping"),
1378
+ info=i18n("Set the damping of the reverb."),
1379
+ value=0.5,
1380
+ interactive=True,
1381
+ visible=False,
1382
+ )
1383
+ reverb_wet_gain_batch = gr.Slider(
1384
+ minimum=0,
1385
+ maximum=1,
1386
+ label=i18n("Reverb Wet Gain"),
1387
+ info=i18n("Set the wet gain of the reverb."),
1388
+ value=0.33,
1389
+ interactive=True,
1390
+ visible=False,
1391
+ )
1392
+ reverb_dry_gain_batch = gr.Slider(
1393
+ minimum=0,
1394
+ maximum=1,
1395
+ label=i18n("Reverb Dry Gain"),
1396
+ info=i18n("Set the dry gain of the reverb."),
1397
+ value=0.4,
1398
+ interactive=True,
1399
+ visible=False,
1400
+ )
1401
+ reverb_width_batch = gr.Slider(
1402
+ minimum=0,
1403
+ maximum=1,
1404
+ label=i18n("Reverb Width"),
1405
+ info=i18n("Set the width of the reverb."),
1406
+ value=1.0,
1407
+ interactive=True,
1408
+ visible=False,
1409
+ )
1410
+ reverb_freeze_mode_batch = gr.Slider(
1411
+ minimum=0,
1412
+ maximum=1,
1413
+ label=i18n("Reverb Freeze Mode"),
1414
+ info=i18n("Set the freeze mode of the reverb."),
1415
+ value=0.0,
1416
+ interactive=True,
1417
+ visible=False,
1418
+ )
1419
+ pitch_shift_batch = gr.Checkbox(
1420
+ label=i18n("Pitch Shift"),
1421
+ info=i18n("Apply pitch shift to the audio."),
1422
+ value=False,
1423
+ interactive=True,
1424
+ visible=False,
1425
+ )
1426
+ pitch_shift_semitones_batch = gr.Slider(
1427
+ minimum=-12,
1428
+ maximum=12,
1429
+ label=i18n("Pitch Shift Semitones"),
1430
+ info=i18n("Set the pitch shift semitones."),
1431
+ value=0,
1432
+ interactive=True,
1433
+ visible=False,
1434
+ )
1435
+ limiter_batch = gr.Checkbox(
1436
+ label=i18n("Limiter"),
1437
+ info=i18n("Apply limiter to the audio."),
1438
+ value=False,
1439
+ interactive=True,
1440
+ visible=False,
1441
+ )
1442
+ limiter_threshold_batch = gr.Slider(
1443
+ minimum=-60,
1444
+ maximum=0,
1445
+ label=i18n("Limiter Threshold dB"),
1446
+ info=i18n("Set the limiter threshold dB."),
1447
+ value=-6,
1448
+ interactive=True,
1449
+ visible=False,
1450
+ )
1451
+ limiter_release_time_batch = gr.Slider(
1452
+ minimum=0.01,
1453
+ maximum=1,
1454
+ label=i18n("Limiter Release Time"),
1455
+ info=i18n("Set the limiter release time."),
1456
+ value=0.05,
1457
+ interactive=True,
1458
+ visible=False,
1459
+ )
1460
+ gain_batch = gr.Checkbox(
1461
+ label=i18n("Gain"),
1462
+ info=i18n("Apply gain to the audio."),
1463
+ value=False,
1464
+ interactive=True,
1465
+ visible=False,
1466
+ )
1467
+ gain_db_batch = gr.Slider(
1468
+ minimum=-60,
1469
+ maximum=60,
1470
+ label=i18n("Gain dB"),
1471
+ info=i18n("Set the gain dB."),
1472
+ value=0,
1473
+ interactive=True,
1474
+ visible=False,
1475
+ )
1476
+ distortion_batch = gr.Checkbox(
1477
+ label=i18n("Distortion"),
1478
+ info=i18n("Apply distortion to the audio."),
1479
+ value=False,
1480
+ interactive=True,
1481
+ visible=False,
1482
+ )
1483
+ distortion_gain_batch = gr.Slider(
1484
+ minimum=-60,
1485
+ maximum=60,
1486
+ label=i18n("Distortion Gain"),
1487
+ info=i18n("Set the distortion gain."),
1488
+ value=25,
1489
+ interactive=True,
1490
+ visible=False,
1491
+ )
1492
+ chorus_batch = gr.Checkbox(
1493
+ label=i18n("Chorus"),
1494
+ info=i18n("Apply chorus to the audio."),
1495
+ value=False,
1496
+ interactive=True,
1497
+ visible=False,
1498
+ )
1499
+ chorus_rate_batch = gr.Slider(
1500
+ minimum=0,
1501
+ maximum=100,
1502
+ label=i18n("Chorus Rate Hz"),
1503
+ info=i18n("Set the chorus rate Hz."),
1504
+ value=1.0,
1505
+ interactive=True,
1506
+ visible=False,
1507
+ )
1508
+ chorus_depth_batch = gr.Slider(
1509
+ minimum=0,
1510
+ maximum=1,
1511
+ label=i18n("Chorus Depth"),
1512
+ info=i18n("Set the chorus depth."),
1513
+ value=0.25,
1514
+ interactive=True,
1515
+ visible=False,
1516
+ )
1517
+ chorus_center_delay_batch = gr.Slider(
1518
+ minimum=7,
1519
+ maximum=8,
1520
+ label=i18n("Chorus Center Delay ms"),
1521
+ info=i18n("Set the chorus center delay ms."),
1522
+ value=7,
1523
+ interactive=True,
1524
+ visible=False,
1525
+ )
1526
+ chorus_feedback_batch = gr.Slider(
1527
+ minimum=0,
1528
+ maximum=1,
1529
+ label=i18n("Chorus Feedback"),
1530
+ info=i18n("Set the chorus feedback."),
1531
+ value=0.0,
1532
+ interactive=True,
1533
+ visible=False,
1534
+ )
1535
+ chorus_mix_batch = gr.Slider(
1536
+ minimum=0,
1537
+ maximum=1,
1538
+ label=i18n("Chorus Mix"),
1539
+ info=i18n("Set the chorus mix."),
1540
+ value=0.5,
1541
+ interactive=True,
1542
+ visible=False,
1543
+ )
1544
+ bitcrush_batch = gr.Checkbox(
1545
+ label=i18n("Bitcrush"),
1546
+ info=i18n("Apply bitcrush to the audio."),
1547
+ value=False,
1548
+ interactive=True,
1549
+ visible=False,
1550
+ )
1551
+ bitcrush_bit_depth_batch = gr.Slider(
1552
+ minimum=1,
1553
+ maximum=32,
1554
+ label=i18n("Bitcrush Bit Depth"),
1555
+ info=i18n("Set the bitcrush bit depth."),
1556
+ value=8,
1557
+ interactive=True,
1558
+ visible=False,
1559
+ )
1560
+ clipping_batch = gr.Checkbox(
1561
+ label=i18n("Clipping"),
1562
+ info=i18n("Apply clipping to the audio."),
1563
+ value=False,
1564
+ interactive=True,
1565
+ visible=False,
1566
+ )
1567
+ clipping_threshold_batch = gr.Slider(
1568
+ minimum=-60,
1569
+ maximum=0,
1570
+ label=i18n("Clipping Threshold"),
1571
+ info=i18n("Set the clipping threshold."),
1572
+ value=-6,
1573
+ interactive=True,
1574
+ visible=False,
1575
+ )
1576
+ compressor_batch = gr.Checkbox(
1577
+ label=i18n("Compressor"),
1578
+ info=i18n("Apply compressor to the audio."),
1579
+ value=False,
1580
+ interactive=True,
1581
+ visible=False,
1582
+ )
1583
+ compressor_threshold_batch = gr.Slider(
1584
+ minimum=-60,
1585
+ maximum=0,
1586
+ label=i18n("Compressor Threshold dB"),
1587
+ info=i18n("Set the compressor threshold dB."),
1588
+ value=0,
1589
+ interactive=True,
1590
+ visible=False,
1591
+ )
1592
+ compressor_ratio_batch = gr.Slider(
1593
+ minimum=1,
1594
+ maximum=20,
1595
+ label=i18n("Compressor Ratio"),
1596
+ info=i18n("Set the compressor ratio."),
1597
+ value=1,
1598
+ interactive=True,
1599
+ visible=False,
1600
+ )
1601
+ compressor_attack_batch = gr.Slider(
1602
+ minimum=0.0,
1603
+ maximum=100,
1604
+ label=i18n("Compressor Attack ms"),
1605
+ info=i18n("Set the compressor attack ms."),
1606
+ value=1.0,
1607
+ interactive=True,
1608
+ visible=False,
1609
+ )
1610
+ compressor_release_batch = gr.Slider(
1611
+ minimum=0.01,
1612
+ maximum=100,
1613
+ label=i18n("Compressor Release ms"),
1614
+ info=i18n("Set the compressor release ms."),
1615
+ value=100,
1616
+ interactive=True,
1617
+ visible=False,
1618
+ )
1619
+ delay_batch = gr.Checkbox(
1620
+ label=i18n("Delay"),
1621
+ info=i18n("Apply delay to the audio."),
1622
+ value=False,
1623
+ interactive=True,
1624
+ visible=False,
1625
+ )
1626
+ delay_seconds_batch = gr.Slider(
1627
+ minimum=0.0,
1628
+ maximum=5.0,
1629
+ label=i18n("Delay Seconds"),
1630
+ info=i18n("Set the delay seconds."),
1631
+ value=0.5,
1632
+ interactive=True,
1633
+ visible=False,
1634
+ )
1635
+ delay_feedback_batch = gr.Slider(
1636
+ minimum=0.0,
1637
+ maximum=1.0,
1638
+ label=i18n("Delay Feedback"),
1639
+ info=i18n("Set the delay feedback."),
1640
+ value=0.0,
1641
+ interactive=True,
1642
+ visible=False,
1643
+ )
1644
+ delay_mix_batch = gr.Slider(
1645
+ minimum=0.0,
1646
+ maximum=1.0,
1647
+ label=i18n("Delay Mix"),
1648
+ info=i18n("Set the delay mix."),
1649
+ value=0.5,
1650
+ interactive=True,
1651
+ visible=False,
1652
+ )
1653
+ with gr.Accordion(i18n("Preset Settings"), open=False):
1654
+ with gr.Row():
1655
+ preset_dropdown = gr.Dropdown(
1656
+ label=i18n("Select Custom Preset"),
1657
+ interactive=True,
1658
+ )
1659
+ presets_batch_refresh_button = gr.Button(
1660
+ i18n("Refresh Presets")
1661
+ )
1662
+ import_file = gr.File(
1663
+ label=i18n("Select file to import"),
1664
+ file_count="single",
1665
+ type="filepath",
1666
+ interactive=True,
1667
+ )
1668
+ import_file.change(
1669
+ import_presets_button,
1670
+ inputs=import_file,
1671
+ outputs=[preset_dropdown],
1672
+ )
1673
+ presets_batch_refresh_button.click(
1674
+ refresh_presets, outputs=preset_dropdown
1675
+ )
1676
+ with gr.Row():
1677
+ preset_name_input = gr.Textbox(
1678
+ label=i18n("Preset Name"),
1679
+ placeholder=i18n("Enter preset name"),
1680
+ )
1681
+ export_button = gr.Button(i18n("Export Preset"))
1682
+ pitch_batch = gr.Slider(
1683
+ minimum=-24,
1684
+ maximum=24,
1685
+ step=1,
1686
+ label=i18n("Pitch"),
1687
+ info=i18n(
1688
+ "Set the pitch of the audio, the higher the value, the higher the pitch."
1689
+ ),
1690
+ value=0,
1691
+ interactive=True,
1692
+ )
1693
+ index_rate_batch = gr.Slider(
1694
+ minimum=0,
1695
+ maximum=1,
1696
+ label=i18n("Search Feature Ratio"),
1697
+ info=i18n(
1698
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
1699
+ ),
1700
+ value=0.75,
1701
+ interactive=True,
1702
+ )
1703
+ rms_mix_rate_batch = gr.Slider(
1704
+ minimum=0,
1705
+ maximum=1,
1706
+ label=i18n("Volume Envelope"),
1707
+ info=i18n(
1708
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
1709
+ ),
1710
+ value=1,
1711
+ interactive=True,
1712
+ )
1713
+ protect_batch = gr.Slider(
1714
+ minimum=0,
1715
+ maximum=0.5,
1716
+ label=i18n("Protect Voiceless Consonants"),
1717
+ info=i18n(
1718
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
1719
+ ),
1720
+ value=0.5,
1721
+ interactive=True,
1722
+ )
1723
+ preset_dropdown.change(
1724
+ update_sliders,
1725
+ inputs=preset_dropdown,
1726
+ outputs=[
1727
+ pitch_batch,
1728
+ index_rate_batch,
1729
+ rms_mix_rate_batch,
1730
+ protect_batch,
1731
+ ],
1732
+ )
1733
# NOTE(review): this export button lives in the batch tab, so it must read
# the batch sliders. The original wired in the single-file sliders
# (pitch / index_rate / protect) alongside rms_mix_rate_batch, so exported
# presets captured values from the wrong tab.
export_button.click(
    export_presets_button,
    inputs=[
        preset_name_input,
        pitch_batch,
        index_rate_batch,
        rms_mix_rate_batch,
        protect_batch,
    ],
    outputs=[],
)
1744
+ f0_method_batch = gr.Radio(
1745
+ label=i18n("Pitch extraction algorithm"),
1746
+ info=i18n(
1747
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
1748
+ ),
1749
+ choices=[
1750
+ "crepe",
1751
+ "crepe-tiny",
1752
+ "rmvpe",
1753
+ "fcpe",
1754
+ "swift",
1755
+ ],
1756
+ value="rmvpe",
1757
+ interactive=True,
1758
+ )
1759
+ embedder_model_batch = gr.Radio(
1760
+ label=i18n("Embedder Model"),
1761
+ info=i18n("Model used for learning speaker embedding."),
1762
+ choices=[
1763
+ "contentvec",
1764
+ "spin",
1765
+ "spin-v2",
1766
+ "chinese-hubert-base",
1767
+ "japanese-hubert-base",
1768
+ "korean-hubert-base",
1769
+ "custom",
1770
+ ],
1771
+ value="contentvec",
1772
+ interactive=True,
1773
+ )
1774
+ with gr.Column(visible=False) as embedder_custom_batch:
1775
+ with gr.Accordion(i18n("Custom Embedder"), open=True):
1776
+ with gr.Row():
1777
+ embedder_model_custom_batch = gr.Dropdown(
1778
+ label=i18n("Select Custom Embedder"),
1779
+ choices=refresh_embedders_folders(),
1780
+ interactive=True,
1781
+ allow_custom_value=True,
1782
+ )
1783
+ refresh_embedders_button_batch = gr.Button(
1784
+ i18n("Refresh embedders")
1785
+ )
1786
+ folder_name_input_batch = gr.Textbox(
1787
+ label=i18n("Folder Name"), interactive=True
1788
+ )
1789
+ with gr.Row():
1790
+ bin_file_upload_batch = gr.File(
1791
+ label=i18n("Upload .bin"),
1792
+ type="filepath",
1793
+ interactive=True,
1794
+ )
1795
+ config_file_upload_batch = gr.File(
1796
+ label=i18n("Upload .json"),
1797
+ type="filepath",
1798
+ interactive=True,
1799
+ )
1800
+ move_files_button_batch = gr.Button(
1801
+ i18n("Move files to custom embedder folder")
1802
+ )
1803
+
1804
+ terms_checkbox_batch = gr.Checkbox(
1805
+ label=i18n("I agree to the terms of use"),
1806
+ info=i18n(
1807
+ "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your inference."
1808
+ ),
1809
+ value=False,
1810
+ interactive=True,
1811
+ )
1812
+ convert_button_batch = gr.Button(i18n("Convert"))
1813
+ stop_button = gr.Button(i18n("Stop convert"), visible=False)
1814
+ stop_button.click(fn=stop_infer, inputs=[], outputs=[])
1815
+
1816
+ with gr.Row():
1817
+ vc_output3 = gr.Textbox(
1818
+ label=i18n("Output Information"),
1819
+ info=i18n("The output information will be displayed here."),
1820
+ )
1821
+
1822
def toggle_visible(checkbox):
    """Return a Gradio update dict whose visibility mirrors *checkbox*."""
    return {"__type__": "update", "visible": checkbox}
1824
+
1825
def toggle_visible_embedder_custom(embedder_model):
    """Show the custom-embedder column only when 'custom' is selected."""
    return {"visible": embedder_model == "custom", "__type__": "update"}
1829
+
1830
def enable_stop_convert_button():
    """Hide the convert button and reveal the stop button (conversion started)."""
    hide = {"visible": False, "__type__": "update"}
    show = {"visible": True, "__type__": "update"}
    return hide, show
1835
+
1836
def disable_stop_convert_button():
    """Reveal the convert button and hide the stop button (conversion done)."""
    show = {"visible": True, "__type__": "update"}
    hide = {"visible": False, "__type__": "update"}
    return show, hide
1841
+
1842
def toggle_visible_formant_shifting(checkbox):
    """Toggle the five formant-shifting controls' visibility together.

    Returns one gr.update per control (row, preset dropdown, refresh
    button, quefrency slider, timbre slider), all sharing *checkbox*'s
    visibility.
    """
    return tuple(gr.update(visible=checkbox) for _ in range(5))
1859
+
1860
def update_visibility(checkbox, count):
    """Return *count* Gradio updates, each with visibility set to *checkbox*."""
    updates = []
    for _ in range(count):
        updates.append(gr.update(visible=checkbox))
    return updates
1862
+
1863
# Per-effect wrappers: each toggles exactly the number of controls that
# belong to one post-processing effect (counts match the outputs wired
# to the corresponding .change() handlers below).
def post_process_visible(checkbox):
    """Toggle the 10 top-level post-process effect checkboxes."""
    return update_visibility(checkbox, 10)

def reverb_visible(checkbox):
    """Toggle the 6 reverb parameter sliders."""
    return update_visibility(checkbox, 6)

def limiter_visible(checkbox):
    """Toggle the 2 limiter parameter sliders."""
    return update_visibility(checkbox, 2)

def chorus_visible(checkbox):
    """Toggle the 6 chorus parameter sliders."""
    return update_visibility(checkbox, 6)

def bitcrush_visible(checkbox):
    """Toggle the single bitcrush depth slider."""
    return update_visibility(checkbox, 1)

def compress_visible(checkbox):
    """Toggle the 4 compressor parameter sliders."""
    return update_visibility(checkbox, 4)

def delay_visible(checkbox):
    """Toggle the 3 delay parameter sliders."""
    return update_visibility(checkbox, 3)
1883
+
1884
+ autotune.change(
1885
+ fn=toggle_visible,
1886
+ inputs=[autotune],
1887
+ outputs=[autotune_strength],
1888
+ )
1889
+ proposed_pitch.change(
1890
+ fn=toggle_visible,
1891
+ inputs=[proposed_pitch],
1892
+ outputs=[proposed_pitch_threshold],
1893
+ )
1894
+ proposed_pitch_batch.change(
1895
+ fn=toggle_visible,
1896
+ inputs=[proposed_pitch_batch],
1897
+ outputs=[proposed_pitch_threshold_batch],
1898
+ )
1899
+ clean_audio.change(
1900
+ fn=toggle_visible,
1901
+ inputs=[clean_audio],
1902
+ outputs=[clean_strength],
1903
+ )
1904
+ formant_shifting.change(
1905
+ fn=toggle_visible_formant_shifting,
1906
+ inputs=[formant_shifting],
1907
+ outputs=[
1908
+ formant_row,
1909
+ formant_preset,
1910
+ formant_refresh_button,
1911
+ formant_qfrency,
1912
+ formant_timbre,
1913
+ ],
1914
+ )
1915
# NOTE(review): the batch handler must listen to the batch checkbox; the
# original passed inputs=[formant_shifting] (the single-file tab's
# checkbox), so toggling the batch checkbox never updated these controls.
formant_shifting_batch.change(
    fn=toggle_visible_formant_shifting,
    inputs=[formant_shifting_batch],
    outputs=[
        formant_row_batch,
        formant_preset_batch,
        formant_refresh_button_batch,
        formant_qfrency_batch,
        formant_timbre_batch,
    ],
)
1926
+ formant_refresh_button.click(
1927
+ fn=refresh_formant,
1928
+ inputs=[],
1929
+ outputs=[formant_preset],
1930
+ )
1931
+ formant_preset.change(
1932
+ fn=update_sliders_formant,
1933
+ inputs=[formant_preset],
1934
+ outputs=[
1935
+ formant_qfrency,
1936
+ formant_timbre,
1937
+ ],
1938
+ )
1939
# NOTE(review): selecting a preset in the batch tab must update the batch
# sliders; the original wrote to formant_qfrency / formant_timbre (the
# single-file tab's sliders), leaving the batch sliders untouched.
formant_preset_batch.change(
    fn=update_sliders_formant,
    inputs=[formant_preset_batch],
    outputs=[
        formant_qfrency_batch,
        formant_timbre_batch,
    ],
)
1947
+ post_process.change(
1948
+ fn=post_process_visible,
1949
+ inputs=[post_process],
1950
+ outputs=[
1951
+ reverb,
1952
+ pitch_shift,
1953
+ limiter,
1954
+ gain,
1955
+ distortion,
1956
+ chorus,
1957
+ bitcrush,
1958
+ clipping,
1959
+ compressor,
1960
+ delay,
1961
+ ],
1962
+ )
1963
+ reverb.change(
1964
+ fn=reverb_visible,
1965
+ inputs=[reverb],
1966
+ outputs=[
1967
+ reverb_room_size,
1968
+ reverb_damping,
1969
+ reverb_wet_gain,
1970
+ reverb_dry_gain,
1971
+ reverb_width,
1972
+ reverb_freeze_mode,
1973
+ ],
1974
+ )
1975
+ pitch_shift.change(
1976
+ fn=toggle_visible,
1977
+ inputs=[pitch_shift],
1978
+ outputs=[pitch_shift_semitones],
1979
+ )
1980
+ limiter.change(
1981
+ fn=limiter_visible,
1982
+ inputs=[limiter],
1983
+ outputs=[limiter_threshold, limiter_release_time],
1984
+ )
1985
+ gain.change(
1986
+ fn=toggle_visible,
1987
+ inputs=[gain],
1988
+ outputs=[gain_db],
1989
+ )
1990
+ distortion.change(
1991
+ fn=toggle_visible,
1992
+ inputs=[distortion],
1993
+ outputs=[distortion_gain],
1994
+ )
1995
+ chorus.change(
1996
+ fn=chorus_visible,
1997
+ inputs=[chorus],
1998
+ outputs=[
1999
+ chorus_rate,
2000
+ chorus_depth,
2001
+ chorus_center_delay,
2002
+ chorus_feedback,
2003
+ chorus_mix,
2004
+ ],
2005
+ )
2006
+ bitcrush.change(
2007
+ fn=bitcrush_visible,
2008
+ inputs=[bitcrush],
2009
+ outputs=[bitcrush_bit_depth],
2010
+ )
2011
+ clipping.change(
2012
+ fn=toggle_visible,
2013
+ inputs=[clipping],
2014
+ outputs=[clipping_threshold],
2015
+ )
2016
+ compressor.change(
2017
+ fn=compress_visible,
2018
+ inputs=[compressor],
2019
+ outputs=[
2020
+ compressor_threshold,
2021
+ compressor_ratio,
2022
+ compressor_attack,
2023
+ compressor_release,
2024
+ ],
2025
+ )
2026
+ delay.change(
2027
+ fn=delay_visible,
2028
+ inputs=[delay],
2029
+ outputs=[delay_seconds, delay_feedback, delay_mix],
2030
+ )
2031
+ post_process_batch.change(
2032
+ fn=post_process_visible,
2033
+ inputs=[post_process_batch],
2034
+ outputs=[
2035
+ reverb_batch,
2036
+ pitch_shift_batch,
2037
+ limiter_batch,
2038
+ gain_batch,
2039
+ distortion_batch,
2040
+ chorus_batch,
2041
+ bitcrush_batch,
2042
+ clipping_batch,
2043
+ compressor_batch,
2044
+ delay_batch,
2045
+ ],
2046
+ )
2047
+ reverb_batch.change(
2048
+ fn=reverb_visible,
2049
+ inputs=[reverb_batch],
2050
+ outputs=[
2051
+ reverb_room_size_batch,
2052
+ reverb_damping_batch,
2053
+ reverb_wet_gain_batch,
2054
+ reverb_dry_gain_batch,
2055
+ reverb_width_batch,
2056
+ reverb_freeze_mode_batch,
2057
+ ],
2058
+ )
2059
+ pitch_shift_batch.change(
2060
+ fn=toggle_visible,
2061
+ inputs=[pitch_shift_batch],
2062
+ outputs=[pitch_shift_semitones_batch],
2063
+ )
2064
+ limiter_batch.change(
2065
+ fn=limiter_visible,
2066
+ inputs=[limiter_batch],
2067
+ outputs=[limiter_threshold_batch, limiter_release_time_batch],
2068
+ )
2069
+ gain_batch.change(
2070
+ fn=toggle_visible,
2071
+ inputs=[gain_batch],
2072
+ outputs=[gain_db_batch],
2073
+ )
2074
+ distortion_batch.change(
2075
+ fn=toggle_visible,
2076
+ inputs=[distortion_batch],
2077
+ outputs=[distortion_gain_batch],
2078
+ )
2079
+ chorus_batch.change(
2080
+ fn=chorus_visible,
2081
+ inputs=[chorus_batch],
2082
+ outputs=[
2083
+ chorus_rate_batch,
2084
+ chorus_depth_batch,
2085
+ chorus_center_delay_batch,
2086
+ chorus_feedback_batch,
2087
+ chorus_mix_batch,
2088
+ ],
2089
+ )
2090
+ bitcrush_batch.change(
2091
+ fn=bitcrush_visible,
2092
+ inputs=[bitcrush_batch],
2093
+ outputs=[bitcrush_bit_depth_batch],
2094
+ )
2095
+ clipping_batch.change(
2096
+ fn=toggle_visible,
2097
+ inputs=[clipping_batch],
2098
+ outputs=[clipping_threshold_batch],
2099
+ )
2100
+ compressor_batch.change(
2101
+ fn=compress_visible,
2102
+ inputs=[compressor_batch],
2103
+ outputs=[
2104
+ compressor_threshold_batch,
2105
+ compressor_ratio_batch,
2106
+ compressor_attack_batch,
2107
+ compressor_release_batch,
2108
+ ],
2109
+ )
2110
+ delay_batch.change(
2111
+ fn=delay_visible,
2112
+ inputs=[delay_batch],
2113
+ outputs=[delay_seconds_batch, delay_feedback_batch, delay_mix_batch],
2114
+ )
2115
+ autotune_batch.change(
2116
+ fn=toggle_visible,
2117
+ inputs=[autotune_batch],
2118
+ outputs=[autotune_strength_batch],
2119
+ )
2120
+ clean_audio_batch.change(
2121
+ fn=toggle_visible,
2122
+ inputs=[clean_audio_batch],
2123
+ outputs=[clean_strength_batch],
2124
+ )
2125
+ refresh_button.click(
2126
+ fn=change_choices,
2127
+ inputs=[model_file],
2128
+ outputs=[model_file, index_file, audio, sid, sid_batch],
2129
+ ).then(
2130
+ fn=filter_dropdowns,
2131
+ inputs=[filter_box_inf],
2132
+ outputs=[model_file, index_file],
2133
+ )
2134
+ audio.change(
2135
+ fn=output_path_fn,
2136
+ inputs=[audio],
2137
+ outputs=[output_path],
2138
+ )
2139
+ upload_audio.upload(
2140
+ fn=save_to_wav2,
2141
+ inputs=[upload_audio],
2142
+ outputs=[audio, output_path],
2143
+ )
2144
+ upload_audio.stop_recording(
2145
+ fn=save_to_wav,
2146
+ inputs=[upload_audio],
2147
+ outputs=[audio, output_path],
2148
+ )
2149
+ clear_outputs_infer.click(
2150
+ fn=delete_outputs,
2151
+ inputs=[],
2152
+ outputs=[],
2153
+ )
2154
+ clear_outputs_batch.click(
2155
+ fn=delete_outputs,
2156
+ inputs=[],
2157
+ outputs=[],
2158
+ )
2159
+ embedder_model.change(
2160
+ fn=toggle_visible_embedder_custom,
2161
+ inputs=[embedder_model],
2162
+ outputs=[embedder_custom],
2163
+ )
2164
+ embedder_model_batch.change(
2165
+ fn=toggle_visible_embedder_custom,
2166
+ inputs=[embedder_model_batch],
2167
+ outputs=[embedder_custom_batch],
2168
+ )
2169
+ move_files_button.click(
2170
+ fn=create_folder_and_move_files,
2171
+ inputs=[folder_name_input, bin_file_upload, config_file_upload],
2172
+ outputs=[],
2173
+ )
2174
+ refresh_embedders_button.click(
2175
+ fn=lambda: gr.update(choices=refresh_embedders_folders()),
2176
+ inputs=[],
2177
+ outputs=[embedder_model_custom],
2178
+ )
2179
+ move_files_button_batch.click(
2180
+ fn=create_folder_and_move_files,
2181
+ inputs=[
2182
+ folder_name_input_batch,
2183
+ bin_file_upload_batch,
2184
+ config_file_upload_batch,
2185
+ ],
2186
+ outputs=[],
2187
+ )
2188
+ refresh_embedders_button_batch.click(
2189
+ fn=lambda: gr.update(choices=refresh_embedders_folders()),
2190
+ inputs=[],
2191
+ outputs=[embedder_model_custom_batch],
2192
+ )
2193
+ convert_button1.click(
2194
+ fn=enforce_terms,
2195
+ inputs=[
2196
+ terms_checkbox,
2197
+ pitch,
2198
+ index_rate,
2199
+ rms_mix_rate,
2200
+ protect,
2201
+ f0_method,
2202
+ audio,
2203
+ output_path,
2204
+ model_file,
2205
+ index_file,
2206
+ split_audio,
2207
+ autotune,
2208
+ autotune_strength,
2209
+ proposed_pitch,
2210
+ proposed_pitch_threshold,
2211
+ clean_audio,
2212
+ clean_strength,
2213
+ export_format,
2214
+ embedder_model,
2215
+ embedder_model_custom,
2216
+ formant_shifting,
2217
+ formant_qfrency,
2218
+ formant_timbre,
2219
+ post_process,
2220
+ reverb,
2221
+ pitch_shift,
2222
+ limiter,
2223
+ gain,
2224
+ distortion,
2225
+ chorus,
2226
+ bitcrush,
2227
+ clipping,
2228
+ compressor,
2229
+ delay,
2230
+ reverb_room_size,
2231
+ reverb_damping,
2232
+ reverb_wet_gain,
2233
+ reverb_dry_gain,
2234
+ reverb_width,
2235
+ reverb_freeze_mode,
2236
+ pitch_shift_semitones,
2237
+ limiter_threshold,
2238
+ limiter_release_time,
2239
+ gain_db,
2240
+ distortion_gain,
2241
+ chorus_rate,
2242
+ chorus_depth,
2243
+ chorus_center_delay,
2244
+ chorus_feedback,
2245
+ chorus_mix,
2246
+ bitcrush_bit_depth,
2247
+ clipping_threshold,
2248
+ compressor_threshold,
2249
+ compressor_ratio,
2250
+ compressor_attack,
2251
+ compressor_release,
2252
+ delay_seconds,
2253
+ delay_feedback,
2254
+ delay_mix,
2255
+ sid,
2256
+ ],
2257
+ outputs=[vc_output1, vc_output2],
2258
+ )
2259
+ convert_button_batch.click(
2260
+ fn=enforce_terms_batch,
2261
+ inputs=[
2262
+ terms_checkbox_batch,
2263
+ pitch_batch,
2264
+ index_rate_batch,
2265
+ rms_mix_rate_batch,
2266
+ protect_batch,
2267
+ f0_method_batch,
2268
+ input_folder_batch,
2269
+ output_folder_batch,
2270
+ model_file,
2271
+ index_file,
2272
+ split_audio_batch,
2273
+ autotune_batch,
2274
+ autotune_strength_batch,
2275
+ proposed_pitch_batch,
2276
+ proposed_pitch_threshold_batch,
2277
+ clean_audio_batch,
2278
+ clean_strength_batch,
2279
+ export_format_batch,
2280
+ embedder_model_batch,
2281
+ embedder_model_custom_batch,
2282
+ formant_shifting_batch,
2283
+ formant_qfrency_batch,
2284
+ formant_timbre_batch,
2285
+ post_process_batch,
2286
+ reverb_batch,
2287
+ pitch_shift_batch,
2288
+ limiter_batch,
2289
+ gain_batch,
2290
+ distortion_batch,
2291
+ chorus_batch,
2292
+ bitcrush_batch,
2293
+ clipping_batch,
2294
+ compressor_batch,
2295
+ delay_batch,
2296
+ reverb_room_size_batch,
2297
+ reverb_damping_batch,
2298
+ reverb_wet_gain_batch,
2299
+ reverb_dry_gain_batch,
2300
+ reverb_width_batch,
2301
+ reverb_freeze_mode_batch,
2302
+ pitch_shift_semitones_batch,
2303
+ limiter_threshold_batch,
2304
+ limiter_release_time_batch,
2305
+ gain_db_batch,
2306
+ distortion_gain_batch,
2307
+ chorus_rate_batch,
2308
+ chorus_depth_batch,
2309
+ chorus_center_delay_batch,
2310
+ chorus_feedback_batch,
2311
+ chorus_mix_batch,
2312
+ bitcrush_bit_depth_batch,
2313
+ clipping_threshold_batch,
2314
+ compressor_threshold_batch,
2315
+ compressor_ratio_batch,
2316
+ compressor_attack_batch,
2317
+ compressor_release_batch,
2318
+ delay_seconds_batch,
2319
+ delay_feedback_batch,
2320
+ delay_mix_batch,
2321
+ sid_batch,
2322
+ ],
2323
+ outputs=[vc_output3],
2324
+ )
2325
+ convert_button_batch.click(
2326
+ fn=enable_stop_convert_button,
2327
+ inputs=[],
2328
+ outputs=[convert_button_batch, stop_button],
2329
+ )
2330
+ stop_button.click(
2331
+ fn=disable_stop_convert_button,
2332
+ inputs=[],
2333
+ outputs=[convert_button_batch, stop_button],
2334
+ )
tabs/plugins/plugins.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+ import importlib.util
4
+ import tabs.plugins.plugins_core as plugins_core
5
+
6
+ from assets.i18n.i18n import I18nAuto
7
+
8
+ i18n = I18nAuto()
9
+
10
+ now_dir = os.getcwd()
11
+ sys.path.append(now_dir)
12
+
13
+ plugins_core.check_new_folders()
14
+
15
+
16
def plugins_tab():
    """Build the Plugins tab: a zip-drop installer plus one sub-tab per
    installed plugin.

    Each plugin is imported as ``tabs.plugins.installed.<name>.plugin`` and
    must expose an ``applio_plugin()`` function that renders its UI.
    """
    with gr.TabItem(i18n("Plugin Installer")):
        dropbox = gr.File(
            label=i18n("Drag your plugin.zip to install it"),
            type="filepath",
        )

        dropbox.upload(
            fn=plugins_core.save_plugin_dropbox,
            inputs=[dropbox],
            outputs=[dropbox],
        )

    installed_dir = os.path.join(now_dir, "tabs", "plugins", "installed")
    for plugin in os.listdir(installed_dir):
        # Skip cache folders and stray files (e.g. __pycache__, leftover
        # archives) so importlib only sees real plugin packages; the
        # original imported every directory entry and crashed on them.
        if plugin == "__pycache__" or not os.path.isdir(
            os.path.join(installed_dir, plugin)
        ):
            continue
        plugin_import = importlib.import_module(
            f"tabs.plugins.installed.{plugin}.plugin"
        )
        with gr.TabItem(plugin):
            plugin_import.applio_plugin()
tabs/plugins/plugins_core.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys, shutil
2
+ import json
3
+ import gradio as gr
4
+ import zipfile
5
+ import subprocess
6
+
7
+ from assets.i18n.i18n import I18nAuto
8
+
9
+ i18n = I18nAuto()
10
+
11
+ now_dir = os.getcwd()
12
+ sys.path.append(now_dir)
13
+
14
+ from tabs.settings.sections.restart import restart_applio
15
+
16
+ plugins_path = os.path.join(now_dir, "tabs", "plugins", "installed")
17
+ if not os.path.exists(plugins_path):
18
+ os.makedirs(plugins_path)
19
+ json_file_path = os.path.join(now_dir, "assets", "config.json")
20
+ current_folders = os.listdir(plugins_path)
21
+
22
+
23
def get_existing_folders():
    """Return the list of plugin folders recorded in assets/config.json.

    Returns an empty list when the config file does not exist or has no
    "plugins" key, so a fresh install doesn't crash with KeyError (the
    original indexed config["plugins"] unconditionally).
    """
    if not os.path.exists(json_file_path):
        return []
    with open(json_file_path, "r") as file:
        config = json.load(file)
    return config.get("plugins", [])
30
+
31
+
32
def save_existing_folders(existing_folders):
    """Persist *existing_folders* under the "plugins" key of assets/config.json,
    preserving every other key in the file."""
    with open(json_file_path, "r") as cfg_in:
        cfg = json.load(cfg_in)
    cfg["plugins"] = existing_folders
    with open(json_file_path, "w") as cfg_out:
        json.dump(cfg, cfg_out, indent=2)
38
+
39
+
40
def save_plugin_dropbox(dropbox):
    """Install a plugin dropped as a zip file.

    Moves the archive into the plugins folder, extracts it, installs the
    plugin's requirements (if any), records the plugin in the config, and
    restarts Applio so the new tab is picked up.

    Parameters
    ----------
    dropbox : str
        Filesystem path of the uploaded file.

    Raises
    ------
    gr.Error
        If the uploaded file is not a .zip archive.
    """
    # Proper extension check: the original tested `"zip" not in dropbox`,
    # which any path merely *containing* "zip" would pass.
    if not dropbox.endswith(".zip"):
        raise gr.Error(
            message="The file you dropped is not a valid plugin.zip. Please try again."
        )

    file_name = os.path.basename(dropbox)
    folder_name = file_name.split(".zip")[0]
    folder_path = os.path.join(plugins_path, folder_name)
    zip_file_path = os.path.join(plugins_path, file_name)

    # Remove a previous install of the same plugin. It is a directory, so
    # shutil.rmtree is required; the original called os.remove on the bare
    # folder *name*, which could never delete the old install.
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)

    shutil.move(dropbox, zip_file_path)
    print("Proceeding with the extraction...")

    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
        zip_ref.extractall(plugins_path)
    os.remove(zip_file_path)

    requirements = os.path.join(folder_path, "requirements.txt")
    if os.path.exists(requirements):
        # On Windows Applio ships its own interpreter under env/.
        python = os.path.join("env", "python.exe") if os.name == "nt" else "python"
        subprocess.run([python, "-m", "pip", "install", "-r", requirements])
    else:
        print("No requirements.txt file found in the plugin folder.")

    save_existing_folders(get_existing_folders() + [folder_name])

    message = f"{folder_name} plugin installed in {plugins_path}! Restarting applio to apply the changes."
    print(message)
    gr.Info(message)
    restart_applio()
    return None
97
+
98
+
99
def check_new_folders():
    """Detect plugin folders added on disk since the last run, install their
    requirements, record the current folder list, and restart Applio if
    anything new was found."""
    known = set(get_existing_folders())
    save_existing_folders(current_folders)
    new_folders = set(current_folders) - known
    if not new_folders:
        return
    for folder in new_folders:
        plugin_dir = os.path.join(plugins_path, folder)
        print(f"New plugin {folder} found, installing it...")

        requirements = os.path.join(plugin_dir, "requirements.txt")
        if os.path.exists(requirements):
            # Windows installs ship their own interpreter under env/.
            if os.name == "nt":
                python = os.path.join("env", "python.exe")
            else:
                python = "python"
            subprocess.run([python, "-m", "pip", "install", "-r", requirements])
        else:
            print("No requirements.txt file found in the plugin folder.")
    print("Plugins checked and installed! Restarting applio to apply the changes.")
    restart_applio()
tabs/realtime/realtime.py ADDED
@@ -0,0 +1,1129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sounddevice as sd
3
+ import os
4
+ import sys
5
+ import time
6
+ import json
7
+ import regex as re
8
+ import shutil
9
+ import torch
10
+
11
+ now_dir = os.getcwd()
12
+ sys.path.append(now_dir)
13
+
14
+ from rvc.realtime.callbacks import AudioCallbacks
15
+ from rvc.realtime.audio import list_audio_device
16
+ from rvc.realtime.core import AUDIO_SAMPLE_RATE
17
+
18
+ from assets.i18n.i18n import I18nAuto
19
+
20
+ i18n = I18nAuto()
21
+
22
+ model_root = os.path.join(now_dir, "logs")
23
+ custom_embedder_root = os.path.join(
24
+ now_dir, "rvc", "models", "embedders", "embedders_custom"
25
+ )
26
+
27
+ os.makedirs(custom_embedder_root, exist_ok=True)
28
+
29
+ custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir)
30
+ model_root_relative = os.path.relpath(model_root, now_dir)
31
+
32
def normalize_path(p):
    """Canonicalize *p* for comparison: normpath, forward slashes, lower-case."""
    normalized = os.path.normpath(p)
    return normalized.replace("\\", "/").lower()
34
+
35
# Folder-name aliases: users keep models and indexes under loosely named
# directories ("models", "mdl", "weights", "index", "idx", ...); these
# anchored patterns recognize the common variants.
MODEL_FOLDER = re.compile(r"^(?:model.{0,4}|mdl(?:s)?|weight.{0,4}|zip(?:s)?)$")
INDEX_FOLDER = re.compile(r"^(?:ind.{0,4}|idx(?:s)?)$")


def is_mdl_alias(name: str) -> bool:
    """True if *name* looks like a model-holding folder."""
    return MODEL_FOLDER.match(name) is not None


def is_idx_alias(name: str) -> bool:
    """True if *name* looks like an index-holding folder."""
    return INDEX_FOLDER.match(name) is not None
45
+
46
+
47
def alias_score(path: str, want_model: bool) -> int:
    """Rank how well *path*'s parent folders match the wanted file kind.

    2 = some ancestor folder matches the wanted alias,
    1 = only the opposite alias appears,
    0 = neither alias appears anywhere in the parent path.
    """
    folders = normalize_path(os.path.dirname(path)).split("/")
    under_model = any(map(is_mdl_alias, folders))
    under_index = any(map(is_idx_alias, folders))
    if want_model:
        if under_model:
            return 2
        return 1 if under_index else 0
    if under_index:
        return 2
    return 1 if under_model else 0
55
+
56
+
57
def get_files(type="model"):
    """Collect model (.pth/.onnx) or index (.index) files under the logs tree.

    Walks the logs directory (following symlinks), skipping checkpoint
    files (G_*/D_*) for models and "trained" files for indexes.
    Symlinked duplicates are collapsed by real path, preferring the copy
    that lives under the better-matching alias folder while preserving
    first-seen ordering.
    """
    assert type in ("model", "index"), "Invalid type for get_files (models or index)"
    want_model = type == "model"
    if want_model:
        valid_exts = (".pth", ".onnx")
        skip_prefixes = ("G_", "D_")
        skip_substr = None
    else:
        valid_exts = (".index",)
        skip_prefixes = ()
        skip_substr = "trained"

    chosen = {}  # real path -> (alias score, first-seen order, display path)
    seen = 0

    for root, _, files in os.walk(model_root_relative, followlinks=True):
        for name in files:
            if not name.endswith(valid_exts):
                continue
            if skip_prefixes and name.startswith(skip_prefixes):
                continue
            if skip_substr is not None and skip_substr in name:
                continue

            candidate = os.path.join(root, name)
            real = os.path.realpath(candidate)
            score = alias_score(candidate, want_model)

            entry = chosen.get(real)
            if entry is None:
                # First sighting of this real file: record it as-is.
                chosen[real] = (score, seen, candidate)
            elif score > entry[0]:
                # Better alias match: keep the new path but the original order.
                chosen[real] = (score, entry[1], candidate)
            seen += 1

    return [path for _, _, path in sorted(chosen.values(), key=lambda e: e[1])]
92
+
93
+
94
def folders_same(a: str, b: str) -> bool:
    """Decide whether two folders should be treated as the same location.

    True when the normalized paths are identical, or when one lives under a
    model-alias folder and the other under an index-alias folder at the
    same relative subpath (e.g. logs/models/miku vs logs/index/miku).
    Used to pair model and index directories by path shape.
    """
    a = normalize_path(a)
    b = normalize_path(b)
    if a == b:
        return True

    def tail_after_alias(path):
        # Return (alias folder, remainder after it), or (None, None) when
        # no segment of the path is a recognized alias.
        segments = path.split("/")
        for i, segment in enumerate(segments):
            if is_mdl_alias(segment) or is_idx_alias(segment):
                return segment, "/".join(segments[i + 1 :])
        return None, None

    alias_a, rest_a = tail_after_alias(a)
    alias_b, rest_b = tail_after_alias(b)

    if rest_a is None or rest_b is None:
        return False
    if rest_a != rest_b:
        return False

    # Same subpath: they pair only when the aliases are of opposite kinds.
    return (is_mdl_alias(alias_a) and is_idx_alias(alias_b)) or (
        is_idx_alias(alias_a) and is_mdl_alias(alias_b)
    )
130
+
131
+
132
def match_index(model_file_value):
    """Pick the best-matching .index file for a given model file path.

    Candidates from get_files("index") are ranked by a fixed priority:
      1) exact base-name match in the same/paired folder (returned immediately),
      2) substring match in the same/paired folder,
      3) prefix match in the same/paired folder,
      4) exact match in an external folder,
      5) substring match in an external folder,
      6) prefix match in an external folder.
    Special fallback: if exactly one index lives in the same/paired folder it
    wins regardless of name. Returns "" when nothing matches.
    """
    if not model_file_value:
        return ""

    # Derive the information about the model's name and path for index matching
    model_folder = normalize_path(os.path.dirname(model_file_value))
    model_name = os.path.basename(model_file_value)
    base_name = os.path.splitext(model_name)[0]
    # Strip trailing epoch/step/version suffixes like "_100e", "-v2", ".s300".
    common = re.sub(r"[_\-\.\+](?:e|s|v|V)\d.*$", "", base_name)
    # First token of the name before any separator (used for prefix matching).
    prefix_match = re.match(r"^(.*?)[_\-\.\+]", base_name)
    prefix = prefix_match.group(1) if prefix_match else None

    # First-encountered candidate for each priority tier (see docstring).
    same_count = 0
    last_same = None
    same_substr = None
    same_prefixed = None
    external_exact = None
    external_substr = None
    external_pref = None

    for idx in get_files("index"):
        idx_folder = os.path.dirname(idx)
        idx_folder_n = normalize_path(idx_folder)
        idx_name = os.path.basename(idx)
        idx_base = os.path.splitext(idx_name)[0]

        in_same = folders_same(model_folder, idx_folder_n)
        if in_same:
            same_count += 1
            last_same = idx

            # 1) EXACT match to loaded model name and folders_same = True
            if idx_base == base_name:
                return idx

            # 2) Substring match to model name and folders_same
            if common in idx_base and same_substr is None:
                same_substr = idx

            # 3) Prefix match to model name and folders_same
            if prefix and idx_base.startswith(prefix) and same_prefixed is None:
                same_prefixed = idx

        # If it's NOT in a paired folder (folders_same = False) we look elseware:
        else:
            # 4) EXACT match to model name in external directory
            if idx_base == base_name and external_exact is None:
                external_exact = idx

            # 5) Substring match to model name in ED
            if common in idx_base and external_substr is None:
                external_substr = idx

            # 6) Prefix match to model name in ED
            if prefix and idx_base.startswith(prefix) and external_pref is None:
                external_pref = idx

    # Fallback: If there is exactly one index file in the same (or paired) folder,
    # we should assume that's the intended index file even if the name doesnt match
    if same_count == 1:
        return last_same

    # Then by remaining priority queue:
    if same_substr:
        return same_substr
    if same_prefixed:
        return same_prefixed
    if external_exact:
        return external_exact
    if external_substr:
        return external_substr
    if external_pref:
        return external_pref

    return ""
207
+
208
def extract_model_and_epoch(path):
    """Parse "<model>_<epochs>e_..." out of a checkpoint filename.

    Returns (model_name, epoch_count); ("", 0) when the filename does not
    follow that naming scheme. Used as a sort key for model dropdowns.
    """
    m = re.match(r"(.+?)_(\d+)e_", os.path.basename(path))
    if m is None:
        return "", 0
    name, epochs = m.group(1), m.group(2)
    return name, int(epochs)
215
+
216
+
217
def get_speakers_id(model):
    """Return the list of selectable speaker ids for a model checkpoint.

    Loads the checkpoint (CPU, weights only) relative to now_dir and reads its
    "speakers_id" count. Falls back to [0] (single speaker) when no model is
    given, the file cannot be loaded, or the field is absent/zero.
    """
    if not model:
        return [0]
    try:
        checkpoint = torch.load(
            os.path.join(now_dir, model), map_location="cpu", weights_only=True
        )
        speaker_count = checkpoint.get("speakers_id")
    except Exception:
        # Unreadable/incompatible checkpoint: behave like a one-speaker model.
        return [0]
    return list(range(speaker_count)) if speaker_count else [0]
232
+
233
+
234
def create_folder_and_move_files(folder_name, bin_file, config_file):
    """Copy uploaded custom-embedder files into a named subfolder.

    Creates custom_embedder_root/<folder_name> and copies the uploaded .bin
    and .json files (either may be None) into it.

    Returns a human-readable status string (this is shown in the UI).
    """
    if not folder_name:
        return "Folder name must not be empty."

    # basename() strips any path components the user typed ("../x" -> "x"),
    # so the target can only be a direct child of the embedder root.
    folder_name = os.path.basename(folder_name)
    target_folder = os.path.join(custom_embedder_root, folder_name)

    normalized_target_folder = os.path.abspath(target_folder)
    normalized_custom_embedder_root = os.path.abspath(custom_embedder_root)

    # Fix: a plain startswith() prefix check also accepts sibling directories
    # that merely share the root's string prefix (e.g. ".../embedders_evil").
    # commonpath() compares real path components.
    if (
        os.path.commonpath(
            [normalized_target_folder, normalized_custom_embedder_root]
        )
        != normalized_custom_embedder_root
    ):
        return "Invalid folder name. Folder must be within the custom embedder root directory."

    os.makedirs(target_folder, exist_ok=True)

    if bin_file:
        shutil.copy(bin_file, os.path.join(target_folder, os.path.basename(bin_file)))
    if config_file:
        shutil.copy(
            config_file, os.path.join(target_folder, os.path.basename(config_file))
        )

    return f"Files moved to folder {target_folder}"
257
+
258
+
259
def refresh_embedders_folders():
    """Return every subdirectory path found under the custom embedder root.

    Walks custom_embedder_root_relative recursively; each entry is the joined
    (parent, child) path, suitable for the custom-embedder dropdown choices.
    """
    found = []
    for parent, subdirs, _files in os.walk(custom_embedder_root_relative):
        for child in subdirs:
            found.append(os.path.join(parent, child))
    return found
266
+
267
# Model checkpoints discovered on disk; the first one is the default selection.
names = get_files("model")
default_weight = names[0] if names else None

# When True, the realtime pipeline passes audio through without conversion.
PASS_THROUGH = False
# Reusable Gradio updates for toggling the Start/Stop buttons' interactivity.
interactive_true = gr.update(interactive=True)
interactive_false = gr.update(interactive=False)
# Shared realtime-pipeline state, mutated by start_realtime()/stop_realtime().
running, callbacks, audio_manager = False, None, None

# Persisted UI settings live in assets/config.json under the "realtime" key.
CONFIG_PATH = os.path.join(now_dir, "assets", "config.json")
276
+
277
def save_realtime_settings(
    input_device, output_device, monitor_device, model_file, index_file
):
    """Save realtime settings to config.json.

    Each argument that is not None overwrites the corresponding key in the
    config's "realtime" section (None means "leave the saved value alone");
    falsy non-None values are stored as "". Errors are logged, never raised.
    """
    try:
        config = {}
        if os.path.exists(CONFIG_PATH):
            with open(CONFIG_PATH, "r", encoding="utf-8") as f:
                config = json.load(f)

        realtime = config.setdefault("realtime", {})

        # None means "not changed in this call" — preserve the stored value.
        updates = {
            "input_device": input_device,
            "output_device": output_device,
            "monitor_device": monitor_device,
            "model_file": model_file,
            "index_file": index_file,
        }
        for key, value in updates.items():
            if value is not None:
                realtime[key] = value or ""

        with open(CONFIG_PATH, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f"Error saving realtime settings: {e}")
307
+
308
+
309
def load_realtime_settings():
    """Load realtime settings from config.json.

    Returns a dict with the five realtime keys (device/model/index
    selections), each defaulting to "" when the file or key is missing, or
    when reading fails.
    """
    defaults = {
        "input_device": "",
        "output_device": "",
        "monitor_device": "",
        "model_file": "",
        "index_file": "",
    }
    try:
        if os.path.exists(CONFIG_PATH):
            with open(CONFIG_PATH, "r", encoding="utf-8") as f:
                saved = json.load(f).get("realtime", {})
            return {key: saved.get(key, "") for key in defaults}
    except Exception as e:
        print(f"Error loading realtime settings: {e}")

    return defaults
333
+
334
+
335
def get_safe_dropdown_value(saved_value, choices, fallback_value=None):
    """Pick a dropdown value that is guaranteed to be one of *choices*.

    Preference order: the saved value, then the fallback, then the first
    choice; None when there are no choices at all.
    """
    for candidate in (saved_value, fallback_value):
        if candidate and candidate in choices:
            return candidate
    return choices[0] if choices else None
345
+
346
+
347
def get_safe_index_value(saved_value, choices, fallback_value=None):
    """Pick an index-file value guaranteed to be one of *choices*.

    Unlike get_safe_dropdown_value, a saved path may also match a choice by
    filename alone (the saved path may be stale while the file still exists
    under a different directory). Preference order: exact match, basename
    match, fallback, first choice, None.
    """

    def default_choice():
        if fallback_value and fallback_value in choices:
            return fallback_value
        return choices[0] if choices else None

    # Treat None / "" / whitespace-only as "nothing saved".
    if not saved_value or (isinstance(saved_value, str) and not saved_value.strip()):
        return default_choice()

    if saved_value in choices:
        return saved_value

    # Match on filename only, in case the directory part is stale.
    wanted = os.path.basename(saved_value)
    for candidate in choices:
        if os.path.basename(candidate) == wanted:
            return candidate

    return default_choice()
375
+
376
+
377
def start_realtime(
    input_audio_device: str,
    input_audio_gain: int,
    input_asio_channels: int,
    output_audio_device: str,
    output_audio_gain: int,
    output_asio_channels: int,
    monitor_output_device: str,
    monitor_audio_gain: int,
    monitor_asio_channels: int,
    use_monitor_device: bool,
    exclusive_mode: bool,
    vad_enabled: bool,
    chunk_size: float,
    cross_fade_overlap_size: float,
    extra_convert_size: float,
    silent_threshold: int,
    pitch: int,
    index_rate: float,
    volume_envelope: float,
    protect: float,
    f0_method: str,
    pth_path: str,
    index_path: str,
    sid: int,
    f0_autotune: bool,
    f0_autotune_strength: float,
    proposed_pitch: bool,
    proposed_pitch_threshold: float,
    embedder_model: str,
    embedder_model_custom: str = None,
):
    """Start the realtime voice-conversion pipeline.

    Generator used as a Gradio event handler: each yield is a
    (status_message, start_button_update, stop_button_update) tuple. After the
    pipeline is up it keeps yielding latency updates every 100 ms until
    stop_realtime() flips the module-level `running` flag / clears the
    shared state.
    """
    global running, callbacks, audio_manager
    running = True

    # Validate the UI selections before touching the audio stack.
    if not input_audio_device or not output_audio_device:
        yield (
            "Please select valid input/output devices!",
            interactive_true,
            interactive_false,
        )
        return
    if use_monitor_device and not monitor_output_device:
        yield (
            "Please select a valid monitor device!",
            interactive_true,
            interactive_false,
        )
        return
    if not pth_path:
        yield (
            "Model path not provided. Aborting conversion.",
            interactive_true,
            interactive_false,
        )
        return

    yield "Starting Realtime...", interactive_false, interactive_true

    # Chunk size comes from the UI in milliseconds; the audio backend works in
    # 128-sample frames at AUDIO_SAMPLE_RATE.
    read_chunk_size = int(chunk_size * AUDIO_SAMPLE_RATE / 1000 / 128)

    sid = int(sid) if sid is not None else 0

    # Gain sliders are percentages; the pipeline expects linear multipliers.
    input_audio_gain /= 100.0
    output_audio_gain /= 100.0
    monitor_audio_gain /= 100.0

    try:
        # Map the human-readable dropdown labels back to backend device ids.
        input_devices, output_devices = get_audio_devices_formatted()
        input_device_id = input_devices[input_audio_device]
        output_device_id = output_devices[output_audio_device]
        output_monitor_id = (
            output_devices[monitor_output_device] if use_monitor_device else None
        )
    # Fix: the lookups above are dict indexing and raise KeyError for a stale
    # or malformed label; previously only (ValueError, IndexError) were caught,
    # so the generator crashed instead of reporting the error to the UI.
    except (KeyError, ValueError, IndexError):
        yield "Incorrectly formatted audio device. Stopping.", interactive_true, interactive_false
        return

    callbacks = AudioCallbacks(
        pass_through=PASS_THROUGH,
        read_chunk_size=read_chunk_size,
        cross_fade_overlap_size=cross_fade_overlap_size,
        extra_convert_size=extra_convert_size,
        model_path=pth_path,
        index_path=str(index_path),
        f0_method=f0_method,
        embedder_model=embedder_model,
        embedder_model_custom=embedder_model_custom,
        silent_threshold=silent_threshold,
        f0_up_key=pitch,
        index_rate=index_rate,
        protect=protect,
        volume_envelope=volume_envelope,
        f0_autotune=f0_autotune,
        f0_autotune_strength=f0_autotune_strength,
        proposed_pitch=proposed_pitch,
        proposed_pitch_threshold=proposed_pitch_threshold,
        input_audio_gain=input_audio_gain,
        output_audio_gain=output_audio_gain,
        monitor_audio_gain=monitor_audio_gain,
        monitor=use_monitor_device,
        vad_enabled=vad_enabled,
        vad_sensitivity=3,
        vad_frame_ms=30,
        sid=sid,
    )

    audio_manager = callbacks.audio
    audio_manager.start(
        input_device_id=input_device_id,
        output_device_id=output_device_id,
        output_monitor_id=output_monitor_id,
        exclusive_mode=exclusive_mode,
        asio_input_channel=input_asio_channels,
        asio_output_channel=output_asio_channels,
        asio_output_monitor_channel=monitor_asio_channels,
        read_chunk_size=read_chunk_size,
    )

    yield "Realtime is ready!", interactive_false, interactive_true

    # Poll until stop_realtime() clears the shared state; surface the measured
    # round-trip latency while running.
    while running and callbacks is not None and audio_manager is not None:
        time.sleep(0.1)
        if hasattr(audio_manager, "latency"):
            yield f"Latency: {audio_manager.latency:.2f} ms", interactive_false, interactive_true

    return gr.update(), gr.update(), gr.update()
504
+
505
+
506
def stop_realtime():
    """Stop the realtime pipeline and release the shared state.

    Returns a (status, start_button_update, stop_button_update) triple for the
    Gradio outputs; reports an error string when no pipeline is running.
    """
    global running, callbacks, audio_manager

    # Guard clause: nothing to stop.
    if not running or audio_manager is None or callbacks is None:
        return "Realtime pipeline not found!", interactive_true, interactive_false

    audio_manager.stop()
    running = False
    # Drop the latency attribute so start_realtime()'s polling loop stops
    # reporting stale values.
    if hasattr(audio_manager, "latency"):
        del audio_manager.latency
    audio_manager = callbacks = None

    return gr.update(value="Stopping..."), gr.update(), gr.update()
518
+
519
+
520
def get_audio_devices_formatted():
    """Enumerate audio devices as ({label: device_index}, {label: device_index}).

    Labels look like "1: Device Name (Host API)". Virtual devices (e.g.
    VB-Cable) are sorted first for outputs and last for inputs, since routing
    the converted voice into a virtual cable is the common setup.

    Returns empty dicts when device enumeration fails.
    """
    try:
        input_devices, output_devices = list_audio_device()

        def priority(name: str) -> int:
            # Lower rank sorts first: explicit "virtual" devices, then
            # VB-Audio devices, then real hardware.
            n = name.lower()
            if "virtual" in n:
                return 0
            if "vb" in n:
                return 1
            return 2

        output_sorted = sorted(output_devices, key=lambda d: priority(d.name))
        input_sorted = sorted(
            input_devices, key=lambda d: priority(d.name), reverse=True
        )

        # Fix: use enumerate instead of list.index() inside the comprehension —
        # index() is O(n) per item and returns the FIRST position for devices
        # that compare equal, which produced duplicate labels/numbers.
        input_device_list = {
            f"{pos + 1}: {d.name} ({d.host_api})": d.index
            for pos, d in enumerate(input_sorted)
        }
        output_device_list = {
            f"{pos + 1}: {d.name} ({d.host_api})": d.index
            for pos, d in enumerate(output_sorted)
        }

        return input_device_list, output_device_list
    except Exception:
        # Fix: callers immediately call .keys() on both results, so the
        # failure path must return dicts — ([], []) raised AttributeError.
        return {}, {}
549
+
550
+
551
def realtime_tab():
    """Build the Realtime tab's Gradio UI and wire up its event handlers.

    Layout: Start/Stop buttons + status label + terms checkbox, then three
    sub-tabs (Audio Settings, Model Settings, Performance Settings). Device
    and model selections are persisted via save_realtime_settings() and
    restored on build via load_realtime_settings().
    """
    input_devices, output_devices = get_audio_devices_formatted()
    # Only the human-readable labels are used as dropdown choices; the id
    # mapping is re-resolved inside start_realtime().
    input_devices, output_devices = list(input_devices.keys()), list(
        output_devices.keys()
    )

    # Load saved settings
    saved_settings = load_realtime_settings()

    with gr.Blocks() as ui:
        with gr.Row():
            start_button = gr.Button(i18n("Start"), variant="primary")
            stop_button = gr.Button(i18n("Stop"), interactive=False)
        latency_info = gr.Label(label=i18n("Status"), value="Realtime not started.")
        terms_checkbox = gr.Checkbox(
            label=i18n("I agree to the terms of use"),
            info=i18n(
                "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your realtime."
            ),
            value=False,
            interactive=True,
        )

        with gr.Tabs():
            # --- Audio device selection -------------------------------------
            with gr.TabItem(i18n("Audio Settings")):
                with gr.Row():
                    refresh_devices_button = gr.Button(i18n("Refresh Audio Devices"))
                with gr.Row():
                    with gr.Accordion(i18n("Input Device"), open=True):
                        with gr.Column():
                            input_audio_device = gr.Dropdown(
                                label=i18n("Input Device"),
                                info=i18n(
                                    "Select the microphone or audio interface you will be speaking into."
                                ),
                                choices=input_devices,
                                value=get_safe_dropdown_value(
                                    saved_settings["input_device"], input_devices
                                ),
                                interactive=True,
                            )
                            input_audio_gain = gr.Slider(
                                minimum=0,
                                maximum=200,
                                value=100,
                                label=i18n("Input Gain (%)"),
                                info=i18n(
                                    "Adjusts the input volume before processing. Prevents clipping or boosts a quiet mic."
                                ),
                                interactive=True,
                            )
                            input_asio_channels = gr.Slider(
                                minimum=-1,
                                maximum=16,
                                value=-1,
                                step=1,
                                label=i18n("Input ASIO Channel"),
                                info=i18n(
                                    "For ASIO drivers, selects a specific input channel. Leave at -1 for default."
                                ),
                                interactive=True,
                            )
                    with gr.Accordion("Output Device", open=True):
                        with gr.Column():
                            output_audio_device = gr.Dropdown(
                                label=i18n("Output Device"),
                                info=i18n(
                                    "Select the device where the final converted voice will be sent (e.g., a virtual cable)."
                                ),
                                choices=output_devices,
                                value=get_safe_dropdown_value(
                                    saved_settings["output_device"], output_devices
                                ),
                                interactive=True,
                            )
                            output_audio_gain = gr.Slider(
                                minimum=0,
                                maximum=200,
                                value=100,
                                label=i18n("Output Gain (%)"),
                                info=i18n(
                                    "Adjusts the final volume of the converted voice after processing."
                                ),
                                interactive=True,
                            )
                            output_asio_channels = gr.Slider(
                                minimum=-1,
                                maximum=16,
                                value=-1,
                                step=1,
                                label=i18n("Output ASIO Channel"),
                                info=i18n(
                                    "For ASIO drivers, selects a specific output channel. Leave at -1 for default."
                                ),
                                interactive=True,
                            )
                    with gr.Accordion("Monitor Device (Optional)", open=False):
                        with gr.Column():
                            use_monitor_device = gr.Checkbox(
                                label=i18n("Use Monitor Device"),
                                value=False,
                                interactive=True,
                            )
                            monitor_output_device = gr.Dropdown(
                                label=i18n("Monitor Device"),
                                info=i18n(
                                    "Select the device for monitoring your voice (e.g., your headphones)."
                                ),
                                choices=output_devices,
                                value=get_safe_dropdown_value(
                                    saved_settings["monitor_device"], output_devices
                                ),
                                interactive=True,
                            )
                            monitor_audio_gain = gr.Slider(
                                minimum=0,
                                maximum=200,
                                value=100,
                                label=i18n("Monitor Gain (%)"),
                                info=i18n(
                                    "Adjusts the volume of the monitor feed, independent of the main output."
                                ),
                                interactive=True,
                            )
                            monitor_asio_channels = gr.Slider(
                                minimum=-1,
                                maximum=16,
                                value=-1,
                                step=1,
                                label=i18n("Monitor ASIO Channel"),
                                info=i18n(
                                    "For ASIO drivers, selects a specific monitor output channel. Leave at -1 for default."
                                ),
                                interactive=True,
                            )
                with gr.Row():
                    exclusive_mode = gr.Checkbox(
                        label=i18n("Exclusive Mode (WASAPI)"),
                        info=i18n(
                            "For WASAPI (Windows), gives the app exclusive control for potentially lower latency."
                        ),
                        value=True,
                        interactive=True,
                    )
                    vad_enabled = gr.Checkbox(
                        label=i18n("Enable VAD"),
                        info=i18n(
                            "Enables Voice Activity Detection to only process audio when you are speaking, saving CPU."
                        ),
                        value=True,
                        interactive=True,
                    )

            # --- Voice model / index selection and conversion parameters ----
            with gr.TabItem(i18n("Model Settings")):
                with gr.Row():
                    model_choices = (
                        sorted(names, key=extract_model_and_epoch) if names else []
                    )
                    model_file = gr.Dropdown(
                        label=i18n("Voice Model"),
                        choices=model_choices,
                        interactive=True,
                        value=get_safe_dropdown_value(
                            saved_settings["model_file"], model_choices, default_weight
                        ),
                        allow_custom_value=True,
                    )
                    index_choices = get_files("index")
                    index_file = gr.Dropdown(
                        label=i18n("Index File"),
                        choices=index_choices,
                        value=get_safe_index_value(
                            saved_settings["index_file"],
                            index_choices,
                            match_index(default_weight) if default_weight else None,
                        ),
                        interactive=True,
                        allow_custom_value=True,
                    )

                with gr.Row():
                    unload_button = gr.Button(i18n("Unload Voice"))
                    refresh_button = gr.Button(i18n("Refresh"))
                with gr.Column():
                    autotune = gr.Checkbox(
                        label=i18n("Autotune"),
                        info=i18n(
                            "Apply a soft autotune to your inferences, recommended for singing conversions."
                        ),
                        visible=True,
                        value=False,
                        interactive=True,
                    )
                    # Hidden until the matching checkbox above is ticked
                    # (see the .change() wiring below).
                    autotune_strength = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label=i18n("Autotune Strength"),
                        info=i18n(
                            "Set the autotune strength - the more you increase it the more it will snap to the chromatic grid."
                        ),
                        visible=False,
                        value=1,
                        interactive=True,
                    )
                    proposed_pitch = gr.Checkbox(
                        label=i18n("Proposed Pitch"),
                        info=i18n(
                            "Adjust the input audio pitch to match the voice model range."
                        ),
                        visible=True,
                        value=False,
                        interactive=True,
                    )
                    proposed_pitch_threshold = gr.Slider(
                        minimum=50.0,
                        maximum=1200.0,
                        label=i18n("Proposed Pitch Threshold"),
                        info=i18n(
                            "Male voice models typically use 155.0 and female voice models typically use 255.0."
                        ),
                        visible=False,
                        value=155.0,
                        interactive=True,
                    )
                    sid = gr.Dropdown(
                        label=i18n("Speaker ID"),
                        choices=(
                            get_speakers_id(default_weight) if default_weight else [0]
                        ),
                        value=0,
                        interactive=True,
                    )
                    pitch = gr.Slider(
                        minimum=-24,
                        maximum=24,
                        step=1,
                        label=i18n("Pitch"),
                        info=i18n(
                            "Set the pitch of the audio, the higher the value, the higher the pitch."
                        ),
                        value=0,
                        interactive=True,
                    )
                    index_rate = gr.Slider(
                        minimum=0,
                        maximum=1,
                        label=i18n("Search Feature Ratio"),
                        info=i18n(
                            "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
                        ),
                        value=0.75,
                        interactive=True,
                    )
                    volume_envelope = gr.Slider(
                        minimum=0,
                        maximum=1,
                        value=1,
                        label=i18n("Volume Envelope"),
                        info=i18n(
                            "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
                        ),
                        interactive=True,
                    )
                    protect = gr.Slider(
                        minimum=0,
                        maximum=0.5,
                        value=0.5,
                        label=i18n("Protect Voiceless Consonants"),
                        info=i18n(
                            "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
                        ),
                        interactive=True,
                    )
                    f0_method = gr.Radio(
                        choices=["rmvpe", "fcpe", "swift"],
                        value="swift",
                        label=i18n("Pitch extraction algorithm"),
                        info=i18n(
                            "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
                        ),
                        interactive=True,
                    )
                    embedder_model = gr.Radio(
                        choices=[
                            "contentvec",
                            "spin",
                            "chinese-hubert-base",
                            "japanese-hubert-base",
                            "korean-hubert-base",
                            "custom",
                        ],
                        value="contentvec",
                        label=i18n("Embedder Model"),
                        info=i18n("Model used for learning speaker embedding."),
                        interactive=True,
                    )
                    # Shown only when embedder_model == "custom".
                    with gr.Column(visible=False) as embedder_custom:
                        with gr.Accordion(i18n("Custom Embedder"), open=True):
                            with gr.Row():
                                embedder_model_custom = gr.Dropdown(
                                    label=i18n("Select Custom Embedder"),
                                    choices=refresh_embedders_folders(),
                                    interactive=True,
                                    allow_custom_value=True,
                                )
                                refresh_embedders_button = gr.Button(
                                    i18n("Refresh embedders")
                                )
                            folder_name_input = gr.Textbox(
                                label=i18n("Folder Name"), interactive=True
                            )
                            with gr.Row():
                                bin_file_upload = gr.File(
                                    label=i18n("Upload .bin"),
                                    type="filepath",
                                    interactive=True,
                                )
                                config_file_upload = gr.File(
                                    label=i18n("Upload .json"),
                                    type="filepath",
                                    interactive=True,
                                )
                            move_files_button = gr.Button(
                                i18n("Move files to custom embedder folder")
                            )

            # --- Buffer sizes / latency trade-offs ---------------------------
            with gr.TabItem(i18n("Performance Settings")):
                chunk_size = gr.Slider(
                    minimum=2.7,
                    maximum=2730.7,
                    value=512,
                    step=1,
                    label=i18n("Chunk Size (ms)"),
                    info=i18n(
                        "Audio buffer size in milliseconds. Lower values may reduce latency but increase CPU load."
                    ),
                    interactive=True,
                )
                cross_fade_overlap_size = gr.Slider(
                    minimum=0.05,
                    maximum=0.2,
                    value=0.05,
                    step=0.01,
                    label=i18n("Crossfade Overlap Size (s)"),
                    info=i18n(
                        "Duration of the fade between audio chunks to prevent clicks. Higher values create smoother transitions but may increase latency."
                    ),
                    interactive=True,
                )
                extra_convert_size = gr.Slider(
                    minimum=0.1,
                    maximum=5,
                    value=0.5,
                    step=0.1,
                    label=i18n("Extra Conversion Size (s)"),
                    info=i18n(
                        "Amount of extra audio processed to provide context to the model. Improves conversion quality at the cost of higher CPU usage."
                    ),
                    interactive=True,
                )
                silent_threshold = gr.Slider(
                    minimum=-90,
                    maximum=-60,
                    value=-90,
                    step=1,
                    label=i18n("Silence Threshold (dB)"),
                    info=i18n(
                        "Volume level below which audio is treated as silence and not processed. Helps to save CPU resources and reduce background noise."
                    ),
                    interactive=True,
                )

        # --- Event handlers ---------------------------------------------------

        def enforce_terms(terms_accepted, *args):
            # Gatekeeper around start_realtime(): refuses to start until the
            # terms checkbox is ticked.
            if not terms_accepted:
                message = "You must agree to the Terms of Use to proceed."
                gr.Info(message)
                yield message, interactive_true, interactive_false
                return
            yield from start_realtime(*args)

        def update_on_model_change(model_path):
            # When a new model is picked, re-match its index file and refresh
            # the speaker-id choices.
            new_index = match_index(model_path)
            new_sids = get_speakers_id(model_path)

            # Get updated index choices
            new_index_choices = get_files("index")
            # Use the matched index as fallback, but handle empty strings
            fallback_index = new_index if new_index and new_index.strip() else None
            safe_index_value = get_safe_index_value(
                "", new_index_choices, fallback_index
            )

            return gr.update(
                choices=new_index_choices, value=safe_index_value
            ), gr.update(choices=new_sids, value=0 if new_sids else None)

        def refresh_devices():
            # NOTE(review): relies on sounddevice's private _terminate /
            # _initialize to force a device re-scan — confirm this is stable
            # across sounddevice versions.
            sd._terminate()
            sd._initialize()

            input_choices, output_choices = get_audio_devices_formatted()
            input_choices, output_choices = list(input_choices.keys()), list(
                output_choices.keys()
            )
            return (
                gr.update(choices=input_choices),
                gr.update(choices=output_choices),
                gr.update(choices=output_choices),
            )

        def toggle_visible(checkbox):
            # Generic show/hide update driven by a checkbox value.
            return {"visible": checkbox, "__type__": "update"}

        def toggle_visible_embedder_custom(embedder_model):
            # Show the custom-embedder column only for the "custom" choice.
            if embedder_model == "custom":
                return {"visible": True, "__type__": "update"}
            return {"visible": False, "__type__": "update"}

        refresh_devices_button.click(
            fn=refresh_devices,
            outputs=[input_audio_device, output_audio_device, monitor_output_device],
        )

        autotune.change(
            fn=toggle_visible,
            inputs=[autotune],
            outputs=[autotune_strength],
        )

        proposed_pitch.change(
            fn=toggle_visible,
            inputs=[proposed_pitch],
            outputs=[proposed_pitch_threshold],
        )

        embedder_model.change(
            fn=toggle_visible_embedder_custom,
            inputs=[embedder_model],
            outputs=[embedder_custom],
        )

        move_files_button.click(
            fn=create_folder_and_move_files,
            inputs=[folder_name_input, bin_file_upload, config_file_upload],
            outputs=[],
        )
        refresh_embedders_button.click(
            fn=lambda: gr.update(choices=refresh_embedders_folders()),
            inputs=[],
            outputs=[embedder_model_custom],
        )

        # Input order below must match start_realtime()'s parameter order
        # (after the leading terms_checkbox consumed by enforce_terms).
        start_button.click(
            fn=enforce_terms,
            inputs=[
                terms_checkbox,
                input_audio_device,
                input_audio_gain,
                input_asio_channels,
                output_audio_device,
                output_audio_gain,
                output_asio_channels,
                monitor_output_device,
                monitor_audio_gain,
                monitor_asio_channels,
                use_monitor_device,
                exclusive_mode,
                vad_enabled,
                chunk_size,
                cross_fade_overlap_size,
                extra_convert_size,
                silent_threshold,
                pitch,
                index_rate,
                volume_envelope,
                protect,
                f0_method,
                model_file,
                index_file,
                sid,
                autotune,
                autotune_strength,
                proposed_pitch,
                proposed_pitch_threshold,
                embedder_model,
                embedder_model_custom,
            ],
            outputs=[latency_info, start_button, stop_button],
        )

        # NOTE(review): the .then() lambda uses a parenthesized `yield`
        # expression, making it a generator that yields a single 3-tuple —
        # unusual but syntactically valid; confirm it behaves as intended.
        stop_button.click(
            fn=stop_realtime, outputs=[latency_info, start_button, stop_button]
        ).then(
            fn=lambda: (
                yield gr.update(value="Stopped"),
                interactive_true,
                interactive_false,
            ),
            inputs=None,
            outputs=[latency_info, start_button, stop_button],
        )
        unload_button.click(
            fn=lambda: (
                {"value": "", "__type__": "update"},
                {"value": "", "__type__": "update"},
            ),
            inputs=[],
            outputs=[model_file, index_file],
        )
        model_file.select(
            fn=update_on_model_change, inputs=[model_file], outputs=[index_file, sid]
        )

        # Save settings when devices or model change
        def save_input_device(input_device):
            if input_device:
                save_realtime_settings(input_device, None, None, None, None)

        def save_output_device(output_device):
            if output_device:
                save_realtime_settings(None, output_device, None, None, None)

        def save_monitor_device(monitor_device):
            if monitor_device:
                save_realtime_settings(None, None, monitor_device, None, None)

        def save_model_file(model_file):
            if model_file:
                save_realtime_settings(None, None, None, model_file, None)

        def save_index_file(index_file):
            # Only save if index_file is not None and not empty
            if index_file:
                save_realtime_settings(None, None, None, None, index_file)

        # Add event handlers to save settings
        input_audio_device.change(
            fn=save_input_device, inputs=[input_audio_device], outputs=[]
        )

        output_audio_device.change(
            fn=save_output_device, inputs=[output_audio_device], outputs=[]
        )

        monitor_output_device.change(
            fn=save_monitor_device, inputs=[monitor_output_device], outputs=[]
        )

        def refresh_all():
            # Rescan model/index files and audio devices for all dropdowns.
            new_names = get_files("model")
            new_indexes = get_files("index")
            input_choices, output_choices = get_audio_devices_formatted()
            input_choices, output_choices = list(input_choices.keys()), list(
                output_choices.keys()
            )
            return (
                gr.update(choices=sorted(new_names, key=extract_model_and_epoch)),
                gr.update(choices=new_indexes),
                gr.update(choices=input_choices),
                gr.update(choices=output_choices),
                gr.update(choices=output_choices),
            )

        model_file.change(fn=save_model_file, inputs=[model_file], outputs=[])

        index_file.change(fn=save_index_file, inputs=[index_file], outputs=[])

        refresh_button.click(
            fn=refresh_all,
            outputs=[
                model_file,
                index_file,
                input_audio_device,
                output_audio_device,
                monitor_output_device,
            ],
        )
tabs/report/main.js ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// main.js
// Screen-recording glue for the report tab. Uses ScreenCastRecorder and the
// __awaiter helper defined in recorder.js (which must be loaded first), and
// exposes startRecording/stopRecording on `window` so record_button.js can
// drive them from Gradio.
if (!ScreenCastRecorder.isSupportedBrowser()) {
    console.error("Screen Recording not supported in this browser");
}
let recorder;     // active ScreenCastRecorder instance, if any
let outputBlob;   // last completed recording as a Blob

// Stop the active recording and immediately download it as recording.webm.
const stopRecording = () => __awaiter(void 0, void 0, void 0, function* () {
    // NOTE(review): currentState is hard-coded, so the "OFF" branch below can
    // never trigger — this looks like a stripped-down port of a React
    // component's state machine (see the commented setState calls); confirm.
    let currentState = "RECORDING";
    // We should do nothing if the user try to stop recording when it is not started
    if (currentState === "OFF" || recorder == null) {
        return;
    }
    // if (currentState === "COUNTDOWN") {
    //     this.setState({
    //         currentState: "OFF",
    //     })
    // }
    if (currentState === "RECORDING") {
        if (recorder.getState() === "inactive") {
            // this.setState({
            //     currentState: "OFF",
            // })
            console.log("Inactive");
        }
        else {
            // Stop yields the recorded media as a Blob.
            outputBlob = yield recorder.stop();
            console.log("Done recording");
            // this.setState({
            //     outputBlob,
            //     currentState: "PREVIEW_FILE",
            // })
            window.currentState = "PREVIEW_FILE";
            // Expose the blob URL globally so record_button.js can fetch and
            // base64-encode it for the Python side.
            const videoSource = URL.createObjectURL(outputBlob);
            window.videoSource = videoSource;
            // Trigger a browser download of the recording via a temp <a>.
            const fileName = "recording";
            const link = document.createElement("a");
            link.setAttribute("href", videoSource);
            link.setAttribute("download", `${fileName}.webm`);
            link.click();
        }
    }
});
// Create a fresh recorder (no audio) and start capturing the screen.
const startRecording = () => __awaiter(void 0, void 0, void 0, function* () {
    const recordAudio = false;
    recorder = new ScreenCastRecorder({
        recordAudio,
        onErrorOrStop: () => stopRecording(),
    });
    try {
        yield recorder.initialize();
    }
    catch (e) {
        console.warn(`ScreenCastRecorder.initialize error: ${e}`);
        // this.setState({ currentState: "UNSUPPORTED" })
        window.currentState = "UNSUPPORTED";
        return;
    }
    // this.setState({ currentState: "COUNTDOWN" })
    const hasStarted = recorder.start();
    if (hasStarted) {
        // this.setState({
        //     currentState: "RECORDING",
        // })
        console.log("Started recording");
        window.currentState = "RECORDING";
    }
    else {
        // start() refused (e.g. permission denied) — tear down cleanly.
        stopRecording().catch(err => console.warn(`withScreencast.stopRecording threw an error: ${err}`));
    }
});

// Set global functions to window.
window.startRecording = startRecording;
window.stopRecording = stopRecording;
tabs/report/record_button.js ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Setup if needed and start recording.
// Click handler attached to the "Record Screen" Gradio button. Toggles between
// starting a capture and stopping it; when stopping, returns the finished
// recording to Python as a base64 string.
async () => {
    // Set up recording functions if not already initialized
    if (!window.startRecording) {
        // NOTE: these two declarations are placeholders — report.py textually
        // replaces them with the full contents of recorder.js and main.js
        // before handing this snippet to Gradio. Do not rename or reformat.
        let recorder_js = null;
        let main_js = null;
    }

    // Function to fetch and convert video blob to base64 using async/await without explicit Promise
    async function getVideoBlobAsBase64(objectURL) {
        const response = await fetch(objectURL);
        if (!response.ok) {
            throw new Error('Failed to fetch video blob.');
        }

        const blob = await response.blob();

        const reader = new FileReader();
        reader.readAsDataURL(blob);

        return new Promise((resolve, reject) => {
            reader.onloadend = () => {
                if (reader.result) {
                    resolve(reader.result.split(',')[1]); // Return the base64 string (without data URI prefix)
                } else {
                    reject('Failed to convert blob to base64.');
                }
            };
        });
    }

    if (window.currentState === "RECORDING") {
        // Second click: stop the capture and hand the video back to Python.
        await window.stopRecording();
        const base64String = await getVideoBlobAsBase64(window.videoSource);
        return base64String;
    } else {
        // First click: begin capture. The literal "Record" return value tells
        // the Python toggle_button_label() that recording has started.
        window.startRecording();
        return "Record";
    }
}
tabs/report/recorder.js ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// recorder.js
// Standard TypeScript-emitted __awaiter helper: drives a generator whose
// yielded values are awaited, emulating async/await for older JS targets.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
11
+ const BLOB_TYPE = "video/webm";
12
// Thin wrapper around getDisplayMedia + MediaRecorder that captures the
// screen (optionally with microphone audio) into a single webm Blob.
class ScreenCastRecorder {
    /** True if the current browser likely supports screencasts. */
    static isSupportedBrowser() {
        return (navigator.mediaDevices != null &&
            navigator.mediaDevices.getUserMedia != null &&
            navigator.mediaDevices.getDisplayMedia != null &&
            MediaRecorder.isTypeSupported(BLOB_TYPE));
    }
    // onErrorOrStop is invoked when recording errors out or stops on its own.
    constructor({ recordAudio, onErrorOrStop }) {
        this.recordAudio = recordAudio;
        this.onErrorOrStopCallback = onErrorOrStop;
        this.inputStream = null;
        this.recordedChunks = [];
        this.mediaRecorder = null;
    }
    /**
     * This asynchronous method will initialize the screen recording object asking
     * for permissions to the user which are needed to start recording.
     */
    initialize() {
        return __awaiter(this, void 0, void 0, function* () {
            const desktopStream = yield navigator.mediaDevices.getDisplayMedia({
                video: true,
            });
            let tracks = desktopStream.getTracks();
            if (this.recordAudio) {
                // Merge mic audio tracks into the captured screen stream.
                const voiceStream = yield navigator.mediaDevices.getUserMedia({
                    video: false,
                    audio: true,
                });
                tracks = tracks.concat(voiceStream.getAudioTracks());
            }
            this.recordedChunks = [];
            this.inputStream = new MediaStream(tracks);
            this.mediaRecorder = new MediaRecorder(this.inputStream, {
                mimeType: BLOB_TYPE,
            });
            // Chunks accumulate here until stop() assembles them into a Blob.
            this.mediaRecorder.ondataavailable = e => this.recordedChunks.push(e.data);
        });
    }
    // MediaRecorder state ("inactive"/"recording"/"paused"); "inactive" when
    // initialize() has not run yet.
    getState() {
        if (this.mediaRecorder) {
            return this.mediaRecorder.state;
        }
        return "inactive";
    }
    /**
     * This method will start the screen recording if the user has granted permissions
     * and the mediaRecorder has been initialized
     *
     * @returns {boolean}
     */
    start() {
        if (!this.mediaRecorder) {
            console.warn(`ScreenCastRecorder.start: mediaRecorder is null`);
            return false;
        }
        const logRecorderError = (e) => {
            console.warn(`mediaRecorder.start threw an error: ${e}`);
        };
        this.mediaRecorder.onerror = (e) => {
            logRecorderError(e);
            this.onErrorOrStopCallback();
        };
        this.mediaRecorder.onstop = () => this.onErrorOrStopCallback();
        try {
            this.mediaRecorder.start();
        }
        catch (e) {
            logRecorderError(e);
            return false;
        }
        return true;
    }
    /**
     * This method will stop recording and then return the generated Blob
     *
     * @returns {(Promise|undefined)}
     *     A Promise which will return the generated Blob
     *     Undefined if the MediaRecorder could not initialize
     */
    stop() {
        if (!this.mediaRecorder) {
            return undefined;
        }
        let resolver;
        const promise = new Promise(r => {
            resolver = r;
        });
        // Replaces the onstop set in start() so the callback fires exactly once,
        // resolving the promise instead of re-entering onErrorOrStopCallback.
        this.mediaRecorder.onstop = () => resolver();
        this.mediaRecorder.stop();
        if (this.inputStream) {
            // Release the capture tracks so the browser's "sharing" UI goes away.
            this.inputStream.getTracks().forEach(s => s.stop());
            this.inputStream = null;
        }
        return promise.then(() => this.buildOutputBlob());
    }
    buildOutputBlob() {
        return new Blob(this.recordedChunks, { type: BLOB_TYPE });
    }
}
tabs/report/report.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import base64
4
+ import pathlib
5
+ import tempfile
6
+ import gradio as gr
7
+
8
+ from assets.i18n.i18n import I18nAuto
9
+
10
now_dir = os.getcwd()
sys.path.append(now_dir)

i18n = I18nAuto()

# Client-side JS sources used to drive screen recording from the browser.
recorder_js_path = os.path.join(now_dir, "tabs", "report", "recorder.js")
main_js_path = os.path.join(now_dir, "tabs", "report", "main.js")
record_button_js_path = os.path.join(now_dir, "tabs", "report", "record_button.js")

recorder_js = pathlib.Path(recorder_js_path).read_text()
main_js = pathlib.Path(main_js_path).read_text()
# Inline recorder.js and main.js into the click handler by substituting the
# placeholder declarations inside record_button.js (which must match exactly).
record_button_js = (
    pathlib.Path(record_button_js_path)
    .read_text()
    .replace("let recorder_js = null;", recorder_js)
    .replace("let main_js = null;", main_js)
)
27
+
28
+
29
def save_base64_video(base64_string):
    """Decode a base64-encoded video and persist it to a temporary .mp4 file.

    The file is created with ``delete=False`` so it survives the function
    returning; the caller owns (and must eventually remove) it.

    Returns the path of the temporary file.
    """
    decoded_bytes = base64.b64decode(base64_string)
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as video_file:
        video_path = video_file.name
        video_file.write(decoded_bytes)
    print(f"Temporary MP4 file saved as: {video_path}")
    return video_path
37
+
38
+
39
def report_tab():
    """Build the "report an issue" Gradio tab: instructions plus a screen recorder."""
    instructions = [
        i18n("# How to Report an Issue on GitHub"),
        i18n(
            "1. Click on the 'Record Screen' button below to start recording the issue you are experiencing."
        ),
        i18n(
            "2. Once you have finished recording the issue, click on the 'Stop Recording' button (the same button, but the label changes depending on whether you are actively recording or not)."
        ),
        i18n(
            "3. Go to [GitHub Issues](https://github.com/IAHispano/Applio/issues) and click on the 'New Issue' button."
        ),
        i18n(
            "4. Complete the provided issue template, ensuring to include details as needed, and utilize the assets section to upload the recorded file from the previous step."
        ),
    ]
    components = [gr.Markdown(value=instruction) for instruction in instructions]

    start_button = gr.Button(i18n("Record Screen"))
    video_component = gr.Video(interactive=False)

    def toggle_button_label(returned_string):
        # The JS click handler returns the literal "Record" right after it
        # starts capturing; any other string is the base64-encoded recording.
        if returned_string.startswith("Record"):
            return gr.Button(value="Stop Recording"), None
        else:
            try:
                temp_filename = save_base64_video(returned_string)
            except Exception as error:
                print(f"An error occurred converting video to mp4: {error}")
                return gr.Button(value="Record Screen"), gr.Warning(
                    f"Failed to convert video to mp4:\n{error}"
                )
            return gr.Button(value="Record Screen"), gr.Video(
                value=temp_filename, interactive=False
            )

    # js=record_button_js runs the browser-side recorder; its return value is
    # fed to toggle_button_label via the button's own value.
    start_button.click(
        fn=toggle_button_label,
        inputs=[start_button],
        outputs=[start_button, video_component],
        js=record_button_js,
    )
tabs/settings/sections/filter.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import gradio as gr
5
+ from assets.i18n.i18n import I18nAuto
6
+
7
+ now_dir = os.getcwd()
8
+ sys.path.append(now_dir)
9
+
10
+ i18n = I18nAuto()
11
+ config_file = os.path.join(now_dir, "assets", "config.json")
12
+
13
+ filter_trigger = None
14
+
15
+
16
def get_filter_trigger():
    """Return the shared hidden Textbox that broadcasts filter-setting changes.

    Lazily created once per process; other tabs attach listeners to this
    component so they can react when the model/index filter is toggled.
    """
    global filter_trigger
    if filter_trigger is None:
        filter_trigger = gr.Textbox(visible=False)
    return filter_trigger
21
+
22
+
23
def load_config_filter():
    """Report whether the model/index keyword filter is enabled in config.json."""
    with open(config_file, "r", encoding="utf8") as handle:
        settings = json.load(handle)
    # Missing key defaults to disabled.
    return bool(settings.get("model_index_filter", False))
27
+
28
+
29
def save_config_filter(val: bool):
    """Persist the model/index filter flag into config.json."""
    with open(config_file, "r", encoding="utf8") as handle:
        settings = json.load(handle)
    settings["model_index_filter"] = bool(val)
    with open(config_file, "w", encoding="utf8") as handle:
        json.dump(settings, handle, indent=2)
35
+
36
+
37
def filter_tab():
    """Render the checkbox toggling the model/index keyword-filter setting."""
    checkbox = gr.Checkbox(
        label=i18n("Enable model/index list filter"),
        info=i18n(
            "Adds a keyword filter for the model/index selection lists in the Inference and TTS tabs."
        ),
        value=load_config_filter(),
        interactive=True,
    )
    # Persist immediately on every toggle.
    checkbox.change(fn=save_config_filter, inputs=[checkbox], outputs=[])
    return checkbox
tabs/settings/sections/lang.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import json
3
+ import gradio as gr
4
+ from assets.i18n.i18n import I18nAuto
5
+
6
+ now_dir = os.getcwd()
7
+ sys.path.append(now_dir)
8
+
9
+ i18n = I18nAuto()
10
+
11
+ config_file = os.path.join(now_dir, "assets", "config.json")
12
+
13
+
14
def get_language_settings():
    """Return the configured UI language name, or the auto-detect sentinel.

    Reads assets/config.json; when the override flag is falsy the UI follows
    the system language.
    """
    with open(config_file, "r", encoding="utf8") as file:
        config = json.load(file)

    # Truthiness test instead of `== False`: also treats None/0 as "no override".
    if not config["lang"]["override"]:
        return "Language automatically detected in the system"
    return config["lang"]["selected_lang"]
22
+
23
+
24
def save_lang_settings(selected_language):
    """Persist the chosen UI language to config.json and prompt for a restart.

    Selecting the auto-detect sentinel clears the override flag; any other
    value sets the override and records the selection.
    """
    with open(config_file, "r", encoding="utf8") as file:
        config = json.load(file)

    if selected_language == "Language automatically detected in the system":
        config["lang"]["override"] = False
    else:
        config["lang"]["override"] = True
        config["lang"]["selected_lang"] = selected_language

    with open(config_file, "w", encoding="utf8") as file:
        json.dump(config, file, indent=2)

    # Notify only after the config has actually been written (previously the
    # toast fired before the save, and the message had a grammar error).
    gr.Info("Language has been saved. Restart Applio to apply the changes.")
38
+
39
+
40
def lang_tab():
    """Render the language dropdown; changes are saved and require a restart."""
    with gr.Column():
        selected_language = gr.Dropdown(
            label=i18n("Language"),
            info=i18n(
                "Select the language you want to use. (Requires restarting Applio)"
            ),
            value=get_language_settings(),
            # First choice is the auto-detect sentinel; the rest come from
            # the i18n catalog.
            choices=["Language automatically detected in the system"]
            + i18n._get_available_languages(),
            interactive=True,
        )

        selected_language.change(
            fn=save_lang_settings,
            inputs=[selected_language],
            outputs=[],
        )
tabs/settings/sections/model_author.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ import gradio as gr
9
+ from assets.i18n.i18n import I18nAuto
10
+
11
+ i18n = I18nAuto()
12
+
13
+
14
def set_model_author(model_author: str):
    """Store the model author's name in assets/config.json.

    Returns a human-readable confirmation string (also printed to stdout).
    """
    config_path = os.path.join(now_dir, "assets", "config.json")
    with open(config_path, "r") as handle:
        config = json.load(handle)
    config["model_author"] = model_author
    with open(config_path, "w") as handle:
        json.dump(config, handle, indent=4)
    message = f"Model author set to {model_author}."
    print(message)
    return message
25
+
26
+
27
def get_model_author():
    """Return the configured model author name, or None when unset."""
    with open(os.path.join(now_dir, "assets", "config.json"), "r") as f:
        config = json.load(f)
    # dict.get covers the missing-key case without a separate membership test.
    return config.get("model_author")
32
+
33
+
34
def model_author_tab():
    """Render the UI for setting the author name embedded in model metadata."""
    model_author_name = gr.Textbox(
        label=i18n("Model Author Name"),
        info=i18n("The name that will appear in the model information."),
        value=get_model_author(),
        placeholder=i18n("Enter your nickname"),
        interactive=True,
    )
    model_author_output_info = gr.Textbox(
        label=i18n("Output Information"),
        info=i18n("The output information will be displayed here."),
        value="",
        max_lines=1,
    )
    button = gr.Button(i18n("Set name"))

    button.click(
        fn=set_model_author,
        inputs=[model_author_name],
        outputs=[model_author_output_info],
    )
tabs/settings/sections/precision.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ import gradio as gr
9
+ from assets.i18n.i18n import I18nAuto
10
+
11
+ i18n = I18nAuto()
12
+
13
+
14
def set_precision(precision: str):
    """Write the chosen training/inference precision to assets/config.json.

    Returns a human-readable confirmation string (also printed to stdout).
    """
    config_path = os.path.join(now_dir, "assets", "config.json")
    with open(config_path, "r") as handle:
        config = json.load(handle)
    config["precision"] = precision
    with open(config_path, "w") as handle:
        json.dump(config, handle, indent=4)
    message = f"Precision set to {precision}."
    print(message)
    return message
25
+
26
+
27
def get_precision():
    """Return the configured precision ("fp32"/"fp16"/"bf16"), or None if unset."""
    with open(os.path.join(now_dir, "assets", "config.json"), "r") as f:
        config = json.load(f)
    # dict.get covers the missing-key case without a separate membership test.
    return config.get("precision")
32
+
33
+
34
def precision_tab():
    """Render the precision selector (fp32/fp16/bf16) and its status textbox."""
    precision = gr.Radio(
        label=i18n("Precision"),
        info=i18n("Select the precision you want to use for training and inference."),
        value=get_precision(),
        choices=["fp32", "fp16", "bf16"],
        interactive=True,
    )
    precision_info = gr.Textbox(
        label=i18n("Output Information"),
        info=i18n("The output information will be displayed here."),
        value="",
        max_lines=1,
    )
    button = gr.Button(i18n("Update precision"))

    button.click(
        fn=set_precision,
        inputs=[precision],
        outputs=[precision_info],
    )
tabs/settings/sections/presence.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+ import json
5
+ from assets.i18n.i18n import I18nAuto
6
+ from assets.discord_presence import RPCManager
7
+
8
+ now_dir = os.getcwd()
9
+ sys.path.append(now_dir)
10
+
11
+ i18n = I18nAuto()
12
+ config_file = os.path.join(now_dir, "assets", "config.json")
13
+
14
+
15
def load_config_presence():
    """Return the saved Discord-presence flag from config.json."""
    with open(config_file, "r", encoding="utf8") as handle:
        return json.load(handle)["discord_presence"]
19
+
20
+
21
def save_config(value):
    """Persist the Discord-presence flag to config.json."""
    with open(config_file, "r", encoding="utf8") as handle:
        settings = json.load(handle)
    settings["discord_presence"] = value
    with open(config_file, "w", encoding="utf8") as handle:
        json.dump(settings, handle, indent=2)
27
+
28
+
29
def presence_tab():
    """Render the Discord-presence checkbox and wire it to toggle()."""
    with gr.Row():
        with gr.Column():
            presence = gr.Checkbox(
                label=i18n("Enable Applio integration with Discord presence"),
                info=i18n(
                    "It will activate the possibility of displaying the current Applio activity in Discord."
                ),
                interactive=True,
                value=load_config_presence(),
            )
            # toggle() persists the flag and starts/stops the RPC client.
            presence.change(
                fn=toggle,
                inputs=[presence],
                outputs=[],
            )
45
+
46
+
47
def toggle(checkbox):
    """Persist the Discord-presence setting and start/stop the RPC client."""
    save_config(bool(checkbox))
    # Truthiness test instead of `== True`.
    if load_config_presence():
        try:
            RPCManager.start_presence()
        except KeyboardInterrupt:
            RPCManager.stop_presence()
    else:
        RPCManager.stop_presence()
tabs/settings/sections/restart.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import sys
4
+ import json
5
+
6
+ now_dir = os.getcwd()
7
+
8
+
9
def stop_train(model_name: str):
    """Kill the training processes recorded in logs/<model_name>/config.json.

    Removes the "process_pids" entry from the config before killing, so a
    crash mid-way cannot leave stale PIDs behind. Best-effort: a missing or
    unreadable config simply means there is nothing to stop.
    """
    pid_file_path = os.path.join(now_dir, "logs", model_name, "config.json")
    try:
        with open(pid_file_path, "r") as pid_file:
            pid_data = json.load(pid_file)
        pids = pid_data.get("process_pids", [])
        pid_data.pop("process_pids", None)
        with open(pid_file_path, "w") as pid_file:
            json.dump(pid_data, pid_file, indent=4)
        for pid in pids:
            try:
                os.kill(pid, 9)  # SIGKILL; the process may already be gone
            except OSError:
                # One dead PID must not prevent killing the remaining ones.
                pass
    except FileNotFoundError:
        pass  # no training in progress for this model
    except (OSError, json.JSONDecodeError) as error:
        # Narrowed from a bare `except: pass` that hid all failures.
        print(f"Could not stop training for {model_name}: {error}")
22
+
23
+
24
def stop_infer():
    """Kill running inference processes listed in assets/infer_pid.txt.

    Best-effort: a missing PID file means no inference is running; individual
    dead PIDs are skipped so the rest still get killed.
    """
    pid_file_path = os.path.join(now_dir, "assets", "infer_pid.txt")
    try:
        with open(pid_file_path, "r") as pid_file:
            pids = [int(pid) for pid in pid_file.readlines()]
        for pid in pids:
            try:
                os.kill(pid, 9)  # SIGKILL; the process may already be gone
            except OSError:
                pass
        os.remove(pid_file_path)
    except FileNotFoundError:
        pass  # nothing to stop
    except (OSError, ValueError) as error:
        # Narrowed from a bare `except: pass`; ValueError covers corrupt PIDs.
        print(f"Could not stop inference processes: {error}")
34
+
35
+
36
def restart_applio():
    """Clear the terminal and re-exec the current Python process in place."""
    # Clear the console: "clear" on POSIX, "cls" on Windows.
    if os.name != "nt":
        os.system("clear")
    else:
        os.system("cls")
    python = sys.executable
    # os.execl replaces this process with a fresh interpreter running the
    # same command line — this call does not return.
    os.execl(python, python, *sys.argv)
43
+
44
+
45
+ from assets.i18n.i18n import I18nAuto
46
+
47
+ i18n = I18nAuto()
48
+
49
+
50
def restart_tab():
    """Render the "Restart Applio" button wired to restart_applio()."""
    with gr.Row():
        with gr.Column():
            restart_button = gr.Button(i18n("Restart Applio"))
            restart_button.click(
                fn=restart_applio,
                inputs=[],
                outputs=[],
            )
tabs/settings/sections/themes.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+
5
+ from assets.i18n.i18n import I18nAuto
6
+ import assets.themes.loadThemes as loadThemes
7
+
8
+ now_dir = os.getcwd()
9
+ sys.path.append(now_dir)
10
+
11
+ i18n = I18nAuto()
12
+
13
+
14
def theme_tab():
    """Render the theme dropdown (applying a theme requires a restart)."""
    with gr.Row():
        with gr.Column():
            themes_select = gr.Dropdown(
                loadThemes.get_theme_list(),
                value=loadThemes.load_theme(),
                label=i18n("Theme"),
                info=i18n(
                    "Select the theme you want to use. (Requires restarting Applio)"
                ),
                visible=True,
            )
            themes_select.change(
                fn=loadThemes.select_theme,
                inputs=themes_select,
                outputs=[],
            )
tabs/settings/sections/version.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from assets.version_checker import compare_version
4
+ from assets.i18n.i18n import I18nAuto
5
+
6
+ i18n = I18nAuto()
7
+
8
+
9
def version_tab():
    """Render the version-checker textbox and its "Check for updates" button."""
    with gr.Row():
        with gr.Column():
            version_check = gr.Textbox(
                label=i18n("Version Checker"),
                info=i18n(
                    "Check which version of Applio is the latest to see if you need to update."
                ),
                interactive=False,
            )
            version_button = gr.Button(i18n("Check for updates"))
            version_button.click(
                fn=compare_version,
                inputs=[],
                outputs=[version_check],
            )
tabs/settings/settings.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+
5
+ now_dir = os.getcwd()
6
+ sys.path.append(now_dir)
7
+
8
+ from assets.i18n.i18n import I18nAuto
9
+
10
+ i18n = I18nAuto()
11
+
12
+ from tabs.settings.sections.presence import presence_tab
13
+ from tabs.settings.sections.themes import theme_tab
14
+ from tabs.settings.sections.version import version_tab
15
+ from tabs.settings.sections.lang import lang_tab
16
+ from tabs.settings.sections.restart import restart_tab
17
+ from tabs.settings.sections.model_author import model_author_tab
18
+ from tabs.settings.sections.precision import precision_tab
19
+ from tabs.settings.sections.filter import filter_tab, get_filter_trigger
20
+
21
+
22
def settings_tab(filter_state_trigger=None):
    """Assemble the Settings tab: general options plus training options.

    filter_state_trigger: optional hidden component that mirrors the filter
    checkbox state so other tabs can react; falls back to the shared singleton.
    """
    if filter_state_trigger is None:
        filter_state_trigger = get_filter_trigger()

    with gr.TabItem(label=i18n("General")):
        filter_component = filter_tab()

        # Mirror the checkbox into the hidden trigger as a string ("True"/"False").
        filter_component.change(
            fn=lambda checked: gr.update(value=str(checked)),
            inputs=[filter_component],
            outputs=[filter_state_trigger],
            show_progress=False,
        )
        presence_tab()
        theme_tab()
        version_tab()
        lang_tab()
        restart_tab()
    with gr.TabItem(label=i18n("Training")):
        model_author_tab()
        precision_tab()
tabs/train/train.py ADDED
@@ -0,0 +1,1033 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import sys
4
+ from multiprocessing import cpu_count
5
+
6
+ import gradio as gr
7
+
8
+ from assets.i18n.i18n import I18nAuto
9
+ from core import (
10
+ run_extract_script,
11
+ run_index_script,
12
+ run_preprocess_script,
13
+ run_prerequisites_script,
14
+ run_train_script,
15
+ )
16
+ from rvc.configs.config import get_gpu_info, get_number_of_gpus, max_vram_gpu
17
+ from rvc.lib.utils import format_title
18
+ from tabs.settings.sections.restart import stop_train
19
+
20
+ i18n = I18nAuto()
21
+ now_dir = os.getcwd()
22
+ sys.path.append(now_dir)
23
+
24
+
25
+ sup_audioext = {
26
+ "wav",
27
+ "mp3",
28
+ "flac",
29
+ "ogg",
30
+ "opus",
31
+ "m4a",
32
+ "mp4",
33
+ "aac",
34
+ "alac",
35
+ "wma",
36
+ "aiff",
37
+ "webm",
38
+ "ac3",
39
+ }
40
+
41
+ # Custom Pretraineds
42
+ pretraineds_custom_path = os.path.join(
43
+ now_dir, "rvc", "models", "pretraineds", "custom"
44
+ )
45
+
46
+ pretraineds_custom_path_relative = os.path.relpath(pretraineds_custom_path, now_dir)
47
+
48
+ custom_embedder_root = os.path.join(
49
+ now_dir, "rvc", "models", "embedders", "embedders_custom"
50
+ )
51
+ custom_embedder_root_relative = os.path.relpath(custom_embedder_root, now_dir)
52
+
53
+ os.makedirs(custom_embedder_root, exist_ok=True)
54
+ os.makedirs(pretraineds_custom_path_relative, exist_ok=True)
55
+
56
+
57
def get_pretrained_list(suffix):
    """Collect custom pretrained .pth files whose filename contains *suffix*."""
    matches = []
    for dirpath, _, filenames in os.walk(pretraineds_custom_path_relative):
        for filename in filenames:
            if filename.endswith(".pth") and suffix in filename:
                matches.append(os.path.join(dirpath, filename))
    return matches
64
+
65
+
66
+ pretraineds_list_d = get_pretrained_list("D")
67
+ pretraineds_list_g = get_pretrained_list("G")
68
+
69
+
70
def refresh_custom_pretraineds():
    """Return Gradio update payloads for the G and D pretrained dropdowns."""
    g_update = {"choices": sorted(get_pretrained_list("G")), "__type__": "update"}
    d_update = {"choices": sorted(get_pretrained_list("D")), "__type__": "update"}
    return g_update, d_update
75
+
76
+
77
+ # Dataset Creator
78
+ datasets_path = os.path.join(now_dir, "assets", "datasets")
79
+
80
+ if not os.path.exists(datasets_path):
81
+ os.makedirs(datasets_path)
82
+
83
+ datasets_path_relative = os.path.relpath(datasets_path, now_dir)
84
+
85
+
86
def get_datasets_list():
    """List dataset directories containing at least one supported audio file."""
    extensions = tuple(sup_audioext)
    found = []
    for dirpath, _, filenames in os.walk(datasets_path_relative):
        if any(name.endswith(extensions) for name in filenames):
            found.append(dirpath)
    return found
92
+
93
+
94
def refresh_datasets():
    """Return a Gradio update payload with the current dataset choices."""
    return {"__type__": "update", "choices": sorted(get_datasets_list())}
96
+
97
+
98
+ # Model Names
99
+ models_path = os.path.join(now_dir, "logs")
100
+
101
+
102
def get_models_list():
    """List model folders under logs/, excluding internal directories."""
    excluded_terms = ("zips", "mute", "reference")
    entries = []
    for entry in os.listdir(models_path):
        if not os.path.isdir(os.path.join(models_path, entry)):
            continue
        if any(term in entry for term in excluded_terms):
            continue
        entries.append(os.path.basename(entry))
    return entries
109
+
110
+
111
def refresh_models():
    """Return a Gradio update payload with the current model-name choices."""
    return {"__type__": "update", "choices": sorted(get_models_list())}
113
+
114
+
115
+ # Refresh Models and Datasets
116
def refresh_models_and_datasets():
    """Refresh both the model-name and dataset dropdowns in a single call."""
    models_update = {"choices": sorted(get_models_list()), "__type__": "update"}
    datasets_update = {"choices": sorted(get_datasets_list()), "__type__": "update"}
    return models_update, datasets_update
121
+
122
+
123
+ # Refresh Custom Embedders
124
def get_embedder_custom_list():
    """List every custom-embedder folder (as a path relative to the project)."""
    folders = []
    for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative):
        folders.extend(os.path.join(dirpath, name) for name in dirnames)
    return folders
130
+
131
+
132
def refresh_custom_embedder_list():
    """Return a Gradio update payload with the custom embedder choices."""
    return {"__type__": "update", "choices": sorted(get_embedder_custom_list())}
134
+
135
+
136
+ # Drop Model
137
def save_drop_model(dropbox):
    """Copy a dropped pretrained .pth into the custom pretraineds folder.

    Always returns None so the Gradio file component is cleared.
    """
    # Guard clause: reject anything that is not a .pth file.
    if ".pth" not in dropbox:
        gr.Info(
            i18n(
                "The file you dropped is not a valid pretrained file. Please try again."
            )
        )
        return None
    file_name = os.path.basename(dropbox)
    pretrained_path = os.path.join(pretraineds_custom_path_relative, file_name)
    # Replace any existing file of the same name.
    if os.path.exists(pretrained_path):
        os.remove(pretrained_path)
    shutil.copy(dropbox, pretrained_path)
    gr.Info(
        i18n(
            "Click the refresh button to see the pretrained file in the dropdown menu."
        )
    )
    return None
+
157
+
158
+ # Drop Dataset
159
+ def save_drop_dataset_audio(dropbox, dataset_name):
160
+ if not dataset_name:
161
+ gr.Info("Please enter a valid dataset name. Please try again.")
162
+ return None, None
163
+ else:
164
+ file_extension = os.path.splitext(dropbox)[1][1:].lower()
165
+ if file_extension not in sup_audioext:
166
+ gr.Info("The file you dropped is not a valid audio file. Please try again.")
167
+ else:
168
+ dataset_name = format_title(dataset_name)
169
+ audio_file = format_title(os.path.basename(dropbox))
170
+ dataset_path = os.path.join(now_dir, "assets", "datasets", dataset_name)
171
+ if not os.path.exists(dataset_path):
172
+ os.makedirs(dataset_path)
173
+ destination_path = os.path.join(dataset_path, audio_file)
174
+ if os.path.exists(destination_path):
175
+ os.remove(destination_path)
176
+ shutil.copy(dropbox, destination_path)
177
+ gr.Info(
178
+ i18n(
179
+ "The audio file has been successfully added to the dataset. Please click the preprocess button."
180
+ )
181
+ )
182
+ dataset_path = os.path.dirname(destination_path)
183
+ relative_dataset_path = os.path.relpath(dataset_path, now_dir)
184
+
185
+ return None, relative_dataset_path
186
+
187
+
188
+ # Drop Custom Embedder
189
+ def create_folder_and_move_files(folder_name, bin_file, config_file):
190
+ if not folder_name:
191
+ return "Folder name must not be empty."
192
+
193
+ folder_name = os.path.basename(folder_name)
194
+ target_folder = os.path.join(custom_embedder_root, folder_name)
195
+
196
+ normalized_target_folder = os.path.abspath(target_folder)
197
+ normalized_custom_embedder_root = os.path.abspath(custom_embedder_root)
198
+
199
+ if not normalized_target_folder.startswith(normalized_custom_embedder_root):
200
+ return "Invalid folder name. Folder must be within the custom embedder root directory."
201
+
202
+ os.makedirs(target_folder, exist_ok=True)
203
+
204
+ if bin_file:
205
+ shutil.copy(bin_file, os.path.join(target_folder, os.path.basename(bin_file)))
206
+ if config_file:
207
+ shutil.copy(
208
+ config_file, os.path.join(target_folder, os.path.basename(config_file))
209
+ )
210
+
211
+ return f"Files moved to folder {target_folder}"
212
+
213
+
214
def refresh_embedders_folders():
    """Return the current list of custom embedder folders."""
    found = []
    for dirpath, dirnames, _ in os.walk(custom_embedder_root_relative):
        for dirname in dirnames:
            found.append(os.path.join(dirpath, dirname))
    return found
221
+
222
+
223
+ # Export
224
+ def get_pth_list():
225
+ return [
226
+ os.path.relpath(os.path.join(dirpath, filename), now_dir)
227
+ for dirpath, _, filenames in os.walk(models_path)
228
+ for filename in filenames
229
+ if filename.endswith(".pth")
230
+ ]
231
+
232
+
233
def get_index_list():
    """List exportable .index files under logs/ (skipping "trained" indexes)."""
    paths = []
    for dirpath, _, filenames in os.walk(models_path):
        for filename in filenames:
            if filename.endswith(".index") and "trained" not in filename:
                paths.append(os.path.relpath(os.path.join(dirpath, filename), now_dir))
    return paths
240
+
241
+
242
def refresh_pth_and_index_list():
    """Gradio update payloads for the exportable .pth and .index dropdowns."""
    pth_update = {"choices": sorted(get_pth_list()), "__type__": "update"}
    index_update = {"choices": sorted(get_index_list()), "__type__": "update"}
    return pth_update, index_update
247
+
248
+
249
+ # Export Pth and Index Files
250
+ def export_pth(pth_path):
251
+ allowed_paths = get_pth_list()
252
+ normalized_allowed_paths = [
253
+ os.path.abspath(os.path.join(now_dir, p)) for p in allowed_paths
254
+ ]
255
+ normalized_pth_path = os.path.abspath(os.path.join(now_dir, pth_path))
256
+
257
+ if normalized_pth_path in normalized_allowed_paths:
258
+ return pth_path
259
+ else:
260
+ print(f"Attempted to export invalid pth path: {pth_path}")
261
+ return None
262
+
263
+
264
def export_index(index_path):
    """Return index_path only if it matches a known exportable .index file.

    Guards the file-download endpoint against arbitrary path requests.
    """
    valid_paths = {
        os.path.abspath(os.path.join(now_dir, p)) for p in get_index_list()
    }
    candidate = os.path.abspath(os.path.join(now_dir, index_path))
    if candidate not in valid_paths:
        print(f"Attempted to export invalid index path: {index_path}")
        return None
    return index_path
276
+
277
+
278
+ # Upload to Google Drive
279
+ def upload_to_google_drive(pth_path, index_path):
280
+ def upload_file(file_path):
281
+ if file_path:
282
+ try:
283
+ gr.Info(f"Uploading {pth_path} to Google Drive...")
284
+ google_drive_folder = "/content/drive/MyDrive/ApplioExported"
285
+ if not os.path.exists(google_drive_folder):
286
+ os.makedirs(google_drive_folder)
287
+ google_drive_file_path = os.path.join(
288
+ google_drive_folder, os.path.basename(file_path)
289
+ )
290
+ if os.path.exists(google_drive_file_path):
291
+ os.remove(google_drive_file_path)
292
+ shutil.copy2(file_path, google_drive_file_path)
293
+ gr.Info("File uploaded successfully.")
294
+ except Exception as error:
295
+ print(f"An error occurred uploading to Google Drive: {error}")
296
+ gr.Info("Error uploading to Google Drive")
297
+
298
+ upload_file(pth_path)
299
+ upload_file(index_path)
300
+
301
+
302
def auto_enable_checkpointing():
    """Decide the default for the "Checkpointing" training checkbox.

    Returns ``True`` when GPU 0 has less than 6 GB of VRAM (gradient
    checkpointing trades speed for memory on small cards), ``False``
    otherwise or when the VRAM query fails (e.g. no GPU present).
    """
    try:
        return max_vram_gpu(0) < 6
    except Exception:
        # Bug fix: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt. No GPU / query failure simply
        # means "don't enable checkpointing by default".
        return False
307
+
308
+
309
# Train Tab
def train_tab():
    """Build the "Train" tab UI and wire all of its event handlers.

    Lays out five sections — model settings, preprocessing, feature
    extraction, training and model export — and connects the widgets to
    the corresponding ``run_*_script`` entry points. All widgets are
    created inside the caller's active ``gr.Blocks`` context; the
    function returns nothing.

    Fixes applied in this revision:
    - ``include_mutes`` passed ``value`` twice (positional ``2`` and
      keyword ``value=True``); the boolean keyword was removed.
    - the ``embedder_model.change`` handler was registered twice; one
      registration suffices.
    """
    # Model settings section
    with gr.Accordion(i18n("Model Settings")):
        with gr.Row():
            with gr.Column():
                model_name = gr.Dropdown(
                    label=i18n("Model Name"),
                    info=i18n("Name of the new model."),
                    choices=get_models_list(),
                    value="my-project",
                    interactive=True,
                    allow_custom_value=True,
                )
                architecture = gr.Radio(
                    label=i18n("Architecture"),
                    info=i18n(
                        "Choose the model architecture:\n- **RVC (V2)**: Default option, compatible with all clients.\n- **Applio**: Advanced quality with improved vocoders and higher sample rates, Applio-only."
                    ),
                    choices=["RVC", "Applio"],
                    value="RVC",
                    interactive=True,
                    visible=False,  # to be visible once pretraineds are ready
                )
            with gr.Column():
                sampling_rate = gr.Radio(
                    label=i18n("Sampling Rate"),
                    info=i18n("The sampling rate of the audio files."),
                    choices=["32000", "40000", "48000"],
                    value="40000",
                    interactive=True,
                )
                vocoder = gr.Radio(
                    label=i18n("Vocoder"),
                    info=i18n(
                        "Choose the vocoder for audio synthesis:\n- **HiFi-GAN**: Default option, compatible with all clients.\n- **MRF HiFi-GAN**: Higher fidelity, Applio-only.\n- **RefineGAN**: Superior audio quality, Applio-only."
                    ),
                    choices=["HiFi-GAN", "MRF HiFi-GAN", "RefineGAN"],
                    value="HiFi-GAN",
                    interactive=False,
                    visible=False,  # to be visible once pretraineds are ready
                )
        with gr.Accordion(
            i18n("Advanced Settings"),
            open=False,
        ):
            with gr.Row():
                with gr.Column():
                    cpu_cores = gr.Slider(
                        1,
                        min(cpu_count(), 32),  # max 32 parallel processes
                        min(cpu_count(), 32),
                        step=1,
                        label=i18n("CPU Cores"),
                        info=i18n(
                            "The number of CPU cores to use in the extraction process. The default setting are your cpu cores, which is recommended for most cases."
                        ),
                        interactive=True,
                    )

                with gr.Column():
                    gpu = gr.Textbox(
                        label=i18n("GPU Number"),
                        info=i18n(
                            "Specify the number of GPUs you wish to utilize for extracting by entering them separated by hyphens (-)."
                        ),
                        placeholder=i18n("0 to ∞ separated by -"),
                        value=str(get_number_of_gpus()),
                        interactive=True,
                    )
                    # Read-only diagnostic display; intentionally unbound.
                    gr.Textbox(
                        label=i18n("GPU Information"),
                        info=i18n("The GPU information will be displayed here."),
                        value=get_gpu_info(),
                        interactive=False,
                    )
    # Preprocess section
    with gr.Accordion(i18n("Preprocess")):
        dataset_path = gr.Dropdown(
            label=i18n("Dataset Path"),
            info=i18n("Path to the dataset folder."),
            # placeholder=i18n("Enter dataset path"),
            choices=get_datasets_list(),
            allow_custom_value=True,
            interactive=True,
        )
        dataset_creator = gr.Checkbox(
            label=i18n("Dataset Creator"),
            value=False,
            interactive=True,
            visible=True,
        )
        with gr.Column(visible=False) as dataset_creator_settings:
            with gr.Accordion(i18n("Dataset Creator")):
                dataset_name = gr.Textbox(
                    label=i18n("Dataset Name"),
                    info=i18n("Name of the new dataset."),
                    placeholder=i18n("Enter dataset name"),
                    interactive=True,
                )
                upload_audio_dataset = gr.File(
                    label=i18n("Upload Audio Dataset"),
                    type="filepath",
                    interactive=True,
                )
        refresh = gr.Button(i18n("Refresh"))

        with gr.Accordion(i18n("Advanced Settings"), open=False):
            cut_preprocess = gr.Radio(
                label=i18n("Audio cutting"),
                info=i18n(
                    "Audio file slicing method: Select 'Skip' if the files are already pre-sliced, 'Simple' if excessive silence has already been removed from the files, or 'Automatic' for automatic silence detection and slicing around it."
                ),
                choices=["Skip", "Simple", "Automatic"],
                value="Automatic",
                interactive=True,
            )
            with gr.Row():
                chunk_len = gr.Slider(
                    0.5,
                    5.0,
                    3.0,
                    step=0.1,
                    label=i18n("Chunk length (sec)"),
                    info=i18n("Length of the audio slice for 'Simple' method."),
                    interactive=True,
                )
                overlap_len = gr.Slider(
                    0.0,
                    0.4,
                    0.3,
                    step=0.1,
                    label=i18n("Overlap length (sec)"),
                    info=i18n(
                        "Length of the overlap between slices for 'Simple' method."
                    ),
                    interactive=True,
                )

            with gr.Row():
                process_effects = gr.Checkbox(
                    label=i18n("Noise filter"),
                    info=i18n(
                        "It's recommended to deactivate this option if your dataset has already been processed."
                    ),
                    value=True,
                    interactive=True,
                    visible=True,
                )

                normalization_mode = gr.Radio(
                    label=i18n("Normalization mode"),
                    info=i18n(
                        "Audio normalization: Select 'none' if the files are already normalized, 'pre' to normalize the entire input file at once, or 'post' to normalize each slice individually."
                    ),
                    choices=["none", "pre", "post"],
                    value="none",
                    interactive=True,
                    visible=True,
                )

                noise_reduction = gr.Checkbox(
                    label=i18n("Noise Reduction"),
                    info=i18n(
                        "It's recommended keep deactivate this option if your dataset has already been processed."
                    ),
                    value=False,
                    interactive=True,
                    visible=True,
                )
                clean_strength = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label=i18n("Noise Reduction Strength"),
                    info=i18n(
                        "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
                    ),
                    visible=False,
                    value=0.5,
                    interactive=True,
                )
        preprocess_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )

        with gr.Row():
            preprocess_button = gr.Button(i18n("Preprocess Dataset"))
            preprocess_button.click(
                fn=run_preprocess_script,
                inputs=[
                    model_name,
                    dataset_path,
                    sampling_rate,
                    cpu_cores,
                    cut_preprocess,
                    process_effects,
                    noise_reduction,
                    clean_strength,
                    chunk_len,
                    overlap_len,
                    normalization_mode,
                ],
                outputs=[preprocess_output_info],
            )

    # Extract section
    with gr.Accordion(i18n("Extract")):
        with gr.Row():
            f0_method = gr.Radio(
                label=i18n("Pitch extraction algorithm"),
                info=i18n(
                    "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
                ),
                choices=["crepe", "crepe-tiny", "rmvpe", "fcpe"],
                value="rmvpe",
                interactive=True,
            )

            embedder_model = gr.Radio(
                label=i18n("Embedder Model"),
                info=i18n("Model used for learning speaker embedding."),
                choices=[
                    "contentvec",
                    "spin",
                    "spin-v2",
                    "chinese-hubert-base",
                    "japanese-hubert-base",
                    "korean-hubert-base",
                    "custom",
                ],
                value="contentvec",
                interactive=True,
            )
        # Bug fix: the slider previously received `value` twice — the
        # positional default 2 below plus a stray `value=True` keyword.
        include_mutes = gr.Slider(
            0,
            10,
            2,
            step=1,
            label=i18n("Silent training files"),
            info=i18n(
                "Adding several silent files to the training set enables the model to handle pure silence in inferred audio files. Select 0 if your dataset is clean and already contains segments of pure silence."
            ),
            interactive=True,
        )
        with gr.Row(visible=False) as embedder_custom:
            with gr.Accordion(i18n("Custom Embedder"), open=True):
                with gr.Row():
                    embedder_model_custom = gr.Dropdown(
                        label=i18n("Select Custom Embedder"),
                        choices=refresh_embedders_folders(),
                        interactive=True,
                        allow_custom_value=True,
                    )
                    refresh_embedders_button = gr.Button(i18n("Refresh embedders"))
                folder_name_input = gr.Textbox(
                    label=i18n("Folder Name"), interactive=True
                )
                with gr.Row():
                    bin_file_upload = gr.File(
                        label=i18n("Upload .bin"), type="filepath", interactive=True
                    )
                    config_file_upload = gr.File(
                        label=i18n("Upload .json"), type="filepath", interactive=True
                    )
                move_files_button = gr.Button(
                    i18n("Move files to custom embedder folder")
                )

        extract_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )
        extract_button = gr.Button(i18n("Extract Features"))
        extract_button.click(
            fn=run_extract_script,
            inputs=[
                model_name,
                f0_method,
                cpu_cores,
                gpu,
                sampling_rate,
                embedder_model,
                embedder_model_custom,
                include_mutes,
            ],
            outputs=[extract_output_info],
        )

    # Training section
    with gr.Accordion(i18n("Training")):
        with gr.Row():
            batch_size = gr.Slider(
                1,
                64,
                4,
                step=1,
                label=i18n("Batch Size"),
                info=i18n(
                    "It's advisable to align it with the available VRAM of your GPU. A setting of 4 offers improved accuracy but slower processing, while 8 provides faster and standard results."
                ),
                interactive=True,
            )
            save_every_epoch = gr.Slider(
                1,
                100,
                10,
                step=1,
                label=i18n("Save Every Epoch"),
                info=i18n("Determine at how many epochs the model will saved at."),
                interactive=True,
            )
            total_epoch = gr.Slider(
                1,
                10000,
                500,
                step=1,
                label=i18n("Total Epoch"),
                info=i18n(
                    "Specifies the overall quantity of epochs for the model training process."
                ),
                interactive=True,
            )
        with gr.Accordion(i18n("Advanced Settings"), open=False):
            with gr.Row():
                with gr.Column():
                    save_only_latest = gr.Checkbox(
                        label=i18n("Save Only Latest"),
                        info=i18n(
                            "Enabling this setting will result in the G and D files saving only their most recent versions, effectively conserving storage space."
                        ),
                        value=True,
                        interactive=True,
                    )
                    save_every_weights = gr.Checkbox(
                        label=i18n("Save Every Weights"),
                        info=i18n(
                            "This setting enables you to save the weights of the model at the conclusion of each epoch."
                        ),
                        value=True,
                        interactive=True,
                    )
                    pretrained = gr.Checkbox(
                        label=i18n("Pretrained"),
                        info=i18n(
                            "Utilize pretrained models when training your own. This approach reduces training duration and enhances overall quality."
                        ),
                        value=True,
                        interactive=True,
                    )
                with gr.Column():
                    cleanup = gr.Checkbox(
                        label=i18n("Fresh Training"),
                        info=i18n(
                            "Enable this setting only if you are training a new model from scratch or restarting the training. Deletes all previously generated weights and tensorboard logs."
                        ),
                        value=False,
                        interactive=True,
                    )
                    cache_dataset_in_gpu = gr.Checkbox(
                        label=i18n("Cache Dataset in GPU"),
                        info=i18n(
                            "Cache the dataset in GPU memory to speed up the training process."
                        ),
                        value=False,
                        interactive=True,
                    )
                    checkpointing = gr.Checkbox(
                        label=i18n("Checkpointing"),
                        info=i18n(
                            "Enables memory-efficient training. This reduces VRAM usage at the cost of slower training speed. It is useful for GPUs with limited memory (e.g., <6GB VRAM) or when training with a batch size larger than what your GPU can normally accommodate."
                        ),
                        value=auto_enable_checkpointing(),
                        interactive=True,
                    )
            with gr.Row():
                custom_pretrained = gr.Checkbox(
                    label=i18n("Custom Pretrained"),
                    info=i18n(
                        "Utilizing custom pretrained models can lead to superior results, as selecting the most suitable pretrained models tailored to the specific use case can significantly enhance performance."
                    ),
                    value=False,
                    interactive=True,
                )
                overtraining_detector = gr.Checkbox(
                    label=i18n("Overtraining Detector"),
                    info=i18n(
                        "Detect overtraining to prevent the model from learning the training data too well and losing the ability to generalize to new data."
                    ),
                    value=False,
                    interactive=True,
                )
            with gr.Row():
                with gr.Column(visible=False) as pretrained_custom_settings:
                    with gr.Accordion(i18n("Pretrained Custom Settings")):
                        upload_pretrained = gr.File(
                            label=i18n("Upload Pretrained Model"),
                            type="filepath",
                            interactive=True,
                        )
                        refresh_custom_pretaineds_button = gr.Button(
                            i18n("Refresh Custom Pretraineds")
                        )
                        g_pretrained_path = gr.Dropdown(
                            label=i18n("Custom Pretrained G"),
                            info=i18n(
                                "Select the custom pretrained model for the generator."
                            ),
                            choices=sorted(pretraineds_list_g),
                            interactive=True,
                            allow_custom_value=True,
                        )
                        d_pretrained_path = gr.Dropdown(
                            label=i18n("Custom Pretrained D"),
                            info=i18n(
                                "Select the custom pretrained model for the discriminator."
                            ),
                            choices=sorted(pretraineds_list_d),
                            interactive=True,
                            allow_custom_value=True,
                        )

                with gr.Column(visible=False) as overtraining_settings:
                    with gr.Accordion(i18n("Overtraining Detector Settings")):
                        overtraining_threshold = gr.Slider(
                            1,
                            100,
                            50,
                            step=1,
                            label=i18n("Overtraining Threshold"),
                            info=i18n(
                                "Set the maximum number of epochs you want your model to stop training if no improvement is detected."
                            ),
                            interactive=True,
                        )
            index_algorithm = gr.Radio(
                label=i18n("Index Algorithm"),
                info=i18n(
                    "KMeans is a clustering algorithm that divides the dataset into K clusters. This setting is particularly useful for large datasets."
                ),
                choices=["Auto", "Faiss", "KMeans"],
                value="Auto",
                interactive=True,
            )

        def enforce_terms(terms_accepted, *args):
            # Gate training behind explicit terms acceptance; the message is
            # routed to the output textbox so the user sees why nothing ran.
            if not terms_accepted:
                message = "You must agree to the Terms of Use to proceed."
                gr.Info(message)
                return message
            return run_train_script(*args)

        terms_checkbox = gr.Checkbox(
            label=i18n("I agree to the terms of use"),
            info=i18n(
                "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your training."
            ),
            value=False,
            interactive=True,
        )
        train_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
            max_lines=8,
            interactive=False,
        )

        with gr.Row():
            train_button = gr.Button(i18n("Start Training"))
            train_button.click(
                fn=enforce_terms,
                inputs=[
                    terms_checkbox,
                    model_name,
                    save_every_epoch,
                    save_only_latest,
                    save_every_weights,
                    total_epoch,
                    sampling_rate,
                    batch_size,
                    gpu,
                    overtraining_detector,
                    overtraining_threshold,
                    pretrained,
                    cleanup,
                    index_algorithm,
                    cache_dataset_in_gpu,
                    custom_pretrained,
                    g_pretrained_path,
                    d_pretrained_path,
                    vocoder,
                    checkpointing,
                ],
                outputs=[train_output_info],
            )

            stop_train_button = gr.Button(i18n("Stop Training"), visible=False)
            stop_train_button.click(
                fn=stop_train,
                inputs=[model_name],
                outputs=[],
            )

            index_button = gr.Button(i18n("Generate Index"))
            index_button.click(
                fn=run_index_script,
                inputs=[model_name, index_algorithm],
                outputs=[train_output_info],
            )

    # Export Model section
    with gr.Accordion(i18n("Export Model"), open=False):
        if os.name != "nt":
            gr.Markdown(
                i18n(
                    "The button 'Upload' is only for google colab: Uploads the exported files to the ApplioExported folder in your Google Drive."
                )
            )
        with gr.Row():
            with gr.Column():
                pth_file_export = gr.File(
                    label=i18n("Exported Pth file"),
                    type="filepath",
                    value=None,
                    interactive=False,
                )
                pth_dropdown_export = gr.Dropdown(
                    label=i18n("Pth file"),
                    info=i18n("Select the pth file to be exported"),
                    choices=get_pth_list(),
                    value=None,
                    interactive=True,
                    allow_custom_value=True,
                )
            with gr.Column():
                index_file_export = gr.File(
                    label=i18n("Exported Index File"),
                    type="filepath",
                    value=None,
                    interactive=False,
                )
                index_dropdown_export = gr.Dropdown(
                    label=i18n("Index File"),
                    info=i18n("Select the index file to be exported"),
                    choices=get_index_list(),
                    value=None,
                    interactive=True,
                    allow_custom_value=True,
                )
        with gr.Row():
            with gr.Column():
                refresh_export = gr.Button(i18n("Refresh"))
                if os.name != "nt":
                    # The Drive upload only makes sense on Colab (non-Windows).
                    upload_exported = gr.Button(i18n("Upload"))
                    upload_exported.click(
                        fn=upload_to_google_drive,
                        inputs=[pth_dropdown_export, index_dropdown_export],
                        outputs=[],
                    )

    # --- Event-handler helpers -------------------------------------------

    def toggle_visible(checkbox):
        # Generic show/hide payload driven by a checkbox value.
        return {"visible": checkbox, "__type__": "update"}

    def toggle_pretrained(pretrained, custom_pretrained):
        # Show the custom-pretrained checkbox only while "Pretrained" is on;
        # the settings column additionally requires custom_pretrained.
        if not custom_pretrained:
            return {"visible": pretrained, "__type__": "update"}, {
                "visible": False,
                "__type__": "update",
            }
        else:
            return {"visible": pretrained, "__type__": "update"}, {
                "visible": pretrained,
                "__type__": "update",
            }

    def enable_stop_train_button():
        # Hide "Start Training", show "Stop Training".
        return {"visible": False, "__type__": "update"}, {
            "visible": True,
            "__type__": "update",
        }

    def disable_stop_train_button():
        # Restore "Start Training" once output arrives (training finished).
        return {"visible": True, "__type__": "update"}, {
            "visible": False,
            "__type__": "update",
        }

    def download_prerequisites():
        # NOTE(review): defined but not wired to any widget in this file —
        # presumably invoked elsewhere or kept for future use; verify.
        gr.Info(
            "Checking for prerequisites with pitch guidance... Missing files will be downloaded. If you already have them, this step will be skipped."
        )
        run_prerequisites_script(
            pretraineds_hifigan=True,
            models=False,
            exe=False,
        )
        gr.Info(
            "Prerequisites check complete. Missing files were downloaded, and you may now start preprocessing."
        )

    def toggle_visible_embedder_custom(embedder_model):
        # The custom-embedder panel only applies to the "custom" choice.
        if embedder_model == "custom":
            return {"visible": True, "__type__": "update"}
        return {"visible": False, "__type__": "update"}

    def toggle_architecture(architecture):
        # "Applio" unlocks vocoder choice; "RVC" pins 40k/HiFi-GAN defaults.
        if architecture == "Applio":
            return {
                "choices": ["32000", "40000", "48000"],
                "__type__": "update",
            }, {
                "interactive": True,
                "__type__": "update",
            }
        else:
            return {
                "choices": ["32000", "40000", "48000"],
                "__type__": "update",
                "value": "40000",
            }, {"interactive": False, "__type__": "update", "value": "HiFi-GAN"}

    def update_slider_visibility(noise_reduction):
        return gr.update(visible=noise_reduction)

    # --- Wiring -----------------------------------------------------------

    noise_reduction.change(
        fn=update_slider_visibility,
        inputs=noise_reduction,
        outputs=clean_strength,
    )
    architecture.change(
        fn=toggle_architecture,
        inputs=[architecture],
        outputs=[sampling_rate, vocoder],
    )
    refresh.click(
        fn=refresh_models_and_datasets,
        inputs=[],
        outputs=[model_name, dataset_path],
    )
    dataset_creator.change(
        fn=toggle_visible,
        inputs=[dataset_creator],
        outputs=[dataset_creator_settings],
    )
    upload_audio_dataset.upload(
        fn=save_drop_dataset_audio,
        inputs=[upload_audio_dataset, dataset_name],
        outputs=[upload_audio_dataset, dataset_path],
    )
    # Bug fix: this handler was registered twice verbatim; once is enough.
    embedder_model.change(
        fn=toggle_visible_embedder_custom,
        inputs=[embedder_model],
        outputs=[embedder_custom],
    )
    move_files_button.click(
        fn=create_folder_and_move_files,
        inputs=[folder_name_input, bin_file_upload, config_file_upload],
        outputs=[],
    )
    refresh_embedders_button.click(
        fn=refresh_embedders_folders, inputs=[], outputs=[embedder_model_custom]
    )
    pretrained.change(
        fn=toggle_pretrained,
        inputs=[pretrained, custom_pretrained],
        outputs=[custom_pretrained, pretrained_custom_settings],
    )
    custom_pretrained.change(
        fn=toggle_visible,
        inputs=[custom_pretrained],
        outputs=[pretrained_custom_settings],
    )
    refresh_custom_pretaineds_button.click(
        fn=refresh_custom_pretraineds,
        inputs=[],
        outputs=[g_pretrained_path, d_pretrained_path],
    )
    upload_pretrained.upload(
        fn=save_drop_model,
        inputs=[upload_pretrained],
        outputs=[upload_pretrained],
    )
    overtraining_detector.change(
        fn=toggle_visible,
        inputs=[overtraining_detector],
        outputs=[overtraining_settings],
    )
    train_button.click(
        fn=enable_stop_train_button,
        inputs=[],
        outputs=[train_button, stop_train_button],
    )
    train_output_info.change(
        fn=disable_stop_train_button,
        inputs=[],
        outputs=[train_button, stop_train_button],
    )
    pth_dropdown_export.change(
        fn=export_pth,
        inputs=[pth_dropdown_export],
        outputs=[pth_file_export],
    )
    index_dropdown_export.change(
        fn=export_index,
        inputs=[index_dropdown_export],
        outputs=[index_file_export],
    )
    refresh_export.click(
        fn=refresh_pth_and_index_list,
        inputs=[],
        outputs=[pth_dropdown_export, index_dropdown_export],
    )
tabs/tts/tts.py ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import random
4
+ import sys
5
+
6
+ import gradio as gr
7
+
8
+ now_dir = os.getcwd()
9
+ sys.path.append(now_dir)
10
+
11
+ from assets.i18n.i18n import I18nAuto
12
+ from core import run_tts_script
13
+ from tabs.settings.sections.filter import get_filter_trigger, load_config_filter
14
+ from tabs.inference.inference import (
15
+ change_choices,
16
+ create_folder_and_move_files,
17
+ get_files,
18
+ get_speakers_id,
19
+ match_index,
20
+ refresh_embedders_folders,
21
+ extract_model_and_epoch,
22
+ default_weight,
23
+ filter_dropdowns,
24
+ update_filter_visibility,
25
+ )
26
+
27
+ i18n = I18nAuto()
28
+
29
+
30
# Load the catalogue of EdgeTTS voices shipped with the project; each entry
# is a JSON object describing one voice.
with open(
    os.path.join("rvc", "lib", "tools", "tts_voices.json"), "r", encoding="utf-8"
) as file:
    tts_voices_data = json.load(file)

# Voice identifiers offered in the TTS dropdown. NOTE(review): entries
# missing a "ShortName" key become "" — presumably that never happens in
# tts_voices.json; verify.
short_names = [voice.get("ShortName", "") for voice in tts_voices_data]
36
+
37
+
38
def process_input(file_path):
    """Validate an uploaded text file and echo its path back to the UI.

    Opens the file and reads it once purely to confirm it decodes as
    UTF-8. Returns ``(file_path, file_path)`` on success (feeding both the
    file widget and the path textbox) or ``(None, None)`` when the file is
    not valid UTF-8, with a toast explaining the problem either way.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            file.read()
        # Fix: the messages were f-strings with no placeholders (F541).
        gr.Info("The file has been loaded!")
        return file_path, file_path
    except UnicodeDecodeError:
        gr.Info("The file has to be in UTF-8 encoding.")
        return None, None
47
+
48
+
49
+ # TTS tab
50
+ def tts_tab():
51
+ trigger = get_filter_trigger()
52
+ with gr.Column():
53
+ with gr.Row():
54
+ model_file = gr.Dropdown(
55
+ label=i18n("Voice Model"),
56
+ info=i18n("Select the voice model to use for the conversion."),
57
+ choices=sorted(get_files("model"), key=extract_model_and_epoch),
58
+ interactive=True,
59
+ value=default_weight,
60
+ allow_custom_value=True,
61
+ )
62
+ filter_box_tts = gr.Textbox(
63
+ label=i18n("Filter"),
64
+ info=i18n("Path must contain:"),
65
+ placeholder=i18n("Type to filter..."),
66
+ interactive=True,
67
+ scale=0.1,
68
+ visible=load_config_filter(),
69
+ elem_id="filter_box_tts",
70
+ )
71
+ index_file = gr.Dropdown(
72
+ label=i18n("Index File"),
73
+ info=i18n("Select the index file to use for the conversion."),
74
+ choices=sorted(get_files("index")),
75
+ value=match_index(default_weight),
76
+ interactive=True,
77
+ allow_custom_value=True,
78
+ )
79
+ filter_box_tts.blur(
80
+ fn=filter_dropdowns,
81
+ inputs=[filter_box_tts],
82
+ outputs=[model_file, index_file],
83
+ )
84
+ trigger.change(
85
+ fn=update_filter_visibility,
86
+ inputs=[trigger],
87
+ outputs=[filter_box_tts, model_file, index_file],
88
+ show_progress=False,
89
+ )
90
+ with gr.Row():
91
+ unload_button = gr.Button(i18n("Unload Voice"))
92
+ refresh_button = gr.Button(i18n("Refresh"))
93
+
94
+ unload_button.click(
95
+ fn=lambda: (
96
+ {"value": "", "__type__": "update"},
97
+ {"value": "", "__type__": "update"},
98
+ ),
99
+ inputs=[],
100
+ outputs=[model_file, index_file],
101
+ )
102
+
103
+ model_file.select(
104
+ fn=lambda model_file_value: match_index(model_file_value),
105
+ inputs=[model_file],
106
+ outputs=[index_file],
107
+ )
108
+
109
+ gr.Markdown(
110
+ i18n(
111
+ f"Applio is a Speech-to-Speech conversion software, utilizing EdgeTTS as middleware for running the Text-to-Speech (TTS) component. Read more about it [here!](https://docs.applio.org/applio/getting-started/tts)"
112
+ )
113
+ )
114
+ tts_voice = gr.Dropdown(
115
+ label=i18n("TTS Voices"),
116
+ info=i18n("Select the TTS voice to use for the conversion."),
117
+ choices=short_names,
118
+ interactive=True,
119
+ value=random.choice(short_names),
120
+ )
121
+
122
+ tts_rate = gr.Slider(
123
+ minimum=-100,
124
+ maximum=100,
125
+ step=1,
126
+ label=i18n("TTS Speed"),
127
+ info=i18n("Increase or decrease TTS speed."),
128
+ value=0,
129
+ interactive=True,
130
+ )
131
+
132
+ with gr.Tabs():
133
+ with gr.Tab(label=i18n("Text to Speech")):
134
+ tts_text = gr.Textbox(
135
+ label=i18n("Text to Synthesize"),
136
+ info=i18n("Enter the text to synthesize."),
137
+ placeholder=i18n("Enter text to synthesize"),
138
+ lines=3,
139
+ )
140
+ with gr.Tab(label=i18n("File to Speech")):
141
+ txt_file = gr.File(
142
+ label=i18n("Upload a .txt file"),
143
+ type="filepath",
144
+ )
145
+ input_tts_path = gr.Textbox(
146
+ label=i18n("Input path for text file"),
147
+ placeholder=i18n(
148
+ "The path to the text file that contains content for text to speech."
149
+ ),
150
+ value="",
151
+ interactive=True,
152
+ )
153
+
154
+ with gr.Accordion(i18n("Advanced Settings"), open=False):
155
+ with gr.Column():
156
+ output_tts_path = gr.Textbox(
157
+ label=i18n("Output Path for TTS Audio"),
158
+ placeholder=i18n("Enter output path"),
159
+ value=os.path.join(now_dir, "assets", "audios", "tts_output.wav"),
160
+ interactive=True,
161
+ )
162
+ output_rvc_path = gr.Textbox(
163
+ label=i18n("Output Path for RVC Audio"),
164
+ placeholder=i18n("Enter output path"),
165
+ value=os.path.join(now_dir, "assets", "audios", "tts_rvc_output.wav"),
166
+ interactive=True,
167
+ )
168
+ export_format = gr.Radio(
169
+ label=i18n("Export Format"),
170
+ info=i18n("Select the format to export the audio."),
171
+ choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
172
+ value="WAV",
173
+ interactive=True,
174
+ )
175
+ sid = gr.Dropdown(
176
+ label=i18n("Speaker ID"),
177
+ info=i18n("Select the speaker ID to use for the conversion."),
178
+ choices=get_speakers_id(model_file.value),
179
+ value=0,
180
+ interactive=True,
181
+ )
182
+ split_audio = gr.Checkbox(
183
+ label=i18n("Split Audio"),
184
+ info=i18n(
185
+ "Split the audio into chunks for inference to obtain better results in some cases."
186
+ ),
187
+ visible=True,
188
+ value=False,
189
+ interactive=True,
190
+ )
191
+ autotune = gr.Checkbox(
192
+ label=i18n("Autotune"),
193
+ info=i18n(
194
+ "Apply a soft autotune to your inferences, recommended for singing conversions."
195
+ ),
196
+ visible=True,
197
+ value=False,
198
+ interactive=True,
199
+ )
200
+ autotune_strength = gr.Slider(
201
+ minimum=0,
202
+ maximum=1,
203
+ label=i18n("Autotune Strength"),
204
+ info=i18n(
205
+ "Set the autotune strength - the more you increase it the more it will snap to the chromatic grid."
206
+ ),
207
+ visible=False,
208
+ value=1,
209
+ interactive=True,
210
+ )
211
+ proposed_pitch = gr.Checkbox(
212
+ label=i18n("Proposed Pitch"),
213
+ info=i18n(
214
+ "Adjust the input audio pitch to match the voice model range."
215
+ ),
216
+ visible=True,
217
+ value=False,
218
+ interactive=True,
219
+ )
220
+ proposed_pitch_threshold = gr.Slider(
221
+ minimum=50.0,
222
+ maximum=1200.0,
223
+ label=i18n("Proposed Pitch Threshold"),
224
+ info=i18n(
225
+ "Male voice models typically use 155.0 and female voice models typically use 255.0."
226
+ ),
227
+ visible=False,
228
+ value=155.0,
229
+ interactive=True,
230
+ )
231
+ clean_audio = gr.Checkbox(
232
+ label=i18n("Clean Audio"),
233
+ info=i18n(
234
+ "Clean your audio output using noise detection algorithms, recommended for speaking audios."
235
+ ),
236
+ visible=True,
237
+ value=False,
238
+ interactive=True,
239
+ )
240
+ clean_strength = gr.Slider(
241
+ minimum=0,
242
+ maximum=1,
243
+ label=i18n("Clean Strength"),
244
+ info=i18n(
245
+ "Set the clean-up level to the audio you want, the more you increase it the more it will clean up, but it is possible that the audio will be more compressed."
246
+ ),
247
+ visible=True,
248
+ value=0.5,
249
+ interactive=True,
250
+ )
251
+ pitch = gr.Slider(
252
+ minimum=-24,
253
+ maximum=24,
254
+ step=1,
255
+ label=i18n("Pitch"),
256
+ info=i18n(
257
+ "Set the pitch of the audio, the higher the value, the higher the pitch."
258
+ ),
259
+ value=0,
260
+ interactive=True,
261
+ )
262
+ index_rate = gr.Slider(
263
+ minimum=0,
264
+ maximum=1,
265
+ label=i18n("Search Feature Ratio"),
266
+ info=i18n(
267
+ "Influence exerted by the index file; a higher value corresponds to greater influence. However, opting for lower values can help mitigate artifacts present in the audio."
268
+ ),
269
+ value=0.75,
270
+ interactive=True,
271
+ )
272
+ rms_mix_rate = gr.Slider(
273
+ minimum=0,
274
+ maximum=1,
275
+ label=i18n("Volume Envelope"),
276
+ info=i18n(
277
+ "Substitute or blend with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is employed."
278
+ ),
279
+ value=1,
280
+ interactive=True,
281
+ )
282
+ protect = gr.Slider(
283
+ minimum=0,
284
+ maximum=0.5,
285
+ label=i18n("Protect Voiceless Consonants"),
286
+ info=i18n(
287
+ "Safeguard distinct consonants and breathing sounds to prevent electro-acoustic tearing and other artifacts. Pulling the parameter to its maximum value of 0.5 offers comprehensive protection. However, reducing this value might decrease the extent of protection while potentially mitigating the indexing effect."
288
+ ),
289
+ value=0.5,
290
+ interactive=True,
291
+ )
292
+ f0_method = gr.Radio(
293
+ label=i18n("Pitch extraction algorithm"),
294
+ info=i18n(
295
+ "Pitch extraction algorithm to use for the audio conversion. The default algorithm is rmvpe, which is recommended for most cases."
296
+ ),
297
+ choices=[
298
+ "crepe",
299
+ "crepe-tiny",
300
+ "rmvpe",
301
+ "fcpe",
302
+ "swift",
303
+ ],
304
+ value="rmvpe",
305
+ interactive=True,
306
+ )
307
+ embedder_model = gr.Radio(
308
+ label=i18n("Embedder Model"),
309
+ info=i18n("Model used for learning speaker embedding."),
310
+ choices=[
311
+ "contentvec",
312
+ "spin",
313
+ "spin-v2",
314
+ "chinese-hubert-base",
315
+ "japanese-hubert-base",
316
+ "korean-hubert-base",
317
+ "custom",
318
+ ],
319
+ value="contentvec",
320
+ interactive=True,
321
+ )
322
+ with gr.Column(visible=False) as embedder_custom:
323
+ with gr.Accordion(i18n("Custom Embedder"), open=True):
324
+ with gr.Row():
325
+ embedder_model_custom = gr.Dropdown(
326
+ label=i18n("Select Custom Embedder"),
327
+ choices=refresh_embedders_folders(),
328
+ interactive=True,
329
+ allow_custom_value=True,
330
+ )
331
+ refresh_embedders_button = gr.Button(i18n("Refresh embedders"))
332
+ folder_name_input = gr.Textbox(
333
+ label=i18n("Folder Name"), interactive=True
334
+ )
335
+ with gr.Row():
336
+ bin_file_upload = gr.File(
337
+ label=i18n("Upload .bin"),
338
+ type="filepath",
339
+ interactive=True,
340
+ )
341
+ config_file_upload = gr.File(
342
+ label=i18n("Upload .json"),
343
+ type="filepath",
344
+ interactive=True,
345
+ )
346
+ move_files_button = gr.Button(
347
+ i18n("Move files to custom embedder folder")
348
+ )
349
+ f0_file = gr.File(
350
+ label=i18n(
351
+ "The f0 curve represents the variations in the base frequency of a voice over time, showing how pitch rises and falls."
352
+ ),
353
+ visible=True,
354
+ )
355
+
356
def enforce_terms(terms_accepted, *args):
    """Gate the TTS conversion behind the terms-of-use checkbox.

    When the box is ticked, forwards every remaining positional argument
    straight to ``run_tts_script``; otherwise surfaces a notice via
    ``gr.Info`` and returns the message plus a ``None`` audio slot so the
    two output components still receive values.
    """
    if terms_accepted:
        # Happy path: hand the full argument list to the conversion script.
        return run_tts_script(*args)
    warning = "You must agree to the Terms of Use to proceed."
    gr.Info(warning)
    return warning, None
362
+
363
+ terms_checkbox = gr.Checkbox(
364
+ label=i18n("I agree to the terms of use"),
365
+ info=i18n(
366
+ "Please ensure compliance with the terms and conditions detailed in [this document](https://github.com/IAHispano/Applio/blob/main/TERMS_OF_USE.md) before proceeding with your inference."
367
+ ),
368
+ value=False,
369
+ interactive=True,
370
+ )
371
+ convert_button = gr.Button(i18n("Convert"))
372
+
373
+ with gr.Row():
374
+ vc_output1 = gr.Textbox(
375
+ label=i18n("Output Information"),
376
+ info=i18n("The output information will be displayed here."),
377
+ )
378
+ vc_output2 = gr.Audio(label=i18n("Export Audio"))
379
+
380
def toggle_visible(checkbox):
    """Return a Gradio update payload whose visibility mirrors *checkbox*."""
    # ``__type__: update`` is the dict-style gr.update() protocol.
    return dict(visible=checkbox, __type__="update")
382
+
383
def toggle_visible_embedder_custom(embedder_model):
    """Show the custom-embedder column only when "custom" is selected."""
    # Comparison yields the exact True/False the explicit branches produced.
    return {"visible": embedder_model == "custom", "__type__": "update"}
387
+
388
+ autotune.change(
389
+ fn=toggle_visible,
390
+ inputs=[autotune],
391
+ outputs=[autotune_strength],
392
+ )
393
+ proposed_pitch.change(
394
+ fn=toggle_visible,
395
+ inputs=[proposed_pitch],
396
+ outputs=[proposed_pitch_threshold],
397
+ )
398
+ clean_audio.change(
399
+ fn=toggle_visible,
400
+ inputs=[clean_audio],
401
+ outputs=[clean_strength],
402
+ )
403
+ refresh_button.click(
404
+ fn=change_choices,
405
+ inputs=[model_file],
406
+ outputs=[model_file, index_file, sid, sid],
407
+ ).then(
408
+ fn=filter_dropdowns,
409
+ inputs=[filter_box_tts],
410
+ outputs=[model_file, index_file],
411
+ )
412
+ txt_file.upload(
413
+ fn=process_input,
414
+ inputs=[txt_file],
415
+ outputs=[input_tts_path, txt_file],
416
+ )
417
+ embedder_model.change(
418
+ fn=toggle_visible_embedder_custom,
419
+ inputs=[embedder_model],
420
+ outputs=[embedder_custom],
421
+ )
422
+ move_files_button.click(
423
+ fn=create_folder_and_move_files,
424
+ inputs=[folder_name_input, bin_file_upload, config_file_upload],
425
+ outputs=[],
426
+ )
427
+ refresh_embedders_button.click(
428
+ fn=lambda: gr.update(choices=refresh_embedders_folders()),
429
+ inputs=[],
430
+ outputs=[embedder_model_custom],
431
+ )
432
+ convert_button.click(
433
+ fn=enforce_terms,
434
+ inputs=[
435
+ terms_checkbox,
436
+ input_tts_path,
437
+ tts_text,
438
+ tts_voice,
439
+ tts_rate,
440
+ pitch,
441
+ index_rate,
442
+ rms_mix_rate,
443
+ protect,
444
+ f0_method,
445
+ output_tts_path,
446
+ output_rvc_path,
447
+ model_file,
448
+ index_file,
449
+ split_audio,
450
+ autotune,
451
+ autotune_strength,
452
+ proposed_pitch,
453
+ proposed_pitch_threshold,
454
+ clean_audio,
455
+ clean_strength,
456
+ export_format,
457
+ embedder_model,
458
+ embedder_model_custom,
459
+ sid,
460
+ ],
461
+ outputs=[vc_output1, vc_output2],
462
+ )
tabs/voice_blender/voice_blender.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+
4
+ now_dir = os.getcwd()
5
+ sys.path.append(now_dir)
6
+
7
+ from assets.i18n.i18n import I18nAuto
8
+ from core import run_model_blender_script
9
+
10
# Module-level translator; i18n("...") wraps every user-facing UI string below.
i18n = I18nAuto()
11
+
12
+
13
def update_model_fusion(dropbox):
    """Mirror an uploaded model path into the path textbox.

    Returns a pair: the uploaded file path (textbox value) and ``None``
    (clears the drop zone so a new file can be dropped).
    """
    uploaded_path = dropbox
    return uploaded_path, None
15
+
16
+
17
def voice_blender_tab():
    """Build the "Voice Blender" tab UI and wire up its event handlers.

    Lets the user pick two .pth voice models (via drag-and-drop or a path
    textbox), choose a blend ratio, and fuse them into a new model through
    ``run_model_blender_script``. Component creation order defines the
    rendered layout, so statements must stay in this sequence.
    """
    gr.Markdown(i18n("## Voice Blender"))
    gr.Markdown(
        i18n(
            "Select two voice models, set your desired blend percentage, and blend them into an entirely new voice."
        )
    )
    with gr.Column():
        # Name used for the fused output model.
        model_fusion_name = gr.Textbox(
            label=i18n("Model Name"),
            info=i18n("Name of the new model."),
            value="",
            max_lines=1,
            interactive=True,
            placeholder=i18n("Enter model name"),
        )
        with gr.Row():
            # Model A: drop zone plus an editable path textbox kept in sync
            # by the .upload() handler wired below.
            with gr.Column():
                model_fusion_a_dropbox = gr.File(
                    label=i18n("Drag and drop your model here"), type="filepath"
                )
                model_fusion_a = gr.Textbox(
                    label=i18n("Path to Model"),
                    value="",
                    interactive=True,
                    placeholder=i18n("Enter path to model"),
                    info=i18n("You can also use a custom path."),
                )
            # Model B: same pairing as model A.
            with gr.Column():
                model_fusion_b_dropbox = gr.File(
                    label=i18n("Drag and drop your model here"), type="filepath"
                )
                model_fusion_b = gr.Textbox(
                    label=i18n("Path to Model"),
                    value="",
                    interactive=True,
                    placeholder=i18n("Enter path to model"),
                    info=i18n("You can also use a custom path."),
                )
        # Blend ratio in [0, 1]; presumably weights model A vs model B —
        # exact direction is defined by run_model_blender_script (verify there).
        alpha_a = gr.Slider(
            minimum=0,
            maximum=1,
            label=i18n("Blend Ratio"),
            value=0.5,
            interactive=True,
            info=i18n(
                "Adjusting the position more towards one side or the other will make the model more similar to the first or second."
            ),
        )
        model_fusion_button = gr.Button(i18n("Fusion"))
    with gr.Row():
        model_fusion_output_info = gr.Textbox(
            label=i18n("Output Information"),
            info=i18n("The output information will be displayed here."),
            value="",
        )
        # Download slot for the fused .pth produced by the blender script.
        model_fusion_pth_output = gr.File(
            label=i18n("Download Model"), type="filepath", interactive=False
        )

    # Fuse the two models; note the script reads the path textboxes, not
    # the drop zones (drop zones only pre-fill the textboxes).
    model_fusion_button.click(
        fn=run_model_blender_script,
        inputs=[
            model_fusion_name,
            model_fusion_a,
            model_fusion_b,
            alpha_a,
        ],
        outputs=[model_fusion_output_info, model_fusion_pth_output],
    )

    # Dropping a file copies its path into the textbox and clears the drop zone.
    model_fusion_a_dropbox.upload(
        fn=update_model_fusion,
        inputs=model_fusion_a_dropbox,
        outputs=[model_fusion_a, model_fusion_a_dropbox],
    )

    model_fusion_b_dropbox.upload(
        fn=update_model_fusion,
        inputs=model_fusion_b_dropbox,
        outputs=[model_fusion_b, model_fusion_b_dropbox],
    )