noblebarkrr
/

mvsepless_plugins

Model card Files Files and versions

xet

Community

noblebarkrr committed on Jul 17, 2025

Commit

9aa1213

verified ·

1 Parent(s): 77e5590

Аналог AICoverGen для MVSepLess и Vbach

Browse files

Files changed (1) hide show

vbachgen.py +999 -0

vbachgen.py ADDED Viewed

	@@ -0,0 +1,999 @@

+import os
+import gradio as gr
+import soundfile as sf
+import librosa
+import numpy as np
+from pydub import AudioSegment
+from pedalboard import Pedalboard, Compressor, Reverb, Delay, NoiseGate, Chorus
+from pedalboard.io import AudioFile
+from separator.audio_writer import write_audio_file
+import tempfile
+from model_list import models_data as mvsepless_models
+from multi_inference import single_multi_inference
+# "voice_models" directory under the current working directory.
+RVC_MODELS_DIR = os.path.join(os.getcwd(), "voice_models")
+# Audio container/codec names; presumably the choices offered for the output
+# format (the usage is not visible in this part of the file - TODO confirm).
+OUTPUT_FORMAT = ["mp3", "wav", "flac", "aiff", "m4a", "aac", "ogg", "opus"]
+# Module-level state holding the intermediate files of the last generation:
+# written by gen_cover() and read by remix_cover().
+saved_processing_data = None
+# Translation dictionaries: language code -> {string key -> UI text}.
+# t() looks keys up in the table of the currently selected language.
+TRANSLATIONS = {
+    "ru": {
+        # General elements
+        "model_name_label": "Имя модели",
+        "update_button": "Обновить",
+        "input_audio_label": "Входная песня",
+        "generate_btn": "Сгенерировать кавер",
+        "remix_btn": "Пересвести кавер",
+        "final_result": "Финальный результат",
+        "intermediate_files": "Промежуточные файлы",
+        "status_label": "Статус",
+        "tab_separation": "Разделение",
+        "tab_voice_settings": "Настройки преобразования голоса",
+        "tab_mixing_settings": "Настройки сведения аудио",
+        # Separation
+        "preclear_vocals": "Очистить вокал от реверба/эха",
+        "split_vocals": "Разделить вокал на лид/бэк-вокалы",
+        "vocal_model": "Вокальная модель",
+        "dereverb_model": "Dereverb/Deecho модель",
+        "karaoke_model": "Караоке модель",
+        # Voice conversion
+        "conversion_mode": "Режим преобразования",
+        "conversion_info": "lead - только основной вокал\nback - только бэк-вокал\nlead/back - основной и бэк-вокалы\nfull - весь вокал",
+        "vocal_pitch": "Высота тона вокала",
+        "backing_pitch": "Высота тона бэк-вокала",
+        "pitch_method": "Метод извлечения тона",
+        "max_pitch": "Верхний лимит определения высоты тона",
+        "index_rate": "Влияние индекса",
+        "filter_radius": "Радиус фильтра",
+        "rms_envelope": "Огибающая громкости",
+        "protect_cons": "Защита согласных",
+        "hop_length": "Длина шага",
+        # Mixing
+        "volume_adjust": "Изменение громкости",
+        "vocals_gain": "Вокал",
+        "backing_gain": "Бэк-вокал",
+        "inst_gain": "Инструментал",
+        "output_format": "Формат вывода",
+        "add_unconv": "Добавить к инструменталу непреобразованный вокал",
+        "add_effects": "Добавить эффекты на голос",
+        # Effects
+        "effects_tab": "Эффекты",
+        "echo_tab": "Эхо",
+        "echo_delay": "Время задержки (сек)",
+        "echo_feedback": "Обратная связь",
+        "echo_mix": "Смешение",
+        "reverb_tab": "Реверберация",
+        "reverb_size": "Размер комнаты",
+        "reverb_width": "Ширина реверберации",
+        "reverb_wet": "Уровень влажности",
+        "reverb_dry": "Уровень сухости",
+        "reverb_damping": "Уровень демпфирования",
+        "chorus_tab": "Хорус",
+        "chorus_rate": "Скорость хоруса",
+        "chorus_depth": "Глубина хоруса",
+        "chorus_delay": "Задержка центра (мс)",
+        "chorus_feedback": "Обратная связь",
+        "chorus_mix": "Смешение",
+        # Processing
+        "processing_tab": "Обработка",
+        "compressor_tab": "Компрессор",
+        "comp_ratio": "Соотношение",
+        "comp_threshold": "Порог",
+        "comp_attack": "Время атаки (мс)",
+        "comp_release": "Время спада (мс)",
+        "noise_gate_tab": "Подавление шума",
+        "gate_threshold": "Порог",
+        "gate_ratio": "Соотношение",
+        "gate_attack": "Время атаки (мс)",
+        "gate_release": "Время спада (мс)",
+        # Statuses
+        "start_processing": "Начало обработки...",
+        "separation": "Разделение на музыку и вокал...",
+        "extracting": "Извлечение лид/бэк-вокала...",
+        "cleaning": "Очистка вокалов...",
+        "converting": "Преобразование вокалов...",
+        "mixing": "Сведение итогового кавера...",
+        # Errors
+        "error_audio_model": "Сначала загрузите аудио и выберите модель",
+        "error_audio": "Сначала загрузите аудио",
+        "error_model": "Сначала выберите модель",
+        "error_generate_first": "Сначала сгенерируйте кавер хотя бы один раз!",
+        "vbach_required": "Vbach (RVC) не установлен. В блокноте запустите ячейку 'Установка' с флагом install_vbach [✓]"
+    },
+    "en": {
+        # General elements
+        "model_name_label": "Model name",
+        "update_button": "Update",
+        "input_audio_label": "Input song",
+        "generate_btn": "Generate cover",
+        "remix_btn": "Remix cover",
+        "final_result": "Final result",
+        "intermediate_files": "Intermediate files",
+        "status_label": "Status",
+        "tab_separation": "Separation",
+        "tab_voice_settings": "Voice conversion settings",
+        "tab_mixing_settings": "Audio mixing settings",
+        # Separation
+        "preclear_vocals": "Clear vocals from reverb/echo",
+        "split_vocals": "Split vocals into lead/backing",
+        "vocal_model": "Vocal model",
+        "dereverb_model": "Dereverb/Deecho model",
+        "karaoke_model": "Karaoke model",
+        # Voice conversion
+        "conversion_mode": "Conversion mode",
+        "conversion_info": "lead - lead vocals only\nback - backing vocals only\nlead/back - both vocals\nfull - full vocals",
+        "vocal_pitch": "Vocal pitch",
+        "backing_pitch": "Backing vocals pitch",
+        "pitch_method": "Pitch extraction method",
+        "max_pitch": "Max pitch detection frequency",
+        "index_rate": "Index rate",
+        "filter_radius": "Filter radius",
+        "rms_envelope": "RMS envelope",
+        "protect_cons": "Protect consonants",
+        "hop_length": "Hop length",
+        # Mixing
+        "volume_adjust": "Volume adjustment",
+        "vocals_gain": "Vocals",
+        "backing_gain": "Backing vocals",
+        "inst_gain": "Instrumental",
+        "output_format": "Output format",
+        "add_unconv": "Add unconverted vocals to instrumental",
+        "add_effects": "Apply effects to vocals",
+        # Effects
+        "effects_tab": "Effects",
+        "echo_tab": "Echo",
+        "echo_delay": "Delay time (sec)",
+        "echo_feedback": "Feedback",
+        "echo_mix": "Mix",
+        "reverb_tab": "Reverb",
+        "reverb_size": "Room size",
+        "reverb_width": "Reverb width",
+        "reverb_wet": "Wet level",
+        "reverb_dry": "Dry level",
+        "reverb_damping": "Damping",
+        "chorus_tab": "Chorus",
+        "chorus_rate": "Rate (Hz)",
+        "chorus_depth": "Depth",
+        "chorus_delay": "Center delay (ms)",
+        "chorus_feedback": "Feedback",
+        "chorus_mix": "Mix",
+        # Processing
+        "processing_tab": "Processing",
+        "compressor_tab": "Compressor",
+        "comp_ratio": "Ratio",
+        "comp_threshold": "Threshold (dB)",
+        "comp_attack": "Attack time (ms)",
+        "comp_release": "Release time (ms)",
+        "noise_gate_tab": "Noise Gate",
+        "gate_threshold": "Threshold (dB)",
+        "gate_ratio": "Ratio",
+        "gate_attack": "Attack time (ms)",
+        "gate_release": "Release time (ms)",
+        # Statuses
+        "start_processing": "Starting processing...",
+        "separation": "Separating music and vocals...",
+        "extracting": "Extracting lead/backing vocals...",
+        "cleaning": "Cleaning vocals...",
+        "converting": "Converting vocals...",
+        "mixing": "Mixing final cover...",
+        # Errors
+        "error_audio_model": "Please upload audio and select model first",
+        "error_audio": "Please upload audio first",
+        "error_model": "Please select model first",
+        "error_generate_first": "Generate a cover at least once first!",
+        "vbach_required": "Vbach (RVC) is not installed. In the notebook, run the 'Установка' cell with the flag install_vbach [✓]"
+    }
+}
+# Language code whose table t() reads; Russian is the default.
+CURRENT_LANG = "ru"
+def set_language(lang):
+    """Select the language used by t() for all subsequent lookups.
+
+    Args:
+        lang: Language code; must be a key of TRANSLATIONS to take effect.
+
+    Codes that have no translation table are ignored and the current
+    language stays active: storing an unknown code would make every later
+    t() call fail with KeyError.
+    """
+    global CURRENT_LANG
+    if lang in TRANSLATIONS:
+        CURRENT_LANG = lang
+def t(key):
+    """Look up the UI string for ``key`` in the active language.
+
+    Returns ``key`` itself when the active language table has no entry for it.
+    """
+    active_table = TRANSLATIONS[CURRENT_LANG]
+    if key in active_table:
+        return active_table[key]
+    return key
+def list_models(category, model_type=None):
+    """Return "<type> / <name>" labels of registry models in the given categories.
+
+    Args:
+        category: Container of accepted category values; a model is listed
+            when its "category" entry is ``in`` this container.
+        model_type: Optional iterable of model-type keys to restrict the
+            search to. When empty or None, every type in the registry is
+            searched.
+
+    Returns:
+        A list of "<model type> / <model name>" strings, in registry order
+        within each type.
+    """
+    model_types = model_type if model_type else list(mvsepless_models)
+    labels = []
+    for m_type in model_types:
+        # A requested type that the registry does not contain is skipped
+        # rather than aborting the whole listing with KeyError.
+        for m_name, m_info in mvsepless_models.get(m_type, {}).items():
+            if m_info["category"] in category:
+                labels.append(f"{m_type} / {m_name}")
+    return labels
+def find_file_from_stem(results, stem_names=("Vocals", "vocals")):
+    """Return the file of the last result whose stem name is in ``stem_names``.
+
+    Args:
+        results: Iterable of ``(stem_name, stem_file)`` pairs; None or empty
+            is treated as "no results".
+        stem_names: Container of accepted stem names.
+
+    Returns:
+        The matching ``stem_file``; when several results match, the last one
+        wins. None when nothing matches (previously this case raised
+        UnboundLocalError).
+    """
+    stem_path = None
+    for stem_name, stem_file in results or ():
+        if stem_name in stem_names:
+            stem_path = stem_file
+    return stem_path
+def _vbachgen_db_to_gain(level_db):
+    """Convert a level in decibels to a linear amplitude factor."""
+    return 10 ** (level_db / 20.0)
+def _vbachgen_load_audio(path, sr=None):
+    """Load ``path`` with librosa as float32, always shaped (channels, samples).
+
+    ``sr=None`` keeps the file's own sample rate; any other value makes
+    librosa resample to that rate. Returns ``(audio, sample_rate)``.
+    """
+    audio, loaded_sr = librosa.load(path, mono=False, sr=sr, dtype='float32')
+    if audio.ndim == 1:
+        # librosa returns a 1-D array for mono files; add the channel axis.
+        audio = np.expand_dims(audio, axis=0)
+    return audio, loaded_sr
+def _vbachgen_mix_tracks(mix, track):
+    """Add ``track`` to ``mix``, trimming both to the shorter of the two."""
+    if mix is None:
+        return track.copy()
+    length = min(mix.shape[1], track.shape[1])
+    return mix[:, :length] + track[:, :length]
+def _vbachgen_build_board(effects):
+    """Build the vocal effect chain from the "pedalboard_settings" dict."""
+    board = Pedalboard()
+    if "compressor" in effects:
+        comp = effects["compressor"]
+        board.append(Compressor(
+            ratio=comp["ratio"],
+            threshold_db=comp["threshold"],
+            attack_ms=comp["attack"],
+            release_ms=comp["release"]
+        ))
+    if "noise_gate" in effects:
+        ng = effects["noise_gate"]
+        board.append(NoiseGate(
+            threshold_db=ng["threshold"],
+            ratio=ng["ratio"],
+            attack_ms=ng["attack"],
+            release_ms=ng["release"]
+        ))
+    if "echo" in effects:
+        echo = effects["echo"]
+        board.append(Delay(
+            delay_seconds=echo["delay"],
+            feedback=echo["feedback"],
+            mix=echo["mix"]
+        ))
+    if "reverb" in effects:
+        rev = effects["reverb"]
+        board.append(Reverb(
+            room_size=rev["room_size"],
+            dry_level=rev["dry"],
+            wet_level=rev["wet"],
+            damping=rev["damping"],
+            width=rev["width"]
+        ))
+    if "chorus" in effects:
+        chorus = effects["chorus"]
+        board.append(Chorus(
+            rate_hz=chorus["rate"],
+            depth=chorus["depth"],
+            centre_delay_ms=chorus["center_delay"],
+            feedback=chorus["feedback"],
+            mix=chorus["mix"]
+        ))
+    return board
+def mix_and_save(
+    inst_path,
+    list_vocals,
+    converted_vocals_list,
+    mix_params,
+    params,
+    rvc_params,
+    temp_dir,
+    input_audio
+):
+    """Mix the instrumental and vocal tracks, peak-normalize and write the result.
+
+    Args:
+        inst_path: Path of the instrumental; skipped when falsy or missing.
+            Its sample rate becomes the rate of the mix (44100 otherwise).
+        list_vocals: Sequence of ``(name, path)`` pairs. Only indices 1 and 2
+            are read here, as the unconverted backing and lead vocals.
+        converted_vocals_list: Paths of converted vocals; falsy or missing
+            paths are skipped. The first entry is scaled by the "vocals1"
+            gain, every later entry by "vocals2".
+        mix_params: Dict with "gain" (dB values under "instrum", "vocals1",
+            "vocals2"), "add_unconverted_vocals_to_instrumental" and the
+            optional "use_effects" / "pedalboard_settings".
+        params: Dict providing "conversion_mode" and "output_format".
+        rvc_params: Dict providing "model_name" (used in the file name).
+        temp_dir: Directory the mixed file is written to.
+        input_audio: Path of the source song, used for the file name; when
+            falsy the file is named "remixed.<format>".
+
+    Returns:
+        Path of the written file.
+
+    Raises:
+        ValueError: When no track could be loaded, so there is nothing to mix.
+    """
+    gains = mix_params["gain"]
+    final_audio = None
+    samplerate = 44100
+    # Load instrumental
+    if inst_path and os.path.exists(inst_path):
+        inst_data, samplerate = _vbachgen_load_audio(inst_path)
+        final_audio = inst_data * _vbachgen_db_to_gain(gains["instrum"])
+    # Add the vocal part that is NOT being converted, if requested
+    if mix_params["add_unconverted_vocals_to_instrumental"]:
+        unconverted = None
+        if params["conversion_mode"] == "lead" and list_vocals[1][1]:
+            unconverted = (list_vocals[1][1], gains["vocals2"])
+        elif params["conversion_mode"] == "back" and list_vocals[2][1]:
+            unconverted = (list_vocals[2][1], gains["vocals1"])
+        if unconverted is not None:
+            extra_path, extra_gain_db = unconverted
+            extra_data, _ = _vbachgen_load_audio(extra_path, samplerate)
+            # Length-aligned add: a plain "+" fails when the stems differ in length.
+            final_audio = _vbachgen_mix_tracks(
+                final_audio, extra_data * _vbachgen_db_to_gain(extra_gain_db)
+            )
+    # The effect chain does not depend on the track, so build it once.
+    board = None
+    if mix_params.get("use_effects", False):
+        board = _vbachgen_build_board(mix_params.get("pedalboard_settings", {}))
+    # Process converted vocals
+    for i, vocal_path in enumerate(converted_vocals_list):
+        if not vocal_path or not os.path.exists(vocal_path):
+            continue
+        # Loading at the mix rate resamples when needed, the same way the
+        # unconverted vocals above are handled.
+        vocal_data, _ = _vbachgen_load_audio(vocal_path, samplerate)
+        if board is not None:
+            vocal_data = board(vocal_data, samplerate)
+        gain_db = gains["vocals1"] if i == 0 else gains["vocals2"]
+        final_audio = _vbachgen_mix_tracks(
+            final_audio, vocal_data * _vbachgen_db_to_gain(gain_db)
+        )
+    if final_audio is None or final_audio.size == 0:
+        raise ValueError("Nothing to mix: no instrumental or vocal track could be loaded")
+    # Peak-normalize so the loudest sample sits at full scale
+    max_amplitude = np.max(np.abs(final_audio))
+    if max_amplitude > 0:
+        final_audio = final_audio * (1.0 / max_amplitude)
+    # Save output
+    filename = f"{rvc_params['model_name']} - {os.path.splitext(os.path.basename(input_audio))[0]}.{params['output_format']}" if input_audio else f"remixed.{params['output_format']}"
+    final_path = os.path.join(temp_dir, filename)
+    write_audio_file(final_path, final_audio, samplerate, params['output_format'], "320k")
+    return final_path
+def _vbachgen_separate(source_path, output_dir, model, template):
+    """Run one separation model given as a "<type> / <name>" dropdown label."""
+    model_parts = model.split(" / ")
+    return single_multi_inference(source_path, output_dir,
+                                  model_parts[0], model_parts[1],
+                                  True, vr_aggr=5, output_format="wav",
+                                  output_bitrate="320k", template=template,
+                                  call_method="cli", selected_stems=[])
+def _vbachgen_run_vbach(source_path, output_dir, template, pitch, rvc_params):
+    """Run the vbach CLI on one vocal file and return the expected output path.
+
+    The command is passed as an argument list, not a shell string, so paths
+    and model names containing quotes or spaces reach the CLI unchanged.
+    The exit status is not checked; a missing output file is skipped later
+    by mix_and_save().
+    """
+    import subprocess
+    command = [
+        "python", "-m", "vbach.cli.vbach",
+        source_path, output_dir, rvc_params["model_name"],
+        "--template", template,
+        "--pitch", str(pitch),
+        "--method_pitch", str(rvc_params["f0_method"]),
+        "--index_rate", str(rvc_params["index_rate"]),
+        "--filter_radius", str(rvc_params["filter_radius"]),
+        "--rms", str(rvc_params["rms"]),
+        "--protect", str(rvc_params["protect"]),
+        "--hop_length", str(rvc_params["hop_length"]),
+        "--f0_min", "50",
+        "--f0_max", str(rvc_params["f0_max"]),
+        "--output_format", "wav",
+        "--stereo_mode", "mono",
+    ]
+    subprocess.run(command, check=False)
+    return os.path.join(output_dir, f"{template}.wav")
+def gen_cover(
+    input_audio,
+    anti_instrum_model,
+    karaoke_model,
+    dereverb_model,
+    output_format,
+    karaoke_check,
+    conversion_mode,
+    preclear_vocals_check,
+    voice_name,
+    pitch1_val,
+    pitch2_val,
+    method_pitch,
+    index_rate,
+    fr,
+    rms,
+    protect,
+    hop_mangio_crepe,
+    f0_max,
+    unconv_vocals_check,
+    use_effects,
+    instrumental_gain,
+    vocal1_gain,
+    vocal2_gain,
+    echo_delay,
+    echo_feedback,
+    echo_mix,
+    reverb_rm_size,
+    reverb_width,
+    reverb_wet,
+    reverb_dry,
+    reverb_damping,
+    chorus_rate_hz,
+    chorus_depth,
+    chorus_centre_delay_ms,
+    chorus_feedback,
+    chorus_mix,
+    compressor_ratio,
+    compressor_threshold,
+    compressor_attack,
+    compressor_release,
+    noise_gate_threshold,
+    noise_gate_ratio,
+    noise_gate_attack,
+    noise_gate_release
+):
+    """Generate a cover: separate the song, convert the vocals, mix the result.
+
+    Steps: split ``input_audio`` into instrumental and vocals; optionally
+    split the vocals into lead/backing (``karaoke_check``); optionally run
+    the dereverb model on the vocals (``preclear_vocals_check``); convert
+    the vocals selected by ``conversion_mode`` with the vbach CLI; mix
+    everything with mix_and_save().
+
+    Args:
+        input_audio: Path of the source song.
+        anti_instrum_model, karaoke_model, dereverb_model: Model labels of
+            the form "<type> / <name>".
+        output_format: Format of the final mixed file.
+        conversion_mode: One of "full", "lead", "back", "lead/back".
+        voice_name: Voice model name passed to vbach.
+        pitch1_val, pitch2_val: Pitch passed to vbach for full/lead vocals
+            and for backing vocals respectively.
+        method_pitch, index_rate, fr, rms, protect, hop_mangio_crepe, f0_max:
+            Forwarded to the vbach CLI options --method_pitch, --index_rate,
+            --filter_radius, --rms, --protect, --hop_length and --f0_max.
+        unconv_vocals_check, use_effects, *_gain and the echo/reverb/chorus/
+            compressor/noise-gate values: Collected into the ``mix_params``
+            dict consumed by mix_and_save().
+
+    Returns:
+        ``(generated_files, final_path)``: all intermediate file paths plus
+        the final mix, and the final mix path on its own.
+
+    Raises:
+        gr.Error: When the input audio and/or the voice model is missing.
+
+    Side effects:
+        Stores the intermediate results in the module-level
+        ``saved_processing_data`` so remix_cover() can re-mix them.
+    """
+    global saved_processing_data
+    if not input_audio and not voice_name:
+        raise gr.Error(t("error_audio_model"))
+    if not input_audio:
+        raise gr.Error(t("error_audio"))
+    if not voice_name:
+        raise gr.Error(t("error_model"))
+    # Collect the flat argument list into structured parameter dicts
+    params = {
+        "output_format": output_format,
+        "extract_karaoke": karaoke_check,
+        "conversion_mode": conversion_mode,
+        "preclear_vocals": preclear_vocals_check
+    }
+    rvc_params = {
+        "model_name": voice_name,
+        "pitch1": pitch1_val,
+        "pitch2": pitch2_val,
+        "f0_method": method_pitch,
+        'index_rate': index_rate,
+        'filter_radius': fr,
+        'rms': rms,
+        'protect': protect,
+        'hop_length': hop_mangio_crepe,
+        'f0_max': f0_max
+    }
+    mix_params = {
+        "add_unconverted_vocals_to_instrumental": unconv_vocals_check,
+        "use_effects": use_effects,
+        "gain": {
+            "instrum": instrumental_gain,
+            "vocals1": vocal1_gain,
+            "vocals2": vocal2_gain
+        },
+        "pedalboard_settings": {
+            "echo": {
+                "delay": echo_delay,
+                "feedback": echo_feedback,
+                "mix": echo_mix
+            },
+            "reverb": {
+                "room_size": reverb_rm_size,
+                "wet": reverb_wet,
+                "dry": reverb_dry,
+                "damping": reverb_damping,
+                "width": reverb_width,
+            },
+            "compressor": {
+                "ratio": compressor_ratio,
+                "threshold": compressor_threshold,
+                "attack": compressor_attack,
+                "release": compressor_release
+            },
+            "noise_gate": {
+                "threshold": noise_gate_threshold,
+                "ratio": noise_gate_ratio,
+                "attack": noise_gate_attack,
+                "release": noise_gate_release,
+            },
+            "chorus": {
+                "rate": chorus_rate_hz,
+                "depth": chorus_depth,
+                "center_delay": chorus_centre_delay_ms,
+                "feedback": chorus_feedback,
+                "mix": chorus_mix
+            }
+        }
+    }
+    progress = gr.Progress()
+    progress(0, desc=t("start_processing"))
+    temp_dir = tempfile.mkdtemp()
+    converted_dir = os.path.join(temp_dir, 'converted')
+    progress(0.1, desc=t("separation"))
+    # Separate vocals and instrumental
+    inst_output = _vbachgen_separate(input_audio, os.path.join(temp_dir, "inst_output"),
+                                     anti_instrum_model, "VbachGen_NAME_STEM")
+    inst_file = ("instrumental", find_file_from_stem(inst_output, ["Instrumental", "instrumental", "other", "Other"]))
+    full_vocals_file = ("full_vocals", find_file_from_stem(inst_output, ["Vocals", "vocals"]))
+    progress(0.2, desc=t("extracting"))
+    # Optionally split the vocals into backing and lead parts
+    back_vocals_file = (None, None)
+    lead_vocals_file = (None, None)
+    if karaoke_check:
+        karaoke_output = _vbachgen_separate(full_vocals_file[1], os.path.join(temp_dir, "kar_output"),
+                                            karaoke_model, "NAME_MODEL_STEM")
+        back_vocals_file = ("back_vocals", find_file_from_stem(karaoke_output, ["Instrumental", "instrumental", "other", "Other"]))
+        lead_vocals_file = ("lead_vocals", find_file_from_stem(karaoke_output, ["Vocals", "vocals", "karaoke"]))
+    # Fixed layout relied on below and by mix_and_save(): [full, back, lead]
+    list_vocals = [full_vocals_file, back_vocals_file, lead_vocals_file]
+    progress(0.3, desc=t("cleaning"))
+    # Pre-clear vocals if needed
+    if preclear_vocals_check:
+        # Only the parts that will be converted are cleaned (plus the full vocals).
+        clear_list_vocals = [
+            full_vocals_file,
+            back_vocals_file if conversion_mode in ["back", "lead/back"] else (None, None),
+            lead_vocals_file if conversion_mode in ["lead", "lead/back"] else (None, None)
+        ]
+        cleared_by_name = {}
+        for i, (name, file) in enumerate(clear_list_vocals):
+            if file and os.path.exists(file):
+                clear_output = _vbachgen_separate(file, os.path.join(temp_dir, f"cleared_output_{i}"),
+                                                  dereverb_model, "NAME_STEM")
+                cleared_file = find_file_from_stem(clear_output, ["No Echo", "No Reverb", "Dry", "Other"])
+                # Keep the uncleaned file when the model produced no usable stem.
+                if cleared_file:
+                    cleared_by_name[name] = (name, cleared_file)
+        list_vocals = [cleared_by_name.get(entry[0], entry) for entry in list_vocals]
+    progress(0.5, desc=t("converting"))
+    # Vocal conversion: (source file, output template, pitch) per track.
+    # Lead comes before backing so that mix_and_save() applies the "vocals1"
+    # gain to the lead and "vocals2" to the backing track.
+    full_path, back_path, lead_path = (entry[1] for entry in list_vocals)
+    conversion_jobs = []
+    if conversion_mode == "full" and full_path:
+        conversion_jobs = [(full_path, "full_vocals_converted", pitch1_val)]
+    elif conversion_mode == "lead/back" and back_path and lead_path:
+        conversion_jobs = [
+            (lead_path, "lead_vocals_converted", pitch1_val),
+            (back_path, "back_vocals_converted", pitch2_val),
+        ]
+    elif conversion_mode == "back" and back_path:
+        conversion_jobs = [(back_path, "back_vocals_converted", pitch2_val)]
+    elif conversion_mode == "lead" and lead_path:
+        conversion_jobs = [(lead_path, "lead_vocals_converted", pitch1_val)]
+    converted_vocals_list = [
+        _vbachgen_run_vbach(source, converted_dir, template, pitch, rvc_params)
+        for source, template, pitch in conversion_jobs
+    ]
+    # Prepare files list
+    generated_files = [inst_file[1]]
+    generated_files.extend(file for _, file in list_vocals if file)
+    generated_files.extend(converted_vocals_list)
+    # Keep the data so the cover can be re-mixed without regenerating it
+    saved_processing_data = {
+        "inst_path": inst_file[1],
+        "list_vocals": list_vocals,
+        "converted_vocals_list": converted_vocals_list,
+        "params": params,
+        "rvc_params": rvc_params,
+        "input_audio": input_audio
+    }
+    progress(0.9, desc=t("mixing"))
+    # Mixing
+    final_path = mix_and_save(
+        inst_file[1],
+        list_vocals,
+        converted_vocals_list,
+        mix_params,
+        params,
+        rvc_params,
+        temp_dir,
+        input_audio
+    )
+    generated_files.append(final_path)
+    return generated_files, final_path
+def remix_cover(
+    use_effects,
+    instrumental_gain,
+    vocal1_gain,
+    vocal2_gain,
+    echo_delay,
+    echo_feedback,
+    echo_mix,
+    reverb_rm_size,
+    reverb_width,
+    reverb_wet,
+    reverb_dry,
+    reverb_damping,
+    chorus_rate_hz,
+    chorus_depth,
+    chorus_centre_delay_ms,
+    chorus_feedback,
+    chorus_mix,
+    compressor_ratio,
+    compressor_threshold,
+    compressor_attack,
+    compressor_release,
+    noise_gate_threshold,
+    noise_gate_ratio,
+    noise_gate_attack,
+    noise_gate_release
+):
+    """Re-mix the last generated cover with new gain and effect settings.
+
+    Only the mixing stage runs: the files and parameters stored in
+    ``saved_processing_data`` by gen_cover() are passed to mix_and_save()
+    together with a freshly built ``mix_params`` dict, and the result is
+    written into a new temporary directory.
+
+    Returns:
+        Path of the re-mixed file.
+
+    Raises:
+        gr.Error: When no cover has been generated yet.
+    """
+    session = saved_processing_data
+    if not session:
+        raise gr.Error(t("error_generate_first"))
+    output_dir = tempfile.mkdtemp()
+    # Gains, in dB, per track
+    gain_settings = {
+        "instrum": instrumental_gain,
+        "vocals1": vocal1_gain,
+        "vocals2": vocal2_gain
+    }
+    # Per-effect settings, keyed the way mix_and_save() reads them
+    effect_settings = {}
+    effect_settings["echo"] = {
+        "delay": echo_delay,
+        "feedback": echo_feedback,
+        "mix": echo_mix
+    }
+    effect_settings["reverb"] = {
+        "room_size": reverb_rm_size,
+        "wet": reverb_wet,
+        "dry": reverb_dry,
+        "damping": reverb_damping,
+        "width": reverb_width
+    }
+    effect_settings["compressor"] = {
+        "ratio": compressor_ratio,
+        "threshold": compressor_threshold,
+        "attack": compressor_attack,
+        "release": compressor_release
+    }
+    effect_settings["noise_gate"] = {
+        "threshold": noise_gate_threshold,
+        "ratio": noise_gate_ratio,
+        "attack": noise_gate_attack,
+        "release": noise_gate_release
+    }
+    effect_settings["chorus"] = {
+        "rate": chorus_rate_hz,
+        "depth": chorus_depth,
+        "center_delay": chorus_centre_delay_ms,
+        "feedback": chorus_feedback,
+        "mix": chorus_mix
+    }
+    mix_params = {
+        # The unconverted vocals are always added back when re-mixing
+        "add_unconverted_vocals_to_instrumental": True,
+        "use_effects": use_effects,
+        "gain": gain_settings,
+        "pedalboard_settings": effect_settings
+    }
+    # Run the mixing stage only, on the data restored from the saved session
+    return mix_and_save(
+        session["inst_path"],
+        session["list_vocals"],
+        session["converted_vocals_list"],
+        mix_params,
+        session["params"],
+        session["rvc_params"],
+        output_dir,
+        session["input_audio"]
+    )
+def vbach_plugin_name():
+    """Return this plugin's name string, "VbachGen"."""
+    plugin_name = "VbachGen"
+    return plugin_name
+def vbachgen_plugin(lang):
+    set_language(lang)
+    with gr.Blocks():
+        if os.path.exists("vbach"):
+            with gr.Row(equal_height=False, variant="panel"):
+                with gr.Column():
+                    model_name = gr.Dropdown(label=t("model_name_label"), interactive=True, filterable=False, scale=6)
+                    model_update_btn = gr.Button(t("update_button"), variant="primary", scale=3, size="lg")
+                with gr.Row(min_height=150):
+                    input_audio = gr.File(label=t("input_audio_label"), interactive=True, type="filepath", file_count="single")
+            with gr.Row():
+                with gr.Column():
+                    with gr.Tab(t("tab_separation")):
+                        preclear_vocals_check = gr.Checkbox(label=t("preclear_vocals"), value=False)
+                        karaoke_check = gr.Checkbox(label=t("split_vocals"), value=False)
+                        with gr.Column(variant="panel"):
+                            with gr.Group() as extract_vocals_group:
+                                anti_instrum_model = gr.Dropdown(
+                                    label=t("vocal_model"),
+                                    choices=list_models(["Инструментал", "Вокал", "Инструментал и вокал"], ["mel_band_roformer", "bs_roformer", "mdx23c", "mdx", "htdemucs"]),
+                                    interactive=True,
+                                    filterable=False
+                                )
+                            with gr.Group(visible=False) as deecho_group:
+                                dereverb_model = gr.Dropdown(
+                                    label=t("dereverb_model"),
+                                    choices=list_models(["Реверб и эхо", "Реверб", "Эхо"], ["vr"]),
+                                    interactive=True,
+                                    filterable=False
+                                )
+                            with gr.Group(visible=False) as karaoke_group:
+                                karaoke_model = gr.Dropdown(
+                                    label=t("karaoke_model"),
+                                    choices=list_models(["Караоке"]),
+                                    interactive=True,
+                                    filterable=False
+                                )
+                    with gr.Tab(t("tab_voice_settings")):  # Voice-conversion tab: mode, pitch shift, pitch-extraction method and conversion sliders
+                        conversion_mode = gr.Dropdown(
+                            label=t("conversion_mode"),
+                            choices=["lead", "back", "lead/back", "full"],
+                            value="full",  # the karaoke_check handler also resets the dropdown to this value
+                            filterable=False,
+                            visible=False,  # made visible by the karaoke_check change handler
+                            info=t("conversion_info")
+                        )
+                        with gr.Row():
+                            pitch1 = gr.Slider(-48, 48, value=0, step=12, label=t("vocal_pitch"), interactive=True)  # step=12 limits the slider to multiples of 12
+                            pitch2 = gr.Slider(-48, 48, value=0, step=12, label=t("backing_pitch"), visible=False, interactive=True)  # shown only while conversion_mode is "lead/back"
+                        with gr.Row():
+                            method_pitch = gr.Dropdown(
+                                label=t("pitch_method"),
+                                choices=["mangio-crepe", "rmvpe+", "fcpe"],
+                                value="rmvpe+",
+                                interactive=True,
+                                filterable=False
+                            )
+                            f0_max = gr.Slider(50, 2000, value=1100, step=50, label=t("max_pitch"), interactive=True)
+                        with gr.Row():
+                            with gr.Column(scale=1):
+                                index_rate = gr.Slider(0, 1, value=0, step=0.1, label=t("index_rate"), interactive=True)
+                                fr = gr.Slider(0, 7, value=3, step=1, label=t("filter_radius"), interactive=True)
+                            with gr.Column(scale=1):
+                                rms = gr.Slider(0, 1, value=0.25, step=0.05, label=t("rms_envelope"), interactive=True)
+                                protect = gr.Slider(minimum=0, maximum=0.5, step=0.01, value=0.33, label=t("protect_cons"), interactive=True)
+                        hop_mangio_crepe = gr.Slider(1, 512, value=128, step=1, label=t("hop_length"), interactive=True, visible=False)  # shown only while method_pitch is "mangio-crepe"
+                    with gr.Tab(t("tab_mixing_settings")):
+                        gr.Markdown(f"<center><h2>{t('volume_adjust')}</h2></center>")
+                        with gr.Row(variant="panel"):
+                            vocal1_gain = gr.Slider(-30, 30, value=0, step=1, label=t("vocals_gain"), scale=3, interactive=True)
+                            vocal2_gain = gr.Slider(-30, 30, value=0, step=1, label=t("backing_gain"), scale=3, visible=False, interactive=True)
+                            instrumental_gain = gr.Slider(-30, 30, value=0, step=1, label=t("inst_gain"), scale=3, interactive=True)
+                        output_format = gr.Dropdown(
+                            label=t("output_format"),
+                            choices=OUTPUT_FORMAT,
+                            value="wav",
+                            interactive=True,
+                            filterable=False
+                        )
+                        unconv_vocals_check = gr.Checkbox(label=t("add_unconv"), visible=False)
+                        use_effects = gr.Checkbox(label=t("add_effects"), value=False)
+                        with gr.Column(variant="panel", visible=False) as effects_accordion:
+                            with gr.Tab(t("effects_tab")):
+                                with gr.Tab(t("echo_tab")):
+                                    with gr.Group():
+                                        with gr.Column(variant="panel"):
+                                            with gr.Row():
+                                                echo_delay = gr.Slider(0, 3, value=0, label=t("echo_delay"), interactive=True)
+                                                echo_feedback = gr.Slider(0, 1, value=0, label=t("echo_feedback"), interactive=True)
+                                                echo_mix = gr.Slider(0, 1, value=0, label=t("echo_mix"), interactive=True)
+                                with gr.Tab(t("reverb_tab")):
+                                    with gr.Group():
+                                        with gr.Column(variant="panel"):
+                                            with gr.Row():
+                                                reverb_rm_size = gr.Slider(0, 1, value=0.1, label=t("reverb_size"), interactive=True)
+                                                reverb_width = gr.Slider(0, 1, value=1.0, label=t("reverb_width"), interactive=True)
+                                            with gr.Row():
+                                                reverb_wet = gr.Slider(0, 1, value=0.1, label=t("reverb_wet"), interactive=True)
+                                                reverb_dry = gr.Slider(0, 1, value=0.8, label=t("reverb_dry"), interactive=True)
+                                            with gr.Row():
+                                                reverb_damping = gr.Slider(0, 1, value=0.9, label=t("reverb_damping"), interactive=True)
+                                with gr.Tab(t("chorus_tab")):
+                                    with gr.Group():
+                                        with gr.Column(variant="panel"):
+                                            with gr.Row():
+                                                chorus_rate_hz = gr.Slider(0.1, 10, value=0, label=t("chorus_rate"), interactive=True)
+                                                chorus_depth = gr.Slider(0, 1, value=0, label=t("chorus_depth"), interactive=True)
+                                            with gr.Row():
+                                                chorus_centre_delay_ms = gr.Slider(0, 50, value=0, label=t("chorus_delay"), interactive=True)
+                                                chorus_feedback = gr.Slider(0, 1, value=0, label=t("chorus_feedback"), interactive=True)
+                                            with gr.Row():
+                                                chorus_mix = gr.Slider(0, 1, value=0, label=t("chorus_mix"), interactive=True)
+                            with gr.Tab(t("processing_tab")):
+                                with gr.Tab(t("compressor_tab")):
+                                    with gr.Row(variant="panel"):
+                                        compressor_ratio = gr.Slider(1, 20, value=4, label=t("comp_ratio"), interactive=True)
+                                        compressor_threshold = gr.Slider(-60, 0, value=-12, label=t("comp_threshold"), interactive=True)
+                                        compressor_attack = gr.Slider(0, 2000, value=100, label=t("comp_attack"), interactive=True)
+                                        compressor_release = gr.Slider(0, 2000, value=100, label=t("comp_release"), interactive=True)
+                                with gr.Tab(t("noise_gate_tab")):
+                                    with gr.Group():
+                                        with gr.Column(variant="panel"):
+                                            with gr.Row():
+                                                noise_gate_threshold = gr.Slider(-60, 0, value=-40, label=t("gate_threshold"), interactive=True)
+                                                noise_gate_ratio = gr.Slider(1, 20, value=8, label=t("gate_ratio"), interactive=True)
+                                            with gr.Row():
+                                                noise_gate_attack = gr.Slider(0, 100, value=10, label=t("gate_attack"), interactive=True)
+                                                noise_gate_release = gr.Slider(0, 1000, value=100, label=t("gate_release"), interactive=True)
+                # Outputs
+                with gr.Column(variant="panel"):  # results panel: final audio, intermediate files list and the two action buttons
+                    final_ai_cover = gr.Audio(label=t("final_result"), interactive=False, streaming=True)  # output of both the generate_btn and remix_btn handlers
+                    generated_files_list = gr.Files(label=t("intermediate_files"))  # output of the generate_btn handler only
+                    with gr.Row():
+                        generate_btn = gr.Button(t("generate_btn"), variant="primary")  # click is wired to gen_cover
+                        remix_btn = gr.Button(t("remix_btn"), variant="secondary")  # click is wired to remix_cover
+            status_text = gr.Textbox(label=t("status_label"), interactive=False)
+            # Event handlers
+            method_pitch.change(fn=lambda x: gr.update(visible=True if x == "mangio-crepe" else False), inputs=method_pitch, outputs=hop_mangio_crepe)
+            model_update_btn.click(fn=(lambda : gr.update(choices=[d for d in os.listdir(RVC_MODELS_DIR) if os.path.isdir(os.path.join(RVC_MODELS_DIR, d))])), inputs=None, outputs=model_name)
+            use_effects.change(
+                fn=lambda x: gr.update(visible=x),
+                inputs=use_effects,
+                outputs=effects_accordion
+            )
+            karaoke_check.change(
+                fn=lambda x: gr.update(visible=x),
+                inputs=karaoke_check,
+                outputs=karaoke_group
+            ).then(fn=lambda x: gr.update(value="full", visible=x), inputs=karaoke_check, outputs=conversion_mode).then(fn=lambda x: gr.update(visible=True if x in ["back", "lead"] else False, value=False), inputs=conversion_mode, outputs=unconv_vocals_check)
+            preclear_vocals_check.change(
+                fn=lambda x: gr.update(visible=x),
+                inputs=preclear_vocals_check,
+                outputs=deecho_group
+            )
+            conversion_mode.change(
+                fn=lambda mode: (
+                    gr.update(visible=mode in ["lead", "lead/back"]),
+                    gr.update(visible=mode in ["back", "lead/back"]),
+                    gr.update(visible=mode in ["lead/back"])
+                ),
+                inputs=conversion_mode,
+                outputs=[vocal1_gain, vocal2_gain, pitch2]
+            ).then(fn=lambda x: gr.update(visible=True if x in ["back", "lead"] else False, value=False), inputs=conversion_mode, outputs=unconv_vocals_check)
+            generate_btn.click(
+                fn=gen_cover,
+                inputs=[
+                    input_audio,
+                    anti_instrum_model,
+                    karaoke_model,
+                    dereverb_model,
+                    output_format,
+                    karaoke_check,
+                    conversion_mode,
+                    preclear_vocals_check,
+                    model_name,
+                    pitch1,
+                    pitch2,
+                    method_pitch,
+                    index_rate,
+                    fr,
+                    rms,
+                    protect,
+                    hop_mangio_crepe,
+                    f0_max,
+                    unconv_vocals_check,
+                    use_effects,
+                    instrumental_gain,
+                    vocal1_gain,
+                    vocal2_gain,
+                    echo_delay,
+                    echo_feedback,
+                    echo_mix,
+                    reverb_rm_size,
+                    reverb_width,
+                    reverb_wet,
+                    reverb_dry,
+                    reverb_damping,
+                    chorus_rate_hz,
+                    chorus_depth,
+                    chorus_centre_delay_ms,
+                    chorus_feedback,
+                    chorus_mix,
+                    compressor_ratio,
+                    compressor_threshold,
+                    compressor_attack,
+                    compressor_release,
+                    noise_gate_threshold,
+                    noise_gate_ratio,
+                    noise_gate_attack,
+                    noise_gate_release
+                ],
+                outputs=[generated_files_list, final_ai_cover]
+            )
+            remix_btn.click(
+                fn=remix_cover,
+                inputs=[
+                    use_effects,
+                    instrumental_gain,
+                    vocal1_gain,
+                    vocal2_gain,
+                    echo_delay,
+                    echo_feedback,
+                    echo_mix,
+                    reverb_rm_size,
+                    reverb_width,
+                    reverb_wet,
+                    reverb_dry,
+                    reverb_damping,
+                    chorus_rate_hz,
+                    chorus_depth,
+                    chorus_centre_delay_ms,
+                    chorus_feedback,
+                    chorus_mix,
+                    compressor_ratio,
+                    compressor_threshold,
+                    compressor_attack,
+                    compressor_release,
+                    noise_gate_threshold,
+                    noise_gate_ratio,
+                    noise_gate_attack,
+                    noise_gate_release
+                ],
+                outputs=[final_ai_cover]
+            )
+        else:  # fallback branch of an `if` whose condition lies outside this view
+            gr.Markdown(f"<center><h2>{t('vbach_required')}</h2></center>")  # render only a centered heading with the 'vbach_required' translation