RVC_MODELS_DIR = os.path.join(os.getcwd(), "voice_models")
OUTPUT_FORMAT = ["mp3", "wav", "flac", "aiff", "m4a", "aac", "ogg", "opus"]

# Global variable for storing the intermediate files of the last run.
saved_processing_data = None

# Translation dictionaries: language code -> message key -> UI text.
TRANSLATIONS = {
    "ru": {
        # General elements
        "model_name_label": "Имя модели",
        "update_button": "Обновить",
        "input_audio_label": "Входная песня",
        "generate_btn": "Сгенерировать кавер",
        "remix_btn": "Пересвести кавер",
        "final_result": "Финальный результат",
        "intermediate_files": "Промежуточные файлы",
        "status_label": "Статус",
        "tab_separation": "Разделение",
        "tab_voice_settings": "Настройки преобразования голоса",
        "tab_mixing_settings": "Настройки сведения аудио",

        # Separation
        "preclear_vocals": "Очистить вокал от реверба/эха",
        "split_vocals": "Разделить вокал на лид/бэк-вокалы",
        "vocal_model": "Вокальная модель",
        "dereverb_model": "Dereverb/Deecho модель",
        "karaoke_model": "Караоке модель",

        # Voice conversion
        "conversion_mode": "Режим преобразования",
        "conversion_info": "lead - только основной вокал\nback - только бэк-вокал\nlead/back - основной и бэк-вокалы\nfull - весь вокал",
        "vocal_pitch": "Высота тона вокала",
        "backing_pitch": "Высота тона бэк-вокала",
        "pitch_method": "Метод извлечения тона",
        "max_pitch": "Верхний лимит определения высоты тона",
        "index_rate": "Влияние индекса",
        "filter_radius": "Радиус фильтра",
        "rms_envelope": "Огибающая громкости",
        "protect_cons": "Защита согласных",
        "hop_length": "Длина шага",

        # Mixing
        "volume_adjust": "Изменение громкости",
        "vocals_gain": "Вокал",
        "backing_gain": "Бэк-вокал",
        "inst_gain": "Инструментал",
        "output_format": "Формат вывода",
        "add_unconv": "Добавить к инструменталу непреобразованный вокал",
        "add_effects": "Добавить эффекты на голос",

        # Effects
        "effects_tab": "Эффекты",
        "echo_tab": "Эхо",
        "echo_delay": "Время задержки (сек)",
        "echo_feedback": "Обратная связь",
        "echo_mix": "Смешение",
        "reverb_tab": "Реверберация",
        "reverb_size": "Размер комнаты",
        "reverb_width": "Ширина реверберации",
        "reverb_wet": "Уровень влажности",
        "reverb_dry": "Уровень сухости",
        "reverb_damping": "Уровень демпфирования",
        "chorus_tab": "Хорус",
        "chorus_rate": "Скорость хоруса",
        "chorus_depth": "Глубина хоруса",
        "chorus_delay": "Задержка центра (мс)",
        "chorus_feedback": "Обратная связь",
        "chorus_mix": "Смешение",

        # Processing
        "processing_tab": "Обработка",
        "compressor_tab": "Компрессор",
        "comp_ratio": "Соотношение",
        "comp_threshold": "Порог",
        "comp_attack": "Время атаки (мс)",
        "comp_release": "Время спада (мс)",
        "noise_gate_tab": "Подавление шума",
        "gate_threshold": "Порог",
        "gate_ratio": "Соотношение",
        "gate_attack": "Время атаки (мс)",
        "gate_release": "Время спада (мс)",

        # Statuses
        "start_processing": "Начало обработки...",
        "separation": "Разделение на музыку и вокал...",
        "extracting": "Извлечение лид/бэк-вокала...",
        "cleaning": "Очистка вокалов...",
        "converting": "Преобразование вокалов...",
        "mixing": "Сведение итогового кавера...",

        # Errors
        "error_audio_model": "Сначала загрузите аудио и выберите модель",
        "error_audio": "Сначала загрузите аудио",
        "error_model": "Сначала выберите модель",
        "error_generate_first": "Сначала сгенерируйте кавер хотя бы один раз!",
        "vbach_required": "Vbach (RVC) не установлен. В блокноте запустите ячейку 'Установка' с флагом install_vbach [✓]"
    },
    "en": {
        # General elements
        "model_name_label": "Model name",
        "update_button": "Update",
        "input_audio_label": "Input song",
        "generate_btn": "Generate cover",
        "remix_btn": "Remix cover",
        "final_result": "Final result",
        "intermediate_files": "Intermediate files",
        "status_label": "Status",
        "tab_separation": "Separation",
        "tab_voice_settings": "Voice conversion settings",
        "tab_mixing_settings": "Audio mixing settings",

        # Separation
        "preclear_vocals": "Clear vocals from reverb/echo",
        "split_vocals": "Split vocals into lead/backing",
        "vocal_model": "Vocal model",
        "dereverb_model": "Dereverb/Deecho model",
        "karaoke_model": "Karaoke model",

        # Voice conversion
        "conversion_mode": "Conversion mode",
        "conversion_info": "lead - lead vocals only\nback - backing vocals only\nlead/back - both vocals\nfull - full vocals",
        "vocal_pitch": "Vocal pitch",
        "backing_pitch": "Backing vocals pitch",
        "pitch_method": "Pitch extraction method",
        "max_pitch": "Max pitch detection frequency",
        "index_rate": "Index rate",
        "filter_radius": "Filter radius",
        "rms_envelope": "RMS envelope",
        "protect_cons": "Protect consonants",
        "hop_length": "Hop length",

        # Mixing
        "volume_adjust": "Volume adjustment",
        "vocals_gain": "Vocals",
        "backing_gain": "Backing vocals",
        "inst_gain": "Instrumental",
        "output_format": "Output format",
        "add_unconv": "Add unconverted vocals to instrumental",
        "add_effects": "Apply effects to vocals",

        # Effects
        "effects_tab": "Effects",
        "echo_tab": "Echo",
        "echo_delay": "Delay time (sec)",
        "echo_feedback": "Feedback",
        "echo_mix": "Mix",
        "reverb_tab": "Reverb",
        "reverb_size": "Room size",
        "reverb_width": "Reverb width",
        "reverb_wet": "Wet level",
        "reverb_dry": "Dry level",
        "reverb_damping": "Damping",
        "chorus_tab": "Chorus",
        "chorus_rate": "Rate (Hz)",
        "chorus_depth": "Depth",
        "chorus_delay": "Center delay (ms)",
        "chorus_feedback": "Feedback",
        "chorus_mix": "Mix",

        # Processing
        "processing_tab": "Processing",
        "compressor_tab": "Compressor",
        "comp_ratio": "Ratio",
        "comp_threshold": "Threshold (dB)",
        "comp_attack": "Attack time (ms)",
        "comp_release": "Release time (ms)",
        "noise_gate_tab": "Noise Gate",
        "gate_threshold": "Threshold (dB)",
        "gate_ratio": "Ratio",
        "gate_attack": "Attack time (ms)",
        "gate_release": "Release time (ms)",

        # Statuses
        "start_processing": "Starting processing...",
        "separation": "Separating music and vocals...",
        "extracting": "Extracting lead/backing vocals...",
        "cleaning": "Cleaning vocals...",
        "converting": "Converting vocals...",
        "mixing": "Mixing final cover...",

        # Errors
        "error_audio_model": "Please upload audio and select model first",
        "error_audio": "Please upload audio first",
        "error_model": "Please select model first",
        "error_generate_first": "Generate a cover at least once first!",
        "vbach_required": "Vbach (RVC) is not installed. In the notebook, run the 'Установка' cell with the flag install_vbach [✓]"
    }
}

# Language whose table is used when CURRENT_LANG has no table of its own.
_FALLBACK_LANG = "ru"

CURRENT_LANG = "ru"


def set_language(lang):
    """Set the language code that :func:`t` uses for subsequent lookups."""
    global CURRENT_LANG
    CURRENT_LANG = lang


def t(key):
    """Return the UI text for ``key`` in the current language.

    If the current language has no translation table, the fallback
    language's table is used instead of raising ``KeyError``. A key that is
    missing from the selected table is returned unchanged.
    """
    table = TRANSLATIONS.get(CURRENT_LANG)
    if table is None:
        table = TRANSLATIONS[_FALLBACK_LANG]
    return table.get(key, key)


def list_models(category, model_type=None):
    """List separation models as ``"<model type> / <model name>"`` strings.

    Args:
        category: Container of accepted category values; a model is listed
            when its ``"category"`` entry is ``in`` this container.
        model_type: Optional iterable of model-type keys to restrict the
            search to. When empty or ``None`` every registered type is
            searched.

    Returns:
        A list of matching ``"<type> / <name>"`` strings, in registry order.
    """
    model_types = model_type if model_type else list(mvsepless_models)
    found = []
    for m_type in model_types:
        # A requested type that is not registered contributes nothing
        # instead of aborting the whole listing with a KeyError.
        for m_name, m_info in mvsepless_models.get(m_type, {}).items():
            if m_info["category"] in category:
                found.append(f"{m_type} / {m_name}")
    return found


def find_file_from_stem(results, stem_names=("Vocals", "vocals")):
    """Return the file belonging to one of ``stem_names`` in ``results``.

    Args:
        results: Iterable of ``(stem_name, stem_file)`` pairs; ``None`` is
            treated as empty.
        stem_names: Accepted stem names.

    Returns:
        The file of the last pair whose name is in ``stem_names``, or
        ``None`` when no pair matches.
    """
    matched = None
    for stem_name, stem_file in results or ():
        if stem_name in stem_names:
            # No early exit: the last matching entry wins.
            matched = stem_file
    return matched
def _db_to_linear(gain_db):
    """Convert a gain in decibels to a linear amplitude factor."""
    return 10 ** (gain_db / 20.0)


def _load_channels_first(path, sr=None):
    """Load ``path`` with librosa as float32 shaped ``(channels, samples)``.

    ``sr=None`` keeps the file's own sample rate; any other value makes
    librosa resample to that rate while loading. Returns ``(data, rate)``.
    """
    data, rate = librosa.load(path, mono=False, sr=sr, dtype='float32')
    if data.ndim == 1:
        data = np.expand_dims(data, axis=0)  # mono (n,) -> (1, n)
    return data, rate


def _overlay(mix, track):
    """Add ``track`` to ``mix``, truncating both to the shorter length.

    ``mix`` may be ``None`` (nothing mixed yet), in which case a copy of
    ``track`` becomes the mix.
    """
    if mix is None:
        return track.copy()
    min_len = min(mix.shape[1], track.shape[1])
    return mix[:, :min_len] + track[:, :min_len]


def _vocal_gain_key(vocal_path, index):
    """Pick the ``mix_params["gain"]`` key for a converted vocal track.

    gen_cover writes converted tracks as ``lead_vocals_converted.wav``,
    ``back_vocals_converted.wav`` or ``full_vocals_converted.wav``; the
    file name decides the role so that backing vocals always get the
    ``vocals2`` gain regardless of list order. Unrecognised names keep the
    positional rule (first track -> ``vocals1``, others -> ``vocals2``).
    """
    stem = os.path.basename(vocal_path)
    if stem.startswith("back_vocals"):
        return "vocals2"
    if stem.startswith(("lead_vocals", "full_vocals")):
        return "vocals1"
    return "vocals1" if index == 0 else "vocals2"


def _build_vocal_board(effects):
    """Build the Pedalboard effect chain described by ``effects``.

    Effects are appended in a fixed order: compressor, noise gate, echo
    (delay), reverb, chorus. Sections absent from ``effects`` are skipped.
    """
    board = Pedalboard()
    if "compressor" in effects:
        comp = effects["compressor"]
        board.append(Compressor(
            ratio=comp["ratio"],
            threshold_db=comp["threshold"],
            attack_ms=comp["attack"],
            release_ms=comp["release"]
        ))
    if "noise_gate" in effects:
        ng = effects["noise_gate"]
        board.append(NoiseGate(
            threshold_db=ng["threshold"],
            ratio=ng["ratio"],
            attack_ms=ng["attack"],
            release_ms=ng["release"]
        ))
    if "echo" in effects:
        echo = effects["echo"]
        board.append(Delay(
            delay_seconds=echo["delay"],
            feedback=echo["feedback"],
            mix=echo["mix"]
        ))
    if "reverb" in effects:
        rev = effects["reverb"]
        board.append(Reverb(
            room_size=rev["room_size"],
            dry_level=rev["dry"],
            wet_level=rev["wet"],
            damping=rev["damping"],
            width=rev["width"]
        ))
    if "chorus" in effects:
        chorus = effects["chorus"]
        board.append(Chorus(
            rate_hz=chorus["rate"],
            depth=chorus["depth"],
            centre_delay_ms=chorus["center_delay"],
            feedback=chorus["feedback"],
            mix=chorus["mix"]
        ))
    return board


def mix_and_save(
    inst_path,
    list_vocals,
    converted_vocals_list,
    mix_params,
    params,
    rvc_params,
    temp_dir,
    input_audio
):
    """Mix the instrumental with the vocal tracks and write the final file.

    Args:
        inst_path: Path of the instrumental stem; skipped when falsy or
            missing on disk.
        list_vocals: ``[(name, path), ...]`` with the full, backing and lead
            vocal stems at indices 0, 1 and 2.
        converted_vocals_list: Paths of converted vocal tracks; falsy or
            missing entries are skipped.
        mix_params: Mixing settings; reads
            ``"add_unconverted_vocals_to_instrumental"``, ``"use_effects"``,
            ``"gain"`` (dB values under ``"instrum"``, ``"vocals1"``,
            ``"vocals2"``) and ``"pedalboard_settings"``.
        params: Reads ``"conversion_mode"`` and ``"output_format"``.
        rvc_params: Reads ``"model_name"`` for the output file name.
        temp_dir: Directory the result is written to.
        input_audio: Original input path, used for the output file name;
            when falsy the file is named ``remixed.<format>``.

    Returns:
        Path of the written file.

    Raises:
        ValueError: If neither the instrumental nor any vocal track could be
            loaded, so there is nothing to mix.
    """
    gains = mix_params["gain"]
    final_audio = None
    samplerate = 44100  # used only when there is no instrumental to take the rate from

    # Load instrumental; its sample rate becomes the rate of the whole mix.
    if inst_path and os.path.exists(inst_path):
        inst_data, samplerate = _load_channels_first(inst_path)
        final_audio = inst_data * _db_to_linear(gains["instrum"])

    # Add the vocal part that was NOT converted, if requested.
    if mix_params["add_unconverted_vocals_to_instrumental"]:
        mode = params["conversion_mode"]
        if mode == "lead" and list_vocals[1][1]:
            back_vocals, _ = _load_channels_first(list_vocals[1][1], sr=samplerate)
            final_audio = _overlay(final_audio, back_vocals * _db_to_linear(gains["vocals2"]))
        elif mode == "back" and list_vocals[2][1]:
            lead_vocals, _ = _load_channels_first(list_vocals[2][1], sr=samplerate)
            final_audio = _overlay(final_audio, lead_vocals * _db_to_linear(gains["vocals1"]))

    # The effect chain does not depend on the track, so build it once.
    board = None
    if mix_params.get("use_effects", False):
        board = _build_vocal_board(mix_params.get("pedalboard_settings", {}))

    # Process converted vocals
    for i, vocal_path in enumerate(converted_vocals_list):
        if not vocal_path or not os.path.exists(vocal_path):
            continue

        # librosa resamples on load when the file's rate differs from the mix rate.
        vocal_data, _ = _load_channels_first(vocal_path, sr=samplerate)

        if board is not None:
            vocal_data = board(vocal_data, samplerate)

        vocal_data = vocal_data * _db_to_linear(gains[_vocal_gain_key(vocal_path, i)])
        final_audio = _overlay(final_audio, vocal_data)

    if final_audio is None:
        raise ValueError(
            "Nothing to mix: neither the instrumental nor any vocal track could be loaded"
        )

    # Peak-normalize so the loudest sample sits at full scale.
    max_amplitude = np.max(np.abs(final_audio))
    if max_amplitude > 0:
        final_audio = final_audio * (1.0 / max_amplitude)

    output_format = params['output_format']
    if input_audio:
        song_name = os.path.splitext(os.path.basename(input_audio))[0]
        filename = f"{rvc_params['model_name']} - {song_name}.{output_format}"
    else:
        filename = f"remixed.{output_format}"
    final_path = os.path.join(temp_dir, filename)

    write_audio_file(final_path, final_audio, samplerate, output_format, "320k")
    return final_path
dereverb_model, output_format, karaoke_check, conversion_mode, preclear_vocals_check, voice_name, pitch1_val, pitch2_val, method_pitch, index_rate, fr, rms, protect, hop_mangio_crepe, f0_max, unconv_vocals_check, use_effects, instrumental_gain, vocal1_gain, vocal2_gain, echo_delay, echo_feedback, echo_mix, reverb_rm_size, reverb_width, reverb_wet, reverb_dry, reverb_damping, chorus_rate_hz, chorus_depth, chorus_centre_delay_ms, chorus_feedback, chorus_mix, compressor_ratio, compressor_threshold, compressor_attack, compressor_release, noise_gate_threshold, noise_gate_ratio, noise_gate_attack, noise_gate_release ): global saved_processing_data if not input_audio and not voice_name: raise gr.Error(t("error_audio_model")) if not input_audio: raise gr.Error(t("error_audio")) if not voice_name: raise gr.Error(t("error_model")) # Собираем параметры в структуры models = [ anti_instrum_model, karaoke_model, dereverb_model ] params = { "output_format": output_format, "extract_karaoke": karaoke_check, "conversion_mode": conversion_mode, "preclear_vocals": preclear_vocals_check } rvc_params = { "model_name": voice_name, "pitch1": pitch1_val, "pitch2": pitch2_val, "f0_method": method_pitch, 'index_rate': index_rate, 'filter_radius': fr, 'rms': rms, 'protect': protect, 'hop_length': hop_mangio_crepe, 'f0_max': f0_max } mix_params = { "add_unconverted_vocals_to_instrumental": unconv_vocals_check, "use_effects": use_effects, "gain": { "instrum": instrumental_gain, "vocals1": vocal1_gain, "vocals2": vocal2_gain }, "pedalboard_settings": { "echo": { "delay": echo_delay, "feedback": echo_feedback, "mix": echo_mix }, "reverb": { "room_size": reverb_rm_size, "wet": reverb_wet, "dry": reverb_dry, "damping": reverb_damping, "width": reverb_width, }, "compressor": { "ratio": compressor_ratio, "threshold": compressor_threshold, "attack": compressor_attack, "release": compressor_release }, "noise_gate": { "threshold": noise_gate_threshold, "ratio": noise_gate_ratio, "attack": 
noise_gate_attack, "release": noise_gate_release, }, "chorus": { "rate": chorus_rate_hz, "depth": chorus_depth, "center_delay": chorus_centre_delay_ms, "feedback": chorus_feedback, "mix": chorus_mix } } } progress = gr.Progress() progress(0, desc=t("start_processing")) generated_files = [] converted_vocals_list = [] temp_dir = tempfile.mkdtemp() inst_model = models[0] kar_model = models[1] dereverb_model = models[2] progress(0.1, desc=t("separation")) # Separate vocals and instrumental inst_output = single_multi_inference(input_audio, os.path.join(temp_dir, "inst_output"), inst_model.split(" / ")[0], inst_model.split(" / ")[1], True, vr_aggr=5, output_format="wav", output_bitrate="320k", template="VbachGen_NAME_STEM", call_method="cli", selected_stems=[]) inst_file = ("instrumental", find_file_from_stem(inst_output, ["Instrumental", "instrumental", "other", "Other"])) full_vocals_file = ("full_vocals", find_file_from_stem(inst_output, ["Vocals", "vocals"])) progress(0.2, desc=t("extracting")) back_vocals_file = (None, None) lead_vocals_file = (None, None) if params["extract_karaoke"] == True: karaoke_output = single_multi_inference(full_vocals_file[1], os.path.join(temp_dir, "kar_output"), kar_model.split(" / ")[0], kar_model.split(" / ")[1], True, vr_aggr=5, output_format="wav", output_bitrate="320k", template="NAME_MODEL_STEM", call_method="cli", selected_stems=[]) back_vocals_file = ("back_vocals", find_file_from_stem(karaoke_output, ["Instrumental", "instrumental", "other", "Other"])) lead_vocals_file = ("lead_vocals", find_file_from_stem(karaoke_output, ["Vocals", "vocals", "karaoke"])) list_vocals = [ full_vocals_file, back_vocals_file if params["extract_karaoke"] else (None, None), lead_vocals_file if params["extract_karaoke"] else (None, None) ] clear_list_vocals = [ full_vocals_file, back_vocals_file if params["extract_karaoke"] and params["conversion_mode"] in ["back", "lead/back"] else (None, None), lead_vocals_file if params["extract_karaoke"] and 
params["conversion_mode"] in ["lead", "lead/back"] else (None, None) ] progress(0.3, desc=t("cleaning")) # Pre-clear vocals if needed cleared_vocals = [] if params["preclear_vocals"] == True: for i, (name, file) in enumerate(clear_list_vocals): if file and os.path.exists(file): clear_output = single_multi_inference(file, os.path.join(temp_dir, f"cleared_output_{i}"), dereverb_model.split(" / ")[0], dereverb_model.split(" / ")[1], True, vr_aggr=5, output_format="wav", output_bitrate="320k", template="NAME_STEM", call_method="cli", selected_stems=[]) cleared_file = find_file_from_stem(clear_output, ["No Echo", "No Reverb", "Dry", "Other"]) cleared_vocals.append((name, cleared_file)) for i, voc in enumerate(list_vocals): for clear_voc in cleared_vocals: if clear_voc[0] == voc[0]: list_vocals[i] = clear_voc break progress(0.5, desc=t("converting")) # Vocal conversion if params["conversion_mode"] == "full" and list_vocals[0][1]: full_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), "full_vocals_converted.wav") cmd = f"python -m vbach.cli.vbach '{list_vocals[0][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'full_vocals_converted' --pitch {rvc_params['pitch1']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['rms']} --protect {rvc_params['protect']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" os.system(cmd) converted_vocals_list.append(full_vocals_converted_path) elif params["conversion_mode"] == "lead/back" and list_vocals[1][1] and list_vocals[2][1]: lead_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), "lead_vocals_converted.wav") cmd = f"python -m vbach.cli.vbach '{list_vocals[2][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'lead_vocals_converted' --pitch 
{rvc_params['pitch1']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['rms']} --protect {rvc_params['protect']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" os.system(cmd) back_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), "back_vocals_converted.wav") cmd = f"python -m vbach.cli.vbach '{list_vocals[1][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'back_vocals_converted' --pitch {rvc_params['pitch2']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['rms']} --protect {rvc_params['protect']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" os.system(cmd) converted_vocals_list.append(back_vocals_converted_path) converted_vocals_list.append(lead_vocals_converted_path) elif params["conversion_mode"] == "back" and list_vocals[1][1]: back_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), "back_vocals_converted.wav") cmd = f"python -m vbach.cli.vbach '{list_vocals[1][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'back_vocals_converted' --pitch {rvc_params['pitch2']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['protect']} --protect {rvc_params['rms']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" os.system(cmd) converted_vocals_list.append(back_vocals_converted_path) elif params["conversion_mode"] == "lead" and list_vocals[2][1]: lead_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), 
"lead_vocals_converted.wav") cmd = f"python -m vbach.cli.vbach '{list_vocals[2][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'lead_vocals_converted' --pitch {rvc_params['pitch1']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['rms']} --protect {rvc_params['protect']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" os.system(cmd) converted_vocals_list.append(lead_vocals_converted_path) # Prepare files list generated_files.append(inst_file[1]) for name, file in list_vocals: if file: generated_files.append(file) generated_files.extend(converted_vocals_list) # Сохраняем данные для возможного пересведения saved_processing_data = { "inst_path": inst_file[1], "list_vocals": list_vocals, "converted_vocals_list": converted_vocals_list, "params": params, "rvc_params": rvc_params, "input_audio": input_audio } progress(0.9, desc=t("mixing")) # Сведение final_path = mix_and_save( inst_file[1], list_vocals, converted_vocals_list, mix_params, params, rvc_params, temp_dir, input_audio ) generated_files.append(final_path) return generated_files, final_path def remix_cover( use_effects, instrumental_gain, vocal1_gain, vocal2_gain, echo_delay, echo_feedback, echo_mix, reverb_rm_size, reverb_width, reverb_wet, reverb_dry, reverb_damping, chorus_rate_hz, chorus_depth, chorus_centre_delay_ms, chorus_feedback, chorus_mix, compressor_ratio, compressor_threshold, compressor_attack, compressor_release, noise_gate_threshold, noise_gate_ratio, noise_gate_attack, noise_gate_release ): global saved_processing_data if not saved_processing_data: raise gr.Error(t("error_generate_first")) # Восстанавливаем данные из сохраненной сессии data = saved_processing_data temp_dir = tempfile.mkdtemp() # Подготавливаем параметры для сведения mix_params = { 
"add_unconverted_vocals_to_instrumental": True, # Всегда используем оригинальные вокалы "use_effects": use_effects, "gain": { "instrum": instrumental_gain, "vocals1": vocal1_gain, "vocals2": vocal2_gain }, "pedalboard_settings": { "echo": { "delay": echo_delay, "feedback": echo_feedback, "mix": echo_mix }, "reverb": { "room_size": reverb_rm_size, "wet": reverb_wet, "dry": reverb_dry, "damping": reverb_damping, "width": reverb_width, }, "compressor": { "ratio": compressor_ratio, "threshold": compressor_threshold, "attack": compressor_attack, "release": compressor_release }, "noise_gate": { "threshold": noise_gate_threshold, "ratio": noise_gate_ratio, "attack": noise_gate_attack, "release": noise_gate_release, }, "chorus": { "rate": chorus_rate_hz, "depth": chorus_depth, "center_delay": chorus_centre_delay_ms, "feedback": chorus_feedback, "mix": chorus_mix } } } # Запускаем только этап сведения final_path = mix_and_save( data["inst_path"], data["list_vocals"], data["converted_vocals_list"], mix_params, data["params"], data["rvc_params"], temp_dir, data["input_audio"] ) return final_path def vbach_plugin_name(): return "VbachGen" def vbachgen_plugin(lang): set_language(lang) with gr.Blocks(): if os.path.exists("vbach"): with gr.Row(equal_height=False, variant="panel"): with gr.Column(): model_name = gr.Dropdown(label=t("model_name_label"), interactive=True, filterable=False, scale=6) model_update_btn = gr.Button(t("update_button"), variant="primary", scale=3, size="lg") with gr.Row(min_height=150): input_audio = gr.File(label=t("input_audio_label"), interactive=True, type="filepath", file_count="single") with gr.Row(): with gr.Column(): with gr.Tab(t("tab_separation")): preclear_vocals_check = gr.Checkbox(label=t("preclear_vocals"), value=False) karaoke_check = gr.Checkbox(label=t("split_vocals"), value=False) with gr.Column(variant="panel"): with gr.Group() as extract_vocals_group: anti_instrum_model = gr.Dropdown( label=t("vocal_model"), 
choices=list_models(["Инструментал", "Вокал", "Инструментал и вокал"], ["mel_band_roformer", "bs_roformer", "mdx23c", "mdx", "htdemucs"]), interactive=True, filterable=False ) with gr.Group(visible=False) as deecho_group: dereverb_model = gr.Dropdown( label=t("dereverb_model"), choices=list_models(["Реверб и эхо", "Реверб", "Эхо"], ["vr"]), interactive=True, filterable=False ) with gr.Group(visible=False) as karaoke_group: karaoke_model = gr.Dropdown( label=t("karaoke_model"), choices=list_models(["Караоке"]), interactive=True, filterable=False ) with gr.Tab(t("tab_voice_settings")): conversion_mode = gr.Dropdown( label=t("conversion_mode"), choices=["lead", "back", "lead/back", "full"], value="full", filterable=False, visible=False, info=t("conversion_info") ) with gr.Row(): pitch1 = gr.Slider(-48, 48, value=0, step=12, label=t("vocal_pitch"), interactive=True) pitch2 = gr.Slider(-48, 48, value=0, step=12, label=t("backing_pitch"), visible=False, interactive=True) with gr.Row(): method_pitch = gr.Dropdown( label=t("pitch_method"), choices=["mangio-crepe", "rmvpe+", "fcpe"], value="rmvpe+", interactive=True, filterable=False ) f0_max = gr.Slider(50, 2000, value=1100, step=50, label=t("max_pitch"), interactive=True) with gr.Row(): with gr.Column(scale=1): index_rate = gr.Slider(0, 1, value=0, step=0.1, label=t("index_rate"), interactive=True) fr = gr.Slider(0, 7, value=3, step=1, label=t("filter_radius"), interactive=True) with gr.Column(scale=1): rms = gr.Slider(0, 1, value=0.25, step=0.05, label=t("rms_envelope"), interactive=True) protect = gr.Slider(minimum=0, maximum=0.5, step=0.01, value=0.33, label=t("protect_cons"), interactive=True) hop_mangio_crepe = gr.Slider(1, 512, value=128, step=1, label=t("hop_length"), interactive=True, visible=False) with gr.Tab(t("tab_mixing_settings")): gr.Markdown(f"