# mvsepless_plugins / vbachgen.py
# noblebarkrr's picture
# Аналог AICoverGen для MVSepLess и Vbach (an AICoverGen analogue for MVSepLess and Vbach)
# 9aa1213 verified
import os
import gradio as gr
import soundfile as sf
import librosa
import numpy as np
from pydub import AudioSegment
from pedalboard import Pedalboard, Compressor, Reverb, Delay, NoiseGate, Chorus
from pedalboard.io import AudioFile
from separator.audio_writer import write_audio_file
import tempfile
from model_list import models_data as mvsepless_models
from multi_inference import single_multi_inference
# Directory, relative to the current working directory, whose sub-folders are
# listed as selectable voice models by the "Update" button.
RVC_MODELS_DIR = os.path.join(os.getcwd(), "voice_models")
# Choices offered in the output-format dropdown.
OUTPUT_FORMAT = ["mp3", "wav", "flac", "aiff", "m4a", "aac", "ogg", "opus"]
# Global variable holding the intermediate files/settings of the last
# generation so the cover can be re-mixed without re-running conversion.
saved_processing_data = None
# Translation dictionaries: language code -> {UI string key -> localized text}.
TRANSLATIONS = {
    "ru": {
        # General elements
        "model_name_label": "Имя модели",
        "update_button": "Обновить",
        "input_audio_label": "Входная песня",
        "generate_btn": "Сгенерировать кавер",
        "remix_btn": "Пересвести кавер",
        "final_result": "Финальный результат",
        "intermediate_files": "Промежуточные файлы",
        "status_label": "Статус",
        "tab_separation": "Разделение",
        "tab_voice_settings": "Настройки преобразования голоса",
        "tab_mixing_settings": "Настройки сведения аудио",
        # Separation
        "preclear_vocals": "Очистить вокал от реверба/эха",
        "split_vocals": "Разделить вокал на лид/бэк-вокалы",
        "vocal_model": "Вокальная модель",
        "dereverb_model": "Dereverb/Deecho модель",
        "karaoke_model": "Караоке модель",
        # Voice conversion
        "conversion_mode": "Режим преобразования",
        "conversion_info": "lead - только основной вокал\nback - только бэк-вокал\nlead/back - основной и бэк-вокалы\nfull - весь вокал",
        "vocal_pitch": "Высота тона вокала",
        "backing_pitch": "Высота тона бэк-вокала",
        "pitch_method": "Метод извлечения тона",
        "max_pitch": "Верхний лимит определения высоты тона",
        "index_rate": "Влияние индекса",
        "filter_radius": "Радиус фильтра",
        "rms_envelope": "Огибающая громкости",
        "protect_cons": "Защита согласных",
        "hop_length": "Длина шага",
        # Mixing
        "volume_adjust": "Изменение громкости",
        "vocals_gain": "Вокал",
        "backing_gain": "Бэк-вокал",
        "inst_gain": "Инструментал",
        "output_format": "Формат вывода",
        "add_unconv": "Добавить к инструменталу непреобразованный вокал",
        "add_effects": "Добавить эффекты на голос",
        # Effects
        "effects_tab": "Эффекты",
        "echo_tab": "Эхо",
        "echo_delay": "Время задержки (сек)",
        "echo_feedback": "Обратная связь",
        "echo_mix": "Смешение",
        "reverb_tab": "Реверберация",
        "reverb_size": "Размер комнаты",
        "reverb_width": "Ширина реверберации",
        "reverb_wet": "Уровень влажности",
        "reverb_dry": "Уровень сухости",
        "reverb_damping": "Уровень демпфирования",
        "chorus_tab": "Хорус",
        "chorus_rate": "Скорость хоруса",
        "chorus_depth": "Глубина хоруса",
        "chorus_delay": "Задержка центра (мс)",
        "chorus_feedback": "Обратная связь",
        "chorus_mix": "Смешение",
        # Processing
        "processing_tab": "Обработка",
        "compressor_tab": "Компрессор",
        "comp_ratio": "Соотношение",
        "comp_threshold": "Порог",
        "comp_attack": "Время атаки (мс)",
        "comp_release": "Время спада (мс)",
        "noise_gate_tab": "Подавление шума",
        "gate_threshold": "Порог",
        "gate_ratio": "Соотношение",
        "gate_attack": "Время атаки (мс)",
        "gate_release": "Время спада (мс)",
        # Status messages
        "start_processing": "Начало обработки...",
        "separation": "Разделение на музыку и вокал...",
        "extracting": "Извлечение лид/бэк-вокала...",
        "cleaning": "Очистка вокалов...",
        "converting": "Преобразование вокалов...",
        "mixing": "Сведение итогового кавера...",
        # Errors
        "error_audio_model": "Сначала загрузите аудио и выберите модель",
        "error_audio": "Сначала загрузите аудио",
        "error_model": "Сначала выберите модель",
        "error_generate_first": "Сначала сгенерируйте кавер хотя бы один раз!",
        "vbach_required": "Vbach (RVC) не установлен. В блокноте запустите ячейку 'Установка' с флагом install_vbach [✓]"
    },
    "en": {
        # General elements
        "model_name_label": "Model name",
        "update_button": "Update",
        "input_audio_label": "Input song",
        "generate_btn": "Generate cover",
        "remix_btn": "Remix cover",
        "final_result": "Final result",
        "intermediate_files": "Intermediate files",
        "status_label": "Status",
        "tab_separation": "Separation",
        "tab_voice_settings": "Voice conversion settings",
        "tab_mixing_settings": "Audio mixing settings",
        # Separation
        "preclear_vocals": "Clear vocals from reverb/echo",
        "split_vocals": "Split vocals into lead/backing",
        "vocal_model": "Vocal model",
        "dereverb_model": "Dereverb/Deecho model",
        "karaoke_model": "Karaoke model",
        # Voice conversion
        "conversion_mode": "Conversion mode",
        "conversion_info": "lead - lead vocals only\nback - backing vocals only\nlead/back - both vocals\nfull - full vocals",
        "vocal_pitch": "Vocal pitch",
        "backing_pitch": "Backing vocals pitch",
        "pitch_method": "Pitch extraction method",
        "max_pitch": "Max pitch detection frequency",
        "index_rate": "Index rate",
        "filter_radius": "Filter radius",
        "rms_envelope": "RMS envelope",
        "protect_cons": "Protect consonants",
        "hop_length": "Hop length",
        # Mixing
        "volume_adjust": "Volume adjustment",
        "vocals_gain": "Vocals",
        "backing_gain": "Backing vocals",
        "inst_gain": "Instrumental",
        "output_format": "Output format",
        "add_unconv": "Add unconverted vocals to instrumental",
        "add_effects": "Apply effects to vocals",
        # Effects
        "effects_tab": "Effects",
        "echo_tab": "Echo",
        "echo_delay": "Delay time (sec)",
        "echo_feedback": "Feedback",
        "echo_mix": "Mix",
        "reverb_tab": "Reverb",
        "reverb_size": "Room size",
        "reverb_width": "Reverb width",
        "reverb_wet": "Wet level",
        "reverb_dry": "Dry level",
        "reverb_damping": "Damping",
        "chorus_tab": "Chorus",
        "chorus_rate": "Rate (Hz)",
        "chorus_depth": "Depth",
        "chorus_delay": "Center delay (ms)",
        "chorus_feedback": "Feedback",
        "chorus_mix": "Mix",
        # Processing
        "processing_tab": "Processing",
        "compressor_tab": "Compressor",
        "comp_ratio": "Ratio",
        "comp_threshold": "Threshold (dB)",
        "comp_attack": "Attack time (ms)",
        "comp_release": "Release time (ms)",
        "noise_gate_tab": "Noise Gate",
        "gate_threshold": "Threshold (dB)",
        "gate_ratio": "Ratio",
        "gate_attack": "Attack time (ms)",
        "gate_release": "Release time (ms)",
        # Status messages
        "start_processing": "Starting processing...",
        "separation": "Separating music and vocals...",
        "extracting": "Extracting lead/backing vocals...",
        "cleaning": "Cleaning vocals...",
        "converting": "Converting vocals...",
        "mixing": "Mixing final cover...",
        # Errors
        "error_audio_model": "Please upload audio and select model first",
        "error_audio": "Please upload audio first",
        "error_model": "Please select model first",
        "error_generate_first": "Generate a cover at least once first!",
        "vbach_required": "Vbach (RVC) is not installed. In the notebook, run the 'Установка' cell with the flag install_vbach [✓]"
    }
}
# Language code used for lookups until set_language() is called.
CURRENT_LANG = "ru"
def set_language(lang):
    """Make ``lang`` the active language for subsequent ``t`` lookups.

    The value is stored as given in the module-level ``CURRENT_LANG``; it is
    not checked against the available translation tables.
    """
    global CURRENT_LANG
    CURRENT_LANG = lang
def t(key):
    """Return the UI string for ``key`` in the currently selected language.

    ``key`` itself is returned when it has no translation, and also when
    ``CURRENT_LANG`` names a language without a translation table, so an
    unsupported language code degrades the labels instead of raising
    ``KeyError`` while the interface is being built.
    """
    return TRANSLATIONS.get(CURRENT_LANG, {}).get(key, key)
def list_models(category, model_type=None):
    """List separation models whose category is one of ``category``.

    Args:
        category: Container of accepted category names; a model is included
            when its ``"category"`` entry is ``in`` this container.
        model_type: Optional iterable of model-type names to search, in the
            order given. When empty or ``None`` every type in the catalogue
            is searched.

    Returns:
        A list of ``"<model type> / <model name>"`` strings.
    """
    # Without an explicit filter, walk every model type the catalogue knows.
    type_names = model_type if model_type else mvsepless_models
    matches = []
    for m_type in type_names:
        # A requested type may be absent from the catalogue; skip it rather
        # than aborting the whole listing with KeyError.
        for m_name, m_info in mvsepless_models.get(m_type, {}).items():
            if m_info["category"] in category:
                matches.append(f"{m_type} / {m_name}")
    return matches
def find_file_from_stem(results, stem_names=("Vocals", "vocals")):
    """Pick the file belonging to one of the wanted stems.

    Args:
        results: Iterable of ``(stem_name, stem_file)`` pairs.
        stem_names: Accepted stem names.

    Returns:
        The file of the last pair whose stem name is in ``stem_names``, or
        ``None`` when no pair matches (previously this case raised
        ``UnboundLocalError``).
    """
    stem_path = None
    for stem_name, stem_file in results:
        if stem_name in stem_names:
            # Keep scanning: when several pairs match, the last one wins.
            stem_path = stem_file
    return stem_path
def _load_audio_2d(path, sr):
    """Load ``path`` with librosa as float32, always shaped (channels, samples)."""
    data, rate = librosa.load(path, mono=False, sr=sr, dtype='float32')
    if data.ndim == 1:
        data = np.expand_dims(data, axis=0)  # mono (n,) -> (1, n)
    return data, rate


def _resample_with_pydub(vocal_path, samplerate):
    """Re-read ``vocal_path`` through pydub at ``samplerate`` as (channels, samples) float32."""
    audio_segment = AudioSegment.from_wav(vocal_path)
    audio_segment = audio_segment.set_frame_rate(samplerate)
    samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32)
    # Scale integer PCM to the [-1, 1) float range.
    samples /= 2 ** (8 * audio_segment.sample_width - 1)
    return samples.reshape((-1, audio_segment.channels)).T


def _build_vocal_board(effects):
    """Build the Pedalboard chain for the effect settings present in ``effects``."""
    board = Pedalboard()
    if "compressor" in effects:
        comp = effects["compressor"]
        board.append(Compressor(
            ratio=comp["ratio"],
            threshold_db=comp["threshold"],
            attack_ms=comp["attack"],
            release_ms=comp["release"]
        ))
    if "noise_gate" in effects:
        ng = effects["noise_gate"]
        board.append(NoiseGate(
            threshold_db=ng["threshold"],
            ratio=ng["ratio"],
            attack_ms=ng["attack"],
            release_ms=ng["release"]
        ))
    if "echo" in effects:
        echo = effects["echo"]
        board.append(Delay(
            delay_seconds=echo["delay"],
            feedback=echo["feedback"],
            mix=echo["mix"]
        ))
    if "reverb" in effects:
        rev = effects["reverb"]
        board.append(Reverb(
            room_size=rev["room_size"],
            dry_level=rev["dry"],
            wet_level=rev["wet"],
            damping=rev["damping"],
            width=rev["width"]
        ))
    if "chorus" in effects:
        chorus = effects["chorus"]
        board.append(Chorus(
            rate_hz=chorus["rate"],
            depth=chorus["depth"],
            centre_delay_ms=chorus["center_delay"],
            feedback=chorus["feedback"],
            mix=chorus["mix"]
        ))
    return board


def _sum_tracks(mix, track):
    """Add ``track`` to ``mix`` over their common length; ``mix`` may be ``None``."""
    if mix is None:
        return track.copy()
    common_len = min(mix.shape[1], track.shape[1])
    return mix[:, :common_len] + track[:, :common_len]


def _vocal_gain_key(vocal_path, conversion_mode):
    """Return the gain entry ("vocals1" lead/full, "vocals2" backing) for a converted file."""
    # Relies on the "back_vocals..." file naming that gen_cover gives to
    # converted backing vocals; the position in the list is not a reliable
    # indicator because gen_cover lists backing vocals first in "lead/back".
    is_backing = os.path.basename(vocal_path).startswith("back_vocals")
    if conversion_mode == "back" or is_backing:
        return "vocals2"
    return "vocals1"


def mix_and_save(
    inst_path,
    list_vocals,
    converted_vocals_list,
    mix_params,
    params,
    rvc_params,
    temp_dir,
    input_audio
):
    """Mix instrumental and vocals, peak-normalize, and write the result.

    Args:
        inst_path: Path of the instrumental; skipped when falsy or missing.
        list_vocals: ``[full, back, lead]`` list of ``(name, path)`` tuples
            with the unconverted vocals.
        converted_vocals_list: Paths of converted vocals; entries that are
            falsy or do not exist are skipped.
        mix_params: Mixing settings with keys ``"gain"`` (dB values for
            ``instrum``/``vocals1``/``vocals2``),
            ``"add_unconverted_vocals_to_instrumental"``, and optionally
            ``"use_effects"`` and ``"pedalboard_settings"``.
        params: Needs ``"conversion_mode"`` and ``"output_format"``.
        rvc_params: Needs ``"model_name"`` (used in the output file name).
        temp_dir: Directory the result is written to.
        input_audio: Original input path, used for the output file name;
            when falsy the file is called ``remixed.<format>``.

    Returns:
        Path of the written file.

    Raises:
        ValueError: If neither the instrumental nor any vocal could be loaded.
    """
    final_audio = None
    samplerate = 44100  # fallback rate when there is no instrumental to take it from
    gains = mix_params["gain"]
    conversion_mode = params["conversion_mode"]

    # Instrumental bed; its native rate becomes the rate of the whole mix.
    if inst_path and os.path.exists(inst_path):
        inst_data, samplerate = _load_audio_2d(inst_path, None)
        inst_data *= 10 ** (gains["instrum"] / 20.0)
        final_audio = inst_data.copy()

    # Optionally keep the vocal part that is NOT being converted.
    if mix_params["add_unconverted_vocals_to_instrumental"]:
        extra_path = None
        if conversion_mode == "lead" and list_vocals[1][1]:
            extra_path, extra_gain_db = list_vocals[1][1], gains["vocals2"]
        elif conversion_mode == "back" and list_vocals[2][1]:
            extra_path, extra_gain_db = list_vocals[2][1], gains["vocals1"]
        if extra_path:
            extra, _ = _load_audio_2d(extra_path, samplerate)
            # Same length alignment as for the converted vocals below; a raw
            # "+" fails when the stems differ in length.
            final_audio = _sum_tracks(final_audio, extra * (10 ** (extra_gain_db / 20.0)))

    use_effects = mix_params.get("use_effects", False)
    effects = mix_params.get("pedalboard_settings", {})

    for vocal_path in converted_vocals_list:
        if not vocal_path or not os.path.exists(vocal_path):
            continue
        vocal_data, sr = _load_audio_2d(vocal_path, None)
        if sr != samplerate:
            vocal_data = _resample_with_pydub(vocal_path, samplerate)
        if use_effects:
            vocal_data = _build_vocal_board(effects)(vocal_data, samplerate)
        gain_db = gains[_vocal_gain_key(vocal_path, conversion_mode)]
        vocal_data = vocal_data * (10 ** (gain_db / 20.0))
        final_audio = _sum_tracks(final_audio, vocal_data)

    if final_audio is None:
        raise ValueError("Nothing to mix: no instrumental or vocal audio could be loaded")

    # Peak-normalize to full scale.
    max_amplitude = np.max(np.abs(final_audio))
    if max_amplitude > 0:
        final_audio = final_audio * (1.0 / max_amplitude)

    if input_audio:
        filename = f"{rvc_params['model_name']} - {os.path.splitext(os.path.basename(input_audio))[0]}.{params['output_format']}"
    else:
        filename = f"remixed.{params['output_format']}"
    final_path = os.path.join(temp_dir, filename)
    write_audio_file(final_path, final_audio, samplerate, params['output_format'], "320k")
    return final_path
def _split_model_label(label):
    """Split a ``"<type> / <name>"`` dropdown label into ``(type, name)``."""
    parts = label.split(" / ")
    return parts[0], parts[1]


def _run_vbach_conversion(source_path, output_dir, template, pitch, rvc_params):
    """Convert one vocal file with the Vbach CLI and return the expected output path.

    The command is passed as an argument list (no shell), so file or model
    names containing quotes or spaces cannot break or inject into it.
    The exit status is not checked; a failed conversion simply leaves the
    returned path non-existent.
    """
    import subprocess
    import sys

    command = [
        # Use the interpreter running this app so the same environment is used.
        sys.executable or "python", "-m", "vbach.cli.vbach",
        str(source_path), str(output_dir), str(rvc_params["model_name"]),
        "--template", template,
        "--pitch", str(pitch),
        "--method_pitch", str(rvc_params["f0_method"]),
        "--index_rate", str(rvc_params["index_rate"]),
        "--filter_radius", str(rvc_params["filter_radius"]),
        "--rms", str(rvc_params["rms"]),
        "--protect", str(rvc_params["protect"]),
        "--hop_length", str(rvc_params["hop_length"]),
        "--f0_min", "50",
        "--f0_max", str(rvc_params["f0_max"]),
        "--output_format", "wav",
        "--stereo_mode", "mono",
    ]
    subprocess.run(command, check=False)
    return os.path.join(output_dir, f"{template}.wav")


def gen_cover(
    input_audio,
    anti_instrum_model,
    karaoke_model,
    dereverb_model,
    output_format,
    karaoke_check,
    conversion_mode,
    preclear_vocals_check,
    voice_name,
    pitch1_val,
    pitch2_val,
    method_pitch,
    index_rate,
    fr,
    rms,
    protect,
    hop_mangio_crepe,
    f0_max,
    unconv_vocals_check,
    use_effects,
    instrumental_gain,
    vocal1_gain,
    vocal2_gain,
    echo_delay,
    echo_feedback,
    echo_mix,
    reverb_rm_size,
    reverb_width,
    reverb_wet,
    reverb_dry,
    reverb_damping,
    chorus_rate_hz,
    chorus_depth,
    chorus_centre_delay_ms,
    chorus_feedback,
    chorus_mix,
    compressor_ratio,
    compressor_threshold,
    compressor_attack,
    compressor_release,
    noise_gate_threshold,
    noise_gate_ratio,
    noise_gate_attack,
    noise_gate_release
):
    """Run the whole cover pipeline: separate, optionally split/clean, convert, mix.

    Steps: split ``input_audio`` into instrumental and vocals; optionally
    split the vocals into lead/backing (``karaoke_check``); optionally
    de-reverb the vocals (``preclear_vocals_check``); convert the vocals
    selected by ``conversion_mode`` with the Vbach CLI; mix everything with
    ``mix_and_save``. The inputs needed for a later re-mix are stored in the
    module-level ``saved_processing_data``.

    Args:
        input_audio: Path of the song to process.
        anti_instrum_model, karaoke_model, dereverb_model: Separation model
            labels of the form ``"<type> / <name>"``.
        output_format: Format of the final file.
        conversion_mode: One of ``"full"``, ``"lead"``, ``"back"``, ``"lead/back"``.
        voice_name: Voice model name passed to the Vbach CLI.
        pitch1_val, pitch2_val: Pitch for full/lead vocals and for backing vocals.
        method_pitch, index_rate, fr, rms, protect, hop_mangio_crepe, f0_max:
            Conversion settings forwarded to the Vbach CLI.
        unconv_vocals_check, use_effects, *_gain and the remaining effect
            arguments: Mixing settings forwarded to ``mix_and_save``.

    Returns:
        ``(generated_files, final_path)``: existing intermediate files plus
        the final mix, and the path of the final mix.

    Raises:
        gr.Error: If the input audio and/or the voice model is missing.
    """
    global saved_processing_data
    if not input_audio and not voice_name:
        raise gr.Error(t("error_audio_model"))
    if not input_audio:
        raise gr.Error(t("error_audio"))
    if not voice_name:
        raise gr.Error(t("error_model"))

    # Collect the parameters into the structures mix_and_save / remix expect.
    params = {
        "output_format": output_format,
        "extract_karaoke": karaoke_check,
        "conversion_mode": conversion_mode,
        "preclear_vocals": preclear_vocals_check
    }
    rvc_params = {
        "model_name": voice_name,
        "pitch1": pitch1_val,
        "pitch2": pitch2_val,
        "f0_method": method_pitch,
        'index_rate': index_rate,
        'filter_radius': fr,
        'rms': rms,
        'protect': protect,
        'hop_length': hop_mangio_crepe,
        'f0_max': f0_max
    }
    mix_params = {
        "add_unconverted_vocals_to_instrumental": unconv_vocals_check,
        "use_effects": use_effects,
        "gain": {
            "instrum": instrumental_gain,
            "vocals1": vocal1_gain,
            "vocals2": vocal2_gain
        },
        "pedalboard_settings": {
            "echo": {
                "delay": echo_delay,
                "feedback": echo_feedback,
                "mix": echo_mix
            },
            "reverb": {
                "room_size": reverb_rm_size,
                "wet": reverb_wet,
                "dry": reverb_dry,
                "damping": reverb_damping,
                "width": reverb_width,
            },
            "compressor": {
                "ratio": compressor_ratio,
                "threshold": compressor_threshold,
                "attack": compressor_attack,
                "release": compressor_release
            },
            "noise_gate": {
                "threshold": noise_gate_threshold,
                "ratio": noise_gate_ratio,
                "attack": noise_gate_attack,
                "release": noise_gate_release,
            },
            "chorus": {
                "rate": chorus_rate_hz,
                "depth": chorus_depth,
                "center_delay": chorus_centre_delay_ms,
                "feedback": chorus_feedback,
                "mix": chorus_mix
            }
        }
    }

    progress = gr.Progress()
    progress(0, desc=t("start_processing"))
    temp_dir = tempfile.mkdtemp()

    progress(0.1, desc=t("separation"))
    # Separate vocals and instrumental.
    inst_type, inst_name = _split_model_label(anti_instrum_model)
    inst_output = single_multi_inference(input_audio, os.path.join(temp_dir, "inst_output"),
                                         inst_type, inst_name,
                                         True, vr_aggr=5, output_format="wav",
                                         output_bitrate="320k", template="VbachGen_NAME_STEM",
                                         call_method="cli", selected_stems=[])
    inst_file = ("instrumental", find_file_from_stem(inst_output, ["Instrumental", "instrumental", "other", "Other"]))
    full_vocals_file = ("full_vocals", find_file_from_stem(inst_output, ["Vocals", "vocals"]))

    progress(0.2, desc=t("extracting"))
    back_vocals_file = (None, None)
    lead_vocals_file = (None, None)
    if karaoke_check:
        kar_type, kar_name = _split_model_label(karaoke_model)
        karaoke_output = single_multi_inference(full_vocals_file[1], os.path.join(temp_dir, "kar_output"),
                                                kar_type, kar_name,
                                                True, vr_aggr=5, output_format="wav",
                                                output_bitrate="320k", template="NAME_MODEL_STEM",
                                                call_method="cli", selected_stems=[])
        back_vocals_file = ("back_vocals", find_file_from_stem(karaoke_output, ["Instrumental", "instrumental", "other", "Other"]))
        lead_vocals_file = ("lead_vocals", find_file_from_stem(karaoke_output, ["Vocals", "vocals", "karaoke"]))

    # Order is fixed: index 0 = full, 1 = backing, 2 = lead (mix_and_save indexes it).
    list_vocals = [full_vocals_file, back_vocals_file, lead_vocals_file]
    # Only the parts that will be converted are cleaned (full vocals always).
    clear_list_vocals = [
        full_vocals_file,
        back_vocals_file if conversion_mode in ["back", "lead/back"] else (None, None),
        lead_vocals_file if conversion_mode in ["lead", "lead/back"] else (None, None)
    ]

    progress(0.3, desc=t("cleaning"))
    # Pre-clear vocals if needed.
    if preclear_vocals_check:
        cleared_by_name = {}
        for i, (name, file) in enumerate(clear_list_vocals):
            if not (file and os.path.exists(file)):
                continue
            dereverb_type, dereverb_name = _split_model_label(dereverb_model)
            clear_output = single_multi_inference(file, os.path.join(temp_dir, f"cleared_output_{i}"),
                                                  dereverb_type, dereverb_name,
                                                  True, vr_aggr=5, output_format="wav",
                                                  output_bitrate="320k", template="NAME_STEM",
                                                  call_method="cli", selected_stems=[])
            cleared_file = find_file_from_stem(clear_output, ["No Echo", "No Reverb", "Dry", "Other"])
            # Keep the uncleaned file when the cleaner produced no usable stem.
            if cleared_file:
                cleared_by_name[name] = cleared_file
        list_vocals = [(name, cleared_by_name.get(name, file)) for name, file in list_vocals]

    progress(0.5, desc=t("converting"))
    # Vocal conversion: build the job list for the chosen mode, then run it.
    converted_dir = os.path.join(temp_dir, 'converted')
    full_path, back_path, lead_path = (entry[1] for entry in list_vocals)
    full_job = (full_path, "full_vocals_converted", pitch1_val)
    lead_job = (lead_path, "lead_vocals_converted", pitch1_val)
    back_job = (back_path, "back_vocals_converted", pitch2_val)
    jobs = []
    if conversion_mode == "full" and full_path:
        jobs = [full_job]
    elif conversion_mode == "lead/back" and back_path and lead_path:
        # Lead first, so the first converted track is the one "vocals1" refers to.
        jobs = [lead_job, back_job]
    elif conversion_mode == "back" and back_path:
        jobs = [back_job]
    elif conversion_mode == "lead" and lead_path:
        jobs = [lead_job]
    converted_vocals_list = [
        _run_vbach_conversion(source, converted_dir, template, pitch, rvc_params)
        for source, template, pitch in jobs
    ]

    # Prepare the list of intermediate files, dropping anything that was not produced.
    candidate_files = [inst_file[1]]
    candidate_files.extend(file for _, file in list_vocals if file)
    candidate_files.extend(converted_vocals_list)
    generated_files = [path for path in candidate_files if path and os.path.exists(path)]

    # Save the data for a possible re-mix.
    saved_processing_data = {
        "inst_path": inst_file[1],
        "list_vocals": list_vocals,
        "converted_vocals_list": converted_vocals_list,
        "params": params,
        "rvc_params": rvc_params,
        "input_audio": input_audio
    }

    progress(0.9, desc=t("mixing"))
    # Mixing.
    final_path = mix_and_save(
        inst_file[1],
        list_vocals,
        converted_vocals_list,
        mix_params,
        params,
        rvc_params,
        temp_dir,
        input_audio
    )
    generated_files.append(final_path)
    return generated_files, final_path
def remix_cover(
    use_effects,
    instrumental_gain,
    vocal1_gain,
    vocal2_gain,
    echo_delay,
    echo_feedback,
    echo_mix,
    reverb_rm_size,
    reverb_width,
    reverb_wet,
    reverb_dry,
    reverb_damping,
    chorus_rate_hz,
    chorus_depth,
    chorus_centre_delay_ms,
    chorus_feedback,
    chorus_mix,
    compressor_ratio,
    compressor_threshold,
    compressor_attack,
    compressor_release,
    noise_gate_threshold,
    noise_gate_ratio,
    noise_gate_attack,
    noise_gate_release
):
    """Re-run only the mixing stage on the last generated cover.

    Uses the files and parameters stored in ``saved_processing_data`` by the
    previous generation together with the gain/effect settings given here,
    and writes the new mix into a fresh temporary directory. Unconverted
    vocals are always requested for the mix.

    Returns:
        Path of the re-mixed file.

    Raises:
        gr.Error: If no cover has been generated yet.
    """
    global saved_processing_data
    session = saved_processing_data
    if not session:
        raise gr.Error(t("error_generate_first"))

    # Assemble the mixing parameters piece by piece.
    gain_settings = {
        "instrum": instrumental_gain,
        "vocals1": vocal1_gain,
        "vocals2": vocal2_gain
    }
    echo_settings = {
        "delay": echo_delay,
        "feedback": echo_feedback,
        "mix": echo_mix
    }
    reverb_settings = {
        "room_size": reverb_rm_size,
        "wet": reverb_wet,
        "dry": reverb_dry,
        "damping": reverb_damping,
        "width": reverb_width,
    }
    compressor_settings = {
        "ratio": compressor_ratio,
        "threshold": compressor_threshold,
        "attack": compressor_attack,
        "release": compressor_release
    }
    noise_gate_settings = {
        "threshold": noise_gate_threshold,
        "ratio": noise_gate_ratio,
        "attack": noise_gate_attack,
        "release": noise_gate_release,
    }
    chorus_settings = {
        "rate": chorus_rate_hz,
        "depth": chorus_depth,
        "center_delay": chorus_centre_delay_ms,
        "feedback": chorus_feedback,
        "mix": chorus_mix
    }
    mix_params = {
        "add_unconverted_vocals_to_instrumental": True,  # always use the original vocals
        "use_effects": use_effects,
        "gain": gain_settings,
        "pedalboard_settings": {
            "echo": echo_settings,
            "reverb": reverb_settings,
            "compressor": compressor_settings,
            "noise_gate": noise_gate_settings,
            "chorus": chorus_settings
        }
    }

    # Run the mixing stage only, on the data restored from the saved session.
    output_dir = tempfile.mkdtemp()
    return mix_and_save(
        session["inst_path"],
        session["list_vocals"],
        session["converted_vocals_list"],
        mix_params,
        session["params"],
        session["rvc_params"],
        output_dir,
        session["input_audio"]
    )
def vbach_plugin_name():
    """Return this plugin's name string."""
    plugin_name = "VbachGen"
    return plugin_name
def _list_voice_models():
    """Return the sub-directory names of ``RVC_MODELS_DIR`` (empty if it does not exist)."""
    if not os.path.isdir(RVC_MODELS_DIR):
        return []
    return [d for d in os.listdir(RVC_MODELS_DIR) if os.path.isdir(os.path.join(RVC_MODELS_DIR, d))]


def vbachgen_plugin(lang):
    """Build the VbachGen Gradio interface in language ``lang``.

    When a ``vbach`` path exists in the current working directory the full
    interface (model/input selection, separation, voice conversion and mixing
    settings, outputs) is created and wired to ``gen_cover`` and
    ``remix_cover``; otherwise only the "Vbach is not installed" notice is
    shown.

    NOTE(review): the container nesting below was reconstructed from the
    order of the ``with`` statements; confirm the layout against the running UI.
    """
    set_language(lang)
    with gr.Blocks():
        if os.path.exists("vbach"):
            with gr.Row(equal_height=False, variant="panel"):
                with gr.Column():
                    model_name = gr.Dropdown(label=t("model_name_label"), interactive=True, filterable=False, scale=6)
                    model_update_btn = gr.Button(t("update_button"), variant="primary", scale=3, size="lg")
                with gr.Row(min_height=150):
                    input_audio = gr.File(label=t("input_audio_label"), interactive=True, type="filepath", file_count="single")
            with gr.Row():
                with gr.Column():
                    with gr.Tab(t("tab_separation")):
                        preclear_vocals_check = gr.Checkbox(label=t("preclear_vocals"), value=False)
                        karaoke_check = gr.Checkbox(label=t("split_vocals"), value=False)
                        with gr.Column(variant="panel"):
                            with gr.Group() as extract_vocals_group:
                                anti_instrum_model = gr.Dropdown(
                                    label=t("vocal_model"),
                                    choices=list_models(["Инструментал", "Вокал", "Инструментал и вокал"], ["mel_band_roformer", "bs_roformer", "mdx23c", "mdx", "htdemucs"]),
                                    interactive=True,
                                    filterable=False
                                )
                            with gr.Group(visible=False) as deecho_group:
                                dereverb_model = gr.Dropdown(
                                    label=t("dereverb_model"),
                                    choices=list_models(["Реверб и эхо", "Реверб", "Эхо"], ["vr"]),
                                    interactive=True,
                                    filterable=False
                                )
                            with gr.Group(visible=False) as karaoke_group:
                                karaoke_model = gr.Dropdown(
                                    label=t("karaoke_model"),
                                    choices=list_models(["Караоке"]),
                                    interactive=True,
                                    filterable=False
                                )
                    with gr.Tab(t("tab_voice_settings")):
                        conversion_mode = gr.Dropdown(
                            label=t("conversion_mode"),
                            choices=["lead", "back", "lead/back", "full"],
                            value="full",
                            filterable=False,
                            visible=False,
                            info=t("conversion_info")
                        )
                        with gr.Row():
                            pitch1 = gr.Slider(-48, 48, value=0, step=12, label=t("vocal_pitch"), interactive=True)
                            pitch2 = gr.Slider(-48, 48, value=0, step=12, label=t("backing_pitch"), visible=False, interactive=True)
                        with gr.Row():
                            method_pitch = gr.Dropdown(
                                label=t("pitch_method"),
                                choices=["mangio-crepe", "rmvpe+", "fcpe"],
                                value="rmvpe+",
                                interactive=True,
                                filterable=False
                            )
                            f0_max = gr.Slider(50, 2000, value=1100, step=50, label=t("max_pitch"), interactive=True)
                        with gr.Row():
                            with gr.Column(scale=1):
                                index_rate = gr.Slider(0, 1, value=0, step=0.1, label=t("index_rate"), interactive=True)
                                fr = gr.Slider(0, 7, value=3, step=1, label=t("filter_radius"), interactive=True)
                            with gr.Column(scale=1):
                                rms = gr.Slider(0, 1, value=0.25, step=0.05, label=t("rms_envelope"), interactive=True)
                                protect = gr.Slider(minimum=0, maximum=0.5, step=0.01, value=0.33, label=t("protect_cons"), interactive=True)
                        hop_mangio_crepe = gr.Slider(1, 512, value=128, step=1, label=t("hop_length"), interactive=True, visible=False)
                    with gr.Tab(t("tab_mixing_settings")):
                        gr.Markdown(f"<center><h2>{t('volume_adjust')}</h2></center>")
                        with gr.Row(variant="panel"):
                            vocal1_gain = gr.Slider(-30, 30, value=0, step=1, label=t("vocals_gain"), scale=3, interactive=True)
                            vocal2_gain = gr.Slider(-30, 30, value=0, step=1, label=t("backing_gain"), scale=3, visible=False, interactive=True)
                            instrumental_gain = gr.Slider(-30, 30, value=0, step=1, label=t("inst_gain"), scale=3, interactive=True)
                        output_format = gr.Dropdown(
                            label=t("output_format"),
                            choices=OUTPUT_FORMAT,
                            value="wav",
                            interactive=True,
                            filterable=False
                        )
                        unconv_vocals_check = gr.Checkbox(label=t("add_unconv"), visible=False)
                        use_effects = gr.Checkbox(label=t("add_effects"), value=False)
                        with gr.Column(variant="panel", visible=False) as effects_accordion:
                            with gr.Tab(t("effects_tab")):
                                with gr.Tab(t("echo_tab")):
                                    with gr.Group():
                                        with gr.Column(variant="panel"):
                                            with gr.Row():
                                                echo_delay = gr.Slider(0, 3, value=0, label=t("echo_delay"), interactive=True)
                                                echo_feedback = gr.Slider(0, 1, value=0, label=t("echo_feedback"), interactive=True)
                                                echo_mix = gr.Slider(0, 1, value=0, label=t("echo_mix"), interactive=True)
                                with gr.Tab(t("reverb_tab")):
                                    with gr.Group():
                                        with gr.Column(variant="panel"):
                                            with gr.Row():
                                                reverb_rm_size = gr.Slider(0, 1, value=0.1, label=t("reverb_size"), interactive=True)
                                                reverb_width = gr.Slider(0, 1, value=1.0, label=t("reverb_width"), interactive=True)
                                            with gr.Row():
                                                reverb_wet = gr.Slider(0, 1, value=0.1, label=t("reverb_wet"), interactive=True)
                                                reverb_dry = gr.Slider(0, 1, value=0.8, label=t("reverb_dry"), interactive=True)
                                            with gr.Row():
                                                reverb_damping = gr.Slider(0, 1, value=0.9, label=t("reverb_damping"), interactive=True)
                                with gr.Tab(t("chorus_tab")):
                                    with gr.Group():
                                        with gr.Column(variant="panel"):
                                            with gr.Row():
                                                # Minimum lowered from 0.1 to 0 so the default value lies inside the slider range.
                                                chorus_rate_hz = gr.Slider(0, 10, value=0, label=t("chorus_rate"), interactive=True)
                                                chorus_depth = gr.Slider(0, 1, value=0, label=t("chorus_depth"), interactive=True)
                                            with gr.Row():
                                                chorus_centre_delay_ms = gr.Slider(0, 50, value=0, label=t("chorus_delay"), interactive=True)
                                                chorus_feedback = gr.Slider(0, 1, value=0, label=t("chorus_feedback"), interactive=True)
                                            with gr.Row():
                                                chorus_mix = gr.Slider(0, 1, value=0, label=t("chorus_mix"), interactive=True)
                            with gr.Tab(t("processing_tab")):
                                with gr.Tab(t("compressor_tab")):
                                    with gr.Row(variant="panel"):
                                        compressor_ratio = gr.Slider(1, 20, value=4, label=t("comp_ratio"), interactive=True)
                                        compressor_threshold = gr.Slider(-60, 0, value=-12, label=t("comp_threshold"), interactive=True)
                                        compressor_attack = gr.Slider(0, 2000, value=100, label=t("comp_attack"), interactive=True)
                                        compressor_release = gr.Slider(0, 2000, value=100, label=t("comp_release"), interactive=True)
                                with gr.Tab(t("noise_gate_tab")):
                                    with gr.Group():
                                        with gr.Column(variant="panel"):
                                            with gr.Row():
                                                noise_gate_threshold = gr.Slider(-60, 0, value=-40, label=t("gate_threshold"), interactive=True)
                                                noise_gate_ratio = gr.Slider(1, 20, value=8, label=t("gate_ratio"), interactive=True)
                                            with gr.Row():
                                                noise_gate_attack = gr.Slider(0, 100, value=10, label=t("gate_attack"), interactive=True)
                                                noise_gate_release = gr.Slider(0, 1000, value=100, label=t("gate_release"), interactive=True)
                # Outputs
                with gr.Column(variant="panel"):
                    final_ai_cover = gr.Audio(label=t("final_result"), interactive=False, streaming=True)
                    generated_files_list = gr.Files(label=t("intermediate_files"))
                    with gr.Row():
                        generate_btn = gr.Button(t("generate_btn"), variant="primary")
                        remix_btn = gr.Button(t("remix_btn"), variant="secondary")
                    status_text = gr.Textbox(label=t("status_label"), interactive=False)

            # Event handlers
            def _unconv_check_update(mode):
                # The "add unconverted vocals" option only applies when exactly
                # one of lead/backing is converted; it is reset on every change.
                return gr.update(visible=mode in ["back", "lead"], value=False)

            def _mode_controls_update(mode):
                # Lead/full controls and backing controls follow what gen_cover
                # actually uses for the mode: pitch1 for full and lead vocals,
                # pitch2 for backing vocals.
                lead_or_full = mode in ["lead", "lead/back", "full"]
                backing = mode in ["back", "lead/back"]
                return (
                    gr.update(visible=lead_or_full),
                    gr.update(visible=backing),
                    gr.update(visible=lead_or_full),
                    gr.update(visible=backing),
                )

            # The hop-length slider is only shown for the mangio-crepe method.
            method_pitch.change(fn=lambda x: gr.update(visible=x == "mangio-crepe"), inputs=method_pitch, outputs=hop_mangio_crepe)
            model_update_btn.click(fn=lambda: gr.update(choices=_list_voice_models()), inputs=None, outputs=model_name)
            use_effects.change(
                fn=lambda x: gr.update(visible=x),
                inputs=use_effects,
                outputs=effects_accordion
            )
            karaoke_check.change(
                fn=lambda x: gr.update(visible=x),
                inputs=karaoke_check,
                outputs=karaoke_group
            ).then(
                fn=lambda x: gr.update(value="full", visible=x),
                inputs=karaoke_check,
                outputs=conversion_mode
            ).then(
                fn=_unconv_check_update,
                inputs=conversion_mode,
                outputs=unconv_vocals_check
            )
            preclear_vocals_check.change(
                fn=lambda x: gr.update(visible=x),
                inputs=preclear_vocals_check,
                outputs=deecho_group
            )
            conversion_mode.change(
                fn=_mode_controls_update,
                inputs=conversion_mode,
                outputs=[vocal1_gain, vocal2_gain, pitch1, pitch2]
            ).then(
                fn=_unconv_check_update,
                inputs=conversion_mode,
                outputs=unconv_vocals_check
            )

            # Mixing inputs shared by both buttons, in remix_cover's parameter order.
            mix_inputs = [
                use_effects,
                instrumental_gain,
                vocal1_gain,
                vocal2_gain,
                echo_delay,
                echo_feedback,
                echo_mix,
                reverb_rm_size,
                reverb_width,
                reverb_wet,
                reverb_dry,
                reverb_damping,
                chorus_rate_hz,
                chorus_depth,
                chorus_centre_delay_ms,
                chorus_feedback,
                chorus_mix,
                compressor_ratio,
                compressor_threshold,
                compressor_attack,
                compressor_release,
                noise_gate_threshold,
                noise_gate_ratio,
                noise_gate_attack,
                noise_gate_release
            ]
            generate_btn.click(
                fn=gen_cover,
                inputs=[
                    input_audio,
                    anti_instrum_model,
                    karaoke_model,
                    dereverb_model,
                    output_format,
                    karaoke_check,
                    conversion_mode,
                    preclear_vocals_check,
                    model_name,
                    pitch1,
                    pitch2,
                    method_pitch,
                    index_rate,
                    fr,
                    rms,
                    protect,
                    hop_mangio_crepe,
                    f0_max,
                    unconv_vocals_check
                ] + mix_inputs,
                outputs=[generated_files_list, final_ai_cover]
            )
            remix_btn.click(
                fn=remix_cover,
                inputs=mix_inputs,
                outputs=[final_ai_cover]
            )
        else:
            gr.Markdown(f"<center><h2>{t('vbach_required')}</h2></center>")