|
|
import os |
|
|
import gradio as gr |
|
|
import soundfile as sf |
|
|
import librosa |
|
|
import numpy as np |
|
|
from pydub import AudioSegment |
|
|
from pedalboard import Pedalboard, Compressor, Reverb, Delay, NoiseGate, Chorus |
|
|
from pedalboard.io import AudioFile |
|
|
from separator.audio_writer import write_audio_file |
|
|
import tempfile |
|
|
from model_list import models_data as mvsepless_models |
|
|
from multi_inference import single_multi_inference |
|
|
|
|
|
# "voice_models" directory under the current working directory
# (presumably where the RVC voice models are stored — confirm against the loader).
RVC_MODELS_DIR = os.path.join(os.getcwd(), "voice_models")
# Audio format names (presumably the choices offered for the final cover — confirm in the UI code).
OUTPUT_FORMAT = ["mp3", "wav", "flac", "aiff", "m4a", "aac", "ogg", "opus"]

# Intermediate results of the most recent gen_cover() run. remix_cover() reads
# this to re-mix without repeating separation/conversion; it stays None until
# a cover has been generated.
saved_processing_data = None
|
|
|
|
|
|
|
|
# UI strings, keyed first by language code ("ru", "en") and then by message key.
# Looked up through t(), which returns the key itself when a message is missing.
TRANSLATIONS = {
    "ru": {
        # Main controls and tab titles
        "model_name_label": "Имя модели",
        "update_button": "Обновить",
        "input_audio_label": "Входная песня",
        "generate_btn": "Сгенерировать кавер",
        "remix_btn": "Пересвести кавер",
        "final_result": "Финальный результат",
        "intermediate_files": "Промежуточные файлы",
        "status_label": "Статус",
        "tab_separation": "Разделение",
        "tab_voice_settings": "Настройки преобразования голоса",
        "tab_mixing_settings": "Настройки сведения аудио",

        # Separation tab
        "preclear_vocals": "Очистить вокал от реверба/эха",
        "split_vocals": "Разделить вокал на лид/бэк-вокалы",
        "vocal_model": "Вокальная модель",
        "dereverb_model": "Dereverb/Deecho модель",
        "karaoke_model": "Караоке модель",

        # Voice conversion settings
        "conversion_mode": "Режим преобразования",
        "conversion_info": "lead - только основной вокал\nback - только бэк-вокал\nlead/back - основной и бэк-вокалы\nfull - весь вокал",
        "vocal_pitch": "Высота тона вокала",
        "backing_pitch": "Высота тона бэк-вокала",
        "pitch_method": "Метод извлечения тона",
        "max_pitch": "Верхний лимит определения высоты тона",
        "index_rate": "Влияние индекса",
        "filter_radius": "Радиус фильтра",
        "rms_envelope": "Огибающая громкости",
        "protect_cons": "Защита согласных",
        "hop_length": "Длина шага",

        # Mixing settings
        "volume_adjust": "Изменение громкости",
        "vocals_gain": "Вокал",
        "backing_gain": "Бэк-вокал",
        "inst_gain": "Инструментал",
        "output_format": "Формат вывода",
        "add_unconv": "Добавить к инструменталу непреобразованный вокал",
        "add_effects": "Добавить эффекты на голос",

        # Effects: echo, reverb, chorus
        "effects_tab": "Эффекты",
        "echo_tab": "Эхо",
        "echo_delay": "Время задержки (сек)",
        "echo_feedback": "Обратная связь",
        "echo_mix": "Смешение",
        "reverb_tab": "Реверберация",
        "reverb_size": "Размер комнаты",
        "reverb_width": "Ширина реверберации",
        "reverb_wet": "Уровень влажности",
        "reverb_dry": "Уровень сухости",
        "reverb_damping": "Уровень демпфирования",
        "chorus_tab": "Хорус",
        "chorus_rate": "Скорость хоруса",
        "chorus_depth": "Глубина хоруса",
        "chorus_delay": "Задержка центра (мс)",
        "chorus_feedback": "Обратная связь",
        "chorus_mix": "Смешение",

        # Processing: compressor and noise gate
        "processing_tab": "Обработка",
        "compressor_tab": "Компрессор",
        "comp_ratio": "Соотношение",
        "comp_threshold": "Порог",
        "comp_attack": "Время атаки (мс)",
        "comp_release": "Время спада (мс)",
        "noise_gate_tab": "Подавление шума",
        "gate_threshold": "Порог",
        "gate_ratio": "Соотношение",
        "gate_attack": "Время атаки (мс)",
        "gate_release": "Время спада (мс)",

        # Progress messages shown by gen_cover()
        "start_processing": "Начало обработки...",
        "separation": "Разделение на музыку и вокал...",
        "extracting": "Извлечение лид/бэк-вокала...",
        "cleaning": "Очистка вокалов...",
        "converting": "Преобразование вокалов...",
        "mixing": "Сведение итогового кавера...",

        # Error messages raised as gr.Error
        "error_audio_model": "Сначала загрузите аудио и выберите модель",
        "error_audio": "Сначала загрузите аудио",
        "error_model": "Сначала выберите модель",
        "error_generate_first": "Сначала сгенерируйте кавер хотя бы один раз!",

        # Message for a missing Vbach installation
        "vbach_required": "Vbach (RVC) не установлен. В блокноте запустите ячейку 'Установка' с флагом install_vbach [✓]"
    },
    "en": {
        # Main controls and tab titles
        "model_name_label": "Model name",
        "update_button": "Update",
        "input_audio_label": "Input song",
        "generate_btn": "Generate cover",
        "remix_btn": "Remix cover",
        "final_result": "Final result",
        "intermediate_files": "Intermediate files",
        "status_label": "Status",
        "tab_separation": "Separation",
        "tab_voice_settings": "Voice conversion settings",
        "tab_mixing_settings": "Audio mixing settings",

        # Separation tab
        "preclear_vocals": "Clear vocals from reverb/echo",
        "split_vocals": "Split vocals into lead/backing",
        "vocal_model": "Vocal model",
        "dereverb_model": "Dereverb/Deecho model",
        "karaoke_model": "Karaoke model",

        # Voice conversion settings
        "conversion_mode": "Conversion mode",
        "conversion_info": "lead - lead vocals only\nback - backing vocals only\nlead/back - both vocals\nfull - full vocals",
        "vocal_pitch": "Vocal pitch",
        "backing_pitch": "Backing vocals pitch",
        "pitch_method": "Pitch extraction method",
        "max_pitch": "Max pitch detection frequency",
        "index_rate": "Index rate",
        "filter_radius": "Filter radius",
        "rms_envelope": "RMS envelope",
        "protect_cons": "Protect consonants",
        "hop_length": "Hop length",

        # Mixing settings
        "volume_adjust": "Volume adjustment",
        "vocals_gain": "Vocals",
        "backing_gain": "Backing vocals",
        "inst_gain": "Instrumental",
        "output_format": "Output format",
        "add_unconv": "Add unconverted vocals to instrumental",
        "add_effects": "Apply effects to vocals",

        # Effects: echo, reverb, chorus
        "effects_tab": "Effects",
        "echo_tab": "Echo",
        "echo_delay": "Delay time (sec)",
        "echo_feedback": "Feedback",
        "echo_mix": "Mix",
        "reverb_tab": "Reverb",
        "reverb_size": "Room size",
        "reverb_width": "Reverb width",
        "reverb_wet": "Wet level",
        "reverb_dry": "Dry level",
        "reverb_damping": "Damping",
        "chorus_tab": "Chorus",
        "chorus_rate": "Rate (Hz)",
        "chorus_depth": "Depth",
        "chorus_delay": "Center delay (ms)",
        "chorus_feedback": "Feedback",
        "chorus_mix": "Mix",

        # Processing: compressor and noise gate
        "processing_tab": "Processing",
        "compressor_tab": "Compressor",
        "comp_ratio": "Ratio",
        "comp_threshold": "Threshold (dB)",
        "comp_attack": "Attack time (ms)",
        "comp_release": "Release time (ms)",
        "noise_gate_tab": "Noise Gate",
        "gate_threshold": "Threshold (dB)",
        "gate_ratio": "Ratio",
        "gate_attack": "Attack time (ms)",
        "gate_release": "Release time (ms)",

        # Progress messages shown by gen_cover()
        "start_processing": "Starting processing...",
        "separation": "Separating music and vocals...",
        "extracting": "Extracting lead/backing vocals...",
        "cleaning": "Cleaning vocals...",
        "converting": "Converting vocals...",
        "mixing": "Mixing final cover...",

        # Error messages raised as gr.Error
        "error_audio_model": "Please upload audio and select model first",
        "error_audio": "Please upload audio first",
        "error_model": "Please select model first",
        "error_generate_first": "Generate a cover at least once first!",

        # Message for a missing Vbach installation
        "vbach_required": "Vbach (RVC) is not installed. In the notebook, run the 'Установка' cell with the flag install_vbach [✓]"
    }
}
|
|
|
|
|
# Language code t() uses to select a table in TRANSLATIONS; changed by set_language().
CURRENT_LANG = "ru"
|
|
|
|
|
def set_language(lang):
    """Select the language whose strings ``t()`` will return from now on.

    Args:
        lang: Language code stored verbatim in the module-level
            ``CURRENT_LANG``. No validation is performed here.
    """
    global CURRENT_LANG
    CURRENT_LANG = lang
|
|
|
|
|
def t(key):
    """Return the UI string for *key* in the currently selected language.

    The lookup goes through ``TRANSLATIONS[CURRENT_LANG]``. When the key has
    no entry -- or ``CURRENT_LANG`` names a language that has no table at all
    (``set_language`` accepts any value) -- the key itself is returned, so the
    interface still renders instead of failing with ``KeyError``.

    Args:
        key: Message key, e.g. ``"generate_btn"``.

    Returns:
        The translated string, or *key* unchanged when no translation exists.
    """
    return TRANSLATIONS.get(CURRENT_LANG, {}).get(key, key)
|
|
|
|
|
def list_models(category, model_type=None):
    """Build the ``"<type> / <name>"`` labels of models in the given categories.

    Args:
        category: Container of accepted category values; each model's
            ``"category"`` entry is tested against it with ``in``.
        model_type: Optional iterable of model-type keys of
            ``mvsepless_models`` to search. When falsy, every model type
            is searched.

    Returns:
        A list of ``f"{m_type} / {m_name}"`` strings in iteration order.
    """
    # An empty/None selection means "all known model types".
    type_keys = model_type if model_type else list(mvsepless_models.keys())

    labels = []
    for type_key in type_keys:
        labels.extend(
            f"{type_key} / {name}"
            for name, info in mvsepless_models[type_key].items()
            if info["category"] in category
        )
    return labels
|
|
|
|
|
def find_file_from_stem(results, stem_names=("Vocals", "vocals")):
    """Pick the file of the stem whose name is listed in *stem_names*.

    Args:
        results: Iterable of ``(stem_name, stem_file)`` pairs.
        stem_names: Container of accepted stem names (checked with ``in``).
            The default is an immutable tuple so it cannot be mutated
            between calls.

    Returns:
        The file paired with a matching stem name. If several stems match,
        the last one wins (unchanged behaviour). ``None`` when nothing
        matches or *results* is empty -- previously this case raised
        ``UnboundLocalError``.
    """
    stem_path = None
    for stem_name, stem_file in results:
        if stem_name in stem_names:
            stem_path = stem_file
    return stem_path
|
|
|
|
|
def _db_to_linear(gain_db):
    """Convert a gain in decibels to a linear amplitude factor."""
    return 10 ** (gain_db / 20.0)


def _load_audio_2d(path, samplerate=None):
    """Load *path* as float32 audio shaped ``(channels, samples)``.

    With ``samplerate=None`` the file's own rate is kept; otherwise librosa
    resamples to *samplerate* while loading. Returns ``(data, rate)``.
    """
    data, rate = librosa.load(path, mono=False, sr=samplerate, dtype='float32')
    if data.ndim == 1:
        # Mono files come back 1-D; give them an explicit channel axis.
        data = np.expand_dims(data, axis=0)
    return data, rate


def _sum_tracks(mix, track):
    """Add *track* to *mix*, trimming both to the shorter length.

    *mix* may be ``None`` (nothing mixed yet), in which case a copy of
    *track* becomes the mix.
    """
    if mix is None:
        return track.copy()
    length = min(mix.shape[1], track.shape[1])
    return mix[:, :length] + track[:, :length]


def _build_vocal_board(effects):
    """Create the Pedalboard for the converted vocals from the *effects* dict.

    Effects are appended in a fixed order -- compressor, noise gate, echo,
    reverb, chorus -- and only for the keys present in *effects*.
    """
    board = Pedalboard()

    if "compressor" in effects:
        comp = effects["compressor"]
        board.append(Compressor(
            ratio=comp["ratio"],
            threshold_db=comp["threshold"],
            attack_ms=comp["attack"],
            release_ms=comp["release"]
        ))

    if "noise_gate" in effects:
        gate = effects["noise_gate"]
        board.append(NoiseGate(
            threshold_db=gate["threshold"],
            ratio=gate["ratio"],
            attack_ms=gate["attack"],
            release_ms=gate["release"]
        ))

    if "echo" in effects:
        echo = effects["echo"]
        board.append(Delay(
            delay_seconds=echo["delay"],
            feedback=echo["feedback"],
            mix=echo["mix"]
        ))

    if "reverb" in effects:
        rev = effects["reverb"]
        board.append(Reverb(
            room_size=rev["room_size"],
            dry_level=rev["dry"],
            wet_level=rev["wet"],
            damping=rev["damping"],
            width=rev["width"]
        ))

    if "chorus" in effects:
        chorus = effects["chorus"]
        board.append(Chorus(
            rate_hz=chorus["rate"],
            depth=chorus["depth"],
            centre_delay_ms=chorus["center_delay"],
            feedback=chorus["feedback"],
            mix=chorus["mix"]
        ))

    return board


def _converted_gain_key(vocal_path):
    """Return the ``mix_params["gain"]`` key that applies to a converted vocal.

    gen_cover() names converted files ``lead_vocals_converted``,
    ``back_vocals_converted`` or ``full_vocals_converted``. Backing vocals use
    the ``"vocals2"`` gain (as the unconverted backing vocals already do);
    everything else uses ``"vocals1"``.
    """
    if os.path.basename(vocal_path).startswith("back_vocals"):
        return "vocals2"
    return "vocals1"


def mix_and_save(
    inst_path,
    list_vocals,
    converted_vocals_list,
    mix_params,
    params,
    rvc_params,
    temp_dir,
    input_audio
):
    """Mix instrumental and vocals into the final cover and write it to disk.

    Args:
        inst_path: Path of the instrumental track; skipped when falsy or
            missing on disk.
        list_vocals: Three ``(name, path)`` pairs in the order full, backing,
            lead vocals; ``path`` may be ``None``.
        converted_vocals_list: Paths of the converted vocal files. Falsy or
            non-existent paths are skipped.
        mix_params: Dict with ``"gain"`` (dB values under ``"instrum"``,
            ``"vocals1"``, ``"vocals2"``),
            ``"add_unconverted_vocals_to_instrumental"`` and optionally
            ``"use_effects"`` / ``"pedalboard_settings"``.
        params: Dict providing ``"conversion_mode"`` and ``"output_format"``.
        rvc_params: Dict providing ``"model_name"`` (used in the file name).
        temp_dir: Directory the result is written into.
        input_audio: Path of the original song; its base name is used in the
            output file name. When falsy the file is called ``remixed.<ext>``.

    Returns:
        Path of the written audio file.

    Raises:
        ValueError: If there is no audio at all to mix.
    """
    gains = mix_params["gain"]
    final_audio = None
    samplerate = 44100  # used only when no instrumental defines the rate

    # The instrumental, when present, sets the sample rate of the whole mix.
    if inst_path and os.path.exists(inst_path):
        inst_data, samplerate = _load_audio_2d(inst_path)
        inst_data *= _db_to_linear(gains["instrum"])
        final_audio = inst_data

    # Optionally keep the vocal part that is NOT being converted.
    if mix_params["add_unconverted_vocals_to_instrumental"]:
        mode = params["conversion_mode"]
        extra_path = None
        if mode == "lead" and list_vocals[1][1]:
            extra_path, extra_gain = list_vocals[1][1], gains["vocals2"]
        elif mode == "back" and list_vocals[2][1]:
            extra_path, extra_gain = list_vocals[2][1], gains["vocals1"]
        if extra_path:
            extra, _ = _load_audio_2d(extra_path, samplerate)
            # Trim to a common length: separated stems are not guaranteed to
            # have exactly as many samples as the instrumental.
            final_audio = _sum_tracks(final_audio, extra * _db_to_linear(extra_gain))

    # One board serves every vocal; Pedalboard resets its state on each call.
    board = None
    if mix_params.get("use_effects", False):
        board = _build_vocal_board(mix_params.get("pedalboard_settings", {}))

    for vocal_path in converted_vocals_list:
        if not vocal_path or not os.path.exists(vocal_path):
            continue

        # Load directly at the mix rate so no separate resampling pass is needed.
        vocal_data, _ = _load_audio_2d(vocal_path, samplerate)
        if board is not None:
            vocal_data = board(vocal_data, samplerate)

        vocal_data *= _db_to_linear(gains[_converted_gain_key(vocal_path)])
        final_audio = _sum_tracks(final_audio, vocal_data)

    if final_audio is None:
        raise ValueError("Nothing to mix: no instrumental or vocal audio was found")

    # Peak-normalise so the summed tracks cannot clip.
    max_amplitude = np.max(np.abs(final_audio)) if final_audio.size else 0
    if max_amplitude > 0:
        final_audio = final_audio * (1.0 / max_amplitude)

    filename = f"{rvc_params['model_name']} - {os.path.splitext(os.path.basename(input_audio))[0]}.{params['output_format']}" if input_audio else f"remixed.{params['output_format']}"
    final_path = os.path.join(temp_dir, filename)
    write_audio_file(final_path, final_audio, samplerate, params['output_format'], "320k")

    return final_path
|
|
|
|
|
def _separate(input_file, output_dir, model, template):
    """Run one separation pass with the model given as ``"<type> / <name>"``.

    Thin wrapper around ``single_multi_inference`` with the fixed settings
    used by gen_cover(): wav output, 320k bitrate, ``vr_aggr=5``, CLI call
    method and no stem selection. Returns whatever ``single_multi_inference``
    returns (consumed here as ``(stem_name, file)`` pairs).
    """
    model_parts = model.split(" / ")
    return single_multi_inference(
        input_file, output_dir,
        model_parts[0], model_parts[1],
        True, vr_aggr=5, output_format="wav",
        output_bitrate="320k", template=template,
        call_method="cli", selected_stems=[])


def _run_vbach(input_path, output_dir, template, pitch, rvc_params):
    """Convert one vocal file with the vbach CLI.

    The command is passed as an argument list (no shell), so paths or model
    names containing quotes or spaces cannot break or alter the command.
    The exit status is not checked: a failed conversion simply leaves no
    output file, which mix_and_save() skips.

    Returns:
        The path where the converted file is expected:
        ``<output_dir>/<template>.wav``.
    """
    import subprocess
    import sys

    command = [
        # Same interpreter as the running app, rather than whatever
        # "python" resolves to on PATH.
        sys.executable, "-m", "vbach.cli.vbach",
        str(input_path), str(output_dir), str(rvc_params["model_name"]),
        "--template", template,
        "--pitch", str(pitch),
        "--method_pitch", str(rvc_params["f0_method"]),
        "--index_rate", str(rvc_params["index_rate"]),
        "--filter_radius", str(rvc_params["filter_radius"]),
        "--rms", str(rvc_params["rms"]),
        "--protect", str(rvc_params["protect"]),
        "--hop_length", str(rvc_params["hop_length"]),
        "--f0_min", "50",
        "--f0_max", str(rvc_params["f0_max"]),
        "--output_format", "wav",
        "--stereo_mode", "mono",
    ]
    subprocess.run(command, check=False)
    return os.path.join(output_dir, f"{template}.wav")


def gen_cover(
    input_audio,
    anti_instrum_model,
    karaoke_model,
    dereverb_model,
    output_format,
    karaoke_check,
    conversion_mode,
    preclear_vocals_check,
    voice_name,
    pitch1_val,
    pitch2_val,
    method_pitch,
    index_rate,
    fr,
    rms,
    protect,
    hop_mangio_crepe,
    f0_max,
    unconv_vocals_check,
    use_effects,
    instrumental_gain,
    vocal1_gain,
    vocal2_gain,
    echo_delay,
    echo_feedback,
    echo_mix,
    reverb_rm_size,
    reverb_width,
    reverb_wet,
    reverb_dry,
    reverb_damping,
    chorus_rate_hz,
    chorus_depth,
    chorus_centre_delay_ms,
    chorus_feedback,
    chorus_mix,
    compressor_ratio,
    compressor_threshold,
    compressor_attack,
    compressor_release,
    noise_gate_threshold,
    noise_gate_ratio,
    noise_gate_attack,
    noise_gate_release
):
    """Generate a voice cover: separate, optionally clean, convert and mix.

    Pipeline:
        1. Split *input_audio* into instrumental and vocals.
        2. If *karaoke_check*: split the vocals into lead and backing.
        3. If *preclear_vocals_check*: run the dereverb model on the vocals.
        4. Convert the vocals selected by *conversion_mode*
           (``"full"``, ``"lead"``, ``"back"``, ``"lead/back"``) with vbach.
        5. Mix everything with mix_and_save().

    The intermediate results are stored in the module-level
    ``saved_processing_data`` so remix_cover() can re-mix them later.

    Args:
        input_audio: Path of the song to process.
        anti_instrum_model, karaoke_model, dereverb_model: Separation models
            as ``"<type> / <name>"`` strings.
        output_format: Format of the final file.
        voice_name: Voice model name passed to vbach.
        pitch1_val, pitch2_val: Pitch for lead/full and for backing vocals.
        method_pitch, index_rate, fr, rms, protect, hop_mangio_crepe, f0_max:
            Passed through to the vbach CLI.
        unconv_vocals_check: Add the non-converted vocal part to the mix.
        use_effects: Apply the effect chain to the converted vocals.
        instrumental_gain, vocal1_gain, vocal2_gain: Gains in dB.
        echo_*, reverb_*, chorus_*, compressor_*, noise_gate_*: Effect
            settings forwarded to mix_and_save().

    Returns:
        ``(generated_files, final_path)``: every produced file (the final
        cover last) and the path of the final cover.

    Raises:
        gr.Error: If the input audio and/or the voice model is missing.
    """
    global saved_processing_data

    if not input_audio and not voice_name:
        raise gr.Error(t("error_audio_model"))
    if not input_audio:
        raise gr.Error(t("error_audio"))
    if not voice_name:
        raise gr.Error(t("error_model"))

    params = {
        "output_format": output_format,
        "extract_karaoke": karaoke_check,
        "conversion_mode": conversion_mode,
        "preclear_vocals": preclear_vocals_check
    }

    rvc_params = {
        "model_name": voice_name,
        "pitch1": pitch1_val,
        "pitch2": pitch2_val,
        "f0_method": method_pitch,
        'index_rate': index_rate,
        'filter_radius': fr,
        'rms': rms,
        'protect': protect,
        'hop_length': hop_mangio_crepe,
        'f0_max': f0_max
    }

    mix_params = {
        "add_unconverted_vocals_to_instrumental": unconv_vocals_check,
        "use_effects": use_effects,
        "gain": {
            "instrum": instrumental_gain,
            "vocals1": vocal1_gain,
            "vocals2": vocal2_gain
        },
        "pedalboard_settings": {
            "echo": {
                "delay": echo_delay,
                "feedback": echo_feedback,
                "mix": echo_mix
            },
            "reverb": {
                "room_size": reverb_rm_size,
                "wet": reverb_wet,
                "dry": reverb_dry,
                "damping": reverb_damping,
                "width": reverb_width,
            },
            "compressor": {
                "ratio": compressor_ratio,
                "threshold": compressor_threshold,
                "attack": compressor_attack,
                "release": compressor_release
            },
            "noise_gate": {
                "threshold": noise_gate_threshold,
                "ratio": noise_gate_ratio,
                "attack": noise_gate_attack,
                "release": noise_gate_release,
            },
            "chorus": {
                "rate": chorus_rate_hz,
                "depth": chorus_depth,
                "center_delay": chorus_centre_delay_ms,
                "feedback": chorus_feedback,
                "mix": chorus_mix
            }
        }
    }

    # NOTE(review): Gradio's documented pattern is a `progress=gr.Progress()`
    # default parameter; confirm that updates from this instance reach the UI.
    progress = gr.Progress()
    progress(0, desc=t("start_processing"))

    temp_dir = tempfile.mkdtemp()
    converted_dir = os.path.join(temp_dir, 'converted')

    # 1. Instrumental / vocals separation.
    progress(0.1, desc=t("separation"))
    inst_output = _separate(input_audio, os.path.join(temp_dir, "inst_output"),
                            anti_instrum_model, "VbachGen_NAME_STEM")
    inst_file = ("instrumental", find_file_from_stem(inst_output, ["Instrumental", "instrumental", "other", "Other"]))
    full_vocals_file = ("full_vocals", find_file_from_stem(inst_output, ["Vocals", "vocals"]))

    # 2. Optional lead / backing split of the vocals.
    progress(0.2, desc=t("extracting"))
    back_vocals_file = (None, None)
    lead_vocals_file = (None, None)
    if karaoke_check:
        karaoke_output = _separate(full_vocals_file[1], os.path.join(temp_dir, "kar_output"),
                                   karaoke_model, "NAME_MODEL_STEM")
        # The karaoke model's "instrumental" stem is what remains after the
        # lead vocal is removed, i.e. the backing vocals.
        back_vocals_file = ("back_vocals", find_file_from_stem(karaoke_output, ["Instrumental", "instrumental", "other", "Other"]))
        lead_vocals_file = ("lead_vocals", find_file_from_stem(karaoke_output, ["Vocals", "vocals", "karaoke"]))

    # Fixed order relied upon by mix_and_save(): full, backing, lead.
    list_vocals = [full_vocals_file, back_vocals_file, lead_vocals_file]

    # 3. Optional dereverb/de-echo of the vocals that will be used.
    progress(0.3, desc=t("cleaning"))
    if preclear_vocals_check:
        to_clean = [
            full_vocals_file,
            back_vocals_file if conversion_mode in ["back", "lead/back"] else (None, None),
            lead_vocals_file if conversion_mode in ["lead", "lead/back"] else (None, None)
        ]
        cleaned = {}
        for index, (name, file) in enumerate(to_clean):
            if not (file and os.path.exists(file)):
                continue
            clear_output = _separate(file, os.path.join(temp_dir, f"cleared_output_{index}"),
                                     dereverb_model, "NAME_STEM")
            cleared_file = find_file_from_stem(clear_output, ["No Echo", "No Reverb", "Dry", "Other"])
            if cleared_file:
                # Keep the uncleaned vocal when no cleaned stem was produced.
                cleaned[name] = cleared_file
        list_vocals = [
            (name, cleaned[name]) if name in cleaned else (name, file)
            for name, file in list_vocals
        ]

    # 4. Voice conversion.
    progress(0.5, desc=t("converting"))
    full_path, back_path, lead_path = (file for _, file in list_vocals)
    converted_vocals_list = []

    if conversion_mode == "full" and full_path:
        converted_vocals_list.append(
            _run_vbach(full_path, converted_dir, "full_vocals_converted", pitch1_val, rvc_params))

    elif conversion_mode == "lead/back" and back_path and lead_path:
        lead_converted = _run_vbach(lead_path, converted_dir, "lead_vocals_converted", pitch1_val, rvc_params)
        back_converted = _run_vbach(back_path, converted_dir, "back_vocals_converted", pitch2_val, rvc_params)
        # List order (backing first, then lead) is kept as before.
        converted_vocals_list.append(back_converted)
        converted_vocals_list.append(lead_converted)

    elif conversion_mode == "back" and back_path:
        converted_vocals_list.append(
            _run_vbach(back_path, converted_dir, "back_vocals_converted", pitch2_val, rvc_params))

    elif conversion_mode == "lead" and lead_path:
        converted_vocals_list.append(
            _run_vbach(lead_path, converted_dir, "lead_vocals_converted", pitch1_val, rvc_params))

    generated_files = [inst_file[1]]
    generated_files.extend(file for _, file in list_vocals if file)
    generated_files.extend(converted_vocals_list)

    # Remember everything remix_cover() needs to re-mix without reprocessing.
    saved_processing_data = {
        "inst_path": inst_file[1],
        "list_vocals": list_vocals,
        "converted_vocals_list": converted_vocals_list,
        "params": params,
        "rvc_params": rvc_params,
        "input_audio": input_audio
    }

    # 5. Final mix.
    progress(0.9, desc=t("mixing"))
    final_path = mix_and_save(
        inst_file[1],
        list_vocals,
        converted_vocals_list,
        mix_params,
        params,
        rvc_params,
        temp_dir,
        input_audio
    )
    generated_files.append(final_path)

    return generated_files, final_path
|
|
|
|
|
def remix_cover(
    use_effects,
    instrumental_gain,
    vocal1_gain,
    vocal2_gain,
    echo_delay,
    echo_feedback,
    echo_mix,
    reverb_rm_size,
    reverb_width,
    reverb_wet,
    reverb_dry,
    reverb_damping,
    chorus_rate_hz,
    chorus_depth,
    chorus_centre_delay_ms,
    chorus_feedback,
    chorus_mix,
    compressor_ratio,
    compressor_threshold,
    compressor_attack,
    compressor_release,
    noise_gate_threshold,
    noise_gate_ratio,
    noise_gate_attack,
    noise_gate_release
):
    """Re-mix the most recently generated cover with new gains and effects.

    Reuses the files recorded in ``saved_processing_data`` by gen_cover(),
    so no separation or voice conversion is repeated; only mix_and_save()
    runs again, writing into a fresh temporary directory.

    Args:
        use_effects: Apply the effect chain to the converted vocals.
        instrumental_gain, vocal1_gain, vocal2_gain: Gains in dB.
        echo_*, reverb_*, chorus_*, compressor_*, noise_gate_*: Effect
            settings forwarded to mix_and_save().

    Returns:
        Path of the newly written cover.

    Raises:
        gr.Error: If no cover has been generated yet.
    """
    if not saved_processing_data:
        raise gr.Error(t("error_generate_first"))

    previous = saved_processing_data
    output_dir = tempfile.mkdtemp()

    gain_settings = {
        "instrum": instrumental_gain,
        "vocals1": vocal1_gain,
        "vocals2": vocal2_gain
    }
    echo_settings = {
        "delay": echo_delay,
        "feedback": echo_feedback,
        "mix": echo_mix
    }
    reverb_settings = {
        "room_size": reverb_rm_size,
        "wet": reverb_wet,
        "dry": reverb_dry,
        "damping": reverb_damping,
        "width": reverb_width,
    }
    compressor_settings = {
        "ratio": compressor_ratio,
        "threshold": compressor_threshold,
        "attack": compressor_attack,
        "release": compressor_release
    }
    noise_gate_settings = {
        "threshold": noise_gate_threshold,
        "ratio": noise_gate_ratio,
        "attack": noise_gate_attack,
        "release": noise_gate_release,
    }
    chorus_settings = {
        "rate": chorus_rate_hz,
        "depth": chorus_depth,
        "center_delay": chorus_centre_delay_ms,
        "feedback": chorus_feedback,
        "mix": chorus_mix
    }

    mix_params = {
        # NOTE(review): always True here, regardless of the choice made when
        # the cover was generated — confirm this is intended.
        "add_unconverted_vocals_to_instrumental": True,
        "use_effects": use_effects,
        "gain": gain_settings,
        "pedalboard_settings": {
            "echo": echo_settings,
            "reverb": reverb_settings,
            "compressor": compressor_settings,
            "noise_gate": noise_gate_settings,
            "chorus": chorus_settings
        }
    }

    return mix_and_save(
        previous["inst_path"],
        previous["list_vocals"],
        previous["converted_vocals_list"],
        mix_params,
        previous["params"],
        previous["rvc_params"],
        output_dir,
        previous["input_audio"]
    )
|
|
|
|
|
def vbach_plugin_name():
    """Return this plugin's name, the constant string ``"VbachGen"``."""
    plugin_name = "VbachGen"
    return plugin_name
|
|
|
|
|
def _list_voice_models():
    """Return the sorted names of the sub-folders of ``RVC_MODELS_DIR``.

    An empty list is returned when the folder does not exist, so that the
    "update" button clears the dropdown instead of raising.
    """
    try:
        entries = os.listdir(RVC_MODELS_DIR)
    except (FileNotFoundError, NotADirectoryError):
        return []
    return sorted(
        entry
        for entry in entries
        if os.path.isdir(os.path.join(RVC_MODELS_DIR, entry))
    )


def vbachgen_plugin(lang):
    """Build the VbachGen user interface inside a ``gr.Blocks`` context.

    The interface language is selected with ``set_language(lang)``; every
    label is then looked up through ``t(...)``.  When a ``vbach`` path exists
    in the current working directory the full interface is created (model
    picker, input file, separation / voice-conversion / mixing tabs, result
    widgets) and its events are wired to ``gen_cover`` and ``remix_cover``.
    Otherwise only a heading with the ``vbach_required`` message is shown.

    Args:
        lang: Language code forwarded to ``set_language``.

    Returns:
        None. The components are created for their side effect on the
        enclosing Gradio layout.
    """
    # NOTE(review): the container nesting below was reconstructed from a
    # source whose indentation had been stripped -- confirm the layout
    # (in particular the placement of the input-file row and of the
    # hop-length slider) against the running application.
    set_language(lang)

    def _unconverted_vocals_update(mode):
        # The "add unconverted vocals" option only applies when a single
        # part (lead or back) is converted; it is reset whenever the mode
        # changes.
        return gr.update(visible=mode in ("back", "lead"), value=False)

    with gr.Blocks():
        if os.path.exists("vbach"):
            # --- Voice model and input song --------------------------------
            with gr.Row(equal_height=False, variant="panel"):
                with gr.Column():
                    model_name = gr.Dropdown(
                        label=t("model_name_label"),
                        interactive=True,
                        filterable=False,
                        scale=6,
                    )
                    model_update_btn = gr.Button(
                        t("update_button"),
                        variant="primary",
                        scale=3,
                        size="lg",
                    )
                with gr.Row(min_height=150):
                    input_audio = gr.File(
                        label=t("input_audio_label"),
                        interactive=True,
                        type="filepath",
                        file_count="single",
                    )

            with gr.Row():
                with gr.Column():
                    # --- Separation settings -------------------------------
                    with gr.Tab(t("tab_separation")):
                        preclear_vocals_check = gr.Checkbox(
                            label=t("preclear_vocals"), value=False
                        )
                        karaoke_check = gr.Checkbox(
                            label=t("split_vocals"), value=False
                        )

                        with gr.Column(variant="panel"):
                            with gr.Group() as extract_vocals_group:
                                anti_instrum_model = gr.Dropdown(
                                    label=t("vocal_model"),
                                    choices=list_models(
                                        ["Инструментал", "Вокал", "Инструментал и вокал"],
                                        ["mel_band_roformer", "bs_roformer", "mdx23c", "mdx", "htdemucs"],
                                    ),
                                    interactive=True,
                                    filterable=False,
                                )

                            with gr.Group(visible=False) as deecho_group:
                                dereverb_model = gr.Dropdown(
                                    label=t("dereverb_model"),
                                    choices=list_models(
                                        ["Реверб и эхо", "Реверб", "Эхо"], ["vr"]
                                    ),
                                    interactive=True,
                                    filterable=False,
                                )

                            with gr.Group(visible=False) as karaoke_group:
                                karaoke_model = gr.Dropdown(
                                    label=t("karaoke_model"),
                                    choices=list_models(["Караоке"]),
                                    interactive=True,
                                    filterable=False,
                                )

                    # --- Voice conversion settings -------------------------
                    with gr.Tab(t("tab_voice_settings")):
                        conversion_mode = gr.Dropdown(
                            label=t("conversion_mode"),
                            choices=["lead", "back", "lead/back", "full"],
                            value="full",
                            filterable=False,
                            visible=False,
                            info=t("conversion_info"),
                        )
                        with gr.Row():
                            pitch1 = gr.Slider(
                                -48, 48, value=0, step=12,
                                label=t("vocal_pitch"), interactive=True,
                            )
                            pitch2 = gr.Slider(
                                -48, 48, value=0, step=12,
                                label=t("backing_pitch"), visible=False, interactive=True,
                            )
                        with gr.Row():
                            method_pitch = gr.Dropdown(
                                label=t("pitch_method"),
                                choices=["mangio-crepe", "rmvpe+", "fcpe"],
                                value="rmvpe+",
                                interactive=True,
                                filterable=False,
                            )
                            f0_max = gr.Slider(
                                50, 2000, value=1100, step=50,
                                label=t("max_pitch"), interactive=True,
                            )
                        with gr.Row():
                            with gr.Column(scale=1):
                                index_rate = gr.Slider(
                                    0, 1, value=0, step=0.1,
                                    label=t("index_rate"), interactive=True,
                                )
                                fr = gr.Slider(
                                    0, 7, value=3, step=1,
                                    label=t("filter_radius"), interactive=True,
                                )
                            with gr.Column(scale=1):
                                rms = gr.Slider(
                                    0, 1, value=0.25, step=0.05,
                                    label=t("rms_envelope"), interactive=True,
                                )
                                protect = gr.Slider(
                                    minimum=0, maximum=0.5, step=0.01, value=0.33,
                                    label=t("protect_cons"), interactive=True,
                                )
                        # Only shown while "mangio-crepe" is the selected
                        # pitch method (see method_pitch.change below).
                        hop_mangio_crepe = gr.Slider(
                            1, 512, value=128, step=1,
                            label=t("hop_length"), interactive=True, visible=False,
                        )

                    # --- Mixing settings -----------------------------------
                    with gr.Tab(t("tab_mixing_settings")):
                        gr.Markdown(f"<center><h2>{t('volume_adjust')}</h2></center>")
                        with gr.Row(variant="panel"):
                            vocal1_gain = gr.Slider(
                                -30, 30, value=0, step=1,
                                label=t("vocals_gain"), scale=3, interactive=True,
                            )
                            vocal2_gain = gr.Slider(
                                -30, 30, value=0, step=1,
                                label=t("backing_gain"), scale=3, visible=False, interactive=True,
                            )
                            instrumental_gain = gr.Slider(
                                -30, 30, value=0, step=1,
                                label=t("inst_gain"), scale=3, interactive=True,
                            )

                        output_format = gr.Dropdown(
                            label=t("output_format"),
                            choices=OUTPUT_FORMAT,
                            value="wav",
                            interactive=True,
                            filterable=False,
                        )
                        unconv_vocals_check = gr.Checkbox(
                            label=t("add_unconv"), visible=False
                        )
                        use_effects = gr.Checkbox(label=t("add_effects"), value=False)

                        # Hidden until the "add effects" checkbox is ticked.
                        with gr.Column(variant="panel", visible=False) as effects_accordion:
                            with gr.Tab(t("effects_tab")):
                                with gr.Tab(t("echo_tab")):
                                    with gr.Group():
                                        with gr.Column(variant="panel"):
                                            with gr.Row():
                                                echo_delay = gr.Slider(
                                                    0, 3, value=0,
                                                    label=t("echo_delay"), interactive=True,
                                                )
                                                echo_feedback = gr.Slider(
                                                    0, 1, value=0,
                                                    label=t("echo_feedback"), interactive=True,
                                                )
                                                echo_mix = gr.Slider(
                                                    0, 1, value=0,
                                                    label=t("echo_mix"), interactive=True,
                                                )

                                with gr.Tab(t("reverb_tab")):
                                    with gr.Group():
                                        with gr.Column(variant="panel"):
                                            with gr.Row():
                                                reverb_rm_size = gr.Slider(
                                                    0, 1, value=0.1,
                                                    label=t("reverb_size"), interactive=True,
                                                )
                                                reverb_width = gr.Slider(
                                                    0, 1, value=1.0,
                                                    label=t("reverb_width"), interactive=True,
                                                )
                                            with gr.Row():
                                                reverb_wet = gr.Slider(
                                                    0, 1, value=0.1,
                                                    label=t("reverb_wet"), interactive=True,
                                                )
                                                reverb_dry = gr.Slider(
                                                    0, 1, value=0.8,
                                                    label=t("reverb_dry"), interactive=True,
                                                )
                                            with gr.Row():
                                                reverb_damping = gr.Slider(
                                                    0, 1, value=0.9,
                                                    label=t("reverb_damping"), interactive=True,
                                                )

                                with gr.Tab(t("chorus_tab")):
                                    with gr.Group():
                                        with gr.Column(variant="panel"):
                                            with gr.Row():
                                                # The default has to lie inside the
                                                # slider's own 0.1-10 range; it used
                                                # to be 0, i.e. below the minimum.
                                                chorus_rate_hz = gr.Slider(
                                                    0.1, 10, value=0.1,
                                                    label=t("chorus_rate"), interactive=True,
                                                )
                                                chorus_depth = gr.Slider(
                                                    0, 1, value=0,
                                                    label=t("chorus_depth"), interactive=True,
                                                )
                                            with gr.Row():
                                                chorus_centre_delay_ms = gr.Slider(
                                                    0, 50, value=0,
                                                    label=t("chorus_delay"), interactive=True,
                                                )
                                                chorus_feedback = gr.Slider(
                                                    0, 1, value=0,
                                                    label=t("chorus_feedback"), interactive=True,
                                                )
                                            with gr.Row():
                                                chorus_mix = gr.Slider(
                                                    0, 1, value=0,
                                                    label=t("chorus_mix"), interactive=True,
                                                )

                            with gr.Tab(t("processing_tab")):
                                with gr.Tab(t("compressor_tab")):
                                    with gr.Row(variant="panel"):
                                        compressor_ratio = gr.Slider(
                                            1, 20, value=4,
                                            label=t("comp_ratio"), interactive=True,
                                        )
                                        compressor_threshold = gr.Slider(
                                            -60, 0, value=-12,
                                            label=t("comp_threshold"), interactive=True,
                                        )
                                        compressor_attack = gr.Slider(
                                            0, 2000, value=100,
                                            label=t("comp_attack"), interactive=True,
                                        )
                                        compressor_release = gr.Slider(
                                            0, 2000, value=100,
                                            label=t("comp_release"), interactive=True,
                                        )

                                with gr.Tab(t("noise_gate_tab")):
                                    with gr.Group():
                                        with gr.Column(variant="panel"):
                                            with gr.Row():
                                                noise_gate_threshold = gr.Slider(
                                                    -60, 0, value=-40,
                                                    label=t("gate_threshold"), interactive=True,
                                                )
                                                noise_gate_ratio = gr.Slider(
                                                    1, 20, value=8,
                                                    label=t("gate_ratio"), interactive=True,
                                                )
                                            with gr.Row():
                                                noise_gate_attack = gr.Slider(
                                                    0, 100, value=10,
                                                    label=t("gate_attack"), interactive=True,
                                                )
                                                noise_gate_release = gr.Slider(
                                                    0, 1000, value=100,
                                                    label=t("gate_release"), interactive=True,
                                                )

                # --- Results -----------------------------------------------
                with gr.Column(variant="panel"):
                    final_ai_cover = gr.Audio(
                        label=t("final_result"), interactive=False, streaming=True
                    )
                    generated_files_list = gr.Files(label=t("intermediate_files"))
                    with gr.Row():
                        generate_btn = gr.Button(t("generate_btn"), variant="primary")
                        remix_btn = gr.Button(t("remix_btn"), variant="secondary")
                    status_text = gr.Textbox(label=t("status_label"), interactive=False)

            # --- Event wiring ----------------------------------------------
            method_pitch.change(
                fn=lambda method: gr.update(visible=method == "mangio-crepe"),
                inputs=method_pitch,
                outputs=hop_mangio_crepe,
            )

            model_update_btn.click(
                fn=lambda: gr.update(choices=_list_voice_models()),
                inputs=None,
                outputs=model_name,
            )

            use_effects.change(
                fn=lambda enabled: gr.update(visible=enabled),
                inputs=use_effects,
                outputs=effects_accordion,
            )

            # Toggling the lead/back split shows the karaoke model picker and
            # the conversion-mode dropdown, and resets the mode to "full".
            karaoke_check.change(
                fn=lambda enabled: gr.update(visible=enabled),
                inputs=karaoke_check,
                outputs=karaoke_group,
            ).then(
                fn=lambda enabled: gr.update(value="full", visible=enabled),
                inputs=karaoke_check,
                outputs=conversion_mode,
            ).then(
                fn=_unconverted_vocals_update,
                inputs=conversion_mode,
                outputs=unconv_vocals_check,
            )

            preclear_vocals_check.change(
                fn=lambda enabled: gr.update(visible=enabled),
                inputs=preclear_vocals_check,
                outputs=deecho_group,
            )

            conversion_mode.change(
                fn=lambda mode: (
                    # "full" keeps the main vocal gain visible, matching the
                    # initial layout where the mode is "full" and the slider
                    # is shown; otherwise returning to "full" would hide both
                    # vocal gain sliders.
                    gr.update(visible=mode in ("lead", "lead/back", "full")),
                    gr.update(visible=mode in ("back", "lead/back")),
                    gr.update(visible=mode == "lead/back"),
                ),
                inputs=conversion_mode,
                outputs=[vocal1_gain, vocal2_gain, pitch2],
            ).then(
                fn=_unconverted_vocals_update,
                inputs=conversion_mode,
                outputs=unconv_vocals_check,
            )

            # Mixing/effect inputs shared, in this exact order, by the
            # trailing parameters of gen_cover and by all of remix_cover's.
            mix_inputs = [
                use_effects,
                instrumental_gain,
                vocal1_gain,
                vocal2_gain,
                echo_delay,
                echo_feedback,
                echo_mix,
                reverb_rm_size,
                reverb_width,
                reverb_wet,
                reverb_dry,
                reverb_damping,
                chorus_rate_hz,
                chorus_depth,
                chorus_centre_delay_ms,
                chorus_feedback,
                chorus_mix,
                compressor_ratio,
                compressor_threshold,
                compressor_attack,
                compressor_release,
                noise_gate_threshold,
                noise_gate_ratio,
                noise_gate_attack,
                noise_gate_release,
            ]

            generate_btn.click(
                fn=gen_cover,
                inputs=[
                    input_audio,
                    anti_instrum_model,
                    karaoke_model,
                    dereverb_model,
                    output_format,
                    karaoke_check,
                    conversion_mode,
                    preclear_vocals_check,
                    model_name,
                    pitch1,
                    pitch2,
                    method_pitch,
                    index_rate,
                    fr,
                    rms,
                    protect,
                    hop_mangio_crepe,
                    f0_max,
                    unconv_vocals_check,
                    *mix_inputs,
                ],
                outputs=[generated_files_list, final_ai_cover],
            )

            remix_btn.click(
                fn=remix_cover,
                inputs=list(mix_inputs),
                outputs=[final_ai_cover],
            )
        else:
            gr.Markdown(f"<center><h2>{t('vbach_required')}</h2></center>")