|
|
import gradio as gr |
|
|
import json |
|
|
import pandas as pd |
|
|
import tempfile |
|
|
import os |
|
|
from separator.ensemble import ensemble_audio_files |
|
|
from pydub.utils import mediainfo |
|
|
from pydub import AudioSegment |
|
|
import numpy as np |
|
|
import librosa |
|
|
import librosa.display |
|
|
import soundfile as sf |
|
|
from separator.audio_writer import write_audio_file |
|
|
from multi_inference import MVSEPLESS |
|
|
from pydub.exceptions import CouldntDecodeError |
|
|
|
|
|
# Shared separator backend; every ensemble/separation call below goes through it.
mvsepless = MVSEPLESS()
|
|
|
|
|
# UI string table keyed by language code ("ru"/"en") then by message key.
# Values may contain str.format placeholders (e.g. {rate}) filled in by t(**kwargs).
# Both languages must expose the same key set.
TRANSLATIONS = {
    "ru": {
        "app_title": "EnsembLess",
        "auto_ensemble": "Авто-ансамбль",
        "invert_ensemble": "Инвертировать ансамбль",
        "give_name_preset": "Дайте имя пресету",
        "export": "Экспорт",
        "import": "Импорт",
        "manual_ensemble": "Ручной ансамбль",
        "inverter": "Инвертер",
        "model_selection": "Выберите модель для добавления в ансамбль",
        "model_type": "Тип модели",
        "model_name": "Имя модели",
        "stem_selection": "Стем, который будет использован в ансамбле",
        "weight": "Весы",
        "invert_weights": "Использовать перевернутые весы для инвертированного стема",
        "add_button": "➕ Добавить",
        "current_ensemble": "Текущий ансамбль",
        "remove_index": "Индекс модели, который хотите удалить (начинается с 1)",
        "remove_button": "❌ Удалить",
        "clear_button": "Очистить",
        "input_audio": "Входное аудио",
        "settings": "Настройки",
        "method": "Метод",
        "output_format": "Формат вывода",
        "run_button": "Создать ансамбль",
        "results": "Результаты",
        "inverted_result": "Инвертированный результат",
        "invert_method": "Метод инвертирования",
        "invert_button": "Инвертировать",
        "audio_files": "Аудио файлы",
        "weights_input": "Весы",
        "main_audio": "Основное аудио",
        "audio_to_remove": "Аудио для удаления",
        "processing_method": "Метод обработки",
        "analyze_title": "РЕЗУЛЬТАТЫ АНАЛИЗА:",
        "all_same_rate": "✅ ВСЕ ФАЙЛЫ имеют одинаковую частоту дискретизации: {rate} Hz",
        "different_rates": "⚠️ Файлы имеют РАЗНУЮ частоту дискретизации",
        "resample_warning": "К загруженному аудио автоматически применён ресэмплинг для лучшего инвертирования",
        "error_no_files": "Ошибка: файлы не загружены",
        "error_unsupported_format": "не поддерживаемый формат",
        "error_general": "ошибка ({error})",
        "error_no_models": "Добавьте хотя бы одну модель для создания ансамбля",
        "error_no_audio": "Сначала загрузите аудио",
        "error_both_audio": "Пожалуйста, загрузите оба аудиофайла",
        "language": "Язык",
        "batch_processing": "Пакетная обработка",
        "batch_info": "Позволяет загрузить сразу несколько файлов",
        "separation_info": "Информация о разделении",
        "vocal_separation": "Разделение вокалы",
        "stereo_mode": "Стерео режим",
        "stem": "Стем",
        "p_stem": "Основной стем",
        "s_stem": "Инвертированный стем",
        "vocal_multi_separation": "Мульти-вокал",
        "ensemble": "Ансамбль",
        "transform": "Преобразование",
        "algorithm": "Алгоритм: {model_fullname}",
        "output_format_info": "Формат выходных данных: {output_format}",
        "process1": "Начало обработки",
        "process2": "Модель",
        "process3": "Автоматическое выравнивание длин аудио",
        "process4": "Создание ансамбля",
        "result_source": "Промежуточные файлы",
        "local_path": "Указать путь к аудио локально",
        "resample": "Ресэмпл"
    },
    "en": {
        "app_title": "EnsembLess",
        "auto_ensemble": "Auto-Ensemble",
        "invert_ensemble": "Invert ensemble",
        "give_name_preset": "Give name of preset",
        "export": "Export",
        "import": "Import",
        "manual_ensemble": "Manual Ensemble",
        "inverter": "Inverter",
        "model_selection": "Select a model to add to the ensemble",
        "model_type": "Model Type",
        "model_name": "Model Name",
        "stem_selection": "Stem to use in the ensemble",
        "weight": "Weights",
        "invert_weights": "Use inverted weights for inverted stem",
        "add_button": "➕ Add",
        "current_ensemble": "Current Ensemble",
        "remove_index": "Index of model to remove (starts from 1)",
        "remove_button": "❌ Remove",
        "clear_button": "Clear",
        "input_audio": "Input Audio",
        "settings": "Settings",
        "method": "Method",
        "output_format": "Output Format",
        "run_button": "Create Ensemble",
        "results": "Results",
        "inverted_result": "Inverted Result",
        "invert_method": "Inversion Method",
        "invert_button": "Invert",
        "audio_files": "Audio Files",
        "weights_input": "Weights",
        "main_audio": "Main Audio",
        "audio_to_remove": "Audio to Remove",
        "processing_method": "Processing Method",
        "analyze_title": "ANALYSIS RESULTS:",
        "all_same_rate": "✅ ALL FILES have the same sample rate: {rate} Hz",
        "different_rates": "⚠️ Files have DIFFERENT sample rates",
        "resample_warning": "Resampling applied automatically for better inversion",
        "error_no_files": "Error: no files uploaded",
        "error_unsupported_format": "unsupported format",
        "error_general": "error ({error})",
        "error_no_models": "Add at least one model to create an ensemble",
        "error_no_audio": "Please upload audio first",
        "error_both_audio": "Please upload both audio files",
        "language": "Language",
        "batch_processing": "Batch Processing",
        "batch_info": "Allows uploading multiple files at once",
        "separation_info": "Separation Info",
        "vocal_separation": "Vocal Separation",
        "stereo_mode": "Stereo Mode",
        "stem": "Stem",
        "p_stem": "Primary stem",
        "s_stem": "Secondary stem",
        "vocal_multi_separation": "Multi-Vocal",
        "ensemble": "Ensemble",
        "transform": "Transform",
        "algorithm": "Algorithm: {model_fullname}",
        "output_format_info": "Output format: {output_format}",
        "process1": "Start process",
        "process2": "Model",
        "process3": "Auto post-padding audios",
        "process4": "Build ensemble",
        "result_source": "Intermediate files",
        "local_path": "Specify path to audio locally",
        "resample": "Resample"
    }
}
|
|
|
|
|
# Maps each ensemble method to the method used for the *inverted* ensemble:
# min <-> max are swapped, median/avg are their own inverse.
INVERT_METHODS = {
    "min_fft": "max_fft",
    "max_fft": "min_fft",
    "min_wave": "max_wave",
    "max_wave": "min_wave",
    "median_fft": "median_fft",
    "median_wave": "median_wave",
    "avg_fft": "avg_fft",
    "avg_wave": "avg_wave"
}
|
|
|
|
|
|
|
|
# Active UI language code; mutated only by set_language().
CURRENT_LANG = "ru"


def set_language(lang):
    """Switch the active UI language.

    Unknown language codes fall back to English instead of leaving every
    subsequent t() lookup to fail on a missing TRANSLATIONS entry.
    """
    global CURRENT_LANG
    CURRENT_LANG = lang if lang in TRANSLATIONS else "en"
|
|
|
|
|
def t(key, **kwargs):
    """Translate *key* for the current language, formatting placeholders.

    Falls back to the English table when the key (or the whole language) is
    missing, and finally to the key itself, so a missing translation never
    crashes the UI.
    """
    table = TRANSLATIONS.get(CURRENT_LANG, TRANSLATIONS["en"])
    translation = table.get(key)
    if translation is None:
        translation = TRANSLATIONS["en"].get(key, key)
    return translation.format(**kwargs) if kwargs else translation
|
|
|
|
|
|
|
|
|
|
|
# STFT parameters used by Inverter.process_channel in "spectrogram" mode.
N_FFT = 2048
WIN_LENGTH = 2048
HOP_LENGTH = WIN_LENGTH // 4  # 75% window overlap
|
|
|
|
|
class Inverter:
    """Subtracts one audio signal from another ("inversion"), either in the
    waveform domain or the magnitude-spectrogram domain, and writes the
    residual to disk."""

    def __init__(self):
        self.test = "test"

    def load_audio(self, filepath):
        """Load an audio file with librosa at its native sample rate.

        Returns (samples, sample_rate); (None, None) when filepath is None
        or the file cannot be decoded.
        """
        if filepath is None:
            return None, None
        try:
            return librosa.load(filepath, sr=None, mono=False)
        except Exception as e:
            print(f"Ошибка загрузки аудио: {e}")
            return None, None

    def process_channel(self, y1_ch, y2_ch, sr, method):
        """Subtract y2_ch from y1_ch for a single channel.

        method == "waveform": plain sample-wise difference.
        method == "spectrogram": subtract STFT magnitudes (clamped at 0)
        and reconstruct using y1's phase.
        """
        if method == "waveform":
            return y1_ch - y2_ch

        elif method == "spectrogram":
            S1 = librosa.stft(y1_ch, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH)
            S2 = librosa.stft(y2_ch, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH)

            mag1 = np.abs(S1)
            mag2 = np.abs(S2)

            # Clamp at zero: magnitude can never be negative.
            mag_result = np.maximum(mag1 - mag2, 0)

            # Keep the phase of the primary signal for reconstruction.
            phase = np.angle(S1)
            S_result = mag_result * np.exp(1j * phase)

            return librosa.istft(
                S_result,
                n_fft=N_FFT,
                hop_length=HOP_LENGTH,
                win_length=WIN_LENGTH,
                length=len(y1_ch)
            )

    def process_audio(self, audio1_path, audio2_path, out_format, method):
        """Subtract audio2 from audio1 and write the result next to audio2.

        Returns (encoded result path, intermediate WAV path).
        Raises gr.Error when either input is missing or unreadable.
        """
        y1, sr1 = self.load_audio(audio1_path)
        y2, sr2 = self.load_audio(audio2_path)

        if sr1 is None or sr2 is None:
            raise gr.Error(t("error_both_audio"))

        channels1 = 1 if y1.ndim == 1 else y1.shape[0]
        channels2 = 1 if y2.ndim == 1 else y2.shape[0]

        # librosa returns (channels, samples); work in (samples, channels).
        y1 = y1.T if channels1 > 1 else y1.reshape(-1, 1)
        y2 = y2.T if channels2 > 1 else y2.reshape(-1, 1)

        if sr1 != sr2:
            # Resample y2 to y1's rate.
            # BUGFIX: the previous implementation pre-allocated a buffer of
            # the *original* sample count, but resampling changes the length,
            # so the per-channel assignment raised a shape mismatch.
            resampled = [
                librosa.resample(y2[:, c], orig_sr=sr2, target_sr=sr1)
                for c in range(channels2)
            ]
            common = min(len(ch) for ch in resampled)
            y2 = np.stack([ch[:common] for ch in resampled], axis=1)
            sr2 = sr1

        # Trim both signals to their common length.
        min_len = min(len(y1), len(y2))
        y1 = y1[:min_len]
        y2 = y2[:min_len]

        # Down-mix y2 when y1 is mono but y2 is not.
        if channels1 == 1 and channels2 > 1:
            y2 = y2.mean(axis=1, keepdims=True)
            channels2 = 1

        result_channels = []
        for c in range(channels1):
            y1_ch = y1[:, c]
            # Reuse the closest available y2 channel when counts differ.
            if channels2 == 1:
                y2_ch = y2[:, 0]
            else:
                y2_ch = y2[:, min(c, channels2 - 1)]
            result_channels.append(self.process_channel(y1_ch, y2_ch, sr1, method))

        if len(result_channels) > 1:
            result = np.column_stack(result_channels)
        else:
            result = np.array(result_channels[0])

        # Peak-normalize to 0.9 full scale, per channel for multi-channel audio.
        if result.ndim > 1:
            for c in range(result.shape[1]):
                peak = np.max(np.abs(result[:, c]))
                if peak > 0:
                    result[:, c] = result[:, c] * 0.9 / peak
        else:
            peak = np.max(np.abs(result))
            if peak > 0:
                result = result * 0.9 / peak

        folder_path = os.path.dirname(audio2_path)

        inverted_wav = os.path.join(folder_path, "inverted.wav")
        sf.write(inverted_wav, result, sr1)
        inverted = os.path.join(folder_path, f"inverted_ensemble.{out_format}")
        write_audio_file(inverted, result.T, sr1, out_format, "320k")
        return inverted, inverted_wav
|
|
|
|
|
class EnsembLess:
    """Auto/manual ensembling helpers built on top of the MVSEPLESS separator."""

    def __init__(self):
        self.test = "test"

    def get_model_types(self):
        """List the available separation model types."""
        return mvsepless.get_mt()

    def get_models_by_type(self, model_type):
        """List the model names available for *model_type*."""
        return mvsepless.get_mn(model_type)

    def get_stems_by_model(self, model_type, model_name):
        """List a model's stems; 4/6-stem models without a target instrument
        also get synthetic "instrumental +/-" entries.

        BUGFIX: the original condition was `A or B and C`, which Python parses
        as `A or (B and C)` — the target-instrument check was silently skipped
        for 4-stem models.
        """
        stems = mvsepless.get_stems(model_type, model_name)
        is_multistem = set(stems) in ({"bass", "drums", "vocals", "other"},
                                      {"bass", "drums", "vocals", "other", "piano", "guitar"})
        if is_multistem and not mvsepless.get_tgt_inst(model_type, model_name):
            stems.append("instrumental +")
            stems.append("instrumental -")
        return stems

    def get_invert_stems_by_model(self, model_type, model_name, primary_stem):
        """List the stems usable as the inverted counterpart of *primary_stem*."""
        stems = mvsepless.get_stems(model_type, model_name)
        invert_stems = [stem for stem in stems if stem != primary_stem]
        # Models without a fixed target instrument (outside VR/MDX) also
        # support synthetic inverted stems.
        if not mvsepless.get_tgt_inst(model_type, model_name) and model_type not in ["vr", "mdx"]:
            invert_stems.append("inverted +")
            invert_stems.append("inverted -")
        return invert_stems

    def invert_weights(self, weights):
        """Mirror each weight around the total: w -> sum(weights) - w."""
        total_weight = sum(weights)
        return [total_weight - w for w in weights]

    def analyze_sample_rate(self, files):
        """Report each uploaded file's sample rate and whether they all match.

        Returns a formatted multi-line string for display in the UI.
        """
        if not files:
            return t("error_no_files")

        results = []
        common_rate = None
        all_same = True

        for file_info in files:
            # os.path.basename is portable; the original split('/') broke on Windows.
            display_name = os.path.basename(file_info.name)
            try:
                audio = AudioSegment.from_file(file_info.name)
                rate = audio.frame_rate
                if common_rate is None:
                    common_rate = rate
                elif common_rate != rate:
                    all_same = False
                results.append(f"{display_name}: {rate} Hz")
            except CouldntDecodeError:
                results.append(f"{display_name}: {t('error_unsupported_format')}")
            except Exception as e:
                results.append(f"{display_name}: {t('error_general', error=str(e))}")

        header = t("analyze_title") + "\n" + "-" * 50 + "\n"
        body = "\n".join(results)
        footer = "\n" + "-" * 50 + "\n"

        if all_same and common_rate is not None:
            footer += f"\n{t('all_same_rate', rate=common_rate)}"
        elif common_rate is not None:
            footer += f"\n{t('different_rates')}"

        return header + body + footer

    def resample_audio(self, audio_path):
        """Resample *audio_path* to 44.1 kHz and write it next to the source
        as resampled_<name>.wav (PCM 16-bit).

        Returns the new path, or None (with a UI warning) when no valid file
        was supplied.
        """
        if not audio_path or not os.path.isfile(audio_path):
            gr.Warning(t("error_no_audio"))
            return None

        original_name = os.path.splitext(os.path.basename(audio_path))[0]
        folder_path = os.path.dirname(audio_path)
        resampled_path = os.path.join(folder_path, f"resampled_{original_name}.wav")
        target_sr = 44100

        y, orig_sr = librosa.load(audio_path, sr=None, mono=False)

        # Normalize to (samples, channels).
        if y.ndim == 1:
            channels = 1
            y = y.reshape(-1, 1)
        else:
            channels = y.shape[0]
            y = y.T

        if orig_sr != target_sr:
            resampled_channels = [
                librosa.resample(
                    y=y[:, channel],
                    orig_sr=orig_sr,
                    target_sr=target_sr,
                    res_type="kaiser_best"
                )
                for channel in range(channels)
            ]
            # Channel lengths can differ by a sample after resampling.
            min_length = min(len(c) for c in resampled_channels)
            resampled_data = np.vstack([c[:min_length] for c in resampled_channels]).T
        else:
            resampled_data = y

        sf.write(resampled_path, resampled_data, target_sr, subtype="PCM_16")

        gr.Warning(message=t("resample_warning"))
        return resampled_path

    def _pad_files_to_max(self, files, loader):
        """Zero-pad every file in *files* (in place) to the longest length.

        *loader* returns (data, sample_rate) for a path. Data is normalized
        to shape (2, samples) before padding; files are rewritten as WAV via
        soundfile. Returns the list of (unchanged) file paths.
        """
        loaded = []
        max_length = 0
        sr = None
        for path in files:
            data, sr = loader(path)
            if data.ndim == 1:
                # Mono: duplicate into a stereo pair.
                data = np.stack([data, data])
            elif data.shape[0] != 2:
                # (samples, channels) layout -> (channels, samples).
                data = data.T
            loaded.append((path, data))
            max_length = max(max_length, data.shape[1])

        padded_files = []
        for path, data in loaded:
            if data.shape[1] < max_length:
                pad_width = ((0, 0), (0, max_length - data.shape[1]))
                data = np.pad(data, pad_width, mode='constant')
            # NOTE(review): writes every file with the *last* file's sample
            # rate, as the original did — assumes all inputs share one rate.
            sf.write(path, data.T, sr)
            padded_files.append(path)
        return padded_files

    def maximize_length_audio(self, output):
        """Pad all files to equal length, decoding via librosa (any format)."""
        return self._pad_files_to_max(output, lambda f: librosa.load(f, sr=None, mono=False))

    def maximize_length_audio_wav(self, output):
        """Pad all WAV files to equal length, decoding via soundfile."""
        return self._pad_files_to_max(output, sf.read)

    def manual_ensemble(self, input_audios, method, weights, out_format):
        """Ensemble user-supplied files with comma-separated *weights*.

        Returns (encoded result path, intermediate WAV path).
        """
        temp_dir = tempfile.mkdtemp()
        weights = [float(x) for x in weights.split(",")]

        a1, a2 = ensemble_audio_files(input_audios, output=os.path.join(temp_dir, f"ensemble_{method}"), ensemble_type=method, weights=weights, out_format=out_format)
        return a1, a2

    def auto_ensemble(self, input_audio, input_settings, type, out_format, invert_weights, invert_ensemble):
        """Separate *input_audio* with every configured model and ensemble the
        selected stems.

        input_settings: iterable of ("type / name", weight, p_stem, s_stem).
        type: ensemble method name (kept for interface compatibility even
        though it shadows the builtin).
        Returns (ensemble path, ensemble WAV, inverted path or None,
        inverted WAV or None, list of intermediate stem files).
        """
        progress = gr.Progress()
        progress(0, desc=f"{t('process1')}...")

        base_name = os.path.splitext(os.path.basename(input_audio))[0]
        temp_dir = tempfile.mkdtemp()
        source_files = []
        output_p_files = []
        output_s_files = []
        output_p_weights = []

        block_count = len(input_settings)

        for i, (input_model, weight, p_stem, s_stem) in enumerate(input_settings):
            output_s_files.append(None)
            progress(i / block_count, desc=f"{t('process2')} {i+1}/{block_count}")
            model_type, model_name = input_model.split(" / ")
            output_dir_p = os.path.join(temp_dir, f"{model_type}_{model_name}_p_stems")
            output_p = mvsepless.separator(input_file=input_audio, output_dir=output_dir_p, model_type=model_type, model_name=model_name, ext_inst=True, vr_aggr=10, output_format="wav", template="MODEL_STEM", call_method="cli")
            for stem, file in output_p:
                source_files.append(file)
                if stem == p_stem:
                    output_p_files.append(file)
                    output_p_weights.append(weight)
                elif invert_ensemble and stem == s_stem:
                    output_s_files[i] = file

            # Second pass only when the inverted stem was not produced above.
            if invert_ensemble and not output_s_files[i]:
                output_dir_s = os.path.join(temp_dir, f"{model_type}_{model_name}_s_stems")
                output_s = mvsepless.separator(input_file=input_audio, output_dir=output_dir_s, model_type=model_type, model_name=model_name, ext_inst=True, vr_aggr=10, output_format="wav", template="MODEL_STEM", call_method="cli", selected_stems=[p_stem if not mvsepless.get_tgt_inst(model_type, model_name) else "both"])
                for stem, file in output_s:
                    # BUGFIX: the original appended matching files twice.
                    source_files.append(file)
                    if stem == s_stem:
                        output_s_files[i] = file

        progress(0.9, desc=f"{t('process3')}...")
        progress(0.95, desc=f"{t('process4')}...")

        if invert_ensemble:
            if invert_weights:
                output_s_weights = self.invert_weights(output_p_weights)
            else:
                output_s_weights = output_p_weights
            output_s, output_wav_s = ensemble_audio_files(files=output_s_files, output=os.path.join(temp_dir, f"ensemble_invert_{base_name}_{type}"), ensemble_type=INVERT_METHODS[type], weights=output_s_weights, out_format=out_format)
        else:
            output_s, output_wav_s = None, None

        output_p, output_wav_p = ensemble_audio_files(files=output_p_files, output=os.path.join(temp_dir, f"ensemble_{base_name}_{type}"), ensemble_type=type, weights=output_p_weights, out_format=out_format)

        return output_p, output_wav_p, output_s, output_wav_s, source_files
|
|
|
|
|
class EnsembleManager:
    """Holds the model list composing the current ensemble and persists it
    to/from JSON preset files on disk."""

    def __init__(self):
        # Each entry: {'type', 'name', 'p_stem', 's_stem', 'weight'}.
        self.models = []
        self.presets_dir = os.path.join(os.getcwd(), "presets")
        os.makedirs(self.presets_dir, exist_ok=True)

    def export_preset(self, name):
        """Serialize the model list to presets/<name>.json and return the path.

        An empty name falls back to "ensembless_preset".
        """
        if not name:
            name = "ensembless_preset"
        filepath = os.path.join(self.presets_dir, f"{name}.json")
        # Explicit UTF-8 + ensure_ascii=False keeps non-ASCII stem/model
        # names human-readable in the preset file.
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.models, f, ensure_ascii=False, indent=2)
        return filepath

    def import_preset(self, filepath):
        """Replace the model list with a preset file's contents; return the table."""
        with open(filepath, 'r', encoding='utf-8') as f:
            self.models = json.load(f)
        return self.get_df()

    def add_model(self, model_type, model_name, p_stem, s_stem, weight):
        """Append one model entry and return the refreshed table."""
        self.models.append({
            'type': model_type,
            'name': model_name,
            'p_stem': p_stem,
            's_stem': s_stem,
            'weight': float(weight)
        })
        return self.get_df()

    def remove_model(self, index):
        """Remove the entry at 0-based *index*; out-of-range indices are ignored.

        BUGFIX: always returns the refreshed table — the original returned
        None for invalid indices, which blanked the UI dataframe.
        """
        if 0 <= index < len(self.models):
            del self.models[index]
        return self.get_df()

    def clear_models(self):
        """Drop every entry and return the (empty) table."""
        self.models = []
        return self.get_df()

    def get_df(self):
        """Render the model list as a pandas DataFrame for the Gradio table."""
        columns = ["#", t("model_type"), t("model_name"), t("p_stem"), t("s_stem"), t("weight")]
        if not self.models:
            return pd.DataFrame(columns=columns)
        data = [
            [f"{i+1}", m['type'], m['name'], m['p_stem'], m['s_stem'], m['weight']]
            for i, m in enumerate(self.models)
        ]
        return pd.DataFrame(data, columns=columns)

    def get_settings(self):
        """Return ("type / name", weight, p_stem, s_stem) tuples for auto_ensemble."""
        return [(f"{m['type']} / {m['name']}", m['weight'], m['p_stem'], m['s_stem']) for m in self.models]
|
|
|
|
|
# Module-level singletons shared by the UI callbacks below.
inverter = Inverter()
manager = EnsembleManager()
ensembless = EnsembLess()
|
|
|
|
|
class EnsembLess_ui_updates:
    """Thin adapters between Gradio events and the module-level singletons."""

    def update_model_dropdown(self, model_type):
        """Refresh the model-name dropdown after the model type changes."""
        available = ensembless.get_models_by_type(model_type)
        default = available[0] if available else None
        return gr.Dropdown(choices=available, value=default)

    def update_stem_dropdown(self, model_type, model_name):
        """Refresh the primary-stem dropdown after the model changes."""
        available = ensembless.get_stems_by_model(model_type, model_name)
        default = available[0] if available else None
        return gr.Dropdown(choices=available, value=default)

    def update_invert_stem_dropdown(self, model_type, model_name, primary_stem):
        """Refresh the inverted-stem dropdown after the primary stem changes."""
        available = ensembless.get_invert_stems_by_model(model_type, model_name, primary_stem)
        default = available[0] if available else None
        return gr.Dropdown(choices=available, value=default)

    def add_model(self, model_type, model_name, p_stem, s_stem, weight):
        """Forward the add action to the shared EnsembleManager."""
        return manager.add_model(model_type, model_name, p_stem, s_stem, weight)

    def remove_model(self, index):
        """Remove the 1-based *index*; negative indices just refresh the table."""
        if index < 0:
            return manager.get_df()
        return manager.remove_model(index - 1)

    def clear_all_models(self):
        """Empty the ensemble and refresh the table."""
        return manager.clear_models()

    def run_ensemble(self, input_audio, ensemble_type, output_format, invert_weights, invert_ensemble):
        """Validate the UI state and launch the auto-ensemble pipeline."""
        if not manager.models:
            raise gr.Error(t("error_no_models"))
        if not input_audio:
            raise gr.Error(t("error_no_audio"))

        return ensembless.auto_ensemble(
            input_audio=input_audio,
            input_settings=manager.get_settings(),
            type=ensemble_type,
            out_format=output_format,
            invert_weights=invert_weights,
            invert_ensemble=invert_ensemble,
        )
|
|
|
|
|
# Adapter instance wired to the Gradio events inside ensembless_plugin().
ensembless_ui = EnsembLess_ui_updates()
|
|
|
|
|
def ensembless_plugin_name():
    """Name under which the host application lists this plugin."""
    return "EnsembLess"
|
|
|
|
|
|
|
|
def ensembless_plugin(lang):
    """Render the EnsembLess plugin UI; must be called inside a gr.Blocks context."""
    set_language(lang)

    with gr.Tabs():
        # --- Auto-ensemble tab: pick models, separate, then blend stems. ---
        with gr.Tab(t("auto_ensemble")):
            with gr.Row():
                with gr.Column(scale=1):
                    # Model picker: type -> name -> primary stem -> inverted stem.
                    gr.Markdown(f"### {t('model_selection')}")
                    model_type = gr.Dropdown(
                        choices=ensembless.get_model_types(),
                        label=t("model_type"),
                        value=ensembless.get_model_types()[0] if ensembless.get_model_types() else None,
                        filterable=False
                    )
                    model_name = gr.Dropdown(
                        choices=ensembless.get_models_by_type(ensembless.get_model_types()[0]),
                        label=t("model_name"),
                        interactive=True,
                        value=ensembless.get_models_by_type(ensembless.get_model_types()[0])[0],
                        filterable=False
                    )
                    stem = gr.Dropdown(
                        choices=ensembless.get_stems_by_model(ensembless.get_model_types()[0], ensembless.get_models_by_type(ensembless.get_model_types()[0])[0]),
                        label=t("p_stem"),
                        interactive=True,
                        filterable=False
                    )
                    invert_stem = gr.Dropdown(
                        choices=ensembless.get_invert_stems_by_model(ensembless.get_model_types()[0], ensembless.get_models_by_type(ensembless.get_model_types()[0])[0], "vocals"),
                        label=t("s_stem"),
                        interactive=True,
                        filterable=False
                    )

                    weight = gr.Slider(
                        label=t("weight"),
                        value=1.0,
                        minimum=0.1,
                        maximum=10.0,
                        step=0.1
                    )
                    add_btn = gr.Button(t("add_button"), variant="primary")

                with gr.Column(scale=2):
                    # Current ensemble table plus preset import/export controls.
                    gr.Markdown(f"### {t('current_ensemble')}")
                    ensemble_df = gr.Dataframe(
                        value=manager.get_df(),
                        headers=["#", t("model_type"), t("model_name"), t("p_stem"), t("s_stem"), t("weight")],
                        datatype=["str", "str", "str", "str", "str", "number"],
                        interactive=False
                    )
                    with gr.Row(equal_height=True):
                        export_preset_name = gr.Textbox(label=t("give_name_preset"), interactive=True, value="ensembless_preset")
                        with gr.Column():
                            export_btn = gr.DownloadButton(t("export"), variant="secondary")
                            import_btn = gr.UploadButton(t("import"), file_types=[".json"], file_count="single")
                    with gr.Row(equal_height=True):
                        remove_idx = gr.Number(
                            label=t("remove_index"),
                            precision=0,
                            minimum=1,
                            interactive=True
                        )
                        with gr.Column():
                            remove_btn = gr.Button(t("remove_button"), variant="stop")
                            clear_btn = gr.Button(t("clear_button"), variant="stop")

            with gr.Row():
                with gr.Column():
                    gr.Markdown(f"### {t('input_audio')}")
                    input_audio = gr.Audio(type="filepath", show_label=False)
                    # Hidden path to the 44.1 kHz copy written by resample_audio
                    # on upload; it feeds both run_btn and invert_btn below.
                    input_audio_resampled = gr.Text(visible=False)

                    gr.Markdown(f"### {t('settings')}")
                    ensemble_type = gr.Dropdown(
                        choices=['avg_wave', 'median_wave', 'min_wave', 'max_wave',
                                 'avg_fft', 'median_fft', 'min_fft', 'max_fft'],
                        value='avg_fft',
                        label=t("method"),
                        filterable=False
                    )
                    invert_ensem = gr.Checkbox(label=t("invert_ensemble"))
                    invert_weights = gr.Checkbox(label=t("invert_weights"))
                    output_format = gr.Dropdown(
                        choices=["wav", "mp3", "flac", "m4a", "aac", "ogg", "opus", "aiff"],
                        value="mp3",
                        label=t("output_format"),
                        filterable=False
                    )
                    run_btn = gr.Button(t("run_button"), variant="primary")

                with gr.Column():
                    with gr.Tab(t('results')):

                        with gr.Column():
                            output_audio = gr.Audio(label=t("results"), type="filepath", interactive=False, show_download_button=True)
                            # Hidden WAV twin of the result, used as inverter input.
                            output_wav = gr.Text(label="Результат в WAV", interactive=False, visible=False)

                            gr.Markdown(f"###### {t('inverted_result')}")

                            invert_method = gr.Radio(
                                choices=["waveform", "spectrogram"],
                                label=t("invert_method"),
                                value="waveform"
                            )
                            invert_btn = gr.Button(t("invert_button"))
                            inverted_output_audio = gr.Audio(label=t("inverted_result"), type="filepath", interactive=False, show_download_button=True)
                            inverted_wav = gr.Text(label="Инвертированный результат в WAV", interactive=False, visible=False)

                    with gr.Tab(t('result_source')):
                        result_source = gr.Files(interactive=False, label=t('result_source'))

            # --- Event wiring for the auto-ensemble tab. ---
            stem.change(ensembless_ui.update_invert_stem_dropdown, inputs=[model_type, model_name, stem], outputs=invert_stem)

            model_type.change(
                ensembless_ui.update_model_dropdown,
                inputs=model_type,
                outputs=model_name
            )
            model_name.change(
                ensembless_ui.update_stem_dropdown,
                inputs=[model_type, model_name],
                outputs=stem
            )

            # Re-export the preset whenever the table or its name changes so
            # the download button always serves an up-to-date file.
            ensemble_df.change(
                manager.export_preset,
                inputs=export_preset_name,
                outputs=export_btn
            )

            export_preset_name.change(
                manager.export_preset,
                inputs=export_preset_name,
                outputs=export_btn
            )

            import_btn.upload(
                manager.import_preset,
                inputs=import_btn,
                outputs=ensemble_df
            )

            invert_btn.click(
                inverter.process_audio,
                inputs=[input_audio_resampled, output_wav, output_format, invert_method],
                outputs=[inverted_output_audio, inverted_wav]
            )

            # Resample on upload so the later inversion sees matched rates.
            input_audio.upload(
                ensembless.resample_audio,
                inputs=input_audio,
                outputs=input_audio_resampled
            )

            add_btn.click(
                ensembless_ui.add_model,
                inputs=[model_type, model_name, stem, invert_stem, weight],
                outputs=ensemble_df
            )

            remove_btn.click(
                ensembless_ui.remove_model,
                inputs=remove_idx,
                outputs=ensemble_df
            )

            clear_btn.click(
                ensembless_ui.clear_all_models,
                outputs=ensemble_df
            )

            run_btn.click(
                ensembless_ui.run_ensemble,
                inputs=[input_audio_resampled, ensemble_type, output_format, invert_weights, invert_ensem],
                outputs=[output_audio, output_wav, inverted_output_audio, inverted_wav, result_source]
            )

        # --- Manual-ensemble tab: blend user-supplied files directly. ---
        with gr.Tab(t("manual_ensemble")):
            with gr.Row(equal_height=True):
                input_files = gr.Files(show_label=False, type="filepath", file_types=[".wav", ".mp3", ".flac", ".m4a", ".aac", ".ogg", ".opus", ".aiff"])
                with gr.Column():
                    # Per-file sample-rate report filled by analyze_sample_rate.
                    info_audios = gr.Textbox(label="", interactive=False)
                    man_method = gr.Dropdown(
                        choices=['avg_wave', 'median_wave', 'min_wave', 'max_wave',
                                 'avg_fft', 'median_fft', 'min_fft', 'max_fft'],
                        value='avg_fft',
                        label=t("method"),
                        filterable=False
                    )

                    weights_input = gr.Textbox(label=t("weights_input"), value="1.0,1.0")

                    output_man_format = gr.Dropdown(
                        choices=["wav", "mp3", "flac", "m4a", "aac", "ogg", "opus", "aiff"],
                        value="mp3",
                        label=t("output_format"),
                        filterable=False
                    )

                    run_man_btn = gr.Button(t("run_button"), variant="primary")

            output_man_audio = gr.Audio(label=t("results"), type="filepath", interactive=False, show_download_button=True)
            output_man_wav = gr.Text(label="Результат в WAV", interactive=False, visible=False)

            # Show the sample-rate report as soon as files are uploaded.
            input_files.upload(
                fn=ensembless.analyze_sample_rate,
                inputs=input_files,
                outputs=info_audios
            )

            run_man_btn.click(
                ensembless.manual_ensemble,
                inputs=[input_files, man_method, weights_input, output_man_format],
                outputs=[output_man_audio, output_man_wav]
            )