mvsepless_colab / mvsepless /plugins /remove_center.py
noblebarkrr's picture
Убраны комментарии и отформатирован код
6cc8dc1 verified
import gradio as gr
import os, sys, subprocess
import tempfile
from scipy import signal
import numpy as np
from datetime import datetime
# Import Audio absolutely when this module is executed outside of a package
# (``__package__`` is empty), and relatively when it is loaded as part of the
# package.
if not __package__:
    from audio import Audio
else:
    from ..audio import Audio
class Plugin(Audio):
    """Plugin that splits a stereo file into a "phantom center" and a stereo base.

    The center is estimated per STFT bin as the smaller of the two channel
    magnitudes; it is written to one file, and the input with that component
    subtracted is written to another. File reading/writing and the
    ``input_formats`` / ``output_formats`` / ``short`` members come from the
    ``Audio`` base class, which is defined outside this file.
    """

    def __init__(self):
        """Initialise the base class, the plugin name and the window choices."""
        super().__init__()
        self.name = "Вычитание фантомного центра"
        self.requirements = []
        self.install_requirements(self.requirements)
        # Window names offered in the UI; the selected one is forwarded as the
        # ``window`` argument of scipy.signal.stft / istft.
        self.w_types = [
            "boxcar",
            "triang",
            "blackman",
            "hamming",
            "hann",
            "bartlett",
            "flattop",
            "parzen",
            "bohman",
            "blackmanharris",
            "nuttall",
            "barthann",
            "cosine",
            "exponential",
            "tukey",
            "taylor",
            "lanczos",
        ]

    def install_requirements(self, requirements: list):
        """Install the given packages with pip for the running interpreter.

        Does nothing when ``requirements`` is empty. Installation is
        best-effort: a failing pip run is reported on stderr but does not
        raise.

        Args:
            requirements: Package specifiers appended to ``pip install``.
        """
        if not requirements:
            return
        # sys.executable guarantees pip targets the interpreter running this code.
        cmd = [sys.executable, "-m", "pip", "install", *requirements]
        result = subprocess.run(cmd, text=True, capture_output=True)
        if result.returncode != 0:
            print(
                f"pip install failed (exit code {result.returncode}):\n{result.stderr}",
                file=sys.stderr,
            )

    def remove_center(
        self,
        input_file,
        output_format="flac",
        out_center="center.flac",
        out_stereo_base="stereo_base.flac",
        rdf=0.99999,
        window_size=4096,
        overlap=2,
        window_type="blackman",
        stereo_mode="stereo",
    ):
        """Split a stereo file into a center estimate and the remaining stereo base.

        Args:
            input_file: Audio file handed to ``self.read``.
            output_format: Output format, forwarded to ``self.write`` as ``of``.
            out_center: Output target for the extracted center signal.
            out_stereo_base: Output target for the signal with the center removed.
            rdf: Reduction factor; the share of the center estimate that is
                subtracted from each channel.
            window_size: STFT segment length in samples (``nperseg``).
            overlap: Divisor used to derive the STFT overlap:
                ``noverlap = window_size // overlap``. Must be greater than 1.
            window_type: Window name passed to scipy as ``window``.
            stereo_mode: ``"mono"`` gives the center estimate the phase of the
                (L + R) / 2 downmix in both channels; any other value gives each
                channel's estimate the phase of the opposite channel.

        Returns:
            A tuple ``(stereo_base, center)`` holding the values returned by
            ``self.write`` for the two outputs.

        Raises:
            ValueError: If the input does not have exactly two channels, or if
                ``overlap`` is not greater than 1.
        """
        data, samplerate, _ = self.read(i=input_file, mono=False, sr=None)
        if data.ndim != 2 or data.shape[0] != 2:
            raise ValueError("Требуется стереофайл (2 канала)")
        if overlap <= 1:
            # overlap == 0 would divide by zero and overlap <= 1 would make
            # noverlap >= nperseg, which scipy rejects; fail early and clearly.
            raise ValueError("Перекрытие должно быть больше 1")

        left = data[0, :]
        right = data[1, :]
        n_samples = len(left)

        # UI widgets may deliver floats; segment sizes are sample counts.
        nperseg = int(window_size)
        # NOTE(review): a larger ``overlap`` yields a *smaller* noverlap here
        # (window_size // overlap). If ``overlap`` was meant as "number of
        # overlapping windows", the hop size should be derived instead — confirm.
        noverlap = int(nperseg // overlap)
        stft_args = {
            "fs": samplerate,
            "nperseg": nperseg,
            "noverlap": noverlap,
            "window": window_type,
        }

        def spectrum(samples):
            # signal.stft returns (freqs, times, Zxx); only Zxx is needed.
            return signal.stft(samples, **stft_args)[2]

        def waveform(spec):
            # signal.istft returns (times, samples); trim to the input length.
            return signal.istft(spec, **stft_args)[1][:n_samples]

        Z_left = spectrum(left)
        Z_right = spectrum(right)

        # Per-bin magnitude present in both channels.
        common_mag = np.minimum(np.abs(Z_left), np.abs(Z_right))

        if stereo_mode == "mono":
            # Both channels share one center estimate carrying the downmix phase.
            Z_mono = spectrum(left * 0.5 + right * 0.5)
            Z_common_left = common_mag * np.exp(1j * np.angle(Z_mono))
            Z_common_right = Z_common_left
        else:
            # NOTE(review): each channel's estimate uses the phase of the
            # *opposite* channel, as in the original code — confirm intended.
            Z_common_left = common_mag * np.exp(1j * np.angle(Z_right))
            Z_common_right = common_mag * np.exp(1j * np.angle(Z_left))

        new_left = waveform(Z_left - Z_common_left * rdf)
        new_right = waveform(Z_right - Z_common_right * rdf)

        common_signal_left = waveform(Z_common_left)
        if Z_common_right is Z_common_left:
            # Identical spectrum: avoid a second, redundant inverse transform.
            common_signal_right = common_signal_left
        else:
            common_signal_right = waveform(Z_common_right)

        # Scale the stereo base down only when it would exceed full scale.
        peak = np.max([np.abs(new_left).max(), np.abs(new_right).max()])
        if peak > 1.0:
            new_left = new_left / peak
            new_right = new_right / peak

        output_file = self.write(
            o=out_stereo_base,
            array=np.column_stack((new_left, new_right)),
            sr=samplerate,
            of=output_format,
            br="320k",
        )
        output_center_file = self.write(
            o=out_center,
            array=np.column_stack((common_signal_left, common_signal_right)),
            sr=samplerate,
            of=output_format,
            br="320k",
        )
        return (output_file, output_center_file)

    def UI(self):
        """Build the Gradio controls for this plugin and wire up the button.

        Must be called inside an active Gradio layout context.

        NOTE(review): the nesting of the layout containers below was
        reconstructed from a copy of this file that had lost its indentation;
        confirm the intended layout.
        """
        with gr.Row():
            rmv_center_ui_input_audio = gr.File(
                label="Входное аудио",
                interactive=True,
                type="filepath",
                file_count="single",
                file_types=[f".{of}" for of in self.input_formats],
            )
            with gr.Group():
                with gr.Row():
                    # Hidden control: always submits its value (1 by default).
                    rmv_center_ui_reduction_f = gr.Slider(
                        0.1,
                        10,
                        value=1,
                        step=0.1,
                        label="Фактор подавления",
                        interactive=True,
                        visible=False,
                    )
                    rmv_center_ui_overlap = gr.Slider(
                        2,
                        30,
                        value=2,
                        step=1,
                        label="Перекрытие",
                        interactive=True,
                        visible=True,
                    )
                    # precision=0 makes the widget return an integer sample count.
                    rmv_center_ui_window_size = gr.Number(
                        label="Размер окна",
                        interactive=True,
                        visible=True,
                        minimum=32,
                        maximum=882000,
                        precision=0,
                        value=2048,
                    )
                with gr.Row():
                    rmv_center_ui_format = gr.Dropdown(
                        self.output_formats,
                        value=self.output_formats[0],
                        filterable=False,
                        label="Формат выходного файла",
                        interactive=True,
                    )
                    rmv_center_ui_window_types = gr.Dropdown(
                        self.w_types,
                        value=self.w_types[4],
                        filterable=False,
                        label="Тип окна",
                        interactive=True,
                    )
                    rmv_center_ui_mono_mode = gr.Dropdown(
                        ["mono", "stereo"],
                        value="mono",
                        filterable=False,
                        label="Стерео-режим",
                        interactive=True,
                    )
        rmv_center_ui_extract_btn = gr.Button("Разделить")
        with gr.Group():
            with gr.Column():
                with gr.Row():
                    rmv_center_ui_mid = gr.Audio(
                        type="filepath",
                        interactive=False,
                        label="Фантомный центр",
                        visible=True,
                        show_download_button=True,
                    )
                    rmv_center_ui_side = gr.Audio(
                        type="filepath",
                        interactive=False,
                        label="Стерео-база",
                        visible=True,
                        show_download_button=True,
                    )

        @rmv_center_ui_extract_btn.click(
            inputs=[
                rmv_center_ui_input_audio,
                rmv_center_ui_format,
                rmv_center_ui_reduction_f,
                rmv_center_ui_window_size,
                rmv_center_ui_overlap,
                rmv_center_ui_window_types,
                rmv_center_ui_mono_mode,
            ],
            outputs=[rmv_center_ui_side, rmv_center_ui_mid],
        )
        def wrap_remove_center(input_audio, output_format, rf, ws, ovlp, wt, mono_mode):
            """Run remove_center on the uploaded file; return (stereo base, center)."""
            if not input_audio:
                # Two output components are bound, so return one value for each.
                return None, None
            # Results go to a fresh temporary directory; it is not removed here
            # because the returned files are handed to the output components.
            temp_dir = tempfile.mkdtemp(prefix="remove_center_")
            basename = self.short(
                os.path.splitext(os.path.basename(input_audio))[0], length=80
            )
            side, mid = self.remove_center(
                input_file=input_audio,
                output_format=output_format,
                out_center=os.path.join(
                    temp_dir, f"{basename}_center.{output_format}"
                ),
                out_stereo_base=os.path.join(
                    temp_dir, f"{basename}_stereo_base.{output_format}"
                ),
                overlap=ovlp,
                rdf=rf,
                stereo_mode=mono_mode,
                window_size=ws,
                window_type=wt,
            )
            return side, mid