Spaces:
Sleeping
Sleeping
Upload 14 files
Browse files- mvsepless/additional_app.py +0 -0
- mvsepless/app.py +58 -47
- mvsepless/audio.py +1502 -1502
- mvsepless/custom_models.json +28 -0
- mvsepless/i18n.py +0 -0
- mvsepless/infer_utils.py +824 -824
- mvsepless/install.py +355 -355
- mvsepless/namer.py +164 -164
- mvsepless/separator.py +0 -0
- mvsepless/vbachgen.py +0 -0
mvsepless/additional_app.py
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mvsepless/app.py
CHANGED
|
@@ -344,7 +344,8 @@ class SeparatorGradio(GradioHelper, DownloadModelManager):
|
|
| 344 |
theme: gr.Theme,
|
| 345 |
add_app: bool = True,
|
| 346 |
plugins: bool = True,
|
| 347 |
-
add_vbach: bool = False
|
|
|
|
| 348 |
) -> gr.Blocks:
|
| 349 |
"""
|
| 350 |
Создать пользовательский интерфейс
|
|
@@ -866,56 +867,66 @@ class SeparatorGradio(GradioHelper, DownloadModelManager):
|
|
| 866 |
return gr.update(value="")
|
| 867 |
|
| 868 |
# Вкладка менеджера моделей
|
| 869 |
-
|
| 870 |
-
with gr.Tab(_i18n("
|
| 871 |
-
with gr.
|
| 872 |
-
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
gr.
|
| 912 |
-
|
| 913 |
-
|
|
|
|
| 914 |
|
| 915 |
# Импорт дополнительных модулей
|
| 916 |
-
from additional_app import AutoEnsembless, ManualEnsembless, PluginManager, Inverter_UI, AudioApp
|
| 917 |
|
| 918 |
if add_app:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 919 |
with gr.Tab(_i18n("tab_audio_processing")):
|
| 920 |
_audio_app = AudioApp(user_directory)
|
| 921 |
_audio_app.UI()
|
|
|
|
| 344 |
theme: gr.Theme,
|
| 345 |
add_app: bool = True,
|
| 346 |
plugins: bool = True,
|
| 347 |
+
add_vbach: bool = False,
|
| 348 |
+
model_manager_add: bool = True
|
| 349 |
) -> gr.Blocks:
|
| 350 |
"""
|
| 351 |
Создать пользовательский интерфейс
|
|
|
|
| 867 |
return gr.update(value="")
|
| 868 |
|
| 869 |
# Вкладка менеджера моделей
|
| 870 |
+
if model_manager_add:
|
| 871 |
+
with gr.Tab(_i18n("tab_model_manager")):
|
| 872 |
+
with gr.Tab(_i18n("tab_download_model")):
|
| 873 |
+
with gr.Group():
|
| 874 |
+
select_dwm_preset = gr.Dropdown(
|
| 875 |
+
label=_i18n("select_preset"),
|
| 876 |
+
interactive=True,
|
| 877 |
+
choices=list(self.dwm_presets.keys()),
|
| 878 |
+
value=None,
|
| 879 |
+
)
|
| 880 |
+
select_dwm_names = gr.Dropdown(
|
| 881 |
+
label=_i18n("select_models"),
|
| 882 |
+
interactive=True,
|
| 883 |
+
choices=default_model,
|
| 884 |
+
value=[],
|
| 885 |
+
multiselect=True
|
| 886 |
+
)
|
| 887 |
+
dwm_status = gr.Textbox(
|
| 888 |
+
container=False,
|
| 889 |
+
lines=3,
|
| 890 |
+
interactive=False,
|
| 891 |
+
max_lines=3,
|
| 892 |
+
visible=False
|
| 893 |
+
)
|
| 894 |
+
download_dwm_button = gr.Button(_i18n("download_btn"))
|
| 895 |
+
|
| 896 |
+
select_dwm_preset.change(
|
| 897 |
+
lambda x: gr.update(value=self.parse_models_from_dwm_preset(x)),
|
| 898 |
+
inputs=select_dwm_preset,
|
| 899 |
+
outputs=select_dwm_names,
|
| 900 |
+
trigger_mode="once"
|
| 901 |
+
)
|
| 902 |
+
|
| 903 |
+
download_dwm_button.click(
|
| 904 |
+
lambda: gr.update(visible=True),
|
| 905 |
+
outputs=dwm_status
|
| 906 |
+
).then(
|
| 907 |
+
lambda x: (self.batch_download(x), gr.update(visible=False)),
|
| 908 |
+
inputs=select_dwm_names,
|
| 909 |
+
outputs=[gr.State(None), dwm_status]
|
| 910 |
+
)
|
| 911 |
+
|
| 912 |
+
with gr.Tab(_i18n("tab_delete_models")):
|
| 913 |
+
gr.Markdown(f"<h3><center>{_i18n('delete_all_warning')}</center></h3>")
|
| 914 |
+
delete_models_cache_btn = gr.Button(_i18n("delete_all_btn"), variant="stop")
|
| 915 |
+
delete_models_cache_btn.click(self.delete_models_cache, inputs=None, outputs=None)
|
| 916 |
|
| 917 |
# Импорт дополнительных модулей
|
| 918 |
+
from additional_app import AutoEnsembless, ManualEnsembless, PluginManager, Inverter_UI, AudioApp, CustomSeparator
|
| 919 |
|
| 920 |
if add_app:
|
| 921 |
+
with gr.Tab(_i18n("tab_custom_separation")):
|
| 922 |
+
_custom_sep = CustomSeparator(
|
| 923 |
+
self.input_files,
|
| 924 |
+
self.upload_files,
|
| 925 |
+
user_directory,
|
| 926 |
+
device=self.device,
|
| 927 |
+
history=self.history
|
| 928 |
+
)
|
| 929 |
+
_custom_sep.UI()
|
| 930 |
with gr.Tab(_i18n("tab_audio_processing")):
|
| 931 |
_audio_app = AudioApp(user_directory)
|
| 932 |
_audio_app.UI()
|
mvsepless/audio.py
CHANGED
|
@@ -1,1503 +1,1503 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import subprocess
|
| 3 |
-
import numpy as np
|
| 4 |
-
from gradio_helper import str2bool
|
| 5 |
-
from scipy.signal import ShortTimeFFT, resample
|
| 6 |
-
from scipy.signal.windows import dpss, hann
|
| 7 |
-
from numpy.typing import DTypeLike
|
| 8 |
-
from typing import List, Tuple, Optional, Union, Dict, Any, Callable
|
| 9 |
-
from i18n import _i18n
|
| 10 |
-
|
| 11 |
-
# Executables used for decoding and probing. They are bare command names,
# so the operating system looks them up on PATH.
ffmpeg_path = "ffmpeg"
ffprobe_path = "ffprobe"

# Module-level STFT frame size and hop length (in samples).
n_fft, hop = 4096, 1024
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
def average(*ints: Union[int, float]) -> float:
    """
    Compute the arithmetic mean of the given numbers.

    Args:
        *ints: Numbers to average

    Returns:
        The mean value

    Raises:
        ZeroDivisionError: If no numbers are passed
    """
    total = sum(ints)
    return total / len(ints)
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def check_installed() -> None:
    """
    Report whether the ffmpeg and ffprobe executables can be run.

    Each tool is started with ``-version`` and a translated "found" or
    "not found" message is printed for it. Nothing is returned or raised
    for a missing tool.
    """
    probes = (
        (ffmpeg_path, "ffmpeg_found", "ffmpeg_not_found"),
        (ffprobe_path, "ffprobe_found", "ffprobe_not_found"),
    )
    for executable, found_key, missing_key in probes:
        try:
            # The output itself is not needed, only whether the call succeeds.
            subprocess.check_output([executable, "-version"])
        except (OSError, subprocess.SubprocessError):
            # OSError: executable missing / not runnable.
            # SubprocessError: the tool started but exited with an error.
            # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
            print(_i18n(missing_key))
        else:
            print(_i18n(found_key))
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
def get_ogg_bitrate(sample_rate: int, channels: int = 2) -> int:
    """
    Pick a recommended OGG bitrate from the sample rate.

    Args:
        sample_rate: Sample rate of the audio
        channels: Number of channels

    Returns:
        Per-channel bitrate for the matching sample-rate tier multiplied
        by the number of channels
    """
    # (minimum sample rate, per-channel bitrate), highest tier first.
    tiers = (
        (40000, 240),
        (26000, 190),
        (15000, 90),
        (9000, 50),
        (8000, 42),
    )
    per_channel = next(
        (rate for floor, rate in tiers if sample_rate >= floor),
        30,  # anything below the lowest tier
    )
    return int(per_channel * channels)
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
# NumPy dtype (by name or as the scalar type) -> raw little-endian PCM
# format name handed to ffmpeg's "-f" option.
SAMPLE_FORMATS_DICT: Dict[Union[str, type], str] = {
    "int16": "s16le",
    "int32": "s32le",
    "float32": "f32le",
    "float64": "f64le",
    np.int16: "s16le",
    np.int32: "s32le",
    np.float32: "f32le",
    np.float64: "f64le",
}

# Audio container / codec names accepted as input.
audio_formats: List[str] = [
    "aac", "ac3", "ac4", "adts", "aiff", "au", "caf", "dts", "eac3",
    "flac", "m4a", "mp3", "mp2", "ogg", "oga", "opus", "ra", "raw",
    "snd", "voc", "wav", "wma", "wv",
]

# Video containers that are accepted as input for their audio track.
video_formats_with_audio: List[str] = [
    "3gp", "3g2", "asf", "avi", "flv", "f4v", "m4v", "mkv", "mov",
    "mp4", "mpeg", "mpg", "mts", "mxf", "ogv", "rm", "rmvb", "ts",
    "vob", "webm", "wmv",
]

input_formats: List[str] = video_formats_with_audio + audio_formats

output_formats: List[str] = [
    "mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff", "wma",
]

# Same lists with a leading dot, i.e. as file extensions.
input_extensions: List[str] = ["." + fmt for fmt in input_formats]

output_extensions: List[str] = ["." + fmt for fmt in output_formats]

# Output extension -> {prefer_float: ffmpeg codec arguments}.
# Each row is (extension, (codec, sample_fmt) for float, (codec, sample_fmt) for int).
codec_args: Dict[str, Dict[bool, List[str]]] = {
    ext: {
        True: ["-c:a", float_codec, "-sample_fmt", float_fmt],
        False: ["-c:a", int_codec, "-sample_fmt", int_fmt],
    }
    for ext, (float_codec, float_fmt), (int_codec, int_fmt) in (
        (".mp3", ("libmp3lame", "fltp"), ("libmp3lame", "s16p")),
        (".wav", ("pcm_f32le", "flt"), ("pcm_s16le", "s16")),
        (".flac", ("flac", "s32"), ("flac", "s16")),
        (".ogg", ("libvorbis", "fltp"), ("libvorbis", "fltp")),
        (".opus", ("libopus", "flt"), ("libopus", "s16")),
        (".m4a", ("aac", "fltp"), ("aac", "fltp")),
        (".aac", ("aac", "fltp"), ("aac", "fltp")),
        (".ac3", ("ac3", "fltp"), ("ac3", "fltp")),
        (".aiff", ("pcm_f32be", "flt"), ("pcm_s16be", "s16")),
        (".wma", ("wmav2", "fltp"), ("wmav2", "fltp")),
    )
}
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
def get_codec_args(extension: str, prefer_float: bool) -> List[str]:
    """
    Look up the FFmpeg codec arguments for an output extension.

    Args:
        extension: File extension, including the leading dot
        prefer_float: Select the float variant (True) or the integer one (False)

    Returns:
        The argument list stored in ``codec_args``, or an empty list when
        the extension has no entry
    """
    variants = codec_args.get(extension)
    return [] if variants is None else variants[prefer_float]
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
# Characters kept by sanitize_output: the ASCII decimal digits.
allowed_chars: str = r"1234567890"


def sanitize_output(output: str) -> str:
    """
    Drop every character that is not listed in ``allowed_chars``.

    Args:
        output: Text to clean

    Returns:
        The remaining characters, in their original order
    """
    kept = (symbol for symbol in output if symbol in allowed_chars)
    return "".join(kept)
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
def _probe_audio_stream_int(path: str, stream: int, entry: str) -> Optional[int]:
    """Ask ffprobe for one integer stream field; return None when no number is printed."""
    cmd = [ffprobe_path, "-i", path, "-v", "quiet", "-hide_banner",
           "-show_entries", f"stream={entry}", "-select_streams", f"a:{stream}",
           "-of", "compact=p=0:nk=1"]
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    stdout, _stderr = process.communicate()
    # Parse line by line and stop at the first line that contains digits.
    # Joining the digits of the whole output would glue a value that is
    # printed on several lines ("44100\n44100") into one bogus number.
    for line in stdout.decode("utf-8", errors="replace").splitlines():
        digits = sanitize_output(line)
        if digits:
            return int(digits)
    return None


def get_sr(path: str, stream: int = 0) -> int:
    """
    Get the sample rate of an audio stream via ffprobe.

    Args:
        path: Path to the file
        stream: Index of the audio stream

    Returns:
        The sample rate, or 0 (after printing a message) when it cannot be read
    """
    sample_rate = _probe_audio_stream_int(path, stream, "sample_rate")
    if sample_rate is None:
        print(_i18n("sr_read_error", path=path))
        return 0
    return sample_rate


def get_channels(path: str, stream: int = 0) -> int:
    """
    Get the channel count of an audio stream via ffprobe.

    Args:
        path: Path to the file
        stream: Index of the audio stream

    Returns:
        The number of channels, or 0 (after printing a message) when it
        cannot be read
    """
    channels = _probe_audio_stream_int(path, stream, "channels")
    if channels is None:
        print(_i18n("channels_read_error", path=path))
        return 0
    return channels
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
def check(path: str) -> bool:
    """
    Tell whether a file holds readable audio.

    Args:
        path: Path to the file

    Returns:
        True when both the channel count and the sample rate are non-zero
    """
    # Both probes always run, channels first.
    probed = (get_channels(path), get_sr(path))
    return all(value != 0 for value in probed)
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
def read(
    path: str,
    sr: Optional[int] = None,
    mono: bool = False,
    dtype: DTypeLike = "float32",
    multi_channel: bool = False,
    num_channels: int = 2,
    stream: int = 0,
    flatten: bool = False
) -> Tuple[np.ndarray, int]:
    """
    Decode an audio stream into a NumPy array by piping raw PCM from ffmpeg.

    Args:
        path: Path to the file
        sr: Target sample rate; when falsy it is probed with get_sr
        mono: Decode to a single channel
        dtype: Data type of the returned samples
        multi_channel: Use the stream's own channel count (probed with
            get_channels) instead of num_channels
        num_channels: Channel count used when neither mono nor multi_channel
        stream: Index of the audio stream
        flatten: For mono only, return a 1-D array instead of shape (1, samples)

    Returns:
        Tuple (samples, sample rate); non-flattened samples have shape
        (channels, samples)
    """
    pcm_format = SAMPLE_FORMATS_DICT.get(dtype, None)
    if not sr:
        sr = get_sr(path, stream)
    if mono:
        channels = 1
    elif multi_channel:
        channels = get_channels(path, stream)
    else:
        channels = num_channels

    # A dtype without a known PCM format is decoded as float32 and
    # converted afterwards.
    needs_conversion = not pcm_format
    if needs_conversion:
        pcm_format = "f32le"

    cmd = [ffmpeg_path, "-i", path, "-map", f"0:a:{stream}", "-vn",
           "-f", pcm_format, "-ac", str(channels), "-ar", str(sr), "-"]
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=10**8
    )
    stdout, stderr = process.communicate()

    if needs_conversion:
        y = convert_to_dtype(np.frombuffer(stdout, dtype=np.float32), dtype)
    else:
        y = np.frombuffer(stdout, dtype=dtype)

    # ffmpeg emits interleaved samples; reshape to (channels, samples).
    if not mono:
        y = y.reshape((-1, channels)).T
    elif flatten:
        y = y.flatten()
    else:
        y = y.reshape((-1, 1)).T

    return y.copy(), sr
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
def multiread(
    paths: Union[List[str], Tuple[str, ...]],
    *args,
    **kwargs
) -> Tuple[List[np.ndarray], List[int]]:
    """
    Read several audio files with ``read``.

    Args:
        paths: Paths of the files to read
        *args: Positional arguments forwarded to read
        **kwargs: Keyword arguments forwarded to read

    Returns:
        Tuple (list of sample arrays, list of sample rates), in input order
    """
    arrays: List[np.ndarray] = []
    rates: List[int] = []
    total = len(paths)
    for position, file_path in enumerate(paths, start=1):
        samples, rate = read(file_path, *args, **kwargs)
        arrays.append(samples)
        rates.append(rate)
        # "\r" keeps the progress message on a single console line.
        print(_i18n("reading_progress", current=position, total=total), end="\r")
    print("")
    return arrays, rates
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
def bitrate_to_int(a: Union[str, int, float]) -> int:
    """
    Convert a bitrate given as text or number to an integer.

    Args:
        a: Bitrate as a string (optionally ending in "k" / "K") or a number

    Returns:
        The bitrate as an integer; 320 when the value cannot be interpreted
    """
    if isinstance(a, str):
        # One trailing "k"/"K" suffix is allowed ("192k").
        numeric_part = a[:-1] if a.endswith(("k", "K")) else a
        # isdecimal() only accepts characters int() can parse; isdigit()
        # also accepts e.g. "²", which makes int() raise ValueError.
        if numeric_part.isdecimal():
            return int(numeric_part)
        print(_i18n("invalid_bitrate", bitrate=a))
        return 320
    if isinstance(a, (int, float)):
        try:
            return int(a)
        except (ValueError, OverflowError):
            # NaN and infinities have no integer value; use the fallback.
            return 320
    return 320
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
def get_info_array(y: np.ndarray) -> Tuple[int, int, Optional[int], bool]:
    """
    Describe the layout of an audio array.

    For 2-D input the shorter axis is taken as the channel axis; when both
    axes have the same length the second axis is treated as channels.

    Args:
        y: Audio array (1-D or 2-D)

    Returns:
        Tuple (channel count, sample count, index of the sample axis,
        flatten flag); the axis index is -1 and the flag True for 1-D input

    Raises:
        ValueError: If the array has more than two dimensions (or none)
    """
    if y.ndim == 1:
        return 1, len(y), -1, True
    if y.ndim != 2:
        raise ValueError(_i18n("array_dim_error"))
    rows, cols = y.shape
    if rows < cols:
        # (channels, samples)
        return rows, cols, 1, False
    # (samples, channels)
    return cols, rows, 0, False
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
def get_axis_from_array_index(index: int) -> int:
    """
    Map an array index to the opposite axis of a 2-D array.

    Args:
        index: Array index (1, 0 or -1)

    Returns:
        0 for index 1, 1 for index 0, and -1 for any other value
    """
    return 0 if index == 1 else 1 if index == 0 else -1
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
def get_duration_from_array(y: np.ndarray, sr: Optional[int] = None) -> Union[float, int]:
    """
    Get the length of an audio array.

    Args:
        y: Audio array
        sr: Sample rate

    Returns:
        Duration in seconds when sr is given, otherwise the sample count
    """
    sample_count: int = get_info_array(y)[1]
    return sample_count if sr is None else sample_count / sr
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
def is_float(y: np.ndarray) -> bool:
    """
    Tell whether an array stores floating-point samples.

    Args:
        y: Audio array

    Returns:
        True when the array's dtype is a NumPy floating type
    """
    element_type = y.dtype
    return np.issubdtype(element_type, np.floating)
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
def is_float_dtype(dtype: DTypeLike) -> bool:
    """
    Tell whether a dtype is a floating-point type.

    Args:
        dtype: Data type to test

    Returns:
        True when the dtype is a NumPy floating type
    """
    floating_family = np.floating
    return np.issubdtype(dtype, floating_family)
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
def float_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert a float array to an integer type.

    Signed targets scale the input by the type's maximum; unsigned targets
    first shift [-1, 1] to [0, 1]. Values are rounded and clipped to the
    target range.

    Args:
        y: Float array
        dtype: Target integer data type

    Returns:
        Integer array

    Raises:
        ValueError: If dtype is not an integer type (raised by np.iinfo)
    """
    info = np.iinfo(dtype)
    # Do the arithmetic in float64. In float32 the int32/uint32 maximum is
    # not representable, so full scale (1.0) rounds up past the limit,
    # slips through the clip and wraps around when cast to the integer type.
    y64 = np.asarray(y, dtype=np.float64)
    if info.min < 0:
        scaled = y64 * info.max
    else:
        # Unsigned target: map [-1, 1] onto [0, max].
        scaled = (y64 + 1) / 2 * info.max
    return np.clip(np.round(scaled), info.min, info.max).astype(dtype)
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
def int_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert an integer array to another integer type.

    The full range of the source type is mapped linearly onto the full
    range of the target type.

    Args:
        y: Integer array
        dtype: Target integer data type

    Returns:
        Converted array
    """
    dst = np.iinfo(dtype)
    src = np.iinfo(y.dtype)
    src_span = src.max - src.min
    dst_span = dst.max - dst.min
    if src_span == 0:
        return np.full_like(y, dst.min, dtype=dtype)
    # Offset from the source minimum, stretched to the target span.
    offset_from_min = y.astype(np.float64) - src.min
    rescaled = offset_from_min * (dst_span / src_span) + dst.min
    return np.clip(np.round(rescaled), dst.min, dst.max).astype(dtype)
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
def int_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert an integer array to a float type.

    Signed input is divided by the largest magnitude of its type.
    Unsigned input is mapped from [0, max] onto [-1, 1].

    Args:
        y: Integer array
        dtype: Target float data type

    Returns:
        Float array

    Raises:
        ValueError: If y does not have an integer dtype (raised by np.iinfo)
    """
    info = np.iinfo(y.dtype)
    y64 = y.astype(np.float64)
    if info.min == 0:
        # Exact inverse of float_to_int's unsigned mapping
        # ((y + 1) / 2 * max); subtracting the midpoint and dividing by max
        # would only cover about [-0.5, 0.5].
        y_normalized = y64 / info.max * 2 - 1
    else:
        y_normalized = y64 / max(abs(info.min), abs(info.max))
    return y_normalized.astype(dtype)
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
def float_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Cast a float array to another float type.

    Args:
        y: Float array
        dtype: Target data type

    Returns:
        Converted array
    """
    converted = y.astype(dtype)
    return converted
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
def get_center_value_from_dtype(dtype: DTypeLike) -> int:
    """
    Get the midpoint value of a data type's range.

    Args:
        dtype: Data type

    Returns:
        0 for float types; for integer types the mean of the type's minimum
        and maximum, truncated toward zero
    """
    if np.issubdtype(dtype, np.floating):
        return 0
    limits = np.iinfo(dtype)
    return int((limits.min + limits.max) / 2)
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
def convert_to_dtype(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert an array to the requested data type.

    Dispatches to float_to_float, float_to_int, int_to_float or int_to_int
    depending on whether the source and the target are float types.

    Args:
        y: Input array
        dtype: Target data type

    Returns:
        Converted array
    """
    source_is_float = is_float(y)
    target_is_float = is_float_dtype(dtype)
    if source_is_float:
        converter = float_to_float if target_is_float else float_to_int
    else:
        converter = int_to_float if target_is_float else int_to_int
    return converter(y, dtype)
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
def dc_offset(y: np.ndarray, offset: Union[float, int]) -> np.ndarray:
    """
    Add a DC offset to the audio.

    The offset is applied in float32 and the result is converted back to
    the input's dtype.

    Args:
        y: Audio array
        offset: Value added to every sample

    Returns:
        Shifted array
    """
    source_dtype = y.dtype
    shifted = convert_to_dtype(y, np.float32) + offset
    return convert_to_dtype(shifted, source_dtype)
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
def gain(y: np.ndarray, gain_value: Union[float, int]) -> np.ndarray:
    """
    Apply a gain factor to the audio.

    The factor is applied in float32 and the result is converted back to
    the input's dtype.

    Args:
        y: Audio array
        gain_value: Multiplier applied to every sample

    Returns:
        Scaled array
    """
    source_dtype = y.dtype
    amplified = convert_to_dtype(y, np.float32) * gain_value
    return convert_to_dtype(amplified, source_dtype)
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
def normalize(y: np.ndarray, target_peak: Union[float, int] = 1.0) -> np.ndarray:
    """
    Normalize audio to a target peak value.

    The scaling is done in float32 and the result is converted back to the
    input's dtype. Silent (all-zero) and empty input is returned unscaled.

    Args:
        y: Audio array
        target_peak: Desired peak value

    Returns:
        Normalized array
    """
    orig_dtype = y.dtype
    y = convert_to_dtype(y, np.float32)
    # np.max raises ValueError on an empty array; there is nothing to scale then.
    if y.size:
        current_peak = np.max(np.abs(y))
        if current_peak > 0:
            y = y * (target_peak / current_peak)
    return convert_to_dtype(y, orig_dtype)
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
def create_zero_array(samples: int, dtype: DTypeLike) -> np.ndarray:
    """
    Create a 1-D array filled with the centre value of a data type.

    Args:
        samples: Number of samples; values below 1 give an empty array
        dtype: Data type

    Returns:
        Array of length ``samples`` holding get_center_value_from_dtype(dtype)
    """
    # Fill in one vectorised call instead of building a Python list and
    # recomputing the centre value for every sample.
    return np.full(max(samples, 0), get_center_value_from_dtype(dtype), dtype=dtype)
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
def split_channels(y: np.ndarray) -> Tuple[np.ndarray, ...]:
    """
    Split multi-channel audio into one array per channel.

    Args:
        y: Audio array

    Returns:
        Tuple of per-channel arrays; 1-D input is returned as a
        single-element tuple
    """
    channels, _samples, array_index, flatten = get_info_array(y)
    if flatten:
        return (y,)
    if array_index == 1:
        # (channels, samples): each row is a channel.
        return tuple(y[ch, :] for ch in range(channels))
    # (samples, channels): each column is a channel.
    return tuple(y[:, ch] for ch in range(channels))
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
from scipy.signal import windows
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
def get_stft_obj(sr: int, n_fft: int, hop: int) -> ShortTimeFFT:
    """
    Build the ShortTimeFFT object used for spectral processing.

    The window is a DPSS window (NW=3) when the MVSEPLESS_DPSS environment
    variable evaluates to true via str2bool; otherwise a Hann window.

    Args:
        sr: Sample rate
        n_fft: Window length in samples
        hop: Hop size in samples

    Returns:
        ShortTimeFFT instance
    """
    use_dpss = str2bool(os.environ.get("MVSEPLESS_DPSS", "False"))
    window = dpss(n_fft, NW=3, sym=False) if use_dpss else hann(n_fft, sym=False)
    return ShortTimeFFT(window, hop=hop, fs=sr, scale_to='magnitude', phase_shift=None)
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
def split_mid_side(
    y: np.ndarray,
    var: int = 1,
    sr: Optional[int] = None
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Split stereo audio into mid and side parts.

    Variants:
        0: side = each channel minus the L/R average; mid = input - side
        1: mid = L/R average on both channels; side = input - mid
        2: per sample, the centre is the smaller magnitude of L and R (with
           the sign of L) where both have the same sign, else 0; side is
           each channel minus that centre
        3: same idea per STFT bin (needs ``sr``); returns float32 arrays
           built with multi_channel_array_from_arrays
        4: mono mid (L/R average) and mono side (L - R)

    Args:
        y: Stereo audio array
        var: Split variant (0-4)
        sr: Sample rate, required for variant 3

    Returns:
        Tuple (mid, side)

    Raises:
        Exception: If the input is not stereo, or variant 3 is used without sr
        ValueError: If the variant is unknown
    """
    channels, samples, array_index, flatten = get_info_array(y)
    axis = get_axis_from_array_index(array_index)
    if channels != 2:
        raise Exception(_i18n("stereo_required"))
    orig_dtype = y.dtype
    y = convert_to_dtype(y, np.float32)
    left_channel, right_channel = split_channels(y)
    mid_channel_one = (left_channel * 0.5) + (right_channel * 0.5)

    if var == 0:
        print(_i18n("mid_side_var0"))
        side_channel = np.stack(
            [left_channel - mid_channel_one, right_channel - mid_channel_one],
            axis=axis,
        )
        mid_channel = y - side_channel
    elif var == 1:
        print(_i18n("mid_side_var1"))
        mid_channel = np.stack([mid_channel_one, mid_channel_one], axis=axis)
        side_channel = y - mid_channel
    elif var == 2:
        print(_i18n("mid_side_var2"))
        same_sign = (left_channel * right_channel) > 0
        center_mono = np.where(
            same_sign,
            np.minimum(np.abs(left_channel), np.abs(right_channel)) * np.sign(left_channel),
            0.0
        )
        mid_channel = np.stack([center_mono, center_mono], axis=axis)
        side_channel = np.stack(
            [left_channel - center_mono, right_channel - center_mono],
            axis=axis,
        )
    elif var == 3:
        print(_i18n("mid_side_var3"))
        if not sr:
            raise Exception(_i18n("sr_required"))

        sft = get_stft_obj(sr, n_fft=n_fft, hop=hop)

        # Spectra of the left and right channels.
        Lf = sft.stft(left_channel)
        Rf = sft.stft(right_channel)

        # Bins where the channels are in phase (positive real cross-spectrum).
        # Re(L * conj(R)) equals Re(R * conj(L)), so one mask serves both sides.
        in_phase = np.real(Lf * np.conj(Rf)) > 0
        magC = np.minimum(np.abs(Lf), np.abs(Rf)) * in_phase

        # The centre estimate for each side takes the other channel's phase.
        C_L = magC * np.exp(1j * np.angle(Rf))
        C_R = magC * np.exp(1j * np.angle(Lf))
        SL = Lf - C_L
        SR = Rf - C_R

        # Use the sample count from get_info_array: y.shape[-1] is the
        # channel count (2) for (samples, channels) input and would cut
        # the reconstruction to two samples.
        center_l = sft.istft(C_L, k1=samples)
        center_r = sft.istft(C_R, k1=samples)
        side_l = sft.istft(SL, k1=samples)
        side_r = sft.istft(SR, k1=samples)

        # NOTE(review): y is float32 here, so this variant does not convert
        # back to the input dtype like the others do; confirm intended.
        mid_ch = multi_channel_array_from_arrays(center_l, center_r, index=array_index, dtype=y.dtype)
        side_ch = multi_channel_array_from_arrays(side_l, side_r, index=array_index, dtype=y.dtype)

        return mid_ch, side_ch
    elif var == 4:
        print(_i18n("mid_side_var4"))
        mid_channel = mid_channel_one
        side_channel = left_channel - right_channel
    else:
        raise ValueError(_i18n("unknown_var", var=var))

    return convert_to_dtype(mid_channel, orig_dtype), convert_to_dtype(side_channel, orig_dtype)
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
def mid_side_to_stereo(
    y: np.ndarray,
    z: np.ndarray,
    index: int = -1,
    dtype: DTypeLike = np.float32
) -> np.ndarray:
    """
    Rebuild a two-channel signal from a Mid/Side pair.

    The first output channel is ``mid + side`` and the second one is
    ``mid - side``. Both inputs are converted to float32 before summing.

    Args:
        y: Mid channel.
        z: Side channel.
        index: Layout index forwarded to ``multi_channel_array_from_arrays``.
        dtype: Data type of the returned array.

    Returns:
        The two-channel array converted to ``dtype``.
    """
    mid_f32 = convert_to_dtype(y, np.float32)
    side_f32 = convert_to_dtype(z, np.float32)
    # Mid is duplicated on both channels; side enters the second one inverted.
    mid_pair = multi_channel_array_from_arrays(mid_f32, mid_f32, index=index, dtype=np.float32)
    side_pair = multi_channel_array_from_arrays(side_f32, -side_f32, index=index, dtype=np.float32)
    return convert_to_dtype(mid_pair + side_pair, dtype)
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
def mono_to_stereo(
    y: np.ndarray,
    index: int,
    num_channels: int = 2
) -> np.ndarray:
    """
    Expand an array to ``num_channels`` channels.

    A single-channel input is duplicated ``num_channels`` times. An input
    that already has at least ``num_channels`` channels is returned as is.
    Otherwise zero-filled channels are appended until the count is reached.

    Args:
        y: Input audio array.
        index: Layout index; translated by ``get_axis_from_array_index``
            into the axis used for stacking/appending channels.
        num_channels: Requested number of channels.

    Returns:
        The array with the requested number of channels.
    """
    channels, samples, array_index, is_flat = get_info_array(y)
    # The result is not used below; the call is kept so that whatever the
    # helper does with the detected layout still happens.
    get_axis_from_array_index(array_index)
    stack_axis = get_axis_from_array_index(index)
    source_dtype = y.dtype

    if channels == 1:
        mono = y if is_flat else y.flatten()
        return np.stack([mono] * num_channels, axis=stack_axis, dtype=source_dtype)

    if num_channels <= channels:
        return y

    # NOTE(review): np.append with an explicit axis requires both operands to
    # have the same number of dimensions - confirm what shape
    # create_zero_array returns for a multi-channel ``y``.
    for _ in range(num_channels - channels):
        y = np.append(y, create_zero_array(samples, source_dtype), axis=stack_axis)
    return y
|
| 865 |
-
|
| 866 |
-
|
| 867 |
-
def stereo_to_mono(y: np.ndarray, to_flatten: bool = False) -> np.ndarray:
    """
    Down-mix a multi-channel array to one channel by averaging the channels.

    Args:
        y: Input audio array.
        to_flatten: Return a 1-D array instead of a 2-D single-channel one.

    Returns:
        The mono mix. Input that already has one channel is returned after
        conversion to float32; the mix itself is accumulated in a float64
        buffer and is not converted back to the input dtype.
    """
    channels, samples, array_index, _ = get_info_array(y)
    y = convert_to_dtype(y, np.float32)

    if channels <= 1:
        return y

    mix = create_zero_array(samples, np.float64)
    share = 1 / channels
    for channel in split_channels(y):
        mix = mix + gain(channel, share)

    if to_flatten:
        return mix

    # NOTE(review): other helpers in this file treat ``array_index`` as the
    # samples axis; here index 0 yields a (1, N) result - confirm the layout
    # is intended.
    target_shape = (1, -1) if array_index == 0 else (-1, 1)
    return mix.reshape(target_shape)
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
def multi_channel_array_from_arrays(
    *arrays: np.ndarray,
    index: int = -1,
    dtype: DTypeLike
) -> np.ndarray:
    """
    Stack separate channel arrays into one multi-channel array.

    Args:
        *arrays: One array per channel.
        index: Layout index; ``get_axis_from_array_index`` turns it into the
            axis along which the channels are stacked.
        dtype: Data type each channel is converted to and of the result.

    Returns:
        The stacked multi-channel array.
    """
    converted = [convert_to_dtype(channel, dtype) for channel in arrays]
    stack_axis = get_axis_from_array_index(index)
    return np.stack(converted, axis=stack_axis, dtype=dtype)
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
def reshape(y: np.ndarray, shape: Tuple[str, ...] = ("channels", "samples")) -> np.ndarray:
    """
    Re-arrange an audio array into the requested layout.

    Args:
        y: Input audio array.
        shape: Target layout: ``("channels", "samples")``,
            ``("samples", "channels")`` or ``("samples",)``.

    Returns:
        The array in the requested layout.

    Raises:
        ValueError: If ``shape`` is not one of the supported layouts.
    """
    channels, samples, array_index, flatten = get_info_array(y)

    if shape == ("channels", "samples"):
        if array_index == 0:
            return y.T
        if array_index == 1:
            return y
        # NOTE(review): this branch tests ``array_index is None`` while the
        # ("samples", "channels") branch tests ``array_index == -1`` -
        # confirm which value get_info_array reports for 1-D input.
        if array_index is None and flatten:
            return y.reshape((-1, 1)).T
        return y if y.shape[0] == channels else y.T

    if shape == ("samples", "channels"):
        if array_index == 1:  # (channels, samples)
            return y.T
        if array_index == 0:  # (samples, channels)
            return y
        if array_index == -1 and flatten:
            return y.reshape((-1, 1))
        return y if y.shape[0] == samples else y.T

    if shape == ("samples",):
        if flatten:
            return y
        if channels == 1:
            return y.flatten()
        return stereo_to_mono(y, to_flatten=True)

    raise ValueError(f"{_i18n('unknown_shape')}: {shape}")
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
def easy_resampler(y: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """
    Resample audio to a new sample rate with ``scipy.signal.resample``.

    Args:
        y: Input audio array.
        orig_sr: Sample rate of ``y``.
        target_sr: Desired sample rate.

    Returns:
        The resampled array, converted back to the dtype of ``y``.
    """
    _channels, n_in, sample_axis, _flat = get_info_array(y)
    source_dtype = y.dtype
    # Output length is the input length scaled by the rate ratio, rounded up.
    n_out = int(np.ceil(n_in * (float(target_sr) / orig_sr)))
    return convert_to_dtype(resample(y, n_out, axis=sample_axis), source_dtype)
|
| 986 |
-
|
| 987 |
-
|
| 988 |
-
def add_zero_to_end(y: np.ndarray, max_samples: int) -> np.ndarray:
    """
    Force an array to exactly ``max_samples`` samples.

    Shorter input is padded at the end with the value returned by
    ``get_center_value_from_dtype`` for its dtype; input that is already long
    enough is cut with ``trim``.

    Args:
        y: Input audio array.
        max_samples: Target number of samples.

    Returns:
        The padded or trimmed array.
    """
    _channels, samples, array_index, flatten = get_info_array(y)
    fill = get_center_value_from_dtype(y.dtype)

    if samples >= max_samples:
        return trim(y, 0, max_samples)

    missing = max_samples - samples
    if flatten:
        pad_width = (0, missing)
    elif array_index == 1:
        pad_width = ((0, 0), (0, missing))
    else:
        pad_width = ((0, missing), (0, 0))
    return np.pad(y, pad_width, mode="constant", constant_values=fill)
|
| 1012 |
-
|
| 1013 |
-
|
| 1014 |
-
def fit_arrays(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
    srs: Union[Tuple[int, ...], List[int]],
    max_channels: int = 2,
    min_sr: int = 44100,
    flatten: bool = False,
    max_samples: int = -1,
    extend: bool = True
) -> Tuple[np.ndarray, ...]:
    """
    Bring several audio arrays to a common sample rate, layout and length.

    Every array is resampled to ``min_sr``. With ``flatten`` it is then mixed
    down to a 1-D mono array; otherwise it is expanded to ``max_channels``
    channels (or mixed to mono when ``max_channels`` is below 2) and laid out
    as ``(channels, samples)``. With ``extend`` all results are finally
    padded to the length of the longest one.

    Args:
        arrays: Input audio arrays.
        srs: Sample rate of each input array.
        max_channels: Channel count of the results.
        min_sr: Sample rate all arrays are resampled to.
        flatten: Return 1-D mono arrays.
        max_samples: NOTE(review): accepted for interface compatibility; the
            common length is derived from the fitted arrays and this value is
            not consulted.
        extend: Pad every result to the longest result.

    Returns:
        Tuple of fitted arrays, in input order.

    Raises:
        Exception: If ``arrays`` and ``srs`` differ in length.
    """
    if len(arrays) != len(srs):
        raise Exception(_i18n("arrays_srs_mismatch"))

    total = len(arrays)
    fitted = []

    for position, (array, sr) in enumerate(zip(arrays, srs), start=1):
        _channels, _samples, array_index, _flat = get_info_array(array)
        current = easy_resampler(array, sr, min_sr)
        if flatten:
            current = stereo_to_mono(current, to_flatten=True)
        else:
            if max_channels >= 2:
                current = mono_to_stereo(current, array_index, max_channels)
            else:
                current = stereo_to_mono(current)
            current = reshape(current, shape=("channels", "samples"))
        fitted.append(current)
        print(_i18n("fitting_progress", current=position, total=total), end="\r")
    print("")

    if extend:
        # Measure the length AFTER resampling: sample counts taken from the
        # raw inputs refer to different sample rates and could make the pad
        # step cut off audio or append needless silence. Both the 1-D and
        # the (channels, samples) layouts keep samples on the last axis.
        target_length = max((item.shape[-1] for item in fitted), default=0)
        for position, item in enumerate(fitted, start=1):
            fitted[position - 1] = add_zero_to_end(item, target_length)
            print(_i18n("extending_progress", current=position, total=total), end="\r")
        print("")

    return tuple(fitted)
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
def subtractor(
    y: np.ndarray,
    z: np.ndarray,
    sr1: int,
    sr2: int,
    spectrogram: bool = False
) -> Tuple[np.ndarray, int]:
    """
    Subtract one audio signal from another.

    Both signals are first fitted to the lower of the two sample rates and
    to the larger of the two channel counts.

    Args:
        y: Signal to subtract from.
        z: Signal being subtracted.
        sr1: Sample rate of ``y``.
        sr2: Sample rate of ``z``.
        spectrogram: Subtract STFT magnitudes (keeping the phase of ``y``)
            instead of subtracting the waveforms sample by sample.

    Returns:
        Tuple of (difference in the dtype of ``y``, sample rate of the result).
    """
    channels_y, *_ = get_info_array(y)
    channels_z, *_ = get_info_array(z)
    result_dtype = y.dtype
    y = convert_to_dtype(y, np.float32)
    z = convert_to_dtype(z, np.float32)
    channel_count = max(channels_y, channels_z)
    target_sr = min(sr1, sr2)
    fitted = fit_arrays([y, z], [sr1, sr2], max_channels=channel_count, min_sr=target_sr)
    y, z = fitted[0], fitted[1]

    if not spectrogram:
        print(_i18n("subtract_phase"))
        return convert_to_dtype(y - z, result_dtype), target_sr

    print(_i18n("subtract_spectrogram"))
    sft = get_stft_obj(target_sr, n_fft=n_fft, hop=hop)
    outputs = []

    # One channel at a time, so only one pair of spectra is alive at once.
    for ch_y, ch_z in zip(split_channels(y), split_channels(z)):
        spec_y = sft.stft(ch_y.astype(np.float32))
        spec_z = sft.stft(ch_z.astype(np.float32))

        # Magnitude difference clipped at zero, re-combined with y's phase.
        magnitude = np.maximum(np.abs(spec_y) - np.abs(spec_z), 0)
        res_spec = magnitude * np.exp(1j * np.angle(spec_y))

        del spec_y, spec_z, magnitude

        outputs.append(sft.istft(res_spec, k1=ch_y.shape[-1]))

    subtracted = multi_channel_array_from_arrays(*outputs, index=1, dtype=result_dtype)
    return subtracted, target_sr
|
| 1127 |
-
|
| 1128 |
-
|
| 1129 |
-
def absmax(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
    """
    Return the element(s) with the largest absolute value, sign preserved.

    Args:
        a: Input array.
        axis: Axis to reduce along; ``None`` searches the whole array.

    Returns:
        A single element when ``axis`` is ``None``, otherwise an array with
        ``axis`` removed.
    """
    if axis is None:
        return a.flatten()[np.argmax(np.abs(a))]

    # Open index grid over every axis except the reduced one ...
    remaining = list(a.shape)
    del remaining[axis]
    grid = list(np.ogrid[tuple(slice(0, extent) for extent in remaining)])
    # ... then slot the winning positions in at the reduced axis.
    winners = np.abs(a).argmax(axis=axis)
    grid.insert(axis % a.ndim, winners)
    return a[tuple(grid)]
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
def absmin(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
    """
    Return the element(s) with the smallest absolute value, sign preserved.

    Args:
        a: Input array.
        axis: Axis to reduce along; ``None`` searches the whole array.

    Returns:
        A single element when ``axis`` is ``None``, otherwise an array with
        ``axis`` removed.
    """
    if axis is None:
        return a.flatten()[np.argmin(np.abs(a))]
    dims = list(a.shape)
    dims.pop(axis)
    # np.ogrid may hand back a tuple (it does on recent NumPy releases),
    # which has no insert(); copy it into a list first, as absmax does.
    indices = list(np.ogrid[tuple(slice(0, d) for d in dims)])
    argmin = np.abs(a).argmin(axis=axis)
    indices.insert(axis % len(a.shape), argmin)
    return a[tuple(indices)]
|
| 1170 |
-
|
| 1171 |
-
|
| 1172 |
-
def lambda_max(
    arr: np.ndarray,
    axis: Optional[int] = None,
    key: Optional[Callable] = None,
    keepdims: bool = False
) -> np.ndarray:
    """
    Pick the element(s) of ``arr`` that maximise a key function.

    Args:
        arr: Input array.
        axis: Axis to reduce along; ``None`` searches the whole array.
        key: Function applied to ``arr`` to produce the values that are
            compared; defaults to ``np.abs``.
        keepdims: Keep the reduced axis with length one. Only applies when
            ``axis`` is given.

    Returns:
        The selected element(s), taken from ``arr`` itself.
    """
    score = np.abs if key is None else key
    best = np.argmax(score(arr), axis)
    if axis is None:
        return arr.flatten()[best]
    picked = np.take_along_axis(arr, np.expand_dims(best, axis), axis)
    return picked if keepdims else np.squeeze(picked, axis=axis)
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
def lambda_min(
    arr: np.ndarray,
    axis: Optional[int] = None,
    key: Optional[Callable] = None,
    keepdims: bool = False
) -> np.ndarray:
    """
    Pick the element(s) of ``arr`` that minimise a key function.

    Args:
        arr: Input array.
        axis: Axis to reduce along; ``None`` searches the whole array.
        key: Function applied to ``arr`` to produce the values that are
            compared; defaults to ``np.abs``.
        keepdims: Keep the reduced axis with length one. Only applies when
            ``axis`` is given.

    Returns:
        The selected element(s), taken from ``arr`` itself.
    """
    score = np.abs if key is None else key
    best = np.argmin(score(arr), axis)
    if axis is None:
        return arr.flatten()[best]
    picked = np.take_along_axis(arr, np.expand_dims(best, axis), axis)
    return picked if keepdims else np.squeeze(picked, axis=axis)
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
def ensemble(
    pred_tracks: List[np.ndarray],
    srs: List[int],
    weights: List[float],
    algorithm: str,
    dtype: np.dtype = np.float32
) -> Tuple[np.ndarray, int]:
    """
    Combine several predictions of the same track in the STFT domain.

    All tracks are fitted to two channels and a common sample rate (the
    lowest input rate for ``"min_fft"``, the highest one otherwise), then
    each channel is combined bin by bin.

    Args:
        pred_tracks: Predicted audio arrays.
        srs: Sample rate of each prediction.
        weights: Per-track weights; only used by ``"avg_fft"``.
        algorithm: ``"avg_fft"`` (weighted mean), ``"median_fft"`` (median of
            real and imaginary parts), ``"min_fft"`` or ``"max_fft"`` (bin
            with the smallest / largest magnitude).
        dtype: Data type of the result.

    Returns:
        Tuple of (combined two-channel audio, its sample rate).

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    stacking_algorithms = ("min_fft", "max_fft", "median_fft")
    # Reject unknown names up front, before any resampling or STFT work.
    if algorithm != "avg_fft" and algorithm not in stacking_algorithms:
        raise ValueError(_i18n("unknown_algorithm", alg=algorithm))

    if algorithm == "min_fft":
        max_sr = int(min(srs))
    else:
        max_sr = int(max(srs))

    # Same length, layout and sample rate for every track.
    pred_tracks = list(fit_arrays(pred_tracks, srs, max_channels=2, min_sr=max_sr))

    sft = get_stft_obj(max_sr, n_fft=2048, hop=1024)
    final_length = pred_tracks[0].shape[-1]
    # Loop invariant; only the weighted mean needs it.
    total_weight = sum(weights) if algorithm == "avg_fft" else None
    ensemble_wav_channels = []

    for ch_idx in range(2):  # left and right
        if algorithm == "avg_fft":
            # Running weighted sum: one spectrum in memory at a time.
            accumulator = None
            for i, track in enumerate(pred_tracks):
                weighted_spec = sft.stft(track[ch_idx].astype(np.float32)) * weights[i]
                if accumulator is None:
                    accumulator = weighted_spec
                else:
                    accumulator += weighted_spec
            res_spec = accumulator / total_weight
        else:
            # Median and extrema need all spectra of this channel at once.
            accumulator = [sft.stft(track[ch_idx].astype(np.float32)) for track in pred_tracks]
            if algorithm == "median_fft":
                res_spec = np.median(np.real(accumulator), axis=0) + 1j * np.median(np.imag(accumulator), axis=0)
            elif algorithm == "min_fft":
                res_spec = lambda_min(np.array(accumulator), axis=0, key=np.abs)
            else:  # "max_fft"
                res_spec = absmax(np.array(accumulator), axis=0)

        ensemble_wav_channels.append(sft.istft(res_spec, k1=final_length))
        del accumulator

    result = multi_channel_array_from_arrays(*ensemble_wav_channels, index=1, dtype=dtype)
    print(_i18n("ensemble_complete"))
    return result, max_sr
|
| 1307 |
-
|
| 1308 |
-
|
| 1309 |
-
def concatenate(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
    srs: Union[Tuple[int, ...], List[int]],
    dtype=np.float32
) -> Tuple[np.ndarray, int]:
    """
    Join several audio arrays end to end.

    Every array is fitted to two channels at the highest of the given sample
    rates (without length padding) and the results are concatenated along
    axis 1.

    Args:
        arrays: Audio arrays, in playback order.
        srs: Sample rate of each array.
        dtype: Data type of the result.

    Returns:
        Tuple of (joined audio, its sample rate).
    """
    # max(srs), not max(*srs): unpacking a one-element list calls max() on a
    # bare int, which raises TypeError.
    max_sr = int(max(srs))
    fitted = fit_arrays([convert_to_dtype(array, np.float64) for array in arrays],
                        srs, max_channels=2, min_sr=max_sr, extend=False)
    result = np.concatenate(fitted, axis=1, dtype=np.float64)
    print(_i18n("concatenate_complete"))
    return convert_to_dtype(result, dtype), max_sr
|
| 1331 |
-
|
| 1332 |
-
|
| 1333 |
-
def trim(y: np.ndarray, start: int = 0, end: int = -1) -> np.ndarray:
    """
    Cut a sample range out of an audio array.

    Args:
        y: Input audio array.
        start: First sample to keep.
        end: Sample to stop before. Values that are not positive or that
            exceed the number of samples mean "up to the end".

    Returns:
        The selected slice; the input itself if its layout is not recognised.
    """
    _channels, samples, array_index, flatten = get_info_array(y)
    stop = end if 0 < end <= samples else samples

    if flatten:
        return y[start:stop]
    if array_index == 0:
        return y[start:stop, :]
    if array_index == 1:
        return y[:, start:stop]
    return y
|
| 1356 |
-
|
| 1357 |
-
|
| 1358 |
-
def reverse(y: np.ndarray) -> np.ndarray:
    """
    Reverse an audio array.

    Args:
        y: Input audio array.

    Returns:
        The array flipped entirely when it is 1-D, otherwise flipped along
        the axis reported by ``get_info_array``.
    """
    _channels, _samples, array_index, flatten = get_info_array(y)
    return np.flip(y) if flatten else np.flip(y, axis=array_index)
|
| 1373 |
-
|
| 1374 |
-
|
| 1375 |
-
def write(
    path: str,
    y: np.ndarray,
    sr: int,
    bitrate: Union[int, str] = 320,
    prefer_float: bool = False
) -> str:
    """
    Encode an audio array to a file by piping raw samples into FFmpeg.

    Args:
        path: Destination file; its extension selects the codec arguments.
        y: Audio array.
        sr: Sample rate of ``y``.
        bitrate: Requested bitrate in kbit/s (parsed by ``bitrate_to_int``).
        prefer_float: Passed to ``get_codec_args`` to choose the
            floating-point variant of the codec arguments.

    Returns:
        The path that was written (absolute when the ``MVSEPLESS_WRITE_ABS``
        environment variable is truthy).

    Raises:
        Exception: If ``sr`` is falsy or FFmpeg exits with a non-zero code.
    """
    if str2bool(os.environ.get("MVSEPLESS_WRITE_ABS", "False")):
        path = os.path.abspath(path)

    # Only ``ext`` is used below.
    name, ext = os.path.splitext(path)
    dir_path = os.path.dirname(path)
    if dir_path != "":
        os.makedirs(dir_path, exist_ok=True)

    if not sr:
        raise Exception(_i18n("sr_required"))

    dtype = y.dtype
    channels, *_ = get_info_array(y)
    # Raw input for FFmpeg is interleaved, i.e. sample-major.
    y = reshape(y, shape=("samples", "channels"))

    # Dtypes without a matching raw format are sent as 32-bit float.
    sample_format = SAMPLE_FORMATS_DICT.get(str(dtype), None)
    if not sample_format:
        sample_format = "f32le"
        y = convert_to_dtype(y, np.float32)

    # NaN and infinities are replaced by zero before encoding.
    y = np.nan_to_num(y, nan=0, posinf=0, neginf=0)

    # Cap the bitrate at the per-format maximum.
    bitrate_val = bitrate_to_int(bitrate)
    if ext == ".ogg":
        max_bitrate = get_ogg_bitrate(sr, channels)
        if bitrate_val > max_bitrate:
            print(_i18n("ogg_bitrate_adjusted", old=bitrate_val, new=max_bitrate))
            bitrate_val = max_bitrate
    elif ext == ".opus":
        max_bitrate = 256 * channels
        if bitrate_val > max_bitrate:
            print(_i18n("opus_bitrate_adjusted", old=bitrate_val, new=max_bitrate))
            bitrate_val = max_bitrate

    # Final clamp to the 32..320 range.
    bitrate_fixed = 32 if bitrate_val < 32 else 320 if bitrate_val > 320 else bitrate_val

    # "-i -" makes FFmpeg read the raw samples from stdin.
    cmd = [ffmpeg_path, "-y", "-f", sample_format, "-ar", str(sr), "-ac", str(channels),
           "-i", "-", *get_codec_args(ext, prefer_float), "-ab", f"{bitrate_fixed}k", path]

    process = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=None,
        stderr=subprocess.PIPE,
        bufsize=10**8
    )

    try:
        # stdout is not piped, so ``stdout_data`` is None here.
        stdout_data, stderr_data = process.communicate(input=y.tobytes())

        if process.returncode != 0:
            error_msg = stderr_data.decode('utf-8', errors='ignore')
            print(_i18n("ffmpeg_error", error=error_msg))
            raise Exception(_i18n("ffmpeg_exit_code", code=process.returncode))

    except Exception as e:
        # Also reached for the exit-code exception raised just above.
        print(_i18n("write_critical_error", error=str(e)))
        process.kill()
        raise e

    return path
|
| 1456 |
-
|
| 1457 |
-
|
| 1458 |
-
def multiwrite(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
    srs: Union[Tuple[int, ...], List[int]],
    paths: Union[Tuple[str, ...], List[str]],
    bitrate: Union[int, str] = 320,
    prefer_float: bool = False,
    callable_func: Optional[Callable] = None,
    strict: bool = False
) -> Tuple[str, ...]:
    """
    Write several audio arrays to files with ``write``.

    Args:
        arrays: Audio arrays.
        srs: Sample rate of each array.
        paths: Destination path of each array.
        bitrate: Bitrate forwarded to ``write``.
        prefer_float: Forwarded to ``write``.
        callable_func: Optional callback invoked with each path before it is
            written.
        strict: Re-raise the first write failure instead of collecting it.

    Returns:
        Tuple of the paths that were written.

    Raises:
        Exception: In strict mode on the first failed write; otherwise when
            no file at all could be written.
    """
    saved_paths = []
    exceptions = []

    # NOTE(review): when the three sequences differ in length nothing is
    # written and the "no files written" error below is raised with an empty
    # error list - confirm this is the intended reaction.
    if len(arrays) == len(srs) == len(paths):
        for array, sr, path in zip(arrays, srs, paths):
            if callable_func is not None:
                callable_func(path)
            try:
                written = write(path, array, sr, bitrate=bitrate, prefer_float=prefer_float)
                saved_paths.append(written)
            except Exception as e:
                if strict:
                    raise Exception(str(e))
                print(_i18n("write_error", error=str(e)))
                exceptions.append(str(e))

    if not saved_paths:
        exceptions_str = '\n'.join(exceptions)
        raise Exception(_i18n("no_files_written", errors=exceptions_str))

    return tuple(saved_paths)
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import subprocess
|
| 3 |
+
import numpy as np
|
| 4 |
+
from gradio_helper import str2bool
|
| 5 |
+
from scipy.signal import ShortTimeFFT, resample
|
| 6 |
+
from scipy.signal.windows import dpss, hann
|
| 7 |
+
from numpy.typing import DTypeLike
|
| 8 |
+
from typing import List, Tuple, Optional, Union, Dict, Any, Callable
|
| 9 |
+
from i18n import _i18n
|
| 10 |
+
|
| 11 |
+
# Executables used when this module spawns the FFmpeg tools.
ffmpeg_path = "ffmpeg"
ffprobe_path = "ffprobe"
# Module-wide default STFT size and hop length.
n_fft = 4096
hop = 1024
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def average(*ints: Union[int, float]) -> float:
    """
    Compute the arithmetic mean of the given numbers.

    Args:
        *ints: Numbers to average; at least one is required.

    Returns:
        The mean value.
    """
    return sum(ints) / len(ints)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def check_installed() -> None:
    """
    Report whether the ffmpeg and ffprobe executables can be run.

    Each tool is started with ``-version``; a localized "found" or
    "not found" message is printed for it. Nothing is returned or raised for
    a missing tool.
    """
    probes = (
        (ffmpeg_path, "ffmpeg_found", "ffmpeg_not_found"),
        (ffprobe_path, "ffprobe_found", "ffprobe_not_found"),
    )
    for executable, found_key, missing_key in probes:
        try:
            # The version banner is discarded, so it is captured as bytes.
            subprocess.check_output([executable, "-version"])
        except (OSError, subprocess.SubprocessError):
            # Missing/unrunnable binary or non-zero exit. A bare ``except``
            # here would also swallow KeyboardInterrupt and SystemExit.
            print(_i18n(missing_key))
        else:
            print(_i18n(found_key))
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def get_ogg_bitrate(sample_rate: int, channels: int = 2) -> int:
    """
    Return the bitrate used as the upper limit for OGG at a sample rate.

    Args:
        sample_rate: Sample rate in Hz.
        channels: Number of channels.

    Returns:
        Per-channel bitrate for the sample-rate tier multiplied by
        ``channels``.
    """
    # (minimum sample rate, per-channel bitrate), highest tier first.
    tiers = ((40000, 240), (26000, 190), (15000, 90), (9000, 50), (8000, 42))
    per_channel = next((rate for floor, rate in tiers if sample_rate >= floor), 30)
    return int(per_channel * channels)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# Raw sample format name for each supported NumPy dtype, keyed both by the
# dtype's name and by the NumPy scalar type. All formats are little-endian.
SAMPLE_FORMATS_DICT: Dict[Union[str, type], str] = {
    "int16": "s16le",
    "int32": "s32le",
    "float32": "f32le",
    "float64": "f64le",
    np.int16: "s16le",
    np.int32: "s32le",
    np.float32: "f32le",
    np.float64: "f64le",
}
|
| 87 |
+
|
| 88 |
+
# Audio-only format names.
audio_formats: List[str] = [
    'aac', 'ac3', 'ac4', 'adts', 'aiff', 'au', 'caf', 'dts', 'eac3',
    'flac', 'm4a', 'mp3', 'mp2', 'ogg', 'oga', 'opus', 'ra', 'raw',
    'snd', 'voc', 'wav', 'wma', 'wv'
]

# Video container names that can carry an audio stream.
video_formats_with_audio: List[str] = [
    '3gp', '3g2', 'asf', 'avi', 'flv', 'f4v', 'm4v', 'mkv', 'mov',
    'mp4', 'mpeg', 'mpg', 'mts', 'mxf', 'ogv', 'rm', 'rmvb', 'ts',
    'vob', 'webm', 'wmv'
]

# Everything listed as an input format: video containers first, then audio.
input_formats: List[str] = video_formats_with_audio + audio_formats

# Output format names.
output_formats: List[str] = [
    "mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff", "wma"
]

# The same lists as file extensions with a leading dot.
input_extensions: List[str] = [f".{of}" for of in input_formats]

output_extensions: List[str] = [f".{of}" for of in output_formats]
|
| 109 |
+
|
| 110 |
+
# FFmpeg codec arguments per output extension. The inner key is the
# ``prefer_float`` flag: True selects the floating-point / higher-depth
# variant, False the default one. Several codecs list the same arguments
# for both values.
codec_args: Dict[str, Dict[bool, List[str]]] = {
    ".mp3": {
        True: ["-c:a", "libmp3lame", "-sample_fmt", "fltp"],
        False: ["-c:a", "libmp3lame", "-sample_fmt", "s16p"]
    },
    ".wav": {
        True: ["-c:a", "pcm_f32le", "-sample_fmt", "flt"],
        False: ["-c:a", "pcm_s16le", "-sample_fmt", "s16"]
    },
    ".flac": {
        True: ["-c:a", "flac", "-sample_fmt", "s32"],
        False: ["-c:a", "flac", "-sample_fmt", "s16"]
    },
    ".ogg": {
        True: ["-c:a", "libvorbis", "-sample_fmt", "fltp"],
        False: ["-c:a", "libvorbis", "-sample_fmt", "fltp"]
    },
    ".opus": {
        True: ["-c:a", "libopus", "-sample_fmt", "flt"],
        False: ["-c:a", "libopus", "-sample_fmt", "s16"]
    },
    ".m4a": {
        True: ["-c:a", "aac", "-sample_fmt", "fltp"],
        False: ["-c:a", "aac", "-sample_fmt", "fltp"]
    },
    ".aac": {
        True: ["-c:a", "aac", "-sample_fmt", "fltp"],
        False: ["-c:a", "aac", "-sample_fmt", "fltp"]
    },
    ".ac3": {
        True: ["-c:a", "ac3", "-sample_fmt", "fltp"],
        False: ["-c:a", "ac3", "-sample_fmt", "fltp"]
    },
    ".aiff": {
        True: ["-c:a", "pcm_f32be", "-sample_fmt", "flt"],
        False: ["-c:a", "pcm_s16be", "-sample_fmt", "s16"]
    },
    ".wma": {
        True: ["-c:a", "wmav2", "-sample_fmt", "fltp"],
        False: ["-c:a", "wmav2", "-sample_fmt", "fltp"]
    }
}
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def get_codec_args(extension: str, prefer_float: bool) -> List[str]:
    """
    Look up the FFmpeg codec arguments for an output extension.

    Args:
        extension: File extension including the leading dot.
        prefer_float: Select the floating-point variant of the arguments.

    Returns:
        The argument list from ``codec_args``, or an empty list for an
        extension that has no entry.
    """
    per_extension = codec_args.get(extension)
    if per_extension is None:
        return []
    return per_extension[prefer_float]
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
# Characters that survive sanitize_output: decimal digits only.
allowed_chars: str = r"1234567890"


def sanitize_output(output: str) -> str:
    """
    Drop every character that is not listed in ``allowed_chars``.

    Args:
        output: Raw text.

    Returns:
        The remaining characters, in their original order.
    """
    return "".join(filter(lambda char: char in allowed_chars, output))
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def get_sr(path: str, stream: int = 0) -> int:
    """
    Read the sample rate of an audio stream with ffprobe.

    Args:
        path: File to inspect.
        stream: Index of the audio stream.

    Returns:
        The sample rate, or 0 (after printing a localized message) when no
        number could be read from the ffprobe output.
    """
    cmd = [ffprobe_path, "-i", path, "-v", "quiet", "-hide_banner",
           "-show_entries", "stream=sample_rate", "-select_streams", f"a:{stream}",
           "-of", "compact=p=0:nk=1"]
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    stdout, _stderr = process.communicate()
    # Undecodable bytes are dropped rather than raising.
    report = stdout.decode('utf-8', errors='ignore').strip()
    # Use only the first field of the first line: stripping non-digits from
    # the whole output would glue repeated lines into one bogus number
    # (e.g. "44100\n44100" -> 4410044100).
    first_line = report.splitlines()[0] if report else ""
    sample_rate = sanitize_output(first_line.split("|", 1)[0])
    if sample_rate.isdigit():
        return int(sample_rate)
    print(_i18n("sr_read_error", path=path))
    return 0
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def get_channels(path: str, stream: int = 0) -> int:
    """
    Query the channel count of an audio stream with ffprobe.

    Args:
        path: Path of the media file.
        stream: Index of the audio stream to inspect.

    Returns:
        The number of channels, or 0 when ffprobe produced no usable
        number (an error message is printed in that case).
    """
    probe_cmd = [
        ffprobe_path, "-i", path, "-v", "quiet", "-hide_banner",
        "-show_entries", "stream=channels",
        "-select_streams", f"a:{stream}",
        "-of", "compact=p=0:nk=1",
    ]
    completed = subprocess.run(
        probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    # Keep only digits so stray separators/newlines do not break int().
    digits = sanitize_output(completed.stdout.decode('utf-8').strip())
    if not digits.isdigit():
        print(_i18n("channels_read_error", path=path))
        return 0
    return int(digits)
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def check(path: str) -> bool:
    """
    Tell whether a file exposes a readable audio stream.

    Both probes are always executed (channels first, then sample rate).

    Args:
        path: Path of the file to test.

    Returns:
        True when both the channel count and the sample rate are non-zero.
    """
    n_channels = get_channels(path)
    sample_rate = get_sr(path)
    return not (n_channels == 0 or sample_rate == 0)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def read(
    path: str,
    sr: Optional[int] = None,
    mono: bool = False,
    dtype: DTypeLike = "float32",
    multi_channel: bool = False,
    num_channels: int = 2,
    stream: int = 0,
    flatten: bool = False
) -> Tuple[np.ndarray, int]:
    """
    Decode an audio stream to a NumPy array by piping raw PCM from FFmpeg.

    Args:
        path: Path of the media file.
        sr: Target sample rate; when falsy the file's own rate is probed.
        mono: Decode to a single channel.
        dtype: Requested sample dtype.
        multi_channel: Keep the file's own channel count (probed) instead
            of ``num_channels``. Ignored when ``mono`` is set.
        num_channels: Channel count used when neither ``mono`` nor
            ``multi_channel`` is set.
        stream: Index of the audio stream to decode.
        flatten: With ``mono``, return a 1-D array instead of ``(1, N)``.

    Returns:
        Tuple ``(audio, sample_rate)``; audio is ``(channels, samples)``
        unless ``mono`` and ``flatten`` are both set.
    """
    output_format = SAMPLE_FORMATS_DICT.get(dtype, None)
    if not sr:
        sr = get_sr(path, stream)
    if mono:
        channels = 1
    elif multi_channel:
        channels = get_channels(path, stream)
    else:
        channels = num_channels

    # When the dtype has no raw FFmpeg format, decode as f32le and convert.
    has_native_format = bool(output_format)
    if not has_native_format:
        output_format = "f32le"

    cmd = [ffmpeg_path, "-i", path, "-map", f"0:a:{stream}", "-vn",
           "-f", output_format, "-ac", str(channels), "-ar", str(sr), "-"]
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=10**8
    )
    stdout, stderr = process.communicate()

    if has_native_format:
        y = np.frombuffer(stdout, dtype=dtype)
    else:
        y = convert_to_dtype(np.frombuffer(stdout, dtype=np.float32), dtype)

    # Samples arrive interleaved; de-interleave into (channels, samples).
    if not mono:
        y = y.reshape((-1, channels)).T
    elif flatten:
        y = y.flatten()
    else:
        y = y.reshape((-1, 1)).T

    # Copy so the result is a writable array detached from the pipe buffer.
    return y.copy(), sr
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def multiread(
    paths: Union[List[str], Tuple[str, ...]],
    *args,
    **kwargs
) -> Tuple[List[np.ndarray], List[int]]:
    """
    Decode several audio files with ``read`` and report progress.

    Args:
        paths: Paths of the files to decode.
        *args: Positional arguments forwarded to ``read``.
        **kwargs: Keyword arguments forwarded to ``read``.

    Returns:
        Tuple ``(arrays, sample_rates)`` in the order of ``paths``.
    """
    total = len(paths)
    decoded: List[np.ndarray] = []
    rates: List[int] = []
    for position, file_path in enumerate(paths, start=1):
        samples, rate = read(file_path, *args, **kwargs)
        decoded.append(samples)
        rates.append(rate)
        print(_i18n("reading_progress", current=position, total=total), end="\r")
    # Terminate the carriage-return progress line.
    print("")
    return decoded, rates
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
def bitrate_to_int(a: Union[str, int, float]) -> int:
    """
    Turn a bitrate given as text or number into an integer.

    Args:
        a: Bitrate such as ``"320k"``, ``"320"``, ``320`` or ``320.0``.

    Returns:
        The integer bitrate. Strings that are not plain digits (after an
        optional trailing ``k``/``K``) and unsupported types yield 320.
    """
    if isinstance(a, (int, float)):
        return int(a)
    if not isinstance(a, str):
        return 320

    digits = a[:-1] if a.endswith(("k", "K")) else a
    if digits.isdigit():
        return int(digits)

    print(_i18n("invalid_bitrate", bitrate=a))
    return 320
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
def get_info_array(y: np.ndarray) -> Tuple[int, int, Optional[int], bool]:
    """
    Describe the layout of an audio array.

    For 2-D input the shorter dimension is taken as the channel axis; when
    both dimensions are equal the array is treated as ``(samples, channels)``.

    Args:
        y: 1-D or 2-D audio array.

    Returns:
        Tuple ``(channels, samples, array_index, flatten)`` where
        ``array_index`` is the position of the samples axis (-1 for 1-D
        input) and ``flatten`` is True only for 1-D input.

    Raises:
        ValueError: If the array is neither 1-D nor 2-D.
    """
    if y.ndim == 1:
        return 1, len(y), -1, True
    if y.ndim != 2:
        raise ValueError(_i18n("array_dim_error"))

    rows, cols = y.shape
    if rows < cols:
        # (channels, samples)
        return rows, cols, 1, False
    # (samples, channels)
    return cols, rows, 0, False
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
def get_axis_from_array_index(index: int) -> int:
    """
    Map a samples-axis index to the complementary (channel) axis.

    Args:
        index: Samples-axis index as reported by ``get_info_array``.

    Returns:
        0 for index 1, 1 for index 0, and -1 for anything else
        (including -1).
    """
    if index == 1:
        return 0
    return 1 if index == 0 else -1
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def get_duration_from_array(y: np.ndarray, sr: Optional[int] = None) -> Union[float, int]:
    """
    Measure the length of an audio array.

    Args:
        y: Audio array.
        sr: Sample rate; when None the length is reported in samples.

    Returns:
        Duration in seconds when ``sr`` is given, otherwise the sample count.
    """
    _, n_samples, _, _ = get_info_array(y)
    return n_samples if sr is None else n_samples / sr
|
| 440 |
+
|
| 441 |
+
|
| 442 |
+
def is_float(y: np.ndarray) -> bool:
    """
    Tell whether an array stores floating-point samples.

    Args:
        y: Audio array.

    Returns:
        True when the array dtype is a NumPy floating type.
    """
    element_type = y.dtype
    return np.issubdtype(element_type, np.floating)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def is_float_dtype(dtype: DTypeLike) -> bool:
    """
    Tell whether a dtype specifier denotes a floating-point type.

    Args:
        dtype: Anything NumPy accepts as a dtype.

    Returns:
        True when the dtype is a NumPy floating type.
    """
    floating_family = np.floating
    return np.issubdtype(dtype, floating_family)
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
def float_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert normalized float audio to an integer sample format.

    Signed targets map ``[-1, 1]`` to ``[-max, max]``; unsigned targets map
    ``[-1, 1]`` to ``[0, max]``. Values outside that range are clipped.

    The arithmetic is done in float64: in float32 the product
    ``1.0 * 2147483647`` rounds up to ``2147483648.0``, which overflows when
    cast to int32.

    Args:
        y: Float audio array.
        dtype: Target integer dtype.

    Returns:
        Array of the target integer dtype.

    Raises:
        ValueError: If ``dtype`` is not an integer dtype (from ``np.iinfo``).
    """
    info = np.iinfo(dtype)
    min_val = info.min
    max_val = info.max

    scaled = np.asarray(y, dtype=np.float64)
    if min_val < 0:
        scaled = scaled * max_val
    else:
        scaled = (scaled + 1) / 2 * max_val

    # For 64-bit targets max_val is not representable in float64 and would
    # round up past the integer range; step down to the nearest float below.
    upper = float(max_val)
    if upper > max_val:
        upper = float(np.nextafter(upper, 0.0))

    return np.clip(np.round(scaled), float(min_val), upper).astype(dtype)
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
def int_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Rescale integer audio from one integer format to another.

    The full source range ``[min, max]`` is mapped linearly onto the full
    target range, then rounded and clipped.

    Args:
        y: Integer audio array.
        dtype: Target integer dtype.

    Returns:
        Array of the target integer dtype.
    """
    dst = np.iinfo(dtype)
    src = np.iinfo(y.dtype)
    src_span = src.max - src.min
    dst_span = dst.max - dst.min
    if src_span == 0:
        return np.full_like(y, dst.min, dtype=dtype)

    remapped = (y.astype(np.float64) - src.min) * (dst_span / src_span) + dst.min
    return np.clip(np.round(remapped), dst.min, dst.max).astype(dtype)
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
def int_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert integer audio to normalized float audio.

    Unsigned sources map ``[0, max]`` to ``[-1, 1]``, which is the exact
    inverse of the unsigned mapping in ``float_to_int``. Signed sources are
    divided by the largest absolute value of the type.

    Args:
        y: Integer audio array.
        dtype: Target float dtype.

    Returns:
        Array of the target float dtype.

    Raises:
        ValueError: If ``y`` does not have an integer dtype (from ``np.iinfo``).
    """
    info = np.iinfo(y.dtype)
    wide = y.astype(np.float64)
    if info.min == 0:
        # Inverse of float_to_int's (f + 1) / 2 * max.
        y_normalized = wide / info.max * 2.0 - 1.0
    else:
        y_normalized = wide / max(abs(info.min), abs(info.max))
    return y_normalized.astype(dtype)
|
| 542 |
+
|
| 543 |
+
|
| 544 |
+
def float_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Cast float audio to another float precision.

    Args:
        y: Float audio array.
        dtype: Target float dtype.

    Returns:
        The array cast with ``astype`` (no rescaling).
    """
    converted = y.astype(dtype)
    return converted
|
| 556 |
+
|
| 557 |
+
|
| 558 |
+
def get_center_value_from_dtype(dtype: DTypeLike) -> int:
    """
    Return the value that represents silence for a sample dtype.

    Args:
        dtype: Sample dtype.

    Returns:
        0 for float dtypes; for integer dtypes the truncated result of
        ``average(min, max)`` (``average`` is defined elsewhere in the
        module; presumably the arithmetic mean, to be confirmed).
    """
    if not is_float_dtype(dtype):
        limits = np.iinfo(dtype)
        return int(average(limits.min, limits.max))
    return 0
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
def convert_to_dtype(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert audio to another sample dtype, rescaling when needed.

    Dispatches to one of the four float/int conversion helpers depending
    on whether the source and the target are float or integer.

    Args:
        y: Input audio array.
        dtype: Target dtype.

    Returns:
        The converted array.
    """
    source_is_float = is_float(y)
    target_is_float = is_float_dtype(dtype)
    if source_is_float:
        converter = float_to_float if target_is_float else float_to_int
    else:
        converter = int_to_float if target_is_float else int_to_int
    return converter(y, dtype)
|
| 596 |
+
|
| 597 |
+
|
| 598 |
+
def dc_offset(y: np.ndarray, offset: Union[float, int]) -> np.ndarray:
    """
    Add a constant (DC) offset to the signal.

    The offset is applied in float32 and the result is converted back to
    the input dtype.

    Args:
        y: Audio array.
        offset: Value added to every sample in normalized float scale.

    Returns:
        The shifted array in the original dtype.
    """
    source_dtype = y.dtype
    shifted = convert_to_dtype(y, np.float32) + offset
    return convert_to_dtype(shifted, source_dtype)
|
| 613 |
+
|
| 614 |
+
|
| 615 |
+
def gain(y: np.ndarray, gain_value: Union[float, int]) -> np.ndarray:
    """
    Multiply the signal by a linear gain factor.

    The multiplication is done in float32 and the result is converted back
    to the input dtype.

    Args:
        y: Audio array.
        gain_value: Linear gain factor.

    Returns:
        The scaled array in the original dtype.
    """
    source_dtype = y.dtype
    amplified = convert_to_dtype(y, np.float32) * gain_value
    return convert_to_dtype(amplified, source_dtype)
|
| 630 |
+
|
| 631 |
+
|
| 632 |
+
def normalize(y: np.ndarray, target_peak: Union[float, int] = 1.0) -> np.ndarray:
    """
    Peak-normalize the signal.

    The signal is scaled in float32 so that its largest absolute sample
    equals ``target_peak``, then converted back to the input dtype. Empty
    and all-silent inputs are returned unscaled.

    Args:
        y: Audio array.
        target_peak: Desired peak in normalized float scale.

    Returns:
        The normalized array in the original dtype.
    """
    orig_dtype = y.dtype
    y = convert_to_dtype(y, np.float32)
    # np.max raises on zero-size input, so skip the peak search entirely.
    if y.size:
        current_peak = np.max(np.abs(y))
        if current_peak > 0:
            y = y * (target_peak / current_peak)
    return convert_to_dtype(y, orig_dtype)
|
| 650 |
+
|
| 651 |
+
|
| 652 |
+
def create_zero_array(samples: int, dtype: DTypeLike) -> np.ndarray:
    """
    Create a 1-D array of silence for a sample dtype.

    Every element is the dtype's center value as reported by
    ``get_center_value_from_dtype``.

    Args:
        samples: Number of samples.
        dtype: Sample dtype.

    Returns:
        1-D array of length ``samples`` filled with the center value.
    """
    # np.full allocates and fills natively instead of building a Python
    # list with one element per sample.
    return np.full(samples, get_center_value_from_dtype(dtype), dtype=dtype)
|
| 664 |
+
|
| 665 |
+
|
| 666 |
+
def split_channels(y: np.ndarray) -> Tuple[np.ndarray, ...]:
    """
    Split a multichannel array into per-channel 1-D arrays.

    Args:
        y: Audio array in either layout, or a 1-D array.

    Returns:
        Tuple with one array per channel; a 1-D input is returned as a
        one-element tuple containing the input itself.
    """
    n_channels, _, sample_axis, is_flat = get_info_array(y)
    if is_flat:
        return (y,)
    if sample_axis == 1:
        # (channels, samples): channels are rows.
        return tuple(y[ch, :] for ch in range(n_channels))
    # (samples, channels): channels are columns.
    return tuple(y[:, ch] for ch in range(n_channels))
|
| 688 |
+
|
| 689 |
+
|
| 690 |
+
from scipy.signal import windows
|
| 691 |
+
|
| 692 |
+
|
| 693 |
+
def get_stft_obj(sr: int, n_fft: int, hop: int) -> ShortTimeFFT:
    """
    Build the ShortTimeFFT object used for spectral processing.

    A periodic Hann window is used unless the ``MVSEPLESS_DPSS``
    environment variable parses as true, in which case a DPSS window
    (``NW=3``) is used instead.

    Args:
        sr: Sample rate.
        n_fft: Window length in samples.
        hop: Hop size in samples.

    Returns:
        A configured ``ShortTimeFFT`` instance.
    """
    use_dpss = str2bool(os.environ.get("MVSEPLESS_DPSS", "False"))
    window = dpss(n_fft, NW=3, sym=False) if use_dpss else hann(n_fft, sym=False)
    return ShortTimeFFT(window, hop=hop, fs=sr, scale_to='magnitude', phase_shift=None)
|
| 711 |
+
|
| 712 |
+
|
| 713 |
+
def split_mid_side(
    y: np.ndarray,
    var: int = 1,
    sr: Optional[int] = None
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Split a stereo signal into center (mid) and side components.

    Variants:
        0: side is each channel minus the mono average; mid is the rest.
        1: mid is the mono average duplicated to stereo; side is the rest.
        2: mid is the per-sample common part (smaller magnitude where both
           channels share a sign); side is the rest.
        3: spectral split via STFT; requires ``sr``.
        4: mono mid ``(L + R) / 2`` and mono side ``L - R``.

    Args:
        y: Stereo audio array in either layout.
        var: Split variant (0-4).
        sr: Sample rate, required for variant 3.

    Returns:
        Tuple ``(mid, side)`` in the dtype of the input.

    Raises:
        Exception: If the input is not stereo, or ``sr`` is missing for
            variant 3.
        ValueError: If ``var`` is not a known variant.
    """
    channels, samples, array_index, flatten = get_info_array(y)
    axis = get_axis_from_array_index(array_index)
    if channels != 2:
        raise Exception(_i18n("stereo_required"))
    orig_dtype = y.dtype
    y = convert_to_dtype(y, np.float32)
    channels_arrays = split_channels(y)
    left_channel = channels_arrays[0]
    right_channel = channels_arrays[1]
    mid_channel_one = (left_channel * 0.5) + (right_channel * 0.5)

    if var == 0:
        print(_i18n("mid_side_var0"))
        side_channel = np.stack(
            [(left_channel + -mid_channel_one), (right_channel + -mid_channel_one)],
            axis=axis,
        )
        mid_channel = y + -side_channel
    elif var == 1:
        print(_i18n("mid_side_var1"))
        mid_channel = np.stack([mid_channel_one, mid_channel_one], axis=axis)
        side_channel = y + -mid_channel
    elif var == 2:
        print(_i18n("mid_side_var2"))
        same_sign = (left_channel * right_channel) > 0
        center_mono = np.where(
            same_sign,
            np.minimum(np.abs(left_channel), np.abs(right_channel)) * np.sign(left_channel),
            0.0
        )
        mid_channel = np.stack([center_mono, center_mono], axis=axis)
        stereo_L = left_channel - center_mono
        stereo_R = right_channel - center_mono
        side_channel = np.stack([stereo_L, stereo_R], axis=axis)
    elif var == 3:
        print(_i18n("mid_side_var3"))
        if not sr:
            raise Exception(_i18n("sr_required"))

        sft = get_stft_obj(sr, n_fft=n_fft, hop=hop)

        # Spectra of the left and right channels.
        Lf = sft.stft(left_channel)
        Rf = sft.stft(right_channel)

        # Bins where the channels are positively correlated.
        similarity_L = np.real(Lf * np.conj(Rf))
        similarity_R = np.real(Rf * np.conj(Lf))
        mask_l = similarity_L > 0
        mask_r = similarity_R > 0
        magL = np.abs(Lf)
        magR = np.abs(Rf)

        magC_L = np.minimum(magL, magR) * mask_l
        magC_R = np.minimum(magL, magR) * mask_r

        C_L = magC_L * np.exp(1j * np.angle(Rf))
        C_R = magC_R * np.exp(1j * np.angle(Lf))
        SL = Lf - C_L
        SR = Rf - C_R

        # Use the sample count, not y.shape[-1]: for a (samples, channels)
        # layout the last dimension is the channel count.
        center_l = sft.istft(C_L, k1=samples)
        center_r = sft.istft(C_R, k1=samples)
        side_l = sft.istft(SL, k1=samples)
        side_r = sft.istft(SR, k1=samples)

        # Return the caller's dtype, like every other variant.
        mid_ch = multi_channel_array_from_arrays(center_l, center_r, index=array_index, dtype=orig_dtype)
        side_ch = multi_channel_array_from_arrays(side_l, side_r, index=array_index, dtype=orig_dtype)

        return mid_ch, side_ch
    elif var == 4:
        print(_i18n("mid_side_var4"))
        mid_channel = mid_channel_one
        side_channel = left_channel + -right_channel
    else:
        raise ValueError(_i18n("unknown_var", var=var))

    return convert_to_dtype(mid_channel, orig_dtype), convert_to_dtype(side_channel, orig_dtype)
|
| 807 |
+
|
| 808 |
+
|
| 809 |
+
def mid_side_to_stereo(
    y: np.ndarray,
    z: np.ndarray,
    index: int = -1,
    dtype: DTypeLike = np.float32
) -> np.ndarray:
    """
    Rebuild a stereo signal from mono mid and side signals.

    The first channel is ``mid + side`` and the second is ``mid - side``.

    Args:
        y: Mid signal.
        z: Side signal.
        index: Samples-axis index selecting the output layout.
        dtype: Output dtype.

    Returns:
        Stereo array in the requested dtype.
    """
    mid_f32 = convert_to_dtype(y, np.float32)
    side_f32 = convert_to_dtype(z, np.float32)
    mid_pair = multi_channel_array_from_arrays(mid_f32, mid_f32, index=index, dtype=np.float32)
    side_pair = multi_channel_array_from_arrays(side_f32, -side_f32, index=index, dtype=np.float32)
    return convert_to_dtype(mid_pair + side_pair, dtype)
|
| 831 |
+
|
| 832 |
+
|
| 833 |
+
def mono_to_stereo(
    y: np.ndarray,
    index: int,
    num_channels: int = 2
) -> np.ndarray:
    """
    Expand a signal to ``num_channels`` channels.

    Mono input is duplicated into every channel. Input that already has
    several channels is returned as-is when it has enough of them;
    otherwise silent channels are appended along its own channel axis.

    Args:
        y: Audio array (1-D, or 2-D in either layout).
        index: Samples-axis index selecting the layout of the result when
            the input is mono.
        num_channels: Desired channel count.

    Returns:
        Array with at least ``num_channels`` channels in the input dtype.
    """
    channels, samples, array_index, flatten = get_info_array(y)
    orig_dtype = y.dtype

    if channels == 1:
        mono = y if flatten else y.flatten()
        new_axis = get_axis_from_array_index(index)
        return np.stack([mono] * num_channels, axis=new_axis, dtype=orig_dtype)

    if num_channels <= channels:
        return y

    # Append silence along the array's own channel axis. The silent block
    # needs an explicit channel dimension to be concatenated with 2-D data.
    channel_axis = get_axis_from_array_index(array_index)
    silent = np.expand_dims(create_zero_array(samples, orig_dtype), axis=channel_axis)
    return np.concatenate([y] + [silent] * (num_channels - channels), axis=channel_axis)
|
| 865 |
+
|
| 866 |
+
|
| 867 |
+
def stereo_to_mono(y: np.ndarray, to_flatten: bool = False) -> np.ndarray:
    """
    Downmix a multichannel signal to mono by averaging its channels.

    The result keeps the dtype of the input. Unless ``to_flatten`` is set,
    it also keeps the input layout: ``(channels, samples)`` input yields
    ``(1, samples)`` and ``(samples, channels)`` input yields
    ``(samples, 1)``.

    Args:
        y: Audio array.
        to_flatten: Return a 1-D array instead of a 2-D one.

    Returns:
        Mono array; single-channel input is returned as an unchanged copy.
    """
    channels, samples, array_index, flatten = get_info_array(y)
    orig_dtype = y.dtype
    if channels <= 1:
        return y.copy()

    channel_axis = get_axis_from_array_index(array_index)
    # Average in float64 over the channel axis, then restore the dtype.
    mono = np.mean(convert_to_dtype(y, np.float32), axis=channel_axis, dtype=np.float64)
    mono = convert_to_dtype(mono, orig_dtype)
    if to_flatten:
        return mono
    return np.expand_dims(mono, axis=channel_axis)
|
| 894 |
+
|
| 895 |
+
|
| 896 |
+
def multi_channel_array_from_arrays(
    *arrays: np.ndarray,
    index: int = -1,
    dtype: DTypeLike
) -> np.ndarray:
    """
    Stack per-channel arrays into one multichannel array.

    Args:
        *arrays: One array per channel.
        index: Samples-axis index selecting the layout of the result.
        dtype: Dtype every channel is converted to before stacking.

    Returns:
        The stacked array in the requested dtype.
    """
    converted = [convert_to_dtype(channel, dtype) for channel in arrays]
    stack_axis = get_axis_from_array_index(index)
    return np.stack(converted, axis=stack_axis, dtype=dtype)
|
| 915 |
+
|
| 916 |
+
|
| 917 |
+
def reshape(y: np.ndarray, shape: Tuple[str, ...] = ("channels", "samples")) -> np.ndarray:
    """
    Bring an audio array into a named layout.

    Args:
        y: Audio array.
        shape: One of ``("channels", "samples")``, ``("samples", "channels")``
            or ``("samples",)``.

    Returns:
        The array in the requested layout. Note that for
        ``("channels", "samples")`` a 1-D input is returned 1-D.

    Raises:
        ValueError: If ``shape`` is not one of the supported tuples.
    """
    channels, samples, array_index, flatten = get_info_array(y)

    if shape == ("samples",):
        if flatten:
            return y
        if channels == 1:
            return y.flatten()
        return stereo_to_mono(y, to_flatten=True)

    if shape == ("channels", "samples"):
        if array_index == 0:
            return y.T
        if array_index == 1:
            return y
        # 1-D input: transposing a 1-D array is a no-op, so it stays 1-D.
        return y if y.shape[0] == channels else y.T

    if shape == ("samples", "channels"):
        if array_index == 1:  # (channels, samples)
            return y.T
        if array_index == 0:  # (samples, channels)
            return y
        if array_index == -1 and flatten:
            return y.reshape((-1, 1))
        return y if y.shape[0] == samples else y.T

    raise ValueError(f"{_i18n('unknown_shape')}: {shape}")
|
| 966 |
+
|
| 967 |
+
|
| 968 |
+
def easy_resampler(y: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """
    Resample audio to a new sample rate along its samples axis.

    Integer input is converted to normalized float before resampling, so
    that the conversion back to the original dtype rescales it correctly.
    When the rates are equal, a copy of the input is returned without
    resampling.

    Args:
        y: Audio array.
        orig_sr: Sample rate of ``y``.
        target_sr: Desired sample rate.

    Returns:
        The resampled array in the dtype of the input.
    """
    channels, samples, array_index, flatten = get_info_array(y)
    orig_dtype = y.dtype
    if orig_sr == target_sr:
        return y.copy()

    ratio = float(target_sr) / orig_sr
    n_samples = int(np.ceil(samples * ratio))
    # convert_to_dtype expects normalized floats on the way back; resampling
    # raw integers would leave them in integer scale and saturate.
    working = y if is_float(y) else convert_to_dtype(y, np.float64)
    resampled = resample(working, n_samples, axis=array_index)
    return convert_to_dtype(resampled, orig_dtype)
|
| 986 |
+
|
| 987 |
+
|
| 988 |
+
def add_zero_to_end(y: np.ndarray, max_samples: int) -> np.ndarray:
    """
    Force an array to exactly ``max_samples`` samples.

    Shorter arrays are padded at the end with the dtype's center value;
    arrays that are already long enough are passed to ``trim``.

    Args:
        y: Audio array.
        max_samples: Target length in samples.

    Returns:
        The padded or trimmed array.
    """
    _, samples, array_index, flatten = get_info_array(y)
    center_value = get_center_value_from_dtype(y.dtype)
    if samples >= max_samples:
        return trim(y, 0, max_samples)

    missing = max_samples - samples
    if flatten:
        pad_width = (0, missing)
    elif array_index == 1:
        # (channels, samples): pad the second axis.
        pad_width = ((0, 0), (0, missing))
    else:
        # (samples, channels): pad the first axis.
        pad_width = ((0, missing), (0, 0))
    return np.pad(y, pad_width, mode="constant", constant_values=center_value)
|
| 1012 |
+
|
| 1013 |
+
|
| 1014 |
+
def fit_arrays(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
    srs: Union[Tuple[int, ...], List[int]],
    max_channels: int = 2,
    min_sr: int = 44100,
    flatten: bool = False,
    max_samples: int = -1,
    extend: bool = True
) -> Tuple[np.ndarray, ...]:
    """
    Bring several audio arrays to a common rate, channel count and length.

    Args:
        arrays: Audio arrays.
        srs: Sample rate of each array, in the same order.
        max_channels: Channel count of the result (ignored with ``flatten``).
        min_sr: Sample rate every array is resampled to.
        flatten: Downmix to 1-D mono arrays.
        max_samples: Target length in samples; a negative value means
            "length of the longest array after resampling".
        extend: Pad/trim every array to the target length.

    Returns:
        Tuple of the converted arrays, in input order.

    Raises:
        Exception: If ``arrays`` and ``srs`` differ in length.
    """
    if len(arrays) != len(srs):
        raise Exception(_i18n("arrays_srs_mismatch"))

    new_arrays = []
    len_arrays = len(arrays)

    for i, (array, sr) in enumerate(zip(arrays, srs), start=1):
        array_index1 = get_info_array(array)[2]
        a1 = easy_resampler(array, sr, min_sr)
        if flatten:
            a1 = stereo_to_mono(a1, to_flatten=True)
        elif max_channels >= 2:
            a1 = mono_to_stereo(a1, array_index1, max_channels)
        else:
            a1 = stereo_to_mono(a1)
        a1 = reshape(a1, shape=("channels", "samples"))
        new_arrays.append(a1)
        print(_i18n("fitting_progress", current=i, total=len_arrays), end="\r")
    print("")

    if extend:
        if max_samples < 0:
            # Measure after resampling: sample counts at the original
            # rates are not comparable across arrays.
            max_samples = max(
                (get_duration_from_array(a) for a in new_arrays), default=0
            )
        for i, array_ in enumerate(new_arrays, start=1):
            new_arrays[i - 1] = add_zero_to_end(array_, max_samples)
            print(_i18n("extending_progress", current=i, total=len_arrays), end="\r")
        print("")

    return tuple(new_arrays)
|
| 1071 |
+
|
| 1072 |
+
|
| 1073 |
+
def subtractor(
    y: np.ndarray,
    z: np.ndarray,
    sr1: int,
    sr2: int,
    spectrogram: bool = False
) -> Tuple[np.ndarray, int]:
    """
    Subtract one audio signal from another.

    Both signals are converted to float32 and fitted to the lower of the
    two sample rates and the larger of the two channel counts first.

    Args:
        y: Signal to subtract from.
        z: Signal to subtract.
        sr1: Sample rate of ``y``.
        sr2: Sample rate of ``z``.
        spectrogram: Subtract STFT magnitudes (keeping the phase of ``y``)
            instead of subtracting waveforms.

    Returns:
        Tuple ``(difference, sample_rate)``; the difference has the dtype
        of ``y``.
    """
    channels1 = get_info_array(y)[0]
    channels2 = get_info_array(z)[0]
    orig_dtype1 = y.dtype
    y = convert_to_dtype(y, np.float32)
    z = convert_to_dtype(z, np.float32)
    min_sr = min(sr1, sr2)
    fitted = fit_arrays(
        [y, z], [sr1, sr2], max_channels=max(channels1, channels2), min_sr=min_sr
    )
    y, z = fitted[0], fitted[1]

    if not spectrogram:
        print(_i18n("subtract_phase"))
        return convert_to_dtype(y - z, orig_dtype1), min_sr

    print(_i18n("subtract_spectrogram"))
    sft = get_stft_obj(min_sr, n_fft=n_fft, hop=hop)
    res_channels = []

    # Process one channel at a time to keep peak memory low.
    for ch_y, ch_z in zip(split_channels(y), split_channels(z)):
        spec_y = sft.stft(ch_y.astype(np.float32))
        spec_z = sft.stft(ch_z.astype(np.float32))

        # Magnitude subtraction floored at zero, with the phase of 'y'.
        residual_mag = np.maximum(np.abs(spec_y) - np.abs(spec_z), 0)
        res_spec = residual_mag * np.exp(1j * np.angle(spec_y))

        # Release the spectra before the inverse transform allocates.
        del spec_y, spec_z

        res_channels.append(sft.istft(res_spec, k1=ch_y.shape[-1]))

    subtracted = multi_channel_array_from_arrays(*res_channels, index=1, dtype=orig_dtype1)
    return subtracted, min_sr
|
| 1127 |
+
|
| 1128 |
+
|
| 1129 |
+
def absmax(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
    """
    Pick the element(s) with the largest absolute value, keeping the sign.

    Args:
        a: Input array.
        axis: Axis to reduce; when None the whole array is reduced.

    Returns:
        The signed element with the largest magnitude, per position along
        the remaining axes (or a single element when ``axis`` is None).
    """
    if axis is None:
        return a.flatten()[np.argmax(np.abs(a))]

    remaining_dims = list(a.shape)
    remaining_dims.pop(axis)
    # Open grid over the kept axes; the argmax fills the reduced axis slot.
    selector = list(np.ogrid[tuple(slice(0, d) for d in remaining_dims)])
    winners = np.abs(a).argmax(axis=axis)
    selector.insert(axis % len(a.shape), winners)
    return a[tuple(selector)]
|
| 1149 |
+
|
| 1150 |
+
|
| 1151 |
+
def absmin(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
    """
    Return the element(s) of ``a`` with the smallest absolute value.

    The original (signed / complex) values are returned, not their magnitudes.

    Args:
        a: Input array.
        axis: Axis to reduce over. ``None`` searches the flattened array and
            returns a single element.

    Returns:
        The selected element, or an array with ``axis`` removed.
    """
    magnitudes = np.abs(a)
    if axis is None:
        return a.flatten()[np.argmin(magnitudes)]

    # Shape of the result: every dimension except the reduced one.
    dims = list(a.shape)
    dims.pop(axis)

    # Depending on the NumPy version np.ogrid may hand back a tuple, which has
    # no .insert(); copy into a list first.
    indices = list(np.ogrid[tuple(slice(0, d) for d in dims)])
    indices.insert(axis % a.ndim, magnitudes.argmin(axis=axis))
    return a[tuple(indices)]
|
| 1170 |
+
|
| 1171 |
+
|
| 1172 |
+
def lambda_max(
    arr: np.ndarray,
    axis: Optional[int] = None,
    key: Optional[Callable] = None,
    keepdims: bool = False
) -> np.ndarray:
    """
    Pick the element(s) of ``arr`` that maximise ``key(arr)``.

    Args:
        arr: Input array.
        axis: Axis to reduce over; ``None`` searches the flattened array.
        key: Function applied to ``arr`` to produce the values being compared.
            Defaults to ``np.abs``.
        keepdims: Keep the reduced axis with length one. Only honoured when
            ``axis`` is given.

    Returns:
        The selected element(s) taken from ``arr`` itself.
    """
    score = np.abs if key is None else key
    best = np.argmax(score(arr), axis)

    if axis is None:
        return arr.flatten()[best]

    picked = np.take_along_axis(arr, np.expand_dims(best, axis), axis)
    return picked if keepdims else np.squeeze(picked, axis=axis)
|
| 1201 |
+
|
| 1202 |
+
|
| 1203 |
+
def lambda_min(
    arr: np.ndarray,
    axis: Optional[int] = None,
    key: Optional[Callable] = None,
    keepdims: bool = False
) -> np.ndarray:
    """
    Pick the element(s) of ``arr`` that minimise ``key(arr)``.

    Args:
        arr: Input array.
        axis: Axis to reduce over; ``None`` searches the flattened array.
        key: Function applied to ``arr`` to produce the values being compared.
            Defaults to ``np.abs``.
        keepdims: Keep the reduced axis with length one. Only honoured when
            ``axis`` is given.

    Returns:
        The selected element(s) taken from ``arr`` itself.
    """
    score = np.abs if key is None else key
    best = np.argmin(score(arr), axis)

    if axis is None:
        return arr.flatten()[best]

    picked = np.take_along_axis(arr, np.expand_dims(best, axis), axis)
    return picked if keepdims else np.squeeze(picked, axis=axis)
|
| 1232 |
+
|
| 1233 |
+
|
| 1234 |
+
def ensemble(
    pred_tracks: List[np.ndarray],
    srs: List[int],
    weights: List[float],
    algorithm: str,
    dtype: np.dtype = np.float32
) -> Tuple[np.ndarray, int]:
    """
    Combine several predictions of the same stem in the STFT domain.

    Args:
        pred_tracks: Predictions to combine.
        srs: Sample rate of each prediction.
        weights: Per-track weights; only used by ``"avg_fft"``.
        algorithm: One of ``"avg_fft"``, ``"median_fft"``, ``"min_fft"``,
            ``"max_fft"``.
        dtype: Data type of the returned array.

    Returns:
        Tuple ``(result, sample_rate)``.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    # Fail fast: reject an unknown algorithm before any resampling/STFT work.
    if algorithm not in ("avg_fft", "median_fft", "min_fft", "max_fft"):
        raise ValueError(_i18n("unknown_algorithm", alg=algorithm))

    # "min_fft" works at the lowest input rate, everything else at the highest.
    if algorithm == "min_fft":
        max_sr = int(min(srs))
    else:
        max_sr = int(max(srs))

    # Bring all tracks to a common length and sample rate.
    pred_tracks = list(fit_arrays(pred_tracks, srs, max_channels=2, min_sr=max_sr))

    sft = get_stft_obj(max_sr, n_fft=2048, hop=1024)
    final_length = pred_tracks[0].shape[-1]
    total_weight = sum(weights)  # loop-invariant, computed once
    ensemble_wav_channels = []

    # Channels are processed one at a time (L then R) to limit peak memory.
    for ch_idx in range(2):
        weighted_sum = None  # running sum for "avg_fft"
        spectra = []         # per-track spectra for median / min / max

        for i, track in enumerate(pred_tracks):
            spec = sft.stft(track[ch_idx].astype(np.float32))

            if algorithm == "avg_fft":
                weighted_spec = spec * weights[i]
                if weighted_sum is None:
                    weighted_sum = weighted_spec
                else:
                    weighted_sum += weighted_spec
            else:
                spectra.append(spec)

            del spec

        if algorithm == "avg_fft":
            res_spec = weighted_sum / total_weight
        elif algorithm == "median_fft":
            # Median of real and imaginary parts taken independently.
            res_spec = np.median(np.real(spectra), axis=0) + 1j * np.median(np.imag(spectra), axis=0)
        elif algorithm == "min_fft":
            res_spec = lambda_min(np.array(spectra), axis=0, key=np.abs)
        else:  # "max_fft"
            res_spec = absmax(np.array(spectra), axis=0)

        ensemble_wav_channels.append(sft.istft(res_spec, k1=final_length))
        del weighted_sum, spectra

    result = multi_channel_array_from_arrays(*ensemble_wav_channels, index=1, dtype=dtype)
    print(_i18n("ensemble_complete"))
    return result, max_sr
|
| 1307 |
+
|
| 1308 |
+
|
| 1309 |
+
def concatenate(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
    srs: Union[Tuple[int, ...], List[int]],
    dtype=np.float32
) -> Tuple[np.ndarray, int]:
    """
    Join several audio arrays end to end.

    Args:
        arrays: Audio arrays to join, in order.
        srs: Sample rate of each array.
        dtype: Data type of the returned array.

    Returns:
        Tuple ``(result, sample_rate)`` where ``sample_rate`` is the highest
        of the input rates.
    """
    # max(*srs) breaks for a single-element sequence (max(44100) is a
    # TypeError); passing the iterable itself works for any non-empty input.
    max_sr = int(max(srs))

    # Materialise the fitted arrays (as ensemble() does) so np.concatenate
    # always receives a real sequence.
    fitted = list(fit_arrays(
        [convert_to_dtype(array, np.float64) for array in arrays],
        srs, max_channels=2, min_sr=max_sr, extend=False,
    ))
    result = np.concatenate(fitted, axis=1, dtype=np.float64)
    print(_i18n("concatenate_complete"))
    return convert_to_dtype(result, dtype), max_sr
|
| 1331 |
+
|
| 1332 |
+
|
| 1333 |
+
def trim(y: np.ndarray, start: int = 0, end: int = -1) -> np.ndarray:
    """
    Cut an audio array down to the sample range ``[start, end)``.

    Args:
        y: Audio array.
        start: First sample to keep.
        end: Sample index to stop at. Values that are not in
            ``1..number_of_samples`` (including the default ``-1``) mean
            "up to the end".

    Returns:
        The trimmed array; ``y`` unchanged if its layout is not recognised.
    """
    _, total_samples, sample_axis, is_flat = get_info_array(y)
    stop = end if 0 < end <= total_samples else total_samples
    span = slice(start, stop)

    if is_flat:
        return y[span]
    if sample_axis == 0:
        return y[span, :]
    if sample_axis == 1:
        return y[:, span]
    return y
|
| 1356 |
+
|
| 1357 |
+
|
| 1358 |
+
def reverse(y: np.ndarray) -> np.ndarray:
    """
    Reverse an audio array along its sample axis.

    Args:
        y: Audio array.

    Returns:
        The reversed array.
    """
    _, _, sample_axis, is_flat = get_info_array(y)
    return np.flip(y) if is_flat else np.flip(y, axis=sample_axis)
|
| 1373 |
+
|
| 1374 |
+
|
| 1375 |
+
def write(
    path: str,
    y: np.ndarray,
    sr: int,
    bitrate: Union[int, str] = 320,
    prefer_float: bool = False
) -> str:
    """
    Encode an audio array to a file by piping raw samples into ffmpeg.

    Args:
        path: Destination path; the extension selects the codec arguments.
        y: Audio array.
        sr: Sample rate.
        bitrate: Target bitrate (clamped to 32..320 and to the codec limit
            for ``.ogg`` / ``.opus``).
        prefer_float: Passed to ``get_codec_args``.

    Returns:
        The path that was written (absolute when the ``MVSEPLESS_WRITE_ABS``
        environment variable is truthy).

    Raises:
        Exception: If ``sr`` is falsy or ffmpeg exits with a non-zero code.
    """
    # Validate arguments before touching the filesystem.
    if not sr:
        raise Exception(_i18n("sr_required"))

    if str2bool(os.environ.get("MVSEPLESS_WRITE_ABS", "False")):
        path = os.path.abspath(path)

    ext = os.path.splitext(path)[1]
    dir_path = os.path.dirname(path)
    if dir_path != "":
        os.makedirs(dir_path, exist_ok=True)

    dtype = y.dtype
    channels, *_ = get_info_array(y)
    y = reshape(y, shape=("samples", "channels"))

    # Dtypes without a matching raw ffmpeg sample format fall back to float32.
    sample_format = SAMPLE_FORMATS_DICT.get(str(dtype), None)
    if not sample_format:
        sample_format = "f32le"
        y = convert_to_dtype(y, np.float32)

    # NaN / inf samples are replaced with silence before encoding.
    y = np.nan_to_num(y, nan=0, posinf=0, neginf=0)

    bitrate_val = bitrate_to_int(bitrate)
    if ext == ".ogg":
        max_bitrate = get_ogg_bitrate(sr, channels)
        if bitrate_val > max_bitrate:
            print(_i18n("ogg_bitrate_adjusted", old=bitrate_val, new=max_bitrate))
            bitrate_val = max_bitrate
    elif ext == ".opus":
        max_bitrate = 256 * channels
        if bitrate_val > max_bitrate:
            print(_i18n("opus_bitrate_adjusted", old=bitrate_val, new=max_bitrate))
            bitrate_val = max_bitrate

    bitrate_fixed = min(max(bitrate_val, 32), 320)

    cmd = [ffmpeg_path, "-y", "-f", sample_format, "-ar", str(sr), "-ac", str(channels),
           "-i", "-", *get_codec_args(ext, prefer_float), "-ab", f"{bitrate_fixed}k", path]

    process = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=None,
        stderr=subprocess.PIPE,
        bufsize=10**8
    )

    try:
        _, stderr_data = process.communicate(input=y.tobytes())

        if process.returncode != 0:
            error_msg = stderr_data.decode('utf-8', errors='ignore')
            print(_i18n("ffmpeg_error", error=error_msg))
            raise Exception(_i18n("ffmpeg_exit_code", code=process.returncode))

    except Exception as e:
        print(_i18n("write_critical_error", error=str(e)))
        process.kill()
        raise  # re-raise with the original traceback

    return path
|
| 1456 |
+
|
| 1457 |
+
|
| 1458 |
+
def multiwrite(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
    srs: Union[Tuple[int, ...], List[int]],
    paths: Union[Tuple[str, ...], List[str]],
    bitrate: Union[int, str] = 320,
    prefer_float: bool = False,
    callable_func: Optional[Callable] = None,
    strict: bool = False
) -> Tuple[str, ...]:
    """
    Write several audio arrays to files via ``write``.

    Args:
        arrays: Audio arrays.
        srs: Sample rate of each array.
        paths: Destination path of each array.
        bitrate: Bitrate forwarded to ``write``.
        prefer_float: Forwarded to ``write``.
        callable_func: Optional callback invoked with each path before it is
            written.
        strict: When true, the first failed write aborts the whole call;
            otherwise failures are collected and the remaining files are
            still attempted.

    Returns:
        Tuple of the paths that were written successfully.

    Raises:
        Exception: In strict mode on the first failure, or when no file at
            all could be written (including mismatched input lengths).
    """
    saved_paths = []
    exceptions = []

    if len(arrays) == len(srs) == len(paths):
        for array, sr, path in zip(arrays, srs, paths):
            if callable_func is not None:
                callable_func(path)
            try:
                saved_paths.append(write(path, array, sr, bitrate=bitrate, prefer_float=prefer_float))
            except Exception as e:
                if strict:
                    # Keep the original exception attached as the cause.
                    raise Exception(str(e)) from e
                print(_i18n("write_error", error=str(e)))
                exceptions.append(str(e))
    else:
        # Nothing is written on a length mismatch; record why so the final
        # error is not reported with an empty cause list.
        exceptions.append(
            f"length mismatch: {len(arrays)} arrays, {len(srs)} sample rates, {len(paths)} paths"
        )

    if not saved_paths:
        exceptions_str = '\n'.join(exceptions)
        raise Exception(_i18n("no_files_written", errors=exceptions_str))

    return tuple(saved_paths)
|
mvsepless/custom_models.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mbr_hybrid_aname": {
|
| 3 |
+
"model_type": "mel_band_roformer",
|
| 4 |
+
"category": "Инструментал и вокал",
|
| 5 |
+
"id": 13456,
|
| 6 |
+
"full_name": "ANAME TEST",
|
| 7 |
+
"stems": [
|
| 8 |
+
"vocals",
|
| 9 |
+
"other"
|
| 10 |
+
],
|
| 11 |
+
"target_instrument": null,
|
| 12 |
+
"checkpoint_url": "https://huggingface.co/Aname-Tommy/MelBandRoformers/resolve/main/MelBandRoformer_Hybrid_Arch.pth?download=true",
|
| 13 |
+
"config_url": "https://huggingface.co/Aname-Tommy/MelBandRoformers/resolve/main/config_hybrid.yaml?download=true"
|
| 14 |
+
},
|
| 15 |
+
"custom_model": {
|
| 16 |
+
"model_type": "mel_band_roformer",
|
| 17 |
+
"category": "custom",
|
| 18 |
+
"id": 1,
|
| 19 |
+
"full_name": "",
|
| 20 |
+
"stems": [
|
| 21 |
+
"vocals",
|
| 22 |
+
"other"
|
| 23 |
+
],
|
| 24 |
+
"target_instrument": null,
|
| 25 |
+
"checkpoint_url": "https://huggingface.co/Aname-Tommy/MelBandRoformers/resolve/main/MelBandRoformer_Hybrid_Arch.pth?download=true",
|
| 26 |
+
"config_url": "https://huggingface.co/Aname-Tommy/MelBandRoformers/resolve/main/config_hybrid.yaml?download=true"
|
| 27 |
+
}
|
| 28 |
+
}
|
mvsepless/i18n.py
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mvsepless/infer_utils.py
CHANGED
|
@@ -1,825 +1,825 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
sys.stdout.reconfigure(encoding='utf-8')
|
| 3 |
-
sys.stderr.reconfigure(encoding='utf-8')
|
| 4 |
-
import json
|
| 5 |
-
import numpy as np
|
| 6 |
-
import torch
|
| 7 |
-
import torch.nn as nn
|
| 8 |
-
import yaml
|
| 9 |
-
import librosa
|
| 10 |
-
import torch.nn.functional as F
|
| 11 |
-
from ml_collections import ConfigDict
|
| 12 |
-
from omegaconf import OmegaConf
|
| 13 |
-
from typing import Dict, List, Tuple, Any, Optional
|
| 14 |
-
from i18n import _i18n
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
def load_config(model_type: str, config_path: str) -> Any:
    """
    Load a model configuration file.

    ``htdemucs`` configs are loaded with OmegaConf, everything else as a YAML
    file wrapped in a ``ConfigDict``. Optional ``new_*`` override keys
    (``training.new_segment``, ``audio.new_chunk_size``, ``audio.new_dim_t``)
    replace their regular counterparts when both are present.

    Args:
        model_type: Model type.
        config_path: Path to the configuration file.

    Returns:
        The loaded configuration object.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
        ValueError: If the file cannot be parsed or processed.
    """
    try:
        with open(config_path, "r") as f:
            if model_type == "htdemucs":
                config = OmegaConf.load(config_path)
            else:
                config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))
            if hasattr(config.training, "new_segment"):
                if hasattr(config.training, "segment"):
                    # Take the override from training.new_segment (the value
                    # guarded above), not from the unrelated audio chunk size.
                    config.training.segment = config.training.new_segment
            if hasattr(config.audio, "new_chunk_size"):
                if hasattr(config.audio, "chunk_size"):
                    config.audio.chunk_size = config.audio.new_chunk_size
            if hasattr(config.audio, "new_dim_t"):
                if hasattr(config.audio, "dim_t"):
                    config.audio.dim_t = config.audio.new_dim_t
            return config
    except FileNotFoundError:
        raise FileNotFoundError(_i18n("config_not_found", path=config_path))
    except Exception as e:
        raise ValueError(_i18n("config_load_error", error=str(e))) from e
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
def get_model_from_config(model_type: str, config_path: str) -> Tuple[Any, Any]:
    """
    Build a model instance from a configuration file.

    The configuration is loaded with ``load_config`` and the model class is
    chosen from ``model_type``; for the roformer types a marker attribute on
    the config (e.g. ``windowed``, ``conformer``, ``sw``) selects the variant.
    Model modules are imported lazily inside the matching branch.

    Args:
        model_type: Model type.
        config_path: Path to the configuration file.

    Returns:
        Tuple ``(model, config)``.

    Raises:
        ValueError: If ``model_type`` is not recognised.
    """
    config = load_config(model_type, config_path)

    if model_type == "mdx23c":
        from models.mdx23c_tfc_tdf_v3 import TFC_TDF_net
        model = TFC_TDF_net(config)
    elif model_type == "mdxnet":
        from models.mdx_net import MDXNet
        model = MDXNet(**dict(config.model))
    elif model_type == "vr":
        from models.vr_arch import VRNet
        model = VRNet(**dict(config.model))
    elif model_type == "htdemucs":
        from models.demucs4ht import get_model
        model = get_model(config)
    elif model_type == "mel_band_roformer":
        # Variant is selected by a marker attribute on the config.
        if hasattr(config, "windowed"):
            from models.windowed_roformer.model import MelBandRoformerWSA
            model = MelBandRoformerWSA(**dict(config.model))
        elif hasattr(config, "conformer"):
            from models.bs_roformer import MelBandConformer
            model = MelBandConformer(**dict(config.model))
        else:
            from models.bs_roformer import MelBandRoformer
            model = MelBandRoformer(**dict(config.model))
    elif model_type == "bs_roformer":
        # Variant is selected by a marker attribute on the config; the first
        # matching marker wins.
        if hasattr(config, "sw"):
            from models.bs_roformer import BSRoformer_SW
            model = BSRoformer_SW(**dict(config.model))
        elif hasattr(config, "fno"):
            from models.bs_roformer import BSRoformer_FNO
            model = BSRoformer_FNO(**dict(config.model))
        elif hasattr(config, "hyperace"):
            from models.bs_roformer import BSRoformerHyperACE
            model = BSRoformerHyperACE(**dict(config.model))
        elif hasattr(config, "hyperace2"):
            from models.bs_roformer import BSRoformerHyperACE_2
            model = BSRoformerHyperACE_2(**dict(config.model))
        elif hasattr(config, "conformer"):
            from models.bs_roformer import BSConformer
            model = BSConformer(**dict(config.model))
        elif hasattr(config, "conditional"):
            from models.bs_roformer import BSRoformer_Conditional
            model = BSRoformer_Conditional(**dict(config.model))
        elif hasattr(config, "unwa_inst_large_2"):
            from models.bs_roformer import BSRoformer_2
            model = BSRoformer_2(**dict(config.model))
        else:
            from models.bs_roformer import BSRoformer
            model = BSRoformer(**dict(config.model))
    elif model_type == "bandit":
        from models.bandit.core.model import MultiMaskMultiSourceBandSplitRNNSimple
        model = MultiMaskMultiSourceBandSplitRNNSimple(**config.model)
    elif model_type == "bandit_v2":
        from models.bandit_v2.bandit import Bandit
        # Unlike the other branches this one reads config.kwargs, not config.model.
        model = Bandit(**config.kwargs)
    elif model_type == "scnet_unofficial":
        from models.scnet_unofficial import SCNet
        model = SCNet(**config.model)
    elif model_type == "scnet":
        from models.scnet import SCNet
        model = SCNet(**config.model)
    elif model_type == 'scnet_masked':
        from models.scnet.scnet_masked import SCNet
        model = SCNet(**config.model)
    elif model_type == 'scnet_tran':
        from models.scnet.scnet_tran import SCNet_Tran
        model = SCNet_Tran(**config.model)
    elif model_type == 'medley_vox':
        from models.medley_vox import load_model_with_args
        model = load_model_with_args(config.model)
    else:
        raise ValueError(_i18n("unknown_model_type", model_type=model_type))
    return model, config
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
def _getWindowingArray(window_size: int, fade_size: int) -> torch.Tensor:
|
| 137 |
-
"""
|
| 138 |
-
Создать массив окна для плавного склеивания
|
| 139 |
-
|
| 140 |
-
Args:
|
| 141 |
-
window_size: Размер окна
|
| 142 |
-
fade_size: Размер зоны затухания
|
| 143 |
-
|
| 144 |
-
Returns:
|
| 145 |
-
Массив окна
|
| 146 |
-
"""
|
| 147 |
-
fadein = torch.linspace(0, 1, fade_size)
|
| 148 |
-
fadeout = torch.linspace(1, 0, fade_size)
|
| 149 |
-
|
| 150 |
-
window = torch.ones(window_size)
|
| 151 |
-
window[-fade_size:] = fadeout
|
| 152 |
-
window[:fade_size] = fadein
|
| 153 |
-
return window
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
def demix_mdxnet(
    config: Any,
    model: Any,
    mix: np.ndarray,
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Separate a mix with an MDXNet model using overlapping, windowed chunks.

    The mix is processed in chunks of ``model.hop_length * (model.dim_t - 1)``
    samples with a hop of ``chunk_size // num_overlap``; chunk outputs are
    multiplied by a fade window, summed, and normalised by the summed window.
    Progress is reported as one JSON line per chunk on stdout.

    Args:
        config: Configuration; reads ``inference.num_overlap`` and
            ``inference.denoise``.
        model: Model exposing ``stft``, ``istft``, ``hop_length``, ``dim_t``
            and ``primary_stem``.
        mix: Input mix (indexed as ``[channel, sample]``).
        device: Device to run on.

    Returns:
        Dictionary ``{model.primary_stem: separated_audio}``.
    """
    mix_tensor = torch.tensor(mix, dtype=torch.float32).to(device)
    batch_size = 1
    num_overlap = config.inference.num_overlap
    denoise = config.inference.denoise
    stem_name = model.primary_stem
    chunk_size = model.hop_length * (model.dim_t - 1)
    fade_size = chunk_size // 10
    step = chunk_size // num_overlap
    border = chunk_size - step

    length_init = mix_tensor.shape[-1]

    # Reflect-pad both ends when the signal is long enough. Otherwise process
    # the signal as is; `wave` must be defined in both cases.
    use_border = length_init > 2 * border and border > 0
    if use_border:
        wave = nn.functional.pad(mix_tensor, (border, border), mode="reflect")
    else:
        wave = mix_tensor

    window = _getWindowingArray(chunk_size, fade_size).to(device)

    with torch.no_grad():
        result = torch.zeros_like(wave, device=device)
        counter = torch.zeros_like(wave, device=device)

        i = 0
        batch_data = []
        batch_locations = []

        while i < wave.shape[1]:
            part = wave[:, i : i + chunk_size]
            chunk_len = part.shape[-1]

            # Pad the final, shorter chunk up to the full chunk size.
            if chunk_len < chunk_size:
                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
                part = nn.functional.pad(
                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
                )

            batch_data.append(part)
            batch_locations.append((i, chunk_len))
            i += step

            if len(batch_data) >= batch_size or i >= wave.shape[1]:
                arr = torch.stack(batch_data, dim=0)

                for j, (start, seg_len) in enumerate(batch_locations):
                    if denoise:
                        # Average the outputs for the chunk and its negation.
                        processed_spec1 = model(model.stft(arr[j : j + 1]))
                        processed_spec2 = model(model.stft(-(arr[j : j + 1])))
                        processed_wav = (model.istft(processed_spec1) + -model.istft(processed_spec2)) * 0.5
                    else:
                        processed_spec = model(model.stft(arr[j : j + 1]))
                        processed_wav = model.istft(processed_spec)

                    window_segment = window[..., :seg_len]
                    result[:, start : start + seg_len] += (
                        processed_wav[0, :, :seg_len] * window_segment
                    )
                    counter[:, start : start + seg_len] += window_segment

                progress_data = {
                    "processing": {
                        "processed": min(i, wave.shape[1]),
                        "total": wave.shape[1],
                        "unit": _i18n("unit_samples")
                    }
                }
                sys.stdout.write(
                    json.dumps(progress_data, ensure_ascii=False) + "\n"
                )
                sys.stdout.flush()

                batch_data.clear()
                batch_locations.clear()

        # Positions where the summed window is zero yield NaN/inf here; they
        # are zeroed by nan_to_num below.
        estimated_sources = result / counter

        if use_border:
            estimated_sources = estimated_sources[..., border:-border]

    result_separation = estimated_sources.cpu().numpy()

    result_separation = np.nan_to_num(
        result_separation, nan=0.0, posinf=0.0, neginf=0.0
    )

    return {stem_name: result_separation}
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
def demix_vr(
    config: Any,
    model: Any,
    mix: np.ndarray,
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Separate a mix with a VR model (``models.vr_arch``).

    A mask is predicted over windows of the padded magnitude spectrogram,
    adjusted by the configured aggressiveness, and applied to the mix
    spectrogram to obtain the primary stem; the complementary mask gives the
    secondary stem. Both stems are resampled from ``model.model_samplerate``
    to ``config.audio.sample_rate``.

    Args:
        config: Configuration; reads ``inference.aggression`` and
            ``audio.sample_rate``.
        model: Model wrapper.
        mix: Input mix.
        device: Device to run on.

    Returns:
        Dictionary mapping ``model.primary_stem`` and ``model.secondary_stem``
        to the separated audio.

    Raises:
        ValueError: If the model produces an empty prediction or no windows
            could be processed.
    """
    from models.vr_arch import spec_utils, NON_ACCOM_STEMS
    aggression = config.inference.aggression
    sr = config.audio.sample_rate
    # Aggression is configured as a percentage; convert to a fraction.
    aggr = float(int(aggression) / 100)
    aggressiveness = {
        "value": aggr,
        "split_bin": model.model_params.param["band"][1]["crop_stop"],
        "aggr_correction": model.model_params.param.get("aggr_correction"),
    }
    X_spec = model.loading_mix(mix, sr)

    def _execute(X_mag_pad: np.ndarray, roi_size: int) -> np.ndarray:
        # Predict the mask window by window and join the pieces along axis 2.
        X_dataset = []
        patches = (X_mag_pad.shape[2] - 2 * model.model_run.offset) // roi_size
        total = patches
        for i in range(patches):
            # NOTE(review): `processed` is assigned here but not used in this loop.
            processed = min(i + model.batch_size, patches)
            start = i * roi_size
            X_mag_window = X_mag_pad[:, :, start : start + model.window_size]
            X_dataset.append(X_mag_window)

        # NOTE(review): `total_iterations` is computed but never read.
        total_iterations = (
            patches // model.batch_size
        )

        X_dataset = np.asarray(X_dataset)
        model.model_run.eval()
        with torch.no_grad():
            mask = []

            for i in range(0, patches, model.batch_size):
                processed = min(i + model.batch_size, patches)
                # Progress is reported as one JSON line per batch on stdout.
                sys.stdout.write(
                    json.dumps(
                        {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_patches")}},
                        ensure_ascii=False,
                    )
                    + "\n"
                )
                sys.stdout.flush()
                X_batch = X_dataset[i : i + model.batch_size]
                X_batch = torch.from_numpy(X_batch).to(device)
                pred = model.model_run.predict_mask(X_batch)
                if not pred.size()[3] > 0:
                    raise ValueError(
                        _i18n("window_size_error")
                    )
                pred = pred.detach().cpu().numpy()
                # Join the batch items along axis 2.
                pred = np.concatenate(pred, axis=2)
                mask.append(pred)
            if len(mask) == 0:
                raise ValueError(
                    _i18n("window_size_error")
                )

            mask = np.concatenate(mask, axis=2)
        return mask

    def postprocess(
        mask: np.ndarray,
        X_mag: np.ndarray,
        X_phase: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
        # Apply aggressiveness / optional artifact merging, then split the mix
        # spectrogram into the masked part and its complement.
        is_non_accom_stem = False
        for stem in NON_ACCOM_STEMS:
            if stem == model.primary_stem.lower():
                is_non_accom_stem = True

        mask = spec_utils.adjust_aggr(mask, is_non_accom_stem, aggressiveness)

        if model.enable_post_process:
            mask = spec_utils.merge_artifacts(
                mask, thres=model.post_process_threshold
            )

        # Both outputs reuse the phase of the mix.
        y_spec = mask * X_mag * np.exp(1.0j * X_phase)
        v_spec = (1 - mask) * X_mag * np.exp(1.0j * X_phase)

        return y_spec, v_spec

    X_mag, X_phase = spec_utils.preprocess(X_spec)
    n_frame = X_mag.shape[2]
    pad_l, pad_r, roi_size = spec_utils.make_padding(
        n_frame, model.window_size, model.model_run.offset
    )
    X_mag_pad = np.pad(X_mag, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
    # Normalise to a peak of 1.
    # NOTE(review): an all-zero input divides by zero here.
    X_mag_pad /= X_mag_pad.max()
    mask = _execute(X_mag_pad, roi_size)

    # Drop the frames that were added by padding.
    mask = mask[:, :, :n_frame]

    y_spec, v_spec = postprocess(mask, X_mag, X_phase)

    y_spec = np.nan_to_num(y_spec, nan=0.0, posinf=0.0, neginf=0.0)
    v_spec = np.nan_to_num(v_spec, nan=0.0, posinf=0.0, neginf=0.0)
    primary_stem_array = model.spec_to_wav(y_spec).T
    primary_stem_array = librosa.resample(
        primary_stem_array.T,
        orig_sr=model.model_samplerate,
        target_sr=sr,
    ).T
    secondary_stem_array = model.spec_to_wav(v_spec).T
    secondary_stem_array = librosa.resample(
        secondary_stem_array.T,
        orig_sr=model.model_samplerate,
        target_sr=sr,
    ).T
    return {
        model.primary_stem: primary_stem_array,
        model.secondary_stem: secondary_stem_array,
    }
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
def demix_demucs(
    config: Any,
    model: Any,
    mix: np.ndarray,
    device: torch.device
) -> Dict[str, np.ndarray]:
    """
    Separate a mix with a Demucs model using overlapping, windowed chunks.

    Chunks of ``training.samplerate * training.segment`` samples are taken
    with a hop of ``chunk_size // num_overlap``, run through the model in
    batches, multiplied by a fade window, summed, and normalised by the
    summed window. Progress is reported as JSON lines on stdout.

    Args:
        config: Configuration; reads ``training.samplerate``,
            ``training.segment``, ``training.instruments``,
            ``training.use_amp`` (optional), ``inference.num_overlap`` and
            ``inference.batch_size``.
        model: Model called on a stacked batch of chunks.
        mix: Input mix (indexed as ``[channel, sample]``).
        device: Device the chunks are moved to.

    Returns:
        Dictionary mapping instrument names to separated audio.
        NOTE(review): when there is at most one instrument the raw array is
        returned instead of a dictionary — confirm callers expect that.
    """
    mix = torch.tensor(mix, dtype=torch.float32)
    chunk_size = config.training.samplerate * config.training.segment
    num_instruments = len(config.training.instruments)
    num_overlap = config.inference.num_overlap
    step = chunk_size // num_overlap
    fade_size = chunk_size // 10
    windowing_array = _getWindowingArray(chunk_size, fade_size)

    batch_size = config.inference.batch_size
    # Mixed precision is on unless the config disables it.
    use_amp = getattr(config.training, "use_amp", True)

    with torch.cuda.amp.autocast(enabled=use_amp):
        with torch.inference_mode():
            # Accumulators for the windowed sum and for the summed window.
            req_shape = (num_instruments,) + mix.shape
            result = torch.zeros(req_shape, dtype=torch.float32)
            counter = torch.zeros(req_shape, dtype=torch.float32)

            i = 0
            batch_data = []
            batch_locations = []

            while i < mix.shape[1]:
                part = mix[:, i : i + chunk_size].to(device)
                chunk_len = part.shape[-1]
                # Pad a short chunk up to chunk_size: reflect when more than
                # half a chunk is available, zeros otherwise.
                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
                part = nn.functional.pad(
                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
                )

                batch_data.append(part)
                batch_locations.append((i, chunk_len))
                i += step

                # Run the model once the batch is full or the input is exhausted.
                if len(batch_data) >= batch_size or i >= mix.shape[1]:
                    arr = torch.stack(batch_data, dim=0)
                    x = model(arr)

                    # No fade-in at the very start and no fade-out at the very
                    # end of the signal. The same window is used for every
                    # chunk of this batch.
                    window = windowing_array.clone()
                    if i - step == 0:
                        window[:fade_size] = 1
                    elif i >= mix.shape[1]:
                        window[-fade_size:] = 1

                    for j, (start, seg_len) in enumerate(batch_locations):
                        result[..., start : start + seg_len] += (
                            x[j, ..., :seg_len].cpu() * window[..., :seg_len]
                        )
                        counter[..., start : start + seg_len] += window[..., :seg_len]

                    processed = min(i, mix.shape[1])
                    total = mix.shape[1]
                    sys.stdout.write(
                        json.dumps(
                            {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_samples")}}
                        )
                        + "\n"
                    )
                    sys.stdout.flush()

                    batch_data.clear()
                    batch_locations.clear()

            estimated_sources = result / counter
            estimated_sources = estimated_sources.cpu().numpy()
            # Replace NaNs (e.g. from a zero summed window) in place.
            np.nan_to_num(estimated_sources, copy=False, nan=0.0)

    if num_instruments <= 1:
        return estimated_sources
    else:
        instruments = config.training.instruments
        return {k: v for k, v in zip(instruments, estimated_sources)}
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
def demix_generic(
    config: ConfigDict,
    model: torch.nn.Module,
    mix: torch.Tensor,
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Generic chunked overlap-add demix used for most model types.

    The mix is cut into windows of ``config.audio.chunk_size`` samples that
    advance by ``chunk_size // config.inference.num_overlap``. Windows are
    batched, passed through ``model`` and blended back together with a
    fade-in/fade-out weighting, then divided by the accumulated weights.

    Args:
        config: Model configuration. Reads ``audio.chunk_size``,
            ``inference.num_overlap``, ``inference.batch_size`` and the
            optional ``training.use_amp`` flag (defaults to True).
        model: Separation model; called on a stacked batch of chunks.
        mix: Mixture, 2-D and chunked along its second axis. Anything
            accepted by ``torch.tensor`` works.
        device: Device every chunk is moved to before inference.

    Returns:
        Dictionary mapping each instrument name to its separated array.
    """
    mix = torch.tensor(mix, dtype=torch.float32)
    chunk_size = config.audio.chunk_size
    instruments = prefer_target_instrument(config)
    num_instruments = len(instruments)
    num_overlap = config.inference.num_overlap

    fade_size = chunk_size // 10
    step = chunk_size // num_overlap
    border = chunk_size - step
    length_init = mix.shape[-1]
    windowing_array = _getWindowingArray(chunk_size, fade_size)

    # Reflect-pad both ends so the real signal edges are covered by several
    # overlapping chunks; the padding is trimmed again before returning.
    use_border = length_init > 2 * border and border > 0
    if use_border:
        mix = nn.functional.pad(mix, (border, border), mode="reflect")

    batch_size = config.inference.batch_size
    use_amp = getattr(config.training, "use_amp", True)
    total = mix.shape[1]

    with torch.cuda.amp.autocast(enabled=use_amp):
        with torch.inference_mode():
            req_shape = (num_instruments,) + mix.shape
            result = torch.zeros(req_shape, dtype=torch.float32)
            counter = torch.zeros(req_shape, dtype=torch.float32)

            i = 0
            batch_data = []
            batch_locations = []

            while i < total:
                part = mix[:, i : i + chunk_size].to(device)
                chunk_len = part.shape[-1]

                # Reflect padding needs the pad to be shorter than the data,
                # so short tails fall back to zero padding.
                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
                part = nn.functional.pad(
                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
                )

                batch_data.append(part)
                batch_locations.append((i, chunk_len))
                i += step

                if len(batch_data) >= batch_size or i >= total:
                    arr = torch.stack(batch_data, dim=0)
                    x = model(arr)

                    for j, (start, seg_len) in enumerate(batch_locations):
                        # The edge handling is decided per chunk, not per
                        # batch: only the very first chunk skips the fade-in
                        # and only the very last chunk skips the fade-out.
                        window = windowing_array.clone()
                        if start == 0:
                            window[:fade_size] = 1
                        if start + step >= total:
                            window[-fade_size:] = 1
                        window = window[..., :seg_len]

                        result[..., start : start + seg_len] += (
                            x[j, ..., :seg_len].cpu() * window
                        )
                        counter[..., start : start + seg_len] += window

                    processed = min(i, total)
                    # Progress is reported as one JSON object per line.
                    sys.stdout.write(
                        json.dumps(
                            {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_samples")}},
                            ensure_ascii=False,
                        )
                        + "\n"
                    )
                    sys.stdout.flush()

                    batch_data.clear()
                    batch_locations.clear()

            estimated_sources = result / counter
            estimated_sources = estimated_sources.cpu().numpy()
            # Positions with zero accumulated weight divide to NaN.
            np.nan_to_num(estimated_sources, copy=False, nan=0.0)

            if use_border:
                estimated_sources = estimated_sources[..., border:-border]

    return {k: v for k, v in zip(instruments, estimated_sources)}
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
def demix_medley_vox(
    config: ConfigDict,
    model: Any,
    mix: np.ndarray,
    device: torch.device
) -> Dict[str, np.ndarray]:
    """
    Demix for Medley Vox.

    The mix is loudness-normalised, split into overlapping chunks, and every
    channel of every chunk is passed through ``model.separate`` on its own.
    Chunk outputs are blended with a fade window, divided by the accumulated
    weights and finally scaled back by the inverse of the loudness gain.

    Args:
        config: Configuration. Reads ``training.instruments``,
            ``model.sample_rate``, ``model.seq_dur`` and
            ``inference.num_overlap``.
        model: Model exposing ``sample_rate`` and ``separate``.
        mix: Mixture as a 1-D (mono) or 2-D array. A 2-D array whose first
            axis is longer than its second is treated as [samples, channels]
            and transposed.
        device: Device used for inference.

    Returns:
        Dictionary mapping each stem name to a [channels, samples] array.

    Raises:
        ValueError: If ``mix`` is neither 1-D nor 2-D.
    """
    import pyloudnorm as pyln
    from models.medley_vox.loudness_utils import loudnorm, db2linear
    stems: List[str] = config.training.instruments

    # Bring the input to [channels, samples].
    if mix.ndim == 1:  # mono
        mix = np.expand_dims(mix, axis=0)
    elif mix.ndim == 2:
        if mix.shape[0] > mix.shape[1]:  # probably [samples, channels]
            mix = mix.T
    else:
        raise ValueError(
            f"demix_medley_vox expects a 1-D or 2-D mix, got {mix.ndim} dimensions"
        )
    num_channels = mix.shape[0]

    # Processing parameters
    samplerate = config.model.sample_rate
    segment_sec = config.model.seq_dur
    chunk_size = int(samplerate * segment_sec)
    overlap = config.inference.num_overlap
    step = chunk_size // overlap
    fade_size = chunk_size // 10

    # Loudness normalisation of the whole mix
    meter = pyln.Meter(model.sample_rate)
    try:
        # loudnorm is fed [samples, channels] or [samples]
        if num_channels > 1:
            mix_for_loudnorm = mix.T
        else:
            mix_for_loudnorm = mix[0]

        mixture_d, adjusted_gain = loudnorm(mix_for_loudnorm, -24.0, meter)

        # Convert back to [channels, samples]
        if num_channels > 1:
            if isinstance(mixture_d, np.ndarray) and mixture_d.ndim == 2:
                mixture_d = mixture_d.T
            else:
                # A mono result is duplicated onto every channel
                mixture_d = np.tile(mixture_d, (num_channels, 1))
        else:
            if mixture_d.ndim == 1:
                mixture_d = mixture_d.reshape(1, -1)

    except Exception as e:
        # Deliberate best-effort: report the failure and fall back to a
        # plain RMS-based normalisation.
        print(_i18n("loudnorm_error", error=str(e)))
        mixture_d = mix.copy()
        rms = np.sqrt(np.mean(mix**2))
        target_rms = 0.1  # roughly -20 dB
        if rms > 0:
            adjusted_gain = 20 * np.log10(target_rms / rms)
            mixture_d = mix * (target_rms / rms)
        else:
            adjusted_gain = 0

    length_init = mixture_d.shape[1]

    windowing_array = _getWindowingArray(chunk_size, fade_size).to(device)

    # Per-stem accumulators, [channels, samples]
    result_stems = {stem: np.zeros((num_channels, length_init), dtype=np.float32)
                    for stem in stems}

    mix_tensor = torch.tensor(mixture_d, dtype=torch.float32).to(device)

    # Per-stem accumulated window weights, [channels, samples]
    counters = {stem: torch.zeros((num_channels, length_init), dtype=torch.float32, device=device)
                for stem in stems}

    i = 0
    while i < length_init:
        # Take the chunk for all channels at once
        end_idx = min(i + chunk_size, length_init)
        chunk = mix_tensor[:, i:end_idx]
        cur_chunk_len = chunk.shape[1]

        # Outputs for this chunk, [channels, 2, cur_chunk_len]
        chunk_results = torch.zeros((num_channels, 2, cur_chunk_len), dtype=torch.float32, device=device)

        # Each channel is separated independently
        for ch in range(num_channels):
            channel_chunk = chunk[ch:ch+1, :]

            # Zero-pad a short tail up to the full chunk size
            if cur_chunk_len < chunk_size:
                pad_len = chunk_size - cur_chunk_len
                channel_chunk = torch.nn.functional.pad(
                    channel_chunk, (0, pad_len), mode='constant', value=0
                )

            # Add the batch dimension
            channel_chunk = channel_chunk.unsqueeze(0)

            with torch.no_grad():
                out_chunk = model.separate(channel_chunk)

            # Store this channel's result with the padding cut off. The
            # buffer already lives on `device`, so no CPU round trip is needed.
            chunk_results[ch, :, :cur_chunk_len] = out_chunk[0, :, :cur_chunk_len]

        # Blend window; the outer signal edges are not faded
        window = windowing_array[:cur_chunk_len].clone()
        if i == 0:
            window[:fade_size] = 1
        if end_idx >= length_init:
            window[-fade_size:] = 1

        # Move the chunk data to NumPy once instead of once per stem
        chunk_np = chunk_results.cpu().numpy()
        window_np = window.cpu().numpy()

        for stem_idx, stem in enumerate(stems):
            result_stems[stem][:, i:end_idx] += chunk_np[:, stem_idx, :] * window_np
            counters[stem][:, i:end_idx] += window

        i += step

        progress_data = {
            "processing": {
                "processed": min(end_idx, length_init),
                "total": length_init,
                "unit": _i18n("unit_samples"),
            }
        }
        sys.stdout.write(json.dumps(progress_data, ensure_ascii=False) + "\n")
        sys.stdout.flush()

    # Normalise by the accumulated weights
    for stem in stems:
        counters_np = counters[stem].cpu().numpy()
        # Avoid division by zero
        mask = counters_np > 0
        result_stems[stem][mask] /= counters_np[mask]

        # Undo the loudness normalisation
        result_stems[stem] = result_stems[stem] * db2linear(-adjusted_gain)

    return result_stems
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
def demix(
    config: ConfigDict,
    model: torch.nn.Module,
    mix: np.ndarray,
    device: torch.device,
    model_type: str,
) -> Dict[str, np.ndarray]:
    """
    Entry point for demixing: route the call to the routine for the model type.

    Args:
        config: Configuration passed through to the selected routine.
        model: Model passed through to the selected routine.
        mix: Mixture passed through to the selected routine.
        device: Device passed through to the selected routine.
        model_type: Model type. "vr", "mdxnet", "htdemucs" and "medley_vox"
            have dedicated routines; anything else uses ``demix_generic``.

    Returns:
        Whatever the selected routine returns (a dictionary of separated stems).
    """
    specialised = {
        "vr": demix_vr,
        "mdxnet": demix_mdxnet,
        "htdemucs": demix_demucs,
        "medley_vox": demix_medley_vox,
    }
    handler = specialised.get(model_type, demix_generic)
    return handler(config, model, mix, device)
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
def prefer_target_instrument(config: ConfigDict) -> List[str]:
    """
    Pick the instruments a model is expected to output.

    Args:
        config: Configuration with a ``training`` section.

    Returns:
        A one-element list with ``training.target_instrument`` when that
        value is set and truthy, otherwise ``training.instruments``.
    """
    training = config.training
    if not training.get("target_instrument"):
        return training.instruments
    return [training.target_instrument]
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
def prefer_target_instrument_test(
    config: ConfigDict, selected_instruments: Optional[List[str]] = None
) -> List[str]:
    """
    Pick the instruments to use for testing.

    Args:
        config: Configuration with a ``training`` section.
        selected_instruments: Optional explicit selection.

    Returns:
        The selected instruments that are also listed in
        ``training.instruments`` (in selection order) when a selection is
        given; otherwise the truthy ``training.target_instrument`` as a
        one-element list; otherwise ``training.instruments``.
    """
    known = config.training.instruments

    if selected_instruments is not None:
        # Keep only the requested names the model actually provides.
        return list(filter(lambda name: name in known, selected_instruments))

    if config.training.get("target_instrument"):
        return [config.training.target_instrument]

    return known
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
sys.stdout.reconfigure(encoding='utf-8')
|
| 3 |
+
sys.stderr.reconfigure(encoding='utf-8')
|
| 4 |
+
import json
|
| 5 |
+
import numpy as np
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import yaml
|
| 9 |
+
import librosa
|
| 10 |
+
import torch.nn.functional as F
|
| 11 |
+
from ml_collections import ConfigDict
|
| 12 |
+
from omegaconf import OmegaConf
|
| 13 |
+
from typing import Dict, List, Tuple, Any, Optional
|
| 14 |
+
from i18n import _i18n
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def load_config(model_type: str, config_path: str) -> Any:
    """
    Load a model configuration file.

    "htdemucs" configurations are loaded with OmegaConf; every other model
    type is parsed as YAML and wrapped in a ``ConfigDict``. After loading,
    optional ``new_*`` values override their base settings when both exist:
    ``training.new_segment`` -> ``training.segment``,
    ``audio.new_chunk_size`` -> ``audio.chunk_size`` and
    ``audio.new_dim_t`` -> ``audio.dim_t``.

    Args:
        model_type: Model type.
        config_path: Path to the configuration file.

    Returns:
        The loaded configuration.

    Raises:
        FileNotFoundError: If the configuration file does not exist.
        ValueError: If the file cannot be parsed or post-processed.
    """
    try:
        if model_type == "htdemucs":
            # OmegaConf opens the file itself.
            config = OmegaConf.load(config_path)
        else:
            # Read as UTF-8 explicitly so the result does not depend on the
            # platform's default encoding.
            # NOTE(review): FullLoader is kept for compatibility with
            # existing configs; only load configuration files you trust.
            with open(config_path, "r", encoding="utf-8") as f:
                config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))

        if hasattr(config.training, "new_segment"):
            if hasattr(config.training, "segment"):
                # The override comes from training.new_segment (it used to
                # be read from audio.new_chunk_size by mistake).
                config.training.segment = config.training.new_segment
        if hasattr(config.audio, "new_chunk_size"):
            if hasattr(config.audio, "chunk_size"):
                config.audio.chunk_size = config.audio.new_chunk_size
        if hasattr(config.audio, "new_dim_t"):
            if hasattr(config.audio, "dim_t"):
                config.audio.dim_t = config.audio.new_dim_t
        return config
    except FileNotFoundError as e:
        raise FileNotFoundError(_i18n("config_not_found", path=config_path)) from e
    except Exception as e:
        raise ValueError(_i18n("config_load_error", error=str(e))) from e
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def get_model_from_config(model_type: str, config_path: str) -> Tuple[Any, Any]:
    """
    Build the model that matches ``model_type`` from its configuration file.

    The implementation module is imported only for the branch that is taken.
    For the roformer families the concrete class is picked by the presence
    of a marker attribute on the configuration, checked in a fixed order.

    Args:
        model_type: Model type.
        config_path: Path to the configuration file.

    Returns:
        Tuple of (model, configuration).

    Raises:
        ValueError: If ``model_type`` is not one of the known types.
    """
    config = load_config(model_type, config_path)

    if model_type == "mdx23c":
        from models.mdx23c_tfc_tdf_v3 import TFC_TDF_net as model_cls
        return model_cls(config), config

    if model_type == "mdxnet":
        from models.mdx_net import MDXNet as model_cls
        return model_cls(**dict(config.model)), config

    if model_type == "vr":
        from models.vr_arch import VRNet as model_cls
        return model_cls(**dict(config.model)), config

    if model_type == "htdemucs":
        from models.demucs4ht import get_model as build_demucs
        return build_demucs(config), config

    if model_type == "mel_band_roformer":
        # Variant markers are checked in priority order.
        if hasattr(config, "windowed"):
            from models.windowed_roformer.model import MelBandRoformerWSA as model_cls
        elif hasattr(config, "conformer"):
            from models.bs_roformer import MelBandConformer as model_cls
        else:
            from models.bs_roformer import MelBandRoformer as model_cls
        return model_cls(**dict(config.model)), config

    if model_type == "bs_roformer":
        # Variant markers are checked in priority order.
        if hasattr(config, "sw"):
            from models.bs_roformer import BSRoformer_SW as model_cls
        elif hasattr(config, "fno"):
            from models.bs_roformer import BSRoformer_FNO as model_cls
        elif hasattr(config, "hyperace"):
            from models.bs_roformer import BSRoformerHyperACE as model_cls
        elif hasattr(config, "hyperace2"):
            from models.bs_roformer import BSRoformerHyperACE_2 as model_cls
        elif hasattr(config, "conformer"):
            from models.bs_roformer import BSConformer as model_cls
        elif hasattr(config, "conditional"):
            from models.bs_roformer import BSRoformer_Conditional as model_cls
        elif hasattr(config, "unwa_inst_large_2"):
            from models.bs_roformer import BSRoformer_2 as model_cls
        else:
            from models.bs_roformer import BSRoformer as model_cls
        return model_cls(**dict(config.model)), config

    if model_type == "bandit":
        from models.bandit.core.model import (
            MultiMaskMultiSourceBandSplitRNNSimple as model_cls,
        )
        return model_cls(**config.model), config

    if model_type == "bandit_v2":
        from models.bandit_v2.bandit import Bandit as model_cls
        return model_cls(**config.kwargs), config

    if model_type == "scnet_unofficial":
        from models.scnet_unofficial import SCNet as model_cls
        return model_cls(**config.model), config

    if model_type == "scnet":
        from models.scnet import SCNet as model_cls
        return model_cls(**config.model), config

    if model_type == 'scnet_masked':
        from models.scnet.scnet_masked import SCNet as model_cls
        return model_cls(**config.model), config

    if model_type == 'scnet_tran':
        from models.scnet.scnet_tran import SCNet_Tran as model_cls
        return model_cls(**config.model), config

    if model_type == 'medley_vox':
        from models.medley_vox import load_model_with_args as build_medley_vox
        return build_medley_vox(config.model), config

    raise ValueError(_i18n("unknown_model_type", model_type=model_type))
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _getWindowingArray(window_size: int, fade_size: int) -> torch.Tensor:
|
| 137 |
+
"""
|
| 138 |
+
Создать массив окна для плавного склеивания
|
| 139 |
+
|
| 140 |
+
Args:
|
| 141 |
+
window_size: Размер окна
|
| 142 |
+
fade_size: Размер зоны затухания
|
| 143 |
+
|
| 144 |
+
Returns:
|
| 145 |
+
Массив окна
|
| 146 |
+
"""
|
| 147 |
+
fadein = torch.linspace(0, 1, fade_size)
|
| 148 |
+
fadeout = torch.linspace(1, 0, fade_size)
|
| 149 |
+
|
| 150 |
+
window = torch.ones(window_size)
|
| 151 |
+
window[-fade_size:] = fadeout
|
| 152 |
+
window[:fade_size] = fadein
|
| 153 |
+
return window
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def demix_mdxnet(
    config: Any,
    model: Any,
    mix: np.ndarray,
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Demix for MDXNet.

    The mix is processed in overlapping chunks of
    ``model.hop_length * (model.dim_t - 1)`` samples. Each chunk goes through
    ``model.stft`` -> ``model`` -> ``model.istft`` and the outputs are blended
    with a fade window and divided by the accumulated weights.

    Args:
        config: Configuration. Reads ``inference.num_overlap`` and
            ``inference.denoise``.
        model: Model exposing ``primary_stem``, ``hop_length``, ``dim_t``,
            ``stft`` and ``istft``.
        mix: Mixture, 2-D and chunked along its second axis.
        device: Device used for inference.

    Returns:
        Dictionary with a single entry: ``model.primary_stem`` -> separated array.
    """
    mix_tensor = torch.tensor(mix, dtype=torch.float32).to(device)
    batch_size = 1
    num_overlap = config.inference.num_overlap
    denoise = config.inference.denoise
    stem_name = model.primary_stem
    chunk_size = model.hop_length * (model.dim_t - 1)
    fade_size = chunk_size // 10
    step = chunk_size // num_overlap
    border = chunk_size - step

    length_init = mix_tensor.shape[-1]

    # Reflect-pad both ends when the input is long enough. Otherwise work on
    # the input as it is; `wave` used to be left undefined in that case.
    use_border = length_init > 2 * border and border > 0
    if use_border:
        wave = nn.functional.pad(mix_tensor, (border, border), mode="reflect")
    else:
        wave = mix_tensor

    window = _getWindowingArray(chunk_size, fade_size).to(device)
    total = wave.shape[1]

    with torch.no_grad():
        result = torch.zeros_like(wave, device=device)
        counter = torch.zeros_like(wave, device=device)

        i = 0
        batch_data = []
        batch_locations = []

        while i < total:
            part = wave[:, i : i + chunk_size]
            chunk_len = part.shape[-1]

            if chunk_len < chunk_size:
                # Reflect padding needs the pad to be shorter than the data.
                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
                part = nn.functional.pad(
                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
                )

            batch_data.append(part)
            batch_locations.append((i, chunk_len))
            i += step

            if len(batch_data) >= batch_size or i >= total:
                arr = torch.stack(batch_data, dim=0)

                for j, (start, seg_len) in enumerate(batch_locations):
                    if denoise:
                        # Average the output for the signal and for its
                        # negation (sign restored after the inverse STFT).
                        processed_spec1 = model(model.stft(arr[j : j + 1]))
                        processed_spec2 = model(model.stft(-(arr[j : j + 1])))
                        processed_wav = (model.istft(processed_spec1) + -model.istft(processed_spec2)) * 0.5
                    else:
                        processed_spec = model(model.stft(arr[j : j + 1]))
                        processed_wav = model.istft(processed_spec)

                    chunk_window = window
                    if not use_border:
                        # Without the padded border the outer fades would
                        # give the signal edges zero weight, so keep the
                        # edges of the first and last chunk at full weight.
                        chunk_window = window.clone()
                        if start == 0:
                            chunk_window[:fade_size] = 1
                        if start + step >= total:
                            chunk_window[-fade_size:] = 1

                    window_segment = chunk_window[..., :seg_len]
                    result[:, start : start + seg_len] += (
                        processed_wav[0, :, :seg_len] * window_segment
                    )
                    counter[:, start : start + seg_len] += window_segment

                # Progress is reported as one JSON object per line.
                progress_data = {
                    "processing": {
                        "processed": min(i, total),
                        "total": total,
                        "unit": _i18n("unit_samples")
                    }
                }
                sys.stdout.write(
                    json.dumps(progress_data, ensure_ascii=False) + "\n"
                )
                sys.stdout.flush()

                batch_data.clear()
                batch_locations.clear()

        estimated_sources = result / counter

        if use_border:
            estimated_sources = estimated_sources[..., border:-border]

    result_separation = estimated_sources.cpu().numpy()

    # Zero-weight positions divide to NaN/inf; replace them with silence.
    result_separation = np.nan_to_num(
        result_separation, nan=0.0, posinf=0.0, neginf=0.0
    )

    return {stem_name: result_separation}
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def demix_vr(
    config: Any,
    model: Any,
    mix: np.ndarray,
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Demix for VR models.

    A mask is predicted patch by patch on the padded, peak-normalised
    magnitude spectrogram. The mask and its complement are applied to the
    mix spectrogram, and both results are converted back to audio and
    resampled from ``model.model_samplerate`` to ``config.audio.sample_rate``.

    Args:
        config: Configuration. Reads ``inference.aggression`` and
            ``audio.sample_rate``.
        model: VR model wrapper (``model_run``, ``model_params``,
            ``window_size``, ``batch_size``, ``loading_mix``, ``spec_to_wav``, ...).
        mix: Mixture, passed to ``model.loading_mix``.
        device: Device used for inference.

    Returns:
        Dictionary with ``model.primary_stem`` and ``model.secondary_stem``.

    Raises:
        ValueError: If no mask can be predicted for the configured window size.
    """
    from models.vr_arch import spec_utils, NON_ACCOM_STEMS
    aggression = config.inference.aggression
    sr = config.audio.sample_rate
    aggr = float(int(aggression) / 100)
    aggressiveness = {
        "value": aggr,
        "split_bin": model.model_params.param["band"][1]["crop_stop"],
        "aggr_correction": model.model_params.param.get("aggr_correction"),
    }
    X_spec = model.loading_mix(mix, sr)

    def _execute(X_mag_pad: np.ndarray, roi_size: int) -> np.ndarray:
        # Cut the padded spectrogram into windows that advance by roi_size.
        patches = (X_mag_pad.shape[2] - 2 * model.model_run.offset) // roi_size
        total = patches
        X_dataset = np.asarray(
            [
                X_mag_pad[:, :, k * roi_size : k * roi_size + model.window_size]
                for k in range(patches)
            ]
        )

        model.model_run.eval()
        with torch.no_grad():
            mask = []

            for i in range(0, patches, model.batch_size):
                processed = min(i + model.batch_size, patches)
                # Progress is reported as one JSON object per line.
                sys.stdout.write(
                    json.dumps(
                        {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_patches")}},
                        ensure_ascii=False,
                    )
                    + "\n"
                )
                sys.stdout.flush()
                X_batch = X_dataset[i : i + model.batch_size]
                X_batch = torch.from_numpy(X_batch).to(device)
                pred = model.model_run.predict_mask(X_batch)
                if not pred.size()[3] > 0:
                    raise ValueError(
                        _i18n("window_size_error")
                    )
                pred = pred.detach().cpu().numpy()
                # Join the batch items along the last axis.
                pred = np.concatenate(pred, axis=2)
                mask.append(pred)
            if len(mask) == 0:
                raise ValueError(
                    _i18n("window_size_error")
                )

            mask = np.concatenate(mask, axis=2)
        return mask

    def postprocess(
        mask: np.ndarray,
        X_mag: np.ndarray,
        X_phase: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
        primary = model.primary_stem.lower()
        is_non_accom_stem = any(stem == primary for stem in NON_ACCOM_STEMS)

        mask = spec_utils.adjust_aggr(mask, is_non_accom_stem, aggressiveness)

        if model.enable_post_process:
            mask = spec_utils.merge_artifacts(
                mask, thres=model.post_process_threshold
            )

        # The mask selects the primary stem, its complement the secondary one.
        y_spec = mask * X_mag * np.exp(1.0j * X_phase)
        v_spec = (1 - mask) * X_mag * np.exp(1.0j * X_phase)

        return y_spec, v_spec

    def _to_wave(spec: np.ndarray) -> np.ndarray:
        # Spectrogram -> waveform at the model rate -> resampled to the
        # configured rate, returned transposed.
        return librosa.resample(
            model.spec_to_wav(spec),
            orig_sr=model.model_samplerate,
            target_sr=sr,
        ).T

    X_mag, X_phase = spec_utils.preprocess(X_spec)
    n_frame = X_mag.shape[2]
    pad_l, pad_r, roi_size = spec_utils.make_padding(
        n_frame, model.window_size, model.model_run.offset
    )
    X_mag_pad = np.pad(X_mag, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
    # Peak-normalise, but leave an all-zero (silent) spectrogram untouched
    # instead of dividing by zero and feeding NaNs to the model.
    peak = X_mag_pad.max()
    if peak > 0:
        X_mag_pad /= peak
    mask = _execute(X_mag_pad, roi_size)

    mask = mask[:, :, :n_frame]

    y_spec, v_spec = postprocess(mask, X_mag, X_phase)

    y_spec = np.nan_to_num(y_spec, nan=0.0, posinf=0.0, neginf=0.0)
    v_spec = np.nan_to_num(v_spec, nan=0.0, posinf=0.0, neginf=0.0)
    primary_stem_array = _to_wave(y_spec)
    secondary_stem_array = _to_wave(v_spec)
    return {
        model.primary_stem: primary_stem_array,
        model.secondary_stem: secondary_stem_array,
    }
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
def demix_demucs(
    config: Any,
    model: Any,
    mix: np.ndarray,
    device: torch.device
) -> Dict[str, np.ndarray]:
    """
    Demix for Demucs.

    The mix is cut into chunks of ``training.samplerate * training.segment``
    samples that advance by ``chunk_size // inference.num_overlap``. Chunks
    are batched, run through ``model`` and blended with a fade window, then
    divided by the accumulated weights.

    Args:
        config: Configuration. Reads ``training.samplerate``,
            ``training.segment``, ``training.instruments``,
            ``inference.num_overlap``, ``inference.batch_size`` and the
            optional ``training.use_amp`` flag (defaults to True).
        model: Model, called on a stacked batch of chunks.
        mix: Mixture, 2-D and chunked along its second axis.
        device: Device every chunk is moved to before inference.

    Returns:
        Dictionary mapping instrument names to separated arrays. When the
        configuration lists at most one instrument, the raw array of
        estimated sources is returned instead (kept for existing callers).
    """
    mix = torch.tensor(mix, dtype=torch.float32)
    # Slicing needs an integer length even if `segment` is given as a float.
    chunk_size = int(config.training.samplerate * config.training.segment)
    num_instruments = len(config.training.instruments)
    num_overlap = config.inference.num_overlap
    step = chunk_size // num_overlap
    fade_size = chunk_size // 10
    windowing_array = _getWindowingArray(chunk_size, fade_size)

    batch_size = config.inference.batch_size
    use_amp = getattr(config.training, "use_amp", True)
    total = mix.shape[1]

    with torch.cuda.amp.autocast(enabled=use_amp):
        with torch.inference_mode():
            req_shape = (num_instruments,) + mix.shape
            result = torch.zeros(req_shape, dtype=torch.float32)
            counter = torch.zeros(req_shape, dtype=torch.float32)

            i = 0
            batch_data = []
            batch_locations = []

            while i < total:
                part = mix[:, i : i + chunk_size].to(device)
                chunk_len = part.shape[-1]
                # Reflect padding needs the pad to be shorter than the data,
                # so short tails fall back to zero padding.
                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
                part = nn.functional.pad(
                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
                )

                batch_data.append(part)
                batch_locations.append((i, chunk_len))
                i += step

                if len(batch_data) >= batch_size or i >= total:
                    arr = torch.stack(batch_data, dim=0)
                    x = model(arr)

                    for j, (start, seg_len) in enumerate(batch_locations):
                        # The edge handling is decided per chunk, not per
                        # batch: only the very first chunk skips the fade-in
                        # and only the very last chunk skips the fade-out.
                        window = windowing_array.clone()
                        if start == 0:
                            window[:fade_size] = 1
                        if start + step >= total:
                            window[-fade_size:] = 1
                        window = window[..., :seg_len]

                        result[..., start : start + seg_len] += (
                            x[j, ..., :seg_len].cpu() * window
                        )
                        counter[..., start : start + seg_len] += window

                    processed = min(i, total)
                    # Progress is reported as one JSON object per line.
                    sys.stdout.write(
                        json.dumps(
                            {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_samples")}}
                        )
                        + "\n"
                    )
                    sys.stdout.flush()

                    batch_data.clear()
                    batch_locations.clear()

            estimated_sources = result / counter
            estimated_sources = estimated_sources.cpu().numpy()
            # Positions with zero accumulated weight divide to NaN.
            np.nan_to_num(estimated_sources, copy=False, nan=0.0)

    if num_instruments <= 1:
        return estimated_sources
    else:
        instruments = config.training.instruments
        return {k: v for k, v in zip(instruments, estimated_sources)}
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
def demix_generic(
    config: ConfigDict,
    model: torch.nn.Module,
    mix: torch.Tensor,
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Separate a mixture with a chunked, windowed overlap-add pass.

    The mixture is cut into chunks of ``config.audio.chunk_size`` samples taken
    every ``chunk_size // config.inference.num_overlap`` samples. Chunks are run
    through the model in batches of ``config.inference.batch_size``, weighted by
    a fade window, accumulated, and finally divided by the accumulated weights.
    One JSON progress line is written to stdout after each processed batch.

    Args:
        config: Configuration; reads ``audio.chunk_size``,
            ``inference.num_overlap``, ``inference.batch_size`` and the optional
            ``training.use_amp`` flag (defaults to True).
        model: Separation model, called on a stacked batch of chunks.
            Its output is assumed to be indexed as
            (batch, instrument, channel, sample) - TODO confirm.
        mix: Mixture indexed as (channels, samples); any array-like accepted by
            ``torch.tensor`` works.
        device: Device each chunk is moved to before inference.

    Returns:
        Dictionary mapping each instrument returned by
        ``prefer_target_instrument(config)`` to its separated stem.
    """
    mix = torch.tensor(mix, dtype=torch.float32)
    chunk_size = config.audio.chunk_size
    instruments = prefer_target_instrument(config)
    num_instruments = len(instruments)
    num_overlap = config.inference.num_overlap

    fade_size = chunk_size // 10
    step = chunk_size // num_overlap
    border = chunk_size - step
    length_init = mix.shape[-1]
    windowing_array = _getWindowingArray(chunk_size, fade_size)

    # Reflect-pad both ends so the original edges are covered by overlapping
    # chunks; the padding is trimmed again before returning.
    use_border = length_init > 2 * border and border > 0
    if use_border:
        mix = nn.functional.pad(mix, (border, border), mode="reflect")

    batch_size = config.inference.batch_size
    use_amp = getattr(config.training, "use_amp", True)
    total = mix.shape[1]

    with torch.cuda.amp.autocast(enabled=use_amp):
        with torch.inference_mode():
            req_shape = (num_instruments,) + mix.shape
            result = torch.zeros(req_shape, dtype=torch.float32)
            counter = torch.zeros(req_shape, dtype=torch.float32)

            i = 0
            batch_data = []
            batch_locations = []

            while i < total:
                part = mix[:, i : i + chunk_size].to(device)
                chunk_len = part.shape[-1]

                # Reflect padding needs the pad to be shorter than the data,
                # so short tail chunks are zero-padded instead.
                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
                part = nn.functional.pad(
                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
                )

                batch_data.append(part)
                batch_locations.append((i, chunk_len))
                i += step

                if len(batch_data) >= batch_size or i >= total:
                    arr = torch.stack(batch_data, dim=0)
                    x = model(arr)

                    for j, (start, seg_len) in enumerate(batch_locations):
                        # The window is chosen per chunk: deciding it once per
                        # batch would apply the edge handling to every chunk
                        # of the batch (or to none of them).
                        window = windowing_array.clone()
                        if start == 0:
                            # First chunk: nothing precedes it, so no fade-in.
                            window[:fade_size] = 1
                        if start + step >= total:
                            # Last chunk: nothing follows it, so no fade-out.
                            window[-fade_size:] = 1

                        result[..., start : start + seg_len] += (
                            x[j, ..., :seg_len].cpu() * window[..., :seg_len]
                        )
                        counter[..., start : start + seg_len] += window[..., :seg_len]

                    processed = min(i, total)
                    sys.stdout.write(
                        json.dumps(
                            {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_samples")}},
                            ensure_ascii=False,
                        )
                        + "\n"
                    )
                    sys.stdout.flush()

                    batch_data.clear()
                    batch_locations.clear()

            # Weighted average; samples that received zero weight become NaN
            # and are replaced with silence.
            estimated_sources = result / counter
            estimated_sources = estimated_sources.cpu().numpy()
            np.nan_to_num(estimated_sources, copy=False, nan=0.0)

            if use_border:
                estimated_sources = estimated_sources[..., border:-border]

    return {k: v for k, v in zip(instruments, estimated_sources)}
|
| 589 |
+
|
| 590 |
+
|
| 591 |
+
def demix_medley_vox(
    config: ConfigDict,
    model: Any,
    mix: np.ndarray,
    device: torch.device
) -> Dict[str, np.ndarray]:
    """
    Separate a mixture with a Medley Vox model, one channel at a time.

    The mixture is loudness-normalized, cut into overlapping chunks, and each
    channel of each chunk is passed to ``model.separate``. Chunk outputs are
    blended with a fade window, divided by the accumulated weights, and the
    loudness gain is undone. One JSON progress line is written to stdout per
    chunk.

    Args:
        config: Configuration; reads ``training.instruments``,
            ``model.sample_rate``, ``model.seq_dur`` and
            ``inference.num_overlap``.
        model: Object exposing ``sample_rate`` and ``separate``; the output of
            ``separate`` is assumed to be (1, 2, chunk_size) - TODO confirm.
        mix: Mono (samples,) or 2-D audio. A 2-D array with more rows than
            columns is treated as (samples, channels) and transposed.
        device: Device used for inference and for the weight accumulators.

    Returns:
        Dictionary mapping each stem name to a (channels, samples) array.

    Raises:
        ValueError: If ``mix`` is neither 1-D nor 2-D.
    """
    import pyloudnorm as pyln
    from models.medley_vox.loudness_utils import loudnorm, db2linear

    stems: List[str] = config.training.instruments

    # Bring the input to (channels, samples).
    if mix.ndim == 1:
        mix = np.expand_dims(mix, axis=0)
    elif mix.ndim == 2:
        if mix.shape[0] > mix.shape[1]:
            # More rows than columns: presumably (samples, channels).
            mix = mix.T
    else:
        raise ValueError(
            f"demix_medley_vox expects 1-D or 2-D audio, got {mix.ndim} dimensions"
        )
    num_channels = mix.shape[0]

    # Chunking parameters
    samplerate = config.model.sample_rate
    segment_sec = config.model.seq_dur
    chunk_size = int(samplerate * segment_sec)
    overlap = config.inference.num_overlap
    step = chunk_size // overlap
    fade_size = chunk_size // 10

    # Loudness-normalize the whole mixture
    meter = pyln.Meter(model.sample_rate)
    try:
        # loudnorm is fed (samples, channels) for multichannel audio and
        # (samples,) for mono.
        if num_channels > 1:
            mix_for_loudnorm = mix.T
        else:
            mix_for_loudnorm = mix[0]

        mixture_d, adjusted_gain = loudnorm(mix_for_loudnorm, -24.0, meter)

        # Back to (channels, samples)
        if num_channels > 1:
            if isinstance(mixture_d, np.ndarray) and mixture_d.ndim == 2:
                mixture_d = mixture_d.T
            else:
                # A mono result is duplicated onto every channel.
                mixture_d = np.tile(mixture_d, (num_channels, 1))
        else:
            if mixture_d.ndim == 1:
                mixture_d = mixture_d.reshape(1, -1)

    except Exception as e:
        # Best-effort fallback: plain RMS normalization.
        print(_i18n("loudnorm_error", error=str(e)))
        mixture_d = mix.copy()
        rms = np.sqrt(np.mean(mix**2))
        target_rms = 0.1  # roughly -20 dB
        if rms > 0:
            adjusted_gain = 20 * np.log10(target_rms / rms)
            mixture_d = mix * (target_rms / rms)
        else:
            adjusted_gain = 0

    length_init = mixture_d.shape[1]

    windowing_array = _getWindowingArray(chunk_size, fade_size).to(device)

    # Per-stem output buffers, (channels, samples)
    result_stems = {stem: np.zeros((num_channels, length_init), dtype=np.float32)
                    for stem in stems}

    mix_tensor = torch.tensor(mixture_d, dtype=torch.float32).to(device)

    # Per-stem accumulated window weights, (channels, samples)
    counters = {stem: torch.zeros((num_channels, length_init), dtype=torch.float32, device=device)
                for stem in stems}

    i = 0
    while i < length_init:
        # Slice the chunk for all channels at once.
        end_idx = min(i + chunk_size, length_init)
        chunk = mix_tensor[:, i:end_idx]
        cur_chunk_len = chunk.shape[1]

        # Model output for this chunk, (channels, 2, cur_chunk_len)
        chunk_results = torch.zeros((num_channels, 2, cur_chunk_len), dtype=torch.float32, device=device)

        for ch in range(num_channels):
            channel_chunk = chunk[ch:ch+1, :]

            # Zero-pad a short tail chunk up to the model's chunk size.
            if cur_chunk_len < chunk_size:
                pad_len = chunk_size - cur_chunk_len
                channel_chunk = torch.nn.functional.pad(
                    channel_chunk, (0, pad_len), mode='constant', value=0
                )

            # Add the batch dimension: (1, 1, chunk_size)
            channel_chunk = channel_chunk.unsqueeze(0)

            with torch.no_grad():
                out_chunk = model.separate(channel_chunk)

            # Drop the padding. The slice is copied straight into the device
            # buffer instead of taking a detour through the CPU.
            chunk_results[ch, :, :cur_chunk_len] = out_chunk[0, :, :cur_chunk_len]

        # Fade window, with the fade removed at the start and end of the track.
        window = windowing_array[:cur_chunk_len].clone()
        if i == 0:
            window[:fade_size] = 1
        if end_idx >= length_init:
            window[-fade_size:] = 1

        # Convert once per chunk rather than once per stem.
        window_np = window.cpu().numpy()
        chunk_np = chunk_results.cpu().numpy()

        for stem_idx, stem in enumerate(stems):
            result_stems[stem][:, i:end_idx] += chunk_np[:, stem_idx, :] * window_np
            counters[stem][:, i:end_idx] += window

        i += step

        progress_data = {
            "processing": {
                "processed": min(end_idx, length_init),
                "total": length_init,
                "unit": _i18n("unit_samples"),
            }
        }
        sys.stdout.write(json.dumps(progress_data, ensure_ascii=False) + "\n")
        sys.stdout.flush()

    for stem in stems:
        counters_np = counters[stem].cpu().numpy()
        # Divide only where some weight was accumulated (avoids 0/0).
        mask = counters_np > 0
        result_stems[stem][mask] /= counters_np[mask]

        # Undo the loudness normalization gain.
        result_stems[stem] = result_stems[stem] * db2linear(-adjusted_gain)

    return result_stems
|
| 753 |
+
|
| 754 |
+
|
| 755 |
+
def demix(
    config: ConfigDict,
    model: torch.nn.Module,
    mix: np.ndarray,
    device: torch.device,
    model_type: str,
) -> Dict[str, np.ndarray]:
    """
    Dispatch a separation request to the routine matching the model type.

    Args:
        config: Configuration, forwarded unchanged
        model: Model, forwarded unchanged
        mix: Mixture, forwarded unchanged
        device: Device, forwarded unchanged
        model_type: "vr", "mdxnet", "htdemucs" or "medley_vox"; any other
            value selects the generic routine

    Returns:
        Whatever the selected routine returns
    """
    if model_type == "vr":
        separate = demix_vr
    elif model_type == "mdxnet":
        separate = demix_mdxnet
    elif model_type == "htdemucs":
        separate = demix_demucs
    elif model_type == "medley_vox":
        separate = demix_medley_vox
    else:
        # Every other architecture goes through the generic routine.
        separate = demix_generic

    return separate(config, model, mix, device)
|
| 785 |
+
|
| 786 |
+
|
| 787 |
+
def prefer_target_instrument(config: ConfigDict) -> List[str]:
    """
    Pick the instruments a model is expected to output.

    Args:
        config: Configuration; reads ``training.target_instrument`` and
            ``training.instruments``

    Returns:
        A one-element list holding the target instrument when it is set to a
        truthy value, otherwise ``training.instruments`` as is
    """
    training = config.training
    if not training.get("target_instrument"):
        return training.instruments
    return [training.target_instrument]
|
| 801 |
+
|
| 802 |
+
|
| 803 |
+
def prefer_target_instrument_test(
    config: ConfigDict, selected_instruments: Optional[List[str]] = None
) -> List[str]:
    """
    Pick the instruments to use for testing.

    Args:
        config: Configuration; reads ``training.instruments`` and
            ``training.target_instrument``
        selected_instruments: Optional explicit selection

    Returns:
        The selected instruments that exist in ``training.instruments`` (in
        selection order) when a selection is given; otherwise the target
        instrument as a one-element list when it is set; otherwise all
        available instruments
    """
    known = config.training.instruments

    if selected_instruments is None:
        if config.training.get("target_instrument"):
            return [config.training.target_instrument]
        return known

    # Keep only the requested names the model actually provides.
    return [name for name in selected_instruments if name in known]
|
mvsepless/install.py
CHANGED
|
@@ -1,356 +1,356 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import subprocess
|
| 3 |
-
import argparse
|
| 4 |
-
import re
|
| 5 |
-
import sys
|
| 6 |
-
from typing import List, Optional, Tuple, Union
|
| 7 |
-
from i18n import _i18n
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
def _parse_pip_index_version(pip_output: str) -> Optional[str]:
    """Extract the newest version from ``pip index versions`` output, or None."""
    if not pip_output:
        return None

    lines = pip_output.split("\n")
    first_line = lines[0].strip()

    # Method 1: the first line, normally "<package> (<version>)".
    if first_line:
        match = re.search(r"\(([^)]+)\)", first_line)
        if match:
            return match.group(1).strip()

        # Otherwise take the second whitespace-separated token, but only if it
        # looks like a version (contains a digit).
        match = re.search(r"\S+\s+([^\s]+)", first_line)
        if match and re.search(r"\d", match.group(1)):
            return match.group(1).strip()

    # Method 2: the "Available versions:" list. pip prints the versions on the
    # same line as the label, so that text is checked first; the next three
    # lines are kept as a fallback for a wrapped list.
    for index, line in enumerate(lines):
        _, label, remainder = line.partition("Available versions:")
        if not label:
            continue
        for candidate in [remainder, *lines[index + 1:index + 4]]:
            versions = [v.strip() for v in candidate.split(",") if v.strip()]
            if versions:
                return versions[0]
        break

    return None


def get_latest_version(package_name: str, index_url: Optional[str] = None) -> Optional[str]:
    """
    Get the latest version of a package from ``pip index versions``.

    Args:
        package_name: Package name
        index_url: Package index URL, passed as ``--index-url`` when given

    Returns:
        The latest version string, or None when pip cannot be run, exits with
        a non-zero code, or its output cannot be parsed
    """
    cmd = [sys.executable, "-m", "pip", "index", "versions", package_name]
    if index_url:
        cmd.extend(["--index-url", index_url])

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=False,  # a failure is reported through returncode
        )
    except Exception as e:
        # Best-effort probe: any failure to launch pip means "unknown".
        print(_i18n("pip_index_error", error=str(e)))
        return None

    if result.returncode != 0:
        print(_i18n("pip_index_warning", code=result.returncode))
        print(f"stderr: {result.stderr}")
        return None

    latest_version = _parse_pip_index_version(result.stdout)

    print(_i18n("version_retrieved", package=package_name, version=latest_version or _i18n("unknown")))

    return latest_version
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
def fno_compitable(index_url: Optional[str] = None) -> bool:
    """
    Check whether the latest available torch is compatible with FNO
    (Fourier Neural Operator).

    Args:
        index_url: Package index URL used to look up the torch version

    Returns:
        True when the latest torch version is 2.x with a minor version of at
        least 4; False otherwise or when the version cannot be determined
    """
    latest_version_torch = get_latest_version("torch", index_url)

    if not latest_version_torch:
        print(_i18n("torch_version_not_found"))
        return False

    # Read major and minor from the start of the string. Filtering out
    # non-numeric components first would shift the remaining ones, so a later
    # component could be mistaken for the minor version.
    match = re.match(r"(\d+)\.(\d+)", latest_version_torch)
    if match is None:
        return False

    major, minor = int(match.group(1)), int(match.group(2))
    return major == 2 and minor >= 4
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
def is_nvidia_gpu_present() -> bool:
    """
    Report whether an NVIDIA GPU is available, judged by running ``nvidia-smi``.

    Returns:
        True when ``nvidia-smi`` exits with code 0; False when it exits with
        another code, is not installed, or the check fails for any other reason
    """
    try:
        probe = subprocess.run(
            ["nvidia-smi"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,  # a non-zero exit is read from returncode, not raised
        )
        gpu_found = probe.returncode == 0
        print(_i18n("nvidia_gpu_detected" if gpu_found else "nvidia_smi_error"))
        return gpu_found
    except FileNotFoundError:
        # The nvidia-smi executable does not exist on this system.
        print(_i18n("nvidia_smi_not_found"))
        return False
    except Exception as e:
        print(_i18n("nvidia_check_error", error=str(e)))
        return False


# Evaluated once at import time; selects the onnxruntime flavor below.
cuda_available: bool = is_nvidia_gpu_present()
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
def install_uv() -> None:
    """Install the ``uv`` package installer with pip and print the outcome."""
    print(_i18n("installing_uv"))
    pip_command = [sys.executable, "-m", "pip", "install", "uv"]
    succeeded = subprocess.run(pip_command).returncode == 0
    print(_i18n("uv_installed" if succeeded else "uv_install_error"))
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
def install_requirements(requirements: List[str], force: bool = False, index_url: Optional[str] = None) -> None:
    """
    Install packages with ``uv pip install``.

    Args:
        requirements: Requirement specifiers; nothing is run when empty
        force: Add ``--upgrade`` and ``--force-reinstall``
        index_url: Package index URL, passed as ``--index-url`` when given
    """
    if not requirements:
        return

    command = [sys.executable, "-m", "uv", "pip", "install", "--no-cache-dir", "-qq"]
    if force:
        command += ["--upgrade", "--force-reinstall"]
    if index_url:
        command += ["--index-url", index_url]
    command.extend(requirements)

    # A failed install is reported but does not stop the caller.
    if subprocess.run(command).returncode != 0:
        print(_i18n("requirements_install_error", count=len(requirements)))
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
# Списки зависимостей
|
| 208 |
-
# Unpinned PyTorch stack, installed first (optionally from a custom index).
torch_requirements: List[str] = ["torch", "torchvision", "torchaudio", "torchcrepe"]
|
| 214 |
-
|
| 215 |
-
# Dependencies for the current (mostly unpinned) setup.
universal_requirements: List[str] = [
    "numpy==2.0.2", "pandas", "scipy", "librosa", "samplerate==0.1.0",
    "matplotlib", "tqdm", "einops", "protobuf", "soundfile",
    "pydub", "webrtcvad", "audiomentations", "pedalboard", "ml_collections",
    "timm", "wandb", "accelerate", "bitsandbytes", "tokenizers",
    "huggingface-hub", "transformers", "torchseg", "demucs==4.0.0", "asteroid>=0.6.0",
    "pyloudnorm", "prodigyopt", "torch_log_wmse", "rotary_embedding_torch", "gradio<6.0",
    "omegaconf", "beartype", "spafe", "torch_audiomentations", "auraloss",
    "onnx>=1.17", "onnx2torch>=0.3.0",
    # GPU build of onnxruntime only when an NVIDIA GPU was detected
    "onnxruntime-gpu>=1.17" if cuda_available else "onnxruntime>=1.17",
    "ml_dtypes", "resampy", "yt_dlp", "pyngrok", "praat-parselmouth",
    "faiss-cpu==1.11", "local-attention", "tenacity", "pyworld", "gdown",
]
|
| 265 |
-
|
| 266 |
-
# Pinned PyTorch 1.13 stack used with --old.
torch_old_requirements: List[str] = [
    "torch==1.13.1", "torchvision==0.14.1", "torchaudio==0.13.1", "torchcrepe==0.0.24",
]
|
| 272 |
-
|
| 273 |
-
# Pinned dependencies used with --old.
old_requirements: List[str] = [
    "numpy==1.26.4", "pandas==2.3.3", "scipy==1.15.3", "librosa==0.11.0",
    "samplerate==0.1.0", "matplotlib==3.10.8", "tqdm==4.67.1", "einops==0.8.1",
    "protobuf==6.33.4", "soundfile==0.13.1", "pydub==0.25.1", "webrtcvad==2.0.10",
    "audiomentations==0.43.1", "pedalboard==0.8.2", "ml_collections==1.1.0", "timm==1.0.24",
    "wandb==0.24.0", "accelerate==1.2.1", "bitsandbytes==0.45.0", "tokenizers==0.15.2",
    "huggingface-hub==0.34.2", "transformers==4.39.3", "torchseg==0.0.1a4", "demucs==4.0.0",
    "asteroid==0.6.0", "pyloudnorm", "prodigyopt==1.1.2", "rotary_embedding_torch==0.3.6",
    "gradio<6.0.0", "omegaconf==2.3.0", "beartype==0.22.9", "spafe==0.3.3",
    "torch_audiomentations==0.12.0", "auraloss==0.4.0", "onnx>=1.17", "onnx2torch>=0.3.0",
    # GPU build of onnxruntime only when an NVIDIA GPU was detected
    "onnxruntime-gpu>=1.17" if cuda_available else "onnxruntime>=1.17",
    "ml_dtypes==0.5.4", "resampy==0.4.3", "yt_dlp", "pyngrok",
    "praat-parselmouth==0.4.7", "faiss-cpu==1.7.2", "local-attention==1.10.0", "tenacity==9.1.2",
    "pyworld==0.3.5", "gdown",
]
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
if __name__ == "__main__":
    # Command-line entry point: choose a dependency set, then install uv,
    # the torch stack, the remaining packages and finally setuptools.
    parser = argparse.ArgumentParser(description=_i18n("installer_description"))
    parser.add_argument("--old", action="store_true", help=_i18n("old_deps_help"))
    parser.add_argument("--force", action="store_true", help=_i18n("force_install_help"))
    parser.add_argument("--index_url", type=str, default=None, help=_i18n("index_url_help"))
    args = parser.parse_args()

    if args.old:
        torch_reqs = torch_old_requirements
        reqs = old_requirements
        print(_i18n("installing_old_deps"))
    else:
        torch_reqs = torch_requirements
        # Work on a copy so the optional extra below does not mutate the
        # module-level requirement list.
        reqs = list(universal_requirements)
        if fno_compitable(args.index_url):
            reqs.append("neuraloperator==1.0.2")
            print(_i18n("fno_compatible_detected"))

    if args.force:
        print(_i18n("force_install_warning"))

    install_uv()

    # Only the torch stack is installed from the custom index.
    print(_i18n("installing_torch"))
    install_requirements(torch_reqs, force=args.force, index_url=args.index_url)

    print(_i18n("installing_other_deps"))
    install_requirements(reqs, force=args.force)

    print(_i18n("installing_setuptools"))
    install_requirements(["setuptools<76.0"], force=True)

    print(_i18n("installation_complete"))
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import subprocess
|
| 3 |
+
import argparse
|
| 4 |
+
import re
|
| 5 |
+
import sys
|
| 6 |
+
from typing import List, Optional, Tuple, Union
|
| 7 |
+
from i18n import _i18n
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _parse_pip_index_version(pip_output: str) -> Optional[str]:
    """Extract the newest version from ``pip index versions`` output, or None."""
    if not pip_output:
        return None

    lines = pip_output.split("\n")
    first_line = lines[0].strip()

    # Method 1: the first line, normally "<package> (<version>)".
    if first_line:
        match = re.search(r"\(([^)]+)\)", first_line)
        if match:
            return match.group(1).strip()

        # Otherwise take the second whitespace-separated token, but only if it
        # looks like a version (contains a digit).
        match = re.search(r"\S+\s+([^\s]+)", first_line)
        if match and re.search(r"\d", match.group(1)):
            return match.group(1).strip()

    # Method 2: the "Available versions:" list. pip prints the versions on the
    # same line as the label, so that text is checked first; the next three
    # lines are kept as a fallback for a wrapped list.
    for index, line in enumerate(lines):
        _, label, remainder = line.partition("Available versions:")
        if not label:
            continue
        for candidate in [remainder, *lines[index + 1:index + 4]]:
            versions = [v.strip() for v in candidate.split(",") if v.strip()]
            if versions:
                return versions[0]
        break

    return None


def get_latest_version(package_name: str, index_url: Optional[str] = None) -> Optional[str]:
    """
    Get the latest version of a package from ``pip index versions``.

    Args:
        package_name: Package name
        index_url: Package index URL, passed as ``--index-url`` when given

    Returns:
        The latest version string, or None when pip cannot be run, exits with
        a non-zero code, or its output cannot be parsed
    """
    cmd = [sys.executable, "-m", "pip", "index", "versions", package_name]
    if index_url:
        cmd.extend(["--index-url", index_url])

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=False,  # a failure is reported through returncode
        )
    except Exception as e:
        # Best-effort probe: any failure to launch pip means "unknown".
        print(_i18n("pip_index_error", error=str(e)))
        return None

    if result.returncode != 0:
        print(_i18n("pip_index_warning", code=result.returncode))
        print(f"stderr: {result.stderr}")
        return None

    latest_version = _parse_pip_index_version(result.stdout)

    print(_i18n("version_retrieved", package=package_name, version=latest_version or _i18n("unknown")))

    return latest_version
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def fno_compitable(index_url: Optional[str] = None) -> bool:
    """
    Check whether the latest available torch is compatible with FNO
    (Fourier Neural Operator).

    Args:
        index_url: Package index URL used to look up the torch version

    Returns:
        True when the latest torch version is 2.x with a minor version of at
        least 4; False otherwise or when the version cannot be determined
    """
    latest_version_torch = get_latest_version("torch", index_url)

    if not latest_version_torch:
        print(_i18n("torch_version_not_found"))
        return False

    # Read major and minor from the start of the string. Filtering out
    # non-numeric components first would shift the remaining ones, so a later
    # component could be mistaken for the minor version.
    match = re.match(r"(\d+)\.(\d+)", latest_version_torch)
    if match is None:
        return False

    major, minor = int(match.group(1)), int(match.group(2))
    return major == 2 and minor >= 4
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def is_nvidia_gpu_present() -> bool:
    """
    Report whether an NVIDIA GPU is available, judged by running ``nvidia-smi``.

    Returns:
        True when ``nvidia-smi`` exits with code 0; False when it exits with
        another code, is not installed, or the check fails for any other reason
    """
    try:
        probe = subprocess.run(
            ["nvidia-smi"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,  # a non-zero exit is read from returncode, not raised
        )
        gpu_found = probe.returncode == 0
        print(_i18n("nvidia_gpu_detected" if gpu_found else "nvidia_smi_error"))
        return gpu_found
    except FileNotFoundError:
        # The nvidia-smi executable does not exist on this system.
        print(_i18n("nvidia_smi_not_found"))
        return False
    except Exception as e:
        print(_i18n("nvidia_check_error", error=str(e)))
        return False


# Evaluated once at import time; selects the onnxruntime flavor below.
cuda_available: bool = is_nvidia_gpu_present()
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def install_uv() -> None:
    """Install the ``uv`` package installer with pip and print the outcome."""
    print(_i18n("installing_uv"))
    pip_command = [sys.executable, "-m", "pip", "install", "uv"]
    succeeded = subprocess.run(pip_command).returncode == 0
    print(_i18n("uv_installed" if succeeded else "uv_install_error"))
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def install_requirements(requirements: List[str], force: bool = False, index_url: Optional[str] = None) -> None:
    """
    Install the given requirements with ``python -m uv pip install``.

    Args:
        requirements: Requirement specifiers to install; nothing is run when empty.
        force: When true, add ``--upgrade`` and ``--force-reinstall``.
        index_url: Optional package index URL passed as ``--index-url``.
    """
    if not requirements:
        return

    reinstall_flags = ["--upgrade", "--force-reinstall"] if force else []
    index_flags = ["--index-url", index_url] if index_url else []
    command = [
        sys.executable, "-m", "uv", "pip", "install", "--no-cache-dir", "-qq",
        *reinstall_flags,
        *index_flags,
        *requirements,
    ]

    # A failed install is only reported; no exception is raised.
    if subprocess.run(command).returncode != 0:
        print(_i18n("requirements_install_error", count=len(requirements)))
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
# Dependency lists
# Unpinned torch packages; used by the __main__ block when --old is not given.
torch_requirements: List[str] = [
    "torch",
    "torchvision",
    "torchaudio",
    "torchcrepe",
]
|
| 214 |
+
|
| 215 |
+
# Default (mostly unpinned) dependency set; used by the __main__ block when
# --old is not given.
universal_requirements: List[str] = [
    "numpy==2.0.2",
    "pandas",
    "scipy",
    "librosa",
    "samplerate==0.1.0",
    "matplotlib",
    "tqdm",
    "einops",
    "protobuf",
    "soundfile",
    "pydub",
    "webrtcvad",
    "audiomentations",
    "pedalboard",
    "ml_collections",
    "timm",
    "wandb",
    "accelerate",
    "bitsandbytes",
    "tokenizers",
    "huggingface-hub",
    "transformers",
    "torchseg",
    "demucs==4.0.0",
    "asteroid>=0.6.0",
    "pyloudnorm",
    "prodigyopt",
    "torch_log_wmse",
    "rotary_embedding_torch",
    "gradio<6.0",
    "omegaconf",
    "beartype",
    "spafe",
    "torch_audiomentations",
    "auraloss",
    "onnx>=1.17",
    "onnx2torch>=0.3.0",
    # GPU build of onnxruntime only when the nvidia-smi probe succeeded.
    "onnxruntime-gpu>=1.17" if cuda_available else "onnxruntime>=1.17",
    "ml_dtypes",
    "resampy",
    "yt_dlp",
    "pyngrok",
    "praat-parselmouth",
    "faiss-cpu==1.11",
    "local-attention",
    "tenacity",
    "pyworld",
    "gdown"
]
|
| 265 |
+
|
| 266 |
+
# Pinned torch 1.13.1 stack; used by the __main__ block when --old is given.
torch_old_requirements: List[str] = [
    "torch==1.13.1",
    "torchvision==0.14.1",
    "torchaudio==0.13.1",
    "torchcrepe==0.0.24",
]
|
| 272 |
+
|
| 273 |
+
# Mostly pinned dependency set installed together with torch_old_requirements
# when --old is given.
old_requirements: List[str] = [
    "numpy==1.26.4",
    "pandas==2.3.3",
    "scipy==1.15.3",
    "librosa==0.11.0",
    "samplerate==0.1.0",
    "matplotlib==3.10.8",
    "tqdm==4.67.1",
    "einops==0.8.1",
    "protobuf==6.33.4",
    "soundfile==0.13.1",
    "pydub==0.25.1",
    "webrtcvad==2.0.10",
    "audiomentations==0.43.1",
    "pedalboard==0.8.2",
    "ml_collections==1.1.0",
    "timm==1.0.24",
    "wandb==0.24.0",
    "accelerate==1.2.1",
    "bitsandbytes==0.45.0",
    "tokenizers==0.15.2",
    "huggingface-hub==0.34.2",
    "transformers==4.39.3",
    "torchseg==0.0.1a4",
    "demucs==4.0.0",
    "asteroid==0.6.0",
    "pyloudnorm",
    "prodigyopt==1.1.2",
    "rotary_embedding_torch==0.3.6",
    "gradio<6.0.0",
    "omegaconf==2.3.0",
    "beartype==0.22.9",
    "spafe==0.3.3",
    "torch_audiomentations==0.12.0",
    "auraloss==0.4.0",
    "onnx>=1.17",
    "onnx2torch>=0.3.0",
    # GPU build of onnxruntime only when the nvidia-smi probe succeeded.
    "onnxruntime-gpu>=1.17" if cuda_available else "onnxruntime>=1.17",
    "ml_dtypes==0.5.4",
    "resampy==0.4.3",
    "yt_dlp",
    "pyngrok",
    "praat-parselmouth==0.4.7",
    "faiss-cpu==1.7.2",
    "local-attention==1.10.0",
    "tenacity==9.1.2",
    "pyworld==0.3.5",
    "gdown"
]
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
if __name__ == "__main__":
    # CLI entry point: parse the flags, pick a dependency set, then install
    # uv, the torch stack, the remaining packages and finally setuptools.
    parser = argparse.ArgumentParser(description=_i18n("installer_description"))
    for flag, help_key in (("--old", "old_deps_help"), ("--force", "force_install_help")):
        parser.add_argument(flag, action="store_true", help=_i18n(help_key))
    parser.add_argument("--index_url", type=str, default=None, help=_i18n("index_url_help"))
    args = parser.parse_args()

    if not args.old:
        torch_reqs, reqs = torch_requirements, universal_requirements
        # neuraloperator is only added when the torch build found at the
        # given index is reported as compatible.
        if fno_compitable(args.index_url):
            reqs.append("neuraloperator==1.0.2")
            print(_i18n("fno_compatible_detected"))
    else:
        torch_reqs, reqs = torch_old_requirements, old_requirements
        print(_i18n("installing_old_deps"))

    if args.force:
        print(_i18n("force_install_warning"))

    install_uv()

    # (message key, packages, keyword options); only the torch stage uses
    # the custom index URL, and setuptools is always force-reinstalled.
    stages = (
        ("installing_torch", torch_reqs, {"force": args.force, "index_url": args.index_url}),
        ("installing_other_deps", reqs, {"force": args.force}),
        ("installing_setuptools", ["setuptools<76.0"], {"force": True}),
    )
    for message_key, packages, options in stages:
        print(_i18n(message_key))
        install_requirements(packages, **options)

    print(_i18n("installation_complete"))
|
mvsepless/namer.py
CHANGED
|
@@ -1,165 +1,165 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
from typing import Dict, Any, Optional, List
|
| 4 |
-
from i18n import _i18n
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
class Namer:
    """Helpers for sanitizing, shortening and templating file names."""

    def __init__(self, max_length: int = 255, offset: int = 10) -> None:
        """
        Initialize the Namer.

        Args:
            max_length: Maximum name length; values below 40 are raised to 40.
            offset: Safety margin subtracted from the maximum length to get
                ``safe_max_length``. Ignored when it is not smaller than the
                (clamped) maximum length.
        """
        self.max_length = max(max_length, 40)
        # Derive the safe length from the clamped value, not the raw argument,
        # so that both attributes always agree with each other.
        if offset < self.max_length:
            self.safe_max_length = self.max_length - offset
        else:
            self.safe_max_length = self.max_length

    @staticmethod
    def _elide(name: str, head: Any, tail: Any) -> str:
        """Join the first ``head`` and last ``tail`` characters of ``name`` with ``...``."""
        head = max(int(head), 0)
        tail = max(int(tail), 0)
        # name[-0:] would return the whole string, so tail == 0 is handled explicitly.
        suffix = name[-tail:] if tail else ""
        return f"{name[:head]}...{suffix}"

    @staticmethod
    def _keys_pattern(keys: Any) -> Optional["re.Pattern"]:
        """Compile an alternation of the non-empty keys (longest first), or None if there are none."""
        unique = sorted({str(k) for k in keys if str(k)}, key=lambda k: (-len(k), k))
        if not unique:
            return None
        return re.compile("|".join(re.escape(k) for k in unique))

    def sanitize(self, name: str) -> str:
        """
        Remove characters that are not allowed in file names.

        Args:
            name: Original name.

        Returns:
            The name with ``<>:"/\\|?*`` replaced by ``_``, runs of ``_``
            collapsed, and leading/trailing ``_``, ``.`` and spaces stripped.
        """
        sanitized = re.sub(r'[<>:"/\\|?*]', "_", name)
        sanitized = re.sub(r"_+", "_", sanitized)
        return sanitized.strip("_. ")

    def short(self, name: str, length: Optional[int] = None) -> str:
        """
        Shorten a long name by keeping its beginning and end.

        Args:
            name: Original name.
            length: Length threshold; when falsy, ``safe_max_length`` is used.

        Returns:
            ``name`` unchanged if it does not exceed the threshold, otherwise
            ``<head>...<tail>``. With an explicit ``length`` the head is
            ``length // 2`` and the tail ``length // 2.5`` characters; with the
            default threshold both are ``safe_max_length // 4``.
        """
        if length:
            if len(name) > length:
                return self._elide(name, length // 2, length // 2.5)
            return name
        if len(name) > self.safe_max_length:
            quarter = self.safe_max_length // 4
            return self._elide(name, quarter, quarter)
        return name

    def iter(self, filepath: str) -> str:
        """
        Build a unique file path by appending `` (n)`` while the path exists.

        Args:
            filepath: Original path.

        Returns:
            ``filepath`` if nothing exists there, otherwise the first
            ``<name> (n)<ext>`` in the same directory that does not exist.
        """
        if not os.path.exists(filepath):
            return filepath

        directory, filename = os.path.split(filepath)
        name, ext = os.path.splitext(filename)

        counter = 1
        while True:
            candidate = os.path.join(directory, f"{name} ({counter}){ext}")
            if not os.path.exists(candidate):
                return candidate
            counter += 1

    def template(self, template: str, **kwargs: Any) -> str:
        """
        Substitute keys in a template with their values.

        All keys are replaced in a single pass, so text inserted for one key
        is never re-scanned for other keys (e.g. a file name that happens to
        contain another key stays intact).

        Args:
            template: Template string.
            **kwargs: Key -> value pairs; values are converted with ``str``.

        Returns:
            The template with every key occurrence replaced.
        """
        pattern = self._keys_pattern(kwargs)
        if pattern is None:
            return template
        # A callable replacement avoids backslash-escape processing of values.
        return pattern.sub(lambda match: str(kwargs[match.group(0)]), template)

    def dedup_template(self, template: str, keys: Optional[List[str]] = None) -> str:
        """
        Remove repeated occurrences of keys from a template.

        Args:
            template: Template string.
            keys: Keys to deduplicate; None or empty leaves the template as is.

        Returns:
            The template where only the first occurrence of each key is kept.
        """
        pattern = self._keys_pattern(keys or [])
        if pattern is None:
            return template

        seen = set()

        def keep_first(match: "re.Match") -> str:
            key = match.group(0)
            if key in seen:
                return ""
            seen.add(key)
            return key

        return pattern.sub(keep_first, template)

    def short_input_name_template(self, template: str, **kwargs: Any) -> str:
        """
        Shorten the input file name so the filled template fits ``safe_max_length``.

        Args:
            template: Template string.
            **kwargs: Key -> value pairs; ``NAME`` holds the input file name.

        Returns:
            The (possibly shortened) ``NAME`` value, or ``""`` when no keys
            were given or ``NAME`` is missing/empty.
        """
        if not kwargs:
            print(_i18n("keys_required"))
            return ""

        input_file_name = kwargs.get("NAME", None)
        if not input_file_name:
            print(_i18n("name_key_missing"))
            return ""

        # Space taken by everything except the file name: the other values
        # plus the literal template text left after removing the keys.
        other_values_len = sum(len(str(value)) for key, value in kwargs.items() if key != "NAME")
        pattern = self._keys_pattern(kwargs)
        literal_text = pattern.sub("", template) if pattern is not None else template

        # Clamp at 0: a negative budget would turn the slices below into nonsense.
        free_length = max(self.safe_max_length - (other_values_len + len(literal_text)), 0)

        if len(input_file_name) > free_length:
            return self._elide(input_file_name, free_length // 2, free_length // 2.5)
        return input_file_name
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
from typing import Dict, Any, Optional, List
|
| 4 |
+
from i18n import _i18n
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class Namer:
    """Helpers for sanitizing, shortening and templating file names."""

    def __init__(self, max_length: int = 255, offset: int = 10) -> None:
        """
        Initialize the Namer.

        Args:
            max_length: Maximum name length; values below 40 are raised to 40.
            offset: Safety margin subtracted from the maximum length to get
                ``safe_max_length``. Ignored when it is not smaller than the
                (clamped) maximum length.
        """
        self.max_length = max(max_length, 40)
        # Derive the safe length from the clamped value, not the raw argument,
        # so that both attributes always agree with each other.
        if offset < self.max_length:
            self.safe_max_length = self.max_length - offset
        else:
            self.safe_max_length = self.max_length

    @staticmethod
    def _elide(name: str, head: Any, tail: Any) -> str:
        """Join the first ``head`` and last ``tail`` characters of ``name`` with ``...``."""
        head = max(int(head), 0)
        tail = max(int(tail), 0)
        # name[-0:] would return the whole string, so tail == 0 is handled explicitly.
        suffix = name[-tail:] if tail else ""
        return f"{name[:head]}...{suffix}"

    @staticmethod
    def _keys_pattern(keys: Any) -> Optional["re.Pattern"]:
        """Compile an alternation of the non-empty keys (longest first), or None if there are none."""
        unique = sorted({str(k) for k in keys if str(k)}, key=lambda k: (-len(k), k))
        if not unique:
            return None
        return re.compile("|".join(re.escape(k) for k in unique))

    def sanitize(self, name: str) -> str:
        """
        Remove characters that are not allowed in file names.

        Args:
            name: Original name.

        Returns:
            The name with ``<>:"/\\|?*`` replaced by ``_``, runs of ``_``
            collapsed, and leading/trailing ``_``, ``.`` and spaces stripped.
        """
        sanitized = re.sub(r'[<>:"/\\|?*]', "_", name)
        sanitized = re.sub(r"_+", "_", sanitized)
        return sanitized.strip("_. ")

    def short(self, name: str, length: Optional[int] = None) -> str:
        """
        Shorten a long name by keeping its beginning and end.

        Args:
            name: Original name.
            length: Length threshold; when falsy, ``safe_max_length`` is used.

        Returns:
            ``name`` unchanged if it does not exceed the threshold, otherwise
            ``<head>...<tail>``. With an explicit ``length`` the head is
            ``length // 2`` and the tail ``length // 2.5`` characters; with the
            default threshold both are ``safe_max_length // 4``.
        """
        if length:
            if len(name) > length:
                return self._elide(name, length // 2, length // 2.5)
            return name
        if len(name) > self.safe_max_length:
            quarter = self.safe_max_length // 4
            return self._elide(name, quarter, quarter)
        return name

    def iter(self, filepath: str) -> str:
        """
        Build a unique file path by appending `` (n)`` while the path exists.

        Args:
            filepath: Original path.

        Returns:
            ``filepath`` if nothing exists there, otherwise the first
            ``<name> (n)<ext>`` in the same directory that does not exist.
        """
        if not os.path.exists(filepath):
            return filepath

        directory, filename = os.path.split(filepath)
        name, ext = os.path.splitext(filename)

        counter = 1
        while True:
            candidate = os.path.join(directory, f"{name} ({counter}){ext}")
            if not os.path.exists(candidate):
                return candidate
            counter += 1

    def template(self, template: str, **kwargs: Any) -> str:
        """
        Substitute keys in a template with their values.

        All keys are replaced in a single pass, so text inserted for one key
        is never re-scanned for other keys (e.g. a file name that happens to
        contain another key stays intact).

        Args:
            template: Template string.
            **kwargs: Key -> value pairs; values are converted with ``str``.

        Returns:
            The template with every key occurrence replaced.
        """
        pattern = self._keys_pattern(kwargs)
        if pattern is None:
            return template
        # A callable replacement avoids backslash-escape processing of values.
        return pattern.sub(lambda match: str(kwargs[match.group(0)]), template)

    def dedup_template(self, template: str, keys: Optional[List[str]] = None) -> str:
        """
        Remove repeated occurrences of keys from a template.

        Args:
            template: Template string.
            keys: Keys to deduplicate; None or empty leaves the template as is.

        Returns:
            The template where only the first occurrence of each key is kept.
        """
        pattern = self._keys_pattern(keys or [])
        if pattern is None:
            return template

        seen = set()

        def keep_first(match: "re.Match") -> str:
            key = match.group(0)
            if key in seen:
                return ""
            seen.add(key)
            return key

        return pattern.sub(keep_first, template)

    def short_input_name_template(self, template: str, **kwargs: Any) -> str:
        """
        Shorten the input file name so the filled template fits ``safe_max_length``.

        Args:
            template: Template string.
            **kwargs: Key -> value pairs; ``NAME`` holds the input file name.

        Returns:
            The (possibly shortened) ``NAME`` value, or ``""`` when no keys
            were given or ``NAME`` is missing/empty.
        """
        if not kwargs:
            print(_i18n("keys_required"))
            return ""

        input_file_name = kwargs.get("NAME", None)
        if not input_file_name:
            print(_i18n("name_key_missing"))
            return ""

        # Space taken by everything except the file name: the other values
        # plus the literal template text left after removing the keys.
        other_values_len = sum(len(str(value)) for key, value in kwargs.items() if key != "NAME")
        pattern = self._keys_pattern(kwargs)
        literal_text = pattern.sub("", template) if pattern is not None else template

        # Clamp at 0: a negative budget would turn the slices below into nonsense.
        free_length = max(self.safe_max_length - (other_values_len + len(literal_text)), 0)

        if len(input_file_name) > free_length:
            return self._elide(input_file_name, free_length // 2, free_length // 2.5)
        return input_file_name
|
mvsepless/separator.py
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
mvsepless/vbachgen.py
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|