mvsepless_cpu

Sleeping

App Files Files Community

noblebarkrr committed on Mar 21

Commit

32fefdb

verified ·

1 Parent(s): 483f7fe

Upload 14 files

Browse files

Files changed (10) hide show

mvsepless/additional_app.py +0 -0
mvsepless/app.py +58 -47
mvsepless/audio.py +1502 -1502
mvsepless/custom_models.json +28 -0
mvsepless/i18n.py +0 -0
mvsepless/infer_utils.py +824 -824
mvsepless/install.py +355 -355
mvsepless/namer.py +164 -164
mvsepless/separator.py +0 -0
mvsepless/vbachgen.py +0 -0

mvsepless/additional_app.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

mvsepless/app.py CHANGED Viewed

@@ -344,7 +344,8 @@ class SeparatorGradio(GradioHelper, DownloadModelManager):
         theme: gr.Theme,
         add_app: bool = True,
         plugins: bool = True,
-        add_vbach: bool = False
     ) -> gr.Blocks:
         """
         Создать пользовательский интерфейс
@@ -866,56 +867,66 @@ class SeparatorGradio(GradioHelper, DownloadModelManager):
                                 return gr.update(value="")
             # Вкладка менеджера моделей
-            with gr.Tab(_i18n("tab_model_manager")):
-                with gr.Tab(_i18n("tab_download_model")):
-                    with gr.Group():
-                        select_dwm_preset = gr.Dropdown(
-                            label=_i18n("select_preset"),
-                            interactive=True,
-                            choices=list(self.dwm_presets.keys()),
-                            value=None,
-                        )
-                        select_dwm_names = gr.Dropdown(
-                            label=_i18n("select_models"),
-                            interactive=True,
-                            choices=default_model,
-                            value=[],
-                            multiselect=True
-                        )
-                        dwm_status = gr.Textbox(
-                            container=False,
-                            lines=3,
-                            interactive=False,
-                            max_lines=3,
-                            visible=False
-                        )
-                        download_dwm_button = gr.Button(_i18n("download_btn"))
-                        select_dwm_preset.change(
-                            lambda x: gr.update(value=self.parse_models_from_dwm_preset(x)),
-                            inputs=select_dwm_preset,
-                            outputs=select_dwm_names,
-                            trigger_mode="once"
-                        )
-                        download_dwm_button.click(
-                            lambda: gr.update(visible=True),
-                            outputs=dwm_status
-                        ).then(
-                            lambda x: (self.batch_download(x), gr.update(visible=False)),
-                            inputs=select_dwm_names,
-                            outputs=[gr.State(None), dwm_status]
-                        )
-                with gr.Tab(_i18n("tab_delete_models")):
-                    gr.Markdown(f"<h3><center>{_i18n('delete_all_warning')}</center></h3>")
-                    delete_models_cache_btn = gr.Button(_i18n("delete_all_btn"))
-                    delete_models_cache_btn.click(self.delete_models_cache, inputs=None, outputs=None)
             # Импорт дополнительных модулей
-            from additional_app import AutoEnsembless, ManualEnsembless, PluginManager, Inverter_UI, AudioApp
             if add_app:
                 with gr.Tab(_i18n("tab_audio_processing")):
                     _audio_app = AudioApp(user_directory)
                     _audio_app.UI()

         theme: gr.Theme,
         add_app: bool = True,
         plugins: bool = True,
+        add_vbach: bool = False,
+        model_manager_add: bool = True
     ) -> gr.Blocks:
         """
         Создать пользовательский интерфейс
                                 return gr.update(value="")
             # Вкладка менеджера моделей
+            if model_manager_add:
+                with gr.Tab(_i18n("tab_model_manager")):
+                    with gr.Tab(_i18n("tab_download_model")):
+                        with gr.Group():
+                            select_dwm_preset = gr.Dropdown(
+                                label=_i18n("select_preset"),
+                                interactive=True,
+                                choices=list(self.dwm_presets.keys()),
+                                value=None,
+                            )
+                            select_dwm_names = gr.Dropdown(
+                                label=_i18n("select_models"),
+                                interactive=True,
+                                choices=default_model,
+                                value=[],
+                                multiselect=True
+                            )
+                            dwm_status = gr.Textbox(
+                                container=False,
+                                lines=3,
+                                interactive=False,
+                                max_lines=3,
+                                visible=False
+                            )
+                            download_dwm_button = gr.Button(_i18n("download_btn"))
+                            select_dwm_preset.change(
+                                lambda x: gr.update(value=self.parse_models_from_dwm_preset(x)),
+                                inputs=select_dwm_preset,
+                                outputs=select_dwm_names,
+                                trigger_mode="once"
+                            )
+                            download_dwm_button.click(
+                                lambda: gr.update(visible=True),
+                                outputs=dwm_status
+                            ).then(
+                                lambda x: (self.batch_download(x), gr.update(visible=False)),
+                                inputs=select_dwm_names,
+                                outputs=[gr.State(None), dwm_status]
+                            )
+                    with gr.Tab(_i18n("tab_delete_models")):
+                        gr.Markdown(f"<h3><center>{_i18n('delete_all_warning')}</center></h3>")
+                        delete_models_cache_btn = gr.Button(_i18n("delete_all_btn"), variant="stop")
+                        delete_models_cache_btn.click(self.delete_models_cache, inputs=None, outputs=None)
             # Импорт дополнительных модулей
+            from additional_app import AutoEnsembless, ManualEnsembless, PluginManager, Inverter_UI, AudioApp, CustomSeparator
             if add_app:
+                with gr.Tab(_i18n("tab_custom_separation")):
+                    _custom_sep = CustomSeparator(
+                        self.input_files,
+                        self.upload_files,
+                        user_directory,
+                        device=self.device,
+                        history=self.history
+                    )
+                    _custom_sep.UI()
                 with gr.Tab(_i18n("tab_audio_processing")):
                     _audio_app = AudioApp(user_directory)
                     _audio_app.UI()

mvsepless/audio.py CHANGED Viewed

@@ -1,1503 +1,1503 @@
-import os
-import subprocess
-import numpy as np
-from gradio_helper import str2bool
-from scipy.signal import ShortTimeFFT, resample
-from scipy.signal.windows import dpss, hann
-from numpy.typing import DTypeLike
-from typing import List, Tuple, Optional, Union, Dict, Any, Callable
-from i18n import _i18n
-ffmpeg_path = "ffmpeg"
-ffprobe_path = "ffprobe"
-n_fft = 4096
-hop = 1024
-def average(*ints: Union[int, float]) -> float:
-    """
-    Вычислить среднее арифметическое
-    Args:
-        *ints: Числа для усреднения
-    Returns:
-        Среднее значение
-    """
-    numbers = len(ints)
-    return sum(ints) / numbers
-def check_installed() -> None:
-    """Проверить наличие ffmpeg и ffprobe"""
-    try:
-        ffmpeg_version_output = subprocess.check_output(
-            [ffmpeg_path, "-version"], text=True
-        )
-        print(_i18n("ffmpeg_found"))
-    except:
-        print(_i18n("ffmpeg_not_found"))
-    try:
-        ffprobe_version_output = subprocess.check_output(
-            [ffprobe_path, "-version"], text=True
-        )
-        print(_i18n("ffprobe_found"))
-    except:
-        print(_i18n("ffprobe_not_found"))
-def get_ogg_bitrate(sample_rate: int, channels: int = 2) -> int:
-    """
-    Определяет рекомендуемый битрейт для OGG на основе частоты дискретизации
-    Args:
-        sample_rate: Частота дискретизации
-        channels: Количество каналов
-    Returns:
-        Рекомендуемый битрейт
-    """
-    if sample_rate >= 40000:
-        per_channel = 240
-    elif sample_rate >= 26000:
-        per_channel = 190
-    elif sample_rate >= 15000:
-        per_channel = 90
-    elif sample_rate >= 9000:
-        per_channel = 50
-    elif sample_rate >= 8000:
-        per_channel = 42
-    else:
-        per_channel = 30
-    return int(per_channel * channels)
-SAMPLE_FORMATS_DICT: Dict[Union[str, type], str] = {
-    "int16": "s16le",
-    "int32": "s32le",
-    "float32": "f32le",
-    "float64": "f64le",
-    np.int16: "s16le",
-    np.int32: "s32le",
-    np.float32: "f32le",
-    np.float64: "f64le",
-}
-audio_formats: List[str] = [
-    'aac', 'ac3', 'ac4', 'adts', 'aiff', 'au', 'caf', 'dts', 'eac3',
-    'flac', 'm4a', 'mp3', 'mp2', 'ogg', 'oga', 'opus', 'ra', 'raw',
-    'snd', 'voc', 'wav', 'wma', 'wv'
-]
-video_formats_with_audio: List[str] = [
-    '3gp', '3g2', 'asf', 'avi', 'flv', 'f4v', 'm4v', 'mkv', 'mov',
-    'mp4', 'mpeg', 'mpg', 'mts', 'mxf', 'ogv', 'rm', 'rmvb', 'ts',
-    'vob', 'webm', 'wmv'
-]
-input_formats: List[str] = video_formats_with_audio + audio_formats
-output_formats: List[str] = [
-    "mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff", "wma"
-]
-input_extensions: List[str] = [f".{of}" for of in input_formats]
-output_extensions: List[str] = [f".{of}" for of in output_formats]
-codec_args: Dict[str, Dict[bool, List[str]]] = {
-    ".mp3": {
-        True: ["-c:a", "libmp3lame", "-sample_fmt", "fltp"],
-        False: ["-c:a", "libmp3lame", "-sample_fmt", "s16p"]
-    },
-    ".wav": {
-        True: ["-c:a", "pcm_f32le", "-sample_fmt", "flt"],
-        False: ["-c:a", "pcm_s16le", "-sample_fmt", "s16"]
-    },
-    ".flac": {
-        True: ["-c:a", "flac", "-sample_fmt", "s32"],
-        False: ["-c:a", "flac", "-sample_fmt", "s16"]
-    },
-    ".ogg": {
-        True: ["-c:a", "libvorbis", "-sample_fmt", "fltp"],
-        False: ["-c:a", "libvorbis", "-sample_fmt", "fltp"]
-    },
-    ".opus": {
-        True: ["-c:a", "libopus", "-sample_fmt", "flt"],
-        False: ["-c:a", "libopus", "-sample_fmt", "s16"]
-    },
-    ".m4a": {
-        True: ["-c:a", "aac", "-sample_fmt", "fltp"],
-        False: ["-c:a", "aac", "-sample_fmt", "fltp"]
-    },
-    ".aac": {
-        True: ["-c:a", "aac", "-sample_fmt", "fltp"],
-        False: ["-c:a", "aac", "-sample_fmt", "fltp"]
-    },
-    ".ac3": {
-        True: ["-c:a", "ac3", "-sample_fmt", "fltp"],
-        False: ["-c:a", "ac3", "-sample_fmt", "fltp"]
-    },
-    ".aiff": {
-        True: ["-c:a", "pcm_f32be", "-sample_fmt", "flt"],
-        False: ["-c:a", "pcm_s16be", "-sample_fmt", "s16"]
-    },
-    ".wma": {
-        True: ["-c:a", "wmav2", "-sample_fmt", "fltp"],
-        False: ["-c:a", "wmav2", "-sample_fmt", "fltp"]
-    }
-}
-def get_codec_args(extension: str, prefer_float: bool) -> List[str]:
-    """
-    Получить аргументы кодека для FFmpeg
-    Args:
-        extension: Расширение файла
-        prefer_float: Предпочитать float формат
-    Returns:
-        Список аргументов FFmpeg
-    """
-    if extension not in codec_args:
-        return []
-    return codec_args[extension][prefer_float]
-allowed_chars: str = r"1234567890"
-def sanitize_output(output: str) -> str:
-    """
-    Очистить вывод от посторонних символов
-    Args:
-        output: Выходная строка
-    Returns:
-        Очищенная строка
-    """
-    return "".join([char for char in output if char in allowed_chars])
-def get_sr(path: str, stream: int = 0) -> int:
-    """
-    Получить частоту дискретизации аудиофайла
-    Args:
-        path: Путь к файлу
-        stream: Номер аудиопотока
-    Returns:
-        Частота дискретизации
-    """
-    cmd = [ffprobe_path, "-i", path, "-v", "quiet", "-hide_banner",
-           "-show_entries", "stream=sample_rate", "-select_streams", f"a:{stream}",
-           "-of", "compact=p=0:nk=1"]
-    process = subprocess.Popen(
-        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
-    )
-    stdout, stderr = process.communicate()
-    sample_rate = stdout.decode('utf-8').strip()
-    sample_rate = sanitize_output(sample_rate)
-    if sample_rate.isdigit():
-        return int(sample_rate)
-    else:
-        print(_i18n("sr_read_error", path=path))
-        return 0
-def get_channels(path: str, stream: int = 0) -> int:
-    """
-    Получить количество каналов аудиофайла
-    Args:
-        path: Путь к файлу
-        stream: Номер аудиопотока
-    Returns:
-        Количество каналов
-    """
-    cmd = [ffprobe_path, "-i", path, "-v", "quiet", "-hide_banner",
-           "-show_entries", "stream=channels", "-select_streams", f"a:{stream}",
-           "-of", "compact=p=0:nk=1"]
-    process = subprocess.Popen(
-        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
-    )
-    stdout, stderr = process.communicate()
-    channels = stdout.decode('utf-8').strip()
-    channels = sanitize_output(channels)
-    if channels.isdigit():
-        return int(channels)
-    else:
-        print(_i18n("channels_read_error", path=path))
-        return 0
-def check(path: str) -> bool:
-    """
-    Проверить, является ли файл валидным аудио
-    Args:
-        path: Путь к файлу
-    Returns:
-        True если файл содержит аудио
-    """
-    channels = get_channels(path)
-    sr = get_sr(path)
-    return channels != 0 and sr != 0
-def read(
-    path: str,
-    sr: Optional[int] = None,
-    mono: bool = False,
-    dtype: DTypeLike = "float32",
-    multi_channel: bool = False,
-    num_channels: int = 2,
-    stream: int = 0,
-    flatten: bool = False
-) -> Tuple[np.ndarray, int]:
-    """
-    Прочитать аудиофайл
-    Args:
-        path: Путь к файлу
-        sr: Частота дискретизации
-        mono: Читать как моно
-        dtype: Тип данных
-        multi_channel: Многоканальный режим
-        num_channels: Количество каналов
-        stream: Номер аудиопотока
-        flatten: Вернуть плоский массив
-    Returns:
-        Кортеж (аудиоданные, частота дискретизации)
-    """
-    output_format = SAMPLE_FORMATS_DICT.get(dtype, None)
-    if not sr:
-        sr = get_sr(path, stream)
-    channels = 1 if mono else (get_channels(path, stream) if multi_channel else num_channels)
-    if not output_format:
-        output_format = "f32le"
-        cmd = [ffmpeg_path, "-i", path, "-map", f"0:a:{stream}", "-vn",
-               "-f", output_format, "-ac", str(channels), "-ar", str(sr), "-"]
-        process = subprocess.Popen(
-            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=10**8
-        )
-        stdout, stderr = process.communicate()
-        y = np.frombuffer(stdout, dtype=np.float32)
-        y = convert_to_dtype(y, dtype)
-    else:
-        cmd = [ffmpeg_path, "-i", path, "-map", f"0:a:{stream}", "-vn",
-               "-f", output_format, "-ac", str(channels), "-ar", str(sr), "-"]
-        process = subprocess.Popen(
-            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=10**8
-        )
-        stdout, stderr = process.communicate()
-        y = np.frombuffer(stdout, dtype=dtype)
-    if mono:
-        if flatten:
-            y = y.flatten()
-        else:
-            y = y.reshape((-1, 1)).T
-    else:
-        y = y.reshape((-1, channels)).T
-    return y.copy(), sr
-def multiread(
-    paths: Union[List[str], Tuple[str, ...]],
-    *args,
-    **kwargs
-) -> Tuple[List[np.ndarray], List[int]]:
-    """
-    Прочитать несколько аудиофайлов
-    Args:
-        paths: Список путей к файлам
-        *args: Аргументы для read
-        **kwargs: Именованные аргументы для read
-    Returns:
-        Кортеж (список аудиоданных, список частот дискретизации)
-    """
-    readed_files = []
-    srs = []
-    len_arrays = len(paths)
-    for i, path in enumerate(paths, start=1):
-        array, sr = read(path, *args, **kwargs)
-        readed_files.append(array)
-        srs.append(sr)
-        print(_i18n("reading_progress", current=i, total=len_arrays), end="\r")
-    print("")
-    return readed_files, srs
-def bitrate_to_int(a: Union[str, int, float]) -> int:
-    """
-    Преобразовать битрейт в целое число
-    Args:
-        a: Битрейт в виде строки или числа
-    Returns:
-        Битрейт как целое число
-    """
-    if isinstance(a, str):
-        if a.endswith(("k", "K")):
-            numeric_part = a[:-1]
-            if numeric_part.isdigit():
-                return int(numeric_part)
-            else:
-                print(_i18n("invalid_bitrate", bitrate=a))
-                return 320
-        else:
-            if a.isdigit():
-                return int(a)
-            else:
-                print(_i18n("invalid_bitrate", bitrate=a))
-                return 320
-    elif isinstance(a, (int, float)):
-        return int(a)
-    else:
-        return 320
-def get_info_array(y: np.ndarray) -> Tuple[int, int, Optional[int], bool]:
-    """
-    Получить информацию об аудио массиве
-    Args:
-        y: Аудио массив
-    Returns:
-        Кортеж (количество каналов, количество сэмплов, индекс оси, флаг flatten)
-    """
-    if y.ndim == 1:
-        flatten = True
-        channels = 1
-        samples = len(y)
-        array_index = -1
-    elif y.ndim == 2:
-        flatten = False
-        if y.shape[0] < y.shape[1]:
-            channels = y.shape[0]
-            samples = y.shape[1]
-            array_index = 1
-        else:
-            channels = y.shape[1]
-            samples = y.shape[0]
-            array_index = 0
-    else:
-        raise ValueError(_i18n("array_dim_error"))
-    return channels, samples, array_index, flatten
-def get_axis_from_array_index(index: int) -> int:
-    """
-    Получить ось для операций на основе индекса массива
-    Args:
-        index: Индекс массива
-    Returns:
-        Номер оси
-    """
-    if index == -1:
-        return -1
-    elif index == 1:
-        return 0
-    elif index == 0:
-        return 1
-    else:
-        return -1
-def get_duration_from_array(y: np.ndarray, sr: Optional[int] = None) -> Union[float, int]:
-    """
-    Получить длительность аудио из массива
-    Args:
-        y: Аудио массив
-        sr: Частота дискретизации
-    Returns:
-        Длительность в секундах или количество сэмплов
-    """
-    len_samples: int = get_info_array(y)[1]
-    if sr is not None:
-        return len_samples / sr
-    else:
-        return len_samples
-def is_float(y: np.ndarray) -> bool:
-    """
-    Проверить, является ли массив float типом
-    Args:
-        y: Аудио массив
-    Returns:
-        True если тип float
-    """
-    return np.issubdtype(y.dtype, np.floating)
-def is_float_dtype(dtype: DTypeLike) -> bool:
-    """
-    Проверить, является ли тип данных float
-    Args:
-        dtype: Тип данных
-    Returns:
-        True если тип float
-    """
-    return np.issubdtype(dtype, np.floating)
-def float_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
-    """
-    Преобразовать float массив в целочисленный
-    Args:
-        y: Float массив
-        dtype: Целевой тип данных
-    Returns:
-        Целочисленный массив
-    """
-    info = np.iinfo(dtype)
-    min_val = info.min
-    max_val = info.max
-    if min_val < 0:
-        y_scaled = y * max_val
-        y_rounded = np.round(y_scaled)
-        y_clipped = np.clip(y_rounded, min_val, max_val)
-        return y_clipped.astype(dtype)
-    elif min_val == 0:
-        y_normalized = (y + 1) / 2
-        y_scaled = y_normalized * max_val
-        y_rounded = np.round(y_scaled)
-        y_clipped = np.clip(y_rounded, 0, max_val)
-        return y_clipped.astype(dtype)
-    else:
-        raise ValueError(_i18n("unexpected_min_val", value=min_val))
-def int_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
-    """
-    Преобразовать целочисленный массив в другой целочисленный тип
-    Args:
-        y: Целочисленный массив
-        dtype: Целевой тип данных
-    Returns:
-        Преобразованный массив
-    """
-    info_dst = np.iinfo(dtype)
-    info_src = np.iinfo(y.dtype)
-    y_float = y.astype(np.float64)
-    src_range = info_src.max - info_src.min
-    dst_range = info_dst.max - info_dst.min
-    if src_range == 0:
-        return np.full_like(y, info_dst.min, dtype=dtype)
-    y_scaled = (y_float - info_src.min) * (dst_range / src_range) + info_dst.min
-    y_rounded = np.round(y_scaled)
-    y_clipped = np.clip(y_rounded, info_dst.min, info_dst.max)
-    return y_clipped.astype(dtype)
-def int_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
-    """
-    Преобразовать целочисленный массив в float
-    Args:
-        y: Целочисленный массив
-        dtype: Целевой тип данных
-    Returns:
-        Float массив
-    """
-    info = np.iinfo(y.dtype)
-    if info.min == 0:
-        y_normalized = (y.astype(np.float64) + -int(average(info.min, info.max))) / info.max
-    elif info.min < 0:
-        abs_max = max(abs(info.min), abs(info.max))
-        y_normalized = y.astype(np.float64) / abs_max
-    else:
-        raise ValueError(_i18n("unexpected_min_val", value=info.min))
-    return y_normalized.astype(dtype)
-def float_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
-    """
-    Преобразовать float массив в другой float тип
-    Args:
-        y: Float массив
-        dtype: Целевой тип данных
-    Returns:
-        Преобразованный массив
-    """
-    return y.astype(dtype)
-def get_center_value_from_dtype(dtype: DTypeLike) -> int:
-    """
-    Получить центральное значение для типа данных
-    Args:
-        dtype: Тип данных
-    Returns:
-        Центральное значение
-    """
-    if is_float_dtype(dtype):
-        return 0
-    else:
-        info = np.iinfo(dtype)
-        return int(average(info.min, info.max))
-def convert_to_dtype(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
-    """
-    Преобразовать массив в указанный тип данных
-    Args:
-        y: Входной массив
-        dtype: Целевой тип данных
-    Returns:
-        Преобразованный массив
-    """
-    if is_float(y):
-        if is_float_dtype(dtype):
-            return float_to_float(y, dtype)
-        else:
-            return float_to_int(y, dtype)
-    else:
-        if is_float_dtype(dtype):
-            return int_to_float(y, dtype)
-        else:
-            return int_to_int(y, dtype)
-def dc_offset(y: np.ndarray, offset: Union[float, int]) -> np.ndarray:
-    """
-    Добавить смещение постоянного тока
-    Args:
-        y: Аудио массив
-        offset: Смещение
-    Returns:
-        Массив со смещением
-    """
-    orig_dtype = y.dtype
-    y = convert_to_dtype(y, np.float32)
-    y = y + offset
-    return convert_to_dtype(y, orig_dtype)
-def gain(y: np.ndarray, gain_value: Union[float, int]) -> np.ndarray:
-    """
-    Применить усиление к аудио
-    Args:
-        y: Аудио массив
-        gain_value: Коэффициент усиления
-    Returns:
-        Усиленный массив
-    """
-    orig_dtype = y.dtype
-    y = convert_to_dtype(y, np.float32)
-    y = y * gain_value
-    return convert_to_dtype(y, orig_dtype)
-def normalize(y: np.ndarray, target_peak: Union[float, int] = 1.0) -> np.ndarray:
-    """
-    Нормализовать аудио по пиковому значению
-    Args:
-        y: Аудио массив
-        target_peak: Целевое пиковое значение
-    Returns:
-        Нормализованный массив
-    """
-    orig_dtype = y.dtype
-    y = convert_to_dtype(y, np.float32)
-    current_peak = np.max(np.abs(y))
-    if current_peak > 0:
-        scaling_factor = target_peak / current_peak
-        y = y * scaling_factor
-    return convert_to_dtype(y, orig_dtype)
-def create_zero_array(samples: int, dtype: DTypeLike) -> np.ndarray:
-    """
-    Создать массив нулей с центром для типа данных
-    Args:
-        samples: Количество сэмплов
-        dtype: Тип данных
-    Returns:
-        Массив нулей
-    """
-    return np.array([get_center_value_from_dtype(dtype) for _c in range(samples)], dtype=dtype)
-def split_channels(y: np.ndarray) -> Tuple[np.ndarray, ...]:
-    """
-    Разделить многоканальное аудио на отдельные каналы
-    Args:
-        y: Аудио массив
-    Returns:
-        Кортеж массивов каналов
-    """
-    channels, samples, array_index, flatten = get_info_array(y)
-    channels_arrays = []
-    if not flatten:
-        if array_index == 1:
-            for ch in range(channels):
-                channels_arrays.append(y[ch, :])
-        else:
-            for ch in range(channels):
-                channels_arrays.append(y[:, ch])
-        return tuple(channels_arrays)
-    else:
-        return (y,)
-from scipy.signal import windows
-def get_stft_obj(sr: int, n_fft: int, hop: int) -> ShortTimeFFT:
-    """
-    Создает STFT с окном DPSS для сверхточного разделения частот
-    Args:
-        sr: Частота дискретизации
-        n_fft: Размер FFT
-        hop: Шаг
-    Returns:
-        Объект ShortTimeFFT
-    """
-    win_dpss = str2bool(os.environ.get("MVSEPLESS_DPSS", "False"))
-    if win_dpss:
-        win = dpss(n_fft, NW=3, sym=False)
-    else:
-        win = hann(n_fft, sym=False)
-    return ShortTimeFFT(win, hop=hop, fs=sr, scale_to='magnitude', phase_shift=None)
-def split_mid_side(
-    y: np.ndarray,
-    var: int = 1,
-    sr: Optional[int] = None
-) -> Tuple[np.ndarray, np.ndarray]:
-    """
-    Разделить стерео на Mid/Side
-    Args:
-        y: Аудио массив
-        var: Вариант разделения (0-4)
-        sr: Частота дискретизации
-    Returns:
-        Кортеж (mid, side)
-    """
-    channels, samples, array_index, flatten = get_info_array(y)
-    axis = get_axis_from_array_index(array_index)
-    if channels != 2:
-        raise Exception(_i18n("stereo_required"))
-    orig_dtype = y.dtype
-    y = convert_to_dtype(y, np.float32)
-    channels_arrays = split_channels(y)
-    left_channel = channels_arrays[0]
-    right_channel = channels_arrays[1]
-    mid_channel_one = (left_channel * 0.5) + (right_channel * 0.5)
-    if var == 0:
-        print(_i18n("mid_side_var0"))
-        side_channel = np.stack([(left_channel + -mid_channel_one), (right_channel + -mid_channel_one)], axis=axis)
-        mid_channel = y + -side_channel
-    elif var == 1:
-        print(_i18n("mid_side_var1"))
-        mid_channel = np.stack([mid_channel_one, mid_channel_one], axis=axis)
-        side_channel = y + -mid_channel
-    elif var == 2:
-        print(_i18n("mid_side_var2"))
-        same_sign = (left_channel * right_channel) > 0
-        center_mono = np.where(
-            same_sign,
-            np.minimum(np.abs(left_channel), np.abs(right_channel)) * np.sign(left_channel),
-            0.0
-        )
-        mid_channel = np.stack([center_mono, center_mono], axis=axis)
-        stereo_L = left_channel - center_mono
-        stereo_R = right_channel - center_mono
-        side_channel = np.stack([stereo_L, stereo_R], axis=axis)
-    elif var == 3:
-        print(_i18n("mid_side_var3"))
-        if not sr:
-            raise Exception(_i18n("sr_required"))
-        sft = get_stft_obj(sr, n_fft=n_fft, hop=hop)
-        y_float = convert_to_dtype(y, np.float32)
-        channels = split_channels(y_float)
-        # Получаем спектры левого и правого каналов
-        Lf = sft.stft(channels[0])
-        Rf = sft.stft(channels[1])
-        # Вычисляем схожесть (когерентность)
-        similarity_L = np.real(Lf * np.conj(Rf))
-        similarity_R = np.real(Rf * np.conj(Lf))
-        mask_l = similarity_L > 0
-        mask_r = similarity_R > 0
-        magL = np.abs(Lf)
-        magR = np.abs(Rf)
-        magC_L = np.minimum(magL, magR) * mask_l
-        magC_R = np.minimum(magL, magR) * mask_r
-        C_L = magC_L * np.exp(1j * np.angle(Rf))
-        C_R = magC_R * np.exp(1j * np.angle(Lf))
-        SL = Lf - C_L
-        SR = Rf - C_R
-        len_orig = y.shape[-1]
-        center_l = sft.istft(C_L, k1=len_orig)
-        center_r = sft.istft(C_R, k1=len_orig)
-        side_l = sft.istft(SL, k1=len_orig)
-        side_r = sft.istft(SR, k1=len_orig)
-        mid_ch = multi_channel_array_from_arrays(center_l, center_r, index=array_index, dtype=y.dtype)
-        side_ch = multi_channel_array_from_arrays(side_l, side_r, index=array_index, dtype=y.dtype)
-        return mid_ch, side_ch
-    elif var == 4:
-        print(_i18n("mid_side_var4"))
-        mid_channel = mid_channel_one
-        side_channel = left_channel + -right_channel
-    else:
-        raise ValueError(_i18n("unknown_var", var=var))
-    return convert_to_dtype(mid_channel, orig_dtype), convert_to_dtype(side_channel, orig_dtype)
-def mid_side_to_stereo(
-    y: np.ndarray,
-    z: np.ndarray,
-    index: int = -1,
-    dtype: DTypeLike = np.float32
-) -> np.ndarray:
-    """
-    Преобразовать Mid/Side обратно в стерео
-    Args:
-        y: Mid канал
-        z: Side канал
-        index: Индекс оси
-        dtype: Тип данных
-    Returns:
-        Стерео массив
-    """
-    y, z = convert_to_dtype(y, np.float32), convert_to_dtype(z, np.float32)
-    mid = multi_channel_array_from_arrays(y, y, index=index, dtype=np.float32)
-    side = multi_channel_array_from_arrays(z, -z, index=index, dtype=np.float32)
-    return convert_to_dtype(mid + side, dtype)
-def mono_to_stereo(
-    y: np.ndarray,
-    index: int,
-    num_channels: int = 2
-) -> np.ndarray:
-    """
-    Преобразовать моно в стерео
-    Args:
-        y: Моно массив
-        index: Индекс оси
-        num_channels: Количество каналов
-    Returns:
-        Стерео массив
-    """
-    channels, samples, array_index, flatten = get_info_array(y)
-    axis = get_axis_from_array_index(array_index)
-    new_axis = get_axis_from_array_index(index)
-    orig_dtype = y.dtype
-    if channels == 1:
-        if flatten:
-            return np.stack([y for _c in range(num_channels)], axis=new_axis, dtype=orig_dtype)
-        else:
-            return np.stack([y.flatten() for _c in range(num_channels)], axis=new_axis, dtype=orig_dtype)
-    else:
-        if num_channels <= channels:
-            return y
-        else:
-            for _i in range(num_channels - channels):
-                y = np.append(y, create_zero_array(samples, orig_dtype), axis=new_axis)
-            return y
-def stereo_to_mono(y: np.ndarray, to_flatten: bool = False) -> np.ndarray:
-    """
-    Преобразовать стерео в моно
-    Args:
-        y: Стерео массив
-        to_flatten: Вернуть плоский массив
-    Returns:
-        Моно массив
-    """
-    channels, samples, array_index, flatten = get_info_array(y)
-    orig_dtype = y.dtype
-    y = convert_to_dtype(y, np.float32)
-    if channels > 1:
-        mono = create_zero_array(samples, np.float64)
-        for ch in split_channels(y):
-            mono = mono + gain(ch, (1 / channels))
-        if not to_flatten:
-            if array_index == 0:
-                return mono.reshape((1, -1))
-            else:
-                return mono.reshape((-1, 1))
-        else:
-            return mono
-    else:
-        return y
def multi_channel_array_from_arrays(
    *arrays: np.ndarray,
    index: int = -1,
    dtype: DTypeLike
) -> np.ndarray:
    """
    Stack separate channel arrays into one multi-channel array.

    Args:
        *arrays: One array per channel.
        index: Array index, translated to a stacking axis by
            get_axis_from_array_index.
        dtype: Data type every channel is converted to before stacking.
    Returns:
        The stacked array with the requested dtype.
    """
    converted = [convert_to_dtype(channel, dtype) for channel in arrays]
    stack_axis = get_axis_from_array_index(index)
    return np.stack(converted, axis=stack_axis, dtype=dtype)
def reshape(y: np.ndarray, shape: Tuple[str, ...] = ("channels", "samples")) -> np.ndarray:
    """
    Bring an audio array into the requested layout.

    Args:
        y: Audio array.
        shape: One of ("channels", "samples"), ("samples", "channels")
            or ("samples",).
    Returns:
        The array in the requested layout.
    Raises:
        ValueError: If shape is not one of the supported layouts.
    """
    channels, samples, array_index, flatten = get_info_array(y)

    if shape == ("channels", "samples"):
        if array_index == 0:
            return y.T
        if array_index == 1:
            return y
        # NOTE(review): get_info_array reports -1 (not None) for 1-D input, so
        # this branch never fires and 1-D arrays fall through unchanged.
        if array_index is None and flatten:
            return y.reshape((-1, 1)).T
        return y if y.shape[0] == channels else y.T

    if shape == ("samples", "channels"):
        if array_index == 1:  # currently (channels, samples)
            return y.T
        if array_index == 0:  # already (samples, channels)
            return y
        if array_index == -1 and flatten:
            return y.reshape((-1, 1))
        return y if y.shape[0] == samples else y.T

    if shape == ("samples",):
        if flatten:
            return y
        if channels == 1:
            return y.flatten()
        return stereo_to_mono(y, to_flatten=True)

    raise ValueError(f"{_i18n('unknown_shape')}: {shape}")
def easy_resampler(y: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """
    Resample audio with scipy.signal.resample, keeping the input dtype.

    Args:
        y: Audio array.
        orig_sr: Sample rate of y.
        target_sr: Desired sample rate.
    Returns:
        The resampled array, converted back to the dtype of y.
    """
    _, samples, sample_axis, _ = get_info_array(y)
    orig_dtype = y.dtype
    ratio = float(target_sr) / orig_sr
    n_samples = int(np.ceil(samples * ratio))
    # Resample in float64. For integer PCM the resampler would otherwise return
    # floats on the integer scale, which the float->int conversion below would
    # multiply by the integer maximum a second time and clip.
    as_float = convert_to_dtype(y, np.float64)
    resampled = resample(as_float, n_samples, axis=sample_axis)
    return convert_to_dtype(resampled, orig_dtype)
def add_zero_to_end(y: np.ndarray, max_samples: int) -> np.ndarray:
    """
    Pad the end of the array with silence up to max_samples.

    Args:
        y: Audio array.
        max_samples: Target number of samples.
    Returns:
        The padded array, or the result of trim(y, 0, max_samples) when the
        array is not shorter than max_samples.
    """
    _, samples, sample_axis, is_flat = get_info_array(y)
    # "Silence" is the dtype's centre value (0 for floats).
    fill = get_center_value_from_dtype(y.dtype)
    if samples >= max_samples:
        return trim(y, 0, max_samples)
    missing = max_samples - samples
    if is_flat:
        padding = (0, missing)
    elif sample_axis == 1:
        padding = ((0, 0), (0, missing))
    else:
        padding = ((0, missing), (0, 0))
    return np.pad(y, padding, mode="constant", constant_values=fill)
def fit_arrays(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
    srs: Union[Tuple[int, ...], List[int]],
    max_channels: int = 2,
    min_sr: int = 44100,
    flatten: bool = False,
    max_samples: int = -1,
    extend: bool = True
) -> Tuple[np.ndarray, ...]:
    """
    Bring several arrays to a common sample rate, channel count and length.

    Args:
        arrays: Audio arrays.
        srs: Sample rate of each array (same length as arrays).
        max_channels: Channel count to produce (values below 2 mean mono).
        min_sr: Sample rate every array is resampled to.
        flatten: Downmix with stereo_to_mono(..., to_flatten=True).
        max_samples: Target length when extending; values <= 0 mean
            "length of the longest resampled array".
        extend: Pad/trim every array to the target length.
    Returns:
        Tuple with the fitted arrays, in input order.
    Raises:
        Exception: If arrays and srs differ in length.
    """
    if len(arrays) != len(srs):
        raise Exception(_i18n("arrays_srs_mismatch"))
    total = len(arrays)
    new_arrays = []
    for i, (array, sr) in enumerate(zip(arrays, srs), start=1):
        sample_axis = get_info_array(array)[2]
        fitted = easy_resampler(array, sr, min_sr)
        if flatten:
            fitted = stereo_to_mono(fitted, to_flatten=True)
        elif max_channels >= 2:
            fitted = mono_to_stereo(fitted, sample_axis, max_channels)
        else:
            fitted = stereo_to_mono(fitted)
        new_arrays.append(reshape(fitted, shape=("channels", "samples")))
        print(_i18n("fitting_progress", current=i, total=total), end="\r")
    print("")
    if extend:
        if max_samples > 0:
            target_len = max_samples
        else:
            # Measure AFTER resampling: lengths change with the sample rate,
            # and using the original lengths would truncate upsampled tracks.
            target_len = max(
                (get_duration_from_array(item) for item in new_arrays),
                default=0,
            )
        for i, item in enumerate(new_arrays, start=1):
            new_arrays[i - 1] = add_zero_to_end(item, target_len)
            print(_i18n("extending_progress", current=i, total=total), end="\r")
        print("")
    return tuple(new_arrays)
def subtractor(
    y: np.ndarray,
    z: np.ndarray,
    sr1: int,
    sr2: int,
    spectrogram: bool = False
) -> Tuple[np.ndarray, int]:
    """
    Subtract one audio signal from another.

    Args:
        y: Signal to subtract from.
        z: Signal to subtract.
        sr1: Sample rate of y.
        sr2: Sample rate of z.
        spectrogram: Subtract STFT magnitudes (keeping the phase of y) instead
            of subtracting the waveforms sample by sample.
    Returns:
        Tuple (difference in the dtype of y, sample rate of the result).
    """
    channels_y = get_info_array(y)[0]
    channels_z = get_info_array(z)[0]
    result_dtype = y.dtype
    y = convert_to_dtype(y, np.float32)
    z = convert_to_dtype(z, np.float32)
    common_sr = min(sr1, sr2)
    fitted = fit_arrays(
        [y, z], [sr1, sr2],
        max_channels=max(channels_y, channels_z),
        min_sr=common_sr,
    )
    y, z = fitted[0], fitted[1]

    if not spectrogram:
        print(_i18n("subtract_phase"))
        return convert_to_dtype(y - z, result_dtype), common_sr

    print(_i18n("subtract_spectrogram"))
    sft = get_stft_obj(common_sr, n_fft=n_fft, hop=hop)
    out_channels = []
    # One channel at a time keeps only a single pair of spectra in memory.
    for ch_y, ch_z in zip(split_channels(y), split_channels(z)):
        spec_y = sft.stft(ch_y.astype(np.float32))
        spec_z = sft.stft(ch_z.astype(np.float32))
        # Magnitude difference floored at zero, recombined with the phase of y.
        magnitude = np.maximum(np.abs(spec_y) - np.abs(spec_z), 0)
        res_spec = magnitude * np.exp(1j * np.angle(spec_y))
        del spec_y, spec_z
        out_channels.append(sft.istft(res_spec, k1=ch_y.shape[-1]))
    subtracted = multi_channel_array_from_arrays(*out_channels, index=1, dtype=result_dtype)
    return subtracted, common_sr
def absmax(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
    """
    Pick the element(s) with the largest absolute value, keeping their sign.

    Args:
        a: Input array.
        axis: Axis to reduce; None searches the flattened array.
    Returns:
        The selected element (axis=None) or an array with axis removed.
    """
    magnitudes = np.abs(a)
    if axis is None:
        return a.flatten()[np.argmax(magnitudes)]
    other_dims = list(a.shape)
    other_dims.pop(axis)
    # Open grid over the remaining axes; the winning positions are spliced in
    # at the reduced axis to form a full fancy index.
    selector = list(np.ogrid[tuple(slice(0, size) for size in other_dims)])
    winners = magnitudes.argmax(axis=axis)
    selector.insert(axis % a.ndim, winners)
    return a[tuple(selector)]
def absmin(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
    """
    Pick the element(s) with the smallest absolute value, keeping their sign.

    Args:
        a: Input array.
        axis: Axis to reduce; None searches the flattened array.
    Returns:
        The selected element (axis=None) or an array with axis removed.
    """
    if axis is None:
        return a.flatten()[np.argmin(np.abs(a))]
    dims = list(a.shape)
    dims.pop(axis)
    # np.ogrid may hand back a tuple, which has no insert(); copy it into a
    # list first (absmax already does the same).
    indices = list(np.ogrid[tuple(slice(0, d) for d in dims)])
    argmin = np.abs(a).argmin(axis=axis)
    indices.insert(axis % len(a.shape), argmin)
    return a[tuple(indices)]
def lambda_max(
    arr: np.ndarray,
    axis: Optional[int] = None,
    key: Optional[Callable] = None,
    keepdims: bool = False
) -> np.ndarray:
    """
    Select the elements of arr that maximise key(arr).

    Args:
        arr: Input array.
        axis: Axis to reduce; None searches the flattened array.
        key: Scoring function applied to arr (np.abs when omitted).
        keepdims: Keep the reduced axis with length 1.
    Returns:
        The selected element(s) of arr.
    """
    score = np.abs if key is None else key
    best = np.argmax(score(arr), axis)
    if axis is None:
        return arr.flatten()[best]
    picked = np.take_along_axis(arr, np.expand_dims(best, axis), axis)
    return picked if keepdims else np.squeeze(picked, axis=axis)
def lambda_min(
    arr: np.ndarray,
    axis: Optional[int] = None,
    key: Optional[Callable] = None,
    keepdims: bool = False
) -> np.ndarray:
    """
    Select the elements of arr that minimise key(arr).

    Args:
        arr: Input array.
        axis: Axis to reduce; None searches the flattened array.
        key: Scoring function applied to arr (np.abs when omitted).
        keepdims: Keep the reduced axis with length 1.
    Returns:
        The selected element(s) of arr.
    """
    score = np.abs if key is None else key
    best = np.argmin(score(arr), axis)
    if axis is None:
        return arr.flatten()[best]
    picked = np.take_along_axis(arr, np.expand_dims(best, axis), axis)
    return picked if keepdims else np.squeeze(picked, axis=axis)
def ensemble(
    pred_tracks: List[np.ndarray],
    srs: List[int],
    weights: List[float],
    algorithm: str,
    dtype: np.dtype = np.float32
) -> Tuple[np.ndarray, int]:
    """
    Combine several predictions of the same track in the STFT domain.

    Args:
        pred_tracks: Predicted audio arrays.
        srs: Sample rate of each prediction.
        weights: Per-track weights; only read by "avg_fft".
        algorithm: "avg_fft", "median_fft", "min_fft" or "max_fft".
        dtype: Data type of the returned array.
    Returns:
        Tuple (two-channel result, sample rate of the result).
    Raises:
        ValueError: If algorithm is not one of the supported names.
    """
    # Reject a bad algorithm name before any resampling / STFT work is done.
    if algorithm not in ("avg_fft", "median_fft", "min_fft", "max_fft"):
        raise ValueError(_i18n("unknown_algorithm", alg=algorithm))
    if algorithm == "min_fft":
        max_sr = int(min(srs))
    else:
        max_sr = int(max(srs))
    # Bring every track to the same sample rate, channel count and length.
    pred_tracks = list(fit_arrays(pred_tracks, srs, max_channels=2, min_sr=max_sr))
    sft = get_stft_obj(max_sr, n_fft=2048, hop=1024)
    final_length = pred_tracks[0].shape[-1]
    averaging = algorithm == "avg_fft"
    total_weight = sum(weights) if averaging else None
    ensemble_wav_channels = []
    for ch_idx in range(2):  # fit_arrays(max_channels=2) was requested above
        accumulator = None if averaging else []
        for i, track in enumerate(pred_tracks):
            spec = sft.stft(track[ch_idx].astype(np.float32))
            if averaging:
                # Running weighted sum: only one spectrum is held at a time.
                weighted_spec = spec * weights[i]
                if accumulator is None:
                    accumulator = weighted_spec
                else:
                    accumulator += weighted_spec
            else:
                # Median / extremes need every spectrum of this channel.
                accumulator.append(spec)
            del spec
        if averaging:
            res_spec = accumulator / total_weight
        elif algorithm == "median_fft":
            res_spec = (np.median(np.real(accumulator), axis=0)
                        + 1j * np.median(np.imag(accumulator), axis=0))
        elif algorithm == "min_fft":
            res_spec = lambda_min(np.array(accumulator), axis=0, key=np.abs)
        else:  # "max_fft"
            res_spec = absmax(np.array(accumulator), axis=0)
        ensemble_wav_channels.append(sft.istft(res_spec, k1=final_length))
        del accumulator
    result = multi_channel_array_from_arrays(*ensemble_wav_channels, index=1, dtype=dtype)
    print(_i18n("ensemble_complete"))
    return result, max_sr
def concatenate(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
    srs: Union[Tuple[int, ...], List[int]],
    dtype=np.float32
) -> Tuple[np.ndarray, int]:
    """
    Join several audio arrays end to end.

    Args:
        arrays: Audio arrays, in playback order.
        srs: Sample rate of each array.
        dtype: Data type of the returned array.
    Returns:
        Tuple (joined array, sample rate of the result).
    """
    # max(srs), not max(*srs): unpacking a single-element list would call
    # max(<int>) and raise TypeError.
    max_sr = int(max(srs))
    fitted = fit_arrays(
        [convert_to_dtype(array, np.float64) for array in arrays],
        srs, max_channels=2, min_sr=max_sr, extend=False,
    )
    result = np.concatenate(fitted, axis=1, dtype=np.float64)
    print(_i18n("concatenate_complete"))
    return convert_to_dtype(result, dtype), max_sr
def trim(y: np.ndarray, start: int = 0, end: int = -1) -> np.ndarray:
    """
    Cut a sample range out of an audio array.

    Args:
        y: Audio array.
        start: First sample to keep.
        end: Sample to stop at; values outside 1..len mean "to the end".
    Returns:
        The selected slice of y.
    """
    _, samples, sample_axis, is_flat = get_info_array(y)
    stop = end if 0 < end <= samples else samples
    if is_flat:
        return y[start:stop]
    if sample_axis == 0:
        return y[start:stop, :]
    if sample_axis == 1:
        return y[:, start:stop]
    return y
def reverse(y: np.ndarray) -> np.ndarray:
    """
    Reverse an audio array in time.

    Args:
        y: Audio array.
    Returns:
        The array flipped along the axis reported by get_info_array
        (the whole array when it is 1-D).
    """
    sample_axis, is_flat = get_info_array(y)[2:]
    return np.flip(y) if is_flat else np.flip(y, axis=sample_axis)
def write(
    path: str,
    y: np.ndarray,
    sr: int,
    bitrate: Union[int, str] = 320,
    prefer_float: bool = False
) -> str:
    """
    Encode an audio array to a file by piping raw samples into ffmpeg.

    Args:
        path: Destination file; the extension selects the codec arguments.
        y: Audio array.
        sr: Sample rate (required).
        bitrate: Target bitrate in kbit/s (number or string like "192k").
        prefer_float: Choose the float variant of the codec arguments.
    Returns:
        The path that was written.
    Raises:
        Exception: If sr is missing or ffmpeg exits with a non-zero code.
    """
    if str2bool(os.environ.get("MVSEPLESS_WRITE_ABS", "False")):
        path = os.path.abspath(path)
    ext = os.path.splitext(path)[1]
    parent_dir = os.path.dirname(path)
    if parent_dir != "":
        os.makedirs(parent_dir, exist_ok=True)
    if not sr:
        raise Exception(_i18n("sr_required"))

    source_dtype = y.dtype
    channels = get_info_array(y)[0]
    # ffmpeg reads interleaved frames, i.e. (samples, channels) in C order.
    frames = reshape(y, shape=("samples", "channels"))
    sample_format = SAMPLE_FORMATS_DICT.get(str(source_dtype), None)
    if not sample_format:
        # No raw format for this dtype: send 32-bit floats instead.
        sample_format = "f32le"
        frames = convert_to_dtype(frames, np.float32)
    frames = np.nan_to_num(frames, nan=0, posinf=0, neginf=0)

    kbps = bitrate_to_int(bitrate)
    if ext == ".ogg":
        ceiling, notice = get_ogg_bitrate(sr, channels), "ogg_bitrate_adjusted"
    elif ext == ".opus":
        ceiling, notice = 256 * channels, "opus_bitrate_adjusted"
    else:
        ceiling = notice = None
    if ceiling is not None and kbps > ceiling:
        print(_i18n(notice, old=kbps, new=ceiling))
        kbps = ceiling
    kbps = min(max(kbps, 32), 320)

    cmd = [ffmpeg_path, "-y", "-f", sample_format, "-ar", str(sr), "-ac", str(channels),
           "-i", "-", *get_codec_args(ext, prefer_float), "-ab", f"{kbps}k", path]
    process = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=None,
        stderr=subprocess.PIPE,
        bufsize=10**8
    )
    try:
        _, stderr_data = process.communicate(input=frames.tobytes())
        if process.returncode != 0:
            error_msg = stderr_data.decode('utf-8', errors='ignore')
            print(_i18n("ffmpeg_error", error=error_msg))
            raise Exception(_i18n("ffmpeg_exit_code", code=process.returncode))
    except Exception as e:
        print(_i18n("write_critical_error", error=str(e)))
        process.kill()
        raise
    return path
def multiwrite(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
    srs: Union[Tuple[int, ...], List[int]],
    paths: Union[Tuple[str, ...], List[str]],
    bitrate: Union[int, str] = 320,
    prefer_float: bool = False,
    callable_func: Optional[Callable] = None,
    strict: bool = False
) -> Tuple[str, ...]:
    """
    Write several audio arrays to files.

    Args:
        arrays: Audio arrays.
        srs: Sample rate of each array.
        paths: Destination path of each array.
        bitrate: Bitrate passed to write().
        prefer_float: Passed to write().
        callable_func: Optional callback invoked with each path before writing.
        strict: Abort on the first failed write instead of collecting errors.
    Returns:
        Tuple with the paths that were written successfully.
    Raises:
        Exception: On the first failure in strict mode, or when nothing was
            written (including when the three inputs differ in length).
    """
    saved_paths = []
    exceptions = []
    if len(arrays) == len(srs) == len(paths):
        for array, sr, path in zip(arrays, srs, paths):
            if callable_func is not None:
                callable_func(path)
            try:
                saved_paths.append(
                    write(path, array, sr, bitrate=bitrate, prefer_float=prefer_float)
                )
            except Exception as e:
                if strict:
                    # Chain the cause so the original traceback is kept.
                    raise Exception(str(e)) from e
                print(_i18n("write_error", error=str(e)))
                exceptions.append(str(e))
    if not saved_paths:
        exceptions_str = '\n'.join(exceptions)
        raise Exception(_i18n("no_files_written", errors=exceptions_str))
    return tuple(saved_paths)

+import os
+import subprocess
+import numpy as np
+from gradio_helper import str2bool
+from scipy.signal import ShortTimeFFT, resample
+from scipy.signal.windows import dpss, hann
+from numpy.typing import DTypeLike
+from typing import List, Tuple, Optional, Union, Dict, Any, Callable
+from i18n import _i18n
# Executable names handed to subprocess as the first command element.
ffmpeg_path, ffprobe_path = "ffmpeg", "ffprobe"
# Module-level STFT size and hop length.
n_fft, hop = 4096, 1024
def average(*ints: Union[int, float]) -> float:
    """
    Return the arithmetic mean of the given numbers.

    Args:
        *ints: Numbers to average (at least one).
    Returns:
        The mean value.
    """
    return sum(ints) / len(ints)
def check_installed() -> None:
    """Report whether the ffmpeg and ffprobe executables can be run."""
    probes = (
        (ffmpeg_path, "ffmpeg_found", "ffmpeg_not_found"),
        (ffprobe_path, "ffprobe_found", "ffprobe_not_found"),
    )
    for executable, found_key, missing_key in probes:
        try:
            subprocess.check_output([executable, "-version"], text=True)
        except (OSError, subprocess.SubprocessError, ValueError):
            # Missing binary, failed run or undecodable output all count as
            # "not found"; KeyboardInterrupt/SystemExit are left to propagate.
            print(_i18n(missing_key))
        else:
            print(_i18n(found_key))
def get_ogg_bitrate(sample_rate: int, channels: int = 2) -> int:
    """
    Pick the bitrate limit used for OGG output from the sample rate.

    Args:
        sample_rate: Sample rate in Hz.
        channels: Number of channels.
    Returns:
        Per-channel bitrate for the sample-rate tier times channels.
    """
    # (minimum sample rate, per-channel bitrate), highest tier first.
    tiers = ((40000, 240), (26000, 190), (15000, 90), (9000, 50), (8000, 42))
    per_channel = next(
        (rate for floor, rate in tiers if sample_rate >= floor),
        30,
    )
    return int(per_channel * channels)
# Maps a dtype, given either by name or as the numpy scalar type, to the
# matching raw-PCM format name passed to ffmpeg's -f option.
SAMPLE_FORMATS_DICT: Dict[Union[str, type], str] = {
    key: pcm_format
    for as_numpy_type in (False, True)
    for dtype_name, pcm_format in (
        ("int16", "s16le"),
        ("int32", "s32le"),
        ("float32", "f32le"),
        ("float64", "f64le"),
    )
    for key in ((getattr(np, dtype_name) if as_numpy_type else dtype_name),)
}
# Audio-only container/codec names (without the leading dot).
audio_formats: List[str] = (
    "aac ac3 ac4 adts aiff au caf dts eac3 "
    "flac m4a mp3 mp2 ogg oga opus ra raw "
    "snd voc wav wma wv"
).split()
# Video container names listed as inputs that carry audio.
video_formats_with_audio: List[str] = (
    "3gp 3g2 asf avi flv f4v m4v mkv mov "
    "mp4 mpeg mpg mts mxf ogv rm rmvb ts "
    "vob webm wmv"
).split()
input_formats: List[str] = [*video_formats_with_audio, *audio_formats]
output_formats: List[str] = "mp3 wav flac ogg opus m4a aac ac3 aiff wma".split()
# The same lists with a leading dot.
input_extensions: List[str] = ["." + fmt for fmt in input_formats]
output_extensions: List[str] = ["." + fmt for fmt in output_formats]
# ffmpeg encoder arguments per output extension. The inner key is the
# prefer_float flag: True -> float variant, False -> integer variant
# (several codecs use the same arguments for both).
codec_args: Dict[str, Dict[bool, List[str]]] = {
    extension: {
        True: ["-c:a", float_codec, "-sample_fmt", float_fmt],
        False: ["-c:a", int_codec, "-sample_fmt", int_fmt],
    }
    for extension, (float_codec, float_fmt), (int_codec, int_fmt) in (
        (".mp3", ("libmp3lame", "fltp"), ("libmp3lame", "s16p")),
        (".wav", ("pcm_f32le", "flt"), ("pcm_s16le", "s16")),
        (".flac", ("flac", "s32"), ("flac", "s16")),
        (".ogg", ("libvorbis", "fltp"), ("libvorbis", "fltp")),
        (".opus", ("libopus", "flt"), ("libopus", "s16")),
        (".m4a", ("aac", "fltp"), ("aac", "fltp")),
        (".aac", ("aac", "fltp"), ("aac", "fltp")),
        (".ac3", ("ac3", "fltp"), ("ac3", "fltp")),
        (".aiff", ("pcm_f32be", "flt"), ("pcm_s16be", "s16")),
        (".wma", ("wmav2", "fltp"), ("wmav2", "fltp")),
    )
}
def get_codec_args(extension: str, prefer_float: bool) -> List[str]:
    """
    Look up the ffmpeg codec arguments for a file extension.

    Args:
        extension: File extension including the dot (e.g. ".wav").
        prefer_float: Select the float variant of the arguments.
    Returns:
        The argument list from codec_args, or an empty list for an
        extension that has no entry.
    """
    try:
        variants = codec_args[extension]
    except KeyError:
        return []
    return variants[prefer_float]
# Characters that survive sanitize_output: decimal digits only.
allowed_chars: str = r"1234567890"
def sanitize_output(output: str) -> str:
    """
    Drop every character of output that is not listed in allowed_chars.

    Args:
        output: Raw text.
    Returns:
        The text reduced to the allowed characters, in their original order.
    """
    return "".join(filter(allowed_chars.__contains__, output))
def get_sr(path: str, stream: int = 0) -> int:
    """
    Read the sample rate of an audio stream with ffprobe.

    Args:
        path: Path to the media file.
        stream: Index of the audio stream.
    Returns:
        The sample rate, or 0 when it could not be determined.
    """
    cmd = [ffprobe_path, "-i", path, "-v", "quiet", "-hide_banner",
           "-show_entries", "stream=sample_rate", "-select_streams", f"a:{stream}",
           "-of", "compact=p=0:nk=1"]
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    stdout, _ = process.communicate()
    # Use only the first reported value: if ffprobe prints the entry more than
    # once, joining all digits would produce a bogus number (e.g. 4410044100).
    tokens = stdout.decode('utf-8', errors='ignore').split()
    sample_rate = sanitize_output(tokens[0]) if tokens else ""
    if sample_rate.isdigit():
        return int(sample_rate)
    print(_i18n("sr_read_error", path=path))
    return 0
def get_channels(path: str, stream: int = 0) -> int:
    """
    Read the channel count of an audio stream with ffprobe.

    Args:
        path: Path to the media file.
        stream: Index of the audio stream.
    Returns:
        The number of channels, or 0 when it could not be determined.
    """
    cmd = [ffprobe_path, "-i", path, "-v", "quiet", "-hide_banner",
           "-show_entries", "stream=channels", "-select_streams", f"a:{stream}",
           "-of", "compact=p=0:nk=1"]
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    stdout, _ = process.communicate()
    # Use only the first reported value: if ffprobe prints the entry more than
    # once, joining all digits would turn "2\n2" into 22 channels.
    tokens = stdout.decode('utf-8', errors='ignore').split()
    channels = sanitize_output(tokens[0]) if tokens else ""
    if channels.isdigit():
        return int(channels)
    print(_i18n("channels_read_error", path=path))
    return 0
def check(path: str) -> bool:
    """
    Tell whether ffprobe reports a usable audio stream for the file.

    Args:
        path: Path to the file.
    Returns:
        True when both the channel count and the sample rate are non-zero.
    """
    # Both probes always run, channels first.
    probed = (get_channels(path), get_sr(path))
    return 0 not in probed
def read(
    path: str,
    sr: Optional[int] = None,
    mono: bool = False,
    dtype: DTypeLike = "float32",
    multi_channel: bool = False,
    num_channels: int = 2,
    stream: int = 0,
    flatten: bool = False
) -> Tuple[np.ndarray, int]:
    """
    Decode an audio stream to a numpy array through ffmpeg.

    Args:
        path: Path to the media file.
        sr: Output sample rate; probed with get_sr when falsy.
        mono: Decode to a single channel.
        dtype: Data type of the returned samples.
        multi_channel: Use the file's own channel count (via get_channels)
            instead of num_channels.
        num_channels: Channel count when neither mono nor multi_channel is set.
        stream: Index of the audio stream.
        flatten: With mono=True, return a 1-D array.
    Returns:
        Tuple (samples shaped (channels, samples) or 1-D, sample rate).
    """
    native_format = SAMPLE_FORMATS_DICT.get(dtype, None)
    if not sr:
        sr = get_sr(path, stream)
    if mono:
        channels = 1
    elif multi_channel:
        channels = get_channels(path, stream)
    else:
        channels = num_channels

    # Without a raw format for dtype, decode as 32-bit float and convert later.
    decode_natively = bool(native_format)
    wire_format = native_format if decode_natively else "f32le"
    cmd = [ffmpeg_path, "-i", path, "-map", f"0:a:{stream}", "-vn",
           "-f", wire_format, "-ac", str(channels), "-ar", str(sr), "-"]
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=10**8
    )
    raw, _ = process.communicate()
    if decode_natively:
        y = np.frombuffer(raw, dtype=dtype)
    else:
        y = np.frombuffer(raw, dtype=np.float32)
        y = convert_to_dtype(y, dtype)

    # ffmpeg delivers interleaved frames; transpose to (channels, samples).
    if not mono:
        y = y.reshape((-1, channels)).T
    elif flatten:
        y = y.flatten()
    else:
        y = y.reshape((-1, 1)).T
    # Copy so the caller gets an independent array rather than a view.
    return y.copy(), sr
def multiread(
    paths: Union[List[str], Tuple[str, ...]],
    *args,
    **kwargs
) -> Tuple[List[np.ndarray], List[int]]:
    """
    Read several audio files with read().

    Args:
        paths: Paths of the files to read.
        *args: Positional arguments forwarded to read.
        **kwargs: Keyword arguments forwarded to read.
    Returns:
        Tuple (list of audio arrays, list of sample rates), in input order.
    """
    total = len(paths)
    audio: list = []
    rates: list = []
    for done, path in enumerate(paths, start=1):
        data, rate = read(path, *args, **kwargs)
        audio.append(data)
        rates.append(rate)
        print(_i18n("reading_progress", current=done, total=total), end="\r")
    print("")
    return audio, rates
def bitrate_to_int(a: Union[str, int, float]) -> int:
    """
    Normalise a bitrate value to an integer.

    Args:
        a: Bitrate as a number or as a string such as "192" or "192k".
    Returns:
        The bitrate as int; 320 for strings that are not plain digits
        (optionally followed by k/K) and for unsupported types.
    """
    if isinstance(a, str):
        digits = a[:-1] if a.endswith(("k", "K")) else a
        if digits.isdigit():
            return int(digits)
        print(_i18n("invalid_bitrate", bitrate=a))
        return 320
    if isinstance(a, (int, float)):
        return int(a)
    return 320
def get_info_array(y: np.ndarray) -> Tuple[int, int, Optional[int], bool]:
    """
    Describe the layout of an audio array.

    For 2-D input the shorter axis is taken as channels; when both axes have
    the same length the array is read as (samples, channels).

    Args:
        y: Audio array (1-D or 2-D).
    Returns:
        Tuple (channels, samples, index of the samples axis or -1 for 1-D,
        whether the array is 1-D).
    Raises:
        ValueError: If the array has more than two (or zero) dimensions.
    """
    if y.ndim == 1:
        return 1, len(y), -1, True
    if y.ndim != 2:
        raise ValueError(_i18n("array_dim_error"))
    rows, cols = y.shape
    if rows < cols:
        return rows, cols, 1, False
    return cols, rows, 0, False
def get_axis_from_array_index(index: int) -> int:
    """
    Translate an array index into the axis used for channel operations.

    Args:
        index: Array index (as reported by get_info_array).
    Returns:
        0 for index 1, 1 for index 0, and -1 for anything else.
    """
    return 0 if index == 1 else 1 if index == 0 else -1
def get_duration_from_array(y: np.ndarray, sr: Optional[int] = None) -> Union[float, int]:
    """
    Measure the length of an audio array.

    Args:
        y: Audio array.
        sr: Sample rate; when given the result is in seconds.
    Returns:
        Duration in seconds, or the number of samples when sr is None.
    """
    n_samples: int = get_info_array(y)[1]
    return n_samples if sr is None else n_samples / sr
def is_float(y: np.ndarray) -> bool:
    """
    Tell whether an array holds floating-point samples.

    Args:
        y: Audio array.
    Returns:
        True when the array's dtype is a floating type.
    """
    array_dtype = y.dtype
    return np.issubdtype(array_dtype, np.floating)
def is_float_dtype(dtype: DTypeLike) -> bool:
    """
    Tell whether a dtype specification denotes a floating type.

    Args:
        dtype: Data type (numpy type, dtype object or name).
    Returns:
        True when dtype is a floating type.
    """
    floating = np.issubdtype(dtype, np.floating)
    return floating
def float_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert float samples in [-1, 1] to an integer sample type.

    Signed targets scale by the type's maximum; unsigned targets first shift
    [-1, 1] to [0, 1]. Out-of-range values are clipped.

    Args:
        y: Float array.
        dtype: Target integer type.
    Returns:
        Array of the target integer type.
    """
    info = np.iinfo(dtype)
    min_val = info.min
    max_val = info.max
    # Scale and clip in float64: float32 cannot represent the int32 maximum
    # exactly, so a full-scale +1.0 would land one past the maximum and
    # overflow on the final cast.
    wide = y.astype(np.float64)
    if min_val < 0:
        scaled = wide * max_val
    elif min_val == 0:
        scaled = (wide + 1) / 2 * max_val
    else:
        raise ValueError(_i18n("unexpected_min_val", value=min_val))
    return np.clip(np.round(scaled), min_val, max_val).astype(dtype)
def int_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Rescale integer samples to another integer type.

    The full range of the source type is mapped linearly onto the full range
    of the target type.

    Args:
        y: Integer array.
        dtype: Target integer type.
    Returns:
        Array of the target integer type.
    """
    dst = np.iinfo(dtype)
    src = np.iinfo(y.dtype)
    as_f64 = y.astype(np.float64)
    src_span = src.max - src.min
    dst_span = dst.max - dst.min
    if src_span == 0:
        return np.full_like(y, dst.min, dtype=dtype)
    rescaled = (as_f64 - src.min) * (dst_span / src_span) + dst.min
    return np.clip(np.round(rescaled), dst.min, dst.max).astype(dtype)
def int_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert integer samples to floats in [-1, 1].

    Args:
        y: Integer array.
        dtype: Target float type.
    Returns:
        Array of the target float type.
    """
    info = np.iinfo(y.dtype)
    wide = y.astype(np.float64)
    if info.min == 0:
        # Inverse of the unsigned branch of float_to_int: [0, max] -> [-1, 1].
        # (Subtracting the midpoint and dividing by max would only span about
        # [-0.5, 0.5].)
        y_normalized = wide / info.max * 2 - 1
    elif info.min < 0:
        abs_max = max(abs(info.min), abs(info.max))
        y_normalized = wide / abs_max
    else:
        raise ValueError(_i18n("unexpected_min_val", value=info.min))
    return y_normalized.astype(dtype)
+def float_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
+    """
+    Cast a float array to another float dtype without rescaling.
+    Args:
+        y: Float array
+        dtype: Target float dtype
+    Returns:
+        The array cast with ``astype``
+    """
+    recast = y.astype(dtype)
+    return recast
+def get_center_value_from_dtype(dtype: DTypeLike) -> int:
+    """
+    Return the value that represents silence for a dtype.
+    Float dtypes use 0; integer dtypes use the truncated average of the
+    dtype minimum and maximum.
+    Args:
+        dtype: Data type
+    Returns:
+        Center value
+    """
+    if not is_float_dtype(dtype):
+        limits = np.iinfo(dtype)
+        return int(average(limits.min, limits.max))
+    return 0
+def convert_to_dtype(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
+    """
+    Convert an array to the requested dtype.
+    Dispatches to one of the four float/int conversion helpers depending
+    on whether the source array and the target dtype are float or integer.
+    Args:
+        y: Input array
+        dtype: Target dtype
+    Returns:
+        Converted array
+    """
+    source_is_float = is_float(y)
+    target_is_float = is_float_dtype(dtype)
+    if source_is_float:
+        converter = float_to_float if target_is_float else float_to_int
+    else:
+        converter = int_to_float if target_is_float else int_to_int
+    return converter(y, dtype)
+def dc_offset(y: np.ndarray, offset: Union[float, int]) -> np.ndarray:
+    """
+    Add a constant (DC) offset to the audio.
+    The offset is applied in float32 and the result is converted back to
+    the dtype of the input.
+    Args:
+        y: Audio array
+        offset: Offset to add
+    Returns:
+        Shifted array in the original dtype
+    """
+    source_dtype = y.dtype
+    shifted = convert_to_dtype(y, np.float32) + offset
+    return convert_to_dtype(shifted, source_dtype)
+def gain(y: np.ndarray, gain_value: Union[float, int]) -> np.ndarray:
+    """
+    Multiply the audio by a gain factor.
+    The multiplication is done in float32 and the result is converted
+    back to the dtype of the input.
+    Args:
+        y: Audio array
+        gain_value: Gain factor
+    Returns:
+        Scaled array in the original dtype
+    """
+    source_dtype = y.dtype
+    amplified = convert_to_dtype(y, np.float32) * gain_value
+    return convert_to_dtype(amplified, source_dtype)
+def normalize(y: np.ndarray, target_peak: Union[float, int] = 1.0) -> np.ndarray:
+    """
+    Peak-normalize the audio.
+    The array is converted to float32, scaled so that its largest absolute
+    value equals ``target_peak`` and converted back to the original dtype.
+    Empty or all-zero input is returned unscaled.
+    Args:
+        y: Audio array
+        target_peak: Target peak value
+    Returns:
+        Normalized array in the original dtype
+    """
+    orig_dtype = y.dtype
+    y = convert_to_dtype(y, np.float32)
+    # np.max raises on an empty array, so treat "no samples" as a zero peak.
+    current_peak = np.max(np.abs(y)) if y.size else 0.0
+    if current_peak > 0:
+        y = y * (target_peak / current_peak)
+    return convert_to_dtype(y, orig_dtype)
+def create_zero_array(samples: int, dtype: DTypeLike) -> np.ndarray:
+    """
+    Create a 1-D array of silence for the given dtype.
+    Every element is the center value of the dtype (0 for floats, the
+    midpoint of the range for integers).
+    Args:
+        samples: Number of samples
+        dtype: Data type
+    Returns:
+        Array of length ``samples`` filled with the center value
+    """
+    # np.full allocates and fills natively instead of building a Python
+    # list with one element per sample.
+    return np.full(samples, get_center_value_from_dtype(dtype), dtype=dtype)
+def split_channels(y: np.ndarray) -> Tuple[np.ndarray, ...]:
+    """
+    Split multi-channel audio into one array per channel.
+    A flat array is returned unchanged as a single-element tuple.
+    Args:
+        y: Audio array
+    Returns:
+        Tuple of channel arrays
+    """
+    channels, _samples, array_index, flatten = get_info_array(y)
+    if flatten:
+        return (y,)
+    if array_index == 1:
+        # Channels are the rows.
+        return tuple(y[ch, :] for ch in range(channels))
+    # Channels are the columns.
+    return tuple(y[:, ch] for ch in range(channels))
+from scipy.signal import windows
+def get_stft_obj(sr: int, n_fft: int, hop: int) -> ShortTimeFFT:
+    """
+    Build the ShortTimeFFT object used for spectral processing.
+    A periodic Hann window is used by default; a DPSS window (NW=3) is
+    used instead when the MVSEPLESS_DPSS environment variable parses as
+    true.
+    Args:
+        sr: Sample rate
+        n_fft: Window length passed to the window function
+        hop: Hop size
+    Returns:
+        ShortTimeFFT object
+    """
+    use_dpss = str2bool(os.environ.get("MVSEPLESS_DPSS", "False"))
+    window = dpss(n_fft, NW=3, sym=False) if use_dpss else hann(n_fft, sym=False)
+    return ShortTimeFFT(window, hop=hop, fs=sr, scale_to='magnitude', phase_shift=None)
+def split_mid_side(
+    y: np.ndarray,
+    var: int = 1,
+    sr: Optional[int] = None
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Split stereo audio into a mid (center) and a side part.
+    Variants:
+        0: side is each channel minus the mono mix, mid is y minus side
+        1: mid is the mono mix on both channels, side is y minus mid
+        2: mid is the sample-wise common part (smaller magnitude where both
+           channels have the same sign), side is the remainder per channel
+        3: same idea as 2, but computed per STFT bin; requires ``sr``
+        4: mono mid (L + R) / 2 and mono side L - R
+    Variants 0-3 return two-channel arrays, variant 4 returns flat arrays.
+    All variants return the dtype of the input.
+    Args:
+        y: Stereo audio array
+        var: Split variant (0-4)
+        sr: Sample rate, needed by variant 3
+    Returns:
+        Tuple (mid, side)
+    Raises:
+        Exception: If the input is not stereo, or variant 3 is used without ``sr``
+        ValueError: If ``var`` is not a known variant
+    """
+    channels, samples, array_index, flatten = get_info_array(y)
+    axis = get_axis_from_array_index(array_index)
+    if channels != 2:
+        raise Exception(_i18n("stereo_required"))
+    orig_dtype = y.dtype
+    y = convert_to_dtype(y, np.float32)
+    channels_arrays = split_channels(y)
+    left_channel = channels_arrays[0]
+    right_channel = channels_arrays[1]
+    mid_channel_one = (left_channel * 0.5) + (right_channel * 0.5)
+    if var == 0:
+        print(_i18n("mid_side_var0"))
+        side_channel = np.stack([left_channel - mid_channel_one, right_channel - mid_channel_one], axis=axis)
+        mid_channel = y - side_channel
+    elif var == 1:
+        print(_i18n("mid_side_var1"))
+        mid_channel = np.stack([mid_channel_one, mid_channel_one], axis=axis)
+        side_channel = y - mid_channel
+    elif var == 2:
+        print(_i18n("mid_side_var2"))
+        same_sign = (left_channel * right_channel) > 0
+        center_mono = np.where(
+            same_sign,
+            np.minimum(np.abs(left_channel), np.abs(right_channel)) * np.sign(left_channel),
+            0.0
+        )
+        mid_channel = np.stack([center_mono, center_mono], axis=axis)
+        side_channel = np.stack([left_channel - center_mono, right_channel - center_mono], axis=axis)
+    elif var == 3:
+        print(_i18n("mid_side_var3"))
+        if not sr:
+            raise Exception(_i18n("sr_required"))
+        # n_fft and hop are not parameters of this function; they are
+        # expected to be module-level settings (TODO confirm).
+        sft = get_stft_obj(sr, n_fft=n_fft, hop=hop)
+        # Spectra of the left and right channels
+        Lf = sft.stft(left_channel)
+        Rf = sft.stft(right_channel)
+        # Re(L * conj(R)) equals Re(R * conj(L)), so one mask serves both channels.
+        coherent = np.real(Lf * np.conj(Rf)) > 0
+        mag_common = np.minimum(np.abs(Lf), np.abs(Rf)) * coherent
+        # NOTE(review): the left center takes the phase of the right channel
+        # and vice versa; kept as in the original implementation.
+        C_L = mag_common * np.exp(1j * np.angle(Rf))
+        C_R = mag_common * np.exp(1j * np.angle(Lf))
+        SL = Lf - C_L
+        SR = Rf - C_R
+        # Use the detected sample count: y.shape[-1] is the channel count
+        # when the input is laid out as (samples, channels).
+        center_l = sft.istft(C_L, k1=samples)
+        center_r = sft.istft(C_R, k1=samples)
+        side_l = sft.istft(SL, k1=samples)
+        side_r = sft.istft(SR, k1=samples)
+        # Return the caller's dtype, like every other variant does.
+        mid_ch = multi_channel_array_from_arrays(center_l, center_r, index=array_index, dtype=orig_dtype)
+        side_ch = multi_channel_array_from_arrays(side_l, side_r, index=array_index, dtype=orig_dtype)
+        return mid_ch, side_ch
+    elif var == 4:
+        print(_i18n("mid_side_var4"))
+        mid_channel = mid_channel_one
+        side_channel = left_channel - right_channel
+    else:
+        raise ValueError(_i18n("unknown_var", var=var))
+    return convert_to_dtype(mid_channel, orig_dtype), convert_to_dtype(side_channel, orig_dtype)
+def mid_side_to_stereo(
+    y: np.ndarray,
+    z: np.ndarray,
+    index: int = -1,
+    dtype: DTypeLike = np.float32
+) -> np.ndarray:
+    """
+    Rebuild stereo audio from a mid and a side channel.
+    The result is (mid + side, mid - side), computed in float32 and then
+    converted to ``dtype``.
+    NOTE(review): with a side signal of L - R (not halved) this is not an
+    exact inverse of the mid/side split - confirm the intended scaling.
+    Args:
+        y: Mid channel
+        z: Side channel
+        index: Array index describing the output layout
+        dtype: Output dtype
+    Returns:
+        Stereo array
+    """
+    mid_f = convert_to_dtype(y, np.float32)
+    side_f = convert_to_dtype(z, np.float32)
+    mid_pair = multi_channel_array_from_arrays(mid_f, mid_f, index=index, dtype=np.float32)
+    side_pair = multi_channel_array_from_arrays(side_f, -side_f, index=index, dtype=np.float32)
+    return convert_to_dtype(mid_pair + side_pair, dtype)
+def mono_to_stereo(
+    y: np.ndarray,
+    index: int,
+    num_channels: int = 2
+) -> np.ndarray:
+    """
+    Expand audio to ``num_channels`` channels.
+    Mono input is duplicated onto every channel and stacked in the layout
+    given by ``index``. Input that already has at least ``num_channels``
+    channels is returned unchanged. Input with fewer channels is padded
+    with silent channels in its existing layout.
+    Args:
+        y: Audio array
+        index: Array index describing the layout used when stacking mono input
+        num_channels: Number of channels to produce
+    Returns:
+        Multi-channel array
+    """
+    channels, samples, array_index, flatten = get_info_array(y)
+    new_axis = get_axis_from_array_index(index)
+    orig_dtype = y.dtype
+    if channels == 1:
+        mono = y if flatten else y.flatten()
+        return np.stack([mono] * num_channels, axis=new_axis, dtype=orig_dtype)
+    if num_channels <= channels:
+        return y
+    # Appending a 1-D array to the 2-D signal with np.append raises, so pad
+    # the channel axis directly with the dtype's silence value instead.
+    missing = num_channels - channels
+    center_value = get_center_value_from_dtype(orig_dtype)
+    if array_index == 1:
+        pad_width = ((0, missing), (0, 0))
+    else:
+        pad_width = ((0, 0), (0, missing))
+    return np.pad(y, pad_width, mode="constant", constant_values=center_value)
+def stereo_to_mono(y: np.ndarray, to_flatten: bool = False) -> np.ndarray:
+    """
+    Downmix multi-channel audio to mono by averaging the channels.
+    The input is converted to float32 first. Single-channel input is
+    returned as that float32 array; a downmix is accumulated in float64.
+    NOTE(review): the result is not converted back to the input dtype, and
+    the 2-D result shape is (1, n) when array_index == 0 and (n, 1)
+    otherwise - confirm both are intended.
+    Args:
+        y: Audio array
+        to_flatten: Return a flat array instead of a 2-D one
+    Returns:
+        Mono array
+    """
+    channels, samples, array_index, flatten = get_info_array(y)
+    y = convert_to_dtype(y, np.float32)
+    if channels <= 1:
+        return y
+    mono = create_zero_array(samples, np.float64)
+    for channel in split_channels(y):
+        mono = mono + gain(channel, (1 / channels))
+    if to_flatten:
+        return mono
+    target_shape = (1, -1) if array_index == 0 else (-1, 1)
+    return mono.reshape(target_shape)
+def multi_channel_array_from_arrays(
+    *arrays: np.ndarray,
+    index: int = -1,
+    dtype: DTypeLike
+) -> np.ndarray:
+    """
+    Stack separate channel arrays into one multi-channel array.
+    Each channel is converted to ``dtype`` before stacking.
+    Args:
+        *arrays: Channel arrays
+        index: Array index describing the output layout
+        dtype: Output dtype
+    Returns:
+        Multi-channel array
+    """
+    converted = [convert_to_dtype(channel, dtype) for channel in arrays]
+    stack_axis = get_axis_from_array_index(index)
+    return np.stack(converted, axis=stack_axis, dtype=dtype)
+def reshape(y: np.ndarray, shape: Tuple[str, ...] = ("channels", "samples")) -> np.ndarray:
+    """
+    Bring an audio array into the requested layout.
+    Supported layouts are ("channels", "samples"), ("samples", "channels")
+    and ("samples",); the last one downmixes multi-channel input to mono.
+    NOTE(review): the flat-array check compares array_index with None in
+    the first layout and with -1 in the second - confirm which value is
+    actually reported for flat arrays.
+    Args:
+        y: Audio array
+        shape: Target layout
+    Returns:
+        Array in the target layout
+    Raises:
+        ValueError: If the layout is not one of the supported tuples
+    """
+    channels, samples, array_index, flatten = get_info_array(y)
+    if shape == ("channels", "samples"):
+        if array_index == 0:
+            return y.T
+        if array_index == 1:
+            return y
+        if array_index is None and flatten:
+            return y.reshape((-1, 1)).T
+        return y if y.shape[0] == channels else y.T
+    if shape == ("samples", "channels"):
+        if array_index == 1:
+            # Currently (channels, samples)
+            return y.T
+        if array_index == 0:
+            # Already (samples, channels)
+            return y
+        if array_index == -1 and flatten:
+            return y.reshape((-1, 1))
+        return y if y.shape[0] == samples else y.T
+    if shape == ("samples",):
+        if flatten:
+            return y
+        if channels == 1:
+            return y.flatten()
+        return stereo_to_mono(y, to_flatten=True)
+    raise ValueError(f"{_i18n('unknown_shape')}: {shape}")
+def easy_resampler(y: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
+    """
+    Resample audio to a new sample rate.
+    The new length is ceil(samples * target_sr / orig_sr). Input whose rate
+    already matches is returned as a copy without resampling. Integer input
+    is normalized to float before resampling so that the conversion back
+    to the original dtype scales correctly.
+    Args:
+        y: Audio array
+        orig_sr: Source sample rate
+        target_sr: Target sample rate
+    Returns:
+        Resampled array in the original dtype
+    """
+    if orig_sr == target_sr:
+        # Nothing to do; skip the FFT round trip but still hand back a new array.
+        return y.copy()
+    channels, samples, array_index, flatten = get_info_array(y)
+    orig_dtype = y.dtype
+    if not np.issubdtype(orig_dtype, np.floating):
+        # Resampling raw integers yields floats at integer scale, which the
+        # float -> int conversion below would treat as [-1, 1] data and saturate.
+        y = convert_to_dtype(y, np.float64)
+    ratio = float(target_sr) / orig_sr
+    n_samples = int(np.ceil(samples * ratio))
+    resampled = resample(y, n_samples, axis=array_index)
+    return convert_to_dtype(resampled, orig_dtype)
+def add_zero_to_end(y: np.ndarray, max_samples: int) -> np.ndarray:
+    """
+    Pad the end of the audio with silence up to ``max_samples`` samples.
+    Audio that is already at least that long is passed to ``trim`` with
+    ``max_samples`` as the end position instead.
+    Args:
+        y: Audio array
+        max_samples: Target number of samples
+    Returns:
+        Padded (or trimmed) array
+    """
+    channels, samples, array_index, flatten = get_info_array(y)
+    center_value = get_center_value_from_dtype(y.dtype)
+    if samples >= max_samples:
+        return trim(y, 0, max_samples)
+    missing = max_samples - samples
+    if flatten:
+        pad_width = (0, missing)
+    elif array_index == 1:
+        pad_width = ((0, 0), (0, missing))
+    else:
+        pad_width = ((0, missing), (0, 0))
+    return np.pad(y, pad_width, mode="constant", constant_values=center_value)
+def fit_arrays(
+    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
+    srs: Union[Tuple[int, ...], List[int]],
+    max_channels: int = 2,
+    min_sr: int = 44100,
+    flatten: bool = False,
+    max_samples: int = -1,
+    extend: bool = True
+) -> Tuple[np.ndarray, ...]:
+    """
+    Bring several audio arrays to a common format.
+    Every array is resampled to ``min_sr``, converted to the requested
+    channel count and reshaped to ("channels", "samples"). With ``extend``
+    the arrays are then padded or trimmed to one common length.
+    Args:
+        arrays: Audio arrays
+        srs: Sample rate of each array
+        max_channels: Target channel count
+        min_sr: Target sample rate
+        flatten: Downmix every array to flat mono
+        max_samples: Common length in samples; a value <= 0 means "use the
+            longest array after resampling"
+        extend: Pad/trim all arrays to the common length
+    Returns:
+        Tuple of fitted arrays
+    Raises:
+        Exception: If ``arrays`` and ``srs`` differ in length
+    """
+    if len(arrays) != len(srs):
+        raise Exception(_i18n("arrays_srs_mismatch"))
+    new_arrays = []
+    len_arrays = len(arrays)
+    for i, (array, sr) in enumerate(zip(arrays, srs), start=1):
+        array_index1 = get_info_array(array)[2]
+        a1 = easy_resampler(array, sr, min_sr)
+        if flatten:
+            a1 = stereo_to_mono(a1, to_flatten=True)
+        elif max_channels >= 2:
+            a1 = mono_to_stereo(a1, array_index1, max_channels)
+        else:
+            a1 = stereo_to_mono(a1)
+        a1 = reshape(a1, shape=("channels", "samples"))
+        new_arrays.append(a1)
+        print(_i18n("fitting_progress", current=i, total=len_arrays), end="\r")
+    print("")
+    if extend and new_arrays:
+        if max_samples <= 0:
+            # Measure after resampling: lengths taken from the inputs are in
+            # their own sample rates and would truncate upsampled tracks.
+            max_samples = max(get_info_array(a)[1] for a in new_arrays)
+        for i, array_ in enumerate(new_arrays, start=1):
+            new_arrays[i - 1] = add_zero_to_end(array_, max_samples)
+            print(_i18n("extending_progress", current=i, total=len_arrays), end="\r")
+        print("")
+    return tuple(new_arrays)
+def subtractor(
+    y: np.ndarray,
+    z: np.ndarray,
+    sr1: int,
+    sr2: int,
+    spectrogram: bool = False
+) -> Tuple[np.ndarray, int]:
+    """
+    Subtract one audio signal from another.
+    Both signals are fitted to the lower of the two sample rates and to
+    the larger of the two channel counts. The subtraction is either done
+    on the waveforms or, with ``spectrogram``, on STFT magnitudes while
+    keeping the phase of ``y``.
+    Args:
+        y: Signal to subtract from
+        z: Signal to subtract
+        sr1: Sample rate of ``y``
+        sr2: Sample rate of ``z``
+        spectrogram: Subtract magnitudes in the STFT domain
+    Returns:
+        Tuple (result in the dtype of ``y``, sample rate)
+    """
+    channels1 = get_info_array(y)[0]
+    channels2 = get_info_array(z)[0]
+    result_dtype = y.dtype
+    y = convert_to_dtype(y, np.float32)
+    z = convert_to_dtype(z, np.float32)
+    target_sr = min(sr1, sr2)
+    fitted = fit_arrays([y, z], [sr1, sr2], max_channels=max(channels1, channels2), min_sr=target_sr)
+    y, z = fitted[0], fitted[1]
+    if not spectrogram:
+        print(_i18n("subtract_phase"))
+        return convert_to_dtype(y - z, result_dtype), target_sr
+    print(_i18n("subtract_spectrogram"))
+    # n_fft and hop are not parameters here; presumably module-level settings.
+    sft = get_stft_obj(target_sr, n_fft=n_fft, hop=hop)
+    res_channels = []
+    # Process one channel at a time to keep memory usage down
+    for ch_y, ch_z in zip(split_channels(y), split_channels(z)):
+        spec_y = sft.stft(ch_y.astype(np.float32))
+        spec_z = sft.stft(ch_z.astype(np.float32))
+        # Magnitude subtraction floored at zero, with the phase of y
+        res_spec = np.maximum(np.abs(spec_y) - np.abs(spec_z), 0) * np.exp(1j * np.angle(spec_y))
+        del spec_y, spec_z  # Release the spectra before the inverse transform
+        res_channels.append(sft.istft(res_spec, k1=ch_y.shape[-1]))
+    subtracted = multi_channel_array_from_arrays(*res_channels, index=1, dtype=result_dtype)
+    return subtracted, target_sr
+def absmax(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
+    """
+    Return the element(s) with the largest absolute value, sign preserved.
+    Args:
+        a: Input array
+        axis: Axis to reduce; None searches the flattened array
+    Returns:
+        Element with the largest absolute value (per position along ``axis``)
+    """
+    if axis is None:
+        return a.flatten()[np.argmax(np.abs(a))]
+    remaining = list(a.shape)
+    remaining.pop(axis)
+    # Open grid over the kept axes, with the winning index slotted in at ``axis``.
+    selector = list(np.ogrid[tuple(slice(0, d) for d in remaining)])
+    winners = np.abs(a).argmax(axis=axis)
+    selector.insert(axis % len(a.shape), winners)
+    return a[tuple(selector)]
+def absmin(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
+    """
+    Return the element(s) with the smallest absolute value, sign preserved.
+    Args:
+        a: Input array
+        axis: Axis to reduce; None searches the flattened array
+    Returns:
+        Element with the smallest absolute value (per position along ``axis``)
+    """
+    if axis is None:
+        return a.flatten()[np.argmin(np.abs(a))]
+    dims = list(a.shape)
+    dims.pop(axis)
+    # np.ogrid may return a tuple, which has no insert(); copy it into a
+    # list first so the index for ``axis`` can be slotted in.
+    indices = list(np.ogrid[tuple(slice(0, d) for d in dims)])
+    argmin = np.abs(a).argmin(axis=axis)
+    indices.insert(axis % len(a.shape), argmin)
+    return a[tuple(indices)]
+def lambda_max(
+    arr: np.ndarray,
+    axis: Optional[int] = None,
+    key: Optional[Callable] = None,
+    keepdims: bool = False
+) -> np.ndarray:
+    """
+    Select the elements of ``arr`` at which ``key(arr)`` is largest.
+    Args:
+        arr: Input array
+        axis: Axis to reduce; None searches the flattened array
+        key: Function applied before comparing; defaults to np.abs
+        keepdims: Keep the reduced axis with length 1 (only used with ``axis``)
+    Returns:
+        Selected element(s) of ``arr``
+    """
+    ranking = np.abs if key is None else key
+    idxs = np.argmax(ranking(arr), axis)
+    if axis is None:
+        return arr.flatten()[idxs]
+    picked = np.take_along_axis(arr, np.expand_dims(idxs, axis), axis)
+    return picked if keepdims else np.squeeze(picked, axis=axis)
+def lambda_min(
+    arr: np.ndarray,
+    axis: Optional[int] = None,
+    key: Optional[Callable] = None,
+    keepdims: bool = False
+) -> np.ndarray:
+    """
+    Select the elements of ``arr`` at which ``key(arr)`` is smallest.
+    Args:
+        arr: Input array
+        axis: Axis to reduce; None searches the flattened array
+        key: Function applied before comparing; defaults to np.abs
+        keepdims: Keep the reduced axis with length 1 (only used with ``axis``)
+    Returns:
+        Selected element(s) of ``arr``
+    """
+    ranking = np.abs if key is None else key
+    idxs = np.argmin(ranking(arr), axis)
+    if axis is None:
+        return arr.flatten()[idxs]
+    picked = np.take_along_axis(arr, np.expand_dims(idxs, axis), axis)
+    return picked if keepdims else np.squeeze(picked, axis=axis)
+def ensemble(
+    pred_tracks: List[np.ndarray],
+    srs: List[int],
+    weights: List[float],
+    algorithm: str,
+    dtype: np.dtype = np.float32
+) -> Tuple[np.ndarray, int]:
+    """
+    Combine several predictions of the same track in the STFT domain.
+    All tracks are fitted to two channels and one sample rate (the lowest
+    rate for "min_fft", the highest otherwise) and then merged per channel:
+        avg_fft: weighted average of the spectra
+        median_fft: median of the real and imaginary parts separately
+        min_fft / max_fft: per-bin value with the smallest / largest magnitude
+    Args:
+        pred_tracks: Predicted tracks
+        srs: Sample rate of each track
+        weights: Per-track weights (only used by "avg_fft")
+        algorithm: Merge algorithm name
+        dtype: Output dtype
+    Returns:
+        Tuple (merged audio, sample rate)
+    Raises:
+        ValueError: If ``algorithm`` is not one of the names above
+    """
+    # Reject bad input before any resampling or STFT work is spent on it.
+    if algorithm not in ("avg_fft", "median_fft", "min_fft", "max_fft"):
+        raise ValueError(_i18n("unknown_algorithm", alg=algorithm))
+    if algorithm == "min_fft":
+        max_sr = int(min(srs))
+    else:
+        max_sr = int(max(srs))
+    # Fit all tracks to one length and sample rate
+    pred_tracks = list(fit_arrays(pred_tracks, srs, max_channels=2, min_sr=max_sr))
+    sft = get_stft_obj(max_sr, n_fft=2048, hop=1024)
+    final_length = pred_tracks[0].shape[-1]
+    # The weight total does not depend on the channel, so compute it once.
+    total_weight = sum(weights) if algorithm == "avg_fft" else None
+    ensemble_wav_channels = []
+    for ch_idx in range(2):  # Left and right channel
+        if algorithm == "avg_fft":
+            accumulator = None
+            for i, track in enumerate(pred_tracks):
+                weighted_spec = sft.stft(track[ch_idx].astype(np.float32)) * weights[i]
+                if accumulator is None:
+                    accumulator = weighted_spec
+                else:
+                    accumulator += weighted_spec
+            res_spec = accumulator / total_weight
+        else:
+            # Median and extremes need the spectra of all tracks for this channel
+            accumulator = np.array([sft.stft(track[ch_idx].astype(np.float32)) for track in pred_tracks])
+            if algorithm == "median_fft":
+                res_spec = np.median(np.real(accumulator), axis=0) + 1j * np.median(np.imag(accumulator), axis=0)
+            elif algorithm == "min_fft":
+                res_spec = lambda_min(accumulator, axis=0, key=np.abs)
+            else:
+                res_spec = absmax(accumulator, axis=0)
+        ensemble_wav_channels.append(sft.istft(res_spec, k1=final_length))
+        del accumulator
+    result = multi_channel_array_from_arrays(*ensemble_wav_channels, index=1, dtype=dtype)
+    print(_i18n("ensemble_complete"))
+    return result, max_sr
+def concatenate(
+    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
+    srs: Union[Tuple[int, ...], List[int]],
+    dtype=np.float32
+) -> Tuple[np.ndarray, int]:
+    """
+    Join several audio arrays end to end.
+    All arrays are converted to float64, fitted to two channels at the
+    highest of the given sample rates (without length padding) and
+    concatenated along axis 1.
+    Args:
+        arrays: Audio arrays
+        srs: Sample rate of each array
+        dtype: Output dtype
+    Returns:
+        Tuple (joined audio, sample rate)
+    """
+    # max(*srs) fails with TypeError when srs holds a single rate.
+    max_sr = int(max(srs))
+    arrayss = fit_arrays([convert_to_dtype(array, np.float64) for array in arrays],
+                         srs, max_channels=2, min_sr=max_sr, extend=False)
+    result = np.concatenate(arrayss, axis=1, dtype=np.float64)
+    print(_i18n("concatenate_complete"))
+    return convert_to_dtype(result, dtype), max_sr
+def trim(y: np.ndarray, start: int = 0, end: int = -1) -> np.ndarray:
+    """
+    Cut a sample range out of the audio.
+    An ``end`` that is not positive or lies beyond the signal is replaced
+    by the number of samples.
+    Args:
+        y: Audio array
+        start: First sample to keep
+        end: Position after the last sample to keep
+    Returns:
+        Trimmed array
+    """
+    channels, samples, array_index, flatten = get_info_array(y)
+    stop = end if 0 < end <= samples else samples
+    if flatten:
+        return y[start:stop]
+    if array_index == 0:
+        return y[start:stop, :]
+    if array_index == 1:
+        return y[:, start:stop]
+    return y
+def reverse(y: np.ndarray) -> np.ndarray:
+    """
+    Reverse the audio.
+    Flat arrays are flipped as a whole; other arrays are flipped along the
+    axis reported as array_index.
+    Args:
+        y: Audio array
+    Returns:
+        Reversed array
+    """
+    _channels, _samples, array_index, flatten = get_info_array(y)
+    return np.flip(y) if flatten else np.flip(y, axis=array_index)
+def write(
+    path: str,
+    y: np.ndarray,
+    sr: int,
+    bitrate: Union[int, str] = 320,
+    prefer_float: bool = False
+) -> str:
+    """
+    Encode an audio array to a file by piping raw samples into ffmpeg.
+    Args:
+        path: Output path; the extension selects the codec arguments
+        y: Audio array
+        sr: Sample rate
+        bitrate: Bitrate, clamped to the range 32-320 before use
+        prefer_float: Passed through to the codec argument selection
+    Returns:
+        Path of the written file
+    Raises:
+        Exception: If ``sr`` is missing or ffmpeg exits with a non-zero code
+    """
+    # Optionally resolve the path to an absolute one (MVSEPLESS_WRITE_ABS).
+    if str2bool(os.environ.get("MVSEPLESS_WRITE_ABS", "False")):
+        path = os.path.abspath(path)
+    name, ext = os.path.splitext(path)
+    dir_path = os.path.dirname(path)
+    if dir_path != "":
+        os.makedirs(dir_path, exist_ok=True)
+    if not sr:
+        raise Exception(_i18n("sr_required"))
+    dtype = y.dtype
+    # The channel count is read before the array is brought to (samples, channels).
+    channels, *_ = get_info_array(y)
+    y = reshape(y, shape=("samples", "channels"))
+    # Dtypes without a known raw sample format are sent as 32-bit float.
+    sample_format = SAMPLE_FORMATS_DICT.get(str(dtype), None)
+    if not sample_format:
+        sample_format = "f32le"
+        y = convert_to_dtype(y, np.float32)
+    # Replace NaN and infinities with 0 before encoding.
+    y = np.nan_to_num(y, nan=0, posinf=0, neginf=0)
+    bitrate_val = bitrate_to_int(bitrate)
+    # Cap the bitrate at the per-format maximum for .ogg and .opus.
+    if ext == ".ogg":
+        max_bitrate = get_ogg_bitrate(sr, channels)
+        if bitrate_val > max_bitrate:
+            print(_i18n("ogg_bitrate_adjusted", old=bitrate_val, new=max_bitrate))
+            bitrate_val = max_bitrate
+    elif ext == ".opus":
+        max_bitrate = 256 * channels
+        if bitrate_val > max_bitrate:
+            print(_i18n("opus_bitrate_adjusted", old=bitrate_val, new=max_bitrate))
+            bitrate_val = max_bitrate
+    bitrate_fixed = 32 if bitrate_val < 32 else 320 if bitrate_val > 320 else bitrate_val
+    # Raw samples are read from stdin ("-i -") in the format chosen above.
+    cmd = [ffmpeg_path, "-y", "-f", sample_format, "-ar", str(sr), "-ac", str(channels),
+           "-i", "-", *get_codec_args(ext, prefer_float), "-ab", f"{bitrate_fixed}k", path]
+    process = subprocess.Popen(
+        cmd,
+        stdin=subprocess.PIPE,
+        stdout=None,
+        stderr=subprocess.PIPE,
+        bufsize=10**8
+    )
+    try:
+        # communicate() sends all samples and waits for ffmpeg to finish.
+        stdout_data, stderr_data = process.communicate(input=y.tobytes())
+        if process.returncode != 0:
+            error_msg = stderr_data.decode('utf-8', errors='ignore')
+            print(_i18n("ffmpeg_error", error=error_msg))
+            raise Exception(_i18n("ffmpeg_exit_code", code=process.returncode))
+    except Exception as e:
+        # Report the failure, make sure ffmpeg is stopped, then re-raise.
+        print(_i18n("write_critical_error", error=str(e)))
+        process.kill()
+        raise e
+    return path
+def multiwrite(
+    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]],
+    srs: Union[Tuple[int, ...], List[int]],
+    paths: Union[Tuple[str, ...], List[str]],
+    bitrate: Union[int, str] = 320,
+    prefer_float: bool = False,
+    callable_func: Optional[Callable] = None,
+    strict: bool = False
+) -> Tuple[str, ...]:
+    """
+    Write several audio arrays to files.
+    Nothing is written when the three sequences differ in length; that
+    case ends in the same "no files written" exception as a run in which
+    every write failed.
+    Args:
+        arrays: Audio arrays
+        srs: Sample rate of each array
+        paths: Output path of each array
+        bitrate: Bitrate passed to ``write``
+        prefer_float: Passed to ``write``
+        callable_func: Optional callback invoked with each path before it is written
+        strict: Abort on the first failed write instead of collecting errors
+    Returns:
+        Tuple of written paths
+    Raises:
+        Exception: On the first failure in strict mode, or if no file was written
+    """
+    saved_paths = []
+    exceptions = []
+    if len(arrays) == len(srs) == len(paths):
+        for array, sr, path in zip(arrays, srs, paths):
+            if callable_func is not None:
+                callable_func(path)
+            try:
+                saved_paths.append(write(path, array, sr, bitrate=bitrate, prefer_float=prefer_float))
+            except Exception as e:
+                if strict:
+                    # Chain the cause so the original traceback stays visible.
+                    raise Exception(str(e)) from e
+                print(_i18n("write_error", error=str(e)))
+                exceptions.append(str(e))
+    if not saved_paths:
+        exceptions_str = '\n'.join(exceptions)
+        raise Exception(_i18n("no_files_written", errors=exceptions_str))
     return tuple(saved_paths)

mvsepless/custom_models.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "mbr_hybrid_aname": {
+        "model_type": "mel_band_roformer",
+        "category": "Инструментал и вокал",
+        "id": 13456,
+        "full_name": "ANAME TEST",
+        "stems": [
+            "vocals",
+            "other"
+        ],
+        "target_instrument": null,
+        "checkpoint_url": "https://huggingface.co/Aname-Tommy/MelBandRoformers/resolve/main/MelBandRoformer_Hybrid_Arch.pth?download=true",
+        "config_url": "https://huggingface.co/Aname-Tommy/MelBandRoformers/resolve/main/config_hybrid.yaml?download=true"
+    },
+    "custom_model": {
+        "model_type": "mel_band_roformer",
+        "category": "custom",
+        "id": 1,
+        "full_name": "",
+        "stems": [
+            "vocals",
+            "other"
+        ],
+        "target_instrument": null,
+        "checkpoint_url": "https://huggingface.co/Aname-Tommy/MelBandRoformers/resolve/main/MelBandRoformer_Hybrid_Arch.pth?download=true",
+        "config_url": "https://huggingface.co/Aname-Tommy/MelBandRoformers/resolve/main/config_hybrid.yaml?download=true"
+    }
+}

mvsepless/i18n.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

mvsepless/infer_utils.py CHANGED Viewed

@@ -1,825 +1,825 @@
-import sys
-sys.stdout.reconfigure(encoding='utf-8')
-sys.stderr.reconfigure(encoding='utf-8')
-import json
-import numpy as np
-import torch
-import torch.nn as nn
-import yaml
-import librosa
-import torch.nn.functional as F
-from ml_collections import ConfigDict
-from omegaconf import OmegaConf
-from typing import Dict, List, Tuple, Any, Optional
-from i18n import _i18n
def load_config(model_type: str, config_path: str) -> Any:
    """
    Load a model configuration file.

    Args:
        model_type: Model type. "htdemucs" configs are read with OmegaConf;
            every other type is parsed as YAML and wrapped in a ConfigDict.
        config_path: Path to the configuration file.

    Returns:
        The loaded configuration object.

    Raises:
        FileNotFoundError: If config_path does not exist.
        ValueError: If the file cannot be parsed or post-processed.
    """
    try:
        # YAML streams are UTF-8 by specification; do not depend on the locale.
        with open(config_path, "r", encoding="utf-8") as f:
            if model_type == "htdemucs":
                config = OmegaConf.load(config_path)
            else:
                # NOTE(review): FullLoader is kept because model configs may use
                # Python-specific tags; do not feed it untrusted files.
                config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))
                # Optional "new_*" keys override their base counterparts.
                if hasattr(config.training, "new_segment"):
                    if hasattr(config.training, "segment"):
                        # The override for training.segment is
                        # training.new_segment (not audio.new_chunk_size).
                        config.training.segment = config.training.new_segment
                if hasattr(config.audio, "new_chunk_size"):
                    if hasattr(config.audio, "chunk_size"):
                        config.audio.chunk_size = config.audio.new_chunk_size
                if hasattr(config.audio, "new_dim_t"):
                    if hasattr(config.audio, "dim_t"):
                        config.audio.dim_t = config.audio.new_dim_t
            return config
    except FileNotFoundError as err:
        raise FileNotFoundError(_i18n("config_not_found", path=config_path)) from err
    except Exception as e:
        raise ValueError(_i18n("config_load_error", error=str(e))) from e
def get_model_from_config(model_type: str, config_path: str) -> Tuple[Any, Any]:
    """
    Build a model instance from its configuration file.

    The architecture module is imported lazily, only for the requested
    model type.

    Args:
        model_type: Model type identifier.
        config_path: Path to the configuration file.

    Returns:
        Tuple of (model, configuration).

    Raises:
        ValueError: If model_type is not one of the known types.
    """
    config = load_config(model_type, config_path)

    if model_type == "mdx23c":
        from models.mdx23c_tfc_tdf_v3 import TFC_TDF_net
        return TFC_TDF_net(config), config

    if model_type == "mdxnet":
        from models.mdx_net import MDXNet
        return MDXNet(**dict(config.model)), config

    if model_type == "vr":
        from models.vr_arch import VRNet
        return VRNet(**dict(config.model)), config

    if model_type == "htdemucs":
        from models.demucs4ht import get_model
        return get_model(config), config

    if model_type == "mel_band_roformer":
        # A top-level marker key in the config selects the variant.
        if hasattr(config, "windowed"):
            from models.windowed_roformer.model import MelBandRoformerWSA as model_cls
        elif hasattr(config, "conformer"):
            from models.bs_roformer import MelBandConformer as model_cls
        else:
            from models.bs_roformer import MelBandRoformer as model_cls
        return model_cls(**dict(config.model)), config

    if model_type == "bs_roformer":
        # Marker keys are checked in this fixed order; the first match wins.
        if hasattr(config, "sw"):
            from models.bs_roformer import BSRoformer_SW as model_cls
        elif hasattr(config, "fno"):
            from models.bs_roformer import BSRoformer_FNO as model_cls
        elif hasattr(config, "hyperace"):
            from models.bs_roformer import BSRoformerHyperACE as model_cls
        elif hasattr(config, "hyperace2"):
            from models.bs_roformer import BSRoformerHyperACE_2 as model_cls
        elif hasattr(config, "conformer"):
            from models.bs_roformer import BSConformer as model_cls
        elif hasattr(config, "conditional"):
            from models.bs_roformer import BSRoformer_Conditional as model_cls
        elif hasattr(config, "unwa_inst_large_2"):
            from models.bs_roformer import BSRoformer_2 as model_cls
        else:
            from models.bs_roformer import BSRoformer as model_cls
        return model_cls(**dict(config.model)), config

    if model_type == "bandit":
        from models.bandit.core.model import MultiMaskMultiSourceBandSplitRNNSimple
        return MultiMaskMultiSourceBandSplitRNNSimple(**config.model), config

    if model_type == "bandit_v2":
        from models.bandit_v2.bandit import Bandit
        return Bandit(**config.kwargs), config

    if model_type == "scnet_unofficial":
        from models.scnet_unofficial import SCNet
        return SCNet(**config.model), config

    if model_type == "scnet":
        from models.scnet import SCNet
        return SCNet(**config.model), config

    if model_type == 'scnet_masked':
        from models.scnet.scnet_masked import SCNet
        return SCNet(**config.model), config

    if model_type == 'scnet_tran':
        from models.scnet.scnet_tran import SCNet_Tran
        return SCNet_Tran(**config.model), config

    if model_type == 'medley_vox':
        from models.medley_vox import load_model_with_args
        return load_model_with_args(config.model), config

    raise ValueError(_i18n("unknown_model_type", model_type=model_type))
-def _getWindowingArray(window_size: int, fade_size: int) -> torch.Tensor:
-    """
-    Создать массив окна для плавного склеивания
-    Args:
-        window_size: Размер окна
-        fade_size: Размер зоны затухания
-    Returns:
-        Массив окна
-    """
-    fadein = torch.linspace(0, 1, fade_size)
-    fadeout = torch.linspace(1, 0, fade_size)
-    window = torch.ones(window_size)
-    window[-fade_size:] = fadeout
-    window[:fade_size] = fadein
-    return window
def demix_mdxnet(
    config: Any,
    model: Any,
    mix: np.ndarray,
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Separate a mix with an MDX-Net model using overlapping chunks.

    Each chunk is passed through model.stft -> model -> model.istft,
    weighted by a cross-fade window and accumulated; the sum is then divided
    by the accumulated window weights. One JSON progress line is written to
    stdout per processed batch.

    Args:
        config: Configuration; reads inference.num_overlap and
            inference.denoise.
        model: Model exposing primary_stem, hop_length, dim_t, stft and istft.
        mix: Input audio (indexed as [channels, samples] below).
        device: Device to run inference on.

    Returns:
        Dict mapping the model's primary stem name to the separated audio.
    """
    mix_tensor = torch.tensor(mix, dtype=torch.float32).to(device)
    batch_size = 1
    num_overlap = config.inference.num_overlap
    denoise = config.inference.denoise
    stem_name = model.primary_stem
    chunk_size = model.hop_length * (model.dim_t - 1)
    fade_size = chunk_size // 10
    step = chunk_size // num_overlap
    border = chunk_size - step
    length_init = mix_tensor.shape[-1]
    # Reflect-pad both ends only when the signal is long enough; otherwise
    # process the signal as-is (previously `wave` was left undefined here).
    use_border = length_init > 2 * border and border > 0
    if use_border:
        wave = nn.functional.pad(mix_tensor, (border, border), mode="reflect")
    else:
        wave = mix_tensor
    window = _getWindowingArray(chunk_size, fade_size).to(device)
    with torch.no_grad():
        result = torch.zeros_like(wave, device=device)
        counter = torch.zeros_like(wave, device=device)
        i = 0
        batch_data = []
        batch_locations = []
        while i < wave.shape[1]:
            part = wave[:, i : i + chunk_size]
            chunk_len = part.shape[-1]
            if chunk_len < chunk_size:
                # Reflect needs the pad to be shorter than the data.
                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
                part = nn.functional.pad(
                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
                )
            batch_data.append(part)
            batch_locations.append((i, chunk_len))
            i += step
            if len(batch_data) >= batch_size or i >= wave.shape[1]:
                arr = torch.stack(batch_data, dim=0)
                for j, (start, seg_len) in enumerate(batch_locations):
                    if denoise:
                        # Average the outputs for the signal and its negation.
                        processed_spec1 = model(model.stft(arr[j : j + 1]))
                        processed_spec2 = model(model.stft(-(arr[j : j + 1])))
                        processed_wav = (model.istft(processed_spec1) + -model.istft(processed_spec2)) * 0.5
                    else:
                        processed_spec = model(model.stft(arr[j : j + 1]))
                        processed_wav = model.istft(processed_spec)
                    window_segment = window[..., :seg_len]
                    result[:, start : start + seg_len] += (
                        processed_wav[0, :, :seg_len] * window_segment
                    )
                    counter[:, start : start + seg_len] += window_segment
                progress_data = {
                    "processing": {
                        "processed": min(i, wave.shape[1]),
                        "total": wave.shape[1],
                        "unit": _i18n("unit_samples")
                    }
                }
                sys.stdout.write(
                    json.dumps(progress_data, ensure_ascii=False) + "\n"
                )
                sys.stdout.flush()
                batch_data.clear()
                batch_locations.clear()
        # Positions with zero accumulated weight become NaN here and are
        # zeroed by nan_to_num below.
        estimated_sources = result / counter
        if use_border:
            estimated_sources = estimated_sources[..., border:-border]
    result_separation = estimated_sources.cpu().numpy()
    result_separation = np.nan_to_num(
        result_separation, nan=0.0, posinf=0.0, neginf=0.0
    )
    return {stem_name: result_separation}
def demix_vr(
    config: Any,
    model: Any,
    mix: np.ndarray,
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Separate a mix with a VR-architecture model.

    A mask is predicted over windows of the padded magnitude spectrogram,
    applied to the original magnitude/phase to obtain the primary stem, and
    its complement (1 - mask) gives the secondary stem. Both stems are
    resampled from model.model_samplerate to config.audio.sample_rate.

    Args:
        config: Configuration; reads inference.aggression and
            audio.sample_rate.
        model: VR model wrapper (model_run, model_params, window_size,
            batch_size, primary_stem, secondary_stem, ...).
        mix: Input audio.
        device: Device to run inference on.

    Returns:
        Dict mapping the primary and secondary stem names to audio arrays.

    Raises:
        ValueError: If no mask could be predicted for the window size.
    """
    from models.vr_arch import spec_utils, NON_ACCOM_STEMS
    aggression = config.inference.aggression
    sr = config.audio.sample_rate
    aggr = float(int(aggression) / 100)
    aggressiveness = {
        "value": aggr,
        "split_bin": model.model_params.param["band"][1]["crop_stop"],
        "aggr_correction": model.model_params.param.get("aggr_correction"),
    }
    X_spec = model.loading_mix(mix, sr)

    def _execute(X_mag_pad: np.ndarray, roi_size: int) -> np.ndarray:
        """Predict the mask over all windows of the padded magnitude."""
        patches = (X_mag_pad.shape[2] - 2 * model.model_run.offset) // roi_size
        total = patches
        X_dataset = np.asarray([
            X_mag_pad[:, :, p * roi_size : p * roi_size + model.window_size]
            for p in range(patches)
        ])
        model.model_run.eval()
        with torch.no_grad():
            mask = []
            for i in range(0, patches, model.batch_size):
                processed = min(i + model.batch_size, patches)
                sys.stdout.write(
                    json.dumps(
                        {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_patches")}},
                        ensure_ascii=False,
                    )
                    + "\n"
                )
                sys.stdout.flush()
                X_batch = X_dataset[i : i + model.batch_size]
                X_batch = torch.from_numpy(X_batch).to(device)
                pred = model.model_run.predict_mask(X_batch)
                if not pred.size()[3] > 0:
                    raise ValueError(
                        _i18n("window_size_error")
                    )
                pred = pred.detach().cpu().numpy()
                # Join the per-item predictions of the batch along the last axis.
                pred = np.concatenate(pred, axis=2)
                mask.append(pred)
            if len(mask) == 0:
                raise ValueError(
                    _i18n("window_size_error")
                )
            mask = np.concatenate(mask, axis=2)
        return mask

    def postprocess(
        mask: np.ndarray,
        X_mag: np.ndarray,
        X_phase: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Apply the mask and its complement to the mix spectrogram."""
        is_non_accom_stem = any(
            stem == model.primary_stem.lower() for stem in NON_ACCOM_STEMS
        )
        mask = spec_utils.adjust_aggr(mask, is_non_accom_stem, aggressiveness)
        if model.enable_post_process:
            mask = spec_utils.merge_artifacts(
                mask, thres=model.post_process_threshold
            )
        y_spec = mask * X_mag * np.exp(1.0j * X_phase)
        v_spec = (1 - mask) * X_mag * np.exp(1.0j * X_phase)
        return y_spec, v_spec

    X_mag, X_phase = spec_utils.preprocess(X_spec)
    n_frame = X_mag.shape[2]
    pad_l, pad_r, roi_size = spec_utils.make_padding(
        n_frame, model.window_size, model.model_run.offset
    )
    X_mag_pad = np.pad(X_mag, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
    # Normalise to the peak; skip for silent input to avoid 0/0 -> NaN.
    peak = X_mag_pad.max()
    if peak > 0:
        X_mag_pad /= peak
    mask = _execute(X_mag_pad, roi_size)
    mask = mask[:, :, :n_frame]
    y_spec, v_spec = postprocess(mask, X_mag, X_phase)
    y_spec = np.nan_to_num(y_spec, nan=0.0, posinf=0.0, neginf=0.0)
    v_spec = np.nan_to_num(v_spec, nan=0.0, posinf=0.0, neginf=0.0)
    primary_stem_array = model.spec_to_wav(y_spec).T
    primary_stem_array = librosa.resample(
        primary_stem_array.T,
        orig_sr=model.model_samplerate,
        target_sr=sr,
    ).T
    secondary_stem_array = model.spec_to_wav(v_spec).T
    secondary_stem_array = librosa.resample(
        secondary_stem_array.T,
        orig_sr=model.model_samplerate,
        target_sr=sr,
    ).T
    return {
        model.primary_stem: primary_stem_array,
        model.secondary_stem: secondary_stem_array,
    }
def demix_demucs(
    config: Any,
    model: Any,
    mix: np.ndarray,
    device: torch.device
) -> Dict[str, np.ndarray]:
    """
    Separate a mix with a Demucs model using overlapping, windowed chunks.

    Chunks are batched, run through the model, weighted by a cross-fade
    window and accumulated; the sum is divided by the accumulated weights.
    One JSON progress line is written to stdout per processed batch.

    Args:
        config: Configuration; reads training.samplerate, training.segment,
            training.instruments, inference.num_overlap,
            inference.batch_size and optionally training.use_amp.
        model: Demucs model.
        mix: Input audio (indexed as [channels, samples] below).
        device: Device to run inference on.

    Returns:
        Dict mapping instrument names to separated audio.
        NOTE(review): when only one instrument is configured the raw array
        is returned instead of a dict; kept as-is for existing callers.
    """
    mix = torch.tensor(mix, dtype=torch.float32)
    chunk_size = config.training.samplerate * config.training.segment
    num_instruments = len(config.training.instruments)
    num_overlap = config.inference.num_overlap
    step = chunk_size // num_overlap
    fade_size = chunk_size // 10
    windowing_array = _getWindowingArray(chunk_size, fade_size)
    batch_size = config.inference.batch_size
    use_amp = getattr(config.training, "use_amp", True)
    with torch.cuda.amp.autocast(enabled=use_amp):
        with torch.inference_mode():
            req_shape = (num_instruments,) + mix.shape
            result = torch.zeros(req_shape, dtype=torch.float32)
            counter = torch.zeros(req_shape, dtype=torch.float32)
            i = 0
            batch_data = []
            batch_locations = []
            while i < mix.shape[1]:
                part = mix[:, i : i + chunk_size].to(device)
                chunk_len = part.shape[-1]
                # Reflect needs the pad to be shorter than the data.
                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
                part = nn.functional.pad(
                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
                )
                batch_data.append(part)
                batch_locations.append((i, chunk_len))
                i += step
                if len(batch_data) >= batch_size or i >= mix.shape[1]:
                    arr = torch.stack(batch_data, dim=0)
                    x = model(arr)
                    window = windowing_array.clone()
                    if i - step == 0:
                        # Batch ends on the first chunk: no fade-in.
                        window[:fade_size] = 1
                    elif i >= mix.shape[1]:
                        # Batch ends on the last chunk: no fade-out.
                        window[-fade_size:] = 1
                    for j, (start, seg_len) in enumerate(batch_locations):
                        result[..., start : start + seg_len] += (
                            x[j, ..., :seg_len].cpu() * window[..., :seg_len]
                        )
                        counter[..., start : start + seg_len] += window[..., :seg_len]
                    processed = min(i, mix.shape[1])
                    total = mix.shape[1]
                    # ensure_ascii=False matches the progress lines emitted by
                    # the other demix functions (the unit label is localized).
                    sys.stdout.write(
                        json.dumps(
                            {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_samples")}},
                            ensure_ascii=False,
                        )
                        + "\n"
                    )
                    sys.stdout.flush()
                    batch_data.clear()
                    batch_locations.clear()
            estimated_sources = result / counter
            estimated_sources = estimated_sources.cpu().numpy()
            np.nan_to_num(estimated_sources, copy=False, nan=0.0)
    if num_instruments <= 1:
        return estimated_sources
    else:
        instruments = config.training.instruments
        return {k: v for k, v in zip(instruments, estimated_sources)}
def demix_generic(
    config: ConfigDict,
    model: torch.nn.Module,
    mix: torch.Tensor,
    device: torch.device,
) -> Dict[str, np.ndarray]:
    """
    Generic chunked separation used for most model types.

    The mix is optionally reflect-padded at both ends, split into overlapping
    chunks, run through the model in batches, blended with a cross-fade
    window and normalised by the accumulated window weights. One JSON
    progress line is written to stdout per processed batch.

    Args:
        config: Configuration; reads audio.chunk_size, inference.num_overlap,
            inference.batch_size and optionally training.use_amp.
        model: Separation model.
        mix: Input audio (indexed as [channels, samples] below).
        device: Device to run inference on.

    Returns:
        Dict mapping instrument names to separated audio.
    """
    audio = torch.tensor(mix, dtype=torch.float32)
    seg_size = config.audio.chunk_size
    stems = prefer_target_instrument(config)
    n_stems = len(stems)
    overlap = config.inference.num_overlap
    fade = seg_size // 10
    hop = seg_size // overlap
    margin = seg_size - hop
    orig_len = audio.shape[-1]
    base_window = _getWindowingArray(seg_size, fade)
    # Pad only when the signal is long enough for a reflect pad of `margin`.
    padded = orig_len > 2 * margin and margin > 0
    if padded:
        audio = nn.functional.pad(audio, (margin, margin), mode="reflect")
    max_batch = config.inference.batch_size
    amp_on = getattr(config.training, "use_amp", True)
    with torch.cuda.amp.autocast(enabled=amp_on), torch.inference_mode():
        n_samples = audio.shape[1]
        acc_shape = (n_stems,) + audio.shape
        accum = torch.zeros(acc_shape, dtype=torch.float32)
        weights = torch.zeros(acc_shape, dtype=torch.float32)
        chunks = []
        spans = []
        pos = 0
        while pos < n_samples:
            piece = audio[:, pos : pos + seg_size].to(device)
            valid = piece.shape[-1]
            # Reflect needs the pad to be shorter than the data.
            mode = "reflect" if valid > seg_size // 2 else "constant"
            piece = nn.functional.pad(
                piece, (0, seg_size - valid), mode=mode, value=0
            )
            chunks.append(piece)
            spans.append((pos, valid))
            pos += hop
            if len(chunks) < max_batch and pos < n_samples:
                # Batch not full yet and more audio remains.
                continue
            out = model(torch.stack(chunks, dim=0))
            win = base_window.clone()
            if pos - hop == 0:
                # Batch ends on the first chunk: no fade-in.
                win[:fade] = 1
            elif pos >= n_samples:
                # Batch ends on the last chunk: no fade-out.
                win[-fade:] = 1
            for idx, (begin, length) in enumerate(spans):
                end = begin + length
                accum[..., begin:end] += out[idx, ..., :length].cpu() * win[..., :length]
                weights[..., begin:end] += win[..., :length]
            done = min(pos, n_samples)
            progress = {
                "processing": {
                    "processed": done,
                    "total": n_samples,
                    "unit": _i18n("unit_samples"),
                }
            }
            sys.stdout.write(json.dumps(progress, ensure_ascii=False) + "\n")
            sys.stdout.flush()
            chunks.clear()
            spans.clear()
        separated = (accum / weights).cpu().numpy()
        np.nan_to_num(separated, copy=False, nan=0.0)
        if padded:
            separated = separated[..., margin:-margin]
    return dict(zip(stems, separated))
def demix_medley_vox(
    config: ConfigDict,
    model: Any,
    mix: np.ndarray,
    device: torch.device
) -> Dict[str, np.ndarray]:
    """
    Separate a mix with a Medley Vox model.

    The mix is loudness-normalised, every channel is separated chunk by
    chunk with model.separate, chunks are blended with a cross-fade window,
    and the loudness gain is undone on the result. One JSON progress line is
    written to stdout per chunk.

    Args:
        config: Configuration; reads training.instruments, model.sample_rate,
            model.seq_dur and inference.num_overlap.
        model: Medley Vox model exposing separate() and sample_rate.
        mix: Input audio, 1-D (mono) or 2-D; a 2-D array with more rows than
            columns is treated as [samples, channels] and transposed.
        device: Device to run inference on.

    Returns:
        Dict mapping stem names to arrays of shape [channels, samples].

    Raises:
        ValueError: If mix is neither 1-D nor 2-D.
    """
    import pyloudnorm as pyln
    from models.medley_vox.loudness_utils import loudnorm, db2linear
    stems: List[str] = config.training.instruments
    # Bring the input to [channels, samples].
    if mix.ndim == 1:  # mono
        mix = np.expand_dims(mix, axis=0)  # [1, samples]
        num_channels = 1
    elif mix.ndim == 2:
        if mix.shape[0] > mix.shape[1]:  # probably [samples, channels]
            mix = mix.T
        num_channels = mix.shape[0]
    else:
        raise ValueError(
            f"demix_medley_vox expects 1-D or 2-D audio, got {mix.ndim} dimensions"
        )
    # Processing parameters.
    samplerate = config.model.sample_rate
    segment_sec = config.model.seq_dur
    chunk_size = int(samplerate * segment_sec)
    overlap = config.inference.num_overlap
    step = chunk_size // overlap
    fade_size = chunk_size // 10
    # Loudness-normalise the whole mix.
    meter = pyln.Meter(model.sample_rate)
    try:
        # loudnorm is fed [samples, channels] or [samples].
        if num_channels > 1:
            mix_for_loudnorm = mix.T  # [samples, channels]
        else:
            mix_for_loudnorm = mix[0]  # [samples]
        mixture_d, adjusted_gain = loudnorm(mix_for_loudnorm, -24.0, meter)
        # Convert back to [channels, samples].
        if num_channels > 1:
            if isinstance(mixture_d, np.ndarray) and mixture_d.ndim == 2:
                mixture_d = mixture_d.T  # [channels, samples]
            else:
                # Mono came back: duplicate it onto every channel.
                mixture_d = np.tile(mixture_d, (num_channels, 1))
        else:
            if mixture_d.ndim == 1:
                mixture_d = mixture_d.reshape(1, -1)
    except Exception as e:
        # Deliberate best-effort: fall back to a manual RMS normalisation.
        print(_i18n("loudnorm_error", error=str(e)))
        mixture_d = mix.copy()
        rms = np.sqrt(np.mean(mix**2))
        target_rms = 0.1  # roughly -20 dB
        if rms > 0:
            adjusted_gain = 20 * np.log10(target_rms / rms)
            mixture_d = mix * (target_rms / rms)
        else:
            adjusted_gain = 0
    length_init = mixture_d.shape[1]
    windowing_array = _getWindowingArray(chunk_size, fade_size).to(device)
    # Per-stem accumulators, each [channels, samples].
    result_stems = {stem: np.zeros((num_channels, length_init), dtype=np.float32)
                   for stem in stems}
    mix_tensor = torch.tensor(mixture_d, dtype=torch.float32).to(device)
    # Per-stem accumulated window weights, each [channels, samples].
    counters = {stem: torch.zeros((num_channels, length_init), dtype=torch.float32, device=device)
                for stem in stems}
    i = 0
    while i < length_init:
        # Take the chunk for all channels at once: [channels, <= chunk_size].
        end_idx = min(i + chunk_size, length_init)
        chunk = mix_tensor[:, i:end_idx]
        cur_chunk_len = chunk.shape[1]
        # Holds two separated sources per channel for this chunk.
        chunk_results = torch.zeros((num_channels, 2, cur_chunk_len), dtype=torch.float32, device=device)
        # The model is run on one channel at a time.
        for ch in range(num_channels):
            channel_chunk = chunk[ch:ch+1, :]  # [1, cur_chunk_len]
            if cur_chunk_len < chunk_size:
                pad_len = chunk_size - cur_chunk_len
                channel_chunk = torch.nn.functional.pad(
                    channel_chunk, (0, pad_len), mode='constant', value=0
                )
            # Add the batch dimension: [1, 1, chunk_size].
            channel_chunk = channel_chunk.unsqueeze(0)
            with torch.no_grad():
                out_chunk = model.separate(channel_chunk)
            # Keep only the un-padded part; the slice assignment copies the
            # data into chunk_results on its own device.
            chunk_results[ch, :, :cur_chunk_len] = out_chunk[0, :, :cur_chunk_len]
        # Cross-fade window, without fade at the very start / very end.
        window = windowing_array[:cur_chunk_len].clone()
        if i == 0:
            window[:fade_size] = 1
        if end_idx >= length_init:
            window[-fade_size:] = 1
        window_np = window.cpu().numpy()
        # Accumulate the weighted results.
        for stem_idx, stem in enumerate(stems):
            result_stems[stem][:, i:end_idx] += chunk_results[:, stem_idx, :].cpu().numpy() * window_np
            counters[stem][:, i:end_idx] += window
        i += step
        progress_data = {
            "processing": {
                "processed": min(end_idx, length_init),
                "total": length_init,
                "unit": _i18n("unit_samples"),
            }
        }
        sys.stdout.write(json.dumps(progress_data, ensure_ascii=False) + "\n")
        sys.stdout.flush()
    # Normalise by the accumulated weights.
    for stem in stems:
        counters_np = counters[stem].cpu().numpy()
        # Avoid division by zero.
        mask = counters_np > 0
        result_stems[stem][mask] /= counters_np[mask]
        # Undo the loudness normalisation gain.
        result_stems[stem] = result_stems[stem] * db2linear(-adjusted_gain)
    return result_stems
def demix(
    config: ConfigDict,
    model: torch.nn.Module,
    mix: np.ndarray,
    device: torch.device,
    model_type: str,
) -> Dict[str, np.ndarray]:
    """
    Entry point for separation: dispatch to the routine for the model type.

    Args:
        config: Configuration.
        model: Model.
        mix: Input audio.
        device: Device to run inference on.
        model_type: Model type; "vr", "mdxnet", "htdemucs" and "medley_vox"
            have dedicated routines, anything else uses demix_generic.

    Returns:
        Dict with the separated stems.
    """
    specialised = {
        "vr": demix_vr,
        "mdxnet": demix_mdxnet,
        "htdemucs": demix_demucs,
        "medley_vox": demix_medley_vox,
    }
    handler = specialised.get(model_type, demix_generic)
    return handler(config, model, mix, device)
def prefer_target_instrument(config: ConfigDict) -> List[str]:
    """
    Pick the instruments the model is expected to output.

    Args:
        config: Configuration; reads training.target_instrument and
            training.instruments.

    Returns:
        A one-element list with the target instrument when it is set to a
        truthy value, otherwise the configured list of instruments.
    """
    training = config.training
    if not training.get("target_instrument"):
        return training.instruments
    return [training.target_instrument]
def prefer_target_instrument_test(
    config: ConfigDict, selected_instruments: Optional[List[str]] = None
) -> List[str]:
    """
    Pick the instruments to use for testing.

    Args:
        config: Configuration; reads training.instruments and
            training.target_instrument.
        selected_instruments: Optional explicit selection. When given, only
            the entries present in training.instruments are kept, in the
            order they were selected.

    Returns:
        The filtered selection, else the target instrument as a one-element
        list when it is set, else all configured instruments.
    """
    available_instruments = config.training.instruments
    if selected_instruments is None:
        if config.training.get("target_instrument"):
            return [config.training.target_instrument]
        return available_instruments
    return [
        name for name in selected_instruments if name in available_instruments
    ]

+import sys
+sys.stdout.reconfigure(encoding='utf-8')
+sys.stderr.reconfigure(encoding='utf-8')
+import json
+import numpy as np
+import torch
+import torch.nn as nn
+import yaml
+import librosa
+import torch.nn.functional as F
+from ml_collections import ConfigDict
+from omegaconf import OmegaConf
+from typing import Dict, List, Tuple, Any, Optional
+from i18n import _i18n
def load_config(model_type: str, config_path: str) -> Any:
    """
    Load a model configuration file.

    Args:
        model_type: Model type. "htdemucs" configs are read with OmegaConf;
            every other type is parsed as YAML and wrapped in a ConfigDict.
        config_path: Path to the configuration file.

    Returns:
        The loaded configuration object.

    Raises:
        FileNotFoundError: If config_path does not exist.
        ValueError: If the file cannot be parsed or post-processed.
    """
    try:
        # YAML streams are UTF-8 by specification; do not depend on the locale.
        with open(config_path, "r", encoding="utf-8") as f:
            if model_type == "htdemucs":
                config = OmegaConf.load(config_path)
            else:
                # NOTE(review): FullLoader is kept because model configs may use
                # Python-specific tags; do not feed it untrusted files.
                config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))
                # Optional "new_*" keys override their base counterparts.
                if hasattr(config.training, "new_segment"):
                    if hasattr(config.training, "segment"):
                        # The override for training.segment is
                        # training.new_segment (not audio.new_chunk_size).
                        config.training.segment = config.training.new_segment
                if hasattr(config.audio, "new_chunk_size"):
                    if hasattr(config.audio, "chunk_size"):
                        config.audio.chunk_size = config.audio.new_chunk_size
                if hasattr(config.audio, "new_dim_t"):
                    if hasattr(config.audio, "dim_t"):
                        config.audio.dim_t = config.audio.new_dim_t
            return config
    except FileNotFoundError as err:
        raise FileNotFoundError(_i18n("config_not_found", path=config_path)) from err
    except Exception as e:
        raise ValueError(_i18n("config_load_error", error=str(e))) from e
+def get_model_from_config(model_type: str, config_path: str) -> Tuple[Any, Any]:
+    """
+    Instantiate the separation model that corresponds to ``model_type``.
+    The configuration is read with ``load_config`` and the architecture class
+    is imported inside the matching branch, so only the module of the
+    requested architecture is imported by this call.
+    Args:
+        model_type: Architecture identifier, e.g. "mdx23c", "mdxnet", "vr",
+            "htdemucs", "mel_band_roformer", "bs_roformer", "bandit",
+            "bandit_v2", "scnet", "medley_vox".
+        config_path: Path to the model configuration file.
+    Returns:
+        Tuple ``(model, config)``.
+    Raises:
+        ValueError: If ``model_type`` matches none of the branches below.
+    """
+    config = load_config(model_type, config_path)
+    if model_type == "mdx23c":
+        from models.mdx23c_tfc_tdf_v3 import TFC_TDF_net
+        # This constructor receives the whole config object, not keyword args.
+        model = TFC_TDF_net(config)
+    elif model_type == "mdxnet":
+        from models.mdx_net import MDXNet
+        model = MDXNet(**dict(config.model))
+    elif model_type == "vr":
+        from models.vr_arch import VRNet
+        model = VRNet(**dict(config.model))
+    elif model_type == "htdemucs":
+        from models.demucs4ht import get_model
+        model = get_model(config)
+    elif model_type == "mel_band_roformer":
+        # The variant is chosen by the presence of a top-level attribute on
+        # the config; the first matching flag wins.
+        if hasattr(config, "windowed"):
+            from models.windowed_roformer.model import MelBandRoformerWSA
+            model = MelBandRoformerWSA(**dict(config.model))
+        elif hasattr(config, "conformer"):
+            from models.bs_roformer import MelBandConformer
+            model = MelBandConformer(**dict(config.model))
+        else:
+            from models.bs_roformer import MelBandRoformer
+            model = MelBandRoformer(**dict(config.model))
+    elif model_type == "bs_roformer":
+        # Same flag-based selection as above; the checks run in this order
+        # and fall back to the plain BSRoformer when no flag is present.
+        if hasattr(config, "sw"):
+            from models.bs_roformer import BSRoformer_SW
+            model = BSRoformer_SW(**dict(config.model))
+        elif hasattr(config, "fno"):
+            from models.bs_roformer import BSRoformer_FNO
+            model = BSRoformer_FNO(**dict(config.model))
+        elif hasattr(config, "hyperace"):
+            from models.bs_roformer import BSRoformerHyperACE
+            model = BSRoformerHyperACE(**dict(config.model))
+        elif hasattr(config, "hyperace2"):
+            from models.bs_roformer import BSRoformerHyperACE_2
+            model = BSRoformerHyperACE_2(**dict(config.model))
+        elif hasattr(config, "conformer"):
+            from models.bs_roformer import BSConformer
+            model = BSConformer(**dict(config.model))
+        elif hasattr(config, "conditional"):
+            from models.bs_roformer import BSRoformer_Conditional
+            model = BSRoformer_Conditional(**dict(config.model))
+        elif hasattr(config, "unwa_inst_large_2"):
+            from models.bs_roformer import BSRoformer_2
+            model = BSRoformer_2(**dict(config.model))
+        else:
+            from models.bs_roformer import BSRoformer
+            model = BSRoformer(**dict(config.model))
+    elif model_type == "bandit":
+        from models.bandit.core.model import MultiMaskMultiSourceBandSplitRNNSimple
+        model = MultiMaskMultiSourceBandSplitRNNSimple(**config.model)
+    elif model_type == "bandit_v2":
+        from models.bandit_v2.bandit import Bandit
+        # Unlike the other branches, the arguments come from config.kwargs.
+        model = Bandit(**config.kwargs)
+    elif model_type == "scnet_unofficial":
+        from models.scnet_unofficial import SCNet
+        model = SCNet(**config.model)
+    elif model_type == "scnet":
+        from models.scnet import SCNet
+        model = SCNet(**config.model)
+    elif model_type == 'scnet_masked':
+        from models.scnet.scnet_masked import SCNet
+        model = SCNet(**config.model)
+    elif model_type == 'scnet_tran':
+        from models.scnet.scnet_tran import SCNet_Tran
+        model = SCNet_Tran(**config.model)
+    elif model_type == 'medley_vox':
+        from models.medley_vox import load_model_with_args
+        model = load_model_with_args(config.model)
+    else:
+        raise ValueError(_i18n("unknown_model_type", model_type=model_type))
+    return model, config
+def _getWindowingArray(window_size: int, fade_size: int) -> torch.Tensor:
+    """
+    Build the cross-fade window used when overlapping chunks are summed.
+    The window is flat (ones) in the middle, ramps linearly 0 -> 1 over the
+    first ``fade_size`` samples and 1 -> 0 over the last ``fade_size`` samples.
+    Args:
+        window_size: Total window length in samples.
+        fade_size: Length of each linear ramp in samples.
+    Returns:
+        1-D tensor of length ``window_size``.
+    """
+    taper = torch.ones(window_size)
+    # The tail ramp is written first so that, if the two ramps overlap,
+    # the fade-in values take precedence.
+    taper[-fade_size:] = torch.linspace(1, 0, fade_size)
+    taper[:fade_size] = torch.linspace(0, 1, fade_size)
+    return taper
+def demix_mdxnet(
+    config: Any,
+    model: Any,
+    mix: np.ndarray,
+    device: torch.device,
+) -> Dict[str, np.ndarray]:
+    """
+    Separate the primary stem of an MDX-Net model with overlap-add chunking.
+    The mix is cut into chunks of ``model.hop_length * (model.dim_t - 1)``
+    samples, each chunk goes through ``model.stft`` -> ``model`` ->
+    ``model.istft``, and the windowed outputs are summed and divided by the
+    accumulated window weight. A JSON progress line is written to stdout
+    after every processed chunk.
+    Args:
+        config: Configuration; ``config.inference.num_overlap`` and
+            ``config.inference.denoise`` are read.
+        model: Model exposing ``primary_stem``, ``hop_length``, ``dim_t``,
+            ``stft`` and ``istft``.
+        mix: Mixture, indexed as ``[channels, samples]`` below.
+        device: Device the computation runs on.
+    Returns:
+        Dictionary ``{model.primary_stem: separated_audio}``.
+    """
+    mix_tensor = torch.tensor(mix, dtype=torch.float32).to(device)
+    batch_size = 1
+    num_overlap = config.inference.num_overlap
+    denoise = config.inference.denoise
+    stem_name = model.primary_stem
+    chunk_size = model.hop_length * (model.dim_t - 1)
+    fade_size = chunk_size // 10
+    step = chunk_size // num_overlap
+    border = chunk_size - step
+    length_init = mix_tensor.shape[-1]
+    # Reflect-pad both ends only when the signal is long enough for it.
+    # Otherwise process the signal as is: previously `wave` stayed unassigned
+    # in that case and the function crashed with UnboundLocalError.
+    use_border = length_init > 2 * border and border > 0
+    if use_border:
+        wave = nn.functional.pad(mix_tensor, (border, border), mode="reflect")
+    else:
+        wave = mix_tensor
+    window = _getWindowingArray(chunk_size, fade_size).to(device)
+    with torch.no_grad():
+        result = torch.zeros_like(wave, device=device)
+        counter = torch.zeros_like(wave, device=device)
+        i = 0
+        batch_data = []
+        batch_locations = []
+        while i < wave.shape[1]:
+            part = wave[:, i : i + chunk_size]
+            chunk_len = part.shape[-1]
+            if chunk_len < chunk_size:
+                # Reflect padding needs the pad to be shorter than the data,
+                # so short tails are zero-padded instead.
+                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
+                part = nn.functional.pad(
+                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
+                )
+            batch_data.append(part)
+            batch_locations.append((i, chunk_len))
+            i += step
+            if len(batch_data) >= batch_size or i >= wave.shape[1]:
+                arr = torch.stack(batch_data, dim=0)
+                for j, (start, seg_len) in enumerate(batch_locations):
+                    if denoise:
+                        # Average the output for the chunk and for its
+                        # sign-inverted copy (inverted back after istft).
+                        processed_spec1 = model(model.stft(arr[j : j + 1]))
+                        processed_spec2 = model(model.stft(-(arr[j : j + 1])))
+                        processed_wav = (model.istft(processed_spec1) + -model.istft(processed_spec2)) * 0.5
+                    else:
+                        processed_spec = model(model.stft(arr[j : j + 1]))
+                        processed_wav = model.istft(processed_spec)
+                    window_segment = window[..., :seg_len]
+                    result[:, start : start + seg_len] += (
+                        processed_wav[0, :, :seg_len] * window_segment
+                    )
+                    counter[:, start : start + seg_len] += window_segment
+                progress_data = {
+                    "processing": {
+                        "processed": min(i, wave.shape[1]),
+                        "total": wave.shape[1],
+                        "unit": _i18n("unit_samples")
+                    }
+                }
+                sys.stdout.write(
+                    json.dumps(progress_data, ensure_ascii=False) + "\n"
+                )
+                sys.stdout.flush()
+                batch_data.clear()
+                batch_locations.clear()
+        # Samples whose accumulated weight is zero become NaN here and are
+        # zeroed by nan_to_num below.
+        estimated_sources = result / counter
+        if use_border:
+            estimated_sources = estimated_sources[..., border:-border]
+    result_separation = estimated_sources.cpu().numpy()
+    result_separation = np.nan_to_num(
+        result_separation, nan=0.0, posinf=0.0, neginf=0.0
+    )
+    return {stem_name: result_separation}
+def demix_vr(
+    config: Any,
+    model: Any,
+    mix: np.ndarray,
+    device: torch.device,
+) -> Dict[str, np.ndarray]:
+    """
+    Separate a mix with a VR-architecture model (magnitude masking).
+    A mask is predicted patch by patch on the peak-normalized magnitude
+    spectrogram, applied (and inverted) on the original magnitude with the
+    original phase, and both results are converted back to audio and
+    resampled from ``model.model_samplerate`` to ``config.audio.sample_rate``.
+    Args:
+        config: Configuration; ``config.inference.aggression`` and
+            ``config.audio.sample_rate`` are read.
+        model: VR model wrapper (provides ``loading_mix``, ``model_run``,
+            ``spec_to_wav``, ``window_size``, ``batch_size`` and stem names).
+        mix: Mixture audio passed to ``model.loading_mix``.
+        device: Device used for mask prediction.
+    Returns:
+        Dictionary with the primary and secondary stems.
+    Raises:
+        ValueError: If the predicted mask is empty (window size problem).
+    """
+    from models.vr_arch import spec_utils, NON_ACCOM_STEMS
+    aggression = config.inference.aggression
+    sr = config.audio.sample_rate
+    # The configured value is divided by 100 before use.
+    aggr = float(int(aggression) / 100)
+    aggressiveness = {
+        "value": aggr,
+        "split_bin": model.model_params.param["band"][1]["crop_stop"],
+        "aggr_correction": model.model_params.param.get("aggr_correction"),
+    }
+    X_spec = model.loading_mix(mix, sr)
+    def _execute(X_mag_pad: np.ndarray, roi_size: int) -> np.ndarray:
+        """Predict the mask for the padded magnitude, batch by batch."""
+        X_dataset = []
+        patches = (X_mag_pad.shape[2] - 2 * model.model_run.offset) // roi_size
+        total = patches
+        for i in range(patches):
+            # NOTE(review): `processed` is not used inside this loop.
+            processed = min(i + model.batch_size, patches)
+            start = i * roi_size
+            X_mag_window = X_mag_pad[:, :, start : start + model.window_size]
+            X_dataset.append(X_mag_window)
+        # NOTE(review): `total_iterations` is computed but never read.
+        total_iterations = (
+            patches // model.batch_size
+        )
+        X_dataset = np.asarray(X_dataset)
+        model.model_run.eval()
+        with torch.no_grad():
+            mask = []
+            for i in range(0, patches, model.batch_size):
+                processed = min(i + model.batch_size, patches)
+                # Progress is reported as one JSON object per line on stdout.
+                sys.stdout.write(
+                    json.dumps(
+                        {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_patches")}},
+                        ensure_ascii=False,
+                    )
+                    + "\n"
+                )
+                sys.stdout.flush()
+                X_batch = X_dataset[i : i + model.batch_size]
+                X_batch = torch.from_numpy(X_batch).to(device)
+                pred = model.model_run.predict_mask(X_batch)
+                if not pred.size()[3] > 0:
+                    raise ValueError(
+                        _i18n("window_size_error")
+                    )
+                pred = pred.detach().cpu().numpy()
+                # Join the patches of this batch along the last (frame) axis.
+                pred = np.concatenate(pred, axis=2)
+                mask.append(pred)
+            if len(mask) == 0:
+                raise ValueError(
+                    _i18n("window_size_error")
+                )
+            mask = np.concatenate(mask, axis=2)
+        return mask
+    def postprocess(
+        mask: np.ndarray,
+        X_mag: np.ndarray,
+        X_phase: np.ndarray
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """Apply aggressiveness / artifact merging and build both complex spectrograms."""
+        is_non_accom_stem = False
+        for stem in NON_ACCOM_STEMS:
+            if stem == model.primary_stem.lower():
+                is_non_accom_stem = True
+        mask = spec_utils.adjust_aggr(mask, is_non_accom_stem, aggressiveness)
+        if model.enable_post_process:
+            mask = spec_utils.merge_artifacts(
+                mask, thres=model.post_process_threshold
+            )
+        # Primary stem uses the mask, secondary stem the complementary mask.
+        y_spec = mask * X_mag * np.exp(1.0j * X_phase)
+        v_spec = (1 - mask) * X_mag * np.exp(1.0j * X_phase)
+        return y_spec, v_spec
+    X_mag, X_phase = spec_utils.preprocess(X_spec)
+    n_frame = X_mag.shape[2]
+    pad_l, pad_r, roi_size = spec_utils.make_padding(
+        n_frame, model.window_size, model.model_run.offset
+    )
+    X_mag_pad = np.pad(X_mag, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
+    # NOTE(review): peak normalization; an all-zero input would divide by zero here.
+    X_mag_pad /= X_mag_pad.max()
+    mask = _execute(X_mag_pad, roi_size)
+    # Drop the frames that only exist because of the padding.
+    mask = mask[:, :, :n_frame]
+    y_spec, v_spec = postprocess(mask, X_mag, X_phase)
+    y_spec = np.nan_to_num(y_spec, nan=0.0, posinf=0.0, neginf=0.0)
+    v_spec = np.nan_to_num(v_spec, nan=0.0, posinf=0.0, neginf=0.0)
+    # The array is transposed back for librosa.resample and transposed
+    # again afterwards, so the final layout equals spec_to_wav(...).T.
+    primary_stem_array = model.spec_to_wav(y_spec).T
+    primary_stem_array = librosa.resample(
+        primary_stem_array.T,
+        orig_sr=model.model_samplerate,
+        target_sr=sr,
+    ).T
+    secondary_stem_array = model.spec_to_wav(v_spec).T
+    secondary_stem_array = librosa.resample(
+        secondary_stem_array.T,
+        orig_sr=model.model_samplerate,
+        target_sr=sr,
+    ).T
+    return {
+        model.primary_stem: primary_stem_array,
+        model.secondary_stem: secondary_stem_array,
+    }
+def demix_demucs(
+    config: Any,
+    model: Any,
+    mix: np.ndarray,
+    device: torch.device
+) -> Dict[str, np.ndarray]:
+    """
+    Separate a mix with a Demucs model using windowed overlap-add.
+    The chunk length is ``config.training.samplerate * config.training.segment``
+    samples; chunks are batched, passed through the model, multiplied by a
+    fade window and accumulated on the CPU. A JSON progress line is written
+    to stdout after every batch.
+    Args:
+        config: Configuration (``training`` and ``inference`` sections are read).
+        model: Model called as ``model(batch)``.
+        mix: Mixture, indexed as ``[channels, samples]`` below.
+        device: Device the chunks are moved to before inference.
+    Returns:
+        Dictionary mapping each entry of ``config.training.instruments`` to its
+        separated audio.
+        NOTE(review): when there is at most one instrument the raw array is
+        returned instead of a dictionary, which does not match the annotation —
+        confirm that callers expect this.
+    """
+    mix = torch.tensor(mix, dtype=torch.float32)
+    chunk_size = config.training.samplerate * config.training.segment
+    num_instruments = len(config.training.instruments)
+    num_overlap = config.inference.num_overlap
+    step = chunk_size // num_overlap
+    fade_size = chunk_size // 10
+    windowing_array = _getWindowingArray(chunk_size, fade_size)
+    batch_size = config.inference.batch_size
+    use_amp = getattr(config.training, "use_amp", True)
+    with torch.cuda.amp.autocast(enabled=use_amp):
+        with torch.inference_mode():
+            # One accumulator slice per instrument, same trailing shape as the mix.
+            req_shape = (num_instruments,) + mix.shape
+            result = torch.zeros(req_shape, dtype=torch.float32)
+            counter = torch.zeros(req_shape, dtype=torch.float32)
+            i = 0
+            batch_data = []
+            batch_locations = []
+            while i < mix.shape[1]:
+                part = mix[:, i : i + chunk_size].to(device)
+                chunk_len = part.shape[-1]
+                # Short tails are zero-padded; longer ones are reflected.
+                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
+                part = nn.functional.pad(
+                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
+                )
+                batch_data.append(part)
+                batch_locations.append((i, chunk_len))
+                i += step
+                if len(batch_data) >= batch_size or i >= mix.shape[1]:
+                    arr = torch.stack(batch_data, dim=0)
+                    x = model(arr)
+                    window = windowing_array.clone()
+                    # No fade-in at the very start and no fade-out at the very end.
+                    # NOTE(review): the adjusted window is shared by every
+                    # chunk of the current batch.
+                    if i - step == 0:
+                        window[:fade_size] = 1
+                    elif i >= mix.shape[1]:
+                        window[-fade_size:] = 1
+                    for j, (start, seg_len) in enumerate(batch_locations):
+                        result[..., start : start + seg_len] += (
+                            x[j, ..., :seg_len].cpu() * window[..., :seg_len]
+                        )
+                        counter[..., start : start + seg_len] += window[..., :seg_len]
+                    processed = min(i, mix.shape[1])
+                    total = mix.shape[1]
+                    sys.stdout.write(
+                        json.dumps(
+                            {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_samples")}}
+                        )
+                        + "\n"
+                    )
+                    sys.stdout.flush()
+                    batch_data.clear()
+                    batch_locations.clear()
+            # Normalize by the accumulated window weight; NaNs from a zero
+            # weight are replaced with 0 in place.
+            estimated_sources = result / counter
+            estimated_sources = estimated_sources.cpu().numpy()
+            np.nan_to_num(estimated_sources, copy=False, nan=0.0)
+    if num_instruments <= 1:
+        return estimated_sources
+    else:
+        instruments = config.training.instruments
+        return {k: v for k, v in zip(instruments, estimated_sources)}
+def demix_generic(
+    config: ConfigDict,
+    model: torch.nn.Module,
+    mix: torch.Tensor,
+    device: torch.device,
+) -> Dict[str, np.ndarray]:
+    """
+    Generic windowed overlap-add separation used for most architectures.
+    Chunks of ``config.audio.chunk_size`` samples are batched, passed through
+    the model, multiplied by a fade window and accumulated on the CPU. When
+    the mix is long enough it is reflect-padded by ``border`` samples on both
+    sides and the padding is cut off again at the end. A JSON progress line
+    is written to stdout after every batch.
+    Args:
+        config: Configuration (``audio``, ``training`` and ``inference`` are read).
+        model: Model called as ``model(batch)``.
+        mix: Mixture, indexed as ``[channels, samples]`` below.
+        device: Device the chunks are moved to before inference.
+    Returns:
+        Dictionary mapping each instrument returned by
+        ``prefer_target_instrument(config)`` to its separated audio.
+    """
+    mix = torch.tensor(mix, dtype=torch.float32)
+    chunk_size = config.audio.chunk_size
+    instruments = prefer_target_instrument(config)
+    num_instruments = len(instruments)
+    num_overlap = config.inference.num_overlap
+    fade_size = chunk_size // 10
+    step = chunk_size // num_overlap
+    border = chunk_size - step
+    length_init = mix.shape[-1]
+    windowing_array = _getWindowingArray(chunk_size, fade_size)
+    # Pad only when the signal is longer than twice the border.
+    if length_init > 2 * border and border > 0:
+        mix = nn.functional.pad(mix, (border, border), mode="reflect")
+    batch_size = config.inference.batch_size
+    use_amp = getattr(config.training, "use_amp", True)
+    with torch.cuda.amp.autocast(enabled=use_amp):
+        with torch.inference_mode():
+            # One accumulator slice per instrument, same trailing shape as the mix.
+            req_shape = (num_instruments,) + mix.shape
+            result = torch.zeros(req_shape, dtype=torch.float32)
+            counter = torch.zeros(req_shape, dtype=torch.float32)
+            i = 0
+            batch_data = []
+            batch_locations = []
+            while i < mix.shape[1]:
+                part = mix[:, i : i + chunk_size].to(device)
+                chunk_len = part.shape[-1]
+                # Short tails are zero-padded; longer ones are reflected.
+                pad_mode = "reflect" if chunk_len > chunk_size // 2 else "constant"
+                part = nn.functional.pad(
+                    part, (0, chunk_size - chunk_len), mode=pad_mode, value=0
+                )
+                batch_data.append(part)
+                batch_locations.append((i, chunk_len))
+                i += step
+                if len(batch_data) >= batch_size or i >= mix.shape[1]:
+                    arr = torch.stack(batch_data, dim=0)
+                    x = model(arr)
+                    window = windowing_array.clone()
+                    # No fade-in at the very start and no fade-out at the very end.
+                    # NOTE(review): the adjusted window is shared by every
+                    # chunk of the current batch.
+                    if i - step == 0:
+                        window[:fade_size] = 1
+                    elif i >= mix.shape[1]:
+                        window[-fade_size:] = 1
+                    for j, (start, seg_len) in enumerate(batch_locations):
+                        result[..., start : start + seg_len] += (
+                            x[j, ..., :seg_len].cpu() * window[..., :seg_len]
+                        )
+                        counter[..., start : start + seg_len] += window[..., :seg_len]
+                    processed = min(i, mix.shape[1])
+                    total = mix.shape[1]
+                    sys.stdout.write(
+                        json.dumps(
+                            {"processing": {"processed": processed, "total": total, "unit": _i18n("unit_samples")}},
+                            ensure_ascii=False,
+                        )
+                        + "\n"
+                    )
+                    sys.stdout.flush()
+                    batch_data.clear()
+                    batch_locations.clear()
+            # Normalize by the accumulated window weight; NaNs from a zero
+            # weight are replaced with 0 in place.
+            estimated_sources = result / counter
+            estimated_sources = estimated_sources.cpu().numpy()
+            np.nan_to_num(estimated_sources, copy=False, nan=0.0)
+            # Remove the reflect padding added before the loop.
+            if length_init > 2 * border and border > 0:
+                estimated_sources = estimated_sources[..., border:-border]
+    return {k: v for k, v in zip(instruments, estimated_sources)}
+def demix_medley_vox(
+    config: ConfigDict,
+    model: Any,
+    mix: np.ndarray,
+    device: torch.device
+) -> Dict[str, np.ndarray]:
+    """
+    Separate a mix with a Medley Vox model, channel by channel.
+    The mix is loudness-normalized, cut into overlapping chunks, each channel
+    of each chunk is passed to ``model.separate``, the windowed outputs are
+    accumulated per stem and divided by the accumulated window weight, and the
+    loudness gain is undone at the end. A JSON progress line is written to
+    stdout after every chunk.
+    Args:
+        config: Configuration (``training.instruments``, ``model.sample_rate``,
+            ``model.seq_dur`` and ``inference.num_overlap`` are read).
+        model: Model exposing ``sample_rate`` and ``separate``.
+        mix: Mixture, 1-D (mono) or 2-D.
+        device: Device used for inference.
+    Returns:
+        Dictionary mapping each stem name to an array of shape
+        ``(channels, samples)``.
+    """
+    import pyloudnorm as pyln
+    from models.medley_vox.loudness_utils import loudnorm, db2linear
+    stems: List[str] = config.training.instruments
+    # Bring the input into a known layout
+    # NOTE(review): `original_shape` is not used afterwards.
+    original_shape = mix.shape
+    # Check the rank and convert to [channels, samples]
+    if mix.ndim == 1:  # Mono
+        mix = np.expand_dims(mix, axis=0)  # [1, samples]
+        num_channels = 1
+    elif mix.ndim == 2:
+        if mix.shape[0] <= mix.shape[1]:  # Assume [channels, samples]
+            num_channels = mix.shape[0]
+        else:  # Probably [samples, channels]
+            mix = mix.T  # Transpose to [channels, samples]
+            num_channels = mix.shape[0]
+    # NOTE(review): inputs with more than two dimensions leave
+    # `num_channels` undefined.
+    # Processing parameters
+    samplerate = config.model.sample_rate
+    segment_sec = config.model.seq_dur
+    chunk_size = int(samplerate * segment_sec)
+    overlap = config.inference.num_overlap
+    step = chunk_size // overlap
+    fade_size = chunk_size // 10
+    # Loudness normalization of the whole mix
+    meter = pyln.Meter(model.sample_rate)
+    try:
+        # loudnorm is given [samples, channels] or [samples]
+        # Convert the layout for loudnorm
+        if num_channels > 1:
+            mix_for_loudnorm = mix.T  # [samples, channels]
+        else:
+            mix_for_loudnorm = mix[0]  # [samples]
+        mixture_d, adjusted_gain = loudnorm(mix_for_loudnorm, -24.0, meter)
+        # Convert back to [channels, samples]
+        if num_channels > 1:
+            if isinstance(mixture_d, np.ndarray) and mixture_d.ndim == 2:
+                mixture_d = mixture_d.T  # [channels, samples]
+            else:
+                # If mono came back, duplicate it onto every channel
+                mixture_d = np.tile(mixture_d, (num_channels, 1))
+        else:
+            if mixture_d.ndim == 1:
+                mixture_d = mixture_d.reshape(1, -1)
+    except Exception as e:
+        print(_i18n("loudnorm_error", error=str(e)))
+        # Fallback: manual RMS-based normalization
+        mixture_d = mix.copy()
+        rms = np.sqrt(np.mean(mix**2))
+        target_rms = 0.1  # roughly -20 dB
+        if rms > 0:
+            adjusted_gain = 20 * np.log10(target_rms / rms)
+            mixture_d = mix * (target_rms / rms)
+        else:
+            adjusted_gain = 0
+    length_init = mixture_d.shape[1]
+    # Fade window shared by all stems
+    windowing_array = _getWindowingArray(chunk_size, fade_size).to(device)
+    # Output accumulators per stem [channels, samples]
+    result_stems = {stem: np.zeros((num_channels, length_init), dtype=np.float32)
+                   for stem in stems}
+    # Convert the mix to a tensor [channels, samples]
+    mix_tensor = torch.tensor(mixture_d, dtype=torch.float32).to(device)
+    # Window-weight accumulators per stem [channels, samples]
+    counters = {stem: torch.zeros((num_channels, length_init), dtype=torch.float32, device=device)
+                for stem in stems}
+    i = 0
+    while i < length_init:
+        # Take the chunk for all channels at once [channels, chunk_size]
+        end_idx = min(i + chunk_size, length_init)
+        chunk = mix_tensor[:, i:end_idx]
+        cur_chunk_len = chunk.shape[1]
+        # Buffer for the results of this chunk [channels, 2, cur_chunk_len]
+        chunk_results = torch.zeros((num_channels, 2, cur_chunk_len), dtype=torch.float32, device=device)
+        # Process every channel of this chunk separately
+        for ch in range(num_channels):
+            # Take a single channel [1, cur_chunk_len]
+            channel_chunk = chunk[ch:ch+1, :]
+            # Zero-pad the last chunk up to chunk_size if needed
+            if cur_chunk_len < chunk_size:
+                pad_len = chunk_size - cur_chunk_len
+                channel_chunk = torch.nn.functional.pad(
+                    channel_chunk, (0, pad_len), mode='constant', value=0
+                )
+            # Add the batch dimension [1, 1, chunk_size]
+            channel_chunk = channel_chunk.unsqueeze(0)
+            with torch.no_grad():
+                # The model is expected to return [1, 2, chunk_size] — TODO confirm
+                out_chunk = model.separate(channel_chunk)
+            # Store the result for this channel (padding is cut off)
+            chunk_results[ch, :, :cur_chunk_len] = out_chunk[0, :, :cur_chunk_len].cpu()
+        # Apply the window; no fade at the very start / very end of the signal
+        window = windowing_array[:cur_chunk_len].clone()
+        if i == 0:
+            window[:fade_size] = 1
+        if end_idx >= length_init:
+            window[-fade_size:] = 1
+        # Accumulate the results into the per-stem arrays
+        for stem_idx, stem in enumerate(stems):
+            result_stems[stem][:, i:end_idx] += chunk_results[:, stem_idx, :].cpu().numpy() * window.cpu().numpy()
+            counters[stem][:, i:end_idx] += window
+        i += step
+        progress_data = {
+            "processing": {
+                "processed": min(end_idx, length_init),
+                "total": length_init,
+                "unit": _i18n("unit_samples"),
+            }
+        }
+        sys.stdout.write(json.dumps(progress_data, ensure_ascii=False) + "\n")
+        sys.stdout.flush()
+    # Normalize the results by the accumulated window weight
+    for stem in stems:
+        counters_np = counters[stem].cpu().numpy()
+        # Avoid division by zero
+        mask = counters_np > 0
+        result_stems[stem][mask] /= counters_np[mask]
+        # Undo the loudness normalization gain
+        result_stems[stem] = result_stems[stem] * db2linear(-adjusted_gain)
+    return result_stems
+def demix(
+    config: ConfigDict,
+    model: torch.nn.Module,
+    mix: np.ndarray,
+    device: torch.device,
+    model_type: str,
+) -> Dict[str, np.ndarray]:
+    """
+    Entry point for separation: dispatch to the routine for ``model_type``.
+    Args:
+        config: Configuration
+        model: Model
+        mix: Mixture audio
+        device: Device used for inference
+        model_type: Architecture identifier
+    Returns:
+        Dictionary with the separated stems
+    """
+    # Architectures with a dedicated routine, checked in this order;
+    # every other model type goes through the generic chunked routine.
+    specialised = (
+        ("vr", demix_vr),
+        ("mdxnet", demix_mdxnet),
+        ("htdemucs", demix_demucs),
+        ("medley_vox", demix_medley_vox),
+    )
+    for type_name, routine in specialised:
+        if model_type == type_name:
+            return routine(config, model, mix, device)
+    return demix_generic(config, model, mix, device)
+def prefer_target_instrument(config: ConfigDict) -> List[str]:
+    """
+    Return the instruments the model produces.
+    Args:
+        config: Configuration with a ``training`` section
+    Returns:
+        A one-element list with ``training.target_instrument`` when it is set
+        to a truthy value, otherwise ``training.instruments``
+    """
+    training = config.training
+    if not training.get("target_instrument"):
+        return training.instruments
+    return [training.target_instrument]
+def prefer_target_instrument_test(
+    config: ConfigDict, selected_instruments: Optional[List[str]] = None
+) -> List[str]:
+    """
+    Return the instruments to use for testing.
+    Args:
+        config: Configuration with a ``training`` section
+        selected_instruments: Optional explicit selection; entries that are
+            not in ``training.instruments`` are dropped
+    Returns:
+        The filtered selection when one is given; otherwise the target
+        instrument as a one-element list when it is set, otherwise all
+        available instruments
+    """
+    known = config.training.instruments
+    if selected_instruments is None:
+        if config.training.get("target_instrument"):
+            return [config.training.target_instrument]
+        return known
+    return [name for name in selected_instruments if name in known]

mvsepless/install.py CHANGED Viewed

@@ -1,356 +1,356 @@
-import os
-import subprocess
-import argparse
-import re
-import sys
-from typing import List, Optional, Tuple, Union
-from i18n import _i18n
-def get_latest_version(package_name: str, index_url: Optional[str] = None) -> Optional[str]:
-    """
-    Get the latest version of a package from the output of ``pip index versions``.
-    Args:
-        package_name: Package name
-        index_url: Optional package index URL passed as ``--index-url``
-    Returns:
-        The latest version string, or None when pip fails or the output
-        cannot be parsed
-    """
-    cmd = [sys.executable, "-m", "pip", "index", "versions", package_name]
-    if index_url:
-        cmd.extend(["--index-url", index_url])
-    try:
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            check=False  # Do not raise on a non-zero exit code
-        )
-        if result.returncode != 0:
-            print(_i18n("pip_index_warning", code=result.returncode))
-            print(f"stderr: {result.stderr}")
-            return None
-    except Exception as e:
-        print(_i18n("pip_index_error", error=str(e)))
-        return None
-    def parse_version_from_output(pip_output: str) -> Optional[str]:
-        """
-        Parse the version out of pip's output.
-        Args:
-            pip_output: Text printed by pip
-        Returns:
-            The package version, or None
-        """
-        if not pip_output:
-            return None
-        lines = pip_output.split('\n')
-        # Method 1: parse the first line
-        if lines and lines[0].strip():
-            first_line = lines[0].strip()
-            # Version in parentheses (preferred)
-            match = re.search(r'\(([^)]+)\)', first_line)
-            if match:
-                version = match.group(1).strip()
-                return version
-            # Version after the first whitespace
-            match = re.search(r'\S+\s+([^\s]+)', first_line)
-            if match:
-                version = match.group(1).strip()
-                # Accept it only if it looks like a version (contains a digit)
-                if re.search(r'\d', version):
-                    return version
-        # Method 2: find "Available versions:" and take the first version
-        for i, line in enumerate(lines):
-            if 'Available versions:' in line:
-                # Look for versions in the next few lines
-                for j in range(1, 4):  # Check the next 3 lines
-                    if i + j < len(lines):
-                        versions_line = lines[i + j].strip()
-                        if versions_line:
-                            # Split on commas and take the first version
-                            versions = [v.strip() for v in versions_line.split(',') if v.strip()]
-                            if versions:
-                                return versions[0]
-                break
-        return None
-    latest_version = parse_version_from_output(result.stdout)
-    print(_i18n("version_retrieved", package=package_name, version=latest_version or _i18n("unknown")))
-    return latest_version
-def fno_compitable(index_url: Optional[str] = None) -> bool:
-    """
-    Check compatibility with FNO (Fourier Neural Operator).
-    The latest available torch version is looked up and the result is True
-    only when its major version is 2 and its minor version is at least 4.
-    Args:
-        index_url: Optional package index URL
-    Returns:
-        True if compatible
-    """
-    is_torch_2 = False
-    fno_c = False
-    latest_version_torch = get_latest_version("torch", index_url)
-    if not latest_version_torch:
-        print(_i18n("torch_version_not_found"))
-        return False
-    lvt = latest_version_torch.split(".")
-    # Components that are not purely numeric (e.g. "0+cu121") are dropped.
-    lvt = [int(n_) for n_ in lvt if n_.isdigit()]
-    for i, num in enumerate(lvt, start=1):
-        if i == 1:
-            if num == 2:
-                is_torch_2 = True
-        elif i == 2:
-            if num >= 4 and is_torch_2:
-                fno_c = True
-    return fno_c
-def is_nvidia_gpu_present() -> bool:
-    """
-    Check whether an NVIDIA GPU is present by running ``nvidia-smi``.
-    Returns:
-        True if ``nvidia-smi`` exits with code 0, False otherwise
-    """
-    try:
-        # Try to run the nvidia-smi command
-        result = subprocess.run(
-            ['nvidia-smi'],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            check=False  # Do not raise on a non-zero exit code
-        )
-        # Return code 0 means the command ran successfully
-        if result.returncode == 0:
-            print(_i18n("nvidia_gpu_detected"))
-            return True
-        else:
-            print(_i18n("nvidia_smi_error"))
-            return False
-    except FileNotFoundError:
-        # The nvidia-smi command was not found on this system
-        print(_i18n("nvidia_smi_not_found"))
-        return False
-    except Exception as e:
-        print(_i18n("nvidia_check_error", error=str(e)))
-        return False
-# Evaluated once at import time; used below to choose between the
-# onnxruntime-gpu and onnxruntime requirements.
-cuda_available: bool = is_nvidia_gpu_present()
-def install_uv() -> None:
-    """
-    Install uv, a fast Python package installer, via pip.
-    The outcome is only printed; a failed installation does not raise.
-    """
-    print(_i18n("installing_uv"))
-    result = subprocess.run([sys.executable, "-m", "pip", "install", "uv"])
-    if result.returncode == 0:
-        print(_i18n("uv_installed"))
-    else:
-        print(_i18n("uv_install_error"))
-def install_requirements(requirements: List[str], force: bool = False, index_url: Optional[str] = None) -> None:
-    """
-    Install the given requirements with ``uv pip install``.
-    A failed installation is only reported on stdout; nothing is raised.
-    Args:
-        requirements: Requirement specifiers; an empty list is a no-op
-        force: Add ``--upgrade --force-reinstall`` to the command
-        index_url: Optional package index URL passed as ``--index-url``
-    """
-    if not requirements:
-        return
-    cmd = [sys.executable, "-m", "uv", "pip", "install", "--no-cache-dir", "-qq"]
-    if force:
-        cmd.append("--upgrade")
-        cmd.append("--force-reinstall")
-    if index_url:
-        cmd.extend(["--index-url", index_url])
-    for pkg in requirements:
-        cmd.append(pkg)
-    result = subprocess.run(cmd)
-    if result.returncode != 0:
-        print(_i18n("requirements_install_error", count=len(requirements)))
-# Dependency lists
-# Unpinned torch packages.
-torch_requirements: List[str] = [
-    "torch",
-    "torchvision",
-    "torchaudio",
-    "torchcrepe",
-]
-# Mostly unpinned non-torch requirements; the onnxruntime flavour depends
-# on whether an NVIDIA GPU was detected at import time.
-universal_requirements: List[str] = [
-    "numpy==2.0.2",
-    "pandas",
-    "scipy",
-    "librosa",
-    "samplerate==0.1.0",
-    "matplotlib",
-    "tqdm",
-    "einops",
-    "protobuf",
-    "soundfile",
-    "pydub",
-    "webrtcvad",
-    "audiomentations",
-    "pedalboard",
-    "ml_collections",
-    "timm",
-    "wandb",
-    "accelerate",
-    "bitsandbytes",
-    "tokenizers",
-    "huggingface-hub",
-    "transformers",
-    "torchseg",
-    "demucs==4.0.0",
-    "asteroid>=0.6.0",
-    "pyloudnorm",
-    "prodigyopt",
-    "torch_log_wmse",
-    "rotary_embedding_torch",
-    "gradio<6.0",
-    "omegaconf",
-    "beartype",
-    "spafe",
-    "torch_audiomentations",
-    "auraloss",
-    "onnx>=1.17",
-    "onnx2torch>=0.3.0",
-    "onnxruntime-gpu>=1.17" if cuda_available else "onnxruntime>=1.17",
-    "ml_dtypes",
-    "resampy",
-    "yt_dlp",
-    "pyngrok",
-    "praat-parselmouth",
-    "faiss-cpu==1.11",
-    "local-attention",
-    "tenacity",
-    "pyworld",
-    "gdown"
-]
-torch_old_requirements: List[str] = [
-    "torch==1.13.1",
-    "torchvision==0.14.1",
-    "torchaudio==0.13.1",
-    "torchcrepe==0.0.24",
-]
-old_requirements: List[str] = [
-    "numpy==1.26.4",
-    "pandas==2.3.3",
-    "scipy==1.15.3",
-    "librosa==0.11.0",
-    "samplerate==0.1.0",
-    "matplotlib==3.10.8",
-    "tqdm==4.67.1",
-    "einops==0.8.1",
-    "protobuf==6.33.4",
-    "soundfile==0.13.1",
-    "pydub==0.25.1",
-    "webrtcvad==2.0.10",
-    "audiomentations==0.43.1",
-    "pedalboard==0.8.2",
-    "ml_collections==1.1.0",
-    "timm==1.0.24",
-    "wandb==0.24.0",
-    "accelerate==1.2.1",
-    "bitsandbytes==0.45.0",
-    "tokenizers==0.15.2",
-    "huggingface-hub==0.34.2",
-    "transformers==4.39.3",
-    "torchseg==0.0.1a4",
-    "demucs==4.0.0",
-    "asteroid==0.6.0",
-    "pyloudnorm",
-    "prodigyopt==1.1.2",
-    "rotary_embedding_torch==0.3.6",
-    "gradio<6.0.0",
-    "omegaconf==2.3.0",
-    "beartype==0.22.9",
-    "spafe==0.3.3",
-    "torch_audiomentations==0.12.0",
-    "auraloss==0.4.0",
-    "onnx>=1.17",
-    "onnx2torch>=0.3.0",
-    "onnxruntime-gpu>=1.17" if cuda_available else "onnxruntime>=1.17",
-    "ml_dtypes==0.5.4",
-    "resampy==0.4.3",
-    "yt_dlp",
-    "pyngrok",
-    "praat-parselmouth==0.4.7",
-    "faiss-cpu==1.7.2",
-    "local-attention==1.10.0",
-    "tenacity==9.1.2",
-    "pyworld==0.3.5",
-    "gdown"
-]
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description=_i18n("installer_description"))
-    parser.add_argument("--old", action="store_true", help=_i18n("old_deps_help"))
-    parser.add_argument("--force", action="store_true", help=_i18n("force_install_help"))
-    parser.add_argument("--index_url", type=str, default=None, help=_i18n("index_url_help"))
-    args = parser.parse_args()
-    if args.old:
-        torch_reqs = torch_old_requirements
-        reqs = old_requirements
-        print(_i18n("installing_old_deps"))
-    else:
-        torch_reqs = torch_requirements
-        reqs = universal_requirements
-        if fno_compitable(args.index_url):
-            reqs.append("neuraloperator==1.0.2")
-            print(_i18n("fno_compatible_detected"))
-    if args.force:
-        print(_i18n("force_install_warning"))
-    install_uv()
-    print(_i18n("installing_torch"))
-    install_requirements(torch_reqs, force=args.force, index_url=args.index_url)
-    print(_i18n("installing_other_deps"))
-    install_requirements(reqs, force=args.force)
-    print(_i18n("installing_setuptools"))
-    install_requirements(["setuptools<76.0"], force=True)
     print(_i18n("installation_complete"))

+import os
+import subprocess
+import argparse
+import re
+import sys
+from typing import List, Optional, Tuple, Union
+from i18n import _i18n
+def get_latest_version(package_name: str, index_url: Optional[str] = None) -> Optional[str]:
+    """
+    Return the newest version of a package as reported by ``pip index versions``.
+    Args:
+        package_name: Name of the package to look up
+        index_url: Optional package index URL, passed to pip as ``--index-url``
+    Returns:
+        The version string, or None when pip fails or its output cannot be parsed
+    """
+    def parse_version_from_output(pip_output: str) -> Optional[str]:
+        """
+        Extract a version string from the text printed by pip.
+        Args:
+            pip_output: Captured stdout of ``pip index versions``
+        Returns:
+            The version string, or None if nothing usable was found
+        """
+        if not pip_output:
+            return None
+        lines = pip_output.split('\n')
+        # Method 1: the first line, normally "<package> (<version>)"
+        first_line = lines[0].strip()
+        if first_line:
+            # Version in parentheses (preferred form)
+            match = re.search(r'\(([^)]+)\)', first_line)
+            if match:
+                return match.group(1).strip()
+            # Otherwise take the token after the first whitespace,
+            # but only if it contains a digit and so looks like a version
+            match = re.search(r'\S+\s+([^\s]+)', first_line)
+            if match and re.search(r'\d', match.group(1)):
+                return match.group(1)
+        # Method 2: the "Available versions:" list
+        label = 'Available versions:'
+        for i, line in enumerate(lines):
+            if label not in line:
+                continue
+            # pip prints the list on the same line as the label, so read the
+            # rest of that line first; the next 3 lines are still checked in
+            # case the list starts on a following line.
+            candidates = [line.split(label, 1)[1]] + lines[i + 1:i + 4]
+            for candidate in candidates:
+                # Keep only tokens starting with a digit so that lines such
+                # as "INSTALLED: x.y.z" are never mistaken for a version.
+                versions = [
+                    v.strip() for v in candidate.split(',')
+                    if re.match(r'\d', v.strip())
+                ]
+                if versions:
+                    return versions[0]
+            break
+        return None
+    cmd = [sys.executable, "-m", "pip", "index", "versions", package_name]
+    if index_url:
+        cmd.extend(["--index-url", index_url])
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            check=False  # A non-zero exit code is handled below instead of raising
+        )
+        if result.returncode != 0:
+            print(_i18n("pip_index_warning", code=result.returncode))
+            print(f"stderr: {result.stderr}")
+            return None
+    except Exception as e:
+        # Best effort: any failure to run pip is reported and treated as "unknown version"
+        print(_i18n("pip_index_error", error=str(e)))
+        return None
+    latest_version = parse_version_from_output(result.stdout)
+    print(_i18n("version_retrieved", package=package_name, version=latest_version or _i18n("unknown")))
+    return latest_version
+def fno_compitable(index_url: Optional[str] = None) -> bool:
+    """
+    Check whether the newest torch on the index is usable with FNO (Fourier Neural Operator).
+    The check passes for torch 2.x with a minor version of 4 or higher.
+    Args:
+        index_url: Optional package index URL used for the torch lookup
+    Returns:
+        True if the torch version qualifies, False otherwise (including when
+        the version cannot be determined)
+    """
+    latest_version_torch = get_latest_version("torch", index_url)
+    if not latest_version_torch:
+        print(_i18n("torch_version_not_found"))
+        return False
+    # Read the leading "major.minor" pair directly. Splitting on "." and
+    # dropping non-numeric parts would shift positions, so a suffixed minor
+    # such as "2.4rc1" would be lost and a later component read in its place.
+    match = re.match(r"(\d+)\.(\d+)", latest_version_torch)
+    if not match:
+        return False
+    major, minor = int(match.group(1)), int(match.group(2))
+    return major == 2 and minor >= 4
+def is_nvidia_gpu_present() -> bool:
+    """
+    Detect an NVIDIA GPU by running ``nvidia-smi``.
+    Returns:
+        True when ``nvidia-smi`` exits with code 0, False in every other case
+    """
+    try:
+        probe = subprocess.run(
+            ['nvidia-smi'],
+            capture_output=True,
+            text=True,
+            check=False,  # The exit code is inspected below instead of raising
+        )
+        # Exit code 0 means the tool ran successfully
+        detected = probe.returncode == 0
+        print(_i18n("nvidia_gpu_detected" if detected else "nvidia_smi_error"))
+        return detected
+    except FileNotFoundError:
+        # The nvidia-smi executable does not exist on this system
+        print(_i18n("nvidia_smi_not_found"))
+        return False
+    except Exception as e:
+        print(_i18n("nvidia_check_error", error=str(e)))
+        return False
+# Probed once when the module is loaded (this runs nvidia-smi); the result
+# chooses between the onnxruntime-gpu and onnxruntime requirement entries.
+cuda_available: bool = is_nvidia_gpu_present()
+def install_uv() -> None:
+    """Install uv, a fast Python package installer, using pip."""
+    print(_i18n("installing_uv"))
+    pip_cmd = [sys.executable, "-m", "pip", "install", "uv"]
+    succeeded = subprocess.run(pip_cmd).returncode == 0
+    print(_i18n("uv_installed" if succeeded else "uv_install_error"))
+def install_requirements(requirements: List[str], force: bool = False, index_url: Optional[str] = None) -> None:
+    """
+    Install the given requirements with ``uv pip install``.
+    Args:
+        requirements: Requirement specifiers to install; nothing is run when empty
+        force: Add ``--upgrade`` and ``--force-reinstall`` to the command
+        index_url: Optional package index URL, passed as ``--index-url``
+    """
+    if not requirements:
+        return
+    options: List[str] = []
+    if force:
+        options += ["--upgrade", "--force-reinstall"]
+    if index_url:
+        options += ["--index-url", index_url]
+    cmd = [
+        sys.executable, "-m", "uv", "pip", "install", "--no-cache-dir", "-qq",
+        *options,
+        *requirements,
+    ]
+    # A failed install is only reported; it does not raise
+    if subprocess.run(cmd).returncode != 0:
+        print(_i18n("requirements_install_error", count=len(requirements)))
+# Dependency lists
+# Unpinned torch packages, used when the script runs without --old.
+torch_requirements: List[str] = [
+    "torch",
+    "torchvision",
+    "torchaudio",
+    "torchcrepe",
+]
+# Default (mostly unpinned) dependency set, used when the script runs without
+# --old. The onnxruntime entry depends on cuda_available.
+universal_requirements: List[str] = [
+    "numpy==2.0.2",
+    "pandas",
+    "scipy",
+    "librosa",
+    "samplerate==0.1.0",
+    "matplotlib",
+    "tqdm",
+    "einops",
+    "protobuf",
+    "soundfile",
+    "pydub",
+    "webrtcvad",
+    "audiomentations",
+    "pedalboard",
+    "ml_collections",
+    "timm",
+    "wandb",
+    "accelerate",
+    "bitsandbytes",
+    "tokenizers",
+    "huggingface-hub",
+    "transformers",
+    "torchseg",
+    "demucs==4.0.0",
+    "asteroid>=0.6.0",
+    "pyloudnorm",
+    "prodigyopt",
+    "torch_log_wmse",
+    "rotary_embedding_torch",
+    "gradio<6.0",
+    "omegaconf",
+    "beartype",
+    "spafe",
+    "torch_audiomentations",
+    "auraloss",
+    "onnx>=1.17",
+    "onnx2torch>=0.3.0",
+    "onnxruntime-gpu>=1.17" if cuda_available else "onnxruntime>=1.17",
+    "ml_dtypes",
+    "resampy",
+    "yt_dlp",
+    "pyngrok",
+    "praat-parselmouth",
+    "faiss-cpu==1.11",
+    "local-attention",
+    "tenacity",
+    "pyworld",
+    "gdown"
+]
+# Pinned torch 1.13.1 packages, selected by the --old flag.
+torch_old_requirements: List[str] = [
+    "torch==1.13.1",
+    "torchvision==0.14.1",
+    "torchaudio==0.13.1",
+    "torchcrepe==0.0.24",
+]
+# Mostly pinned dependency set selected by the --old flag and installed
+# together with torch_old_requirements. The onnxruntime entry depends on
+# cuda_available.
+old_requirements: List[str] = [
+    "numpy==1.26.4",
+    "pandas==2.3.3",
+    "scipy==1.15.3",
+    "librosa==0.11.0",
+    "samplerate==0.1.0",
+    "matplotlib==3.10.8",
+    "tqdm==4.67.1",
+    "einops==0.8.1",
+    "protobuf==6.33.4",
+    "soundfile==0.13.1",
+    "pydub==0.25.1",
+    "webrtcvad==2.0.10",
+    "audiomentations==0.43.1",
+    "pedalboard==0.8.2",
+    "ml_collections==1.1.0",
+    "timm==1.0.24",
+    "wandb==0.24.0",
+    "accelerate==1.2.1",
+    "bitsandbytes==0.45.0",
+    "tokenizers==0.15.2",
+    "huggingface-hub==0.34.2",
+    "transformers==4.39.3",
+    "torchseg==0.0.1a4",
+    "demucs==4.0.0",
+    "asteroid==0.6.0",
+    "pyloudnorm",
+    "prodigyopt==1.1.2",
+    "rotary_embedding_torch==0.3.6",
+    "gradio<6.0.0",
+    "omegaconf==2.3.0",
+    "beartype==0.22.9",
+    "spafe==0.3.3",
+    "torch_audiomentations==0.12.0",
+    "auraloss==0.4.0",
+    "onnx>=1.17",
+    "onnx2torch>=0.3.0",
+    "onnxruntime-gpu>=1.17" if cuda_available else "onnxruntime>=1.17",
+    "ml_dtypes==0.5.4",
+    "resampy==0.4.3",
+    "yt_dlp",
+    "pyngrok",
+    "praat-parselmouth==0.4.7",
+    "faiss-cpu==1.7.2",
+    "local-attention==1.10.0",
+    "tenacity==9.1.2",
+    "pyworld==0.3.5",
+    "gdown"
+]
+# Installer entry point: parse the flags, pick a dependency set, then install
+# uv, the torch packages, the remaining dependencies and finally setuptools.
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description=_i18n("installer_description"))
+    parser.add_argument("--old", action="store_true", help=_i18n("old_deps_help"))
+    parser.add_argument("--force", action="store_true", help=_i18n("force_install_help"))
+    parser.add_argument("--index_url", type=str, default=None, help=_i18n("index_url_help"))
+    args = parser.parse_args()
+    if args.old:
+        # Pinned legacy set (torch 1.13.1)
+        torch_reqs = torch_old_requirements
+        reqs = old_requirements
+        print(_i18n("installing_old_deps"))
+    else:
+        torch_reqs = torch_requirements
+        # NOTE: reqs is the module-level list itself, so the append below mutates it
+        reqs = universal_requirements
+        # neuraloperator is only added when the torch version check passes
+        if fno_compitable(args.index_url):
+            reqs.append("neuraloperator==1.0.2")
+            print(_i18n("fno_compatible_detected"))
+    if args.force:
+        print(_i18n("force_install_warning"))
+    install_uv()
+    print(_i18n("installing_torch"))
+    # index_url is passed for the torch packages only
+    install_requirements(torch_reqs, force=args.force, index_url=args.index_url)
+    print(_i18n("installing_other_deps"))
+    install_requirements(reqs, force=args.force)
+    print(_i18n("installing_setuptools"))
+    # setuptools is installed last and always force-reinstalled, regardless of --force
+    install_requirements(["setuptools<76.0"], force=True)
     print(_i18n("installation_complete"))

mvsepless/namer.py CHANGED Viewed

@@ -1,165 +1,165 @@
-import os
-import re
-from typing import Dict, Any, Optional, List
-from i18n import _i18n
-class Namer:
-    """Класс для работы с именами файлов и шаблонами"""
-    def __init__(self, max_length: int = 255, offset: int = 10) -> None:
-        """
-        Инициализация Namer
-        Args:
-            max_length: Максимальная длина имени
-            offset: Запас длины
-        """
-        if max_length < 40:
-            self.max_length = 40
-        else:
-            self.max_length = max_length
-        if offset < max_length:
-            self.safe_max_length = max_length - offset
-        else:
-            self.safe_max_length = max_length
-    def sanitize(self, name: str) -> str:
-        """
-        Очистить имя файла от недопустимых символов
-        Args:
-            name: Исходное имя
-        Returns:
-            Очищенное имя
-        """
-        sanitized = re.sub(r'[<>:"/\\|?*]', "_", name)
-        sanitized = re.sub(r"_+", "_", sanitized)
-        sanitized = sanitized.strip("_. ")
-        return sanitized
-    def short(self, name: str, length: Optional[int] = None) -> str:
-        """
-        Сократить длинное имя
-        Args:
-            name: Исходное имя
-            length: Желаемая длина
-        Returns:
-            Сокращенное имя
-        """
-        if length:
-            if len(name) > length:
-                return f"{name[:int(length // 2)]}...{name[-int(length // 2.5):]}"
-            else:
-                return name
-        else:
-            if len(name) > self.safe_max_length:
-                return f"{name[:int(self.safe_max_length // 4)]}...{name[-int(self.safe_max_length // 4):]}"
-            else:
-                return name
-    def iter(self, filepath: str) -> str:
-        """
-        Создать уникальное имя файла, добавляя (n) если файл существует
-        Args:
-            filepath: Исходный путь
-        Returns:
-            Уникальный путь
-        """
-        if not os.path.exists(filepath):
-            return filepath
-        directory, filename = os.path.split(filepath)
-        name, ext = os.path.splitext(filename)
-        counter = 1
-        while True:
-            new_filename = f"{name} ({counter}){ext}"
-            new_filepath = os.path.join(directory, new_filename)
-            if not os.path.exists(new_filepath):
-                return new_filepath
-            counter += 1
-    def template(self, template: str, **kwargs: Any) -> str:
-        """
-        Применить шаблон с подстановкой ключей
-        Args:
-            template: Шаблон
-            **kwargs: Ключи для подстановки
-        Returns:
-            Результат подстановки
-        """
-        if kwargs:
-            for key in kwargs:
-                template = template.replace(str(key), str(kwargs[key]))
-        return template
-    def dedup_template(self, template: str, keys: List[str] = []) -> str:
-        """
-        Удалить дублирующиеся ключи из шаблона
-        Args:
-            template: Шаблон
-            keys: Список ключей
-        Returns:
-            Шаблон без дубликатов
-        """
-        seen = set()
-        pattern = r"({})".format("|".join(re.escape(key) for key in keys))
-        def replace(match: re.Match) -> str:
-            key = match.group(1)
-            if key in seen:
-                return ""
-            seen.add(key)
-            return key
-        result = re.sub(pattern, replace, template)
-        return result
-    def short_input_name_template(self, template: str, **kwargs: Any) -> str:
-        """
-        Сократить имя входного файла с учетом шаблона
-        Args:
-            template: Шаблон
-            **kwargs: Ключи для подстановки
-        Returns:
-            Сокращенное имя
-        """
-        if kwargs:
-            input_file_name = kwargs.get("NAME", None)
-            if input_file_name:
-                merged_keys_value = ""
-                no_keys_template = template
-                for key in kwargs:
-                    if key != "NAME":
-                        merged_keys_value += str(kwargs[key])
-                for key in kwargs:
-                    no_keys_template = no_keys_template.replace(str(key), "")
-                len_merged_keys = len(merged_keys_value)
-                len_no_keys = len(no_keys_template)
-                free_length = self.safe_max_length - (len_merged_keys + len_no_keys)
-                len_file_name = len(input_file_name)
-                start_index = free_length // 2
-                end_index = free_length // 2.5
-                if len_file_name > free_length:
-                    return f"{input_file_name[:int(start_index)]}...{input_file_name[-int(end_index):]}"
-                else:
-                    return input_file_name
-            else:
-                print(_i18n("name_key_missing"))
-                return ""
-        else:
-            print(_i18n("keys_required"))
             return ""

+import os
+import re
+from typing import Dict, Any, Optional, List
+from i18n import _i18n
+class Namer:
+    """Helper for sanitising, shortening and templating file names."""
+    def __init__(self, max_length: int = 255, offset: int = 10) -> None:
+        """
+        Set up the length limits.
+        Args:
+            max_length: Maximum name length; values below 40 are raised to 40
+            offset: Safety margin subtracted from the maximum length; ignored
+                when it is not smaller than the maximum length
+        """
+        self.max_length = max(max_length, 40)
+        # Derive the safe limit from the clamped maximum so both limits agree
+        if offset < self.max_length:
+            self.safe_max_length = self.max_length - offset
+        else:
+            self.safe_max_length = self.max_length
+    @staticmethod
+    def _ellipsize(name: str, head: int, tail: int) -> str:
+        """Join the first `head` and last `tail` characters of `name` with "..."."""
+        head = max(head, 0)
+        # name[-0:] would return the whole string, so a zero tail is handled explicitly
+        suffix = name[-tail:] if tail > 0 else ""
+        return f"{name[:head]}...{suffix}"
+    def sanitize(self, name: str) -> str:
+        """
+        Replace characters that are not allowed in file names.
+        Args:
+            name: Original name
+        Returns:
+            Name with each of ``<>:"/\\|?*`` replaced by "_", runs of "_"
+            collapsed, and leading/trailing "_", "." and spaces removed
+        """
+        sanitized = re.sub(r'[<>:"/\\|?*]', "_", name)
+        sanitized = re.sub(r"_+", "_", sanitized)
+        return sanitized.strip("_. ")
+    def short(self, name: str, length: Optional[int] = None) -> str:
+        """
+        Shorten a long name by keeping its start and end around "...".
+        Args:
+            name: Original name
+            length: Length above which the name is shortened; when omitted
+                (or 0) the instance's safe maximum length is used
+        Returns:
+            The name unchanged if it fits, otherwise the shortened form
+        """
+        if length:
+            limit = length
+            head, tail = int(length // 2), int(length // 2.5)
+        else:
+            limit = self.safe_max_length
+            head = tail = int(limit // 4)
+        if len(name) <= limit:
+            return name
+        return self._ellipsize(name, head, tail)
+    def iter(self, filepath: str) -> str:
+        """
+        Return a path that does not exist yet, appending " (n)" to the file name if needed.
+        Args:
+            filepath: Desired path
+        Returns:
+            `filepath` itself when it is free, otherwise the first free
+            "<name> (n)<ext>" variant in the same directory
+        """
+        if not os.path.exists(filepath):
+            return filepath
+        directory, filename = os.path.split(filepath)
+        name, ext = os.path.splitext(filename)
+        counter = 1
+        while True:
+            candidate = os.path.join(directory, f"{name} ({counter}){ext}")
+            if not os.path.exists(candidate):
+                return candidate
+            counter += 1
+    def template(self, template: str, **kwargs: Any) -> str:
+        """
+        Fill a template by replacing each key with its value.
+        Args:
+            template: Template text containing the keys literally
+            **kwargs: Key/value pairs to substitute
+        Returns:
+            The template with every key occurrence replaced
+        """
+        if not kwargs:
+            return template
+        values = {str(key): str(value) for key, value in kwargs.items()}
+        # Substitute in a single pass so that an inserted value which happens
+        # to contain another key is not substituted again. Longer keys come
+        # first so a key that is a prefix of another cannot shadow it.
+        pattern = "|".join(
+            re.escape(key) for key in sorted(values, key=len, reverse=True)
+        )
+        return re.sub(pattern, lambda match: values[match.group(0)], template)
+    def dedup_template(self, template: str, keys: Optional[List[str]] = None) -> str:
+        """
+        Remove repeated keys from a template, keeping the first occurrence of each.
+        Args:
+            template: Template text
+            keys: Keys to deduplicate; with no keys the template is returned unchanged
+        Returns:
+            Template without duplicate keys
+        """
+        # Empty keys would produce a pattern that matches at every position
+        keys = [key for key in (keys or []) if key]
+        if not keys:
+            return template
+        seen = set()
+        pattern = "|".join(re.escape(key) for key in keys)
+        def keep_first(match: re.Match) -> str:
+            key = match.group(0)
+            if key in seen:
+                return ""
+            seen.add(key)
+            return key
+        return re.sub(pattern, keep_first, template)
+    def short_input_name_template(self, template: str, **kwargs: Any) -> str:
+        """
+        Shorten the input file name so the filled template fits the safe maximum length.
+        Args:
+            template: Template text
+            **kwargs: Key/value pairs for the template; "NAME" holds the input file name
+        Returns:
+            The (possibly shortened) input file name, or "" when no keys are
+            given or "NAME" is missing or empty
+        """
+        if kwargs:
+            input_file_name = kwargs.get("NAME")
+            if not input_file_name:
+                print(_i18n("name_key_missing"))
+                return ""
+            # Length taken by the values of all keys other than NAME
+            values_length = sum(
+                len(str(value)) for key, value in kwargs.items() if key != "NAME"
+            )
+            # Length of the template's literal text once every key is removed
+            literal_part = template
+            for key in kwargs:
+                literal_part = literal_part.replace(str(key), "")
+            free_length = self.safe_max_length - (values_length + len(literal_part))
+            if len(input_file_name) <= free_length:
+                return input_file_name
+            # A zero or negative budget yields just "..." instead of a wrong slice
+            return self._ellipsize(
+                input_file_name, int(free_length // 2), int(free_length // 2.5)
+            )
+        else:
+            print(_i18n("keys_required"))
             return ""

mvsepless/separator.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

mvsepless/vbachgen.py CHANGED Viewed

The diff for this file is too large to render. See raw diff