# Source: Hugging Face Spaces - noblebarkrr/mvsepless_zero_gpu (Space status: Paused)
# File size: 53,676 Bytes

from tqdm import tqdm
import subprocess
import numpy as np
from pathlib import Path
import librosa
from scipy.signal import ShortTimeFFT, resample
from scipy.signal.windows import dpss, hann
from numpy.typing import DTypeLike
from typing import List, Tuple, Optional, Union, Dict, Any, Callable
from i18n import _i18n

# Executable names (or paths) used when spawning FFmpeg / FFprobe via subprocess.
ffmpeg_path = "ffmpeg"
ffprobe_path = "ffprobe"
# Default FFT size and hop length handed to get_stft_obj() by the STFT-based routines.
n_fft = 2048
hop = 1024

def average(*ints: Union[int, float]) -> float:
    """
    Compute the arithmetic mean of the given numbers.

    Args:
        *ints: Numbers to average.

    Returns:
        The mean value.

    Raises:
        ZeroDivisionError: If no numbers are passed.
    """
    total = sum(ints)
    return total / len(ints)


def check_installed() -> None:
    """
    Report whether the ffmpeg and ffprobe executables can be launched.

    Each tool is started with ``-version``; a localized "found" or
    "not found" message is printed for it. Nothing is returned or raised
    for a missing tool.
    """
    probes = (
        (ffmpeg_path, "ffmpeg_found", "ffmpeg_not_found"),
        (ffprobe_path, "ffprobe_found", "ffprobe_not_found"),
    )
    for executable, found_key, missing_key in probes:
        try:
            subprocess.check_output([executable, "-version"], text=True)
        # Only launch/exit/decoding failures mean "not usable"; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            print(_i18n(missing_key))
        else:
            print(_i18n(found_key))


def get_ogg_bitrate(sample_rate: int, channels: int = 2) -> int:
    """
    Pick a recommended OGG bitrate from the sample rate.

    Args:
        sample_rate: Sample rate of the audio.
        channels: Number of channels.

    Returns:
        Per-channel bitrate for the matching sample-rate tier multiplied by
        the channel count.
    """
    # (minimum sample rate, per-channel bitrate), highest tier first.
    tiers = (
        (40000, 240),
        (26000, 190),
        (15000, 90),
        (9000, 50),
        (8000, 42),
    )
    per_channel = next(
        (rate for floor, rate in tiers if sample_rate >= floor),
        30,  # anything below the lowest tier
    )
    return int(per_channel * channels)


# Maps a dtype (given as a string name or as a numpy scalar type) to the raw
# PCM format name passed to ffmpeg's "-f" option in read().
SAMPLE_FORMATS_DICT: Dict[Union[str, type], str] = {
    "int16": "s16le",
    "int32": "s32le",
    "float32": "f32le",
    "float64": "f64le",
    np.int16: "s16le",
    np.int32: "s32le",
    np.float32: "f32le",
    np.float64: "f64le",
}

# Audio-only container/codec names (without the leading dot).
audio_formats: List[str] = [
    'aac', 'ac3', 'ac4', 'adts', 'aiff', 'au', 'caf', 'dts', 'eac3',
    'flac', 'm4a', 'mp3', 'mp2', 'ogg', 'oga', 'opus', 'ra', 'raw',
    'snd', 'voc', 'wav', 'wma', 'wv'
]

# Video container names that may carry an audio stream.
video_formats_with_audio: List[str] = [
    '3gp', '3g2', 'asf', 'avi', 'flv', 'f4v', 'm4v', 'mkv', 'mov',
    'mp4', 'mpeg', 'mpg', 'mts', 'mxf', 'ogv', 'rm', 'rmvb', 'ts',
    'vob', 'webm', 'wmv'
]

# Every format name listed as input: video containers first, then audio.
input_formats: List[str] = video_formats_with_audio + audio_formats

# Format names listed as output targets; each has an entry in codec_args.
output_formats: List[str] = [
    "mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff", "wma"
]

# The same lists expressed as file extensions (with the leading dot).
input_extensions: List[str] = [f".{of}" for of in input_formats]

output_extensions: List[str] = [f".{of}" for of in output_formats]

# FFmpeg encoder arguments per output extension.
# Outer key: file extension (with leading dot).
# Inner key: the "prefer float" flag (True -> float-oriented sample format,
# False -> integer-oriented one). Several codecs list the same arguments for
# both flag values.
codec_args: Dict[str, Dict[bool, List[str]]] = {
    ".mp3": {
        True: ["-c:a", "libmp3lame", "-sample_fmt", "fltp"],
        False: ["-c:a", "libmp3lame", "-sample_fmt", "s16p"]
    },
    ".wav": {
        True: ["-c:a", "pcm_f32le", "-sample_fmt", "flt"],
        False: ["-c:a", "pcm_s16le", "-sample_fmt", "s16"]
    },
    ".flac": {
        True: ["-c:a", "flac", "-sample_fmt", "s32"],
        False: ["-c:a", "flac", "-sample_fmt", "s16"]
    },
    ".ogg": {
        True: ["-c:a", "libvorbis", "-sample_fmt", "fltp"],
        False: ["-c:a", "libvorbis", "-sample_fmt", "fltp"]
    },
    ".opus": {
        True: ["-c:a", "libopus", "-sample_fmt", "flt"],
        False: ["-c:a", "libopus", "-sample_fmt", "s16"]
    },
    ".m4a": {
        True: ["-c:a", "aac", "-sample_fmt", "fltp"],
        False: ["-c:a", "aac", "-sample_fmt", "fltp"]
    },
    ".aac": {
        True: ["-c:a", "aac", "-sample_fmt", "fltp"],
        False: ["-c:a", "aac", "-sample_fmt", "fltp"]
    },
    ".ac3": {
        True: ["-c:a", "ac3", "-sample_fmt", "fltp"],
        False: ["-c:a", "ac3", "-sample_fmt", "fltp"]
    },
    ".aiff": {
        True: ["-c:a", "pcm_f32be", "-sample_fmt", "flt"],
        False: ["-c:a", "pcm_s16be", "-sample_fmt", "s16"]
    },
    ".wma": {
        True: ["-c:a", "wmav2", "-sample_fmt", "fltp"],
        False: ["-c:a", "wmav2", "-sample_fmt", "fltp"]
    }
}

# Names of the ensemble modes; presumably consumed by an ensembling routine
# outside this part of the file (TODO confirm).
ensemble_types = ("avg_fft", "min_fft", "max_fft", "median_fft")

def get_codec_args(extension: str, prefer_float: bool) -> List[str]:
    """
    Look up the FFmpeg codec arguments for an output extension.

    Args:
        extension: File extension including the leading dot (e.g. ".wav").
        prefer_float: Select the float-oriented argument set when truthy,
            the integer-oriented one otherwise.

    Returns:
        A new list of FFmpeg arguments, or an empty list when the extension
        is not present in ``codec_args``.
    """
    per_precision = codec_args.get(extension)
    if per_precision is None:
        return []
    # Return a copy: callers typically extend the command line, and handing
    # out the stored list would let them corrupt the shared table.
    return list(per_precision[bool(prefer_float)])


# Characters kept by sanitize_output(): decimal digits only.
allowed_chars: str = r"1234567890"


def sanitize_output(output: str) -> str:
    """
    Strip every character that is not listed in ``allowed_chars``.

    Args:
        output: Raw text to clean.

    Returns:
        The text with only the allowed characters, in their original order.
    """
    kept = filter(lambda symbol: symbol in allowed_chars, output)
    return "".join(kept)


def get_sr(path: str | Path, stream: int = 0) -> int:
    """
    Read the sample rate of an audio stream with ffprobe.

    Args:
        path: Path to the media file.
        stream: Index of the audio stream to inspect.

    Returns:
        The sample rate, or 0 (after printing a localized error) when
        ffprobe produced no usable number.
    """
    path = Path(path)
    cmd = [ffprobe_path, "-i", path.as_posix(), "-v", "quiet", "-hide_banner",
           "-show_entries", "stream=sample_rate", "-select_streams", f"a:{stream}",
           "-of", "compact=p=0:nk=1"]
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # ffprobe may print the value on more than one line; sanitizing the whole
    # output would glue those digits into one bogus number, so use the first
    # token that is numeric after cleaning.
    for token in completed.stdout.decode("utf-8", errors="replace").split():
        digits = sanitize_output(token)
        if digits.isdigit():
            return int(digits)
    print(_i18n("sr_read_error", path=path))
    return 0


def get_channels(path: str | Path, stream: int = 0) -> int:
    """
    Read the channel count of an audio stream with ffprobe.

    Args:
        path: Path to the media file.
        stream: Index of the audio stream to inspect.

    Returns:
        The number of channels, or 0 (after printing a localized error) when
        ffprobe produced no usable number.
    """
    path = Path(path)
    cmd = [ffprobe_path, "-i", path.as_posix(), "-v", "quiet", "-hide_banner",
           "-show_entries", "stream=channels", "-select_streams", f"a:{stream}",
           "-of", "compact=p=0:nk=1"]
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # ffprobe may print the value on more than one line; sanitizing the whole
    # output would glue those digits into one bogus number, so use the first
    # token that is numeric after cleaning.
    for token in completed.stdout.decode("utf-8", errors="replace").split():
        digits = sanitize_output(token)
        if digits.isdigit():
            return int(digits)
    print(_i18n("channels_read_error", path=path))
    return 0


def check(path: str | Path) -> bool:
    """
    Tell whether a file contains a readable audio stream.

    Args:
        path: Path to the file.

    Returns:
        True when both the channel count and the sample rate reported for
        the first audio stream are non-zero.
    """
    channel_count = get_channels(path)
    sample_rate = get_sr(path)
    return not (channel_count == 0 or sample_rate == 0)


def read(
    path: str | Path, 
    sr: Optional[int] = None, 
    mono: bool = False, 
    dtype: DTypeLike = "float32", 
    multi_channel: bool = False, 
    num_channels: int = 2, 
    stream: int = 0, 
    flatten: bool = False
) -> Tuple[np.ndarray, int]:
    """
    Decode an audio stream into a numpy array by piping raw PCM out of ffmpeg.

    Args:
        path: Path to the media file.
        sr: Target sample rate; when falsy the file's own rate is queried.
        mono: Decode to a single channel.
        dtype: Desired sample dtype. Dtypes listed in ``SAMPLE_FORMATS_DICT``
            are decoded directly; any other dtype is decoded as float32 and
            then converted with ``convert_to_dtype``.
        multi_channel: Keep the file's own channel count (ignored when
            ``mono`` is set).
        num_channels: Channel count used when neither ``mono`` nor
            ``multi_channel`` is set.
        stream: Index of the audio stream.
        flatten: With ``mono``, return a 1-D array instead of shape (1, n).

    Returns:
        Tuple ``(samples, sample_rate)``; unless flattened, samples are laid
        out as (channels, n).
    """
    path = Path(path)
    native_format = SAMPLE_FORMATS_DICT.get(dtype, None)
    if not sr:
        sr = get_sr(path, stream)

    if mono:
        channels = 1
    elif multi_channel:
        channels = get_channels(path, stream)
    else:
        channels = num_channels

    # Without a native raw format for the dtype, decode as float32 and convert.
    needs_conversion = not native_format
    raw_format = "f32le" if needs_conversion else native_format

    cmd = [ffmpeg_path, "-i", path.as_posix(), "-map", f"0:a:{stream}", "-vn",
           "-f", raw_format, "-ac", str(channels), "-ar", str(sr), "-"]
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=10**8
    )
    stdout, stderr = process.communicate()

    if needs_conversion:
        y = convert_to_dtype(np.frombuffer(stdout, dtype=np.float32), dtype)
    else:
        y = np.frombuffer(stdout, dtype=dtype)

    if mono and flatten:
        y = y.flatten()
    else:
        # Interleaved frames -> (channels, n); for mono, channels is 1.
        y = y.reshape((-1, channels)).T

    return y.copy(), sr


def multiread(
    paths: Union[List[str | Path], Tuple[str | Path, ...]], 
    *args, 
    **kwargs
) -> Tuple[List[np.ndarray], List[int]]:
    """
    Read several audio files with ``read`` while showing a progress bar.

    Args:
        paths: Paths of the files to read.
        *args: Positional arguments forwarded to ``read``.
        **kwargs: Keyword arguments forwarded to ``read``.

    Returns:
        Tuple ``(list of arrays, list of sample rates)`` in input order.
    """
    total = len(paths)
    loaded: List[np.ndarray] = []
    rates: List[int] = []
    progress = tqdm(paths, total=total, desc=_i18n("multi_reading"), unit=_i18n("files"))
    for item in progress:
        data, rate = read(item, *args, **kwargs)
        loaded.append(data)
        rates.append(rate)
    return loaded, rates


def bitrate_to_int(a: Union[str, int, float]) -> int:
    """
    Convert a bitrate given as text or as a number into an integer.

    Args:
        a: Bitrate such as ``"192k"``, ``"192"``, ``192`` or ``192.0``.

    Returns:
        The integer value; a trailing ``k``/``K`` is dropped without scaling.
        320 is returned for unparsable strings (after printing a localized
        message) and for unsupported types.
    """
    if not isinstance(a, str):
        return int(a) if isinstance(a, (int, float)) else 320

    digits = a[:-1] if a.endswith(("k", "K")) else a
    if not digits.isdigit():
        print(_i18n("invalid_bitrate", bitrate=a))
        return 320
    return int(digits)


def get_info_array(y: np.ndarray) -> Tuple[int, int, Optional[int], bool]:
    """
    Describe the layout of an audio array.

    For 2-D input the shorter dimension is taken to be the channel axis
    (ties are treated as (samples, channels)).

    Args:
        y: 1-D or 2-D audio array.

    Returns:
        Tuple ``(channels, samples, array_index, flatten)`` where
        ``array_index`` is the axis holding the samples (1 for
        (channels, samples), 0 for (samples, channels), -1 for 1-D input)
        and ``flatten`` is True for 1-D input.

    Raises:
        ValueError: If the array has more than two (or zero) dimensions.
    """
    if y.ndim == 1:
        return 1, len(y), -1, True
    if y.ndim != 2:
        raise ValueError(_i18n("array_dim_error", axis=y.ndim))

    rows, cols = y.shape
    if rows < cols:
        # (channels, samples)
        return rows, cols, 1, False
    # (samples, channels)
    return cols, rows, 0, False


def get_axis_from_array_index(index: int) -> int:
    """
    Translate a sample-axis index into the complementary (channel) axis.

    Args:
        index: Sample-axis index as reported by ``get_info_array``.

    Returns:
        0 for index 1, 1 for index 0, and -1 for any other value.
    """
    if index == 1:
        return 0
    return 1 if index == 0 else -1


def get_duration_from_array(y: np.ndarray, sr: Optional[int] = None) -> Union[float, int]:
    """
    Measure the length of an audio array.

    Args:
        y: Audio array.
        sr: Sample rate; when omitted the length is reported in samples.

    Returns:
        Duration in seconds when ``sr`` is given, otherwise the sample count.
    """
    sample_count: int = get_info_array(y)[1]
    return sample_count if sr is None else sample_count / sr


def is_float(y: np.ndarray) -> bool:
    """
    Tell whether an array stores floating-point samples.

    Args:
        y: Audio array.

    Returns:
        True when the array's dtype is a floating-point type.
    """
    element_type = y.dtype
    return np.issubdtype(element_type, np.floating)


def is_float_dtype(dtype: DTypeLike) -> bool:
    """
    Tell whether a dtype specification denotes a floating-point type.

    Args:
        dtype: Dtype, scalar type or dtype name.

    Returns:
        True when the dtype is a floating-point type.
    """
    floating = np.floating
    return np.issubdtype(dtype, floating)


def float_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert floating-point samples (nominally in [-1, 1]) to an integer type.

    Signed targets scale by the type's maximum; unsigned targets first map
    [-1, 1] onto [0, 1]. Values are rounded and clipped to the target range.

    Args:
        y: Floating-point array.
        dtype: Target integer dtype.

    Returns:
        Array of the target integer dtype.

    Raises:
        ValueError: If ``dtype`` is not an integer type (raised by
            ``np.iinfo``) or reports a positive minimum.
    """
    info = np.iinfo(dtype)
    min_val = info.min
    max_val = info.max

    # Work in float64: float32 cannot represent e.g. the int32 maximum, so a
    # full-scale +1.0 sample would become 2147483648.0 and overflow on the cast.
    samples = np.asarray(y, dtype=np.float64)
    scale = float(max_val)

    # Largest float64 that does not exceed the integer maximum (matters for
    # 64-bit targets, whose maximum is not exactly representable).
    upper = scale
    if upper > max_val:
        upper = float(np.nextafter(upper, 0.0))

    if min_val < 0:
        scaled = samples * scale
        lower = float(min_val)
    elif min_val == 0:
        scaled = (samples + 1) / 2 * scale
        lower = 0.0
    else:
        raise ValueError(_i18n("unexpected_min_val", value=min_val))

    return np.clip(np.round(scaled), lower, upper).astype(dtype)


def int_to_int(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Rescale integer samples to another integer type.

    The full source range is mapped linearly onto the full target range
    (source minimum -> target minimum, source maximum -> target maximum).

    Args:
        y: Integer array.
        dtype: Target integer dtype.

    Returns:
        Array of the target dtype.
    """
    dst = np.iinfo(dtype)
    src = np.iinfo(y.dtype)
    as_float = y.astype(np.float64)

    src_span = src.max - src.min
    dst_span = dst.max - dst.min
    if src_span == 0:
        return np.full_like(y, dst.min, dtype=dtype)

    rescaled = (as_float - src.min) * (dst_span / src_span) + dst.min
    bounded = np.clip(np.round(rescaled), dst.min, dst.max)
    return bounded.astype(dtype)


def int_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert integer samples to floating point in the nominal range [-1, 1].

    Signed input is divided by the largest absolute value of its type.
    Unsigned input is mapped so that 0 -> -1 and the type maximum -> +1,
    which is the exact inverse of the unsigned branch of ``float_to_int``.

    Args:
        y: Integer array.
        dtype: Target floating-point dtype.

    Returns:
        Array of the target dtype.

    Raises:
        ValueError: If the source type reports a positive minimum.
    """
    info = np.iinfo(y.dtype)
    samples = y.astype(np.float64)
    if info.min == 0:
        # Previously (y - centre) / max, which only spanned about +/-0.5 and
        # so halved the amplitude on every int -> float -> int round trip.
        y_normalized = samples / info.max * 2 - 1
    elif info.min < 0:
        abs_max = max(abs(info.min), abs(info.max))
        y_normalized = samples / abs_max
    else:
        raise ValueError(_i18n("unexpected_min_val", value=info.min))
    return y_normalized.astype(dtype)


def float_to_float(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Cast a floating-point array to another floating-point dtype.

    Args:
        y: Floating-point array.
        dtype: Target dtype.

    Returns:
        A new array with the requested dtype (no rescaling is applied).
    """
    recast = y.astype(dtype, copy=True)
    return recast


def get_center_value_from_dtype(dtype: DTypeLike) -> int:
    """
    Return the value that represents silence for a sample dtype.

    Args:
        dtype: Sample dtype.

    Returns:
        0 for floating-point types; for integer types the midpoint between
        the type's minimum and maximum, truncated towards zero.
    """
    if not is_float_dtype(dtype):
        limits = np.iinfo(dtype)
        return int(average(limits.min, limits.max))
    return 0


def convert_to_dtype(y: np.ndarray, dtype: DTypeLike) -> np.ndarray:
    """
    Convert an audio array to another sample dtype, rescaling as needed.

    Dispatches to ``float_to_float``, ``float_to_int``, ``int_to_float`` or
    ``int_to_int`` depending on whether source and target are floating point.

    Args:
        y: Input array.
        dtype: Target dtype.

    Returns:
        The converted array.
    """
    source_is_float = is_float(y)
    target_is_float = is_float_dtype(dtype)
    if source_is_float:
        converter = float_to_float if target_is_float else float_to_int
    else:
        converter = int_to_float if target_is_float else int_to_int
    return converter(y, dtype)


def dc_offset(y: np.ndarray, offset: Union[float, int]) -> np.ndarray:
    """
    Add a constant (DC) offset to the signal.

    The offset is applied in float32 and the result is converted back to
    the input dtype.

    Args:
        y: Audio array.
        offset: Value added to every sample (on the float scale).

    Returns:
        The shifted array in the original dtype.
    """
    source_dtype = y.dtype
    shifted = convert_to_dtype(y, np.float32) + offset
    return convert_to_dtype(shifted, source_dtype)


def gain(y: np.ndarray, gain_value: Union[float, int]) -> np.ndarray:
    """
    Multiply the signal by a gain factor.

    The multiplication is done in float32 and the result is converted back
    to the input dtype.

    Args:
        y: Audio array.
        gain_value: Linear gain factor.

    Returns:
        The scaled array in the original dtype.
    """
    source_dtype = y.dtype
    scaled = convert_to_dtype(y, np.float32) * gain_value
    return convert_to_dtype(scaled, source_dtype)


def normalize(y: np.ndarray, target_peak: Union[float, int] = 1.0) -> np.ndarray:
    """
    Peak-normalize the signal.

    The array is scaled in float32 so that its largest absolute sample
    equals ``target_peak`` and is then converted back to the input dtype.
    Silent and empty signals are returned unscaled.

    Args:
        y: Audio array.
        target_peak: Desired peak value on the float scale.

    Returns:
        The normalized array in the original dtype.
    """
    orig_dtype = y.dtype
    y = convert_to_dtype(y, np.float32)
    # np.max raises on zero-size input, so treat an empty signal as having no peak.
    current_peak = np.max(np.abs(y)) if y.size else 0.0
    if current_peak > 0:
        y = y * (target_peak / current_peak)
    return convert_to_dtype(y, orig_dtype)


def create_zero_array(samples: int, dtype: DTypeLike) -> np.ndarray:
    """
    Create a 1-D array of silence for the given dtype.

    Args:
        samples: Number of samples.
        dtype: Sample dtype.

    Returns:
        Array of length ``samples`` (empty for non-positive counts) filled
        with the dtype's centre value from ``get_center_value_from_dtype``.
    """
    # One vectorized fill instead of a Python list built with one helper call
    # per sample, which is very slow for real audio lengths.
    center_value = get_center_value_from_dtype(dtype)
    return np.full(max(samples, 0), center_value, dtype=dtype)


def split_channels(y: np.ndarray) -> Tuple[np.ndarray, ...]:
    """
    Split an audio array into per-channel 1-D arrays.

    Args:
        y: Audio array (1-D or 2-D).

    Returns:
        Tuple with one array per channel; a 1-D input is returned as a
        single-element tuple containing the input itself.
    """
    channels, _samples, array_index, flatten = get_info_array(y)
    if flatten:
        return (y,)
    if array_index == 1:
        # (channels, samples): each channel is a row.
        return tuple(y[ch, :] for ch in range(channels))
    # (samples, channels): each channel is a column.
    return tuple(y[:, ch] for ch in range(channels))

def get_stft_obj(sr: int, n_fft: int, hop: int) -> ShortTimeFFT:
    """
    Создает STFT с окном DPSS для сверхточного разделения частот
    
    Args:
        sr: Частота дискретизации
        n_fft: Размер FFT
        hop: Шаг
    
    Returns:
        Объект ShortTimeFFT
    """
    win = hann(n_fft, sym=False)
    return ShortTimeFFT(win, hop=hop, fs=sr, scale_to='magnitude', phase_shift=None)


def split_mid_side(
    y: np.ndarray, 
    var: int = 1, 
    sr: Optional[int] = None
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Split a stereo signal into mid (centre) and side components.

    Variants selected by ``var``:
        0: side is each channel minus the L/R average; mid is the input
           minus that side (both stereo).
        1: mid is the L/R average on both channels; side is the input minus
           mid (both stereo).
        2: per sample, the centre is the smaller magnitude of L and R with
           L's sign where both have the same sign, else 0; side is each
           channel minus the centre (both stereo).
        3: like 2, but computed per STFT bin; requires ``sr``.
        4: mono pair: mid = (L + R) / 2, side = L - R (both 1-D).

    Processing is done in float32 and results are converted back to the
    input dtype.

    Args:
        y: Stereo audio array, (channels, samples) or (samples, channels).
        var: Variant number (0-4).
        sr: Sample rate; only needed for variant 3.

    Returns:
        Tuple ``(mid, side)``.

    Raises:
        Exception: If the input is not stereo, or ``sr`` is missing for
            variant 3.
        ValueError: If ``var`` is not one of 0-4.
    """
    channels, samples, array_index, flatten = get_info_array(y)
    axis = get_axis_from_array_index(array_index)
    if channels != 2:
        raise Exception(_i18n("stereo_required"))
    orig_dtype = y.dtype
    y = convert_to_dtype(y, np.float32)
    left_channel, right_channel = split_channels(y)[:2]

    if var == 3:
        print(_i18n("mid_side_var3"))
        if not sr:
            raise Exception(_i18n("sr_required"))

        sft = get_stft_obj(sr, n_fft=n_fft, hop=hop)

        # Spectra of the left and right channels.
        Lf = sft.stft(left_channel)
        Rf = sft.stft(right_channel)

        # A bin counts as "centre" when L and R are in phase there. The real
        # part of L*conj(R) equals that of R*conj(L), so one mask serves both.
        in_phase = np.real(Lf * np.conj(Rf)) > 0
        center_mag = np.minimum(np.abs(Lf), np.abs(Rf)) * in_phase

        # Same phase pairing as before: the centre removed from L carries
        # R's phase and vice versa.
        C_L = center_mag * np.exp(1j * np.angle(Rf))
        C_R = center_mag * np.exp(1j * np.angle(Lf))
        SL = Lf - C_L
        SR = Rf - C_R

        # Use the detected sample count: y.shape[-1] is 2 for
        # (samples, channels) input and would truncate the output.
        center_l = sft.istft(C_L, k1=samples)
        center_r = sft.istft(C_R, k1=samples)
        side_l = sft.istft(SL, k1=samples)
        side_r = sft.istft(SR, k1=samples)

        # Return the caller's dtype, like every other variant does.
        mid_ch = multi_channel_array_from_arrays(
            center_l, center_r, index=array_index, dtype=orig_dtype
        )
        side_ch = multi_channel_array_from_arrays(
            side_l, side_r, index=array_index, dtype=orig_dtype
        )
        return mid_ch, side_ch

    mid_channel_one = (left_channel * 0.5) + (right_channel * 0.5)

    if var == 0:
        print(_i18n("mid_side_var0"))
        side_channel = np.stack(
            [left_channel - mid_channel_one, right_channel - mid_channel_one],
            axis=axis,
        )
        mid_channel = y - side_channel
    elif var == 1:
        print(_i18n("mid_side_var1"))
        mid_channel = np.stack([mid_channel_one, mid_channel_one], axis=axis)
        side_channel = y - mid_channel
    elif var == 2:
        print(_i18n("mid_side_var2"))
        same_sign = (left_channel * right_channel) > 0
        center_mono = np.where(
            same_sign,
            np.minimum(np.abs(left_channel), np.abs(right_channel)) * np.sign(left_channel),
            0.0
        )
        mid_channel = np.stack([center_mono, center_mono], axis=axis)
        side_channel = np.stack(
            [left_channel - center_mono, right_channel - center_mono],
            axis=axis,
        )
    elif var == 4:
        print(_i18n("mid_side_var4"))
        mid_channel = mid_channel_one
        side_channel = left_channel - right_channel
    else:
        raise ValueError(_i18n("unknown_var", var=var))

    return convert_to_dtype(mid_channel, orig_dtype), convert_to_dtype(side_channel, orig_dtype)


def mid_side_to_stereo(
    y: np.ndarray, 
    z: np.ndarray, 
    index: int = -1, 
    dtype: DTypeLike = np.float32
) -> np.ndarray:
    """
    Rebuild a stereo signal from mid and side channels.

    The two output channels are ``y + z`` and ``y - z``, computed in
    float32.

    Args:
        y: Mid channel.
        z: Side channel.
        index: Sample-axis index controlling the output layout (passed to
            ``multi_channel_array_from_arrays``).
        dtype: Dtype of the returned array.

    Returns:
        The stereo array in the requested dtype.
    """
    mid_f = convert_to_dtype(y, np.float32)
    side_f = convert_to_dtype(z, np.float32)
    mid = multi_channel_array_from_arrays(mid_f, mid_f, index=index, dtype=np.float32)
    side = multi_channel_array_from_arrays(side_f, -side_f, index=index, dtype=np.float32)
    stereo = mid + side
    return convert_to_dtype(stereo, dtype)


def mono_to_stereo(
    y: np.ndarray, 
    index: int, 
    num_channels: int = 2
) -> np.ndarray:
    """
    Expand an audio array to ``num_channels`` channels.

    A mono signal is duplicated onto every channel, laid out according to
    ``index``. A multi-channel signal that already has at least
    ``num_channels`` channels is returned unchanged; otherwise silent
    channels are appended, keeping the array's own layout.

    Args:
        y: Audio array.
        index: Sample-axis index describing the layout wanted for a
            duplicated mono signal.
        num_channels: Desired number of channels.

    Returns:
        The expanded array (or ``y`` itself when nothing has to be added).
    """
    channels, samples, array_index, flatten = get_info_array(y)
    axis = get_axis_from_array_index(array_index)
    new_axis = get_axis_from_array_index(index)
    orig_dtype = y.dtype

    if channels == 1:
        mono = y if flatten else y.flatten()
        return np.stack([mono] * num_channels, axis=new_axis, dtype=orig_dtype)

    if num_channels <= channels:
        return y

    # Append silent channels along the array's own channel axis. The old
    # np.append call passed a 1-D block to a 2-D array and always raised.
    missing = num_channels - channels
    pad_shape = (missing, samples) if axis == 0 else (samples, missing)
    silence = np.full(pad_shape, get_center_value_from_dtype(orig_dtype), dtype=orig_dtype)
    return np.concatenate([y, silence], axis=axis)


def stereo_to_mono(y: np.ndarray, to_flatten: bool = False) -> np.ndarray:
    """
    Downmix an audio array to a single channel.

    Multi-channel input is mixed by summing every channel scaled by
    ``1 / channels``. Single-channel input is only reshaped.

    Args:
        y: Audio array.
        to_flatten: Return a 1-D array when True; otherwise return a 2-D
            array with one channel that keeps the input orientation
            ((samples, 1) or (1, samples)).

    Returns:
        The mono signal in the original dtype.
    """
    channels, samples, array_index, flatten = get_info_array(y)
    orig_dtype = y.dtype
    y = convert_to_dtype(y, np.float32)

    if channels > 1:
        mixed = create_zero_array(samples, np.float64)
        weight = (1 / channels)
        for channel in split_channels(y):
            mixed = mixed + gain(channel, weight)

        if to_flatten:
            shaped = mixed
        elif array_index == 0:
            # Input was (samples, channels).
            shaped = mixed.reshape(-1, 1)
        else:
            # Input was (channels, samples).
            shaped = mixed.reshape(1, -1)
        return convert_to_dtype(shaped, orig_dtype)

    if to_flatten and not flatten:
        shaped = y.flatten()
    elif flatten and not to_flatten:
        shaped = y.reshape(-1, 1) if array_index == 0 else y.reshape(1, -1)
    else:
        shaped = y
    return convert_to_dtype(shaped, orig_dtype)


def multi_channel_array_from_arrays(
    *arrays: np.ndarray, 
    index: int = -1, 
    dtype: DTypeLike
) -> np.ndarray:
    """
    Stack separate channel arrays into one multi-channel array.

    Args:
        *arrays: One array per channel.
        index: Sample-axis index; the channels are stacked along the axis
            returned by ``get_axis_from_array_index(index)``.
        dtype: Dtype every channel is converted to, and of the result.

    Returns:
        The stacked multi-channel array.
    """
    converted = [convert_to_dtype(channel, dtype) for channel in arrays]
    stack_axis = get_axis_from_array_index(index)
    return np.stack(converted, axis=stack_axis, dtype=dtype)


def reshape(y: np.ndarray, shape: Tuple[str, ...] = ("channels", "samples")) -> np.ndarray:
    """
    Rearrange an audio array into the requested layout.

    Args:
        y: Audio array.
        shape: Target layout: ("channels", "samples"), ("samples", "channels")
            or ("samples",).

    Returns:
        The array in the requested layout (possibly the input itself or a
        transposed view of it).

    Raises:
        ValueError: If ``shape`` is not one of the supported layouts.
    """
    channels, samples, array_index, flatten = get_info_array(y)
    
    if shape == ("channels", "samples"):
        if array_index == 0:
            return y.T
        elif array_index == 1:
            return y
        # NOTE(review): get_info_array reports -1 (never None) for 1-D input,
        # so this branch is not taken; 1-D input falls through to the else
        # below and comes back 1-D. fit_arrays(flatten=True) appears to rely
        # on that, so confirm with callers before changing it.
        elif array_index is None and flatten:
            return y.reshape((-1, 1)).T
        else:
            if y.shape[0] == channels:
                return y
            else:
                return y.T
    
    elif shape == ("samples", "channels"):
        if array_index == 1:  # (channels, samples)
            return y.T
        elif array_index == 0:  # (samples, channels)
            return y
        elif array_index == -1 and flatten:
            # 1-D input becomes a single column.
            return y.reshape((-1, 1))
        else:
            if y.shape[0] == samples:
                return y
            else:
                return y.T
    
    elif shape == ("samples",):
        if channels == 1 and not flatten:
            return y.flatten()
        elif flatten:
            return y
        else:
            # Several channels: downmix to a flat mono signal.
            return stereo_to_mono(y, to_flatten=True)
    
    else:
        raise ValueError(f"{_i18n('unknown_shape')}: {shape}")


def easy_resampler(y: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """
    Resample an audio array with librosa.

    The signal is converted to float32, resampled along its sample axis and
    converted back to the input dtype.

    Args:
        y: Audio array (1-D, (channels, samples) or (samples, channels)).
        orig_sr: Current sample rate.
        target_sr: Desired sample rate.

    Returns:
        The resampled array in the original dtype.
    """
    # array_index is the axis that holds the samples (-1 for 1-D input).
    _channels, _samples, array_index, _flatten = get_info_array(y)
    orig_dtype = y.dtype
    resampled = librosa.resample(
        convert_to_dtype(y, np.float32),
        orig_sr=orig_sr,
        target_sr=target_sr,
        # librosa defaults to the last axis, which is the channel axis for
        # (samples, channels) input.
        axis=array_index,
    )
    return convert_to_dtype(resampled, orig_dtype)


def add_zero_to_end(y: np.ndarray, max_samples: int) -> np.ndarray:
    """
    Bring an audio array to exactly ``max_samples`` samples.

    Shorter arrays are padded at the end with the dtype's centre value
    (silence). Arrays that are already long enough are passed to
    ``trim(y, 0, max_samples)``, which is defined outside this part of the
    file (presumably it cuts the array to that length; confirm there).

    Args:
        y: Audio array.
        max_samples: Target number of samples.

    Returns:
        The padded (or trimmed) array.
    """
    channels, samples, array_index, flatten = get_info_array(y)
    center_value = get_center_value_from_dtype(y.dtype)
    if samples >= max_samples:
        return trim(y, 0, max_samples)

    missing = max_samples - samples
    if flatten:
        pad_width = (0, missing)
    elif array_index == 1:
        # (channels, samples): pad the second axis.
        pad_width = ((0, 0), (0, missing))
    else:
        # (samples, channels): pad the first axis.
        pad_width = ((0, missing), (0, 0))
    return np.pad(y, pad_width, mode="constant", constant_values=center_value)


def fit_arrays(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]], 
    srs: Union[Tuple[int, ...], List[int]], 
    max_channels: int = 2, 
    min_sr: int = 44100, 
    flatten: bool = False,
    extend: bool = True
) -> Tuple[np.ndarray, ...]:
    """
    Bring several audio arrays to a common sample rate, channel count and length.

    Args:
        arrays: Audio arrays.
        srs: Sample rate of each array (same order and length as ``arrays``).
        max_channels: Channel count to expand to; a value below 2 downmixes
            to mono instead.
        min_sr: Target sample rate; arrays at another rate are resampled to it.
        flatten: Downmix every array to a flat mono signal.
        extend: Pad every array with silence up to the longest one.

    Returns:
        Tuple of the adjusted arrays, in input order.

    Raises:
        Exception: If ``arrays`` and ``srs`` differ in length.
    """
    if len(arrays) != len(srs):
        raise Exception(_i18n("arrays_srs_mismatch"))

    new_arrays = []
    arrays_with_srs = list(zip(arrays, srs))

    for (array, sr) in tqdm(arrays_with_srs, desc=_i18n("fitting_progress"), unit=_i18n("arrays")):
        _channels, _samples, array_index1, _flat = get_info_array(array)
        # Skip the resampler for arrays already at the target rate: it would
        # only add a dtype round trip (lossy for integer audio).
        if sr == min_sr:
            a1 = array
        else:
            a1 = easy_resampler(array, sr, min_sr)

        if flatten:
            a1 = stereo_to_mono(a1, to_flatten=True)
        elif max_channels >= 2:
            a1 = mono_to_stereo(a1, array_index1, max_channels)
        else:
            a1 = stereo_to_mono(a1)
        a1 = reshape(a1, shape=("channels", "samples"))
        new_arrays.append(a1)

    lengths = [get_duration_from_array(array) for array in new_arrays]
    max_samples = max(lengths) if lengths else 0

    if extend:
        # enumerate() hides the length, so pass the total for the progress bar.
        progress = tqdm(
            enumerate(new_arrays),
            total=len(new_arrays),
            desc=_i18n("extending_progress"),
            unit=_i18n("arrays"),
        )
        for i, array_ in progress:
            new_arrays[i] = add_zero_to_end(array_, max_samples)

    return tuple(new_arrays)


def subtractor(
    y: np.ndarray, 
    z: np.ndarray, 
    sr1: int, 
    sr2: int, 
    spectrogram: bool = False
) -> Tuple[np.ndarray, int]:
    """
    Subtract one audio signal from another.

    Both signals are converted to float32 and aligned with ``fit_arrays``
    (lower of the two sample rates, larger of the two channel counts).
    The subtraction is then done either sample by sample or, with
    ``spectrogram``, on STFT magnitudes while keeping the phase of ``y``.

    Args:
        y: Signal to subtract from.
        z: Signal to subtract.
        sr1: Sample rate of ``y``.
        sr2: Sample rate of ``z``.
        spectrogram: Use magnitude subtraction in the STFT domain.

    Returns:
        Tuple ``(result, sample_rate)`` with the result in the dtype of ``y``.
    """
    channels1 = get_info_array(y)[0]
    channels2 = get_info_array(z)[0]
    orig_dtype1 = y.dtype
    y = convert_to_dtype(y, np.float32)
    z = convert_to_dtype(z, np.float32)
    max_channels = max(channels1, channels2)
    min_sr = min(sr1, sr2)
    fitted = fit_arrays([y, z], [sr1, sr2], max_channels=max_channels, min_sr=min_sr)
    y, z = fitted[0], fitted[1]

    if not spectrogram:
        print(_i18n("subtract_phase"))
        return convert_to_dtype(y - z, orig_dtype1), min_sr

    print(_i18n("subtract_spectrogram"))
    sft = get_stft_obj(min_sr, n_fft=n_fft, hop=hop)
    res_channels = []

    # Process one channel at a time to keep memory use down.
    for ch_y, ch_z in zip(split_channels(y), split_channels(z)):
        spec_y = sft.stft(ch_y.astype(np.float32))
        spec_z = sft.stft(ch_z.astype(np.float32))

        # Magnitude subtraction floored at zero, re-using the phase of y.
        magnitude = np.maximum(np.abs(spec_y) - np.abs(spec_z), 0)
        res_spec = magnitude * np.exp(1j * np.angle(spec_y))

        del spec_y, spec_z  # release the spectra before the inverse transform

        res_channels.append(sft.istft(res_spec, k1=ch_y.shape[-1]))

    subtracted = multi_channel_array_from_arrays(*res_channels, index=1, dtype=orig_dtype1)
    return subtracted, min_sr


def absmax(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
    """
    Return the element(s) with the largest absolute value, sign preserved.

    Args:
        a: Input array
        axis: Axis to reduce along; None reduces over the flattened array

    Returns:
        The element with the largest absolute value (a single element when
        ``axis`` is None, otherwise an array with ``axis`` removed)
    """
    if axis is None:
        return a.flatten()[np.argmax(np.abs(a))]

    # Shape of the result: the input shape with the reduced axis removed.
    reduced_shape = list(a.shape)
    del reduced_shape[axis]

    # Open grid that addresses every position of the reduced shape.
    selector = list(np.ogrid[tuple(slice(0, extent) for extent in reduced_shape)])

    # Position of the winner along `axis`, slotted back into the selector.
    winners = np.abs(a).argmax(axis=axis)
    selector.insert(axis % a.ndim, winners)
    return a[tuple(selector)]


def absmin(a: np.ndarray, *, axis: Optional[int] = None) -> np.ndarray:
    """
    Return the element(s) with the smallest absolute value, sign preserved.

    Args:
        a: Input array
        axis: Axis to reduce along; None reduces over the flattened array

    Returns:
        The element with the smallest absolute value (a single element when
        ``axis`` is None, otherwise an array with ``axis`` removed)
    """
    if axis is None:
        return a.flatten()[np.argmin(np.abs(a))]
    dims = list(a.shape)
    dims.pop(axis)
    # Depending on the NumPy version np.ogrid yields a list or an immutable
    # tuple; copy into a list so that insert() below always works.
    indices = list(np.ogrid[tuple(slice(0, d) for d in dims)])
    argmin = np.abs(a).argmin(axis=axis)
    # Normalise a negative axis to its non-negative position before inserting.
    indices.insert(axis % a.ndim, argmin)
    return a[tuple(indices)]


def lambda_max(
    arr: np.ndarray, 
    axis: Optional[int] = None, 
    key: Optional[Callable] = None, 
    keepdims: bool = False
) -> np.ndarray:
    """
    Pick the elements of ``arr`` that maximise a key function.

    Args:
        arr: Input array
        axis: Axis to reduce along; None reduces over the flattened array
        key: Function applied to ``arr`` to obtain the values being compared
            (defaults to ``np.abs``)
        keepdims: Keep the reduced axis with length 1 (only used when
            ``axis`` is given)

    Returns:
        Elements of ``arr`` at the positions where ``key(arr)`` is largest
    """
    scorer = np.abs if key is None else key
    best = np.argmax(scorer(arr), axis)

    if axis is None:
        return arr.flatten()[best]

    picked = np.take_along_axis(arr, np.expand_dims(best, axis), axis)
    return picked if keepdims else np.squeeze(picked, axis=axis)


def lambda_min(
    arr: np.ndarray, 
    axis: Optional[int] = None, 
    key: Optional[Callable] = None, 
    keepdims: bool = False
) -> np.ndarray:
    """
    Pick the elements of ``arr`` that minimise a key function.

    Args:
        arr: Input array
        axis: Axis to reduce along; None reduces over the flattened array
        key: Function applied to ``arr`` to obtain the values being compared
            (defaults to ``np.abs``)
        keepdims: Keep the reduced axis with length 1 (only used when
            ``axis`` is given)

    Returns:
        Elements of ``arr`` at the positions where ``key(arr)`` is smallest
    """
    scorer = np.abs if key is None else key
    best = np.argmin(scorer(arr), axis)

    if axis is None:
        return arr.flatten()[best]

    picked = np.take_along_axis(arr, np.expand_dims(best, axis), axis)
    return picked if keepdims else np.squeeze(picked, axis=axis)


def ensemble(
    pred_tracks: List[np.ndarray], 
    srs: List[int], 
    ensemble_type: str = ensemble_types[0],
    weights: Optional[List[float]] = None,  
    dtype: np.dtype = np.float32,
    disable_progress: bool = False,
) -> Tuple[np.ndarray, int]:
    """
    Build an ensemble from several predictions in the STFT domain.

    All tracks are first passed through ``fit_arrays`` (two channels, common
    sample rate), then the first two channels of every track are transformed
    with an STFT, combined bin by bin and transformed back.

    Args:
        pred_tracks: List of predictions (indexed as ``track[0]`` /
            ``track[1]`` for the left / right channel)
        srs: List of sample rates, one per prediction
        ensemble_type: Combination algorithm: 'avg_fft', 'min_fft',
            'max_fft' or 'median_fft'
        weights: Per-track weights for 'avg_fft'. Extra weights are dropped
            and missing ones default to 1.0; None or an empty list means
            equal weights. The weights must not sum to zero, because the
            accumulated spectrum is divided by their sum.
        dtype: Data type of the returned array
        disable_progress: Disable the progress bar

    Returns:
        Tuple (result, sample rate). The sample rate is the lowest input
        rate for 'min_fft' and the highest one otherwise.

    Raises:
        ValueError: If ``ensemble_type`` is not one of the supported
            algorithms. This is checked before any processing is done.
    """
    # Fail fast: reject an unknown algorithm before resampling and
    # transforming every track.
    if ensemble_type not in ("avg_fft", "min_fft", "max_fft", "median_fft"):
        raise ValueError(_i18n("unknown_etype", alg=ensemble_type))

    if ensemble_type == "min_fft":
        result_sr = int(min(srs))
    else:
        result_sr = int(max(srs))

    use_average = ensemble_type == "avg_fft"
    if use_average:
        track_count = len(pred_tracks)
        if weights:
            # Work on a copy so the caller's list is never modified:
            # truncate extra weights, pad missing ones with 1.0.
            weights = list(weights[:track_count])
            weights += [1.0] * (track_count - len(weights))
        else:
            weights = [1.0] * track_count
        total_weight = sum(weights)
    
    # Bring all tracks to a common length and sample rate
    pred_tracks = list(fit_arrays(pred_tracks, srs, max_channels=2, min_sr=result_sr))
    
    sft = get_stft_obj(result_sr, n_fft=2048, hop=1024)
    final_length = pred_tracks[0].shape[-1]
    
    # 'avg_fft' keeps running sums; the other algorithms need every spectrum.
    left_sum = None
    right_sum = None
    left_specs = []
    right_specs = []
    
    # Process the tracks one by one, both channels at once
    with tqdm(
        total=len(pred_tracks),
        desc=_i18n("ensemble_processing"),
        unit=_i18n("track"),
        disable=disable_progress,
        leave=False
    ) as pbar:
        
        for i, track in enumerate(pred_tracks):
            # STFT of the left and right channels
            spec_left = sft.stft(convert_to_dtype(track[0], np.float32))
            spec_right = sft.stft(convert_to_dtype(track[1], np.float32))
            
            if use_average:
                weighted_left = spec_left * weights[i]
                weighted_right = spec_right * weights[i]
                
                if left_sum is None:
                    left_sum = weighted_left
                    right_sum = weighted_right
                else:
                    left_sum += weighted_left
                    right_sum += weighted_right
            else:
                left_specs.append(spec_left)
                right_specs.append(spec_right)
            
            del spec_left, spec_right
            pbar.update(1)
    
    # Combine the collected spectra for both channels
    if use_average:
        left_res_spec = left_sum / total_weight
        right_res_spec = right_sum / total_weight
        
    elif ensemble_type == "median_fft":
        # Median of complex numbers: real and imaginary parts separately
        left_res_spec = (
            np.median(np.real(left_specs), axis=0)
            + 1j * np.median(np.imag(left_specs), axis=0)
        )
        right_res_spec = (
            np.median(np.real(right_specs), axis=0)
            + 1j * np.median(np.imag(right_specs), axis=0)
        )
        
    elif ensemble_type == "min_fft":
        left_res_spec = lambda_min(np.array(left_specs), axis=0, key=np.abs)
        right_res_spec = lambda_min(np.array(right_specs), axis=0, key=np.abs)
        
    else:  # "max_fft"
        left_res_spec = absmax(np.array(left_specs), axis=0)
        right_res_spec = absmax(np.array(right_specs), axis=0)
    
    # Back to the time domain
    left_channel = sft.istft(left_res_spec, k1=final_length)
    right_channel = sft.istft(right_res_spec, k1=final_length)
    
    # Assemble the multi-channel array
    result = multi_channel_array_from_arrays(left_channel, right_channel, index=1, dtype=dtype)
    
    return result, result_sr


def concatenate(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]], 
    srs: Union[Tuple[int, ...], List[int]], 
    dtype=np.float32
) -> Tuple[np.ndarray, int]:
    """
    Join several audio arrays one after another.

    Every array is converted to float64 and passed through ``fit_arrays``
    (two channels, highest input sample rate, ``extend=False``) before the
    arrays are concatenated along axis 1.

    Args:
        arrays: Sequence of arrays
        srs: Sequence of sample rates, one per array
        dtype: Data type of the returned array

    Returns:
        Tuple (result, sample rate)
    """
    # max(srs), not max(*srs): unpacking a single-element sequence would call
    # max() with one int argument and raise TypeError.
    max_sr = int(max(srs))
    fitted = fit_arrays(
        [convert_to_dtype(array, np.float64) for array in arrays],
        srs,
        max_channels=2,
        min_sr=max_sr,
        extend=False,
    )
    result = np.concatenate(fitted, axis=1, dtype=np.float64)
    print(_i18n("concatenate_complete"))
    return convert_to_dtype(result, dtype), max_sr


def trim(y: np.ndarray, start: int = 0, end: int = -1) -> np.ndarray:
    """
    Cut a slice of samples out of an audio array.

    Args:
        y: Audio array
        start: First sample to keep
        end: Sample position to stop at; values that are not in the range
            ``1..samples`` mean "up to the last sample"

    Returns:
        The trimmed array (the input itself if its layout is not recognised)
    """
    _, samples, array_index, flatten = get_info_array(y)

    # Any `end` outside 1..samples falls back to the full length.
    stop = end if 0 < end <= samples else samples

    if flatten:
        return y[start:stop]
    if array_index == 0:
        return y[start:stop, :]
    if array_index == 1:
        return y[:, start:stop]
    return y


def reverse(y: np.ndarray) -> np.ndarray:
    """
    Reverse an audio array.

    Args:
        y: Audio array

    Returns:
        The reversed array
    """
    _, _, array_index, flatten = get_info_array(y)
    # A flat array is flipped as a whole; otherwise only along the axis
    # reported by get_info_array.
    flip_axis = None if flatten else array_index
    return np.flip(y, axis=flip_axis)


def mix_arrays(
    arrays: list[np.ndarray], 
    srs: list[int], 
    target_sr: int, 
    index: int = -1, 
    dtype: DTypeLike = np.float32
) -> Tuple[np.ndarray, int]:
    """
    Mix several audio arrays (sum followed by normalisation).

    Args:
        arrays: Arrays to mix
        srs: Sample rates, one per array
        target_sr: Target sample rate
        index: Axis layout of the output: 0 transposes the mixed result,
            1 leaves it as is, -1 (default) reshapes it to
            (channels, samples)
        dtype: Data type of the returned array

    Returns:
        Tuple (mixed array, sample rate)

    Raises:
        Exception: If ``arrays`` and ``srs`` differ in length, or if there
            is nothing to mix
    """
    if len(arrays) != len(srs):
        raise Exception(_i18n("arrays_srs_mismatch"))
    
    if len(arrays) == 0:
        raise Exception(_i18n("no_arrays_to_mix"))
    
    # Mixing is done in float32
    arrays_float = [convert_to_dtype(item, np.float32) for item in arrays]
    
    # At least stereo, or more if any input has more channels
    channel_counts = [get_info_array(item)[0] for item in arrays_float]
    max_channels = max([2, *channel_counts])
    
    # Same sample rate, channel count and (padded) length for every input
    fitted_arrays = list(fit_arrays(
        arrays_float, 
        srs, 
        max_channels=max_channels, 
        min_sr=target_sr,
        extend=True
    ))
    num_arrays = len(fitted_arrays)
    
    # Sum the inputs one by one, starting from a copy of the first
    mixed = None
    for fitted in fitted_arrays:
        mixed = fitted.copy() if mixed is None else mixed + fitted
    
    # Average to avoid clipping
    mixed = mixed / num_arrays
    
    # Scale the peak down to 0.95 if it is above that level
    max_peak = np.max(np.abs(mixed))
    if max_peak > 0.95:
        mixed = mixed * (0.95 / max_peak)
    
    result = convert_to_dtype(mixed, dtype)
    
    if index == -1:
        # Default layout: (channels, samples)
        result = reshape(result, shape=("channels", "samples"))
    else:
        _, _, _, flatten = get_info_array(result)
        # index 0 -> (samples, channels); index 1 keeps the current layout
        if index == 0 and not flatten:
            result = result.T
    
    print(_i18n("mix_complete", count=num_arrays))
    return result, target_sr


def write(
    path: str | Path, 
    y: np.ndarray, 
    sr: int, 
    bitrate: Union[int, str] = 320, 
    prefer_float: bool = False
) -> str:
    """
    Write audio to a file by piping raw samples into ffmpeg.

    Args:
        path: Destination path (parent directories are created)
        y: Audio array
        sr: Sample rate
        bitrate: Bitrate; capped for .ogg and .opus and always kept within
            32..320
        prefer_float: Passed to ``get_codec_args`` (prefer a float format)

    Returns:
        Path of the written file in POSIX form

    Raises:
        Exception: If ``sr`` is falsy or ffmpeg exits with a non-zero code
    """
    output_path = Path(path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    
    if not sr:
        raise Exception(_i18n("sr_required"))
    
    sr = int(sr)

    source_dtype = y.dtype
    channels, *_ = get_info_array(y)
    y = reshape(y, shape=("samples", "channels"))
    
    # Dtypes without a raw ffmpeg sample format are sent as 32-bit float
    sample_format = SAMPLE_FORMATS_DICT.get(str(source_dtype), None)
    if not sample_format:
        sample_format = "f32le"
        y = convert_to_dtype(y, np.float32)
    
    # Replace NaN and +/-inf with zeros before handing samples to ffmpeg
    y = np.nan_to_num(y, nan=0, posinf=0, neginf=0)
    
    suffix = output_path.suffix
    bitrate_val = bitrate_to_int(bitrate)
    if suffix == ".ogg":
        bitrate_val = min(bitrate_val, get_ogg_bitrate(sr, channels))
    elif suffix == ".opus":
        bitrate_val = min(bitrate_val, 256 * channels)
    
    # Final clamp to the 32..320 range
    bitrate_fixed = max(min(bitrate_val, 320), 32)

    output_path_str = output_path.as_posix()

    cmd = [
        ffmpeg_path, "-y",
        "-f", sample_format,
        "-ar", str(sr),
        "-ac", str(channels),
        "-i", "-",
        *get_codec_args(suffix, prefer_float),
        "-ab", f"{bitrate_fixed}k",
        output_path_str,
    ]

    process = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=None,
        stderr=subprocess.PIPE,
        bufsize=10**8
    )

    try:
        # Raw interleaved samples go to stdin; stderr is kept for diagnostics
        _, stderr_data = process.communicate(input=y.tobytes())
        
        if process.returncode != 0:
            error_msg = stderr_data.decode('utf-8', errors='ignore')
            print(_i18n("ffmpeg_error", error=error_msg))
            raise Exception(_i18n("ffmpeg_exit_code", code=process.returncode))
            
    except Exception as e:
        print(_i18n("write_critical_error", error=str(e)))
        process.kill()
        raise e

    return output_path_str


def multiwrite(
    arrays: Union[Tuple[np.ndarray, ...], List[np.ndarray]], 
    srs: Union[Tuple[int, ...], List[int]], 
    paths: Union[Tuple[str | Path, ...], List[str | Path]], 
    bitrate: Union[int, str] = 320, 
    prefer_float: bool = False, 
    callable_func: Optional[Callable] = None, 
    strict: bool = False
) -> Tuple[str, ...]:
    """
    Write several audio arrays to files.

    Nothing is written unless ``arrays``, ``srs`` and ``paths`` all have the
    same length.

    Args:
        arrays: Sequence of arrays
        srs: Sequence of sample rates
        paths: Sequence of destination paths
        bitrate: Bitrate
        prefer_float: Prefer a float format
        callable_func: Callback invoked with each path before it is written
        strict: If True, the first write error is re-raised immediately;
            otherwise errors are printed and collected

    Returns:
        Tuple of the paths that were written

    Raises:
        Exception: If no file was written at all, or on the first failure
            when ``strict`` is True
    """
    written = []
    failures = []
    
    if len(arrays) == len(srs) == len(paths):
        jobs = list(zip(arrays, srs, paths))
        for array, sr, path in tqdm(jobs, desc=_i18n("multi_writing"), unit=_i18n("arrays")):
            if callable_func is not None:
                callable_func(path)
            try:
                saved = write(path, array, sr, bitrate=bitrate, prefer_float=prefer_float)
            except Exception as e:
                if strict:
                    raise Exception(str(e))
                print(_i18n("write_error", error=str(e)))
                failures.append(str(e))
            else:
                written.append(saved)
    
    if not written:
        raise Exception(_i18n("no_files_written", errors='\n'.join(failures)))
    
    return tuple(written)

def get_audio_files_from_list(input_paths: Union[str | Path, List[str | Path]], only_files: bool = False) -> List[str]:
    """
    Collect audio files from the given paths.

    Args:
        input_paths: A file or directory path, or a list of such paths
        only_files: If True, directories are skipped entirely and only the
            explicitly listed files are considered; if False, directories
            are searched recursively

    Returns:
        POSIX-style paths of the files accepted by ``check``
    """
    if isinstance(input_paths, (str, Path)):
        input_paths = [input_paths]

    input_list: List[str] = []

    for raw_path in input_paths:
        p = Path(raw_path)

        if p.is_dir():
            if only_files:
                continue
            # Record each matching file itself, not the directory that
            # is being walked.
            input_list.extend(
                file.as_posix()
                for file in p.rglob('*')
                if file.is_file() and check(file)
            )
        elif p.is_file() and check(p):
            input_list.append(p.as_posix())

    return input_list