Исправлена проблема с целочисленными типами данных при объединении каналов в моно в методе read

Browse files

Files changed (1) hide show

mvsepless/audio.py +789 -789

mvsepless/audio.py CHANGED Viewed

@@ -1,789 +1,789 @@
-import os
-from pathlib import Path
-import sys
-import json
-import subprocess
-import numpy as np
-from typing import Literal
-from collections.abc import Callable
-from pathlib import Path
-from numpy.typing import DTypeLike
-import tempfile
-import librosa
-if not __package__:
-    from namer import Namer
-else:
-    from .namer import Namer
-class NotInputFileSpecified(Exception):
-    pass
-class NotOutputFileSpecified(Exception):
-    pass
-class NotSupportedDataType(Exception):
-    pass
-class ErrorDecode(Exception):
-    pass
-class ErrorEncode(Exception):
-    pass
-class NotSupportedFormat(Exception):
-    pass
-class SampleRateError(Exception):
-    pass
-class FileIsNotAudio(Exception):
-    pass
-class Audio(Namer):
-    def __init__(self):
-        super().__init__()
-        self.ffmpeg_path = os.environ.get("MVSEPLESS_FFMPEG", "ffmpeg")
-        self.ffprobe_path = os.environ.get("MVSEPLESS_FFPROBE", "ffprobe")
-        self.output_formats = (
-            "mp3",
-            "wav",
-            "flac",
-            "ogg",
-            "opus",
-            "m4a",
-            "aac",
-            "ac3",
-            "aiff",
-        )
-        self.input_formats = (
-            "mp3",
-            "wav",
-            "flac",
-            "ogg",
-            "opus",
-            "m4a",
-            "aac",
-            "ac3",
-            "aiff",
-            "mp4",
-            "mkv",
-            "webm",
-            "avi",
-            "mov",
-            "ts",
-        )
-        self.supported_dtypes = ("int16", "int32", "float32", "float64")
-        self.dtypes_dict = {
-            "int16": "s16le",
-            "int32": "s32le",
-            "float32": "f32le",
-            "float64": "f64le",
-            np.int16: "s16le",
-            np.int32: "s32le",
-            np.float32: "f32le",
-            np.float64: "f64le",
-        }
-        self.bitrate_limit = {
-            "mp3": {"min": 8, "max": 320},
-            "aac": {"min": 8, "max": 512},
-            "m4a": {"min": 8, "max": 512},
-            "ac3": {"min": 32, "max": 640},
-            "ogg": {"min": 64, "max": 500},
-            "opus": {"min": 6, "max": 512},
-        }
-        self.sample_rates = {
-            "mp3": {
-                "supported": (
-                    44100,
-                    48000,
-                    32000,
-                    22050,
-                    24000,
-                    16000,
-                    11025,
-                    12000,
-                    8000,
-                )
-            },
-            "opus": {"supported": (48000, 24000, 16000, 12000, 8000)},
-            "m4a": {
-                "supported": (
-                    96000,
-                    88200,
-                    64000,
-                    48000,
-                    44100,
-                    32000,
-                    24000,
-                    22050,
-                    16000,
-                    12000,
-                    11025,
-                    8000,
-                    7350,
-                )
-            },
-            "aac": {
-                "supported": (
-                    96000,
-                    88200,
-                    64000,
-                    48000,
-                    44100,
-                    32000,
-                    24000,
-                    22050,
-                    16000,
-                    12000,
-                    11025,
-                    8000,
-                    7350,
-                )
-            },
-            "ac3": {
-                "supported": (
-                    48000,
-                    44100,
-                    32000,
-                )
-            },
-            "ogg": {"min": 6, "max": 192000},
-            "wav": {"min": 0, "max": float("inf")},
-            "aiff": {"min": 0, "max": float("inf")},
-            "flac": {"min": 0, "max": 192000},
-        }
-        self.check_ffmpeg()
-        self.check_ffprobe()
-    def check_ffmpeg(self):
-        try:
-            ffmpeg_version_output = subprocess.check_output(
-                [self.ffmpeg_path, "-version"], text=True
-            )
-        except FileNotFoundError:
-            if "PYTEST_CURRENT_TEST" not in os.environ:
-                raise FileNotFoundError(
-                    "FFMPEG не установлен. Укажите путь к установленному FFMPEG через переменную окружения MVSEPLESS_FFMPEG"
-                )
-    def check_ffprobe(self):
-        try:
-            ffmpeg_version_output = subprocess.check_output(
-                [self.ffprobe_path, "-version"], text=True
-            )
-        except FileNotFoundError:
-            if "PYTEST_CURRENT_TEST" not in os.environ:
-                raise FileNotFoundError(
-                    "FFPROBE не установлен. Укажите путь к установленному FFPROBE через переменную окружения MVSEPLESS_FFPROBE"
-                )
-    def fit_sr(
-        self,
-        f: (
-            str
-            | Literal["mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff"]
-        ) = "mp3",
-        sr: int = 44100,
-    ) -> int:
-        format_info = self.sample_rates.get(f.lower())
-        if not format_info:
-            return None
-        if "supported" in format_info:
-            supported_rates = format_info["supported"]
-            if sr in supported_rates:
-                return sr
-            return min(supported_rates, key=lambda x: abs(x - sr))
-        elif "min" in format_info and "max" in format_info:
-            min_rate = format_info["min"]
-            max_rate = format_info["max"]
-            if sr < min_rate:
-                return min_rate
-            elif sr > max_rate:
-                return max_rate
-            else:
-                return sr
-        return None
-    def fit_br(
-        self,
-        f: (
-            str
-            | Literal["mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff"]
-        ) = "mp3",
-        br: int = 320,
-    ) -> int:
-        if f not in self.bitrate_limit:
-            raise NotSupportedFormat(f"Формат {f} не поддерживается")
-        limits = self.bitrate_limit[f]
-        if br < limits["min"]:
-            return limits["min"]
-        elif br > limits["max"]:
-            return limits["max"]
-        else:
-            return br
-    def get_info(
-        self,
-        i: str | os.PathLike | Callable | None = None,
-    ) -> dict[int, dict[int, float]]:
-        audio_info = {}
-        if i:
-            if isinstance(i, Path):
-                i = str(i)
-            if os.path.exists(i):
-                cmd = [
-                    self.ffprobe_path,
-                    "-i",
-                    i,
-                    "-v",
-                    "quiet",
-                    "-hide_banner",
-                    "-show_entries",
-                    "stream=index,sample_rate,duration",
-                    "-select_streams",
-                    "a",
-                    "-of",
-                    "json",
-                ]
-                process = subprocess.Popen(
-                    cmd,
-                    stdin=subprocess.PIPE,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                )
-                stdout, stderr = process.communicate()
-                if process.returncode != 0:
-                    print(f"STDERR: {stderr.decode('utf-8')}")
-                    print(f"STDOUT: {stdout.decode('utf-8')}")
-                json_output = json.loads(stdout)
-                streams = json_output["streams"]
-                if not streams:
-                    pass
-                else:
-                    for a, stream in enumerate(streams):
-                        audio_info[a] = {
-                            "sample_rate": int(stream.get("sample_rate", 0)),
-                            "duration": float(stream.get("duration", 0)),
-                        }
-                return audio_info
-            else:
-                raise FileExistsError("Указанного файла не существует")
-        else:
-            raise NotInputFileSpecified("Не указан путь к файлу")
-    def check(self, i: str | os.PathLike | Callable | None = None) -> bool:
-        if i:
-            if isinstance(i, Path):
-                i = str(i)
-            if os.path.exists(i):
-                info = self.get_info(i=i)
-                if info:
-                    list_streams = list(info.keys())
-                    if len(list_streams) > 0:
-                        if info[0].get("sample_rate") > 0:
-                            return True
-                        else:
-                            return False
-                    else:
-                        return False
-                else:
-                    return False
-            else:
-                raise FileExistsError("Указанного файла не существует")
-        else:
-            raise NotInputFileSpecified("Не указан путь к файлу")
-    def read(
-        self,
-        i: str | os.PathLike | Callable | None = None,
-        sr: int | None = None,
-        mono: bool = False,
-        dtype: DTypeLike = np.float32,
-        s: int = 0,
-    ) -> tuple[np.ndarray, int, float]:
-        output_format = self.dtypes_dict.get(dtype, None)
-        if not output_format:
-            raise NotSupportedDataType(f"Этот тип данных не поддерживается {dtype}")
-        if i:
-            if isinstance(i, Path):
-                i = str(i)
-            if os.path.exists(i):
-                audio_info = self.get_info(i=i)
-                list_streams = list(audio_info.keys())
-                if audio_info.get(s, False):
-                    stream = s
-                else:
-                    if len(list_streams) > 0:
-                        stream = 0
-                    else:
-                        raise FileIsNotAudio("В входном файле нет аудио потоков")
-                sample_rate_input = audio_info[stream]["sample_rate"]
-                if sample_rate_input == 0:
-                    raise FileIsNotAudio("В входном файле нет аудио потоков")
-                cmd = [
-                    self.ffmpeg_path,
-                    "-i",
-                    i,
-                    "-map",
-                    f"0:a:{stream}",
-                    "-vn",
-                    "-f",
-                    output_format,
-                    "-ac",
-                    "1" if mono else "2",
-                ]
-                if sr:
-                    cmd.extend(["-ar", str(sr)])
-                else:
-                    sr = sample_rate_input
-                cmd.append("pipe:1")
-                process = subprocess.Popen(
-                    cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=10**8
-                )
-                try:
-                    raw_audio, stderr = process.communicate(timeout=300)
-                    if process.returncode != 0:
-                        raise ErrorDecode(f"FFmpeg error: {stderr.decode()}")
-                except subprocess.TimeoutExpired:
-                    process.kill()
-                    raise ErrorDecode("FFmpeg timeout при чтении файла")
-                audio_array = np.frombuffer(raw_audio, dtype=dtype)
-                channels = 1 if mono else 2
-                audio_array = audio_array.reshape((-1, channels)).T
-                if audio_array.ndim > 1 and channels == 1:
-                    audio_array = np.mean(
-                        audio_array, axis=tuple(range(audio_array.ndim - 1))
-                    )
-                len_samples = float(audio_array.shape[-1])
-                duration = len_samples / sr
-                print(f"Частота дискретизации: {sr}")
-                return audio_array.copy(), sr, duration
-            else:
-                raise FileExistsError("Указанного файла не существует")
-        else:
-            raise NotInputFileSpecified("Не указан путь к файлу")
-    def write(
-        self,
-        o: str | os.PathLike | Callable | None = None,
-        array: np.ndarray = np.array([], dtype=np.float32),
-        sr: int = 44100,
-        of: (
-            str
-            | Literal["mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff"]
-            | None
-        ) = None,
-        br: str | int | None = None,
-    ) -> str:
-        if isinstance(array, np.ndarray):
-            if len(array.shape) == 1:
-                array = array.reshape(-1, 1)
-            elif len(array.shape) == 2:
-                if array.shape[0] == 2:
-                    array = array.T
-            else:
-                raise ValueError(
-                    "numpy-массив должен быть либо одномерным, либо двухмерным"
-                )
-            if array.dtype == np.int16:
-                input_format = "s16le"
-            elif array.dtype == np.int32:
-                input_format = "s32le"
-            elif array.dtype == np.float32:
-                input_format = "f32le"
-            elif array.dtype == np.float64:
-                input_format = "f64le"
-            else:
-                raise NotSupportedDataType(
-                    f"Этот тип данных не поддерживается {array.dtype}"
-                )
-            if array.shape[1] == 1:
-                audio_bytes = array.tobytes()
-                channels = 1
-            elif array.shape[1] == 2:
-                audio_bytes = array.tobytes()
-                channels = 2
-            else:
-                raise ValueError("numpy-массив должен содержать 1 или 2 канала")
-        else:
-            raise ValueError("Вход должен быть numpy-массивом")
-        if o:
-            if isinstance(o, Path):
-                o = str(o)
-            output_dir = os.path.dirname(o)
-            output_base = os.path.basename(o)
-            output_name, output_ext = os.path.splitext(output_base)
-            if output_dir != "":
-                os.makedirs(output_dir, exist_ok=True)
-            if output_ext == "":
-                if of:
-                    o += f".{of}"
-                else:
-                    o += f".mp3"
-            elif output_ext == ".":
-                if of:
-                    o += f"{of}"
-                else:
-                    o += f"mp3"
-        else:
-            raise NotOutputFileSpecified("Не указан путь к выходному файлу")
-        if of:
-            if of in self.output_formats:
-                output_name, output_ext = os.path.splitext(o)
-                if output_ext == f".{of}":
-                    pass
-                else:
-                    o = f"{os.path.join(output_dir, output_name)}.{of}"
-            else:
-                raise NotSupportedFormat(f"Неподдерживаемый формат: {of}")
-        else:
-            of = os.path.splitext(o)[1].strip(".")
-            if of in self.output_formats:
-                pass
-            else:
-                raise NotSupportedFormat(f"Неподдерживаемый формат: {of}")
-        if sr:
-            if isinstance(sr, int):
-                sample_rate_fixed = self.fit_sr(f=of, sr=sr)
-            elif isinstance(sr, float):
-                sr = int(sr)
-                sample_rate_fixed = self.fit_sr(f=of, sr=sr)
-            else:
-                raise SampleRateError(
-                    f"Частота дискретизации должна быть числом\n\nЗначение: {sr}\nТип: {type(sr)}"
-                )
-        else:
-            raise SampleRateError("Не указана частота дискретизации")
-        bitrate_fixed = "320k"
-        if of not in ["wav", "flac", "aiff"]:
-            if br:
-                if isinstance(br, int):
-                    bitrate_fixed = self.fit_br(f=of, br=br)
-                elif isinstance(br, float):
-                    bitrate_fixed = self.fit_br(f=of, br=int(br))
-                elif isinstance(br, str):
-                    bitrate_fixed = self.fit_br(f=of, br=int(br.strip("k").strip("K")))
-                else:
-                    bitrate_fixed = self.fit_br(f=of, br=320)
-            else:
-                bitrate_fixed = self.fit_br(of, 320)
-        format_settings = {
-            "wav": [
-                "-c:a",
-                "pcm_f32le",
-                "-sample_fmt",
-                "flt",
-            ],
-            "aiff": [
-                "-c:a",
-                "pcm_f32be",
-                "-sample_fmt",
-                "flt",
-            ],
-            "flac": [
-                "-c:a",
-                "flac",
-                "-compression_level",
-                "12",
-                "-sample_fmt",
-                "s32",
-            ],
-            "mp3": [
-                "-c:a",
-                "libmp3lame",
-                "-b:a",
-                f"{bitrate_fixed}k",
-            ],
-            "ogg": [
-                "-c:a",
-                "libvorbis",
-                "-b:a",
-                f"{bitrate_fixed}k",
-            ],
-            "opus": [
-                "-c:a",
-                "libopus",
-                "-b:a",
-                f"{bitrate_fixed}k",
-            ],
-            "m4a": [
-                "-c:a",
-                "aac",
-                "-b:a",
-                f"{bitrate_fixed}k",
-            ],
-            "aac": [
-                "-c:a",
-                "aac",
-                "-b:a",
-                f"{bitrate_fixed}k",
-            ],
-            "ac3": [
-                "-c:a",
-                "ac3",
-                "-b:a",
-                f"{bitrate_fixed}k",
-            ],
-        }
-        cmd = [
-            self.ffmpeg_path,
-            "-y",
-            "-f",
-            input_format,
-            "-ar",
-            str(sr),
-            "-ac",
-            str(channels),
-            "-i",
-            "pipe:0",
-            "-ac",
-            str(channels),
-        ]
-        cmd.extend(["-ar", str(sample_rate_fixed)])
-        cmd.extend(format_settings[of])
-        o_dir, o_base = os.path.split(o)
-        o_base_n, o_base_ext = os.path.splitext(o_base)
-        o_base_n = self.sanitize(o_base_n)
-        o_base_n = self.short(o_base_n)
-        o = os.path.join(o_dir, f"{o_base_n}{o_base_ext}")
-        o = self.iter(o)
-        cmd.append(o)
-        process = subprocess.Popen(
-            cmd,
-            stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
-        try:
-            stdout, stderr = process.communicate(input=audio_bytes, timeout=300)
-        except subprocess.TimeoutExpired:
-            process.kill()
-            raise ErrorEncode("FFmpeg timeout: операция заняла слишком много времени")
-        if process.returncode != 0:
-            raise ErrorEncode(
-                f"FFmpeg завершился с ошибкой (код: {process.returncode})"
-            )
-        return os.path.abspath(o)
-class Inverter(Audio):
-    def __init__(self):
-        super().__init__()
-        self.test = "test"
-        self.w_types = [
-            "boxcar",
-            "triang",
-            "blackman",
-            "hamming",
-            "hann",
-            "bartlett",
-            "flattop",
-            "parzen",
-            "bohman",
-            "blackmanharris",
-            "nuttall",
-            "barthann",
-            "cosine",
-            "exponential",
-            "tukey",
-            "taylor",
-            "lanczos",
-        ]
-    def load_audio(self, filepath):
-        try:
-            y, sr, _ = self.read(i=filepath, sr=None, mono=False)
-            return y, sr
-        except Exception as e:
-            print(f"Ошибка загрузки аудио: {e}")
-            return None, None
-    def process_channel(
-        self, y1_ch, y2_ch, sr, method, w_size=2048, overlap=2, w_type="hann"
-    ):
-        HOP_LENGTH = w_size // overlap
-        if method == "waveform":
-            return y1_ch - y2_ch
-        elif method == "spectrogram":
-            S1 = librosa.stft(
-                y1_ch, n_fft=w_size, hop_length=HOP_LENGTH, win_length=w_size
-            )
-            S2 = librosa.stft(
-                y2_ch, n_fft=w_size, hop_length=HOP_LENGTH, win_length=w_size
-            )
-            mag1 = np.abs(S1)
-            mag2 = np.abs(S2)
-            mag_result = np.maximum(mag1 - mag2, 0)
-            phase = np.angle(S1)
-            S_result = mag_result * np.exp(1j * phase)
-            return librosa.istft(
-                S_result,
-                n_fft=w_size,
-                hop_length=HOP_LENGTH,
-                win_length=w_size,
-                length=len(y1_ch),
-            )
-    def process_audio(
-        self,
-        audio1_path,
-        audio2_path,
-        out_format,
-        method,
-        output_path="./inverted.mp3",
-        w_size=2048,
-        overlap=2,
-        w_type="hann",
-    ):
-        y1, sr1 = self.load_audio(audio1_path)
-        y2, sr2 = self.load_audio(audio2_path)
-        if sr1 is None or sr2 is None:
-            raise Exception("Произошла ошибка при чтении файлов")
-        channels1 = 1 if y1.ndim == 1 else y1.shape[0]
-        channels2 = 1 if y2.ndim == 1 else y2.shape[0]
-        if channels1 > 1:
-            y1 = y1.T
-        else:
-            y1 = y1.reshape(-1, 1)
-        if channels2 > 1:
-            y2 = y2.T
-        else:
-            y2 = y2.reshape(-1, 1)
-        if sr1 != sr2:
-            if channels2 > 1:
-                y2_resampled_list = []
-                for c in range(channels2):
-                    channel_resampled = librosa.resample(
-                        y2[:, c], orig_sr=sr2, target_sr=sr1
-                    )
-                    y2_resampled_list.append(channel_resampled)
-                min_channel_length = min(len(ch) for ch in y2_resampled_list)
-                y2_resampled = np.zeros(
-                    (min_channel_length, channels2), dtype=np.float32
-                )
-                for c, channel in enumerate(y2_resampled_list):
-                    y2_resampled[:, c] = channel[:min_channel_length]
-                y2 = y2_resampled
-            else:
-                y2 = librosa.resample(y2[:, 0], orig_sr=sr2, target_sr=sr1)
-                y2 = y2.reshape(-1, 1)
-            sr2 = sr1
-        min_len = min(len(y1), len(y2))
-        y1 = y1[:min_len]
-        y2 = y2[:min_len]
-        result_channels = []
-        if channels1 == 1 and channels2 > 1:
-            y2 = y2.mean(axis=1, keepdims=True)
-            channels2 = 1
-        for c in range(channels1):
-            y1_ch = y1[:, c]
-            if channels2 == 1:
-                y2_ch = y2[:, 0]
-            else:
-                y2_ch = y2[:, min(c, channels2 - 1)]
-            result_ch = self.process_channel(
-                y1_ch, y2_ch, sr1, method, w_size=w_size, overlap=overlap, w_type=w_type
-            )
-            result_channels.append(result_ch)
-        if len(result_channels) > 1:
-            result = np.column_stack(result_channels)
-        else:
-            result = np.array(result_channels[0])
-        if result.ndim > 1:
-            for c in range(result.shape[1]):
-                channel = result[:, c]
-                max_val = np.max(np.abs(channel))
-                if max_val > 0:
-                    result[:, c] = channel * 0.9 / max_val
-        else:
-            max_val = np.max(np.abs(result))
-            if max_val > 0:
-                result = result * 0.9 / max_val
-        inverted = self.write(
-            o=output_path, array=result.T, sr=sr1, of=out_format, br="320k"
-        )
-        return inverted

+import os
+from pathlib import Path
+import sys
+import json
+import subprocess
+import numpy as np
+from typing import Literal
+from collections.abc import Callable
+from pathlib import Path
+from numpy.typing import DTypeLike
+import tempfile
+import librosa
+if not __package__:
+    from namer import Namer
+else:
+    from .namer import Namer
+class NotInputFileSpecified(Exception):
+    pass
+class NotOutputFileSpecified(Exception):
+    pass
+class NotSupportedDataType(Exception):
+    pass
+class ErrorDecode(Exception):
+    pass
+class ErrorEncode(Exception):
+    pass
+class NotSupportedFormat(Exception):
+    pass
+class SampleRateError(Exception):
+    pass
+class FileIsNotAudio(Exception):
+    pass
+class Audio(Namer):
+    def __init__(self):
+        super().__init__()
+        self.ffmpeg_path = os.environ.get("MVSEPLESS_FFMPEG", "ffmpeg")
+        self.ffprobe_path = os.environ.get("MVSEPLESS_FFPROBE", "ffprobe")
+        self.output_formats = (
+            "mp3",
+            "wav",
+            "flac",
+            "ogg",
+            "opus",
+            "m4a",
+            "aac",
+            "ac3",
+            "aiff",
+        )
+        self.input_formats = (
+            "mp3",
+            "wav",
+            "flac",
+            "ogg",
+            "opus",
+            "m4a",
+            "aac",
+            "ac3",
+            "aiff",
+            "mp4",
+            "mkv",
+            "webm",
+            "avi",
+            "mov",
+            "ts",
+        )
+        self.supported_dtypes = ("int16", "int32", "float32", "float64")
+        self.dtypes_dict = {
+            "int16": "s16le",
+            "int32": "s32le",
+            "float32": "f32le",
+            "float64": "f64le",
+            np.int16: "s16le",
+            np.int32: "s32le",
+            np.float32: "f32le",
+            np.float64: "f64le",
+        }
+        self.bitrate_limit = {
+            "mp3": {"min": 8, "max": 320},
+            "aac": {"min": 8, "max": 512},
+            "m4a": {"min": 8, "max": 512},
+            "ac3": {"min": 32, "max": 640},
+            "ogg": {"min": 64, "max": 500},
+            "opus": {"min": 6, "max": 512},
+        }
+        self.sample_rates = {
+            "mp3": {
+                "supported": (
+                    44100,
+                    48000,
+                    32000,
+                    22050,
+                    24000,
+                    16000,
+                    11025,
+                    12000,
+                    8000,
+                )
+            },
+            "opus": {"supported": (48000, 24000, 16000, 12000, 8000)},
+            "m4a": {
+                "supported": (
+                    96000,
+                    88200,
+                    64000,
+                    48000,
+                    44100,
+                    32000,
+                    24000,
+                    22050,
+                    16000,
+                    12000,
+                    11025,
+                    8000,
+                    7350,
+                )
+            },
+            "aac": {
+                "supported": (
+                    96000,
+                    88200,
+                    64000,
+                    48000,
+                    44100,
+                    32000,
+                    24000,
+                    22050,
+                    16000,
+                    12000,
+                    11025,
+                    8000,
+                    7350,
+                )
+            },
+            "ac3": {
+                "supported": (
+                    48000,
+                    44100,
+                    32000,
+                )
+            },
+            "ogg": {"min": 6, "max": 192000},
+            "wav": {"min": 0, "max": float("inf")},
+            "aiff": {"min": 0, "max": float("inf")},
+            "flac": {"min": 0, "max": 192000},
+        }
+        self.check_ffmpeg()
+        self.check_ffprobe()
+    def check_ffmpeg(self):
+        try:
+            ffmpeg_version_output = subprocess.check_output(
+                [self.ffmpeg_path, "-version"], text=True
+            )
+        except FileNotFoundError:
+            if "PYTEST_CURRENT_TEST" not in os.environ:
+                raise FileNotFoundError(
+                    "FFMPEG не установлен. Укажите путь к установленному FFMPEG через переменную окружения MVSEPLESS_FFMPEG"
+                )
+    def check_ffprobe(self):
+        try:
+            ffmpeg_version_output = subprocess.check_output(
+                [self.ffprobe_path, "-version"], text=True
+            )
+        except FileNotFoundError:
+            if "PYTEST_CURRENT_TEST" not in os.environ:
+                raise FileNotFoundError(
+                    "FFPROBE не установлен. Укажите путь к установленному FFPROBE через переменную окружения MVSEPLESS_FFPROBE"
+                )
+    def fit_sr(
+        self,
+        f: (
+            str
+            | Literal["mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff"]
+        ) = "mp3",
+        sr: int = 44100,
+    ) -> int:
+        format_info = self.sample_rates.get(f.lower())
+        if not format_info:
+            return None
+        if "supported" in format_info:
+            supported_rates = format_info["supported"]
+            if sr in supported_rates:
+                return sr
+            return min(supported_rates, key=lambda x: abs(x - sr))
+        elif "min" in format_info and "max" in format_info:
+            min_rate = format_info["min"]
+            max_rate = format_info["max"]
+            if sr < min_rate:
+                return min_rate
+            elif sr > max_rate:
+                return max_rate
+            else:
+                return sr
+        return None
+    def fit_br(
+        self,
+        f: (
+            str
+            | Literal["mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff"]
+        ) = "mp3",
+        br: int = 320,
+    ) -> int:
+        if f not in self.bitrate_limit:
+            raise NotSupportedFormat(f"Формат {f} не поддерживается")
+        limits = self.bitrate_limit[f]
+        if br < limits["min"]:
+            return limits["min"]
+        elif br > limits["max"]:
+            return limits["max"]
+        else:
+            return br
+    def get_info(
+        self,
+        i: str | os.PathLike | Callable | None = None,
+    ) -> dict[int, dict[int, float]]:
+        audio_info = {}
+        if i:
+            if isinstance(i, Path):
+                i = str(i)
+            if os.path.exists(i):
+                cmd = [
+                    self.ffprobe_path,
+                    "-i",
+                    i,
+                    "-v",
+                    "quiet",
+                    "-hide_banner",
+                    "-show_entries",
+                    "stream=index,sample_rate,duration",
+                    "-select_streams",
+                    "a",
+                    "-of",
+                    "json",
+                ]
+                process = subprocess.Popen(
+                    cmd,
+                    stdin=subprocess.PIPE,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                )
+                stdout, stderr = process.communicate()
+                if process.returncode != 0:
+                    print(f"STDERR: {stderr.decode('utf-8')}")
+                    print(f"STDOUT: {stdout.decode('utf-8')}")
+                json_output = json.loads(stdout)
+                streams = json_output["streams"]
+                if not streams:
+                    pass
+                else:
+                    for a, stream in enumerate(streams):
+                        audio_info[a] = {
+                            "sample_rate": int(stream.get("sample_rate", 0)),
+                            "duration": float(stream.get("duration", 0)),
+                        }
+                return audio_info
+            else:
+                raise FileExistsError("Указанного файла не существует")
+        else:
+            raise NotInputFileSpecified("Не указан путь к файлу")
+    def check(self, i: str | os.PathLike | Callable | None = None) -> bool:
+        if i:
+            if isinstance(i, Path):
+                i = str(i)
+            if os.path.exists(i):
+                info = self.get_info(i=i)
+                if info:
+                    list_streams = list(info.keys())
+                    if len(list_streams) > 0:
+                        if info[0].get("sample_rate") > 0:
+                            return True
+                        else:
+                            return False
+                    else:
+                        return False
+                else:
+                    return False
+            else:
+                raise FileExistsError("Указанного файла не существует")
+        else:
+            raise NotInputFileSpecified("Не указан путь к файлу")
+    def read(
+        self,
+        i: str | os.PathLike | Callable | None = None,
+        sr: int | None = None,
+        mono: bool = False,
+        dtype: DTypeLike = np.float32,
+        s: int = 0,
+    ) -> tuple[np.ndarray, int, float]:
+        output_format = self.dtypes_dict.get(dtype, None)
+        if not output_format:
+            raise NotSupportedDataType(f"Этот тип данных не поддерживается {dtype}")
+        if i:
+            if isinstance(i, Path):
+                i = str(i)
+            if os.path.exists(i):
+                audio_info = self.get_info(i=i)
+                list_streams = list(audio_info.keys())
+                if audio_info.get(s, False):
+                    stream = s
+                else:
+                    if len(list_streams) > 0:
+                        stream = 0
+                    else:
+                        raise FileIsNotAudio("В входном файле нет аудио потоков")
+                sample_rate_input = audio_info[stream]["sample_rate"]
+                if sample_rate_input == 0:
+                    raise FileIsNotAudio("В входном файле нет аудио потоков")
+                cmd = [
+                    self.ffmpeg_path,
+                    "-i",
+                    i,
+                    "-map",
+                    f"0:a:{stream}",
+                    "-vn",
+                    "-f",
+                    output_format,
+                    "-ac",
+                    "1" if mono else "2",
+                ]
+                if sr:
+                    cmd.extend(["-ar", str(sr)])
+                else:
+                    sr = sample_rate_input
+                cmd.append("pipe:1")
+                process = subprocess.Popen(
+                    cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=10**8
+                )
+                try:
+                    raw_audio, stderr = process.communicate(timeout=300)
+                    if process.returncode != 0:
+                        raise ErrorDecode(f"FFmpeg error: {stderr.decode()}")
+                except subprocess.TimeoutExpired:
+                    process.kill()
+                    raise ErrorDecode("FFmpeg timeout при чтении файла")
+                audio_array = np.frombuffer(raw_audio, dtype=dtype)
+                channels = 1 if mono else 2
+                audio_array = audio_array.reshape((-1, channels)).T
+                if audio_array.ndim > 1 and channels == 1:
+                    audio_array = np.mean(
+                        audio_array, axis=tuple(range(audio_array.ndim - 1)), dtype=dtype
+                    )
+                len_samples = float(audio_array.shape[-1])
+                duration = len_samples / sr
+                print(f"Частота дискретизации: {sr}")
+                return audio_array.copy(), sr, duration
+            else:
+                raise FileExistsError("Указанного файла не существует")
+        else:
+            raise NotInputFileSpecified("Не указан путь к файлу")
+    def write(
+        self,
+        o: str | os.PathLike | Callable | None = None,
+        array: np.ndarray = np.array([], dtype=np.float32),
+        sr: int = 44100,
+        of: (
+            str
+            | Literal["mp3", "wav", "flac", "ogg", "opus", "m4a", "aac", "ac3", "aiff"]
+            | None
+        ) = None,
+        br: str | int | None = None,
+    ) -> str:
+        if isinstance(array, np.ndarray):
+            if len(array.shape) == 1:
+                array = array.reshape(-1, 1)
+            elif len(array.shape) == 2:
+                if array.shape[0] == 2:
+                    array = array.T
+            else:
+                raise ValueError(
+                    "numpy-массив должен быть либо одномерным, либо двухмерным"
+                )
+            if array.dtype == np.int16:
+                input_format = "s16le"
+            elif array.dtype == np.int32:
+                input_format = "s32le"
+            elif array.dtype == np.float32:
+                input_format = "f32le"
+            elif array.dtype == np.float64:
+                input_format = "f64le"
+            else:
+                raise NotSupportedDataType(
+                    f"Этот тип данных не поддерживается {array.dtype}"
+                )
+            if array.shape[1] == 1:
+                audio_bytes = array.tobytes()
+                channels = 1
+            elif array.shape[1] == 2:
+                audio_bytes = array.tobytes()
+                channels = 2
+            else:
+                raise ValueError("numpy-массив должен содержать 1 или 2 канала")
+        else:
+            raise ValueError("Вход должен быть numpy-массивом")
+        if o:
+            if isinstance(o, Path):
+                o = str(o)
+            output_dir = os.path.dirname(o)
+            output_base = os.path.basename(o)
+            output_name, output_ext = os.path.splitext(output_base)
+            if output_dir != "":
+                os.makedirs(output_dir, exist_ok=True)
+            if output_ext == "":
+                if of:
+                    o += f".{of}"
+                else:
+                    o += f".mp3"
+            elif output_ext == ".":
+                if of:
+                    o += f"{of}"
+                else:
+                    o += f"mp3"
+        else:
+            raise NotOutputFileSpecified("Не указан путь к выходному файлу")
+        if of:
+            if of in self.output_formats:
+                output_name, output_ext = os.path.splitext(o)
+                if output_ext == f".{of}":
+                    pass
+                else:
+                    o = f"{os.path.join(output_dir, output_name)}.{of}"
+            else:
+                raise NotSupportedFormat(f"Неподдерживаемый формат: {of}")
+        else:
+            of = os.path.splitext(o)[1].strip(".")
+            if of in self.output_formats:
+                pass
+            else:
+                raise NotSupportedFormat(f"Неподдерживаемый формат: {of}")
+        if sr:
+            if isinstance(sr, int):
+                sample_rate_fixed = self.fit_sr(f=of, sr=sr)
+            elif isinstance(sr, float):
+                sr = int(sr)
+                sample_rate_fixed = self.fit_sr(f=of, sr=sr)
+            else:
+                raise SampleRateError(
+                    f"Частота дискретизации должна быть числом\n\nЗначение: {sr}\nТип: {type(sr)}"
+                )
+        else:
+            raise SampleRateError("Не указана частота дискретизации")
+        bitrate_fixed = "320k"
+        if of not in ["wav", "flac", "aiff"]:
+            if br:
+                if isinstance(br, int):
+                    bitrate_fixed = self.fit_br(f=of, br=br)
+                elif isinstance(br, float):
+                    bitrate_fixed = self.fit_br(f=of, br=int(br))
+                elif isinstance(br, str):
+                    bitrate_fixed = self.fit_br(f=of, br=int(br.strip("k").strip("K")))
+                else:
+                    bitrate_fixed = self.fit_br(f=of, br=320)
+            else:
+                bitrate_fixed = self.fit_br(of, 320)
+        format_settings = {
+            "wav": [
+                "-c:a",
+                "pcm_f32le",
+                "-sample_fmt",
+                "flt",
+            ],
+            "aiff": [
+                "-c:a",
+                "pcm_f32be",
+                "-sample_fmt",
+                "flt",
+            ],
+            "flac": [
+                "-c:a",
+                "flac",
+                "-compression_level",
+                "12",
+                "-sample_fmt",
+                "s32",
+            ],
+            "mp3": [
+                "-c:a",
+                "libmp3lame",
+                "-b:a",
+                f"{bitrate_fixed}k",
+            ],
+            "ogg": [
+                "-c:a",
+                "libvorbis",
+                "-b:a",
+                f"{bitrate_fixed}k",
+            ],
+            "opus": [
+                "-c:a",
+                "libopus",
+                "-b:a",
+                f"{bitrate_fixed}k",
+            ],
+            "m4a": [
+                "-c:a",
+                "aac",
+                "-b:a",
+                f"{bitrate_fixed}k",
+            ],
+            "aac": [
+                "-c:a",
+                "aac",
+                "-b:a",
+                f"{bitrate_fixed}k",
+            ],
+            "ac3": [
+                "-c:a",
+                "ac3",
+                "-b:a",
+                f"{bitrate_fixed}k",
+            ],
+        }
+        cmd = [
+            self.ffmpeg_path,
+            "-y",
+            "-f",
+            input_format,
+            "-ar",
+            str(sr),
+            "-ac",
+            str(channels),
+            "-i",
+            "pipe:0",
+            "-ac",
+            str(channels),
+        ]
+        cmd.extend(["-ar", str(sample_rate_fixed)])
+        cmd.extend(format_settings[of])
+        o_dir, o_base = os.path.split(o)
+        o_base_n, o_base_ext = os.path.splitext(o_base)
+        o_base_n = self.sanitize(o_base_n)
+        o_base_n = self.short(o_base_n)
+        o = os.path.join(o_dir, f"{o_base_n}{o_base_ext}")
+        o = self.iter(o)
+        cmd.append(o)
+        process = subprocess.Popen(
+            cmd,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        try:
+            stdout, stderr = process.communicate(input=audio_bytes, timeout=300)
+        except subprocess.TimeoutExpired:
+            process.kill()
+            raise ErrorEncode("FFmpeg timeout: операция заняла слишком много времени")
+        if process.returncode != 0:
+            raise ErrorEncode(
+                f"FFmpeg завершился с ошибкой (код: {process.returncode})"
+            )
+        return os.path.abspath(o)
+class Inverter(Audio):
+    def __init__(self):
+        super().__init__()
+        self.test = "test"
+        self.w_types = [
+            "boxcar",
+            "triang",
+            "blackman",
+            "hamming",
+            "hann",
+            "bartlett",
+            "flattop",
+            "parzen",
+            "bohman",
+            "blackmanharris",
+            "nuttall",
+            "barthann",
+            "cosine",
+            "exponential",
+            "tukey",
+            "taylor",
+            "lanczos",
+        ]
+    def load_audio(self, filepath):
+        try:
+            y, sr, _ = self.read(i=filepath, sr=None, mono=False)
+            return y, sr
+        except Exception as e:
+            print(f"Ошибка загрузки аудио: {e}")
+            return None, None
+    def process_channel(
+        self, y1_ch, y2_ch, sr, method, w_size=2048, overlap=2, w_type="hann"
+    ):
+        HOP_LENGTH = w_size // overlap
+        if method == "waveform":
+            return y1_ch - y2_ch
+        elif method == "spectrogram":
+            S1 = librosa.stft(
+                y1_ch, n_fft=w_size, hop_length=HOP_LENGTH, win_length=w_size
+            )
+            S2 = librosa.stft(
+                y2_ch, n_fft=w_size, hop_length=HOP_LENGTH, win_length=w_size
+            )
+            mag1 = np.abs(S1)
+            mag2 = np.abs(S2)
+            mag_result = np.maximum(mag1 - mag2, 0)
+            phase = np.angle(S1)
+            S_result = mag_result * np.exp(1j * phase)
+            return librosa.istft(
+                S_result,
+                n_fft=w_size,
+                hop_length=HOP_LENGTH,
+                win_length=w_size,
+                length=len(y1_ch),
+            )
+    def process_audio(
+        self,
+        audio1_path,
+        audio2_path,
+        out_format,
+        method,
+        output_path="./inverted.mp3",
+        w_size=2048,
+        overlap=2,
+        w_type="hann",
+    ):
+        y1, sr1 = self.load_audio(audio1_path)
+        y2, sr2 = self.load_audio(audio2_path)
+        if sr1 is None or sr2 is None:
+            raise Exception("Произошла ошибка при чтении файлов")
+        channels1 = 1 if y1.ndim == 1 else y1.shape[0]
+        channels2 = 1 if y2.ndim == 1 else y2.shape[0]
+        if channels1 > 1:
+            y1 = y1.T
+        else:
+            y1 = y1.reshape(-1, 1)
+        if channels2 > 1:
+            y2 = y2.T
+        else:
+            y2 = y2.reshape(-1, 1)
+        if sr1 != sr2:
+            if channels2 > 1:
+                y2_resampled_list = []
+                for c in range(channels2):
+                    channel_resampled = librosa.resample(
+                        y2[:, c], orig_sr=sr2, target_sr=sr1
+                    )
+                    y2_resampled_list.append(channel_resampled)
+                min_channel_length = min(len(ch) for ch in y2_resampled_list)
+                y2_resampled = np.zeros(
+                    (min_channel_length, channels2), dtype=np.float32
+                )
+                for c, channel in enumerate(y2_resampled_list):
+                    y2_resampled[:, c] = channel[:min_channel_length]
+                y2 = y2_resampled
+            else:
+                y2 = librosa.resample(y2[:, 0], orig_sr=sr2, target_sr=sr1)
+                y2 = y2.reshape(-1, 1)
+            sr2 = sr1
+        min_len = min(len(y1), len(y2))
+        y1 = y1[:min_len]
+        y2 = y2[:min_len]
+        result_channels = []
+        if channels1 == 1 and channels2 > 1:
+            y2 = y2.mean(axis=1, keepdims=True)
+            channels2 = 1
+        for c in range(channels1):
+            y1_ch = y1[:, c]
+            if channels2 == 1:
+                y2_ch = y2[:, 0]
+            else:
+                y2_ch = y2[:, min(c, channels2 - 1)]
+            result_ch = self.process_channel(
+                y1_ch, y2_ch, sr1, method, w_size=w_size, overlap=overlap, w_type=w_type
+            )
+            result_channels.append(result_ch)
+        if len(result_channels) > 1:
+            result = np.column_stack(result_channels)
+        else:
+            result = np.array(result_channels[0])
+        if result.ndim > 1:
+            for c in range(result.shape[1]):
+                channel = result[:, c]
+                max_val = np.max(np.abs(channel))
+                if max_val > 0:
+                    result[:, c] = channel * 0.9 / max_val
+        else:
+            max_val = np.max(np.abs(result))
+            if max_val > 0:
+                result = result * 0.9 / max_val
+        inverted = self.write(
+            o=output_path, array=result.T, sr=sr1, of=out_format, br="320k"
+        )
+        return inverted