"""
전화 통화 품질 시뮬레이션 (PSTN)

깨끗한 녹음 오디오를 전화 통화 품질로 변환하여
AI Hub 등의 스튜디오 녹음 데이터를 학습용 통화 데이터로 전처리한다.

3단계 처리:
  1. 밴드패스 필터링 (300–3400 Hz) — ITU-T G.712
  2. 8 kHz 다운샘플링 (anti-aliasing 포함)
  3. G.711 비선형 양자화 (A-law / μ-law companding)
"""

import audioop
import random
from enum import Enum

import numpy as np
import scipy.signal as signal


class CompandingType(str, Enum):
    """G.711 companding law selector for the phone-quality simulation.

    Members subclass ``str``, so they compare equal to their plain string
    values (e.g. ``CompandingType.ALAW == "alaw"``).
    """

    # A-law.
    # NOTE(review): the original comment calls this the PSTN standard for
    # Korea/Europe/Asia — confirm the regional claim (Japan uses mu-law).
    ALAW = "alaw"

    # mu-law, the North American PSTN standard.
    ULAW = "ulaw"

    # Requests a random choice between A-law and mu-law, to diversify the
    # generated training data.
    RANDOM = "random"


class PhoneSimulator:
    """Convert clean recorded audio into telephone-call (PSTN) quality.

    The pipeline applies, in order:
      1. a 300-3400 Hz Butterworth band-pass at the original sample rate,
      2. resampling to 8 kHz,
      3. a G.711 A-law / mu-law encode -> decode round trip.
    """

    # PSTN parameters
    PSTN_LOW_FREQ = 300.0      # Hz — lower band edge (ITU-T G.712 voice band)
    PSTN_HIGH_FREQ = 3400.0    # Hz — upper band edge (ITU-T G.712 voice band)
    PSTN_SAMPLE_RATE = 8000    # Hz — G.711 sample rate
    FILTER_ORDER = 5           # Butterworth filter order

    def __init__(self, companding: CompandingType = CompandingType.RANDOM):
        """
        Args:
            companding: Quantization law. With ``RANDOM`` a law (A-law or
                mu-law) is drawn on every companding call. The equivalent
                plain strings ("alaw", "ulaw", "random") are accepted too.

        Raises:
            ValueError: If ``companding`` is not a valid ``CompandingType``.
        """
        # Normalise eagerly so a typo fails here instead of silently
        # selecting mu-law at processing time.
        self.companding = CompandingType(companding)

    def process(self, audio: np.ndarray, sr: int) -> tuple[np.ndarray, int]:
        """
        Apply the telephone-quality simulation.

        Args:
            audio: float32 mono numpy array (range: -1.0 ~ 1.0)
            sr: sample rate of ``audio`` in Hz

        Returns:
            (processed float32 audio, new sample rate = PSTN_SAMPLE_RATE)

        Raises:
            ValueError: If ``audio`` is not 1-D, if ``sr`` is not positive,
                or if ``sr`` is too low to hold the PSTN pass band.
        """
        if audio.ndim != 1:
            raise ValueError(f"Mono audio expected, got shape {audio.shape}")
        if sr <= 0:
            raise ValueError(f"Sample rate must be positive, got {sr}")

        # Nothing to filter or resample; keep the documented return contract.
        if audio.size == 0:
            return np.zeros(0, dtype=np.float32), self.PSTN_SAMPLE_RATE

        # Step 1: band-pass filtering (300-3400 Hz)
        audio = self._bandpass_filter(audio, sr)

        # Step 2: resampling to 8 kHz
        audio = self._downsample(audio, sr)

        # Step 3: G.711 companding (encode -> decode round trip)
        audio = self._compand(audio)

        return audio, self.PSTN_SAMPLE_RATE

    def _bandpass_filter(self, audio: np.ndarray, sr: int) -> np.ndarray:
        """
        Band-limit ``audio`` to the PSTN voice band with a zero-phase filter.

        A Butterworth band-pass of order ``FILTER_ORDER`` is designed as
        second-order sections and applied forward and backward
        (``sosfiltfilt``). The SOS form is used because the transfer-function
        (b, a) form of this filter is numerically fragile when the band edges
        are small relative to the sample rate (e.g. 300 Hz at 48 kHz).

        Raises:
            ValueError: If the sample rate is so low that the (clamped) pass
                band is empty.
        """
        if audio.size == 0:
            return audio.astype(np.float32)

        nyq = sr / 2.0
        low = self.PSTN_LOW_FREQ / nyq
        high = self.PSTN_HIGH_FREQ / nyq

        # Normalised edges must lie strictly inside (0, 1); clamp an edge that
        # falls at or beyond the Nyquist frequency (or at/below DC).
        if high >= 1.0:
            high = 0.99
        if low <= 0.0:
            low = 0.01
        if low >= high:
            raise ValueError(
                f"Sample rate {sr} Hz is too low for a "
                f"{self.PSTN_LOW_FREQ:g}-{self.PSTN_HIGH_FREQ:g} Hz band-pass"
            )

        sos = signal.butter(
            self.FILTER_ORDER, [low, high], btype="band", output="sos"
        )

        # sosfiltfilt requires len(audio) > padlen; shrink the edge padding
        # for very short clips instead of raising.
        padlen = min(3 * (2 * sos.shape[0] + 1), audio.size - 1)
        return signal.sosfiltfilt(sos, audio, padlen=padlen).astype(np.float32)

    def _downsample(self, audio: np.ndarray, sr: int) -> np.ndarray:
        """
        Resample ``audio`` from ``sr`` to ``PSTN_SAMPLE_RATE``.

        ``scipy.signal.resample_poly`` applies its own anti-aliasing low-pass
        filter as part of the polyphase resampling. The input is returned
        unchanged when it is already at the target rate or is empty.
        """
        if sr == self.PSTN_SAMPLE_RATE or audio.size == 0:
            return audio

        # Reduce the rate ratio to lowest terms for rational resampling.
        gcd = np.gcd(sr, self.PSTN_SAMPLE_RATE)
        up = int(self.PSTN_SAMPLE_RATE // gcd)
        down = int(sr // gcd)
        return signal.resample_poly(audio, up, down).astype(np.float32)

    def _compand(self, audio: np.ndarray) -> np.ndarray:
        """
        G.711 A-law / mu-law encode -> decode round trip.

        The 16-bit -> 8-bit -> 16-bit conversion introduces the non-linear
        quantization noise characteristic of telephone audio.

        Raises:
            ValueError: If ``self.companding`` is not a valid
                ``CompandingType``.
        """
        # Resolve the law; an unknown value raises instead of silently
        # falling through to mu-law.
        method = CompandingType(self.companding)
        if method is CompandingType.RANDOM:
            method = random.choice([CompandingType.ALAW, CompandingType.ULAW])

        # float32 -> 16-bit PCM (out-of-range samples are clipped)
        pcm16 = np.clip(audio * 32767, -32768, 32767).astype(np.int16)
        raw_bytes = pcm16.tobytes()

        # NOTE: `audioop` is deprecated since Python 3.11 and removed from the
        # standard library in Python 3.13 (PEP 594); this codec step needs a
        # replacement before the project moves to 3.13.
        # encode (16-bit -> 8-bit) -> decode (8-bit -> 16-bit) round trip
        if method is CompandingType.ALAW:
            compressed = audioop.lin2alaw(raw_bytes, 2)
            decompressed = audioop.alaw2lin(compressed, 2)
        else:
            compressed = audioop.lin2ulaw(raw_bytes, 2)
            decompressed = audioop.ulaw2lin(compressed, 2)

        # 16-bit PCM -> float32
        return np.frombuffer(decompressed, dtype=np.int16).astype(np.float32) / 32767.0