#!/usr/bin/env python3
# License: CC-BY-NC-ND-4.0
# Created by: Patrick Lumbantobing, Vertox-AI
# Copyright (c) 2026 Vertox-AI. All rights reserved.
#
# This work is licensed under the Creative Commons
# Attribution-NonCommercial-NoDerivatives 4.0 International License.
# To view a copy of this license, visit
# http://creativecommons.org/licenses/by-nc-nd/4.0/
"""
Utility functions for ASR preprocessing and model I/O.

Adapted from: https://github.com/istupakov/onnx-asr/tree/main

Provides:

- Sample-rate validation helpers.
- Typed NumPy array guards for common dtypes.
- WAV reading utilities (mono-mixing, multi-width support).
- Batch padding to a common length.
- Log-softmax implementation.
- Model file discovery helpers.
"""

from __future__ import annotations

import wave
from pathlib import Path
from typing import Literal, Optional, TypeGuard, cast, get_args

import numpy as np
import numpy.typing as npt

# Closed set of sample rates (in Hz) accepted by the helpers in this module.
# Declared as a ``Literal`` so that ``get_args(SampleRates)`` yields the
# allowed values at runtime while type checkers can narrow plain ``int``s.
SampleRates = Literal[8_000, 11_025, 16_000, 22_050, 24_000, 32_000, 44_100, 48_000]


def is_supported_sample_rate(sample_rate: int) -> TypeGuard[SampleRates]:
    """
    Check whether a sample rate belongs to the supported set.

    Parameters
    ----------
    sample_rate :
        Candidate sample rate.

    Returns
    -------
    bool
        ``True`` when ``sample_rate`` is one of the values listed in
        ``SampleRates``; type checkers narrow the argument accordingly.
    """
    supported_rates = get_args(SampleRates)
    return sample_rate in supported_rates


def is_float16_array(x: object) -> TypeGuard[npt.NDArray[np.float16]]:
    """Return True if ``x`` is a NumPy array with dtype float16."""
    return isinstance(x, np.ndarray) and x.dtype == np.float16


def is_float32_array(x: object) -> TypeGuard[npt.NDArray[np.float32]]:
    """Return True if ``x`` is a NumPy array with dtype float32."""
    return isinstance(x, np.ndarray) and x.dtype == np.float32


def is_int32_array(x: object) -> TypeGuard[npt.NDArray[np.int32]]:
    """Return True if ``x`` is a NumPy array with dtype int32."""
    return isinstance(x, np.ndarray) and x.dtype == np.int32


def is_int64_array(x: object) -> TypeGuard[npt.NDArray[np.int64]]:
    """Return True if ``x`` is a NumPy array with dtype int64."""
    return isinstance(x, np.ndarray) and x.dtype == np.int64


class ModelPathNotDirectoryError(NotADirectoryError):
    """
    Error for a model location that does not point at a directory.

    Parameters
    ----------
    path :
        The offending path; it is interpolated into the error message.
    """

    def __init__(self, path: str | Path) -> None:
        message = f"The path '{path}' is not a directory."
        super().__init__(message)


class ModelFileNotFoundError(FileNotFoundError):
    """
    Error for a required model file that is missing from a directory.

    Parameters
    ----------
    filename :
        Name or pattern that was searched for.
    path :
        Directory that was searched.
    """

    def __init__(self, filename: str | Path, path: str | Path) -> None:
        message = f"File '{filename}' not found in path '{path}'."
        super().__init__(message)


class MoreThanOneModelFileFoundError(Exception):
    """
    Error for a file pattern that matches several candidates in a directory.

    Parameters
    ----------
    filename :
        Name or pattern that produced the ambiguous match.
    path :
        Directory that was searched.
    """

    def __init__(self, filename: str | Path, path: str | Path) -> None:
        message = f"Found more than one file '{filename}' in path '{path}'."
        super().__init__(message)


class SupportedOnlyMonoAudioError(ValueError):
    """Error for a waveform that is not mono where mono input is required."""

    def __init__(self) -> None:
        message = "Supported only mono audio."
        super().__init__(message)


class WrongSampleRateError(ValueError):
    """
    Error for a waveform whose sample rate is outside the supported set.

    The message lists every value allowed by ``SampleRates``.
    """

    def __init__(self) -> None:
        supported = get_args(SampleRates)
        super().__init__(f"Supported only {supported} sample rates.")


class DifferentSampleRatesError(ValueError):
    """Error for a batch whose waveforms do not share a single sample rate."""

    def __init__(self) -> None:
        message = "All sample rates in a batch must be the same."
        super().__init__(message)


def read_wav(filename: str) -> tuple[npt.NDArray[np.float32], int]:
    """
    Read a PCM WAV file into a mono float32 NumPy array.

    Samples are divided by the full-scale value of their integer container,
    so the result lies in ``[-1, 1)``, and multi-channel input is averaged
    down to mono. Sample widths of 1, 2, 3 and 4 bytes are handled.

    The number of frames is derived from the bytes actually read rather than
    from the frame count declared in the header. A file whose header
    overstates its length (for example a truncated recording) therefore
    yields the frames that are present instead of failing on reshape; a
    trailing partial frame is dropped.

    Parameters
    ----------
    filename :
        Path to the WAV file.

    Returns
    -------
    (np.ndarray, int)
        Tuple of ``(audio, sample_rate)`` where ``audio`` has shape ``(T,)``.
    """
    with wave.open(filename, mode="rb") as f:
        sample_width = f.getsampwidth()
        n_channels = f.getnchannels()
        sample_rate = f.getframerate()
        data = f.readframes(f.getnframes())

    # Keep whole frames only, so the reshape below cannot fail on a file that
    # ends in the middle of a frame.
    leftover = len(data) % (sample_width * n_channels)
    if leftover:
        data = data[:-leftover]

    zero_offset = 0
    if sample_width == 1:
        # 8-bit PCM is unsigned: silence sits at 128, i.e. 1.0 after scaling.
        samples = np.frombuffer(data, dtype="u1")
        zero_offset = 1
    elif sample_width == 3:
        # 24-bit PCM: place the three bytes in the upper bytes of a
        # little-endian int32 (low byte left at zero). The value becomes
        # sample * 256, which the int32 full-scale divisor compensates for.
        packed = np.frombuffer(data, dtype="u1").reshape(-1, 3)
        widened = np.zeros((packed.shape[0], 4), dtype="u1")
        widened[:, 1:] = packed
        samples = widened.view("<i4")
    else:
        # 16-bit or 32-bit signed little-endian PCM.
        samples = np.frombuffer(data, dtype=f"<i{sample_width}")

    full_scale = 2 ** (8 * samples.itemsize - 1)
    audio = samples.reshape(-1, n_channels).astype(np.float32) / full_scale - zero_offset

    if n_channels == 1:
        return audio[:, 0], sample_rate

    # Multi-channel: simple average to mono.
    return audio.mean(axis=1, dtype=np.float32), sample_rate


def read_wav_files(
    waveforms: list[npt.NDArray[np.float32] | str],
    numpy_sample_rate: Optional[SampleRates] = None,
) -> tuple[npt.NDArray[np.float32], npt.NDArray[np.int64], SampleRates]:
    """
    Build a zero-padded batch from a mix of in-memory waveforms and WAV files.

    String entries are loaded with ``read_wav`` and carry the sample rate
    stored in the file; array entries are taken as-is and are tagged with
    ``numpy_sample_rate``.

    Parameters
    ----------
    waveforms :
        List whose items are either 1-D arrays (shape ``(T,)``) or filenames.
    numpy_sample_rate :
        Sample rate to associate with NumPy waveforms (ignored for filenames).

    Returns
    -------
    (np.ndarray, np.ndarray, SampleRates)
        ``(batch_waveforms, lengths, sample_rate)`` where
        ``batch_waveforms`` has shape ``(B, T_max)``.

    Raises
    ------
    SupportedOnlyMonoAudioError
        If an array entry is not one-dimensional.
    DifferentSampleRatesError
        If the entries do not all share one sample rate.
    WrongSampleRateError
        If the shared sample rate is ``None`` or not a supported value.
    """
    signals: list[npt.NDArray[np.float32]] = []
    rates: list[int | SampleRates | None] = []

    for item in waveforms:
        if not isinstance(item, str):
            # In-memory waveform: must already be mono.
            if item.ndim != 1:
                raise SupportedOnlyMonoAudioError
            signals.append(item)
            rates.append(numpy_sample_rate)
            continue

        # Filename: decode it and remember the rate stored in the file.
        signal, file_rate = read_wav(item)
        signals.append(signal)
        rates.append(file_rate)

    if len(set(rates)) > 1:
        raise DifferentSampleRatesError

    shared_rate = rates[0]
    if shared_rate is None:
        # Only arrays were given and no rate was supplied for them.
        raise WrongSampleRateError

    # Coerce non-``int`` values to a plain int before the membership check.
    rate = shared_rate if isinstance(shared_rate, int) else int(shared_rate)
    if is_supported_sample_rate(rate):
        batch, lengths = pad_list(signals)
        return batch, lengths, rate  # type: ignore[return-value]

    raise WrongSampleRateError


def pad_list(
    arrays: list[npt.NDArray[np.float32]],
) -> tuple[npt.NDArray[np.float32], npt.NDArray[np.int64]]:
    """
    Pad a list of 1-D NumPy arrays to a common length.

    Each array is copied into the start of its own row of a zero-filled
    float32 matrix whose width is the longest input length.

    Parameters
    ----------
    arrays :
        List of waveforms with shape ``(T_i,)``. May be empty.

    Returns
    -------
    (np.ndarray, np.ndarray)
        ``(batch, lengths)`` where ``batch`` has shape ``(B, T_max)`` and
        ``lengths`` (int64, shape ``(B,)``) holds the original lengths. For
        an empty list, ``batch`` has shape ``(0, 0)`` and ``lengths`` has
        shape ``(0,)``.
    """
    lens = np.array([array.shape[0] for array in arrays], dtype=np.int64)
    # ``initial=0`` keeps the reduction defined for an empty batch, where a
    # plain ``max()`` over a zero-size array would raise.
    max_len = int(lens.max(initial=0))
    result = np.zeros((len(arrays), max_len), dtype=np.float32)
    for row, array in zip(result, arrays):
        # ``row`` is a view into ``result``; no clipping is needed because
        # ``max_len`` is at least as large as every input length.
        row[: array.shape[0]] = array
    return result, lens


def log_softmax(
    logits: npt.NDArray[np.float32],
    axis: int | None = None,
) -> npt.NDArray[np.float32]:
    """
    Apply a numerically stable log-softmax along one axis.

    The per-slice maximum is subtracted before exponentiation so that the
    largest exponent is zero. The input array is not modified.

    Parameters
    ----------
    logits :
        Input array of unnormalised log-probabilities.
    axis :
        Axis to normalise over; ``None`` selects the last axis.

    Returns
    -------
    np.ndarray
        Array of the same shape as ``logits`` holding the log-softmax values.
    """
    reduce_axis = -1 if axis is None else axis

    peak = np.max(logits, axis=reduce_axis, keepdims=True)
    shifted = logits - peak

    # log(sum(exp(.))) of the shifted values is the remaining normaliser.
    log_normaliser = np.log(np.sum(np.exp(shifted), axis=reduce_axis, keepdims=True))
    shifted -= log_normaliser
    return cast(npt.NDArray[np.float32], shifted)


def find_files(path: str | Path, files: dict[str, str]) -> dict[str, Path]:
    """
    Resolve model-related filenames within a directory.

    Every pattern must match exactly one entry of ``path``. When the
    directory contains a ``config.json``, a ``"config"`` entry pointing at
    it is included in the result (replacing any ``"config"`` pattern given
    in ``files``). The ``files`` mapping passed by the caller is never
    modified.

    Parameters
    ----------
    path :
        Directory containing model files.
    files :
        Mapping from logical name (e.g., ``"encoder"``) to glob pattern
        (e.g., ``"encoder*.onnx"``).

    Returns
    -------
    dict[str, Path]
        Mapping from logical name to resolved :class:`Path`.

    Raises
    ------
    ModelPathNotDirectoryError
        If ``path`` is not a directory.
    ModelFileNotFoundError
        If no file matches a given pattern.
    MoreThanOneModelFileFoundError
        If multiple files match a given pattern.
    """
    root = Path(path)
    if not root.is_dir():
        raise ModelPathNotDirectoryError(path)

    # Work on a copy: an in-place ``files |= ...`` would leak the extra
    # "config" entry into the caller's dictionary.
    patterns = dict(files)

    # Optional config.json convenience.
    if (root / "config.json").exists():
        patterns["config"] = "config.json"

    def find(filename: str) -> Path:
        """Return the single entry of ``root`` that matches ``filename``."""
        matches = list(root.glob(filename))
        if not matches:
            raise ModelFileNotFoundError(filename, path)
        if len(matches) > 1:
            raise MoreThanOneModelFileFoundError(filename, path)
        return matches[0]

    return {key: find(pattern) for key, pattern in patterns.items()}