File size: 4,731 Bytes

96336ad

"""First-Order Ambisonics (FOA) utilities."""

import numpy as np
import torch
from typing import Tuple


def deg2rad(degrees: float) -> float:
    """Return the angle ``degrees`` expressed in radians."""
    # Multiply first, then divide, so rounding matches degrees * pi / 180.
    scaled = degrees * np.pi
    return scaled / 180.0


def encode_foa_analytic(
    mono: np.ndarray,
    azimuth_deg: float,
    elevation_deg: float,
    normalization: str = "SN3D"
) -> np.ndarray:
    """
    Pan a mono signal to first-order ambisonics using closed-form gains.

    Args:
        mono: Mono audio signal, shape (n_samples,)
        azimuth_deg: Azimuth angle in degrees (-180 to 180, 0=front)
        elevation_deg: Elevation angle in degrees (-90 to 90, 0=level)
        normalization: "N3D" multiplies the W channel by sqrt(2) after
            encoding; any other value (including the default "SN3D") leaves
            W at mono / sqrt(2). X, Y and Z are the same in both modes.

    Returns:
        FOA signal, shape (4, n_samples) with channels [W, X, Y, Z]
    """
    # NOTE(review): these gains do not look like the textbook SN3D/N3D
    # definitions (SN3D normally has a W gain of 1, and N3D additionally
    # scales the first-order channels by sqrt(3)); the default resembles
    # FuMa-style W weighting. Confirm what downstream code expects before
    # changing any values.
    az = deg2rad(azimuth_deg)
    el = deg2rad(elevation_deg)
    cos_az, sin_az = np.cos(az), np.sin(az)
    cos_el, sin_el = np.cos(el), np.sin(el)

    channels = [
        mono / np.sqrt(2),       # W: omnidirectional
        mono * cos_az * cos_el,  # X: front/back (gain 1 at azimuth 0 = front)
        mono * sin_az * cos_el,  # Y: lateral (gain 1 at azimuth 90)
        mono * sin_el,           # Z: up/down
    ]
    foa = np.stack(channels, axis=0)

    if normalization != "N3D":
        return foa

    # "N3D" mode only rescales the W row, in place on the stacked array.
    foa[0] *= np.sqrt(2)
    return foa


def encode_foa_analytic_torch(
    mono: torch.Tensor,
    azimuth_deg: float,
    elevation_deg: float,
    normalization: str = "SN3D"
) -> torch.Tensor:
    """
    Pan a mono tensor to first-order ambisonics (PyTorch counterpart of
    the NumPy encoder).

    Args:
        mono: Mono audio signal, shape (batch, n_samples) or (n_samples,)
        azimuth_deg: Azimuth angle in degrees
        elevation_deg: Elevation angle in degrees
        normalization: "N3D" multiplies the W channel by sqrt(2) after
            encoding; any other value (including the default "SN3D") leaves
            W at mono / sqrt(2). X, Y and Z are the same in both modes.

    Returns:
        FOA signal, shape (batch, 4, n_samples) or (4, n_samples), with
        channels stacked in the order [W, X, Y, Z]
    """
    # NOTE(review): the W gains here do not look like the textbook SN3D/N3D
    # definitions (the default resembles FuMa-style W weighting) - confirm
    # what downstream code expects before changing any values.

    # Angles live on the same dtype/device as the signal.
    az = torch.tensor(deg2rad(azimuth_deg), dtype=mono.dtype, device=mono.device)
    el = torch.tensor(deg2rad(elevation_deg), dtype=mono.dtype, device=mono.device)

    # A 1-D input is treated as a batch of one and un-batched again on return.
    squeeze_output = mono.ndim == 1
    batch = mono.unsqueeze(0) if squeeze_output else mono

    cos_el = torch.cos(el)
    channels = (
        batch / np.sqrt(2),              # W: omnidirectional
        batch * torch.cos(az) * cos_el,  # X: gain cos(az) * cos(el)
        batch * torch.sin(az) * cos_el,  # Y: gain sin(az) * cos(el)
        batch * torch.sin(el),           # Z: gain sin(el)
    )
    foa = torch.stack(channels, dim=1)  # (batch, 4, n_samples)

    if normalization == "N3D":
        # Only the W channel is rescaled, in place on the stacked tensor.
        foa[:, 0] *= np.sqrt(2)

    return foa.squeeze(0) if squeeze_output else foa


def compute_intensity_vector(foa: np.ndarray) -> Tuple[float, float]:
    """
    Estimate a direction (azimuth, elevation) from the time-averaged
    products of W with X, Y and Z.

    Args:
        foa: FOA signal, shape (4, n_samples), rows unpacked as W, X, Y, Z

    Returns:
        (azimuth_deg, elevation_deg)
    """
    w, x, y, z = foa

    # One time-averaged intensity component per directional channel.
    ix, iy, iz = (np.mean(w * axis) for axis in (x, y, z))

    # Azimuth from the horizontal components; elevation from the vertical
    # component against the horizontal magnitude. Radians -> degrees.
    azimuth_deg = np.arctan2(iy, ix) * 180.0 / np.pi
    elevation_deg = np.arctan2(iz, np.sqrt(ix**2 + iy**2)) * 180.0 / np.pi

    return azimuth_deg, elevation_deg


def compute_intensity_vector_torch(foa: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Estimate direction angles from the time-averaged products of W with
    X, Y and Z (PyTorch counterpart of the NumPy routine).

    Args:
        foa: FOA signal, shape (batch, 4, n_samples) or (4, n_samples)

    Returns:
        (azimuth_deg, elevation_deg) tensors; one value per batch item, or
        with the batch dimension squeezed away for a 2-D input
    """
    # A 2-D input is treated as a batch of one and un-batched on return.
    squeeze_output = foa.ndim == 2
    batch = foa.unsqueeze(0) if squeeze_output else foa

    # Channel 0 is W; channels 1..3 are X, Y, Z. Average over the last axis.
    w = batch[:, 0]
    ix, iy, iz = (torch.mean(w * batch[:, k], dim=-1) for k in (1, 2, 3))

    # Radians -> degrees for both angles.
    azimuth_deg = torch.atan2(iy, ix) * 180.0 / np.pi
    elevation_deg = torch.atan2(iz, torch.sqrt(ix**2 + iy**2)) * 180.0 / np.pi

    if not squeeze_output:
        return azimuth_deg, elevation_deg
    return azimuth_deg.squeeze(0), elevation_deg.squeeze(0)


def foa_to_stereo_simple(foa: np.ndarray) -> np.ndarray:
    """
    Simple stereo downmix from FOA using the W and Y channels.

    With the encoding used in this module (azimuth 0 = front,
    X ~ cos(azimuth), Y ~ sin(azimuth)), X is the front/back component and
    Y is the lateral one. The left/right difference therefore has to come
    from Y: mixing with X would pan a frontal source to one side and leave
    a source at the side with identical L and R.

    Positive Y is treated as "left", following the usual ambisonic
    convention of counter-clockwise positive azimuth.

    Args:
        foa: FOA signal, shape (4, n_samples) with channels [W, X, Y, Z]

    Returns:
        Stereo signal, shape (2, n_samples) with rows [L, R]
    """
    # X (front/back) and Z (height) carry no left/right information.
    W, _X, Y, _Z = foa

    # Sum/difference decode: L = W + Y, R = W - Y, scaled by 1/sqrt(2).
    L = (W + Y) / np.sqrt(2)
    R = (W - Y) / np.sqrt(2)

    return np.stack([L, R], axis=0)