File size: 4,731 Bytes
96336ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
"""First-Order Ambisonics (FOA) utilities."""
import numpy as np
import torch
from typing import Tuple
def deg2rad(degrees: float) -> float:
    """Return the angle ``degrees`` expressed in radians.

    The input is multiplied by pi first and divided by 180 afterwards.
    """
    half_turn_deg = 180.0
    return (degrees * np.pi) / half_turn_deg
def encode_foa_analytic(
    mono: np.ndarray,
    azimuth_deg: float,
    elevation_deg: float,
    normalization: str = "SN3D"
) -> np.ndarray:
    """
    Pan a mono signal to first-order ambisonics for one fixed direction.

    Args:
        mono: Mono audio signal, shape (n_samples,)
        azimuth_deg: Azimuth angle in degrees (-180 to 180, 0=front)
        elevation_deg: Elevation angle in degrees (-90 to 90, 0=level)
        normalization: "N3D" multiplies the encoded W channel by sqrt(2);
            any other value (including the default "SN3D") leaves W at
            mono / sqrt(2).

    Returns:
        FOA signal, shape (4, n_samples) with channels [W, X, Y, Z]

    NOTE(review): the two normalization modes differ only in the W gain;
    X, Y and Z are never rescaled. Confirm this matches the SN3D/N3D
    definitions the callers expect.
    """
    az = deg2rad(azimuth_deg)
    el = deg2rad(elevation_deg)
    cos_el = np.cos(el)

    channels = np.stack(
        [
            mono / np.sqrt(2),           # W: omnidirectional, gain 1/sqrt(2)
            mono * np.cos(az) * cos_el,  # X: largest at azimuth 0 (front)
            mono * np.sin(az) * cos_el,  # Y: largest at azimuth +90 (lateral)
            mono * np.sin(el),           # Z: largest at elevation +90 (up)
        ],
        axis=0,
    )

    # The N3D option undoes the 1/sqrt(2) factor on W, in place on the stack.
    if normalization == "N3D":
        channels[0] *= np.sqrt(2)
    return channels
def encode_foa_analytic_torch(
    mono: torch.Tensor,
    azimuth_deg: float,
    elevation_deg: float,
    normalization: str = "SN3D"
) -> torch.Tensor:
    """
    PyTorch version of FOA encoding.

    Args:
        mono: Mono audio signal, shape (batch, n_samples) or (n_samples,).
            Integer tensors (e.g. PCM samples) are accepted; the result is
            then in torch's default floating dtype.
        azimuth_deg: Azimuth angle in degrees (0=front per the NumPy encoder)
        elevation_deg: Elevation angle in degrees
        normalization: "N3D" multiplies the encoded W channel by sqrt(2);
            any other value (including the default "SN3D") leaves W at
            mono / sqrt(2).

    Returns:
        FOA signal, shape (batch, 4, n_samples) or (4, n_samples), with
        channels ordered [W, X, Y, Z] and created on ``mono.device``.
    """
    # The angles must be stored in a non-integer dtype. Building them with an
    # integer ``mono.dtype`` truncates the radians to whole numbers (45 deg
    # becomes 0 rad), which silently pans the source to the wrong direction.
    if mono.is_floating_point() or mono.is_complex():
        angle_dtype = mono.dtype
    else:
        angle_dtype = torch.get_default_dtype()
    theta = torch.tensor(deg2rad(azimuth_deg), dtype=angle_dtype, device=mono.device)
    phi = torch.tensor(deg2rad(elevation_deg), dtype=angle_dtype, device=mono.device)

    # Work on a batched view; remember whether to drop the batch dim again.
    squeeze_output = mono.ndim == 1
    if squeeze_output:
        mono = mono.unsqueeze(0)

    W = mono / np.sqrt(2)                         # omnidirectional
    X = mono * torch.cos(theta) * torch.cos(phi)  # largest at azimuth 0
    Y = mono * torch.sin(theta) * torch.cos(phi)  # largest at azimuth +90
    Z = mono * torch.sin(phi)                     # largest at elevation +90

    foa = torch.stack([W, X, Y, Z], dim=1)  # (batch, 4, n_samples)

    # The N3D option undoes the 1/sqrt(2) factor on W.
    if normalization == "N3D":
        foa[:, 0] *= np.sqrt(2)

    if squeeze_output:
        foa = foa.squeeze(0)
    return foa
def compute_intensity_vector(foa: np.ndarray) -> Tuple[float, float]:
    """
    Estimate a direction from the time-averaged products W*X, W*Y and W*Z.

    Args:
        foa: FOA signal, shape (4, n_samples), channels [W, X, Y, Z]

    Returns:
        (azimuth_deg, elevation_deg)
    """
    omni, x_ch, y_ch, z_ch = foa

    # One averaged intensity component per directional channel.
    ix, iy, iz = (np.mean(omni * axis) for axis in (x_ch, y_ch, z_ch))

    # Azimuth from the X/Y components, elevation from Z against their norm.
    horizontal = np.sqrt(ix**2 + iy**2)
    az_rad = np.arctan2(iy, ix)
    el_rad = np.arctan2(iz, horizontal)

    return az_rad * 180.0 / np.pi, el_rad * 180.0 / np.pi
def compute_intensity_vector_torch(foa: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Estimate directions from time-averaged W*X, W*Y and W*Z (PyTorch).

    Args:
        foa: FOA signal, shape (batch, 4, n_samples) or (4, n_samples)

    Returns:
        (azimuth_deg, elevation_deg) tensors; the leading batch dimension is
        dropped again when the input was 2-D.
    """
    unbatched = foa.ndim == 2
    batch = foa.unsqueeze(0) if unbatched else foa

    omni = batch[:, 0]
    # Average each W*axis product over the last (time) dimension.
    ix, iy, iz = ((omni * batch[:, ch]).mean(dim=-1) for ch in (1, 2, 3))

    # Azimuth from the X/Y components, elevation from Z against their norm.
    horizontal = torch.sqrt(ix**2 + iy**2)
    az_deg = torch.atan2(iy, ix) * 180.0 / np.pi
    el_deg = torch.atan2(iz, horizontal) * 180.0 / np.pi

    if unbatched:
        return az_deg.squeeze(0), el_deg.squeeze(0)
    return az_deg, el_deg
def foa_to_stereo_simple(foa: np.ndarray) -> np.ndarray:
    """
    Simple stereo downmix from FOA using the omni (W) and lateral (Y) channels.

    The encoders in this module use azimuth 0 = front with
    X = cos(az) * cos(el) and Y = sin(az) * cos(el), so X is the front/back
    axis and Y is the left/right axis. Left and right are therefore formed
    from W +/- Y; using X would pan by front/back position instead.

    Assumes positive azimuth (positive Y) points to the listener's left, the
    usual ambisonic convention. TODO confirm against the data pipeline.

    Args:
        foa: FOA signal, shape (4, n_samples), channels [W, X, Y, Z]

    Returns:
        Stereo signal, shape (2, n_samples), rows [left, right]
    """
    W, _X, Y, _Z = foa

    # Sum/difference decode on the lateral axis: L = W + Y, R = W - Y.
    left = (W + Y) / np.sqrt(2)
    right = (W - Y) / np.sqrt(2)
    return np.stack([left, right], axis=0)
|