|
|
"""Text parsing utilities for spatial directions."""
|
|
|
|
|
|
import re
from typing import Any, Dict, Optional, Tuple

import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
# Azimuth in degrees for each direction keyword. The table puts "left" at +90
# and "right" at -90; hyphenated and fused spellings map to the same angle.
DIRECTION_BINS = {
    "front": 0,
    **dict.fromkeys(("front-left", "frontleft"), 45),
    "left": 90,
    **dict.fromkeys(("back-left", "backleft"), 135),
    "back": 180,
    **dict.fromkeys(("back-right", "backright"), -135),
    "right": -90,
    **dict.fromkeys(("front-right", "frontright"), -45),
}

# Elevation in degrees; each group of synonyms shares one angle.
ELEVATION_BINS = {
    **dict.fromkeys(("down", "below", "lower"), -30),
    **dict.fromkeys(("level", "middle", "center"), 0),
    **dict.fromkeys(("up", "above", "upper"), 30),
}

# Distance value for each distance keyword (stored under "distance_m").
DISTANCE_BINS = {
    **dict.fromkeys(("near", "close"), 1.0),
    **dict.fromkeys(("mid", "medium"), 2.5),
    **dict.fromkeys(("far", "distant"), 5.0),
}

# Room-size keywords; every keyword maps to itself.
ROOM_SIZE_BINS = {size: size for size in ("small", "medium", "large")}

# Reverb keywords; every keyword maps to itself.
REVERB_BINS = {level: level for level in ("dry", "medium", "wet")}
|
|
|
|
|
|
|
|
|
def _squash_separators(phrase: str) -> str:
    """Remove hyphens, underscores and whitespace from ``phrase``."""
    return re.sub(r"[-_\s]+", "", phrase)


def _lookup_keyword(text: str, table: Dict[str, Any], skip=frozenset()) -> Optional[Any]:
    """Return the value of the earliest whole-word key of ``table`` in ``text``.

    Args:
        text: Lower-cased text to search.
        table: Mapping from keyword to value.
        skip: Keys of ``table`` that must be ignored for this search.

    Returns:
        The table value of the matched keyword, or None when no keyword
        occurs as a whole word.
    """
    keys = [key for key in table if key not in skip]
    if not keys:
        return None

    # Longest key first: at a given position the regex engine takes the first
    # alternative that matches, so "front-left" must be tried before "front".
    # A hyphen inside a key may also be written as "_", whitespace or nothing.
    alternatives = [
        r"[-_\s]?".join(re.escape(part) for part in key.split("-"))
        for key in sorted(keys, key=len, reverse=True)
    ]
    # The lookarounds reject keywords embedded in longer words ("up" in "setup").
    pattern = r"(?<![a-z])(?:" + "|".join(alternatives) + r")(?![a-z])"
    match = re.search(pattern, text)
    if match is None:
        return None

    # Map the spelling found in the text back to its table entry.
    by_spelling = {_squash_separators(key): table[key] for key in keys}
    return by_spelling[_squash_separators(match.group())]


def parse_spatial_text(text: str) -> Dict[str, Any]:
    """
    Parse spatial text description into parameters.

    Keywords are matched case-insensitively and only as whole words, so
    "front-left" resolves to the front-left angle rather than to "front" or
    "left", and "medium" is never read as "mid". A "<size> room" phrase is
    resolved first and removed from the text before distance and reverb are
    searched, so "medium room" cannot shadow a later "far" or "wet". When a
    keyword is shared by several tables (e.g. "medium"), a keyword unique to
    the table being searched takes precedence over it.

    Args:
        text: Text like "front-left, up, near, small room, dry"

    Returns:
        Dictionary with keys:
            - azimuth_deg: float (default 0.0)
            - elevation_deg: float (default 0.0)
            - distance_m: float (default 2.5)
            - room_size: str (default "medium")
            - reverb_level: str (default "medium")
    """
    text_lower = text.lower().strip()

    params = {
        "azimuth_deg": 0.0,
        "elevation_deg": 0.0,
        "distance_m": 2.5,
        "room_size": "medium",
        "reverb_level": "medium",
    }

    azimuth = _lookup_keyword(text_lower, DIRECTION_BINS)
    if azimuth is not None:
        params["azimuth_deg"] = float(azimuth)

    elevation = _lookup_keyword(text_lower, ELEVATION_BINS)
    if elevation is not None:
        params["elevation_deg"] = float(elevation)

    # An explicit "<size> room" phrase is unambiguous: take it and blank it out
    # so its size word is not reused as a distance or reverb keyword.
    size_names = "|".join(
        re.escape(size) for size in sorted(ROOM_SIZE_BINS, key=len, reverse=True)
    )
    room_phrase = None
    if size_names:
        room_phrase = re.search(
            r"(?<![a-z])(" + size_names + r")[-_\s]+room(?![a-z])", text_lower
        )
    remaining = text_lower
    if room_phrase is not None:
        params["room_size"] = ROOM_SIZE_BINS[room_phrase.group(1)]
        remaining = (
            text_lower[: room_phrase.start()] + " " + text_lower[room_phrase.end():]
        )

    # Keywords that occur in more than one of these tables are ambiguous.
    seen, shared = set(), set()
    for table in (DISTANCE_BINS, ROOM_SIZE_BINS, REVERB_BINS):
        shared |= seen & set(table)
        seen |= set(table)

    for name, table in (
        ("distance_m", DISTANCE_BINS),
        ("room_size", ROOM_SIZE_BINS),
        ("reverb_level", REVERB_BINS),
    ):
        if name == "room_size" and room_phrase is not None:
            continue
        # Prefer a keyword that belongs to this table only; fall back to the
        # shared keywords when nothing more specific is present.
        value = _lookup_keyword(remaining, table, skip=shared)
        if value is None:
            value = _lookup_keyword(remaining, table)
        if value is not None:
            params[name] = value

    return params
|
|
|
|
|
|
|
|
|
def generate_random_spatial_text() -> Tuple[str, Dict[str, Any]]:
    """
    Generate random spatial text and corresponding parameters.

    One keyword per attribute is drawn with ``np.random.choice`` (NumPy's
    global random state), in the order direction, elevation, distance, room
    size, reverb. The text has the form
    "<direction>, <elevation>, <distance>, <room_size> room, <reverb>".

    Returns:
        (text, params_dict), where params_dict has the keys azimuth_deg,
        elevation_deg, distance_m, room_size and reverb_level.
    """
    # np.random.choice returns numpy.str_ scalars; cast to plain str so the
    # text and the returned params hold only built-in Python types.
    direction = str(np.random.choice(list(DIRECTION_BINS.keys())))
    elevation = str(np.random.choice(["down", "level", "up"]))
    distance = str(np.random.choice(["near", "mid", "far"]))
    room_size = str(np.random.choice(["small", "medium", "large"]))
    reverb = str(np.random.choice(["dry", "medium", "wet"]))

    text = f"{direction}, {elevation}, {distance}, {room_size} room, {reverb}"

    params = {
        "azimuth_deg": float(DIRECTION_BINS[direction]),
        "elevation_deg": float(ELEVATION_BINS[elevation]),
        "distance_m": DISTANCE_BINS[distance],
        "room_size": room_size,
        "reverb_level": reverb,
    }

    return text, params
|
|
|
|
|
|
|
|
|
def params_to_bins(params: Dict[str, Any]) -> Dict[str, int]:
    """
    Convert continuous parameters to bin indices.

    Each numeric value is assigned to the nearest bin centre; on a tie the
    lower bin index wins. Azimuth is compared on the circle, so -180 and 180
    both fall in the "back" bin and values outside [-180, 180] wrap around.

    Args:
        params: Dict with azimuth_deg, elevation_deg and distance_m
            (required), plus optional room_size and reverb_level, which both
            default to "medium".

    Returns:
        Dict of plain ``int`` indices with keys direction_bin (0-7, centres
        0, 45, 90, 135, 180, -135, -90, -45), elevation_bin (0-2, centres
        -30, 0, 30), distance_bin (0-2, centres 1.0, 2.5, 5.0), room_bin
        (small/medium/large) and reverb_bin (dry/medium/wet).

    Raises:
        KeyError: If azimuth_deg, elevation_deg or distance_m is missing.
        ValueError: If room_size or reverb_level is not a known label.
    """
    direction_angles = [0, 45, 90, 135, 180, -135, -90, -45]
    elevation_angles = [-30, 0, 30]
    distance_values = [1.0, 2.5, 5.0]
    room_sizes = ["small", "medium", "large"]
    reverb_levels = ["dry", "medium", "wet"]

    azimuth = params["azimuth_deg"]
    # Wrap each difference into [-180, 180) before taking its magnitude so the
    # comparison is circular (e.g. -170 is 10 degrees from 180, not 350).
    direction_bin = int(
        np.argmin(
            [abs((azimuth - a + 180.0) % 360.0 - 180.0) for a in direction_angles]
        )
    )

    elevation = params["elevation_deg"]
    elevation_bin = int(np.argmin([abs(elevation - a) for a in elevation_angles]))

    distance = params["distance_m"]
    distance_bin = int(np.argmin([abs(distance - d) for d in distance_values]))

    room_bin = room_sizes.index(params.get("room_size", "medium"))
    reverb_bin = reverb_levels.index(params.get("reverb_level", "medium"))

    # np.argmin yields numpy integers; the int() casts above keep the result
    # consistent with the annotated Dict[str, int].
    return {
        "direction_bin": direction_bin,
        "elevation_bin": elevation_bin,
        "distance_bin": distance_bin,
        "room_bin": room_bin,
        "reverb_bin": reverb_bin,
    }
|
|
|
|
|
|
|
|
|
def bins_to_one_hot(bins: Dict[str, int]) -> np.ndarray:
    """
    Encode bin indices as a single concatenated one-hot vector.

    Args:
        bins: Dict holding the indices under the keys direction_bin,
            elevation_bin, distance_bin, room_bin and reverb_bin.

    Returns:
        Array of shape (20,): an 8-slot direction segment followed by 3-slot
        segments for elevation, distance, room and reverb, in that order.
        Each segment has 1.0 at its index and 0.0 elsewhere.
    """
    # (key, segment width) pairs in output order.
    layout = (
        ("direction_bin", 8),
        ("elevation_bin", 3),
        ("distance_bin", 3),
        ("room_bin", 3),
        ("reverb_bin", 3),
    )

    segments = []
    for key, width in layout:
        segment = np.zeros(width)
        segment[bins[key]] = 1.0
        segments.append(segment)

    return np.concatenate(segments)
|
|
|
|