"""Text parsing utilities for spatial directions."""

import re
from typing import Any, Dict, Optional, Tuple

import numpy as np

# Spatial ontology (from config)
DIRECTION_BINS = {
    "front": 0,
    "front-left": 45,
    "frontleft": 45,
    "left": 90,
    "back-left": 135,
    "backleft": 135,
    "back": 180,
    "back-right": -135,
    "backright": -135,
    "right": -90,
    "front-right": -45,
    "frontright": -45,
}

ELEVATION_BINS = {
    "down": -30,
    "below": -30,
    "lower": -30,
    "level": 0,
    "middle": 0,
    "center": 0,
    "up": 30,
    "above": 30,
    "upper": 30,
}

DISTANCE_BINS = {
    "near": 1.0,
    "close": 1.0,
    "mid": 2.5,
    "medium": 2.5,
    "far": 5.0,
    "distant": 5.0,
}

ROOM_SIZE_BINS = {
    "small": "small",
    "medium": "medium",
    "large": "large",
}

REVERB_BINS = {
    "dry": "dry",
    "medium": "medium",
    "wet": "wet",
}

# Canonical bin orderings: a bin index is the position of a value in its tuple.
_DIRECTION_ANGLES = (0, 45, 90, 135, 180, -135, -90, -45)
_ELEVATION_ANGLES = (-30, 0, 30)
_DISTANCE_VALUES = (1.0, 2.5, 5.0)
_ROOM_SIZES = ("small", "medium", "large")
_REVERB_LEVELS = ("dry", "medium", "wet")

# Order and width of the segments in the concatenated one-hot vector.
_ONE_HOT_LAYOUT = (
    ("direction_bin", len(_DIRECTION_ANGLES)),
    ("elevation_bin", len(_ELEVATION_ANGLES)),
    ("distance_bin", len(_DISTANCE_VALUES)),
    ("room_bin", len(_ROOM_SIZES)),
    ("reverb_bin", len(_REVERB_LEVELS)),
)

_KEYWORD_TABLES = (
    DIRECTION_BINS,
    ELEVATION_BINS,
    DISTANCE_BINS,
    ROOM_SIZE_BINS,
    REVERB_BINS,
)


def _keyword_patterns(table: Dict[str, Any]) -> Tuple["re.Pattern", ...]:
    """Compile whole-word patterns for the keys of *table*.

    Two patterns are produced (either may be absent): first the keys that
    occur only in this table, then the keys that also occur in another table
    (e.g. "medium"). Searching them in that order lets an unambiguous keyword
    win over an ambiguous one.
    """
    shared = [
        key for key in table
        if sum(key in other for other in _KEYWORD_TABLES) > 1
    ]
    specific = [key for key in table if key not in shared]

    patterns = []
    for group in (specific, shared):
        if not group:
            continue
        # Longest alternative first so "front-left" is tried before "front".
        ordered = sorted(group, key=len, reverse=True)
        alternation = "|".join(re.escape(key) for key in ordered)
        patterns.append(re.compile(r"\b(?:" + alternation + r")\b"))
    return tuple(patterns)


_DIRECTION_PATTERNS = _keyword_patterns(DIRECTION_BINS)
_ELEVATION_PATTERNS = _keyword_patterns(ELEVATION_BINS)
_DISTANCE_PATTERNS = _keyword_patterns(DISTANCE_BINS)
_ROOM_SIZE_PATTERNS = _keyword_patterns(ROOM_SIZE_BINS)
_REVERB_PATTERNS = _keyword_patterns(REVERB_BINS)


def _find_keyword(text: str, patterns: Tuple["re.Pattern", ...]) -> Optional[str]:
    """Return the leftmost keyword matched by the first pattern that hits."""
    for pattern in patterns:
        match = pattern.search(text)
        if match is not None:
            return match.group(0)
    return None


def parse_spatial_text(text: str) -> Dict[str, Any]:
    """
    Parse spatial text description into parameters.

    Keywords are matched case-insensitively as whole words, so "mid" does not
    match inside "middle" and "front" does not shadow "front-left". Within a
    category the leftmost keyword wins. A keyword shared by several
    categories ("medium") is only used for a category when none of that
    category's own keywords appear in the text. Compound directions must be
    hyphenated or joined ("front-left" / "frontleft").

    Args:
        text: Text like "front-left, up, near, small room, dry"

    Returns:
        Dictionary with keys:
        - azimuth_deg: float (default 0.0)
        - elevation_deg: float (default 0.0)
        - distance_m: float (default 2.5)
        - room_size: str (default "medium")
        - reverb_level: str (default "medium")
    """
    text_lower = text.lower().strip()

    # Defaults, used for every category with no keyword in the text.
    params: Dict[str, Any] = {
        "azimuth_deg": 0.0,
        "elevation_deg": 0.0,
        "distance_m": 2.5,
        "room_size": "medium",
        "reverb_level": "medium",
    }

    direction = _find_keyword(text_lower, _DIRECTION_PATTERNS)
    if direction is not None:
        params["azimuth_deg"] = float(DIRECTION_BINS[direction])

    elevation = _find_keyword(text_lower, _ELEVATION_PATTERNS)
    if elevation is not None:
        params["elevation_deg"] = float(ELEVATION_BINS[elevation])

    distance = _find_keyword(text_lower, _DISTANCE_PATTERNS)
    if distance is not None:
        params["distance_m"] = float(DISTANCE_BINS[distance])

    room_size = _find_keyword(text_lower, _ROOM_SIZE_PATTERNS)
    if room_size is not None:
        params["room_size"] = ROOM_SIZE_BINS[room_size]

    reverb = _find_keyword(text_lower, _REVERB_PATTERNS)
    if reverb is not None:
        params["reverb_level"] = REVERB_BINS[reverb]

    return params


def generate_random_spatial_text(
    rng: Optional[np.random.Generator] = None,
) -> Tuple[str, Dict[str, Any]]:
    """
    Generate random spatial text and corresponding parameters.

    Args:
        rng: Optional NumPy random generator to draw from. When omitted the
            global ``np.random`` state is used, so ``np.random.seed`` still
            controls the output.

    Returns:
        (text, params_dict) where ``text`` has the form
        "<direction>, <elevation>, <distance>, <room_size> room, <reverb>"
        and ``params_dict`` has the same keys as ``parse_spatial_text``.
    """
    sampler = np.random if rng is None else rng

    # str(...) turns the np.str_ returned by choice() into a plain str.
    direction = str(sampler.choice(list(DIRECTION_BINS)))
    elevation = str(sampler.choice(["down", "level", "up"]))
    distance = str(sampler.choice(["near", "mid", "far"]))
    room_size = str(sampler.choice(list(_ROOM_SIZES)))
    reverb = str(sampler.choice(list(_REVERB_LEVELS)))

    text = f"{direction}, {elevation}, {distance}, {room_size} room, {reverb}"

    params = {
        "azimuth_deg": float(DIRECTION_BINS[direction]),
        "elevation_deg": float(ELEVATION_BINS[elevation]),
        "distance_m": DISTANCE_BINS[distance],
        "room_size": room_size,
        "reverb_level": reverb,
    }
    return text, params


def _circular_distance_deg(angle_a: float, angle_b: float) -> float:
    """Return the absolute angular difference in degrees, in [0, 180]."""
    return abs((angle_a - angle_b + 180.0) % 360.0 - 180.0)


def params_to_bins(params: Dict[str, Any]) -> Dict[str, int]:
    """
    Convert continuous parameters to bin indices.

    Azimuth is compared on the circle, so -170 and 190 both fall into the
    180-degree bin and 270 falls into the -90-degree bin. Ties go to the
    lower bin index.

    Args:
        params: Dict with azimuth_deg, elevation_deg and distance_m
            (required) plus room_size and reverb_level (optional, both
            default to "medium").

    Returns:
        Dict with plain-int bin indices under direction_bin (0-7),
        elevation_bin (0-2), distance_bin (0-2), room_bin (0-2) and
        reverb_bin (0-2).

    Raises:
        KeyError: If azimuth_deg, elevation_deg or distance_m is missing.
        ValueError: If room_size or reverb_level is not a known label.
    """
    azimuth = params["azimuth_deg"]
    elevation = params["elevation_deg"]
    distance = params["distance_m"]

    direction_bin = int(
        np.argmin([_circular_distance_deg(azimuth, a) for a in _DIRECTION_ANGLES])
    )
    elevation_bin = int(np.argmin([abs(elevation - a) for a in _ELEVATION_ANGLES]))
    distance_bin = int(np.argmin([abs(distance - d) for d in _DISTANCE_VALUES]))

    room_bin = _ROOM_SIZES.index(params.get("room_size", "medium"))
    reverb_bin = _REVERB_LEVELS.index(params.get("reverb_level", "medium"))

    return {
        "direction_bin": direction_bin,
        "elevation_bin": elevation_bin,
        "distance_bin": distance_bin,
        "room_bin": room_bin,
        "reverb_bin": reverb_bin,
    }


def bins_to_one_hot(bins: Dict[str, int]) -> np.ndarray:
    """
    Convert bin indices to concatenated one-hot encoding.

    Args:
        bins: Dict with direction_bin, elevation_bin, distance_bin, room_bin
            and reverb_bin indices.

    Returns:
        One-hot vector of shape (8 + 3 + 3 + 3 + 3 = 20,), segments ordered
        direction, elevation, distance, room, reverb.

    Raises:
        KeyError: If a bin key is missing.
        IndexError: If an index is outside its segment. Negative indices are
            rejected rather than wrapping around to the end of the segment.
    """
    segments = []
    for key, size in _ONE_HOT_LAYOUT:
        index = bins[key]
        if not 0 <= index < size:
            raise IndexError(f"{key}={index} is out of range for {size} bins")
        segment = np.zeros(size)
        segment[index] = 1.0
        segments.append(segment)
    return np.concatenate(segments)