soundsol committed on
Commit
343bd34
·
verified ·
1 Parent(s): 96336ad

Upload spatializer/utils/text_parser.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. spatializer/utils/text_parser.py +215 -0
spatializer/utils/text_parser.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Text parsing utilities for spatial directions."""
2
+
3
import re
from typing import Any, Dict, Optional, Tuple

import numpy as np
6
+
7
+
8
# Spatial ontology (from config).
# Each table maps a lowercase word that may appear in a description to the
# value it stands for. Several words may share one value (aliases).

# Direction word -> azimuth in degrees.
DIRECTION_BINS = {
    "front": 0,
    "front-left": 45,
    "frontleft": 45,
    "left": 90,
    "back-left": 135,
    "backleft": 135,
    "back": 180,
    "back-right": -135,
    "backright": -135,
    "right": -90,
    "front-right": -45,
    "frontright": -45,
}

# Elevation word -> elevation in degrees.
ELEVATION_BINS = {
    "down": -30,
    "below": -30,
    "lower": -30,
    "level": 0,
    "middle": 0,
    "center": 0,
    "up": 30,
    "above": 30,
    "upper": 30,
}

# Distance word -> distance value (reported as ``distance_m``).
DISTANCE_BINS = {
    "near": 1.0,
    "close": 1.0,
    "mid": 2.5,
    "medium": 2.5,
    "far": 5.0,
    "distant": 5.0,
}

# Room-size word -> room-size label.
ROOM_SIZE_BINS = {
    "small": "small",
    "medium": "medium",
    "large": "large",
}

# Reverb word -> reverb-level label.
REVERB_BINS = {
    "dry": "dry",
    "medium": "medium",
    "wet": "wet",
}


def _key_pattern(key: str) -> str:
    """Return a regex that matches *key* as a whole word.

    A hyphen inside the key is treated as an optional separator, so the key
    "front-left" also matches "front left", "front_left" and "frontleft".
    """
    body = r"[-_\s]?".join(re.escape(part) for part in key.split("-"))
    return r"\b" + body + r"\b"


def _find_key(
    text: str,
    table: Dict[str, Any],
    ambiguous: frozenset = frozenset(),
) -> Optional[str]:
    """Return the key of *table* that best matches *text*, or None.

    Keys are matched as whole words. Among the matching keys the one that
    starts earliest in the text wins; ties are broken in favour of the longest
    match, so "front-left" beats "front". Keys listed in *ambiguous* are only
    considered when no other key of the table matches.
    """
    preferred = [key for key in table if key not in ambiguous]
    fallback = [key for key in table if key in ambiguous]
    for candidates in (preferred, fallback):
        best_key = None
        best_rank = None
        for key in candidates:
            match = re.search(_key_pattern(key), text)
            if match is None:
                continue
            # Smaller is better: earlier start first, then longer match.
            rank = (match.start(), match.start() - match.end())
            if best_rank is None or rank < best_rank:
                best_key, best_rank = key, rank
        if best_key is not None:
            return best_key
    return None


def parse_spatial_text(text: str) -> Dict[str, Any]:
    """
    Parse spatial text description into parameters.

    Words are matched case-insensitively and as whole words, so "up" is not
    found inside "cup" and "front-left" is not mistaken for "front" or "left".
    A word that belongs to several tables (currently "medium") is resolved by
    context: "<size> room" always sets the room size, and for the remaining
    attributes any unambiguous word takes precedence over the shared one.

    Args:
        text: Text like "front-left, up, near, small room, dry"

    Returns:
        Dictionary with keys:
            - azimuth_deg: float
            - elevation_deg: float
            - distance_m: float
            - room_size: str
            - reverb_level: str
        Attributes not mentioned in the text keep their defaults
        (0.0, 0.0, 2.5, "medium", "medium").
    """
    text_lower = text.lower().strip()

    # Defaults
    params = {
        "azimuth_deg": 0.0,
        "elevation_deg": 0.0,
        "distance_m": 2.5,
        "room_size": "medium",
        "reverb_level": "medium",
    }

    # Parse direction (azimuth)
    direction = _find_key(text_lower, DIRECTION_BINS)
    if direction is not None:
        params["azimuth_deg"] = float(DIRECTION_BINS[direction])

    # Parse elevation
    elevation = _find_key(text_lower, ELEVATION_BINS)
    if elevation is not None:
        params["elevation_deg"] = float(ELEVATION_BINS[elevation])

    # Words shared by more than one of the remaining tables cannot be
    # attributed to a single attribute without context.
    tables = (DISTANCE_BINS, ROOM_SIZE_BINS, REVERB_BINS)
    ambiguous = frozenset(
        key
        for table in tables
        for key in table
        if sum(key in other for other in tables) > 1
    )

    # Parse room size: an explicit "<size> room" phrase is authoritative.
    room_words = "|".join(
        re.escape(key) for key in sorted(ROOM_SIZE_BINS, key=len, reverse=True)
    )
    room_phrase = re.search(r"\b(" + room_words + r")[-_\s]+room\b", text_lower)
    remaining = text_lower
    if room_phrase is not None:
        room_size = room_phrase.group(1)
        # Drop the phrase so its size word is not reused for distance/reverb.
        remaining = (
            text_lower[: room_phrase.start()] + " " + text_lower[room_phrase.end():]
        )
    else:
        room_size = _find_key(text_lower, ROOM_SIZE_BINS, ambiguous)
    if room_size is not None:
        params["room_size"] = ROOM_SIZE_BINS[room_size]

    # Parse distance
    distance = _find_key(remaining, DISTANCE_BINS, ambiguous)
    if distance is not None:
        params["distance_m"] = DISTANCE_BINS[distance]

    # Parse reverb level
    reverb = _find_key(remaining, REVERB_BINS, ambiguous)
    if reverb is not None:
        params["reverb_level"] = REVERB_BINS[reverb]

    return params
115
+
116
+
117
def generate_random_spatial_text() -> Tuple[str, Dict[str, Any]]:
    """
    Generate random spatial text and corresponding parameters.

    Each attribute is drawn independently with ``np.random.choice`` (global
    NumPy random state). The direction is drawn from all keys of
    ``DIRECTION_BINS``, so every alias spelling is a candidate; elevation,
    distance, room size and reverb are each drawn from three fixed words.

    Returns:
        (text, params_dict) where ``text`` has the form
        "<direction>, <elevation>, <distance>, <room_size> room, <reverb>"
        and ``params_dict`` has the keys azimuth_deg, elevation_deg,
        distance_m, room_size and reverb_level.
    """
    # Random sampling. np.random.choice returns numpy.str_ scalars; convert to
    # plain str so the returned params contain only built-in Python types.
    direction = str(np.random.choice(list(DIRECTION_BINS.keys())))
    elevation = str(np.random.choice(["down", "level", "up"]))
    distance = str(np.random.choice(["near", "mid", "far"]))
    room_size = str(np.random.choice(["small", "medium", "large"]))
    reverb = str(np.random.choice(["dry", "medium", "wet"]))

    # Build text
    text = f"{direction}, {elevation}, {distance}, {room_size} room, {reverb}"

    # Get params
    params = {
        "azimuth_deg": float(DIRECTION_BINS[direction]),
        "elevation_deg": float(ELEVATION_BINS[elevation]),
        "distance_m": DISTANCE_BINS[distance],
        "room_size": room_size,
        "reverb_level": reverb,
    }

    return text, params
146
+
147
+
148
def params_to_bins(params: Dict[str, Any]) -> Dict[str, int]:
    """
    Convert continuous parameters to bin indices.

    Numeric parameters are assigned to the nearest bin centre; on a tie the
    lower bin index wins. Azimuth is compared on the circle, so angles are
    equivalent modulo 360 degrees (e.g. -170 is 10 degrees away from 180 and
    315 is the same direction as -45).

    Args:
        params: Dict with azimuth_deg, elevation_deg, distance_m (required)
            and room_size, reverb_level (optional, default "medium").

    Returns:
        Dict with plain-int bin indices under the keys direction_bin,
        elevation_bin, distance_bin, room_bin and reverb_bin.

    Raises:
        KeyError: If azimuth_deg, elevation_deg or distance_m is missing.
        ValueError: If room_size or reverb_level is not a known label.
    """
    # Direction bin (8 bins)
    azimuth = params["azimuth_deg"]
    direction_angles = [0, 45, 90, 135, 180, -135, -90, -45]
    # Wrap each difference into [-180, 180) so the 180/-180 seam is handled.
    direction_bin = int(
        np.argmin(
            [abs((azimuth - a + 180.0) % 360.0 - 180.0) for a in direction_angles]
        )
    )

    # Elevation bin (3 bins)
    elevation = params["elevation_deg"]
    elevation_angles = [-30, 0, 30]
    elevation_bin = int(np.argmin([abs(elevation - a) for a in elevation_angles]))

    # Distance bin (3 bins)
    distance = params["distance_m"]
    distance_values = [1.0, 2.5, 5.0]
    distance_bin = int(np.argmin([abs(distance - d) for d in distance_values]))

    # Room size bin (3 bins)
    room_sizes = ["small", "medium", "large"]
    room_bin = room_sizes.index(params.get("room_size", "medium"))

    # Reverb bin (3 bins)
    reverb_levels = ["dry", "medium", "wet"]
    reverb_bin = reverb_levels.index(params.get("reverb_level", "medium"))

    return {
        "direction_bin": direction_bin,
        "elevation_bin": elevation_bin,
        "distance_bin": distance_bin,
        "room_bin": room_bin,
        "reverb_bin": reverb_bin,
    }
188
+
189
+
190
def bins_to_one_hot(bins: Dict[str, int]) -> np.ndarray:
    """
    Encode bin indices as one concatenated one-hot vector.

    Args:
        bins: Dict with the keys direction_bin, elevation_bin, distance_bin,
            room_bin and reverb_bin, each holding an index into its segment.

    Returns:
        One-hot vector of shape (8 + 3 + 3 + 3 + 3 = 20,), laid out as
        direction, elevation, distance, room, reverb.
    """
    # (key in ``bins``, number of bins) in output order.
    layout = (
        ("direction_bin", 8),
        ("elevation_bin", 3),
        ("distance_bin", 3),
        ("room_bin", 3),
        ("reverb_bin", 3),
    )

    segments = []
    for key, width in layout:
        segment = np.zeros(width)
        segment[bins[key]] = 1.0
        segments.append(segment)

    return np.concatenate(segments)