Spaces:
Sleeping
Sleeping
File size: 7,140 Bytes
634310a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 | """
Feature Normalizer
Min-max normalization for voice and text features based on expected ranges.
"""
import numpy as np
from typing import Dict
class FeatureNormalizer:
    """Normalize features to [0, 1] range using min-max scaling.

    The key order of ``VOICE_RANGES`` and ``TEXT_RANGES`` is the single
    source of truth for the layout of the normalized vectors: element ``i``
    of a vector corresponds to the ``i``-th key of the matching range table.
    """

    # Expected ranges for voice features (from build spec)
    VOICE_RANGES = {
        'v1_snr': (-10, 40),  # SNR in dB
        'v2_noise_traffic': (0, 1),  # Already normalized
        'v2_noise_office': (0, 1),
        'v2_noise_crowd': (0, 1),
        'v2_noise_wind': (0, 1),
        'v2_noise_clean': (0, 1),
        'v3_speech_rate': (0, 5),  # Words per second
        'v4_pitch_mean': (75, 400),  # Hz
        'v5_pitch_std': (0, 100),  # Hz
        'v6_energy_mean': (0, 0.5),  # RMS energy
        'v7_energy_std': (0, 0.2),
        'v8_pause_ratio': (0, 1),  # Ratio
        'v9_avg_pause_dur': (0, 3),  # Seconds
        'v10_mid_pause_cnt': (0, 20),  # Count
        'v11_emotion_stress': (0, 1),
        'v12_emotion_energy': (0, 1),
        'v13_emotion_valence': (0, 1),
    }

    # Expected ranges for text features (from build spec)
    TEXT_RANGES = {
        't1_explicit_busy': (0, 1),  # Binary
        't2_avg_resp_len': (0, 30),  # Words
        't3_short_ratio': (0, 1),  # Ratio
        't4_cognitive_load': (0, 0.3),  # Ratio
        't5_time_pressure': (0, 0.2),  # Ratio
        't6_deflection': (0, 0.2),  # Ratio
        't7_sentiment': (-1, 1),  # Polarity
        't8_coherence': (0, 1),  # Score
        't9_latency': (0, 10),  # Seconds
    }

    def normalize_value(self, value: float, min_val: float, max_val: float) -> float:
        """Min-max normalize ``value`` to [0, 1], clipping to the range.

        Args:
            value: Raw feature value.
            min_val: Lower bound of the expected range.
            max_val: Upper bound of the expected range.

        Returns:
            The clipped, scaled value as a Python float. Returns 0.0 when
            the range is degenerate (``max_val == min_val``) or when
            ``value`` is NaN.
        """
        # NaN compares False against everything, so the min/max clip below
        # would silently turn it into max_val (i.e. 1.0). Map it to 0.0.
        if np.isnan(value):
            return 0.0
        # Degenerate range: avoid division by zero.
        if max_val == min_val:
            return 0.0
        clipped = max(min_val, min(max_val, value))
        return float((clipped - min_val) / (max_val - min_val))

    def _normalize_features(self, features: Dict[str, float], ranges: dict) -> np.ndarray:
        """Normalize ``features`` against ``ranges`` in the table's key order."""
        scaled = []
        for name, (min_val, max_val) in ranges.items():
            raw = features.get(name)
            # A key that is absent or explicitly None falls back to a raw
            # value of 0.0 (which is then clipped into the feature's range).
            if raw is None:
                raw = 0.0
            scaled.append(self.normalize_value(raw, min_val, max_val))
        return np.array(scaled, dtype=np.float32)

    @staticmethod
    def _denormalize_features(normalized, ranges: dict) -> Dict[str, float]:
        """Map positions of ``normalized`` back to raw scale using ``ranges``."""
        return {
            name: float(normalized[idx] * (max_val - min_val) + min_val)
            for idx, (name, (min_val, max_val)) in enumerate(ranges.items())
        }

    def normalize_voice(self, features: Dict[str, float]) -> np.ndarray:
        """Normalize voice features to [0, 1].

        Handles 17 features (14 original + 3 emotion), ordered as the keys
        of ``VOICE_RANGES``.

        Args:
            features: Dict keyed by voice feature name. Missing or ``None``
                entries are treated as a raw value of 0.0.

        Returns:
            float32 np.ndarray with one entry per key of ``VOICE_RANGES``.
        """
        return self._normalize_features(features, self.VOICE_RANGES)

    def normalize_text(self, features: Dict[str, float]) -> np.ndarray:
        """Normalize text features to [0, 1].

        Args:
            features: Dict with 9 text feature keys. Missing or ``None``
                entries are treated as a raw value of 0.0.

        Returns:
            float32 np.ndarray of shape (9,) with normalized values, ordered
            as the keys of ``TEXT_RANGES``.
        """
        return self._normalize_features(features, self.TEXT_RANGES)

    def normalize_all(
        self,
        voice_features: Dict[str, float],
        text_features: Dict[str, float]
    ) -> np.ndarray:
        """Normalize all 26 features (17 voice + 9 text) and concatenate.

        Returns:
            np.ndarray of shape (26,): voice features first, then text.
        """
        voice_norm = self.normalize_voice(voice_features)
        text_norm = self.normalize_text(text_features)
        return np.concatenate([voice_norm, text_norm])

    def denormalize_voice(self, normalized: np.ndarray) -> Dict[str, float]:
        """Convert normalized voice values back to original scale.

        Intended for interpretability. ``normalized`` is indexed by
        position in the key order of ``VOICE_RANGES``; an input shorter
        than that raises ``IndexError``.

        Returns:
            Dict mapping each voice feature name to its raw-scale value.
        """
        return self._denormalize_features(normalized, self.VOICE_RANGES)

    def denormalize_text(self, normalized: np.ndarray) -> Dict[str, float]:
        """Convert normalized text values back to original scale.

        Intended for interpretability. ``normalized`` is indexed by
        position in the key order of ``TEXT_RANGES``; an input shorter
        than that raises ``IndexError``.

        Returns:
            Dict mapping each text feature name to its raw-scale value.
        """
        return self._denormalize_features(normalized, self.TEXT_RANGES)
if __name__ == "__main__":
    # Manual smoke test: normalize one sample voice feature dict and print
    # the shape and value range of the resulting vector.
    normalizer = FeatureNormalizer()

    # The three emotion features are left out on purpose, so they take
    # whatever default normalize_voice applies to missing keys.
    sample_voice = dict(
        v1_snr=15.0,
        v2_noise_traffic=0.8,
        v2_noise_office=0.1,
        v2_noise_crowd=0.05,
        v2_noise_wind=0.05,
        v2_noise_clean=0.0,
        v3_speech_rate=3.5,
        v4_pitch_mean=150.0,
        v5_pitch_std=25.0,
        v6_energy_mean=0.1,
        v7_energy_std=0.05,
        v8_pause_ratio=0.3,
        v9_avg_pause_dur=0.8,
        v10_mid_pause_cnt=5.0,
    )

    normalized = normalizer.normalize_voice(sample_voice)

    print("Voice features normalized:")
    print(f"Shape: {normalized.shape}")
    print(f"Range: [{normalized.min():.3f}, {normalized.max():.3f}]")
|