"""
Feature Normalizer

Min-max normalization for voice and text features based on expected ranges.
"""
import numpy as np
from typing import Dict, Sequence, Tuple


class FeatureNormalizer:
    """Normalize features to [0, 1] range using min-max scaling.

    The range tables below are the single source of truth: dict insertion
    order (guaranteed since Python 3.7) defines the layout of the feature
    vectors, and VOICE_FEATURES / TEXT_FEATURES are derived from them so the
    normalize/denormalize pairs can never drift apart.
    """

    # Expected ranges for voice features (from build spec)
    VOICE_RANGES = {
        'v1_snr': (-10, 40),           # SNR in dB
        'v2_noise_traffic': (0, 1),    # Already normalized
        'v2_noise_office': (0, 1),
        'v2_noise_crowd': (0, 1),
        'v2_noise_wind': (0, 1),
        'v2_noise_clean': (0, 1),
        'v3_speech_rate': (0, 5),      # Words per second
        'v4_pitch_mean': (75, 400),    # Hz
        'v5_pitch_std': (0, 100),      # Hz
        'v6_energy_mean': (0, 0.5),    # RMS energy
        'v7_energy_std': (0, 0.2),
        'v8_pause_ratio': (0, 1),      # Ratio
        'v9_avg_pause_dur': (0, 3),    # Seconds
        'v10_mid_pause_cnt': (0, 20),  # Count
        # Paper 1: emotion features
        'v11_emotion_stress': (0, 1),
        'v12_emotion_energy': (0, 1),
        'v13_emotion_valence': (0, 1),
    }

    # Expected ranges for text features (from build spec)
    TEXT_RANGES = {
        't1_explicit_busy': (0, 1),    # Binary
        't2_avg_resp_len': (0, 30),    # Words
        't3_short_ratio': (0, 1),      # Ratio
        't4_cognitive_load': (0, 0.3), # Ratio
        't5_time_pressure': (0, 0.2),  # Ratio
        't6_deflection': (0, 0.2),     # Ratio
        't7_sentiment': (-1, 1),       # Polarity
        't8_coherence': (0, 1),        # Score
        't9_latency': (0, 10),         # Seconds
    }

    # Canonical feature ordering, derived once from the range tables so the
    # normalize_* and denormalize_* methods always agree on vector layout.
    VOICE_FEATURES: Tuple[str, ...] = tuple(VOICE_RANGES)
    TEXT_FEATURES: Tuple[str, ...] = tuple(TEXT_RANGES)

    def normalize_value(self, value: float, min_val: float, max_val: float) -> float:
        """Min-max normalize ``value`` to [0, 1].

        Values outside [min_val, max_val] are clipped to the range first.
        A degenerate range (max_val == min_val) maps to 0.0 to avoid
        division by zero.
        """
        value = max(min_val, min(max_val, value))
        if max_val == min_val:
            return 0.0
        return float((value - min_val) / (max_val - min_val))

    def _normalize_features(
        self,
        features: Dict[str, float],
        names: Sequence[str],
        ranges: Dict[str, tuple],
    ) -> np.ndarray:
        """Normalize the named features in order; missing keys default to 0.0."""
        values = [
            self.normalize_value(features.get(name, 0.0), *ranges[name])
            for name in names
        ]
        return np.array(values, dtype=np.float32)

    def _denormalize_features(
        self,
        normalized: np.ndarray,
        names: Sequence[str],
        ranges: Dict[str, tuple],
    ) -> Dict[str, float]:
        """Invert min-max scaling for the named features (for interpretability)."""
        result = {}
        for i, name in enumerate(names):
            min_val, max_val = ranges[name]
            result[name] = float(normalized[i] * (max_val - min_val) + min_val)
        return result

    def normalize_voice(self, features: Dict[str, float]) -> np.ndarray:
        """Normalize voice features to [0, 1].

        Handles 17 features (14 original + 3 emotion).

        Args:
            features: Dict with voice feature keys; missing keys default to 0.0.

        Returns:
            np.ndarray of shape (17,) with normalized float32 values.
        """
        return self._normalize_features(features, self.VOICE_FEATURES, self.VOICE_RANGES)

    def normalize_text(self, features: Dict[str, float]) -> np.ndarray:
        """Normalize text features to [0, 1].

        Args:
            features: Dict with 9 text feature keys; missing keys default to 0.0.

        Returns:
            np.ndarray of shape (9,) with normalized float32 values.
        """
        return self._normalize_features(features, self.TEXT_FEATURES, self.TEXT_RANGES)

    def normalize_all(
        self,
        voice_features: Dict[str, float],
        text_features: Dict[str, float],
    ) -> np.ndarray:
        """Normalize all 26 features (17 voice + 9 text) and concatenate.

        Returns:
            np.ndarray of shape (26,) with all normalized features.
        """
        voice_norm = self.normalize_voice(voice_features)
        text_norm = self.normalize_text(text_features)
        return np.concatenate([voice_norm, text_norm])

    def denormalize_voice(self, normalized: np.ndarray) -> Dict[str, float]:
        """Convert normalized voice values back to original scale (for interpretability)."""
        return self._denormalize_features(normalized, self.VOICE_FEATURES, self.VOICE_RANGES)

    def denormalize_text(self, normalized: np.ndarray) -> Dict[str, float]:
        """Convert normalized text values back to original scale (for interpretability)."""
        return self._denormalize_features(normalized, self.TEXT_FEATURES, self.TEXT_RANGES)


if __name__ == "__main__":
    # Test normalizer
    normalizer = FeatureNormalizer()

    # Test voice features (emotion keys omitted; they default to 0.0)
    test_voice = {
        'v1_snr': 15.0,
        'v2_noise_traffic': 0.8,
        'v2_noise_office': 0.1,
        'v2_noise_crowd': 0.05,
        'v2_noise_wind': 0.05,
        'v2_noise_clean': 0.0,
        'v3_speech_rate': 3.5,
        'v4_pitch_mean': 150.0,
        'v5_pitch_std': 25.0,
        'v6_energy_mean': 0.1,
        'v7_energy_std': 0.05,
        'v8_pause_ratio': 0.3,
        'v9_avg_pause_dur': 0.8,
        'v10_mid_pause_cnt': 5.0,
    }

    normalized = normalizer.normalize_voice(test_voice)
    print("Voice features normalized:")
    print(f"Shape: {normalized.shape}")
    print(f"Range: [{normalized.min():.3f}, {normalized.max():.3f}]")