Spaces:
Sleeping
Sleeping
| """ | |
| Feature Normalizer | |
| Min-max normalization for voice and text features based on expected ranges. | |
| """ | |
| import numpy as np | |
| from typing import Dict | |
class FeatureNormalizer:
    """Normalize features to [0, 1] range using min-max scaling.

    Each feature is clipped to its expected (min, max) range and then
    scaled linearly.  The position of a feature inside the returned
    vectors is fixed by ``VOICE_FEATURE_NAMES`` / ``TEXT_FEATURE_NAMES``,
    which are shared by the normalize and denormalize methods so the two
    directions can never disagree about the layout.
    """

    # Expected ranges for voice features (from build spec)
    VOICE_RANGES = {
        'v1_snr': (-10, 40),  # SNR in dB
        'v2_noise_traffic': (0, 1),  # Already normalized
        'v2_noise_office': (0, 1),
        'v2_noise_crowd': (0, 1),
        'v2_noise_wind': (0, 1),
        'v2_noise_clean': (0, 1),
        'v3_speech_rate': (0, 5),  # Words per second
        'v4_pitch_mean': (75, 400),  # Hz
        'v5_pitch_std': (0, 100),  # Hz
        'v6_energy_mean': (0, 0.5),  # RMS energy
        'v7_energy_std': (0, 0.2),
        'v8_pause_ratio': (0, 1),  # Ratio
        'v9_avg_pause_dur': (0, 3),  # Seconds
        'v10_mid_pause_cnt': (0, 20),  # Count
        'v11_emotion_stress': (0, 1),
        'v12_emotion_energy': (0, 1),
        'v13_emotion_valence': (0, 1),
    }

    # Expected ranges for text features (from build spec)
    TEXT_RANGES = {
        't1_explicit_busy': (0, 1),  # Binary
        't2_avg_resp_len': (0, 30),  # Words
        't3_short_ratio': (0, 1),  # Ratio
        't4_cognitive_load': (0, 0.3),  # Ratio
        't5_time_pressure': (0, 0.2),  # Ratio
        't6_deflection': (0, 0.2),  # Ratio
        't7_sentiment': (-1, 1),  # Polarity
        't8_coherence': (0, 1),  # Score
        't9_latency': (0, 10),  # Seconds
    }

    # Vector layout: index i of a normalized array corresponds to name i here.
    # Derived from the range tables (dicts keep insertion order) so the
    # ordering is declared exactly once.
    VOICE_FEATURE_NAMES = tuple(VOICE_RANGES)  # 17 names
    TEXT_FEATURE_NAMES = tuple(TEXT_RANGES)  # 9 names

    # Raw (un-normalized) value used when a feature is absent or unusable.
    _MISSING_VALUE = 0.0

    def normalize_value(self, value: float, min_val: float, max_val: float) -> float:
        """
        Min-max normalization to [0, 1]

        Values outside [min_val, max_val] are clipped to the nearest bound
        before scaling.  A degenerate range (max_val == min_val) yields 0.0.

        Note: NaN is not detected here; the clip below resolves it to
        max_val.  The dict-level normalizers filter NaN before calling this.
        """
        # Clip to range
        value = max(min_val, min(max_val, value))
        # Guard against division by zero on a zero-width range
        if max_val == min_val:
            return 0.0
        return float((value - min_val) / (max_val - min_val))

    def _normalize_features(self, features, names, ranges) -> np.ndarray:
        """Normalize the features listed in `names`, in that order, to float32."""
        normalized = []
        for name in names:
            value = features.get(name, self._MISSING_VALUE)
            # Treat None and NaN like an absent key.  Previously None raised
            # TypeError and NaN was silently normalized to 1.0.
            # (`value != value` is true only for NaN.)
            if value is None or value != value:
                value = self._MISSING_VALUE
            min_val, max_val = ranges[name]
            normalized.append(self.normalize_value(value, min_val, max_val))
        return np.array(normalized, dtype=np.float32)

    def _denormalize_features(self, normalized, names, ranges) -> Dict[str, float]:
        """Map normalized values back to their original scale, keyed by name."""
        denormalized = {}
        for i, name in enumerate(names):
            min_val, max_val = ranges[name]
            # Indexing (rather than zip) keeps the IndexError on short input.
            denormalized[name] = float(normalized[i] * (max_val - min_val) + min_val)
        return denormalized

    def normalize_voice(self, features: Dict[str, float]) -> np.ndarray:
        """
        Normalize voice features to [0, 1]

        Handles 17 features (14 original + 3 emotion), ordered as in
        VOICE_FEATURE_NAMES.

        Args:
            features: Dict of raw voice features.  Keys that are missing,
                None or NaN are normalized as a raw value of 0.0.
        Returns:
            np.ndarray of shape (17,), dtype float32
        """
        return self._normalize_features(
            features, self.VOICE_FEATURE_NAMES, self.VOICE_RANGES
        )

    def normalize_text(self, features: Dict[str, float]) -> np.ndarray:
        """
        Normalize text features to [0, 1]

        Args:
            features: Dict with 9 text feature keys.  Keys that are missing,
                None or NaN are normalized as a raw value of 0.0.
        Returns:
            np.ndarray of shape (9,) with normalized values, dtype float32
        """
        return self._normalize_features(
            features, self.TEXT_FEATURE_NAMES, self.TEXT_RANGES
        )

    def normalize_all(
        self,
        voice_features: Dict[str, float],
        text_features: Dict[str, float]
    ) -> np.ndarray:
        """
        Normalize all 26 features (17 voice + 9 text) and concatenate

        Returns:
            np.ndarray of shape (26,) with all normalized features,
            voice features first, then text features
        """
        voice_norm = self.normalize_voice(voice_features)
        text_norm = self.normalize_text(text_features)
        return np.concatenate([voice_norm, text_norm])

    def denormalize_voice(self, normalized: np.ndarray) -> Dict[str, float]:
        """
        Convert normalized values back to original scale (for interpretability)

        Args:
            normalized: Sequence whose first 17 entries follow
                VOICE_FEATURE_NAMES order; extra entries are ignored.
        Returns:
            Dict mapping each voice feature name to its de-normalized value
        Raises:
            IndexError: if `normalized` has fewer than 17 entries
        """
        return self._denormalize_features(
            normalized, self.VOICE_FEATURE_NAMES, self.VOICE_RANGES
        )

    def denormalize_text(self, normalized: np.ndarray) -> Dict[str, float]:
        """
        Convert normalized values back to original scale (for interpretability)

        Args:
            normalized: Sequence whose first 9 entries follow
                TEXT_FEATURE_NAMES order; extra entries are ignored.  Pass
                the text slice, not the 26-element output of normalize_all.
        Returns:
            Dict mapping each text feature name to its de-normalized value
        Raises:
            IndexError: if `normalized` has fewer than 9 entries
        """
        return self._denormalize_features(
            normalized, self.TEXT_FEATURE_NAMES, self.TEXT_RANGES
        )
if __name__ == "__main__":
    # Manual smoke test: normalize one hand-written voice sample and print
    # the shape and value range of the resulting vector.
    normalizer = FeatureNormalizer()

    # Sample raw voice features.  The emotion features are not supplied
    # here, so normalize_voice falls back to its default for them.
    test_voice = {
        # Signal quality
        'v1_snr': 15.0,
        # Noise-class scores
        'v2_noise_traffic': 0.8,
        'v2_noise_office': 0.1,
        'v2_noise_crowd': 0.05,
        'v2_noise_wind': 0.05,
        'v2_noise_clean': 0.0,
        # Prosody
        'v3_speech_rate': 3.5,
        'v4_pitch_mean': 150.0,
        'v5_pitch_std': 25.0,
        'v6_energy_mean': 0.1,
        'v7_energy_std': 0.05,
        # Pauses
        'v8_pause_ratio': 0.3,
        'v9_avg_pause_dur': 0.8,
        'v10_mid_pause_cnt': 5.0
    }

    normalized = normalizer.normalize_voice(test_voice)

    print("Voice features normalized:")
    print(f"Shape: {normalized.shape}")
    print(f"Range: [{normalized.min():.3f}, {normalized.max():.3f}]")