Spaces:

divAIne
/

busy-module-xgboost

Sleeping

File size: 7,140 Bytes

634310a

"""

Feature Normalizer

Min-max normalization for voice and text features based on expected ranges.

"""

import numpy as np
from typing import Dict


class FeatureNormalizer:
    """Normalize features to [0, 1] range using min-max scaling.

    Each feature has a fixed expected (min, max) range. Raw values are
    clipped to that range and then scaled linearly so that min maps to 0.0
    and max maps to 1.0. The position of every feature in the output
    vectors is fixed by VOICE_FEATURE_ORDER / TEXT_FEATURE_ORDER.
    """

    # Expected ranges for voice features (from build spec).
    # NOTE: insertion order of this dict defines the voice vector layout;
    # append new features at the end, never reorder.
    VOICE_RANGES = {
        'v1_snr': (-10, 40),          # SNR in dB
        'v2_noise_traffic': (0, 1),    # Already normalized
        'v2_noise_office': (0, 1),
        'v2_noise_crowd': (0, 1),
        'v2_noise_wind': (0, 1),
        'v2_noise_clean': (0, 1),
        'v3_speech_rate': (0, 5),      # Words per second
        'v4_pitch_mean': (75, 400),    # Hz
        'v5_pitch_std': (0, 100),      # Hz
        'v6_energy_mean': (0, 0.5),    # RMS energy
        'v7_energy_std': (0, 0.2),
        'v8_pause_ratio': (0, 1),      # Ratio
        'v9_avg_pause_dur': (0, 3),    # Seconds
        'v10_mid_pause_cnt': (0, 20),  # Count
        # Paper 1: emotion features
        'v11_emotion_stress': (0, 1),
        'v12_emotion_energy': (0, 1),
        'v13_emotion_valence': (0, 1),
    }

    # Expected ranges for text features (from build spec).
    # NOTE: insertion order of this dict defines the text vector layout.
    TEXT_RANGES = {
        't1_explicit_busy': (0, 1),    # Binary
        't2_avg_resp_len': (0, 30),    # Words
        't3_short_ratio': (0, 1),      # Ratio
        't4_cognitive_load': (0, 0.3), # Ratio
        't5_time_pressure': (0, 0.2),  # Ratio
        't6_deflection': (0, 0.2),     # Ratio
        't7_sentiment': (-1, 1),       # Polarity
        't8_coherence': (0, 1),        # Score
        't9_latency': (0, 10),         # Seconds
    }

    # Single source of truth for vector layout. normalize_* and
    # denormalize_* both read these, so the two directions cannot drift
    # apart the way two hand-maintained name lists could.
    VOICE_FEATURE_ORDER = tuple(VOICE_RANGES)
    TEXT_FEATURE_ORDER = tuple(TEXT_RANGES)

    def normalize_value(self, value: float, min_val: float, max_val: float) -> float:
        """
        Min-max normalization of a single value to [0, 1].

        Values outside [min_val, max_val] are clipped to the nearest bound.

        Args:
            value: Raw feature value.
            min_val: Lower bound of the expected range (maps to 0.0).
            max_val: Upper bound of the expected range (maps to 1.0).

        Returns:
            The normalized value as a Python float. Returns 0.0 when the
            range is degenerate (min_val == max_val), and NaN when value
            is NaN.
        """
        # Degenerate range: avoid dividing by zero.
        if max_val == min_val:
            return 0.0

        # NaN compares False against everything, so the max/min clip below
        # would silently turn it into max_val (i.e. 1.0). Propagate it
        # instead so an invalid measurement is not reported as "maximum".
        if np.isnan(value):
            return float('nan')

        clipped = max(min_val, min(max_val, value))
        return float((clipped - min_val) / (max_val - min_val))

    def _normalize_features(
        self,
        features: Dict[str, float],
        names: tuple,
        ranges: Dict[str, tuple],
    ) -> np.ndarray:
        """Normalize the features listed in names, in that order, into a float32 vector."""
        scaled = []
        for name in names:
            raw = features.get(name)
            # A key that is absent or explicitly None counts as missing and
            # falls back to a raw value of 0.0 (then clipped into range).
            if raw is None:
                raw = 0.0
            low, high = ranges[name]
            scaled.append(self.normalize_value(raw, low, high))
        return np.array(scaled, dtype=np.float32)

    def _denormalize_features(
        self,
        normalized: np.ndarray,
        names: tuple,
        ranges: Dict[str, tuple],
    ) -> Dict[str, float]:
        """Map positional normalized values back to a name -> original-scale dict."""
        restored = {}
        for index, name in enumerate(names):
            low, high = ranges[name]
            restored[name] = float(normalized[index] * (high - low) + low)
        return restored

    def normalize_voice(self, features: Dict[str, float]) -> np.ndarray:
        """
        Normalize voice features to [0, 1].

        Handles 17 features (14 original + 3 emotion).

        Args:
            features: Dict keyed by voice feature name. Missing or None
                entries are treated as a raw value of 0.0.

        Returns:
            np.ndarray of shape (17,), dtype float32, ordered as
            VOICE_FEATURE_ORDER.
        """
        return self._normalize_features(
            features, self.VOICE_FEATURE_ORDER, self.VOICE_RANGES
        )

    def normalize_text(self, features: Dict[str, float]) -> np.ndarray:
        """
        Normalize text features to [0, 1].

        Args:
            features: Dict with 9 text feature keys. Missing or None
                entries are treated as a raw value of 0.0.

        Returns:
            np.ndarray of shape (9,), dtype float32, ordered as
            TEXT_FEATURE_ORDER.
        """
        return self._normalize_features(
            features, self.TEXT_FEATURE_ORDER, self.TEXT_RANGES
        )

    def normalize_all(
        self,
        voice_features: Dict[str, float],
        text_features: Dict[str, float]
    ) -> np.ndarray:
        """
        Normalize all 26 features (17 voice + 9 text) and concatenate.

        Args:
            voice_features: Raw voice features, see normalize_voice.
            text_features: Raw text features, see normalize_text.

        Returns:
            np.ndarray of shape (26,) with the voice features first,
            followed by the text features.
        """
        voice_norm = self.normalize_voice(voice_features)
        text_norm = self.normalize_text(text_features)

        return np.concatenate([voice_norm, text_norm])

    def denormalize_voice(self, normalized: np.ndarray) -> Dict[str, float]:
        """
        Convert normalized voice values back to original scale
        (for interpretability).

        Args:
            normalized: Indexable sequence whose first 17 entries are in
                VOICE_FEATURE_ORDER.

        Returns:
            Dict mapping each voice feature name to its value on the
            original scale. Clipping applied during normalization is not
            reversible.

        Raises:
            IndexError: If normalized has fewer than 17 entries.
        """
        return self._denormalize_features(
            normalized, self.VOICE_FEATURE_ORDER, self.VOICE_RANGES
        )

    def denormalize_text(self, normalized: np.ndarray) -> Dict[str, float]:
        """
        Convert normalized text values back to original scale
        (for interpretability).

        Args:
            normalized: Indexable sequence whose first 9 entries are in
                TEXT_FEATURE_ORDER.

        Returns:
            Dict mapping each text feature name to its value on the
            original scale. Clipping applied during normalization is not
            reversible.

        Raises:
            IndexError: If normalized has fewer than 9 entries.
        """
        return self._denormalize_features(
            normalized, self.TEXT_FEATURE_ORDER, self.TEXT_RANGES
        )


if __name__ == "__main__":
    # Manual smoke test: runs only when this file is executed directly.
    normalizer = FeatureNormalizer()
    
    # Sample raw voice features. The three emotion keys (v11-v13) are not
    # supplied here; normalize_voice substitutes 0.0 for any missing key.
    test_voice = {
        'v1_snr': 15.0,
        'v2_noise_traffic': 0.8,
        'v2_noise_office': 0.1,
        'v2_noise_crowd': 0.05,
        'v2_noise_wind': 0.05,
        'v2_noise_clean': 0.0,
        'v3_speech_rate': 3.5,
        'v4_pitch_mean': 150.0,
        'v5_pitch_std': 25.0,
        'v6_energy_mean': 0.1,
        'v7_energy_std': 0.05,
        'v8_pause_ratio': 0.3,
        'v9_avg_pause_dur': 0.8,
        'v10_mid_pause_cnt': 5.0
    }
    
    # Expect a 17-element vector; clipping keeps every value inside [0, 1].
    normalized = normalizer.normalize_voice(test_voice)
    print("Voice features normalized:")
    print(f"Shape: {normalized.shape}")
    print(f"Range: [{normalized.min():.3f}, {normalized.max():.3f}]")