"""Formant feature extraction using Praat (parselmouth).""" import logging import numpy as np import parselmouth from src.features.schemas import FormantFeatures logger = logging.getLogger(__name__) class FormantExtractor: """Extract formant features (F1, F2, F3) using Praat.""" def __init__( self, max_num_formants: int = 5, ceiling_hz: float = 5500, window_length: float = 0.025, pre_emphasis: float = 0.97, ): """Initialize formant extractor.""" self.max_num_formants = max_num_formants self.ceiling_hz = ceiling_hz self.window_length = window_length self.pre_emphasis = pre_emphasis def extract(self, waveform: np.ndarray, sr: int) -> FormantFeatures: """ Extract formant features. Args: waveform: Audio waveform sr: Sample rate Returns: FormantFeatures with F1, F2, F3 statistics """ logger.debug("Extracting formants using Praat") try: # Create Praat Sound object sound = parselmouth.Sound(waveform, sampling_frequency=sr) # Extract formants formants = sound.to_formant_burg( time_step=0.01, max_number_of_formants=self.max_num_formants, maximum_formant=self.ceiling_hz, window_length=self.window_length, pre_emphasis_from=50.0, ) # Extract F1, F2, F3 contours f1_contour = [] f2_contour = [] f3_contour = [] for time in np.arange(0, sound.duration, 0.01): f1 = formants.get_value_at_time(1, time) f2 = formants.get_value_at_time(2, time) f3 = formants.get_value_at_time(3, time) # Filter out undefined values if f1 is not None and not np.isnan(f1): f1_contour.append(f1) if f2 is not None and not np.isnan(f2): f2_contour.append(f2) if f3 is not None and not np.isnan(f3): f3_contour.append(f3) # Convert to arrays f1_contour = np.array(f1_contour) if f1_contour else np.array([0.0]) f2_contour = np.array(f2_contour) if f2_contour else np.array([0.0]) f3_contour = np.array(f3_contour) if f3_contour else np.array([0.0]) # Compute statistics f1_mean = float(np.mean(f1_contour)) f1_std = float(np.std(f1_contour)) f2_mean = float(np.mean(f2_contour)) f2_std = float(np.std(f2_contour)) f3_mean = float(np.mean(f3_contour)) f3_std = float(np.std(f3_contour)) # Compute vowel space area (VSA) - simplified using F1 and F2 vowel_space_area = self._compute_vsa(f1_contour, f2_contour) # Formant dispersion formant_dispersion = float(np.mean([f1_mean, f2_mean, f3_mean])) logger.info( f"Formants extracted: F1={f1_mean:.0f}Hz, F2={f2_mean:.0f}Hz, VSA={vowel_space_area:.0f}" ) return FormantFeatures( f1_contour=f1_contour, f2_contour=f2_contour, f3_contour=f3_contour, f1_mean=f1_mean, f1_std=f1_std, f2_mean=f2_mean, f2_std=f2_std, f3_mean=f3_mean, f3_std=f3_std, vowel_space_area=vowel_space_area, formant_dispersion=formant_dispersion, ) except Exception as e: logger.error(f"Formant extraction failed: {e}") # Return default values return self._default_formants() def _compute_vsa(self, f1: np.ndarray, f2: np.ndarray) -> float: """Compute vowel space area (simplified triangle area).""" if len(f1) < 3 or len(f2) < 3: return 0.0 # Use percentiles to get corner vowels (simplified) f1_low, f1_mid, f1_high = np.percentile(f1, [25, 50, 75]) f2_low, f2_mid, f2_high = np.percentile(f2, [25, 50, 75]) # Triangle area using Heron's formula (simplified) area = abs((f1_low - f1_high) * (f2_mid - f2_low) / 2.0) return float(area) def _default_formants(self) -> FormantFeatures: """Return default formant features on failure.""" return FormantFeatures( f1_contour=np.array([500.0]), f2_contour=np.array([1500.0]), f3_contour=np.array([2500.0]), f1_mean=500.0, f1_std=0.0, f2_mean=1500.0, f2_std=0.0, f3_mean=2500.0, f3_std=0.0, vowel_space_area=0.0, formant_dispersion=1500.0, )