"""MFCC feature extraction.""" import logging import numpy as np import librosa from src.features.schemas import MFCCFeatures logger = logging.getLogger(__name__) class MFCCExtractor: """Extract MFCC features with delta and delta-delta.""" def __init__( self, n_mfcc: int = 13, n_fft: int = 2048, hop_length: int = 512, n_mels: int = 128, fmin: float = 0, fmax: float = 8000, delta_width: int = 9, ): """Initialize MFCC extractor.""" self.n_mfcc = n_mfcc self.n_fft = n_fft self.hop_length = hop_length self.n_mels = n_mels self.fmin = fmin self.fmax = fmax self.delta_width = delta_width def extract(self, waveform: np.ndarray, sr: int) -> MFCCFeatures: """ Extract MFCC features. Args: waveform: Audio waveform sr: Sample rate Returns: MFCCFeatures with 39-dimensional feature vectors """ logger.debug(f"Extracting MFCCs: n_mfcc={self.n_mfcc}") # Extract MFCCs mfcc = librosa.feature.mfcc( y=waveform, sr=sr, n_mfcc=self.n_mfcc, n_fft=self.n_fft, hop_length=self.hop_length, n_mels=self.n_mels, fmin=self.fmin, fmax=self.fmax, ) # Compute deltas delta = librosa.feature.delta(mfcc, width=self.delta_width) delta_delta = librosa.feature.delta(mfcc, order=2, width=self.delta_width) # Combine: (13, time) + (13, time) + (13, time) = (39, time) combined = np.vstack([mfcc, delta, delta_delta]) # Compute statistics mean = np.mean(combined, axis=1) std = np.std(combined, axis=1) logger.info(f"Extracted MFCCs: shape={combined.shape}") return MFCCFeatures( mfcc=mfcc, delta=delta, delta_delta=delta_delta, combined=combined, mean=mean, std=std, )