# utils.py
"""Audio helpers: fixed-duration chunking and fixed-width MFCC extraction."""

import librosa
import numpy as np


def split_audio_chunks(audio, sr, chunk_duration=4.0, min_duration=1.0):
    """Split audio into consecutive, non-overlapping fixed-duration chunks.

    Args:
        audio: Sliceable sequence of samples that supports ``len()``
            (e.g. a 1-D NumPy array).
        sr: Sampling rate, in samples per second.
        chunk_duration: Length of each chunk, in seconds.
        min_duration: Minimum length, in seconds, a chunk must have to be
            kept. It is capped at the chunk length, so full-length chunks
            are never discarded when ``chunk_duration < min_duration``.

    Returns:
        A list of chunks in their original order. Every chunk has
        ``int(chunk_duration * sr)`` samples except possibly the last one,
        which is kept only if it reaches the minimum length.

    Raises:
        ValueError: If ``chunk_duration * sr`` is less than one sample.
    """
    chunk_samples = int(chunk_duration * sr)
    if chunk_samples <= 0:
        # range() rejects a zero step with an unrelated-looking message;
        # fail early with one that names the actual problem.
        raise ValueError(
            "chunk_duration * sr must be at least one sample "
            f"(got chunk_duration={chunk_duration!r}, sr={sr!r})"
        )

    # Only a trailing partial chunk can be shorter than chunk_samples, so
    # capping the threshold keeps every full chunk even for sub-second
    # chunk durations.
    min_samples = min(min_duration * sr, chunk_samples)

    chunks = []
    for start in range(0, len(audio), chunk_samples):
        chunk = audio[start:start + chunk_samples]
        if len(chunk) >= min_samples:
            chunks.append(chunk)
    return chunks


def extract_mfcc_fixed(
    chunk, sr=22050, n_mfcc=13, n_fft=2048, hop_length=512, max_len=87
):
    """Compute MFCCs and force the second axis to exactly ``max_len`` frames.

    Shorter results are right-padded with zeros; longer ones are truncated
    to their first ``max_len`` frames.

    ``max_len`` frames span roughly ``max_len * hop_length / sr`` seconds,
    i.e. 87 * 512 / 22050 ≈ 2.0 s with the defaults (not 4 s).
    NOTE(review): a 4 s chunk at these defaults produces more than 87 frames
    and is therefore truncated here — confirm that this is intended. The
    default is left unchanged so the output shape stays stable.

    Args:
        chunk: Audio samples, passed to ``librosa.feature.mfcc`` as ``y``.
        sr: Sampling rate of ``chunk``, in samples per second.
        n_mfcc: Number of MFCC coefficients to compute.
        n_fft: FFT window size, in samples.
        hop_length: Hop between successive frames, in samples.
        max_len: Number of frames in the returned array.

    Returns:
        A 2-D array of shape ``(n_mfcc, max_len)`` — ``(13, 87)`` with the
        defaults (assumes ``chunk`` is 1-D, so the MFCC matrix is 2-D).
    """
    mfcc = librosa.feature.mfcc(
        y=chunk,
        sr=sr,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )

    n_frames = mfcc.shape[1]
    if n_frames < max_len:
        # Zero-pad on the right of the frame axis only.
        missing = max_len - n_frames
        return np.pad(mfcc, ((0, 0), (0, missing)), mode='constant')
    return mfcc[:, :max_len]