| |
| import librosa |
| import numpy as np |
|
|
def split_audio_chunks(audio, sr, chunk_duration=4.0, min_duration=1.0):
    """Split audio into consecutive fixed-duration chunks.

    Args:
        audio: Sliceable sequence of samples supporting ``len()``
            (e.g. a list or a 1-D array).
        sr: Sampling rate in samples per second.
        chunk_duration: Length of each chunk in seconds.
        min_duration: Minimum length, in seconds, a chunk must have to be
            kept. Chunks shorter than ``min_duration * sr`` samples are
            dropped. Defaults to one second.

    Returns:
        A list of slices of ``audio``. Every chunk holds
        ``int(chunk_duration * sr)`` samples, except possibly the last one,
        which may be shorter.

    Raises:
        ValueError: If ``chunk_duration * sr`` truncates to a non-positive
            number of samples.
    """
    chunk_samples = int(chunk_duration * sr)
    if chunk_samples <= 0:
        # A zero step would make range() fail with an unrelated message and
        # a negative step would silently yield no chunks at all.
        raise ValueError(
            "chunk_duration * sr must be at least one sample, "
            f"got {chunk_samples}"
        )

    min_samples = min_duration * sr
    pieces = (
        audio[start:start + chunk_samples]
        for start in range(0, len(audio), chunk_samples)
    )
    # Only the trailing piece can be short, but note that when
    # chunk_duration < min_duration every piece is filtered out.
    return [piece for piece in pieces if len(piece) >= min_samples]
|
|
def extract_mfcc_fixed(chunk, sr=22050, n_mfcc=13, n_fft=2048, hop_length=512, max_len=87):
    """Extract MFCCs and force the time axis to exactly ``max_len`` frames.

    Args:
        chunk: Audio samples passed to ``librosa.feature.mfcc`` as ``y``.
        sr: Sampling rate of ``chunk`` in samples per second.
        n_mfcc: Number of MFCC coefficients to compute.
        n_fft: FFT window size in samples.
        hop_length: Number of samples between successive frames.
        max_len: Number of frames in the returned array.

    Returns:
        The MFCC matrix with its second axis padded with zeros or truncated
        to ``max_len`` columns.

    Note:
        The number of frames grows with ``len(chunk) / hop_length``, so at
        the defaults (sr=22050, hop_length=512) 87 frames cover roughly
        2 seconds of audio, not 4. A 4-second chunk produces about 173
        frames and is therefore truncated to approximately its first half.
        Pass a larger ``max_len`` to keep the whole chunk.
    """
    mfcc = librosa.feature.mfcc(
        y=chunk, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length
    )

    n_frames = mfcc.shape[1]
    if n_frames < max_len:
        # Too short: append zero-valued frames at the end of the time axis.
        return np.pad(mfcc, ((0, 0), (0, max_len - n_frames)), mode='constant')

    # Long enough: keep only the first max_len frames.
    return mfcc[:, :max_len]