# utils.py (file size: 886 bytes, revision f8e6ec4)
import librosa
import numpy as np
def split_audio_chunks(audio, sr, chunk_duration=4.0):
"""Split audio into fixed-duration chunks (in seconds)"""
chunk_samples = int(chunk_duration * sr)
chunks = []
for i in range(0, len(audio), chunk_samples):
chunk = audio[i:i + chunk_samples]
if len(chunk) >= sr: # at least 1 sec
chunks.append(chunk)
return chunks
def extract_mfcc_fixed(chunk, sr=22050, n_mfcc=13, n_fft=2048, hop_length=512, max_len=87):
    """Compute MFCCs for one audio chunk and force a fixed number of frames.

    The MFCC matrix from ``librosa.feature.mfcc`` is cut to its first
    ``max_len`` columns when it has at least that many, and otherwise padded
    on the right of the second axis with ``np.pad`` in ``'constant'`` mode.

    Args:
        chunk: Audio samples, passed to librosa as ``y``.
        sr: Sample rate, passed through to librosa.
        n_mfcc: Number of MFCC coefficients, passed through to librosa.
        n_fft: FFT window size, passed through to librosa.
        hop_length: Hop between frames, passed through to librosa.
        max_len: Number of columns in the returned matrix.

    Returns:
        A 2-D array whose second axis has exactly ``max_len`` entries. The
        original comment gave the shape as (13, 87), which corresponds to the
        default ``n_mfcc`` and ``max_len``.

    NOTE(review): the original docstring said "87 ≈ 4 sec", but with the
    default ``hop_length=512`` and ``sr=22050``, 87 hops cover roughly
    87 * 512 / 22050 ≈ 2 s. Confirm the intended sample rate or ``max_len``.
    """
    features = librosa.feature.mfcc(
        y=chunk,
        sr=sr,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )

    n_frames = features.shape[1]
    if n_frames >= max_len:
        # Enough frames already: keep only the leading max_len columns.
        return features[:, :max_len]

    # Too few frames: extend the second axis on the right up to max_len.
    missing = max_len - n_frames
    return np.pad(features, ((0, 0), (0, missing)), mode='constant')