Solomon17705's picture
Added Files
f8e6ec4
raw
history blame contribute delete
886 Bytes
# utils.py
import librosa
import numpy as np
def split_audio_chunks(audio, sr, chunk_duration=4.0, min_duration=1.0):
    """Split audio into consecutive, non-overlapping fixed-duration chunks.

    Args:
        audio: Sliceable sequence of samples that supports ``len()``
            (presumably a 1-D array of mono samples -- confirm with callers).
        sr: Sample rate, in samples per second.
        chunk_duration: Length of each chunk, in seconds.
        min_duration: Shortest chunk, in seconds, that is still kept. Only
            the trailing chunk can be shorter than ``chunk_duration``; it is
            dropped when it is shorter than this. The default of 1.0 matches
            the previously hard-coded one-second minimum.

    Returns:
        A list of slices of ``audio`` in their original order. Every chunk
        holds ``int(chunk_duration * sr)`` samples except possibly the last,
        which may be shorter. The list is empty when nothing qualifies.

    Raises:
        ValueError: If ``chunk_duration * sr`` truncates to fewer than one
            sample, which would otherwise give ``range`` a zero or negative
            step.
    """
    chunk_samples = int(chunk_duration * sr)
    if chunk_samples <= 0:
        raise ValueError(
            "chunk_duration * sr must be at least one sample, got "
            f"chunk_duration={chunk_duration!r}, sr={sr!r}"
        )

    # Left un-truncated so the default compares against exactly ``sr``,
    # as the original ``len(chunk) >= sr`` check did.
    min_samples = min_duration * sr

    pieces = (
        audio[start:start + chunk_samples]
        for start in range(0, len(audio), chunk_samples)
    )
    return [piece for piece in pieces if len(piece) >= min_samples]
def extract_mfcc_fixed(chunk, sr=22050, n_mfcc=13, n_fft=2048, hop_length=512, max_len=87):
    """Compute MFCCs for one audio chunk with a fixed number of frames.

    The MFCC matrix from ``librosa.feature.mfcc`` is cut or zero-padded along
    its second (frame) axis so that it always has ``max_len`` columns.

    NOTE(review): an earlier comment equated 87 frames with about 4 seconds.
    With the default ``sr=22050`` and ``hop_length=512``, 87 frames span
    roughly 87 * 512 / 22050 ~= 2 seconds, so longer chunks are truncated.
    Confirm that the default ``max_len`` is the intended value.

    Args:
        chunk: Audio samples for a single chunk (presumably a 1-D mono
            float array -- confirm with callers).
        sr: Sample rate of ``chunk``, in samples per second.
        n_mfcc: Number of MFCC coefficients to compute.
        n_fft: FFT window size, in samples.
        hop_length: Number of samples between successive frames.
        max_len: Number of frames in the returned matrix.

    Returns:
        The MFCC array with its second axis fixed to ``max_len``; with the
        defaults and 1-D input this is expected to be ``(13, 87)``.
    """
    features = librosa.feature.mfcc(
        y=chunk,
        sr=sr,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )

    # Discard surplus frames; leaves the data unchanged when there are
    # already max_len frames or fewer.
    features = features[:, :max_len]

    missing = max_len - features.shape[1]
    if missing > 0:
        # Zero-fill on the right of the frame axis only.
        features = np.pad(features, ((0, 0), (0, missing)), mode='constant')

    return features  # shape: (n_mfcc, max_len)