File size: 886 Bytes
f8e6ec4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# utils.py
import librosa
import numpy as np

def split_audio_chunks(audio, sr, chunk_duration=4.0, min_duration=1.0):
    """Split audio into consecutive fixed-duration chunks.

    Args:
        audio: Sequence of samples that supports ``len()`` and slicing.
        sr: Sample rate, in samples per second.
        chunk_duration: Length of each chunk, in seconds.
        min_duration: Minimum length, in seconds, a chunk must have to be
            kept. Only the trailing chunk can be shorter than
            ``chunk_duration``, so this decides whether a short tail is
            kept or dropped. The default of 1.0 keeps tails of at least
            one second.

    Returns:
        A list of slices of ``audio``, in their original order. Each has
        ``int(chunk_duration * sr)`` samples, except possibly the last.

    Raises:
        ValueError: If ``chunk_duration * sr`` truncates to fewer than one
            sample, since no valid chunk size exists in that case.
    """
    chunk_samples = int(chunk_duration * sr)
    if chunk_samples <= 0:
        raise ValueError(
            "chunk_duration * sr must be at least one sample, "
            f"got chunk_duration={chunk_duration!r}, sr={sr!r}"
        )

    # Not truncated to int, so the default compares exactly like `>= sr`.
    min_samples = min_duration * sr

    pieces = (
        audio[start:start + chunk_samples]
        for start in range(0, len(audio), chunk_samples)
    )
    return [piece for piece in pieces if len(piece) >= min_samples]

def extract_mfcc_fixed(chunk, sr=22050, n_mfcc=13, n_fft=2048, hop_length=512, max_len=87):
    """Compute MFCCs for one chunk and force axis 1 to exactly ``max_len``.

    The chunk is passed to ``librosa.feature.mfcc`` with the given ``sr``,
    ``n_mfcc``, ``n_fft`` and ``hop_length``. The result is then truncated
    or padded along axis 1 (the frame axis) so that every call yields the
    same width.

    With the defaults (``hop_length=512``, ``sr=22050``), 87 frames span
    about 87 * 512 / 22050 ≈ 2 seconds of audio.
    NOTE(review): that is roughly half of a 4-second chunk, so longer
    chunks lose their tail here — confirm ``max_len=87`` is intended.

    Args:
        chunk: Audio samples handed to librosa as ``y``.
        sr: Sample rate of ``chunk``.
        n_mfcc: Number of MFCC coefficients to compute.
        n_fft: FFT window size forwarded to librosa.
        hop_length: Hop between frames, forwarded to librosa.
        max_len: Number of frames in the returned array.

    Returns:
        Array whose axis 1 has length ``max_len``; presumably shaped
        ``(n_mfcc, max_len)`` for 1-D input, i.e. (13, 87) by default.
    """
    features = librosa.feature.mfcc(
        y=chunk,
        sr=sr,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
    )

    n_frames = features.shape[1]
    if n_frames >= max_len:
        # Enough frames already: keep only the first max_len of them.
        return features[:, :max_len]

    # Too few frames: pad the end of axis 1 with constant (zero) values.
    missing = max_len - n_frames
    return np.pad(features, ((0, 0), (0, missing)), mode='constant')