File size: 690 Bytes
bddec1e
 
c17b8b3
bddec1e
c17b8b3
 
bddec1e
 
 
 
 
 
 
c17b8b3
bddec1e
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import librosa
import numpy as np

def load_and_resample(audio_filepath: str, target_sr: int = 16000) -> np.ndarray:
    """
    Load an audio file and resample it to the target sample rate.

    The default of 16 kHz is the rate used for Wav2Vec input.

    Args:
        audio_filepath (str): Path to the audio file.
        target_sr (int): The sample rate required by the model.

    Returns:
        np.ndarray: The audio time series as returned by ``librosa.load``.

    Raises:
        RuntimeError: If the file cannot be loaded or resampled. The
            underlying exception is attached as ``__cause__`` so the
            original traceback is preserved for debugging.
    """
    try:
        # librosa resamples while loading when ``sr`` is provided; the sample
        # rate it reports back as the second value is not needed here.
        speech, _ = librosa.load(audio_filepath, sr=target_sr)
    except Exception as e:
        # Deliberately broad: every loading failure is surfaced to callers
        # as a single RuntimeError. Chain explicitly so the root cause is
        # reported as the direct cause rather than an incidental context.
        raise RuntimeError(f"Error processing audio file {audio_filepath}: {e}") from e
    return speech