Multilingual-ASR / app /audio_processing.py
adiitya29's picture
Initialized project directories, added requirements, and implemented core Gradio UI with lazy-loaded Wav2Vec2 inference
bddec1e
raw
history blame contribute delete
690 Bytes
import librosa
import numpy as np
def load_and_resample(audio_filepath: str, target_sr: int = 16000) -> np.ndarray:
    """Load an audio file and resample it to ``target_sr``.

    The default of 16 kHz is the sample rate this project feeds to Wav2Vec.

    Args:
        audio_filepath: Path to the audio file.
        target_sr: The sample rate required by the model.

    Returns:
        The audio time series as returned by ``librosa.load``.

    Raises:
        RuntimeError: If loading or resampling fails for any reason. The
            underlying exception is attached as ``__cause__`` so the original
            traceback is preserved for debugging.
    """
    try:
        # Passing ``sr`` makes librosa resample to that rate while loading;
        # the returned sample rate is therefore redundant and discarded.
        speech, _ = librosa.load(audio_filepath, sr=target_sr)
    except Exception as e:
        # Chain explicitly so callers (and logs) see the root cause rather
        # than only the stringified message.
        raise RuntimeError(
            f"Error processing audio file {audio_filepath}: {e}"
        ) from e
    return speech