# sunf/utils/audio_processing.py
# Uploaded by anhtunguyen98 with huggingface_hub ("Upload folder using huggingface_hub").
# Commit 4698bfc (verified).
import torchaudio
import torch
import random
def load_audio(file_path, sample_rate=24000):
    """Load an audio file and return its waveform at ``sample_rate``.

    The file is decoded with ``torchaudio.load``. If the rate reported for
    the file differs from ``sample_rate``, the waveform is converted with
    ``torchaudio.transforms.Resample``; otherwise it is returned exactly as
    decoded.

    Args:
        file_path: Audio source passed straight to ``torchaudio.load``.
        sample_rate: Desired sampling rate of the result. Defaults to 24000.

    Returns:
        The decoded waveform tensor, resampled when the rates differ.
    """
    audio, native_rate = torchaudio.load(file_path)

    # Nothing to do when the file is already at the requested rate.
    if native_rate == sample_rate:
        return audio

    resampler = torchaudio.transforms.Resample(native_rate, sample_rate)
    return resampler(audio)
def augment_audio(waveform, sample_rate):
    """Apply simple SoX-based augmentation (gain + tempo + pitch).

    Three random parameters are drawn with ``random.uniform`` (in this
    order: gain, tempo, pitch) and applied through
    ``torchaudio.sox_effects.apply_effects_tensor``. A final ``rate`` effect
    resamples the result back to ``sample_rate`` so the returned waveform is
    at the same sampling rate as the input.

    Args:
        waveform: Audio tensor in the layout expected by
            ``apply_effects_tensor`` (channels-first by default).
        sample_rate: Sampling rate of ``waveform``; also the rate of the
            returned audio.

    Returns:
        The augmented waveform. Because ``tempo`` changes the duration, the
        number of samples generally differs from the input.
    """
    # Draw in the same order as the effects are applied so that seeding
    # ``random`` yields reproducible augmentations.
    gain_db = random.uniform(-10, 10)      # dB offset in [-10, +10]
    tempo = random.uniform(0.9, 1.1)       # speed factor, pitch preserved
    semitones = random.uniform(-1, 1)      # converted to cents below

    effects = [
        # NOTE(review): with ``-n`` SoX first normalises the peak to 0 dBFS
        # and then applies ``gain_db``, so this sets an absolute level rather
        # than a relative gain, and positive values may clip. Kept as-is to
        # preserve existing behaviour -- confirm this is intended.
        ["gain", "-n", f"{gain_db}"],
        ["tempo", f"{tempo}"],
        # SoX's ``pitch`` takes its shift in cents (100 cents = 1 semitone).
        ["pitch", f"{semitones * 100}"],
        # ``pitch`` alters the stream's sample rate; unlike the sox CLI,
        # the effect chain used here does not resample back automatically.
        # Without this the output would silently be at a different rate than
        # ``sample_rate`` (the returned rate is discarded below).
        ["rate", f"{sample_rate}"],
    ]

    waveform, _ = torchaudio.sox_effects.apply_effects_tensor(
        waveform, sample_rate, effects
    )
    return waveform