import random

import torch
import torchaudio


def load_audio(file_path, sample_rate: int = 24000) -> torch.Tensor:
    """Load an audio file and resample it to ``sample_rate`` if needed.

    Args:
        file_path: Location of the audio file, passed to ``torchaudio.load``.
        sample_rate: Target sampling rate in Hz.

    Returns:
        The waveform tensor returned by ``torchaudio.load``, resampled when
        the file's native rate differs from ``sample_rate``.
    """
    waveform, sr = torchaudio.load(file_path)
    if sr != sample_rate:
        waveform = torchaudio.transforms.Resample(sr, sample_rate)(waveform)
    return waveform


def augment_audio(waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
    """Apply simple SoX-based augmentation (gain + tempo + pitch).

    The random parameters are drawn from the global ``random`` module in the
    order gain, tempo, pitch, so seeding ``random`` makes the augmentation
    reproducible.

    Args:
        waveform: Audio tensor to augment.
        sample_rate: Sampling rate of ``waveform`` in Hz. The returned
            waveform is converted back to this rate.

    Returns:
        The augmented waveform. Its length may differ from the input because
        of the tempo change.
    """
    effects = []

    # Random level between -10 dB and +10 dB.
    # NOTE(review): "gain -n <dB>" normalises the signal to the given level
    # rather than adding a relative gain; positive targets may clip. Confirm
    # whether a plain ["gain", dB] was intended.
    gain_db = random.uniform(-10, 10)
    effects.append(["gain", "-n", f"{gain_db}"])

    # Random tempo change between 0.9x and 1.1x
    tempo = random.uniform(0.9, 1.1)
    effects.append(["tempo", f"{tempo}"])

    # Random pitch shift between -100 and +100 cents
    semitones = random.uniform(-1, 1)
    effects.append(["pitch", f"{semitones * 100}"])

    # Unlike the sox command line, apply_effects_tensor does not append a
    # "rate" effect after "pitch" automatically. Without it the output comes
    # back at a shifted sample rate, which the caller never sees because the
    # returned rate is discarded below.
    effects.append(["rate", f"{sample_rate}"])

    waveform, _ = torchaudio.sox_effects.apply_effects_tensor(
        waveform, sample_rate, effects
    )
    return waveform