| import torchaudio | |
| import torch | |
| import random | |
def load_audio(file_path, sample_rate=24000):
    """Load an audio file and return its waveform at ``sample_rate``.

    The file is read with ``torchaudio.load``. If the rate reported by the
    loader differs from ``sample_rate``, the waveform is run through
    ``torchaudio.transforms.Resample`` first; otherwise it is returned exactly
    as loaded. Only the waveform is returned, not the rate.
    """
    audio, native_rate = torchaudio.load(file_path)

    # Already at the requested rate: nothing to convert.
    if native_rate == sample_rate:
        return audio

    resampler = torchaudio.transforms.Resample(native_rate, sample_rate)
    return resampler(audio)
def augment_audio(waveform, sample_rate):
    """Apply simple SoX-based augmentation (gain + tempo + pitch).

    A random gain value, tempo factor and pitch shift are drawn from the
    module-level ``random`` generator and applied in that order through
    ``torchaudio.sox_effects.apply_effects_tensor``. A final ``rate`` effect
    resamples the result back to ``sample_rate``.

    Args:
        waveform: Audio tensor handed to ``apply_effects_tensor``
            (presumably laid out as ``(channels, frames)``, the torchaudio
            default -- confirm against the caller).
        sample_rate: Sample rate of ``waveform`` in Hz; also the rate of the
            returned audio.

    Returns:
        The augmented waveform. Its length generally differs from the input
        because the tempo effect changes the duration.
    """
    # Draw in the same order as before (gain, tempo, pitch) so that seeded
    # runs keep producing the same parameters.
    gain_db = random.uniform(-10, 10)    # -10 dB .. +10 dB
    tempo = random.uniform(0.9, 1.1)     # 0.9x .. 1.1x
    semitones = random.uniform(-1, 1)    # SoX `pitch` takes cents: -100 .. +100

    effects = [
        # NOTE(review): per the SoX manual, `-n` normalises the peak to 0 dBFS
        # before the dB value is applied, so this sets an absolute peak level
        # rather than a relative gain, and positive values may clip. Confirm
        # this is the intended behaviour.
        ["gain", "-n", f"{gain_db}"],
        ["tempo", f"{tempo}"],
        ["pitch", f"{semitones * 100}"],
        # `pitch` alters the stream's sample rate, and unlike the `sox` CLI,
        # apply_effects_tensor does not append a `rate` effect automatically.
        # Without this the returned audio is at a different rate than the
        # caller assumes.
        ["rate", f"{sample_rate}"],
    ]

    augmented, _ = torchaudio.sox_effects.apply_effects_tensor(
        waveform, sample_rate, effects
    )
    return augmented