# File size: 924 Bytes
import torchaudio
import torch
import random
def load_audio(file_path, sample_rate=24000):
    """Load an audio file and return its waveform at ``sample_rate``.

    Args:
        file_path: Location of the audio file, passed to ``torchaudio.load``.
        sample_rate: Desired sample rate of the returned waveform
            (24000 by default).

    Returns:
        The waveform returned by ``torchaudio.load``, resampled with
        ``torchaudio.transforms.Resample`` when the file's own rate differs
        from ``sample_rate``.
    """
    audio, native_rate = torchaudio.load(file_path)

    # Nothing to do when the file is already at the requested rate.
    if native_rate == sample_rate:
        return audio

    resampler = torchaudio.transforms.Resample(native_rate, sample_rate)
    return resampler(audio)
def augment_audio(waveform, sample_rate):
    """Apply simple SoX-based augmentation (gain + tempo + pitch).

    A random gain, tempo change and pitch shift are drawn on every call and
    applied through ``torchaudio.sox_effects.apply_effects_tensor``.

    Args:
        waveform: Audio tensor accepted by ``apply_effects_tensor``
            (presumably ``(channels, frames)`` -- confirm against callers).
        sample_rate: Sample rate of ``waveform``.

    Returns:
        The augmented waveform, at the same ``sample_rate`` as the input.
        The tempo effect is a time-stretch, so the number of frames will
        generally differ from the input.
    """
    effects = []

    # Random gain between -10 dB and +10 dB.
    # NOTE(review): per the SoX manual, ``-n`` normalises the peak to 0 dBFS
    # before the gain is applied, so positive draws may clip -- confirm this
    # is intended.
    gain_db = random.uniform(-10, 10)
    effects.append(["gain", "-n", f"{gain_db}"])

    # Random tempo change between 0.9x and 1.1x
    tempo = random.uniform(0.9, 1.1)
    effects.append(["tempo", f"{tempo}"])

    # Random pitch shift between -100 and +100 cents
    semitones = random.uniform(-1, 1)
    effects.append(["pitch", f"{semitones * 100}"])

    # SoX's pitch effect alters the output sample rate, and libsox (unlike the
    # sox command line) does not add a trailing resample automatically.
    # Resample back explicitly so the returned audio really is at
    # ``sample_rate``; the rate reported by SoX is otherwise discarded below.
    effects.append(["rate", f"{sample_rate}"])

    augmented, _ = torchaudio.sox_effects.apply_effects_tensor(
        waveform, sample_rate, effects
    )
    return augmented