File size: 924 Bytes
4698bfc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import torchaudio
import torch
import random

def load_audio(file_path, sample_rate=24000):
    """Load an audio file and return its waveform at ``sample_rate`` Hz.

    Args:
        file_path: Location of the audio file, handed to ``torchaudio.load``.
        sample_rate: Target sample rate in Hz (24000 by default).

    Returns:
        The waveform returned by ``torchaudio.load``, resampled with
        ``torchaudio.transforms.Resample`` when the file's native rate
        differs from ``sample_rate``.
    """
    audio, native_rate = torchaudio.load(file_path)

    # Already at the requested rate: hand back the decoded audio untouched.
    if native_rate == sample_rate:
        return audio

    resampler = torchaudio.transforms.Resample(native_rate, sample_rate)
    return resampler(audio)


def augment_audio(waveform, sample_rate):
    """Apply simple random SoX-based augmentation (gain + tempo + pitch).

    Args:
        waveform: Audio tensor forwarded to
            ``torchaudio.sox_effects.apply_effects_tensor``.
        sample_rate: Sample rate of ``waveform`` in Hz. The effect chain
            ends with a ``rate`` effect so the result is returned at this
            same rate.

    Returns:
        The augmented waveform tensor. Its number of samples may differ
        from the input because the tempo effect changes the duration.
    """
    # Random level between -10 dB and +10 dB.
    # NOTE(review): with "-n" SoX normalises the signal to the given level
    # rather than applying a relative gain, so positive values may clip --
    # confirm this is the intended behaviour.
    gain_db = random.uniform(-10, 10)

    # Random tempo change between 0.9x and 1.1x.
    tempo = random.uniform(0.9, 1.1)

    # Random pitch shift between -100 and +100 cents (SoX "pitch" takes cents).
    semitones = random.uniform(-1, 1)

    effects = [
        ["gain", "-n", f"{gain_db}"],
        ["tempo", f"{tempo}"],
        ["pitch", f"{semitones * 100}"],
        # Unlike the sox command line, torchaudio does not automatically add
        # a "rate" effect after "pitch"; without it the output comes back at
        # a shifted sample rate that callers would never see, because the
        # returned rate is discarded below.
        ["rate", f"{sample_rate}"],
    ]

    augmented, _ = torchaudio.sox_effects.apply_effects_tensor(
        waveform, sample_rate, effects
    )
    return augmented