| import torch |
| import numpy as np |
|
|
|
|
def generate_white_noise(noise_shape, max_level, rng: np.random.RandomState):
    """Return white Gaussian noise scaled by a randomly drawn level.

    Parameters:
    -----------
    noise_shape : shape accepted by ``rng.normal(size=...)``
        Shape of the returned tensor.
    max_level : float
        The noise level is ``max_level * rng.rand()``.
    rng : np.random.RandomState
        Source of randomness for both the level and the samples.

    Returns
    -------
    torch.Tensor
        Float32 tensor of shape ``noise_shape``.
    """
    # The level is drawn before the samples; this order determines how the
    # rng stream is consumed, so it must not be swapped.
    level = max_level * rng.rand()
    unit_samples = rng.normal(0, 1, size=noise_shape)

    # numpy array -> tensor, cast to float32, then scale.
    return level * torch.from_numpy(unit_samples).float()
|
|
def generate_pink_noise(noise_shape, max_level, rng: np.random.RandomState):
    """Return pink (1/f) noise scaled by a randomly drawn level.

    Parameters:
    -----------
    noise_shape : int or iterable
        Shape of the returned tensor; forwarded to ``powerlaw_psd_gaussian``,
        which shapes the spectrum along the last dimension.
    max_level : float
        The noise level is ``max_level * rng.rand()``.
    rng : np.random.RandomState
        Source of randomness for both the level and the noise samples.

    Returns
    -------
    torch.Tensor
        Float32 tensor of shape ``noise_shape``.
    """
    pink_noise_level = max_level * rng.rand()

    # Draw the samples from the caller's rng. A fixed integer seed here would
    # make every call with the same shape return the same waveform, differing
    # only by the scalar level.
    pink_noise = powerlaw_psd_gaussian(1, noise_shape, random_state=rng)

    return pink_noise_level * torch.from_numpy(pink_noise).float()
|
|
def generate_brown_noise(noise_shape, max_level, rng: np.random.RandomState):
    """Return brown (1/f**2) noise scaled by a randomly drawn level.

    Parameters:
    -----------
    noise_shape : int or iterable
        Shape of the returned tensor; forwarded to ``powerlaw_psd_gaussian``,
        which shapes the spectrum along the last dimension.
    max_level : float
        The noise level is ``max_level * rng.rand()``.
    rng : np.random.RandomState
        Source of randomness for both the level and the noise samples.

    Returns
    -------
    torch.Tensor
        Float32 tensor of shape ``noise_shape``.
    """
    brown_noise_level = max_level * rng.rand()

    # Draw the samples from the caller's rng. A fixed integer seed here would
    # make every call with the same shape return the same waveform, and would
    # reuse the same underlying draws as any other generator seeded that way.
    brown_noise = powerlaw_psd_gaussian(2, noise_shape, random_state=rng)

    return brown_noise_level * torch.from_numpy(brown_noise).float()
|
|
| """Generate colored noise.""" |
|
|
| from numpy import sqrt, newaxis, integer |
| from numpy.fft import irfft, rfftfreq |
| from numpy.random import default_rng, Generator, RandomState |
| from numpy import sum as npsum |
|
|
|
|
def powerlaw_psd_gaussian(exponent, size, fmin=0, random_state=None):
    """Gaussian (1/f)**beta noise.

    Based on the algorithm in:
    Timmer, J. and Koenig, M.:
    On generating power law noise.
    Astron. Astrophys. 300, 707-710 (1995)

    Normalised to unit variance

    Parameters:
    -----------

    exponent : float
        The power-spectrum of the generated noise is proportional to

        S(f) = (1 / f)**beta
        flicker / pink noise:   exponent beta = 1
        brown noise:            exponent beta = 2

        Furthermore, the autocorrelation decays proportional to lag**-gamma
        with gamma = 1 - beta for 0 < beta < 1.
        There may be finite-size issues for beta close to one.

    size : int or iterable
        The output has the given shape, and the desired power spectrum in
        the last coordinate. That is, the last dimension is taken as time,
        and all other components are independent.

    fmin : float, optional
        Low-frequency cutoff.
        Default: 0 corresponds to original paper.

        The power-spectrum below fmin is flat. fmin is defined relative
        to a unit sampling rate (see numpy's rfftfreq). For convenience,
        the passed value is mapped to max(fmin, 1/samples) internally
        since 1/samples is the lowest possible finite frequency in the
        sample. The largest possible value is fmin = 0.5, the Nyquist
        frequency. The output for this value is white noise.

    random_state : int, numpy.integer, numpy.random.Generator, numpy.random.RandomState,
                   optional
        Optionally sets the state of NumPy's underlying random number generator.
        Integer-compatible values or None are passed to np.random.default_rng.
        np.random.RandomState or np.random.Generator are used directly.
        Default: None.

    Returns
    -------
    out : array
        The samples.

    Raises
    ------
    ValueError
        If fmin is outside [0, 0.5], or if random_state is of a type that
        _get_normal_distribution does not accept.


    Examples:
    ---------

    # generate 1/f noise == pink noise == flicker noise
    >>> y = powerlaw_psd_gaussian(1, 5)
    """

    # Accept either an iterable of dimensions or a single non-iterable value.
    try:
        size = list(size)
    except TypeError:
        size = [size]

    # The last dimension is the time axis; its length is the sample count.
    samples = size[-1]

    # Frequencies of the one-sided spectrum, relative to a unit sampling rate.
    f = rfftfreq(samples)

    # Validate fmin, then raise it to 1/samples if it is lower than that.
    if 0 <= fmin <= 0.5:
        fmin = max(fmin, 1./samples)
    else:
        raise ValueError("fmin must be chosen between 0 and 0.5.")

    # Per-frequency scaling factors.
    # NOTE: s_scale is the same array object as f at this point, so the
    # in-place assignment below also changes f. After this, f is only used
    # for its length.
    s_scale = f
    ix = npsum(s_scale < fmin)  # number of frequencies below the cutoff
    if ix and ix < len(s_scale):
        # Make the spectrum flat below fmin by copying the first value that
        # is not below the cutoff into every lower-frequency slot.
        s_scale[:ix] = s_scale[ix]
    s_scale = s_scale**(-exponent/2.)

    # Standard deviation expected from these scaling factors; the output is
    # divided by it at the end.
    w = s_scale[1:].copy()  # leaves out the first (index 0) entry
    w[-1] *= (1 + (samples % 2)) / 2.  # factor is 0.5 for even samples, 1 for odd
    sigma = 2 * sqrt(npsum(w**2)) / samples

    # From here on, size describes the frequency-domain array.
    size[-1] = len(f)

    # Prepend one length-1 axis per leading dimension so that s_scale
    # broadcasts against arrays of shape size.
    dims_to_add = len(size) - 1
    s_scale = s_scale[(newaxis,) * dims_to_add + (Ellipsis,)]

    # Resolve random_state into a callable that draws normal samples.
    normal_dist = _get_normal_distribution(random_state)

    # Real and imaginary parts, each drawn with the per-frequency scale.
    # The real part is drawn first; this order determines the rng stream.
    sr = normal_dist(scale=s_scale, size=size)
    si = normal_dist(scale=s_scale, size=size)

    # For an even sample count the last frequency component is made purely
    # real, and its real part is scaled by sqrt(2).
    if not (samples % 2):
        si[..., -1] = 0
        sr[..., -1] *= sqrt(2)

    # The first (index 0) component is always made purely real, with the
    # same sqrt(2) scaling of its real part.
    si[..., 0] = 0
    sr[..., 0] *= sqrt(2)

    # Combine both parts into the complex spectrum.
    s = sr + 1J * si

    # Inverse real FFT along the last axis, then divide by sigma.
    y = irfft(s, n=samples, axis=-1) / sigma

    return y
|
|
|
|
def _get_normal_distribution(random_state):
    """Return the ``normal`` sampling method for the given random state.

    Parameters:
    -----------
    random_state : int, numpy.integer, numpy.random.Generator,
                   numpy.random.RandomState or None
        Integers and None are passed to ``numpy.random.default_rng`` and the
        resulting generator is used. Generator and RandomState instances are
        used directly.

    Returns
    -------
    callable
        The bound ``normal`` method of the resolved generator.

    Raises
    ------
    ValueError
        If random_state is of any other type.
    """
    if random_state is None or isinstance(random_state, (integer, int)):
        return default_rng(random_state).normal
    if isinstance(random_state, (Generator, RandomState)):
        return random_state.normal
    # The literals are joined by implicit concatenation, so the separator
    # has to be written inside the first one.
    raise ValueError(
        "random_state must be one of integer, numpy.random.Generator, "
        "numpy.random.RandomState, or None"
    )
|
|
|
|
class WhitePinkBrownAugmentation:
    """Augmentation that adds white, pink and brown noise to audio data."""

    def __init__(self, max_white_level=1e-3, max_pink_level=5e-3, max_brown_level=5e-3):
        """
        max_white_level: Maximum level passed to generate_white_noise
        max_pink_level: Maximum level passed to generate_pink_noise
        max_brown_level: Maximum level passed to generate_brown_noise
        """
        self.max_white_level = max_white_level
        self.max_pink_level = max_pink_level
        self.max_brown_level = max_brown_level

    def __call__(self, audio_data, gt_audio, rng: np.random.RandomState):
        """
        audio_data: Input whose ``.shape`` sets the shape of the generated noise
        gt_audio: Returned unchanged
        rng: Random state handed to each noise generator

        Returns the tuple (audio_data plus the summed noise, gt_audio).
        """
        shape = audio_data.shape

        # The generators are called in white, pink, brown order; each one
        # draws from rng, so the order is part of the behavior.
        white = generate_white_noise(shape, self.max_white_level, rng)
        pink = generate_pink_noise(shape, self.max_pink_level, rng)
        brown = generate_brown_noise(shape, self.max_brown_level, rng)

        combined_noise = white + pink + brown
        return audio_data + combined_noise, gt_audio