| | import torch |
| | import numpy as np |
| |
|
| |
|
def generate_white_noise(noise_shape, max_level, rng: np.random.RandomState):
    """Return Gaussian white noise with a randomly chosen amplitude.

    Parameters
    ----------
    noise_shape : int or tuple of ints
        Shape of the noise tensor to generate.
    max_level : float
        Upper bound of the amplitude; the actual amplitude is
        ``max_level * rng.rand()``.
    rng : np.random.RandomState
        Source of randomness for both the amplitude and the samples.

    Returns
    -------
    torch.Tensor
        Standard-normal samples cast with ``.float()`` and scaled by the
        drawn amplitude.
    """
    # The amplitude is drawn before the samples; keeping this order keeps the
    # stream consumed from `rng` reproducible for a given seed.
    level = max_level * rng.rand()
    gaussian_samples = rng.normal(0, 1, size=noise_shape)
    unit_noise = torch.from_numpy(gaussian_samples).float()
    return level * unit_noise
| |
|
def generate_pink_noise(noise_shape, max_level, rng: np.random.RandomState):
    """Return pink (1/f) noise with a randomly chosen amplitude.

    Parameters
    ----------
    noise_shape : int or tuple of ints
        Shape of the noise tensor; the last axis carries the 1/f spectrum
        (see ``powerlaw_psd_gaussian``).
    max_level : float
        Upper bound of the amplitude; the actual amplitude is
        ``max_level * rng.rand()``.
    rng : np.random.RandomState
        Source of randomness for both the amplitude and the noise samples.

    Returns
    -------
    torch.Tensor
        Pink noise cast with ``.float()`` and scaled by the drawn amplitude.
    """
    pink_noise_level = max_level * rng.rand()

    # Draw the waveform from the caller's generator. A fixed integer seed here
    # would produce the exact same noise pattern on every call, so only the
    # amplitude would ever vary between calls.
    pink_noise = powerlaw_psd_gaussian(1, noise_shape, random_state=rng)
    pink_noise = pink_noise_level * torch.from_numpy(pink_noise).float()

    return pink_noise
| |
|
def generate_brown_noise(noise_shape, max_level, rng: np.random.RandomState):
    """Return brown (1/f**2) noise with a randomly chosen amplitude.

    Parameters
    ----------
    noise_shape : int or tuple of ints
        Shape of the noise tensor; the last axis carries the 1/f**2 spectrum
        (see ``powerlaw_psd_gaussian``).
    max_level : float
        Upper bound of the amplitude; the actual amplitude is
        ``max_level * rng.rand()``.
    rng : np.random.RandomState
        Source of randomness for both the amplitude and the noise samples.

    Returns
    -------
    torch.Tensor
        Brown noise cast with ``.float()`` and scaled by the drawn amplitude.
    """
    brown_noise_level = max_level * rng.rand()

    # Draw the waveform from the caller's generator. A fixed integer seed here
    # would produce the exact same noise pattern on every call, so only the
    # amplitude would ever vary between calls.
    brown_noise = powerlaw_psd_gaussian(2, noise_shape, random_state=rng)
    brown_noise = brown_noise_level * torch.from_numpy(brown_noise).float()

    return brown_noise
| |
|
| | """Generate colored noise.""" |
| |
|
| | from numpy import sqrt, newaxis, integer |
| | from numpy.fft import irfft, rfftfreq |
| | from numpy.random import default_rng, Generator, RandomState |
| | from numpy import sum as npsum |
| |
|
| |
|
def powerlaw_psd_gaussian(exponent, size, fmin=0, random_state=None):
    """Generate Gaussian noise with a (1/f)**beta power spectrum.

    Based on the algorithm in:
    Timmer, J. and Koenig, M.:
    On generating power law noise.
    Astron. Astrophys. 300, 707-710 (1995)

    The output is normalised to unit variance.

    Parameters
    ----------
    exponent : float
        The power spectrum of the generated noise is proportional to

        S(f) = (1 / f)**beta
        flicker / pink noise:   exponent beta = 1
        brown noise:            exponent beta = 2

        Furthermore, the autocorrelation decays proportional to lag**-gamma
        with gamma = 1 - beta for 0 < beta < 1.
        There may be finite-size issues for beta close to one.

    size : int or iterable
        Shape of the output. The desired power spectrum is applied along the
        last axis (taken as time); all other components are independent.

    fmin : float, optional
        Low-frequency cutoff.
        Default: 0 corresponds to the original paper.

        The power spectrum below fmin is flat. fmin is defined relative
        to a unit sampling rate (see numpy's rfftfreq). For convenience,
        the passed value is mapped to max(fmin, 1/samples) internally
        since 1/samples is the lowest possible finite frequency in the
        sample. The largest possible value is fmin = 0.5, the Nyquist
        frequency. The output for this value is white noise.

    random_state : int, numpy.integer, numpy.random.Generator,
        numpy.random.RandomState, optional
        Selects the random number generator. Integer-compatible values or
        None are passed to np.random.default_rng; np.random.RandomState or
        np.random.Generator instances are used directly.
        Default: None.

    Returns
    -------
    out : array
        The samples.

    Raises
    ------
    ValueError
        If fmin lies outside [0, 0.5].

    Examples
    --------
    # generate 1/f noise == pink noise == flicker noise
    >>> y = powerlaw_psd_gaussian(1, 5)
    """
    # Accept either a bare integer or any iterable of dimensions.
    try:
        dims = list(size)
    except TypeError:
        dims = [size]

    # The last dimension is the time axis.
    n_samples = dims[-1]

    # Frequencies of the one-sided spectrum (unit sampling rate assumed).
    freqs = rfftfreq(n_samples)

    # Validate the cutoff and lift it to the lowest resolvable frequency.
    if not (0 <= fmin <= 0.5):
        raise ValueError("fmin must be chosen between 0 and 0.5.")
    fmin = max(fmin, 1. / n_samples)

    # Flatten the spectrum below the cutoff, then build the per-frequency
    # amplitude scaling f**(-beta/2).
    n_below = npsum(freqs < fmin)
    if n_below and n_below < len(freqs):
        freqs[:n_below] = freqs[n_below]
    amplitude = freqs ** (-exponent / 2.)

    # Theoretical output standard deviation, excluding the DC term; the last
    # (Nyquist) term is weighted differently for even and odd lengths.
    tail = amplitude[1:].copy()
    tail[-1] *= (1 + (n_samples % 2)) / 2.
    sigma = 2 * sqrt(npsum(tail ** 2)) / n_samples

    # The Fourier components have one entry per frequency on the last axis.
    dims[-1] = len(freqs)

    # Prepend singleton axes so the scaling broadcasts over leading dims.
    amplitude = amplitude[(newaxis,) * (len(dims) - 1) + (Ellipsis,)]

    draw_normal = _get_normal_distribution(random_state)

    # Scaled Gaussian real and imaginary parts (real part is drawn first).
    real_part = draw_normal(scale=amplitude, size=dims)
    imag_part = draw_normal(scale=amplitude, size=dims)

    # For even lengths the Nyquist component must be purely real.
    if n_samples % 2 == 0:
        imag_part[..., -1] = 0
        real_part[..., -1] *= sqrt(2)

    # The DC component must always be purely real.
    imag_part[..., 0] = 0
    real_part[..., 0] *= sqrt(2)

    spectrum = real_part + 1J * imag_part

    # Back to the time domain, rescaled to unit variance.
    return irfft(spectrum, n=n_samples, axis=-1) / sigma
| |
|
| |
|
| | def _get_normal_distribution(random_state): |
| | normal_dist = None |
| | if isinstance(random_state, (integer, int)) or random_state is None: |
| | random_state = default_rng(random_state) |
| | normal_dist = random_state.normal |
| | elif isinstance(random_state, (Generator, RandomState)): |
| | normal_dist = random_state.normal |
| | else: |
| | raise ValueError( |
| | "random_state must be one of integer, numpy.random.Generator, or None" |
| | "numpy.random.Randomstate" |
| | ) |
| | return normal_dist |
| |
|
| |
|
class WhitePinkBrownAugmentation:
    """Augmentation that adds white, pink and brown noise to an audio input."""

    def __init__(self, max_white_level=1e-3, max_pink_level=5e-3, max_brown_level=5e-3):
        """
        max_white_level: Upper bound passed to the white-noise generator
        max_pink_level: Upper bound passed to the pink-noise generator
        max_brown_level: Upper bound passed to the brown-noise generator

        Each generator scales its noise by ``max_level * rng.rand()``.
        """
        self.max_white_level = max_white_level
        self.max_pink_level = max_pink_level
        self.max_brown_level = max_brown_level

    def __call__(self, audio_data, gt_audio, rng: np.random.RandomState):
        """
        Add the three noise components to ``audio_data``.

        audio_data: Input audio; noise is generated with ``audio_data.shape``
        gt_audio: Ground-truth audio, returned unchanged
        rng: Random state handed to each noise generator

        Returns the tuple ``(augmented_audio, gt_audio)``.
        """
        shape = audio_data.shape

        # Generators are invoked in a fixed order (white, pink, brown) so the
        # sequence of draws from `rng` stays the same for a given seed.
        white = generate_white_noise(shape, self.max_white_level, rng)
        pink = generate_pink_noise(shape, self.max_pink_level, rng)
        brown = generate_brown_noise(shape, self.max_brown_level, rng)

        total_noise = white + pink + brown
        return audio_data + total_noise, gt_audio