Upload folder using huggingface_hub

df9f13e verified 5 months ago

6.95 kB

	import torch
	import numpy as np


	def generate_white_noise(noise_shape, max_level, rng: np.random.RandomState):
	    """Return Gaussian white noise scaled by a randomly chosen level.

	    Parameters
	    ----------
	    noise_shape : int or tuple of int
	        Shape of the generated noise.
	    max_level : float
	        The scale factor is ``max_level * rng.rand()``.
	    rng : np.random.RandomState
	        Random source used for both the scale factor and the samples.

	    Returns
	    -------
	    torch.Tensor
	        float32 tensor of shape ``noise_shape``.
	    """
	    # The level is drawn before the samples; keep this order so a seeded
	    # rng reproduces the same output.
	    amplitude = max_level * rng.rand()
	    gaussian = rng.normal(0, 1, size=noise_shape)
	    unit_noise = torch.from_numpy(gaussian).to(torch.float32)
	    return amplitude * unit_noise

	def generate_pink_noise(noise_shape, max_level, rng: np.random.RandomState):
	    """Return pink (1/f) noise scaled by a randomly chosen level.

	    Parameters
	    ----------
	    noise_shape : int or iterable of int
	        Shape of the generated noise. The power-law spectrum is applied
	        along the last axis.
	    max_level : float
	        The scale factor is ``max_level * rng.rand()``.
	    rng : np.random.RandomState
	        Random source used for both the scale factor and the noise samples.

	    Returns
	    -------
	    torch.Tensor
	        float32 tensor of shape ``noise_shape``.
	    """
	    # Choose pink noise level
	    pink_noise_level = max_level * rng.rand()

	    # Draw the samples from the caller's rng. A fixed integer seed here would
	    # make every call return the same waveform (only rescaled), which defeats
	    # the purpose of a random augmentation.
	    pink_noise = powerlaw_psd_gaussian(1, noise_shape, random_state=rng)
	    pink_noise = pink_noise_level * torch.from_numpy(pink_noise).float()

	    return pink_noise

	def generate_brown_noise(noise_shape, max_level, rng: np.random.RandomState):
	    """Return brown (1/f**2) noise scaled by a randomly chosen level.

	    Parameters
	    ----------
	    noise_shape : int or iterable of int
	        Shape of the generated noise. The power-law spectrum is applied
	        along the last axis.
	    max_level : float
	        The scale factor is ``max_level * rng.rand()``.
	    rng : np.random.RandomState
	        Random source used for both the scale factor and the noise samples.

	    Returns
	    -------
	    torch.Tensor
	        float32 tensor of shape ``noise_shape``.
	    """
	    # Choose brown noise level
	    brown_noise_level = max_level * rng.rand()

	    # Draw the samples from the caller's rng. A fixed integer seed here would
	    # make every call return the same waveform (only rescaled), which defeats
	    # the purpose of a random augmentation.
	    brown_noise = powerlaw_psd_gaussian(2, noise_shape, random_state=rng)
	    brown_noise = brown_noise_level * torch.from_numpy(brown_noise).float()

	    return brown_noise

	"""Generate colored noise."""

	from numpy import sqrt, newaxis, integer
	from numpy.fft import irfft, rfftfreq
	from numpy.random import default_rng, Generator, RandomState
	from numpy import sum as npsum


	def powerlaw_psd_gaussian(exponent, size, fmin=0, random_state=None):
	    """Generate Gaussian noise with a (1/f)**beta power spectrum.

	    Implements the algorithm of Timmer, J. and Koenig, M., "On generating
	    power law noise", Astron. Astrophys. 300, 707-710 (1995). The result is
	    normalised to unit variance.

	    Parameters
	    ----------
	    exponent : float
	        The exponent beta of the power spectrum S(f) = (1 / f)**beta.
	        beta = 1 gives flicker / pink noise, beta = 2 gives brown noise.
	    size : int or iterable of int
	        Shape of the output. The last dimension is treated as time and
	        carries the requested spectrum; all other components are
	        independent.
	    fmin : float, optional
	        Low-frequency cutoff relative to a unit sampling rate; the spectrum
	        is flat below it. Internally replaced by ``max(fmin, 1/samples)``.
	        Must lie in [0, 0.5]. Default: 0.
	    random_state : int, numpy.integer, numpy.random.Generator,
	        numpy.random.RandomState or None, optional
	        Seed or random generator used to draw the Fourier coefficients.
	        Default: None.

	    Returns
	    -------
	    out : array
	        The noise samples, with shape ``size``.

	    Raises
	    ------
	    ValueError
	        If ``fmin`` is outside [0, 0.5].
	    """
	    # Work on a mutable list copy of the shape; a bare int becomes [int].
	    try:
	        dims = list(size)
	    except TypeError:
	        dims = [size]

	    # Length of each time series (last axis).
	    n_samples = dims[-1]

	    # Frequencies of the one-sided (Hermitian) spectrum at unit sample rate.
	    freqs = rfftfreq(n_samples)

	    # Reject an out-of-range cutoff, then clamp it to the lowest finite
	    # frequency that exists in the sample.
	    if not 0 <= fmin <= 0.5:
	        raise ValueError("fmin must be chosen between 0 and 0.5.")
	    cutoff = max(fmin, 1./n_samples)

	    # Flatten the spectrum below the cutoff by copying the first
	    # frequency at or above it into all lower bins.
	    n_below = npsum(freqs < cutoff)
	    if 0 < n_below < len(freqs):
	        freqs[:n_below] = freqs[n_below]
	    amplitude = freqs**(-exponent/2.)

	    # Theoretical standard deviation of the output, used for normalisation.
	    weights = amplitude[1:].copy()
	    weights[-1] *= (1 + (n_samples % 2)) / 2.  # correct f = +-0.5
	    sigma = 2 * sqrt(npsum(weights**2)) / n_samples

	    # One Fourier coefficient per frequency along the last axis.
	    dims[-1] = len(freqs)

	    # Prepend singleton axes so the amplitudes broadcast over the leading
	    # dimensions of the random draws.
	    leading_axes = (newaxis,) * (len(dims) - 1)
	    amplitude = amplitude[leading_axes + (Ellipsis,)]

	    draw_normal = _get_normal_distribution(random_state)

	    # Real part is drawn first, imaginary part second.
	    real_part = draw_normal(scale=amplitude, size=dims)
	    imag_part = draw_normal(scale=amplitude, size=dims)

	    # For an even length the +/-0.5 frequencies coincide, so that
	    # coefficient has to be real; rescale it to keep its magnitude.
	    if n_samples % 2 == 0:
	        imag_part[..., -1] = 0
	        real_part[..., -1] *= sqrt(2)

	    # The DC coefficient must always be real.
	    imag_part[..., 0] = 0
	    real_part[..., 0] *= sqrt(2)

	    spectrum = real_part + 1J * imag_part

	    # Back to the time domain, scaled to unit variance.
	    return irfft(spectrum, n=n_samples, axis=-1) / sigma


	def _get_normal_distribution(random_state):
	normal_dist = None
	if isinstance(random_state, (integer, int)) or random_state is None:
	random_state = default_rng(random_state)
	normal_dist = random_state.normal
	elif isinstance(random_state, (Generator, RandomState)):
	normal_dist = random_state.normal
	else:
	raise ValueError(
	"random_state must be one of integer, numpy.random.Generator, or None"
	"numpy.random.Randomstate"
	)
	return normal_dist


	class WhitePinkBrownAugmentation:
	    """Augmentation that adds white, pink and brown noise to an audio input."""

	    def __init__(self, max_white_level=1e-3, max_pink_level=5e-3, max_brown_level=5e-3):
	        """
	        max_white_level: Maximum level passed to the white noise generator
	        max_pink_level: Maximum level passed to the pink noise generator
	        max_brown_level: Maximum level passed to the brown noise generator
	        """
	        self.max_white_level, self.max_pink_level, self.max_brown_level = (
	            max_white_level,
	            max_pink_level,
	            max_brown_level,
	        )

	    def __call__(self, audio_data, gt_audio, rng: np.random.RandomState):
	        """
	        Add the three noise types to ``audio_data``; ``gt_audio`` is returned
	        unchanged. Returns the pair ``(augmented_audio, gt_audio)``.
	        """
	        # assumes audio_data is a torch tensor (the noise terms are) -- TODO confirm
	        shape = audio_data.shape

	        # The generators are called in white, pink, brown order on the shared rng.
	        white = generate_white_noise(shape, self.max_white_level, rng)
	        pink = generate_pink_noise(shape, self.max_pink_level, rng)
	        brown = generate_brown_noise(shape, self.max_brown_level, rng)

	        total_noise = white + pink + brown
	        return audio_data + total_noise, gt_audio