Spaces:

ALeLacheur
/

voiceblock

Sleeping

App Files Files Community

voiceblock / voicebox /src /simulation /noise.py

ALeLacheur

Voiceblock demo: Attempt 8

957e2dc over 1 year ago

raw

history blame contribute delete

5.67 kB

	import random
	import math
	import torch
	import torch.nn.functional as F
	import torchaudio

	from pathlib import Path

	import librosa as li
	from src.simulation.effect import Effect

	# Select torchaudio's "sox_io" I/O backend as an import-time side effect.
	# No other torchaudio call is visible in this section of the file.
	# NOTE(review): `set_audio_backend` is believed to be deprecated/removed in
	# recent torchaudio releases -- confirm against the pinned version.
	torchaudio.set_audio_backend("sox_io")

	################################################################################
	# Simulate environmental noise
	################################################################################


	class Noise(Effect):
	    """
	    Simple additive noise effect.

	    A noise signal and an SNR value are drawn by `sample_params()` and kept
	    until the next call; `forward()` scales the stored noise to that SNR
	    relative to each input signal and adds it.

	    Reads `self.signal_length`, `self.sample_rate` and `self.parse_range`,
	    none of which are defined in this class (presumably provided by
	    `Effect` -- verify against the base class).
	    """

	    def __init__(self,
	                 compute_grad: bool = True,
	                 type: str = 'gaussian',
	                 snr: any = None,
	                 noise_dir: str = None,
	                 ext: str = "wav"):
	        """
	        Apply additive noise to audio signal. SNR calculations adapted from
	        VoxCeleb-Trainer (https://github.com/clovaai/voxceleb_trainer/)

	        :param compute_grad: if False, perform straight-through gradient
	                             estimation
	        :param type: type of noise to add; must be one of `gaussian`,
	                     `uniform`, or `environmental`
	        :param snr: decibel Signal-to-Noise ratio (dB SNR) of added noise;
	                    passed to `parse_range` to obtain a (min, max) range
	        :param noise_dir: directory from which to draw noise samples, if
	                          `type` is `environmental` (searched recursively)
	        :param ext: extension for audio files in `noise_dir`
	        :raises ValueError: if `type` is `environmental` and `noise_dir` is
	                            missing or contains no matching files, or if
	                            `type` is not a recognized noise type
	        """
	        super().__init__(compute_grad)

	        self.type = type
	        self.noise_list = None
	        self.ext = ext

	        if type == 'environmental':
	            if not noise_dir:
	                raise ValueError(
	                    'Environmental noise requires sample directory'
	                )
	            self.noise_list = list(Path(noise_dir).rglob(f'*.{self.ext}'))

	            # fail here with a clear message instead of an IndexError from
	            # `random.choice` inside `sample_params()`
	            if not self.noise_list:
	                raise ValueError(
	                    f'No *.{self.ext} files found under {noise_dir}'
	                )

	        # parse valid range of SNR parameter
	        self.min_snr, self.max_snr = self.parse_range(
	            snr,
	            float,
	            f'Invalid noise SNR {snr}'
	        )

	        # store noise as buffer to allow device movement
	        self.register_buffer("noise", torch.zeros(1, dtype=torch.float32))
	        self.register_buffer("noise_db", torch.zeros(1, dtype=torch.float32))

	        # initialize parameters
	        self.snr = None
	        self.sample_params()

	    def forward(self, x: torch.Tensor):
	        """
	        Add the stored noise to `x` at the stored SNR.

	        :param x: audio tensor with at least two dimensions, time last; a
	                  2-D input gets a channel dimension inserted internally
	        :return: noisy signal with the same shape as `x`
	        """

	        # require batch, channel dimensions
	        assert x.ndim >= 2, 'input requires at least 2 dimensions'
	        orig_shape = x.shape

	        if x.ndim == 2:
	            x = x.unsqueeze(1)

	        n_samples = x.shape[-1]

	        # scale noise level to stored SNR
	        signal_db = 10 * torch.log10(
	            torch.mean(torch.square(x), dim=-1, keepdims=True) + 1e-8
	        )
	        scale = torch.sqrt(
	            torch.pow(10, (signal_db - self.noise_db - self.snr) / 10)
	        )

	        # tile the stored noise along time until it covers the input;
	        # unlike circular padding this may wrap around more than once
	        noise = self.noise.clone().to(x)
	        n_stored = noise.shape[-1]
	        if n_stored < n_samples:
	            n_tiles = math.ceil(n_samples / n_stored)
	            noise = noise.repeat(*([1] * (noise.ndim - 1)), n_tiles)

	        # trim to input length and apply per-signal gain
	        noise = scale * noise[..., :n_samples]

	        # reshape to original dimensions
	        return (noise + x).reshape(orig_shape)

	    @staticmethod
	    def _crossfade(sig, fade_len):
	        """
	        Return a copy of `sig` with a linear fade-in and fade-out applied.

	        :param sig: signal tensor, time last
	        :param fade_len: length of each fade as a fraction of the signal
	                         length
	        :return: faded copy; `sig` itself is not modified
	        """
	        sig = sig.clone()
	        n_fade = int(fade_len * sig.shape[-1])

	        # nothing to fade; also avoids `sig[..., -0:]`, which selects the
	        # whole signal rather than an empty tail
	        if n_fade <= 0:
	            return sig

	        fade_in = torch.linspace(0, 1, n_fade).to(sig)
	        fade_out = torch.linspace(1, 0, n_fade).to(sig)
	        sig[..., :n_fade] *= fade_in
	        sig[..., -n_fade:] *= fade_out
	        return sig

	    def _loop_with_crossfade(self, noise, length, overlap=0.05):
	        """
	        Loop a short clip to `length` samples by overlap-adding faded copies.

	        :param noise: non-empty 1-D clip shorter than `length`
	        :param length: number of output samples
	        :param overlap: fraction of the clip used for each fade and for the
	                        overlap between consecutive copies
	        :return: float32 tensor of shape (1, length)
	        """
	        clip_len = noise.shape[-1]
	        step = math.ceil(clip_len * (1 - overlap))
	        n_repeat = math.ceil(length / step)

	        # long enough to hold every copy; trimmed to `length` below
	        looped = torch.zeros(
	            1, step * (n_repeat - 1) + clip_len, dtype=torch.float32
	        )

	        # the faded clip is identical for every copy, so compute it once
	        faded = self._crossfade(noise, overlap)
	        for j in range(n_repeat):
	            start = j * step
	            looped[..., start:start + clip_len] += faded

	        return looped[..., :length]

	    def sample_params(self):
	        """
	        Sample SNR uniformly from stored range and draw a new noise signal.

	        - `gaussian`: standard-normal samples, shape (signal_length,)
	        - `uniform`: sign of standard-normal samples, shape (signal_length,)
	        - `environmental`: a randomly chosen file from `self.noise_list`,
	          loaded as mono at `self.sample_rate`, then trimmed or looped with
	          cross-fade to `self.signal_length`; shape (1, 1, signal_length)

	        Also updates `self.noise_db`, the noise power in dB over the last
	        dimension.

	        :raises ValueError: if `self.type` is not a recognized noise type or
	                            the chosen noise file contains no samples
	        """
	        self.snr = random.uniform(self.min_snr, self.max_snr)

	        if self.type == "gaussian":
	            self.noise = torch.randn(self.signal_length).to(self.noise)
	        elif self.type == "uniform":
	            self.noise = torch.sign(
	                torch.randn(self.signal_length)
	            ).to(self.noise)
	        elif self.type == "environmental":

	            # load from randomly-selected file
	            noise_path = random.choice(self.noise_list)
	            noise_np, _ = li.load(
	                noise_path,
	                sr=self.sample_rate, mono=True
	            )
	            noise = torch.as_tensor(noise_np)

	            # trim or loop (with cross-fade) to match expected signal length
	            if noise.shape[-1] >= self.signal_length:
	                noise = noise[..., :self.signal_length]
	            else:
	                if noise.shape[-1] == 0:
	                    raise ValueError(
	                        f'Noise file {noise_path} contains no samples'
	                    )
	                noise = self._loop_with_crossfade(
	                    noise, self.signal_length, 0.05
	                )

	            self.noise = noise.reshape(1, 1, -1).to(self.noise)

	        else:
	            raise ValueError(f'Invalid noise type {self.type}')

	        self.noise_db = 10 * torch.log10(
	            torch.mean(torch.square(self.noise), dim=-1, keepdims=True) + 1e-8
	        ).to(self.noise_db)