Spaces:

nakas
/

Time-Domain-Audio-Style-Transfer

Runtime error

App Files Files Community

Time-Domain-Audio-Style-Transfer / audio_style_transfer /utils.py

nakas

github fork

2c448c3 over 3 years ago

raw

history blame contribute delete

6.36 kB

	"""NIPS2017 "Time Domain Neural Audio Style Transfer" code repository
	Parag K. Mital
	"""
	import glob
	import os

	import librosa
	import matplotlib
	import matplotlib.pyplot as plt
	import numpy as np
	try:
	    # ``scipy.signal.hann`` was removed in SciPy 1.13; the window functions
	    # live in ``scipy.signal.windows``.
	    from scipy.signal.windows import hann
	except ImportError:  # fallback for SciPy builds without the windows module
	    from scipy.signal import hann


	def limiter(signal,
	            delay=40,
	            threshold=0.9,
	            release_coeff=0.9995,
	            attack_coeff=0.9):
	    """Apply a look-ahead peak limiter to ``signal``, modifying it in place.

	    Each incoming sample updates a peak envelope, while the sample that is
	    written back comes from a circular delay line, so the gain can start
	    dropping before a peak reaches the output.

	    Args:
	        signal: Indexable, writable sequence of samples; overwritten in place.
	        delay: Length of the circular delay line. The output lags the input
	            by ``delay - 1`` samples and starts with zeros.
	        threshold: Envelope level above which the gain is reduced.
	        release_coeff: Per-sample decay factor of the envelope.
	        attack_coeff: Smoothing factor pulling the gain towards its target;
	            values closer to 1 react more slowly.

	    Returns:
	        The same ``signal`` object, now holding the limited samples.
	    """
	    ring = np.zeros(delay)
	    pos = 0
	    level = 0
	    current_gain = 1

	    for n, x in enumerate(signal):
	        # Store the newest sample, then advance to the oldest slot.
	        ring[pos] = x
	        pos = (pos + 1) % delay

	        # Peak follower: jump up immediately, decay by release_coeff.
	        level = max(np.abs(x), level * release_coeff)

	        # Gain that would bring the envelope back down to the threshold.
	        wanted = threshold / level if level > threshold else 1.0

	        # One-pole smoothing of the applied gain towards the wanted gain.
	        current_gain = (current_gain * attack_coeff + wanted *
	                        (1 - attack_coeff))

	        # Emit the delayed sample scaled by the smoothed gain.
	        signal[n] = ring[pos] * current_gain
	    return signal


	def chop(signal, hop_size=256, frame_size=512):
	    """Slice ``signal`` into overlapping, Hann-windowed frames.

	    One frame is produced per full hop (``len(signal) // hop_size`` frames).
	    Frames that run past the end of the signal are zero-padded on the right
	    to ``frame_size`` before the window is applied.

	    Args:
	        signal: 1-D sequence of samples. Integer or boolean input is promoted
	            to float64 so the window can be applied; floating and complex
	            dtypes are preserved.
	        hop_size: Number of samples between the starts of successive frames.
	        frame_size: Number of samples per frame.

	    Returns:
	        Array of shape ``(n_hops, frame_size)``. When the signal is shorter
	        than one hop the result has shape ``(0, frame_size)``.
	    """
	    signal = np.asarray(signal)
	    if not np.issubdtype(signal.dtype, np.inexact):
	        # An in-place multiply by the float window cannot be stored back
	        # into an integer frame, so work in floating point instead.
	        signal = signal.astype(np.float64)

	    n_hops = len(signal) // hop_size
	    hann_win = hann(frame_size)

	    # Pre-allocating with zeros provides the right-hand zero padding and
	    # keeps the result two-dimensional even when there are no frames.
	    frames = np.zeros((n_hops, frame_size), dtype=signal.dtype)
	    for hop_i in range(n_hops):
	        start = hop_i * hop_size
	        frame = signal[start:start + frame_size]
	        frames[hop_i, :len(frame)] = frame
	    frames *= hann_win
	    return frames


	def unchop(frames, hop_size=256, frame_size=512):
	    """Overlap-add ``frames`` into a single 1-D signal.

	    Args:
	        frames: Array whose first axis indexes frames; each frame is added
	            into a ``frame_size``-long slice of the output.
	        hop_size: Offset in samples between consecutive frames.
	        frame_size: Length of the output slice each frame is added into.

	    Returns:
	        Float array of length ``frames.shape[0] * hop_size + frame_size``
	        holding the sum of all frames at their hop offsets.
	    """
	    total_length = frames.shape[0] * hop_size + frame_size
	    signal = np.zeros((total_length,))
	    start = 0
	    for frame in frames:
	        signal[start:start + frame_size] += frame
	        start += hop_size
	    return signal


	def matrix_dft(V):
	    """Compute the DFT of ``V`` by multiplying with an explicit DFT matrix.

	    The matrix has entries ``w ** (j * k) / sqrt(N)`` with
	    ``w = exp(-2j * pi / N)`` and ``N = len(V)``, i.e. it includes the
	    ``1 / sqrt(N)`` normalisation.

	    Args:
	        V: Sequence or array of length ``N`` (transformed along axis 0).

	    Returns:
	        ``np.dot(W, V)`` where ``W`` is the ``N x N`` matrix described above.
	    """
	    size = len(V)
	    root = np.exp(-2j * np.pi / size)
	    # First Vandermonde pass yields [w**0, w**1, ..., w**(N-1)].
	    powers = np.vander([root], size, increasing=True).flatten()
	    # Second pass raises each of those to 0..N-1, giving w**(j*k).
	    dft_matrix = np.vander(powers, size, increasing=True) / np.sqrt(size)
	    return np.dot(dft_matrix, V)


	def dft_np(signal, hop_size=256, fft_size=512):
	    """Project windowed frames of ``signal`` onto cosine and sine bases.

	    The signal is framed with ``chop`` and every frame is multiplied with
	    ``N // 2`` cosine and sine basis vectors, where ``N`` is the frame
	    length. The angular frequencies are ``N // 2`` evenly spaced values from
	    0 to ``2 * pi / N * (N // 2)``, both ends included.

	    Args:
	        signal: 1-D sequence of samples.
	        hop_size: Hop between frames, forwarded to ``chop``.
	        fft_size: Frame length, forwarded to ``chop``.

	    Returns:
	        Tuple ``(real, imag)`` of arrays with one row per frame and
	        ``N // 2`` columns, each scaled by ``2 / N``.
	    """
	    frames = chop(signal, hop_size, fft_size)
	    N = frames.shape[-1]
	    half = N // 2
	    # Row vector of angular frequencies and column vector of sample indices.
	    bin_freqs = np.linspace(0.0, 2 * np.pi / N * (N // 2), half).reshape(
	        1, half)
	    sample_idx = np.linspace(0.0, N - 1, N).reshape(N, 1)
	    # phases[n, k] = n * frequency_k
	    phases = np.dot(sample_idx, bin_freqs)
	    scale = 2.0 / N
	    real = np.dot(frames, np.cos(phases)) * scale
	    imag = np.dot(frames, np.sin(phases)) * scale
	    return real, imag


	def idft_np(re, im, hop_size=256, fft_size=512):
	    """Resynthesise a signal from cosine/sine coefficients by overlap-add.

	    Each row of ``re`` and ``im`` is expanded into a frame of
	    ``N = 2 * re.shape[1]`` samples using the same cosine and sine bases
	    as ``dft_np``; the frames are then summed at multiples of ``hop_size``.

	    Args:
	        re: 2-D array of cosine coefficients, one row per frame.
	        im: 2-D array of sine coefficients, same shape as ``re``.
	        hop_size: Offset in samples between consecutive frames.
	        fft_size: Length of the output slice each frame is added into; the
	            reconstructed frame length ``N`` has to fit that slice.

	    Returns:
	        Float array of length ``re.shape[0] * hop_size + fft_size``.
	    """
	    N = re.shape[1] * 2
	    half = N // 2
	    # Column vector of angular frequencies and row vector of sample indices.
	    bin_freqs = np.linspace(0.0, 2 * np.pi / N * (N // 2), half).reshape(
	        half, 1)
	    sample_idx = np.linspace(0.0, N - 1, N).reshape(1, N)
	    # phases[k, n] = frequency_k * n
	    phases = np.dot(bin_freqs, sample_idx)
	    recon = np.dot(re, np.cos(phases)) + np.dot(im, np.sin(phases))

	    signal = np.zeros((re.shape[0] * hop_size + fft_size,))
	    start = 0
	    for frame in recon:
	        signal[start:start + fft_size] += frame
	        start += hop_size
	    return signal


	def _register_mask_cmap(cmap):
	    """Register ``cmap`` under its own name across Matplotlib versions."""
	    registry = getattr(matplotlib, 'colormaps', None)
	    if registry is not None:
	        # Matplotlib >= 3.5: ``force=True`` replaces an earlier registration
	        # of the same name instead of raising on repeated calls.
	        registry.register(cmap, force=True)
	    else:
	        # Older Matplotlib only offers the pyplot-level helper.
	        plt.register_cmap(cmap=cmap)


	def rainbowgram(path,
	                ax,
	                peak=70.0,
	                use_cqt=False,
	                n_fft=1024,
	                hop_length=256,
	                sr=22050,
	                over_sample=4,
	                res_factor=0.8,
	                octaves=5,
	                notes_per_octave=10):
	    """Draw a "rainbowgram" of an audio file onto ``ax``.

	    The phase derivative along the time axis is drawn with the ``rainbow``
	    colormap, and a black mask whose opacity falls as the normalised log
	    magnitude rises is drawn on top, so quiet bins appear dark.

	    Args:
	        path: Audio file to load with ``librosa.load``.
	        ax: Matplotlib axes to draw on.
	        peak: Dynamic range in dB; used as ``top_db`` and to scale the
	            magnitude into the range 0..1.
	        use_cqt: Use a constant-Q transform when true, otherwise an STFT.
	        n_fft: FFT and window length for the STFT branch.
	        hop_length: Hop length for either transform.
	        sr: Sample rate the audio is resampled to on load.
	        over_sample: Multiplier on ``notes_per_octave`` for the CQT bins.
	        res_factor: ``filter_scale`` passed to ``librosa.cqt``.
	        octaves: Number of octaves covered by the CQT, starting at C2.
	        notes_per_octave: Base number of CQT bins per octave.

	    Returns:
	        None. The images are added to ``ax`` and the mask colormap
	        ``'MyMask'`` is registered with Matplotlib.
	    """
	    audio = librosa.load(path, sr=sr)[0]
	    if use_cqt:
	        C = librosa.cqt(
	            audio,
	            sr=sr,
	            hop_length=hop_length,
	            bins_per_octave=int(notes_per_octave * over_sample),
	            n_bins=int(octaves * notes_per_octave * over_sample),
	            filter_scale=res_factor,
	            fmin=librosa.note_to_hz('C2'))
	    else:
	        C = librosa.stft(
	            audio,
	            n_fft=n_fft,
	            win_length=n_fft,
	            hop_length=hop_length,
	            center=True)
	    mag, phase = librosa.core.magphase(C)

	    # Frame-to-frame phase difference, keeping the first frame's unwrapped
	    # phase as the leading column, expressed in units of pi.
	    phase_angle = np.angle(phase)
	    phase_unwrapped = np.unwrap(phase_angle)
	    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
	    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi

	    # ``librosa.logamplitude(..., ref_power=...)`` was removed in librosa
	    # 0.6; ``power_to_db(..., ref=...)`` is its direct replacement. The dB
	    # values lie in [-peak, 0], so this maps them onto [0, 1].
	    mag = (librosa.power_to_db(
	        mag**2, ref=np.max, amin=1e-13, top_db=peak) / peak) + 1

	    # Solid black colormap whose alpha runs from opaque (0.0) to fully
	    # transparent (1.0).
	    cdict = {
	        'red': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
	        'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
	        'blue': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
	        'alpha': ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
	    }
	    my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)
	    _register_mask_cmap(my_mask)

	    # Rows are flipped so the lowest bin ends up at the bottom of the image.
	    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
	    ax.matshow(mag[::-1, :], cmap=my_mask)


	def rainbowgrams(list_of_paths,
	                 saveto=None,
	                 rows=2,
	                 cols=4,
	                 col_labels=None,
	                 row_labels=None,
	                 use_cqt=True,
	                 figsize=(15, 20),
	                 peak=70.0):
	    """Plot a ``rows`` x ``cols`` grid of rainbowgrams, one per audio file.

	    Args:
	        list_of_paths: Audio file paths, laid out row by row; its length
	            must equal ``rows * cols``.
	        saveto: When not None, the figure is saved to ``'<saveto>.png'``.
	        rows: Number of grid rows.
	        cols: Number of grid columns.
	        col_labels: Optional x-labels, applied to the bottom row.
	        row_labels: Optional y-labels, applied to the first column.
	        use_cqt: Forwarded to ``rainbowgram``.
	        figsize: Figure size passed to ``plt.subplots``.
	        peak: Forwarded to ``rainbowgram``.

	    Raises:
	        AssertionError: If ``len(list_of_paths) != rows * cols``.
	    """
	    N = len(list_of_paths)
	    assert N == rows * cols, (
	        'expected {} paths for a {}x{} grid, got {}'.format(
	            rows * cols, rows, cols, N))

	    # ``squeeze=False`` always yields a 2-D axes array, so single-row,
	    # single-column and 1x1 grids need no special casing.
	    fig, axes = plt.subplots(
	        rows,
	        cols,
	        sharex=True,
	        sharey=True,
	        figsize=figsize,
	        squeeze=False)
	    fig.subplots_adjust(left=0.05, right=0.95, wspace=0.05, hspace=0.1)

	    for i, path in enumerate(list_of_paths):
	        row, col = divmod(i, cols)
	        ax = axes[row, col]
	        rainbowgram(path, ax, peak, use_cqt)
	        # ``set_axis_bgcolor`` was removed in Matplotlib 2.2;
	        # ``set_facecolor`` is its replacement.
	        ax.set_facecolor('white')
	        ax.set_xticks([])
	        ax.set_yticks([])
	        if col == 0 and row_labels:
	            ax.set_ylabel(row_labels[row])
	        if row == rows - 1 and col_labels:
	            ax.set_xlabel(col_labels[col])

	    if saveto is not None:
	        # Pass the path positionally: current Matplotlib names this
	        # parameter ``fname`` and does not accept ``filename=``.
	        fig.savefig('{}.png'.format(saveto))


	def plot_rainbowgrams():
	    """Render a rainbowgram PNG next to every ``.wav`` file that lacks one.

	    Searches the ``target``, ``corpus`` and ``results`` directories
	    (relative to the current working directory) recursively and writes
	    ``<file>.wav.png`` for each audio file without an existing image.
	    """
	    for root in ['target', 'corpus', 'results']:
	        # ``**`` is what makes ``recursive=True`` descend into
	        # subdirectories at any depth.
	        files = glob.glob('{}/**/*.wav'.format(root), recursive=True)
	        for f in files:
	            fname = '{}.png'.format(f)
	            if os.path.exists(fname):
	                continue
	            # ``rainbowgrams`` appends '.png' to ``saveto`` itself, so pass
	            # the bare audio path to end up with exactly ``fname``.
	            rainbowgrams(
	                [f],
	                saveto=f,
	                figsize=(20, 5),
	                rows=1,
	                cols=1)
	            # Release the figure before moving on to the next file.
	            plt.close('all')