Spaces:
Runtime error
Runtime error
| """NIPS2017 "Time Domain Neural Audio Style Transfer" code repository | |
| Parag K. Mital | |
| """ | |
| import glob | |
| import numpy as np | |
| from scipy.signal import hann | |
| import librosa | |
| import matplotlib | |
| import matplotlib.pyplot as plt | |
| import os | |
def limiter(signal,
            delay=40,
            threshold=0.9,
            release_coeff=0.9995,
            attack_coeff=0.9):
    """Peak-limit `signal` in place using a short delay line.

    A peak envelope of the incoming samples is tracked; whenever it rises
    above `threshold` the gain is steered towards `threshold / envelope`,
    otherwise towards 1.0.  The smoothed gain is applied to a delayed copy
    of the signal so that the gain has time to react before a peak is
    written out.

    Parameters
    ----------
    signal : indexable sequence of samples
        Overwritten sample by sample with the limited output.
    delay : int
        Length of the circular delay buffer.  The value read back each step
        is the one stored `delay - 1` iterations earlier (zeros until the
        buffer has filled).
    threshold : float
        Envelope level above which gain reduction kicks in.
    release_coeff : float
        Per-sample decay factor applied to the envelope.
    attack_coeff : float
        Smoothing factor for the gain; the closer to 1, the slower the gain
        moves towards its target.

    Returns
    -------
    The same `signal` object that was passed in, now holding the output.
    """
    ring = np.zeros(delay)
    cursor = 0
    peak_env = 0
    smoothed_gain = 1
    for pos, value in enumerate(signal):
        # Store the newest sample, then advance so that `cursor` points at
        # the oldest slot of the ring buffer.
        ring[cursor] = value
        cursor = (cursor + 1) % delay
        # Envelope follower: jump up to new peaks, decay otherwise.
        peak_env = max(np.abs(value), peak_env * release_coeff)
        wanted_gain = threshold / peak_env if peak_env > threshold else 1.0
        # One-pole smoothing of the gain towards the wanted value.
        smoothed_gain = (smoothed_gain * attack_coeff +
                         wanted_gain * (1 - attack_coeff))
        # Emit the delayed sample with the current gain applied.
        signal[pos] = ring[cursor] * smoothed_gain
    return signal
def chop(signal, hop_size=256, frame_size=512):
    """Split `signal` into overlapping Hann-windowed frames.

    Frame `i` covers `signal[i * hop_size : i * hop_size + frame_size]`;
    frames that run past the end of the signal are zero-padded on the right
    to `frame_size` samples before the window is applied.

    Parameters
    ----------
    signal : 1-D sequence of samples
        Left unmodified; every frame is a fresh array.
    hop_size : int
        Number of samples between the starts of consecutive frames.
    frame_size : int
        Length of every frame (and of the Hann window).

    Returns
    -------
    np.ndarray of shape `(len(signal) // hop_size, frame_size)`.  When the
    signal is shorter than one hop the result is an empty `(0, frame_size)`
    array so that callers can still rely on the frame length.
    """
    n_hops = len(signal) // hop_size
    if n_hops == 0:
        # Keep the result 2-D even when there is nothing to frame.
        return np.zeros((0, frame_size))
    hann_win = hann(frame_size)
    frames = []
    for hop_i in range(n_hops):
        start = hop_i * hop_size
        frame = signal[start:start + frame_size]
        # np.pad always returns a new array, so the in-place windowing below
        # never writes into the caller's signal.
        frame = np.pad(frame, (0, frame_size - len(frame)), 'constant')
        if not np.issubdtype(frame.dtype, np.inexact):
            # Integer/bool samples cannot hold the windowed values; an
            # in-place multiply by the float window would raise a casting
            # error, so promote first.
            frame = frame.astype(np.float64)
        frame *= hann_win
        frames.append(frame)
    return np.array(frames)
def unchop(frames, hop_size=256, frame_size=512):
    """Overlap-add a stack of frames back into a single 1-D signal.

    Parameters
    ----------
    frames : np.ndarray
        2-D array; row `i` is added into the output starting at sample
        `i * hop_size`.  Each row is expected to be `frame_size` long.
    hop_size : int
        Distance in samples between the starts of consecutive frames.
    frame_size : int
        Number of output samples each frame is added into.

    Returns
    -------
    np.ndarray of length `frames.shape[0] * hop_size + frame_size`.
    """
    total_len = frames.shape[0] * hop_size + frame_size
    signal = np.zeros((total_len,))
    offset = 0
    for frame in frames:
        signal[offset:offset + frame_size] += frame
        offset += hop_size
    return signal
def matrix_dft(V):
    """Return the DFT of `V` computed with an explicit DFT matrix.

    The matrix has entries `w ** (j * k) / sqrt(N)` with
    `w = exp(-2j * pi / N)` and `N = len(V)`, i.e. it carries a
    `1 / sqrt(N)` normalisation.

    Parameters
    ----------
    V : sequence or np.ndarray
        Input whose first dimension has length N.

    Returns
    -------
    np.ndarray: the matrix product of the N x N DFT matrix with `V`.
    """
    n_points = len(V)
    root = np.exp(-2j * np.pi / n_points)
    # First Vandermonde pass yields the powers 1, w, w**2, ... of the root.
    powers = np.vander([root], n_points, increasing=True).flatten()
    # Second pass raises each of those to 0..N-1, giving the full matrix.
    dft_matrix = np.vander(powers, n_points, increasing=True)
    dft_matrix = dft_matrix / np.sqrt(n_points)
    return np.dot(dft_matrix, V)
def dft_np(signal, hop_size=256, fft_size=512):
    """Project windowed frames of `signal` onto cosine and sine bases.

    The signal is framed with `chop(signal, hop_size, fft_size)` and each
    frame (length N) is multiplied with `cos` / `sin` matrices built from
    N // 2 angular frequencies.  Those frequencies come from an
    endpoint-inclusive `linspace` over `[0, 2*pi/N * (N // 2)]`.  Both
    outputs are scaled by `2 / N`.

    Parameters
    ----------
    signal : 1-D sequence of samples
    hop_size : int
        Hop between frames, forwarded to `chop`.
    fft_size : int
        Frame length, forwarded to `chop`.

    Returns
    -------
    (real, imag) : the cosine and sine projections of the frames, one row
    per frame and one column per frequency.
    """
    frames = chop(signal, hop_size, fft_size)
    n = frames.shape[-1]
    half = n // 2
    omega = np.linspace(0.0, 2 * np.pi / n * half, half).reshape(1, half)
    sample_idx = np.linspace(0.0, n - 1, n).reshape(n, 1)
    # Outer product: entry (t, f) is sample index t times frequency f.
    basis_angles = np.dot(sample_idx, omega)
    scale = 2.0 / n
    real = np.dot(frames, np.cos(basis_angles)) * scale
    imag = np.dot(frames, np.sin(basis_angles)) * scale
    return real, imag
def idft_np(re, im, hop_size=256, fft_size=512):
    """Resynthesise a signal from cosine/sine coefficients by overlap-add.

    Each row of `re` / `im` is expanded back to a frame of
    N = 2 * re.shape[1] samples by multiplying with `cos` / `sin` matrices
    whose frequencies come from an endpoint-inclusive `linspace` over
    `[0, 2*pi/N * (N // 2)]`.  Frames are then summed into the output at
    multiples of `hop_size`.

    Parameters
    ----------
    re, im : np.ndarray
        2-D coefficient arrays, one row per frame.
    hop_size : int
        Distance in samples between consecutive frames in the output.
    fft_size : int
        Number of output samples each frame is added into; expected to
        equal the reconstructed frame length N.

    Returns
    -------
    np.ndarray of length `re.shape[0] * hop_size + fft_size`.
    """
    n = re.shape[1] * 2
    half = n // 2
    omega = np.linspace(0.0, 2 * np.pi / n * half, half).reshape(half, 1)
    sample_idx = np.linspace(0.0, n - 1, n).reshape(1, n)
    # Outer product: entry (f, t) is frequency f times sample index t.
    basis_angles = np.dot(omega, sample_idx)
    recon = (np.dot(re, np.cos(basis_angles)) +
             np.dot(im, np.sin(basis_angles)))
    signal = np.zeros((re.shape[0] * hop_size + fft_size,))
    start = 0
    for frame in recon:
        signal[start:start + fft_size] += frame
        start += hop_size
    return signal
def _power_to_db(power, peak):
    """Convert a power spectrogram to dB relative to its own maximum."""
    # `logamplitude(..., ref_power=...)` was replaced by
    # `power_to_db(..., ref=...)` in later librosa releases; prefer the new
    # name and fall back to the old one on installations that predate it.
    if hasattr(librosa, 'power_to_db'):
        return librosa.power_to_db(
            power, ref=np.max, amin=1e-13, top_db=peak)
    return librosa.logamplitude(
        power, ref_power=np.max, amin=1e-13, top_db=peak)


def _register_cmap_once(cmap):
    """Register `cmap` under its own name, tolerating repeated calls."""
    registry = getattr(matplotlib, 'colormaps', None)
    if registry is None:
        # Older matplotlib: only the pyplot helper exists.
        plt.register_cmap(cmap=cmap)
    elif cmap.name not in registry:
        # Newer matplotlib refuses to re-register an existing name, and
        # rainbowgram() may be called many times per process.
        registry.register(cmap)


def rainbowgram(path,
                ax,
                peak=70.0,
                use_cqt=False,
                n_fft=1024,
                hop_length=256,
                sr=22050,
                over_sample=4,
                res_factor=0.8,
                octaves=5,
                notes_per_octave=10):
    """Draw a "rainbowgram" of an audio file onto a matplotlib axis.

    The time-difference of the unwrapped phase is drawn with the `rainbow`
    colormap, and a black mask whose opacity decreases with log-magnitude
    is drawn on top, so quiet regions appear dark.

    Parameters
    ----------
    path : str
        Audio file to load with `librosa.load`.
    ax : matplotlib axis
        Axis that receives the two `matshow` layers.
    peak : float
        Passed to librosa as `top_db`, and used to rescale the dB values
        via `dB / peak + 1`.
    use_cqt : bool
        If true analyse with `librosa.cqt`, otherwise with `librosa.stft`.
    n_fft, hop_length : int
        STFT size and hop (hop is also used for the CQT).
    sr : int
        Sample rate the audio is resampled to on load.
    over_sample, res_factor, octaves, notes_per_octave
        CQT resolution settings: bins per octave is
        `notes_per_octave * over_sample`, the total number of bins is
        `octaves * notes_per_octave * over_sample`, and `res_factor` is
        passed as `filter_scale`.

    Returns
    -------
    None.  The function draws on `ax` and registers the 'MyMask' colormap.
    """
    audio = librosa.load(path, sr=sr)[0]
    if use_cqt:
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(
            audio,
            n_fft=n_fft,
            win_length=n_fft,
            hop_length=hop_length,
            center=True)
    mag, phase = librosa.core.magphase(C)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    # Frame-to-frame phase difference; the first column keeps the raw
    # unwrapped phase so the result has the same width as the input.
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    mag = (_power_to_db(mag**2, peak) / peak) + 1
    # Constant-black colormap whose alpha runs from opaque (low values) to
    # transparent (high values).
    cdict = {
        'red': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'blue': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'alpha': ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
    }
    my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)
    _register_cmap_once(my_mask)
    # Rows are flipped so the first bin is drawn at the bottom of the image.
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=my_mask)
def rainbowgrams(list_of_paths,
                 saveto=None,
                 rows=2,
                 cols=4,
                 col_labels=None,
                 row_labels=None,
                 use_cqt=True,
                 figsize=(15, 20),
                 peak=70.0):
    """Draw a `rows` x `cols` grid of rainbowgrams, one per audio file.

    Parameters
    ----------
    list_of_paths : list of str
        Audio files, laid out row by row; must contain exactly
        `rows * cols` entries.
    saveto : str or None
        If given, the figure is written to `'<saveto>.png'`.
    rows, cols : int
        Grid dimensions.
    col_labels, row_labels : sequence of str or None
        Optional labels; column labels are placed under the bottom row and
        row labels beside the first column.
    use_cqt : bool
        Forwarded to `rainbowgram`.
    figsize : tuple
        Figure size handed to `plt.subplots`.
    peak : float
        Forwarded to `rainbowgram`.
    """
    N = len(list_of_paths)
    assert N == rows * cols
    # squeeze=False keeps `axes` 2-D for every grid shape, so a single
    # indexing expression works for 1x1, 1xN and Nx1 layouts as well.
    fig, axes = plt.subplots(
        rows, cols, sharex=True, sharey=True, figsize=figsize, squeeze=False)
    fig.subplots_adjust(left=0.05, right=0.95, wspace=0.05, hspace=0.1)
    for i, path in enumerate(list_of_paths):
        row, col = divmod(i, cols)
        ax = axes[row, col]
        rainbowgram(path, ax, peak, use_cqt)
        # set_axis_bgcolor() no longer exists in matplotlib;
        # set_facecolor() is its replacement.
        ax.set_facecolor('white')
        ax.set_xticks([])
        ax.set_yticks([])
        if col == 0 and row_labels:
            ax.set_ylabel(row_labels[row])
        if row == rows - 1 and col_labels:
            ax.set_xlabel(col_labels[col])
    if saveto is not None:
        # The target path is savefig's first positional parameter (`fname`);
        # passing it as `filename=` fails on current matplotlib.
        fig.savefig('{}.png'.format(saveto))
def plot_rainbowgrams():
    """Render a rainbowgram PNG next to every .wav under the data folders.

    Searches 'target', 'corpus' and 'results' (relative to the current
    working directory) recursively for `*.wav` files and writes
    `<file>.wav.png` for each one that does not already have it.
    """
    for root in ['target', 'corpus', 'results']:
        files = glob.glob('{}/**/*.wav'.format(root), recursive=True)
        for f in files:
            fname = '{}.png'.format(f)
            if os.path.exists(fname):
                continue
            # rainbowgrams() appends '.png' to `saveto` itself, so pass the
            # bare wav path; passing `fname` would write '<f>.png.png' and
            # the existence check above would never succeed.
            rainbowgrams(
                [f],
                saveto=f,
                figsize=(20, 5),
                rows=1,
                cols=1)
            # Release the figure right away so figures do not accumulate
            # across files.
            plt.close('all')