import librosa
import numpy as np
import pandas as pd
import scipy
import scipy.signal  # find_peaks lives in the submodule; import it explicitly
import parameters as params
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import tensorflow as tf


def gaussian(x, mean=0):
    """
    Auxiliary function: unnormalized gaussian with unit variance.

    x: number or np.array
    mean: number. Centre of the bell.

    Returns exp(-(x - mean)**2 / 2), elementwise.
    """
    val = (-(x - mean)**2)/2
    return np.exp(val)


# Curve used to weight the correlation. Works as a prior on the tempo
# (peaked around params.LIKELY_BPM), see weighted_correlation below.
cut = 60/params.LIKELY_BPM
frame_cut = params.MEL_SAMPLING_RATE*cut
# Index 0 evaluates log2(0) = -inf, which ends up as a weight of exactly 0.
# That value is intended, so only the divide-by-zero warning is silenced.
with np.errstate(divide='ignore'):
    curve = np.exp(-1/8 * np.log2(np.arange(0, 40, 1)/frame_cut)**2)
curve = curve/curve.sum()


def weighted_correlation(predictions, len_frame=params.LEN_FRAME, shift=params.SHIFT):
    """
    predictions: tf.tensor of shape [1, len_song]
    len_frame: int. length of the window where the tempo should stay constant
    shift: int. number of extra leading entries of the cropped correlation
        that are skipped (on top of params.MEL_SAMPLING_RATE) before the
        weighting is applied.

    Gets the renormalized self correlation of the predictions: the full
    autocorrelation of the first len_frame predictions is cropped, its tail
    is multiplied by the module-level `curve` (the tempo prior) and the
    result is normalized to sum to 1.

    NOTE(review): the tail must have the same length as `curve` (40) for the
    product to broadcast; this presumably holds for the values in
    `parameters` -- confirm.
    NOTE(review): np.correlate(..., 'full') puts lag 0 at index
    len_frame - 1, so which lag the first returned entry corresponds to
    depends on the params values -- TODO confirm the intended alignment.
    """
    signal = predictions.numpy()[0, :len_frame]
    beginning = (params.NUM_SECONDS - 1)*len_frame//params.NUM_SECONDS
    end = (params.NUM_SECONDS + 1)*len_frame//params.NUM_SECONDS
    cor = np.correlate(signal, signal, 'full')[beginning:end]
    second_half = cor[params.MEL_SAMPLING_RATE + shift:]*curve
    return second_half/second_half.sum()


def get_a_beat(predictions, w_cor):
    """
    predictions: tf.tensor of shape [1, len_song]
    w_cor: np.array, the output of w_cor = weighted_correlation(predictions).

    Gets a beat by convolving the predictions with the curved correlation
    (the output of weighted_correlation), adding twice the predictions, and
    taking argmax.

    Returns the index of the selected frame.
    """
    scores = predictions.numpy()[0]
    beat_detected = np.argmax(2*scores + np.convolve(scores, w_cor, 'same'))
    return beat_detected


def prob_beat(mode, mel_sampling_rate=params.MEL_SAMPLING_RATE):
    """
    Auxiliary function for find_prob_distribution_of_a_beat.

    mode: float. Frame at which the distribution peaks.
    mel_sampling_rate: int. Length of the returned array.

    Returns an unnormalized probability distribution over the integers from
    0 to mel_sampling_rate - 1. Entry 0 is exactly 0; entry i > 0 is
    gaussian(log2(i / mode)).

    The distribution is a gaussian of the log2 of the distance. It is needed
    to find the next beat, and log2 is there as the probability of having a
    beat after (2^n)t times should be the same as the probability of having
    a beat after t/(2^n) times.
    """
    vals = np.arange(1, mel_sampling_rate)
    return np.concatenate([np.zeros(1), gaussian(np.log2(vals/mode))])


def find_prob_distribution_of_a_beat(w_cor, shift=params.SHIFT, constant_tempo=True, plot=False,):
    """
    w_cor: the output of weighted_correlation
    shift: int. Number of zeros prepended to w_cor before looking for peaks.
    constant_tempo: bool. If True, the distribution is set to 0 further than
        5 frames away from the detected tempo peak.
    plot: bool. If True, plots the weighted correlation with the chosen peak
        and the (unmasked) probability distribution.

    Returns: np.1darray

    Given the weighted correlation, first we find its first peak
    (actual_peak) that is greater than 2/3 of its next peaks. This should be
    the tempo of the song.
    Returns an (unnormalized) probability distribution (namely
    prob_beat(mode=actual_peak)) on actual_peak*3//2 + 1 frames.
    This is the probability that the frame i has a beat, given that frame 0
    has a beat.

    Raises IndexError if the weighted correlation has no peak.
    """
    w_cor = np.concatenate([np.zeros(shift), np.asarray(w_cor, dtype=float)])
    peaks, _ = scipy.signal.find_peaks(w_cor)
    if len(peaks) == 0:
        # Same exception type the bare peaks[0] used to raise, but explicit.
        raise IndexError('no peak found in the weighted correlation; '
                         'cannot estimate the tempo')

    # Find its first peak (actual_peak) that is greater than 2/3 of its next
    # peaks. This is because, for example, if the tempo is 240 bpms, then on
    # the weighted correlation there will be 2 peaks with similar values: one
    # which corresponds to 240 bpms, one which corresponds to 120. Getting
    # the first peak guarantees, in the example with 240 bpms, that the tempo
    # we get is at 240 and not 120.
    actual_peak = peaks[0]
    for p in peaks:
        if w_cor[actual_peak] < 2*w_cor[p]/3:
            actual_peak = p

    distribution = prob_beat(actual_peak)[:(actual_peak*3)//2 + 1]

    if plot:
        pd.DataFrame(w_cor).plot(title='weighted correlation and tempo')
        plt.axvline(actual_peak)
        plt.show()
        pd.DataFrame(distribution).plot(title='prob distribution')
        plt.show()

    if constant_tempo:
        # Only frames within `tempo_tolerance` of the tempo peak keep their
        # weight; everything else cannot be chosen as the next beat.
        tempo_tolerance = 5
        offsets = np.arange(len(distribution))
        near_peak = np.abs(offsets - actual_peak) <= tempo_tolerance
        return np.where(near_peak, distribution, 0.0)
    return distribution


def search_after(predictions, predicted_beat, prob_distribution):
    """
    predictions: np.1d array
    predicted_beat: int
    prob_distribution: np.1darray. Entry i weights a beat i frames away from
        the current one.

    Searches for beats after the predicted beat: repeatedly picks, in the
    window of len(prob_distribution) frames starting at the current beat,
    the frame maximizing predictions*prob_distribution. Stops when the window
    no longer fits in the predictions or when the best offset is 0.

    Returns the list of beat frames, starting with predicted_beat.
    """
    window_len = len(prob_distribution)
    current_beat = predicted_beat
    result = [current_beat]
    while current_beat + window_len <= len(predictions):
        window = predictions[current_beat:current_beat + window_len]
        step = np.argmax(window*prob_distribution)
        if step == 0:
            # No frame ahead scored above the current one: stop searching.
            break
        current_beat += step
        result.append(current_beat)
    return result


def search_before(predictions, predicted_beat, prob_distribution):
    """
    predictions: np.1d array
    predicted_beat: int
    prob_distribution: np.1darray. Entry i weights a beat i frames away from
        the current one.

    Searches for beats before the predicted beat. Exact mirror of
    search_after: the window ends at the current beat (inclusive) and is
    flipped, so that entry i of the flipped window is the frame i steps
    *before* the current beat and is weighted by prob_distribution[i].
    Stops when the window no longer fits in the predictions or when the best
    offset is 0.

    Returns the list of beat frames, starting with predicted_beat and going
    backwards.
    """
    window_len = len(prob_distribution)
    current_beat = predicted_beat
    result = [current_beat]
    while current_beat - window_len + 1 >= 0:
        # Frames current_beat, current_beat - 1, ..., in that order.
        window = predictions[current_beat - window_len + 1:current_beat + 1][::-1]
        step = np.argmax(window*prob_distribution)
        if step == 0:
            # No earlier frame scored above the current one: stop searching.
            break
        current_beat -= step
        result.append(current_beat)
    return result


def frames_with_beat(predictions, constant_tempo=True, plot=False):
    """
    predictions: tf.tensor of shape [1, len_song]. The output of the neural
        network
    constant_tempo: bool. Forwarded to find_prob_distribution_of_a_beat.
    plot: bool. If True, also saves a plot of the predictions with the first
        detected beat to 'plot_beat_predictions.png'.

    returns: a np.1darray of length len_song with 1 at frame i iff there is
        a beat at frame i, and 0 elsewhere
    """
    w_cor = weighted_correlation(predictions)
    prob_distribution = find_prob_distribution_of_a_beat(
        w_cor, constant_tempo=constant_tempo, plot=plot)
    single_beat = get_a_beat(predictions, w_cor)

    scores = predictions.numpy()[0]
    if plot:
        pd.DataFrame(scores).plot(title='predictions with a bit')
        plt.axvline(single_beat, color='red')
        plt.savefig('plot_beat_predictions.png')

    beats_after = search_after(scores, single_beat, prob_distribution)
    beats_before = search_before(scores, single_beat, prob_distribution)
    all_beats = list(beats_before) + list(beats_after)

    num_frames = predictions.shape[1]
    beats_in_frames = np.zeros(num_frames)
    for beat in all_beats:
        if beat < num_frames:
            beats_in_frames[beat] = 1
    return beats_in_frames