Spaces:
Build error
Build error
| from typing import List | |
| import librosa | |
| from data_generation.encoding import ParameterDescription, Sample | |
| from melody_synth.random_midi import RandomMidi | |
| from melody_synth.melody_generator import MelodyGenerator | |
| from scipy.io.wavfile import write | |
| from pathlib import Path | |
| from tqdm import tqdm | |
| import json | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import tensorflow as tf | |
| import matplotlib | |
| from configurations.read_configuration import parameter_range, is_discrete, get_conf_stft_hyperparameter | |
| import shutil | |
| # from model.log_spectrogram import power_to_db | |
| from tools import power_to_db | |
# NOTE(review): this constant is not referenced anywhere in the visible part of
# this file, and only 8 ParameterDescription entries are declared in
# param_descriptions -- confirm what the 16 is meant to count.
num_params = 16
def plot_spectrogram(signal: np.ndarray,
                     path: str,
                     frame_length=512,
                     frame_step=256):
    """Render the log-magnitude STFT of a signal and save it as an image.

    Parameters
    ----------
    signal: np.ndarray
        The signal for which to compute the spectrogram.
    path: str
        Path to save the computed spectrogram image.
    frame_length:
        Window size of the FFT.
    frame_step:
        Hop size of the FFT.
    """
    # todo (from the original author): duplicate code in log_spectrogram.py.
    # Move this somewhere else perhaps.
    # Short-time Fourier transform; the result is a complex tensor.
    # Original author's note on pad_end=False: "Returns 63 frames instead of
    # 64 otherwise".
    stft_frames = tf.signal.stft(signal,
                                 frame_length=frame_length,
                                 frame_step=frame_step,
                                 pad_end=False)

    # NOTE(review): power_to_db is handed magnitudes (tf.abs), not squared
    # magnitudes -- confirm this matches what the helper in `tools` expects.
    log_magnitudes = power_to_db(tf.abs(stft_frames))

    # Transpose before saving; with origin='lower' the first row is drawn at
    # the bottom of the image.
    image = np.transpose(log_magnitudes)
    plt.imsave(path, image, vmin=-100, vmax=0, origin='lower')
def plot_mel_spectrogram(signal: np.ndarray,
                         path: str,
                         frame_length=512,
                         frame_step=256):
    """Computes the mel spectrogram of the given signal and saves it as an image.

    Parameters
    ----------
    signal: np.ndarray
        The audio signal. A sample rate of 16384 Hz is hard-coded below.
    path: str
        Path to save the computed mel spectrogram image.
    frame_length:
        Window length in samples (passed as ``win_length``); the FFT size is
        fixed at 2048.
    frame_step:
        Hop size in samples (passed as ``hop_length``).
    """
    # Pass the audio by keyword: librosa >= 0.10 made `y` keyword-only, so the
    # positional form raises a TypeError there. The keyword form also works on
    # older releases.
    mel_power = librosa.feature.melspectrogram(y=signal,
                                               sr=16384,
                                               n_fft=2048,
                                               hop_length=frame_step,
                                               win_length=frame_length)

    # melspectrogram returns a (non-negative) power spectrogram, while the
    # colour range below is expressed in dB. Convert relative to the peak so
    # the values fall at or below 0 dB instead of saturating the image.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    matplotlib.pyplot.imsave(path, mel_db, vmin=-100, vmax=0, origin='lower')
# Names of the synthesizer parameters to generate, in the order they are
# described below.
_PARAMETER_NAMES = (
    # Oscillator levels
    "osc1_amp",
    "osc2_amp",
    # ADSR params
    "attack",
    "decay",
    "sustain",
    "release",
    "cutoff_freq",
    # Oscillators types
    # 0 for sin saw, 1 for sin square, 2 for saw square
    # 3 for sin triangle, 4 for triangle saw, 5 for triangle square
    "osc_types",
)

# List of ParameterDescription objects that specify the parameters for generation.
# The value range and the discrete flag of every parameter are looked up by
# name through the configuration helpers.
param_descriptions: List[ParameterDescription] = [
    ParameterDescription(name=parameter_name,
                         values=parameter_range(parameter_name),
                         discrete=is_discrete(parameter_name))
    for parameter_name in _PARAMETER_NAMES
]
def generate_dataset_for_cnn(n: int,
                             path_name="./data/data_cnn_model",
                             sample_rate=16384,
                             n_samples_for_note=16384 * 4,
                             n_samples_for_melody=16384 * 4, write_parameter=True, write_spectrogram=True):
    """
    Generate dataset of size n for 'Inversynth' cnn model.

    Any existing directory at ``path_name`` is deleted and recreated. Sample
    ``i`` is stored in ``<path_name>/<i>/`` and consists of ``<i>.wav`` (the
    synthesized melody) and ``label.json`` (the values returned by
    ``Sample.get_values()`` plus the generated midi under the key ``'midi'``).

    :param n: Int--number of samples to generate
    :param path_name: String--path to save the dataset
    :param sample_rate: Int--passed to MelodyGenerator
    :param n_samples_for_note: Int--passed to MelodyGenerator
    :param n_samples_for_melody: Int--passed to MelodyGenerator
    :param write_parameter: Boolean--if True, additionally write a file whose
        name encodes the parameter values (see write_spectrogram)
    :param write_spectrogram: Boolean--only used when write_parameter is True:
        if True the extra file is a spectrogram .png, otherwise a placeholder
        .txt file
    :return: None
    """
    # Start from an empty dataset directory. shutil.rmtree raises
    # FileNotFoundError when the target is missing (e.g. on the very first
    # run), so only remove the directory when it is actually there.
    dataset_dir = Path(path_name)
    if dataset_dir.exists():
        shutil.rmtree(dataset_dir)
    dataset_dir.mkdir(parents=True, exist_ok=True)
    print("Generating dataset...")

    synth = MelodyGenerator(sample_rate,
                            n_samples_for_note, n_samples_for_melody)
    randomMidi = RandomMidi()
    # The midi strategy is the same for every sample.
    strategy = {"rhythm_strategy": "free_rhythm",
                "pitch_strategy": "free_pitch",
                "duration_strategy": "random_duration",
                }

    for i in tqdm(range(n)):
        parameter_values = [param.generate() for param in param_descriptions]
        # Dict of parameter values, what our synthesizer expects as input
        parameter_values_raw = {param.name: param.value for param in parameter_values}

        # The first element returned by RandomMidi is not needed here.
        _, midi = randomMidi(strategy)
        signal = synth.get_melody(parameter_values_raw, midi=midi).numpy()

        # Directory to store each sample with its label
        sample_dir = dataset_dir / str(i)
        sample_dir.mkdir(parents=True, exist_ok=True)

        if write_parameter:
            # Encode every parameter value (3 decimals) in the file name.
            suffix = 'spectrogram'
            for parameter_value in parameter_values:
                suffix += f'_{parameter_value.name}={"%.3f" % parameter_value.value}'
            if write_spectrogram:
                plot_spectrogram(signal, path=str(sample_dir / f"{suffix}.png"), frame_length=1024, frame_step=256)
            else:
                # Placeholder file: only its name carries information.
                with open(sample_dir / f"{suffix}.txt", "w") as f:
                    f.write("test")

        write(str(sample_dir / f"{i}.wav"), synth.sample_rate, signal)

        # Dump label as json
        label = Sample(parameter_values).get_values()
        label['midi'] = midi
        with open(sample_dir / "label.json", "w") as label_file:
            json.dump(label, label_file, ensure_ascii=True)

    print('Data generation done!')
def generate_dataset_for_triplet(n: int,
                                 path_name="./data/data_triplet_val_10_500",
                                 sample_rate=16384,
                                 n_samples_for_note=16384 * 4,
                                 n_samples_for_melody=16384 * 4,
                                 n_labels=30,
                                 write_spectrogram=True):
    """
    Generate dataset of size n for triplet model.

    ``n_labels`` synthesizer parameter combinations are drawn once; every
    sample then picks one of them at random and renders a fresh random melody
    with it. Any existing directory at ``path_name`` is deleted and recreated.
    Sample ``i`` is stored in ``<path_name>/<i>/`` and consists of ``<i>.wav``,
    ``label_index.json`` (``{'index': <chosen combination>}``), ``midi.txt``
    and one file whose name encodes the parameter values.

    :param n: Int--number of samples to generate
    :param path_name: String--path to save the dataset
    :param sample_rate: Int--passed to MelodyGenerator
    :param n_samples_for_note: Int--passed to MelodyGenerator
    :param n_samples_for_melody: Int--passed to MelodyGenerator
    :param n_labels: Int--number of synthesizer parameter combinations
        contained in the dataset (a hyper parameter of triplet model)
    :param write_spectrogram: Boolean--if True the parameter-named file is a
        spectrogram .png, otherwise a placeholder .txt file
    :return: None
    """
    # Start from an empty dataset directory. shutil.rmtree raises
    # FileNotFoundError when the target is missing (e.g. on the very first
    # run), so only remove the directory when it is actually there.
    dataset_dir = Path(path_name)
    if dataset_dir.exists():
        shutil.rmtree(dataset_dir)
    dataset_dir.mkdir(parents=True, exist_ok=True)
    print("Generating dataset...")

    synth = MelodyGenerator(sample_rate,
                            n_samples_for_note, n_samples_for_melody)
    randomMidi = RandomMidi()

    # The fixed pool of parameter combinations (the "labels").
    parameter_values_examples = [[param.generate() for param in param_descriptions] for _ in range(n_labels)]
    parameter_values_raw_examples = [{param.name: param.value for param in parameter_values}
                                     for parameter_values in parameter_values_examples]

    # The midi strategy is the same for every sample.
    strategy = {"rhythm_strategy": "free_rhythm",
                "pitch_strategy": "free_pitch",
                "duration_strategy": "random_duration",
                }

    # STFT settings do not change between samples, so read them once.
    if write_spectrogram:
        hp = get_conf_stft_hyperparameter()
        frame_l = hp['frame_length']
        # The hop size used to be read from 'frame_length' as well, which
        # looks like a copy-paste slip. NOTE(review): assumes the
        # configuration exposes a 'frame_step' key -- falls back to the
        # previous behaviour (hop == window) when it does not.
        frame_s = hp.get('frame_step', frame_l)

    np.random.seed()
    for i in tqdm(range(n)):
        label_index = np.random.randint(0, n_labels)
        parameter_values = parameter_values_examples[label_index]
        parameter_values_raw = parameter_values_raw_examples[label_index]

        # The first element returned by RandomMidi is not needed here.
        _, midi = randomMidi(strategy)
        signal = synth.get_melody(parameter_values_raw, midi=midi).numpy()

        # Directory to store each sample with its label
        sample_dir = dataset_dir / str(i)
        sample_dir.mkdir(parents=True, exist_ok=True)
        write(str(sample_dir / f"{i}.wav"), synth.sample_rate, signal)

        # Encode every parameter value (3 decimals) in the file name.
        suffix = 'spectrogram'
        for parameter_value in parameter_values:
            suffix += f'_{parameter_value.name}={"%.3f" % parameter_value.value}'
        if write_spectrogram:
            plot_spectrogram(signal, path=str(sample_dir / f"{suffix}.png"), frame_length=frame_l, frame_step=frame_s)
        else:
            # Placeholder file: only its name carries information.
            with open(sample_dir / f"{suffix}.txt", "w") as f:
                f.write("test")

        with open(sample_dir / "label_index.json", "w") as label_index_file:
            json.dump({'index': label_index}, label_index_file, ensure_ascii=False)

        # save midi as .txt file
        with open(sample_dir / "midi.txt", "w") as midi_file:
            midi_file.write(str(midi))

    print('Data generation done!')
def manhattan_distance(SP1, SP2):
    """
    Normalized distance between two synthesizer parameter combinations.

    Every value is first snapped to the index of its nearest bin in
    ``parameter_range(name)``. "osc_types" contributes 0 when both values fall
    in the same bin and 1 otherwise; every other parameter contributes the
    absolute bin-index difference divided by ``(number of bins - 1)``. The
    result is the average of these per-parameter contributions (not their sum).

    :param SP1: first input synthesizer parameter combination (name -> value)
    :param SP2: second input synthesizer parameter combination; must contain
        every key of SP1
    :return: Float--averaged per-parameter distance between SP1 and SP2
        (0.0 when SP1 is empty)
    """
    distances = []
    for parameter_name, value1 in SP1.items():
        value2 = SP2[parameter_name]
        # Build the bin array once per parameter instead of once per lookup.
        bins = np.asarray(parameter_range(parameter_name))
        bin_index1 = np.argmin(np.abs(bins - value1))
        bin_index2 = np.argmin(np.abs(bins - value2))
        if parameter_name == "osc_types":
            # Treated as categorical: same bin or not.
            d = 0 if bin_index1 == bin_index2 else 1
        elif len(bins) > 1:
            d = np.abs(bin_index1 - bin_index2) / (len(bins) - 1)
        else:
            # A parameter with a single possible value cannot differ; without
            # this guard the normalization would be 0 / 0 and yield NaN.
            d = 0
        distances.append(d)

    if not distances:
        # Averaging an empty list would yield NaN.
        return 0.0
    return float(np.average(distances))
def generate_dataset_for_mixed_input_model(n: int,
                                           path_name="./data/data_mixed_input",
                                           sample_rate=16384,
                                           n_samples_for_note=16384 * 4,
                                           n_samples_for_melody=16384 * 4,
                                           positive_ratio=0.01
                                           ):
    """
    Generate dataset of size n for mixed_input_model model.

    Any existing directory at ``path_name`` is deleted and recreated. Sample
    ``i`` is stored in ``<path_name>/<i>/`` and consists of:

    * ``<i>.wav`` -- a random "query" melody rendered with one parameter
      combination,
    * ``<i>_0.wav`` ... ``<i>_4.wav`` -- five single notes (one per fixed-pitch
      strategy) rendered either with the same combination (positive sample) or
      with a freshly drawn one (negative sample),
    * ``label.json`` -- the values of the combination used for the single
      notes plus ``'manhattan_distance'`` to the query combination (0. for
      positive samples).

    :param n: Int--number of samples to generate
    :param path_name: String--path to save the dataset
    :param sample_rate: Int--passed to MelodyGenerator
    :param n_samples_for_note: Int--passed to MelodyGenerator
    :param n_samples_for_melody: Int--passed to MelodyGenerator
    :param positive_ratio: Float--probability that a sample is positive. The
        default keeps the previously hard-coded threshold of 0.01.
        NOTE(review): the original inline comment said "50% positive" --
        confirm which value is intended.
    :return: None
    """
    # Start from an empty dataset directory. shutil.rmtree raises
    # FileNotFoundError when the target is missing (e.g. on the very first
    # run), so only remove the directory when it is actually there.
    dataset_dir = Path(path_name)
    if dataset_dir.exists():
        shutil.rmtree(dataset_dir)
    dataset_dir.mkdir(parents=True, exist_ok=True)
    print("Generating dataset...")

    synth = MelodyGenerator(sample_rate,
                            n_samples_for_note, n_samples_for_melody)
    randomMidi = RandomMidi()

    # Strategy for the query melody.
    strategy = {"rhythm_strategy": "free_rhythm",
                "pitch_strategy": "free_pitch",
                "duration_strategy": "random_duration",
                }
    # One single-note strategy per fixed pitch; the position in this list is
    # the numeric suffix of the corresponding .wav file.
    single_note_strategies = [
        {"rhythm_strategy": "single_note_rhythm",
         "pitch_strategy": pitch_strategy,
         "duration_strategy": "fixed_duration",
         }
        for pitch_strategy in ("fixed_pitch", "fixed_pitch1", "fixed_pitch2", "fixed_pitch3", "fixed_pitch4")
    ]

    np.random.seed()
    for i in tqdm(range(n)):
        sample_dir = dataset_dir / str(i)
        sample_dir.mkdir(parents=True, exist_ok=True)

        parameter_values = [param.generate() for param in param_descriptions]
        parameter_values_raw = {param.name: param.value for param in parameter_values}

        # generate query music
        _, midi = randomMidi(strategy)
        signal_query = synth.get_melody(parameter_values_raw, midi=midi).numpy()
        write(str(sample_dir / f"{i}.wav"), synth.sample_rate, signal_query)

        if np.random.rand() < positive_ratio:
            # Positive sample: the single notes reuse the query parameters.
            distance = 0.
        else:
            # Negative sample: draw a new combination for the single notes and
            # label it with its distance to the query combination.
            query_sp = parameter_values_raw
            parameter_values = [param.generate() for param in param_descriptions]
            parameter_values_raw = {param.name: param.value for param in parameter_values}
            distance = manhattan_distance(query_sp, parameter_values_raw)

        label = Sample(parameter_values).get_values()
        label['manhattan_distance'] = distance
        with open(sample_dir / "label.json", "w") as label_file:
            json.dump(label, label_file, ensure_ascii=False)

        # generate the single-note reference sounds
        for note_index, single_note_strategy in enumerate(single_note_strategies):
            _, midi = randomMidi(single_note_strategy)
            signal_single_note = synth.get_melody(parameter_values_raw, midi=midi).numpy()
            write(str(sample_dir / f"{i}_{note_index}.wav"), synth.sample_rate, signal_single_note)

    print('Data generation done!')