Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals

# Standard library.
import argparse
import configparser
import json
import os
import random  # NOTE: was imported twice in the original; one import suffices
import sys
import time
from pathlib import Path

# Third party.
import cv2
import librosa
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import tensorflow as tf
from tensorflow.keras import layers

tf.keras.backend.clear_session()  # For easy reset of notebook state.

# os.environ["CUDA_VISIBLE_DEVICES"]="2"
def synthesize_audio():
    """Resynthesize audio and visual-score examples from a trained multimodal VAE.

    Reads hyper-parameters from the INI file given by ``--config``, loads the
    last saved VAE checkpoint from ``workdir``, then for every ``.npy``
    visual-score array in ``Whole/Manually_Designed_Visual_Scores_npy``:

    * runs the array through the full VAE (the audio branch is fed a dummy
      random tensor),
    * writes the reconstructed visual score as a JPEG, and
    * converts the reconstructed CQT magnitudes back to audio with
      Griffin-Lim and writes a WAV file.

    Side effects only (argv parsing, file and image/audio I/O); returns None.
    """
    workdir = Path(os.getcwd() + '/Whole/func-timbre-vae/run-010/')

    # ---- Command line -----------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='./default_whole.ini',
                        help='path to the config file')
    args = parser.parse_args()

    # ---- Config -----------------------------------------------------------
    # BUG FIX: ConfigParser.read() never raises FileNotFoundError -- it
    # silently skips missing files and returns the list of files it actually
    # parsed, so the original try/except was dead code.  Test the return
    # value instead.
    config_path = args.config
    config = configparser.ConfigParser(allow_no_value=True)
    if not config.read(config_path):
        print('Config File Not Found at {}'.format(config_path))
        sys.exit()

    # Audio analysis parameters.
    sample_rate = config['audio'].getint('sample_rate')
    hop_length = config['audio'].getint('hop_length')
    bins_per_octave = config['audio'].getint('bins_per_octave')
    num_octaves = config['audio'].getint('num_octaves')
    n_bins = int(num_octaves * bins_per_octave)
    n_iter = config['audio'].getint('n_iter')

    # Dataset paths.
    dataset = Path(config['dataset'].get('datapath'))
    if not dataset.exists():
        raise FileNotFoundError(dataset.resolve())
    cqt_dataset = config['dataset'].get('cqt_dataset')
    if config['dataset'].get('workspace') is not None:  # was `!= None`
        workspace = Path(config['dataset'].get('workspace'))
    run_number = config['dataset'].getint('run_number')
    my_audio = dataset / 'audio_external_test'

    # Training / model / extra parameters.  Most of these are unused at
    # synthesis time but are kept so a malformed config still fails early.
    epochs = config['training'].getint('epochs')
    learning_rate = config['training'].getfloat('learning_rate')
    batch_size = config['training'].getint('batch_size')
    train_buf = config['training'].getint('buffer_size')
    buffer_size_dataset = config['training'].getboolean('buffer_size_dataset')
    max_to_keep = config['training'].getint('max_ckpts_to_keep')
    ckpt_epochs = config['training'].getint('checkpoint_epochs')
    continue_training = config['training'].getboolean('continue_training')
    learning_schedule = config['training'].getboolean('learning_schedule')
    save_best_only = config['training'].getboolean('save_best_only')
    early_patience_epoch = config['training'].getint('early_patience_epoch')
    early_delta = config['training'].getfloat('early_delta')
    adam_beta_1 = config['training'].getfloat('adam_beta_1')
    adam_beta_2 = config['training'].getfloat('adam_beta_2')

    latent_dim_audio = config['VAE'].getint('latent_dim_audio')
    latent_dim_visual_score = config['VAE'].getint('latent_dim_visual_score')
    n_units_audio = config['VAE'].getint('n_units_audio')
    n_units_visual_score = config['VAE'].getint('n_units_visual_score')
    kl_beta = config['VAE'].getfloat('kl_beta')
    batch_normalization = config['VAE'].getboolean('batch_norm')
    VAE_output_activation = config['VAE'].get('output_activation')

    example_length = config['extra'].getint('example_length')
    normalize_examples = config['extra'].getboolean('normalize_examples')
    plot_model = config['extra'].getboolean('plot_model')
    desc = config['extra'].get('description')

    start_time = time.time()
    config['extra']['start'] = time.asctime(time.localtime(start_time))
    AUTOTUNE = tf.data.experimental.AUTOTUNE

    class Sampling(layers.Layer):
        """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

        def call(self, inputs):
            z_mean, z_log_var = inputs
            batch = tf.shape(z_mean)[0]
            dim = tf.shape(z_mean)[1]
            # Reparameterization trick: z = mu + sigma * eps.
            epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
            return z_mean + tf.exp(0.5 * z_log_var) * epsilon

    # ---- Load model -------------------------------------------------------
    my_model_path = workdir.joinpath('model', 'mymodel_last.h5')
    print(my_model_path)
    # The checkpoint references the custom Sampling layer, so it must be in
    # scope while deserializing.
    with tf.keras.utils.CustomObjectScope({'Sampling': Sampling}):
        vae = tf.keras.models.load_model(my_model_path)
    vae.summary()

    # Rebuild the encoder/decoder sub-models from the loaded graph's named
    # layers (kept for inspection via summary(); generation uses `vae`).
    encoder_audio = tf.keras.Model(
        inputs=vae.get_layer("encoder_input_audio").input,
        outputs=[vae.get_layer("z_mean_audio").output,
                 vae.get_layer("z_log_var_audio").output],
        name='encoder_audio')
    encoder_audio.summary()
    encoder_visual_score = tf.keras.Model(
        inputs=vae.get_layer("encoder_input_visual_score").input,
        outputs=[vae.get_layer("z_mean_visual_score").output,
                 vae.get_layer("z_log_var_visual_score").output],
        name='encoder_visual_score')
    encoder_visual_score.summary()
    decoder_audio = tf.keras.Model(
        inputs=vae.get_layer('decoder_audio').input,
        outputs=vae.get_layer('decoder_audio').output,
        name='decoder_audio')
    decoder_audio.summary()
    decoder_visual_score = tf.keras.Model(
        inputs=vae.get_layer('decoder_visual_score').input,
        outputs=vae.get_layer('decoder_visual_score').output,
        name='decoder_visual_score')
    decoder_visual_score.summary()

    # ---- Generate examples ------------------------------------------------
    workdir2 = Path(os.getcwd() + '/Whole/')
    print("Generating examples...")
    my_examples_folder_audio = workdir2.joinpath('Designed_Audio')
    my_examples_folder_visual_score_source = workdir2.joinpath('Manually_Designed_Visual_Scores_npy')
    my_examples_folder_visual_score_recons = workdir2.joinpath('Manually_Designed_Visual_Scores_recons')
    # Hoisted out of the loop; exist_ok makes it idempotent anyway.
    os.makedirs(my_examples_folder_visual_score_recons, exist_ok=True)

    for f in os.listdir(my_examples_folder_visual_score_source):
        example_name = os.path.splitext(f)[0]
        print("Examples for {}".format(example_name))
        my_array = np.load(my_examples_folder_visual_score_source / f)
        test_dataset = (tf.data.Dataset.from_tensor_slices(my_array)
                        .batch(batch_size).prefetch(AUTOTUNE))

        # Seed rows: one all-zeros frame so tf.concat has something to
        # append to; both are stripped again before writing any output.
        output_audio = tf.constant(0., dtype='float32', shape=(1, n_bins))
        output_visual_score = tf.constant(0., dtype='float32', shape=(1, 249, 3))

        print("Working on regenerating cqt magnitudes with the DL model")
        for step, x_batch_train in enumerate(test_dataset):
            print(x_batch_train.shape)
            # The audio-branch input is irrelevant for this reconstruction;
            # feed random noise as a dummy tensor for the other modality.
            # NOTE(review): the hard-coded width 384 presumably equals n_bins
            # (num_octaves * bins_per_octave) -- confirm against the trained
            # model before changing it.
            reconstructed_whole = vae([tf.random.uniform(shape=(1, 384)), x_batch_train])
            output_audio = tf.concat([output_audio, reconstructed_whole[0]], 0)
            output_visual_score = tf.concat([output_visual_score, reconstructed_whole[1]], 0)

        # Reconstructed visual score -> JPEG.  Strip the zero seed frame
        # (the original wrote it out as an extra black row).
        image = output_visual_score.numpy()[1:]
        # Pixel values are assumed to lie in [0, 1]; scale to 8-bit range.
        cv2.imwrite(str(my_examples_folder_visual_score_recons) + '/' + f[:-4] + '.jpeg',
                    image * 256)

        # Reconstructed CQT magnitudes -> audio via Griffin-Lim, which
        # expects shape (n_bins, n_frames).
        # BUG FIX: drop the zero seed *frame* before transposing.  The
        # original sliced after the transpose (`output_np[1:]`), which
        # removed the first CQT frequency bin instead of the dummy frame.
        output_np = np.transpose(output_audio.numpy()[1:])
        output_inv_32 = librosa.griffinlim_cqt(
            output_np, sr=sample_rate, n_iter=n_iter, hop_length=hop_length,
            bins_per_octave=bins_per_octave, dtype=np.float32)
        if normalize_examples:
            output_inv_32 = librosa.util.normalize(output_inv_32)

        print("Saving audio files...")
        my_audio_out_fold = my_examples_folder_audio.joinpath(example_name)
        os.makedirs(my_audio_out_fold, exist_ok=True)
        sf.write(my_audio_out_fold.joinpath(f[:-4] + '.wav'),
                 output_inv_32, sample_rate)