Spaces:
Build error
Build error
| import joblib | |
| import numpy as np | |
| from generate_synthetic_data_online import generate_synth_dataset_log_512, generate_synth_dataset_log_muted_512 | |
| from tools import show_spc, spc_to_VAE_input, VAE_out_put_to_spc, np_log10 | |
| import torch.utils.data as data | |
class Data_cache():
    """Keep a regenerable synthetic dataset next to a fixed external dataset.

    Both arrays are stored as ``float32``. The synthetic part can be replaced
    with newly generated samples via :meth:`refresh`; the external part is
    never modified by this class.

    Parameters
    ----------
    synthetic_data : np.ndarray
        Synthetic samples; axis 0 indexes the samples.
    external_sources : np.ndarray
        External samples. Must be stackable with ``synthetic_data`` along
        axis 0 (see :meth:`get_all_data`).
    """

    def __init__(self, synthetic_data, external_sources):
        # Remembered so that refresh() regenerates the same number of samples.
        self.n_synthetic = np.shape(synthetic_data)[0]
        self.synthetic_data = synthetic_data.astype(np.float32)
        self.external_sources = external_sources.astype(np.float32)
        # Offset added to the spectrogram before log10 in get_data_loader();
        # presumably there to keep the logarithm finite for zero-valued bins.
        self.epsilon = 1e-20

    def get_all_data(self):
        """Return synthetic samples followed by external samples, stacked on axis 0."""
        return np.vstack([self.synthetic_data, self.external_sources])

    def refresh(self):
        """Replace the synthetic samples with ``n_synthetic`` new ones (``mute=True``)."""
        regenerated = generate_synth_dataset(self.n_synthetic, mute=True)
        # Cast exactly as __init__ does. Without it a float64 result would make
        # np.vstack upcast the whole dataset the next time it is assembled.
        self.synthetic_data = np.asarray(regenerated, dtype=np.float32)

    def get_data_loader(self, shuffle=True, BATCH_SIZE=8, new_way=False):
        """Build a ``DataLoader`` over all cached samples.

        Parameters
        ----------
        shuffle : bool
            Forwarded to ``DataLoader``.
        BATCH_SIZE : int
            Forwarded to ``DataLoader`` as ``batch_size``.
        new_way : bool
            If False, each sample is only reshaped to ``(1, 512, 256)``.
            If True, the reshaped sample is additionally passed through
            ``VAE_out_put_to_spc`` and replaced by
            ``log10(spectrogram + epsilon)``.

        Returns
        -------
        torch.utils.data.DataLoader
            Loader iterating over the prepared samples.
        """
        prepared = []
        for sample in self.get_all_data():
            # Every sample must hold exactly 512 * 256 values.
            reshaped = np.reshape(sample, (1, 512, 256))
            if new_way:
                spectrogram = VAE_out_put_to_spc(reshaped)
                prepared.append(np.log10(spectrogram + self.epsilon))
            else:
                prepared.append(reshaped)
        return data.DataLoader(prepared, shuffle=shuffle, batch_size=BATCH_SIZE)
def generate_synth_dataset(n_synthetic, mute=False):
    """Generate synthetic samples and convert them to the VAE input layout.

    Parameters
    ----------
    n_synthetic : int
        Number of samples requested from the generator.
    mute : bool
        Selects ``generate_synth_dataset_log_muted_512`` when True and
        ``generate_synth_dataset_log_512`` otherwise.

    Returns
    -------
    np.ndarray
        Output of ``spc_to_VAE_input`` with a trailing axis of length 1
        appended to its first three axes.
    """
    if mute:
        generator = generate_synth_dataset_log_muted_512
    else:
        generator = generate_synth_dataset_log_512

    vae_input = spc_to_VAE_input(generator(n_synthetic))
    shape = vae_input.shape
    # Append a singleton last axis to the three leading axes.
    return vae_input.reshape(shape[0], shape[1], shape[2], 1)
def read_data(data_path):
    """Load an external dataset from disk and convert it to the VAE input layout.

    Parameters
    ----------
    data_path : str
        Path handed to ``joblib.load``.

    Returns
    -------
    np.ndarray
        Output of ``spc_to_VAE_input`` with a trailing axis of length 1
        appended to its first three axes.
    """
    # NOTE(review): joblib.load is pickle-based; only use it on trusted files.
    raw = np.array(joblib.load(data_path))
    vae_input = spc_to_VAE_input(raw)
    shape = vae_input.shape
    return vae_input.reshape(shape[0], shape[1], shape[2], 1)
def _pad_axis2_with_zeros(batch, target_len=256):
    """Return a float64 copy of ``batch`` zero-padded at the end of axis 2 to ``target_len``."""
    current_len = batch.shape[2]
    if current_len > target_len:
        raise ValueError(
            f"Cannot pad axis 2 of length {current_len} to the shorter length {target_len}."
        )
    padded = np.zeros(batch.shape[:2] + (target_len,) + batch.shape[3:])
    padded[:, :, :current_len] += batch
    return padded


def load_data(n_synthetic):
    """Generate the hybrid dataset.

    Generates ``n_synthetic`` synthetic samples and loads the ARTURIA, NSynth,
    SoundFonts and WaveNet datasets from ``./data/external_data``. The
    SoundFonts array is zero-padded on axis 2 to length 256 before the
    external arrays are stacked.

    Parameters
    ----------
    n_synthetic : int
        Number of synthetic samples to generate.

    Returns
    -------
    Data_cache
        Cache holding the synthetic samples and the stacked external samples.

    Raises
    ------
    ValueError
        If the SoundFonts array is longer than 256 along axis 2.
    """
    Input_synthetic = generate_synth_dataset(n_synthetic)

    Input_AU = read_data("./data/external_data/ARTURIA_data")
    print("ARTURIA dataset loaded.")

    Input_NSynth = read_data("./data/external_data/NSynth_data")
    print("NSynth dataset loaded.")

    # Pad size comes from the loaded array, not a fixed sample count or width,
    # so the function still works when the SoundFonts file changes.
    Input_SF = _pad_axis2_with_zeros(
        read_data("./data/external_data/soundfonts_data"), target_len=256
    )
    print("SoundFonts dataset loaded.")

    Input_google = read_data("./data/external_data/WaveNet_samples")

    Input_external = np.vstack([Input_AU, Input_NSynth, Input_SF, Input_google])
    data_cache = Data_cache(Input_synthetic, Input_external)
    print(f"Data loaded, data shape: {np.shape(data_cache.get_all_data())}")
    return data_cache
def show_data(dataset_name, n_sample=3, index=-1, new_way=False):
    """Show and return a certain dataset.

    Parameters
    ----------
    dataset_name: String
        Name of the dataset to show: "ARTURIA", "NSynth", "SoundFonts" or
        "Synthetic".
    n_sample: int
        Number of randomly chosen samples to show when 'index' is negative.
        For "Synthetic", ``int(n_sample * 3)`` samples are generated.
    index: int
        Setting 'index' larger equal 0 shows only the 'index'-th sample in
        the desired dataset.
    new_way: bool
        Accepted but not used by this function.

    Returns
    -------
    np.ndarray or None:
        The showed dataset, or None (after printing an example command) when
        'dataset_name' is not recognised.

    Notes
    -----
    NOTE(review): a second ``show_data(tensor_batch, ...)`` is defined later
    in this file and rebinds the name, so this version cannot be reached as
    ``show_data`` once the module is imported unless one of them is renamed.
    """
    external_paths = (
        ("ARTURIA", "./data/external_data/ARTURIA_data"),
        ("NSynth", "./data/external_data/NSynth_data"),
        ("SoundFonts", "./data/external_data/soundfonts_data"),
    )
    matched_path = next(
        (path for name, path in external_paths if dataset_name == name), None
    )

    if matched_path is not None:
        samples = read_data(matched_path)
    elif dataset_name == "Synthetic":
        samples = generate_synth_dataset(int(n_sample * 3))
    else:
        print("Example command: \"!python thesis_main.py show_data -s [ARTURIA, NSynth, SoundFonts, Synthetic] -n 5\"")
        return None

    if index >= 0:
        show_spc(VAE_out_put_to_spc(samples[index]))
        return samples

    # No explicit index: draw n_sample random positions (repeats possible).
    for _ in range(n_sample):
        picked = np.random.randint(0, len(samples))
        print(picked)
        show_spc(VAE_out_put_to_spc(samples[picked]))
    return samples
def show_data(tensor_batch, index=-1, new_way=False):
    """Display one sample of a tensor batch as a spectrogram.

    NOTE(review): this definition rebinds ``show_data`` and thereby replaces
    the earlier dataset-name based function of the same name in this file.

    Parameters
    ----------
    tensor_batch : torch.Tensor
        Batch whose first axis indexes the samples.
    index : int
        Position of the sample to show. A negative value picks a random
        position in ``[0, tensor_batch.shape[0])``.
    new_way : bool
        If True, the sample is shown as ``10.0 ** sample`` and its index is
        printed first. If False, the sample is converted with
        ``VAE_out_put_to_spc`` before being shown.

    Returns
    -------
    None
    """
    if index < 0:
        index = np.random.randint(0, tensor_batch.shape[0])

    # .cpu() is a no-op for CPU tensors and makes .numpy() valid for tensors
    # on another device, where it would otherwise raise.
    sample = tensor_batch[index].detach().cpu().numpy()

    if new_way:
        spectrogram = 10.0 ** sample
        print(f"The {index}-th sample:")
        show_spc(spectrogram)
    else:
        show_spc(VAE_out_put_to_spc(sample))