Spaces:
Sleeping
Sleeping
| import torch | |
| import numpy as np | |
| import pandas as pd | |
| import librosa | |
| import torchaudio as audio | |
class SEP28KDataset(torch.utils.data.Dataset):
    """SEP-28k dataset yielding fixed-length waveform clips and their labels.

    Each item is a ``(waveform, labels)`` pair of ``float32`` tensors. Every
    waveform is truncated or zero-padded along its first axis to
    ``SAMPLE_RATE // 1000 * CLIP_MS`` samples before being returned.
    """

    # Target clip duration in milliseconds and the sample rate (Hz) used to
    # convert it into a sample count. Override in a subclass to change them.
    CLIP_MS = 3000
    SAMPLE_RATE = 16000

    def __init__(self, x, y, unsqueeze=False, transform=None):
        """
        Args:
            x (hdf5): hdf5 data one of 'Xtrain', 'Xtest', or 'Xvalid'.
                Anything supporting ``len()`` and integer indexing that
                returns an array-like waveform works.
            y (hdf5): hdf5 data one of 'Ytrain', 'Ytest', or 'Yvalid',
                indexed in parallel with ``x``.
            unsqueeze (bool, Optional): Whether or not to unsqueeze the
                feature (adds a leading dimension of size 1). May be required
                for models that require image-like inputs.
            transform (callable, Optional): Optional transform applied to the
                waveform tensor of each sample.
        """
        self.data = x
        self.labels = y
        # NOTE: a built-in feature pipeline was sketched here earlier and is
        # disabled: MelSpectrogram(n_mels=80, sample_rate=16000, n_fft=512,
        # f_min=0, f_max=8000, power=0.5, hop_length=152, win_length=480)
        # -> AmplitudeToDB -> random FrequencyMasking(1) / TimeMasking(20).
        # Supply equivalent processing through `transform` if it is needed.
        self.unsqueeze = unsqueeze
        self.transform = transform

    def __len__(self):
        """Return the number of clips in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(waveform, labels)`` pair stored at ``idx``.

        Args:
            idx (int or torch.Tensor): Sample index. Tensor indices are
                converted with ``tolist()`` first.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: The fixed-length ``float32``
            waveform (after ``transform`` and the optional unsqueeze) and the
            ``float32`` labels.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Load the clip and force it to the fixed target length.
        clip = np.asarray(self.data[idx])
        clip = self.pad_trunc(clip, self.CLIP_MS, self.SAMPLE_RATE)
        wav = torch.tensor(clip.astype('float32'))

        labels = torch.tensor(np.asarray(self.labels[idx], dtype='float32'))

        if self.transform is not None:
            wav = self.transform(wav)
        # A transform may hand back a non-tensor (e.g. a numpy array); only
        # then is a conversion needed. Re-wrapping an existing tensor with
        # torch.tensor() would copy it and emit a UserWarning.
        if not torch.is_tensor(wav):
            wav = torch.tensor(wav)
        if self.unsqueeze:
            wav = torch.unsqueeze(wav, 0)

        return wav.detach().clone(), labels

    @staticmethod
    def pad_trunc(sig, max_ms, sr):
        """Truncate or zero-pad ``sig`` along axis 0 to a fixed duration.

        Declared as a static method so that both ``self.pad_trunc(...)`` and
        ``SEP28KDataset.pad_trunc(...)`` accept exactly ``(sig, max_ms, sr)``.

        Args:
            sig (numpy.ndarray): Signal whose first axis is the sample axis.
            max_ms (int): Target duration in milliseconds.
            sr (int): Sample rate in Hz. The target length is
                ``sr // 1000 * max_ms``, so ``sr`` is effectively rounded
                down to a whole number of samples per millisecond.

        Returns:
            numpy.ndarray: ``sig`` unchanged if it already has the target
            length, its first ``max_len`` samples if it is longer, or a
            zero-padded copy (random split of the padding between the start
            and the end) if it is shorter.
        """
        sig_len = sig.shape[0]
        max_len = sr // 1000 * max_ms

        if sig_len > max_len:
            # Truncate along the same axis the length was measured on.
            sig = sig[:max_len]
        elif sig_len < max_len:
            # Split the missing samples randomly between the beginning and
            # the end. randint's upper bound is exclusive, so at least one
            # zero always follows the signal.
            pad_begin_len = np.random.randint(0, max_len - sig_len)
            pad_end_len = max_len - sig_len - pad_begin_len

            # Pad with 0s; trailing dimensions follow the signal so that
            # concatenation along axis 0 is valid for any rank.
            trailing = tuple(sig.shape[1:])
            pad_begin = np.zeros((pad_begin_len,) + trailing)
            pad_end = np.zeros((pad_end_len,) + trailing)
            sig = np.concatenate((pad_begin, sig, pad_end), 0)

        return sig