import os
from glob import glob

import numpy as np
import tensorflow as tf
from pydub import AudioSegment
from tqdm import tqdm

from utils import Utils_functions


class UtilsEncode_functions:
    """Encode the audio files under ``args.files_path`` into latent ``.npy`` files.

    The encoders themselves are supplied by the caller through ``models_ls``;
    spectrogram extraction and encoder dispatch are delegated to
    ``Utils_functions`` (``wv2spec`` and ``distribute_enc``).
    """

    def __init__(self, args):
        """Store ``args``, build the shared utilities and list the input files.

        Args:
            args: Configuration object. This class reads ``files_path``,
                ``save_path``, ``sr``, ``hop``, ``shape`` and ``max_lat_len``.
        """
        self.args = args
        self.U = Utils_functions(args)
        self.paths = sorted(glob(self.args.files_path + "/*"))

    def audio_generator(self):
        """Yield ``(waveform, basename)`` for every decodable file in ``self.paths``.

        Each file is decoded with pydub, resampled to ``args.sr`` and converted
        to a float32 array of shape ``(samples, channels)`` scaled by the
        maximum value of the integer sample type. ``np.squeeze`` removes the
        channel axis for single-channel input. Files that fail to decode are
        reported on stdout and skipped.
        """
        for p in self.paths:
            try:
                tp, ext = os.path.splitext(p)
                bname = os.path.basename(tp)
                wvo = AudioSegment.from_file(p, format=ext[1:])
                wvo = wvo.set_frame_rate(self.args.sr)
                wvls = [s.get_array_of_samples() for s in wvo.split_to_mono()]
                wv = np.array(wvls).T.astype(np.float32)
                wv /= np.iinfo(wvls[0].typecode).max
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not stop the run.
                print(e)
                print("Exception ignored! Continuing...")
                continue
            yield np.squeeze(wv), bname

    @staticmethod
    def _merge_batch(lat):
        """Join the leading (batch) axis of ``lat`` back along axis -2 and squeeze."""
        lat = tf.split(lat, lat.shape[0], 0)
        lat = tf.concat(lat, -2)
        return tf.squeeze(lat)

    def _encode_first_level(self, channel_wv, enc):
        """Encode one audio channel with the first-level encoder ``enc``.

        The spectrogram returned by ``self.U.wv2spec`` is transposed, cut into
        consecutive windows of ``args.shape`` along axis 1 (any trailing
        partial window is dropped) and passed to ``self.U.distribute_enc``.
        """
        shape = self.args.shape
        spec = tf.expand_dims(
            tf.transpose(self.U.wv2spec(channel_wv, hop_size=self.args.hop), (1, 0)), -1
        )
        windows = [
            spec[:, i * shape : (i + 1) * shape, :] for i in range(spec.shape[1] // shape)
        ]
        del spec  # release the full spectrogram before running the encoder
        batch = tf.convert_to_tensor(windows, dtype=tf.float32)
        del windows
        lat = self.U.distribute_enc(batch, enc)
        del batch
        return self._merge_batch(lat)

    def _encode_second_level(self, lat, enc2):
        """Encode first-level latents ``lat`` with the second-level encoder ``enc2``.

        ``lat`` is cut into consecutive windows of ``args.shape`` along its
        first axis; any trailing partial window is dropped.
        """
        shape2 = self.args.shape
        windows = [
            lat[j * shape2 : (j + 1) * shape2, :] for j in range(lat.shape[-2] // shape2)
        ]
        batch = tf.convert_to_tensor(windows, dtype=tf.float32)
        del windows
        lat = self.U.distribute_enc(tf.expand_dims(batch, -3), enc2)
        del batch
        return self._merge_batch(lat)

    def compress_files(self, models_ls=None):
        """Encode every input file into fixed-length latent chunks.

        Waveforms are split into pieces of at most five minutes, each piece is
        randomly cropped to a whole number of spectrogram windows and both
        channels are encoded. Pieces whose first-level latent is not longer
        than ``16 * args.max_lat_len`` produce no output. For the others the
        two channel latents are concatenated on the last axis and saved to
        ``args.save_path`` as ``{basename}_{counter}.npy`` in chunks of
        ``args.max_lat_len`` rows, followed by one extra chunk holding the last
        ``args.max_lat_len`` rows.

        Args:
            models_ls: Nine-element sequence
                ``(critic, gen, enc, dec, enc2, dec2, gen_ema,
                [opt_dec, opt_disc], switch)``; only ``enc`` and ``enc2`` are
                used here.

        Per-file failures are printed and skipped.
        """
        _critic, _gen, enc, _dec, enc2, _dec2, _gen_ema, [_opt_dec, _opt_disc], _switch = models_ls
        os.makedirs(self.args.save_path, exist_ok=True)

        hop = self.args.hop
        max_lat_len = self.args.max_lat_len
        window_samples = self.args.shape * hop
        min_samples = 2 * window_samples + 3 * hop
        time_compression_ratio = 16  # TODO: infer time compression ratio
        # Split very long waveforms (> 5 minutes) and process the pieces
        # separately to avoid out-of-memory errors.
        split_limit = 5 * 60 * self.args.sr

        c = 0
        pbar = tqdm(self.audio_generator(), position=0, leave=True, total=len(self.paths))
        for wv, bname in pbar:
            try:
                if wv.shape[0] <= min_samples:
                    continue

                nsplits = (wv.shape[0] // split_limit) + 1
                wvsplits = [
                    wv[ns * split_limit : (ns + 1) * split_limit, :]
                    for ns in range(nsplits)
                    if wv.shape[0] - (ns * split_limit) > min_samples
                ]

                for piece in wvsplits:
                    # Keep a whole number of windows; the extra 3 * hop samples
                    # mirror the crop used by the original code (presumably the
                    # STFT frame overlap in wv2spec — TODO confirm).
                    crop_len = (
                        (piece.shape[0] - 3 * hop) // window_samples
                    ) * window_samples + 3 * hop
                    piece = tf.image.random_crop(piece, size=[crop_len, 2])

                    chls = []
                    switch = False
                    for channel in range(2):
                        lat = self._encode_first_level(piece[:, channel], enc)
                        switch = lat.shape[0] > (max_lat_len * time_compression_ratio)
                        if switch:
                            lat = self._encode_second_level(lat, enc2)
                            chls.append(lat)

                    if not (lat.shape[0] > max_lat_len and switch):
                        continue

                    lat = tf.concat(chls, -1)
                    del chls

                    n_full = lat.shape[0] // max_lat_len
                    latc = tf.split(lat[: n_full * max_lat_len, :], n_full, 0)
                    for el in latc:
                        np.save(self.args.save_path + f"/{bname}_{c}.npy", el)
                        c += 1
                        pbar.set_postfix({"Saved Files": c})
                    # The final chunk is aligned to the end of the latent, so it
                    # may overlap the previous one.
                    np.save(self.args.save_path + f"/{bname}_{c}.npy", lat[-max_lat_len:, :])
                    c += 1
                    pbar.set_postfix({"Saved Files": c})

                    del lat
                    del latc

            except Exception as e:
                # Deliberately best-effort: keep going with the remaining files.
                print(e)
                print("Exception ignored! Continuing...")

    def compress_whole_files(self, models_ls=None):
        """Encode every input file into a single latent file.

        Each waveform longer than the minimum length is zero-padded to a whole
        number of spectrogram windows, both channels are passed through the
        first- and second-level encoders, and the channel latents are
        concatenated on the last axis and saved to ``args.save_path`` as
        ``{basename}.npy``.

        Args:
            models_ls: Nine-element sequence
                ``(critic, gen, enc, dec, enc2, dec2, gen_ema,
                [opt_dec, opt_disc], switch)``; only ``enc`` and ``enc2`` are
                used here.

        Per-file failures are printed and skipped.
        """
        _critic, _gen, enc, _dec, enc2, _dec2, _gen_ema, [_opt_dec, _opt_disc], _switch = models_ls
        os.makedirs(self.args.save_path, exist_ok=True)

        hop = self.args.hop
        window_samples = self.args.shape * hop
        min_samples = 2 * window_samples + 3 * hop

        c = 0
        pbar = tqdm(self.audio_generator(), position=0, leave=True, total=len(self.paths))
        for wv, bname in pbar:
            try:
                if wv.shape[0] <= min_samples:
                    continue

                rem = (wv.shape[0] - (3 * hop)) % window_samples
                if rem != 0:
                    # Pad up to the next multiple of window_samples. Padding by
                    # `rem` itself (as before) does not reach a multiple, so the
                    # tail of the file was dropped by the window split.
                    pad = window_samples - rem
                    wv = tf.concat([wv, tf.zeros([pad, 2], dtype=tf.float32)], 0)

                # NOTE(review): the second-level split still drops a trailing
                # partial window of first-level latents — confirm whether the
                # padding should also account for that.
                chls = [
                    self._encode_second_level(
                        self._encode_first_level(wv[:, channel], enc), enc2
                    )
                    for channel in range(2)
                ]

                lat = tf.concat(chls, -1)
                del chls

                np.save(self.args.save_path + f"/{bname}.npy", lat)
                c += 1
                pbar.set_postfix({"Saved Files": c})

                del lat

            except Exception as e:
                # Deliberately best-effort: keep going with the remaining files.
                print(e)
                print("Exception ignored! Continuing...")