Spaces:
Running
Running
| import os | |
| import soundfile as sf | |
| import torch | |
| import pyloudnorm as pyln | |
| import librosa | |
| import matplotlib | |
| import matplotlib.pyplot as plt | |
| from dataloader import SingleTrackSet | |
| from utils import db2linear | |
def conv_tasnet_separate(
    args, our_model, device, track_audio, track_name, meter=None, augmented_gain=None
):
    """Run ``our_model`` on one track, write the estimate to disk and return it.

    Two inference paths exist:

    * ``args.use_singletrackset`` truthy: the track is wrapped in a
      ``SingleTrackSet`` and processed item by item; ``db.trim_length``
      samples are dropped from both ends of every item's output before the
      pieces are concatenated along the last axis.
    * otherwise: the whole ``track_audio`` is passed to the model in one call.

    When ``args.task_params.dataset == "delimit"`` the second value returned
    by the model is used as the estimate instead of the first.

    Args:
        args: Run configuration. Fields read here: ``use_singletrackset``,
            ``target``, ``save_histogram``, ``save_name_as_target``,
            ``save_output_loudnorm``, ``save_16k_mono``, ``test_output_dir``,
            ``data_params.nhop``, ``data_params.sample_rate`` and
            ``task_params.dataset``.
        our_model: Callable returning an iterable of outputs (first item is
            the default estimate).
        device: Device every item is moved to in the ``SingleTrackSet`` path.
        track_audio: Input audio tensor. The output is indexed as
            ``[0, :, :n_samples]``, so this presumably is
            (batch, channels, samples) -- TODO confirm against the caller.
        track_name: Name used to build the output path(s).
        meter: Object exposing ``integrated_loudness(audio)``. Only used when
            ``args.save_output_loudnorm`` is truthy; if omitted, a
            ``pyln.Meter`` at ``args.data_params.sample_rate`` is created.
        augmented_gain: Optional gain that is undone on the output via
            ``db2linear(-augmented_gain)``; applied only when
            ``args.save_output_loudnorm`` is ``None``.

    Returns:
        The estimate as a NumPy array, after loudness/gain adjustment, cropped
        to the number of samples in ``track_audio``.
    """
    is_delimit = args.task_params.dataset == "delimit"

    if args.use_singletrackset:
        db = SingleTrackSet(
            track_audio.squeeze(dim=0),
            hop_length=args.data_params.nhop,
            num_frame=128,
            target_name=args.target,
        )
        separated = []

        # NOTE(review): no torch.no_grad() here; presumably the caller
        # disables autograd -- confirm.
        for item in db:
            item = item.unsqueeze(0).to(device)
            estimates, *estimates_vars = our_model(item)
            if is_delimit:
                estimates = estimates_vars[0]
            estimates = estimates.detach().cpu()

            # Explicit end index: a plain ``trim:-trim`` slice would be empty
            # when trim_length is 0.
            trim = db.trim_length
            separated.append(
                estimates[..., trim : estimates.shape[-1] - trim].clone()
            )

        estimates = torch.cat(separated, dim=-1)
        estimates = estimates[0, :, : track_audio.shape[-1]].numpy()

    else:
        # NOTE(review): track_audio is not moved to ``device`` on this path;
        # presumably the caller already did -- confirm.
        estimates, *estimates_vars = our_model(track_audio)
        if args.save_histogram and is_delimit:
            # Histogram of the model's *first* output, taken before it is
            # replaced by the second output below.
            fig = plt.figure(figsize=(10, 10))
            try:
                plt.hist(estimates.cpu().detach().numpy().flatten(), bins=100)
                os.makedirs(f"{args.test_output_dir}/{track_name}", exist_ok=True)
                plt.savefig(
                    f"{args.test_output_dir}/{track_name}/{args.target}_histogram.png"
                )
            finally:
                # Close the figure so figures do not accumulate across tracks.
                plt.close(fig)
        if is_delimit:
            estimates = estimates_vars[0]

        estimates = estimates.detach().cpu().numpy()
        estimates = estimates[0, :, : track_audio.shape[-1]]

    if args.save_name_as_target:
        wav_path = f"{args.test_output_dir}/{track_name}/{args.target}.wav"
    else:
        wav_path = f"{args.test_output_dir}/{track_name}.wav"
    # Create the parent directory in both naming modes; previously the flat
    # ``{track_name}.wav`` mode relied on the directory already existing.
    os.makedirs(os.path.dirname(wav_path) or ".", exist_ok=True)

    if args.save_output_loudnorm:
        print("SAVE Loudness normalized OUTPUT ")
        if meter is None:
            # No meter supplied: build one matching the output sample rate.
            meter = pyln.Meter(args.data_params.sample_rate)
        loudness = meter.integrated_loudness(estimates.T)
        estimates = estimates * db2linear(args.save_output_loudnorm - loudness, eps=0.0)
    elif augmented_gain is not None and args.save_output_loudnorm is None:
        # Undo the gain that was applied to the input.
        estimates = estimates * db2linear(-augmented_gain, eps=0.0)

    sf.write(
        wav_path,
        estimates.T,
        samplerate=args.data_params.sample_rate,
    )

    if args.save_16k_mono:
        estimates_16k_mono = librosa.to_mono(estimates)
        estimates_16k_mono = librosa.resample(
            estimates_16k_mono,
            orig_sr=args.data_params.sample_rate,
            target_sr=16000,
        )
        # Kept as in the original: this directory is created in both naming
        # modes, which also guarantees the parent of the flat path exists.
        os.makedirs(f"{args.test_output_dir}_16k_mono/{track_name}", exist_ok=True)
        if args.save_name_as_target:
            mono_path = f"{args.test_output_dir}_16k_mono/{track_name}/{args.target}.wav"
        else:
            mono_path = f"{args.test_output_dir}_16k_mono/{track_name}.wav"
        sf.write(
            mono_path,
            estimates_16k_mono,
            samplerate=16000,
        )

    return estimates