# File size: 1,774 bytes
# Revision: 34d6c18
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 23 16:23:15 2020
@author: nils
"""
import sys

import numpy as np
import soundfile as sf
import tensorflow as tf
##########################
# Real-time-style block processing with the pretrained DTLN noise-suppression
# model: overlap-add over 512-sample frames with a 128-sample hop.
# The values are fixed; if you need other values, you have to retrain.
# The sampling rate of 16 kHz is also fixed.
block_len = 512
block_shift = 128


def main(in_path='path_to_your_favorite_audio.wav', out_path='out.wav'):
    """Denoise ``in_path`` with the DTLN saved model and write ``out_path``.

    Parameters
    ----------
    in_path : str
        Path to a mono 16 kHz WAV file to enhance.
    out_path : str
        Path of the enhanced WAV file to write.

    Raises
    ------
    ValueError
        If the input is not 16 kHz or not mono.
    """
    # load model (the serving signature returns the enhanced frame
    # under the key 'conv1d_1')
    model = tf.saved_model.load('./pretrained_model/dtln_saved_model')
    infer = model.signatures["serving_default"]
    # load audio file at 16k fs (please change the default path)
    audio, fs = sf.read(in_path)
    # check for sampling rate
    if fs != 16000:
        raise ValueError('This model only supports 16k sampling rate.')
    # the frame indexing below assumes a single channel
    if audio.ndim != 1:
        raise ValueError('This model only supports mono audio.')
    # preallocate output audio
    out_file = np.zeros(len(audio))
    # sliding input/output buffers, each one full block long
    in_buffer = np.zeros(block_len)
    out_buffer = np.zeros(block_len)
    # number of full hops that fit in the signal (the trailing partial
    # hop is dropped, matching the original example's behavior)
    num_blocks = (audio.shape[0] - (block_len - block_shift)) // block_shift
    # iterate over the number of blocks
    for idx in range(num_blocks):
        # shift the input buffer left by one hop and append new samples
        in_buffer[:-block_shift] = in_buffer[block_shift:]
        in_buffer[-block_shift:] = audio[idx * block_shift:(idx * block_shift) + block_shift]
        # create a batch dimension of one; model expects float32
        in_block = np.expand_dims(in_buffer, axis=0).astype('float32')
        # process one block
        out_block = infer(tf.constant(in_block))['conv1d_1']
        # shift the output buffer and overlap-add the new frame
        out_buffer[:-block_shift] = out_buffer[block_shift:]
        out_buffer[-block_shift:] = np.zeros(block_shift)
        out_buffer += np.squeeze(out_block)
        # the leading hop of the buffer is now fully accumulated
        out_file[idx * block_shift:(idx * block_shift) + block_shift] = out_buffer[:block_shift]
    # write to .wav file
    sf.write(out_path, out_file, fs)
    print('Processing finished.')


if __name__ == '__main__':
    # optional CLI: python script.py [input.wav] [output.wav]
    # with no arguments the original hard-coded paths are used.
    main(*sys.argv[1:3])