alekya
/

DTLN

Model card Files Files and versions

DTLN / real_time_processing.py

alekya's picture

model files

34d6c18 verified over 1 year ago

history blame contribute delete

1.77 kB

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	"""
	Created on Tue Jun 23 16:23:15 2020

	@author: nils
	"""

	import soundfile as sf
	import numpy as np
	import tensorflow as tf



	##########################
	# Block-wise (streaming-style) processing of a wav file with a DTLN
	# SavedModel: the audio is fed to the model in frames of `block_len`
	# samples, advancing by `block_shift` samples per step, and the model
	# outputs are recombined by overlap-add.
	#
	# These values are fixed; if you need other values, you have to retrain.
	# The sampling rate of 16 kHz is fixed as well.
	block_len = 512
	block_shift = 128
	# load model and fetch its default serving signature
	model = tf.saved_model.load('./pretrained_model/dtln_saved_model')
	infer = model.signatures["serving_default"]
	# load audio file at 16k fs (please change)
	audio, fs = sf.read('path_to_your_favorite_audio.wav')
	# check for sampling rate
	if fs != 16000:
	    raise ValueError('This model only supports 16k sampling rate.')
	# The buffers below are one-dimensional, so multi-channel input (which
	# soundfile returns as a 2-D array) cannot be copied into them. Fail
	# early with a clear message instead of a broadcast error in the loop.
	if audio.ndim != 1:
	    raise ValueError('This script only supports single-channel audio.')
	# preallocate output audio
	out_file = np.zeros((len(audio)))
	# create sliding input buffer and overlap-add output buffer
	in_buffer = np.zeros((block_len))
	out_buffer = np.zeros((block_len))
	# calculate number of blocks
	num_blocks = (audio.shape[0] - (block_len - block_shift)) // block_shift
	# iterate over the number of blocks; samples beyond the last processed
	# block keep their preallocated value of zero in the output
	for idx in range(num_blocks):
	    # sample range of the hop handled in this iteration
	    start = idx * block_shift
	    stop = start + block_shift
	    # shift values and write the newest samples to the end of the buffer
	    in_buffer[:-block_shift] = in_buffer[block_shift:]
	    in_buffer[-block_shift:] = audio[start:stop]
	    # create a batch dimension of one
	    in_block = np.expand_dims(in_buffer, axis=0).astype('float32')
	    # process one block; 'conv1d_1' is the key read from the
	    # signature's result dictionary
	    out_block = infer(tf.constant(in_block))['conv1d_1']
	    # shift values, clear the tail and overlap-add the new model output
	    out_buffer[:-block_shift] = out_buffer[block_shift:]
	    out_buffer[-block_shift:] = np.zeros((block_shift))
	    out_buffer += np.squeeze(out_block)
	    # write the oldest block_shift samples of the buffer to the output
	    out_file[start:stop] = out_buffer[:block_shift]


	# write to .wav file
	sf.write('out.wav', out_file, fs)

	print('Processing finished.')