Spaces:

Image-Processsing
/

Backend

Sleeping

Upload 18 files

85b6267 over 1 year ago

1.16 kB

	import tensorflow as tf
	import numpy as np
	import librosa
	import pickle
	import io

	# Module-level YAMNet instance, restored from the SavedModel directory
	# 'yamnet_saved_model' when this module is first executed/imported.
	yamnet_model = tf.saved_model.load(export_dir='yamnet_saved_model')

	def extract_audio_embeddings(audio_binary: bytes) -> list:
		"""Return YAMNet embeddings for an encoded audio file held in memory.

		Args:
			audio_binary: Raw contents of an audio file (the bytes themselves,
				not a filesystem path). They are wrapped in ``io.BytesIO`` and
				decoded by ``librosa.load``.

		Returns:
			The model's ``embeddings`` output converted with
			``.numpy().tolist()`` (a list of lists).
		"""
		# YAMNet requires a sample rate of 16kHz, so resample while decoding.
		# librosa also returns the sample rate, which is not needed here.
		audio, _ = librosa.load(io.BytesIO(audio_binary), sr=16000)
		# Convert audio to a float32 tensor before handing it to the model.
		audio_tensor = tf.convert_to_tensor(audio, dtype=tf.float32)
		# The model returns (scores, embeddings, spectrogram); only the
		# embeddings are used.
		_, embeddings, _ = yamnet_model(audio_tensor)
		return embeddings.numpy().tolist()

	# Example usage
	if __name__ == "__main__":
		image_audio_path = "pictures/users/1a.mp3"
		# extract_audio_embeddings wraps its argument in io.BytesIO, so it
		# needs the file's raw bytes; passing the path string raises TypeError.
		with open(image_audio_path, "rb") as audio_file:
			audio_bytes = audio_file.read()
		# Extract embeddings from image audio file
		image_audio_embeddings = extract_audio_embeddings(audio_bytes)
		print("Embeddings for", image_audio_path)
		print(image_audio_embeddings)
	# NOTE(review): nesting indentation is missing in this copy of the file, so
	# it is unclear whether this status message was meant to run at module
	# level (on every import) or only inside the __main__ guard — confirm
	# against the original before re-indenting.
	print("audio embedding model loaded succesfully")