Spaces:
Sleeping
Sleeping
| import tensorflow as tf | |
| import numpy as np | |
| import librosa | |
| import pickle | |
| import io | |
# Load YAMNet once at import time from its SavedModel directory, so every
# call to the embedding function reuses the same in-memory model.
yamnet_model = tf.saved_model.load(export_dir="yamnet_saved_model")
# Function to extract embeddings from audio using YAMNet
def extract_audio_embeddings(audio_binary):
    """Decode an audio clip and return its YAMNet embeddings.

    Args:
        audio_binary: Encoded audio as a bytes-like object. A filesystem path
            (``str`` or any object implementing ``__fspath__``) or an open
            binary file-like object is also accepted and handed to
            ``librosa.load`` unchanged.

    Returns:
        The model's ``embeddings`` output converted with
        ``.numpy().tolist()`` (a list of lists).
    """
    # Paths and file objects are already valid librosa sources; only raw
    # bytes need wrapping. Without this check a path string reaches
    # io.BytesIO and raises TypeError.
    is_path = isinstance(audio_binary, str) or hasattr(audio_binary, "__fspath__")
    if is_path or hasattr(audio_binary, "read"):
        audio_source = audio_binary
    else:
        audio_source = io.BytesIO(audio_binary)

    # YAMNet requires a sample rate of 16kHz, so librosa resamples on load.
    audio, _sample_rate = librosa.load(audio_source, sr=16000)

    # Convert audio to a float32 tensor before feeding the model.
    audio_tensor = tf.convert_to_tensor(audio, dtype=tf.float32)

    # The model returns (scores, embeddings, spectrogram); only the
    # embeddings are used here.
    _scores, embeddings, _spectrogram = yamnet_model(audio_tensor)
    return embeddings.numpy().tolist()
# Example usage
if __name__ == "__main__":
    image_audio_path = "pictures/users/1a.mp3"

    # extract_audio_embeddings decodes audio from in-memory bytes, so read the
    # file in binary mode first; passing the path string itself to a function
    # that wraps its argument in io.BytesIO raises TypeError.
    with open(image_audio_path, "rb") as audio_file:
        image_audio_bytes = audio_file.read()

    # Extract embeddings from image audio file
    image_audio_embeddings = extract_audio_embeddings(image_audio_bytes)
    print("Embeddings for", image_audio_path)
    print(image_audio_embeddings)

# NOTE(review): the original indentation of this line is not recoverable from
# the source; it is kept at module level because the message refers to the
# model load performed at import time — confirm.
print("audio embedding model loaded succesfully")