"""Gradio app: predict the emotion in a speech clip and illustrate it.

The audio file is reduced to a vector of mean MFCC coefficients, classified by
a Keras model loaded from ``model_path``, and the predicted emotion name is
sent to the DeepAI text-to-image API to obtain an accompanying picture.
"""

import os
import time
from io import BytesIO

import gradio as gr
import librosa
import numpy as np
import requests
from PIL import Image
from tensorflow.keras.models import load_model

# Number of MFCC coefficients extracted per clip. The same value fixes the
# model input shape used in predict_emotion_from_audio, so keep them in sync.
N_MFCC = 40

# Upper bound, in seconds, for each HTTP call made to the image service, so a
# stalled connection cannot block a UI request forever.
REQUEST_TIMEOUT_SECONDS = 60


# Load the emotion prediction model
def load_emotion_model(model_path):
    """Load the Keras model stored at *model_path*.

    Returns the loaded model, or ``None`` (after printing the error) when
    loading fails for any reason.
    """
    try:
        model = load_model(model_path)
        return model
    except Exception as e:
        print("Error loading emotion prediction model:", e)
        return None


model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
model = load_emotion_model(model_path)


# Function to extract MFCC features from audio
def extract_mfcc(wav_file_name):
    """Return the time-averaged MFCC vector of an audio file.

    The file is loaded with librosa's default settings, ``N_MFCC``
    coefficients are computed per frame and then averaged over all frames,
    giving a 1-D array of length ``N_MFCC``.

    Returns ``None`` (after printing the error) if loading or feature
    extraction fails.
    """
    try:
        y, sr = librosa.load(wav_file_name)
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC).T, axis=0)
        return mfccs
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return None


# Emotions dictionary: 1-based class label -> emotion name.
# NOTE(review): presumably the label numbering used when the model was
# trained -- confirm against the training code.
emotions = {
    1: 'neutral',
    2: 'calm',
    3: 'happy',
    4: 'sad',
    5: 'angry',
    6: 'fearful',
    7: 'disgust',
    8: 'surprised',
}


# Function to predict emotion from audio
def predict_emotion_from_audio(wav_filepath):
    """Predict the emotion expressed in the audio file at *wav_filepath*.

    Returns:
        The emotion name (a value of ``emotions``) on success, the string
        ``"Error: Unable to extract features"`` when MFCC extraction fails,
        or ``None`` (after printing the error) when the model is unavailable
        or inference fails.
    """
    test_point = extract_mfcc(wav_filepath)
    if test_point is None:
        return "Error: Unable to extract features"

    if model is None:
        # The model failed to load at start-up; nothing to run inference with.
        print("Error predicting emotion:", "model is not loaded")
        return None

    try:
        # Shape the feature vector as a batch of one (1, N_MFCC, 1) sample.
        # The shape is passed positionally because the ``newshape`` keyword
        # is deprecated in recent NumPy releases.
        test_point = np.reshape(test_point, (1, N_MFCC, 1))
        predictions = model.predict(test_point)
        # argmax is 0-based while the keys of ``emotions`` start at 1.
        predicted_emotion_label = int(np.argmax(predictions[0])) + 1
        return emotions[predicted_emotion_label]
    except Exception as e:
        print("Error predicting emotion:", e)
        return None


api_key = os.getenv("DeepAI_api_key")


# Predict emotion from audio
def get_predictions(audio_input):
    """Gradio callback: return ``(emotion_prediction, image)`` for a clip.

    *audio_input* is the file path supplied by the audio component. The first
    element of the result is whatever ``predict_emotion_from_audio`` returned.
    The second is a generated PIL image, or ``None`` when the prediction did
    not yield a known emotion or the image could not be generated.
    """
    emotion_prediction = predict_emotion_from_audio(audio_input)

    # Only a real emotion name is a sensible prompt; do not send ``None`` or
    # an error message to the image service.
    if emotion_prediction not in emotions.values():
        return emotion_prediction, None

    image = generate_image(api_key, emotion_prediction)
    return emotion_prediction, image


# Define a function to generate an image using DeepAI Text to Image API
def generate_image(api_key, text):
    """Generate an image for *text* with the DeepAI text-to-image API.

    Args:
        api_key: DeepAI API key, sent in the ``api-key`` request header.
        text: Prompt to render.

    Returns:
        A ``PIL.Image.Image`` downloaded from the ``output_url`` reported by
        the API, or ``None`` (after printing the error) when the key or
        prompt is missing, the request fails, the response carries no
        ``output_url``, or the downloaded data cannot be opened as an image.
    """
    if not api_key or not text:
        print("Error generating image:", "missing API key or prompt text")
        return None

    url = "https://api.deepai.org/api/text2img"
    headers = {'api-key': api_key}

    try:
        response = requests.post(
            url,
            data={
                'text': text,
            },
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response_data = response.json()

        if not isinstance(response_data, dict) or 'output_url' not in response_data:
            print("Error generating image:", "no output_url in API response")
            return None

        image_url = response_data['output_url']
        image_response = requests.get(image_url, timeout=REQUEST_TIMEOUT_SECONDS)
        image_response.raise_for_status()
        image = Image.open(BytesIO(image_response.content))
        return image
    except (requests.RequestException, ValueError, OSError) as e:
        # RequestException: network/HTTP failure; ValueError: response body
        # is not JSON; OSError: downloaded bytes are not a readable image.
        print("Error generating image:", e)
        return None


####
# Create the Gradio interface
# NOTE(review): the nesting of the layout blocks below was reconstructed from
# a whitespace-mangled source -- confirm it matches the intended layout.
with gr.Blocks() as interface:
    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
    with gr.Tabs():
        with gr.Tab("Acoustic and Semantic Predictions"):
            with gr.Row():
                input_audio = gr.Audio(label="Input Audio", type="filepath")
                submit_button = gr.Button("Submit")
                # Two outputs, matching the (emotion, image) tuple returned by
                # get_predictions. "Prediction" is the caption of the label
                # component, so it is passed as ``label=`` rather than as the
                # component's initial value.
                output_label = [gr.Label(label="Prediction"), gr.Image(type='pil')]

    # Set the function to be called when the button is clicked
    submit_button.click(get_predictions, inputs=input_audio, outputs=output_label)

interface.launch()