AVE2

Runtime error

App Files Community

jfforero committed on May 6, 2024

Commit

81019be

verified ·

1 Parent(s): 54b66d8

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -52

app.py CHANGED Viewed

@@ -1,14 +1,12 @@
 import gradio as gr
 import numpy as np
 import librosa
-import time
 import requests
 from io import BytesIO
 from PIL import Image
 import os
 from tensorflow.keras.models import load_model
-import torch
-from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
 # Load the emotion prediction model
 def load_emotion_model(model_path):
@@ -19,21 +17,11 @@ def load_emotion_model(model_path):
         print("Error loading emotion prediction model:", e)
         return None
-from faster_whisper import WhisperModel
 model_size = "small"
-# Run on GPU with FP16
 model = WhisperModel(model_size, device="cpu", compute_type="int8")
-def transcribe(audio):
-    segments, _ = model.transcribe(audio, beam_size=5)
-    return "".join([segment.text for segment in segments])
 model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
 emotion_model = load_emotion_model(model_path)
@@ -69,46 +57,41 @@ api_key = os.getenv("DeepAI_api_key")
 # Predict emotion from audio
 def get_predictions(audio_input):
-    audio_file_path = audio_input.name
-    emotion_prediction = predict_emotion_from_audio(audio_file_path)
-    # Generate image here or call a separate function
-    image = generate_image(api_key, emotion_prediction)
-    return emotion_prediction, image
 # Define a function to generate an image using DeepAI Text to Image API
 def generate_image(api_key, text):
-    url = "https://api.deepai.org/api/text2img"
-    headers = {'api-key': api_key}
-    response = requests.post(
-        url,
-        data={
-            'text': text,
-        },
-        headers=headers
-    )
-    response_data = response.json()
-    if 'output_url' in response_data:
-        image_url = response_data['output_url']
-        image_response = requests.get(image_url)
-        image = Image.open(BytesIO(image_response.content))
-        return image
-    else:
         return None
 # Create the Gradio interface
-with gr.Blocks() as interface:
-    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
-    with gr.Tabs():
-        with gr.Tab("Acoustic and Semantic Predictions"):
-            with gr.Row():
-                input_audio = gr.Audio(label="Input Audio", type="filepath")
-                submit_button = gr.Button("Submit")
-            output_label = [gr.Label("Prediction"), gr.Image(type='pil')]  # Use a single Label instead of a list
-    # Set the function to be called when the button is clicked
-    submit_button.click(get_predictions, inputs=input_audio, outputs=output_label)
-interface.launch()

 import gradio as gr
 import numpy as np
 import librosa
 import requests
 from io import BytesIO
 from PIL import Image
 import os
 from tensorflow.keras.models import load_model
+from faster_whisper import WhisperModel
 # Load the emotion prediction model
 def load_emotion_model(model_path):
         print("Error loading emotion prediction model:", e)
         return None
 model_size = "small"
+# Run on CPU with INT8 compute
 model = WhisperModel(model_size, device="cpu", compute_type="int8")
+# Load emotion prediction model
 model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
 emotion_model = load_emotion_model(model_path)
 # Predict emotion from audio
 def get_predictions(audio_input):
+    try:
+        audio_data = audio_input.read()  # Read the audio data
+        emotion_prediction = predict_emotion_from_audio(audio_data)
+        image = generate_image(api_key, emotion_prediction)
+        return emotion_prediction, image
+    except Exception as e:
+        print("Error processing audio:", e)
+        return None, None
 # Define a function to generate an image using DeepAI Text to Image API
 def generate_image(api_key, text):
+    try:
+        url = "https://api.deepai.org/api/text2img"
+        headers = {'api-key': api_key}
+        response = requests.post(
+            url,
+            data={'text': text},
+            headers=headers
+        )
+        response_data = response.json()
+        if 'output_url' in response_data:
+            image_url = response_data['output_url']
+            image_response = requests.get(image_url)
+            image = Image.open(BytesIO(image_response.content))
+            return image
+        else:
+            return None
+    except Exception as e:
+        print("Error generating image:", e)
         return None
 # Create the Gradio interface
+with gr.Interface(get_predictions,
+                  inputs=gr.inputs.Audio(label="Input Audio", type="file"),
+                  outputs=[gr.outputs.Text(label="Prediction"), gr.outputs.Image(label="Generated Image")],
+                  title="Emotional Machines Test",
+                  description="Load or Record an audio file to perform emotion analysis") as iface:
+    iface.launch()