jfforero committed
Commit d97a50e · verified · Parent: dc50617

Update app.py

Files changed (1): app.py (+17 -44)
app.py CHANGED
@@ -1,12 +1,12 @@
 import gradio as gr
 import numpy as np
 import librosa
-import time
 import requests
 from io import BytesIO
 from PIL import Image
 import os
 from tensorflow.keras.models import load_model
+from faster_whisper import WhisperModel
 
 # Load the emotion prediction model
 def load_emotion_model(model_path):
@@ -20,20 +20,15 @@ def load_emotion_model(model_path):
 model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
 model = load_emotion_model(model_path)
 
-#####
-
-from faster_whisper import WhisperModel
-
-
+# Initialize WhisperModel
 model_size = "small"
 model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
 
+# Function to transcribe audio
 def transcribe(wav_filepath):
-    segments, _ = model2.transcribe(wav_filepath, beam_size=5)
+    segments, _ = model2.transcribe(wav_filepath, beam_size=5)
     return "".join([segment.text for segment in segments])
 
-#########
-
 # Function to extract MFCC features from audio
 def extract_mfcc(wav_file_name):
     try:
@@ -64,24 +59,13 @@ def predict_emotion_from_audio(wav_filepath):
 
 api_key = os.getenv("DeepAI_api_key")
 
-
-# Predict emotion from audio
-def get_predictions(audio_input):
-    emotion_prediction = predict_emotion_from_audio(audio_input)
-    # Generate image here or call a separate function
-    image = generate_image(api_key, emotion_prediction)
-    return emotion_prediction, image
-
-
-# Define a function to generate an image using DeepAI Text to Image API
+# Function to generate an image using DeepAI Text to Image API
 def generate_image(api_key, text):
     url = "https://api.deepai.org/api/text2img"
     headers = {'api-key': api_key}
     response = requests.post(
         url,
-        data={
-            'text': text,
-        },
+        data={'text': text},
         headers=headers
     )
     response_data = response.json()
@@ -92,29 +76,18 @@ def generate_image(api_key, text):
         return image
     else:
         return None
-####
 
+# Function to get predictions
+def get_predictions(audio_input):
+    emotion_prediction = predict_emotion_from_audio(audio_input)
+    image = generate_image(api_key, emotion_prediction)
+    return emotion_prediction, image
 
 # Create the Gradio interface
-with gr.Blocks() as interface:
-    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
-
-    with gr.Tabs():
-        with gr.Tab("Acoustic and Semantic Predictions"):
-            with gr.Row():
-                input_audio = gr.Audio(label="Input Audio", type="filepath")
-                submit_button = gr.Button("Submit")
-                output_label = [gr.Label("Prediction"), gr.Image(type='pil')]  # Use a single Label instead of a list
-
-            def submit_callback(wav_filepath):
-                emotion_prediction, _ = get_predictions(wav_filepath)  # Extract only the emotion prediction
-                return emotion_prediction
-
-            # Set the function to be called when the button is clicked
-            submit_button.click(submit_callback, inputs=input_audio, outputs=output_label[0])
-
-    interface.launch()
+interface = gr.Interface(
+    fn=get_predictions,
+    inputs=gr.Audio(label="Input Audio", type="file"),
+    outputs=[gr.Label("Prediction"), gr.Image(type='pil')]
+)
 
-
-
-
+interface.launch()
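For quick verification outside the Gradio UI, a minimal smoke-test sketch of the updated pipeline follows. It is not part of the commit: it assumes it runs in a session where transcribe() and get_predictions() are already defined (importing app.py directly would also call interface.launch()), that "sample.wav" is a hypothetical local recording, and that the DeepAI_api_key environment variable is set.

# Minimal smoke test for the updated pipeline (a sketch, not part of the commit).
# Assumptions: transcribe() and get_predictions() are defined in this session,
# "sample.wav" is a hypothetical local recording, and DeepAI_api_key is set.
wav_path = "sample.wav"  # hypothetical test file

# Speech-to-text via the faster-whisper model
print("Transcript:", transcribe(wav_path))

# Emotion label plus the DeepAI-generated image (None if the API call fails)
emotion, image = get_predictions(wav_path)
print("Emotion:", emotion)
if image is not None:
    image.save("emotion.png")  # hypothetical output path

The sketch calls both functions with a plain filepath; inside the app, Gradio supplies the audio input in whatever form the type argument of gr.Audio selects.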