Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -42,6 +42,18 @@ def extract_mfcc(wav_file_name):
|
|
| 42 |
# Emotions dictionary
|
| 43 |
emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
# Function to predict emotion from audio
|
| 46 |
def predict_emotion_from_audio(wav_filepath):
|
| 47 |
try:
|
|
@@ -81,23 +93,31 @@ def generate_image(api_key, text):
|
|
| 81 |
def get_predictions(audio_input):
|
| 82 |
emotion_prediction = predict_emotion_from_audio(audio_input)
|
| 83 |
transcribed_text = transcribe(audio_input)
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
return emotion_prediction, transcribed_text, image
|
| 87 |
|
| 88 |
# Create the Gradio interface
|
| 89 |
interface = gr.Interface(
|
| 90 |
fn=get_predictions,
|
| 91 |
-
inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
|
| 92 |
outputs=[
|
| 93 |
gr.Label("Acoustic Prediction", label="Acoustic Prediction", visible=False),
|
| 94 |
gr.Label("Transcribed Text", label="Transcribed Text", visible=False),
|
| 95 |
gr.Image(type='pil', label="Generated Image")
|
| 96 |
],
|
| 97 |
-
title
|
| 98 |
-
description
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
| 101 |
)
|
| 102 |
|
| 103 |
-
interface.launch()
|
|
|
|
| 42 |
# Emotions dictionary
# Maps the numeric emotion codes (1-8) emitted by the classifier to their
# human-readable labels, in canonical order.
_EMOTION_LABELS = ('neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised')
emotions = {code: label for code, label in enumerate(_EMOTION_LABELS, start=1)}
|
| 44 |
|
| 45 |
+
# Prompts for each emotion
# One text-to-image prompt per recognized emotion label; get_predictions()
# looks up the predicted label here to steer image generation.
emotion_prompts = dict(
    neutral="Generate an image that represents a balanced and neutral environment with a calm, serene setting.",
    calm="Create a peaceful landscape with soft colors and a tranquil atmosphere.",
    happy="Design a bright, sunny environment full of vibrant colors and cheerful elements.",
    sad="Generate a dark, gloomy scene with muted colors to convey a somber mood.",
    angry="Create an intense, fiery scene with bold colors and dynamic elements.",
    fearful="Generate an eerie, ominous environment with shadows and unsettling details.",
    disgust="Design a scene that feels unsettling, with distorted, unnatural elements.",
    surprised="Create a scene with unexpected contrasts, bright lights, and elements that convey a sense of surprise.",
)
|
| 56 |
+
|
| 57 |
# Function to predict emotion from audio
|
| 58 |
def predict_emotion_from_audio(wav_filepath):
|
| 59 |
try:
|
|
|
|
| 93 |
def get_predictions(audio_input):
    """Run the full pipeline on one recorded audio clip.

    Parameters
    ----------
    audio_input : str
        Filepath of the recorded audio (Gradio's ``type="filepath"``
        Audio input supplies this).

    Returns
    -------
    tuple
        ``(emotion_prediction, transcribed_text, image)`` — the emotion
        label from the acoustic model, the transcription, and the image
        returned by ``generate_image()`` (displayed by Gradio as a PIL
        image per the interface's ``gr.Image(type='pil')`` output).
    """
    emotion_prediction = predict_emotion_from_audio(audio_input)
    transcribed_text = transcribe(audio_input)

    # Look up the image prompt for the predicted emotion; fall back to a
    # generic prompt when the prediction has no dedicated entry.
    # dict.get replaces the original `if key in dict / else` (one lookup,
    # same behavior).
    prompt_text = emotion_prompts.get(
        emotion_prediction,
        "Generate an image that represents an ambiguous emotional state.",
    )

    image = generate_image(api_key, prompt_text)
    return emotion_prediction, transcribed_text, image
|
| 105 |
|
| 106 |
# Create the Gradio interface
# Single-function app: record "What?" from the microphone, run
# get_predictions on the saved file, and show the generated texture image.
# The two Label outputs mirror get_predictions' first two return values but
# are hidden (visible=False) — only the image is presented to the user.
interface = gr.Interface(
    fn=get_predictions,
    # type="filepath" hands get_predictions a path string, not raw samples.
    inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
    outputs=[
        gr.Label("Acoustic Prediction", label="Acoustic Prediction", visible=False),
        gr.Label("Transcribed Text", label="Transcribed Text", visible=False),
        gr.Image(type='pil', label="Generated Image")
    ],
    title="So What?",
    description=("So What? is a multimedia work-in-progress project that leverages speech emotion recognition to create textured images for the What XR space.\n\n"
                 "Record yourself saying the expression \"What?\" \n\n"
                 "Try saying \"What?\" with different intonations to convey various emotions.\n\n"
                 "Press STOP when you finish, and SUBMIT your audio to generate a texture image.\n\n"
                 "Use the X icon to clear the recording instead of the button. :) ")
)

# Launch the app (blocking call; serves the interface until stopped).
interface.launch()
|