Update app.py
app.py
CHANGED
@@ -65,41 +65,11 @@ def transcribe(audio, state=""):
 
 # Create a combined function that calls both models
 def get_predictions(audio_input):
-    # Perform transcription to get the text
-    transcribed_text = transcribe(audio_input)
-
-    # Define the API key for DeepAI Text to Image API
-    api_key = 'dee3e3f2-d5cf-474c-8072-bd6bea47e865'
-
-    # Generate the image with the transcribed text using DeepAI Text to Image API
-    image = generate_image(api_key, transcribed_text)
-
-    # Get emotion prediction from audio
     emotion_prediction = predict_emotion_from_audio(audio_input)
+    transcribe_prediction = transcribe(audio_input)
+    return [emotion_prediction, transcribe_prediction]
 
-
-
-# Define a function to generate an image using DeepAI Text to Image API
-def generate_image(api_key, text):
-    url = "https://api.deepai.org/api/text2img"
-    headers = {'api-key': api_key}
-    response = requests.post(
-        url,
-        data={
-            'text': text,
-        },
-        headers=headers
-    )
-    response_data = response.json()
-    if 'output_url' in response_data:
-        image_url = response_data['output_url']
-        image_response = requests.get(image_url)
-        image = Image.open(BytesIO(image_response.content))
-        return image
-    else:
-        return None
-
-# Create the Gradio interface for acoustic and semantic predictions
+# Create the Gradio interface
 with gr.Blocks() as interface:
     gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
     with gr.Tabs():
@@ -107,13 +77,9 @@ with gr.Blocks() as interface:
         with gr.Row():
             input_audio = gr.Audio(label="Input Audio", type="filepath")
             submit_button = gr.Button("Submit")
-            output_labels = [gr.Label(num_top_classes=8), gr.Label(num_top_classes=4)
+            output_labels = [gr.Label(num_top_classes=8), gr.Label(num_top_classes=4)]
 
-            # Set the function to be called when the button is clicked
+            # Set the function to be called when the button is clicked
             submit_button.click(get_predictions, inputs=input_audio, outputs=output_labels)
 
-    # Display transcribed text as a label
-    transcribed_text_label = gr.Label(label="Transcribed Text")
-
-# Launch the Gradio interface
 interface.launch()
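For reference, here is a minimal, self-contained sketch of the wiring this commit leaves in place: get_predictions returns a two-element list, and the element order is what ties each prediction to its gr.Label in outputs. The real model functions are defined earlier in app.py and are not shown in this diff, so the stubs below are placeholders only; the tab layout around gr.Row() (line 106 of app.py, also not shown) is likewise omitted here.

import gradio as gr

# Placeholder for the acoustic emotion model defined earlier in app.py;
# assumed to return a {label: confidence} mapping suitable for gr.Label.
def predict_emotion_from_audio(audio_path):
    return {"neutral": 0.6, "happy": 0.3, "sad": 0.1}

# Placeholder for the speech-to-text model defined earlier in app.py.
def transcribe(audio_path):
    return "example transcription"

# Combined function: one audio input in, one value per output component out.
def get_predictions(audio_input):
    emotion_prediction = predict_emotion_from_audio(audio_input)
    transcribe_prediction = transcribe(audio_input)
    # The list order must match the order of output_labels below.
    return [emotion_prediction, transcribe_prediction]

with gr.Blocks() as interface:
    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
    with gr.Row():
        input_audio = gr.Audio(label="Input Audio", type="filepath")
        submit_button = gr.Button("Submit")
        output_labels = [gr.Label(num_top_classes=8), gr.Label(num_top_classes=4)]
    # First return value feeds the first Label, second return value the second.
    submit_button.click(get_predictions, inputs=input_audio, outputs=output_labels)

interface.launch()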