Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -54,13 +54,21 @@ def process_video_audio(video_path, audio_path):
|
|
| 54 |
frame_idx += 1
|
| 55 |
cap.release()
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
return train_visual, train_audio_wave, train_audio_cnn
|
| 58 |
|
| 59 |
# Define the Gradio interface
|
| 60 |
def gradio_interface(video, audio):
|
|
|
|
| 61 |
train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video, audio)
|
| 62 |
model = trained_model("./model_vui_ve.h5")
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
iface = gr.Interface(
|
| 66 |
fn=gradio_interface,
|
|
@@ -69,7 +77,7 @@ iface = gr.Interface(
|
|
| 69 |
gr.Audio()
|
| 70 |
],
|
| 71 |
outputs=[
|
| 72 |
-
gr.
|
| 73 |
],
|
| 74 |
live=True,
|
| 75 |
title="Video and Audio Processing with Emotion Recognition"
|
|
|
|
| 54 |
frame_idx += 1
|
| 55 |
cap.release()
|
| 56 |
|
| 57 |
+
train_visual = tf.convert_to_tensor(train_visual, dtype=tf.float16)
|
| 58 |
+
train_audio_wave = tf.reshape(tf.convert_to_tensor(train_audio_wave, dtype=tf.float16), (1, 20, 13077))
|
| 59 |
+
train_audio_cnn = tf.convert_to_tensor(train_audio_cnn, dtype=tf.float16)
|
| 60 |
+
|
| 61 |
return train_visual, train_audio_wave, train_audio_cnn
|
| 62 |
|
| 63 |
# Define the Gradio interface
|
| 64 |
def gradio_interface(video, audio):
    """Predict the emotion label for one uploaded video/audio pair.

    Args:
        video: Video input, forwarded unchanged to ``process_video_audio``.
        audio: Audio input, forwarded unchanged to ``process_video_audio``.

    Returns:
        The entry of ``emotion_labels`` selected by the highest-scoring
        class in the model output.
    """
    train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video, audio)
    # NOTE(review): the model is re-created from the .h5 file on every call;
    # consider loading it once at module level if request latency matters.
    model = trained_model("./model_vui_ve.h5")
    output = model.predict({
        "input_visual": train_visual,
        "input_audio_cnn": train_audio_cnn,
        "input_audio_wave": train_audio_wave,
    })
    # Assumes `output` is (batch, num_classes) holding a single sample --
    # TODO confirm against the model definition.
    # tf.math.argmax reduces over axis 0 by default, i.e. the batch axis, which
    # yields a vector of zeros instead of the class index. Reduce over the last
    # axis, take the first sample, and convert to a plain int so the result can
    # be used to index `emotion_labels`.
    emo_index = int(tf.math.argmax(output, axis=-1)[0])

    return emotion_labels[emo_index]
|
| 72 |
|
| 73 |
iface = gr.Interface(
|
| 74 |
fn=gradio_interface,
|
|
|
|
| 77 |
gr.Audio()
|
| 78 |
],
|
| 79 |
outputs=[
|
| 80 |
+
gr.Text()
|
| 81 |
],
|
| 82 |
live=True,
|
| 83 |
title="Video and Audio Processing with Emotion Recognition"
|