Update app.py

app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 import torch as pt
-import
+import
+torchaudio
 import cv2
 import os
 import numpy as np
@@ -63,10 +64,10 @@ def process_video_audio(video_path):
     train_audio_wave = tf.reshape(tf.convert_to_tensor(train_audio_wave.numpy(), dtype=tf.float16), (1, 20, 13077))
     train_audio_cnn = tf.convert_to_tensor(train_audio_cnn.numpy(), dtype=tf.float16)
 
-    return last_frame, train_visual, train_audio_wave, train_audio_cnn
+    return last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn
 
 def predict_emotion(video_path):
-    last_frame, train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path)
+    last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path)
 
     model = load_model("model_vui_ve.keras")
 
@@ -77,13 +78,13 @@ def predict_emotion(video_path):
     })
 
     predicted_label = np.argmax(predictions)
-    return last_frame, predicted_label
+    return last_frame, audio_path, predicted_label
 
 def predict_emotion_gradio(video_path):
     emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
-    last_frame, predicted_label = predict_emotion(video_path)
+    last_frame, audio_path, predicted_label = predict_emotion(video_path)
     predicted_emotion = emotion_dict[predicted_label]
-    return last_frame, predicted_emotion
+    return last_frame, audio_path, predicted_emotion
 
 iface = gr.Interface(
     fn=predict_emotion_gradio,
@@ -92,6 +93,7 @@ iface = gr.Interface(
     ],
     outputs=[
         gr.Image(label="Last Frame"),
+        gr.Audio(label = "Audio")
         gr.Textbox(label="Predicted Emotion")
     ],
     title="Emotion Recognition from Video",
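
After this commit, predict_emotion_gradio returns a three-item tuple, so the outputs list must declare three Gradio components in the same order. A minimal sketch of the resulting tail of app.py follows; the gr.Video input is an assumption (the inputs hunk is not shown), and a trailing comma is added after gr.Audio(...), which the committed hunk appears to omit:

import gradio as gr

# predict_emotion() is defined earlier in app.py: it runs the
# model_vui_ve.keras pipeline and returns (last_frame, audio_path, label).
def predict_emotion_gradio(video_path):
    emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
    last_frame, audio_path, predicted_label = predict_emotion(video_path)
    predicted_emotion = emotion_dict[predicted_label]
    return last_frame, audio_path, predicted_emotion

iface = gr.Interface(
    fn=predict_emotion_gradio,
    inputs=[gr.Video(label="Video")],  # assumed; not part of the shown hunks
    outputs=[                          # order must match the returned tuple
        gr.Image(label="Last Frame"),
        gr.Audio(label="Audio"),       # new output added by this commit
        gr.Textbox(label="Predicted Emotion"),
    ],
    title="Emotion Recognition from Video",
)

iface.launch()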
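
The new torchaudio import suggests the extracted audio track is decoded into a waveform tensor somewhere inside process_video_audio. That code falls outside the hunks shown; a hedged sketch of the usual pattern, where the WAV path and the demux step are assumptions rather than code from this commit:

import torchaudio

# Assumption: app.py has already demuxed the uploaded video's audio
# track into a WAV file at audio_path (e.g. with ffmpeg or moviepy).
audio_path = "audio.wav"

# torchaudio.load() returns a (waveform, sample_rate) pair; waveform is
# a float tensor of shape (channels, num_samples).
waveform, sample_rate = torchaudio.load(audio_path)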