Spaces:

THP2903
/

DPL-Project

Sleeping

App Files Community

THP2903 committed on Jun 28, 2024

Commit

16e2339

verified ·

1 Parent(s): 6cd4025

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -10

app.py CHANGED Viewed

@@ -117,6 +117,7 @@ def process_video_audio(video_path, audio_path):
     cap = cv2.VideoCapture(video_path)
     frame_idx = 0
     for i in range(100):
         ret, frame = cap.read()
         if ret and (i % 10 == 0):
@@ -130,17 +131,18 @@ def process_video_audio(video_path, audio_path):
             else:
                 resized_frame = cv2.resize(frame, (120, 120))
                 train_visual[0, :, :, :, frame_idx] = pt.tensor(resized_frame)
             frame_idx += 1
     cap.release()
     train_visual = tf.convert_to_tensor(train_visual, dtype=tf.float16)
     train_audio_wave = tf.reshape(tf.convert_to_tensor(train_audio_wave, dtype=tf.float16), (1, 20, 13077))
     train_audio_cnn = tf.convert_to_tensor(train_audio_cnn, dtype=tf.float16)
-    return train_visual, train_audio_wave, train_audio_cnn
 def predict_emotion(video_path, audio_path):
-    train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path, audio_path)
     model = load_model("model_vui_ve.h5")
     predictions = model.predict({
@@ -150,15 +152,15 @@ def predict_emotion(video_path, audio_path):
     })
     predicted_label = np.argmax(predictions)
-    return predicted_label
 # Định nghĩa giao diện Gradio
-def predict_emotion_gradio(video, audio):
     emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
-    predicted_label = predict_emotion(video, audio)
     predicted_emotion = emotion_dict[predicted_label]
-    return predicted_emotion
 # def gradio_interface(video, audio):
 #     emotion_labels = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
@@ -172,11 +174,12 @@ def predict_emotion_gradio(video, audio):
 iface = gr.Interface(
     fn=predict_emotion_gradio,
     inputs=[
-        gr.Video(label="Upload a video"
-),
         gr.Audio(label="Upload a audio")
     ],
-    outputs=gr.Textbox(label="Predicted Emotion"),
     title="Emotion Recognition from Video",
     description="Upload a video and get the predicted emotion."
 )

     cap = cv2.VideoCapture(video_path)
     frame_idx = 0
+    last_frame = None
     for i in range(100):
         ret, frame = cap.read()
         if ret and (i % 10 == 0):
             else:
                 resized_frame = cv2.resize(frame, (120, 120))
                 train_visual[0, :, :, :, frame_idx] = pt.tensor(resized_frame)
+                last_frame = frame
             frame_idx += 1
     cap.release()
     train_visual = tf.convert_to_tensor(train_visual, dtype=tf.float16)
     train_audio_wave = tf.reshape(tf.convert_to_tensor(train_audio_wave, dtype=tf.float16), (1, 20, 13077))
     train_audio_cnn = tf.convert_to_tensor(train_audio_cnn, dtype=tf.float16)
+    return last_frame, train_visual, train_audio_wave, train_audio_cnn
 def predict_emotion(video_path, audio_path):
+    last_frame, train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path, audio_path)
     model = load_model("model_vui_ve.h5")
     predictions = model.predict({
     })
     predicted_label = np.argmax(predictions)
+    return last_frame, predicted_label
 # Định nghĩa giao diện Gradio
+def predict_emotion_gradio(video_path, audio_path):
     emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
+    last_frame, predicted_label = predict_emotion(video_path, audio_path)
     predicted_emotion = emotion_dict[predicted_label]
+    return last_frame, predicted_emotion
 # def gradio_interface(video, audio):
 #     emotion_labels = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
 iface = gr.Interface(
     fn=predict_emotion_gradio,
     inputs=[
+        gr.Video(label="Upload a video"),
         gr.Audio(label="Upload a audio")
     ],
+    outputs=[
+        gr.Textbox(label="Predicted Emotion"),
+        gr.Image(label = image frame last)],
     title="Emotion Recognition from Video",
     description="Upload a video and get the predicted emotion."
 )