Spaces:

THP2903
/

DPL-Project

Sleeping

App Files Community

THP2903 committed on Jul 9, 2024

Commit

1a638f1

verified ·

1 Parent(s): f40b122

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -16

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import streamlit as st
 import torch as pt
 import torchaudio
 import cv2
@@ -7,7 +7,6 @@ import numpy as np
 import tensorflow as tf
 from tensorflow.keras.models import load_model
 from moviepy.editor import VideoFileClip
-from PIL import Image
 def convert_video_to_audio_moviepy(video_file, output_ext="wav"):
     """Converts video to audio using MoviePy library that uses `ffmpeg` under the hood"""
@@ -31,11 +30,15 @@ def process_video_audio(video_path):
     face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
     if len(wav[0]) > 261540:
         train_audio_wave[0, :] = wav[0][:261540]
     else:
         train_audio_wave[0, :len(wav[0])] = wav[0][:]
     train_audio_cnn[0, :, :, 0] = mfcc(train_audio_wave[0])
     cap = cv2.VideoCapture(video_path)
     frame_idx = 0
     last_frame = None
@@ -76,23 +79,24 @@ def predict_emotion(video_path):
     predicted_label = np.argmax(predictions)
     return last_frame, audio_path, predicted_label
-def predict_emotion_streamlit(video_path):
     emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
     last_frame, audio_path, predicted_label = predict_emotion(video_path)
     predicted_emotion = emotion_dict[predicted_label]
     return last_frame, audio_path, predicted_emotion
-st.title("Emotion Recognition from Video")
-st.write("Upload a video and get the predicted emotion.")
-video_file = st.file_uploader("Upload a video", type=["mp4", "avi", "mov"])
-if video_file is not None:
-    with open("uploaded_video.mp4", "wb") as f:
-        f.write(video_file.getbuffer())
-    last_frame, audio_path, predicted_emotion = predict_emotion_streamlit("uploaded_video.mp4")
-    st.image(last_frame, caption="Last Frame", use_column_width=True)
-    st.audio(audio_path, format="audio/wav")
-    st.text(f"Predicted Emotion: {predicted_emotion}")

+import gradio as gr
 import torch as pt
 import torchaudio
 import cv2
 import tensorflow as tf
 from tensorflow.keras.models import load_model
 from moviepy.editor import VideoFileClip
 def convert_video_to_audio_moviepy(video_file, output_ext="wav"):
     """Converts video to audio using MoviePy library that uses `ffmpeg` under the hood"""
     face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
     if len(wav[0]) > 261540:
+        print(wav.shape)
         train_audio_wave[0, :] = wav[0][:261540]
     else:
+        print(wav.shape)
         train_audio_wave[0, :len(wav[0])] = wav[0][:]
     train_audio_cnn[0, :, :, 0] = mfcc(train_audio_wave[0])
+    print(train_audio_cnn[0].shape)
     cap = cv2.VideoCapture(video_path)
     frame_idx = 0
     last_frame = None
     predicted_label = np.argmax(predictions)
     return last_frame, audio_path, predicted_label
+def predict_emotion_gradio(video_path):
     emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
     last_frame, audio_path, predicted_label = predict_emotion(video_path)
     predicted_emotion = emotion_dict[predicted_label]
     return last_frame, audio_path, predicted_emotion
+iface = gr.Interface(
+    fn=predict_emotion_gradio,
+    inputs=[
+        gr.Video(label="Upload a video")
+    ],
+    outputs=[
+        gr.Image(label="Last Frame"),
+        gr.Audio(label = "Audio"),
+        gr.Textbox(label="Predicted Emotion")
+    ],
+    title="Emotion Recognition from Video",
+    description="Upload a video and get the predicted emotion."
+)
+iface.launch()