Update app.py
app.py
CHANGED
@@ -1,10 +1,71 @@
 import gradio as gr
 import torch as pt
+import torchaudio
 import cv2
+import os
+import numpy as np
 
-
-    return "Hello " + name + "!!"
+emotion_labels = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}  # label map for the classifier (not wired up yet)
 
+def process_video_audio(video_path, audio_path):
+    num_videos = 1  # one clip per request
+    train_visual = np.zeros([num_videos, 120, 120, 3, 10])  # 10 face crops of 120x120x3 per clip
+    train_audio_wave = np.zeros([num_videos, 261540])  # fixed-length waveform buffer
+    train_audio_cnn = np.zeros([num_videos, 150, 512, 1])  # 150 MFCC coefficients x 512 frames
+
+    mfcc = torchaudio.transforms.MFCC(n_mfcc=150, melkwargs={"n_fft": 1022, "n_mels": 150})  # hop length defaults to n_fft // 2 = 511
+
+    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
 
-
-
+    wav, _ = torchaudio.load(audio_path)  # (waveform, sample_rate)
+    if len(wav[0]) > 261540:  # truncate or zero-pad to the fixed buffer length
+        train_audio_wave[0, :] = wav[0][:261540].numpy()
+    else:
+        train_audio_wave[0, :len(wav[0])] = wav[0].numpy()
+    train_audio_cnn[0, :, :, 0] = mfcc(pt.from_numpy(train_audio_wave[0]).float()).numpy()  # MFCC expects a torch tensor, not a numpy array
+
+    cap = cv2.VideoCapture(video_path)
+    frame_idx = 0
+    last_frame = None
+    for i in range(100):  # keep every 10th of the first 100 frames -> up to 10 samples
+        ret, frame = cap.read()
+        if ret and (i % 10 == 0):
+            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+            if len(faces) > 0:  # crop the first detected face, else fall back to the full frame
+                (x, y, w, h) = faces[0]
+                face = frame[y:y+h, x:x+w]
+                resized_face = cv2.resize(face, (120, 120))
+                train_visual[0, :, :, :, frame_idx] = resized_face
+            else:
+                resized_frame = cv2.resize(frame, (120, 120))
+                train_visual[0, :, :, :, frame_idx] = resized_frame
+            last_frame = frame
+            frame_idx += 1
+    cap.release()
+
+
+    predicted_emotion = "unknown"  # placeholder: no trained model is loaded yet
+
+    return last_frame, predicted_emotion
+
+# Define the Gradio interface
+def gradio_interface(video, audio):
+    frame, emotion = process_video_audio(video, audio)
+    return (cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) if frame is not None else None), emotion  # Gradio expects RGB; OpenCV frames are BGR
+
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=[
+        gr.Video(),
+        gr.Audio(type="filepath")  # pass a file path so torchaudio.load() can read it
+    ],
+    outputs=[
+        gr.Image(),
+        gr.Textbox()
+    ],
+    live=True,
+    title="Video and Audio Processing with Emotion Recognition"
+)
+
+iface.launch()
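
A note on the hard-coded buffer shapes: with n_fft=1022 the MFCC transform's hop length defaults to n_fft // 2 = 511 samples, so the fixed 261540-sample waveform yields 1 + 261540 // 511 = 512 centered frames of 150 coefficients, which is exactly the [150, 512] slice of train_audio_cnn. A quick standalone check:

import torch
import torchaudio

# Same settings as app.py: 150 MFCCs, n_fft=1022 -> hop length 511.
mfcc = torchaudio.transforms.MFCC(n_mfcc=150, melkwargs={"n_fft": 1022, "n_mels": 150})

wave = torch.zeros(261540)   # silent clip at the fixed buffer length
print(mfcc(wave).shape)      # torch.Size([150, 512])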
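
The emotion_labels map and the "unknown" placeholder show that the classifier itself is not part of this commit. As a minimal sketch of how the prepared tensors could be wired to one, assuming a hypothetical TorchScript checkpoint model.pt whose forward takes the visual and audio tensors and returns logits over the six labels (the checkpoint name and input signature are assumptions, not part of the app), the placeholder line inside process_video_audio could become:

# Hypothetical: "model.pt" and its (visual, audio) -> logits signature are assumed.
model = pt.jit.load("model.pt")
model.eval()
with pt.no_grad():
    logits = model(pt.from_numpy(train_visual).float(),
                   pt.from_numpy(train_audio_cnn).float())
predicted_emotion = emotion_labels[int(logits.argmax(dim=1)[0])]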