THP2903 committed on
Commit
b249890
·
verified ·
1 Parent(s): dcec65d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -4
app.py CHANGED
@@ -1,10 +1,71 @@
1
  import gradio as gr
2
  import torch as pt
 
3
  import cv2
 
 
4
 
5
def greet(name):
    """Build the classic hello greeting for *name*.

    Parameters:
        name: the string to greet.

    Returns:
        The string "Hello <name>!!".
    """
    greeting = "Hello " + name
    return greeting + "!!"
7
 
 
 
 
 
 
 
 
 
 
8
 
9
# Minimal text-in/text-out Gradio demo wired to greet, then served.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import torch as pt
3
+ import torchaudio
4
  import cv2
5
+ import os
6
+ import numpy as np
7
 
8
# Class index -> emotion name mapping (looks like RAVDESS-style labels —
# TODO(review): confirm against the classifier's actual training label order).
emotion_labels = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
 
9
 
10
def process_video_audio(video_path, audio_path):
    """Extract face crops and MFCC audio features from a video/audio pair.

    Parameters
    ----------
    video_path : str
        Path to the input video file (decoded with OpenCV).
    audio_path : str
        Path to the input audio file (loaded with torchaudio).

    Returns
    -------
    tuple
        ``(last_frame, predicted_emotion)`` where ``last_frame`` is the last
        sampled BGR frame (``None`` if no frame was read) and
        ``predicted_emotion`` is currently the placeholder string "unknown".
    """
    num_videos = 1
    # Up to 10 sampled 120x120 BGR frames per video.
    train_visual = np.zeros([num_videos, 120, 120, 3, 10])
    # Fixed-length raw waveform buffer (261540 samples of channel 0).
    train_audio_wave = np.zeros([num_videos, 261540])
    # MFCC feature map — assumed 150 coefficients x 512 time frames for the
    # fixed waveform length; TODO(review) confirm hop/frame math.
    train_audio_cnn = np.zeros([num_videos, 150, 512, 1])

    mfcc = torchaudio.transforms.MFCC(n_mfcc=150, melkwargs={"n_fft": 1022, "n_mels": 150})

    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Load the waveform and truncate/zero-pad channel 0 to the fixed length.
    wav, _ = torchaudio.load(audio_path)
    if len(wav[0]) > 261540:
        train_audio_wave[0, :] = wav[0][:261540]
    else:
        train_audio_wave[0, :len(wav[0])] = wav[0]
    # BUG FIX: torchaudio transforms require a torch.Tensor (the underlying
    # Spectrogram calls torch.stft), but train_audio_wave is a NumPy float64
    # array — convert to a float32 tensor before applying the transform.
    waveform = pt.from_numpy(train_audio_wave[0]).float()
    train_audio_cnn[0, :, :, 0] = mfcc(waveform).numpy()

    cap = cv2.VideoCapture(video_path)
    frame_idx = 0
    last_frame = None
    # Scan the first 100 frames, keeping every 10th (at most 10 samples,
    # which matches train_visual's last dimension).
    for i in range(100):
        ret, frame = cap.read()
        if ret and (i % 10 == 0):
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
            if len(faces) > 0:
                # Crop the first detected face and normalize its size.
                (x, y, w, h) = faces[0]
                face = frame[y:y+h, x:x+w]
                resized_face = cv2.resize(face, (120, 120))
                train_visual[0, :, :, :, frame_idx] = resized_face
            else:
                # No face detected: fall back to the full resized frame.
                resized_frame = cv2.resize(frame, (120, 120))
                train_visual[0, :, :, :, frame_idx] = resized_frame
            last_frame = frame
            frame_idx += 1
    cap.release()

    # TODO: run the actual emotion classifier over train_visual /
    # train_audio_cnn — "unknown" is a placeholder until then.
    predicted_emotion = "unknown"

    return last_frame, predicted_emotion
51
+
52
+ # Định nghĩa giao diện Gradio
53
# Gradio callback: forward the uploaded video/audio to the processor and
# package the results for the UI.
def gradio_interface(video, audio):
    """Run process_video_audio and return (image component, emotion text)."""
    processed_frame, detected_emotion = process_video_audio(video, audio)
    frame_component = gr.Image(processed_frame, label="Processed Frame")
    return frame_component, detected_emotion
56
+
57
# Wire the UI: a video + audio input pair mapped to an image (processed
# frame) and a text box (recognized emotion), then serve it.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[gr.Video(), gr.Audio()],
    outputs=[gr.Image(), gr.Textbox()],
    live=True,
    title="Video and Audio Processing with Emotion Recognition",
)

iface.launch()