THP2903 committed on
Commit
7082540
·
verified ·
1 Parent(s): 689cf50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +195 -74
app.py CHANGED
@@ -1,3 +1,13 @@
 
 
 
 
 
 
 
 
 
 
1
  # import gradio as gr
2
  # import torch as pt
3
  # import torchaudio
@@ -7,6 +17,169 @@
7
  # import tensorflow as tf
8
  # from tensorflow.keras.models import load_model
9
  # from moviepy.editor import VideoFileClip
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  import gradio as gr
12
  import torch as pt
@@ -17,16 +190,18 @@ import numpy as np
17
  import tensorflow as tf
18
  from tensorflow.keras.models import load_model
19
  from moviepy.editor import VideoFileClip
20
- import socketIO_client as sio
 
21
 
 
22
  def convert_video_to_audio_moviepy(video_file, output_ext="wav"):
23
- """Converts video to audio using MoviePy library that uses `ffmpeg` under the hood"""
24
  filename, ext = os.path.splitext(video_file)
25
  clip = VideoFileClip(video_file)
26
  audio_path = f"{filename}.{output_ext}"
27
  clip.audio.write_audiofile(audio_path)
28
  return audio_path
29
 
 
30
  def process_video_audio(video_path):
31
  audio_path = convert_video_to_audio_moviepy(video_path)
32
 
@@ -41,15 +216,11 @@ def process_video_audio(video_path):
41
  face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
42
 
43
  if len(wav[0]) > 261540:
44
- print(wav.shape)
45
  train_audio_wave[0, :] = wav[0][:261540]
46
  else:
47
- print(wav.shape)
48
  train_audio_wave[0, :len(wav[0])] = wav[0][:]
49
  train_audio_cnn[0, :, :, 0] = mfcc(train_audio_wave[0])
50
 
51
- print(train_audio_cnn[0].shape)
52
-
53
  cap = cv2.VideoCapture(video_path)
54
  frame_idx = 0
55
  last_frame = None
@@ -76,6 +247,7 @@ def process_video_audio(video_path):
76
 
77
  return last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn
78
 
 
79
  def predict_emotion(video_path):
80
  last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path)
81
 
@@ -90,78 +262,21 @@ def predict_emotion(video_path):
90
  predicted_label = np.argmax(predictions)
91
  return last_frame, audio_path, predicted_label
92
 
93
- # def predict_emotion_gradio(video_path):
94
- # emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
95
- # last_frame, audio_path, predicted_label = predict_emotion(video_path)
96
- # predicted_emotion = emotion_dict[predicted_label]
97
- # return last_frame, audio_path, predicted_emotion
98
-
99
- # iface = gr.Interface(
100
- # fn=predict_emotion_gradio,
101
- # inputs=[
102
- # gr.Video(label="Upload a video")
103
- # ],
104
- # outputs=[
105
- # gr.Image(label="Last Frame"),
106
- # gr.Audio(label = "Audio"),
107
- # gr.Textbox(label="Predicted Emotion")
108
- # ],
109
- # title="Emotion Recognition from Video",
110
- # description="Upload a video and get the predicted emotion."
111
- # )
112
-
113
- # iface.launch()
114
-
115
- def run_chat_server(app):
116
- """Runs a chat server using socket.IO"""
117
- clients = []
118
- messages = []
119
-
120
- @app.route('/chat', methods=['GET', 'POST'])
121
- def chat():
122
- return app.socketio.send(messages)
123
-
124
- @app.socketio.on('message')
125
- def handle_message(message):
126
- clients.append(message['client'])
127
- messages.append(message)
128
- app.logger.info(f'Received message: {message}')
129
- app.socketio.emit('message', message, skip_sid=True)
130
-
131
- @app.socketio.on('connect')
132
- def handle_connect():
133
- app.logger.info('Client connected')
134
-
135
- @app.socketio.on('disconnect')
136
- def handle_disconnect():
137
- app.logger.info('Client disconnected')
138
-
139
- if __name__ == '__main__':
140
- app.run(debug=True)
141
-
142
  def predict_emotion_with_chat(video_path):
 
143
  last_frame, audio_path, predicted_label = predict_emotion(video_path)
144
  predicted_emotion = emotion_dict[predicted_label]
145
 
146
  # Connect to the chat server
147
- client = sio.Client()
148
- client.connect('http://localhost:5000/chat')
149
-
150
- # Send the predicted emotion to the chat server
151
- client.emit('message', {'client': 'Emotion Recognition', 'message'
152
- : f'Predicted emotion: {predicted_emotion}'})
153
-
154
- # Receive messages from the chat server
155
- for msg in client.events:
156
- print(msg)
157
 
158
- return last_frame, audio_path, predicted_emotion, messages
159
 
 
160
  iface = gr.Interface(
161
  fn=predict_emotion_with_chat,
162
- inputs=[
163
- gr.Video(label="Upload a video")
164
- ],
165
  outputs=[
166
  gr.Image(label="Last Frame"),
167
  gr.Audio(label="Audio"),
@@ -172,10 +287,16 @@ iface = gr.Interface(
172
  description="Upload a video and get the predicted emotion. Chat with others in real-time."
173
  )
174
 
175
- # Start the Gradio interface and the chat server
176
- from flask import Flask
177
  app = Flask(__name__)
178
  app.config['SECRET_KEY'] = 'secret'
179
- app.socketio = sio.SocketIO(app)
180
- run_chat_server(app)
181
- iface.launch()
 
 
 
 
 
 
 
 
1
+ # # import gradio as gr
2
+ # # import torch as pt
3
+ # # import torchaudio
4
+ # # import cv2
5
+ # # import os
6
+ # # import numpy as np
7
+ # # import tensorflow as tf
8
+ # # from tensorflow.keras.models import load_model
9
+ # # from moviepy.editor import VideoFileClip
10
+
11
  # import gradio as gr
12
  # import torch as pt
13
  # import torchaudio
 
17
  # import tensorflow as tf
18
  # from tensorflow.keras.models import load_model
19
  # from moviepy.editor import VideoFileClip
20
+ # import socketIO_client as sio
21
+
22
+ # def convert_video_to_audio_moviepy(video_file, output_ext="wav"):
23
+ # """Converts video to audio using MoviePy library that uses `ffmpeg` under the hood"""
24
+ # filename, ext = os.path.splitext(video_file)
25
+ # clip = VideoFileClip(video_file)
26
+ # audio_path = f"{filename}.{output_ext}"
27
+ # clip.audio.write_audiofile(audio_path)
28
+ # return audio_path
29
+
30
+ # def process_video_audio(video_path):
31
+ # audio_path = convert_video_to_audio_moviepy(video_path)
32
+
33
+ # wav, sr = torchaudio.load(audio_path)
34
+
35
+ # train_visual = pt.zeros([1, 120, 120, 3, 10])
36
+ # train_audio_wave = pt.zeros([1, 261540])
37
+ # train_audio_cnn = pt.zeros([1, 150, 512, 1])
38
+
39
+ # mfcc = torchaudio.transforms.MFCC(n_mfcc=150, melkwargs={"n_fft": 1022, "n_mels": 150})
40
+
41
+ # face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
42
+
43
+ # if len(wav[0]) > 261540:
44
+ # print(wav.shape)
45
+ # train_audio_wave[0, :] = wav[0][:261540]
46
+ # else:
47
+ # print(wav.shape)
48
+ # train_audio_wave[0, :len(wav[0])] = wav[0][:]
49
+ # train_audio_cnn[0, :, :, 0] = mfcc(train_audio_wave[0])
50
+
51
+ # print(train_audio_cnn[0].shape)
52
+
53
+ # cap = cv2.VideoCapture(video_path)
54
+ # frame_idx = 0
55
+ # last_frame = None
56
+ # for i in range(100):
57
+ # ret, frame = cap.read()
58
+ # if ret and (i % 10 == 0):
59
+ # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
60
+ # faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
61
+ # if len(faces) > 0:
62
+ # (x, y, w, h) = faces[0]
63
+ # face = frame[y:y+h, x:x+w]
64
+ # resized_face = cv2.resize(face, (120, 120))
65
+ # train_visual[0, :, :, :, frame_idx] = pt.tensor(resized_face)
66
+ # else:
67
+ # resized_frame = cv2.resize(frame, (120, 120))
68
+ # train_visual[0, :, :, :, frame_idx] = pt.tensor(resized_frame)
69
+ # last_frame = frame
70
+ # frame_idx += 1
71
+ # cap.release()
72
+
73
+ # train_visual = tf.convert_to_tensor(train_visual.numpy(), dtype=tf.float16)
74
+ # train_audio_wave = tf.reshape(tf.convert_to_tensor(train_audio_wave.numpy(), dtype=tf.float16), (1, 20, 13077))
75
+ # train_audio_cnn = tf.convert_to_tensor(train_audio_cnn.numpy(), dtype=tf.float16)
76
+
77
+ # return last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn
78
+
79
+ # def predict_emotion(video_path):
80
+ # last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path)
81
+
82
+ # model = load_model("model_vui_ve.keras")
83
+
84
+ # predictions = model.predict({
85
+ # "input_visual": train_visual,
86
+ # "input_audio_cnn": train_audio_cnn,
87
+ # "input_audio_wave": train_audio_wave
88
+ # })
89
+
90
+ # predicted_label = np.argmax(predictions)
91
+ # return last_frame, audio_path, predicted_label
92
+
93
+ # # def predict_emotion_gradio(video_path):
94
+ # # emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
95
+ # # last_frame, audio_path, predicted_label = predict_emotion(video_path)
96
+ # # predicted_emotion = emotion_dict[predicted_label]
97
+ # # return last_frame, audio_path, predicted_emotion
98
+
99
+ # # iface = gr.Interface(
100
+ # # fn=predict_emotion_gradio,
101
+ # # inputs=[
102
+ # # gr.Video(label="Upload a video")
103
+ # # ],
104
+ # # outputs=[
105
+ # # gr.Image(label="Last Frame"),
106
+ # # gr.Audio(label = "Audio"),
107
+ # # gr.Textbox(label="Predicted Emotion")
108
+ # # ],
109
+ # # title="Emotion Recognition from Video",
110
+ # # description="Upload a video and get the predicted emotion."
111
+ # # )
112
+
113
+ # # iface.launch()
114
+
115
+ # def run_chat_server(app):
116
+ # """Runs a chat server using socket.IO"""
117
+ # clients = []
118
+ # messages = []
119
+
120
+ # @app.route('/chat', methods=['GET', 'POST'])
121
+ # def chat():
122
+ # return app.socketio.send(messages)
123
+
124
+ # @app.socketio.on('message')
125
+ # def handle_message(message):
126
+ # clients.append(message['client'])
127
+ # messages.append(message)
128
+ # app.logger.info(f'Received message: {message}')
129
+ # app.socketio.emit('message', message, skip_sid=True)
130
+
131
+ # @app.socketio.on('connect')
132
+ # def handle_connect():
133
+ # app.logger.info('Client connected')
134
+
135
+ # @app.socketio.on('disconnect')
136
+ # def handle_disconnect():
137
+ # app.logger.info('Client disconnected')
138
+
139
+ # if __name__ == '__main__':
140
+ # app.run(debug=True)
141
+
142
+ # def predict_emotion_with_chat(video_path):
143
+ # last_frame, audio_path, predicted_label = predict_emotion(video_path)
144
+ # predicted_emotion = emotion_dict[predicted_label]
145
+
146
+ # # Connect to the chat server
147
+ # client = sio.Client()
148
+ # client.connect('http://localhost:5000/chat')
149
+
150
+ # # Send the predicted emotion to the chat server
151
+ # client.emit('message', {'client': 'Emotion Recognition', 'message'
152
+ # : f'Predicted emotion: {predicted_emotion}'})
153
+
154
+ # # Receive messages from the chat server
155
+ # for msg in client.events:
156
+ # print(msg)
157
+
158
+ # return last_frame, audio_path, predicted_emotion, messages
159
+
160
+ # iface = gr.Interface(
161
+ # fn=predict_emotion_with_chat,
162
+ # inputs=[
163
+ # gr.Video(label="Upload a video")
164
+ # ],
165
+ # outputs=[
166
+ # gr.Image(label="Last Frame"),
167
+ # gr.Audio(label="Audio"),
168
+ # gr.Textbox(label="Predicted Emotion"),
169
+ # gr.Chatbot(label="Chat")
170
+ # ],
171
+ # title="Emotion Recognition with Chat",
172
+ # description="Upload a video and get the predicted emotion. Chat with others in real-time."
173
+ # )
174
+
175
+ # # Start the Gradio interface and the chat server
176
+ # from flask import Flask
177
+ # app = Flask(__name__)
178
+ # app.config['SECRET_KEY'] = 'secret'
179
+ # app.socketio = sio.SocketIO(app)
180
+ # run_chat_server(app)
181
+ # iface.launch()
182
+
183
 
184
  import gradio as gr
185
  import torch as pt
 
190
  import tensorflow as tf
191
  from tensorflow.keras.models import load_model
192
  from moviepy.editor import VideoFileClip
193
+ from flask import Flask
194
+ from flask_socketio import SocketIO, emit
195
 
196
+ # Function to convert video to audio
197
  def convert_video_to_audio_moviepy(video_file, output_ext="wav"):
 
198
  filename, ext = os.path.splitext(video_file)
199
  clip = VideoFileClip(video_file)
200
  audio_path = f"{filename}.{output_ext}"
201
  clip.audio.write_audiofile(audio_path)
202
  return audio_path
203
 
204
+ # Process video and audio
205
  def process_video_audio(video_path):
206
  audio_path = convert_video_to_audio_moviepy(video_path)
207
 
 
216
  face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
217
 
218
  if len(wav[0]) > 261540:
 
219
  train_audio_wave[0, :] = wav[0][:261540]
220
  else:
 
221
  train_audio_wave[0, :len(wav[0])] = wav[0][:]
222
  train_audio_cnn[0, :, :, 0] = mfcc(train_audio_wave[0])
223
 
 
 
224
  cap = cv2.VideoCapture(video_path)
225
  frame_idx = 0
226
  last_frame = None
 
247
 
248
  return last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn
249
 
250
+ # Predict emotion from video
251
  def predict_emotion(video_path):
252
  last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path)
253
 
 
262
  predicted_label = np.argmax(predictions)
263
  return last_frame, audio_path, predicted_label
264
 
265
+ # Integrate chat functionality with emotion prediction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  def predict_emotion_with_chat(video_path):
267
+ emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
268
  last_frame, audio_path, predicted_label = predict_emotion(video_path)
269
  predicted_emotion = emotion_dict[predicted_label]
270
 
271
  # Connect to the chat server
272
+ socketio.emit('message', {'client': 'Emotion Recognition', 'message': f'Predicted emotion: {predicted_emotion}'})
 
 
 
 
 
 
 
 
 
273
 
274
+ return last_frame, audio_path, predicted_emotion
275
 
276
+ # Gradio Interface
277
  iface = gr.Interface(
278
  fn=predict_emotion_with_chat,
279
+ inputs=[gr.Video(label="Upload a video")],
 
 
280
  outputs=[
281
  gr.Image(label="Last Frame"),
282
  gr.Audio(label="Audio"),
 
287
  description="Upload a video and get the predicted emotion. Chat with others in real-time."
288
  )
289
 
290
+ # Flask app setup
 
291
  app = Flask(__name__)
292
  app.config['SECRET_KEY'] = 'secret'
293
+ socketio = SocketIO(app)
294
+
295
+ # Run the chat server
296
+ @socketio.on('message')
297
+ def handle_message(message):
298
+ emit('message', message, broadcast=True)
299
+
300
+ if __name__ == '__main__':
301
+ iface.launch()
302
+ socketio.run(app, debug=True)