Spaces:

THP2903
/

DPL-Project

Sleeping

App Files Community

THP2903 committed on Jul 10, 2024

Commit

7082540

verified ·

1 Parent(s): 689cf50

Update app.py

Browse files

Files changed (1) hide show

app.py +195 -74

app.py CHANGED Viewed

@@ -1,3 +1,13 @@
 # import gradio as gr
 # import torch as pt
 # import torchaudio
@@ -7,6 +17,169 @@
 # import tensorflow as tf
 # from tensorflow.keras.models import load_model
 # from moviepy.editor import VideoFileClip
 import gradio as gr
 import torch as pt
@@ -17,16 +190,18 @@ import numpy as np
 import tensorflow as tf
 from tensorflow.keras.models import load_model
 from moviepy.editor import VideoFileClip
-import socketIO_client as sio
 def convert_video_to_audio_moviepy(video_file, output_ext="wav"):
-    """Converts video to audio using MoviePy library that uses `ffmpeg` under the hood"""
     filename, ext = os.path.splitext(video_file)
     clip = VideoFileClip(video_file)
     audio_path = f"{filename}.{output_ext}"
     clip.audio.write_audiofile(audio_path)
     return audio_path
 def process_video_audio(video_path):
     audio_path = convert_video_to_audio_moviepy(video_path)
@@ -41,15 +216,11 @@ def process_video_audio(video_path):
     face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
     if len(wav[0]) > 261540:
-        print(wav.shape)
         train_audio_wave[0, :] = wav[0][:261540]
     else:
-        print(wav.shape)
         train_audio_wave[0, :len(wav[0])] = wav[0][:]
     train_audio_cnn[0, :, :, 0] = mfcc(train_audio_wave[0])
-    print(train_audio_cnn[0].shape)
     cap = cv2.VideoCapture(video_path)
     frame_idx = 0
     last_frame = None
@@ -76,6 +247,7 @@ def process_video_audio(video_path):
     return last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn
 def predict_emotion(video_path):
     last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path)
@@ -90,78 +262,21 @@ def predict_emotion(video_path):
     predicted_label = np.argmax(predictions)
     return last_frame, audio_path, predicted_label
-# def predict_emotion_gradio(video_path):
-#     emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
-#     last_frame, audio_path, predicted_label = predict_emotion(video_path)
-#     predicted_emotion = emotion_dict[predicted_label]
-#     return last_frame, audio_path, predicted_emotion
-# iface = gr.Interface(
-#     fn=predict_emotion_gradio,
-#     inputs=[
-#         gr.Video(label="Upload a video")
-#     ],
-#     outputs=[
-#         gr.Image(label="Last Frame"),
-#         gr.Audio(label = "Audio"),
-#         gr.Textbox(label="Predicted Emotion")
-#     ],
-#     title="Emotion Recognition from Video",
-#     description="Upload a video and get the predicted emotion."
-# )
-# iface.launch()
-def run_chat_server(app):
-    """Runs a chat server using socket.IO"""
-    clients = []
-    messages = []
-    @app.route('/chat', methods=['GET', 'POST'])
-    def chat():
-        return app.socketio.send(messages)
-    @app.socketio.on('message')
-    def handle_message(message):
-        clients.append(message['client'])
-        messages.append(message)
-        app.logger.info(f'Received message: {message}')
-        app.socketio.emit('message', message, skip_sid=True)
-    @app.socketio.on('connect')
-    def handle_connect():
-        app.logger.info('Client connected')
-    @app.socketio.on('disconnect')
-    def handle_disconnect():
-        app.logger.info('Client disconnected')
-    if __name__ == '__main__':
-        app.run(debug=True)
 def predict_emotion_with_chat(video_path):
     last_frame, audio_path, predicted_label = predict_emotion(video_path)
     predicted_emotion = emotion_dict[predicted_label]
     # Connect to the chat server
-    client = sio.Client()
-    client.connect('http://localhost:5000/chat')
-    # Send the predicted emotion to the chat server
-    client.emit('message', {'client': 'Emotion Recognition', 'message'
-: f'Predicted emotion: {predicted_emotion}'})
-    # Receive messages from the chat server
-    for msg in client.events:
-        print(msg)
-    return last_frame, audio_path, predicted_emotion, messages
 iface = gr.Interface(
     fn=predict_emotion_with_chat,
-    inputs=[
-        gr.Video(label="Upload a video")
-    ],
     outputs=[
         gr.Image(label="Last Frame"),
         gr.Audio(label="Audio"),
@@ -172,10 +287,16 @@ iface = gr.Interface(
     description="Upload a video and get the predicted emotion. Chat with others in real-time."
 )
-# Start the Gradio interface and the chat server
-from flask import Flask
 app = Flask(__name__)
 app.config['SECRET_KEY'] = 'secret'
-app.socketio = sio.SocketIO(app)
-run_chat_server(app)
-iface.launch()

+# # import gradio as gr
+# # import torch as pt
+# # import torchaudio
+# # import cv2
+# # import os
+# # import numpy as np
+# # import tensorflow as tf
+# # from tensorflow.keras.models import load_model
+# # from moviepy.editor import VideoFileClip
 # import gradio as gr
 # import torch as pt
 # import torchaudio
 # import tensorflow as tf
 # from tensorflow.keras.models import load_model
 # from moviepy.editor import VideoFileClip
+# import socketIO_client as sio
+# def convert_video_to_audio_moviepy(video_file, output_ext="wav"):
+#     """Converts video to audio using MoviePy library that uses `ffmpeg` under the hood"""
+#     filename, ext = os.path.splitext(video_file)
+#     clip = VideoFileClip(video_file)
+#     audio_path = f"{filename}.{output_ext}"
+#     clip.audio.write_audiofile(audio_path)
+#     return audio_path
+# def process_video_audio(video_path):
+#     audio_path = convert_video_to_audio_moviepy(video_path)
+#     wav, sr = torchaudio.load(audio_path)
+#     train_visual = pt.zeros([1, 120, 120, 3, 10])
+#     train_audio_wave = pt.zeros([1, 261540])
+#     train_audio_cnn = pt.zeros([1, 150, 512, 1])
+#     mfcc = torchaudio.transforms.MFCC(n_mfcc=150, melkwargs={"n_fft": 1022, "n_mels": 150})
+#     face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+#     if len(wav[0]) > 261540:
+#         print(wav.shape)
+#         train_audio_wave[0, :] = wav[0][:261540]
+#     else:
+#         print(wav.shape)
+#         train_audio_wave[0, :len(wav[0])] = wav[0][:]
+#     train_audio_cnn[0, :, :, 0] = mfcc(train_audio_wave[0])
+#     print(train_audio_cnn[0].shape)
+#     cap = cv2.VideoCapture(video_path)
+#     frame_idx = 0
+#     last_frame = None
+#     for i in range(100):
+#         ret, frame = cap.read()
+#         if ret and (i % 10 == 0):
+#             gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+#             faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+#             if len(faces) > 0:
+#                 (x, y, w, h) = faces[0]
+#                 face = frame[y:y+h, x:x+w]
+#                 resized_face = cv2.resize(face, (120, 120))
+#                 train_visual[0, :, :, :, frame_idx] = pt.tensor(resized_face)
+#             else:
+#                 resized_frame = cv2.resize(frame, (120, 120))
+#                 train_visual[0, :, :, :, frame_idx] = pt.tensor(resized_frame)
+#                 last_frame = frame
+#             frame_idx += 1
+#     cap.release()
+#     train_visual = tf.convert_to_tensor(train_visual.numpy(), dtype=tf.float16)
+#     train_audio_wave = tf.reshape(tf.convert_to_tensor(train_audio_wave.numpy(), dtype=tf.float16), (1, 20, 13077))
+#     train_audio_cnn = tf.convert_to_tensor(train_audio_cnn.numpy(), dtype=tf.float16)
+#     return last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn
+# def predict_emotion(video_path):
+#     last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path)
+#     model = load_model("model_vui_ve.keras")
+#     predictions = model.predict({
+#         "input_visual": train_visual,
+#         "input_audio_cnn": train_audio_cnn,
+#         "input_audio_wave": train_audio_wave
+#     })
+#     predicted_label = np.argmax(predictions)
+#     return last_frame, audio_path, predicted_label
+# # def predict_emotion_gradio(video_path):
+# #     emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
+# #     last_frame, audio_path, predicted_label = predict_emotion(video_path)
+# #     predicted_emotion = emotion_dict[predicted_label]
+# #     return last_frame, audio_path, predicted_emotion
+# # iface = gr.Interface(
+# #     fn=predict_emotion_gradio,
+# #     inputs=[
+# #         gr.Video(label="Upload a video")
+# #     ],
+# #     outputs=[
+# #         gr.Image(label="Last Frame"),
+# #         gr.Audio(label = "Audio"),
+# #         gr.Textbox(label="Predicted Emotion")
+# #     ],
+# #     title="Emotion Recognition from Video",
+# #     description="Upload a video and get the predicted emotion."
+# # )
+# # iface.launch()
+# def run_chat_server(app):
+#     """Runs a chat server using socket.IO"""
+#     clients = []
+#     messages = []
+#     @app.route('/chat', methods=['GET', 'POST'])
+#     def chat():
+#         return app.socketio.send(messages)
+#     @app.socketio.on('message')
+#     def handle_message(message):
+#         clients.append(message['client'])
+#         messages.append(message)
+#         app.logger.info(f'Received message: {message}')
+#         app.socketio.emit('message', message, skip_sid=True)
+#     @app.socketio.on('connect')
+#     def handle_connect():
+#         app.logger.info('Client connected')
+#     @app.socketio.on('disconnect')
+#     def handle_disconnect():
+#         app.logger.info('Client disconnected')
+#     if __name__ == '__main__':
+#         app.run(debug=True)
+# def predict_emotion_with_chat(video_path):
+#     last_frame, audio_path, predicted_label = predict_emotion(video_path)
+#     predicted_emotion = emotion_dict[predicted_label]
+#     # Connect to the chat server
+#     client = sio.Client()
+#     client.connect('http://localhost:5000/chat')
+#     # Send the predicted emotion to the chat server
+#     client.emit('message', {'client': 'Emotion Recognition', 'message'
+# : f'Predicted emotion: {predicted_emotion}'})
+#     # Receive messages from the chat server
+#     for msg in client.events:
+#         print(msg)
+#     return last_frame, audio_path, predicted_emotion, messages
+# iface = gr.Interface(
+#     fn=predict_emotion_with_chat,
+#     inputs=[
+#         gr.Video(label="Upload a video")
+#     ],
+#     outputs=[
+#         gr.Image(label="Last Frame"),
+#         gr.Audio(label="Audio"),
+#         gr.Textbox(label="Predicted Emotion"),
+#         gr.Chatbot(label="Chat")
+#     ],
+#     title="Emotion Recognition with Chat",
+#     description="Upload a video and get the predicted emotion. Chat with others in real-time."
+# )
+# # Start the Gradio interface and the chat server
+# from flask import Flask
+# app = Flask(__name__)
+# app.config['SECRET_KEY'] = 'secret'
+# app.socketio = sio.SocketIO(app)
+# run_chat_server(app)
+# iface.launch()
 import gradio as gr
 import torch as pt
 import tensorflow as tf
 from tensorflow.keras.models import load_model
 from moviepy.editor import VideoFileClip
+from flask import Flask
+from flask_socketio import SocketIO, emit
+# Function to convert video to audio
 def convert_video_to_audio_moviepy(video_file, output_ext="wav"):
     filename, ext = os.path.splitext(video_file)
     clip = VideoFileClip(video_file)
     audio_path = f"{filename}.{output_ext}"
     clip.audio.write_audiofile(audio_path)
     return audio_path
+# Process video and audio
 def process_video_audio(video_path):
     audio_path = convert_video_to_audio_moviepy(video_path)
     face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
     if len(wav[0]) > 261540:
         train_audio_wave[0, :] = wav[0][:261540]
     else:
         train_audio_wave[0, :len(wav[0])] = wav[0][:]
     train_audio_cnn[0, :, :, 0] = mfcc(train_audio_wave[0])
     cap = cv2.VideoCapture(video_path)
     frame_idx = 0
     last_frame = None
     return last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn
+# Predict emotion from video
 def predict_emotion(video_path):
     last_frame, audio_path, train_visual, train_audio_wave, train_audio_cnn = process_video_audio(video_path)
     predicted_label = np.argmax(predictions)
     return last_frame, audio_path, predicted_label
+# Integrate chat functionality with emotion prediction
 def predict_emotion_with_chat(video_path):
+    emotion_dict = {0: 'neutral', 1: 'calm', 2: 'happy', 3: 'sad', 4: 'angry', 5: 'fearful'}
     last_frame, audio_path, predicted_label = predict_emotion(video_path)
     predicted_emotion = emotion_dict[predicted_label]
     # Connect to the chat server
+    socketio.emit('message', {'client': 'Emotion Recognition', 'message': f'Predicted emotion: {predicted_emotion}'})
+    return last_frame, audio_path, predicted_emotion
+# Gradio Interface
 iface = gr.Interface(
     fn=predict_emotion_with_chat,
+    inputs=[gr.Video(label="Upload a video")],
     outputs=[
         gr.Image(label="Last Frame"),
         gr.Audio(label="Audio"),
     description="Upload a video and get the predicted emotion. Chat with others in real-time."
 )
+# Flask app setup
 app = Flask(__name__)
 app.config['SECRET_KEY'] = 'secret'
+socketio = SocketIO(app)
+# Run the chat server
+@socketio.on('message')
+def handle_message(message):
+    emit('message', message, broadcast=True)
+if __name__ == '__main__':
+    iface.launch()
+    socketio.run(app, debug=True)