Spaces:

reab5555
/

EmotionTrack

Sleeping

App Files Files Community

reab5555 committed on Jul 26, 2024

Commit

6d5f107

verified ·

1 Parent(s): e5bacbc

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -43

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import torch.nn.functional as F
 from facenet_pytorch import MTCNN
 import tempfile
 import os
 # Global variables
 input_labels_X = "Happy Face, Sad Face, Angry Face, Fear Face, Disgust Face, Contempt Face, Nervous Face, Curious Face, Flirtatious Face, Ashamed Face, Bored Face, Confused Face, Calm Face, Proud Face, Guilty Face, Annoyed Face, Desperate Face, Jealous Face, Embarrassed Face, Uncomfortable Face, Helpless Face, Shy Face, Infatuated Face, Apathetic Face, Neutral Face"
@@ -47,10 +48,7 @@ def process_frame(frame, selected_model):
             cv2.rectangle(frame, (x, y), (w, h), (0, 0, 255), 2)
             cropped_face = frame[y:h, x:w]
-            # Convert the cropped face to grayscale
-            #cropped_face_resized = cv2.resize(cropped_face, (224, 224))
-            # Convert the resized grayscale image to a tensor
             frame_tensor = preprocess(Image.fromarray(cropped_face)).unsqueeze(0).to(device)
             # Tokenize input labels and prepare for model
@@ -73,10 +71,29 @@ def process_frame(frame, selected_model):
             combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
             top_five_labels_probs = combined_labels_probs[:5]
-            # Draw results on the image
-            for idx, (label, prob) in enumerate(top_five_labels_probs):
-                result = f"{label.strip()}: {prob * 100:.1f}%"
-                cv2.putText(frame, result, (10, 45 + idx * 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
         return frame
     except Exception as e:
@@ -84,41 +101,25 @@ def process_frame(frame, selected_model):
         return frame
 @spaces.GPU(duration=250)
-def process_video(input_video, selected_model, progress=gr.Progress()):
     try:
         cap = cv2.VideoCapture(input_video)
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        fps = 3  # Set FPS
-        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        # Create a temporary file for the output video
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
-            output_path = tmp_file.name
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-        frame_count = 0
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
-            processed_frame = process_frame(frame, selected_model)
-            out.write(processed_frame)
-            frame_count += 1
-            progress(frame_count / total_frames, f"Processing frame {frame_count}/{total_frames}")
         cap.release()
-        out.release()
-        return input_video, output_path
     except Exception as e:
         print(f"An error occurred: {str(e)}")
-        return None, None
 # Create the Gradio app using Blocks
 with gr.Blocks() as app:
@@ -129,23 +130,29 @@ with gr.Blocks() as app:
             model_dropdown_video = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-B/32")
             gr.Markdown("Upload a video to detect faces and recognize emotions.")
             video_input = gr.Video()
-            process_button_video = gr.Button("Process Video")
-            with gr.Row():
-                output_video_original = gr.Video(label="Source Video")
-                output_video_processed = gr.Video(label="Output Video")
-            process_button_video.click(process_video, inputs=[video_input, model_dropdown_video], outputs=[output_video_original, output_video_processed])
         with gr.TabItem("Image"):
             gr.Markdown("Upload an image to detect faces and recognize emotions.")
-            image_input = gr.Image(type="pil")
             model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
             process_button_image = gr.Button("Process Image")
-            output_image_image = gr.Image(type="pil", label="Processed Image")
-            process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[output_image_image])
 # Launch the app
 app.launch()

 from facenet_pytorch import MTCNN
 import tempfile
 import os
+import matplotlib.pyplot as plt
 # Global variables
 input_labels_X = "Happy Face, Sad Face, Angry Face, Fear Face, Disgust Face, Contempt Face, Nervous Face, Curious Face, Flirtatious Face, Ashamed Face, Bored Face, Confused Face, Calm Face, Proud Face, Guilty Face, Annoyed Face, Desperate Face, Jealous Face, Embarrassed Face, Uncomfortable Face, Helpless Face, Shy Face, Infatuated Face, Apathetic Face, Neutral Face"
             cv2.rectangle(frame, (x, y), (w, h), (0, 0, 255), 2)
             cropped_face = frame[y:h, x:w]
+            # Convert the cropped face to a tensor
             frame_tensor = preprocess(Image.fromarray(cropped_face)).unsqueeze(0).to(device)
             # Tokenize input labels and prepare for model
             combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
             top_five_labels_probs = combined_labels_probs[:5]
+            # Create a bar graph
+            fig, ax = plt.subplots(figsize=(8, 4))
+            labels, probs = zip(*top_five_labels_probs)
+            ax.barh(labels, probs)
+            ax.set_xlabel('Probability')
+            ax.set_title('Top 5 Emotions')
+            # Convert plot to image
+            fig.canvas.draw()
+            plot_img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
+            plot_img = plot_img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+            # Resize plot image to fit in the frame
+            plot_height = int(frame.shape[0] * 0.3)
+            plot_width = int(plot_height * plot_img.shape[1] / plot_img.shape[0])
+            plot_img_resized = cv2.resize(plot_img, (plot_width, plot_height))
+            # Overlay the plot on the frame
+            y_offset = frame.shape[0] - plot_height
+            x_offset = 0
+            frame[y_offset:y_offset+plot_height, x_offset:x_offset+plot_width] = plot_img_resized
+            plt.close(fig)
         return frame
     except Exception as e:
         return frame
 @spaces.GPU(duration=250)
+def process_video(input_video, selected_model, frame_index):
     try:
         cap = cv2.VideoCapture(input_video)
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        # Set the frame position
+        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
+        ret, frame = cap.read()
+        if not ret:
+            return None
+        processed_frame = process_frame(frame, selected_model)
         cap.release()
+        return processed_frame
     except Exception as e:
         print(f"An error occurred: {str(e)}")
+        return None
 # Create the Gradio app using Blocks
 with gr.Blocks() as app:
             model_dropdown_video = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-B/32")
             gr.Markdown("Upload a video to detect faces and recognize emotions.")
             video_input = gr.Video()
+            frame_slider = gr.Slider(minimum=0, maximum=100, step=1, label="Frame Index")
+            process_button_video = gr.Button("Process Frame")
+            output_frame = gr.Image(label="Processed Frame")
+            def update_slider(video):
+                cap = cv2.VideoCapture(video)
+                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+                cap.release()
+                return gr.Slider.update(maximum=total_frames-1)
+            video_input.change(update_slider, inputs=[video_input], outputs=[frame_slider])
+            process_button_video.click(process_video, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
         with gr.TabItem("Image"):
             gr.Markdown("Upload an image to detect faces and recognize emotions.")
+            image_input = gr.Image(type="numpy")
             model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
             process_button_image = gr.Button("Process Image")
+            output_image = gr.Image(type="numpy", label="Processed Image")
+            process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[output_image])
 # Launch the app
 app.launch()