Spaces:

reab5555
/

EmotionTrack

Sleeping

App Files Community

reab5555 committed on Jul 26, 2024

Commit

995585d

verified ·

1 Parent(s): 0806c50

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -31

app.py CHANGED Viewed

@@ -49,6 +49,7 @@ def process_frame(frame, selected_model):
             # Tokenize input labels and prepare for model
             input_labels = input_labels_X.split(", ")
             text = clip.tokenize(input_labels).to(device)
             with torch.no_grad():
@@ -63,12 +64,12 @@ def process_frame(frame, selected_model):
                 probabilities = F.softmax(logit_per_image[0], dim=0)
             # Combine labels with probabilities and sort
-            combined_labels_probs = list(zip(input_labels, probabilities.tolist()))
             combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
             top_five_labels_probs = combined_labels_probs[:5]
             # Create a bar graph
-            fig, ax = plt.subplots(figsize=(8, 4))
             labels, probs = zip(*top_five_labels_probs)
             bars = ax.barh(labels, probs, color=plt.cm.tab20.colors)
             ax.set_xlabel('Probability')
@@ -86,8 +87,8 @@ def process_frame(frame, selected_model):
             plot_img_resized = cv2.resize(plot_img, (plot_width, plot_height))
             # Overlay the plot on the frame
-            y_offset = frame_rgb.shape[0] - plot_height
-            x_offset = 0
             alpha_plot = plot_img_resized[..., 3] / 255.0
             alpha_frame = 1.0 - alpha_plot
@@ -125,34 +126,41 @@ def process_video(input_video, selected_model, frame_index):
 with gr.Blocks() as app:
     gr.Markdown("# EmotionTrack (Zero-Shot)")
-    with gr.Tabs():
-        with gr.TabItem("Video"):
-            model_dropdown_video = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-B/32")
-            gr.Markdown("Upload a video to detect faces and recognize emotions.")
-            video_input = gr.Video()
-            frame_slider = gr.Slider(minimum=0, maximum=100, step=1, label="Frame Index")
-            process_button_video = gr.Button("Process Frame")
-            output_frame = gr.Image(label="Processed Frame")
-            def update_slider(video):
-                cap = cv2.VideoCapture(video)
-                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-                cap.release()
-                return gr.update(maximum=total_frames-1)
-            video_input.change(update_slider, inputs=[video_input], outputs=[frame_slider])
-            process_button_video.click(process_video, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
-        with gr.TabItem("Image"):
-            gr.Markdown("Upload an image to detect faces and recognize emotions.")
-            image_input = gr.Image(type="numpy")
-            model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
-            process_button_image = gr.Button("Process Image")
-            output_image = gr.Image(type="numpy", label="Processed Image")
-            process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[output_image])
 # Launch the app with public link enabled
 app.launch(share=True)

             # Tokenize input labels and prepare for model
             input_labels = input_labels_X.split(", ")
+            input_labels_no_face = [label.replace(" Face", "") for label in input_labels]
             text = clip.tokenize(input_labels).to(device)
             with torch.no_grad():
                 probabilities = F.softmax(logit_per_image[0], dim=0)
             # Combine labels with probabilities and sort
+            combined_labels_probs = list(zip(input_labels_no_face, probabilities.tolist()))
             combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
             top_five_labels_probs = combined_labels_probs[:5]
             # Create a bar graph
+            fig, ax = plt.subplots(figsize=(8, 4), dpi=300)
             labels, probs = zip(*top_five_labels_probs)
             bars = ax.barh(labels, probs, color=plt.cm.tab20.colors)
             ax.set_xlabel('Probability')
             plot_img_resized = cv2.resize(plot_img, (plot_width, plot_height))
             # Overlay the plot on the frame
+            y_offset = 0
+            x_offset = frame_rgb.shape[1] - plot_width
             alpha_plot = plot_img_resized[..., 3] / 255.0
             alpha_frame = 1.0 - alpha_plot
 with gr.Blocks() as app:
     gr.Markdown("# EmotionTrack (Zero-Shot)")
+    with gr.Row():
+        with gr.Column():
+            with gr.TabItem("Video"):
+                model_dropdown_video = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-B/32")
+                gr.Markdown("Upload a video to detect faces and recognize emotions.")
+                video_input = gr.Video()
+                frame_slider = gr.Slider(minimum=0, maximum=100, step=1, label="Frame Index")
+                def update_slider(video):
+                    cap = cv2.VideoCapture(video)
+                    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+                    cap.release()
+                    return gr.update(maximum=total_frames-1)
+                def update_frame(video, model, frame_idx):
+                    return process_video(video, model, frame_idx)
+                video_input.change(update_slider, inputs=[video_input], outputs=[frame_slider])
+                frame_slider.change(update_frame, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
+        with gr.Column():
+            with gr.TabItem("Video"):
+                process_button_video = gr.Button("Process Frame")
+                output_frame = gr.Image(label="Processed Frame")
+                process_button_video.click(process_video, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
+            with gr.TabItem("Image"):
+                gr.Markdown("Upload an image to detect faces and recognize emotions.")
+                image_input = gr.Image(type="numpy")
+                model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
+                process_button_image = gr.Button("Process Image")
+                output_image = gr.Image(type="numpy", label="Processed Image")
+                process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[output_image])
 # Launch the app with public link enabled
 app.launch(share=True)