reab5555 committed on
Commit
995585d
·
verified ·
1 Parent(s): 0806c50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -31
app.py CHANGED
@@ -49,6 +49,7 @@ def process_frame(frame, selected_model):
49
 
50
  # Tokenize input labels and prepare for model
51
  input_labels = input_labels_X.split(", ")
 
52
  text = clip.tokenize(input_labels).to(device)
53
 
54
  with torch.no_grad():
@@ -63,12 +64,12 @@ def process_frame(frame, selected_model):
63
  probabilities = F.softmax(logit_per_image[0], dim=0)
64
 
65
  # Combine labels with probabilities and sort
66
- combined_labels_probs = list(zip(input_labels, probabilities.tolist()))
67
  combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
68
  top_five_labels_probs = combined_labels_probs[:5]
69
 
70
  # Create a bar graph
71
- fig, ax = plt.subplots(figsize=(8, 4))
72
  labels, probs = zip(*top_five_labels_probs)
73
  bars = ax.barh(labels, probs, color=plt.cm.tab20.colors)
74
  ax.set_xlabel('Probability')
@@ -86,8 +87,8 @@ def process_frame(frame, selected_model):
86
  plot_img_resized = cv2.resize(plot_img, (plot_width, plot_height))
87
 
88
  # Overlay the plot on the frame
89
- y_offset = frame_rgb.shape[0] - plot_height
90
- x_offset = 0
91
  alpha_plot = plot_img_resized[..., 3] / 255.0
92
  alpha_frame = 1.0 - alpha_plot
93
 
@@ -125,34 +126,41 @@ def process_video(input_video, selected_model, frame_index):
125
  with gr.Blocks() as app:
126
  gr.Markdown("# EmotionTrack (Zero-Shot)")
127
 
128
- with gr.Tabs():
129
- with gr.TabItem("Video"):
130
- model_dropdown_video = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-B/32")
131
- gr.Markdown("Upload a video to detect faces and recognize emotions.")
132
- video_input = gr.Video()
133
- frame_slider = gr.Slider(minimum=0, maximum=100, step=1, label="Frame Index")
134
- process_button_video = gr.Button("Process Frame")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- output_frame = gr.Image(label="Processed Frame")
137
-
138
- def update_slider(video):
139
- cap = cv2.VideoCapture(video)
140
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
141
- cap.release()
142
- return gr.update(maximum=total_frames-1)
143
-
144
- video_input.change(update_slider, inputs=[video_input], outputs=[frame_slider])
145
- process_button_video.click(process_video, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
146
-
147
- with gr.TabItem("Image"):
148
- gr.Markdown("Upload an image to detect faces and recognize emotions.")
149
- image_input = gr.Image(type="numpy")
150
- model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
151
- process_button_image = gr.Button("Process Image")
152
-
153
- output_image = gr.Image(type="numpy", label="Processed Image")
154
-
155
- process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[output_image])
156
 
157
  # Launch the app with public link enabled
158
  app.launch(share=True)
 
49
 
50
  # Tokenize input labels and prepare for model
51
  input_labels = input_labels_X.split(", ")
52
+ input_labels_no_face = [label.replace(" Face", "") for label in input_labels]
53
  text = clip.tokenize(input_labels).to(device)
54
 
55
  with torch.no_grad():
 
64
  probabilities = F.softmax(logit_per_image[0], dim=0)
65
 
66
  # Combine labels with probabilities and sort
67
+ combined_labels_probs = list(zip(input_labels_no_face, probabilities.tolist()))
68
  combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
69
  top_five_labels_probs = combined_labels_probs[:5]
70
 
71
  # Create a bar graph
72
+ fig, ax = plt.subplots(figsize=(8, 4), dpi=300)
73
  labels, probs = zip(*top_five_labels_probs)
74
  bars = ax.barh(labels, probs, color=plt.cm.tab20.colors)
75
  ax.set_xlabel('Probability')
 
87
  plot_img_resized = cv2.resize(plot_img, (plot_width, plot_height))
88
 
89
  # Overlay the plot on the frame
90
+ y_offset = 0
91
+ x_offset = frame_rgb.shape[1] - plot_width
92
  alpha_plot = plot_img_resized[..., 3] / 255.0
93
  alpha_frame = 1.0 - alpha_plot
94
 
 
126
  with gr.Blocks() as app:
127
  gr.Markdown("# EmotionTrack (Zero-Shot)")
128
 
129
+ with gr.Row():
130
+ with gr.Column():
131
+ with gr.TabItem("Video"):
132
+ model_dropdown_video = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-B/32")
133
+ gr.Markdown("Upload a video to detect faces and recognize emotions.")
134
+ video_input = gr.Video()
135
+ frame_slider = gr.Slider(minimum=0, maximum=100, step=1, label="Frame Index")
136
+
137
+ def update_slider(video):
138
+ cap = cv2.VideoCapture(video)
139
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
140
+ cap.release()
141
+ return gr.update(maximum=total_frames-1)
142
+
143
+ def update_frame(video, model, frame_idx):
144
+ return process_video(video, model, frame_idx)
145
+
146
+ video_input.change(update_slider, inputs=[video_input], outputs=[frame_slider])
147
+ frame_slider.change(update_frame, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
148
+
149
+ with gr.Column():
150
+ with gr.TabItem("Video"):
151
+ process_button_video = gr.Button("Process Frame")
152
+ output_frame = gr.Image(label="Processed Frame")
153
+ process_button_video.click(process_video, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
154
 
155
+ with gr.TabItem("Image"):
156
+ gr.Markdown("Upload an image to detect faces and recognize emotions.")
157
+ image_input = gr.Image(type="numpy")
158
+ model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
159
+ process_button_image = gr.Button("Process Image")
160
+
161
+ output_image = gr.Image(type="numpy", label="Processed Image")
162
+
163
+ process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[output_image])
 
 
 
 
 
 
 
 
 
 
 
164
 
165
  # Launch the app with public link enabled
166
  app.launch(share=True)