Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -49,6 +49,7 @@ def process_frame(frame, selected_model):
|
|
| 49 |
|
| 50 |
# Tokenize input labels and prepare for model
|
| 51 |
input_labels = input_labels_X.split(", ")
|
|
|
|
| 52 |
text = clip.tokenize(input_labels).to(device)
|
| 53 |
|
| 54 |
with torch.no_grad():
|
|
@@ -63,12 +64,12 @@ def process_frame(frame, selected_model):
|
|
| 63 |
probabilities = F.softmax(logit_per_image[0], dim=0)
|
| 64 |
|
| 65 |
# Combine labels with probabilities and sort
|
| 66 |
-
combined_labels_probs = list(zip(
|
| 67 |
combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
|
| 68 |
top_five_labels_probs = combined_labels_probs[:5]
|
| 69 |
|
| 70 |
# Create a bar graph
|
| 71 |
-
fig, ax = plt.subplots(figsize=(8, 4))
|
| 72 |
labels, probs = zip(*top_five_labels_probs)
|
| 73 |
bars = ax.barh(labels, probs, color=plt.cm.tab20.colors)
|
| 74 |
ax.set_xlabel('Probability')
|
|
@@ -86,8 +87,8 @@ def process_frame(frame, selected_model):
|
|
| 86 |
plot_img_resized = cv2.resize(plot_img, (plot_width, plot_height))
|
| 87 |
|
| 88 |
# Overlay the plot on the frame
|
| 89 |
-
y_offset =
|
| 90 |
-
x_offset =
|
| 91 |
alpha_plot = plot_img_resized[..., 3] / 255.0
|
| 92 |
alpha_frame = 1.0 - alpha_plot
|
| 93 |
|
|
@@ -125,34 +126,41 @@ def process_video(input_video, selected_model, frame_index):
|
|
| 125 |
with gr.Blocks() as app:
|
| 126 |
gr.Markdown("# EmotionTrack (Zero-Shot)")
|
| 127 |
|
| 128 |
-
with gr.
|
| 129 |
-
with gr.
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
process_button_video.click(process_video, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
|
| 146 |
-
|
| 147 |
-
with gr.TabItem("Image"):
|
| 148 |
-
gr.Markdown("Upload an image to detect faces and recognize emotions.")
|
| 149 |
-
image_input = gr.Image(type="numpy")
|
| 150 |
-
model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
|
| 151 |
-
process_button_image = gr.Button("Process Image")
|
| 152 |
-
|
| 153 |
-
output_image = gr.Image(type="numpy", label="Processed Image")
|
| 154 |
-
|
| 155 |
-
process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[output_image])
|
| 156 |
|
| 157 |
# Launch the app with public link enabled
|
| 158 |
app.launch(share=True)
|
|
|
|
| 49 |
|
| 50 |
# Tokenize input labels and prepare for model
|
| 51 |
input_labels = input_labels_X.split(", ")
|
| 52 |
+
input_labels_no_face = [label.replace(" Face", "") for label in input_labels]
|
| 53 |
text = clip.tokenize(input_labels).to(device)
|
| 54 |
|
| 55 |
with torch.no_grad():
|
|
|
|
| 64 |
probabilities = F.softmax(logit_per_image[0], dim=0)
|
| 65 |
|
| 66 |
# Combine labels with probabilities and sort
|
| 67 |
+
combined_labels_probs = list(zip(input_labels_no_face, probabilities.tolist()))
|
| 68 |
combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
|
| 69 |
top_five_labels_probs = combined_labels_probs[:5]
|
| 70 |
|
| 71 |
# Create a bar graph
|
| 72 |
+
fig, ax = plt.subplots(figsize=(8, 4), dpi=300)
|
| 73 |
labels, probs = zip(*top_five_labels_probs)
|
| 74 |
bars = ax.barh(labels, probs, color=plt.cm.tab20.colors)
|
| 75 |
ax.set_xlabel('Probability')
|
|
|
|
| 87 |
plot_img_resized = cv2.resize(plot_img, (plot_width, plot_height))
|
| 88 |
|
| 89 |
# Overlay the plot on the frame
|
| 90 |
+
y_offset = 0
|
| 91 |
+
x_offset = frame_rgb.shape[1] - plot_width
|
| 92 |
alpha_plot = plot_img_resized[..., 3] / 255.0
|
| 93 |
alpha_frame = 1.0 - alpha_plot
|
| 94 |
|
|
|
|
| 126 |
with gr.Blocks() as app:
|
| 127 |
gr.Markdown("# EmotionTrack (Zero-Shot)")
|
| 128 |
|
| 129 |
+
with gr.Row():
|
| 130 |
+
with gr.Column():
|
| 131 |
+
with gr.TabItem("Video"):
|
| 132 |
+
model_dropdown_video = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-B/32")
|
| 133 |
+
gr.Markdown("Upload a video to detect faces and recognize emotions.")
|
| 134 |
+
video_input = gr.Video()
|
| 135 |
+
frame_slider = gr.Slider(minimum=0, maximum=100, step=1, label="Frame Index")
|
| 136 |
+
|
| 137 |
+
def update_slider(video):
    """Resize the frame slider to match the length of the uploaded video.

    Args:
        video: Value delivered by the video input component. Presumably a
            file path that OpenCV can open (verify against the Gradio
            version in use); falsy when the component has been cleared.

    Returns:
        A Gradio update for the slider. With no video the slider is left
        untouched; otherwise its maximum becomes the last valid frame index.
    """
    if not video:
        # Clearing the upload also triggers the change event; there is
        # nothing to probe, so return a no-op update instead of handing
        # None to OpenCV.
        return gr.update()

    cap = cv2.VideoCapture(video)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture handle even if the property query fails.
        cap.release()

    # A file that cannot be decoded may report 0 frames; clamp so the
    # maximum never falls below the slider's minimum of 0.
    return gr.update(maximum=max(total_frames - 1, 0))
|
| 142 |
+
|
| 143 |
+
def update_frame(video, model, frame_idx):
    """Return the processed frame for the given video, model and frame index.

    Thin adapter that forwards its three arguments, in order, to
    ``process_video`` and hands back whatever that call returns.
    """
    processed = process_video(video, model, frame_idx)
    return processed
|
| 145 |
+
|
| 146 |
+
video_input.change(update_slider, inputs=[video_input], outputs=[frame_slider])
|
| 147 |
+
frame_slider.change(update_frame, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
|
| 148 |
+
|
| 149 |
+
with gr.Column():
|
| 150 |
+
with gr.TabItem("Video"):
|
| 151 |
+
process_button_video = gr.Button("Process Frame")
|
| 152 |
+
output_frame = gr.Image(label="Processed Frame")
|
| 153 |
+
process_button_video.click(process_video, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
|
| 154 |
|
| 155 |
+
with gr.TabItem("Image"):
|
| 156 |
+
gr.Markdown("Upload an image to detect faces and recognize emotions.")
|
| 157 |
+
image_input = gr.Image(type="numpy")
|
| 158 |
+
model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
|
| 159 |
+
process_button_image = gr.Button("Process Image")
|
| 160 |
+
|
| 161 |
+
output_image = gr.Image(type="numpy", label="Processed Image")
|
| 162 |
+
|
| 163 |
+
process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[output_image])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
# Launch the app with public link enabled
|
| 166 |
app.launch(share=True)
|