Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ import torch.nn.functional as F
|
|
| 9 |
from facenet_pytorch import MTCNN
|
| 10 |
import tempfile
|
| 11 |
import os
|
|
|
|
| 12 |
|
| 13 |
# Global variables
|
| 14 |
input_labels_X = "Happy Face, Sad Face, Angry Face, Fear Face, Disgust Face, Contempt Face, Nervous Face, Curious Face, Flirtatious Face, Ashamed Face, Bored Face, Confused Face, Calm Face, Proud Face, Guilty Face, Annoyed Face, Desperate Face, Jealous Face, Embarrassed Face, Uncomfortable Face, Helpless Face, Shy Face, Infatuated Face, Apathetic Face, Neutral Face"
|
|
@@ -47,10 +48,7 @@ def process_frame(frame, selected_model):
|
|
| 47 |
cv2.rectangle(frame, (x, y), (w, h), (0, 0, 255), 2)
|
| 48 |
cropped_face = frame[y:h, x:w]
|
| 49 |
|
| 50 |
-
# Convert the cropped face to
|
| 51 |
-
#cropped_face_resized = cv2.resize(cropped_face, (224, 224))
|
| 52 |
-
|
| 53 |
-
# Convert the resized grayscale image to a tensor
|
| 54 |
frame_tensor = preprocess(Image.fromarray(cropped_face)).unsqueeze(0).to(device)
|
| 55 |
|
| 56 |
# Tokenize input labels and prepare for model
|
|
@@ -73,10 +71,29 @@ def process_frame(frame, selected_model):
|
|
| 73 |
combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
|
| 74 |
top_five_labels_probs = combined_labels_probs[:5]
|
| 75 |
|
| 76 |
-
#
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
return frame
|
| 82 |
except Exception as e:
|
|
@@ -84,41 +101,25 @@ def process_frame(frame, selected_model):
|
|
| 84 |
return frame
|
| 85 |
|
| 86 |
@spaces.GPU(duration=250)
|
| 87 |
-
def process_video(input_video, selected_model,
|
| 88 |
try:
|
| 89 |
cap = cv2.VideoCapture(input_video)
|
| 90 |
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 100 |
-
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
| 101 |
-
|
| 102 |
-
frame_count = 0
|
| 103 |
-
|
| 104 |
-
while True:
|
| 105 |
-
ret, frame = cap.read()
|
| 106 |
-
if not ret:
|
| 107 |
-
break
|
| 108 |
-
|
| 109 |
-
processed_frame = process_frame(frame, selected_model)
|
| 110 |
-
out.write(processed_frame)
|
| 111 |
-
|
| 112 |
-
frame_count += 1
|
| 113 |
-
progress(frame_count / total_frames, f"Processing frame {frame_count}/{total_frames}")
|
| 114 |
|
|
|
|
| 115 |
cap.release()
|
| 116 |
-
out.release()
|
| 117 |
|
| 118 |
-
return
|
| 119 |
except Exception as e:
|
| 120 |
print(f"An error occurred: {str(e)}")
|
| 121 |
-
return None
|
| 122 |
|
| 123 |
# Create the Gradio app using Blocks
|
| 124 |
with gr.Blocks() as app:
|
|
@@ -129,23 +130,29 @@ with gr.Blocks() as app:
|
|
| 129 |
model_dropdown_video = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-B/32")
|
| 130 |
gr.Markdown("Upload a video to detect faces and recognize emotions.")
|
| 131 |
video_input = gr.Video()
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
| 137 |
|
| 138 |
-
|
|
|
|
| 139 |
|
| 140 |
with gr.TabItem("Image"):
|
| 141 |
gr.Markdown("Upload an image to detect faces and recognize emotions.")
|
| 142 |
-
image_input = gr.Image(type="
|
| 143 |
model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
|
| 144 |
process_button_image = gr.Button("Process Image")
|
| 145 |
|
| 146 |
-
|
| 147 |
|
| 148 |
-
process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[
|
| 149 |
|
| 150 |
# Launch the app
|
| 151 |
app.launch()
|
|
|
|
| 9 |
from facenet_pytorch import MTCNN
|
| 10 |
import tempfile
|
| 11 |
import os
|
| 12 |
+
import matplotlib.pyplot as plt
|
| 13 |
|
| 14 |
# Global variables
|
| 15 |
input_labels_X = "Happy Face, Sad Face, Angry Face, Fear Face, Disgust Face, Contempt Face, Nervous Face, Curious Face, Flirtatious Face, Ashamed Face, Bored Face, Confused Face, Calm Face, Proud Face, Guilty Face, Annoyed Face, Desperate Face, Jealous Face, Embarrassed Face, Uncomfortable Face, Helpless Face, Shy Face, Infatuated Face, Apathetic Face, Neutral Face"
|
|
|
|
| 48 |
cv2.rectangle(frame, (x, y), (w, h), (0, 0, 255), 2)
|
| 49 |
cropped_face = frame[y:h, x:w]
|
| 50 |
|
| 51 |
+
# Convert the cropped face to a tensor
|
|
|
|
|
|
|
|
|
|
| 52 |
frame_tensor = preprocess(Image.fromarray(cropped_face)).unsqueeze(0).to(device)
|
| 53 |
|
| 54 |
# Tokenize input labels and prepare for model
|
|
|
|
| 71 |
combined_labels_probs.sort(key=lambda x: x[1], reverse=True)
|
| 72 |
top_five_labels_probs = combined_labels_probs[:5]
|
| 73 |
|
| 74 |
+
# Create a bar graph
|
| 75 |
+
fig, ax = plt.subplots(figsize=(8, 4))
|
| 76 |
+
labels, probs = zip(*top_five_labels_probs)
|
| 77 |
+
ax.barh(labels, probs)
|
| 78 |
+
ax.set_xlabel('Probability')
|
| 79 |
+
ax.set_title('Top 5 Emotions')
|
| 80 |
+
|
| 81 |
+
# Convert plot to image
|
| 82 |
+
fig.canvas.draw()
|
| 83 |
+
plot_img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
|
| 84 |
+
plot_img = plot_img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
|
| 85 |
+
|
| 86 |
+
# Resize plot image to fit in the frame
|
| 87 |
+
plot_height = int(frame.shape[0] * 0.3)
|
| 88 |
+
plot_width = int(plot_height * plot_img.shape[1] / plot_img.shape[0])
|
| 89 |
+
plot_img_resized = cv2.resize(plot_img, (plot_width, plot_height))
|
| 90 |
+
|
| 91 |
+
# Overlay the plot on the frame
|
| 92 |
+
y_offset = frame.shape[0] - plot_height
|
| 93 |
+
x_offset = 0
|
| 94 |
+
frame[y_offset:y_offset+plot_height, x_offset:x_offset+plot_width] = plot_img_resized
|
| 95 |
+
|
| 96 |
+
plt.close(fig)
|
| 97 |
|
| 98 |
return frame
|
| 99 |
except Exception as e:
|
|
|
|
| 101 |
return frame
|
| 102 |
|
| 103 |
@spaces.GPU(duration=250)
|
| 104 |
+
def process_video(input_video, selected_model, frame_index):
    """Run emotion recognition on a single frame of a video.

    Opens ``input_video`` with OpenCV, seeks to ``frame_index``, reads one
    frame and passes it to ``process_frame`` together with ``selected_model``.

    Args:
        input_video: Path of the video file to open with ``cv2.VideoCapture``.
        selected_model: Model name forwarded unchanged to ``process_frame``.
        frame_index: Zero-based index of the frame to seek to.

    Returns:
        Whatever ``process_frame`` returns for the decoded frame, or ``None``
        when the video cannot be opened, the frame cannot be read, or any
        exception is raised along the way.
    """
    cap = None
    try:
        cap = cv2.VideoCapture(input_video)
        if not cap.isOpened():
            print(f"An error occurred: could not open video {input_video!r}")
            return None

        # Set the frame position
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)

        ret, frame = cap.read()
        if not ret:
            return None

        # NOTE(review): OpenCV decodes frames in BGR channel order, while the
        # result is shown in a gr.Image, which presumably expects RGB --
        # confirm whether process_frame converts, or whether a cv2.cvtColor
        # call is needed here.
        return process_frame(frame, selected_model)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None
    finally:
        # Release the capture on every exit path; the early returns and the
        # exception path previously skipped cap.release().
        if cap is not None:
            cap.release()
|
| 123 |
|
| 124 |
# Create the Gradio app using Blocks
|
| 125 |
with gr.Blocks() as app:
|
|
|
|
| 130 |
model_dropdown_video = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-B/32")
|
| 131 |
gr.Markdown("Upload a video to detect faces and recognize emotions.")
|
| 132 |
video_input = gr.Video()
|
| 133 |
+
frame_slider = gr.Slider(minimum=0, maximum=100, step=1, label="Frame Index")
|
| 134 |
+
process_button_video = gr.Button("Process Frame")
|
| 135 |
+
|
| 136 |
+
output_frame = gr.Image(label="Processed Frame")
|
| 137 |
|
| 138 |
+
def update_slider(video):
    """Resize the frame slider to match the newly selected video.

    Args:
        video: Path of the uploaded video, or a falsy value when the video
            component has been cleared.

    Returns:
        A Gradio update that sets the slider's ``maximum`` to the index of
        the last frame (never below 0), or an empty update that leaves the
        slider untouched when no video is selected.
    """
    # The change event also fires when the video is cleared; there is
    # nothing to open in that case, so leave the slider as it is.
    if not video:
        return gr.update()

    cap = cv2.VideoCapture(video)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()

    # A frame count of 0 (unreadable file) would give maximum=-1, which is
    # below the slider's minimum of 0, so clamp it.
    # gr.update is used instead of gr.Slider.update, which newer Gradio
    # releases no longer provide.
    return gr.update(maximum=max(total_frames - 1, 0))
|
| 143 |
|
| 144 |
+
video_input.change(update_slider, inputs=[video_input], outputs=[frame_slider])
|
| 145 |
+
process_button_video.click(process_video, inputs=[video_input, model_dropdown_video, frame_slider], outputs=[output_frame])
|
| 146 |
|
| 147 |
with gr.TabItem("Image"):
|
| 148 |
gr.Markdown("Upload an image to detect faces and recognize emotions.")
|
| 149 |
+
image_input = gr.Image(type="numpy")
|
| 150 |
model_dropdown_image = gr.Dropdown(choices=["ViT-B/32", "ViT-B/16", "ViT-L/14"], label="Model", value="ViT-L/14")
|
| 151 |
process_button_image = gr.Button("Process Image")
|
| 152 |
|
| 153 |
+
output_image = gr.Image(type="numpy", label="Processed Image")
|
| 154 |
|
| 155 |
+
process_button_image.click(process_frame, inputs=[image_input, model_dropdown_image], outputs=[output_image])
|
| 156 |
|
| 157 |
# Launch the app
|
| 158 |
app.launch()
|