Spaces:

moondream
/

gaze-demo

Running on Zero

App Files Files Community

vikhyatk committed on Jan 6

Commit

b90b44c

verified ·

1 Parent(s): 9f8fda1

Update demo.py

Browse files

Files changed (1) hide show

demo.py +24 -8

demo.py CHANGED Viewed

@@ -93,7 +93,7 @@ def visualize_faces_and_gaze(face_boxes, gaze_points=None, image=None, show_plot
     return fig
 @spaces.GPU(duration=15)
-def process_image(input_image):
     if input_image is None:
         return None, ""
@@ -106,8 +106,11 @@ def process_image(input_image):
         # Get image encoding
         enc_image = model.encode_image(pil_image)
-        flipped_pil = pil_image.copy().transpose(method=Image.FLIP_LEFT_RIGHT)
-        flip_enc_image = model.encode_image(flipped_pil)
         # Detect faces
         faces = model.detect(enc_image, "face")["objects"]
@@ -127,13 +130,18 @@ def process_image(input_image):
                 (face["x_max"] - face["x_min"]) * pil_image.width,
                 (face["y_max"] - face["y_min"]) * pil_image.height,
             )
             face_boxes.append(face_box)
             # Try to detect gaze
-            gaze = model.detect_gaze(enc_image, face=face, unstable_settings={
-                "prioritize_accuracy": True,
                 "flip_enc_img": flip_enc_image
-            })["gaze"]
             if gaze is not None:
                 gaze_point = (
@@ -151,7 +159,7 @@ def process_image(input_image):
         )
         faces_with_gaze = sum(1 for gp in gaze_points if gp is not None)
-        status = f"Detected {len(faces)} faces. {faces_with_gaze - len(faces)} faces identified as looking out of frame."
         return fig, status
     except Exception as e:
@@ -164,13 +172,21 @@ with gr.Blocks(title="Moondream Gaze Detection") as app:
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(label="Input Image", type="pil")
         with gr.Column():
             output_text = gr.Textbox(label="Status")
             output_plot = gr.Plot(label="Visualization")
     input_image.change(
-        fn=process_image, inputs=[input_image], outputs=[output_plot, output_text]
     )
     gr.Examples(

     return fig
 @spaces.GPU(duration=15)
+def process_image(input_image, use_ensemble):
     if input_image is None:
         return None, ""
         # Get image encoding
         enc_image = model.encode_image(pil_image)
+        if use_ensemble:
+            flipped_pil = pil_image.copy().transpose(method=Image.FLIP_LEFT_RIGHT)
+            flip_enc_image = model.encode_image(flipped_pil)
+        else:
+            flip_enc_image = None
         # Detect faces
         faces = model.detect(enc_image, "face")["objects"]
                 (face["x_max"] - face["x_min"]) * pil_image.width,
                 (face["y_max"] - face["y_min"]) * pil_image.height,
             )
+            face_center = (
+                (face["x_min"] + face["x_max"]) / 2,
+                (face["y_min"] + face["y_max"]) / 2
+            )
             face_boxes.append(face_box)
             # Try to detect gaze
+            gaze_settings = {
+                "prioritize_accuracy": use_ensemble,
                 "flip_enc_img": flip_enc_image
+            }
+            gaze = model.detect_gaze(enc_image, face=face, eye=face_center, unstable_settings=gaze_settings)["gaze"]
             if gaze is not None:
                 gaze_point = (
         )
         faces_with_gaze = sum(1 for gp in gaze_points if gp is not None)
+        status = f"Detected {len(faces)} faces. Gaze detected for {faces_with_gaze} faces."
         return fig, status
     except Exception as e:
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(label="Input Image", type="pil")
+            use_ensemble = gr.Checkbox(
+                label="Use Ensemble Mode",
+                value=True,
+                info="Ensemble mode combines predictions from multiple model views for higher accuracy but is slower"
+            )
         with gr.Column():
             output_text = gr.Textbox(label="Status")
             output_plot = gr.Plot(label="Visualization")
     input_image.change(
+        fn=process_image, inputs=[input_image, use_ensemble], outputs=[output_plot, output_text]
+    )
+    use_ensemble.change(
+        fn=process_image, inputs=[input_image, use_ensemble], outputs=[output_plot, output_text]
     )
     gr.Examples(