Spaces:

moondream
/

gaze-demo

Running on Zero

App Files Files Community

vikhyatk committed on Jan 6

Commit

9f8fda1

verified ·

1 Parent(s): 1ab5922

Update demo.py

Browse files

Files changed (1) hide show

demo.py +42 -40

demo.py CHANGED Viewed

@@ -8,10 +8,8 @@ import matplotlib
 from PIL import Image
 from transformers import AutoModelForCausalLM
 matplotlib.use("Agg")  # Use Agg backend for non-interactive plotting
 os.environ["HF_TOKEN"] = os.environ.get("TOKEN_FROM_SECRET") or True
 model = AutoModelForCausalLM.from_pretrained(
     "vikhyatk/moondream-next",
@@ -21,9 +19,8 @@ model = AutoModelForCausalLM.from_pretrained(
     revision="56a3adeae60809e4269c544cde376feb20637ee0"
 )
-def visualize_gaze_multi(face_boxes, gaze_points, image=None, show_plot=True):
-    """Visualization function with reduced whitespace"""
     # Calculate figure size based on image aspect ratio
     if image is not None:
         height, width = image.shape[:2]
@@ -46,39 +43,42 @@ def visualize_gaze_multi(face_boxes, gaze_points, image=None, show_plot=True):
     colors = plt.cm.rainbow(np.linspace(0, 1, len(face_boxes)))
-    for face_box, gaze_point, color in zip(face_boxes, gaze_points, colors):
         hex_color = "#{:02x}{:02x}{:02x}".format(
             int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)
         )
         x, y, width_box, height_box = face_box
-        gaze_x, gaze_y = gaze_point
         face_center_x = x + width_box / 2
         face_center_y = y + height_box / 2
         face_rect = plt.Rectangle(
             (x, y), width_box, height_box, fill=False, color=hex_color, linewidth=2
         )
         ax.add_patch(face_rect)
-        points = 50
-        alphas = np.linspace(0.8, 0, points)
-        x_points = np.linspace(face_center_x, gaze_x, points)
-        y_points = np.linspace(face_center_y, gaze_y, points)
-        for i in range(points - 1):
-            ax.plot(
-                [x_points[i], x_points[i + 1]],
-                [y_points[i], y_points[i + 1]],
-                color=hex_color,
-                alpha=alphas[i],
-                linewidth=4,
-            )
-        ax.scatter(gaze_x, gaze_y, color=hex_color, s=100, zorder=5)
-        ax.scatter(gaze_x, gaze_y, color="white", s=50, zorder=6)
     # Set plot limits and remove axes
     ax.set_xlim(0, width)
@@ -120,41 +120,43 @@ def process_image(input_image):
         gaze_points = []
         for face in faces:
-            gaze = model.detect_gaze(enc_image, face=face, unstable_settings={
-                "prioritize_accuracy": True,
-                "flip_enc_img": flip_enc_image
-            })["gaze"]
-            if gaze is None:
-                continue
             face_box = (
                 face["x_min"] * pil_image.width,
                 face["y_min"] * pil_image.height,
                 (face["x_max"] - face["x_min"]) * pil_image.width,
                 (face["y_max"] - face["y_min"]) * pil_image.height,
             )
-            gaze_point = (
-                gaze["x"] * pil_image.width,
-                gaze["y"] * pil_image.height,
-            )
-            face_boxes.append(face_box)
-            gaze_points.append(gaze_point)
         # Create visualization
         image_array = np.array(pil_image)
-        fig = visualize_gaze_multi(
             face_boxes, gaze_points, image=image_array, show_plot=False
         )
-        return fig, f"Detected {len(faces)} faces."
     except Exception as e:
         return None, f"Error processing image: {str(e)}"
 with gr.Blocks(title="Moondream Gaze Detection") as app:
     gr.Markdown("# 🌔 Moondream Gaze Detection")
     gr.Markdown("Upload an image to detect faces and visualize their gaze directions.")
@@ -177,4 +179,4 @@ with gr.Blocks(title="Moondream Gaze Detection") as app:
     )
 if __name__ == "__main__":
-    app.launch()

 from PIL import Image
 from transformers import AutoModelForCausalLM
 matplotlib.use("Agg")  # Use Agg backend for non-interactive plotting
 os.environ["HF_TOKEN"] = os.environ.get("TOKEN_FROM_SECRET") or True
 model = AutoModelForCausalLM.from_pretrained(
     "vikhyatk/moondream-next",
     revision="56a3adeae60809e4269c544cde376feb20637ee0"
 )
+def visualize_faces_and_gaze(face_boxes, gaze_points=None, image=None, show_plot=True):
+    """Visualization function that can handle faces without gaze data"""
     # Calculate figure size based on image aspect ratio
     if image is not None:
         height, width = image.shape[:2]
     colors = plt.cm.rainbow(np.linspace(0, 1, len(face_boxes)))
+    for i, (face_box, color) in enumerate(zip(face_boxes, colors)):
         hex_color = "#{:02x}{:02x}{:02x}".format(
             int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)
         )
         x, y, width_box, height_box = face_box
         face_center_x = x + width_box / 2
         face_center_y = y + height_box / 2
+        # Draw face bounding box
         face_rect = plt.Rectangle(
             (x, y), width_box, height_box, fill=False, color=hex_color, linewidth=2
         )
         ax.add_patch(face_rect)
+        # Draw gaze line if gaze data is available
+        if gaze_points is not None and i < len(gaze_points) and gaze_points[i] is not None:
+            gaze_x, gaze_y = gaze_points[i]
+            points = 50
+            alphas = np.linspace(0.8, 0, points)
+            x_points = np.linspace(face_center_x, gaze_x, points)
+            y_points = np.linspace(face_center_y, gaze_y, points)
+            for j in range(points - 1):
+                ax.plot(
+                    [x_points[j], x_points[j + 1]],
+                    [y_points[j], y_points[j + 1]],
+                    color=hex_color,
+                    alpha=alphas[j],
+                    linewidth=4,
+                )
+            ax.scatter(gaze_x, gaze_y, color=hex_color, s=100, zorder=5)
+            ax.scatter(gaze_x, gaze_y, color="white", s=50, zorder=6)
     # Set plot limits and remove axes
     ax.set_xlim(0, width)
         gaze_points = []
         for face in faces:
+            # Add face bounding box regardless of gaze detection
             face_box = (
                 face["x_min"] * pil_image.width,
                 face["y_min"] * pil_image.height,
                 (face["x_max"] - face["x_min"]) * pil_image.width,
                 (face["y_max"] - face["y_min"]) * pil_image.height,
             )
+            face_boxes.append(face_box)
+            # Try to detect gaze
+            gaze = model.detect_gaze(enc_image, face=face, unstable_settings={
+                "prioritize_accuracy": True,
+                "flip_enc_img": flip_enc_image
+            })["gaze"]
+            if gaze is not None:
+                gaze_point = (
+                    gaze["x"] * pil_image.width,
+                    gaze["y"] * pil_image.height,
+                )
+                gaze_points.append(gaze_point)
+            else:
+                gaze_points.append(None)
         # Create visualization
         image_array = np.array(pil_image)
+        fig = visualize_faces_and_gaze(
             face_boxes, gaze_points, image=image_array, show_plot=False
         )
+        faces_with_gaze = sum(1 for gp in gaze_points if gp is not None)
+        status = f"Detected {len(faces)} faces. {faces_with_gaze - len(faces)} faces identified as looking out of frame."
+        return fig, status
     except Exception as e:
         return None, f"Error processing image: {str(e)}"
 with gr.Blocks(title="Moondream Gaze Detection") as app:
     gr.Markdown("# 🌔 Moondream Gaze Detection")
     gr.Markdown("Upload an image to detect faces and visualize their gaze directions.")
     )
 if __name__ == "__main__":
+    app.launch()