Updating SDK version and resolving compatibility issues -- image_prompter is removed, gradio_image_annotation added
Files changed:
- README.md +1 -1
- demo_gradio.py +213 -44
- requirements.txt +2 -2
README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 title: GeCo2 Gradio Demo
 sdk: gradio
-sdk_version: "
+sdk_version: "5.50.0"
 python_version: "3.10.13"
 app_file: demo_gradio.py
 ---
demo_gradio.py CHANGED
@@ -1,7 +1,7 @@
 import spaces
 import torch
 import gradio as gr
-from
+from gradio_image_annotation import image_annotator
 from torch.nn import DataParallel
 from models.counter_infer import build_model
 from utils.arg_parser import get_argparser
@@ -14,10 +14,55 @@ import numpy as np
 import colorsys
 
 
+# -----------------------------
+# Minimal UI + force "Create" mode (press C a few times)
+# -----------------------------
+JS_FORCE_CREATE_MODE = r"""
+function () {
+  const pressC = () => {
+    const ev = new KeyboardEvent("keydown", {
+      key: "c",
+      code: "KeyC",
+      bubbles: true
+    });
+    document.dispatchEvent(ev);
+  };
+
+  let tries = 0;
+  const t = setInterval(() => {
+    tries++;
+    pressC();
+    if (tries > 20) clearInterval(t);
+  }, 200);
+}
+"""
+
+CSS_MINIMAL_UI = """
+/* Hide labels, instructions, help text */
+.gradio-container label,
+.gradio-container .block-label,
+.gradio-container .markdown,
+.gradio-container p {
+  display: none !important;
+}
+
+/* Reduce rounding of UI containers */
+.gradio-container [class*="rounded"] {
+  border-radius: 4px !important;
+}
+
+/* Reduce padding */
+.gradio-container [class*="p-4"] {
+  padding: 0.25rem !important;
+}
+"""
+
+
 _MODEL = None
 _ARGS = None
 _WEIGHTS_PATH = None
 
+
 def _get_args():
     global _ARGS
     if _ARGS is None:
@@ -26,6 +71,7 @@ def _get_args():
         _ARGS = args
     return _ARGS
 
+
 def _get_weights_path():
     global _WEIGHTS_PATH
     if _WEIGHTS_PATH is None:
@@ -36,6 +82,7 @@ def _get_weights_path():
         )
     return _WEIGHTS_PATH
 
+
 def get_model_on_device(device: torch.device):
     """
     Lazily build and load model, then move to the requested device.
@@ -63,22 +110,140 @@ def get_model_on_device(device: torch.device):
     return _MODEL
 
 
-#
+# -----------------------------
+# Rotation helper (in case annotator reports orientation)
+# -----------------------------
+def _rotate_image_and_boxes(image_np: np.ndarray, boxes: list[dict], angle: int):
+    """
+    angle is in 90-degree steps. The gradio_image_annotation README demonstrates:
+        np.rot90(image, k=-angle)
+    so angle=1 => rotate clockwise 90 deg.
+    """
+    if angle is None:
+        return image_np, boxes
+
+    a = int(angle) % 4
+    if a == 0:
+        return image_np, boxes
+
+    H, W = image_np.shape[:2]
+
+    # rotate image using the same convention as the component docs
+    image_rot = np.rot90(image_np, k=-a)
+
+    def clamp_box(xmin, ymin, xmax, ymax, newW, newH):
+        xmin = max(0, min(newW, xmin))
+        xmax = max(0, min(newW, xmax))
+        ymin = max(0, min(newH, ymin))
+        ymax = max(0, min(newH, ymax))
+        # ensure ordering
+        if xmax < xmin:
+            xmin, xmax = xmax, xmin
+        if ymax < ymin:
+            ymin, ymax = ymax, ymin
+        return xmin, ymin, xmax, ymax
+
+    boxes_rot = []
+    if a == 1:
+        # 90 deg clockwise: (x,y) -> (H - 1 - y, x)
+        newH, newW = W, H
+        for b in boxes:
+            xmin, ymin, xmax, ymax = b["xmin"], b["ymin"], b["xmax"], b["ymax"]
+            nxmin = H - ymax
+            nxmax = H - ymin
+            nymin = xmin
+            nymax = xmax
+            nxmin, nymin, nxmax, nymax = clamp_box(nxmin, nymin, nxmax, nymax, newW, newH)
+            bb = dict(b)
+            bb.update({"xmin": nxmin, "ymin": nymin, "xmax": nxmax, "ymax": nymax})
+            boxes_rot.append(bb)
+
+    elif a == 2:
+        # 180 deg: (x,y) -> (W - 1 - x, H - 1 - y)
+        newH, newW = H, W
+        for b in boxes:
+            xmin, ymin, xmax, ymax = b["xmin"], b["ymin"], b["xmax"], b["ymax"]
+            nxmin = W - xmax
+            nxmax = W - xmin
+            nymin = H - ymax
+            nymax = H - ymin
+            nxmin, nymin, nxmax, nymax = clamp_box(nxmin, nymin, nxmax, nymax, newW, newH)
+            bb = dict(b)
+            bb.update({"xmin": nxmin, "ymin": nymin, "xmax": nxmax, "ymax": nymax})
+            boxes_rot.append(bb)
+
+    else:  # a == 3
+        # 90 deg counter-clockwise: (x,y) -> (y, W - 1 - x)
+        newH, newW = W, H
+        for b in boxes:
+            xmin, ymin, xmax, ymax = b["xmin"], b["ymin"], b["xmax"], b["ymax"]
+            nxmin = ymin
+            nxmax = ymax
+            nymin = W - xmax
+            nymax = W - xmin
+            nxmin, nymin, nxmax, nymax = clamp_box(nxmin, nymin, nxmax, nymax, newW, newH)
+            bb = dict(b)
+            bb.update({"xmin": nxmin, "ymin": nymin, "xmax": nxmax, "ymax": nymax})
+            boxes_rot.append(bb)
+
+    return image_rot, boxes_rot
+
+
+# -----------------------------
+# Function to Process Image Once (GPU)
+# -----------------------------
 @spaces.GPU
 def process_image_once(inputs, enable_mask):
-
+    """
+    inputs is an AnnotatedImageValue-like dict from gradio_image_annotation:
+        {
+            "image": np.ndarray | PIL | str,
+            "boxes": [ {xmin, ymin, xmax, ymax, label?, color?}, ... ],
+            "orientation": int?
+        }
+    """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model = get_model_on_device(device)
 
+    if inputs is None or inputs.get("image", None) is None:
+        # keep behavior simple: return empty outputs
+        return None, [{"pred_boxes": torch.empty(0, 4), "box_v": torch.empty(0)}], [None], torch.empty(1), 1.0, []
+
     image = inputs["image"]
-
+    boxes = inputs.get("boxes", []) or []
+
+    # Ensure numpy image
+    if isinstance(image, Image.Image):
+        image = np.array(image)
+    elif isinstance(image, str):
+        # If you ever allow URL/path returns, you'd need to load it here.
+        # For now, enforce image_type="numpy" in the UI so this does not occur.
+        raise ValueError("Annotator returned image as str. Set image_type='numpy' on image_annotator.")
+
+    # Handle orientation if provided (rare but supported by the component)
+    angle = inputs.get("orientation", None)
+    if angle is not None:
+        image, boxes = _rotate_image_and_boxes(image, boxes, angle)
+
+    # Convert box dicts to the legacy list format so downstream code stays unchanged:
+    # drawn_boxes elements must support [0], [1], [3], [4] indexing.
+    # Encoded as: [x1, y1, 0, x2, y2]
+    drawn_boxes = []
+    for b in boxes:
+        drawn_boxes.append([float(b["xmin"]), float(b["ymin"]), 0.0, float(b["xmax"]), float(b["ymax"])])
+
+    # If no boxes, keep consistent behavior (the model call would likely fail)
+    if len(drawn_boxes) == 0:
+        return image, [{"pred_boxes": torch.empty(0, 4), "box_v": torch.empty(0)}], [None], torch.empty(1), 1.0, []
+
     image_tensor = torch.tensor(image).to(device)
     image_tensor = image_tensor.permute(2, 0, 1).float() / 255.0
     image_tensor = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(image_tensor)
 
-    bboxes_tensor = torch.tensor(
-
-
+    bboxes_tensor = torch.tensor(
+        [[box[0], box[1], box[3], box[4]] for box in drawn_boxes],
+        dtype=torch.float32,
+    ).to(device)
 
     img, bboxes, scale = resize_and_pad(image_tensor, bboxes_tensor, size=1024.0)
     img = img.unsqueeze(0).to(device)
@@ -88,13 +253,8 @@ def process_image_once(inputs, enable_mask):
     model.module.return_masks = enable_mask
     outputs, _, _, _, masks = model(img, bboxes)
 
-    #
-    # ZeroGPU requirement: return ONLY CPU-native objects to main process.
-    # Do NOT return CUDA tensors, and avoid returning output dicts that may
-    # contain additional CUDA tensors beyond pred_boxes/box_v.
-    # ------------------------------------------------------------------
+    # Return ONLY CPU-native objects to main process.
     out0 = outputs[0]
-
     pred_boxes_cpu = out0["pred_boxes"].detach().float().cpu()
     box_v_cpu = out0["box_v"].detach().float().cpu()
 
@@ -108,7 +268,6 @@ def process_image_once(inputs, enable_mask):
     else:
         masks_cpu = [None]
 
-    # img is only used for shape in post_process, so return a CPU tensor
    img_cpu = img.detach().cpu()
 
     return image, outputs_cpu, masks_cpu, img_cpu, float(scale), drawn_boxes
@@ -123,22 +282,13 @@ def _hsv_to_rgb255(h, s, v):
 
 
 def instance_colors(i: int):
-    """
-    Pastel palette per instance.
-      - Mask: pastel fill
-      - Box: same hue, slightly more saturated (but still pastel-ish)
-    Deterministic hue stepping (golden ratio) for stable and distinct colors.
-    """
     h = (i * 0.618033988749895) % 1.0
-    mask_rgb = _hsv_to_rgb255(h, s=0.28, v=1.00)
-    box_rgb = _hsv_to_rgb255(h, s=0.42, v=0.95)
+    mask_rgb = _hsv_to_rgb255(h, s=0.28, v=1.00)
+    box_rgb = _hsv_to_rgb255(h, s=0.42, v=0.95)
     return mask_rgb, box_rgb
 
 
 def overlay_single_mask(base_rgba: Image.Image, mask_bool: np.ndarray, rgb, alpha=0.45):
-    """
-    Alpha-composite a single instance mask (boolean HxW) in given rgb onto base_rgba.
-    """
     if mask_bool.dtype != np.bool_:
         mask_bool = mask_bool.astype(bool)
 
@@ -153,12 +303,19 @@ def overlay_single_mask(base_rgba: Image.Image, mask_bool: np.ndarray, rgb, alph
     return Image.alpha_composite(base_rgba, overlay_img)
 
 
-#
+# -----------------------------
+# Post-process and Update Output
+# -----------------------------
 def post_process(image, outputs, masks, img, scale, drawn_boxes, enable_mask, threshold):
     idx = 0
     threshold = 1 / threshold
 
     score = outputs[idx]["box_v"]
+    if score.numel() == 0:
+        # no predictions
+        image_pil = Image.fromarray((image).astype(np.uint8)).convert("RGB")
+        return image_pil, 0
+
     score_mask = score > score.max() / threshold
 
     keep = ops.nms(
@@ -171,20 +328,17 @@ def post_process(image, outputs, masks, img, scale, drawn_boxes, enable_mask, th
     pred_boxes = torch.clamp(pred_boxes, 0, 1)
     pred_boxes = (pred_boxes / scale * img.shape[-1]).tolist()
 
-    # Base image as RGBA for compositing
     image = Image.fromarray((image).astype(np.uint8)).convert("RGBA")
 
-    # --- Masks: per-instance, pastel, matching box hue ---
     if enable_mask and masks is not None and masks[idx] is not None:
         masks_sel = masks[idx][score_mask[0]] if score_mask.ndim > 1 else masks[idx][score_mask]
-        masks_sel = masks_sel[keep]
+        masks_sel = masks_sel[keep]
 
         target_h = int(img.shape[2] / scale)
         target_w = int(img.shape[3] / scale)
         resize_nearest = T.Resize((target_h, target_w), interpolation=T.InterpolationMode.NEAREST)
 
         W, H = image.size
-
        for i in range(masks_sel.shape[0]):
             mask_i = masks_sel[i]
             if mask_i.ndim == 3:
@@ -197,37 +351,38 @@
             mask_rgb, _ = instance_colors(i)
             image = overlay_single_mask(image, mask_bool, mask_rgb, alpha=0.45)
 
-    # --- Boxes: thin, pastel, no labels/text ---
     draw = ImageDraw.Draw(image)
-    box_width = 2
+    box_width = 2
 
     for i, box in enumerate(pred_boxes):
         _, box_rgb = instance_colors(i)
         x1, y1, x2, y2 = map(float, box)
         draw.rectangle([x1, y1, x2, y2], outline=box_rgb, width=box_width)
 
-
-
-    exemplar_inner = (0, 0, 0, 255)  # black
+    exemplar_outline = (255, 255, 255, 255)
+    exemplar_inner = (0, 0, 0, 255)
     for box in drawn_boxes:
         x1, y1, x2, y2 = box[0], box[1], box[3], box[4]
         draw.rectangle([x1, y1, x2, y2], outline=exemplar_outline, width=2)
         draw.rectangle([x1 + 1, y1 + 1, x2 - 1, y2 - 1], outline=exemplar_inner, width=1)
 
-    # Return without any text/labels on the image
     return image.convert("RGB"), len(pred_boxes)
 
 
-
+# -----------------------------
+# Gradio UI
+# -----------------------------
+iface = gr.Blocks(
+    title="GeCo2 Gradio Demo",
+    js=JS_FORCE_CREATE_MODE,
+    css=CSS_MINIMAL_UI,
+)
 
 with iface:
     gr.Markdown(
         """
 # GeCo2: Generalized-Scale Object Counting with Gradual Query Aggregation
-
-GeCo2 is a few-shot, category-agnostic detection counter. With only a small number of exemplars, GeCo2 can detect and count all instances of the target object in an image wihtout any retraining.
-
-
+GeCo2 is a few-shot, category-agnostic detection counter. With only a small number of exemplars, GeCo2 can detect and count all instances of the target object in an image without any retraining.
 1) Upload an image.
 2) Draw bounding boxes on the target object (preferably ~3 instances).
 3) Click **Count**.
@@ -244,7 +399,17 @@ GeCo2 is a few-shot, category-agnostic detection counter. With only a small numb
     drawn_boxes_state = gr.State()
 
     with gr.Row():
-
+        # New annotator component
+        annotator = image_annotator(
+            value=None,
+            image_type="numpy",  # ensures inputs["image"] is a numpy array
+            label_list=["Object"],
+            label_colors=[(0, 255, 0)],
+            use_default_label=True,
+            enable_keyboard_shortcuts=True,
+            interactive=True,
+            show_label=False,  # hide label text on boxes
+        )
         image_output = gr.Image(type="pil")
 
     with gr.Row():
@@ -256,6 +421,8 @@ GeCo2 is a few-shot, category-agnostic detection counter. With only a small numb
 
     def initial_process(inputs, enable_mask, threshold):
         image, outputs, masks, img, scale, drawn_boxes = process_image_once(inputs, enable_mask)
+        if image is None:
+            return None, 0, None, None, None, None, None, None
         return (
             *post_process(image, outputs, masks, img, scale, drawn_boxes, enable_mask, threshold),
             image,
@@ -267,11 +434,13 @@ GeCo2 is a few-shot, category-agnostic detection counter. With only a small numb
     )
 
     def update_threshold(threshold, image, outputs, masks, img, scale, drawn_boxes, enable_mask):
+        if image is None or outputs is None or img is None:
+            return None, 0
         return post_process(image, outputs, masks, img, scale, drawn_boxes, enable_mask, threshold)
 
     count_button.click(
         initial_process,
-        [
+        [annotator, enable_mask, threshold],
         [image_output, count_output, image_input, outputs_state, masks_state, img_state, scale_state, drawn_boxes_state],
     )
@@ -288,4 +457,4 @@ GeCo2 is a few-shot, category-agnostic detection counter. With only a small numb
     )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.queue().launch()
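Migration note: below is a minimal standalone sketch of the new input contract (illustrative only, not part of the commit; the helper name to_legacy_boxes is invented here). image_annotator returns a dict whose "boxes" entries are {xmin, ymin, xmax, ymax, ...} dicts in pixel coordinates, and the demo re-encodes each one as a legacy [x1, y1, 0, x2, y2] list so downstream indexing with [0], [1], [3], [4] keeps working:

import gradio as gr
from gradio_image_annotation import image_annotator

def to_legacy_boxes(value):
    # value["boxes"] holds dicts with pixel coords; re-encode each box as
    # [x1, y1, 0, x2, y2] so the legacy pipeline can index [0], [1], [3], [4].
    boxes = (value or {}).get("boxes") or []
    return [[float(b["xmin"]), float(b["ymin"]), 0.0,
             float(b["xmax"]), float(b["ymax"])] for b in boxes]

with gr.Blocks() as demo:
    ann = image_annotator(value=None, image_type="numpy")
    out = gr.JSON()
    gr.Button("Show boxes").click(to_legacy_boxes, ann, out)

if __name__ == "__main__":
    demo.launch()

The rotation helper follows the np.rot90(image, k=-angle) convention from the component README, so for angle=1 a box (xmin, ymin, xmax, ymax) maps to (H - ymax, xmin, H - ymin, xmax) in the rotated frame.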
requirements.txt CHANGED
@@ -110,5 +110,5 @@ websockets==12.0
 zipp==3.21.0
 spaces
 gradio_client
-gradio
-
+gradio==5.50.0
+gradio_image_annotation