Spaces:

sk3404
/

tiger_counter

Sleeping

App Files Files Community

Sergey Kolbin committed on Aug 17

Commit

d384e64

1 Parent(s): 28dc509

mmma

Browse files

Files changed (2) hide show

app.py +92 -41
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,24 +1,44 @@
 import gradio as gr
 from transformers import pipeline
 from PIL import Image, ImageDraw, ImageFont
 from collections import defaultdict
-# 1) Zero-shot detector (works on CPU Spaces)
-#    You can upgrade model to "google/owlv2-base-patch16-ensemble" for higher accuracy (slower).
-#    model="google/owlvit-base-patch32"  # fast & lightweight
-detector = pipeline(
-    task="zero-shot-object-detection",
-    model="google/owlv2-base-patch16-ensemble"  # fast & lightweight
-)
-# Keep labels explicit so the model can choose the right class.
-# (You can add synonyms like "Bengal tiger", "African lion" if you want.)
-LABELS = ["tiger", "lion"]
-COLOR_BY_LABEL = {
-    "tiger": "red",
-    "lion": "blue",
-}
 def iou(box_a, box_b):
     xA = max(box_a["xmin"], box_b["xmin"])
@@ -34,7 +54,6 @@ def iou(box_a, box_b):
     return inter / denom
 def nms_single_class(dets, iou_thresh=0.5):
-    # dets: list of dicts with keys {"box": {...}, "score": float, "label": str}
     dets = sorted(dets, key=lambda d: d["score"], reverse=True)
     kept = []
     while dets:
@@ -44,12 +63,12 @@ def nms_single_class(dets, iou_thresh=0.5):
     return kept
 def class_aware_nms(dets, iou_thresh=0.5):
-    # Run NMS separately per class so lions don't suppress tigers (and vice versa)
     by_label = defaultdict(list)
     for d in dets:
         by_label[d["label"].lower()].append(d)
     merged = []
-    for label, per_class in by_label.items():
         merged.extend(nms_single_class(per_class, iou_thresh=iou_thresh))
     return merged
@@ -61,41 +80,65 @@ def annotate(img, dets):
         font = None
     for d in dets:
         b = d["box"]
-        color = COLOR_BY_LABEL.get(d["label"].lower(), "red")
         draw.rectangle([(b["xmin"], b["ymin"]), (b["xmax"], b["ymax"])], outline=color, width=3)
         txt = f"{d['label']} {d['score']:.2f}"
-        # Estimate text width
         try:
             txt_w = draw.textlength(txt, font=font)
         except AttributeError:
             txt_w = 8 * len(txt)
         pad = 3
-        draw.rectangle(
-            [(b["xmin"], b["ymin"] - 18), (b["xmin"] + txt_w + 2 * pad, b["ymin"])],
-            fill=color
-        )
-        draw.text((b["xmin"] + pad, b["ymin"] - 16), txt, fill="white", font=font)
     return img
-def count_big_cats(img, score_threshold, iou_threshold):
-    # 2) Run zero-shot detection with both labels
-    preds = detector(img, candidate_labels=LABELS)
-    # 3) Keep only our labels and apply score filter
-    preds = [p for p in preds if p["label"].lower() in LABELS and p["score"] >= score_threshold]
-    # 4) Class-aware NMS
     preds = class_aware_nms(preds, iou_thresh=iou_threshold)
-    # 5) Prepare counts
-    tiger_count = sum(1 for p in preds if p["label"].lower() == "tiger")
-    lion_count = sum(1 for p in preds if p["label"].lower() == "lion")
     total_count = tiger_count + lion_count
-    # 6) Draw boxes
     img_annotated = annotate(img.copy(), preds)
     return tiger_count, lion_count, total_count, img_annotated
 TEST_IMAGES = {
     "Tigers": "examples/tiger1.png",
     "More Tigers": "examples/tiger2.png",
@@ -106,13 +149,16 @@ TEST_IMAGES = {
 def load_test_image(choice):
     return Image.open(TEST_IMAGES[choice])
 with gr.Blocks(title="Big Cat Counter") as demo:
-    gr.Markdown("# 🐯🦁 Big Cat Counter\nUpload an image and I’ll count how many **tigers** and **lions** I see.")
     with gr.Row():
         with gr.Column():
             inp = gr.Image(type="pil", label="Input image")
             test_selector = gr.Dropdown(list(TEST_IMAGES.keys()), label="Pick a test image")
             score_th = gr.Slider(0.05, 0.95, value=0.20, step=0.05, label="Score threshold")
             iou_th = gr.Slider(0.1, 0.9, value=0.50, step=0.05, label="IOU (NMS) threshold")
             btn = gr.Button("Count Big Cats")
@@ -121,8 +167,13 @@ with gr.Blocks(title="Big Cat Counter") as demo:
             out_lion = gr.Number(label="Lion count", precision=0)
             out_total = gr.Number(label="Total big cats", precision=0)
             out_img = gr.Image(label="Annotated output")
     test_selector.change(fn=load_test_image, inputs=test_selector, outputs=inp)
-    btn.click(fn=count_big_cats, inputs=[inp, score_th, iou_th], outputs=[out_tiger, out_lion, out_total, out_img])
 if __name__ == "__main__":
     demo.launch()

+import os
+import torch
 import gradio as gr
 from transformers import pipeline
 from PIL import Image, ImageDraw, ImageFont
 from collections import defaultdict
+from functools import lru_cache
# ---------- Config ----------
# Model id used by default; the MODEL_ID environment variable overrides it.
DEFAULT_MODEL = os.environ.get("MODEL_ID", "google/owlvit-large-patch14")

# Model ids that can be selected.
MODEL_CHOICES = [
    "google/owlvit-large-patch14",  # default
    "google/owlv2-large-patch14-ensemble",
    "google/owlv2-base-patch16-ensemble",
    "google/owlvit-base-patch32",
]

# Candidate labels include toys/plush/lego/figurines/cartoons/etc.
# Every entry contains the substring "tiger" or "lion" respectively.
TIGER_SYNS = [
    "tiger",
    "tiger cub",
    "tiger toy",
    "toy tiger",
    "plush tiger",
    "stuffed tiger",
    "stuffed animal tiger",
    "lego tiger",
    "tiger figurine",
    "tiger statue",
    "cartoon tiger",
    "tiger drawing",
]
LION_SYNS = [
    "lion",
    "lioness",
    "lion cub",
    "lion toy",
    "toy lion",
    "plush lion",
    "stuffed lion",
    "stuffed animal lion",
    "lego lion",
    "lion figurine",
    "lion statue",
    "cartoon lion",
    "lion drawing",
]

# Full prompt list: tiger synonyms first, then lion synonyms.
CANDIDATE_LABELS = TIGER_SYNS + LION_SYNS

# Colour looked up per canonical class name.
COLOR_BY_LABEL = {
    "tiger": "red",
    "lion": "blue",
}
+# ---------- Utils ----------
def canonicalize(label: str):
    """Map a detector label onto its canonical class name.

    The match is a case-insensitive substring test, tried in the order
    "tiger" then "lion"; the first hit wins.

    Returns:
        "tiger" or "lion", or None when neither substring occurs in `label`.
    """
    lowered = label.lower()
    # Order matters: a label containing both words resolves to "tiger".
    for species in ("tiger", "lion"):
        if species in lowered:
            return species
    return None
 def iou(box_a, box_b):
     xA = max(box_a["xmin"], box_b["xmin"])
     return inter / denom
 def nms_single_class(dets, iou_thresh=0.5):
     dets = sorted(dets, key=lambda d: d["score"], reverse=True)
     kept = []
     while dets:
     return kept
 def class_aware_nms(dets, iou_thresh=0.5):
+    # NMS per class so synonyms don't suppress each other across classes
     by_label = defaultdict(list)
     for d in dets:
         by_label[d["label"].lower()].append(d)
     merged = []
+    for per_class in by_label.values():
         merged.extend(nms_single_class(per_class, iou_thresh=iou_thresh))
     return merged
         font = None
     for d in dets:
         b = d["box"]
+        color = COLOR_BY_LABEL.get(d["label"], "red")
         draw.rectangle([(b["xmin"], b["ymin"]), (b["xmax"], b["ymax"])], outline=color, width=3)
         txt = f"{d['label']} {d['score']:.2f}"
         try:
             txt_w = draw.textlength(txt, font=font)
         except AttributeError:
             txt_w = 8 * len(txt)
         pad = 3
+        top = max(0, b["ymin"] - 18)
+        draw.rectangle([(b["xmin"], top), (b["xmin"] + txt_w + 2 * pad, top + 18)], fill=color)
+        draw.text((b["xmin"] + pad, top + 2), txt, fill="white", font=font)
     return img
@lru_cache(maxsize=4)
def get_detector(model_id: str):
    """Build the zero-shot object-detection pipeline for `model_id`.

    Results are memoized per model id (up to 4 entries) by `lru_cache`, so
    repeated calls with the same id reuse the already-built pipeline.

    When CUDA is available the pipeline is placed on device 0 with float16
    weights; otherwise it runs on CPU (device -1) with float32.
    """
    if torch.cuda.is_available():
        device, dtype = 0, torch.float16
    else:
        device, dtype = -1, torch.float32

    return pipeline(
        "zero-shot-object-detection",
        model=model_id,
        device=device,
        torch_dtype=dtype,
    )
+# ---------- Inference ----------
# ---------- Inference ----------
def count_big_cats(img, score_threshold, iou_threshold, model_id):
    """Detect, de-duplicate and count tigers and lions in an image.

    Args:
        img: PIL image to analyse, or None when nothing was uploaded.
        score_threshold: detections scoring below this value are discarded.
        iou_threshold: IoU threshold used for per-class NMS.
        model_id: model identifier passed to `get_detector`.

    Returns:
        A tuple `(tiger_count, lion_count, total_count, annotated_image)`.
        For a None input the counts are all 0 and the image is None.
    """
    if img is None:
        return 0, 0, 0, None

    # Keep memory in check for huge uploads. Work on a copy so the caller's
    # image object is not resized in place.
    if img.width * img.height > 4_000_000:  # ~4MP
        img = img.copy()
        img.thumbnail((2048, 2048))

    detector = get_detector(model_id)
    raw = detector(img, candidate_labels=CANDIDATE_LABELS)

    # Canonicalize labels BEFORE NMS so synonyms don't double-count.
    # Start from an empty list: only detections that pass the score filter
    # and map onto a canonical class may reach NMS, counting and annotation.
    preds = []
    for p in raw:
        if p["score"] < score_threshold:
            continue
        canon = canonicalize(p["label"])
        if canon is None:
            continue
        q = dict(p)  # shallow copy so the detector's output is not mutated
        q["label"] = canon  # overwrite with canonical ('tiger'/'lion')
        preds.append(q)

    # NMS per canonical class
    preds = class_aware_nms(preds, iou_thresh=iou_threshold)

    tiger_count = sum(1 for p in preds if p["label"] == "tiger")
    lion_count = sum(1 for p in preds if p["label"] == "lion")
    total_count = tiger_count + lion_count

    # Draw on a copy so the image used for detection stays unmarked.
    img_annotated = annotate(img.copy(), preds)
    return tiger_count, lion_count, total_count, img_annotated
+# ---------- Demo ----------
 TEST_IMAGES = {
     "Tigers": "examples/tiger1.png",
     "More Tigers": "examples/tiger2.png",
 def load_test_image(choice):
     return Image.open(TEST_IMAGES[choice])
+# Default dropdown value (env override supported)
+default_choice = DEFAULT_MODEL if DEFAULT_MODEL in MODEL_CHOICES else MODEL_CHOICES[0]
 with gr.Blocks(title="Big Cat Counter") as demo:
+    gr.Markdown("# 🐯🦁 Big Cat Counter\nUpload an image and I’ll count how many **tigers** and **lions** I see (including toys, plush, LEGO, etc.).")
     with gr.Row():
         with gr.Column():
             inp = gr.Image(type="pil", label="Input image")
             test_selector = gr.Dropdown(list(TEST_IMAGES.keys()), label="Pick a test image")
+            model_dd = gr.Dropdown(MODEL_CHOICES, value=default_choice, label="Model")
             score_th = gr.Slider(0.05, 0.95, value=0.20, step=0.05, label="Score threshold")
             iou_th = gr.Slider(0.1, 0.9, value=0.50, step=0.05, label="IOU (NMS) threshold")
             btn = gr.Button("Count Big Cats")
             out_lion = gr.Number(label="Lion count", precision=0)
             out_total = gr.Number(label="Total big cats", precision=0)
             out_img = gr.Image(label="Annotated output")
     test_selector.change(fn=load_test_image, inputs=test_selector, outputs=inp)
+    btn.click(
+        fn=count_big_cats,
+        inputs=[inp, score_th, iou_th, model_dd],
+        outputs=[out_tiger, out_lion, out_total, out_img],
+    )
 if __name__ == "__main__":
     demo.launch()

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-transformers>=4.41.0
 huggingface_hub>=0.23.0
 torch
 scipy
@@ -6,3 +6,4 @@ gradio>=4.0.0
 pillow
 safetensors
 accelerate

+transformers>=4.43
 huggingface_hub>=0.23.0
 torch
 scipy
 pillow
 safetensors
 accelerate
+numpy