"""Gradio app that counts tigers and lions in an image with a zero-shot detector."""

from collections import Counter, defaultdict

import gradio as gr
from PIL import Image, ImageDraw, ImageFont
from transformers import pipeline

# 1) Zero-shot detector (works on CPU Spaces).
# "google/owlv2-base-patch16-ensemble" favours accuracy over speed. For a
# faster, lighter alternative, swap in:
#   model="google/owlvit-base-patch32"
detector = pipeline(
    task="zero-shot-object-detection",
    model="google/owlv2-base-patch16-ensemble",  # higher accuracy, slower
)

# Keep labels explicit so the model can choose the right class.
# (You can add synonyms like "Bengal tiger", "African lion" if you want.)
LABELS = ["tiger", "lion"]

# Outline / label-background colour per class; unknown labels fall back to red.
COLOR_BY_LABEL = {
    "tiger": "red",
    "lion": "blue",
}

# Height in pixels of the filled strip the label text is drawn on.
LABEL_HEIGHT = 18


def iou(box_a, box_b):
    """Return the intersection-over-union of two boxes.

    Args:
        box_a: Dict with "xmin", "ymin", "xmax" and "ymax" keys.
        box_b: Dict with the same keys as ``box_a``.

    Returns:
        A float in [0, 1); 0.0 when the boxes do not overlap.
    """
    x_left = max(box_a["xmin"], box_b["xmin"])
    y_top = max(box_a["ymin"], box_b["ymin"])
    x_right = min(box_a["xmax"], box_b["xmax"])
    y_bottom = min(box_a["ymax"], box_b["ymax"])

    # Clamp at zero so disjoint boxes yield an empty intersection.
    inter = max(0.0, x_right - x_left) * max(0.0, y_bottom - y_top)

    area_a = (box_a["xmax"] - box_a["xmin"]) * (box_a["ymax"] - box_a["ymin"])
    area_b = (box_b["xmax"] - box_b["xmin"]) * (box_b["ymax"] - box_b["ymin"])

    # The epsilon avoids a division by zero for two zero-area boxes.
    return inter / (area_a + area_b - inter + 1e-9)


def nms_single_class(dets, iou_thresh=0.5):
    """Greedy non-maximum suppression over detections of one class.

    Args:
        dets: List of dicts with keys {"box": {...}, "score": float, "label": str}.
        iou_thresh: A detection is dropped when its IoU with an already kept,
            higher-scoring detection is greater than or equal to this value.

    Returns:
        The surviving detections, ordered by descending score.
    """
    kept = []
    # Visiting candidates best-first and keeping those that do not overlap any
    # earlier survivor is equivalent to repeatedly popping the best detection
    # and discarding its overlaps, without the repeated list rebuilding.
    for cand in sorted(dets, key=lambda d: d["score"], reverse=True):
        if all(iou(k["box"], cand["box"]) < iou_thresh for k in kept):
            kept.append(cand)
    return kept


def class_aware_nms(dets, iou_thresh=0.5):
    """Run NMS separately per class so lions don't suppress tigers (and vice versa).

    Args:
        dets: List of detection dicts (see ``nms_single_class``).
        iou_thresh: IoU threshold forwarded to ``nms_single_class``.

    Returns:
        The surviving detections of all classes, grouped by label.
    """
    by_label = defaultdict(list)
    for d in dets:
        by_label[d["label"].lower()].append(d)

    merged = []
    for per_class in by_label.values():
        merged.extend(nms_single_class(per_class, iou_thresh=iou_thresh))
    return merged


def annotate(img, dets):
    """Draw a coloured box and a "label score" caption for every detection.

    The image is modified in place and also returned.

    Args:
        img: PIL image to draw on.
        dets: List of detection dicts (see ``nms_single_class``).

    Returns:
        The same ``img`` object with the annotations drawn.
    """
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 14)
    except (OSError, ImportError):
        # Font file missing or FreeType unavailable: passing font=None lets
        # Pillow fall back to its built-in default font.
        font = None

    pad = 3
    for d in dets:
        b = d["box"]
        color = COLOR_BY_LABEL.get(d["label"].lower(), "red")
        draw.rectangle(
            [(b["xmin"], b["ymin"]), (b["xmax"], b["ymax"])],
            outline=color,
            width=3,
        )

        txt = f"{d['label']} {d['score']:.2f}"
        # Estimate text width
        try:
            txt_w = draw.textlength(txt, font=font)
        except AttributeError:
            # Older Pillow releases have no ImageDraw.textlength.
            txt_w = 8 * len(txt)

        # Put the caption just above the box, but never above the top edge of
        # the image, where it would be invisible.
        label_top = max(0, b["ymin"] - LABEL_HEIGHT)
        draw.rectangle(
            [
                (b["xmin"], label_top),
                (b["xmin"] + txt_w + 2 * pad, label_top + LABEL_HEIGHT),
            ],
            fill=color,
        )
        draw.text((b["xmin"] + pad, label_top + 2), txt, fill="white", font=font)
    return img


def count_big_cats(img, score_threshold, iou_threshold):
    """Detect tigers and lions in ``img`` and return counts plus an annotated copy.

    Args:
        img: Input PIL image (``None`` when the user has not provided one).
        score_threshold: Minimum detection score to keep.
        iou_threshold: IoU threshold used for per-class NMS.

    Returns:
        Tuple ``(tiger_count, lion_count, total_count, annotated_image)``.

    Raises:
        gr.Error: If no image was provided.
    """
    if img is None:
        raise gr.Error("Please upload or pick an image first.")

    # 2) Run zero-shot detection with both labels. The threshold is forwarded
    # because the pipeline otherwise applies its own default cut-off (0.1 per
    # the Transformers docs), which would make lower slider values a no-op.
    preds = detector(img, candidate_labels=LABELS, threshold=score_threshold)

    # 3) Keep only our labels and apply score filter
    preds = [
        p
        for p in preds
        if p["label"].lower() in LABELS and p["score"] >= score_threshold
    ]

    # 4) Class-aware NMS
    preds = class_aware_nms(preds, iou_thresh=iou_threshold)

    # 5) Prepare counts
    counts = Counter(p["label"].lower() for p in preds)
    tiger_count = counts["tiger"]
    lion_count = counts["lion"]
    total_count = tiger_count + lion_count

    # 6) Draw boxes on a copy so the caller's image is left untouched
    img_annotated = annotate(img.copy(), preds)
    return tiger_count, lion_count, total_count, img_annotated


TEST_IMAGES = {
    "Tigers": "examples/tiger1.png",
    "More Tigers": "examples/tiger2.png",
    "Funny Tigers": "examples/tiger3.png",
    "Lions": "examples/tigers_and_lions_2.png",
}


def load_test_image(choice):
    """Open the bundled test image for a dropdown choice.

    Args:
        choice: A key of ``TEST_IMAGES``, or ``None`` when nothing is selected.

    Returns:
        The opened PIL image, or ``None`` if ``choice`` is not a known key.
    """
    path = TEST_IMAGES.get(choice)
    if path is None:
        return None
    return Image.open(path)


with gr.Blocks(title="Big Cat Counter") as demo:
    gr.Markdown("# 🐯🦁 Big Cat Counter\nUpload an image and I’ll count how many **tigers** and **lions** I see.")
    with gr.Row():
        with gr.Column():
            inp = gr.Image(type="pil", label="Input image")
            test_selector = gr.Dropdown(list(TEST_IMAGES.keys()), label="Pick a test image")
            score_th = gr.Slider(0.05, 0.95, value=0.20, step=0.05, label="Score threshold")
            iou_th = gr.Slider(0.1, 0.9, value=0.50, step=0.05, label="IOU (NMS) threshold")
            btn = gr.Button("Count Big Cats")
        with gr.Column():
            out_tiger = gr.Number(label="Tiger count", precision=0)
            out_lion = gr.Number(label="Lion count", precision=0)
            out_total = gr.Number(label="Total big cats", precision=0)
            out_img = gr.Image(label="Annotated output")

    # Selecting a test image loads it into the input component.
    test_selector.change(fn=load_test_image, inputs=test_selector, outputs=inp)
    btn.click(
        fn=count_big_cats,
        inputs=[inp, score_th, iou_th],
        outputs=[out_tiger, out_lion, out_total, out_img],
    )

if __name__ == "__main__":
    demo.launch()