Spaces:

Napron
/

small_object_detection

Sleeping

App Files Files Community

orik-ss committed 2 days ago

Commit

100dbc1

1 Parent(s): 7905374

Updated siglip labels

Browse files

Files changed (4) hide show

app.py +3 -6
dfine_jina_pipeline.py +14 -2
siglip2_onnx_zeroshot.py +3 -9
siglip_zeroshot.py +3 -14

app.py CHANGED Viewed

@@ -142,10 +142,7 @@ def run_dfine_classify(image, refs_path, dfine_threshold, dfine_model_choice, mi
         classifier=classifier,
     )
-    if status is not None:
-        return [(g, None) for g in (group_crops or [])], [(k, None) for k in (known_crops or [])], status
-    return [(g, None) for g in group_crops], [(k, None) for k in known_crops], ""
 IMG_HEIGHT = 400
@@ -332,8 +329,8 @@ with gr.Blocks(title="Small Object Detection") as app:
                     )
                     out_status_dfine = gr.Textbox(
-                        label="Status",
-                        lines=2,
                         interactive=False,
                     )

         classifier=classifier,
     )
+    return [(g, None) for g in (group_crops or [])], [(k, None) for k in (known_crops or [])], status or ""
 IMG_HEIGHT = 400
                     )
                     out_status_dfine = gr.Textbox(
+                        label="Classification details",
+                        lines=8,
                         interactive=False,
                     )

dfine_jina_pipeline.py CHANGED Viewed

@@ -618,6 +618,7 @@ def run_single_image(
     results_per_crop = []
     group_crop_images = []
     # For each person/car group: crop (with 10% margin), run D-FINE on crop, detect objects, then classify each
     for gidx, grp in enumerate(top_groups):
@@ -702,6 +703,14 @@ def run_single_image(
             conf = result["confidence"]
             results_per_crop.append((gidx, (bx1, by1, bx2, by2), small_crop, pred, conf))
         # Draw bboxes on this group crop (bboxes already in crop coords)
         boxes_to_draw = [
             (bx1, by1, bx2, by2, pred, conf)
@@ -714,8 +723,11 @@ def run_single_image(
             crop_pil_drawn = crop_pil
         group_crop_images.append(np.array(crop_pil_drawn))
     if not results_per_crop:
-        return group_crop_images if group_crop_images else [], [], "No small-object crops: D-FINE on person/car crops did not detect any object (gun/phone/etc.), or all were below min size."
     # Build known-only gallery: only objects with conf >= min_display_conf
     known_crop_composites = []
@@ -725,7 +737,7 @@ def run_single_image(
         composite = draw_label_on_image(crop_pil, pred, conf)
         known_crop_composites.append(np.array(composite))
-    return group_crop_images, known_crop_composites, None
 if __name__ == "__main__":

     results_per_crop = []
     group_crop_images = []
+    classification_log = []
     # For each person/car group: crop (with 10% margin), run D-FINE on crop, detect objects, then classify each
     for gidx, grp in enumerate(top_groups):
             conf = result["confidence"]
             results_per_crop.append((gidx, (bx1, by1, bx2, by2), small_crop, pred, conf))
+            # Build per-crop log line
+            sims_str = ", ".join(f"{k}: {v:.4f}" for k, v in result.get("all_sims", {}).items())
+            classification_log.append(
+                f"[group {gidx}] dfine: {d['label']} ({d['conf']:.3f}) → "
+                f"{pred} (conf={conf:.4f}, gap={result['gap']:.4f}, 2nd={result.get('second_best','?')}) "
+                f"| {result['status']} | {sims_str}"
+            )
         # Draw bboxes on this group crop (bboxes already in crop coords)
         boxes_to_draw = [
             (bx1, by1, bx2, by2, pred, conf)
             crop_pil_drawn = crop_pil
         group_crop_images.append(np.array(crop_pil_drawn))
+    log_text = f"Classifier: {classifier} | {len(results_per_crop)} crops classified\n"
+    log_text += "\n".join(classification_log) if classification_log else "(no crops)"
     if not results_per_crop:
+        return group_crop_images if group_crop_images else [], [], log_text + "\nNo small-object crops: D-FINE on person/car crops did not detect any object (gun/phone/etc.), or all were below min size."
     # Build known-only gallery: only objects with conf >= min_display_conf
     known_crop_composites = []
         composite = draw_label_on_image(crop_pil, pred, conf)
         known_crop_composites.append(np.array(composite))
+    return group_crop_images, known_crop_composites, log_text
 if __name__ == "__main__":

siglip2_onnx_zeroshot.py CHANGED Viewed

@@ -13,7 +13,7 @@ from PIL import Image
 from huggingface_hub import hf_hub_download
 from transformers import AutoProcessor
-from jina_fewshot import CLASS_PROMPTS, IMAGE_EXTS
 REPO_ID = "onnx-community/siglip2-large-patch16-256-ONNX"
@@ -137,15 +137,9 @@ class SigLIP2ONNXClassifier:
         if not self.labels:
             raise ValueError(f"No subfolders in {refs_dir}")
-        text_prompts = []
-        for name in self.labels:
-            prompts = CLASS_PROMPTS.get(name, [f"a {name}"])
-            text_prompts.append(prompts[0])
-        self._text_embeds = self._encode_texts(text_prompts)
-        print(f"  SigLIP2 ONNX classes: {self.labels}")
-        print(f"  Text prompts: {text_prompts}")
         print(f"  Text embeds shape: {self._text_embeds.shape}")
     def classify_crop(self, crop, conf_threshold, gap_threshold):

 from huggingface_hub import hf_hub_download
 from transformers import AutoProcessor
+from jina_fewshot import IMAGE_EXTS
 REPO_ID = "onnx-community/siglip2-large-patch16-256-ONNX"
         if not self.labels:
             raise ValueError(f"No subfolders in {refs_dir}")
+        self._text_embeds = self._encode_texts(self.labels)
+        print(f"  SigLIP2 ONNX labels: {self.labels}")
         print(f"  Text embeds shape: {self._text_embeds.shape}")
     def classify_crop(self, crop, conf_threshold, gap_threshold):

siglip_zeroshot.py CHANGED Viewed

@@ -11,9 +11,6 @@ import numpy as np
 import torch
 from transformers import SiglipModel, AutoProcessor
-from jina_fewshot import CLASS_PROMPTS
 class SigLIPClassifier:
     """Zero-shot crop classifier using SigLIP (PyTorch)."""
@@ -27,25 +24,17 @@ class SigLIPClassifier:
         self.processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")
         self.labels = []
-        self._text_prompts = []
         print(f"[*] SigLIP loaded in {time.perf_counter() - t0:.1f}s (device={device})")
     def build_refs(self, refs_dir, **kwargs):
-        """Extract class names from refs_dir subfolders. No images needed."""
         refs_dir = Path(refs_dir)
         self.labels = sorted(d.name for d in refs_dir.iterdir() if d.is_dir())
         if not self.labels:
             raise ValueError(f"No subfolders in {refs_dir}")
-        # Build one prompt per class (first from CLASS_PROMPTS, fallback to "a {name}")
-        self._text_prompts = []
-        for name in self.labels:
-            prompts = CLASS_PROMPTS.get(name, [f"a {name}"])
-            self._text_prompts.append(prompts[0])
-        print(f"  SigLIP classes: {self.labels}")
-        print(f"  Text prompts: {self._text_prompts}")
     def classify_crop(self, crop, conf_threshold, gap_threshold):
         """
@@ -53,7 +42,7 @@ class SigLIPClassifier:
         Returns dict matching jina_fewshot.classify() format.
         """
         inputs = self.processor(
-            text=self._text_prompts,
             images=crop,
             return_tensors="pt",
             padding="max_length",

 import torch
 from transformers import SiglipModel, AutoProcessor
 class SigLIPClassifier:
     """Zero-shot crop classifier using SigLIP (PyTorch)."""
         self.processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")
         self.labels = []
         print(f"[*] SigLIP loaded in {time.perf_counter() - t0:.1f}s (device={device})")
     def build_refs(self, refs_dir, **kwargs):
+        """Extract class names from refs_dir subfolders as plain labels."""
         refs_dir = Path(refs_dir)
         self.labels = sorted(d.name for d in refs_dir.iterdir() if d.is_dir())
         if not self.labels:
             raise ValueError(f"No subfolders in {refs_dir}")
+        print(f"  SigLIP labels: {self.labels}")
     def classify_crop(self, crop, conf_threshold, gap_threshold):
         """
         Returns dict matching jina_fewshot.classify() format.
         """
         inputs = self.processor(
+            text=self.labels,
             images=crop,
             return_tensors="pt",
             padding="max_length",