Spaces:

Napron
/

small_object_detection

Sleeping

App Files Files Community

orik-ss committed on Mar 13

Commit

2455309

1 Parent(s): 1319df4

Added all D-FINE models and removed Jina and SigLIP2

Browse files

Files changed (2) hide show

app.py +33 -91
dfine_jina_pipeline.py +18 -5

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-""" Gradio app: Tab 1 = Object Detection (YOLO models/v1), Tab 2 = D-FINE + Classify (Jina). """
 import os
 os.environ["YOLO_CONFIG_DIR"] = os.environ.get("YOLO_CONFIG_DIR", "/tmp")
@@ -9,7 +9,7 @@ import gradio as gr
 from ultralytics import YOLO
 from pathlib import Path
-# Tab 2: D-FINE runs first, then Jina for crop classification
 from dfine_jina_pipeline import run_single_image
@@ -108,17 +108,8 @@ def run_detection(image, model):
     return out_img, det_json
-CLASSIFIER_MAP = {
-    "Jina-CLIP-v2 (few-shot)": "jina",
-    "SigLIP (zero-shot)": "siglip",
-    "SigLIP2 ONNX (zero-shot)": "siglip2_onnx",
-}
-def run_dfine_classify(image, refs_path, dfine_threshold, dfine_model_choice,
-                       min_display_conf, gap_threshold, siglip_threshold,
-                       classifier_choice="Jina-CLIP-v2 (few-shot)"):
-    """Tab 2: D-FINE first, then classify crops.
     Returns (group_crop_gallery, known_crop_gallery, status_message).
     """
     if image is None:
@@ -129,18 +120,8 @@ def run_dfine_classify(image, refs_path, dfine_threshold, dfine_model_choice,
     if not refs.is_dir():
         return [], [], f"Refs folder not found: {refs}"
-    dfine_model = "large" if dfine_model_choice.strip().lower() == "large" else "medium"
-    classifier = CLASSIFIER_MAP.get(classifier_choice, "jina")
-    # SigLIP models: use their own threshold, no gap check
-    if classifier in ("siglip", "siglip2_onnx"):
-        conf_thresh = float(siglip_threshold)
-        gap_thresh = 0.0
-        display_conf = float(siglip_threshold)
-    else:
-        conf_thresh = 0.5
-        gap_thresh = float(gap_threshold)
-        display_conf = float(min_display_conf)
     group_crops, known_crops, status = run_single_image(
         image,
@@ -148,11 +129,11 @@ def run_dfine_classify(image, refs_path, dfine_threshold, dfine_model_choice,
         dfine_model=dfine_model,
         det_threshold=float(dfine_threshold),
         conf_threshold=conf_thresh,
-        gap_threshold=gap_thresh,
         min_side=24,
         crop_dedup_iou=0.4,
-        min_display_conf=display_conf,
-        classifier=classifier,
     )
     return [(g, None) for g in (group_crops or [])], [(k, None) for k in (known_crops or [])], status or ""
@@ -248,14 +229,9 @@ with gr.Blocks(title="Small Object Detection") as app:
         with gr.TabItem("D-FINE + Classify"):
             gr.Markdown(
-                "**D-FINE** runs first (person/car grouping), then small-object crops are classified. "
-                "Choose a **classifier**: Jina-CLIP-v2 (few-shot, uses reference images), "
-                "SigLIP (zero-shot, PyTorch), or SigLIP2 ONNX (zero-shot, larger model). "
-                "Choose D-FINE model size (Medium or Large). "
-                "Uses the **refs** folder (one subfolder per class, e.g. refs/phone/, refs/cigarette/) "
-                "— Jina uses reference images; SigLIP models use only the folder names as class labels.\n\n"
-                "**Gap** = how much the top class (e.g. gun) must beat the next-best class (e.g. phone). "
-                "Bigger gap means the model is more sure; we only accept the label if both confidence and gap are high enough."
             )
             with gr.Row():
@@ -268,19 +244,16 @@ with gr.Blocks(title="Small Object Detection") as app:
                         height=IMG_HEIGHT
                     )
-                    classifier_radio = gr.Radio(
-                        choices=list(CLASSIFIER_MAP.keys()),
-                        value="Jina-CLIP-v2 (few-shot)",
-                        label="Classifier",
-                    )
-                    dfine_model_radio = gr.Radio(
-                        choices=["Medium", "Large"],
-                        value="Large",
                         label="D-FINE model",
                     )
-                    # Default threshold: Large=0.2, Medium=0.15 (slider updates when model changes)
                     dfine_threshold_slider = gr.Slider(
                         minimum=0.05,
                         maximum=0.5,
@@ -290,7 +263,11 @@ with gr.Blocks(title="Small Object Detection") as app:
                     )
                     def update_dfine_threshold_default(choice):
-                        return gr.update(value=0.2 if (choice and choice.strip().lower() == "large") else 0.15)
                     dfine_model_radio.change(
                         fn=update_dfine_threshold_default,
@@ -298,6 +275,14 @@ with gr.Blocks(title="Small Object Detection") as app:
                         outputs=[dfine_threshold_slider],
                     )
                     refs_path = gr.Textbox(
                         label="Refs folder path",
                         value=REFS_DIR,
@@ -311,33 +296,6 @@ with gr.Blocks(title="Small Object Detection") as app:
                 with gr.Column(scale=1):
-                    # --- Jina thresholds (visible when Jina selected) ---
-                    threshold_slider = gr.Slider(
-                        minimum=0.0,
-                        maximum=1.0,
-                        value=0.703,
-                        step=0.005,
-                        label="Jina: min display confidence",
-                    )
-                    gap_slider = gr.Slider(
-                        minimum=0.0,
-                        maximum=0.02,
-                        value=0.005,
-                        step=0.001,
-                        label="Jina: gap (top class must beat runner-up by this much)",
-                    )
-                    # --- SigLIP threshold (visible when SigLIP selected) ---
-                    siglip_threshold_slider = gr.Slider(
-                        minimum=0.0,
-                        maximum=1.0,
-                        value=0.05,
-                        step=0.01,
-                        label="SigLIP: min confidence threshold",
-                        visible=False,
-                    )
                     out_gallery_dfine = gr.Gallery(
                         label="Person/car crops (all D-FINE objects inside drawn with label + score)",
                         height=IMG_HEIGHT,
@@ -358,25 +316,9 @@ with gr.Blocks(title="Small Object Detection") as app:
                         interactive=False,
                     )
-            # Show/hide threshold sliders based on classifier choice
-            def update_threshold_visibility(choice):
-                is_jina = (choice == "Jina-CLIP-v2 (few-shot)")
-                return (
-                    gr.update(visible=is_jina),      # threshold_slider
-                    gr.update(visible=is_jina),      # gap_slider
-                    gr.update(visible=not is_jina),  # siglip_threshold_slider
-                )
-            classifier_radio.change(
-                fn=update_threshold_visibility,
-                inputs=[classifier_radio],
-                outputs=[threshold_slider, gap_slider, siglip_threshold_slider],
-            )
             btn_dfine.click(
                 fn=run_dfine_classify,
-                inputs=[inp_dfine, refs_path, dfine_threshold_slider, dfine_model_radio,
-                        threshold_slider, gap_slider, siglip_threshold_slider, classifier_radio],
                 outputs=[out_gallery_dfine, out_gallery_known, out_status_dfine],
                 concurrency_limit=1,
             )

+""" Gradio app: Tab 1 = Object Detection (YOLO models/v1), Tab 2 = D-FINE + SigLIP Classify. """
 import os
 os.environ["YOLO_CONFIG_DIR"] = os.environ.get("YOLO_CONFIG_DIR", "/tmp")
 from ultralytics import YOLO
 from pathlib import Path
+# Tab 2: D-FINE runs first, then SigLIP for crop classification
 from dfine_jina_pipeline import run_single_image
     return out_img, det_json
+def run_dfine_classify(image, refs_path, dfine_threshold, dfine_model_choice, siglip_threshold):
+    """Tab 2: D-FINE first, then classify crops with SigLIP.
     Returns (group_crop_gallery, known_crop_gallery, status_message).
     """
     if image is None:
     if not refs.is_dir():
         return [], [], f"Refs folder not found: {refs}"
+    dfine_model = dfine_model_choice.strip().lower() if dfine_model_choice else "large-obj365"
+    conf_thresh = float(siglip_threshold)
     group_crops, known_crops, status = run_single_image(
         image,
         dfine_model=dfine_model,
         det_threshold=float(dfine_threshold),
         conf_threshold=conf_thresh,
+        gap_threshold=0.0,
         min_side=24,
         crop_dedup_iou=0.4,
+        min_display_conf=conf_thresh,
+        classifier="siglip",
     )
     return [(g, None) for g in (group_crops or [])], [(k, None) for k in (known_crops or [])], status or ""
         with gr.TabItem("D-FINE + Classify"):
             gr.Markdown(
+                "**D-FINE** runs first (person/car grouping), then small-object crops are classified with **SigLIP** (zero-shot). "
+                "Choose a D-FINE model (obj365, coco, or obj2coco variants in small/medium/large). "
+                "Uses the **refs** folder names as class labels (e.g. refs/phone/, refs/cigarette/)."
             )
             with gr.Row():
                         height=IMG_HEIGHT
                     )
+                    dfine_model_radio = gr.Dropdown(
+                        choices=[
+                            "small-obj365", "medium-obj365", "large-obj365",
+                            "small-coco", "medium-coco", "large-coco",
+                            "small-obj2coco", "medium-obj2coco", "large-obj2coco",
+                        ],
+                        value="large-obj365",
                         label="D-FINE model",
                     )
                     dfine_threshold_slider = gr.Slider(
                         minimum=0.05,
                         maximum=0.5,
                     )
                     def update_dfine_threshold_default(choice):
+                        if not choice:
+                            return gr.update(value=0.15)
+                        size = choice.strip().lower().split("-")[0]
+                        defaults = {"large": 0.2, "medium": 0.15, "small": 0.1}
+                        return gr.update(value=defaults.get(size, 0.15))
                     dfine_model_radio.change(
                         fn=update_dfine_threshold_default,
                         outputs=[dfine_threshold_slider],
                     )
+                    siglip_threshold_slider = gr.Slider(
+                        minimum=0.001,
+                        maximum=0.1,
+                        value=0.01,
+                        step=0.001,
+                        label="SigLIP: min confidence threshold",
+                    )
                     refs_path = gr.Textbox(
                         label="Refs folder path",
                         value=REFS_DIR,
                 with gr.Column(scale=1):
                     out_gallery_dfine = gr.Gallery(
                         label="Person/car crops (all D-FINE objects inside drawn with label + score)",
                         height=IMG_HEIGHT,
                         interactive=False,
                     )
             btn_dfine.click(
                 fn=run_dfine_classify,
+                inputs=[inp_dfine, refs_path, dfine_threshold_slider, dfine_model_radio, siglip_threshold_slider],
                 outputs=[out_gallery_dfine, out_gallery_known, out_status_dfine],
                 concurrency_limit=1,
             )

dfine_jina_pipeline.py CHANGED Viewed

@@ -211,7 +211,7 @@ def parse_args():
     p.add_argument("--text-weight", type=float, default=0.3)
     p.add_argument("--max-images", type=int, default=None)
     p.add_argument("--device", default=None)
-    p.add_argument("--dfine-model", choices=["medium", "large"], default="large", help="D-FINE model size")
     return p.parse_args()
@@ -303,7 +303,7 @@ def main():
         raise SystemExit(f"No images in {input_dir}")
     # Load D-FINE
-    dfine_model_id = DFINE_MODEL_IDS.get(args.dfine_model, DFINE_MODEL_IDS["large"])
     print(f"[*] Loading D-FINE ({dfine_model_id})...")
     t0 = time.perf_counter()
     image_processor = AutoImageProcessor.from_pretrained(dfine_model_id)
@@ -501,7 +501,20 @@ def main():
 _APP_DFINE = None  # (model_id, image_processor, dfine_model, person_car_ids)
 _APP_CLASSIFIERS = {}  # {classifier_name: (classifier_instance, refs_dir_str)}
-DFINE_MODEL_IDS = {"medium": "ustc-community/dfine-medium-obj365", "large": "ustc-community/dfine-large-obj365"}
 CLASSIFIER_CHOICES = ["jina", "siglip", "siglip2_onnx"]
@@ -574,9 +587,9 @@ def run_single_image(
     if not refs_dir.is_dir():
         return [], [], f"Refs folder not found: {refs_dir}"
-    dfine_model = (dfine_model or "large").strip().lower()
     if dfine_model not in DFINE_MODEL_IDS:
-        dfine_model = "large"
     model_id = DFINE_MODEL_IDS[dfine_model]
     device = device or ("cuda" if torch.cuda.is_available() else "cpu")

     p.add_argument("--text-weight", type=float, default=0.3)
     p.add_argument("--max-images", type=int, default=None)
     p.add_argument("--device", default=None)
+    p.add_argument("--dfine-model", choices=list(DFINE_MODEL_IDS.keys()), default="large-obj365", help="D-FINE model")
     return p.parse_args()
         raise SystemExit(f"No images in {input_dir}")
     # Load D-FINE
+    dfine_model_id = DFINE_MODEL_IDS.get(args.dfine_model, DFINE_MODEL_IDS["large-obj365"])
     print(f"[*] Loading D-FINE ({dfine_model_id})...")
     t0 = time.perf_counter()
     image_processor = AutoImageProcessor.from_pretrained(dfine_model_id)
 _APP_DFINE = None  # (model_id, image_processor, dfine_model, person_car_ids)
 _APP_CLASSIFIERS = {}  # {classifier_name: (classifier_instance, refs_dir_str)}
+DFINE_MODEL_IDS = {
+    # obj365
+    "small-obj365": "ustc-community/dfine-small-obj365",
+    "medium-obj365": "ustc-community/dfine-medium-obj365",
+    "large-obj365": "ustc-community/dfine-large-obj365",
+    # coco
+    "small-coco": "ustc-community/dfine-small-coco",
+    "medium-coco": "ustc-community/dfine-medium-coco",
+    "large-coco": "ustc-community/dfine-large-coco",
+    # obj2coco
+    "small-obj2coco": "ustc-community/dfine-small-obj2coco",
+    "medium-obj2coco": "ustc-community/dfine-medium-obj2coco",
+    "large-obj2coco": "ustc-community/dfine-large-obj2coco-e25",
+}
 CLASSIFIER_CHOICES = ["jina", "siglip", "siglip2_onnx"]
     if not refs_dir.is_dir():
         return [], [], f"Refs folder not found: {refs_dir}"
+    dfine_model = (dfine_model or "large-obj365").strip().lower()
     if dfine_model not in DFINE_MODEL_IDS:
+        dfine_model = "large-obj365"
     model_id = DFINE_MODEL_IDS[dfine_model]
     device = device or ("cuda" if torch.cuda.is_available() else "cpu")