orik-ss committed on
Commit
100dbc1
·
1 Parent(s): 7905374

Updated siglip labels

Browse files
app.py CHANGED
@@ -142,10 +142,7 @@ def run_dfine_classify(image, refs_path, dfine_threshold, dfine_model_choice, mi
142
  classifier=classifier,
143
  )
144
 
145
- if status is not None:
146
- return [(g, None) for g in (group_crops or [])], [(k, None) for k in (known_crops or [])], status
147
-
148
- return [(g, None) for g in group_crops], [(k, None) for k in known_crops], ""
149
 
150
 
151
  IMG_HEIGHT = 400
@@ -332,8 +329,8 @@ with gr.Blocks(title="Small Object Detection") as app:
332
  )
333
 
334
  out_status_dfine = gr.Textbox(
335
- label="Status",
336
- lines=2,
337
  interactive=False,
338
  )
339
 
 
142
  classifier=classifier,
143
  )
144
 
145
+ return [(g, None) for g in (group_crops or [])], [(k, None) for k in (known_crops or [])], status or ""
 
 
 
146
 
147
 
148
  IMG_HEIGHT = 400
 
329
  )
330
 
331
  out_status_dfine = gr.Textbox(
332
+ label="Classification details",
333
+ lines=8,
334
  interactive=False,
335
  )
336
 
dfine_jina_pipeline.py CHANGED
@@ -618,6 +618,7 @@ def run_single_image(
618
 
619
  results_per_crop = []
620
  group_crop_images = []
 
621
 
622
  # For each person/car group: crop (with 10% margin), run D-FINE on crop, detect objects, then classify each
623
  for gidx, grp in enumerate(top_groups):
@@ -702,6 +703,14 @@ def run_single_image(
702
  conf = result["confidence"]
703
  results_per_crop.append((gidx, (bx1, by1, bx2, by2), small_crop, pred, conf))
704
 
 
 
 
 
 
 
 
 
705
  # Draw bboxes on this group crop (bboxes already in crop coords)
706
  boxes_to_draw = [
707
  (bx1, by1, bx2, by2, pred, conf)
@@ -714,8 +723,11 @@ def run_single_image(
714
  crop_pil_drawn = crop_pil
715
  group_crop_images.append(np.array(crop_pil_drawn))
716
 
 
 
 
717
  if not results_per_crop:
718
- return group_crop_images if group_crop_images else [], [], "No small-object crops: D-FINE on person/car crops did not detect any object (gun/phone/etc.), or all were below min size."
719
 
720
  # Build known-only gallery: only objects with conf >= min_display_conf
721
  known_crop_composites = []
@@ -725,7 +737,7 @@ def run_single_image(
725
  composite = draw_label_on_image(crop_pil, pred, conf)
726
  known_crop_composites.append(np.array(composite))
727
 
728
- return group_crop_images, known_crop_composites, None
729
 
730
 
731
  if __name__ == "__main__":
 
618
 
619
  results_per_crop = []
620
  group_crop_images = []
621
+ classification_log = []
622
 
623
  # For each person/car group: crop (with 10% margin), run D-FINE on crop, detect objects, then classify each
624
  for gidx, grp in enumerate(top_groups):
 
703
  conf = result["confidence"]
704
  results_per_crop.append((gidx, (bx1, by1, bx2, by2), small_crop, pred, conf))
705
 
706
+ # Build per-crop log line
707
+ sims_str = ", ".join(f"{k}: {v:.4f}" for k, v in result.get("all_sims", {}).items())
708
+ classification_log.append(
709
+ f"[group {gidx}] dfine: {d['label']} ({d['conf']:.3f}) → "
710
+ f"{pred} (conf={conf:.4f}, gap={result['gap']:.4f}, 2nd={result.get('second_best','?')}) "
711
+ f"| {result['status']} | {sims_str}"
712
+ )
713
+
714
  # Draw bboxes on this group crop (bboxes already in crop coords)
715
  boxes_to_draw = [
716
  (bx1, by1, bx2, by2, pred, conf)
 
723
  crop_pil_drawn = crop_pil
724
  group_crop_images.append(np.array(crop_pil_drawn))
725
 
726
+ log_text = f"Classifier: {classifier} | {len(results_per_crop)} crops classified\n"
727
+ log_text += "\n".join(classification_log) if classification_log else "(no crops)"
728
+
729
  if not results_per_crop:
730
+ return group_crop_images if group_crop_images else [], [], log_text + "\nNo small-object crops: D-FINE on person/car crops did not detect any object (gun/phone/etc.), or all were below min size."
731
 
732
  # Build known-only gallery: only objects with conf >= min_display_conf
733
  known_crop_composites = []
 
737
  composite = draw_label_on_image(crop_pil, pred, conf)
738
  known_crop_composites.append(np.array(composite))
739
 
740
+ return group_crop_images, known_crop_composites, log_text
741
 
742
 
743
  if __name__ == "__main__":
siglip2_onnx_zeroshot.py CHANGED
@@ -13,7 +13,7 @@ from PIL import Image
13
  from huggingface_hub import hf_hub_download
14
  from transformers import AutoProcessor
15
 
16
- from jina_fewshot import CLASS_PROMPTS, IMAGE_EXTS
17
 
18
 
19
  REPO_ID = "onnx-community/siglip2-large-patch16-256-ONNX"
@@ -137,15 +137,9 @@ class SigLIP2ONNXClassifier:
137
  if not self.labels:
138
  raise ValueError(f"No subfolders in {refs_dir}")
139
 
140
- text_prompts = []
141
- for name in self.labels:
142
- prompts = CLASS_PROMPTS.get(name, [f"a {name}"])
143
- text_prompts.append(prompts[0])
144
 
145
- self._text_embeds = self._encode_texts(text_prompts)
146
-
147
- print(f" SigLIP2 ONNX classes: {self.labels}")
148
- print(f" Text prompts: {text_prompts}")
149
  print(f" Text embeds shape: {self._text_embeds.shape}")
150
 
151
  def classify_crop(self, crop, conf_threshold, gap_threshold):
 
13
  from huggingface_hub import hf_hub_download
14
  from transformers import AutoProcessor
15
 
16
+ from jina_fewshot import IMAGE_EXTS
17
 
18
 
19
  REPO_ID = "onnx-community/siglip2-large-patch16-256-ONNX"
 
137
  if not self.labels:
138
  raise ValueError(f"No subfolders in {refs_dir}")
139
 
140
+ self._text_embeds = self._encode_texts(self.labels)
 
 
 
141
 
142
+ print(f" SigLIP2 ONNX labels: {self.labels}")
 
 
 
143
  print(f" Text embeds shape: {self._text_embeds.shape}")
144
 
145
  def classify_crop(self, crop, conf_threshold, gap_threshold):
siglip_zeroshot.py CHANGED
@@ -11,9 +11,6 @@ import numpy as np
11
  import torch
12
  from transformers import SiglipModel, AutoProcessor
13
 
14
- from jina_fewshot import CLASS_PROMPTS
15
-
16
-
17
  class SigLIPClassifier:
18
  """Zero-shot crop classifier using SigLIP (PyTorch)."""
19
 
@@ -27,25 +24,17 @@ class SigLIPClassifier:
27
  self.processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")
28
 
29
  self.labels = []
30
- self._text_prompts = []
31
 
32
  print(f"[*] SigLIP loaded in {time.perf_counter() - t0:.1f}s (device={device})")
33
 
34
  def build_refs(self, refs_dir, **kwargs):
35
- """Extract class names from refs_dir subfolders. No images needed."""
36
  refs_dir = Path(refs_dir)
37
  self.labels = sorted(d.name for d in refs_dir.iterdir() if d.is_dir())
38
  if not self.labels:
39
  raise ValueError(f"No subfolders in {refs_dir}")
40
 
41
- # Build one prompt per class (first from CLASS_PROMPTS, fallback to "a {name}")
42
- self._text_prompts = []
43
- for name in self.labels:
44
- prompts = CLASS_PROMPTS.get(name, [f"a {name}"])
45
- self._text_prompts.append(prompts[0])
46
-
47
- print(f" SigLIP classes: {self.labels}")
48
- print(f" Text prompts: {self._text_prompts}")
49
 
50
  def classify_crop(self, crop, conf_threshold, gap_threshold):
51
  """
@@ -53,7 +42,7 @@ class SigLIPClassifier:
53
  Returns dict matching jina_fewshot.classify() format.
54
  """
55
  inputs = self.processor(
56
- text=self._text_prompts,
57
  images=crop,
58
  return_tensors="pt",
59
  padding="max_length",
 
11
  import torch
12
  from transformers import SiglipModel, AutoProcessor
13
 
 
 
 
14
  class SigLIPClassifier:
15
  """Zero-shot crop classifier using SigLIP (PyTorch)."""
16
 
 
24
  self.processor = AutoProcessor.from_pretrained("google/siglip-base-patch16-224")
25
 
26
  self.labels = []
 
27
 
28
  print(f"[*] SigLIP loaded in {time.perf_counter() - t0:.1f}s (device={device})")
29
 
30
  def build_refs(self, refs_dir, **kwargs):
31
+ """Extract class names from refs_dir subfolders as plain labels."""
32
  refs_dir = Path(refs_dir)
33
  self.labels = sorted(d.name for d in refs_dir.iterdir() if d.is_dir())
34
  if not self.labels:
35
  raise ValueError(f"No subfolders in {refs_dir}")
36
 
37
+ print(f" SigLIP labels: {self.labels}")
 
 
 
 
 
 
 
38
 
39
  def classify_crop(self, crop, conf_threshold, gap_threshold):
40
  """
 
42
  Returns dict matching jina_fewshot.classify() format.
43
  """
44
  inputs = self.processor(
45
+ text=self.labels,
46
  images=crop,
47
  return_tensors="pt",
48
  padding="max_length",