AttrLLM

Sleeping

App Files Files Community

Stephentao-30 committed on Apr 21

Commit

069a1f6

1 Parent(s): d74d0ca

Interaction view: always number patches 1-16, even if upstream rename skipped

Browse files

Files changed (1) hide show

visualization/plotting/benchmark_interaction.py +36 -15

visualization/plotting/benchmark_interaction.py CHANGED Viewed

@@ -91,35 +91,43 @@ def create_benchmark_interaction_html(
     n_segs = 0
     if clip_summary:
-        for item in clip_summary.get("image_region_values", []):
-            # Use the actual segment number so the index matches segment_bboxes
-            # and the label-map canvas (always in color-order 0, 1, 2, …).
             #   seg_6       → 6           (UnSAM)
             #   patch_1_2   → 1*grid+2    (raw patch-grid; assumes grid=sqrt(n))
             #   "7"         → 7 - 1 = 6   (post-rename patch-grid)
-            label = item["label"]
             seg_num = n_segs  # fallback: sequential
-            if label.startswith("seg_"):
                 try:
-                    seg_num = int(label.split("_", 1)[1])
                 except (ValueError, IndexError):
                     pass
-            elif label.startswith("patch_"):
                 try:
-                    _, r_str, c_str = label.split("_", 2)
-                    total_regions = len(clip_summary.get("image_region_values", []))
-                    grid = int(round(total_regions ** 0.5)) or 4
-                    seg_num = int(r_str) * grid + int(c_str)
                 except (ValueError, IndexError):
                     pass
-            elif label.isdigit():
                 try:
-                    seg_num = int(label) - 1
                 except ValueError:
                     pass
             regions.append({
                 "index": seg_num,
-                "label": label,
                 "value": item["value"],
                 "type": "segment",
             })
@@ -173,9 +181,22 @@ def create_benchmark_interaction_html(
         # Map subword token labels to whole caption words.
         from .medical_charts import _tok_to_word
         cross_source = all_cross_modal_pairs or clip_summary.get("cross_modal_interactions", [])
         for item in cross_source:
             cross_interactions.append({
-                "seg": item["pair"][0],
                 "tok": _tok_to_word(item["pair"][1], caption) if caption else item["pair"][1].replace("tok:", "").lstrip("#"),
                 "value": item["value"],
             })

     n_segs = 0
     if clip_summary:
+        raw_items = clip_summary.get("image_region_values", [])
+        total_regions = len(raw_items)
+        grid_guess = int(round(total_regions ** 0.5))
+        looks_like_patch_grid = (grid_guess * grid_guess == total_regions) and all(
+            str(it.get("label", "")).startswith("patch_")
+            or str(it.get("label", "")).isdigit()
+            for it in raw_items
+        )
+        for item in raw_items:
+            # Resolve the segment number:
             #   seg_6       → 6           (UnSAM)
             #   patch_1_2   → 1*grid+2    (raw patch-grid; assumes grid=sqrt(n))
             #   "7"         → 7 - 1 = 6   (post-rename patch-grid)
+            raw_label = str(item["label"])
             seg_num = n_segs  # fallback: sequential
+            if raw_label.startswith("seg_"):
                 try:
+                    seg_num = int(raw_label.split("_", 1)[1])
                 except (ValueError, IndexError):
                     pass
+            elif raw_label.startswith("patch_"):
                 try:
+                    _, r_str, c_str = raw_label.split("_", 2)
+                    seg_num = int(r_str) * grid_guess + int(c_str)
                 except (ValueError, IndexError):
                     pass
+            elif raw_label.isdigit():
                 try:
+                    seg_num = int(raw_label) - 1
                 except ValueError:
                     pass
+            # Display label: in patch-grid mode always show "1".."N" in reading
+            # order so the overlay doesn't leak raw "patch_r_c" text.
+            display_label = str(seg_num + 1) if looks_like_patch_grid else raw_label
             regions.append({
                 "index": seg_num,
+                "label": display_label,
                 "value": item["value"],
                 "type": "segment",
             })
         # Map subword token labels to whole caption words.
         from .medical_charts import _tok_to_word
         cross_source = all_cross_modal_pairs or clip_summary.get("cross_modal_interactions", [])
+        def _seg_display(seg_raw: str) -> str:
+            # Normalize cross-pair segment labels the same way we normalized
+            # region labels above — otherwise arrows can't match regions.
+            s = str(seg_raw)
+            if looks_like_patch_grid and s.startswith("patch_"):
+                try:
+                    _, rr, cc = s.split("_", 2)
+                    return str(int(rr) * grid_guess + int(cc) + 1)
+                except (ValueError, IndexError):
+                    return s
+            return s
         for item in cross_source:
             cross_interactions.append({
+                "seg": _seg_display(item["pair"][0]),
                 "tok": _tok_to_word(item["pair"][1], caption) if caption else item["pair"][1].replace("tok:", "").lstrip("#"),
                 "value": item["value"],
             })