Stephentao-30
Interaction view: display overlay.png instead of segmap.png to avoid burned-in patch labels
77657f1 | """ | |
| benchmark_interaction.py — Interactive cross-modal visualization for the benchmark tab. | |
| Creates a self-contained HTML component that shows: | |
| - Image with real UnSAM segment outlines (clickable via hidden canvas label map) | |
| - Caption tokens below the image (clickable, colored by Shapley value) | |
| - Arrows/lines connecting image regions to their most interacting tokens | |
| - Click a region → highlights linked tokens; click a token → highlights linked regions | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import uuid | |
| from html import escape | |
| from typing import Any, Dict, List, Optional, Sequence, Tuple | |
def _value_to_color(value: float, max_abs: float, single_color: bool = False) -> str:
    """Map a score to an opaque CSS ``rgb(...)`` colour string.

    The colour is a linear blend from a neutral grey towards a target hue,
    weighted by ``abs(value) / max_abs`` clamped to 1.0.

    Args:
        value: Score to colour.
        max_abs: Normalisation denominator (callers in this file pass the
            largest absolute score of the set being rendered).
        single_color: When true the sign of ``value`` is ignored and the
            blend always goes towards blue. Otherwise non-negative values
            blend towards green and negative values towards red.

    Returns:
        A string of the form ``"rgb(r, g, b)"``. The neutral grey is
        returned when ``max_abs`` is not a positive number or when
        ``value`` is NaN.
    """
    import math

    base = (225, 225, 223)  # neutral grey shared by every colour ramp

    # ``not (max_abs > 0)`` also rejects a NaN scale, which ``max_abs <= 0``
    # would let through.  A NaN value must be caught explicitly as well:
    # ``min(1.0, nan)`` evaluates to 1.0 and ``nan >= 0`` is false, so an
    # undefined score would otherwise be painted as full-strength negative.
    if not (max_abs > 0) or math.isnan(value):
        return "rgb(225, 225, 223)"

    norm = min(1.0, abs(value) / max_abs)

    if single_color:
        target = (52, 102, 177)  # influence neutral blue
    elif value >= 0:
        target = (1, 109, 1)
    else:
        target = (221, 19, 19)

    r = int(round(base[0] + (target[0] - base[0]) * norm))
    g = int(round(base[1] + (target[1] - base[1]) * norm))
    b = int(round(base[2] + (target[2] - base[2]) * norm))
    return f"rgb({r}, {g}, {b})"
def _value_to_rgba(value: float, max_abs: float, alpha: float = 0.5) -> str:
    """Map a signed score to a translucent CSS ``rgba(...)`` colour string.

    Non-negative values are green and negative values are red. The alpha
    channel is ``alpha * min(1, abs(value) / max_abs)``, formatted with two
    decimals.

    Args:
        value: Score to colour.
        max_abs: Normalisation denominator (callers in this file pass the
            largest absolute score of the set being rendered).
        alpha: Opacity used when ``abs(value)`` reaches ``max_abs``.

    Returns:
        A string of the form ``"rgba(r, g, b, a)"``. A faint grey fill is
        returned when ``max_abs`` is not a positive number or when ``value``
        is NaN.
    """
    import math

    # ``not (max_abs > 0)`` also rejects a NaN scale.  A NaN value is caught
    # explicitly because ``min(1.0, nan)`` is 1.0 and ``nan >= 0`` is false,
    # which would otherwise render an undefined score as strongly negative.
    if not (max_abs > 0) or math.isnan(value):
        return "rgba(200, 200, 200, 0.1)"

    norm = min(1.0, abs(value) / max_abs)
    if value >= 0:
        return f"rgba(1, 109, 1, {alpha * norm:.2f})"
    return f"rgba(221, 19, 19, {alpha * norm:.2f})"
| def create_benchmark_interaction_html( | |
| image_b64: str, | |
| clip_summary: Optional[Dict[str, Any]], | |
| vllm_logprob: Optional[Dict[str, Any]], | |
| caption: str = "", | |
| all_cross_modal_pairs: Optional[List[Dict[str, Any]]] = None, | |
| segmap_b64: str = "", | |
| overlay_b64: str = "", | |
| segment_bboxes: Optional[List[Optional[Dict[str, float]]]] = None, | |
| label_map_b64: str = "", | |
| image_width: int = 0, | |
| image_height: int = 0, | |
| title: str = "Cross-Modal Interaction View", | |
| method_label: str = "Shapley", | |
| ) -> str: | |
| """ | |
| Build a self-contained HTML + JS component for the benchmark tab. | |
| Shows: | |
| - Image panel (left) with real UnSAM segment outlines and clickable regions | |
| - Token panel (right) with clickable colored tokens | |
| - SVG arrow layer connecting regions to tokens on click | |
| - Cross-modal interaction list | |
| When segment_bboxes and label_map_b64 are provided, uses canvas-based | |
| pixel-level click detection for accurate segment selection matching | |
| the real UnSAM segmentation. Falls back to horizontal strips otherwise. | |
| """ | |
| if not image_b64 and not clip_summary: | |
| return "<div style='padding:20px;color:#888;'>No data available. Select an example.</div>" | |
| view_id = f"bm-{uuid.uuid4().hex[:8]}" | |
| is_influence = (method_label or "").lower() == "influence" | |
| # Prepare image URL | |
| img_url = image_b64 | |
| if img_url and not img_url.startswith("data:"): | |
| img_url = f"data:image/png;base64,{img_url}" | |
| # Extract CLIP data | |
| regions = [] # {index, label, value} | |
| tokens = [] # {index, label, value} | |
| cross_interactions = [] # {seg, tok, value} | |
| n_segs = 0 | |
| if clip_summary: | |
| raw_items = clip_summary.get("image_region_values", []) | |
| total_regions = len(raw_items) | |
| grid_guess = int(round(total_regions ** 0.5)) | |
| looks_like_patch_grid = (grid_guess * grid_guess == total_regions) and all( | |
| str(it.get("label", "")).startswith("patch_") | |
| or str(it.get("label", "")).isdigit() | |
| for it in raw_items | |
| ) | |
| for item in raw_items: | |
| # Resolve the segment number: | |
| # seg_6 → 6 (UnSAM) | |
| # patch_1_2 → 1*grid+2 (raw patch-grid; assumes grid=sqrt(n)) | |
| # "7" → 7 - 1 = 6 (post-rename patch-grid) | |
| raw_label = str(item["label"]) | |
| seg_num = n_segs # fallback: sequential | |
| if raw_label.startswith("seg_"): | |
| try: | |
| seg_num = int(raw_label.split("_", 1)[1]) | |
| except (ValueError, IndexError): | |
| pass | |
| elif raw_label.startswith("patch_"): | |
| try: | |
| _, r_str, c_str = raw_label.split("_", 2) | |
| seg_num = int(r_str) * grid_guess + int(c_str) | |
| except (ValueError, IndexError): | |
| pass | |
| elif raw_label.isdigit(): | |
| try: | |
| seg_num = int(raw_label) - 1 | |
| except ValueError: | |
| pass | |
| # Display label: in patch-grid mode always show "1".."N" in reading | |
| # order so the overlay doesn't leak raw "patch_r_c" text. | |
| display_label = str(seg_num + 1) if looks_like_patch_grid else raw_label | |
| regions.append({ | |
| "index": seg_num, | |
| "label": display_label, | |
| "value": item["value"], | |
| "type": "segment", | |
| }) | |
| n_segs += 1 | |
| # Build a lookup from CLIP token labels to values. | |
| # Also build a ##-stripped version for subword matching. | |
| clip_tok_values: Dict[str, float] = {} | |
| for item in clip_summary.get("token_values", []): | |
| tok_label = item["label"].replace("tok:", "") | |
| clip_tok_values[tok_label] = item["value"] | |
| # Collect tokens with ## stripped for substring matching | |
| clip_tok_set = set(clip_tok_values.keys()) | |
| # Include ALL words from the full caption, not just CLIP's top-k | |
| if caption: | |
| words = caption.replace(".", " .").replace(",", " ,").replace("(", " (").replace(")", " )").split() | |
| for i, word in enumerate(words): | |
| value = clip_tok_values.get(word, 0.0) | |
| matched_tok = word if value != 0.0 else None | |
| if value == 0.0: | |
| value = clip_tok_values.get(word.lower(), 0.0) | |
| matched_tok = word.lower() if value != 0.0 else None | |
| if value == 0.0: | |
| # Sum all matching subword tokens (strip ## before matching) | |
| total = 0.0 | |
| for tok in clip_tok_set: | |
| tok_clean = tok.lstrip("#") | |
| if len(tok_clean) >= 3 and tok_clean.lower() in word.lower(): | |
| total += clip_tok_values[tok] | |
| matched_tok = tok | |
| value = total | |
| tokens.append({ | |
| "index": i, | |
| "label": word, | |
| "value": value, | |
| "_matched_tok": matched_tok, | |
| }) | |
| else: | |
| for i, item in enumerate(clip_summary.get("token_values", [])): | |
| tok_label = item["label"].replace("tok:", "").lstrip("#") | |
| tokens.append({ | |
| "index": i, | |
| "label": tok_label, | |
| "value": item["value"], | |
| "_matched_tok": tok_label, | |
| }) | |
| # Use ALL cross-modal pairs if provided, else fall back to top-5. | |
| # Map subword token labels to whole caption words. | |
| from .medical_charts import _tok_to_word | |
| cross_source = all_cross_modal_pairs or clip_summary.get("cross_modal_interactions", []) | |
| def _seg_display(seg_raw: str) -> str: | |
| # Normalize cross-pair segment labels the same way we normalized | |
| # region labels above — otherwise arrows can't match regions. | |
| s = str(seg_raw) | |
| if looks_like_patch_grid and s.startswith("patch_"): | |
| try: | |
| _, rr, cc = s.split("_", 2) | |
| return str(int(rr) * grid_guess + int(cc) + 1) | |
| except (ValueError, IndexError): | |
| return s | |
| return s | |
| for item in cross_source: | |
| cross_interactions.append({ | |
| "seg": _seg_display(item["pair"][0]), | |
| "tok": _tok_to_word(item["pair"][1], caption) if caption else item["pair"][1].replace("tok:", "").lstrip("#"), | |
| "value": item["value"], | |
| }) | |
| # Determine if we have real segment bounding boxes | |
| has_real_bboxes = ( | |
| segment_bboxes is not None | |
| and len(segment_bboxes) == n_segs | |
| and any(b is not None for b in segment_bboxes) | |
| ) | |
| has_label_map = bool(label_map_b64) | |
| # Build region overlays: real bboxes if available, else horizontal strips | |
| # When a pixel-accurate label map is available, skip the rectangular div | |
| # overlays entirely — the segmap image already shows real segment contours. | |
| # We still render small labels at segment centers for identification. | |
| region_overlays_html = "" | |
| max_abs_r = max((abs(r["value"]) for r in regions), default=1.0) or 1.0 | |
| if has_label_map and has_real_bboxes and n_segs > 0: | |
| # Label-map mode: no rectangular divs, just center labels. | |
| # When segments look like a patch grid (perfect-square count and numeric | |
| # or patch-style labels), show reading-order numbers 1..N so the overlay | |
| # reads left-to-right top-to-bottom without the raw "patch_r_c" noise. | |
| grid = int(round(n_segs ** 0.5)) | |
| is_patch_grid = (grid * grid == n_segs) and all( | |
| str(r["label"]).isdigit() or str(r["label"]).startswith("patch_") | |
| for r in regions | |
| ) | |
| for r in regions: | |
| r_idx = r["index"] | |
| bbox = segment_bboxes[r_idx] if r_idx < len(segment_bboxes) else None | |
| if bbox is None: | |
| continue | |
| r_label = escape(str(r_idx + 1) if is_patch_grid else r["label"]) | |
| r_value = r["value"] | |
| region_overlays_html += ( | |
| f"<span class='bm-seg-label' data-idx='{r_idx}' " | |
| f"style='left:{bbox['cx_pct']:.1f}%;top:{bbox['cy_pct']:.1f}%;'>" | |
| f"{r_label}</span>" | |
| ) | |
| elif has_real_bboxes and n_segs > 0: | |
| # No label map — use rectangular bounding box divs as fallback | |
| for r in regions: | |
| r_idx = r["index"] | |
| bbox = segment_bboxes[r_idx] if r_idx < len(segment_bboxes) else None | |
| if bbox is None: | |
| continue | |
| r_label = escape(r["label"]) | |
| r_value = r["value"] | |
| if is_influence: | |
| norm = min(1.0, abs(r_value) / max_abs_r) if max_abs_r else 0.0 | |
| bg = f"rgba(52,102,177,{0.25*norm:.2f})" | |
| border_color = "rgba(52,102,177,0.8)" | |
| val_str = f"{r_value:.2f}" | |
| title_str = f"{r_label}: {r_value:.3f}" | |
| else: | |
| bg = _value_to_rgba(r_value, max_abs_r, 0.25) | |
| border_color = "rgba(1,109,1,0.8)" if r_value >= 0 else "rgba(200,40,40,0.8)" | |
| val_str = f"{r_value:+.2f}" | |
| title_str = f"{r_label}: {r_value:+.3f}" | |
| region_overlays_html += ( | |
| f"<div class='bm-region-box' data-idx='{r_idx}' " | |
| f"title='{title_str}' " | |
| f"style='left:{bbox['x0_pct']:.2f}%;top:{bbox['y0_pct']:.2f}%;" | |
| f"width:{bbox['w_pct']:.2f}%;height:{bbox['h_pct']:.2f}%;" | |
| f"background:{bg};border:2px solid {border_color};'>" | |
| f"<span class='bm-box-label'>{r_label} ({val_str})</span>" | |
| f"</div>" | |
| ) | |
| elif n_segs > 0: | |
| # Fallback: horizontal strips | |
| strip_h = 100.0 / n_segs | |
| for r in regions: | |
| r_idx = r["index"] | |
| r_label = escape(r["label"]) | |
| r_value = r["value"] | |
| top_pct = r_idx * strip_h | |
| if is_influence: | |
| norm = min(1.0, abs(r_value) / max_abs_r) if max_abs_r else 0.0 | |
| bg = f"rgba(52,102,177,{0.3*norm:.2f})" | |
| border_color = "rgba(52,102,177,0.6)" | |
| val_str = f"{r_value:.2f}" | |
| title_str = f"{r_label}: {r_value:.3f}" | |
| else: | |
| bg = _value_to_rgba(r_value, max_abs_r, 0.3) | |
| border_color = "rgba(1,109,1,0.6)" if r_value >= 0 else "rgba(200,40,40,0.6)" | |
| val_str = f"{r_value:+.2f}" | |
| title_str = f"{r_label}: {r_value:+.3f}" | |
| region_overlays_html += ( | |
| f"<div class='bm-region-box' data-idx='{r_idx}' " | |
| f"title='{title_str}' " | |
| f"style='top:{top_pct:.1f}%;height:{strip_h:.1f}%;left:0;width:100%;" | |
| f"background:{bg};border-bottom:2px solid {border_color};'>" | |
| f"<span class='bm-box-label'>{r_label} ({val_str})</span>" | |
| f"</div>" | |
| ) | |
| # Prefer overlay.png over segmap.png — the segmap has patch labels | |
| # ("patch_0_0" etc.) burned into the image pixels during precompute, | |
| # which visually dominate the clean 1..16 labels we render in HTML. | |
| # overlay.png has no baked text, so only our HTML spans show through. | |
| display_img_url = img_url | |
| if overlay_b64: | |
| ov_url = overlay_b64 | |
| if ov_url and not ov_url.startswith("data:"): | |
| ov_url = f"data:image/png;base64,{ov_url}" | |
| display_img_url = ov_url | |
| elif segmap_b64: | |
| seg_url = segmap_b64 | |
| if seg_url and not seg_url.startswith("data:"): | |
| seg_url = f"data:image/png;base64,{seg_url}" | |
| display_img_url = seg_url | |
| # Label map data URL for canvas click detection | |
| label_map_url = "" | |
| if has_label_map: | |
| label_map_url = f"data:image/png;base64,{label_map_b64}" | |
| # Build token chips | |
| max_abs_t = max((abs(t["value"]) for t in tokens), default=1.0) or 1.0 | |
| tokens_html = "" | |
| for t in tokens: | |
| color = _value_to_color(t["value"], max_abs_t, single_color=is_influence) | |
| t_idx = t["index"] | |
| t_label = escape(t["label"]) | |
| t_value = t["value"] | |
| t_title = (f"{t_label}: {t_value:.4f}" if is_influence | |
| else f"{t_label}: {t_value:+.4f}") | |
| tokens_html += ( | |
| f"<span class='bm-token' data-idx='{t_idx}' " | |
| f"title='{t_title}' " | |
| f"style='background:{color};'>" | |
| f"{t_label}" | |
| f"</span>" | |
| ) | |
| # Build cross-modal link data for JS | |
| seg_idx_map = {r["label"]: r["index"] for r in regions} | |
| clip_tok_to_word_indices: Dict[str, List[int]] = {} | |
| for t in tokens: | |
| matched = t.get("_matched_tok") | |
| if matched: | |
| clip_tok_to_word_indices.setdefault(matched, []).append(t["index"]) | |
| clip_tok_to_word_indices.setdefault(matched.lower(), []).append(t["index"]) | |
| links = [] | |
| seen_links: set = set() | |
| for ci in cross_interactions: | |
| si = seg_idx_map.get(ci["seg"]) | |
| if si is None: | |
| continue | |
| tok_key = ci["tok"] | |
| word_indices = (clip_tok_to_word_indices.get(tok_key) or | |
| clip_tok_to_word_indices.get(tok_key.lower()) or []) | |
| for ti in word_indices: | |
| link_key = (si, ti) | |
| if link_key not in seen_links: | |
| seen_links.add(link_key) | |
| tok_display = tokens[ti]["label"] if ti < len(tokens) else tok_key | |
| links.append({"seg": si, "tok": ti, "value": ci["value"], | |
| "seg_label": ci["seg"], "tok_label": tok_display}) | |
| # Pass bounding box data to JS for arrow origin computation | |
| bboxes_json = json.dumps([ | |
| segment_bboxes[i] if (has_real_bboxes and i < len(segment_bboxes)) else None | |
| for i in range(n_segs) | |
| ]) | |
| links_json = json.dumps(links) | |
| # Build regions lookup keyed by actual segment index (not sequential) | |
| regions_by_idx = {} | |
| for r in regions: | |
| regions_by_idx[r["index"]] = {"label": r["label"], "value": r["value"]} | |
| regions_json = json.dumps(regions_by_idx) | |
| # JS for interactivity | |
| js_code = f""" | |
| (function() {{ | |
| const VIEW_ID = '{view_id}'; | |
| const root = document.getElementById(VIEW_ID); | |
| if (!root) return; | |
| // --- Staleness guard --- | |
| // Mark this root with the current view ID so stale closures can detect | |
| // they belong to a replaced component and bail out. | |
| root.setAttribute('data-bm-active', VIEW_ID); | |
| function isStale() {{ | |
| // If root was removed from DOM or replaced by a new render, bail out | |
| if (!document.contains(root)) return true; | |
| return root.getAttribute('data-bm-active') !== VIEW_ID; | |
| }} | |
| const links = {links_json}; | |
| const bboxes = {bboxes_json}; | |
| const regions = {regions_json}; | |
| const nSegs = {n_segs}; | |
| const METHOD_IS_INFLUENCE = {str(is_influence).lower()}; | |
| const INFLUENCE_ARROW_COLOR = 'rgba(52,102,177,0.75)'; | |
| const regionEls = root.querySelectorAll('.bm-region-box'); | |
| const tokenEls = root.querySelectorAll('.bm-token'); | |
| const svg = root.querySelector('.bm-svg'); | |
| const linkList = root.querySelector('.bm-link-list'); | |
| const imgWrap = root.querySelector('.bm-img-wrap'); | |
| const mainImg = root.querySelector('.bm-main-img'); | |
| // --- Highlight canvas: draws real segment contours on click --- | |
| let hlCanvas = root.querySelector('.bm-highlight-canvas'); | |
| let hlCtx = hlCanvas ? hlCanvas.getContext('2d') : null; | |
| function syncHighlightCanvas() {{ | |
| if (!hlCanvas || !mainImg) return; | |
| hlCanvas.width = mainImg.naturalWidth || mainImg.width; | |
| hlCanvas.height = mainImg.naturalHeight || mainImg.height; | |
| }} | |
| if (mainImg) {{ | |
| if (mainImg.complete) syncHighlightCanvas(); | |
| else mainImg.addEventListener('load', syncHighlightCanvas); | |
| }} | |
| // --- Canvas-based label map for pixel-perfect click detection --- | |
| let labelCanvas = null; | |
| let labelCtx = null; | |
| let labelMapReady = false; | |
| const labelMapUrl = '{label_map_url}'; | |
| if (labelMapUrl) {{ | |
| labelCanvas = document.createElement('canvas'); | |
| labelCtx = labelCanvas.getContext('2d', {{willReadFrequently: true}}); | |
| const lmImg = new window.Image(); | |
| lmImg.onload = function() {{ | |
| if (isStale()) return; // Don't populate canvas if replaced | |
| labelCanvas.width = lmImg.naturalWidth; | |
| labelCanvas.height = lmImg.naturalHeight; | |
| labelCtx.drawImage(lmImg, 0, 0); | |
| labelMapReady = true; | |
| buildSegMasks(); | |
| }}; | |
| lmImg.src = labelMapUrl; | |
| }} | |
| // ── Hover-dim state ───────────────────────────────────────────── | |
| // Per-segment boolean masks (Uint8Array, 1=inside seg, 0=outside), | |
| // indexed by the label-map's flat coords so we don't re-read | |
| // getImageData on every mousemove. | |
| let segMasks = new Array(nSegs).fill(null); | |
| let hoveredSeg = -1; | |
| let clickedSeg = -1; | |
| let hoverRafPending = false; | |
| const HOVER_SUPPORTED = window.matchMedia && window.matchMedia('(hover: hover)').matches; | |
| function buildSegMasks() {{ | |
| if (!labelMapReady || !labelCanvas || !labelCtx) return; | |
| const w = labelCanvas.width, h = labelCanvas.height; | |
| const data = labelCtx.getImageData(0, 0, w, h).data; | |
| for (let i = 0; i < nSegs; i++) segMasks[i] = new Uint8Array(w * h); | |
| for (let p = 0, i = 0; p < data.length; p += 4, i++) {{ | |
| const seg = data[p] - 1; | |
| if (seg >= 0 && seg < nSegs) segMasks[seg][i] = 1; | |
| }} | |
| }} | |
| function drawHoverDim(segIdx) {{ | |
| // Dim the entire image except the pixels belonging to `segIdx`. | |
| if (isStale() || !hlCtx || !hlCanvas || !labelCanvas) return; | |
| syncHighlightCanvas(); | |
| hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height); | |
| if (segIdx < 0 || !segMasks[segIdx]) return; | |
| const cw = hlCanvas.width, ch = hlCanvas.height; | |
| const lw = labelCanvas.width, lh = labelCanvas.height; | |
| const mask = segMasks[segIdx]; | |
| const imgData = hlCtx.createImageData(cw, ch); | |
| const px = imgData.data; | |
| const dimAlpha = 140; // ≈55% black | |
| // First: alpha=dim everywhere (rgb=0 by default) | |
| for (let i = 3; i < px.length; i += 4) px[i] = dimAlpha; | |
| // Second: punch through (alpha=0) wherever the hovered seg lives | |
| const sx = lw / cw, sy = lh / ch; | |
| for (let y = 0; y < ch; y++) {{ | |
| const ly = Math.floor(y * sy) * lw; | |
| const rowBase = y * cw * 4 + 3; | |
| for (let x = 0; x < cw; x++) {{ | |
| if (mask[ly + Math.floor(x * sx)]) px[rowBase + x * 4] = 0; | |
| }} | |
| }} | |
| hlCtx.putImageData(imgData, 0, 0); | |
| }} | |
| function getSegAtPixel(clientX, clientY) {{ | |
| if (isStale() || !labelMapReady || !labelCanvas || !labelCtx || !mainImg) return -1; | |
| const rect = mainImg.getBoundingClientRect(); | |
| const rx = (clientX - rect.left) / rect.width; | |
| const ry = (clientY - rect.top) / rect.height; | |
| if (rx < 0 || rx > 1 || ry < 0 || ry > 1) return -1; | |
| const px = Math.floor(rx * labelCanvas.width); | |
| const py = Math.floor(ry * labelCanvas.height); | |
| const data = labelCtx.getImageData(px, py, 1, 1).data; | |
| const segIdx = data[0] - 1; // red channel: index+1, 0=bg | |
| return (segIdx >= 0 && segIdx < nSegs) ? segIdx : -1; | |
| }} | |
| function clearAll() {{ | |
| regionEls.forEach(el => el.classList.remove('bm-active', 'bm-linked')); | |
| tokenEls.forEach(el => el.classList.remove('bm-active', 'bm-linked')); | |
| // Clear segment labels highlight | |
| root.querySelectorAll('.bm-seg-label').forEach(el => el.classList.remove('bm-seg-active')); | |
| if (svg) svg.innerHTML = ''; | |
| if (hlCtx && hlCanvas) hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height); | |
| if (linkList) linkList.innerHTML = '<div class="bm-hint">Click a segment or word to see connections.</div>'; | |
| clickedSeg = -1; | |
| }} | |
| function drawSegmentContour(segIdx) {{ | |
| // Draw the real segment contour on the highlight canvas using the label map | |
| if (isStale() || !labelMapReady || !hlCtx || !hlCanvas || !labelCtx || !labelCanvas) return; | |
| syncHighlightCanvas(); | |
| hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height); | |
| var w = labelCanvas.width, h = labelCanvas.height; | |
| var imgData = labelCtx.getImageData(0, 0, w, h); | |
| var px = imgData.data; | |
| var target = segIdx + 1; // label map red channel = index + 1 | |
| // Find edge pixels: pixels that belong to this segment where at | |
| // least one 4-connected neighbor does NOT belong to this segment | |
| hlCtx.fillStyle = 'rgba(0, 229, 255, 0.9)'; // cyan highlight | |
| var scaleX = hlCanvas.width / w; | |
| var scaleY = hlCanvas.height / h; | |
| for (var y = 0; y < h; y++) {{ | |
| for (var x = 0; x < w; x++) {{ | |
| var idx = (y * w + x) * 4; | |
| if (px[idx] !== target) continue; | |
| // Check 4-connected neighbors | |
| var isEdge = false; | |
| if (x === 0 || px[idx - 4] !== target) isEdge = true; | |
| else if (x === w - 1 || px[idx + 4] !== target) isEdge = true; | |
| else if (y === 0 || px[idx - w * 4] !== target) isEdge = true; | |
| else if (y === h - 1 || px[idx + w * 4] !== target) isEdge = true; | |
| if (isEdge) {{ | |
| hlCtx.fillRect(Math.round(x * scaleX), Math.round(y * scaleY), | |
| Math.max(2, Math.round(scaleX) + 1), | |
| Math.max(2, Math.round(scaleY) + 1)); | |
| }} | |
| }} | |
| }} | |
| }} | |
| function getSegCenter(segIdx) {{ | |
| // Use real bbox center if available | |
| const bb = bboxes[segIdx]; | |
| if (bb) return {{ xPct: bb.cx_pct, yPct: bb.cy_pct }}; | |
| // Fallback: center of horizontal strip | |
| const stripH = 100.0 / nSegs; | |
| return {{ xPct: 50, yPct: segIdx * stripH + stripH / 2 }}; | |
| }} | |
| function drawLine(segIdx, tokIdx, value) {{ | |
| if (!svg || !imgWrap) return; | |
| const rootRect = root.getBoundingClientRect(); | |
| const imgRect = imgWrap.getBoundingClientRect(); | |
| const tokEl = root.querySelector('.bm-token[data-idx="' + tokIdx + '"]'); | |
| if (!tokEl) return; | |
| const tokRect = tokEl.getBoundingClientRect(); | |
| // Arrow start: segment center in image coordinates | |
| const center = getSegCenter(segIdx); | |
| const x1 = imgRect.left + (center.xPct / 100) * imgRect.width - rootRect.left; | |
| const y1 = imgRect.top + (center.yPct / 100) * imgRect.height - rootRect.top; | |
| const x2 = tokRect.left + tokRect.width / 2 - rootRect.left; | |
| const y2 = tokRect.top - rootRect.top; | |
| const color = METHOD_IS_INFLUENCE | |
| ? INFLUENCE_ARROW_COLOR | |
| : (value >= 0 ? 'rgba(1,109,1,0.7)' : 'rgba(221,19,19,0.7)'); | |
| const line = document.createElementNS('http://www.w3.org/2000/svg', 'line'); | |
| line.setAttribute('x1', x1); | |
| line.setAttribute('y1', y1); | |
| line.setAttribute('x2', x2); | |
| line.setAttribute('y2', y2); | |
| line.setAttribute('stroke', color); | |
| line.setAttribute('stroke-width', '3'); | |
| line.setAttribute('stroke-linecap', 'round'); | |
| line.setAttribute('opacity', '0.7'); | |
| svg.appendChild(line); | |
| }} | |
| function onClickRegion(segIdx) {{ | |
| if (isStale()) return; | |
| clearAll(); | |
| clickedSeg = segIdx; | |
| // Highlight the rectangular div (fallback mode) or segment label | |
| const segEl = root.querySelector('.bm-region-box[data-idx="' + segIdx + '"]'); | |
| if (segEl) segEl.classList.add('bm-active'); | |
| const segLabel = root.querySelector('.bm-seg-label[data-idx="' + segIdx + '"]'); | |
| if (segLabel) segLabel.classList.add('bm-seg-active'); | |
| // Draw pixel-accurate contour on highlight canvas | |
| drawSegmentContour(segIdx); | |
| const related = links.filter(l => l.seg === segIdx) | |
| .sort((a, b) => Math.abs(b.value) - Math.abs(a.value)); | |
| related.forEach(l => {{ | |
| const tokEl = root.querySelector('.bm-token[data-idx="' + l.tok + '"]'); | |
| if (tokEl) tokEl.classList.add('bm-linked'); | |
| drawLine(l.seg, l.tok, l.value); | |
| }}); | |
| if (linkList) {{ | |
| var regInfo = regions[segIdx] || {{}}; | |
| var regName = regInfo.label || ('seg_' + segIdx); | |
| var regVal = regInfo.value !== undefined | |
| ? (METHOD_IS_INFLUENCE ? regInfo.value.toFixed(3) : (regInfo.value >= 0 ? '+' : '') + regInfo.value.toFixed(3)) | |
| : ''; | |
| var header = '<div class="bm-link-header"><strong>' + regName + '</strong> <span style="color:#888">' + regVal + '</span></div>'; | |
| if (related.length === 0) {{ | |
| linkList.innerHTML = header + '<div class="bm-hint">No cross-modal links for this region.</div>'; | |
| }} else {{ | |
| linkList.innerHTML = header + related.map(l => {{ | |
| var color = METHOD_IS_INFLUENCE | |
| ? '#34569F' | |
| : (l.value >= 0 ? '#016d01' : '#c82828'); | |
| var formatted = METHOD_IS_INFLUENCE | |
| ? l.value.toFixed(3) | |
| : (l.value >= 0 ? '+' : '') + l.value.toFixed(3); | |
| return '<div class="bm-link-row">' + | |
| '<span>' + l.tok_label + '</span>' + | |
| '<strong style="color:' + color + '">' + | |
| formatted + '</strong></div>'; | |
| }}).join(''); | |
| }} | |
| }} | |
| }} | |
| function onClickToken(tokIdx) {{ | |
| if (isStale()) return; | |
| clearAll(); | |
| const tokEl = root.querySelector('.bm-token[data-idx="' + tokIdx + '"]'); | |
| if (tokEl) tokEl.classList.add('bm-active'); | |
| const related = links.filter(l => l.tok === tokIdx) | |
| .sort((a, b) => Math.abs(b.value) - Math.abs(a.value)); | |
| related.forEach(l => {{ | |
| const segEl = root.querySelector('.bm-region-box[data-idx="' + l.seg + '"]'); | |
| if (segEl) segEl.classList.add('bm-linked'); | |
| const segLabel = root.querySelector('.bm-seg-label[data-idx="' + l.seg + '"]'); | |
| if (segLabel) segLabel.classList.add('bm-seg-active'); | |
| drawLine(l.seg, l.tok, l.value); | |
| }}); | |
| // Draw contours for all linked segments | |
| if (related.length > 0) drawSegmentContour(related[0].seg); | |
| if (linkList) {{ | |
| var tokName = tokIdx < {len(tokens)} ? {json.dumps([t["label"] for t in tokens])}[tokIdx] : 'word ' + tokIdx; | |
| var header = '<div class="bm-link-header"><strong>"' + tokName + '"</strong></div>'; | |
| if (related.length === 0) {{ | |
| linkList.innerHTML = header + '<div class="bm-hint">No cross-modal links for this word.</div>'; | |
| }} else {{ | |
| linkList.innerHTML = header + related.map(l => {{ | |
| var color = METHOD_IS_INFLUENCE | |
| ? '#34569F' | |
| : (l.value >= 0 ? '#016d01' : '#c82828'); | |
| var formatted = METHOD_IS_INFLUENCE | |
| ? l.value.toFixed(3) | |
| : (l.value >= 0 ? '+' : '') + l.value.toFixed(3); | |
| return '<div class="bm-link-row">' + | |
| '<span>' + l.seg_label + '</span>' + | |
| '<strong style="color:' + color + '">' + | |
| formatted + '</strong></div>'; | |
| }}).join(''); | |
| }} | |
| }} | |
| }} | |
| // Click on the image: use label map canvas for pixel-perfect detection | |
| if (imgWrap) {{ | |
| imgWrap.addEventListener('click', function(e) {{ | |
| if (isStale()) return; | |
| const segIdx = getSegAtPixel(e.clientX, e.clientY); | |
| if (segIdx >= 0) {{ | |
| onClickRegion(segIdx); | |
| }} | |
| }}); | |
| // Hover-dim: highlight the segment under the cursor by dimming the rest. | |
| // Skipped on touch devices where there's no real hover. | |
| if (HOVER_SUPPORTED) {{ | |
| imgWrap.addEventListener('mousemove', function(e) {{ | |
| if (isStale() || hoverRafPending) return; | |
| hoverRafPending = true; | |
| requestAnimationFrame(function() {{ | |
| hoverRafPending = false; | |
| const seg = getSegAtPixel(e.clientX, e.clientY); | |
| if (seg === hoveredSeg) return; | |
| hoveredSeg = seg; | |
| drawHoverDim(seg); | |
| }}); | |
| }}); | |
| imgWrap.addEventListener('mouseleave', function() {{ | |
| if (isStale()) return; | |
| hoveredSeg = -1; | |
| // Restore the click-highlight if any, else clear the canvas. | |
| if (clickedSeg >= 0) drawSegmentContour(clickedSeg); | |
| else if (hlCtx && hlCanvas) hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height); | |
| }}); | |
| }} | |
| }} | |
| // Also allow clicking the bbox overlay divs directly | |
| regionEls.forEach(el => {{ | |
| el.addEventListener('click', function(e) {{ | |
| if (isStale()) return; | |
| e.stopPropagation(); | |
| // If label map is available, prefer pixel detection | |
| if (labelMapReady && labelCanvas && labelCtx) {{ | |
| const segIdx = getSegAtPixel(e.clientX, e.clientY); | |
| if (segIdx >= 0) {{ onClickRegion(segIdx); return; }} | |
| }} | |
| onClickRegion(parseInt(el.dataset.idx)); | |
| }}); | |
| }}); | |
| tokenEls.forEach(el => {{ | |
| el.addEventListener('click', () => onClickToken(parseInt(el.dataset.idx))); | |
| }}); | |
| const resetBtn = root.querySelector('.bm-reset'); | |
| if (resetBtn) resetBtn.addEventListener('click', clearAll); | |
| }})(); | |
| """ | |
| script_id = f"{view_id}-script" | |
| loader_id = f"{view_id}-loader" | |
| if is_influence: | |
| legend_html = """ | |
| <div class="bm-legend-item"> | |
| <div class="bm-legend-swatch" style="background:rgba(52,102,177,0.6);"></div> | |
| <span>Influence strength (always positive — higher = more important)</span> | |
| </div> | |
| <div class="bm-legend-item"> | |
| <div class="bm-legend-swatch" style="background:rgb(225,225,223);"></div> | |
| <span>Neutral / not scored</span> | |
| </div> | |
| <div class="bm-legend-item"> | |
| <div class="bm-legend-swatch" style="background:rgba(52,102,177,0.7); width:30px; height:3px; border-radius:2px;"></div> | |
| <span>Line thickness = cross-modal interaction strength</span> | |
| </div> | |
| """ | |
| else: | |
| method_display = method_label or "Shapley" | |
| legend_html = f""" | |
| <div class="bm-legend-item"> | |
| <div class="bm-legend-swatch" style="background:rgba(1,109,1,0.6);"></div> | |
| <span>Positive {escape(method_display)} (contributes to matching)</span> | |
| </div> | |
| <div class="bm-legend-item"> | |
| <div class="bm-legend-swatch" style="background:rgba(221,19,19,0.6);"></div> | |
| <span>Negative {escape(method_display)} (detracts from matching)</span> | |
| </div> | |
| <div class="bm-legend-item"> | |
| <div class="bm-legend-swatch" style="background:rgb(225,225,223);"></div> | |
| <span>Neutral / not scored</span> | |
| </div> | |
| <div class="bm-legend-item"> | |
| <div class="bm-legend-swatch" style="background:rgba(1,109,1,0.7); width:30px; height:3px; border-radius:2px;"></div> | |
| <span>Green line = positive cross-modal interaction</span> | |
| </div> | |
| <div class="bm-legend-item"> | |
| <div class="bm-legend-swatch" style="background:rgba(221,19,19,0.7); width:30px; height:3px; border-radius:2px;"></div> | |
| <span>Red line = negative cross-modal interaction</span> | |
| </div> | |
| """ | |
| return f""" | |
| <style> | |
| .bm-root {{ | |
| font-family: 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; | |
| background: #f7f5f2; | |
| border: 1px solid #e3e3ec; | |
| border-radius: 16px; | |
| padding: 16px; | |
| position: relative; | |
| }} | |
| .bm-title {{ | |
| font-size: 15px; font-weight: 700; color: #2d1f4a; | |
| margin-bottom: 10px; | |
| }} | |
| .bm-layout {{ | |
| display: grid; | |
| grid-template-columns: 1fr 1fr; | |
| gap: 14px; | |
| }} | |
| .bm-img-panel {{ | |
| background: #fff; border: 1px solid #e3e3ec; border-radius: 14px; | |
| padding: 12px; box-shadow: 0 8px 16px rgba(32,25,40,0.06); | |
| }} | |
| .bm-img-wrap {{ | |
| position: relative; border-radius: 12px; overflow: hidden; | |
| cursor: crosshair; | |
| }} | |
| .bm-img-wrap img.bm-main-img {{ | |
| display: block; width: 100%; height: auto; | |
| }} | |
| /* Region bounding box overlay (real UnSAM bbox or fallback strip) */ | |
| .bm-region-box {{ | |
| position: absolute; | |
| cursor: pointer; transition: all 0.15s ease; | |
| display: flex; align-items: flex-start; justify-content: flex-start; | |
| box-sizing: border-box; | |
| border-radius: 4px; | |
| }} | |
| .bm-region-box:hover {{ | |
| outline: 2px solid rgba(255,255,255,0.9); | |
| z-index: 10; | |
| }} | |
| .bm-region-box.bm-active {{ | |
| outline: 3px solid #fff; | |
| z-index: 20; | |
| box-shadow: 0 0 0 2px #111, inset 0 0 0 2000px rgba(255,255,255,0.12); | |
| }} | |
| .bm-region-box.bm-linked {{ | |
| outline: 2px dashed #c82828; | |
| z-index: 15; | |
| }} | |
| .bm-box-label {{ | |
| font-size: 11px; font-weight: 700; color: #fff; | |
| background: rgba(0,0,0,0.7); padding: 2px 6px; | |
| border-radius: 0 0 6px 0; pointer-events: none; | |
| white-space: nowrap; | |
| }} | |
| .bm-right-panel {{ | |
| display: flex; flex-direction: column; gap: 10px; | |
| }} | |
| .bm-tok-section {{ | |
| background: #fff; border: 1px solid #e3e3ec; border-radius: 14px; | |
| padding: 12px; box-shadow: 0 8px 16px rgba(32,25,40,0.06); | |
| }} | |
| .bm-section-title {{ | |
| font-size: 13px; font-weight: 600; color: #555; margin-bottom: 8px; | |
| }} | |
| .bm-tok-grid {{ | |
| display: flex; flex-wrap: wrap; gap: 6px; | |
| }} | |
| .bm-token {{ | |
| display: inline-flex; align-items: center; justify-content: center; | |
| padding: 5px 10px; border-radius: 10px; font-size: 13px; font-weight: 600; | |
| color: #2a2140; border: 1px solid rgba(60,44,80,0.12); | |
| cursor: pointer; transition: all 0.2s ease; | |
| }} | |
| .bm-token:hover {{ box-shadow: 0 4px 8px rgba(0,0,0,0.15); }} | |
| .bm-token.bm-active {{ | |
| outline: 3px solid #111; | |
| box-shadow: 0 8px 16px rgba(1,109,1,0.25); | |
| }} | |
| .bm-token.bm-linked {{ | |
| outline: 2px solid #016d01; | |
| box-shadow: 0 4px 10px rgba(1,109,1,0.2); | |
| }} | |
| .bm-link-list {{ | |
| display: flex; flex-direction: column; gap: 4px; | |
| max-height: 200px; overflow-y: auto; | |
| }} | |
| .bm-link-row {{ | |
| display: flex; justify-content: space-between; gap: 8px; | |
| font-size: 12px; color: #3a2f50; | |
| background: #f8f6ff; border-radius: 8px; padding: 5px 10px; | |
| }} | |
| .bm-hint {{ font-size: 12px; color: #888; font-style: italic; }} | |
| .bm-link-header {{ | |
| font-size: 13px; color: #2d1f4a; padding: 4px 10px; | |
| background: #eee8ff; border-radius: 8px; margin-bottom: 4px; | |
| }} | |
| .bm-legend {{ | |
| display: flex; flex-wrap: wrap; gap: 12px; padding: 8px 12px; | |
| background: #fff; border: 1px solid #e3e3ec; border-radius: 10px; | |
| margin-top: 10px; font-size: 11px; color: #444; | |
| }} | |
| .bm-legend-item {{ | |
| display: flex; align-items: center; gap: 5px; | |
| }} | |
| .bm-legend-swatch {{ | |
| width: 14px; height: 14px; border-radius: 3px; border: 1px solid #ccc; | |
| }} | |
| .bm-controls {{ | |
| display: flex; justify-content: space-between; align-items: center; | |
| }} | |
| .bm-reset {{ | |
| border: none; background: #eee; border-radius: 999px; | |
| padding: 4px 12px; font-size: 11px; font-weight: 600; | |
| color: #555; cursor: pointer; | |
| }} | |
| .bm-highlight-canvas {{ | |
| position: absolute; top: 0; left: 0; width: 100%; height: 100%; | |
| pointer-events: none; z-index: 5; | |
| }} | |
| .bm-seg-label {{ | |
| position: absolute; transform: translate(-50%, -50%); | |
| font-size: 14px; font-weight: 700; color: #fff; | |
| background: rgba(0,0,0,0.55); padding: 2px 7px; | |
| border-radius: 4px; pointer-events: none; z-index: 6; | |
| white-space: nowrap; | |
| }} | |
| .bm-seg-label.bm-seg-active {{ | |
| background: rgba(0,229,255,0.85); color: #000; | |
| box-shadow: 0 0 6px rgba(0,229,255,0.6); | |
| }} | |
| .bm-svg {{ | |
| position: absolute; top: 0; left: 0; width: 100%; height: 100%; | |
| pointer-events: none; z-index: 50; | |
| }} | |
| @media (max-width: 900px) {{ | |
| .bm-layout {{ grid-template-columns: 1fr; }} | |
| }} | |
| </style> | |
| <div class="bm-root" id="{view_id}"> | |
| <svg class="bm-svg" xmlns="http://www.w3.org/2000/svg"></svg> | |
| <div class="bm-title">{escape(title)}</div> | |
| <div class="bm-layout"> | |
| <div class="bm-img-panel"> | |
| <div class="bm-section-title">Image Regions (click a region to see linked words)</div> | |
| <div class="bm-img-wrap"> | |
| <img class="bm-main-img" src="{display_img_url}" alt="segmented medical image" /> | |
| <canvas class="bm-highlight-canvas"></canvas> | |
| {region_overlays_html} | |
| </div> | |
| </div> | |
| <div class="bm-right-panel"> | |
| <div class="bm-tok-section"> | |
| <div class="bm-controls"> | |
| <div class="bm-section-title">Caption Words (click a word to see linked regions)</div> | |
| <button type="button" class="bm-reset">Reset</button> | |
| </div> | |
| <div class="bm-tok-grid">{tokens_html}</div> | |
| </div> | |
| <div class="bm-tok-section"> | |
| <div class="bm-section-title">Cross-Modal Links</div> | |
| <div class="bm-link-list"> | |
| <div class="bm-hint">Click a segment or word to see connections.</div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="bm-legend"> | |
| <strong>Legend ({escape(method_label)}):</strong> | |
| {legend_html} | |
| </div> | |
| <img class="bm-loader" id="{loader_id}" alt="" | |
| src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///ywAAAAAAQABAAACAUwAOw==" | |
| onload="(function(){{var s=document.getElementById('{script_id}'); | |
| if(!s||!s.textContent){{return;}}try{{(new Function(s.textContent))();}}catch(e){{ | |
| console.warn('bm interaction init failed',e);}}}})()"/> | |
| <script type="text/plain" id="{script_id}">{js_code}</script> | |
| </div> | |
| """ | |