""" benchmark_interaction.py — Interactive cross-modal visualization for the benchmark tab. Creates a self-contained HTML component that shows: - Image with real UnSAM segment outlines (clickable via hidden canvas label map) - Caption tokens below the image (clickable, colored by Shapley value) - Arrows/lines connecting image regions to their most interacting tokens - Click a region → highlights linked tokens; click a token → highlights linked regions """ from __future__ import annotations import json import uuid from html import escape from typing import Any, Dict, List, Optional, Sequence, Tuple def _value_to_color(value: float, max_abs: float, single_color: bool = False) -> str: if max_abs <= 0: return "rgb(225, 225, 223)" norm = min(1.0, abs(value) / max_abs) if single_color: base = (225, 225, 223) target = (52, 102, 177) # influence neutral blue elif value >= 0: base = (225, 225, 223) target = (1, 109, 1) else: base = (225, 225, 223) target = (221, 19, 19) r = int(round(base[0] + (target[0] - base[0]) * norm)) g = int(round(base[1] + (target[1] - base[1]) * norm)) b = int(round(base[2] + (target[2] - base[2]) * norm)) return f"rgb({r}, {g}, {b})" def _value_to_rgba(value: float, max_abs: float, alpha: float = 0.5) -> str: if max_abs <= 0: return "rgba(200, 200, 200, 0.1)" norm = min(1.0, abs(value) / max_abs) if value >= 0: return f"rgba(1, 109, 1, {alpha * norm:.2f})" else: return f"rgba(221, 19, 19, {alpha * norm:.2f})" def create_benchmark_interaction_html( image_b64: str, clip_summary: Optional[Dict[str, Any]], vllm_logprob: Optional[Dict[str, Any]], caption: str = "", all_cross_modal_pairs: Optional[List[Dict[str, Any]]] = None, segmap_b64: str = "", overlay_b64: str = "", segment_bboxes: Optional[List[Optional[Dict[str, float]]]] = None, label_map_b64: str = "", image_width: int = 0, image_height: int = 0, title: str = "Cross-Modal Interaction View", method_label: str = "Shapley", ) -> str: """ Build a self-contained HTML + JS component for the benchmark tab. Shows: - Image panel (left) with real UnSAM segment outlines and clickable regions - Token panel (right) with clickable colored tokens - SVG arrow layer connecting regions to tokens on click - Cross-modal interaction list When segment_bboxes and label_map_b64 are provided, uses canvas-based pixel-level click detection for accurate segment selection matching the real UnSAM segmentation. Falls back to horizontal strips otherwise. """ if not image_b64 and not clip_summary: return "

No data available. Select an example.

" view_id = f"bm-{uuid.uuid4().hex[:8]}" is_influence = (method_label or "").lower() == "influence" # Prepare image URL img_url = image_b64 if img_url and not img_url.startswith("data:"): img_url = f"data:image/png;base64,{img_url}" # Extract CLIP data regions = [] # {index, label, value} tokens = [] # {index, label, value} cross_interactions = [] # {seg, tok, value} n_segs = 0 if clip_summary: raw_items = clip_summary.get("image_region_values", []) total_regions = len(raw_items) grid_guess = int(round(total_regions ** 0.5)) looks_like_patch_grid = (grid_guess * grid_guess == total_regions) and all( str(it.get("label", "")).startswith("patch_") or str(it.get("label", "")).isdigit() for it in raw_items ) for item in raw_items: # Resolve the segment number: # seg_6 → 6 (UnSAM) # patch_1_2 → 1*grid+2 (raw patch-grid; assumes grid=sqrt(n)) # "7" → 7 - 1 = 6 (post-rename patch-grid) raw_label = str(item["label"]) seg_num = n_segs # fallback: sequential if raw_label.startswith("seg_"): try: seg_num = int(raw_label.split("_", 1)[1]) except (ValueError, IndexError): pass elif raw_label.startswith("patch_"): try: _, r_str, c_str = raw_label.split("_", 2) seg_num = int(r_str) * grid_guess + int(c_str) except (ValueError, IndexError): pass elif raw_label.isdigit(): try: seg_num = int(raw_label) - 1 except ValueError: pass # Display label: in patch-grid mode always show "1".."N" in reading # order so the overlay doesn't leak raw "patch_r_c" text. display_label = str(seg_num + 1) if looks_like_patch_grid else raw_label regions.append({ "index": seg_num, "label": display_label, "value": item["value"], "type": "segment", }) n_segs += 1 # Build a lookup from CLIP token labels to values. # Also build a ##-stripped version for subword matching. clip_tok_values: Dict[str, float] = {} for item in clip_summary.get("token_values", []): tok_label = item["label"].replace("tok:", "") clip_tok_values[tok_label] = item["value"] # Collect tokens with ## stripped for substring matching clip_tok_set = set(clip_tok_values.keys()) # Include ALL words from the full caption, not just CLIP's top-k if caption: words = caption.replace(".", " .").replace(",", " ,").replace("(", " (").replace(")", " )").split() for i, word in enumerate(words): value = clip_tok_values.get(word, 0.0) matched_tok = word if value != 0.0 else None if value == 0.0: value = clip_tok_values.get(word.lower(), 0.0) matched_tok = word.lower() if value != 0.0 else None if value == 0.0: # Sum all matching subword tokens (strip ## before matching) total = 0.0 for tok in clip_tok_set: tok_clean = tok.lstrip("#") if len(tok_clean) >= 3 and tok_clean.lower() in word.lower(): total += clip_tok_values[tok] matched_tok = tok value = total tokens.append({ "index": i, "label": word, "value": value, "_matched_tok": matched_tok, }) else: for i, item in enumerate(clip_summary.get("token_values", [])): tok_label = item["label"].replace("tok:", "").lstrip("#") tokens.append({ "index": i, "label": tok_label, "value": item["value"], "_matched_tok": tok_label, }) # Use ALL cross-modal pairs if provided, else fall back to top-5. # Map subword token labels to whole caption words. from .medical_charts import _tok_to_word cross_source = all_cross_modal_pairs or clip_summary.get("cross_modal_interactions", []) def _seg_display(seg_raw: str) -> str: # Normalize cross-pair segment labels the same way we normalized # region labels above — otherwise arrows can't match regions. s = str(seg_raw) if looks_like_patch_grid and s.startswith("patch_"): try: _, rr, cc = s.split("_", 2) return str(int(rr) * grid_guess + int(cc) + 1) except (ValueError, IndexError): return s return s for item in cross_source: cross_interactions.append({ "seg": _seg_display(item["pair"][0]), "tok": _tok_to_word(item["pair"][1], caption) if caption else item["pair"][1].replace("tok:", "").lstrip("#"), "value": item["value"], }) # Determine if we have real segment bounding boxes has_real_bboxes = ( segment_bboxes is not None and len(segment_bboxes) == n_segs and any(b is not None for b in segment_bboxes) ) has_label_map = bool(label_map_b64) # Build region overlays: real bboxes if available, else horizontal strips # When a pixel-accurate label map is available, skip the rectangular div # overlays entirely — the segmap image already shows real segment contours. # We still render small labels at segment centers for identification. region_overlays_html = "" max_abs_r = max((abs(r["value"]) for r in regions), default=1.0) or 1.0 if has_label_map and has_real_bboxes and n_segs > 0: # Label-map mode: no rectangular divs, just center labels. # When segments look like a patch grid (perfect-square count and numeric # or patch-style labels), show reading-order numbers 1..N so the overlay # reads left-to-right top-to-bottom without the raw "patch_r_c" noise. grid = int(round(n_segs ** 0.5)) is_patch_grid = (grid * grid == n_segs) and all( str(r["label"]).isdigit() or str(r["label"]).startswith("patch_") for r in regions ) for r in regions: r_idx = r["index"] bbox = segment_bboxes[r_idx] if r_idx < len(segment_bboxes) else None if bbox is None: continue r_label = escape(str(r_idx + 1) if is_patch_grid else r["label"]) r_value = r["value"] region_overlays_html += ( f"" f"{r_label}" ) elif has_real_bboxes and n_segs > 0: # No label map — use rectangular bounding box divs as fallback for r in regions: r_idx = r["index"] bbox = segment_bboxes[r_idx] if r_idx < len(segment_bboxes) else None if bbox is None: continue r_label = escape(r["label"]) r_value = r["value"] if is_influence: norm = min(1.0, abs(r_value) / max_abs_r) if max_abs_r else 0.0 bg = f"rgba(52,102,177,{0.25*norm:.2f})" border_color = "rgba(52,102,177,0.8)" val_str = f"{r_value:.2f}" title_str = f"{r_label}: {r_value:.3f}" else: bg = _value_to_rgba(r_value, max_abs_r, 0.25) border_color = "rgba(1,109,1,0.8)" if r_value >= 0 else "rgba(200,40,40,0.8)" val_str = f"{r_value:+.2f}" title_str = f"{r_label}: {r_value:+.3f}" region_overlays_html += ( f"

" f"{r_label} ({val_str})" f"

" ) elif n_segs > 0: # Fallback: horizontal strips strip_h = 100.0 / n_segs for r in regions: r_idx = r["index"] r_label = escape(r["label"]) r_value = r["value"] top_pct = r_idx * strip_h if is_influence: norm = min(1.0, abs(r_value) / max_abs_r) if max_abs_r else 0.0 bg = f"rgba(52,102,177,{0.3*norm:.2f})" border_color = "rgba(52,102,177,0.6)" val_str = f"{r_value:.2f}" title_str = f"{r_label}: {r_value:.3f}" else: bg = _value_to_rgba(r_value, max_abs_r, 0.3) border_color = "rgba(1,109,1,0.6)" if r_value >= 0 else "rgba(200,40,40,0.6)" val_str = f"{r_value:+.2f}" title_str = f"{r_label}: {r_value:+.3f}" region_overlays_html += ( f"

" f"{r_label} ({val_str})" f"

" ) # Prefer overlay.png over segmap.png — the segmap has patch labels # ("patch_0_0" etc.) burned into the image pixels during precompute, # which visually dominate the clean 1..16 labels we render in HTML. # overlay.png has no baked text, so only our HTML spans show through. display_img_url = img_url if overlay_b64: ov_url = overlay_b64 if ov_url and not ov_url.startswith("data:"): ov_url = f"data:image/png;base64,{ov_url}" display_img_url = ov_url elif segmap_b64: seg_url = segmap_b64 if seg_url and not seg_url.startswith("data:"): seg_url = f"data:image/png;base64,{seg_url}" display_img_url = seg_url # Label map data URL for canvas click detection label_map_url = "" if has_label_map: label_map_url = f"data:image/png;base64,{label_map_b64}" # Build token chips max_abs_t = max((abs(t["value"]) for t in tokens), default=1.0) or 1.0 tokens_html = "" for t in tokens: color = _value_to_color(t["value"], max_abs_t, single_color=is_influence) t_idx = t["index"] t_label = escape(t["label"]) t_value = t["value"] t_title = (f"{t_label}: {t_value:.4f}" if is_influence else f"{t_label}: {t_value:+.4f}") tokens_html += ( f"" f"{t_label}" f"" ) # Build cross-modal link data for JS seg_idx_map = {r["label"]: r["index"] for r in regions} clip_tok_to_word_indices: Dict[str, List[int]] = {} for t in tokens: matched = t.get("_matched_tok") if matched: clip_tok_to_word_indices.setdefault(matched, []).append(t["index"]) clip_tok_to_word_indices.setdefault(matched.lower(), []).append(t["index"]) links = [] seen_links: set = set() for ci in cross_interactions: si = seg_idx_map.get(ci["seg"]) if si is None: continue tok_key = ci["tok"] word_indices = (clip_tok_to_word_indices.get(tok_key) or clip_tok_to_word_indices.get(tok_key.lower()) or []) for ti in word_indices: link_key = (si, ti) if link_key not in seen_links: seen_links.add(link_key) tok_display = tokens[ti]["label"] if ti < len(tokens) else tok_key links.append({"seg": si, "tok": ti, "value": ci["value"], "seg_label": ci["seg"], "tok_label": tok_display}) # Pass bounding box data to JS for arrow origin computation bboxes_json = json.dumps([ segment_bboxes[i] if (has_real_bboxes and i < len(segment_bboxes)) else None for i in range(n_segs) ]) links_json = json.dumps(links) # Build regions lookup keyed by actual segment index (not sequential) regions_by_idx = {} for r in regions: regions_by_idx[r["index"]] = {"label": r["label"], "value": r["value"]} regions_json = json.dumps(regions_by_idx) # JS for interactivity js_code = f""" (function() {{ const VIEW_ID = '{view_id}'; const root = document.getElementById(VIEW_ID); if (!root) return; // --- Staleness guard --- // Mark this root with the current view ID so stale closures can detect // they belong to a replaced component and bail out. root.setAttribute('data-bm-active', VIEW_ID); function isStale() {{ // If root was removed from DOM or replaced by a new render, bail out if (!document.contains(root)) return true; return root.getAttribute('data-bm-active') !== VIEW_ID; }} const links = {links_json}; const bboxes = {bboxes_json}; const regions = {regions_json}; const nSegs = {n_segs}; const METHOD_IS_INFLUENCE = {str(is_influence).lower()}; const INFLUENCE_ARROW_COLOR = 'rgba(52,102,177,0.75)'; const regionEls = root.querySelectorAll('.bm-region-box'); const tokenEls = root.querySelectorAll('.bm-token'); const svg = root.querySelector('.bm-svg'); const linkList = root.querySelector('.bm-link-list'); const imgWrap = root.querySelector('.bm-img-wrap'); const mainImg = root.querySelector('.bm-main-img'); // --- Highlight canvas: draws real segment contours on click --- let hlCanvas = root.querySelector('.bm-highlight-canvas'); let hlCtx = hlCanvas ? hlCanvas.getContext('2d') : null; function syncHighlightCanvas() {{ if (!hlCanvas || !mainImg) return; hlCanvas.width = mainImg.naturalWidth || mainImg.width; hlCanvas.height = mainImg.naturalHeight || mainImg.height; }} if (mainImg) {{ if (mainImg.complete) syncHighlightCanvas(); else mainImg.addEventListener('load', syncHighlightCanvas); }} // --- Canvas-based label map for pixel-perfect click detection --- let labelCanvas = null; let labelCtx = null; let labelMapReady = false; const labelMapUrl = '{label_map_url}'; if (labelMapUrl) {{ labelCanvas = document.createElement('canvas'); labelCtx = labelCanvas.getContext('2d', {{willReadFrequently: true}}); const lmImg = new window.Image(); lmImg.onload = function() {{ if (isStale()) return; // Don't populate canvas if replaced labelCanvas.width = lmImg.naturalWidth; labelCanvas.height = lmImg.naturalHeight; labelCtx.drawImage(lmImg, 0, 0); labelMapReady = true; buildSegMasks(); }}; lmImg.src = labelMapUrl; }} // ── Hover-dim state ───────────────────────────────────────────── // Per-segment boolean masks (Uint8Array, 1=inside seg, 0=outside), // indexed by the label-map's flat coords so we don't re-read // getImageData on every mousemove. let segMasks = new Array(nSegs).fill(null); let hoveredSeg = -1; let clickedSeg = -1; let hoverRafPending = false; const HOVER_SUPPORTED = window.matchMedia && window.matchMedia('(hover: hover)').matches; function buildSegMasks() {{ if (!labelMapReady || !labelCanvas || !labelCtx) return; const w = labelCanvas.width, h = labelCanvas.height; const data = labelCtx.getImageData(0, 0, w, h).data; for (let i = 0; i < nSegs; i++) segMasks[i] = new Uint8Array(w * h); for (let p = 0, i = 0; p < data.length; p += 4, i++) {{ const seg = data[p] - 1; if (seg >= 0 && seg < nSegs) segMasks[seg][i] = 1; }} }} function drawHoverDim(segIdx) {{ // Dim the entire image except the pixels belonging to `segIdx`. if (isStale() || !hlCtx || !hlCanvas || !labelCanvas) return; syncHighlightCanvas(); hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height); if (segIdx < 0 || !segMasks[segIdx]) return; const cw = hlCanvas.width, ch = hlCanvas.height; const lw = labelCanvas.width, lh = labelCanvas.height; const mask = segMasks[segIdx]; const imgData = hlCtx.createImageData(cw, ch); const px = imgData.data; const dimAlpha = 140; // ≈55% black // First: alpha=dim everywhere (rgb=0 by default) for (let i = 3; i < px.length; i += 4) px[i] = dimAlpha; // Second: punch through (alpha=0) wherever the hovered seg lives const sx = lw / cw, sy = lh / ch; for (let y = 0; y < ch; y++) {{ const ly = Math.floor(y * sy) * lw; const rowBase = y * cw * 4 + 3; for (let x = 0; x < cw; x++) {{ if (mask[ly + Math.floor(x * sx)]) px[rowBase + x * 4] = 0; }} }} hlCtx.putImageData(imgData, 0, 0); }} function getSegAtPixel(clientX, clientY) {{ if (isStale() || !labelMapReady || !labelCanvas || !labelCtx || !mainImg) return -1; const rect = mainImg.getBoundingClientRect(); const rx = (clientX - rect.left) / rect.width; const ry = (clientY - rect.top) / rect.height; if (rx < 0 || rx > 1 || ry < 0 || ry > 1) return -1; const px = Math.floor(rx * labelCanvas.width); const py = Math.floor(ry * labelCanvas.height); const data = labelCtx.getImageData(px, py, 1, 1).data; const segIdx = data[0] - 1; // red channel: index+1, 0=bg return (segIdx >= 0 && segIdx < nSegs) ? segIdx : -1; }} function clearAll() {{ regionEls.forEach(el => el.classList.remove('bm-active', 'bm-linked')); tokenEls.forEach(el => el.classList.remove('bm-active', 'bm-linked')); // Clear segment labels highlight root.querySelectorAll('.bm-seg-label').forEach(el => el.classList.remove('bm-seg-active')); if (svg) svg.innerHTML = ''; if (hlCtx && hlCanvas) hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height); if (linkList) linkList.innerHTML = '

Click a segment or word to see connections.

'; clickedSeg = -1; }} function drawSegmentContour(segIdx) {{ // Draw the real segment contour on the highlight canvas using the label map if (isStale() || !labelMapReady || !hlCtx || !hlCanvas || !labelCtx || !labelCanvas) return; syncHighlightCanvas(); hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height); var w = labelCanvas.width, h = labelCanvas.height; var imgData = labelCtx.getImageData(0, 0, w, h); var px = imgData.data; var target = segIdx + 1; // label map red channel = index + 1 // Find edge pixels: pixels that belong to this segment where at // least one 4-connected neighbor does NOT belong to this segment hlCtx.fillStyle = 'rgba(0, 229, 255, 0.9)'; // cyan highlight var scaleX = hlCanvas.width / w; var scaleY = hlCanvas.height / h; for (var y = 0; y < h; y++) {{ for (var x = 0; x < w; x++) {{ var idx = (y * w + x) * 4; if (px[idx] !== target) continue; // Check 4-connected neighbors var isEdge = false; if (x === 0 || px[idx - 4] !== target) isEdge = true; else if (x === w - 1 || px[idx + 4] !== target) isEdge = true; else if (y === 0 || px[idx - w * 4] !== target) isEdge = true; else if (y === h - 1 || px[idx + w * 4] !== target) isEdge = true; if (isEdge) {{ hlCtx.fillRect(Math.round(x * scaleX), Math.round(y * scaleY), Math.max(2, Math.round(scaleX) + 1), Math.max(2, Math.round(scaleY) + 1)); }} }} }} }} function getSegCenter(segIdx) {{ // Use real bbox center if available const bb = bboxes[segIdx]; if (bb) return {{ xPct: bb.cx_pct, yPct: bb.cy_pct }}; // Fallback: center of horizontal strip const stripH = 100.0 / nSegs; return {{ xPct: 50, yPct: segIdx * stripH + stripH / 2 }}; }} function drawLine(segIdx, tokIdx, value) {{ if (!svg || !imgWrap) return; const rootRect = root.getBoundingClientRect(); const imgRect = imgWrap.getBoundingClientRect(); const tokEl = root.querySelector('.bm-token[data-idx="' + tokIdx + '"]'); if (!tokEl) return; const tokRect = tokEl.getBoundingClientRect(); // Arrow start: segment center in image coordinates const center = getSegCenter(segIdx); const x1 = imgRect.left + (center.xPct / 100) * imgRect.width - rootRect.left; const y1 = imgRect.top + (center.yPct / 100) * imgRect.height - rootRect.top; const x2 = tokRect.left + tokRect.width / 2 - rootRect.left; const y2 = tokRect.top - rootRect.top; const color = METHOD_IS_INFLUENCE ? INFLUENCE_ARROW_COLOR : (value >= 0 ? 'rgba(1,109,1,0.7)' : 'rgba(221,19,19,0.7)'); const line = document.createElementNS('http://www.w3.org/2000/svg', 'line'); line.setAttribute('x1', x1); line.setAttribute('y1', y1); line.setAttribute('x2', x2); line.setAttribute('y2', y2); line.setAttribute('stroke', color); line.setAttribute('stroke-width', '3'); line.setAttribute('stroke-linecap', 'round'); line.setAttribute('opacity', '0.7'); svg.appendChild(line); }} function onClickRegion(segIdx) {{ if (isStale()) return; clearAll(); clickedSeg = segIdx; // Highlight the rectangular div (fallback mode) or segment label const segEl = root.querySelector('.bm-region-box[data-idx="' + segIdx + '"]'); if (segEl) segEl.classList.add('bm-active'); const segLabel = root.querySelector('.bm-seg-label[data-idx="' + segIdx + '"]'); if (segLabel) segLabel.classList.add('bm-seg-active'); // Draw pixel-accurate contour on highlight canvas drawSegmentContour(segIdx); const related = links.filter(l => l.seg === segIdx) .sort((a, b) => Math.abs(b.value) - Math.abs(a.value)); related.forEach(l => {{ const tokEl = root.querySelector('.bm-token[data-idx="' + l.tok + '"]'); if (tokEl) tokEl.classList.add('bm-linked'); drawLine(l.seg, l.tok, l.value); }}); if (linkList) {{ var regInfo = regions[segIdx] || {{}}; var regName = regInfo.label || ('seg_' + segIdx); var regVal = regInfo.value !== undefined ? (METHOD_IS_INFLUENCE ? regInfo.value.toFixed(3) : (regInfo.value >= 0 ? '+' : '') + regInfo.value.toFixed(3)) : ''; var header = '

' + regName + ' ' + regVal + '

'; if (related.length === 0) {{ linkList.innerHTML = header + '

No cross-modal links for this region.

'; }} else {{ linkList.innerHTML = header + related.map(l => {{ var color = METHOD_IS_INFLUENCE ? '#34569F' : (l.value >= 0 ? '#016d01' : '#c82828'); var formatted = METHOD_IS_INFLUENCE ? l.value.toFixed(3) : (l.value >= 0 ? '+' : '') + l.value.toFixed(3); return '

' + '' + l.tok_label + '' + '' + formatted + '

'; }}).join(''); }} }} }} function onClickToken(tokIdx) {{ if (isStale()) return; clearAll(); const tokEl = root.querySelector('.bm-token[data-idx="' + tokIdx + '"]'); if (tokEl) tokEl.classList.add('bm-active'); const related = links.filter(l => l.tok === tokIdx) .sort((a, b) => Math.abs(b.value) - Math.abs(a.value)); related.forEach(l => {{ const segEl = root.querySelector('.bm-region-box[data-idx="' + l.seg + '"]'); if (segEl) segEl.classList.add('bm-linked'); const segLabel = root.querySelector('.bm-seg-label[data-idx="' + l.seg + '"]'); if (segLabel) segLabel.classList.add('bm-seg-active'); drawLine(l.seg, l.tok, l.value); }}); // Draw contours for all linked segments if (related.length > 0) drawSegmentContour(related[0].seg); if (linkList) {{ var tokName = tokIdx < {len(tokens)} ? {json.dumps([t["label"] for t in tokens])}[tokIdx] : 'word ' + tokIdx; var header = '

"' + tokName + '"

'; if (related.length === 0) {{ linkList.innerHTML = header + '

No cross-modal links for this word.

' + '' + l.seg_label + '' + '' + formatted + '

'; }}).join(''); }} }} }} // Click on the image: use label map canvas for pixel-perfect detection if (imgWrap) {{ imgWrap.addEventListener('click', function(e) {{ if (isStale()) return; const segIdx = getSegAtPixel(e.clientX, e.clientY); if (segIdx >= 0) {{ onClickRegion(segIdx); }} }}); // Hover-dim: highlight the segment under the cursor by dimming the rest. // Skipped on touch devices where there's no real hover. if (HOVER_SUPPORTED) {{ imgWrap.addEventListener('mousemove', function(e) {{ if (isStale() || hoverRafPending) return; hoverRafPending = true; requestAnimationFrame(function() {{ hoverRafPending = false; const seg = getSegAtPixel(e.clientX, e.clientY); if (seg === hoveredSeg) return; hoveredSeg = seg; drawHoverDim(seg); }}); }}); imgWrap.addEventListener('mouseleave', function() {{ if (isStale()) return; hoveredSeg = -1; // Restore the click-highlight if any, else clear the canvas. if (clickedSeg >= 0) drawSegmentContour(clickedSeg); else if (hlCtx && hlCanvas) hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height); }}); }} }} // Also allow clicking the bbox overlay divs directly regionEls.forEach(el => {{ el.addEventListener('click', function(e) {{ if (isStale()) return; e.stopPropagation(); // If label map is available, prefer pixel detection if (labelMapReady && labelCanvas && labelCtx) {{ const segIdx = getSegAtPixel(e.clientX, e.clientY); if (segIdx >= 0) {{ onClickRegion(segIdx); return; }} }} onClickRegion(parseInt(el.dataset.idx)); }}); }}); tokenEls.forEach(el => {{ el.addEventListener('click', () => onClickToken(parseInt(el.dataset.idx))); }}); const resetBtn = root.querySelector('.bm-reset'); if (resetBtn) resetBtn.addEventListener('click', clearAll); }})(); """ script_id = f"{view_id}-script" loader_id = f"{view_id}-loader" if is_influence: legend_html = """

Influence strength (always positive — higher = more important)

Neutral / not scored

Line thickness = cross-modal interaction strength

""" else: method_display = method_label or "Shapley" legend_html = f"""

Positive {escape(method_display)} (contributes to matching)

Negative {escape(method_display)} (detracts from matching)

Neutral / not scored

Green line = positive cross-modal interaction

Red line = negative cross-modal interaction

""" return f"""

{escape(title)}

Image Regions (click a region to see linked words)

{region_overlays_html}

Caption Words (click a word to see linked regions)

{tokens_html}

Cross-Modal Links

Click a segment or word to see connections.

Legend ({escape(method_label)}): {legend_html}

"""