# AttrLLM / visualization / plotting / benchmark_interaction.py
# Stephentao-30
# Interaction view: display overlay.png instead of segmap.png to avoid burned-in patch labels
# 77657f1
"""
benchmark_interaction.py — Interactive cross-modal visualization for the benchmark tab.
Creates a self-contained HTML component that shows:
- Image with real UnSAM segment outlines (clickable via hidden canvas label map)
- Caption tokens below the image (clickable, colored by Shapley value)
- Arrows/lines connecting image regions to their most interacting tokens
- Click a region → highlights linked tokens; click a token → highlights linked regions
"""
from __future__ import annotations
import json
import uuid
from html import escape
from typing import Any, Dict, List, Optional, Sequence, Tuple
def _value_to_color(value: float, max_abs: float, single_color: bool = False) -> str:
if max_abs <= 0:
return "rgb(225, 225, 223)"
norm = min(1.0, abs(value) / max_abs)
if single_color:
base = (225, 225, 223)
target = (52, 102, 177) # influence neutral blue
elif value >= 0:
base = (225, 225, 223)
target = (1, 109, 1)
else:
base = (225, 225, 223)
target = (221, 19, 19)
r = int(round(base[0] + (target[0] - base[0]) * norm))
g = int(round(base[1] + (target[1] - base[1]) * norm))
b = int(round(base[2] + (target[2] - base[2]) * norm))
return f"rgb({r}, {g}, {b})"
def _value_to_rgba(value: float, max_abs: float, alpha: float = 0.5) -> str:
if max_abs <= 0:
return "rgba(200, 200, 200, 0.1)"
norm = min(1.0, abs(value) / max_abs)
if value >= 0:
return f"rgba(1, 109, 1, {alpha * norm:.2f})"
else:
return f"rgba(221, 19, 19, {alpha * norm:.2f})"
def _as_png_data_url(payload: str) -> str:
    """Return *payload* as a PNG ``data:`` URL.

    Empty strings and values that already start with ``data:`` are returned
    unchanged, so callers may pass either raw base64 or a ready-made data URL.
    """
    if not payload or payload.startswith("data:"):
        return payload
    return f"data:image/png;base64,{payload}"


def _json_for_script(obj: Any) -> str:
    """Serialize *obj* to JSON that can be embedded inside a ``<script>`` element.

    ``<`` only ever occurs inside JSON string literals, so replacing it with
    the ``\\u003c`` escape keeps the decoded value identical while making it
    impossible for label text such as ``</script>`` to terminate the element.
    """
    return json.dumps(obj).replace("<", "\\u003c")


def create_benchmark_interaction_html(
    image_b64: str,
    clip_summary: Optional[Dict[str, Any]],
    vllm_logprob: Optional[Dict[str, Any]],
    caption: str = "",
    all_cross_modal_pairs: Optional[List[Dict[str, Any]]] = None,
    segmap_b64: str = "",
    overlay_b64: str = "",
    segment_bboxes: Optional[List[Optional[Dict[str, float]]]] = None,
    label_map_b64: str = "",
    image_width: int = 0,
    image_height: int = 0,
    title: str = "Cross-Modal Interaction View",
    method_label: str = "Shapley",
) -> str:
    """Build a self-contained HTML + CSS + JS component for the benchmark tab.

    The component shows an image panel with clickable regions, a panel of
    clickable colour-coded tokens, an SVG layer that draws lines between a
    region and its linked tokens, and a list of the cross-modal links for the
    current selection.

    Region rendering has three modes, chosen in this order:

    * ``label_map_b64`` plus usable ``segment_bboxes``: only small centre
      labels are rendered and clicks are resolved per pixel from the label
      map (red channel = segment index + 1, 0 = background).
    * usable ``segment_bboxes`` only: one rectangular overlay per region.
    * otherwise: equal-height horizontal strips.

    ``segment_bboxes`` counts as usable when it has exactly one entry per
    region and at least one entry is not ``None``.

    Args:
        image_b64: Base image as raw base64 or a ``data:`` URL.
        clip_summary: Dict read for the keys ``image_region_values``,
            ``token_values`` and ``cross_modal_interactions``.
        vllm_logprob: Accepted for interface compatibility; not read here.
        caption: Full caption. When given, every caption word becomes a token
            chip; otherwise the chips come from ``token_values``.
        all_cross_modal_pairs: Cross-modal pairs to use instead of
            ``clip_summary["cross_modal_interactions"]`` when non-empty.
        segmap_b64: Segmentation image shown when no overlay is given.
        overlay_b64: Overlay image, preferred over ``segmap_b64``.
        segment_bboxes: Per-segment dicts with ``x0_pct``, ``y0_pct``,
            ``w_pct``, ``h_pct``, ``cx_pct`` and ``cy_pct`` (or ``None``).
        label_map_b64: Label-map image as raw base64 or a ``data:`` URL.
        image_width: Accepted for interface compatibility; not read here.
        image_height: Accepted for interface compatibility; not read here.
        title: Heading text (HTML-escaped).
        method_label: Attribution method name; ``"influence"`` (any case)
            switches to the single-colour, unsigned presentation. ``None`` or
            an empty string is displayed as ``"Shapley"``.

    Returns:
        The HTML markup as a string, or a short placeholder ``<div>`` when
        neither an image nor a summary is available.
    """
    if not image_b64 and not clip_summary:
        return "<div style='padding:20px;color:#888;'>No data available. Select an example.</div>"

    view_id = f"bm-{uuid.uuid4().hex[:8]}"
    is_influence = (method_label or "").lower() == "influence"
    # Resolved once so that a None/empty method_label cannot crash escape().
    method_display = method_label or "Shapley"

    img_url = _as_png_data_url(image_b64)

    # Extract CLIP data
    regions: List[Dict[str, Any]] = []             # {index, label, value, type}
    tokens: List[Dict[str, Any]] = []              # {index, label, value, _matched_tok}
    cross_interactions: List[Dict[str, Any]] = []  # {seg, tok, value}
    n_segs = 0

    if clip_summary:
        raw_items = clip_summary.get("image_region_values", [])
        total_regions = len(raw_items)
        grid_guess = int(round(total_regions ** 0.5))
        looks_like_patch_grid = (grid_guess * grid_guess == total_regions) and all(
            str(it.get("label", "")).startswith("patch_")
            or str(it.get("label", "")).isdigit()
            for it in raw_items
        )

        for item in raw_items:
            # Resolve the segment number:
            #   seg_6      -> 6              (UnSAM)
            #   patch_1_2  -> 1*grid+2       (raw patch-grid; assumes grid=sqrt(n))
            #   "7"        -> 7 - 1 = 6      (post-rename patch-grid)
            raw_label = str(item["label"])
            seg_num = n_segs  # fallback: sequential
            if raw_label.startswith("seg_"):
                try:
                    seg_num = int(raw_label.split("_", 1)[1])
                except (ValueError, IndexError):
                    pass
            elif raw_label.startswith("patch_"):
                try:
                    _, r_str, c_str = raw_label.split("_", 2)
                    seg_num = int(r_str) * grid_guess + int(c_str)
                except (ValueError, IndexError):
                    pass
            elif raw_label.isdigit():
                try:
                    seg_num = int(raw_label) - 1
                except ValueError:
                    pass
            # Display label: in patch-grid mode always show "1".."N" in reading
            # order so the overlay doesn't leak raw "patch_r_c" text.
            display_label = str(seg_num + 1) if looks_like_patch_grid else raw_label
            regions.append({
                "index": seg_num,
                "label": display_label,
                "value": item["value"],
                "type": "segment",
            })
            n_segs += 1

        # Lookup from CLIP token label (without the "tok:" prefix) to value.
        clip_tok_values: Dict[str, float] = {}
        for item in clip_summary.get("token_values", []):
            clip_tok_values[item["label"].replace("tok:", "")] = item["value"]

        # Include ALL words from the full caption, not just CLIP's top-k
        if caption:
            words = (
                caption.replace(".", " .").replace(",", " ,")
                .replace("(", " (").replace(")", " )").split()
            )
            for i, word in enumerate(words):
                value = clip_tok_values.get(word, 0.0)
                matched_tok = word if value != 0.0 else None
                if value == 0.0:
                    value = clip_tok_values.get(word.lower(), 0.0)
                    matched_tok = word.lower() if value != 0.0 else None
                if value == 0.0:
                    # Sum all matching subword tokens (strip ## before matching).
                    # Iterating the dict rather than a set keeps the recorded
                    # match (the last one in insertion order) deterministic.
                    word_lower = word.lower()
                    total = 0.0
                    for tok, tok_value in clip_tok_values.items():
                        tok_clean = tok.lstrip("#")
                        if len(tok_clean) >= 3 and tok_clean.lower() in word_lower:
                            total += tok_value
                            matched_tok = tok
                    value = total
                tokens.append({
                    "index": i,
                    "label": word,
                    "value": value,
                    "_matched_tok": matched_tok,
                })
        else:
            for i, item in enumerate(clip_summary.get("token_values", [])):
                tok_label = item["label"].replace("tok:", "").lstrip("#")
                tokens.append({
                    "index": i,
                    "label": tok_label,
                    "value": item["value"],
                    "_matched_tok": tok_label,
                })

        # Use ALL cross-modal pairs if provided, else fall back to the
        # summary's own list.
        cross_source = all_cross_modal_pairs or clip_summary.get("cross_modal_interactions", [])

        def _seg_display(seg_raw: str) -> str:
            # Normalize cross-pair segment labels the same way we normalized
            # region labels above — otherwise arrows can't match regions.
            s = str(seg_raw)
            if looks_like_patch_grid and s.startswith("patch_"):
                try:
                    _, rr, cc = s.split("_", 2)
                    return str(int(rr) * grid_guess + int(cc) + 1)
                except (ValueError, IndexError):
                    return s
            return s

        if cross_source and caption:
            # Only needed to map subword token labels to whole caption words,
            # so import it lazily, on the one path that calls it.
            from .medical_charts import _tok_to_word

        for item in cross_source:
            raw_tok = item["pair"][1]
            cross_interactions.append({
                "seg": _seg_display(item["pair"][0]),
                "tok": (_tok_to_word(raw_tok, caption) if caption
                        else raw_tok.replace("tok:", "").lstrip("#")),
                "value": item["value"],
            })

    # Determine if we have real segment bounding boxes
    has_real_bboxes = (
        segment_bboxes is not None
        and len(segment_bboxes) == n_segs
        and any(b is not None for b in segment_bboxes)
    )
    has_label_map = bool(label_map_b64)

    max_abs_r = max((abs(r["value"]) for r in regions), default=1.0) or 1.0

    def _bbox_for(seg_index: int) -> Optional[Dict[str, float]]:
        # Reject negative indices explicitly: Python would otherwise wrap
        # around and silently return a bbox belonging to another segment.
        if segment_bboxes is not None and 0 <= seg_index < len(segment_bboxes):
            return segment_bboxes[seg_index]
        return None

    def _region_paint(value: float, fill_alpha: float, border_alpha: float):
        # Returns (background, border colour, short value text, tooltip value text).
        if is_influence:
            norm = min(1.0, abs(value) / max_abs_r) if max_abs_r else 0.0
            return (
                f"rgba(52,102,177,{fill_alpha * norm:.2f})",
                f"rgba(52,102,177,{border_alpha})",
                f"{value:.2f}",
                f"{value:.3f}",
            )
        border_rgb = "1,109,1" if value >= 0 else "200,40,40"
        return (
            _value_to_rgba(value, max_abs_r, fill_alpha),
            f"rgba({border_rgb},{border_alpha})",
            f"{value:+.2f}",
            f"{value:+.3f}",
        )

    # Build region overlays: real bboxes if available, else horizontal strips.
    # When a pixel-accurate label map is available, skip the rectangular div
    # overlays entirely — the displayed image already shows segment contours.
    # We still render small labels at segment centers for identification.
    overlay_parts: List[str] = []
    if has_label_map and has_real_bboxes and n_segs > 0:
        # Label-map mode: no rectangular divs, just center labels. In
        # patch-grid mode the region label already is the reading-order
        # number (index + 1), so it can be used directly.
        for r in regions:
            r_idx = r["index"]
            bbox = _bbox_for(r_idx)
            if bbox is None:
                continue
            overlay_parts.append(
                f"<span class='bm-seg-label' data-idx='{r_idx}' "
                f"style='left:{bbox['cx_pct']:.1f}%;top:{bbox['cy_pct']:.1f}%;'>"
                f"{escape(r['label'])}</span>"
            )
    elif has_real_bboxes and n_segs > 0:
        # No label map — use rectangular bounding box divs as fallback
        for r in regions:
            r_idx = r["index"]
            bbox = _bbox_for(r_idx)
            if bbox is None:
                continue
            r_label = escape(r["label"])
            bg, border_color, val_str, tip_val = _region_paint(r["value"], 0.25, 0.8)
            overlay_parts.append(
                f"<div class='bm-region-box' data-idx='{r_idx}' "
                f"title='{r_label}: {tip_val}' "
                f"style='left:{bbox['x0_pct']:.2f}%;top:{bbox['y0_pct']:.2f}%;"
                f"width:{bbox['w_pct']:.2f}%;height:{bbox['h_pct']:.2f}%;"
                f"background:{bg};border:2px solid {border_color};'>"
                f"<span class='bm-box-label'>{r_label} ({val_str})</span>"
                f"</div>"
            )
    elif n_segs > 0:
        # Fallback: horizontal strips
        strip_h = 100.0 / n_segs
        for r in regions:
            r_idx = r["index"]
            r_label = escape(r["label"])
            top_pct = r_idx * strip_h
            bg, border_color, val_str, tip_val = _region_paint(r["value"], 0.3, 0.6)
            overlay_parts.append(
                f"<div class='bm-region-box' data-idx='{r_idx}' "
                f"title='{r_label}: {tip_val}' "
                f"style='top:{top_pct:.1f}%;height:{strip_h:.1f}%;left:0;width:100%;"
                f"background:{bg};border-bottom:2px solid {border_color};'>"
                f"<span class='bm-box-label'>{r_label} ({val_str})</span>"
                f"</div>"
            )
    region_overlays_html = "".join(overlay_parts)

    # Prefer overlay.png over segmap.png — the segmap has patch labels
    # ("patch_0_0" etc.) burned into the image pixels during precompute,
    # which visually dominate the clean 1..16 labels we render in HTML.
    # overlay.png has no baked text, so only our HTML spans show through.
    display_img_url = _as_png_data_url(overlay_b64 or segmap_b64) or img_url

    # Label map data URL for canvas click detection (accepts raw base64 or a
    # ready-made data URL, like the other image inputs).
    label_map_url = _as_png_data_url(label_map_b64) if has_label_map else ""

    # Build token chips
    max_abs_t = max((abs(t["value"]) for t in tokens), default=1.0) or 1.0
    token_parts: List[str] = []
    for t in tokens:
        color = _value_to_color(t["value"], max_abs_t, single_color=is_influence)
        t_label = escape(t["label"])
        t_value = t["value"]
        t_title = (f"{t_label}: {t_value:.4f}" if is_influence
                   else f"{t_label}: {t_value:+.4f}")
        token_parts.append(
            f"<span class='bm-token' data-idx='{t['index']}' "
            f"title='{t_title}' "
            f"style='background:{color};'>"
            f"{t_label}"
            f"</span>"
        )
    tokens_html = "".join(token_parts)

    # Build cross-modal link data for JS
    seg_idx_map = {r["label"]: r["index"] for r in regions}
    clip_tok_to_word_indices: Dict[str, List[int]] = {}
    for t in tokens:
        matched = t.get("_matched_tok")
        if matched:
            clip_tok_to_word_indices.setdefault(matched, []).append(t["index"])
            clip_tok_to_word_indices.setdefault(matched.lower(), []).append(t["index"])

    links: List[Dict[str, Any]] = []
    seen_links: set = set()
    for ci in cross_interactions:
        si = seg_idx_map.get(ci["seg"])
        if si is None:
            continue
        tok_key = ci["tok"]
        word_indices = (clip_tok_to_word_indices.get(tok_key)
                        or clip_tok_to_word_indices.get(tok_key.lower())
                        or [])
        for ti in word_indices:
            link_key = (si, ti)
            if link_key in seen_links:
                continue
            seen_links.add(link_key)
            tok_display = tokens[ti]["label"] if ti < len(tokens) else tok_key
            links.append({"seg": si, "tok": ti, "value": ci["value"],
                          "seg_label": ci["seg"], "tok_label": tok_display})

    # Everything handed to the script goes through _json_for_script so that
    # label text can never close the surrounding <script> element.
    # has_real_bboxes guarantees one bbox entry per region.
    bboxes_json = _json_for_script(
        list(segment_bboxes) if has_real_bboxes else [None] * n_segs
    )
    links_json = _json_for_script(links)
    # Regions lookup keyed by actual segment index (not sequential)
    regions_json = _json_for_script(
        {r["index"]: {"label": r["label"], "value": r["value"]} for r in regions}
    )
    token_labels_json = _json_for_script([t["label"] for t in tokens])
    label_map_url_json = _json_for_script(label_map_url)
    is_influence_js = "true" if is_influence else "false"

    # JS for interactivity
    js_code = f"""
(function() {{
    const VIEW_ID = '{view_id}';
    const root = document.getElementById(VIEW_ID);
    if (!root) return;
    // --- Staleness guard ---
    // Mark this root with the current view ID so stale closures can detect
    // they belong to a replaced component and bail out.
    root.setAttribute('data-bm-active', VIEW_ID);
    function isStale() {{
        // If root was removed from DOM or replaced by a new render, bail out
        if (!document.contains(root)) return true;
        return root.getAttribute('data-bm-active') !== VIEW_ID;
    }}
    // Labels come from data, so escape them before they reach innerHTML.
    function escHtml(s) {{
        return String(s).replace(/[&<>"']/g, function(c) {{
            return '&#' + c.charCodeAt(0) + ';';
        }});
    }}
    const links = {links_json};
    const bboxes = {bboxes_json};
    const regions = {regions_json};
    const tokenLabels = {token_labels_json};
    const nSegs = {n_segs};
    const METHOD_IS_INFLUENCE = {is_influence_js};
    const INFLUENCE_ARROW_COLOR = 'rgba(52,102,177,0.75)';
    const regionEls = root.querySelectorAll('.bm-region-box');
    const tokenEls = root.querySelectorAll('.bm-token');
    const svg = root.querySelector('.bm-svg');
    const linkList = root.querySelector('.bm-link-list');
    const imgWrap = root.querySelector('.bm-img-wrap');
    const mainImg = root.querySelector('.bm-main-img');
    // --- Highlight canvas: draws real segment contours on click ---
    let hlCanvas = root.querySelector('.bm-highlight-canvas');
    let hlCtx = hlCanvas ? hlCanvas.getContext('2d') : null;
    function syncHighlightCanvas() {{
        if (!hlCanvas || !mainImg) return;
        hlCanvas.width = mainImg.naturalWidth || mainImg.width;
        hlCanvas.height = mainImg.naturalHeight || mainImg.height;
    }}
    if (mainImg) {{
        if (mainImg.complete) syncHighlightCanvas();
        else mainImg.addEventListener('load', syncHighlightCanvas);
    }}
    // --- Canvas-based label map for pixel-perfect click detection ---
    let labelCanvas = null;
    let labelCtx = null;
    let labelMapReady = false;
    const labelMapUrl = {label_map_url_json};
    if (labelMapUrl) {{
        labelCanvas = document.createElement('canvas');
        labelCtx = labelCanvas.getContext('2d', {{willReadFrequently: true}});
        const lmImg = new window.Image();
        lmImg.onload = function() {{
            if (isStale()) return; // Don't populate canvas if replaced
            labelCanvas.width = lmImg.naturalWidth;
            labelCanvas.height = lmImg.naturalHeight;
            labelCtx.drawImage(lmImg, 0, 0);
            labelMapReady = true;
            buildSegMasks();
        }};
        lmImg.src = labelMapUrl;
    }}
    // ── Hover-dim state ─────────────────────────────────────────────
    // Per-segment boolean masks (Uint8Array, 1=inside seg, 0=outside),
    // indexed by the label-map's flat coords so we don't re-read
    // getImageData on every mousemove.
    let segMasks = new Array(nSegs).fill(null);
    let hoveredSeg = -1;
    let clickedSeg = -1;
    let hoverRafPending = false;
    const HOVER_SUPPORTED = window.matchMedia && window.matchMedia('(hover: hover)').matches;
    function buildSegMasks() {{
        if (!labelMapReady || !labelCanvas || !labelCtx) return;
        const w = labelCanvas.width, h = labelCanvas.height;
        const data = labelCtx.getImageData(0, 0, w, h).data;
        for (let i = 0; i < nSegs; i++) segMasks[i] = new Uint8Array(w * h);
        for (let p = 0, i = 0; p < data.length; p += 4, i++) {{
            const seg = data[p] - 1;
            if (seg >= 0 && seg < nSegs) segMasks[seg][i] = 1;
        }}
    }}
    function drawHoverDim(segIdx) {{
        // Dim the entire image except the pixels belonging to `segIdx`.
        if (isStale() || !hlCtx || !hlCanvas || !labelCanvas) return;
        syncHighlightCanvas();
        hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height);
        if (segIdx < 0 || !segMasks[segIdx]) return;
        const cw = hlCanvas.width, ch = hlCanvas.height;
        const lw = labelCanvas.width, lh = labelCanvas.height;
        const mask = segMasks[segIdx];
        const imgData = hlCtx.createImageData(cw, ch);
        const px = imgData.data;
        const dimAlpha = 140; // ≈55% black
        // First: alpha=dim everywhere (rgb=0 by default)
        for (let i = 3; i < px.length; i += 4) px[i] = dimAlpha;
        // Second: punch through (alpha=0) wherever the hovered seg lives
        const sx = lw / cw, sy = lh / ch;
        for (let y = 0; y < ch; y++) {{
            const ly = Math.floor(y * sy) * lw;
            const rowBase = y * cw * 4 + 3;
            for (let x = 0; x < cw; x++) {{
                if (mask[ly + Math.floor(x * sx)]) px[rowBase + x * 4] = 0;
            }}
        }}
        hlCtx.putImageData(imgData, 0, 0);
    }}
    function getSegAtPixel(clientX, clientY) {{
        if (isStale() || !labelMapReady || !labelCanvas || !labelCtx || !mainImg) return -1;
        const rect = mainImg.getBoundingClientRect();
        const rx = (clientX - rect.left) / rect.width;
        const ry = (clientY - rect.top) / rect.height;
        if (rx < 0 || rx > 1 || ry < 0 || ry > 1) return -1;
        const px = Math.floor(rx * labelCanvas.width);
        const py = Math.floor(ry * labelCanvas.height);
        const data = labelCtx.getImageData(px, py, 1, 1).data;
        const segIdx = data[0] - 1; // red channel: index+1, 0=bg
        return (segIdx >= 0 && segIdx < nSegs) ? segIdx : -1;
    }}
    function clearAll() {{
        regionEls.forEach(el => el.classList.remove('bm-active', 'bm-linked'));
        tokenEls.forEach(el => el.classList.remove('bm-active', 'bm-linked'));
        // Clear segment labels highlight
        root.querySelectorAll('.bm-seg-label').forEach(el => el.classList.remove('bm-seg-active'));
        if (svg) svg.innerHTML = '';
        if (hlCtx && hlCanvas) hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height);
        if (linkList) linkList.innerHTML = '<div class="bm-hint">Click a segment or word to see connections.</div>';
        clickedSeg = -1;
    }}
    function drawSegmentContour(segIdx) {{
        // Draw the real segment contour on the highlight canvas using the label map
        if (isStale() || !labelMapReady || !hlCtx || !hlCanvas || !labelCtx || !labelCanvas) return;
        syncHighlightCanvas();
        hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height);
        var w = labelCanvas.width, h = labelCanvas.height;
        var imgData = labelCtx.getImageData(0, 0, w, h);
        var px = imgData.data;
        var target = segIdx + 1; // label map red channel = index + 1
        // Find edge pixels: pixels that belong to this segment where at
        // least one 4-connected neighbor does NOT belong to this segment
        hlCtx.fillStyle = 'rgba(0, 229, 255, 0.9)'; // cyan highlight
        var scaleX = hlCanvas.width / w;
        var scaleY = hlCanvas.height / h;
        for (var y = 0; y < h; y++) {{
            for (var x = 0; x < w; x++) {{
                var idx = (y * w + x) * 4;
                if (px[idx] !== target) continue;
                // Check 4-connected neighbors
                var isEdge = false;
                if (x === 0 || px[idx - 4] !== target) isEdge = true;
                else if (x === w - 1 || px[idx + 4] !== target) isEdge = true;
                else if (y === 0 || px[idx - w * 4] !== target) isEdge = true;
                else if (y === h - 1 || px[idx + w * 4] !== target) isEdge = true;
                if (isEdge) {{
                    hlCtx.fillRect(Math.round(x * scaleX), Math.round(y * scaleY),
                                   Math.max(2, Math.round(scaleX) + 1),
                                   Math.max(2, Math.round(scaleY) + 1));
                }}
            }}
        }}
    }}
    function getSegCenter(segIdx) {{
        // Use real bbox center if available
        const bb = bboxes[segIdx];
        if (bb) return {{ xPct: bb.cx_pct, yPct: bb.cy_pct }};
        // Fallback: center of horizontal strip
        const stripH = 100.0 / nSegs;
        return {{ xPct: 50, yPct: segIdx * stripH + stripH / 2 }};
    }}
    function drawLine(segIdx, tokIdx, value) {{
        if (!svg || !imgWrap) return;
        const rootRect = root.getBoundingClientRect();
        const imgRect = imgWrap.getBoundingClientRect();
        const tokEl = root.querySelector('.bm-token[data-idx="' + tokIdx + '"]');
        if (!tokEl) return;
        const tokRect = tokEl.getBoundingClientRect();
        // Arrow start: segment center in image coordinates
        const center = getSegCenter(segIdx);
        const x1 = imgRect.left + (center.xPct / 100) * imgRect.width - rootRect.left;
        const y1 = imgRect.top + (center.yPct / 100) * imgRect.height - rootRect.top;
        const x2 = tokRect.left + tokRect.width / 2 - rootRect.left;
        const y2 = tokRect.top - rootRect.top;
        const color = METHOD_IS_INFLUENCE
            ? INFLUENCE_ARROW_COLOR
            : (value >= 0 ? 'rgba(1,109,1,0.7)' : 'rgba(221,19,19,0.7)');
        const line = document.createElementNS('http://www.w3.org/2000/svg', 'line');
        line.setAttribute('x1', x1);
        line.setAttribute('y1', y1);
        line.setAttribute('x2', x2);
        line.setAttribute('y2', y2);
        line.setAttribute('stroke', color);
        line.setAttribute('stroke-width', '3');
        line.setAttribute('stroke-linecap', 'round');
        line.setAttribute('opacity', '0.7');
        svg.appendChild(line);
    }}
    function onClickRegion(segIdx) {{
        if (isStale()) return;
        clearAll();
        clickedSeg = segIdx;
        // Highlight the rectangular div (fallback mode) or segment label
        const segEl = root.querySelector('.bm-region-box[data-idx="' + segIdx + '"]');
        if (segEl) segEl.classList.add('bm-active');
        const segLabel = root.querySelector('.bm-seg-label[data-idx="' + segIdx + '"]');
        if (segLabel) segLabel.classList.add('bm-seg-active');
        // Draw pixel-accurate contour on highlight canvas
        drawSegmentContour(segIdx);
        const related = links.filter(l => l.seg === segIdx)
            .sort((a, b) => Math.abs(b.value) - Math.abs(a.value));
        related.forEach(l => {{
            const tokEl = root.querySelector('.bm-token[data-idx="' + l.tok + '"]');
            if (tokEl) tokEl.classList.add('bm-linked');
            drawLine(l.seg, l.tok, l.value);
        }});
        if (linkList) {{
            var regInfo = regions[segIdx] || {{}};
            var regName = regInfo.label || ('seg_' + segIdx);
            var regVal = regInfo.value !== undefined
                ? (METHOD_IS_INFLUENCE ? regInfo.value.toFixed(3) : (regInfo.value >= 0 ? '+' : '') + regInfo.value.toFixed(3))
                : '';
            var header = '<div class="bm-link-header"><strong>' + escHtml(regName) + '</strong> <span style="color:#888">' + regVal + '</span></div>';
            if (related.length === 0) {{
                linkList.innerHTML = header + '<div class="bm-hint">No cross-modal links for this region.</div>';
            }} else {{
                linkList.innerHTML = header + related.map(l => {{
                    var color = METHOD_IS_INFLUENCE
                        ? '#34569F'
                        : (l.value >= 0 ? '#016d01' : '#c82828');
                    var formatted = METHOD_IS_INFLUENCE
                        ? l.value.toFixed(3)
                        : (l.value >= 0 ? '+' : '') + l.value.toFixed(3);
                    return '<div class="bm-link-row">' +
                        '<span>' + escHtml(l.tok_label) + '</span>' +
                        '<strong style="color:' + color + '">' +
                        formatted + '</strong></div>';
                }}).join('');
            }}
        }}
    }}
    function onClickToken(tokIdx) {{
        if (isStale()) return;
        clearAll();
        const tokEl = root.querySelector('.bm-token[data-idx="' + tokIdx + '"]');
        if (tokEl) tokEl.classList.add('bm-active');
        const related = links.filter(l => l.tok === tokIdx)
            .sort((a, b) => Math.abs(b.value) - Math.abs(a.value));
        related.forEach(l => {{
            const segEl = root.querySelector('.bm-region-box[data-idx="' + l.seg + '"]');
            if (segEl) segEl.classList.add('bm-linked');
            const segLabel = root.querySelector('.bm-seg-label[data-idx="' + l.seg + '"]');
            if (segLabel) segLabel.classList.add('bm-seg-active');
            drawLine(l.seg, l.tok, l.value);
        }});
        // Contour only the strongest linked segment: drawSegmentContour
        // clears the canvas on every call, so only one contour can be shown.
        if (related.length > 0) drawSegmentContour(related[0].seg);
        if (linkList) {{
            var tokName = tokIdx < tokenLabels.length ? tokenLabels[tokIdx] : 'word ' + tokIdx;
            var header = '<div class="bm-link-header"><strong>"' + escHtml(tokName) + '"</strong></div>';
            if (related.length === 0) {{
                linkList.innerHTML = header + '<div class="bm-hint">No cross-modal links for this word.</div>';
            }} else {{
                linkList.innerHTML = header + related.map(l => {{
                    var color = METHOD_IS_INFLUENCE
                        ? '#34569F'
                        : (l.value >= 0 ? '#016d01' : '#c82828');
                    var formatted = METHOD_IS_INFLUENCE
                        ? l.value.toFixed(3)
                        : (l.value >= 0 ? '+' : '') + l.value.toFixed(3);
                    return '<div class="bm-link-row">' +
                        '<span>' + escHtml(l.seg_label) + '</span>' +
                        '<strong style="color:' + color + '">' +
                        formatted + '</strong></div>';
                }}).join('');
            }}
        }}
    }}
    // Click on the image: use label map canvas for pixel-perfect detection
    if (imgWrap) {{
        imgWrap.addEventListener('click', function(e) {{
            if (isStale()) return;
            const segIdx = getSegAtPixel(e.clientX, e.clientY);
            if (segIdx >= 0) {{
                onClickRegion(segIdx);
            }}
        }});
        // Hover-dim: highlight the segment under the cursor by dimming the rest.
        // Skipped on touch devices where there's no real hover.
        if (HOVER_SUPPORTED) {{
            imgWrap.addEventListener('mousemove', function(e) {{
                if (isStale() || hoverRafPending) return;
                hoverRafPending = true;
                requestAnimationFrame(function() {{
                    hoverRafPending = false;
                    const seg = getSegAtPixel(e.clientX, e.clientY);
                    if (seg === hoveredSeg) return;
                    hoveredSeg = seg;
                    // Over background: bring back the click-highlight (as
                    // mouseleave does) instead of leaving a blank canvas.
                    if (seg < 0 && clickedSeg >= 0) drawSegmentContour(clickedSeg);
                    else drawHoverDim(seg);
                }});
            }});
            imgWrap.addEventListener('mouseleave', function() {{
                if (isStale()) return;
                hoveredSeg = -1;
                // Restore the click-highlight if any, else clear the canvas.
                if (clickedSeg >= 0) drawSegmentContour(clickedSeg);
                else if (hlCtx && hlCanvas) hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height);
            }});
        }}
    }}
    // Also allow clicking the bbox overlay divs directly
    regionEls.forEach(el => {{
        el.addEventListener('click', function(e) {{
            if (isStale()) return;
            e.stopPropagation();
            // If label map is available, prefer pixel detection
            if (labelMapReady && labelCanvas && labelCtx) {{
                const segIdx = getSegAtPixel(e.clientX, e.clientY);
                if (segIdx >= 0) {{ onClickRegion(segIdx); return; }}
            }}
            onClickRegion(parseInt(el.dataset.idx, 10));
        }});
    }});
    tokenEls.forEach(el => {{
        el.addEventListener('click', () => onClickToken(parseInt(el.dataset.idx, 10)));
    }});
    const resetBtn = root.querySelector('.bm-reset');
    if (resetBtn) resetBtn.addEventListener('click', clearAll);
}})();
"""
    script_id = f"{view_id}-script"
    loader_id = f"{view_id}-loader"

    if is_influence:
        legend_html = """
<div class="bm-legend-item">
<div class="bm-legend-swatch" style="background:rgba(52,102,177,0.6);"></div>
<span>Influence strength (always positive — higher = more important)</span>
</div>
<div class="bm-legend-item">
<div class="bm-legend-swatch" style="background:rgb(225,225,223);"></div>
<span>Neutral / not scored</span>
</div>
<div class="bm-legend-item">
<div class="bm-legend-swatch" style="background:rgba(52,102,177,0.7); width:30px; height:3px; border-radius:2px;"></div>
<span>Line thickness = cross-modal interaction strength</span>
</div>
"""
    else:
        legend_html = f"""
<div class="bm-legend-item">
<div class="bm-legend-swatch" style="background:rgba(1,109,1,0.6);"></div>
<span>Positive {escape(method_display)} (contributes to matching)</span>
</div>
<div class="bm-legend-item">
<div class="bm-legend-swatch" style="background:rgba(221,19,19,0.6);"></div>
<span>Negative {escape(method_display)} (detracts from matching)</span>
</div>
<div class="bm-legend-item">
<div class="bm-legend-swatch" style="background:rgb(225,225,223);"></div>
<span>Neutral / not scored</span>
</div>
<div class="bm-legend-item">
<div class="bm-legend-swatch" style="background:rgba(1,109,1,0.7); width:30px; height:3px; border-radius:2px;"></div>
<span>Green line = positive cross-modal interaction</span>
</div>
<div class="bm-legend-item">
<div class="bm-legend-swatch" style="background:rgba(221,19,19,0.7); width:30px; height:3px; border-radius:2px;"></div>
<span>Red line = negative cross-modal interaction</span>
</div>
"""

    return f"""
<style>
.bm-root {{
font-family: 'Segoe UI', 'Helvetica Neue', Arial, sans-serif;
background: #f7f5f2;
border: 1px solid #e3e3ec;
border-radius: 16px;
padding: 16px;
position: relative;
}}
.bm-title {{
font-size: 15px; font-weight: 700; color: #2d1f4a;
margin-bottom: 10px;
}}
.bm-layout {{
display: grid;
grid-template-columns: 1fr 1fr;
gap: 14px;
}}
.bm-img-panel {{
background: #fff; border: 1px solid #e3e3ec; border-radius: 14px;
padding: 12px; box-shadow: 0 8px 16px rgba(32,25,40,0.06);
}}
.bm-img-wrap {{
position: relative; border-radius: 12px; overflow: hidden;
cursor: crosshair;
}}
.bm-img-wrap img.bm-main-img {{
display: block; width: 100%; height: auto;
}}
/* Region bounding box overlay (real UnSAM bbox or fallback strip) */
.bm-region-box {{
position: absolute;
cursor: pointer; transition: all 0.15s ease;
display: flex; align-items: flex-start; justify-content: flex-start;
box-sizing: border-box;
border-radius: 4px;
}}
.bm-region-box:hover {{
outline: 2px solid rgba(255,255,255,0.9);
z-index: 10;
}}
.bm-region-box.bm-active {{
outline: 3px solid #fff;
z-index: 20;
box-shadow: 0 0 0 2px #111, inset 0 0 0 2000px rgba(255,255,255,0.12);
}}
.bm-region-box.bm-linked {{
outline: 2px dashed #c82828;
z-index: 15;
}}
.bm-box-label {{
font-size: 11px; font-weight: 700; color: #fff;
background: rgba(0,0,0,0.7); padding: 2px 6px;
border-radius: 0 0 6px 0; pointer-events: none;
white-space: nowrap;
}}
.bm-right-panel {{
display: flex; flex-direction: column; gap: 10px;
}}
.bm-tok-section {{
background: #fff; border: 1px solid #e3e3ec; border-radius: 14px;
padding: 12px; box-shadow: 0 8px 16px rgba(32,25,40,0.06);
}}
.bm-section-title {{
font-size: 13px; font-weight: 600; color: #555; margin-bottom: 8px;
}}
.bm-tok-grid {{
display: flex; flex-wrap: wrap; gap: 6px;
}}
.bm-token {{
display: inline-flex; align-items: center; justify-content: center;
padding: 5px 10px; border-radius: 10px; font-size: 13px; font-weight: 600;
color: #2a2140; border: 1px solid rgba(60,44,80,0.12);
cursor: pointer; transition: all 0.2s ease;
}}
.bm-token:hover {{ box-shadow: 0 4px 8px rgba(0,0,0,0.15); }}
.bm-token.bm-active {{
outline: 3px solid #111;
box-shadow: 0 8px 16px rgba(1,109,1,0.25);
}}
.bm-token.bm-linked {{
outline: 2px solid #016d01;
box-shadow: 0 4px 10px rgba(1,109,1,0.2);
}}
.bm-link-list {{
display: flex; flex-direction: column; gap: 4px;
max-height: 200px; overflow-y: auto;
}}
.bm-link-row {{
display: flex; justify-content: space-between; gap: 8px;
font-size: 12px; color: #3a2f50;
background: #f8f6ff; border-radius: 8px; padding: 5px 10px;
}}
.bm-hint {{ font-size: 12px; color: #888; font-style: italic; }}
.bm-link-header {{
font-size: 13px; color: #2d1f4a; padding: 4px 10px;
background: #eee8ff; border-radius: 8px; margin-bottom: 4px;
}}
.bm-legend {{
display: flex; flex-wrap: wrap; gap: 12px; padding: 8px 12px;
background: #fff; border: 1px solid #e3e3ec; border-radius: 10px;
margin-top: 10px; font-size: 11px; color: #444;
}}
.bm-legend-item {{
display: flex; align-items: center; gap: 5px;
}}
.bm-legend-swatch {{
width: 14px; height: 14px; border-radius: 3px; border: 1px solid #ccc;
}}
.bm-controls {{
display: flex; justify-content: space-between; align-items: center;
}}
.bm-reset {{
border: none; background: #eee; border-radius: 999px;
padding: 4px 12px; font-size: 11px; font-weight: 600;
color: #555; cursor: pointer;
}}
.bm-highlight-canvas {{
position: absolute; top: 0; left: 0; width: 100%; height: 100%;
pointer-events: none; z-index: 5;
}}
.bm-seg-label {{
position: absolute; transform: translate(-50%, -50%);
font-size: 14px; font-weight: 700; color: #fff;
background: rgba(0,0,0,0.55); padding: 2px 7px;
border-radius: 4px; pointer-events: none; z-index: 6;
white-space: nowrap;
}}
.bm-seg-label.bm-seg-active {{
background: rgba(0,229,255,0.85); color: #000;
box-shadow: 0 0 6px rgba(0,229,255,0.6);
}}
.bm-svg {{
position: absolute; top: 0; left: 0; width: 100%; height: 100%;
pointer-events: none; z-index: 50;
}}
@media (max-width: 900px) {{
.bm-layout {{ grid-template-columns: 1fr; }}
}}
</style>
<div class="bm-root" id="{view_id}">
<svg class="bm-svg" xmlns="http://www.w3.org/2000/svg"></svg>
<div class="bm-title">{escape(title)}</div>
<div class="bm-layout">
<div class="bm-img-panel">
<div class="bm-section-title">Image Regions (click a region to see linked words)</div>
<div class="bm-img-wrap">
<img class="bm-main-img" src="{escape(display_img_url)}" alt="segmented medical image" />
<canvas class="bm-highlight-canvas"></canvas>
{region_overlays_html}
</div>
</div>
<div class="bm-right-panel">
<div class="bm-tok-section">
<div class="bm-controls">
<div class="bm-section-title">Caption Words (click a word to see linked regions)</div>
<button type="button" class="bm-reset">Reset</button>
</div>
<div class="bm-tok-grid">{tokens_html}</div>
</div>
<div class="bm-tok-section">
<div class="bm-section-title">Cross-Modal Links</div>
<div class="bm-link-list">
<div class="bm-hint">Click a segment or word to see connections.</div>
</div>
</div>
</div>
</div>
<div class="bm-legend">
<strong>Legend ({escape(method_display)}):</strong>
{legend_html}
</div>
<img class="bm-loader" id="{loader_id}" alt=""
src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///ywAAAAAAQABAAACAUwAOw=="
onload="(function(){{var s=document.getElementById('{script_id}');
if(!s||!s.textContent){{return;}}try{{(new Function(s.textContent))();}}catch(e){{
console.warn('bm interaction init failed',e);}}}})()"/>
<script type="text/plain" id="{script_id}">{js_code}</script>
</div>
"""