AttrLLM

Sleeping

AttrLLM / visualization /plotting /benchmark_interaction.py

Stephentao-30

Interaction view: display overlay.png instead of segmap.png to avoid burned-in patch labels

77657f1 about 1 month ago

40.7 kB

	"""
	benchmark_interaction.py — Interactive cross-modal visualization for the benchmark tab.

	Creates a self-contained HTML component that shows:
	- Image with real UnSAM segment outlines (clickable via hidden canvas label map)
	- Caption tokens in a panel beside the image, stacked below it on narrow screens (clickable, colored by attribution value)
	- Arrows/lines connecting image regions to their most interacting tokens
	- Click a region → highlights linked tokens; click a token → highlights linked regions
	"""

	from __future__ import annotations

	import json
	import uuid
	from html import escape
	from typing import Any, Dict, List, Optional, Sequence, Tuple


	def _value_to_color(value: float, max_abs: float, single_color: bool = False) -> str:
	if max_abs <= 0:
	return "rgb(225, 225, 223)"
	norm = min(1.0, abs(value) / max_abs)
	if single_color:
	base = (225, 225, 223)
	target = (52, 102, 177) # influence neutral blue
	elif value >= 0:
	base = (225, 225, 223)
	target = (1, 109, 1)
	else:
	base = (225, 225, 223)
	target = (221, 19, 19)
	r = int(round(base[0] + (target[0] - base[0]) * norm))
	g = int(round(base[1] + (target[1] - base[1]) * norm))
	b = int(round(base[2] + (target[2] - base[2]) * norm))
	return f"rgb({r}, {g}, {b})"


	def _value_to_rgba(value: float, max_abs: float, alpha: float = 0.5) -> str:
	if max_abs <= 0:
	return "rgba(200, 200, 200, 0.1)"
	norm = min(1.0, abs(value) / max_abs)
	if value >= 0:
	return f"rgba(1, 109, 1, {alpha * norm:.2f})"
	else:
	return f"rgba(221, 19, 19, {alpha * norm:.2f})"


	def create_benchmark_interaction_html(
	    image_b64: str,
	    clip_summary: Optional[Dict[str, Any]],
	    vllm_logprob: Optional[Dict[str, Any]],
	    caption: str = "",
	    all_cross_modal_pairs: Optional[List[Dict[str, Any]]] = None,
	    segmap_b64: str = "",
	    overlay_b64: str = "",
	    segment_bboxes: Optional[List[Optional[Dict[str, float]]]] = None,
	    label_map_b64: str = "",
	    image_width: int = 0,
	    image_height: int = 0,
	    title: str = "Cross-Modal Interaction View",
	    method_label: str = "Shapley",
	) -> str:
	    """Build a self-contained HTML + JS component for the benchmark tab.

	    The component shows:
	    - an image panel (left) with clickable segment regions,
	    - a token panel (right) with clickable, colour-coded tokens,
	    - an SVG layer that draws lines between a region and its linked tokens,
	    - a list of the cross-modal links for the current selection.

	    When ``segment_bboxes`` and ``label_map_b64`` are both usable, regions are
	    labelled at their bbox centres and clicks are resolved per pixel through a
	    hidden canvas holding the label map. With bboxes only, rectangular overlay
	    divs are used. Otherwise the image is split into horizontal strips.

	    Args:
	        image_b64: Base64 PNG payload or a complete ``data:`` URL.
	        clip_summary: Attribution summary. The keys read here are
	            ``image_region_values`` and ``token_values`` (lists of dicts with
	            ``label`` and ``value``) and ``cross_modal_interactions`` (list
	            of dicts with ``pair`` and ``value``).
	        vllm_logprob: Accepted for interface compatibility; not read here.
	        caption: Full caption. When given, every caption word becomes a token
	            chip and cross-modal token labels are mapped to whole words.
	        all_cross_modal_pairs: Used instead of
	            ``clip_summary["cross_modal_interactions"]`` when non-empty.
	        segmap_b64: Segment-map image, shown only when no overlay is given.
	        overlay_b64: Overlay image; preferred display image when given.
	        segment_bboxes: One entry per region (or None). The keys read here
	            are ``cx_pct``, ``cy_pct``, ``x0_pct``, ``y0_pct``, ``w_pct`` and
	            ``h_pct``, used as CSS percentages.
	        label_map_b64: Label-map PNG (payload or ``data:`` URL). The JS reads
	            the red channel as ``segment index + 1`` with 0 as background.
	        image_width: Accepted for interface compatibility; not read here.
	        image_height: Accepted for interface compatibility; not read here.
	        title: Heading text (HTML-escaped).
	        method_label: Attribution method name. ``"influence"``
	            (case-insensitive) switches to a single-colour, unsigned display.
	            Empty or None is shown as "Shapley".

	    Returns:
	        An HTML fragment containing ``<style>``, the markup, and an inert
	        ``<script type="text/plain">`` that a 1x1 loader image evaluates on
	        load.
	    """
	    if not image_b64 and not clip_summary:
	        return "<div style='padding:20px;color:#888;'>No data available. Select an example.</div>"

	    def _as_data_url(payload: str) -> str:
	        # Accept either a bare base64 payload or a ready-made data URL.
	        if payload and not payload.startswith("data:"):
	            return f"data:image/png;base64,{payload}"
	        return payload

	    def _js_literal(obj: Any) -> str:
	        # JSON is a valid JS literal, but "</script>" inside a string would
	        # terminate the surrounding <script> element; "<\/" is equivalent in JS.
	        return json.dumps(obj).replace("</", "<\\/")

	    view_id = f"bm-{uuid.uuid4().hex[:8]}"
	    is_influence = (method_label or "").lower() == "influence"
	    method_display = method_label or "Shapley"

	    img_url = _as_data_url(image_b64)

	    # Extract CLIP data
	    regions: List[Dict[str, Any]] = []  # {index, label, value, type}
	    tokens: List[Dict[str, Any]] = []  # {index, label, value, _matched_tok}
	    cross_interactions: List[Dict[str, Any]] = []  # {seg, tok, value}
	    n_segs = 0

	    if clip_summary:
	        raw_items = clip_summary.get("image_region_values", [])
	        total_regions = len(raw_items)
	        grid_guess = int(round(total_regions ** 0.5))
	        looks_like_patch_grid = (grid_guess * grid_guess == total_regions) and all(
	            str(it.get("label", "")).startswith("patch_")
	            or str(it.get("label", "")).isdigit()
	            for it in raw_items
	        )
	        for item in raw_items:
	            # Resolve the segment number:
	            #   seg_6     -> 6                (UnSAM)
	            #   patch_1_2 -> 1*grid+2         (raw patch grid; assumes grid=sqrt(n))
	            #   "7"       -> 7 - 1 = 6        (post-rename patch grid)
	            raw_label = str(item["label"])
	            seg_num = n_segs  # fallback: sequential
	            if raw_label.startswith("seg_"):
	                try:
	                    seg_num = int(raw_label.split("_", 1)[1])
	                except (ValueError, IndexError):
	                    pass
	            elif raw_label.startswith("patch_"):
	                try:
	                    _, r_str, c_str = raw_label.split("_", 2)
	                    seg_num = int(r_str) * grid_guess + int(c_str)
	                except (ValueError, IndexError):
	                    pass
	            elif raw_label.isdigit():
	                try:
	                    seg_num = int(raw_label) - 1
	                except ValueError:
	                    pass
	            # In patch-grid mode always show "1".."N" in reading order so the
	            # overlay doesn't leak raw "patch_r_c" text.
	            display_label = str(seg_num + 1) if looks_like_patch_grid else raw_label
	            regions.append({
	                "index": seg_num,
	                "label": display_label,
	                "value": item["value"],
	                "type": "segment",
	            })
	            n_segs += 1

	        # Lookup from CLIP token label (without the "tok:" marker) to value.
	        clip_tok_values: Dict[str, float] = {}
	        for item in clip_summary.get("token_values", []):
	            clip_tok_values[item["label"].replace("tok:", "")] = item["value"]

	        if caption:
	            # Include ALL words from the full caption, not just CLIP's top-k.
	            words = (
	                caption.replace(".", " .")
	                .replace(",", " ,")
	                .replace("(", " (")
	                .replace(")", " )")
	                .split()
	            )
	            for i, word in enumerate(words):
	                word_lower = word.lower()
	                value = clip_tok_values.get(word, 0.0)
	                matched_tok = word if value != 0.0 else None
	                if value == 0.0:
	                    value = clip_tok_values.get(word_lower, 0.0)
	                    matched_tok = word_lower if value != 0.0 else None
	                if value == 0.0:
	                    # Sum all matching subword tokens (strip ## before matching).
	                    # Iterating the dict keeps the remembered match
	                    # deterministic between runs.
	                    total = 0.0
	                    for tok, tok_value in clip_tok_values.items():
	                        tok_clean = tok.lstrip("#")
	                        if len(tok_clean) >= 3 and tok_clean.lower() in word_lower:
	                            total += tok_value
	                            matched_tok = tok
	                    value = total
	                tokens.append({
	                    "index": i,
	                    "label": word,
	                    "value": value,
	                    "_matched_tok": matched_tok,
	                })
	        else:
	            for i, item in enumerate(clip_summary.get("token_values", [])):
	                tok_label = item["label"].replace("tok:", "").lstrip("#")
	                tokens.append({
	                    "index": i,
	                    "label": tok_label,
	                    "value": item["value"],
	                    "_matched_tok": tok_label,
	                })

	        # Use ALL cross-modal pairs if provided, else fall back to the summary's.
	        cross_source = all_cross_modal_pairs or clip_summary.get("cross_modal_interactions", [])
	        if caption and cross_source:
	            # Only needed to map subword token labels to whole caption words.
	            from .medical_charts import _tok_to_word

	        def _seg_display(seg_raw: Any) -> str:
	            # Normalize cross-pair segment labels the same way region labels
	            # were normalized above, otherwise links can't match regions.
	            s = str(seg_raw)
	            if looks_like_patch_grid and s.startswith("patch_"):
	                try:
	                    _, rr, cc = s.split("_", 2)
	                    return str(int(rr) * grid_guess + int(cc) + 1)
	                except (ValueError, IndexError):
	                    return s
	            return s

	        for item in cross_source:
	            raw_tok = item["pair"][1]
	            if caption:
	                tok_name = _tok_to_word(raw_tok, caption)
	            else:
	                tok_name = raw_tok.replace("tok:", "").lstrip("#")
	            cross_interactions.append({
	                "seg": _seg_display(item["pair"][0]),
	                "tok": tok_name,
	                "value": item["value"],
	            })

	    # Determine if we have real segment bounding boxes
	    has_real_bboxes = (
	        segment_bboxes is not None
	        and len(segment_bboxes) == n_segs
	        and any(b is not None for b in segment_bboxes)
	    )
	    has_label_map = bool(label_map_b64)

	    max_abs_r = max((abs(r["value"]) for r in regions), default=1.0) or 1.0

	    def _bbox_for(idx: int) -> Optional[Dict[str, float]]:
	        # A negative index must not wrap around to the last bbox.
	        if segment_bboxes is not None and 0 <= idx < len(segment_bboxes):
	            return segment_bboxes[idx]
	        return None

	    def _box_paint(value: float, fill_alpha: float, border_alpha: str) -> Tuple[str, str, str, str]:
	        # Returns (background, border colour, short value text, tooltip value text).
	        if is_influence:
	            norm = min(1.0, abs(value) / max_abs_r) if max_abs_r else 0.0
	            return (
	                f"rgba(52,102,177,{fill_alpha * norm:.2f})",
	                f"rgba(52,102,177,{border_alpha})",
	                f"{value:.2f}",
	                f"{value:.3f}",
	            )
	        border = (
	            f"rgba(1,109,1,{border_alpha})" if value >= 0
	            else f"rgba(200,40,40,{border_alpha})"
	        )
	        return (
	            _value_to_rgba(value, max_abs_r, fill_alpha),
	            border,
	            f"{value:+.2f}",
	            f"{value:+.3f}",
	        )

	    # Build region overlays: centre labels (label-map mode), real bbox divs,
	    # or horizontal strips as the last resort.
	    overlay_parts: List[str] = []
	    if has_label_map and has_real_bboxes and n_segs > 0:
	        # Label-map mode: no rectangular divs, just centre labels. Region
	        # labels were already normalized to "1".."N" for patch grids above.
	        for r in regions:
	            r_idx = r["index"]
	            bbox = _bbox_for(r_idx)
	            if bbox is None:
	                continue
	            overlay_parts.append(
	                f"<span class='bm-seg-label' data-idx='{r_idx}' "
	                f"style='left:{bbox['cx_pct']:.1f}%;top:{bbox['cy_pct']:.1f}%;'>"
	                f"{escape(r['label'])}</span>"
	            )
	    elif has_real_bboxes and n_segs > 0:
	        # No label map: use rectangular bounding box divs as fallback
	        for r in regions:
	            r_idx = r["index"]
	            bbox = _bbox_for(r_idx)
	            if bbox is None:
	                continue
	            r_label = escape(r["label"])
	            bg, border_color, val_str, tip_val = _box_paint(r["value"], 0.25, "0.8")
	            overlay_parts.append(
	                f"<div class='bm-region-box' data-idx='{r_idx}' "
	                f"title='{r_label}: {tip_val}' "
	                f"style='left:{bbox['x0_pct']:.2f}%;top:{bbox['y0_pct']:.2f}%;"
	                f"width:{bbox['w_pct']:.2f}%;height:{bbox['h_pct']:.2f}%;"
	                f"background:{bg};border:2px solid {border_color};'>"
	                f"<span class='bm-box-label'>{r_label} ({val_str})</span>"
	                f"</div>"
	            )
	    elif n_segs > 0:
	        # Fallback: horizontal strips
	        strip_h = 100.0 / n_segs
	        for r in regions:
	            r_idx = r["index"]
	            r_label = escape(r["label"])
	            top_pct = r_idx * strip_h
	            bg, border_color, val_str, tip_val = _box_paint(r["value"], 0.3, "0.6")
	            overlay_parts.append(
	                f"<div class='bm-region-box' data-idx='{r_idx}' "
	                f"title='{r_label}: {tip_val}' "
	                f"style='top:{top_pct:.1f}%;height:{strip_h:.1f}%;left:0;width:100%;"
	                f"background:{bg};border-bottom:2px solid {border_color};'>"
	                f"<span class='bm-box-label'>{r_label} ({val_str})</span>"
	                f"</div>"
	            )
	    region_overlays_html = "".join(overlay_parts)

	    # Prefer overlay.png over segmap.png: the segmap has patch labels
	    # ("patch_0_0" etc.) burned into the image pixels during precompute,
	    # which visually dominate the clean labels rendered in HTML.
	    display_img_url = img_url
	    if overlay_b64:
	        display_img_url = _as_data_url(overlay_b64)
	    elif segmap_b64:
	        display_img_url = _as_data_url(segmap_b64)

	    # Label map data URL for canvas click detection
	    label_map_url = _as_data_url(label_map_b64) if has_label_map else ""

	    # Build token chips
	    max_abs_t = max((abs(t["value"]) for t in tokens), default=1.0) or 1.0
	    token_parts: List[str] = []
	    for t in tokens:
	        color = _value_to_color(t["value"], max_abs_t, single_color=is_influence)
	        t_label = escape(t["label"])
	        t_value = t["value"]
	        t_title = (
	            f"{t_label}: {t_value:.4f}" if is_influence
	            else f"{t_label}: {t_value:+.4f}"
	        )
	        token_parts.append(
	            f"<span class='bm-token' data-idx='{t['index']}' "
	            f"title='{t_title}' "
	            f"style='background:{color};'>"
	            f"{t_label}"
	            f"</span>"
	        )
	    tokens_html = "".join(token_parts)

	    # Build cross-modal link data for JS
	    seg_idx_map = {r["label"]: r["index"] for r in regions}

	    clip_tok_to_word_indices: Dict[str, List[int]] = {}
	    for t in tokens:
	        matched = t.get("_matched_tok")
	        if matched:
	            clip_tok_to_word_indices.setdefault(matched, []).append(t["index"])
	            clip_tok_to_word_indices.setdefault(matched.lower(), []).append(t["index"])

	    links: List[Dict[str, Any]] = []
	    seen_links: set = set()
	    for ci in cross_interactions:
	        si = seg_idx_map.get(ci["seg"])
	        if si is None:
	            continue
	        tok_key = ci["tok"]
	        word_indices = (
	            clip_tok_to_word_indices.get(tok_key)
	            or clip_tok_to_word_indices.get(tok_key.lower())
	            or []
	        )
	        for ti in word_indices:
	            link_key = (si, ti)
	            if link_key in seen_links:
	                continue
	            seen_links.add(link_key)
	            tok_display = tokens[ti]["label"] if ti < len(tokens) else tok_key
	            links.append({
	                "seg": si,
	                "tok": ti,
	                "value": ci["value"],
	                "seg_label": ci["seg"],
	                "tok_label": tok_display,
	            })

	    # Data handed to the JS side. Everything goes through _js_literal so a
	    # label or caption containing "</script>" cannot end the script element.
	    bboxes_json = _js_literal([
	        segment_bboxes[i] if (has_real_bboxes and i < len(segment_bboxes)) else None
	        for i in range(n_segs)
	    ])
	    links_json = _js_literal(links)
	    # Regions lookup keyed by actual segment index (not sequential)
	    regions_json = _js_literal({
	        r["index"]: {"label": r["label"], "value": r["value"]} for r in regions
	    })
	    token_labels_json = _js_literal([t["label"] for t in tokens])
	    label_map_url_js = _js_literal(label_map_url)
	    is_influence_js = json.dumps(is_influence)

	    # JS for interactivity
	    js_code = f"""
	    (function() {{
	        const VIEW_ID = '{view_id}';
	        const root = document.getElementById(VIEW_ID);
	        if (!root) return;

	        // --- Staleness guard ---
	        // Mark this root with the current view ID so stale closures can detect
	        // they belong to a replaced component and bail out.
	        root.setAttribute('data-bm-active', VIEW_ID);
	        function isStale() {{
	            // If root was removed from DOM or replaced by a new render, bail out
	            if (!document.contains(root)) return true;
	            return root.getAttribute('data-bm-active') !== VIEW_ID;
	        }}

	        // Labels come from captions / model output; escape before innerHTML.
	        function escHtml(s) {{
	            const map = {{'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'}};
	            return String(s).replace(/[&<>"']/g, function(c) {{ return map[c]; }});
	        }}

	        const links = {links_json};
	        const bboxes = {bboxes_json};
	        const regions = {regions_json};
	        const tokenLabels = {token_labels_json};
	        const nSegs = {n_segs};
	        const METHOD_IS_INFLUENCE = {is_influence_js};
	        const INFLUENCE_ARROW_COLOR = 'rgba(52,102,177,0.75)';
	        const regionEls = root.querySelectorAll('.bm-region-box');
	        const tokenEls = root.querySelectorAll('.bm-token');
	        const svg = root.querySelector('.bm-svg');
	        const linkList = root.querySelector('.bm-link-list');
	        const imgWrap = root.querySelector('.bm-img-wrap');
	        const mainImg = root.querySelector('.bm-main-img');

	        // --- Highlight canvas: draws real segment contours on click ---
	        let hlCanvas = root.querySelector('.bm-highlight-canvas');
	        let hlCtx = hlCanvas ? hlCanvas.getContext('2d') : null;
	        function syncHighlightCanvas() {{
	            if (!hlCanvas || !mainImg) return;
	            hlCanvas.width = mainImg.naturalWidth || mainImg.width;
	            hlCanvas.height = mainImg.naturalHeight || mainImg.height;
	        }}
	        if (mainImg) {{
	            if (mainImg.complete) syncHighlightCanvas();
	            else mainImg.addEventListener('load', syncHighlightCanvas);
	        }}

	        // --- Canvas-based label map for pixel-perfect click detection ---
	        let labelCanvas = null;
	        let labelCtx = null;
	        let labelMapReady = false;
	        const labelMapUrl = {label_map_url_js};
	        if (labelMapUrl) {{
	            labelCanvas = document.createElement('canvas');
	            labelCtx = labelCanvas.getContext('2d', {{willReadFrequently: true}});
	            const lmImg = new window.Image();
	            lmImg.onload = function() {{
	                if (isStale()) return; // Don't populate canvas if replaced
	                labelCanvas.width = lmImg.naturalWidth;
	                labelCanvas.height = lmImg.naturalHeight;
	                labelCtx.drawImage(lmImg, 0, 0);
	                labelMapReady = true;
	                buildSegMasks();
	            }};
	            lmImg.src = labelMapUrl;
	        }}

	        // ── Hover-dim state ─────────────────────────────────────────────
	        // Per-segment boolean masks (Uint8Array, 1=inside seg, 0=outside),
	        // indexed by the label-map's flat coords so we don't re-read
	        // getImageData on every mousemove.
	        let segMasks = new Array(nSegs).fill(null);
	        let hoveredSeg = -1;
	        let clickedSeg = -1;
	        let hoverRafPending = false;
	        const HOVER_SUPPORTED = window.matchMedia && window.matchMedia('(hover: hover)').matches;

	        function buildSegMasks() {{
	            if (!labelMapReady || !labelCanvas || !labelCtx) return;
	            const w = labelCanvas.width, h = labelCanvas.height;
	            const data = labelCtx.getImageData(0, 0, w, h).data;
	            for (let i = 0; i < nSegs; i++) segMasks[i] = new Uint8Array(w * h);
	            for (let p = 0, i = 0; p < data.length; p += 4, i++) {{
	                const seg = data[p] - 1;
	                if (seg >= 0 && seg < nSegs) segMasks[seg][i] = 1;
	            }}
	        }}

	        function drawHoverDim(segIdx) {{
	            // Dim the entire image except the pixels belonging to `segIdx`.
	            if (isStale() || !hlCtx || !hlCanvas || !labelCanvas) return;
	            syncHighlightCanvas();
	            hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height);
	            if (segIdx < 0 || !segMasks[segIdx]) return;

	            const cw = hlCanvas.width, ch = hlCanvas.height;
	            const lw = labelCanvas.width, lh = labelCanvas.height;
	            const mask = segMasks[segIdx];

	            const imgData = hlCtx.createImageData(cw, ch);
	            const px = imgData.data;
	            const dimAlpha = 140;  // ≈55% black
	            // First: alpha=dim everywhere (rgb=0 by default)
	            for (let i = 3; i < px.length; i += 4) px[i] = dimAlpha;
	            // Second: punch through (alpha=0) wherever the hovered seg lives
	            const sx = lw / cw, sy = lh / ch;
	            for (let y = 0; y < ch; y++) {{
	                const ly = Math.floor(y * sy) * lw;
	                const rowBase = y * cw * 4 + 3;
	                for (let x = 0; x < cw; x++) {{
	                    if (mask[ly + Math.floor(x * sx)]) px[rowBase + x * 4] = 0;
	                }}
	            }}
	            hlCtx.putImageData(imgData, 0, 0);
	        }}

	        function getSegAtPixel(clientX, clientY) {{
	            if (isStale() || !labelMapReady || !labelCanvas || !labelCtx || !mainImg) return -1;
	            const rect = mainImg.getBoundingClientRect();
	            const rx = (clientX - rect.left) / rect.width;
	            const ry = (clientY - rect.top) / rect.height;
	            if (rx < 0 || rx > 1 || ry < 0 || ry > 1) return -1;
	            const px = Math.floor(rx * labelCanvas.width);
	            const py = Math.floor(ry * labelCanvas.height);
	            const data = labelCtx.getImageData(px, py, 1, 1).data;
	            const segIdx = data[0] - 1;  // red channel: index+1, 0=bg
	            return (segIdx >= 0 && segIdx < nSegs) ? segIdx : -1;
	        }}

	        function clearAll() {{
	            regionEls.forEach(el => el.classList.remove('bm-active', 'bm-linked'));
	            tokenEls.forEach(el => el.classList.remove('bm-active', 'bm-linked'));
	            // Clear segment labels highlight
	            root.querySelectorAll('.bm-seg-label').forEach(el => el.classList.remove('bm-seg-active'));
	            if (svg) svg.innerHTML = '';
	            if (hlCtx && hlCanvas) hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height);
	            if (linkList) linkList.innerHTML = '<div class="bm-hint">Click a segment or word to see connections.</div>';
	            clickedSeg = -1;
	        }}

	        function drawSegmentContour(segIdx) {{
	            // Draw the real segment contour on the highlight canvas using the label map
	            if (isStale() || !labelMapReady || !hlCtx || !hlCanvas || !labelCtx || !labelCanvas) return;
	            syncHighlightCanvas();
	            hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height);
	            var w = labelCanvas.width, h = labelCanvas.height;
	            var imgData = labelCtx.getImageData(0, 0, w, h);
	            var px = imgData.data;
	            var target = segIdx + 1;  // label map red channel = index + 1
	            // Find edge pixels: pixels that belong to this segment where at
	            // least one 4-connected neighbor does NOT belong to this segment
	            hlCtx.fillStyle = 'rgba(0, 229, 255, 0.9)';  // cyan highlight
	            var scaleX = hlCanvas.width / w;
	            var scaleY = hlCanvas.height / h;
	            for (var y = 0; y < h; y++) {{
	                for (var x = 0; x < w; x++) {{
	                    var idx = (y * w + x) * 4;
	                    if (px[idx] !== target) continue;
	                    // Check 4-connected neighbors
	                    var isEdge = false;
	                    if (x === 0 || px[idx - 4] !== target) isEdge = true;
	                    else if (x === w - 1 || px[idx + 4] !== target) isEdge = true;
	                    else if (y === 0 || px[idx - w * 4] !== target) isEdge = true;
	                    else if (y === h - 1 || px[idx + w * 4] !== target) isEdge = true;
	                    if (isEdge) {{
	                        hlCtx.fillRect(Math.round(x * scaleX), Math.round(y * scaleY),
	                                       Math.max(2, Math.round(scaleX) + 1),
	                                       Math.max(2, Math.round(scaleY) + 1));
	                    }}
	                }}
	            }}
	        }}

	        function getSegCenter(segIdx) {{
	            // Use real bbox center if available
	            const bb = bboxes[segIdx];
	            if (bb) return {{ xPct: bb.cx_pct, yPct: bb.cy_pct }};
	            // Fallback: center of horizontal strip
	            const stripH = 100.0 / nSegs;
	            return {{ xPct: 50, yPct: segIdx * stripH + stripH / 2 }};
	        }}

	        function drawLine(segIdx, tokIdx, value) {{
	            if (!svg || !imgWrap) return;
	            const rootRect = root.getBoundingClientRect();
	            const imgRect = imgWrap.getBoundingClientRect();
	            const tokEl = root.querySelector('.bm-token[data-idx="' + tokIdx + '"]');
	            if (!tokEl) return;
	            const tokRect = tokEl.getBoundingClientRect();

	            // Line start: segment center in image coordinates
	            const center = getSegCenter(segIdx);
	            const x1 = imgRect.left + (center.xPct / 100) * imgRect.width - rootRect.left;
	            const y1 = imgRect.top + (center.yPct / 100) * imgRect.height - rootRect.top;
	            const x2 = tokRect.left + tokRect.width / 2 - rootRect.left;
	            const y2 = tokRect.top - rootRect.top;

	            const color = METHOD_IS_INFLUENCE
	                ? INFLUENCE_ARROW_COLOR
	                : (value >= 0 ? 'rgba(1,109,1,0.7)' : 'rgba(221,19,19,0.7)');
	            const line = document.createElementNS('http://www.w3.org/2000/svg', 'line');
	            line.setAttribute('x1', x1);
	            line.setAttribute('y1', y1);
	            line.setAttribute('x2', x2);
	            line.setAttribute('y2', y2);
	            line.setAttribute('stroke', color);
	            line.setAttribute('stroke-width', '3');
	            line.setAttribute('stroke-linecap', 'round');
	            line.setAttribute('opacity', '0.7');
	            svg.appendChild(line);
	        }}

	        function formatLinkRow(name, value) {{
	            var color = METHOD_IS_INFLUENCE
	                ? '#34569F'
	                : (value >= 0 ? '#016d01' : '#c82828');
	            var formatted = METHOD_IS_INFLUENCE
	                ? value.toFixed(3)
	                : (value >= 0 ? '+' : '') + value.toFixed(3);
	            return '<div class="bm-link-row">' +
	                '<span>' + escHtml(name) + '</span>' +
	                '<strong style="color:' + color + '">' +
	                formatted + '</strong></div>';
	        }}

	        function onClickRegion(segIdx) {{
	            if (isStale()) return;
	            clearAll();
	            clickedSeg = segIdx;
	            // Highlight the rectangular div (fallback mode) or segment label
	            const segEl = root.querySelector('.bm-region-box[data-idx="' + segIdx + '"]');
	            if (segEl) segEl.classList.add('bm-active');
	            const segLabel = root.querySelector('.bm-seg-label[data-idx="' + segIdx + '"]');
	            if (segLabel) segLabel.classList.add('bm-seg-active');
	            // Draw pixel-accurate contour on highlight canvas
	            drawSegmentContour(segIdx);

	            const related = links.filter(l => l.seg === segIdx)
	                .sort((a, b) => Math.abs(b.value) - Math.abs(a.value));
	            related.forEach(l => {{
	                const tokEl = root.querySelector('.bm-token[data-idx="' + l.tok + '"]');
	                if (tokEl) tokEl.classList.add('bm-linked');
	                drawLine(l.seg, l.tok, l.value);
	            }});

	            if (linkList) {{
	                var regInfo = regions[segIdx] || {{}};
	                var regName = regInfo.label || ('seg_' + segIdx);
	                var regVal = regInfo.value !== undefined
	                    ? (METHOD_IS_INFLUENCE ? regInfo.value.toFixed(3) : (regInfo.value >= 0 ? '+' : '') + regInfo.value.toFixed(3))
	                    : '';
	                var header = '<div class="bm-link-header"><strong>' + escHtml(regName) + '</strong> <span style="color:#888">' + regVal + '</span></div>';
	                if (related.length === 0) {{
	                    linkList.innerHTML = header + '<div class="bm-hint">No cross-modal links for this region.</div>';
	                }} else {{
	                    linkList.innerHTML = header + related.map(l => formatLinkRow(l.tok_label, l.value)).join('');
	                }}
	            }}
	        }}

	        function onClickToken(tokIdx) {{
	            if (isStale()) return;
	            clearAll();
	            const tokEl = root.querySelector('.bm-token[data-idx="' + tokIdx + '"]');
	            if (tokEl) tokEl.classList.add('bm-active');

	            const related = links.filter(l => l.tok === tokIdx)
	                .sort((a, b) => Math.abs(b.value) - Math.abs(a.value));
	            related.forEach(l => {{
	                const segEl = root.querySelector('.bm-region-box[data-idx="' + l.seg + '"]');
	                if (segEl) segEl.classList.add('bm-linked');
	                const segLabel = root.querySelector('.bm-seg-label[data-idx="' + l.seg + '"]');
	                if (segLabel) segLabel.classList.add('bm-seg-active');
	                drawLine(l.seg, l.tok, l.value);
	            }});
	            // Only the strongest-linked segment is outlined: drawSegmentContour
	            // clears the highlight canvas on every call.
	            if (related.length > 0) drawSegmentContour(related[0].seg);

	            if (linkList) {{
	                var tokName = tokIdx < tokenLabels.length ? tokenLabels[tokIdx] : 'word ' + tokIdx;
	                var header = '<div class="bm-link-header"><strong>"' + escHtml(tokName) + '"</strong></div>';
	                if (related.length === 0) {{
	                    linkList.innerHTML = header + '<div class="bm-hint">No cross-modal links for this word.</div>';
	                }} else {{
	                    linkList.innerHTML = header + related.map(l => formatLinkRow(l.seg_label, l.value)).join('');
	                }}
	            }}
	        }}

	        // Click on the image: use label map canvas for pixel-perfect detection
	        if (imgWrap) {{
	            imgWrap.addEventListener('click', function(e) {{
	                if (isStale()) return;
	                const segIdx = getSegAtPixel(e.clientX, e.clientY);
	                if (segIdx >= 0) {{
	                    onClickRegion(segIdx);
	                }}
	            }});

	            // Hover-dim: highlight the segment under the cursor by dimming the rest.
	            // Skipped on touch devices where there's no real hover.
	            if (HOVER_SUPPORTED) {{
	                imgWrap.addEventListener('mousemove', function(e) {{
	                    if (isStale() || hoverRafPending) return;
	                    hoverRafPending = true;
	                    requestAnimationFrame(function() {{
	                        hoverRafPending = false;
	                        const seg = getSegAtPixel(e.clientX, e.clientY);
	                        if (seg === hoveredSeg) return;
	                        hoveredSeg = seg;
	                        drawHoverDim(seg);
	                    }});
	                }});
	                imgWrap.addEventListener('mouseleave', function() {{
	                    if (isStale()) return;
	                    hoveredSeg = -1;
	                    // Restore the click-highlight if any, else clear the canvas.
	                    if (clickedSeg >= 0) drawSegmentContour(clickedSeg);
	                    else if (hlCtx && hlCanvas) hlCtx.clearRect(0, 0, hlCanvas.width, hlCanvas.height);
	                }});
	            }}
	        }}

	        // Also allow clicking the bbox overlay divs directly
	        regionEls.forEach(el => {{
	            el.addEventListener('click', function(e) {{
	                if (isStale()) return;
	                e.stopPropagation();
	                // If label map is available, prefer pixel detection
	                if (labelMapReady && labelCanvas && labelCtx) {{
	                    const segIdx = getSegAtPixel(e.clientX, e.clientY);
	                    if (segIdx >= 0) {{ onClickRegion(segIdx); return; }}
	                }}
	                onClickRegion(parseInt(el.dataset.idx, 10));
	            }});
	        }});
	        tokenEls.forEach(el => {{
	            el.addEventListener('click', () => onClickToken(parseInt(el.dataset.idx, 10)));
	        }});

	        const resetBtn = root.querySelector('.bm-reset');
	        if (resetBtn) resetBtn.addEventListener('click', clearAll);
	    }})();
	    """

	    script_id = f"{view_id}-script"
	    loader_id = f"{view_id}-loader"

	    if is_influence:
	        legend_html = """
	        <div class="bm-legend-item">
	            <div class="bm-legend-swatch" style="background:rgba(52,102,177,0.6);"></div>
	            <span>Influence strength (always positive — higher = more important)</span>
	        </div>
	        <div class="bm-legend-item">
	            <div class="bm-legend-swatch" style="background:rgb(225,225,223);"></div>
	            <span>Neutral / not scored</span>
	        </div>
	        <div class="bm-legend-item">
	            <div class="bm-legend-swatch" style="background:rgba(52,102,177,0.7); width:30px; height:3px; border-radius:2px;"></div>
	            <span>Line thickness = cross-modal interaction strength</span>
	        </div>
	        """
	    else:
	        legend_html = f"""
	        <div class="bm-legend-item">
	            <div class="bm-legend-swatch" style="background:rgba(1,109,1,0.6);"></div>
	            <span>Positive {escape(method_display)} (contributes to matching)</span>
	        </div>
	        <div class="bm-legend-item">
	            <div class="bm-legend-swatch" style="background:rgba(221,19,19,0.6);"></div>
	            <span>Negative {escape(method_display)} (detracts from matching)</span>
	        </div>
	        <div class="bm-legend-item">
	            <div class="bm-legend-swatch" style="background:rgb(225,225,223);"></div>
	            <span>Neutral / not scored</span>
	        </div>
	        <div class="bm-legend-item">
	            <div class="bm-legend-swatch" style="background:rgba(1,109,1,0.7); width:30px; height:3px; border-radius:2px;"></div>
	            <span>Green line = positive cross-modal interaction</span>
	        </div>
	        <div class="bm-legend-item">
	            <div class="bm-legend-swatch" style="background:rgba(221,19,19,0.7); width:30px; height:3px; border-radius:2px;"></div>
	            <span>Red line = negative cross-modal interaction</span>
	        </div>
	        """

	    return f"""
	    <style>
	    .bm-root {{
	        font-family: 'Segoe UI', 'Helvetica Neue', Arial, sans-serif;
	        background: #f7f5f2;
	        border: 1px solid #e3e3ec;
	        border-radius: 16px;
	        padding: 16px;
	        position: relative;
	    }}
	    .bm-title {{
	        font-size: 15px; font-weight: 700; color: #2d1f4a;
	        margin-bottom: 10px;
	    }}
	    .bm-layout {{
	        display: grid;
	        grid-template-columns: 1fr 1fr;
	        gap: 14px;
	    }}
	    .bm-img-panel {{
	        background: #fff; border: 1px solid #e3e3ec; border-radius: 14px;
	        padding: 12px; box-shadow: 0 8px 16px rgba(32,25,40,0.06);
	    }}
	    .bm-img-wrap {{
	        position: relative; border-radius: 12px; overflow: hidden;
	        cursor: crosshair;
	    }}
	    .bm-img-wrap img.bm-main-img {{
	        display: block; width: 100%; height: auto;
	    }}
	    /* Region bounding box overlay (real UnSAM bbox or fallback strip) */
	    .bm-region-box {{
	        position: absolute;
	        cursor: pointer; transition: all 0.15s ease;
	        display: flex; align-items: flex-start; justify-content: flex-start;
	        box-sizing: border-box;
	        border-radius: 4px;
	    }}
	    .bm-region-box:hover {{
	        outline: 2px solid rgba(255,255,255,0.9);
	        z-index: 10;
	    }}
	    .bm-region-box.bm-active {{
	        outline: 3px solid #fff;
	        z-index: 20;
	        box-shadow: 0 0 0 2px #111, inset 0 0 0 2000px rgba(255,255,255,0.12);
	    }}
	    .bm-region-box.bm-linked {{
	        outline: 2px dashed #c82828;
	        z-index: 15;
	    }}
	    .bm-box-label {{
	        font-size: 11px; font-weight: 700; color: #fff;
	        background: rgba(0,0,0,0.7); padding: 2px 6px;
	        border-radius: 0 0 6px 0; pointer-events: none;
	        white-space: nowrap;
	    }}
	    .bm-right-panel {{
	        display: flex; flex-direction: column; gap: 10px;
	    }}
	    .bm-tok-section {{
	        background: #fff; border: 1px solid #e3e3ec; border-radius: 14px;
	        padding: 12px; box-shadow: 0 8px 16px rgba(32,25,40,0.06);
	    }}
	    .bm-section-title {{
	        font-size: 13px; font-weight: 600; color: #555; margin-bottom: 8px;
	    }}
	    .bm-tok-grid {{
	        display: flex; flex-wrap: wrap; gap: 6px;
	    }}
	    .bm-token {{
	        display: inline-flex; align-items: center; justify-content: center;
	        padding: 5px 10px; border-radius: 10px; font-size: 13px; font-weight: 600;
	        color: #2a2140; border: 1px solid rgba(60,44,80,0.12);
	        cursor: pointer; transition: all 0.2s ease;
	    }}
	    .bm-token:hover {{ box-shadow: 0 4px 8px rgba(0,0,0,0.15); }}
	    .bm-token.bm-active {{
	        outline: 3px solid #111;
	        box-shadow: 0 8px 16px rgba(1,109,1,0.25);
	    }}
	    .bm-token.bm-linked {{
	        outline: 2px solid #016d01;
	        box-shadow: 0 4px 10px rgba(1,109,1,0.2);
	    }}
	    .bm-link-list {{
	        display: flex; flex-direction: column; gap: 4px;
	        max-height: 200px; overflow-y: auto;
	    }}
	    .bm-link-row {{
	        display: flex; justify-content: space-between; gap: 8px;
	        font-size: 12px; color: #3a2f50;
	        background: #f8f6ff; border-radius: 8px; padding: 5px 10px;
	    }}
	    .bm-hint {{ font-size: 12px; color: #888; font-style: italic; }}
	    .bm-link-header {{
	        font-size: 13px; color: #2d1f4a; padding: 4px 10px;
	        background: #eee8ff; border-radius: 8px; margin-bottom: 4px;
	    }}
	    .bm-legend {{
	        display: flex; flex-wrap: wrap; gap: 12px; padding: 8px 12px;
	        background: #fff; border: 1px solid #e3e3ec; border-radius: 10px;
	        margin-top: 10px; font-size: 11px; color: #444;
	    }}
	    .bm-legend-item {{
	        display: flex; align-items: center; gap: 5px;
	    }}
	    .bm-legend-swatch {{
	        width: 14px; height: 14px; border-radius: 3px; border: 1px solid #ccc;
	    }}
	    .bm-controls {{
	        display: flex; justify-content: space-between; align-items: center;
	    }}
	    .bm-reset {{
	        border: none; background: #eee; border-radius: 999px;
	        padding: 4px 12px; font-size: 11px; font-weight: 600;
	        color: #555; cursor: pointer;
	    }}
	    .bm-highlight-canvas {{
	        position: absolute; top: 0; left: 0; width: 100%; height: 100%;
	        pointer-events: none; z-index: 5;
	    }}
	    .bm-seg-label {{
	        position: absolute; transform: translate(-50%, -50%);
	        font-size: 14px; font-weight: 700; color: #fff;
	        background: rgba(0,0,0,0.55); padding: 2px 7px;
	        border-radius: 4px; pointer-events: none; z-index: 6;
	        white-space: nowrap;
	    }}
	    .bm-seg-label.bm-seg-active {{
	        background: rgba(0,229,255,0.85); color: #000;
	        box-shadow: 0 0 6px rgba(0,229,255,0.6);
	    }}
	    .bm-svg {{
	        position: absolute; top: 0; left: 0; width: 100%; height: 100%;
	        pointer-events: none; z-index: 50;
	    }}
	    @media (max-width: 900px) {{
	        .bm-layout {{ grid-template-columns: 1fr; }}
	    }}
	    </style>
	    <div class="bm-root" id="{view_id}">
	        <svg class="bm-svg" xmlns="http://www.w3.org/2000/svg"></svg>
	        <div class="bm-title">{escape(title or "")}</div>
	        <div class="bm-layout">
	            <div class="bm-img-panel">
	                <div class="bm-section-title">Image Regions (click a region to see linked words)</div>
	                <div class="bm-img-wrap">
	                    <img class="bm-main-img" src="{escape(display_img_url)}" alt="segmented medical image" />
	                    <canvas class="bm-highlight-canvas"></canvas>
	                    {region_overlays_html}
	                </div>
	            </div>
	            <div class="bm-right-panel">
	                <div class="bm-tok-section">
	                    <div class="bm-controls">
	                        <div class="bm-section-title">Caption Words (click a word to see linked regions)</div>
	                        <button type="button" class="bm-reset">Reset</button>
	                    </div>
	                    <div class="bm-tok-grid">{tokens_html}</div>
	                </div>
	                <div class="bm-tok-section">
	                    <div class="bm-section-title">Cross-Modal Links</div>
	                    <div class="bm-link-list">
	                        <div class="bm-hint">Click a segment or word to see connections.</div>
	                    </div>
	                </div>
	            </div>
	        </div>
	        <div class="bm-legend">
	            <strong>Legend ({escape(method_display)}):</strong>
	            {legend_html}
	        </div>
	        <img class="bm-loader" id="{loader_id}" alt=""
	             src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///ywAAAAAAQABAAACAUwAOw=="
	             onload="(function(){{var s=document.getElementById('{script_id}');
	             if(!s||!s.textContent){{return;}}try{{(new Function(s.textContent))();}}catch(e){{
	             console.warn('bm interaction init failed',e);}}}})()"/>
	        <script type="text/plain" id="{script_id}">{js_code}</script>
	    </div>
	    """