AttrLLM / visualization /plotting /mm_interactions.py
Qingpeng Kong
clean initial state
3e72399
from __future__ import annotations
import json
import uuid
from html import escape
from typing import Any, Dict, List, Sequence, Tuple
def _to_data_url(raw_b64: str) -> str:
    """Return *raw_b64* in a form usable as an ``<img src>`` value.

    A falsy input yields ``""``. Input that already starts with ``data:``
    is passed through unchanged. Anything else is treated as a bare base64
    payload and prefixed with a PNG data-URL header.
    """
    if raw_b64 and not raw_b64.startswith("data:"):
        return f"data:image/png;base64,{raw_b64}"
    # Either empty/None (normalised to "") or already a data URL.
    return raw_b64 or ""
def _strip_prefix(label: str) -> str:
    """Drop everything up to and including the first ``:`` in *label*.

    A label without a colon is returned unchanged.
    """
    _head, separator, remainder = label.partition(":")
    return remainder if separator else label
def _value_to_color(value: float, max_abs: float) -> str:
    """Map a signed score to a CSS ``rgb(r, g, b)`` string.

    The colour is interpolated linearly from a neutral grey towards green
    for non-negative values and towards red for negative ones. The blend
    factor is ``abs(value) / max_abs`` capped at 1. A non-positive
    *max_abs* always yields the neutral grey.
    """
    if max_abs <= 0:
        return "rgb(225, 225, 223)"

    neutral = (225, 225, 223)
    endpoint = (1, 109, 1) if value >= 0 else (221, 19, 19)
    weight = min(1.0, abs(value) / max_abs)

    # Interpolate each channel independently between neutral and endpoint.
    red, green, blue = (
        int(round(start + (stop - start) * weight))
        for start, stop in zip(neutral, endpoint)
    )
    return f"rgb({red}, {green}, {blue})"
def _format_region_boxes(
    regions: Sequence[Dict[str, Any]],
    image_size: Tuple[int, int] | None,
) -> List[str]:
    """Render one absolutely-positioned ``<div class='mm-region'>`` per region.

    Args:
        regions: Region dicts. Each is read for ``bbox`` (four numbers,
            unpacked as ``x0, y0, x1, y1``), ``index`` (defaults to ``0``)
            and ``label`` (defaults to ``"Region <index + 1>"``).
        image_size: ``(width, height)`` used to convert the bbox into
            percentages of the image.

    Returns:
        HTML snippets, one per region that has a four-element ``bbox``.
        The list is empty when *image_size* is falsy or either dimension is
        not positive. Position and size percentages are clamped to
        ``[0, 100]``.
    """
    if not image_size:
        return []
    width, height = image_size
    # The dimensions never change inside the loop, so reject them up front.
    if width <= 0 or height <= 0:
        return []

    boxes: List[str] = []
    for region in regions:
        bbox = region.get("bbox")
        if not bbox or len(bbox) != 4:
            continue
        x0, y0, x1, y1 = (float(v) for v in bbox)

        left = max(0.0, min(100.0, (x0 / width) * 100))
        top = max(0.0, min(100.0, (y0 / height) * 100))
        w_pct = max(0.0, min(100.0, ((x1 - x0) / width) * 100))
        h_pct = max(0.0, min(100.0, ((y1 - y0) / height) * 100))

        idx = region.get("index", 0)
        # Both values end up inside single-quoted attributes: coerce to str
        # (labels may be non-string) and escape so quotes cannot break out.
        idx_attr = escape(str(idx))
        label = escape(str(region.get("label") or f"Region {int(idx) + 1}"))

        boxes.append(
            "<div class='mm-region' "
            f"data-region-index='{idx_attr}' "
            f"title='{label}' "
            f"style='left:{left:.3f}%;top:{top:.3f}%;width:{w_pct:.3f}%;height:{h_pct:.3f}%;'>"
            "</div>"
        )
    return boxes
def _mm_script_safe_json(obj: Any) -> str:
    """Serialize *obj* to JSON that can be inlined inside a ``<script>`` element."""
    # "<", ">" and "&" can only occur inside JSON string literals, where the
    # \uXXXX form is equivalent; escaping them stops a value such as
    # "</script>" from terminating the surrounding element early.
    return (
        json.dumps(obj)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )


def _mm_top_edges(
    interactions: Sequence[Dict[str, Any]],
    top_k: int,
) -> List[Dict[str, Any]]:
    """Return the *top_k* pairwise interactions with the largest ``abs(value)``."""
    edges: List[Dict[str, Any]] = []
    for item in interactions:
        indices = item.get("indices")
        if not indices or len(indices) != 2:
            continue
        try:
            i = int(indices[0])
            j = int(indices[1])
            value = float(item.get("value", 0.0))
        except (TypeError, ValueError, OverflowError, LookupError):
            # Best effort: entries that cannot be converted are skipped.
            continue
        if i == j:
            continue
        edges.append({"i": i, "j": j, "value": value})
    edges.sort(key=lambda entry: abs(entry["value"]), reverse=True)
    return edges[: max(0, int(top_k))]


def create_multimodal_interaction_html(
    image_b64: str,
    overlay_b64: str | None,
    regions: Sequence[Dict[str, Any]],
    features: Sequence[Dict[str, Any]],
    interactions: Sequence[Dict[str, Any]],
    *,
    image_size: Tuple[int, int] | None = None,
    top_k: int = 20,
    title: str = "Multimodal Interaction View",
) -> str:
    """Build a self-contained HTML widget linking image regions and feature tokens.

    The markup holds a ``<style>`` block, the image with an optional overlay
    and clickable region boxes, a grid of feature tokens coloured by value,
    and an inline script. Clicking a token or region marks it active, marks
    the partners of its six strongest interactions as linked, and lists
    them; the reset button clears the marks.

    Args:
        image_b64: Base64 image payload or a ready ``data:`` URL. When
            falsy, a short "No image available." placeholder is returned.
        overlay_b64: Optional overlay image in the same format.
        regions: Region dicts, read for ``index``, ``label`` and ``bbox``.
        features: Feature dicts, read for ``index``, ``value``,
            ``modality`` (defaults to ``"text"``), ``ref_index`` (defaults
            to ``index``) and ``feature`` (the label; any prefix up to the
            first ``:`` is removed). Features with modality ``"image"``
            take their label from the region whose index equals
            ``ref_index`` when one exists.
        interactions: Dicts with a two-element ``indices`` and a ``value``.
            Malformed entries and self-interactions are skipped.
        image_size: ``(width, height)`` used to position region boxes;
            without it no boxes are drawn.
        top_k: Maximum number of interactions (by ``abs(value)``) embedded.
        title: Heading shown above the image.

    Returns:
        The HTML fragment as a string.
    """
    if not image_b64:
        return "<div class='mm-empty'>No image available.</div>"

    view_id = f"mm-interaction-{uuid.uuid4().hex[:8]}"
    # URLs are placed inside single-quoted src attributes, so escape them.
    image_url = escape(_to_data_url(image_b64))
    overlay_url = escape(_to_data_url(overlay_b64 or ""))

    max_abs = max((abs(float(item.get("value", 0.0))) for item in features), default=0.0)
    region_labels = {
        int(region.get("index", 0)): str(region.get("label") or f"Region {int(region.get('index', 0)) + 1}")
        for region in regions
    }

    tokens_html: List[str] = []
    feature_meta: List[Dict[str, Any]] = []
    for item in features:
        idx = int(item.get("index", 0))
        value = float(item.get("value", 0.0))
        modality = item.get("modality") or "text"
        ref_index = int(item.get("ref_index", idx))
        label = _strip_prefix(str(item.get("feature", "")))
        if modality == "image":
            label = region_labels.get(ref_index, label or f"Region {ref_index + 1}")
        display = escape(label)
        color = _value_to_color(value, max_abs)
        tooltip = escape(f"{label}: {value:+.4f}")
        # modality is caller-supplied text; escape it for the attribute.
        modality_attr = escape(str(modality))
        tokens_html.append(
            "<span class='mm-token' "
            f"data-feature-index='{idx}' data-modality='{modality_attr}' data-ref-index='{ref_index}' "
            f"title='{tooltip}' style='background:{color};'>"
            f"{display}"
            "</span>"
        )
        # The script receives the raw (unescaped) values via JSON.
        feature_meta.append(
            {
                "index": idx,
                "modality": modality,
                "ref_index": ref_index,
                "label": label,
                "value": value,
            }
        )

    edges = _mm_top_edges(interactions, top_k)
    payload = _mm_script_safe_json({"edges": edges, "features": feature_meta})

    region_boxes = _format_region_boxes(regions, image_size)
    no_edges_note = "" if edges else "<div class='mm-note'>No interactions to display.</div>"
    overlay_html = f"<img class='mm-overlay' src='{overlay_url}' alt='overlay' />" if overlay_url else ""
    script_id = f"{view_id}-script"
    loader_id = f"{view_id}-loader"

    js_code = (
        f"const root=document.getElementById('{view_id}');"
        "if(!root){return;}"
        f"const data={payload};"
        "const edges=data.edges||[];"
        "const features=data.features||[];"
        "const tokens=[...root.querySelectorAll('.mm-token')];"
        "const regions=[...root.querySelectorAll('.mm-region')];"
        "const linkPanel=root.querySelector('.mm-link-list');"
        "const tokenMap=new Map(tokens.map(el=>[el.dataset.featureIndex,el]));"
        "const regionMap=new Map(regions.map(el=>[el.dataset.regionIndex,el]));"
        "const featureMap=new Map(features.map(f=>[String(f.index),f]));"
        "const regionToFeature=new Map("
        "features.filter(f=>f.modality==='image').map(f=>[String(f.ref_index),String(f.index)])"
        ");"
        "const adjacency=new Map();"
        "edges.forEach(edge=>{"
        "const a=String(edge.i);const b=String(edge.j);"
        "if(!adjacency.has(a)){adjacency.set(a,[]);}if(!adjacency.has(b)){adjacency.set(b,[]);}"
        "adjacency.get(a).push(edge);adjacency.get(b).push(edge);"
        "});"
        "function clearActive(){"
        "tokens.forEach(el=>el.classList.remove('is-active','is-linked'));"
        "regions.forEach(el=>el.classList.remove('is-active','is-linked'));"
        "if(linkPanel){linkPanel.innerHTML='';}"
        "}"
        "function markRegion(regionIdx, cls){"
        "const el=regionMap.get(String(regionIdx));"
        "if(el){el.classList.add(cls);}"
        "}"
        "function markToken(featureIdx, cls){"
        "const el=tokenMap.get(String(featureIdx));"
        "if(el){el.classList.add(cls);}"
        "}"
        # Rows are built as DOM nodes with textContent so that feature
        # labels are never interpreted as markup.
        "function updateLinks(links, focusIdx){"
        "if(!linkPanel){return;}"
        "if(!links.length){linkPanel.innerHTML='<div class=\"mm-note\">No linked features.</div>';return;}"
        "linkPanel.innerHTML='';"
        "links.forEach(link=>{"
        "const other=String(link.i)===focusIdx?String(link.j):String(link.i);"
        "const meta=featureMap.get(other)||{};"
        "const row=document.createElement('div');"
        "row.className='mm-link-row';"
        "const name=document.createElement('span');"
        "name.textContent=String(meta.label||other);"
        "const strength=document.createElement('strong');"
        "strength.textContent=Number(link.value||0).toFixed(3);"
        "row.appendChild(name);row.appendChild(strength);"
        "linkPanel.appendChild(row);"
        "});"
        "}"
        "function highlightFeature(featureIdx){"
        "const focusIdx=String(featureIdx);"
        "clearActive();"
        "markToken(focusIdx,'is-active');"
        "const meta=featureMap.get(focusIdx);"
        "if(meta&&meta.modality==='image'){markRegion(meta.ref_index,'is-active');}"
        "const links=(adjacency.get(focusIdx)||[]).slice();"
        "links.sort((a,b)=>Math.abs(b.value)-Math.abs(a.value));"
        "const topLinks=links.slice(0,6);"
        "topLinks.forEach(link=>{"
        "const other=String(link.i)===focusIdx?String(link.j):String(link.i);"
        "markToken(other,'is-linked');"
        "const otherMeta=featureMap.get(other);"
        "if(otherMeta&&otherMeta.modality==='image'){markRegion(otherMeta.ref_index,'is-linked');}"
        "});"
        "updateLinks(topLinks,focusIdx);"
        "}"
        "tokens.forEach(el=>{"
        "el.addEventListener('click',()=>highlightFeature(el.dataset.featureIndex));"
        "});"
        "regions.forEach(el=>{"
        "el.addEventListener('click',()=>{"
        "const featureIdx=regionToFeature.get(el.dataset.regionIndex);"
        "if(featureIdx){highlightFeature(featureIdx);}"
        "});"
        "});"
        "const resetBtn=root.querySelector('.mm-reset');"
        "if(resetBtn){resetBtn.addEventListener('click',clearActive);}"
    )

    style_html = (
        "<style>"
        ".mm-root{font-family:'Segoe UI','Helvetica Neue',Arial,sans-serif;"
        "background:#f7f5f2;border:1px solid #e3e3ec;border-radius:18px;padding:20px;"
        "display:grid;grid-template-columns:minmax(0,1.2fr) minmax(0,1fr);gap:18px;}"
        ".mm-image-panel{background:#fff;border:1px solid #e3e3ec;border-radius:16px;"
        "padding:16px;box-shadow:0 12px 24px rgba(32,25,40,0.08);}"
        ".mm-image-wrap{position:relative;border-radius:14px;overflow:hidden;}"
        ".mm-image-wrap img{display:block;width:100%;height:auto;}"
        ".mm-overlay{position:absolute;left:0;top:0;width:100%;height:100%;pointer-events:none;}"
        ".mm-region{position:absolute;border:2px solid rgba(61,44,54,0.35);"
        "background:rgba(255,255,255,0.0);border-radius:8px;transition:all .2s ease;"
        "cursor:pointer;}"
        ".mm-region.is-active{border-color:#111111;box-shadow:0 0 0 3px rgba(1,109,1,0.35);}"
        ".mm-region.is-linked{border-color:#111111;box-shadow:0 0 0 3px rgba(221,19,19,0.35);}"
        ".mm-token-panel{background:#fff;border:1px solid #e3e3ec;border-radius:16px;"
        "padding:16px;box-shadow:0 12px 24px rgba(32,25,40,0.08);display:flex;"
        "flex-direction:column;gap:14px;}"
        ".mm-title{font-size:16px;font-weight:700;color:#2d1f4a;}"
        ".mm-token-grid{display:flex;flex-wrap:wrap;gap:8px;}"
        ".mm-token{display:inline-flex;align-items:center;justify-content:center;"
        "padding:6px 10px;border-radius:12px;font-size:13px;font-weight:600;"
        "color:#2a2140;border:1px solid rgba(60,44,80,0.12);cursor:pointer;"
        "transition:all .2s ease;}"
        ".mm-token.is-active{outline:2px solid #111111;box-shadow:0 10px 18px rgba(1,109,1,0.2);}"
        ".mm-token.is-linked{outline:2px solid #111111;box-shadow:0 10px 18px rgba(221,19,19,0.2);}"
        ".mm-controls{display:flex;justify-content:space-between;align-items:center;gap:8px;}"
        ".mm-reset{border:none;background:#f1efe9;border-radius:999px;padding:6px 12px;"
        "font-size:12px;font-weight:600;color:#4a3b4e;cursor:pointer;}"
        ".mm-link-list{display:flex;flex-direction:column;gap:6px;}"
        ".mm-link-row{display:flex;justify-content:space-between;gap:8px;font-size:12px;"
        "color:#3a2f50;background:#f8f6ff;border-radius:10px;padding:6px 10px;}"
        ".mm-link-row strong{color:#2d1f4a;}"
        ".mm-note{font-size:12px;color:#6f5a72;}"
        ".mm-empty{font-size:14px;color:#7f6f86;}"
        "@media (max-width: 900px){"
        ".mm-root{grid-template-columns:1fr;}}"
        "</style>"
    )

    return (
        f"{style_html}"
        f"<div class='mm-root' id='{view_id}'>"
        "<div class='mm-image-panel'>"
        f"<div class='mm-title'>{escape(title)}</div>"
        "<div class='mm-image-wrap'>"
        f"<img src='{image_url}' alt='input image' />"
        f"{overlay_html}"
        f"{''.join(region_boxes)}"
        "</div>"
        "</div>"
        "<div class='mm-token-panel'>"
        "<div class='mm-controls'>"
        "<div class='mm-title'>Features</div>"
        "<button type='button' class='mm-reset'>Reset</button>"
        "</div>"
        f"<div class='mm-token-grid'>{''.join(tokens_html)}</div>"
        f"{no_edges_note}"
        "<div class='mm-link-list'></div>"
        "</div>"
        # The script is stored as inert text/plain and run from the onload
        # handler of a 1x1 placeholder image via new Function(...).
        f"<img class='mm-loader' id='{loader_id}' alt='' "
        "src='data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///ywAAAAAAQABAAACAUwAOw==' "
        f"onload=\"(function(){{var s=document.getElementById('{script_id}');"
        "if(!s||!s.textContent){return;}try{(new Function(s.textContent))();}catch(e){"
        "console.warn('mm interaction init failed',e);}})()\" />"
        f"<script type='text/plain' id='{script_id}'>{js_code}</script>"
        "</div>"
    )