| from __future__ import annotations |
|
|
| import json |
| import uuid |
| from html import escape |
| from typing import Any, Dict, List, Sequence, Tuple |
|
|
|
|
| def _to_data_url(raw_b64: str) -> str: |
| if not raw_b64: |
| return "" |
| if raw_b64.startswith("data:"): |
| return raw_b64 |
| return f"data:image/png;base64,{raw_b64}" |
|
|
|
|
| def _strip_prefix(label: str) -> str: |
| if ":" in label: |
| return label.split(":", 1)[1] |
| return label |
|
|
|
|
| def _value_to_color(value: float, max_abs: float) -> str: |
| if max_abs <= 0: |
| return "rgb(225, 225, 223)" |
| norm = min(1.0, abs(value) / max_abs) |
| if value >= 0: |
| base = (225, 225, 223) |
| target = (1, 109, 1) |
| else: |
| base = (225, 225, 223) |
| target = (221, 19, 19) |
| r = int(round(base[0] + (target[0] - base[0]) * norm)) |
| g = int(round(base[1] + (target[1] - base[1]) * norm)) |
| b = int(round(base[2] + (target[2] - base[2]) * norm)) |
| return f"rgb({r}, {g}, {b})" |
|
|
|
|
| def _format_region_boxes( |
| regions: Sequence[Dict[str, Any]], |
| image_size: Tuple[int, int] | None, |
| ) -> List[str]: |
| if not image_size: |
| return [] |
| width, height = image_size |
| boxes = [] |
| for region in regions: |
| bbox = region.get("bbox") |
| if not bbox or len(bbox) != 4: |
| continue |
| x0, y0, x1, y1 = [float(v) for v in bbox] |
| if width <= 0 or height <= 0: |
| continue |
| left = max(0.0, min(100.0, (x0 / width) * 100)) |
| top = max(0.0, min(100.0, (y0 / height) * 100)) |
| w_pct = max(0.0, min(100.0, ((x1 - x0) / width) * 100)) |
| h_pct = max(0.0, min(100.0, ((y1 - y0) / height) * 100)) |
| idx = region.get("index", 0) |
| label = escape(region.get("label") or f"Region {int(idx) + 1}") |
| boxes.append( |
| "<div class='mm-region' " |
| f"data-region-index='{idx}' " |
| f"title='{label}' " |
| f"style='left:{left:.3f}%;top:{top:.3f}%;width:{w_pct:.3f}%;height:{h_pct:.3f}%;'>" |
| "</div>" |
| ) |
| return boxes |
|
|
|
|
def create_multimodal_interaction_html(
    image_b64: str,
    overlay_b64: str | None,
    regions: Sequence[Dict[str, Any]],
    features: Sequence[Dict[str, Any]],
    interactions: Sequence[Dict[str, Any]],
    *,
    image_size: Tuple[int, int] | None = None,
    top_k: int = 20,
    title: str = "Multimodal Interaction View",
) -> str:
    """Build a self-contained HTML widget linking image regions and features.

    The markup contains a stylesheet, the image (with optional overlay and
    clickable region boxes), a grid of colour-coded feature tokens, and a
    script that highlights a clicked feature together with its strongest
    interaction partners.

    Args:
        image_b64: Base64 image data or a ready ``data:`` URL.  When falsy a
            short "No image available." placeholder is returned instead.
        overlay_b64: Optional overlay image in the same form as ``image_b64``.
        regions: Region dicts; ``"index"`` and ``"label"`` are read here and
            the sequence is also passed to ``_format_region_boxes``.
        features: Feature dicts; the keys read are ``"index"``, ``"value"``,
            ``"modality"`` (defaults to ``"text"``), ``"ref_index"``
            (defaults to the feature index) and ``"feature"``.
        interactions: Dicts with ``"indices"`` (a pair of feature indices)
            and ``"value"``.  Malformed entries and self-pairs are skipped.
        image_size: ``(width, height)`` forwarded to ``_format_region_boxes``.
        top_k: Maximum number of interactions kept, ranked by absolute value.
        title: Heading shown above the image (HTML-escaped).

    Returns:
        The HTML fragment as a string.

    Raises:
        TypeError, ValueError: if a region or feature field that must be
            numeric cannot be converted.
    """
    if not image_b64:
        return "<div class='mm-empty'>No image available.</div>"

    view_id = f"mm-interaction-{uuid.uuid4().hex[:8]}"
    # The URLs end up inside single-quoted attributes, so escape them to keep
    # a stray quote in caller-supplied data from breaking out of the attribute.
    image_url = escape(_to_data_url(image_b64), quote=True)
    overlay_url = escape(_to_data_url(overlay_b64 or ""), quote=True)

    max_abs = max((abs(float(item.get("value", 0.0))) for item in features), default=0.0)

    region_labels: Dict[int, str] = {}
    for region in regions:
        region_idx = int(region.get("index", 0))
        region_labels[region_idx] = str(region.get("label") or f"Region {region_idx + 1}")

    tokens_html: List[str] = []
    feature_meta: List[Dict[str, Any]] = []
    for item in features:
        idx = int(item.get("index", 0))
        value = float(item.get("value", 0.0))
        modality = item.get("modality") or "text"
        ref_index = int(item.get("ref_index", idx))
        raw_label = str(item.get("feature", ""))
        label = _strip_prefix(raw_label)
        if modality == "image":
            # Image features are shown under their region's label.
            label = region_labels.get(ref_index, label or f"Region {ref_index + 1}")
        display = escape(label)
        color = _value_to_color(value, max_abs)
        tooltip = escape(f"{label}: {value:+.4f}")
        # Modality comes from caller data and is written into an attribute.
        modality_attr = escape(str(modality), quote=True)
        tokens_html.append(
            "<span class='mm-token' "
            f"data-feature-index='{idx}' data-modality='{modality_attr}' data-ref-index='{ref_index}' "
            f"title='{tooltip}' style='background:{color};'>"
            f"{display}"
            "</span>"
        )
        feature_meta.append(
            {
                "index": idx,
                "modality": modality,
                "ref_index": ref_index,
                "label": label,
                "value": value,
            }
        )

    edges: List[Dict[str, Any]] = []
    for item in interactions:
        indices = item.get("indices")
        try:
            if not indices or len(indices) != 2:
                continue
            i = int(indices[0])
            j = int(indices[1])
            value = float(item.get("value", 0.0))
        except (TypeError, ValueError, OverflowError, LookupError):
            # Best effort: an entry that cannot be parsed is dropped.
            continue
        if i == j:
            continue
        edges.append({"i": i, "j": j, "value": value})
    edges.sort(key=lambda entry: abs(entry["value"]), reverse=True)
    edges = edges[: max(0, int(top_k))]

    # The JSON is embedded verbatim inside a <script> element, whose content
    # ends at the first "</script".  Rewriting <, > and & as \uXXXX escapes
    # keeps the JSON/JS value identical while making it inert as HTML.
    payload = (
        json.dumps(
            {
                "edges": edges,
                "features": feature_meta,
            }
        )
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )

    region_boxes = _format_region_boxes(regions, image_size)
    no_edges_note = "" if edges else "<div class='mm-note'>No interactions to display.</div>"
    overlay_html = f"<img class='mm-overlay' src='{overlay_url}' alt='overlay' />" if overlay_url else ""

    script_id = f"{view_id}-script"
    loader_id = f"{view_id}-loader"
    js_code = (
        f"const root=document.getElementById('{view_id}');"
        "if(!root){return;}"
        f"const data={payload};"
        "const edges=data.edges||[];"
        "const features=data.features||[];"
        "const tokens=[...root.querySelectorAll('.mm-token')];"
        "const regions=[...root.querySelectorAll('.mm-region')];"
        "const linkPanel=root.querySelector('.mm-link-list');"
        "const tokenMap=new Map(tokens.map(el=>[el.dataset.featureIndex,el]));"
        "const regionMap=new Map(regions.map(el=>[el.dataset.regionIndex,el]));"
        "const featureMap=new Map(features.map(f=>[String(f.index),f]));"
        "const regionToFeature=new Map("
        "features.filter(f=>f.modality==='image').map(f=>[String(f.ref_index),String(f.index)])"
        ");"
        "const adjacency=new Map();"
        "edges.forEach(edge=>{"
        "const a=String(edge.i);const b=String(edge.j);"
        "if(!adjacency.has(a)){adjacency.set(a,[]);}if(!adjacency.has(b)){adjacency.set(b,[]);}"
        "adjacency.get(a).push(edge);adjacency.get(b).push(edge);"
        "});"
        "function clearActive(){"
        "tokens.forEach(el=>el.classList.remove('is-active','is-linked'));"
        "regions.forEach(el=>el.classList.remove('is-active','is-linked'));"
        "if(linkPanel){linkPanel.innerHTML='';}"
        "}"
        "function markRegion(regionIdx, cls){"
        "const el=regionMap.get(String(regionIdx));"
        "if(el){el.classList.add(cls);}"
        "}"
        "function markToken(featureIdx, cls){"
        "const el=tokenMap.get(String(featureIdx));"
        "if(el){el.classList.add(cls);}"
        "}"
        # Rows are built with createElement/textContent rather than an
        # innerHTML template so feature labels are always treated as text.
        "function updateLinks(links, focusIdx){"
        "if(!linkPanel){return;}"
        "if(!links.length){linkPanel.innerHTML='<div class=\"mm-note\">No linked features.</div>';return;}"
        "linkPanel.innerHTML='';"
        "links.forEach(link=>{"
        "const other=String(link.i)===focusIdx?String(link.j):String(link.i);"
        "const meta=featureMap.get(other)||{};"
        "const row=document.createElement('div');"
        "row.className='mm-link-row';"
        "const name=document.createElement('span');"
        "name.textContent=String(meta.label||other);"
        "const amount=document.createElement('strong');"
        "amount.textContent=Number(link.value||0).toFixed(3);"
        "row.appendChild(name);row.appendChild(amount);"
        "linkPanel.appendChild(row);"
        "});"
        "}"
        "function highlightFeature(featureIdx){"
        "const focusIdx=String(featureIdx);"
        "clearActive();"
        "markToken(focusIdx,'is-active');"
        "const meta=featureMap.get(focusIdx);"
        "if(meta&&meta.modality==='image'){markRegion(meta.ref_index,'is-active');}"
        "const links=(adjacency.get(focusIdx)||[]).slice();"
        "links.sort((a,b)=>Math.abs(b.value)-Math.abs(a.value));"
        "const topLinks=links.slice(0,6);"
        "topLinks.forEach(link=>{"
        "const other=String(link.i)===focusIdx?String(link.j):String(link.i);"
        "markToken(other,'is-linked');"
        "const otherMeta=featureMap.get(other);"
        "if(otherMeta&&otherMeta.modality==='image'){markRegion(otherMeta.ref_index,'is-linked');}"
        "});"
        "updateLinks(topLinks,focusIdx);"
        "}"
        "tokens.forEach(el=>{"
        "el.addEventListener('click',()=>highlightFeature(el.dataset.featureIndex));"
        "});"
        "regions.forEach(el=>{"
        "el.addEventListener('click',()=>{"
        "const featureIdx=regionToFeature.get(el.dataset.regionIndex);"
        "if(featureIdx){highlightFeature(featureIdx);}"
        "});"
        "});"
        "const resetBtn=root.querySelector('.mm-reset');"
        "if(resetBtn){resetBtn.addEventListener('click',clearActive);}"
    )

    # The script is stored as inert text/plain and executed by the onload
    # handler of a 1x1 GIF through `new Function`.
    # NOTE(review): presumably this is so the widget initialises when a host
    # injects the markup in a way that does not run <script> tags - confirm.
    return (
        "<style>"
        ".mm-root{font-family:'Segoe UI','Helvetica Neue',Arial,sans-serif;"
        "background:#f7f5f2;border:1px solid #e3e3ec;border-radius:18px;padding:20px;"
        "display:grid;grid-template-columns:minmax(0,1.2fr) minmax(0,1fr);gap:18px;}"
        ".mm-image-panel{background:#fff;border:1px solid #e3e3ec;border-radius:16px;"
        "padding:16px;box-shadow:0 12px 24px rgba(32,25,40,0.08);}"
        ".mm-image-wrap{position:relative;border-radius:14px;overflow:hidden;}"
        ".mm-image-wrap img{display:block;width:100%;height:auto;}"
        ".mm-overlay{position:absolute;left:0;top:0;width:100%;height:100%;pointer-events:none;}"
        ".mm-region{position:absolute;border:2px solid rgba(61,44,54,0.35);"
        "background:rgba(255,255,255,0.0);border-radius:8px;transition:all .2s ease;"
        "cursor:pointer;}"
        ".mm-region.is-active{border-color:#111111;box-shadow:0 0 0 3px rgba(1,109,1,0.35);}"
        ".mm-region.is-linked{border-color:#111111;box-shadow:0 0 0 3px rgba(221,19,19,0.35);}"
        ".mm-token-panel{background:#fff;border:1px solid #e3e3ec;border-radius:16px;"
        "padding:16px;box-shadow:0 12px 24px rgba(32,25,40,0.08);display:flex;"
        "flex-direction:column;gap:14px;}"
        ".mm-title{font-size:16px;font-weight:700;color:#2d1f4a;}"
        ".mm-token-grid{display:flex;flex-wrap:wrap;gap:8px;}"
        ".mm-token{display:inline-flex;align-items:center;justify-content:center;"
        "padding:6px 10px;border-radius:12px;font-size:13px;font-weight:600;"
        "color:#2a2140;border:1px solid rgba(60,44,80,0.12);cursor:pointer;"
        "transition:all .2s ease;}"
        ".mm-token.is-active{outline:2px solid #111111;box-shadow:0 10px 18px rgba(1,109,1,0.2);}"
        ".mm-token.is-linked{outline:2px solid #111111;box-shadow:0 10px 18px rgba(221,19,19,0.2);}"
        ".mm-controls{display:flex;justify-content:space-between;align-items:center;gap:8px;}"
        ".mm-reset{border:none;background:#f1efe9;border-radius:999px;padding:6px 12px;"
        "font-size:12px;font-weight:600;color:#4a3b4e;cursor:pointer;}"
        ".mm-link-list{display:flex;flex-direction:column;gap:6px;}"
        ".mm-link-row{display:flex;justify-content:space-between;gap:8px;font-size:12px;"
        "color:#3a2f50;background:#f8f6ff;border-radius:10px;padding:6px 10px;}"
        ".mm-link-row strong{color:#2d1f4a;}"
        ".mm-note{font-size:12px;color:#6f5a72;}"
        ".mm-empty{font-size:14px;color:#7f6f86;}"
        "@media (max-width: 900px){"
        ".mm-root{grid-template-columns:1fr;}}"
        "</style>"
        f"<div class='mm-root' id='{view_id}'>"
        "<div class='mm-image-panel'>"
        f"<div class='mm-title'>{escape(title)}</div>"
        "<div class='mm-image-wrap'>"
        f"<img src='{image_url}' alt='input image' />"
        f"{overlay_html}"
        f"{''.join(region_boxes)}"
        "</div>"
        "</div>"
        "<div class='mm-token-panel'>"
        "<div class='mm-controls'>"
        "<div class='mm-title'>Features</div>"
        "<button type='button' class='mm-reset'>Reset</button>"
        "</div>"
        f"<div class='mm-token-grid'>{''.join(tokens_html)}</div>"
        f"{no_edges_note}"
        "<div class='mm-link-list'></div>"
        "</div>"
        f"<img class='mm-loader' id='{loader_id}' alt='' "
        "src='data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///ywAAAAAAQABAAACAUwAOw==' "
        f"onload=\"(function(){{var s=document.getElementById('{script_id}');"
        "if(!s||!s.textContent){return;}try{(new Function(s.textContent))();}catch(e){"
        "console.warn('mm interaction init failed',e);}})()\" />"
        f"<script type='text/plain' id='{script_id}'>{js_code}</script>"
        "</div>"
    )
|
|