LocateAnything

Paused

fix-bot committed 5 days ago

Commit

4c3d7bf

1 Parent(s): 605f703

ui: restore staggered detection pop-in and add usage guide

Bring back one-by-one overlay animation and decoding trace from the earlier demo, and add Quick Start guidance for categories and parameters.

Files changed (2) hide show

app.py +118 -6
index.html +117 -22

app.py CHANGED Viewed

@@ -306,6 +306,107 @@ def _parse_out_info_dict(out_info: str) -> dict:
     return stats
 def generate_raw_prompt(task_type, category):
     if not category:
         category = "objects"
@@ -407,7 +508,8 @@ def run_image_gpu_api(
             "coords": [round(c, 2) for c in det.get("coords", [])]
         })
-    return out_img_path, stats, output_text, detections_summary
 @spaces.GPU(duration=240, size="xlarge")
@@ -558,7 +660,15 @@ def run_video_gpu_api(
         "early_stop_reason": early_stop_reason
     }
-    return out_video_path, stats, "\n---\n".join(inference_results_for_draw), detections_summary
 # ============================================================
@@ -623,7 +733,7 @@ def run_inference_api(
             if not img_path or not os.path.exists(img_path):
                 return None, None, {"success": False, "error": f"Invalid image file path: {img_path}"}
-            out_img_path, stats, raw_text, detections = run_image_gpu_api(
                 img_path, category, model_mode, temp, top_p, top_k, short_size, final_prompt
             )
@@ -633,7 +743,8 @@ def run_inference_api(
                 "stats": stats,
                 "raw_text": raw_text,
                 "detections": detections,
-                "final_prompt": final_prompt
             }
             return FileData(path=out_img_path), None, meta
@@ -652,7 +763,7 @@ def run_inference_api(
             if not vid_path or not os.path.exists(vid_path):
                 return None, None, {"success": False, "error": f"Invalid video file path: {vid_path}"}
-            out_vid_path, stats, raw_text, detections = run_video_gpu_api(
                 vid_path, category, model_mode, temp, top_p, top_k, short_size, final_prompt, max_video_frames
             )
@@ -662,7 +773,8 @@ def run_inference_api(
                 "stats": stats,
                 "raw_text": raw_text,
                 "detections": detections,
-                "final_prompt": final_prompt
             }
             return None, FileData(path=out_vid_path), meta

     return stats
def generate_dynamic_html(token_sequence, out_info, raw_text):
    """Build the self-contained "Decoding Trace" HTML panel for one run.

    The panel consists of a scoped ``<style>`` block, a header with a legend,
    one animated chip per decoded token, an optional statistics badge and an
    optional collapsible raw-response section. Every CSS class is suffixed
    with a per-call id so several panels can coexist on one page.

    Args:
        token_sequence: Iterable of ``(decode_type, text)`` pairs (list or
            tuple, at least two items). ``decode_type`` equal to ``"ar"``
            (case-insensitive) is styled as an AR fallback chip, anything else
            as an MTP chip. Malformed entries are skipped. May be empty/None.
        out_info: Raw statistics string handed to ``_parse_out_info_dict``.
            When falsy, no statistics badge is rendered.
        raw_text: Raw model response shown inside a ``<details>`` element.
            When falsy, the section is omitted.

    Returns:
        The panel markup as a single HTML string.
    """
    # Function-scope import keeps this block self-contained and avoids
    # clashing with the local variables named ``html`` used by callers.
    from html import escape

    # Millisecond timestamp used as a CSS namespace for this panel.
    uid = f"a{int(time.time() * 1000)}"
    css = f"""
    <style>
        .dc-root-{uid} {{
            font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
            border: 1px solid rgba(118, 185, 0, 0.25); border-radius: 12px;
            background: rgba(0, 0, 0, 0.55); overflow: hidden;
        }}
        .dc-header-{uid} {{
            display: flex; align-items: center; justify-content: space-between; flex-wrap: wrap; gap: 8px;
            padding: 10px 14px;
            background: linear-gradient(135deg, rgba(118, 185, 0, 0.25) 0%, rgba(63, 98, 0, 0.35) 100%);
            border-bottom: 1px solid rgba(118, 185, 0, 0.2);
        }}
        .dc-header-title-{uid} {{ font-weight: 700; font-size: 0.82em; color: #d9f99d; letter-spacing: 0.04em; text-transform: uppercase; }}
        .dc-legend-{uid} {{ display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }}
        .dc-legend-item-{uid} {{ display: flex; align-items: center; gap: 5px; font-size: 0.72em; color: rgba(226, 232, 240, 0.85); }}
        .dc-legend-dot-{uid} {{ width: 8px; height: 8px; border-radius: 2px; display: inline-block; }}
        .dc-row-{uid} {{ display: flex; gap: 10px; padding: 12px 14px; border-bottom: 1px solid rgba(255,255,255,0.05); }}
        .dc-row-{uid}:last-child {{ border-bottom: none; }}
        .dc-val-{uid} {{ flex: 1; line-height: 2.2; word-wrap: break-word; color: #cbd5e1; font-size: 0.85em; }}
        @keyframes tk-{uid} {{
            0%   {{ opacity: 0; transform: translateY(8px) scale(0.92); }}
            60%  {{ opacity: 1; transform: translateY(-2px) scale(1.02); }}
            100% {{ opacity: 1; transform: translateY(0) scale(1); }}
        }}
        .tk-mtp-{uid}, .tk-ar-{uid} {{
            opacity: 0; animation: tk-{uid} 0.35s ease-out forwards;
            border-radius: 5px; padding: 2px 7px; margin: 2px 1px; display: inline-block;
            font-size: 0.78em; font-weight: 600;
            font-family: 'Fira Code', Consolas, monospace; white-space: nowrap;
        }}
        .tk-mtp-{uid} {{ background: rgba(118, 185, 0, 0.15); border: 1px solid rgba(118, 185, 0, 0.55); color: #bbf7d0; }}
        .tk-ar-{uid} {{ background: rgba(230, 81, 0, 0.15); border: 1px solid rgba(230, 81, 0, 0.55); color: #fed7aa; }}
        .tk-stat-{uid} {{
            opacity: 0; animation: tk-{uid} 0.4s ease-out forwards;
            background: rgba(118, 185, 0, 0.12); border: 1px solid rgba(118, 185, 0, 0.35); border-radius: 6px;
            padding: 4px 12px; display: inline-block; font-size: 0.78em; color: #d9f99d; font-weight: 600;
        }}
        .dc-raw-{uid} {{ padding: 0 14px 12px; }}
        .dc-raw-{uid} summary {{ cursor: pointer; color: #94a3b8; font-size: 0.78em; user-select: none; }}
        .dc-raw-{uid} summary:hover {{ color: #76b900; }}
        .dc-raw-pre-{uid} {{
            background: rgba(0,0,0,0.45); border: 1px solid rgba(255,255,255,0.08); border-radius: 6px;
            padding: 10px; margin-top: 8px;
            font-family: 'Fira Code', Consolas, monospace;
            font-size: 0.74em; color: #cbd5e1; white-space: pre-wrap; word-break: break-all;
            max-height: 180px; overflow-y: auto;
        }}
    </style>
    """

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        css,
        f'<div class="dc-root-{uid}">',
        f'<div class="dc-header-{uid}">'
        f'<span class="dc-header-title-{uid}">Decoding Trace</span>'
        f'<div class="dc-legend-{uid}">'
        f'<div class="dc-legend-item-{uid}"><span class="dc-legend-dot-{uid}" style="background:#76b900;"></span>MTP Parallel</div>'
        f'<div class="dc-legend-item-{uid}"><span class="dc-legend-dot-{uid}" style="background:#e65100;"></span>AR Fallback</div>'
        f'</div></div>',
        f'<div class="dc-row-{uid}"><div class="dc-val-{uid}">',
    ]

    # One chip per well-formed token; each chip starts 60 ms after the last.
    tok_idx = 0
    for item in token_sequence or ():
        if not isinstance(item, (list, tuple)) or len(item) < 2:
            continue
        decode_type = str(item[0]).lower()
        # Escape &, < and > so model output is shown literally, never parsed.
        safe = escape(str(item[1]), quote=False)
        delay = f"{tok_idx * 0.06:.2f}s"
        cls = f"tk-ar-{uid}" if decode_type == "ar" else f"tk-mtp-{uid}"
        parts.append(
            f'<span class="{cls}" style="animation-delay:{delay}">{safe}</span> '
        )
        tok_idx += 1
    parts.append('</div></div>')

    if out_info:
        stats = _parse_out_info_dict(out_info)
        bits = []
        if "forward_step" in stats:
            bits.append(f"{stats['forward_step']} steps")
        if "num_tokens" in stats:
            bits.append(f"{stats['num_tokens']} tokens")
        if "num_boxes" in stats:
            bits.append(f"{stats['num_boxes']} boxes")
        if "switch_to_ar" in stats:
            n = stats["switch_to_ar"]
            # Compare as text so both 1 and "1" get the singular form.
            plural = "" if str(n).strip() == "1" else "s"
            bits.append(f"{n} AR fallback{plural}")
        if "tps" in stats:
            bits.append(f"{stats['tps']} tok/s")
        if "bps" in stats:
            bits.append(f"{stats['bps']} box/s")
        # Fall back to the raw info string when no known key was parsed;
        # either way the text is escaped before being placed in markup.
        summary = escape(" · ".join(bits) if bits else out_info.strip(), quote=False)
        # The badge appears shortly after the last token chip.
        stat_delay = f"{tok_idx * 0.06 + 0.3:.2f}s"
        parts.append(
            f'<div class="dc-row-{uid}" style="justify-content:flex-end;padding-top:4px;padding-bottom:8px;border-bottom:none;">'
            f'<span class="tk-stat-{uid}" style="animation-delay:{stat_delay}">{summary}</span></div>'
        )

    if raw_text:
        safe_raw = escape(str(raw_text), quote=False)
        parts.append(
            f'<div class="dc-raw-{uid}"><details><summary>Show Raw Response</summary>'
            f'<div class="dc-raw-pre-{uid}">{safe_raw}</div></details></div>'
        )

    parts.append('</div>')
    return "".join(parts)
 def generate_raw_prompt(task_type, category):
     if not category:
         category = "objects"
             "coords": [round(c, 2) for c in det.get("coords", [])]
         })
+    html = generate_dynamic_html(token_sequence, out_info, output_text)
+    return out_img_path, stats, output_text, detections_summary, html
 @spaces.GPU(duration=240, size="xlarge")
         "early_stop_reason": early_stop_reason
     }
+    raw_combined = "\n---\n".join(inference_results_for_draw)
+    timing_summary = (
+        f"Processed {processed_count}/{n_sampled} sampled frames "
+        f"({total} total) in {total_time:.1f}s"
+    )
+    if early_stopped:
+        timing_summary += f" — {early_stop_reason}"
+    html = generate_dynamic_html([], "", timing_summary + "\n\n" + raw_combined)
+    return out_video_path, stats, raw_combined, detections_summary, html
 # ============================================================
             if not img_path or not os.path.exists(img_path):
                 return None, None, {"success": False, "error": f"Invalid image file path: {img_path}"}
+            out_img_path, stats, raw_text, detections, html = run_image_gpu_api(
                 img_path, category, model_mode, temp, top_p, top_k, short_size, final_prompt
             )
                 "stats": stats,
                 "raw_text": raw_text,
                 "detections": detections,
+                "final_prompt": final_prompt,
+                "html": html,
             }
             return FileData(path=out_img_path), None, meta
             if not vid_path or not os.path.exists(vid_path):
                 return None, None, {"success": False, "error": f"Invalid video file path: {vid_path}"}
+            out_vid_path, stats, raw_text, detections, html = run_video_gpu_api(
                 vid_path, category, model_mode, temp, top_p, top_k, short_size, final_prompt, max_video_frames
             )
                 "stats": stats,
                 "raw_text": raw_text,
                 "detections": detections,
+                "final_prompt": final_prompt,
+                "html": html,
             }
             return None, FileData(path=out_vid_path), meta

index.html CHANGED Viewed

@@ -184,6 +184,20 @@
             border-color: #76b900 !important;
             background: rgba(118, 185, 0, 0.04) !important;
         }
     </style>
 </head>
 <body class="text-slate-100 font-sans min-h-screen pb-16 carbon-grid">
@@ -270,6 +284,9 @@
                     <p class="text-xs text-slate-400 max-w-sm font-medium leading-relaxed">
                         NVIDIA's advanced 3B vision-language model. Locate any object, UI target, or text in images and videos with natural language.
                     </p>
                 </div>
                 <!-- Setup Glass Card Controls -->
@@ -367,6 +384,23 @@
                         </div>
                     </details>
                 </div>
                 <!-- CTA Action Button (Floats at bottom-left corner of visual container) -->
@@ -380,7 +414,7 @@
             </div>
             <!-- 3. Floating Categories Search Bar Overlay (Right/Center side, extremely clean glass box) -->
-            <div class="absolute top-1/2 right-6 lg:right-16 -translate-y-1/2 z-30 flex justify-center pointer-events-none w-full max-w-xs">
                 <div class="sam-input-bar rounded-2xl px-3.5 py-2.5 flex items-center gap-2 w-full pointer-events-auto">
                     <svg class="h-4 w-4 text-nvidia-brand shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2.5">
                         <path stroke-linecap="round" stroke-linejoin="round" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
@@ -392,6 +426,9 @@
                         </svg>
                     </button>
                 </div>
             </div>
             <!-- Floating Workspace Status -->
@@ -459,20 +496,23 @@
                         </div>
                         <!-- Tag drawer box list (Grid: 7) -->
-                        <div class="sm:col-span-7 bg-black/60 rounded-xl p-4 border border-white/5 flex flex-col">
                             <div class="text-nvidia-brand font-mono font-bold border-b border-white/5 pb-1 mb-2 uppercase tracking-widest text-[9px] flex justify-between shrink-0">
                                 <span>🎯 Detected Target Overlays</span>
                                 <span id="detection-count-badge" class="text-[8px] bg-nvidia-brand/10 text-nvidia-brand border border-nvidia-brand/20 px-1.5 py-0.5 rounded-full font-bold">0</span>
                             </div>
-                            <div id="detection-tags-wrapper" class="flex-1 flex flex-wrap gap-1.5 max-h-[100px] overflow-y-auto pt-1 align-content-start text-[10px] text-slate-500">
-                                Run inference to populate target tags here.
                             </div>
                         </div>
                     </div>
                     <!-- Optional dynamic trace wrapper -->
-                    <div id="rich-trace-log" class="hidden border-t border-white/5 pt-3"></div>
                 </div>
             </div>
@@ -599,6 +639,72 @@
             }
         }
         // Switch workspace input styles without clearing
         function setMediaType(type) {
             selectedMediaType = type;
@@ -637,6 +743,7 @@
             outputVideo.src = "";
             outputVideo.classList.add("hidden");
             workspaceStatus.textContent = "Workspace Cleared";
         }
         // Drag and drop utilities
@@ -780,8 +887,9 @@
             richTraceLog.classList.add("hidden");
             metaStatus.textContent = "Processing...";
             metaStatus.className = "text-yellow-500 font-semibold";
-            detectionTagsWrapper.innerHTML = "Processing objects in backend...";
             detectionCountBadge.textContent = "0";
             try {
                 const clientInstance = await getClient();
@@ -855,23 +963,10 @@
                 metaBps.textContent = stats.bps || "-";
                 metaTime.textContent = stats.total_time_seconds ? `${stats.total_time_seconds}s` : "Optimal";
-                // Render detection tags
-                const detections = meta.detections || [];
-                detectionCountBadge.textContent = detections.length;
-                if (detections.length === 0) {
-                    detectionTagsWrapper.innerHTML = "No objects matched categories.";
-                } else {
-                    detectionTagsWrapper.innerHTML = "";
-                    detections.forEach(det => {
-                        const tag = document.createElement("span");
-                        tag.className = "px-2 py-0.5 rounded bg-nvidia-brand/10 text-nvidia-brand border border-nvidia-brand/20 font-bold uppercase tracking-wider text-[8px] animate-fade-in";
-                        tag.textContent = det.frame ? `[Frame ${det.frame}] ${det.label}` : det.label;
-                        detectionTagsWrapper.appendChild(tag);
-                    });
-                }
-                // Render logs trace
                 if (meta.html) {
                     richTraceLog.innerHTML = meta.html;
                     richTraceLog.classList.remove("hidden");

             border-color: #76b900 !important;
             background: rgba(118, 185, 0, 0.04) !important;
         }
+        /* Detection overlay tag pop-in (restored from previous demo) */
+        @keyframes det-pop {
+            0%   { opacity: 0; transform: translateY(10px) scale(0.88); }
+            60%  { opacity: 1; transform: translateY(-2px) scale(1.03); }
+            100% { opacity: 1; transform: translateY(0) scale(1); }
+        }
+        .det-tag-pop {
+            opacity: 0;
+            animation: det-pop 0.38s cubic-bezier(0.16, 1, 0.3, 1) forwards;
+        }
+        .det-count-pop {
+            animation: det-pop 0.35s cubic-bezier(0.16, 1, 0.3, 1) forwards;
+        }
     </style>
 </head>
 <body class="text-slate-100 font-sans min-h-screen pb-16 carbon-grid">
                     <p class="text-xs text-slate-400 max-w-sm font-medium leading-relaxed">
                         NVIDIA's advanced 3B vision-language model. Locate any object, UI target, or text in images and videos with natural language.
                     </p>
+                    <p class="text-[9px] text-slate-500 max-w-sm leading-relaxed border-l-2 border-nvidia-brand/30 pl-2.5">
+                        Note: inputs larger than 1K are auto-resized in this Space demo. For full-resolution inference, download the weights and run locally.
+                    </p>
                 </div>
                 <!-- Setup Glass Card Controls -->
                         </div>
                     </details>
+                    <!-- Quick Start Guide -->
+                    <details class="group border-t border-white/5 pt-3" open>
+                        <summary class="list-none flex justify-between items-center cursor-pointer select-none text-[8px] font-bold text-nvidia-brand tracking-wider uppercase hover:text-nvidia-hover transition-colors">
+                            <span>📖 How to Use</span>
+                            <svg class="h-3 w-3 transform group-open:rotate-180 transition-transform text-slate-500" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 9l-7 7-7-7" />
+                            </svg>
+                        </summary>
+                        <ol class="space-y-1.5 pt-2.5 text-[9px] text-slate-400 leading-relaxed list-decimal list-inside marker:text-nvidia-brand/70">
+                            <li>Upload an <strong class="text-slate-300">Image</strong> or <strong class="text-slate-300">Video</strong>, or pick a Quick Sandbox example below.</li>
+                            <li>Choose a <strong class="text-slate-300">Task Type</strong>: Detection · Grounding · OCR · GUI · Pointing.</li>
+                            <li>Enter <strong class="text-slate-300">Categories</strong> in the search bar (comma-separated, e.g. <code class="text-nvidia-brand/80">car, person</code>).</li>
+                            <li>Optionally tune <strong class="text-slate-300">Advanced parameters</strong> above (mode, resize, temperature, etc.).</li>
+                            <li>Click <strong class="text-nvidia-brand">Run Inference</strong> or press <kbd class="px-1 py-0.5 rounded bg-white/5 border border-white/10 text-[8px]">Enter</kbd> in the search bar.</li>
+                        </ol>
+                    </details>
                 </div>
                 <!-- CTA Action Button (Floats at bottom-left corner of visual container) -->
             </div>
             <!-- 3. Floating Categories Search Bar Overlay (Right/Center side, extremely clean glass box) -->
+            <div class="absolute top-1/2 right-6 lg:right-16 -translate-y-1/2 z-30 flex flex-col items-end gap-2 pointer-events-none w-full max-w-xs">
                 <div class="sam-input-bar rounded-2xl px-3.5 py-2.5 flex items-center gap-2 w-full pointer-events-auto">
                     <svg class="h-4 w-4 text-nvidia-brand shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2.5">
                         <path stroke-linecap="round" stroke-linejoin="round" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
                         </svg>
                     </button>
                 </div>
+                <p class="text-[9px] text-slate-500 text-right leading-relaxed pointer-events-none px-1 max-w-full">
+                    Comma-separated targets · supports English &amp; Chinese · press <span class="text-slate-400">Enter</span> to run
+                </p>
             </div>
             <!-- Floating Workspace Status -->
                         </div>
                         <!-- Tag drawer box list (Grid: 7) -->
+                        <div class="sm:col-span-7 bg-black/60 rounded-xl p-4 border border-white/5 flex flex-col min-h-[140px]">
                             <div class="text-nvidia-brand font-mono font-bold border-b border-white/5 pb-1 mb-2 uppercase tracking-widest text-[9px] flex justify-between shrink-0">
                                 <span>🎯 Detected Target Overlays</span>
                                 <span id="detection-count-badge" class="text-[8px] bg-nvidia-brand/10 text-nvidia-brand border border-nvidia-brand/20 px-1.5 py-0.5 rounded-full font-bold">0</span>
                             </div>
+                            <div id="detection-tags-wrapper" class="flex-1 flex flex-col gap-1.5 max-h-[120px] overflow-y-auto pt-1 text-[10px] text-slate-500">
+                                <div id="detection-empty-hint" class="space-y-1.5 leading-relaxed">
+                                    <p>Run inference to populate detected targets here — each result will pop in one by one.</p>
+                                    <p class="text-[9px] text-slate-600">Adjustable: Task Type · Categories · Inference Mode · Resize Cap · Temperature · Top P/K · Max Video Frames</p>
+                                </div>
                             </div>
                         </div>
                     </div>
                     <!-- Optional dynamic trace wrapper -->
+                    <div id="rich-trace-log" class="hidden border-t border-white/5 pt-3 text-[10px]"></div>
                 </div>
             </div>
             }
         }
+        function formatDetectionCoords(det) {
+            const coords = det.coords || [];
+            if (!coords.length) return "";
+            const rounded = coords.map(c => Number.isFinite(c) ? Math.round(c) : c);
+            return rounded.join(", ");
+        }
+        function renderDetectionTags(detections) {
+            detectionTagsWrapper.innerHTML = "";
+            detectionCountBadge.textContent = "0";
+            detectionCountBadge.classList.remove("det-count-pop");
+            if (!detections.length) {
+                detectionTagsWrapper.innerHTML = '<p class="text-slate-500">No objects matched the given categories.</p>';
+                return;
+            }
+            // Animate count badge after tags finish popping in
+            const countDelay = detections.length * 80 + 120;
+            setTimeout(() => {
+                detectionCountBadge.textContent = detections.length;
+                detectionCountBadge.classList.add("det-count-pop");
+            }, countDelay);
+            detections.forEach((det, idx) => {
+                setTimeout(() => {
+                    const card = document.createElement("div");
+                    card.className = "det-tag-pop flex items-center justify-between gap-2 px-2 py-1.5 rounded-lg bg-nvidia-brand/8 border border-nvidia-brand/20 hover:border-nvidia-brand/40 transition-colors";
+                    card.style.animationDelay = "0s";
+                    const labelWrap = document.createElement("div");
+                    labelWrap.className = "flex items-center gap-1.5 min-w-0";
+                    const typeBadge = document.createElement("span");
+                    typeBadge.className = "shrink-0 px-1 py-0.5 rounded text-[7px] font-bold uppercase tracking-wider bg-black/40 text-nvidia-brand border border-nvidia-brand/25";
+                    typeBadge.textContent = det.type || "box";
+                    const label = document.createElement("span");
+                    label.className = "font-bold uppercase tracking-wider text-[9px] text-nvidia-brand truncate";
+                    label.textContent = det.frame ? `[F${det.frame}] ${det.label}` : (det.label || "object");
+                    labelWrap.appendChild(typeBadge);
+                    labelWrap.appendChild(label);
+                    const coords = document.createElement("span");
+                    coords.className = "shrink-0 font-mono text-[8px] text-slate-500";
+                    const coordStr = formatDetectionCoords(det);
+                    coords.textContent = coordStr ? `[${coordStr}]` : "";
+                    card.appendChild(labelWrap);
+                    card.appendChild(coords);
+                    detectionTagsWrapper.appendChild(card);
+                }, idx * 80);
+            });
+        }
+        function resetDetectionTagsPlaceholder() {
+            detectionTagsWrapper.innerHTML = `
+                <div id="detection-empty-hint" class="space-y-1.5 leading-relaxed">
+                    <p>Run inference to populate detected targets here — each result will pop in one by one.</p>
+                    <p class="text-[9px] text-slate-600">Adjustable: Task Type · Categories · Inference Mode · Resize Cap · Temperature · Top P/K · Max Video Frames</p>
+                </div>`;
+            detectionCountBadge.textContent = "0";
+            detectionCountBadge.classList.remove("det-count-pop");
+        }
         // Switch workspace input styles without clearing
         function setMediaType(type) {
             selectedMediaType = type;
             outputVideo.src = "";
             outputVideo.classList.add("hidden");
             workspaceStatus.textContent = "Workspace Cleared";
+            resetDetectionTagsPlaceholder();
         }
         // Drag and drop utilities
             richTraceLog.classList.add("hidden");
             metaStatus.textContent = "Processing...";
             metaStatus.className = "text-yellow-500 font-semibold";
+            detectionTagsWrapper.innerHTML = '<p class="text-slate-400 animate-pulse">Processing objects in backend...</p>';
             detectionCountBadge.textContent = "0";
+            detectionCountBadge.classList.remove("det-count-pop");
             try {
                 const clientInstance = await getClient();
                 metaBps.textContent = stats.bps || "-";
                 metaTime.textContent = stats.total_time_seconds ? `${stats.total_time_seconds}s` : "Optimal";
+                // Render detection tags with staggered pop-in animation
+                renderDetectionTags(meta.detections || []);
+                // Render decoding trace (token-by-token pop animation from previous version)
                 if (meta.html) {
                     richTraceLog.innerHTML = meta.html;
                     richTraceLog.classList.remove("hidden");