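"""EchoTracker Gradio demo.

Upload an echocardiography clip (or pick a bundled example), click query
points on a chosen frame, and run the TorchScript EchoTracker model to
visualize the tracked point trajectories.
"""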
import gradio as gr
import os
import torch
import cv2
import numpy as np
import random
from utils import points_to_tensor
from utils import visualize_tracking
import mediapy as media

# ── Colormap (matches your viz_utils.get_colors logic) ───────────────────────
def get_colors(n):
    """Generate n random but unique colors in RGB 0-255."""
    random.seed(42)  # remove this line if you want different colors each run
    
    # Spread hues evenly across 0-179 (HSV in OpenCV), then shuffle
    hues = list(range(0, 180, max(1, 180 // n)))[:n]
    random.shuffle(hues)
    
    colors = []
    for hue in hues:
        # Randomize saturation and value slightly for more visual variety
        sat = random.randint(180, 255)
        val = random.randint(180, 255)
        hsv = np.uint8([[[hue, sat, val]]])
        rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)[0][0]
        colors.append(tuple(int(c) for c in rgb))
    
    return colors

N_POINTS  = 100
COLORMAP  = get_colors(N_POINTS)
select_points = []   # will hold np.array([x, y]) entries
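# Caveat: this module-level list is shared by every session of the app. That is
# fine for a single-user demo; a multi-user deployment would keep it in gr.State.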

# ── Video helpers ─────────────────────────────────────────────────────────────
def get_frame(video_path: str, frame_idx: int) -> np.ndarray:
    """Extract a single frame from video by index."""
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        raise ValueError(f"Could not read frame {frame_idx}")
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

def get_total_frames(video_path: str) -> int:
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    return total

# ── Draw points on frame ──────────────────────────────────────────────────────
def draw_points(frame: np.ndarray, points: list) -> np.ndarray:
    """Draw a numbered, colored circle marker on the frame for each selected point."""
    out = frame.copy()
    for i, pt in enumerate(points):
        x, y  = int(pt[0]), int(pt[1])
        color = COLORMAP[i % N_POINTS]  # RGB tuple; the frame is already RGB, so no BGR swap
        cv2.circle(out, (x, y), radius=6, color=color, thickness=-1)
        cv2.circle(out, (x, y), radius=6,
                   color=(255, 255, 255), thickness=2)  # white border
        cv2.putText(out, str(i + 1), (x + 10, y - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
    return out

_SAMPLES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "example_samples")

# JS injected into gr.Blocks - controls download availability on the video players
_DOWNLOAD_CTRL_JS = """
(function () {
  const EXAMPLE_IDS = ['video_upload_player', 'out_video_player'];
  const USER_IDS    = ['out_video_player'];

  function applyNoDownload(ids) {
    ids.forEach(function (id) {
      var el = document.getElementById(id);
      if (!el) return;
      el.querySelectorAll('video').forEach(function (v) {
        v.setAttribute('controlsList', 'nodownload');
        v.oncontextmenu = function (e) { e.preventDefault(); };
      });
      el.querySelectorAll('a').forEach(function (a) {
        a.style.cssText = 'display:none!important;pointer-events:none!important';
      });
      el.querySelectorAll('button').forEach(function (btn) {
        var lbl = (btn.getAttribute('aria-label') || btn.getAttribute('title') || '').toLowerCase();
        if (lbl.includes('download') || lbl.includes('save')) {
          btn.style.cssText = 'display:none!important;pointer-events:none!important';
        }
      });
    });
  }

  function clearNoDownload(ids) {
    ids.forEach(function (id) {
      var el = document.getElementById(id);
      if (!el) return;
      el.querySelectorAll('video').forEach(function (v) {
        v.removeAttribute('controlsList');
        v.oncontextmenu = null;
      });
      el.querySelectorAll('a').forEach(function (a) { a.style.cssText = ''; });
      el.querySelectorAll('button').forEach(function (btn) { btn.style.cssText = ''; });
    });
  }

  window._isExampleMode = false;

  function applyCurrentMode() {
    if (window._isExampleMode) applyNoDownload(EXAMPLE_IDS);
    else clearNoDownload(USER_IDS);
  }

  /* Watch both containers for DOM changes (e.g. when video src updates) */
  EXAMPLE_IDS.concat(['out_video_player']).forEach(function (id) {
    (function tryObserve() {
      var el = document.getElementById(id);
      if (!el) { setTimeout(tryObserve, 400); return; }
      new MutationObserver(applyCurrentMode)
        .observe(el, { childList: true, subtree: true });
    })();
  });

  /* Intercept value setter on hidden textbox to receive mode signal from Python */
  function hookTrigger() {
    var container = document.querySelector('#download_ctrl textarea');
    if (!container) { setTimeout(hookTrigger, 300); return; }
    var desc = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value');
    Object.defineProperty(container, 'value', {
      get: function ()  { return desc.get.call(this); },
      set: function (v) {
        desc.set.call(this, v);
        window._isExampleMode = (v === '1');
        applyCurrentMode();
      },
      configurable: true,
    });
  }

  setTimeout(hookTrigger, 500);
})();
"""
# label → (path, is_ood)
EXAMPLE_VIDEOS = {
    "A4C":          (os.path.join(_SAMPLES_DIR, "input1.mp4"),          False),
    "A4C (OOD)":    (os.path.join(_SAMPLES_DIR, "input2.mp4"),          True),
    "RV (OOD)":     (os.path.join(_SAMPLES_DIR, "input3_RV.mp4"),       True),
    "PSAX (OOD)":   (os.path.join(_SAMPLES_DIR, "psax_video_crop.mp4"), True),
}

def _get_thumbnail(video_path: str) -> np.ndarray | None:
    """Extract a single frame near the middle of the video for use as a thumbnail."""
    try:
        cap = cv2.VideoCapture(video_path)
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        cap.set(cv2.CAP_PROP_POS_FRAMES, max(0, int(total * 0.4)))
        ret, frame = cap.read()
        cap.release()
        if ret:
            return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    except Exception:
        pass
    return None

THUMBNAILS = {label: _get_thumbnail(path) for label, (path, _) in EXAMPLE_VIDEOS.items()}

# ── Gradio event handlers ─────────────────────────────────────────────────────
def on_video_upload(video_path):
    """Called when a video is uploaded: jump to the frame at 72% of the clip."""
    if video_path is None:
        return None, gr.update(interactive=False), "No video loaded.", None, ""

    total  = get_total_frames(video_path)
    idx_72 = int(total * 0.72)

    frame  = get_frame(video_path, idx_72)

    frame_display_update = gr.update(
        value=frame,
        interactive=True, # enables click events via gr.SelectData
    )

    slider_update = gr.update(
        value=idx_72,
        minimum=0,
        maximum=total - 1,
        step=1,
        interactive=True,
        label=f"Frame selector  (total: {total} frames)"
    )

    select_points.clear()   # clear any existing points when new video is loaded

    status = f"πŸ“Ή Loaded β€” {total} frames | 🎞️ Showing frame {idx_72} (72%)"
    # last value resets the download-control style (user upload β†’ downloads allowed)
    return frame_display_update, slider_update, status, video_path, ""

def load_example(video_path):
    """Load an example video, reset all output/selection fields, and hide downloads."""
    frame_upd, slider_upd, status, state, _ = on_video_upload(video_path)
    return (
        gr.update(value=video_path),   # video_upload
        frame_upd,                     # frame_display
        slider_upd,                    # frame_slider
        status,                        # status_text
        state,                         # video_state
        gr.update(value=None),         # out_video - clear previous result
        gr.update(value="No points selected yet."),  # points_display
        "1",                           # download_ctrl - disable downloads
    )

def on_slider_release(frame_idx, video_path, points_display):
    """Called when the slider is released: show the new frame and reset the selection."""
    if video_path is None:
        return None, "No video loaded.", points_display
    frame = get_frame(video_path, int(frame_idx))
    select_points.clear()   # query points are tied to one frame, so changing frame resets them
    points_display = gr.update(
        value="No points selected yet.",
        label="📋 Selected Query Points",
        lines=5,
        interactive=False,
    )
    status = f"🎞️ Showing frame {int(frame_idx)} ({int(frame_idx) / get_total_frames(video_path) * 100:.1f}%) | {len(select_points)} point(s) selected"
    return frame, status, points_display

def on_point_select(frame_idx, video_path, evt: gr.SelectData):
    """Called when the user clicks on the image: add the point and redraw the markers."""
    if video_path is None:
        return None, "Upload a video first.", format_points()

    if len(select_points) >= N_POINTS:
        status = f"⚠️ Max {N_POINTS} points reached."
        frame  = get_frame(video_path, int(frame_idx))
        return draw_points(frame, select_points), status, format_points()

    x, y = int(evt.index[0]), int(evt.index[1])
    select_points.append(np.array([x, y]))

    frame  = get_frame(video_path, int(frame_idx))
    drawn  = draw_points(frame, select_points)
    status = f"✅ Point {len(select_points)} added at ({x}, {y}) | Frame {int(frame_idx)}"
    return drawn, status, format_points()

def on_clear_points(frame_idx, video_path):
    """Clear all selected points."""
    select_points.clear()
    if video_path is None:
        return None, "Points cleared.", format_points()
    frame = get_frame(video_path, int(frame_idx))
    return draw_points(frame, select_points), "🗑️ All points cleared.", format_points()

def on_undo_point(frame_idx, video_path):
    """Remove last selected point."""
    if select_points:
        removed = select_points.pop()
        msg = f"↩️ Removed point at ({removed[0]}, {removed[1]})"
    else:
        msg = "No points to undo."
    if video_path is None:
        return None, msg, format_points()
    frame = get_frame(video_path, int(frame_idx))
    return draw_points(frame, select_points), msg, format_points()

def format_points():
    """Format select_points for display in the textbox."""
    if not select_points:
        return "No points selected yet."
    lines = [f"  [{i+1}]  x={p[0]},  y={p[1]}" for i, p in enumerate(select_points)]
    return "select_points:\n" + "\n".join(lines)

def track(video_path, frame_idx, target_size=(256, 256)):
    """Run EchoTracker on the selected query points and render the tracked video."""
    if video_path is None:
        status = "⚠️ No video loaded. Cannot run the tracker."
        return gr.update(), status
    if len(select_points) < 1:
        status = "⚠️ No points selected. Please select at least one point to track."
        return gr.update(), status

    tracker, device = load_model("echotracker_cvamd_ts.pt")
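    # NOTE: the TorchScript model is reloaded from disk on every click. That keeps
    # the demo simple; it could instead be loaded once at module scope and reused.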

    cap = cv2.VideoCapture(video_path)
    W   = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    H   = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    frames = []        # grayscale, resized frames for the model
    paint_frames = []  # full-resolution frames for visualization
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        paint_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # media.write_video expects RGB
        frame = cv2.resize(frame, target_size)
        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    cap.release()
    paint_frames = np.array(paint_frames)
    frames = torch.from_numpy(np.array(frames)).unsqueeze(0).unsqueeze(2).float().to(device)  # shape: [B=1, T, C=1, H, W]
    # (x, y) pixel coords are rescaled from (W, H) to the model's 256x256 grid and
    # prefixed with the query frame index (see utils.points_to_tensor).
    q_points = points_to_tensor(select_points, frame_idx, H, W, 256).to(device)  # shape: [1, N, 3]
    
    with torch.no_grad():
        output = tracker(frames, q_points)
        trajs_e = output[-1].cpu().permute(0, 2, 1, 3)
    
    # Normalize coordinates from the 256x256 model grid to [0, 1] for visualization.
    q_points[..., 1] /= 256 - 1
    q_points[..., 2] /= 256 - 1
    trajs_e[..., 0] /= 256 - 1
    trajs_e[..., 1] /= 256 - 1
    paint_frames = visualize_tracking(
        frames=paint_frames, points=trajs_e.squeeze().cpu().numpy(), 
        vis_color='random',
        thickness=5,
        track_length=30,
    )
    # Write the visualization to disk and surface it in the output player
    out_vid = "outputs/output.mp4"
    os.makedirs("outputs", exist_ok=True)
    media.write_video(out_vid, paint_frames, fps=25)
    status = "✅ Tracking completed! The output is visualized below."
    out_video = gr.update(value=out_vid, autoplay=True, loop=True)
    return out_video, status
    

def load_model(model_path: str, device: str = "cuda" if torch.cuda.is_available() else "cpu"):
    """Load a TorchScript model.

    Args:
        model_path (str): path to the TorchScript weights
        device (str, optional): defaults to "cuda" if available, else "cpu"

    Returns:
        (model, device): the loaded model in eval mode and the device it runs on
    """
    model = torch.jit.load(model_path, map_location=device).eval()
    return model, device


# ── Gradio UI ─────────────────────────────────────────────────────────────────
HEADER = """
<div style="text-align:center; padding: 20px 0 8px;">
  <h1 style="font-size:2.2rem; font-weight:700; margin-bottom:4px;">πŸ«€ EchoTracker</h1>
  <p style="font-size:1.05rem; color:var(--echo-muted); margin:4px 0 0;">
    Advancing Myocardial Point Tracking in Echocardiography
  </p>
  <p style="font-size:0.9rem; color:var(--echo-subtle); margin:2px 0 0;">
    MICCAI 2024 &nbsp;Β·&nbsp;
    Azad, Chernyshov, Nyberg, Tveten, Lovstakken, Dalen, Grenne, Østvik
  </p>
  <p style="font-size:0.9rem; color:var(--echo-subtle); margin:4px 0 0;">
    Model weights from: <em>Taming Modern Point Tracking for Speckle Tracking Echocardiography via Impartial Motion</em>
    &nbsp;Β·&nbsp; ICCV 2025 Workshop &nbsp;Β·&nbsp;
    Azad, Nyberg, Dalen, Grenne, Lovstakken, Østvik
  </p>
  <div style="margin-top:12px; display:flex; justify-content:center; gap:10px; flex-wrap:wrap;">
    <a href="https://link.springer.com/chapter/10.1007/978-3-031-72083-3_60"
       target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#2563eb;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ“„ Paper (MICCAI 2024)
    </a>
    <a href="https://openaccess.thecvf.com/content/ICCV2025W/CVAMD/papers/Azad_Taming_Modern_Point_Tracking_for_Speckle_Tracking_Echocardiography_via_Impartial_CVAMD_2025_paper.pdf"
       target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#2563eb;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ“„ Paper (ICCV 2025 Workshop)
    </a>
    <a href="https://arxiv.org/abs/2405.08587" target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#dc2626;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ“ ArXiv (EchoTracker)
    </a>
    <a href="https://arxiv.org/abs/2507.10127" target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#dc2626;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ“ ArXiv (Taming)
    </a>
    <a href="https://github.com/riponazad/echotracker" target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#1f2937;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ’» GitHub
    </a>
    <a href="https://riponazad.github.io/echotracker/" target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#7c3aed;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      🌐 Project Page
    </a>
  </div>
</div>
"""

CITATION_MD = """
If you use EchoTracker or the model weights in this demo, please cite both papers:

```bibtex
@InProceedings{azad2024echotracker,
    author    = {Azad, Md Abulkalam and Chernyshov, Artem and Nyberg, John
                 and Tveten, Ingrid and Lovstakken, Lasse and Dalen, H{\\aa}vard
                 and Grenne, Bj{\\o}rnar and {\\O}stvik, Andreas},
    title     = {EchoTracker: Advancing Myocardial Point Tracking in Echocardiography},
    booktitle = {Medical Image Computing and Computer Assisted Intervention -- MICCAI 2024},
    year      = {2024},
    publisher = {Springer Nature Switzerland},
    doi       = {10.1007/978-3-031-72083-3_60}
}

@InProceedings{Azad_2025_ICCV,
    author    = {Azad, Md Abulkalam and Nyberg, John and Dalen, H{\\aa}vard
                 and Grenne, Bj{\\o}rnar and Lovstakken, Lasse and {\\O}stvik, Andreas},
    title     = {Taming Modern Point Tracking for Speckle Tracking Echocardiography via Impartial Motion},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = {October},
    year      = {2025},
    pages     = {1115--1124}
}
```
"""

with gr.Blocks(title="EchoTracker", theme=gr.themes.Soft(),
               css="""
.gr-button { font-weight: 600; }
:root  { --echo-muted: #444; --echo-subtle: #666; }
.dark  { --echo-muted: #c0c0c0; --echo-subtle: #a8a8a8; }
""",
               js=_DOWNLOAD_CTRL_JS) as demo:

    gr.HTML(HEADER)
    gr.Markdown("---")

    # ── Instructions ──────────────────────────────────────────────────────────
    with gr.Accordion("ℹ️  How to use", open=False):
        gr.Markdown("""
1. **Load a video** β€” upload your own echocardiography clip, or click one of the provided example videos below the panel.
2. **Navigate** to the desired query frame using the frame slider.
3. **Click** on the frame image to place tracking points on cardiac tissue surfaces (e.g. LV/RV walls, myocardium).
4. Use **Undo** or **Clear All** to adjust your selection.
5. Press **β–Ά Run EchoTracker** to generate tracked trajectories for all selected points.

> **Tip:** Select points at the *end-diastolic* frame for best results. Up to 100 points are supported.
> Example clips cover apical 4-chamber (A4C), right-ventricle (RV), and parasternal short-axis (PSAX) views.
> Clips marked **OOD** (πŸ”Ά) are out-of-distribution β€” different scanner or view not seen during training, showcasing EchoTracker's generalisation ability.
        """)

    # hidden state
    video_state  = gr.State(value=None)
    # hidden textbox; its value ("1"/"") signals the JS hook to hide or restore download controls
    download_ctrl = gr.Textbox(value="0", visible=False, elem_id="download_ctrl")

    gr.Markdown("### Step 1 β€” Upload & Select Query Points")
    gr.Markdown(
        "Upload your own echocardiography video, or click one of the **example clips** below to get started."
    )

    with gr.Row(equal_height=False):
        # ── Left column: input + points ───────────────────────────────────────
        with gr.Column(scale=1, min_width=300):
            video_upload = gr.Video(
                label="Echocardiography Video β€” upload yours or use an example below",
                sources="upload",
                include_audio=False,
                autoplay=True,
                loop=True,
                elem_id="video_upload_player",
            )
            points_display = gr.Textbox(
                value="No points selected yet.",
                label="πŸ“‹ Selected Query Points",
                lines=5,
                max_lines=5,
                interactive=False,
            )
            gr.Markdown(
                "<small style='color:var(--echo-subtle)'>Coordinates are stored as "
                "<code>np.array([x, y])</code> and passed to the tracker.</small>"
            )

        # ── Right column: frame viewer + controls ─────────────────────────────
        with gr.Column(scale=2, min_width=400):
            frame_display = gr.Image(
                label="Query Frame β€” click to place tracking points",
                interactive=True,
                type="numpy",
                sources=[],
            )
            frame_slider = gr.Slider(
                minimum=0, maximum=100, value=0, step=1,
                label="Frame",
                interactive=False,
            )
            status_text = gr.Textbox(
                label="Status", lines=1, interactive=False, show_label=False,
                placeholder="Status messages will appear here…",
            )
            with gr.Row():
                undo_btn  = gr.Button("↩ Undo", scale=1)
                clear_btn = gr.Button("🗑 Clear All", variant="stop", scale=1)

    gr.Markdown("---")
    gr.Markdown("### Step 2 β€” Run Tracker & View Output")
    with gr.Row():
        with gr.Column(scale=1):
            run_btn = gr.Button("▶  Run EchoTracker", variant="primary", size="lg")
        with gr.Column(scale=2):
            out_video = gr.Video(
                label="Tracking Output",
                sources=[],
                include_audio=False,
                interactive=False,
                autoplay=True,
                loop=True,
                elem_id="out_video_player",
            )

    gr.Markdown("---")

    gr.Markdown(
        "**Or try an example clip** "
        "<small style='color:var(--echo-subtle)'>β€” OOD = out-of-distribution (different scanner / view not seen during training)</small>"
    )
    gr.Markdown(
        "> ⚠️ **Example videos are provided for demonstration purposes only. "
        "They should not be downloaded, reproduced, or used for any purpose outside this demo.**"
    )
    ex_btns = []
    with gr.Row(equal_height=True):
        for label, (path, is_ood) in EXAMPLE_VIDEOS.items():
            with gr.Column(min_width=120):
                gr.Image(
                    value=THUMBNAILS[label],
                    show_label=False,
                    interactive=False,
                    height=110,
                    container=False,
                )
                btn_label = f"{label} πŸ”Ά" if is_ood else label
                ex_btns.append(gr.Button(btn_label, size="sm"))

    # ── Like request ──────────────────────────────────────────────────────────
    gr.Markdown(
        "<div style='text-align:center; padding: 8px 0;'>"
        "If you find this demo useful, please click the ❀️ <b>Like</b> button at the top of this Space β€” "
        "it helps others discover this work and supports open research in cardiac image analysis."
        "</div>"
    )

    # ── Citation ──────────────────────────────────────────────────────────────
    with gr.Accordion("πŸ“  Citation", open=False):
        gr.Markdown(CITATION_MD)

    # ── Wire events ───────────────────────────────────────────────────────────
    video_upload.upload(
        fn=on_video_upload,
        inputs=[video_upload],
        outputs=[frame_display, frame_slider, status_text, video_state, download_ctrl]
    )

    frame_slider.release(
        fn=on_slider_release,
        inputs=[frame_slider, video_state, points_display],
        outputs=[frame_display, status_text, points_display]
    )

    frame_display.select(
        fn=on_point_select,
        inputs=[frame_slider, video_state],
        outputs=[frame_display, status_text, points_display]
    )

    undo_btn.click(
        fn=on_undo_point,
        inputs=[frame_slider, video_state],
        outputs=[frame_display, status_text, points_display]
    )

    clear_btn.click(
        fn=on_clear_points,
        inputs=[frame_slider, video_state],
        outputs=[frame_display, status_text, points_display]
    )

    for btn, (path, _) in zip(ex_btns, EXAMPLE_VIDEOS.values()):
        btn.click(
            fn=load_example,
            inputs=gr.State(path),
            outputs=[video_upload, frame_display, frame_slider, status_text, video_state,
                     out_video, points_display, download_ctrl]
        )

    run_btn.click(
        fn=track,
        inputs=[video_state, frame_slider],
        outputs=[out_video, status_text]
    )

if __name__ == "__main__":
    demo.launch(share=False)