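"""EchoTracker Gradio demo.

Upload an echocardiography clip (or pick a bundled example), click query
points on a chosen frame, and run the TorchScript EchoTracker model to
visualize the tracked point trajectories.
"""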
import gradio as gr
import os
import torch
import cv2
import numpy as np
import random
from utils import points_to_tensor
from utils import visualize_tracking
import mediapy as media

# ── Colormap (matches your viz_utils.get_colors logic) ───────────────────────
def get_colors(n):
    """Generate n random but unique colors in RGB 0-255."""
    random.seed(42)  # remove this line if you want different colors each run
    
    # Spread hues evenly across 0-179 (HSV in OpenCV), then shuffle
    hues = list(range(0, 180, max(1, 180 // n)))[:n]
    random.shuffle(hues)
    
    colors = []
    for hue in hues:
        # Randomize saturation and value slightly for more visual variety
        sat = random.randint(180, 255)
        val = random.randint(180, 255)
        hsv = np.uint8([[[hue, sat, val]]])
        rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)[0][0]
        colors.append(tuple(int(c) for c in rgb))
    
    return colors

N_POINTS  = 100
COLORMAP  = get_colors(N_POINTS)
select_points = []   # will hold np.array([x, y]) entries
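# Caveat: this module-level list is shared by every session of the app. That is
# fine for a single-user demo; a multi-user deployment would keep it in gr.State.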

# ── Video helpers ─────────────────────────────────────────────────────────────
def get_frame(video_path: str, frame_idx: int) -> np.ndarray:
    """Extract a single frame from video by index."""
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
    ret, frame = cap.read()
    cap.release()
    if not ret:
        raise ValueError(f"Could not read frame {frame_idx}")
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

def get_total_frames(video_path: str) -> int:
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    return total

# ── Draw points on frame ──────────────────────────────────────────────────────
def draw_points(frame: np.ndarray, points: list) -> np.ndarray:
    """Draw a numbered, colored circle marker on the frame for each selected point."""
    out = frame.copy()
    for i, pt in enumerate(points):
        x, y  = int(pt[0]), int(pt[1])
        color = COLORMAP[i % N_POINTS]  # RGB tuple; the frame is already RGB, so no BGR swap
        cv2.circle(out, (x, y), radius=6, color=color, thickness=-1)
        cv2.circle(out, (x, y), radius=6,
                   color=(255, 255, 255), thickness=2)  # white border
        cv2.putText(out, str(i + 1), (x + 10, y - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
    return out

_SAMPLES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "example_samples")

# JS injected into gr.Blocks - controls download availability on the video players
_DOWNLOAD_CTRL_JS = """
(function () {
  const EXAMPLE_IDS = ['video_upload_player', 'out_video_player'];
  const USER_IDS    = ['out_video_player'];

  function applyNoDownload(ids) {
    ids.forEach(function (id) {
      var el = document.getElementById(id);
      if (!el) return;
      el.querySelectorAll('video').forEach(function (v) {
        v.setAttribute('controlsList', 'nodownload');
        v.oncontextmenu = function (e) { e.preventDefault(); };
      });
      el.querySelectorAll('a').forEach(function (a) {
        a.style.cssText = 'display:none!important;pointer-events:none!important';
      });
      el.querySelectorAll('button').forEach(function (btn) {
        var lbl = (btn.getAttribute('aria-label') || btn.getAttribute('title') || '').toLowerCase();
        if (lbl.includes('download') || lbl.includes('save')) {
          btn.style.cssText = 'display:none!important;pointer-events:none!important';
        }
      });
    });
  }

  function clearNoDownload(ids) {
    ids.forEach(function (id) {
      var el = document.getElementById(id);
      if (!el) return;
      el.querySelectorAll('video').forEach(function (v) {
        v.removeAttribute('controlsList');
        v.oncontextmenu = null;
      });
      el.querySelectorAll('a').forEach(function (a) { a.style.cssText = ''; });
      el.querySelectorAll('button').forEach(function (btn) { btn.style.cssText = ''; });
    });
  }

  window._isExampleMode = false;

  function applyCurrentMode() {
    if (window._isExampleMode) applyNoDownload(EXAMPLE_IDS);
    else clearNoDownload(USER_IDS);
  }

  /* Watch both containers for DOM changes (e.g. when video src updates) */
  EXAMPLE_IDS.concat(['out_video_player']).forEach(function (id) {
    (function tryObserve() {
      var el = document.getElementById(id);
      if (!el) { setTimeout(tryObserve, 400); return; }
      new MutationObserver(applyCurrentMode)
        .observe(el, { childList: true, subtree: true });
    })();
  });

  /* Intercept value setter on hidden textbox to receive mode signal from Python */
  function hookTrigger() {
    var container = document.querySelector('#download_ctrl textarea');
    if (!container) { setTimeout(hookTrigger, 300); return; }
    var desc = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value');
    Object.defineProperty(container, 'value', {
      get: function ()  { return desc.get.call(this); },
      set: function (v) {
        desc.set.call(this, v);
        window._isExampleMode = (v === '1');
        applyCurrentMode();
      },
      configurable: true,
    });
  }

  setTimeout(hookTrigger, 500);
})();
"""
# label → (path, is_ood)
EXAMPLE_VIDEOS = {
    "A4C":          (os.path.join(_SAMPLES_DIR, "input1.mp4"),          False),
    "A4C (OOD)":    (os.path.join(_SAMPLES_DIR, "input2.mp4"),          True),
    "RV (OOD)":     (os.path.join(_SAMPLES_DIR, "input3_RV.mp4"),       True),
    "PSAX (OOD)":   (os.path.join(_SAMPLES_DIR, "psax_video_crop.mp4"), True),
}

def _get_thumbnail(video_path: str) -> np.ndarray | None:
    """Extract a single frame near the middle of the video for use as a thumbnail."""
    try:
        cap = cv2.VideoCapture(video_path)
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        cap.set(cv2.CAP_PROP_POS_FRAMES, max(0, int(total * 0.4)))
        ret, frame = cap.read()
        cap.release()
        if ret:
            return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    except Exception:
        pass
    return None

THUMBNAILS = {label: _get_thumbnail(path) for label, (path, _) in EXAMPLE_VIDEOS.items()}

# ── Gradio event handlers ─────────────────────────────────────────────────────
def on_video_upload(video_path):
    """Called when a video is uploaded: jump to the frame at 72% of the clip."""
    if video_path is None:
        return None, gr.update(interactive=False), "No video loaded.", None, ""

    total  = get_total_frames(video_path)
    idx_72 = int(total * 0.72)

    frame  = get_frame(video_path, idx_72)

    frame_display_update = gr.update(
        value=frame,
        interactive=True, # enables click events via gr.SelectData
    )

    slider_update = gr.update(
        value=idx_72,
        minimum=0,
        maximum=total - 1,
        step=1,
        interactive=True,
        label=f"Frame selector  (total: {total} frames)"
    )

    select_points.clear()   # clear any existing points when new video is loaded

    status = f"πŸ“Ή Loaded β€” {total} frames | 🎞️ Showing frame {idx_72} (72%)"
    # last value resets the download-control style (user upload β†’ downloads allowed)
    return frame_display_update, slider_update, status, video_path, ""

def load_example(video_path):
    """Load an example video, reset all output/selection fields, and hide downloads."""
    frame_upd, slider_upd, status, state, _ = on_video_upload(video_path)
    return (
        gr.update(value=video_path),   # video_upload
        frame_upd,                     # frame_display
        slider_upd,                    # frame_slider
        status,                        # status_text
        state,                         # video_state
        gr.update(value=None),         # out_video - clear previous result
        gr.update(value="No points selected yet."),  # points_display
        "1",                           # download_ctrl - disable downloads
    )

def on_slider_release(frame_idx, video_path, points_display):
    """Called when the slider is released: show the new frame and reset the selection."""
    if video_path is None:
        return None, "No video loaded.", points_display
    frame = get_frame(video_path, int(frame_idx))
    select_points.clear()   # query points are tied to one frame, so changing frame resets them
    points_display = gr.update(
        value="No points selected yet.",
        label="📋 Selected Query Points",
        lines=5,
        interactive=False,
    )
    status = f"🎞️ Showing frame {int(frame_idx)} ({int(frame_idx) / get_total_frames(video_path) * 100:.1f}%) | {len(select_points)} point(s) selected"
    return frame, status, points_display

def on_point_select(frame_idx, video_path, evt: gr.SelectData):
    """Called when the user clicks on the image: add the point and redraw the markers."""
    if video_path is None:
        return None, "Upload a video first.", format_points()

    if len(select_points) >= N_POINTS:
        status = f"⚠️ Max {N_POINTS} points reached."
        frame  = get_frame(video_path, int(frame_idx))
        return draw_points(frame, select_points), status, format_points()

    x, y = int(evt.index[0]), int(evt.index[1])
    select_points.append(np.array([x, y]))

    frame  = get_frame(video_path, int(frame_idx))
    drawn  = draw_points(frame, select_points)
    status = f"✅ Point {len(select_points)} added at ({x}, {y}) | Frame {int(frame_idx)}"
    return drawn, status, format_points()

def on_clear_points(frame_idx, video_path):
    """Clear all selected points."""
    select_points.clear()
    if video_path is None:
        return None, "Points cleared.", format_points()
    frame = get_frame(video_path, int(frame_idx))
    return draw_points(frame, select_points), "🗑️ All points cleared.", format_points()

def on_undo_point(frame_idx, video_path):
    """Remove last selected point."""
    if select_points:
        removed = select_points.pop()
        msg = f"↩️ Removed point at ({removed[0]}, {removed[1]})"
    else:
        msg = "No points to undo."
    if video_path is None:
        return None, msg, format_points()
    frame = get_frame(video_path, int(frame_idx))
    return draw_points(frame, select_points), msg, format_points()

def format_points():
    """Format select_points for display in the textbox."""
    if not select_points:
        return "No points selected yet."
    lines = [f"  [{i+1}]  x={p[0]},  y={p[1]}" for i, p in enumerate(select_points)]
    return "select_points:\n" + "\n".join(lines)

def track(video_path, frame_idx, target_size=(256, 256)):
    """Run EchoTracker on the selected query points and render the tracked video."""
    if video_path is None:
        status = "⚠️ No video loaded. Cannot run the tracker."
        return gr.update(), status
    if len(select_points) < 1:
        status = "⚠️ No points selected. Please select at least one point to track."
        return gr.update(), status

    tracker, device = load_model("echotracker_cvamd_ts.pt")
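    # NOTE: the TorchScript model is reloaded from disk on every click. That keeps
    # the demo simple; it could instead be loaded once at module scope and reused.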

    cap = cv2.VideoCapture(video_path)
    W   = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    H   = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    frames = []        # grayscale, resized frames for the model
    paint_frames = []  # full-resolution frames for visualization
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        paint_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # media.write_video expects RGB
        frame = cv2.resize(frame, target_size)
        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    cap.release()
    paint_frames = np.array(paint_frames)
    frames = torch.from_numpy(np.array(frames)).unsqueeze(0).unsqueeze(2).float().to(device)  # shape: [B=1, T, C=1, H, W]
    # (x, y) pixel coords are rescaled from (W, H) to the model's 256x256 grid and
    # prefixed with the query frame index (see utils.points_to_tensor).
    q_points = points_to_tensor(select_points, frame_idx, H, W, 256).to(device)  # shape: [1, N, 3]
    
    with torch.no_grad():
        output = tracker(frames, q_points)
        trajs_e = output[-1].cpu().permute(0, 2, 1, 3)
    
    # Normalize coordinates from the 256x256 model grid to [0, 1] for visualization.
    q_points[..., 1] /= 256 - 1
    q_points[..., 2] /= 256 - 1
    trajs_e[..., 0] /= 256 - 1
    trajs_e[..., 1] /= 256 - 1
    paint_frames = visualize_tracking(
        frames=paint_frames, points=trajs_e.squeeze().cpu().numpy(), 
        vis_color='random',
        thickness=5,
        track_length=30,
    )
    # Write the visualization to disk and surface it in the output player
    out_vid = "outputs/output.mp4"
    os.makedirs("outputs", exist_ok=True)
    media.write_video(out_vid, paint_frames, fps=25)
    status = "✅ Tracking completed! The output is visualized below."
    out_video = gr.update(value=out_vid, autoplay=True, loop=True)
    return out_video, status
    

def load_model(model_path: str, device: str = "cuda" if torch.cuda.is_available() else "cpu"):
    """Load a TorchScript model.

    Args:
        model_path (str): path to the TorchScript weights
        device (str, optional): defaults to "cuda" if available, else "cpu"

    Returns:
        (model, device): the loaded model in eval mode and the device it runs on
    """
    model = torch.jit.load(model_path, map_location=device).eval()
    return model, device


# ── Gradio UI ─────────────────────────────────────────────────────────────────
HEADER = """
<div style="text-align:center; padding: 20px 0 8px;">
  <h1 style="font-size:2.2rem; font-weight:700; margin-bottom:4px;">πŸ«€ EchoTracker</h1>
  <p style="font-size:1.05rem; color:var(--echo-muted); margin:4px 0 0;">
    Advancing Myocardial Point Tracking in Echocardiography
  </p>
  <p style="font-size:0.9rem; color:var(--echo-subtle); margin:2px 0 0;">
    MICCAI 2024 &nbsp;Β·&nbsp;
    Azad, Chernyshov, Nyberg, Tveten, Lovstakken, Dalen, Grenne, Østvik
  </p>
  <p style="font-size:0.9rem; color:var(--echo-subtle); margin:4px 0 0;">
    Model weights from: <em>Taming Modern Point Tracking for Speckle Tracking Echocardiography via Impartial Motion</em>
    &nbsp;Β·&nbsp; ICCV 2025 Workshop &nbsp;Β·&nbsp;
    Azad, Nyberg, Dalen, Grenne, Lovstakken, Østvik
  </p>
  <div style="margin-top:12px; display:flex; justify-content:center; gap:10px; flex-wrap:wrap;">
    <a href="https://link.springer.com/chapter/10.1007/978-3-031-72083-3_60"
       target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#2563eb;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ“„ Paper (MICCAI 2024)
    </a>
    <a href="https://openaccess.thecvf.com/content/ICCV2025W/CVAMD/papers/Azad_Taming_Modern_Point_Tracking_for_Speckle_Tracking_Echocardiography_via_Impartial_CVAMD_2025_paper.pdf"
       target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#2563eb;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ“„ Paper (ICCV 2025 Workshop)
    </a>
    <a href="https://arxiv.org/abs/2405.08587" target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#dc2626;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ“ ArXiv (EchoTracker)
    </a>
    <a href="https://arxiv.org/abs/2507.10127" target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#dc2626;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ“ ArXiv (Taming)
    </a>
    <a href="https://github.com/riponazad/echotracker" target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#1f2937;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      πŸ’» GitHub
    </a>
    <a href="https://riponazad.github.io/echotracker/" target="_blank"
       style="display:inline-flex;align-items:center;gap:5px;padding:5px 14px;border-radius:6px;
              background:#7c3aed;color:white;font-size:0.85rem;text-decoration:none;font-weight:500;">
      🌐 Project Page
    </a>
  </div>
</div>
"""

CITATION_MD = """
If you use EchoTracker or the model weights in this demo, please cite both papers:

```bibtex
@InProceedings{azad2024echotracker,
    author    = {Azad, Md Abulkalam and Chernyshov, Artem and Nyberg, John
                 and Tveten, Ingrid and Lovstakken, Lasse and Dalen, H{\\aa}vard
                 and Grenne, Bj{\\o}rnar and {\\O}stvik, Andreas},
    title     = {EchoTracker: Advancing Myocardial Point Tracking in Echocardiography},
    booktitle = {Medical Image Computing and Computer Assisted Intervention -- MICCAI 2024},
    year      = {2024},
    publisher = {Springer Nature Switzerland},
    doi       = {10.1007/978-3-031-72083-3_60}
}

@InProceedings{Azad_2025_ICCV,
    author    = {Azad, Md Abulkalam and Nyberg, John and Dalen, H{\\aa}vard
                 and Grenne, Bj{\\o}rnar and Lovstakken, Lasse and {\\O}stvik, Andreas},
    title     = {Taming Modern Point Tracking for Speckle Tracking Echocardiography via Impartial Motion},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV) Workshops},
    month     = {October},
    year      = {2025},
    pages     = {1115--1124}
}
```
"""

with gr.Blocks(title="EchoTracker", theme=gr.themes.Soft(),
               css="""
.gr-button { font-weight: 600; }
:root  { --echo-muted: #444; --echo-subtle: #666; }
.dark  { --echo-muted: #c0c0c0; --echo-subtle: #a8a8a8; }
""",
               js=_DOWNLOAD_CTRL_JS) as demo:

    gr.HTML(HEADER)
    gr.Markdown("---")

    # ── Instructions ──────────────────────────────────────────────────────────
    with gr.Accordion("ℹ️  How to use", open=False):
        gr.Markdown("""
1. **Load a video** β€” upload your own echocardiography clip, or click one of the provided example videos below the panel.
2. **Navigate** to the desired query frame using the frame slider.
3. **Click** on the frame image to place tracking points on cardiac tissue surfaces (e.g. LV/RV walls, myocardium).
4. Use **Undo** or **Clear All** to adjust your selection.
5. Press **β–Ά Run EchoTracker** to generate tracked trajectories for all selected points.

> **Tip:** Select points at the *end-diastolic* frame for best results. Up to 100 points are supported.
> Example clips cover apical 4-chamber (A4C), right-ventricle (RV), and parasternal short-axis (PSAX) views.
> Clips marked **OOD** (πŸ”Ά) are out-of-distribution β€” different scanner or view not seen during training, showcasing EchoTracker's generalisation ability.
        """)

    # hidden state
    video_state  = gr.State(value=None)
    # hidden textbox; its value ("1"/"") signals the JS hook to hide or restore download controls
    download_ctrl = gr.Textbox(value="0", visible=False, elem_id="download_ctrl")

    gr.Markdown("### Step 1 β€” Upload & Select Query Points")
    gr.Markdown(
        "Upload your own echocardiography video, or click one of the **example clips** below to get started."
    )

    with gr.Row(equal_height=False):
        # ── Left column: input + points ───────────────────────────────────────
        with gr.Column(scale=1, min_width=300):
            video_upload = gr.Video(
                label="Echocardiography Video β€” upload yours or use an example below",
                sources="upload",
                include_audio=False,
                autoplay=True,
                loop=True,
                elem_id="video_upload_player",
            )
            points_display = gr.Textbox(
                value="No points selected yet.",
                label="πŸ“‹ Selected Query Points",
                lines=5,
                max_lines=5,
                interactive=False,
            )
            gr.Markdown(
                "<small style='color:var(--echo-subtle)'>Coordinates are stored as "
                "<code>np.array([x, y])</code> and passed to the tracker.</small>"
            )

        # ── Right column: frame viewer + controls ─────────────────────────────
        with gr.Column(scale=2, min_width=400):
            frame_display = gr.Image(
                label="Query Frame β€” click to place tracking points",
                interactive=True,
                type="numpy",
                sources=[],
            )
            frame_slider = gr.Slider(
                minimum=0, maximum=100, value=0, step=1,
                label="Frame",
                interactive=False,
            )
            status_text = gr.Textbox(
                label="Status", lines=1, interactive=False, show_label=False,
                placeholder="Status messages will appear here…",
            )
            with gr.Row():
                undo_btn  = gr.Button("↩ Undo", scale=1)
                clear_btn = gr.Button("🗑 Clear All", variant="stop", scale=1)

    gr.Markdown("---")
    gr.Markdown("### Step 2 β€” Run Tracker & View Output")
    with gr.Row():
        with gr.Column(scale=1):
            run_btn = gr.Button("▶  Run EchoTracker", variant="primary", size="lg")
        with gr.Column(scale=2):
            out_video = gr.Video(
                label="Tracking Output",
                sources=[],
                include_audio=False,
                interactive=False,
                autoplay=True,
                loop=True,
                elem_id="out_video_player",
            )

    gr.Markdown("---")

    gr.Markdown(
        "**Or try an example clip** "
        "<small style='color:var(--echo-subtle)'>β€” OOD = out-of-distribution (different scanner / view not seen during training)</small>"
    )
    gr.Markdown(
        "> ⚠️ **Example videos are provided for demonstration purposes only. "
        "They should not be downloaded, reproduced, or used for any purpose outside this demo.**"
    )
    ex_btns = []
    with gr.Row(equal_height=True):
        for label, (path, is_ood) in EXAMPLE_VIDEOS.items():
            with gr.Column(min_width=120):
                gr.Image(
                    value=THUMBNAILS[label],
                    show_label=False,
                    interactive=False,
                    height=110,
                    container=False,
                )
                btn_label = f"{label} πŸ”Ά" if is_ood else label
                ex_btns.append(gr.Button(btn_label, size="sm"))

    # ── Like request ──────────────────────────────────────────────────────────
    gr.Markdown(
        "<div style='text-align:center; padding: 8px 0;'>"
        "If you find this demo useful, please click the ❀️ <b>Like</b> button at the top of this Space β€” "
        "it helps others discover this work and supports open research in cardiac image analysis."
        "</div>"
    )

    # ── Citation ──────────────────────────────────────────────────────────────
    with gr.Accordion("πŸ“  Citation", open=False):
        gr.Markdown(CITATION_MD)

    # ── Wire events ───────────────────────────────────────────────────────────
    video_upload.upload(
        fn=on_video_upload,
        inputs=[video_upload],
        outputs=[frame_display, frame_slider, status_text, video_state, download_ctrl]
    )

    frame_slider.release(
        fn=on_slider_release,
        inputs=[frame_slider, video_state, points_display],
        outputs=[frame_display, status_text, points_display]
    )

    frame_display.select(
        fn=on_point_select,
        inputs=[frame_slider, video_state],
        outputs=[frame_display, status_text, points_display]
    )

    undo_btn.click(
        fn=on_undo_point,
        inputs=[frame_slider, video_state],
        outputs=[frame_display, status_text, points_display]
    )

    clear_btn.click(
        fn=on_clear_points,
        inputs=[frame_slider, video_state],
        outputs=[frame_display, status_text, points_display]
    )

    for btn, (path, _) in zip(ex_btns, EXAMPLE_VIDEOS.values()):
        btn.click(
            fn=load_example,
            inputs=gr.State(path),
            outputs=[video_upload, frame_display, frame_slider, status_text, video_state,
                     out_video, points_display, download_ctrl]
        )

    run_btn.click(
        fn=track,
        inputs=[video_state, frame_slider],
        outputs=[out_video, status_text]
    )

if __name__ == "__main__":
    demo.launch(share=False)