Anthony Liang committed on
Commit
38f9df5
·
1 Parent(s): ad49410
Files changed (1) hide show
  1. app.py +458 -134
app.py CHANGED
@@ -23,13 +23,11 @@ matplotlib.use("Agg") # Use non-interactive backend
23
  import matplotlib.pyplot as plt
24
  import numpy as np
25
  import requests
26
- from PIL import Image
27
- import decord
28
  from typing import Any, Optional, Tuple
29
 
30
  from rfm.data.dataset_types import Trajectory, ProgressSample, PreferenceSample
31
  from rfm.evals.eval_utils import build_payload, post_batch_npy
32
- from rfm.evals.eval_viz_utils import create_combined_progress_success_plot
33
  from datasets import load_dataset as load_dataset_hf, get_dataset_config_names
34
 
35
  logger = logging.getLogger(__name__)
@@ -266,66 +264,6 @@ def get_trajectory_video_path(dataset, index, dataset_name):
266
  return None, None, None, None
267
 
268
 
269
- def extract_frames(video_path: str, fps: float = 1.0) -> np.ndarray:
270
- """Extract frames from video file as numpy array (T, H, W, C).
271
-
272
- Supports both local file paths and URLs (e.g., HuggingFace Hub URLs).
273
- Uses the provided ``fps`` to control how densely frames are sampled from
274
- the underlying video; there is no additional hard cap on the number of frames.
275
- """
276
- if video_path is None:
277
- return None
278
-
279
- if isinstance(video_path, tuple):
280
- video_path = video_path[0]
281
-
282
- # Check if it's a URL or local file
283
- is_url = video_path.startswith(("http://", "https://"))
284
- is_local_file = os.path.exists(video_path) if not is_url else False
285
-
286
- if not is_url and not is_local_file:
287
- logger.warning(f"Video path does not exist: {video_path}")
288
- return None
289
-
290
- try:
291
- # decord.VideoReader can handle both local files and URLs
292
- vr = decord.VideoReader(video_path, num_threads=1)
293
- total_frames = len(vr)
294
-
295
- # Determine native FPS; fall back to a reasonable default if unavailable
296
- try:
297
- native_fps = float(vr.get_avg_fps())
298
- except Exception:
299
- native_fps = 1.0
300
-
301
- # If user-specified fps is invalid or None, default to native fps
302
- if fps is None or fps <= 0:
303
- fps = native_fps
304
-
305
- # Compute how many frames we want based on desired fps
306
- # num_frames ≈ total_duration * fps = total_frames * (fps / native_fps)
307
- if native_fps > 0:
308
- desired_frames = int(round(total_frames * (fps / native_fps)))
309
- else:
310
- desired_frames = total_frames
311
-
312
- # Clamp to [1, total_frames]
313
- desired_frames = max(1, min(desired_frames, total_frames))
314
-
315
- # Evenly sample indices to match the desired number of frames
316
- if desired_frames == total_frames:
317
- frame_indices = list(range(total_frames))
318
- else:
319
- frame_indices = np.linspace(0, total_frames - 1, desired_frames, dtype=int).tolist()
320
-
321
- frames_array = vr.get_batch(frame_indices).asnumpy() # Shape: (T, H, W, C)
322
- del vr
323
- return frames_array
324
- except Exception as e:
325
- logger.error(f"Error extracting frames from {video_path}: {e}")
326
- return None
327
-
328
-
329
  def process_single_video(
330
  video_path: str,
331
  task_text: str = "Complete the task",
@@ -394,7 +332,7 @@ def process_single_video(
394
  success_array = None
395
  if success_probs and len(success_probs) > 0:
396
  success_array = np.array(success_probs[0])
397
-
398
  # Convert success_array to binary if available
399
  success_binary = None
400
  if success_array is not None:
@@ -408,10 +346,9 @@ def process_single_video(
408
  success_probs=success_array,
409
  success_labels=None, # No ground truth labels available
410
  is_discrete_mode=False,
411
- num_bins=10,
412
  title=f"Progress & Success - {task_text}",
413
  )
414
-
415
  # Save to temporary file
416
  tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
417
  fig.savefig(tmp_file.name, dpi=150, bbox_inches="tight")
@@ -438,25 +375,25 @@ def process_dual_videos(
438
  prediction_type: str = "preference",
439
  server_url: str = "",
440
  fps: float = 1.0,
441
- ) -> Tuple[Optional[str], Optional[str]]:
442
  """Process two videos for preference or similarity prediction using eval server."""
443
  if not server_url:
444
- return "Please provide a server URL and check connection first.", None
445
 
446
  if not _server_state.get("server_url"):
447
- return "Server not connected. Please check server connection first.", None
448
 
449
  if video_a_path is None or video_b_path is None:
450
- return "Please provide both videos.", None
451
 
452
  try:
453
  frames_array_a = extract_frames(video_a_path, fps=fps)
454
  frames_array_b = extract_frames(video_b_path, fps=fps)
455
 
456
  if frames_array_a is None or frames_array_a.size == 0:
457
- return "Could not extract frames from video A.", None
458
  if frames_array_b is None or frames_array_b.size == 0:
459
- return "Could not extract frames from video B.", None
460
 
461
  # Convert frames to uint8
462
  if frames_array_a.dtype != np.uint8:
@@ -563,81 +500,27 @@ def process_dual_videos(
563
  else: # similarity - not yet implemented in eval server response format
564
  result_text = "Similarity prediction not yet supported in eval server response format."
565
 
566
- # Create comparison plot
567
- frames_a_list = [Image.fromarray(frame) for frame in frames_array_a]
568
- frames_b_list = [Image.fromarray(frame) for frame in frames_array_b]
569
- comparison_plot = create_comparison_plot(frames_a_list, frames_b_list, prediction_type)
570
-
571
- return result_text, comparison_plot
572
 
573
  except Exception as e:
574
- return f"Error processing videos: {str(e)}", None
575
-
576
-
577
- def create_comparison_plot(frames_a: list, frames_b: list, prediction_type: str) -> str:
578
- """Create side-by-side comparison plot of two videos."""
579
- plt.rcParams["font.family"] = "DejaVu Sans"
580
- plt.rcParams["font.size"] = 16
581
-
582
- fig, axes = plt.subplots(2, min(8, max(len(frames_a), len(frames_b))), figsize=(16, 4))
583
-
584
- if len(axes.shape) == 1:
585
- axes = axes.reshape(2, -1)
586
-
587
- # Sample frames to display
588
- num_display = min(8, max(len(frames_a), len(frames_b)))
589
- indices_a = np.linspace(0, len(frames_a) - 1, num_display, dtype=int) if len(frames_a) > 1 else [0]
590
- indices_b = np.linspace(0, len(frames_b) - 1, num_display, dtype=int) if len(frames_b) > 1 else [0]
591
-
592
- # Display frames from video A (top row)
593
- for idx, frame_idx in enumerate(indices_a):
594
- if frame_idx < len(frames_a):
595
- axes[0, idx].imshow(frames_a[frame_idx])
596
- axes[0, idx].axis("off")
597
- axes[0, idx].set_title(f"Frame {frame_idx}", fontsize=12)
598
-
599
- # Display frames from video B (bottom row)
600
- for idx, frame_idx in enumerate(indices_b):
601
- if frame_idx < len(frames_b):
602
- axes[1, idx].imshow(frames_b[frame_idx])
603
- axes[1, idx].axis("off")
604
- axes[1, idx].set_title(f"Frame {frame_idx}", fontsize=12)
605
-
606
- # Add row labels
607
- fig.text(0.02, 0.75, "Video A", rotation=90, fontsize=18, fontweight="bold", va="center")
608
- fig.text(0.02, 0.25, "Video B", rotation=90, fontsize=18, fontweight="bold", va="center")
609
-
610
- title = f"{prediction_type.capitalize()} Comparison: Video A vs Video B"
611
- fig.suptitle(title, fontsize=20, fontweight="bold", y=0.98)
612
-
613
- plt.tight_layout()
614
 
615
- # Save to temporary file
616
- tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
617
- plt.savefig(tmp_file.name, dpi=150, bbox_inches="tight")
618
- plt.close()
619
 
620
- return tmp_file.name
621
 
622
 
623
  # Create Gradio interface
624
  try:
625
  # Try with theme (Gradio 4.0+)
626
- demo = gr.Blocks(title="RFM Inference Visualizer", theme=gr.themes.Soft())
627
  except TypeError:
628
  # Fallback for older Gradio versions without theme support
629
- demo = gr.Blocks(title="RFM Inference Visualizer")
630
 
631
  with demo:
632
  gr.Markdown(
633
  """
634
- # RFM (Reward Foundation Model) Inference Visualizer
635
-
636
- Visualize progress, success, preference, and similarity predictions from the Reward Foundation Model.
637
-
638
- **Features:**
639
- - **Single Video**: Get progress and success predictions
640
- - **Dual Videos**: Compare two videos with preference or similarity predictions
641
 
642
  **Note:** This app connects to an eval server. Please provide the server URL and check connection before use.
643
  """
@@ -941,6 +824,58 @@ with demo:
941
  gr.Markdown("### Preference & Similarity Prediction")
942
  with gr.Row():
943
  with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
944
  video_a_input = gr.Video(label="Video A", height=250)
945
  video_b_input = gr.Video(label="Video B", height=250)
946
  task_text_dual = gr.Textbox(
@@ -964,13 +899,402 @@ with demo:
964
  analyze_dual_btn = gr.Button("Compare Videos", variant="primary")
965
 
966
  with gr.Column():
 
 
 
 
 
 
967
  result_text = gr.Markdown("")
968
- comparison_plot = gr.Image(label="Video Comparison", height=500)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
969
 
970
  analyze_dual_btn.click(
971
  fn=process_dual_videos,
972
  inputs=[video_a_input, video_b_input, task_text_dual, prediction_type, server_url_input, fps_input_dual],
973
- outputs=[result_text, comparison_plot],
974
  api_name="process_dual_videos",
975
  )
976
 
 
23
  import matplotlib.pyplot as plt
24
  import numpy as np
25
  import requests
 
 
26
  from typing import Any, Optional, Tuple
27
 
28
  from rfm.data.dataset_types import Trajectory, ProgressSample, PreferenceSample
29
  from rfm.evals.eval_utils import build_payload, post_batch_npy
30
+ from rfm.evals.eval_viz_utils import create_combined_progress_success_plot, extract_frames
31
  from datasets import load_dataset as load_dataset_hf, get_dataset_config_names
32
 
33
  logger = logging.getLogger(__name__)
 
264
  return None, None, None, None
265
 
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  def process_single_video(
268
  video_path: str,
269
  task_text: str = "Complete the task",
 
332
  success_array = None
333
  if success_probs and len(success_probs) > 0:
334
  success_array = np.array(success_probs[0])
335
+
336
  # Convert success_array to binary if available
337
  success_binary = None
338
  if success_array is not None:
 
346
  success_probs=success_array,
347
  success_labels=None, # No ground truth labels available
348
  is_discrete_mode=False,
 
349
  title=f"Progress & Success - {task_text}",
350
  )
351
+
352
  # Save to temporary file
353
  tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
354
  fig.savefig(tmp_file.name, dpi=150, bbox_inches="tight")
 
375
  prediction_type: str = "preference",
376
  server_url: str = "",
377
  fps: float = 1.0,
378
+ ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
379
  """Process two videos for preference or similarity prediction using eval server."""
380
  if not server_url:
381
+ return "Please provide a server URL and check connection first.", None, None
382
 
383
  if not _server_state.get("server_url"):
384
+ return "Server not connected. Please check server connection first.", None, None
385
 
386
  if video_a_path is None or video_b_path is None:
387
+ return "Please provide both videos.", None, None
388
 
389
  try:
390
  frames_array_a = extract_frames(video_a_path, fps=fps)
391
  frames_array_b = extract_frames(video_b_path, fps=fps)
392
 
393
  if frames_array_a is None or frames_array_a.size == 0:
394
+ return "Could not extract frames from video A.", None, None
395
  if frames_array_b is None or frames_array_b.size == 0:
396
+ return "Could not extract frames from video B.", None, None
397
 
398
  # Convert frames to uint8
399
  if frames_array_a.dtype != np.uint8:
 
500
  else: # similarity - not yet implemented in eval server response format
501
  result_text = "Similarity prediction not yet supported in eval server response format."
502
 
503
+ # Return result text and both video paths
504
+ return result_text, video_a_path, video_b_path
 
 
 
 
505
 
506
  except Exception as e:
507
+ return f"Error processing videos: {str(e)}", None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
 
 
 
 
 
509
 
 
510
 
511
 
512
  # Create Gradio interface
513
  try:
514
  # Try with theme (Gradio 4.0+)
515
+ demo = gr.Blocks(title="RFM Evaluation Server", theme=gr.themes.Soft())
516
  except TypeError:
517
  # Fallback for older Gradio versions without theme support
518
+ demo = gr.Blocks(title="RFM Evaluation Server")
519
 
520
  with demo:
521
  gr.Markdown(
522
  """
523
+ # RFM (Reward Foundation Model) Evaluation Server
 
 
 
 
 
 
524
 
525
  **Note:** This app connects to an eval server. Please provide the server URL and check connection before use.
526
  """
 
824
  gr.Markdown("### Preference & Similarity Prediction")
825
  with gr.Row():
826
  with gr.Column():
827
+ with gr.Accordion("📁 Video A - Select from Dataset", open=False):
828
+ dataset_name_a = gr.Dropdown(
829
+ choices=PREDEFINED_DATASETS,
830
+ value="jesbu1/oxe_rfm",
831
+ label="Dataset Name",
832
+ allow_custom_value=True,
833
+ )
834
+ config_name_a = gr.Dropdown(
835
+ choices=[], value="", label="Configuration Name", allow_custom_value=True
836
+ )
837
+ with gr.Row():
838
+ refresh_configs_btn_a = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
839
+ load_dataset_btn_a = gr.Button("Load Dataset", variant="secondary", size="sm")
840
+
841
+ dataset_status_a = gr.Markdown("", visible=False)
842
+ with gr.Row():
843
+ prev_traj_btn_a = gr.Button("⬅️ Prev", variant="secondary", size="sm")
844
+ trajectory_slider_a = gr.Slider(
845
+ minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
846
+ )
847
+ next_traj_btn_a = gr.Button("Next ➡️", variant="secondary", size="sm")
848
+ trajectory_metadata_a = gr.Markdown("", visible=False)
849
+ use_dataset_video_btn_a = gr.Button("Use Selected Video for A", variant="secondary")
850
+
851
+ with gr.Accordion("📁 Video B - Select from Dataset", open=False):
852
+ dataset_name_b = gr.Dropdown(
853
+ choices=PREDEFINED_DATASETS,
854
+ value="jesbu1/oxe_rfm",
855
+ label="Dataset Name",
856
+ allow_custom_value=True,
857
+ )
858
+ config_name_b = gr.Dropdown(
859
+ choices=[], value="", label="Configuration Name", allow_custom_value=True
860
+ )
861
+ with gr.Row():
862
+ refresh_configs_btn_b = gr.Button("🔄 Refresh Configs", variant="secondary", size="sm")
863
+ load_dataset_btn_b = gr.Button("Load Dataset", variant="secondary", size="sm")
864
+
865
+ dataset_status_b = gr.Markdown("", visible=False)
866
+ with gr.Row():
867
+ prev_traj_btn_b = gr.Button("⬅️ Prev", variant="secondary", size="sm")
868
+ trajectory_slider_b = gr.Slider(
869
+ minimum=0, maximum=0, step=1, value=0, label="Trajectory Index", interactive=True
870
+ )
871
+ next_traj_btn_b = gr.Button("Next ➡️", variant="secondary", size="sm")
872
+ trajectory_metadata_b = gr.Markdown("", visible=False)
873
+ use_dataset_video_btn_b = gr.Button("Use Selected Video for B", variant="secondary")
874
+
875
+ gr.Markdown("---")
876
+ gr.Markdown("**OR Upload Videos Directly**")
877
+ gr.Markdown("---")
878
+
879
  video_a_input = gr.Video(label="Video A", height=250)
880
  video_b_input = gr.Video(label="Video B", height=250)
881
  task_text_dual = gr.Textbox(
 
899
  analyze_dual_btn = gr.Button("Compare Videos", variant="primary")
900
 
901
  with gr.Column():
902
+ # Videos displayed side by side
903
+ with gr.Row():
904
+ video_a_display = gr.Video(label="Video A", height=400)
905
+ video_b_display = gr.Video(label="Video B", height=400)
906
+
907
+ # Result text at the bottom
908
  result_text = gr.Markdown("")
909
+
910
+ # State variables for datasets
911
+ current_dataset_a = gr.State(None)
912
+ current_dataset_b = gr.State(None)
913
+
914
+ # Helper functions for Video A
915
+ def update_config_choices_a(dataset_name):
916
+ """Update config choices for Video A when dataset changes."""
917
+ if not dataset_name:
918
+ return gr.update(choices=[], value="")
919
+ try:
920
+ configs = get_available_configs(dataset_name)
921
+ if configs:
922
+ return gr.update(choices=configs, value=configs[0])
923
+ else:
924
+ return gr.update(choices=[], value="")
925
+ except Exception as e:
926
+ logger.warning(f"Could not fetch configs: {e}")
927
+ return gr.update(choices=[], value="")
928
+
929
+ def load_dataset_a(dataset_name, config_name):
930
+ """Load dataset A and update slider."""
931
+ dataset, status = load_rfm_dataset(dataset_name, config_name)
932
+ if dataset is not None:
933
+ max_index = len(dataset) - 1
934
+ return (
935
+ dataset,
936
+ gr.update(value=status, visible=True),
937
+ gr.update(
938
+ maximum=max_index, value=0, interactive=True, label=f"Trajectory Index (0 to {max_index})"
939
+ ),
940
+ )
941
+ else:
942
+ return None, gr.update(value=status, visible=True), gr.update(maximum=0, value=0, interactive=False)
943
+
944
+ def use_dataset_video_a(dataset, index, dataset_name):
945
+ """Load video A from dataset and update input."""
946
+ if dataset is None:
947
+ return (
948
+ None,
949
+ gr.update(value="No dataset loaded", visible=True),
950
+ gr.update(visible=False),
951
+ )
952
+
953
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
954
+ if video_path:
955
+ # Build metadata text
956
+ metadata_lines = []
957
+ if quality_label:
958
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
959
+ if partial_success is not None:
960
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
961
+
962
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
963
+ status_text = f"✅ Loaded trajectory {index} from dataset for Video A"
964
+ if metadata_text:
965
+ status_text += f"\n\n{metadata_text}"
966
+
967
+ return (
968
+ video_path,
969
+ gr.update(value=status_text, visible=True),
970
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
971
+ )
972
+ else:
973
+ return (
974
+ None,
975
+ gr.update(value="❌ Error loading trajectory", visible=True),
976
+ gr.update(visible=False),
977
+ )
978
+
979
+ def next_trajectory_a(dataset, current_idx, dataset_name):
980
+ """Go to next trajectory for Video A."""
981
+ if dataset is None:
982
+ return 0, None, gr.update(visible=False), gr.update(visible=False)
983
+ next_idx = min(current_idx + 1, len(dataset) - 1)
984
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
985
+ dataset, next_idx, dataset_name
986
+ )
987
+
988
+ if video_path:
989
+ # Build metadata text
990
+ metadata_lines = []
991
+ if quality_label:
992
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
993
+ if partial_success is not None:
994
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
995
+
996
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
997
+ return (
998
+ next_idx,
999
+ video_path,
1000
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1001
+ gr.update(value=f"✅ Trajectory {next_idx}/{len(dataset) - 1}", visible=True),
1002
+ )
1003
+ else:
1004
+ return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1005
+
1006
+ def prev_trajectory_a(dataset, current_idx, dataset_name):
1007
+ """Go to previous trajectory for Video A."""
1008
+ if dataset is None:
1009
+ return 0, None, gr.update(visible=False), gr.update(visible=False)
1010
+ prev_idx = max(current_idx - 1, 0)
1011
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
1012
+ dataset, prev_idx, dataset_name
1013
+ )
1014
+
1015
+ if video_path:
1016
+ # Build metadata text
1017
+ metadata_lines = []
1018
+ if quality_label:
1019
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1020
+ if partial_success is not None:
1021
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1022
+
1023
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1024
+ return (
1025
+ prev_idx,
1026
+ video_path,
1027
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1028
+ gr.update(value=f"✅ Trajectory {prev_idx}/{len(dataset) - 1}", visible=True),
1029
+ )
1030
+ else:
1031
+ return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1032
+
1033
+ def update_trajectory_on_slider_change_a(dataset, index, dataset_name):
1034
+ """Update trajectory metadata when slider changes for Video A."""
1035
+ if dataset is None:
1036
+ return gr.update(visible=False), gr.update(visible=False)
1037
+
1038
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1039
+ if video_path:
1040
+ # Build metadata text
1041
+ metadata_lines = []
1042
+ if quality_label:
1043
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1044
+ if partial_success is not None:
1045
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1046
+
1047
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1048
+ return (
1049
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1050
+ gr.update(value=f"Trajectory {index}/{len(dataset) - 1}", visible=True),
1051
+ )
1052
+ else:
1053
+ return gr.update(visible=False), gr.update(visible=False)
1054
+
1055
+ # Helper functions for Video B (same as Video A)
1056
+ def update_config_choices_b(dataset_name):
1057
+ """Update config choices for Video B when dataset changes."""
1058
+ if not dataset_name:
1059
+ return gr.update(choices=[], value="")
1060
+ try:
1061
+ configs = get_available_configs(dataset_name)
1062
+ if configs:
1063
+ return gr.update(choices=configs, value=configs[0])
1064
+ else:
1065
+ return gr.update(choices=[], value="")
1066
+ except Exception as e:
1067
+ logger.warning(f"Could not fetch configs: {e}")
1068
+ return gr.update(choices=[], value="")
1069
+
1070
+ def load_dataset_b(dataset_name, config_name):
1071
+ """Load dataset B and update slider."""
1072
+ dataset, status = load_rfm_dataset(dataset_name, config_name)
1073
+ if dataset is not None:
1074
+ max_index = len(dataset) - 1
1075
+ return (
1076
+ dataset,
1077
+ gr.update(value=status, visible=True),
1078
+ gr.update(
1079
+ maximum=max_index, value=0, interactive=True, label=f"Trajectory Index (0 to {max_index})"
1080
+ ),
1081
+ )
1082
+ else:
1083
+ return None, gr.update(value=status, visible=True), gr.update(maximum=0, value=0, interactive=False)
1084
+
1085
+ def use_dataset_video_b(dataset, index, dataset_name):
1086
+ """Load video B from dataset and update input."""
1087
+ if dataset is None:
1088
+ return (
1089
+ None,
1090
+ gr.update(value="No dataset loaded", visible=True),
1091
+ gr.update(visible=False),
1092
+ )
1093
+
1094
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1095
+ if video_path:
1096
+ # Build metadata text
1097
+ metadata_lines = []
1098
+ if quality_label:
1099
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1100
+ if partial_success is not None:
1101
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1102
+
1103
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1104
+ status_text = f"✅ Loaded trajectory {index} from dataset for Video B"
1105
+ if metadata_text:
1106
+ status_text += f"\n\n{metadata_text}"
1107
+
1108
+ return (
1109
+ video_path,
1110
+ gr.update(value=status_text, visible=True),
1111
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1112
+ )
1113
+ else:
1114
+ return (
1115
+ None,
1116
+ gr.update(value="❌ Error loading trajectory", visible=True),
1117
+ gr.update(visible=False),
1118
+ )
1119
+
1120
+ def next_trajectory_b(dataset, current_idx, dataset_name):
1121
+ """Go to next trajectory for Video B."""
1122
+ if dataset is None:
1123
+ return 0, None, gr.update(visible=False), gr.update(visible=False)
1124
+ next_idx = min(current_idx + 1, len(dataset) - 1)
1125
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
1126
+ dataset, next_idx, dataset_name
1127
+ )
1128
+
1129
+ if video_path:
1130
+ # Build metadata text
1131
+ metadata_lines = []
1132
+ if quality_label:
1133
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1134
+ if partial_success is not None:
1135
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1136
+
1137
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1138
+ return (
1139
+ next_idx,
1140
+ video_path,
1141
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1142
+ gr.update(value=f"✅ Trajectory {next_idx}/{len(dataset) - 1}", visible=True),
1143
+ )
1144
+ else:
1145
+ return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1146
+
1147
+ def prev_trajectory_b(dataset, current_idx, dataset_name):
1148
+ """Go to previous trajectory for Video B."""
1149
+ if dataset is None:
1150
+ return 0, None, gr.update(visible=False), gr.update(visible=False)
1151
+ prev_idx = max(current_idx - 1, 0)
1152
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(
1153
+ dataset, prev_idx, dataset_name
1154
+ )
1155
+
1156
+ if video_path:
1157
+ # Build metadata text
1158
+ metadata_lines = []
1159
+ if quality_label:
1160
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1161
+ if partial_success is not None:
1162
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1163
+
1164
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1165
+ return (
1166
+ prev_idx,
1167
+ video_path,
1168
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1169
+ gr.update(value=f"✅ Trajectory {prev_idx}/{len(dataset) - 1}", visible=True),
1170
+ )
1171
+ else:
1172
+ return current_idx, None, gr.update(visible=False), gr.update(visible=False)
1173
+
1174
+ def update_trajectory_on_slider_change_b(dataset, index, dataset_name):
1175
+ """Update trajectory metadata when slider changes for Video B."""
1176
+ if dataset is None:
1177
+ return gr.update(visible=False), gr.update(visible=False)
1178
+
1179
+ video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
1180
+ if video_path:
1181
+ # Build metadata text
1182
+ metadata_lines = []
1183
+ if quality_label:
1184
+ metadata_lines.append(f"**Quality Label:** {quality_label}")
1185
+ if partial_success is not None:
1186
+ metadata_lines.append(f"**Partial Success:** {partial_success:.3f}")
1187
+
1188
+ metadata_text = "\n".join(metadata_lines) if metadata_lines else ""
1189
+ return (
1190
+ gr.update(value=metadata_text, visible=bool(metadata_text)),
1191
+ gr.update(value=f"Trajectory {index}/{len(dataset) - 1}", visible=True),
1192
+ )
1193
+ else:
1194
+ return gr.update(visible=False), gr.update(visible=False)
1195
+
1196
+ # Video A dataset selection handlers
1197
+ dataset_name_a.change(
1198
+ fn=update_config_choices_a, inputs=[dataset_name_a], outputs=[config_name_a]
1199
+ )
1200
+
1201
+ refresh_configs_btn_a.click(
1202
+ fn=update_config_choices_a, inputs=[dataset_name_a], outputs=[config_name_a]
1203
+ )
1204
+
1205
+ load_dataset_btn_a.click(
1206
+ fn=load_dataset_a,
1207
+ inputs=[dataset_name_a, config_name_a],
1208
+ outputs=[current_dataset_a, dataset_status_a, trajectory_slider_a],
1209
+ )
1210
+
1211
+ use_dataset_video_btn_a.click(
1212
+ fn=use_dataset_video_a,
1213
+ inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1214
+ outputs=[video_a_input, dataset_status_a, trajectory_metadata_a],
1215
+ )
1216
+
1217
+ next_traj_btn_a.click(
1218
+ fn=next_trajectory_a,
1219
+ inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1220
+ outputs=[
1221
+ trajectory_slider_a,
1222
+ video_a_input,
1223
+ trajectory_metadata_a,
1224
+ dataset_status_a,
1225
+ ],
1226
+ )
1227
+
1228
+ prev_traj_btn_a.click(
1229
+ fn=prev_trajectory_a,
1230
+ inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1231
+ outputs=[
1232
+ trajectory_slider_a,
1233
+ video_a_input,
1234
+ trajectory_metadata_a,
1235
+ dataset_status_a,
1236
+ ],
1237
+ )
1238
+
1239
+ trajectory_slider_a.change(
1240
+ fn=update_trajectory_on_slider_change_a,
1241
+ inputs=[current_dataset_a, trajectory_slider_a, dataset_name_a],
1242
+ outputs=[trajectory_metadata_a, dataset_status_a],
1243
+ )
1244
+
1245
+ # Video B dataset selection handlers
1246
+ dataset_name_b.change(
1247
+ fn=update_config_choices_b, inputs=[dataset_name_b], outputs=[config_name_b]
1248
+ )
1249
+
1250
+ refresh_configs_btn_b.click(
1251
+ fn=update_config_choices_b, inputs=[dataset_name_b], outputs=[config_name_b]
1252
+ )
1253
+
1254
+ load_dataset_btn_b.click(
1255
+ fn=load_dataset_b,
1256
+ inputs=[dataset_name_b, config_name_b],
1257
+ outputs=[current_dataset_b, dataset_status_b, trajectory_slider_b],
1258
+ )
1259
+
1260
+ use_dataset_video_btn_b.click(
1261
+ fn=use_dataset_video_b,
1262
+ inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1263
+ outputs=[video_b_input, dataset_status_b, trajectory_metadata_b],
1264
+ )
1265
+
1266
+ next_traj_btn_b.click(
1267
+ fn=next_trajectory_b,
1268
+ inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1269
+ outputs=[
1270
+ trajectory_slider_b,
1271
+ video_b_input,
1272
+ trajectory_metadata_b,
1273
+ dataset_status_b,
1274
+ ],
1275
+ )
1276
+
1277
+ prev_traj_btn_b.click(
1278
+ fn=prev_trajectory_b,
1279
+ inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1280
+ outputs=[
1281
+ trajectory_slider_b,
1282
+ video_b_input,
1283
+ trajectory_metadata_b,
1284
+ dataset_status_b,
1285
+ ],
1286
+ )
1287
+
1288
+ trajectory_slider_b.change(
1289
+ fn=update_trajectory_on_slider_change_b,
1290
+ inputs=[current_dataset_b, trajectory_slider_b, dataset_name_b],
1291
+ outputs=[trajectory_metadata_b, dataset_status_b],
1292
+ )
1293
 
1294
  analyze_dual_btn.click(
1295
  fn=process_dual_videos,
1296
  inputs=[video_a_input, video_b_input, task_text_dual, prediction_type, server_url_input, fps_input_dual],
1297
+ outputs=[result_text, video_a_display, video_b_display],
1298
  api_name="process_dual_videos",
1299
  )
1300