Spaces:

robometer
/

rewardeval_ui

Running

App Files Files Community

Anthony Liang committed on Dec 17, 2025

Commit

f506da8

1 Parent(s): 6cf09b8

small ui updates

Browse files

Files changed (1) hide show

app.py +167 -29

app.py CHANGED Viewed

@@ -227,7 +227,7 @@ def get_available_configs(dataset_name):
 def get_trajectory_video_path(dataset, index, dataset_name):
-    """Get video path from a trajectory in the dataset."""
     try:
         item = dataset[int(index)]
         frames_data = item["frames"]
@@ -238,18 +238,25 @@ def get_trajectory_video_path(dataset, index, dataset_name):
                 video_path = f"https://huggingface.co/datasets/{dataset_name}/resolve/main/{frames_data}"
             else:
                 video_path = f"https://huggingface.co/datasets/aliangdw/rfm/resolve/main/{frames_data}"
-            return video_path, item.get("task", "Complete the task")
         else:
-            return None, None
     except Exception as e:
         logger.error(f"Error getting trajectory video path: {e}")
-        return None, None
-def extract_frames(video_path: str, max_frames: int = 16, fps: float = 1.0) -> np.ndarray:
     """Extract frames from video file as numpy array (T, H, W, C).
     Supports both local file paths and URLs (e.g., HuggingFace Hub URLs).
     """
     if video_path is None:
         return None
@@ -270,13 +277,31 @@ def extract_frames(video_path: str, max_frames: int = 16, fps: float = 1.0) -> n
         vr = decord.VideoReader(video_path, num_threads=1)
         total_frames = len(vr)
-        if total_frames <= max_frames:
             frame_indices = list(range(total_frames))
         else:
-            frame_indices = [
-                int(i * total_frames / max_frames)
-                for i in range(max_frames)
-            ]
         frames_array = vr.get_batch(frame_indices).asnumpy()  # Shape: (T, H, W, C)
         del vr
@@ -303,7 +328,7 @@ def process_single_video(
         return None, None, "Please provide a video."
     try:
-        frames_array = extract_frames(video_path, max_frames=16, fps=fps)
         if frames_array is None or frames_array.size == 0:
             return None, None, "Could not extract frames from video."
@@ -381,8 +406,8 @@ def process_dual_videos(
         return "Please provide both videos.", None
     try:
-        frames_array_a = extract_frames(video_a_path, max_frames=16, fps=fps)
-        frames_array_b = extract_frames(video_b_path, max_frames=16, fps=fps)
         if frames_array_a is None or frames_array_a.size == 0:
             return "Could not extract frames from video A.", None
@@ -483,7 +508,6 @@ def create_progress_plot(progress_pred: np.ndarray, num_frames: int) -> str:
     ax.set_ylabel('Progress (0-1)', fontsize=18, fontweight='bold')
     ax.set_title('Progress Prediction', fontsize=20, fontweight='bold')
     ax.set_ylim([0, 1])
-    ax.legend(fontsize=14)
     plt.tight_layout()
@@ -514,7 +538,6 @@ def create_success_plot(success_probs: np.ndarray, num_frames: int) -> str:
     ax.set_ylabel('Success Probability (0-1)', fontsize=18, fontweight='bold')
     ax.set_title('Success Prediction', fontsize=20, fontweight='bold')
     ax.set_ylim([0, 1])
-    ax.legend(fontsize=14)
     plt.tight_layout()
@@ -649,14 +672,18 @@ with demo:
                         load_dataset_btn = gr.Button("Load Dataset", variant="secondary", size="sm")
                     dataset_status_single = gr.Markdown("", visible=False)
-                    trajectory_slider = gr.Slider(
-                        minimum=0,
-                        maximum=0,
-                        step=1,
-                        value=0,
-                        label="Trajectory Index",
-                        interactive=False
-                    )
                     use_dataset_video_btn = gr.Button("Use Selected Video", variant="secondary")
                 gr.Markdown("---")
@@ -717,13 +744,104 @@ with demo:
         def use_dataset_video(dataset, index, dataset_name):
             """Load video from dataset and update inputs."""
             if dataset is None:
-                return None, "Complete the task", gr.update(value="No dataset loaded", visible=True)
-            video_path, task = get_trajectory_video_path(dataset, index, dataset_name)
             if video_path:
-                return video_path, task, gr.update(value=f"✅ Loaded trajectory {index} from dataset", visible=True)
             else:
-                return None, "Complete the task", gr.update(value="❌ Error loading trajectory", visible=True)
         # Dataset selection handlers
         dataset_name_single.change(
@@ -747,7 +865,27 @@ with demo:
         use_dataset_video_btn.click(
             fn=use_dataset_video,
             inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
-            outputs=[single_video_input, task_text_input, dataset_status_single]
         )
         analyze_single_btn.click(

 def get_trajectory_video_path(dataset, index, dataset_name):
+    """Get video path and metadata from a trajectory in the dataset."""
     try:
         item = dataset[int(index)]
         frames_data = item["frames"]
                 video_path = f"https://huggingface.co/datasets/{dataset_name}/resolve/main/{frames_data}"
             else:
                 video_path = f"https://huggingface.co/datasets/aliangdw/rfm/resolve/main/{frames_data}"
+            task = item.get("task", "Complete the task")
+            quality_label = item.get("quality_label", None)
+            partial_success = item.get("partial_success", None)
+            return video_path, task, quality_label, partial_success
         else:
+            return None, None, None, None
     except Exception as e:
         logger.error(f"Error getting trajectory video path: {e}")
+        return None, None, None, None
+def extract_frames(video_path: str, fps: float = 1.0) -> np.ndarray:
     """Extract frames from video file as numpy array (T, H, W, C).
     Supports both local file paths and URLs (e.g., HuggingFace Hub URLs).
+    Uses the provided ``fps`` to control how densely frames are sampled from
+    the underlying video; there is no additional hard cap on the number of frames.
     """
     if video_path is None:
         return None
         vr = decord.VideoReader(video_path, num_threads=1)
         total_frames = len(vr)
+        # Determine native FPS; fall back to a reasonable default if unavailable
+        try:
+            native_fps = float(vr.get_avg_fps())
+        except Exception:
+            native_fps = 1.0
+        # If user-specified fps is invalid or None, default to native fps
+        if fps is None or fps <= 0:
+            fps = native_fps
+        # Compute how many frames we want based on desired fps
+        # num_frames ≈ total_duration * fps = total_frames * (fps / native_fps)
+        if native_fps > 0:
+            desired_frames = int(round(total_frames * (fps / native_fps)))
+        else:
+            desired_frames = total_frames
+        # Clamp to [1, total_frames]
+        desired_frames = max(1, min(desired_frames, total_frames))
+        # Evenly sample indices to match the desired number of frames
+        if desired_frames == total_frames:
             frame_indices = list(range(total_frames))
         else:
+            frame_indices = np.linspace(0, total_frames - 1, desired_frames, dtype=int).tolist()
         frames_array = vr.get_batch(frame_indices).asnumpy()  # Shape: (T, H, W, C)
         del vr
         return None, None, "Please provide a video."
     try:
+        frames_array = extract_frames(video_path, fps=fps)
         if frames_array is None or frames_array.size == 0:
             return None, None, "Could not extract frames from video."
         return "Please provide both videos.", None
     try:
+        frames_array_a = extract_frames(video_a_path, fps=fps)
+        frames_array_b = extract_frames(video_b_path, fps=fps)
         if frames_array_a is None or frames_array_a.size == 0:
             return "Could not extract frames from video A.", None
     ax.set_ylabel('Progress (0-1)', fontsize=18, fontweight='bold')
     ax.set_title('Progress Prediction', fontsize=20, fontweight='bold')
     ax.set_ylim([0, 1])
     plt.tight_layout()
     ax.set_ylabel('Success Probability (0-1)', fontsize=18, fontweight='bold')
     ax.set_title('Success Prediction', fontsize=20, fontweight='bold')
     ax.set_ylim([0, 1])
     plt.tight_layout()
                         load_dataset_btn = gr.Button("Load Dataset", variant="secondary", size="sm")
                     dataset_status_single = gr.Markdown("", visible=False)
+                    with gr.Row():
+                        prev_traj_btn = gr.Button("⬅️ Prev", variant="secondary", size="sm")
+                        trajectory_slider = gr.Slider(
+                            minimum=0,
+                            maximum=0,
+                            step=1,
+                            value=0,
+                            label="Trajectory Index",
+                            interactive=True
+                        )
+                        next_traj_btn = gr.Button("Next ➡️", variant="secondary", size="sm")
+                    trajectory_metadata = gr.Markdown("", visible=False)
                     use_dataset_video_btn = gr.Button("Use Selected Video", variant="secondary")
                 gr.Markdown("---")
         def _format_trajectory_metadata(quality_label, partial_success):
             """Build the Markdown text describing a trajectory's metadata.

             Args:
                 quality_label: Value of the item's ``quality_label`` field, or None.
                     Skipped when falsy.
                 partial_success: Value of the item's ``partial_success`` field, or
                     None. Skipped only when it is None (so 0 is still shown).

             Returns:
                 A Markdown string with one paragraph per available field, or ""
                 when neither field is available.
             """
             entries = []
             if quality_label:
                 entries.append(f"**Quality Label:** {quality_label}")
             if partial_success is not None:
                 # The dataset schema is not visible here; fall back to the raw
                 # value instead of letting a non-numeric entry crash the callback.
                 try:
                     shown = f"{float(partial_success):.3f}"
                 except (TypeError, ValueError):
                     shown = str(partial_success)
                 entries.append(f"**Partial Success:** {shown}")
             # A blank line between entries: Markdown folds a single newline into
             # the same paragraph, which would render both fields on one line.
             return "\n\n".join(entries)

         def _step_trajectory(dataset, current_idx, dataset_name, step):
             """Move ``step`` trajectories away from ``current_idx`` and load the result.

             Shared implementation of ``next_trajectory`` / ``prev_trajectory``.

             Returns:
                 A 5-tuple ``(slider value, video path, task text, metadata update,
                 status update)``. When no dataset is loaded the slider value is 0;
                 when the target trajectory cannot be loaded the slider keeps
                 ``current_idx`` and both Markdown outputs are hidden.
             """
             if dataset is None:
                 return 0, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)

             last_idx = len(dataset) - 1
             # Clamp on both sides so the target never leaves [0, last_idx].
             target_idx = max(0, min(current_idx + step, last_idx))
             video_path, task, quality_label, partial_success = get_trajectory_video_path(
                 dataset, target_idx, dataset_name
             )
             if not video_path:
                 return current_idx, None, "Complete the task", gr.update(visible=False), gr.update(visible=False)

             metadata_text = _format_trajectory_metadata(quality_label, partial_success)
             return (
                 target_idx,
                 video_path,
                 task,
                 gr.update(value=metadata_text, visible=bool(metadata_text)),
                 gr.update(value=f"✅ Trajectory {target_idx}/{last_idx}", visible=True),
             )

         def use_dataset_video(dataset, index, dataset_name):
             """Load the selected trajectory's video from the dataset into the inputs.

             Returns:
                 A 4-tuple ``(video path, task text, status update, metadata
                 update)``. On failure (no dataset, or the trajectory could not be
                 resolved) the video path is None, the task text falls back to
                 "Complete the task" and the metadata output is hidden.
             """
             if dataset is None:
                 return None, "Complete the task", gr.update(value="No dataset loaded", visible=True), gr.update(visible=False)

             video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
             if not video_path:
                 return None, "Complete the task", gr.update(value="❌ Error loading trajectory", visible=True), gr.update(visible=False)

             metadata_text = _format_trajectory_metadata(quality_label, partial_success)
             status_text = f"✅ Loaded trajectory {index} from dataset"
             if metadata_text:
                 status_text += f"\n\n{metadata_text}"
             return (
                 video_path,
                 task,
                 gr.update(value=status_text, visible=True),
                 gr.update(value=metadata_text, visible=bool(metadata_text)),
             )

         def next_trajectory(dataset, current_idx, dataset_name):
             """Go to next trajectory (stops at the last index)."""
             return _step_trajectory(dataset, current_idx, dataset_name, 1)

         def prev_trajectory(dataset, current_idx, dataset_name):
             """Go to previous trajectory (stops at index 0)."""
             return _step_trajectory(dataset, current_idx, dataset_name, -1)

         def update_trajectory_on_slider_change(dataset, index, dataset_name):
             """Update trajectory metadata when slider changes.

             Returns:
                 A 2-tuple ``(metadata update, status update)``; both outputs are
                 hidden when no dataset is loaded or the trajectory cannot be
                 resolved.
             """
             if dataset is None:
                 return gr.update(visible=False), gr.update(visible=False)

             video_path, task, quality_label, partial_success = get_trajectory_video_path(dataset, index, dataset_name)
             if not video_path:
                 return gr.update(visible=False), gr.update(visible=False)

             metadata_text = _format_trajectory_metadata(quality_label, partial_success)
             return (
                 gr.update(value=metadata_text, visible=bool(metadata_text)),
                 gr.update(value=f"Trajectory {index}/{len(dataset) - 1}", visible=True),
             )
         # Dataset selection handlers
         dataset_name_single.change(
         use_dataset_video_btn.click(
             fn=use_dataset_video,
             inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
+            outputs=[single_video_input, task_text_input, dataset_status_single, trajectory_metadata]
+        )
+        # Navigation buttons
+        next_traj_btn.click(
+            fn=next_trajectory,
+            inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
+            outputs=[trajectory_slider, single_video_input, task_text_input, trajectory_metadata, dataset_status_single]
+        )
+        prev_traj_btn.click(
+            fn=prev_trajectory,
+            inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
+            outputs=[trajectory_slider, single_video_input, task_text_input, trajectory_metadata, dataset_status_single]
+        )
+        # Update metadata when slider changes
+        trajectory_slider.change(
+            fn=update_trajectory_on_slider_change,
+            inputs=[current_dataset_single, trajectory_slider, dataset_name_single],
+            outputs=[trajectory_metadata, dataset_status_single]
         )
         analyze_single_btn.click(