Spaces:

rewardfm
/

end_frame_vis

Running

App Files Files Community

KaushikSid committed on Nov 7, 2025

Commit

b80cf0e

1 Parent(s): 9a7086f

Step 4: Add labeling interface with CSV export and navigation

Browse files

Files changed (2) hide show

app.py +189 -27
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 import cv2
 import numpy as np
 import random
 import os
 import shutil
@@ -9,7 +10,7 @@ from datasets import load_dataset
 from huggingface_hub import hf_hub_download
 from tqdm import tqdm
-# Step 3: Add dataset loading and trajectory sampling
 def sample_trajectories(dataset_repo, config_name, is_robot, num_samples, max_to_check=10000):
     """Sample random trajectories from HuggingFace dataset."""
@@ -70,7 +71,7 @@ def download_video(trajectory, dataset_repo, config_name=None):
 def extract_frame(video_path, frame_num):
     """Extract a specific frame from video."""
     if not video_path or not os.path.exists(video_path):
-        return None, "No video loaded"
     cap = cv2.VideoCapture(video_path)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -85,34 +86,49 @@ def extract_frame(video_path, frame_num):
     if ret:
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         percent = (frame_num / total_frames * 100) if total_frames > 0 else 0
-        return frame_rgb, f"Frame {frame_num}/{total_frames-1} ({percent:.1f}%)"
-    return None, "Error reading frame"
-# Global state for loaded trajectories
 current_trajectories = []
 current_idx = 0
-def load_dataset_trajectories(dataset_repo, config_name, num_samples):
     """Load and download trajectories from dataset."""
     global current_trajectories, current_idx
     config = config_name.strip() if config_name else None
     try:
-        # Sample robot trajectories for now
-        trajs = sample_trajectories(dataset_repo, config, is_robot=True, num_samples=int(num_samples))
-        if not trajs:
-            return "No trajectories found", None, "No video", ""
-        # Download first trajectory
-        video_path = download_video(trajs[0], dataset_repo, config)
         current_trajectories = []
-        for traj in trajs:
             local_path = download_video(traj, dataset_repo, config)
             if local_path:
                 traj["local_video_path"] = local_path
                 current_trajectories.append(traj)
         current_idx = 0
@@ -121,27 +137,132 @@ def load_dataset_trajectories(dataset_repo, config_name, num_samples):
             first_traj = current_trajectories[0]
             video_path = first_traj.get("local_video_path")
             task = first_traj.get("task", "No task description")
-            # Get max frames
             cap = cv2.VideoCapture(video_path)
             max_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
             cap.release()
             return (
-                f"✅ Loaded {len(current_trajectories)} robot trajectories",
                 gr.update(maximum=max_frames, value=0),
                 video_path,
-                task
             )
-        return "No videos downloaded", None, None, ""
     except Exception as e:
-        return f"❌ Error: {str(e)}", None, None, ""
 with gr.Blocks(title="Trajectory End Point Labeler") as demo:
     gr.Markdown("# Trajectory End Point Labeler")
-    gr.Markdown("Step 3: Dataset loading from HuggingFace")
     with gr.Row():
         with gr.Column(scale=1):
@@ -154,34 +275,75 @@ with gr.Blocks(title="Trajectory End Point Labeler") as demo:
                 label="Config Name (optional)",
                 placeholder="Leave empty if no config"
             )
-            num_samples = gr.Number(label="Number of Samples", value=3, precision=0)
             load_btn = gr.Button("Load Dataset", variant="primary")
             status = gr.Textbox(label="Status", interactive=False)
         with gr.Column(scale=2):
             task_display = gr.Textbox(label="Task Description", interactive=False)
             video_player = gr.Video(label="Trajectory Video")
             frame_slider = gr.Slider(minimum=0, maximum=63, step=1, value=0, label="Frame Number")
             frame_display = gr.Image(label="Current Frame")
             frame_info = gr.Textbox(label="Frame Info", interactive=False)
-    # Connect handlers
     load_btn.click(
         load_dataset_trajectories,
-        inputs=[dataset_input, config_input, num_samples],
-        outputs=[status, frame_slider, video_player, task_display]
     )
     frame_slider.change(
         extract_frame,
         inputs=[video_player, frame_slider],
-        outputs=[frame_display, frame_info]
     )
     video_player.change(
-        lambda v: extract_frame(v, 0) if v else (None, "No video"),
         inputs=[video_player],
-        outputs=[frame_display, frame_info]
     )
 demo.launch()

 import gradio as gr
 import cv2
 import numpy as np
+import pandas as pd
 import random
 import os
 import shutil
 from huggingface_hub import hf_hub_download
 from tqdm import tqdm
+# Step 4: Add labeling interface with CSV export
 def sample_trajectories(dataset_repo, config_name, is_robot, num_samples, max_to_check=10000):
     """Sample random trajectories from HuggingFace dataset."""
 def extract_frame(video_path, frame_num):
     """Extract a specific frame from video."""
     if not video_path or not os.path.exists(video_path):
+        return None, "No video loaded", "0.0%"
     cap = cv2.VideoCapture(video_path)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     if ret:
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         percent = (frame_num / total_frames * 100) if total_frames > 0 else 0
+        return frame_rgb, f"Frame {frame_num}/{total_frames-1}", f"{percent:.1f}%"
+    return None, "Error reading frame", "0.0%"
+# Global state
 current_trajectories = []
 current_idx = 0
+labels_df = pd.DataFrame(columns=[
+    "dataset_repo", "config_name", "trajectory_id", "is_robot",
+    "task", "manual_end_frame", "manual_end_percent", "notes"
+])
def load_labels():
    """Load previously saved labels from ``labels.csv`` into ``labels_df``.

    If the file is missing or completely empty, ``labels_df`` is left
    untouched so the application starts with whatever table it already had.
    """
    global labels_df
    # os.path.exists mirrors the existence check used by extract_frame.
    if not os.path.exists("labels.csv"):
        return
    try:
        # keep_default_na=False keeps empty cells (e.g. an empty config name
        # or empty notes) as "" instead of NaN. Equality lookups against ""
        # therefore still match after a restart, and text values such as
        # "NA" or "null" are not silently turned into missing values.
        labels_df = pd.read_csv("labels.csv", keep_default_na=False)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; keep the current table.
        return
def save_labels():
    """Write the module-level ``labels_df`` table to ``labels.csv``.

    The DataFrame index is not written, so the file contains only the
    label columns.
    """
    # Only reading the global here, so no ``global`` declaration is required.
    labels_df.to_csv("labels.csv", index=False)
+def load_dataset_trajectories(dataset_repo, config_name, num_human, num_robot):
     """Load and download trajectories from dataset."""
     global current_trajectories, current_idx
     config = config_name.strip() if config_name else None
     try:
+        human_trajs = sample_trajectories(dataset_repo, config, is_robot=False, num_samples=int(num_human))
+        robot_trajs = sample_trajectories(dataset_repo, config, is_robot=True, num_samples=int(num_robot))
+        all_trajs = human_trajs + robot_trajs
+        if not all_trajs:
+            return "No trajectories found", None, "No video", "", "0.0%", None, ""
         current_trajectories = []
+        for traj in all_trajs:
             local_path = download_video(traj, dataset_repo, config)
             if local_path:
                 traj["local_video_path"] = local_path
+                traj["dataset_repo"] = dataset_repo
+                traj["config_name"] = config
                 current_trajectories.append(traj)
         current_idx = 0
             first_traj = current_trajectories[0]
             video_path = first_traj.get("local_video_path")
             task = first_traj.get("task", "No task description")
+            is_robot_str = "Robot" if first_traj.get("is_robot") else "Human"
             cap = cv2.VideoCapture(video_path)
             max_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
             cap.release()
+            traj_info = f"Trajectory 1/{len(current_trajectories)} | Type: {is_robot_str}"
+            frame, frame_text, percent = extract_frame(video_path, 0)
             return (
+                f"✅ Loaded {len(current_trajectories)} trajectories ({len(human_trajs)} human, {len(robot_trajs)} robot)",
                 gr.update(maximum=max_frames, value=0),
                 video_path,
+                task,
+                percent,
+                frame,
+                traj_info
             )
+        return "No videos downloaded", None, None, "", "0.0%", None, ""
     except Exception as e:
+        return f"❌ Error: {str(e)}", None, None, "", "0.0%", None, ""
def save_label(dataset_repo, config_name, end_frame, notes):
    """Record the end-frame label for the currently selected trajectory.

    A row is identified by (dataset_repo, config_name, trajectory id). If a
    matching row already exists in ``labels_df`` it is updated in place,
    otherwise a new row is appended. The table is written to disk through
    ``save_labels()`` after every change.

    Args:
        dataset_repo: Dataset repository string entered in the UI.
        config_name: Optional config name; ``None``/empty means "no config".
        end_frame: Frame number chosen as the trajectory end.
        notes: Free-text notes stored with the label.

    Returns:
        A status message describing what was saved, or why nothing was saved.
    """
    global labels_df

    if not current_trajectories or current_idx >= len(current_trajectories):
        return "No trajectory loaded"

    traj = current_trajectories[current_idx]
    video_path = traj.get("local_video_path")
    if not video_path:
        return "No video path"

    # A cleared number field arrives as None; int(None) would raise.
    if end_frame is None:
        return "No end frame specified"
    end_frame = int(end_frame)

    # load_dataset_trajectories strips the config name before using it, so
    # strip here too and keep the stored key consistent with what was loaded.
    config = (config_name or "").strip()

    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()
    # Always a float so the column keeps a single numeric dtype.
    end_percent = (end_frame / total_frames * 100) if total_frames > 0 else 0.0

    # An empty config written to CSV is read back as NaN by default, and
    # NaN == "" is False. Treat missing values as "no config" so a reloaded
    # label is updated instead of duplicated.
    stored_config = labels_df["config_name"]
    config_matches = stored_config == config
    if not config:
        config_matches = config_matches | stored_config.isna()

    mask = (
        (labels_df["dataset_repo"] == dataset_repo)
        & config_matches
        & (labels_df["trajectory_id"] == traj.get("id"))
    )

    if mask.any():
        # Update the first matching row.
        idx = labels_df[mask].index[0]
        # A notes column that was reloaded with only empty cells is float
        # (all NaN); make it object so a string can be stored in it.
        if labels_df["notes"].dtype != object:
            labels_df["notes"] = labels_df["notes"].astype(object)
        labels_df.at[idx, "manual_end_frame"] = end_frame
        labels_df.at[idx, "manual_end_percent"] = end_percent
        labels_df.at[idx, "notes"] = notes
        save_labels()
        return f"✅ Updated: Frame {end_frame} ({end_percent:.1f}%)"

    # No existing label for this trajectory: append a new row.
    new_row = pd.DataFrame([{
        "dataset_repo": dataset_repo,
        "config_name": config,
        "trajectory_id": traj.get("id"),
        "is_robot": traj.get("is_robot", False),
        "task": traj.get("task", ""),
        "manual_end_frame": end_frame,
        "manual_end_percent": end_percent,
        "notes": notes,
    }])
    labels_df = pd.concat([labels_df, new_row], ignore_index=True)
    save_labels()
    return f"✅ Saved: Frame {end_frame} ({end_percent:.1f}%)"
def _trajectory_caption():
    """Return the "Trajectory i/N | Type: ..." caption for the current index."""
    traj = current_trajectories[current_idx]
    is_robot_str = "Robot" if traj.get("is_robot") else "Human"
    return f"Trajectory {current_idx+1}/{len(current_trajectories)} | Type: {is_robot_str}"


def _stay_on_current(message):
    """Return outputs that leave the UI unchanged and report ``message``.

    The six outputs are wired to (frame_slider, video_player, task_display,
    end_percent, frame_display, traj_info). The first five get an empty
    ``gr.update()`` so the slider, video and frame stay as they are; only
    the trajectory info box receives the message.
    """
    if current_trajectories and 0 <= current_idx < len(current_trajectories):
        info = f"{_trajectory_caption()} | {message}"
    else:
        info = message
    return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), info


def _show_current_trajectory():
    """Build the six UI outputs for ``current_trajectories[current_idx]``."""
    traj = current_trajectories[current_idx]
    video_path = traj.get("local_video_path")
    task = traj.get("task", "No task description")

    cap = cv2.VideoCapture(video_path)
    try:
        # Clamp so an unreadable video (frame count 0) cannot push the
        # slider maximum below its minimum of 0.
        max_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1, 0)
    finally:
        cap.release()

    frame, _frame_text, percent = extract_frame(video_path, 0)
    return (
        gr.update(maximum=max_frames, value=0),
        video_path,
        task,
        percent,
        frame,
        _trajectory_caption(),
    )


def navigate_next():
    """Go to next trajectory.

    Returns:
        A 6-tuple for (frame_slider, video_player, task_display,
        end_percent, frame_display, traj_info). When already on the last
        trajectory, or none are loaded, the UI is left unchanged and the
        info box reports "No more trajectories".
    """
    global current_idx
    if not current_trajectories or current_idx >= len(current_trajectories) - 1:
        return _stay_on_current("No more trajectories")
    current_idx += 1
    return _show_current_trajectory()


def navigate_prev():
    """Go to previous trajectory.

    Returns:
        A 6-tuple for (frame_slider, video_player, task_display,
        end_percent, frame_display, traj_info). When already on the first
        trajectory, or none are loaded, the UI is left unchanged and the
        info box reports "No previous trajectories".
    """
    global current_idx
    if not current_trajectories or current_idx <= 0:
        return _stay_on_current("No previous trajectories")
    current_idx -= 1
    return _show_current_trajectory()
+# Load existing labels on startup
+load_labels()
 with gr.Blocks(title="Trajectory End Point Labeler") as demo:
     gr.Markdown("# Trajectory End Point Labeler")
+    gr.Markdown("Step 4: Labeling interface with CSV export")
     with gr.Row():
         with gr.Column(scale=1):
                 label="Config Name (optional)",
                 placeholder="Leave empty if no config"
             )
+            num_human = gr.Number(label="Human Samples", value=10, precision=0)
+            num_robot = gr.Number(label="Robot Samples", value=10, precision=0)
             load_btn = gr.Button("Load Dataset", variant="primary")
             status = gr.Textbox(label="Status", interactive=False)
         with gr.Column(scale=2):
+            traj_info = gr.Textbox(label="Current Trajectory", interactive=False)
             task_display = gr.Textbox(label="Task Description", interactive=False)
+            with gr.Row():
+                prev_btn = gr.Button("← Previous")
+                next_btn = gr.Button("Next →")
             video_player = gr.Video(label="Trajectory Video")
             frame_slider = gr.Slider(minimum=0, maximum=63, step=1, value=0, label="Frame Number")
             frame_display = gr.Image(label="Current Frame")
             frame_info = gr.Textbox(label="Frame Info", interactive=False)
+            with gr.Row():
+                end_frame_input = gr.Number(label="End Frame", value=0, precision=0)
+                end_percent = gr.Textbox(label="End Percent", interactive=False)
+            notes_input = gr.Textbox(label="Notes (optional)", placeholder="Add notes...")
+            save_btn = gr.Button("Save Label", variant="primary")
+            save_status = gr.Textbox(label="Save Status", interactive=False)
+    # Load dataset
     load_btn.click(
         load_dataset_trajectories,
+        inputs=[dataset_input, config_input, num_human, num_robot],
+        outputs=[status, frame_slider, video_player, task_display, end_percent, frame_display, traj_info]
+    )
+    # Navigate trajectories
+    next_btn.click(
+        navigate_next,
+        outputs=[frame_slider, video_player, task_display, end_percent, frame_display, traj_info]
     )
+    prev_btn.click(
+        navigate_prev,
+        outputs=[frame_slider, video_player, task_display, end_percent, frame_display, traj_info]
+    )
+    # Frame navigation
     frame_slider.change(
         extract_frame,
         inputs=[video_player, frame_slider],
+        outputs=[frame_display, frame_info, end_percent]
     )
     video_player.change(
+        lambda v: extract_frame(v, 0) if v else (None, "No video", "0.0%"),
         inputs=[video_player],
+        outputs=[frame_display, frame_info, end_percent]
+    )
+    # Update percent when end frame changes
+    end_frame_input.change(
+        lambda v, f: (None, "No video", "0.0%")[2] if not v else f"{(int(f) / int(cv2.VideoCapture(v).get(cv2.CAP_PROP_FRAME_COUNT)) * 100):.1f}%" if os.path.exists(v) and int(cv2.VideoCapture(v).get(cv2.CAP_PROP_FRAME_COUNT)) > 0 else "0.0%",
+        inputs=[video_player, end_frame_input],
+        outputs=[end_percent]
+    )
+    # Save label
+    save_btn.click(
+        save_label,
+        inputs=[dataset_input, config_input, end_frame_input, notes_input],
+        outputs=[save_status]
     )
 demo.launch()

requirements.txt CHANGED Viewed

@@ -1,5 +1,6 @@
 opencv-python-headless>=4.8.0
 numpy>=1.24.0
 datasets>=2.14.0
 huggingface-hub>=0.16.0
 tqdm>=4.65.0

 opencv-python-headless>=4.8.0
 numpy>=1.24.0
+pandas>=2.0.0
 datasets>=2.14.0
 huggingface-hub>=0.16.0
 tqdm>=4.65.0