KaushikSid committed on
Commit
b80cf0e
·
1 Parent(s): 9a7086f

Step 4: Add labeling interface with CSV export and navigation

Browse files
Files changed (2) hide show
  1. app.py +189 -27
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import cv2
3
  import numpy as np
 
4
  import random
5
  import os
6
  import shutil
@@ -9,7 +10,7 @@ from datasets import load_dataset
9
  from huggingface_hub import hf_hub_download
10
  from tqdm import tqdm
11
 
12
- # Step 3: Add dataset loading and trajectory sampling
13
 
14
  def sample_trajectories(dataset_repo, config_name, is_robot, num_samples, max_to_check=10000):
15
  """Sample random trajectories from HuggingFace dataset."""
@@ -70,7 +71,7 @@ def download_video(trajectory, dataset_repo, config_name=None):
70
  def extract_frame(video_path, frame_num):
71
  """Extract a specific frame from video."""
72
  if not video_path or not os.path.exists(video_path):
73
- return None, "No video loaded"
74
 
75
  cap = cv2.VideoCapture(video_path)
76
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -85,34 +86,49 @@ def extract_frame(video_path, frame_num):
85
  if ret:
86
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
87
  percent = (frame_num / total_frames * 100) if total_frames > 0 else 0
88
- return frame_rgb, f"Frame {frame_num}/{total_frames-1} ({percent:.1f}%)"
89
- return None, "Error reading frame"
90
 
91
- # Global state for loaded trajectories
92
  current_trajectories = []
93
  current_idx = 0
 
 
 
 
94
 
95
- def load_dataset_trajectories(dataset_repo, config_name, num_samples):
 
 
 
 
 
 
 
 
 
 
 
96
  """Load and download trajectories from dataset."""
97
  global current_trajectories, current_idx
98
 
99
  config = config_name.strip() if config_name else None
100
 
101
  try:
102
- # Sample robot trajectories for now
103
- trajs = sample_trajectories(dataset_repo, config, is_robot=True, num_samples=int(num_samples))
 
104
 
105
- if not trajs:
106
- return "No trajectories found", None, "No video", ""
107
-
108
- # Download first trajectory
109
- video_path = download_video(trajs[0], dataset_repo, config)
110
 
111
  current_trajectories = []
112
- for traj in trajs:
113
  local_path = download_video(traj, dataset_repo, config)
114
  if local_path:
115
  traj["local_video_path"] = local_path
 
 
116
  current_trajectories.append(traj)
117
 
118
  current_idx = 0
@@ -121,27 +137,132 @@ def load_dataset_trajectories(dataset_repo, config_name, num_samples):
121
  first_traj = current_trajectories[0]
122
  video_path = first_traj.get("local_video_path")
123
  task = first_traj.get("task", "No task description")
 
124
 
125
- # Get max frames
126
  cap = cv2.VideoCapture(video_path)
127
  max_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
128
  cap.release()
129
 
 
 
 
 
130
  return (
131
- f"✅ Loaded {len(current_trajectories)} robot trajectories",
132
  gr.update(maximum=max_frames, value=0),
133
  video_path,
134
- task
 
 
 
135
  )
136
 
137
- return "No videos downloaded", None, None, ""
138
 
139
  except Exception as e:
140
- return f"❌ Error: {str(e)}", None, None, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  with gr.Blocks(title="Trajectory End Point Labeler") as demo:
143
  gr.Markdown("# Trajectory End Point Labeler")
144
- gr.Markdown("Step 3: Dataset loading from HuggingFace")
145
 
146
  with gr.Row():
147
  with gr.Column(scale=1):
@@ -154,34 +275,75 @@ with gr.Blocks(title="Trajectory End Point Labeler") as demo:
154
  label="Config Name (optional)",
155
  placeholder="Leave empty if no config"
156
  )
157
- num_samples = gr.Number(label="Number of Samples", value=3, precision=0)
 
158
  load_btn = gr.Button("Load Dataset", variant="primary")
159
  status = gr.Textbox(label="Status", interactive=False)
160
 
161
  with gr.Column(scale=2):
 
162
  task_display = gr.Textbox(label="Task Description", interactive=False)
 
 
 
 
 
163
  video_player = gr.Video(label="Trajectory Video")
164
  frame_slider = gr.Slider(minimum=0, maximum=63, step=1, value=0, label="Frame Number")
165
  frame_display = gr.Image(label="Current Frame")
166
  frame_info = gr.Textbox(label="Frame Info", interactive=False)
 
 
 
 
 
 
 
 
167
 
168
- # Connect handlers
169
  load_btn.click(
170
  load_dataset_trajectories,
171
- inputs=[dataset_input, config_input, num_samples],
172
- outputs=[status, frame_slider, video_player, task_display]
 
 
 
 
 
 
173
  )
174
 
 
 
 
 
 
 
175
  frame_slider.change(
176
  extract_frame,
177
  inputs=[video_player, frame_slider],
178
- outputs=[frame_display, frame_info]
179
  )
180
 
181
  video_player.change(
182
- lambda v: extract_frame(v, 0) if v else (None, "No video"),
183
  inputs=[video_player],
184
- outputs=[frame_display, frame_info]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  )
186
 
187
  demo.launch()
 
1
  import gradio as gr
2
  import cv2
3
  import numpy as np
4
+ import pandas as pd
5
  import random
6
  import os
7
  import shutil
 
10
  from huggingface_hub import hf_hub_download
11
  from tqdm import tqdm
12
 
13
+ # Step 4: Add labeling interface with CSV export
14
 
15
  def sample_trajectories(dataset_repo, config_name, is_robot, num_samples, max_to_check=10000):
16
  """Sample random trajectories from HuggingFace dataset."""
 
71
  def extract_frame(video_path, frame_num):
72
  """Extract a specific frame from video."""
73
  if not video_path or not os.path.exists(video_path):
74
+ return None, "No video loaded", "0.0%"
75
 
76
  cap = cv2.VideoCapture(video_path)
77
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
86
  if ret:
87
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
88
  percent = (frame_num / total_frames * 100) if total_frames > 0 else 0
89
+ return frame_rgb, f"Frame {frame_num}/{total_frames-1}", f"{percent:.1f}%"
90
+ return None, "Error reading frame", "0.0%"
91
 
92
# Module-level state shared by the Gradio callbacks.
# Trajectories whose video was downloaded successfully, in display order.
current_trajectories = []
# Position inside current_trajectories of the trajectory being shown.
current_idx = 0
# In-memory label table; one row per labeled trajectory.
labels_df = pd.DataFrame(
    columns=[
        "dataset_repo",
        "config_name",
        "trajectory_id",
        "is_robot",
        "task",
        "manual_end_frame",
        "manual_end_percent",
        "notes",
    ]
)
99
 
100
def load_labels():
    """Load previously saved labels from ``labels.csv`` into ``labels_df``.

    Does nothing when the file is missing or completely empty, so the
    in-memory table keeps whatever it held before the call.
    """
    global labels_df

    if not os.path.isfile("labels.csv"):
        return

    try:
        loaded = pd.read_csv("labels.csv")
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it as "no labels yet".
        return

    # read_csv turns empty cells into NaN, but labels are written (and later
    # compared) with "" for a missing config name or note. Restore the empty
    # strings so reloaded rows still match.
    for column in ("config_name", "task", "notes"):
        if column in loaded.columns:
            loaded[column] = loaded[column].fillna("")

    labels_df = loaded
105
+
106
def save_labels():
    """Write the in-memory label table to ``labels.csv`` without the index column."""
    # Only reads the module-level table, so no ``global`` declaration is needed.
    labels_df.to_csv("labels.csv", index=False)
110
+
111
+ def load_dataset_trajectories(dataset_repo, config_name, num_human, num_robot):
112
  """Load and download trajectories from dataset."""
113
  global current_trajectories, current_idx
114
 
115
  config = config_name.strip() if config_name else None
116
 
117
  try:
118
+ human_trajs = sample_trajectories(dataset_repo, config, is_robot=False, num_samples=int(num_human))
119
+ robot_trajs = sample_trajectories(dataset_repo, config, is_robot=True, num_samples=int(num_robot))
120
+ all_trajs = human_trajs + robot_trajs
121
 
122
+ if not all_trajs:
123
+ return "No trajectories found", None, "No video", "", "0.0%", None, ""
 
 
 
124
 
125
  current_trajectories = []
126
+ for traj in all_trajs:
127
  local_path = download_video(traj, dataset_repo, config)
128
  if local_path:
129
  traj["local_video_path"] = local_path
130
+ traj["dataset_repo"] = dataset_repo
131
+ traj["config_name"] = config
132
  current_trajectories.append(traj)
133
 
134
  current_idx = 0
 
137
  first_traj = current_trajectories[0]
138
  video_path = first_traj.get("local_video_path")
139
  task = first_traj.get("task", "No task description")
140
+ is_robot_str = "Robot" if first_traj.get("is_robot") else "Human"
141
 
 
142
  cap = cv2.VideoCapture(video_path)
143
  max_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
144
  cap.release()
145
 
146
+ traj_info = f"Trajectory 1/{len(current_trajectories)} | Type: {is_robot_str}"
147
+
148
+ frame, frame_text, percent = extract_frame(video_path, 0)
149
+
150
  return (
151
+ f"✅ Loaded {len(current_trajectories)} trajectories ({len(human_trajs)} human, {len(robot_trajs)} robot)",
152
  gr.update(maximum=max_frames, value=0),
153
  video_path,
154
+ task,
155
+ percent,
156
+ frame,
157
+ traj_info
158
  )
159
 
160
+ return "No videos downloaded", None, None, "", "0.0%", None, ""
161
 
162
  except Exception as e:
163
+ return f"❌ Error: {str(e)}", None, None, "", "0.0%", None, ""
164
+
165
def save_label(dataset_repo, config_name, end_frame, notes):
    """Save or update the end-frame label for the trajectory currently shown.

    Args:
        dataset_repo: Dataset repository string from the UI.
        config_name: Optional config name from the UI; ``None`` or blank
            is stored as ``""``.
        end_frame: Frame number chosen as the end point.
        notes: Free-text notes stored with the label.

    Returns:
        A status message describing what was saved, or why nothing was.
    """
    global labels_df

    if not current_trajectories or current_idx >= len(current_trajectories):
        return "No trajectory loaded"

    traj = current_trajectories[current_idx]
    video_path = traj.get("local_video_path")
    if not video_path:
        return "No video path"

    # A cleared number field arrives as None; int(None) would raise.
    if end_frame is None:
        return "No end frame specified"
    frame = int(end_frame)
    if frame < 0:
        return "End frame must be non-negative"

    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture even if reading the frame count fails.
        cap.release()

    end_percent = (frame / total_frames * 100) if total_frames > 0 else 0.0

    # Strip the config the same way the loader does, so " " and "" are one key.
    config = (config_name or "").strip()
    traj_id = traj.get("id")

    # After a CSV round trip an empty config_name is NaN and trajectory_id may
    # have been re-typed (e.g. "12" -> 12). Compare on normalized strings so an
    # existing row is found instead of a duplicate being appended.
    mask = (
        (labels_df["dataset_repo"] == dataset_repo)
        & (labels_df["config_name"].fillna("").astype(str) == config)
        & (labels_df["trajectory_id"].astype(str) == str(traj_id))
    )

    if mask.any():
        row = labels_df.index[mask][0]
        # An all-empty notes column reloads as float; make it accept strings.
        labels_df["notes"] = labels_df["notes"].astype(object)
        labels_df.at[row, "manual_end_frame"] = frame
        labels_df.at[row, "manual_end_percent"] = end_percent
        labels_df.at[row, "notes"] = notes
        save_labels()
        return f"✅ Updated: Frame {frame} ({end_percent:.1f}%)"

    new_row = pd.DataFrame([{
        "dataset_repo": dataset_repo,
        "config_name": config,
        "trajectory_id": traj_id,
        "is_robot": traj.get("is_robot", False),
        "task": traj.get("task", ""),
        "manual_end_frame": frame,
        "manual_end_percent": end_percent,
        "notes": notes,
    }])

    labels_df = pd.concat([labels_df, new_row], ignore_index=True)
    save_labels()
    return f"✅ Saved: Frame {frame} ({end_percent:.1f}%)"
215
+
216
def _show_trajectory(target_idx):
    """Make ``target_idx`` the current trajectory and build its six UI outputs.

    The returned tuple is ordered as (frame slider update, video path, task
    text, percent text, first frame image, trajectory info text).
    When ``target_idx`` is out of range, or nothing is loaded, every element
    is a no-op ``gr.update()`` so the screen is left exactly as it was.
    """
    global current_idx

    if not current_trajectories or not 0 <= target_idx < len(current_trajectories):
        # The previous code returned a message string in the first slot, which
        # is the frame slider, and blanked the video, task and frame.
        return tuple(gr.update() for _ in range(6))

    current_idx = target_idx
    traj = current_trajectories[current_idx]
    video_path = traj.get("local_video_path")
    task = traj.get("task", "No task description")
    is_robot_str = "Robot" if traj.get("is_robot") else "Human"

    cap = cv2.VideoCapture(video_path)
    try:
        # Clamp so an unreadable video (frame count 0) cannot set maximum to -1.
        max_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1, 0)
    finally:
        cap.release()

    traj_info = f"Trajectory {current_idx+1}/{len(current_trajectories)} | Type: {is_robot_str}"
    frame, _, percent = extract_frame(video_path, 0)

    return gr.update(maximum=max_frames, value=0), video_path, task, percent, frame, traj_info


def navigate_next():
    """Go to the next trajectory; leave the UI untouched at the last one."""
    return _show_trajectory(current_idx + 1)


def navigate_prev():
    """Go to the previous trajectory; leave the UI untouched at the first one."""
    return _show_trajectory(current_idx - 1)
259
+
260
+ # Load existing labels on startup
261
+ load_labels()
262
 
263
  with gr.Blocks(title="Trajectory End Point Labeler") as demo:
264
  gr.Markdown("# Trajectory End Point Labeler")
265
+ gr.Markdown("Step 4: Labeling interface with CSV export")
266
 
267
  with gr.Row():
268
  with gr.Column(scale=1):
 
275
  label="Config Name (optional)",
276
  placeholder="Leave empty if no config"
277
  )
278
+ num_human = gr.Number(label="Human Samples", value=10, precision=0)
279
+ num_robot = gr.Number(label="Robot Samples", value=10, precision=0)
280
  load_btn = gr.Button("Load Dataset", variant="primary")
281
  status = gr.Textbox(label="Status", interactive=False)
282
 
283
  with gr.Column(scale=2):
284
+ traj_info = gr.Textbox(label="Current Trajectory", interactive=False)
285
  task_display = gr.Textbox(label="Task Description", interactive=False)
286
+
287
+ with gr.Row():
288
+ prev_btn = gr.Button("← Previous")
289
+ next_btn = gr.Button("Next →")
290
+
291
  video_player = gr.Video(label="Trajectory Video")
292
  frame_slider = gr.Slider(minimum=0, maximum=63, step=1, value=0, label="Frame Number")
293
  frame_display = gr.Image(label="Current Frame")
294
  frame_info = gr.Textbox(label="Frame Info", interactive=False)
295
+
296
+ with gr.Row():
297
+ end_frame_input = gr.Number(label="End Frame", value=0, precision=0)
298
+ end_percent = gr.Textbox(label="End Percent", interactive=False)
299
+
300
+ notes_input = gr.Textbox(label="Notes (optional)", placeholder="Add notes...")
301
+ save_btn = gr.Button("Save Label", variant="primary")
302
+ save_status = gr.Textbox(label="Save Status", interactive=False)
303
 
304
+ # Load dataset
305
  load_btn.click(
306
  load_dataset_trajectories,
307
+ inputs=[dataset_input, config_input, num_human, num_robot],
308
+ outputs=[status, frame_slider, video_player, task_display, end_percent, frame_display, traj_info]
309
+ )
310
+
311
+ # Navigate trajectories
312
+ next_btn.click(
313
+ navigate_next,
314
+ outputs=[frame_slider, video_player, task_display, end_percent, frame_display, traj_info]
315
  )
316
 
317
+ prev_btn.click(
318
+ navigate_prev,
319
+ outputs=[frame_slider, video_player, task_display, end_percent, frame_display, traj_info]
320
+ )
321
+
322
+ # Frame navigation
323
  frame_slider.change(
324
  extract_frame,
325
  inputs=[video_player, frame_slider],
326
+ outputs=[frame_display, frame_info, end_percent]
327
  )
328
 
329
  video_player.change(
330
+ lambda v: extract_frame(v, 0) if v else (None, "No video", "0.0%"),
331
  inputs=[video_player],
332
+ outputs=[frame_display, frame_info, end_percent]
333
+ )
334
+
335
+ # Update percent when end frame changes
336
+ end_frame_input.change(
337
+ lambda v, f: (None, "No video", "0.0%")[2] if not v else f"{(int(f) / int(cv2.VideoCapture(v).get(cv2.CAP_PROP_FRAME_COUNT)) * 100):.1f}%" if os.path.exists(v) and int(cv2.VideoCapture(v).get(cv2.CAP_PROP_FRAME_COUNT)) > 0 else "0.0%",
338
+ inputs=[video_player, end_frame_input],
339
+ outputs=[end_percent]
340
+ )
341
+
342
+ # Save label
343
+ save_btn.click(
344
+ save_label,
345
+ inputs=[dataset_input, config_input, end_frame_input, notes_input],
346
+ outputs=[save_status]
347
  )
348
 
349
  demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  opencv-python-headless>=4.8.0
2
  numpy>=1.24.0
 
3
  datasets>=2.14.0
4
  huggingface-hub>=0.16.0
5
  tqdm>=4.65.0
 
1
  opencv-python-headless>=4.8.0
2
  numpy>=1.24.0
3
+ pandas>=2.0.0
4
  datasets>=2.14.0
5
  huggingface-hub>=0.16.0
6
  tqdm>=4.65.0