Raffael-Kultyshev committed on
Commit
b9af90c
·
1 Parent(s): ab63e4b

Restructure app to match reference visualizer: sidebar, video player, synced plots

Browse files
Files changed (1) hide show
  1. app.py +302 -167
app.py CHANGED
@@ -5,7 +5,11 @@ Egocentric hand tracking dataset visualizer for robot training data
5
 
6
  import gradio as gr
7
  import json
 
8
  from pathlib import Path
 
 
 
9
 
10
  # Load pipeline data
11
  DATA_DIR = Path(__file__).parent / "data"
@@ -43,185 +47,316 @@ for frame_data in end_effector.values():
43
 
44
  print(f"Stats: frames={total_frames}, left={left_poses}, right={right_poses}")
45
 
46
- def get_frame_info(frame_idx):
47
- """Get info for a specific frame."""
48
- try:
49
- frame_key = str(int(frame_idx))
50
-
51
- # Hand detection status
52
- hand_data = hands_2d.get(frame_key) or {}
53
- left_detected = bool(hand_data.get('left_hand'))
54
- right_detected = bool(hand_data.get('right_hand'))
 
 
 
 
 
 
 
 
 
55
 
56
- # End effector pose
57
- ee_data = end_effector.get(frame_key) or {}
58
- left_hand_data = ee_data.get('left_hand') if ee_data else None
59
- right_hand_data = ee_data.get('right_hand') if ee_data else None
60
 
61
- left_pose = None
62
- right_pose = None
63
  if left_hand_data and isinstance(left_hand_data, dict):
64
- left_pose = left_hand_data.get('pose_6dof')
65
- if right_hand_data and isinstance(right_hand_data, dict):
66
- right_pose = right_hand_data.get('pose_6dof')
67
-
68
- # Action
69
- action_data = actions.get(frame_key) or {}
70
- left_action = action_data.get('left_hand_action') if action_data else None
71
- camera_action = action_data.get('camera_action') if action_data else None
 
 
 
 
 
 
72
 
73
- # Format output
74
- info = f"""### Frame {int(frame_idx)} / {total_frames - 1}
75
- **Time:** {int(frame_idx) / fps:.2f}s
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- ---
78
 
79
- #### 🖐️ Hand Detection
80
- - **Left Hand:** {'✅ Detected' if left_detected else '❌ Not Detected'}
81
- - **Right Hand:** {'✅ Detected' if right_detected else '❌ Not Detected'}
 
 
 
 
 
82
 
83
- ---
 
 
 
84
 
85
- #### 📍 End-Effector Pose (6DoF)
86
- """
87
-
88
- if left_pose and len(left_pose) >= 6:
89
- info += f"""
90
- **Left Hand:**
91
- - Position: X={left_pose[0]*100:.1f}cm, Y={left_pose[1]*100:.1f}cm, Z={left_pose[2]*100:.1f}cm
92
- - Rotation: Roll={left_pose[3]*57.3:.1f}°, Pitch={left_pose[4]*57.3:.1f}°, Yaw={left_pose[5]*57.3:.1f}°
93
- """
94
- else:
95
- info += "\n**Left Hand:** No pose available\n"
96
-
97
- if right_pose and len(right_pose) >= 6:
98
- info += f"""
99
- **Right Hand:**
100
- - Position: X={right_pose[0]*100:.1f}cm, Y={right_pose[1]*100:.1f}cm, Z={right_pose[2]*100:.1f}cm
101
- - Rotation: Roll={right_pose[3]*57.3:.1f}°, Pitch={right_pose[4]*57.3:.1f}°, Yaw={right_pose[5]*57.3:.1f}°
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  """
103
-
104
- info += "\n---\n\n#### 🎯 Actions (Delta per frame)\n"
105
-
106
- if left_action and len(left_action) >= 3:
107
- mag = (left_action[0]**2 + left_action[1]**2 + left_action[2]**2)**0.5 * 100
108
- info += f"**Left Hand Movement:** {mag:.2f} cm\n"
109
-
110
- if camera_action and len(camera_action) >= 3:
111
- cam_mag = (camera_action[0]**2 + camera_action[1]**2 + camera_action[2]**2)**0.5 * 100
112
- info += f"**Camera Movement:** {cam_mag:.2f} cm\n"
113
-
114
- return info
115
- except Exception as e:
116
- return f"Error: {str(e)}"
117
-
118
- def get_frame_image(frame_idx):
119
- """Get RGB frame image path."""
120
- # Round to nearest 10 (we only have every 10th frame)
121
- idx = int(frame_idx)
122
- idx = (idx // 10) * 10
123
- frame_path = DATA_DIR / "frames" / f"{idx}.jpg"
124
- if frame_path.exists():
125
- return str(frame_path)
126
- # Try exact frame
127
- frame_path = DATA_DIR / "frames" / f"{int(frame_idx)}.jpg"
128
- if frame_path.exists():
129
- return str(frame_path)
130
- return None
131
-
132
- def update_display(frame_idx):
133
- """Update frame display."""
134
- img = get_frame_image(frame_idx)
135
- info = get_frame_info(frame_idx)
136
- return img, info
137
-
138
- # Build Gradio Interface
139
- with gr.Blocks(title="DI Human Demo Visualizer") as demo:
140
-
141
- # Header
142
- gr.Markdown("""
143
- # 🤖 Dynamic Intelligence - Human Demo Visualizer
144
-
145
- **Egocentric hand tracking dataset for humanoid robot training**
146
-
147
- Pipeline: iPhone LiDAR → MediaPipe → 6DoF End-Effector → Robot Training Data
148
- """)
149
-
150
- # Stats row
151
- gr.Markdown(f"""
152
- | Stat | Value |
153
- |------|-------|
154
- | Total Frames | {total_frames} |
155
- | Hand Detection | {hand_detection_rate:.1f}% |
156
- | Left Hand Poses | {left_poses} |
157
- | Right Hand Poses | {right_poses} |
158
- | FPS | {fps} |
159
- """)
160
 
161
- gr.Markdown("---")
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- # Main content
164
- with gr.Row():
165
- with gr.Column(scale=2):
166
- gr.Markdown("### 📹 RGB Frame")
167
- frame_image = gr.Image(label="Frame", height=400)
168
- frame_slider = gr.Slider(
169
- minimum=0,
170
- maximum=max(1, total_frames - 1),
171
- step=10,
172
- value=0,
173
- label="Frame (every 10th frame available)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  )
175
-
176
- with gr.Column(scale=1):
177
- frame_info = gr.Markdown("Select a frame to see details")
178
-
179
- gr.Markdown("---")
180
-
181
- # Plots
182
- gr.Markdown("### 📊 Validation Plots")
183
-
184
- with gr.Row():
185
- camera_plot = DATA_DIR / "plots" / "camera_trajectory.png"
186
- left_plot = DATA_DIR / "plots" / "left_hand_trajectory.png"
187
- if camera_plot.exists():
188
- gr.Image(value=str(camera_plot), label="Camera Trajectory")
189
- if left_plot.exists():
190
- gr.Image(value=str(left_plot), label="Left Hand Trajectory")
191
-
192
- with gr.Row():
193
- pose_plot = DATA_DIR / "plots" / "hand_pose_vs_time.png"
194
- action_plot = DATA_DIR / "plots" / "actions_histogram.png"
195
- if pose_plot.exists():
196
- gr.Image(value=str(pose_plot), label="Hand Pose vs Time")
197
- if action_plot.exists():
198
- gr.Image(value=str(action_plot), label="Actions Distribution")
199
-
200
- # Physics validation
201
- gr.Markdown("""
202
- ---
203
-
204
- ### ✅ Physics Validation Results
205
-
206
- | Check | Status | Details |
207
- |-------|--------|---------|
208
- | Camera Trajectory | ✅ PASS | Smooth movement, ~40cm total range |
209
- | Hand Depth Range | ✅ PASS | 15-60cm from camera (realistic) |
210
- | Action Magnitudes | ✅ PASS | Median 0.34cm/frame (no tracking errors) |
211
- | 6DoF Rotations | ✅ PASS | Natural hand movement patterns |
212
-
213
- ---
214
-
215
- **Organization:** [Dynamic Intelligence](https://huggingface.co/DynamicIntelligence)
216
- """)
217
 
218
- # Event handler
219
- frame_slider.change(
220
- fn=update_display,
221
- inputs=[frame_slider],
222
- outputs=[frame_image, frame_info]
223
- )
 
 
224
 
225
- # Launch
226
  if __name__ == "__main__":
227
- demo.launch()
 
5
 
6
  import gradio as gr
7
  import json
8
+ import numpy as np
9
  from pathlib import Path
10
+ import plotly.graph_objects as go
11
+ import plotly.io as pio
12
+ from typing import Dict, List
13
 
14
  # Load pipeline data
15
  DATA_DIR = Path(__file__).parent / "data"
 
47
 
48
  print(f"Stats: frames={total_frames}, left={left_poses}, right={right_poses}")
49
 
50
+ # Video path
51
+ video_path = DATA_DIR / "video.mp4"
52
+
53
+ # Prepare time-series data
54
def _hand_pose_values(hand_entry):
    """Convert one hand's end-effector entry into plot units.

    Returns a 6-tuple ``(x, y, z, roll, pitch, yaw)`` with the first three
    pose components scaled from metres to centimetres and the next three
    converted from radians to degrees, or ``None`` when the entry is not a
    non-empty dict or its ``pose_6dof`` has fewer than six components.
    """
    import math

    if not isinstance(hand_entry, dict):
        return None
    pose = hand_entry.get('pose_6dof')
    if not pose or len(pose) < 6:
        return None
    x, y, z, roll, pitch, yaw = pose[:6]
    return (
        x * 100,
        y * 100,
        z * 100,
        # Exact rad -> deg conversion instead of the rounded 57.3 factor.
        math.degrees(roll),
        math.degrees(pitch),
        math.degrees(yaw),
    )


def prepare_data(frames=None, frame_rate=None):
    """Prepare time-series data for plots.

    Args:
        frames: Mapping of frame-index strings to per-frame dicts that may
            hold ``'left_hand'`` / ``'right_hand'`` entries with a
            ``'pose_6dof'`` sequence. Defaults to the module-level
            ``end_effector`` mapping.
        frame_rate: Frames per second used to turn a frame index into a
            timestamp. Defaults to the module-level ``fps``.

    Returns:
        A dict with ``'times'`` (seconds, sorted by frame index) and four
        series tables ``'left_pos'``, ``'left_rot'``, ``'right_pos'``,
        ``'right_rot'``. Every series has one value per timestamp; frames
        without a usable pose contribute ``None`` so the plot shows a gap.
    """
    if frames is None:
        frames = end_effector
    if frame_rate is None:
        frame_rate = fps

    times = []
    left_pos = {'x': [], 'y': [], 'z': []}
    left_rot = {'yaw': [], 'pitch': [], 'roll': []}
    right_pos = {'x': [], 'y': [], 'z': []}
    right_rot = {'yaw': [], 'pitch': [], 'roll': []}

    # Remember the original spelling of each numeric key: looking the frame
    # up again via str(int(key)) would miss zero-padded keys such as "007".
    index_to_key = {}
    for key in frames:
        # isdecimal() only accepts characters that int() can parse.
        if isinstance(key, str) and key.isdecimal():
            idx = int(key)
            # Prefer the canonical spelling when both "7" and "007" exist.
            if idx not in index_to_key or key == str(idx):
                index_to_key[idx] = key

    hands = (
        ('left_hand', left_pos, left_rot),
        ('right_hand', right_pos, right_rot),
    )

    for idx in sorted(index_to_key):
        times.append(idx / frame_rate)

        entry = frames.get(index_to_key[idx]) or {}
        if not isinstance(entry, dict):
            entry = {}

        for hand_key, pos, rot in hands:
            values = _hand_pose_values(entry.get(hand_key))
            if values is None:
                # Keep every series aligned with `times`.
                values = (None,) * 6
            x, y, z, roll, pitch, yaw = values
            pos['x'].append(x)
            pos['y'].append(y)
            pos['z'].append(z)
            rot['roll'].append(roll)
            rot['pitch'].append(pitch)
            rot['yaw'].append(yaw)

    return {
        'times': times,
        'left_pos': left_pos,
        'left_rot': left_rot,
        'right_pos': right_pos,
        'right_rot': right_rot,
    }
114
 
115
+ plot_data = prepare_data()
116
 
117
+ METRIC_LABELS = {
118
+ "x_cm": "X (cm)",
119
+ "y_cm": "Y (cm)",
120
+ "z_cm": "Z (cm)",
121
+ "yaw_deg": "Yaw (°)",
122
+ "pitch_deg": "Pitch (°)",
123
+ "roll_deg": "Roll (°)",
124
+ }
125
 
126
+ PLOT_GRID = [
127
+ ["x_cm", "y_cm", "z_cm"],
128
+ ["yaw_deg", "pitch_deg", "roll_deg"],
129
+ ]
130
 
131
+ PLOT_ORDER = [metric for row in PLOT_GRID for metric in row]
132
+
133
+ CUSTOM_CSS = """
134
+ :root, .gradio-container, body {
135
+ background-color: #050a18 !important;
136
+ color: #f8fafc !important;
137
+ font-family: 'Inter', 'Segoe UI', system-ui, sans-serif;
138
+ }
139
+ .side-panel {
140
+ background: #0f172a;
141
+ padding: 20px;
142
+ border-radius: 18px;
143
+ border: 1px solid #1f2b47;
144
+ min-height: 100%;
145
+ }
146
+ .stats-card ul {
147
+ list-style: none;
148
+ padding: 0;
149
+ margin: 0;
150
+ font-size: 0.92rem;
151
+ }
152
+ .stats-card li {
153
+ margin-bottom: 10px;
154
+ color: #e2e8f0;
155
+ }
156
+ .stats-card span {
157
+ display: inline-block;
158
+ margin-right: 6px;
159
+ color: #7dd3fc;
160
+ }
161
+ .main-panel {
162
+ padding-top: 8px;
163
+ }
164
+ .instruction-card {
165
+ background: #0f172a;
166
+ padding: 18px 20px;
167
+ border-radius: 18px;
168
+ border: 1px solid #1f2b47;
169
+ }
170
+ .instruction-label {
171
+ font-size: 0.75rem;
172
+ letter-spacing: 0.12em;
173
+ text-transform: uppercase;
174
+ color: #94a3b8;
175
+ margin-bottom: 10px;
176
+ }
177
+ .instruction-text {
178
+ font-size: 1.1rem;
179
+ line-height: 1.5;
180
+ }
181
+ .video-card {
182
+ background: #0f172a;
183
+ border: 1px solid #1f2b47;
184
+ border-radius: 18px;
185
+ padding: 18px 20px;
186
+ margin-top: 18px;
187
+ }
188
+ .video-title {
189
+ font-size: 0.78rem;
190
+ text-transform: uppercase;
191
+ letter-spacing: 0.18em;
192
+ color: #94a3b8;
193
+ margin-bottom: 8px;
194
+ }
195
+ .video-panel video {
196
+ border-radius: 12px;
197
+ border: 1px solid #1f2b47;
198
+ background: #030712;
199
+ }
200
+ .download-button button {
201
+ border-radius: 999px;
202
+ border: 1px solid #334155;
203
+ background: #1e293b;
204
+ color: #f8fafc;
205
+ font-size: 0.85rem;
206
+ padding: 8px 24px;
207
+ }
208
+ .download-button button:hover {
209
+ border-color: #67e8f9;
210
+ color: #67e8f9;
211
+ }
212
+ .plots-wrap {
213
+ margin-top: 18px;
214
+ }
215
+ .plots-wrap .gr-row {
216
+ gap: 16px;
217
+ }
218
+ .plot-html {
219
+ background: #111a2c;
220
+ border-radius: 12px;
221
+ padding: 10px;
222
+ border: 1px solid #1f2b47;
223
+ min-height: 320px;
224
+ }
225
+ .plot-html iframe {
226
+ width: 100%;
227
+ height: 300px;
228
+ border: none;
229
+ }
230
  """
231
+
232
def build_plot_fig(metric: str, hand: str = "left") -> go.Figure:
    """Build the single-trace Plotly line chart for one metric of one hand.

    Args:
        metric: A key of ``METRIC_LABELS`` such as ``"x_cm"`` or
            ``"yaw_deg"``. The text before the last underscore names the
            series; a ``cm`` suffix reads from the position table, any other
            suffix reads from the rotation table.
        hand: ``"left"`` selects the left-hand series; any other value
            selects the right-hand series.

    Returns:
        A dark-themed ``go.Figure`` plotting the series against
        ``plot_data['times']``.

    Raises:
        KeyError: If ``metric`` does not name a series in ``plot_data`` or
            has no entry in ``METRIC_LABELS``.
    """
    if hand == "left":
        side, trace_name = "left", "Left Hand"
    else:
        side, trace_name = "right", "Right Hand"

    # Split "pitch_deg" into ("pitch", "deg") once instead of substring
    # matching on "cm" and stripping the suffix separately.
    axis, _, unit = metric.rpartition("_")
    table = "pos" if unit == "cm" else "rot"
    series = plot_data[f"{side}_{table}"][axis]

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=plot_data['times'],
            y=series,
            mode="lines",
            name=trace_name,
            line=dict(color="#67e8f9", width=2),
        )
    )
    fig.update_layout(
        margin=dict(l=20, r=20, t=30, b=20),
        height=250,
        template="plotly_dark",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        xaxis_title="Time (s)",
        yaxis_title=METRIC_LABELS[metric],
    )
    fig.update_xaxes(showgrid=True, gridwidth=0.5, gridcolor="rgba(255,255,255,0.1)")
    fig.update_yaxes(showgrid=True, gridwidth=0.5, gridcolor="rgba(255,255,255,0.1)")
    return fig
270
+
271
def build_plot_html(metric: str, hand: str = "left") -> str:
    """Render the chart for one metric of one hand as an HTML fragment.

    Args:
        metric: Metric key forwarded to ``build_plot_fig``.
        hand: Hand selector forwarded to ``build_plot_fig``.

    Returns:
        The HTML produced by ``pio.to_html`` with ``full_html=False`` and
        ``include_plotlyjs="cdn"``.
    """
    fig = build_plot_fig(metric, hand)
    return pio.to_html(fig, include_plotlyjs="cdn", full_html=False)
275
+
276
+ # Build interface
277
+ stats_html = f"""
278
+ <div class="stats-card">
279
+ <ul>
280
+ <li><span>Number of samples/frames:</span> {total_frames:,}</li>
281
+ <li><span>Hand detection rate:</span> {hand_detection_rate:.1f}%</li>
282
+ <li><span>Left hand poses:</span> {left_poses}</li>
283
+ <li><span>Right hand poses:</span> {right_poses}</li>
284
+ <li><span>Frames per second:</span> {fps:.1f}</li>
285
+ </ul>
286
+ </div>
287
+ """
288
+
289
+ instruction_text = "LiDAR-based egocentric hand tracking for robot training data"
290
+
291
+ theme = gr.themes.Soft(
292
+ primary_hue="cyan", secondary_hue="blue", neutral_hue="slate"
293
+ ).set(
294
+ body_background_fill="#0c1424",
295
+ body_text_color="#f8fafc",
296
+ block_background_fill="#111a2c",
297
+ block_title_text_color="#f8fafc",
298
+ input_background_fill="#151f33",
299
+ border_color_primary="#1f2b47",
300
+ shadow_drop="none",
301
+ )
302
+
303
def format_instruction_html(text: str) -> str:
    """Wrap an instruction string in the "Language Instruction" card markup.

    Args:
        text: Instruction to display. ``None`` renders as an empty
            instruction and other non-string values are converted with
            ``str()`` instead of crashing ``html.escape``.

    Returns:
        An HTML snippet with the text HTML-escaped inside the
        ``instruction-text`` paragraph.
    """
    import html

    safe_text = html.escape("" if text is None else str(text))
    return (
        '<div class="instruction-card">'
        '<p class="instruction-label">Language Instruction</p>'
        f'<p class="instruction-text">{safe_text}</p>'
        "</div>"
    )
312
+
313
+ with gr.Blocks(theme=theme, css=CUSTOM_CSS) as demo:
314
+ gr.Markdown("# 🤖 Dynamic Intelligence - Human Demo Visualizer")
315
+ gr.Markdown(
316
+ "Egocentric hand tracking dataset for humanoid robot training. "
317
+ "Pipeline: iPhone LiDAR → MediaPipe → 6DoF End-Effector → Robot Training Data"
318
+ )
319
+
320
+ with gr.Row(equal_height=True):
321
+ with gr.Column(scale=1, min_width=260, elem_classes=["side-panel"]):
322
+ gr.HTML(stats_html)
323
+ with gr.Column(scale=2, min_width=640, elem_classes=["main-panel"]):
324
+ instruction_box = gr.HTML(
325
+ format_instruction_html(instruction_text),
326
+ label="Language Instruction",
327
  )
328
+ with gr.Column(elem_classes=["video-card"]):
329
+ gr.HTML('<div class="video-title">RGB Video</div>')
330
+ video = gr.Video(
331
+ height=360,
332
+ value=str(video_path) if video_path.exists() else None,
333
+ elem_classes=["video-panel"],
334
+ show_label=False,
335
+ show_download_button=False,
336
+ )
337
+ download_button = gr.DownloadButton(
338
+ label="Download",
339
+ value=str(video_path) if video_path.exists() else None,
340
+ elem_classes=["download-button"],
341
+ )
342
+
343
+ gr.Markdown("### Left Hand Trajectories", elem_classes=["plots-title"])
344
+ plot_outputs_left = []
345
+ with gr.Column(elem_classes=["plots-wrap"]):
346
+ for row in PLOT_GRID:
347
+ with gr.Row():
348
+ for metric in row:
349
+ plot = gr.HTML(value=build_plot_html(metric, "left"), elem_classes=["plot-html"])
350
+ plot_outputs_left.append(plot)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
 
352
+ gr.Markdown("### Right Hand Trajectories", elem_classes=["plots-title"])
353
+ plot_outputs_right = []
354
+ with gr.Column(elem_classes=["plots-wrap"]):
355
+ for row in PLOT_GRID:
356
+ with gr.Row():
357
+ for metric in row:
358
+ plot = gr.HTML(value=build_plot_html(metric, "right"), elem_classes=["plot-html"])
359
+ plot_outputs_right.append(plot)
360
 
 
361
  if __name__ == "__main__":
362
+ demo.queue().launch(show_api=False)