Raffael-Kultyshev commited on
Commit
cd205ff
Β·
1 Parent(s): a8115f2

Simplify app with better error handling

Browse files
Files changed (1) hide show
  1. app.py +108 -112
app.py CHANGED
@@ -5,7 +5,6 @@ Egocentric hand tracking dataset visualizer for robot training data
5
 
6
  import gradio as gr
7
  import json
8
- import os
9
  from pathlib import Path
10
 
11
  # Load pipeline data
@@ -15,46 +14,65 @@ def load_json_safe(path):
15
  try:
16
  with open(path, 'r') as f:
17
  return json.load(f)
18
- except:
 
19
  return {}
20
 
21
- # Load data
 
22
  metadata = load_json_safe(DATA_DIR / "metadata.json")
23
  end_effector = load_json_safe(DATA_DIR / "end_effector.json")
24
  actions = load_json_safe(DATA_DIR / "actions.json")
25
  hands_2d = load_json_safe(DATA_DIR / "hands_2d.json")
 
26
 
27
  # Stats
28
- total_frames = len(metadata.get('poses', []))
29
  fps = metadata.get('fps', 60)
30
- hand_detection_rate = len(hands_2d) / max(1, total_frames) * 100
31
- left_poses = sum(1 for f in end_effector.values() if f.get('left_hand'))
32
- right_poses = sum(1 for f in end_effector.values() if f.get('right_hand'))
 
 
 
 
 
 
 
 
 
 
33
 
34
  def get_frame_info(frame_idx):
35
  """Get info for a specific frame."""
36
- frame_key = str(frame_idx)
37
-
38
- # Hand detection status
39
- hand_data = hands_2d.get(frame_key, {}) or {}
40
- left_detected = hand_data.get('left_hand') is not None
41
- right_detected = hand_data.get('right_hand') is not None
42
-
43
- # End effector pose
44
- ee_data = end_effector.get(frame_key, {}) or {}
45
- left_hand_data = ee_data.get('left_hand') or {}
46
- right_hand_data = ee_data.get('right_hand') or {}
47
- left_pose = left_hand_data.get('pose_6dof', None) if left_hand_data else None
48
- right_pose = right_hand_data.get('pose_6dof', None) if right_hand_data else None
49
-
50
- # Action
51
- action_data = actions.get(frame_key, {}) or {}
52
- left_action = action_data.get('left_hand_action', None)
53
- camera_action = action_data.get('camera_action', None)
54
-
55
- # Format output
56
- info = f"""### Frame {frame_idx} / {total_frames - 1}
57
- **Time:** {frame_idx / fps:.2f}s
 
 
 
 
 
 
58
 
59
  ---
60
 
@@ -66,46 +84,55 @@ def get_frame_info(frame_idx):
66
 
67
  #### πŸ“ End-Effector Pose (6DoF)
68
  """
69
-
70
- if left_pose:
71
- info += f"""
72
  **Left Hand:**
73
  - Position: X={left_pose[0]*100:.1f}cm, Y={left_pose[1]*100:.1f}cm, Z={left_pose[2]*100:.1f}cm
74
  - Rotation: Roll={left_pose[3]*57.3:.1f}Β°, Pitch={left_pose[4]*57.3:.1f}Β°, Yaw={left_pose[5]*57.3:.1f}Β°
75
  """
76
- else:
77
- info += "\n**Left Hand:** No pose available\n"
78
-
79
- if right_pose:
80
- info += f"""
81
  **Right Hand:**
82
  - Position: X={right_pose[0]*100:.1f}cm, Y={right_pose[1]*100:.1f}cm, Z={right_pose[2]*100:.1f}cm
83
  - Rotation: Roll={right_pose[3]*57.3:.1f}Β°, Pitch={right_pose[4]*57.3:.1f}Β°, Yaw={right_pose[5]*57.3:.1f}Β°
84
  """
85
-
86
- info += "\n---\n\n#### 🎯 Actions (Delta per frame)\n"
87
-
88
- if left_action:
89
- mag = (left_action[0]**2 + left_action[1]**2 + left_action[2]**2)**0.5 * 100
90
- info += f"**Left Hand Movement:** {mag:.2f} cm\n"
91
-
92
- if camera_action:
93
- cam_mag = (camera_action[0]**2 + camera_action[1]**2 + camera_action[2]**2)**0.5 * 100
94
- info += f"**Camera Movement:** {cam_mag:.2f} cm\n"
95
-
96
- return info
 
 
97
 
98
  def get_frame_image(frame_idx):
99
  """Get RGB frame image path."""
100
- frame_path = DATA_DIR / "frames" / f"{frame_idx}.jpg"
 
 
 
 
 
 
 
101
  if frame_path.exists():
102
  return str(frame_path)
103
  return None
104
 
105
  def update_display(frame_idx):
106
  """Update frame display."""
107
- img = get_frame_image(int(frame_idx))
108
- info = get_frame_info(int(frame_idx))
109
  return img, info
110
 
111
  # Build Gradio Interface
@@ -117,48 +144,37 @@ with gr.Blocks(title="DI Human Demo Visualizer") as demo:
117
 
118
  **Egocentric hand tracking dataset for humanoid robot training**
119
 
120
- This visualizer shows the processed output of our LiDAR-based hand tracking pipeline:
121
- - RGB video from iPhone LiDAR camera
122
- - 21 hand joints tracked via MediaPipe
123
- - 6DoF end-effector pose computed from 3 key joints
124
- - Actions (delta movements) for robot learning
125
  """)
126
 
127
  # Stats row
128
- with gr.Row():
129
- with gr.Column(scale=1):
130
- gr.Markdown(f"### {total_frames}\n**Total Frames**")
131
- with gr.Column(scale=1):
132
- gr.Markdown(f"### {hand_detection_rate:.1f}%\n**Hand Detection**")
133
- with gr.Column(scale=1):
134
- gr.Markdown(f"### {left_poses}\n**Left Hand Poses**")
135
- with gr.Column(scale=1):
136
- gr.Markdown(f"### {fps}\n**FPS**")
137
 
138
  gr.Markdown("---")
139
 
140
  # Main content
141
  with gr.Row():
142
- # Left: Video frame
143
  with gr.Column(scale=2):
144
  gr.Markdown("### πŸ“Ή RGB Frame")
145
- frame_image = gr.Image(
146
- label="Frame",
147
- type="filepath",
148
- height=400
149
- )
150
  frame_slider = gr.Slider(
151
  minimum=0,
152
- maximum=total_frames - 1,
153
- step=1,
154
  value=0,
155
- label="Frame",
156
- info="Drag to scrub through video"
157
  )
158
 
159
- # Right: Frame info
160
  with gr.Column(scale=1):
161
- frame_info = gr.Markdown(get_frame_info(0))
162
 
163
  gr.Markdown("---")
164
 
@@ -166,28 +182,20 @@ with gr.Blocks(title="DI Human Demo Visualizer") as demo:
166
  gr.Markdown("### πŸ“Š Validation Plots")
167
 
168
  with gr.Row():
169
- gr.Image(
170
- value=str(DATA_DIR / "plots" / "camera_trajectory.png"),
171
- label="Camera Trajectory (World Frame)",
172
- height=300
173
- )
174
- gr.Image(
175
- value=str(DATA_DIR / "plots" / "left_hand_trajectory.png"),
176
- label="Left Hand Trajectory (Camera Frame)",
177
- height=300
178
- )
179
 
180
  with gr.Row():
181
- gr.Image(
182
- value=str(DATA_DIR / "plots" / "hand_pose_vs_time.png"),
183
- label="Hand 6DoF Pose vs Time",
184
- height=300
185
- )
186
- gr.Image(
187
- value=str(DATA_DIR / "plots" / "actions_histogram.png"),
188
- label="Actions Distribution",
189
- height=300
190
- )
191
 
192
  # Physics validation
193
  gr.Markdown("""
@@ -200,17 +208,11 @@ with gr.Blocks(title="DI Human Demo Visualizer") as demo:
200
  | Camera Trajectory | βœ… PASS | Smooth movement, ~40cm total range |
201
  | Hand Depth Range | βœ… PASS | 15-60cm from camera (realistic) |
202
  | Action Magnitudes | βœ… PASS | Median 0.34cm/frame (no tracking errors) |
203
- | Outlier Detection | βœ… PASS | No large jumps detected |
204
  | 6DoF Rotations | βœ… PASS | Natural hand movement patterns |
205
 
206
  ---
207
 
208
- ### πŸ“‹ Dataset Info
209
-
210
- - **Source:** iPhone 12 Pro+ LiDAR camera (Record3D app)
211
- - **Pipeline:** MediaPipe hand tracking β†’ Depth lookup β†’ 6DoF computation
212
- - **Format:** Compatible with Physical Intelligence / LeRobot training
213
- - **Organization:** [Dynamic Intelligence](https://huggingface.co/DynamicIntelligence)
214
  """)
215
 
216
  # Event handler
@@ -219,13 +221,7 @@ with gr.Blocks(title="DI Human Demo Visualizer") as demo:
219
  inputs=[frame_slider],
220
  outputs=[frame_image, frame_info]
221
  )
222
-
223
- # Initialize
224
- demo.load(
225
- fn=lambda: update_display(0),
226
- outputs=[frame_image, frame_info]
227
- )
228
 
 
229
  if __name__ == "__main__":
230
  demo.launch()
231
-
 
5
 
6
  import gradio as gr
7
  import json
 
8
  from pathlib import Path
9
 
10
  # Load pipeline data
 
14
  try:
15
  with open(path, 'r') as f:
16
  return json.load(f)
17
+ except Exception as e:
18
+ print(f"Error loading {path}: {e}")
19
  return {}
20
 
21
+ # Load data at startup
22
+ print("Loading data...")
23
  metadata = load_json_safe(DATA_DIR / "metadata.json")
24
  end_effector = load_json_safe(DATA_DIR / "end_effector.json")
25
  actions = load_json_safe(DATA_DIR / "actions.json")
26
  hands_2d = load_json_safe(DATA_DIR / "hands_2d.json")
27
+ print(f"Loaded: metadata={len(metadata)}, ee={len(end_effector)}, actions={len(actions)}, hands={len(hands_2d)}")
28
 
29
  # Stats
30
+ total_frames = max(1, len(metadata.get('poses', [])))
31
  fps = metadata.get('fps', 60)
32
+ hand_detection_rate = len(hands_2d) / total_frames * 100
33
+
34
+ # Count poses safely
35
+ left_poses = 0
36
+ right_poses = 0
37
+ for frame_data in end_effector.values():
38
+ if frame_data and isinstance(frame_data, dict):
39
+ if frame_data.get('left_hand'):
40
+ left_poses += 1
41
+ if frame_data.get('right_hand'):
42
+ right_poses += 1
43
+
44
+ print(f"Stats: frames={total_frames}, left={left_poses}, right={right_poses}")
45
 
46
  def get_frame_info(frame_idx):
47
  """Get info for a specific frame."""
48
+ try:
49
+ frame_key = str(int(frame_idx))
50
+
51
+ # Hand detection status
52
+ hand_data = hands_2d.get(frame_key) or {}
53
+ left_detected = bool(hand_data.get('left_hand'))
54
+ right_detected = bool(hand_data.get('right_hand'))
55
+
56
+ # End effector pose
57
+ ee_data = end_effector.get(frame_key) or {}
58
+ left_hand_data = ee_data.get('left_hand') if ee_data else None
59
+ right_hand_data = ee_data.get('right_hand') if ee_data else None
60
+
61
+ left_pose = None
62
+ right_pose = None
63
+ if left_hand_data and isinstance(left_hand_data, dict):
64
+ left_pose = left_hand_data.get('pose_6dof')
65
+ if right_hand_data and isinstance(right_hand_data, dict):
66
+ right_pose = right_hand_data.get('pose_6dof')
67
+
68
+ # Action
69
+ action_data = actions.get(frame_key) or {}
70
+ left_action = action_data.get('left_hand_action') if action_data else None
71
+ camera_action = action_data.get('camera_action') if action_data else None
72
+
73
+ # Format output
74
+ info = f"""### Frame {int(frame_idx)} / {total_frames - 1}
75
+ **Time:** {int(frame_idx) / fps:.2f}s
76
 
77
  ---
78
 
 
84
 
85
  #### πŸ“ End-Effector Pose (6DoF)
86
  """
87
+
88
+ if left_pose and len(left_pose) >= 6:
89
+ info += f"""
90
  **Left Hand:**
91
  - Position: X={left_pose[0]*100:.1f}cm, Y={left_pose[1]*100:.1f}cm, Z={left_pose[2]*100:.1f}cm
92
  - Rotation: Roll={left_pose[3]*57.3:.1f}Β°, Pitch={left_pose[4]*57.3:.1f}Β°, Yaw={left_pose[5]*57.3:.1f}Β°
93
  """
94
+ else:
95
+ info += "\n**Left Hand:** No pose available\n"
96
+
97
+ if right_pose and len(right_pose) >= 6:
98
+ info += f"""
99
  **Right Hand:**
100
  - Position: X={right_pose[0]*100:.1f}cm, Y={right_pose[1]*100:.1f}cm, Z={right_pose[2]*100:.1f}cm
101
  - Rotation: Roll={right_pose[3]*57.3:.1f}Β°, Pitch={right_pose[4]*57.3:.1f}Β°, Yaw={right_pose[5]*57.3:.1f}Β°
102
  """
103
+
104
+ info += "\n---\n\n#### 🎯 Actions (Delta per frame)\n"
105
+
106
+ if left_action and len(left_action) >= 3:
107
+ mag = (left_action[0]**2 + left_action[1]**2 + left_action[2]**2)**0.5 * 100
108
+ info += f"**Left Hand Movement:** {mag:.2f} cm\n"
109
+
110
+ if camera_action and len(camera_action) >= 3:
111
+ cam_mag = (camera_action[0]**2 + camera_action[1]**2 + camera_action[2]**2)**0.5 * 100
112
+ info += f"**Camera Movement:** {cam_mag:.2f} cm\n"
113
+
114
+ return info
115
+ except Exception as e:
116
+ return f"Error: {str(e)}"
117
 
118
  def get_frame_image(frame_idx):
119
  """Get RGB frame image path."""
120
+ # Round to nearest 10 (we only have every 10th frame)
121
+ idx = int(frame_idx)
122
+ idx = (idx // 10) * 10
123
+ frame_path = DATA_DIR / "frames" / f"{idx}.jpg"
124
+ if frame_path.exists():
125
+ return str(frame_path)
126
+ # Try exact frame
127
+ frame_path = DATA_DIR / "frames" / f"{int(frame_idx)}.jpg"
128
  if frame_path.exists():
129
  return str(frame_path)
130
  return None
131
 
132
  def update_display(frame_idx):
133
  """Update frame display."""
134
+ img = get_frame_image(frame_idx)
135
+ info = get_frame_info(frame_idx)
136
  return img, info
137
 
138
  # Build Gradio Interface
 
144
 
145
  **Egocentric hand tracking dataset for humanoid robot training**
146
 
147
+ Pipeline: iPhone LiDAR β†’ MediaPipe β†’ 6DoF End-Effector β†’ Robot Training Data
 
 
 
 
148
  """)
149
 
150
  # Stats row
151
+ gr.Markdown(f"""
152
+ | Stat | Value |
153
+ |------|-------|
154
+ | Total Frames | {total_frames} |
155
+ | Hand Detection | {hand_detection_rate:.1f}% |
156
+ | Left Hand Poses | {left_poses} |
157
+ | Right Hand Poses | {right_poses} |
158
+ | FPS | {fps} |
159
+ """)
160
 
161
  gr.Markdown("---")
162
 
163
  # Main content
164
  with gr.Row():
 
165
  with gr.Column(scale=2):
166
  gr.Markdown("### πŸ“Ή RGB Frame")
167
+ frame_image = gr.Image(label="Frame", height=400)
 
 
 
 
168
  frame_slider = gr.Slider(
169
  minimum=0,
170
+ maximum=max(1, total_frames - 1),
171
+ step=10,
172
  value=0,
173
+ label="Frame (every 10th frame available)"
 
174
  )
175
 
 
176
  with gr.Column(scale=1):
177
+ frame_info = gr.Markdown("Select a frame to see details")
178
 
179
  gr.Markdown("---")
180
 
 
182
  gr.Markdown("### πŸ“Š Validation Plots")
183
 
184
  with gr.Row():
185
+ camera_plot = DATA_DIR / "plots" / "camera_trajectory.png"
186
+ left_plot = DATA_DIR / "plots" / "left_hand_trajectory.png"
187
+ if camera_plot.exists():
188
+ gr.Image(value=str(camera_plot), label="Camera Trajectory")
189
+ if left_plot.exists():
190
+ gr.Image(value=str(left_plot), label="Left Hand Trajectory")
 
 
 
 
191
 
192
  with gr.Row():
193
+ pose_plot = DATA_DIR / "plots" / "hand_pose_vs_time.png"
194
+ action_plot = DATA_DIR / "plots" / "actions_histogram.png"
195
+ if pose_plot.exists():
196
+ gr.Image(value=str(pose_plot), label="Hand Pose vs Time")
197
+ if action_plot.exists():
198
+ gr.Image(value=str(action_plot), label="Actions Distribution")
 
 
 
 
199
 
200
  # Physics validation
201
  gr.Markdown("""
 
208
  | Camera Trajectory | βœ… PASS | Smooth movement, ~40cm total range |
209
  | Hand Depth Range | βœ… PASS | 15-60cm from camera (realistic) |
210
  | Action Magnitudes | βœ… PASS | Median 0.34cm/frame (no tracking errors) |
 
211
  | 6DoF Rotations | βœ… PASS | Natural hand movement patterns |
212
 
213
  ---
214
 
215
+ **Organization:** [Dynamic Intelligence](https://huggingface.co/DynamicIntelligence)
 
 
 
 
 
216
  """)
217
 
218
  # Event handler
 
221
  inputs=[frame_slider],
222
  outputs=[frame_image, frame_info]
223
  )
 
 
 
 
 
 
224
 
225
+ # Launch
226
  if __name__ == "__main__":
227
  demo.launch()