Commit · cd205ff
Parent: a8115f2

Simplify app with better error handling

app.py CHANGED
```diff
@@ -5,7 +5,6 @@ Egocentric hand tracking dataset visualizer for robot training data
 
 import gradio as gr
 import json
-import os
 from pathlib import Path
 
 # Load pipeline data
```
```diff
@@ -15,46 +14,65 @@ def load_json_safe(path):
     try:
         with open(path, 'r') as f:
             return json.load(f)
-    except:
+    except Exception as e:
+        print(f"Error loading {path}: {e}")
         return {}
 
-# Load data
+# Load data at startup
+print("Loading data...")
 metadata = load_json_safe(DATA_DIR / "metadata.json")
 end_effector = load_json_safe(DATA_DIR / "end_effector.json")
 actions = load_json_safe(DATA_DIR / "actions.json")
 hands_2d = load_json_safe(DATA_DIR / "hands_2d.json")
+print(f"Loaded: metadata={len(metadata)}, ee={len(end_effector)}, actions={len(actions)}, hands={len(hands_2d)}")
 
 # Stats
-total_frames = len(metadata.get('poses', []))
+total_frames = max(1, len(metadata.get('poses', [])))
 fps = metadata.get('fps', 60)
-hand_detection_rate = len(hands_2d) /
+hand_detection_rate = len(hands_2d) / total_frames * 100
+
+# Count poses safely
+left_poses = 0
+right_poses = 0
+for frame_data in end_effector.values():
+    if frame_data and isinstance(frame_data, dict):
+        if frame_data.get('left_hand'):
+            left_poses += 1
+        if frame_data.get('right_hand'):
+            right_poses += 1
+
+print(f"Stats: frames={total_frames}, left={left_poses}, right={right_poses}")
 
 def get_frame_info(frame_idx):
     """Get info for a specific frame."""
+    try:
+        frame_key = str(int(frame_idx))
+
+        # Hand detection status
+        hand_data = hands_2d.get(frame_key) or {}
+        left_detected = bool(hand_data.get('left_hand'))
+        right_detected = bool(hand_data.get('right_hand'))
+
+        # End effector pose
+        ee_data = end_effector.get(frame_key) or {}
+        left_hand_data = ee_data.get('left_hand') if ee_data else None
+        right_hand_data = ee_data.get('right_hand') if ee_data else None
+
+        left_pose = None
+        right_pose = None
+        if left_hand_data and isinstance(left_hand_data, dict):
+            left_pose = left_hand_data.get('pose_6dof')
+        if right_hand_data and isinstance(right_hand_data, dict):
+            right_pose = right_hand_data.get('pose_6dof')
+
+        # Action
+        action_data = actions.get(frame_key) or {}
+        left_action = action_data.get('left_hand_action') if action_data else None
+        camera_action = action_data.get('camera_action') if action_data else None
+
+        # Format output
+        info = f"""### Frame {int(frame_idx)} / {total_frames - 1}
+**Time:** {int(frame_idx) / fps:.2f}s
 
 ---
 
```
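The startup stats and `get_frame_info` imply a specific on-disk layout: each JSON file is keyed by stringified frame index, with per-hand sub-dicts. A sketch of the shapes this code expects — field names come from the accessors above, all values are made up, units are inferred from the `*100` (m→cm) and `*57.3` (rad→deg) conversions, and `DATA_DIR` is assumed to be a `pathlib.Path` defined earlier in app.py, outside the hunks shown:

```python
# Illustrative shapes only -- inferred from the accessors, not from the dataset files.
# Keys are stringified frame indices.

# end_effector.json: per-frame, per-hand 6DoF pose (meters / radians).
end_effector_example = {
    "0": {
        "left_hand": {"pose_6dof": [0.12, -0.03, 0.45, 0.10, -0.20, 0.05]},
        "right_hand": None,  # hand not detected in this frame
    },
}

# hands_2d.json: per-frame detection records; this app only checks presence.
hands_2d_example = {"0": {"left_hand": [[0.5, 0.5]] * 21, "right_hand": None}}

# actions.json: per-frame deltas in meters.
actions_example = {
    "0": {"left_hand_action": [0.001, 0.0, -0.002], "camera_action": [0.0, 0.001, 0.0]},
}

# metadata.json: one camera pose per frame, plus capture FPS.
metadata_example = {"fps": 60, "poses": [[0.0] * 6]}
```

Under this layout `hand_detection_rate` is just frame coverage: for instance, 412 entries in `hands_2d` over 500 poses would put 412 / 500 * 100 = 82.4% in the stats table.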
```diff
@@ -66,46 +84,55 @@ def get_frame_info(frame_idx):
 
 #### 📍 End-Effector Pose (6DoF)
 """
+
+        if left_pose and len(left_pose) >= 6:
+            info += f"""
 **Left Hand:**
 - Position: X={left_pose[0]*100:.1f}cm, Y={left_pose[1]*100:.1f}cm, Z={left_pose[2]*100:.1f}cm
 - Rotation: Roll={left_pose[3]*57.3:.1f}°, Pitch={left_pose[4]*57.3:.1f}°, Yaw={left_pose[5]*57.3:.1f}°
 """
+        else:
+            info += "\n**Left Hand:** No pose available\n"
+
+        if right_pose and len(right_pose) >= 6:
+            info += f"""
 **Right Hand:**
 - Position: X={right_pose[0]*100:.1f}cm, Y={right_pose[1]*100:.1f}cm, Z={right_pose[2]*100:.1f}cm
 - Rotation: Roll={right_pose[3]*57.3:.1f}°, Pitch={right_pose[4]*57.3:.1f}°, Yaw={right_pose[5]*57.3:.1f}°
 """
+
+        info += "\n---\n\n#### 🎯 Actions (Delta per frame)\n"
+
+        if left_action and len(left_action) >= 3:
+            mag = (left_action[0]**2 + left_action[1]**2 + left_action[2]**2)**0.5 * 100
+            info += f"**Left Hand Movement:** {mag:.2f} cm\n"
+
+        if camera_action and len(camera_action) >= 3:
+            cam_mag = (camera_action[0]**2 + camera_action[1]**2 + camera_action[2]**2)**0.5 * 100
+            info += f"**Camera Movement:** {cam_mag:.2f} cm\n"
+
+        return info
+    except Exception as e:
+        return f"Error: {str(e)}"
 
 def get_frame_image(frame_idx):
     """Get RGB frame image path."""
+    # Round to nearest 10 (we only have every 10th frame)
+    idx = int(frame_idx)
+    idx = (idx // 10) * 10
+    frame_path = DATA_DIR / "frames" / f"{idx}.jpg"
+    if frame_path.exists():
+        return str(frame_path)
+    # Try exact frame
+    frame_path = DATA_DIR / "frames" / f"{int(frame_idx)}.jpg"
     if frame_path.exists():
         return str(frame_path)
     return None
 
 def update_display(frame_idx):
     """Update frame display."""
-    img = get_frame_image(
-    info = get_frame_info(
+    img = get_frame_image(frame_idx)
+    info = get_frame_info(frame_idx)
     return img, info
 
 # Build Gradio Interface
```
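The new `get_frame_image` floors the requested index to a multiple of 10 before falling back to the exact index, matching the `step=10` slider configured later in the diff. A quick sketch of the mapping (values purely illustrative):

```python
# Floor-to-multiple-of-10 lookup used by get_frame_image:
for requested in (0, 7, 47, 120):
    print(requested, "->", (requested // 10) * 10)  # 0->0, 7->0, 47->40, 120->120
```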
```diff
@@ -117,48 +144,37 @@ with gr.Blocks(title="DI Human Demo Visualizer") as demo:
 
     **Egocentric hand tracking dataset for humanoid robot training**
 
-    - RGB video from iPhone LiDAR camera
-    - 21 hand joints tracked via MediaPipe
-    - 6DoF end-effector pose computed from 3 key joints
-    - Actions (delta movements) for robot learning
+    Pipeline: iPhone LiDAR → MediaPipe → 6DoF End-Effector → Robot Training Data
     """)
 
     # Stats row
+    gr.Markdown(f"""
+    | Stat | Value |
+    |------|-------|
+    | Total Frames | {total_frames} |
+    | Hand Detection | {hand_detection_rate:.1f}% |
+    | Left Hand Poses | {left_poses} |
+    | Right Hand Poses | {right_poses} |
+    | FPS | {fps} |
+    """)
 
     gr.Markdown("---")
 
     # Main content
     with gr.Row():
-        # Left: Video frame
         with gr.Column(scale=2):
             gr.Markdown("### 📹 RGB Frame")
-            frame_image = gr.Image(
-                label="Frame",
-                type="filepath",
-                height=400
-            )
+            frame_image = gr.Image(label="Frame", height=400)
             frame_slider = gr.Slider(
                 minimum=0,
-                maximum=total_frames - 1,
-                step=
+                maximum=max(1, total_frames - 1),
+                step=10,
                 value=0,
-                label="Frame",
-                info="Drag to scrub through video"
+                label="Frame (every 10th frame available)"
             )
 
-        # Right: Frame info
         with gr.Column(scale=1):
-            frame_info = gr.Markdown(
+            frame_info = gr.Markdown("Select a frame to see details")
 
     gr.Markdown("---")
 
```
```diff
@@ -166,28 +182,20 @@ with gr.Blocks(title="DI Human Demo Visualizer") as demo:
     gr.Markdown("### 📊 Validation Plots")
 
     with gr.Row():
-        gr.Image(
-            value=str(DATA_DIR / "plots" / "left_hand_trajectory.png"),
-            label="Left Hand Trajectory (Camera Frame)",
-            height=300
-        )
+        camera_plot = DATA_DIR / "plots" / "camera_trajectory.png"
+        left_plot = DATA_DIR / "plots" / "left_hand_trajectory.png"
+        if camera_plot.exists():
+            gr.Image(value=str(camera_plot), label="Camera Trajectory")
+        if left_plot.exists():
+            gr.Image(value=str(left_plot), label="Left Hand Trajectory")
 
     with gr.Row():
-        gr.Image(
-            value=str(DATA_DIR / "plots" / "actions_histogram.png"),
-            label="Actions Distribution",
-            height=300
-        )
+        pose_plot = DATA_DIR / "plots" / "hand_pose_vs_time.png"
+        action_plot = DATA_DIR / "plots" / "actions_histogram.png"
+        if pose_plot.exists():
+            gr.Image(value=str(pose_plot), label="Hand Pose vs Time")
+        if action_plot.exists():
+            gr.Image(value=str(action_plot), label="Actions Distribution")
 
     # Physics validation
     gr.Markdown("""
```
```diff
@@ -200,17 +208,11 @@ with gr.Blocks(title="DI Human Demo Visualizer") as demo:
     | Camera Trajectory | ✅ PASS | Smooth movement, ~40cm total range |
     | Hand Depth Range | ✅ PASS | 15-60cm from camera (realistic) |
     | Action Magnitudes | ✅ PASS | Median 0.34cm/frame (no tracking errors) |
-    | Outlier Detection | ✅ PASS | No large jumps detected |
     | 6DoF Rotations | ✅ PASS | Natural hand movement patterns |
 
     ---
 
-    - **Source:** iPhone 12 Pro+ LiDAR camera (Record3D app)
-    - **Pipeline:** MediaPipe hand tracking → Depth lookup → 6DoF computation
-    - **Format:** Compatible with Physical Intelligence / LeRobot training
-    - **Organization:** [Dynamic Intelligence](https://huggingface.co/DynamicIntelligence)
+    **Organization:** [Dynamic Intelligence](https://huggingface.co/DynamicIntelligence)
     """)
 
     # Event handler
```
```diff
@@ -219,13 +221,7 @@ with gr.Blocks(title="DI Human Demo Visualizer") as demo:
         inputs=[frame_slider],
         outputs=[frame_image, frame_info]
     )
-
-    # Initialize
-    demo.load(
-        fn=lambda: update_display(0),
-        outputs=[frame_image, frame_info]
-    )
 
+# Launch
 if __name__ == "__main__":
     demo.launch()
-
```
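The last hunk opens mid-call, so the line that attaches the event handler is not shown. Given the `inputs`/`outputs` above and the removed `demo.load(fn=lambda: update_display(0), ...)` initializer, the wiring is presumably a slider-change event along these lines — a sketch with an assumed `fn`, not the literal line from app.py:

```python
# Presumed event wiring; fn=update_display is an assumption based on the
# removed demo.load initializer, which called update_display(0).
frame_slider.change(
    fn=update_display,
    inputs=[frame_slider],
    outputs=[frame_image, frame_info],
)
```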
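One behavioral note: with the `demo.load` initializer gone, the info panel shows its placeholder text until the slider first moves. Running the app is unchanged; a minimal sketch that spells out Gradio's standard launch defaults (these arguments are not part of this commit):

```python
# `python app.py` serves the UI; server_name/server_port below are Gradio's
# defaults made explicit, shown only for illustration.
if __name__ == "__main__":
    demo.launch(server_name="127.0.0.1", server_port=7860)
```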