Spaces:

DynamicIntelligence
/

di_trajectory_visualizer

Build error

App Files Files Community

Raffael-Kultyshev committed on Jan 13

Commit

f31d3ad

1 Parent(s): 6bccacb

Add trajectory visualizer for 57 data streams

Browse files

Files changed (5) hide show

.DS_Store +0 -0
README.md +47 -6
app.py +397 -0
requirements.txt +5 -0
src/__init__.py +1 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

README.md CHANGED Viewed

@@ -1,13 +1,54 @@
 ---
-title: Di Trajectory Visualizer
-emoji: 🐢
-colorFrom: yellow
-colorTo: gray
 sdk: gradio
-sdk_version: 6.3.0
 app_file: app.py
 pinned: false
 license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: DI Trajectory Visualizer
+emoji: 🤖
+colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: mit
 ---
+# Dynamic Intelligence - Trajectory Visualizer
+Visualize humanoid robot training data with **57 data streams** from egocentric human demonstrations.
+## Data Streams (57 total)
+### Visualized (15 streams)
+| Stream | Description | Unit |
+|--------|-------------|------|
+| Camera X, Y, Z | Camera position in world frame | meters |
+| Left Hand X, Y, Z | Left hand position in world frame | meters |
+| Right Hand X, Y, Z | Right hand position in world frame | meters |
+| Left Hand Roll, Pitch, Yaw | Left hand orientation | degrees |
+| Right Hand Roll, Pitch, Yaw | Right hand orientation | degrees |
+### Stored (42 streams)
+- **Left hand joints:** 21 keypoints × XYZ positions
+- **Right hand joints:** 21 keypoints × XYZ positions
+## Data Pipeline
+The data comes from the DI pipeline:
+1. **metadata.json** → Camera poses from ARKit (world frame)
+2. **hands_3d.json** → 3D hand positions and 21 joint landmarks
+3. **end_effector.json** → Hand roll/pitch/yaw orientations
+## Usage
+1. Select an episode from the dropdown
+2. Click "Load & Visualize"
+3. View time series plots (15 subplots) or 3D trajectory
+## Data Source
+Data is loaded from: [`DynamicIntelligence/humanoid-robots-training-dataset`](https://huggingface.co/datasets/DynamicIntelligence/humanoid-robots-training-dataset)
+## Technical Details
+- Built with Gradio + Plotly
+- Real-time data loading from HuggingFace Hub
+- Interactive 3D visualization
+- Frame-level temporal analysis

app.py ADDED Viewed

	@@ -0,0 +1,397 @@

+#!/usr/bin/env python3
+"""
+DI Trajectory Visualizer - HuggingFace Space
+Visualize 57 data streams from humanoid robot training data.
+"""
+import gradio as gr
+from pathlib import Path
+from huggingface_hub import hf_hub_download, list_repo_files
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import json
+import numpy as np
+from dataclasses import dataclass
+from typing import Optional, List, Dict
+# HuggingFace dataset repo
+DATASET_REPO = "DynamicIntelligence/humanoid-robots-training-dataset"
@dataclass
class TrajectoryData:
    """Per-episode container for the time series built by ``load_trajectory_data``.

    Every per-frame array has exactly one entry per frame listed in the
    episode metadata, so each of them can be plotted against ``timestamps``.
    """
    # Frame index divided by the episode fps.
    timestamps: np.ndarray
    # Camera position read from metadata.json (3 streams)
    camera_x: np.ndarray
    camera_y: np.ndarray
    camera_z: np.ndarray
    # Left hand position read from hands_3d.json (3 streams)
    left_hand_x: np.ndarray
    left_hand_y: np.ndarray
    left_hand_z: np.ndarray
    # Right hand position read from hands_3d.json (3 streams)
    right_hand_x: np.ndarray
    right_hand_y: np.ndarray
    right_hand_z: np.ndarray
    # Left hand orientation read from end_effector.json (3 streams)
    left_hand_roll: np.ndarray
    left_hand_pitch: np.ndarray
    left_hand_yaw: np.ndarray
    # Right hand orientation read from end_effector.json (3 streams)
    right_hand_roll: np.ndarray
    right_hand_pitch: np.ndarray
    right_hand_yaw: np.ndarray
    # Joint landmarks, shape (num_frames, 21, 3) - stored but not visualized
    left_hand_joints: np.ndarray
    right_hand_joints: np.ndarray


# Number of landmarks stored per hand in hands_3d.json.
_NUM_JOINTS = 21
# Frame rate assumed when metadata.json has no usable "fps" entry.
_DEFAULT_FPS = 30


def _read_json(path: Path, default: Optional[dict] = None) -> dict:
    """Parse the JSON file at *path*; return *default* instead when it is absent.

    Without a *default* a missing file raises ``FileNotFoundError``.
    """
    if default is not None and not path.exists():
        return default
    with open(path, 'r', encoding='utf-8') as fh:
        return json.load(fh)


def _triple(value: object) -> List[float]:
    """Return the first three items of *value* as floats, or zeros if malformed."""
    try:
        return [float(value[0]), float(value[1]), float(value[2])]
    except (TypeError, ValueError, IndexError, KeyError):
        return [0.0, 0.0, 0.0]


def _camera_position(frame: object) -> List[float]:
    """Extract the camera position of one metadata frame (zeros when absent)."""
    if not isinstance(frame, dict):
        return [0.0, 0.0, 0.0]
    if 'camera_pose' in frame:
        pose = frame['camera_pose']
        return _triple(pose.get('position') if isinstance(pose, dict) else None)
    return _triple(frame.get('position'))


def _hand_entry(frame: object, hand: str) -> dict:
    """Return ``frame[hand]`` when it is a dict, else an empty dict."""
    entry = frame.get(hand) if isinstance(frame, dict) else None
    return entry if isinstance(entry, dict) else {}


def _vector_series(frames: list, num_frames: int, hand: str, field: str) -> np.ndarray:
    """Build a (num_frames, 3) array of ``frame[hand][field]``.

    Frames beyond *num_frames* are ignored; missing frames, missing hands and
    malformed vectors leave a zero row, so the result always lines up with
    the metadata timeline.
    """
    series = np.zeros((num_frames, 3), dtype=float)
    for idx, frame in enumerate(frames[:num_frames]):
        series[idx] = _triple(_hand_entry(frame, hand).get(field))
    return series


def _joint_series(frames: list, num_frames: int, hand: str) -> np.ndarray:
    """Build a (num_frames, 21, 3) array of ``frame[hand]['landmarks_3d']``.

    Landmark sets that are missing or do not have shape (21, 3) are left as
    zeros instead of producing a ragged array.
    """
    series = np.zeros((num_frames, _NUM_JOINTS, 3), dtype=float)
    for idx, frame in enumerate(frames[:num_frames]):
        landmarks = _hand_entry(frame, hand).get('landmarks_3d')
        if landmarks is None:
            continue
        try:
            block = np.asarray(landmarks, dtype=float)
        except (TypeError, ValueError):
            continue
        if block.shape == (_NUM_JOINTS, 3):
            series[idx] = block
    return series


def load_trajectory_data(episode_path: Path) -> TrajectoryData:
    """Load all pipeline outputs for one episode.

    Reads ``extracted/metadata.json`` (falling back to ``metadata.json``) for
    the camera poses, plus the optional ``hands_3d.json`` and
    ``end_effector.json`` for hand positions, joints and orientations.

    Args:
        episode_path: Directory holding the episode's JSON files.

    Returns:
        A ``TrajectoryData`` whose per-frame arrays all have one entry per
        metadata frame. Values that are missing from the optional files are
        filled with zeros.

    Raises:
        FileNotFoundError: If neither metadata file exists.
    """
    metadata_path = episode_path / "extracted" / "metadata.json"
    if not metadata_path.exists():
        metadata_path = episode_path / "metadata.json"
    metadata = _read_json(metadata_path)
    hands_3d = _read_json(episode_path / "hands_3d.json", default={"frames": []})
    end_effector = _read_json(episode_path / "end_effector.json", default={"frames": []})

    # The metadata frame list defines the timeline every other stream follows.
    frames = metadata.get('frames', metadata.get('poses', [])) or []
    num_frames = len(frames)
    # A zero or null fps would break the division below.
    fps = metadata.get('fps', _DEFAULT_FPS) or _DEFAULT_FPS
    timestamps = np.arange(num_frames) / fps

    camera = np.array(
        [_camera_position(frame) for frame in frames], dtype=float
    ).reshape(num_frames, 3)

    hands_frames = hands_3d.get('frames', []) or []
    left_pos = _vector_series(hands_frames, num_frames, 'left_hand', 'position')
    right_pos = _vector_series(hands_frames, num_frames, 'right_hand', 'position')

    ee_frames = end_effector.get('frames', []) or []
    left_rot = _vector_series(ee_frames, num_frames, 'left_hand', 'orientation')
    right_rot = _vector_series(ee_frames, num_frames, 'right_hand', 'orientation')

    return TrajectoryData(
        timestamps=timestamps,
        camera_x=camera[:, 0], camera_y=camera[:, 1], camera_z=camera[:, 2],
        left_hand_x=left_pos[:, 0], left_hand_y=left_pos[:, 1], left_hand_z=left_pos[:, 2],
        right_hand_x=right_pos[:, 0], right_hand_y=right_pos[:, 1], right_hand_z=right_pos[:, 2],
        left_hand_roll=left_rot[:, 0], left_hand_pitch=left_rot[:, 1], left_hand_yaw=left_rot[:, 2],
        right_hand_roll=right_rot[:, 0], right_hand_pitch=right_rot[:, 1], right_hand_yaw=right_rot[:, 2],
        left_hand_joints=_joint_series(hands_frames, num_frames, 'left_hand'),
        right_hand_joints=_joint_series(hands_frames, num_frames, 'right_hand'),
    )
def create_trajectory_plots(data: TrajectoryData) -> go.Figure:
    """Build the 5x3 grid of time-series subplots, one trace per visualized stream.

    Each grid row holds one group of three streams (camera position, left and
    right hand position, left and right hand orientation) drawn in a shared
    colour against ``data.timestamps``. The joint arrays are not plotted.
    """
    # One entry per grid row: (line colour, [(subplot title, data attribute, trace name), ...]).
    grid = [
        ('#2563eb', [  # camera world frame (blue)
            ('Camera X (m)', 'camera_x', 'cam_x'),
            ('Camera Y (m)', 'camera_y', 'cam_y'),
            ('Camera Z (m)', 'camera_z', 'cam_z'),
        ]),
        ('#dc2626', [  # left hand position (red)
            ('Left Hand X (m)', 'left_hand_x', 'L_x'),
            ('Left Hand Y (m)', 'left_hand_y', 'L_y'),
            ('Left Hand Z (m)', 'left_hand_z', 'L_z'),
        ]),
        ('#16a34a', [  # right hand position (green)
            ('Right Hand X (m)', 'right_hand_x', 'R_x'),
            ('Right Hand Y (m)', 'right_hand_y', 'R_y'),
            ('Right Hand Z (m)', 'right_hand_z', 'R_z'),
        ]),
        ('#ea580c', [  # left hand orientation (orange)
            ('Left Hand Roll', 'left_hand_roll', 'L_roll'),
            ('Left Hand Pitch', 'left_hand_pitch', 'L_pitch'),
            ('Left Hand Yaw', 'left_hand_yaw', 'L_yaw'),
        ]),
        ('#9333ea', [  # right hand orientation (purple)
            ('Right Hand Roll', 'right_hand_roll', 'R_roll'),
            ('Right Hand Pitch', 'right_hand_pitch', 'R_pitch'),
            ('Right Hand Yaw', 'right_hand_yaw', 'R_yaw'),
        ]),
    ]

    # Subplot titles are consumed row by row, matching the grid order above.
    titles = [title for _, cells in grid for title, _, _ in cells]
    fig = make_subplots(
        rows=5, cols=3,
        subplot_titles=titles,
        vertical_spacing=0.08,
        horizontal_spacing=0.05
    )

    time_axis = data.timestamps
    for row_idx, (colour, cells) in enumerate(grid, start=1):
        for col_idx, (_, attr, trace_name) in enumerate(cells, start=1):
            series = getattr(data, attr)
            trace = go.Scatter(x=time_axis, y=series, name=trace_name, line=dict(color=colour))
            fig.add_trace(trace, row=row_idx, col=col_idx)

    fig.update_layout(
        height=1200,
        title_text="Trajectory Data Visualization (15 plots, 57 data streams)",
        showlegend=False,
        template="plotly_white"
    )
    fig.update_xaxes(title_text="Time (sec)")
    return fig
def create_3d_trajectory_plot(data: TrajectoryData) -> go.Figure:
    """Draw the camera, left-hand and right-hand paths as lines in one 3D scene.

    The hand positions are plotted as stored, with no transformation applied.
    """
    # (legend label, line colour, x series, y series, z series) in drawing order.
    paths = (
        ('Camera', '#2563eb', data.camera_x, data.camera_y, data.camera_z),
        ('Left Hand', '#dc2626', data.left_hand_x, data.left_hand_y, data.left_hand_z),
        ('Right Hand', '#16a34a', data.right_hand_x, data.right_hand_y, data.right_hand_z),
    )

    fig = go.Figure()
    for label, colour, xs, ys, zs in paths:
        fig.add_trace(go.Scatter3d(
            x=xs, y=ys, z=zs,
            mode='lines',
            name=label,
            line=dict(color=colour, width=4)
        ))

    scene_settings = dict(
        xaxis_title='X (m)',
        yaxis_title='Y (m)',
        zaxis_title='Z (m)',
        aspectmode='data',
        bgcolor='#fafafa'
    )
    fig.update_layout(
        title='3D Trajectory (World Frame)',
        scene=scene_settings,
        height=700,
        template="plotly_white"
    )
    return fig
def list_episodes() -> list:
    """Return the sorted top-level episode folders of the dataset repo.

    Folders whose name starts with ``episode`` are preferred; when there are
    none, every top-level folder is returned instead. The result is a
    one-element list holding a message when nothing is found or when the
    repo listing fails, so it can always be used as dropdown choices.
    """
    try:
        repo_files = list_repo_files(DATASET_REPO, repo_type="dataset")
        # Only paths containing a slash live inside a folder.
        top_dirs = [name.split('/')[0] for name in repo_files if '/' in name]
        found = {folder for folder in top_dirs if folder.startswith('episode')}
        if not found:
            # No "episode*" folders: fall back to any top-level folder.
            found = set(top_dirs)
        if not found:
            return ["No episodes found"]
        return sorted(found)
    except Exception as e:
        return [f"Error listing: {str(e)}"]
def _fetch_episode_file(episode_id: str, relative_path: str, required: bool = False) -> bool:
    """Download ``<episode_id>/<relative_path>`` from the dataset repo into /tmp.

    Returns True on success. On failure returns False, unless *required* is
    set, in which case the download error propagates to the caller.
    """
    try:
        hf_hub_download(
            repo_id=DATASET_REPO,
            filename=f"{episode_id}/{relative_path}",
            local_dir="/tmp",
            repo_type="dataset"
        )
    except Exception:
        # Catch Exception rather than using a bare except, so KeyboardInterrupt
        # and SystemExit are never swallowed.
        if required:
            raise
        return False
    return True


def load_and_visualize(episode_id: str):
    """Download one episode, load it and build both figures plus a stats table.

    Args:
        episode_id: Top-level folder name of the episode in the dataset repo.
            Empty values and the placeholder entries produced by
            ``list_episodes`` yield a "select an episode" prompt.

    Returns:
        A ``(time_series_figure, figure_3d, stats_markdown)`` tuple. Any
        failure while downloading, loading or plotting is reported inside the
        returned figures and markdown rather than raised.
    """
    if not episode_id or episode_id.startswith("Error") or episode_id == "No episodes found":
        empty_fig = go.Figure()
        empty_fig.add_annotation(text="Select an episode to visualize", showarrow=False, font_size=20)
        return empty_fig, empty_fig, "Select an episode from the dropdown"
    try:
        episode_path = Path(f"/tmp/{episode_id}")
        episode_path.mkdir(parents=True, exist_ok=True)

        # metadata.json is mandatory: try the "extracted" layout first, then
        # the flat layout, and let the second failure surface as the error.
        if not _fetch_episode_file(episode_id, "extracted/metadata.json"):
            _fetch_episode_file(episode_id, "metadata.json", required=True)
        # The hand files are optional; the loader copes with their absence.
        _fetch_episode_file(episode_id, "hands_3d.json")
        _fetch_episode_file(episode_id, "end_effector.json")

        data = load_trajectory_data(episode_path)
        trajectory_plot = create_trajectory_plots(data)
        plot_3d = create_3d_trajectory_plot(data)

        # An episode without frames has no last timestamp to index.
        duration = data.timestamps[-1] if len(data.timestamps) else 0.0
        stats = f"""
## Episode: {episode_id}
| Metric | Value |
|--------|-------|
| Duration | {duration:.2f} seconds |
| Frames | {len(data.timestamps)} |
| Data streams | 57 total |
| Visualized | 15 streams |
| Stored (joints) | 42 streams |
        """
        return trajectory_plot, plot_3d, stats
    except Exception as e:
        empty_fig = go.Figure()
        empty_fig.add_annotation(text=f"Error: {str(e)}", showarrow=False, font_size=14)
        return empty_fig, empty_fig, f"**Error:** {str(e)}"
# Static markdown shown above and below the interactive controls.
_INTRO_MARKDOWN = """
# Dynamic Intelligence - Trajectory Visualizer
Visualize humanoid robot training data: camera poses, hand positions, and orientations.
### Data Streams (57 total)
| Category | Streams | Description |
|----------|---------|-------------|
| Camera Position | 3 | X, Y, Z in world frame (meters) |
| Left Hand Position | 3 | X, Y, Z in world frame (meters) |
| Right Hand Position | 3 | X, Y, Z in world frame (meters) |
| Left Hand Orientation | 3 | Roll, Pitch, Yaw (degrees) |
| Right Hand Orientation | 3 | Roll, Pitch, Yaw (degrees) |
| Hand Joints (stored) | 42 | 21 joints x 2 hands x XYZ |
    """
_FOOTER_MARKDOWN = """
---
**Data Source:** [DynamicIntelligence/humanoid-robots-training-dataset](https://huggingface.co/datasets/DynamicIntelligence/humanoid-robots-training-dataset)
    """

# Gradio interface: components are created in page order, top to bottom.
_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="green")
with gr.Blocks(title="DI Trajectory Visualizer", theme=_theme) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Episode picker and trigger button share one row.
    with gr.Row():
        episode_dropdown = gr.Dropdown(
            label="Select Episode",
            # The episode list is fetched once, while the interface is built.
            choices=list_episodes(),
            interactive=True,
            scale=3
        )
        load_btn = gr.Button("Load & Visualize", variant="primary", scale=1)

    stats_output = gr.Markdown()

    with gr.Tabs():
        with gr.TabItem("Time Series (15 plots)"):
            trajectory_plot = gr.Plot(label="Trajectory Data")
        with gr.TabItem("3D View"):
            plot_3d = gr.Plot(label="3D Trajectory")

    # One click fills both plots and the stats panel.
    load_btn.click(
        fn=load_and_visualize,
        inputs=[episode_dropdown],
        outputs=[trajectory_plot, plot_3d, stats_output]
    )

    gr.Markdown(_FOOTER_MARKDOWN)

if __name__ == "__main__":
    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio>=4.0.0
+plotly>=5.18.0
+numpy>=1.24.0
+huggingface_hub>=0.20.0
+pandas>=2.0.0

src/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # DI Trajectory Visualizer