|
|
""" |
|
|
Dynamic Intelligence - Human Demo Visualizer |
|
|
Egocentric hand tracking dataset visualizer for robot training data |
|
|
""" |
|
|
|
|
|
import json
import math
from pathlib import Path

import gradio as gr
|
|
|
|
|
|
|
|
# Dataset root: ./data next to this script (holds *.json, frames/, plots/).
DATA_DIR = Path(__file__).parent / "data"
|
|
|
|
|
def load_json_safe(path):
    """Load a JSON file, returning an empty dict on read/parse failure.

    Args:
        path: ``Path`` or string pointing at the JSON file.

    Returns:
        The parsed JSON content, or ``{}`` if the file is missing,
        unreadable, or not valid JSON.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers missing files / permission problems;
        # json.JSONDecodeError is a ValueError subclass. Anything else is a
        # real bug and should propagate rather than be silently swallowed.
        print(f"Error loading {path}: {e}")
        return {}
|
|
|
|
|
|
|
|
# Load every dataset JSON at import time; load_json_safe() yields {} on
# failure so the UI can still start with partial data.
print("Loading data...")
metadata = load_json_safe(DATA_DIR / "metadata.json")
end_effector = load_json_safe(DATA_DIR / "end_effector.json")
actions = load_json_safe(DATA_DIR / "actions.json")
hands_2d = load_json_safe(DATA_DIR / "hands_2d.json")
print(f"Loaded: metadata={len(metadata)}, ee={len(end_effector)}, actions={len(actions)}, hands={len(hands_2d)}")

# Summary statistics shown in the header table.
# max(1, ...) guards the divisions below against missing/empty metadata.
total_frames = max(1, len(metadata.get('poses', [])))
fps = metadata.get('fps', 60)  # fall back to 60 fps when not recorded
# Percentage of frames that have any 2D hand entry at all.
hand_detection_rate = len(hands_2d) / total_frames * 100
|
|
|
|
|
|
|
|
# Count the frames that carry a valid end-effector entry for each hand.
# Non-dict or empty entries contribute nothing, matching the original loop.
left_poses = sum(
    1 for entry in end_effector.values()
    if isinstance(entry, dict) and entry.get('left_hand')
)
right_poses = sum(
    1 for entry in end_effector.values()
    if isinstance(entry, dict) and entry.get('right_hand')
)

print(f"Stats: frames={total_frames}, left={left_poses}, right={right_poses}")
|
|
|
|
|
def _magnitude_cm(vec):
    """Euclidean norm of the first three components of *vec*, in cm.

    Assumes the components are metres — TODO confirm against the exporter.
    """
    return math.hypot(vec[0], vec[1], vec[2]) * 100


def _format_pose(label, pose):
    """Render one hand's 6DoF pose as a markdown fragment.

    Args:
        label: Display name, e.g. ``"Left Hand"``.
        pose: Sequence ``[x, y, z, roll, pitch, yaw]`` (metres / radians —
            presumed from the *100 / rad->deg conversions; verify upstream),
            or None / too short when unavailable.

    Returns:
        A markdown string; a placeholder line when no usable pose exists.
    """
    if not pose or len(pose) < 6:
        return f"\n**{label}:** No pose available\n"
    return f"""
**{label}:**
- Position: X={pose[0]*100:.1f}cm, Y={pose[1]*100:.1f}cm, Z={pose[2]*100:.1f}cm
- Rotation: Roll={math.degrees(pose[3]):.1f}Β°, Pitch={math.degrees(pose[4]):.1f}Β°, Yaw={math.degrees(pose[5]):.1f}Β°
"""


def get_frame_info(frame_idx):
    """Build the markdown info panel for a specific frame.

    Args:
        frame_idx: Frame index (float from the Gradio slider; truncated).

    Returns:
        Markdown with detection flags, 6DoF poses and per-frame action
        magnitudes, or an ``"Error: ..."`` string on malformed data.
    """
    try:
        idx = int(frame_idx)
        frame_key = str(idx)  # JSON files key frames by stringified index

        # 2D hand detections (MediaPipe) — presence only.
        hand_data = hands_2d.get(frame_key) or {}
        left_detected = bool(hand_data.get('left_hand'))
        right_detected = bool(hand_data.get('right_hand'))

        # 6DoF end-effector poses; entries may be absent or malformed.
        ee_data = end_effector.get(frame_key) or {}
        left_entry = ee_data.get('left_hand')
        right_entry = ee_data.get('right_hand')
        left_pose = left_entry.get('pose_6dof') if isinstance(left_entry, dict) else None
        right_pose = right_entry.get('pose_6dof') if isinstance(right_entry, dict) else None

        # Per-frame action deltas.
        action_data = actions.get(frame_key) or {}
        left_action = action_data.get('left_hand_action')
        camera_action = action_data.get('camera_action')

        info = f"""### Frame {idx} / {total_frames - 1}
**Time:** {idx / fps:.2f}s

---

#### ποΈ Hand Detection
- **Left Hand:** {'✅ Detected' if left_detected else '❌ Not Detected'}
- **Right Hand:** {'✅ Detected' if right_detected else '❌ Not Detected'}

---

#### π End-Effector Pose (6DoF)
"""
        # Fix: the original omitted the "No pose available" placeholder for
        # the right hand; both hands now render consistently.
        info += _format_pose("Left Hand", left_pose)
        info += _format_pose("Right Hand", right_pose)

        info += "\n---\n\n#### π― Actions (Delta per frame)\n"

        if left_action and len(left_action) >= 3:
            info += f"**Left Hand Movement:** {_magnitude_cm(left_action):.2f} cm\n"
        if camera_action and len(camera_action) >= 3:
            info += f"**Camera Movement:** {_magnitude_cm(camera_action):.2f} cm\n"

        return info
    except Exception as e:
        # Defensive catch-all: a malformed frame entry degrades to an error
        # string in the UI instead of crashing the Gradio callback.
        return f"Error: {str(e)}"
|
|
|
|
|
def get_frame_image(frame_idx):
    """Return the extracted RGB frame path for *frame_idx*, or None.

    Frames were exported every 10th index, so the index rounded down to a
    multiple of 10 is tried first, then the exact index as a fallback.
    """
    idx = int(frame_idx)
    for candidate in ((idx // 10) * 10, idx):
        candidate_path = DATA_DIR / "frames" / f"{candidate}.jpg"
        if candidate_path.exists():
            return str(candidate_path)
    return None
|
|
|
|
|
def update_display(frame_idx):
    """Gradio callback: produce the (image path, info markdown) pair."""
    return get_frame_image(frame_idx), get_frame_info(frame_idx)
|
|
|
|
|
|
|
|
# Build the Gradio UI at import time so `demo` exists for launch().
with gr.Blocks(title="DI Human Demo Visualizer") as demo:

    # --- Header / pipeline description ---
    gr.Markdown("""
# π€ Dynamic Intelligence - Human Demo Visualizer

**Egocentric hand tracking dataset for humanoid robot training**

Pipeline: iPhone LiDAR β MediaPipe β 6DoF End-Effector β Robot Training Data
""")

    # Dataset summary table (values computed once at module load).
    gr.Markdown(f"""
| Stat | Value |
|------|-------|
| Total Frames | {total_frames} |
| Hand Detection | {hand_detection_rate:.1f}% |
| Left Hand Poses | {left_poses} |
| Right Hand Poses | {right_poses} |
| FPS | {fps} |
""")

    gr.Markdown("---")

    # --- Interactive frame browser ---
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### πΉ RGB Frame")
            frame_image = gr.Image(label="Frame", height=400)
            # Only every 10th frame was extracted to disk, hence step=10.
            frame_slider = gr.Slider(
                minimum=0,
                maximum=max(1, total_frames - 1),
                step=10,
                value=0,
                label="Frame (every 10th frame available)"
            )

        with gr.Column(scale=1):
            frame_info = gr.Markdown("Select a frame to see details")

    gr.Markdown("---")

    # --- Static validation plots (each shown only if its PNG exists) ---
    gr.Markdown("### π Validation Plots")

    with gr.Row():
        camera_plot = DATA_DIR / "plots" / "camera_trajectory.png"
        left_plot = DATA_DIR / "plots" / "left_hand_trajectory.png"
        if camera_plot.exists():
            gr.Image(value=str(camera_plot), label="Camera Trajectory")
        if left_plot.exists():
            gr.Image(value=str(left_plot), label="Left Hand Trajectory")

    with gr.Row():
        pose_plot = DATA_DIR / "plots" / "hand_pose_vs_time.png"
        action_plot = DATA_DIR / "plots" / "actions_histogram.png"
        if pose_plot.exists():
            gr.Image(value=str(pose_plot), label="Hand Pose vs Time")
        if action_plot.exists():
            gr.Image(value=str(action_plot), label="Actions Distribution")

    # --- Footer: static validation summary ---
    gr.Markdown("""
---

### ✅ Physics Validation Results

| Check | Status | Details |
|-------|--------|---------|
| Camera Trajectory | ✅ PASS | Smooth movement, ~40cm total range |
| Hand Depth Range | ✅ PASS | 15-60cm from camera (realistic) |
| Action Magnitudes | ✅ PASS | Median 0.34cm/frame (no tracking errors) |
| 6DoF Rotations | ✅ PASS | Natural hand movement patterns |

---

**Organization:** [Dynamic Intelligence](https://huggingface.co/DynamicIntelligence)
""")

    # Wire the slider so moving it refreshes both the image and info panel.
    frame_slider.change(
        fn=update_display,
        inputs=[frame_slider],
        outputs=[frame_image, frame_info]
    )
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when executed directly (not on import).
    demo.launch()
|
|
|