|
|
""" |
|
|
Dynamic Intelligence - Human Demo Visualizer |
|
|
Egocentric hand tracking dataset visualizer for robot training data |
|
|
""" |
|
|
|
|
|
import json
import math
from pathlib import Path

import gradio as gr
|
|
|
|
|
|
|
|
# Dataset root: ./data next to this script (holds *.json, frames/, plots/).
DATA_DIR = Path(__file__).parent / "data"
|
|
|
|
|
def load_json_safe(path):
    """Load a JSON file, returning an empty dict on read/parse failure.

    Args:
        path: ``Path`` or string pointing at the JSON file.

    Returns:
        The parsed JSON content, or ``{}`` if the file is missing,
        unreadable, or not valid JSON.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers missing files / permission problems;
        # json.JSONDecodeError is a ValueError subclass. Anything else is a
        # real bug and should propagate rather than be silently swallowed.
        print(f"Error loading {path}: {e}")
        return {}
|
|
|
|
|
|
|
|
# Load every dataset JSON at import time; load_json_safe() yields {} on
# failure so the UI can still start with partial data.
print("Loading data...")
metadata = load_json_safe(DATA_DIR / "metadata.json")
end_effector = load_json_safe(DATA_DIR / "end_effector.json")
actions = load_json_safe(DATA_DIR / "actions.json")
hands_2d = load_json_safe(DATA_DIR / "hands_2d.json")
print(f"Loaded: metadata={len(metadata)}, ee={len(end_effector)}, actions={len(actions)}, hands={len(hands_2d)}")

# Summary statistics shown in the header table.
# max(1, ...) guards the divisions below against missing/empty metadata.
total_frames = max(1, len(metadata.get('poses', [])))
fps = metadata.get('fps', 60)  # fall back to 60 fps when not recorded
# Percentage of frames that have any 2D hand entry at all.
hand_detection_rate = len(hands_2d) / total_frames * 100
|
|
|
|
|
|
|
|
# Count the frames that carry a valid end-effector entry for each hand.
# Non-dict or empty entries contribute nothing, matching the original loop.
left_poses = sum(
    1 for entry in end_effector.values()
    if isinstance(entry, dict) and entry.get('left_hand')
)
right_poses = sum(
    1 for entry in end_effector.values()
    if isinstance(entry, dict) and entry.get('right_hand')
)

print(f"Stats: frames={total_frames}, left={left_poses}, right={right_poses}")
|
|
|
|
|
def _magnitude_cm(vec):
    """Euclidean norm of the first three components of *vec*, in cm.

    Assumes the components are metres — TODO confirm against the exporter.
    """
    return math.hypot(vec[0], vec[1], vec[2]) * 100


def _format_pose(label, pose):
    """Render one hand's 6DoF pose as a markdown fragment.

    Args:
        label: Display name, e.g. ``"Left Hand"``.
        pose: Sequence ``[x, y, z, roll, pitch, yaw]`` (metres / radians —
            presumed from the *100 / rad->deg conversions; verify upstream),
            or None / too short when unavailable.

    Returns:
        A markdown string; a placeholder line when no usable pose exists.
    """
    if not pose or len(pose) < 6:
        return f"\n**{label}:** No pose available\n"
    return f"""
**{label}:**
- Position: X={pose[0]*100:.1f}cm, Y={pose[1]*100:.1f}cm, Z={pose[2]*100:.1f}cm
- Rotation: Roll={math.degrees(pose[3]):.1f}Β°, Pitch={math.degrees(pose[4]):.1f}Β°, Yaw={math.degrees(pose[5]):.1f}Β°
"""


def get_frame_info(frame_idx):
    """Build the markdown info panel for a specific frame.

    Args:
        frame_idx: Frame index (float from the Gradio slider; truncated).

    Returns:
        Markdown with detection flags, 6DoF poses and per-frame action
        magnitudes, or an ``"Error: ..."`` string on malformed data.
    """
    try:
        idx = int(frame_idx)
        frame_key = str(idx)  # JSON files key frames by stringified index

        # 2D hand detections (MediaPipe) — presence only.
        hand_data = hands_2d.get(frame_key) or {}
        left_detected = bool(hand_data.get('left_hand'))
        right_detected = bool(hand_data.get('right_hand'))

        # 6DoF end-effector poses; entries may be absent or malformed.
        ee_data = end_effector.get(frame_key) or {}
        left_entry = ee_data.get('left_hand')
        right_entry = ee_data.get('right_hand')
        left_pose = left_entry.get('pose_6dof') if isinstance(left_entry, dict) else None
        right_pose = right_entry.get('pose_6dof') if isinstance(right_entry, dict) else None

        # Per-frame action deltas.
        action_data = actions.get(frame_key) or {}
        left_action = action_data.get('left_hand_action')
        camera_action = action_data.get('camera_action')

        info = f"""### Frame {idx} / {total_frames - 1}
**Time:** {idx / fps:.2f}s

---

#### ποΈ Hand Detection
- **Left Hand:** {'✅ Detected' if left_detected else '❌ Not Detected'}
- **Right Hand:** {'✅ Detected' if right_detected else '❌ Not Detected'}

---

#### π End-Effector Pose (6DoF)
"""
        # Fix: the original omitted the "No pose available" placeholder for
        # the right hand; both hands now render consistently.
        info += _format_pose("Left Hand", left_pose)
        info += _format_pose("Right Hand", right_pose)

        info += "\n---\n\n#### π― Actions (Delta per frame)\n"

        if left_action and len(left_action) >= 3:
            info += f"**Left Hand Movement:** {_magnitude_cm(left_action):.2f} cm\n"
        if camera_action and len(camera_action) >= 3:
            info += f"**Camera Movement:** {_magnitude_cm(camera_action):.2f} cm\n"

        return info
    except Exception as e:
        # Defensive catch-all: a malformed frame entry degrades to an error
        # string in the UI instead of crashing the Gradio callback.
        return f"Error: {str(e)}"
|
|
|
|
|
def get_frame_image(frame_idx):
    """Return the extracted RGB frame path for *frame_idx*, or None.

    Frames were exported every 10th index, so the index rounded down to a
    multiple of 10 is tried first, then the exact index as a fallback.
    """
    idx = int(frame_idx)
    for candidate in ((idx // 10) * 10, idx):
        candidate_path = DATA_DIR / "frames" / f"{candidate}.jpg"
        if candidate_path.exists():
            return str(candidate_path)
    return None
|
|
|
|
|
def update_display(frame_idx):
    """Gradio callback: produce the (image path, info markdown) pair."""
    return get_frame_image(frame_idx), get_frame_info(frame_idx)
|
|
|
|
|
|
|
|
# Build the Gradio UI at import time so `demo` exists for launch().
with gr.Blocks(title="DI Human Demo Visualizer") as demo:

    # --- Header / pipeline description ---
    gr.Markdown("""
# π€ Dynamic Intelligence - Human Demo Visualizer

**Egocentric hand tracking dataset for humanoid robot training**

Pipeline: iPhone LiDAR β MediaPipe β 6DoF End-Effector β Robot Training Data
""")

    # Dataset summary table (values computed once at module load).
    gr.Markdown(f"""
| Stat | Value |
|------|-------|
| Total Frames | {total_frames} |
| Hand Detection | {hand_detection_rate:.1f}% |
| Left Hand Poses | {left_poses} |
| Right Hand Poses | {right_poses} |
| FPS | {fps} |
""")

    gr.Markdown("---")

    # --- Interactive frame browser ---
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### πΉ RGB Frame")
            frame_image = gr.Image(label="Frame", height=400)
            # Only every 10th frame was extracted to disk, hence step=10.
            frame_slider = gr.Slider(
                minimum=0,
                maximum=max(1, total_frames - 1),
                step=10,
                value=0,
                label="Frame (every 10th frame available)"
            )

        with gr.Column(scale=1):
            frame_info = gr.Markdown("Select a frame to see details")

    gr.Markdown("---")

    # --- Static validation plots (each shown only if its PNG exists) ---
    gr.Markdown("### π Validation Plots")

    with gr.Row():
        camera_plot = DATA_DIR / "plots" / "camera_trajectory.png"
        left_plot = DATA_DIR / "plots" / "left_hand_trajectory.png"
        if camera_plot.exists():
            gr.Image(value=str(camera_plot), label="Camera Trajectory")
        if left_plot.exists():
            gr.Image(value=str(left_plot), label="Left Hand Trajectory")

    with gr.Row():
        pose_plot = DATA_DIR / "plots" / "hand_pose_vs_time.png"
        action_plot = DATA_DIR / "plots" / "actions_histogram.png"
        if pose_plot.exists():
            gr.Image(value=str(pose_plot), label="Hand Pose vs Time")
        if action_plot.exists():
            gr.Image(value=str(action_plot), label="Actions Distribution")

    # --- Footer: static validation summary ---
    gr.Markdown("""
---

### ✅ Physics Validation Results

| Check | Status | Details |
|-------|--------|---------|
| Camera Trajectory | ✅ PASS | Smooth movement, ~40cm total range |
| Hand Depth Range | ✅ PASS | 15-60cm from camera (realistic) |
| Action Magnitudes | ✅ PASS | Median 0.34cm/frame (no tracking errors) |
| 6DoF Rotations | ✅ PASS | Natural hand movement patterns |

---

**Organization:** [Dynamic Intelligence](https://huggingface.co/DynamicIntelligence)
""")

    # Wire the slider so moving it refreshes both the image and info panel.
    frame_slider.change(
        fn=update_display,
        inputs=[frame_slider],
        outputs=[frame_image, frame_info]
    )
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when executed directly (not on import).
    demo.launch()
|
|
|