Raffael-Kultyshev committed on
Commit
9b7cf70
·
verified ·
1 Parent(s): 709d2ef

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +268 -0
app.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ DI LeRobot Converter API
4
+ ========================
5
+ Receives episode data (JSON + video URL) from the iOS app,
6
+ creates a LeRobot v2.0 parquet file, uploads parquet + video
7
+ to the HuggingFace dataset repo, and updates meta/info.json.
8
+
9
+ Deployed as a HuggingFace Space with Gradio.
10
+ The iOS app calls the /api/convert endpoint after uploading to GCS.
11
+ """
12
+
13
+ import gradio as gr
14
+ import json
15
+ import os
16
+ import tempfile
17
+ import shutil
18
+ from pathlib import Path
19
+
20
+ import pandas as pd
21
+ import numpy as np
22
+ from huggingface_hub import HfApi, hf_hub_download
23
+
24
+ # Config
25
+ HF_DATASET_REPO = "DynamicIntelligence/humanoid-robots-training-dataset"
26
+ HF_TOKEN = os.environ.get("HF_TOKEN", "")
27
+ GCS_BUCKET = "di_record_intern_data"
28
+ CHUNKS_SIZE = 100
29
+
30
+
31
+ def convert_episode(episode_json: str) -> str:
32
+ """
33
+ Convert episode data to LeRobot v2.0 format and upload to dataset repo.
34
+
35
+ Input JSON schema:
36
+ {
37
+ "episode_index": int, # auto-assigned if -1
38
+ "language_instruction": str,
39
+ "fps": int,
40
+ "frames": [
41
+ {
42
+ "timestamp": float,
43
+ "pose": {"x": f, "y": f, "z": f, "yaw": f, "pitch": f, "roll": f},
44
+ "left_hand": [x, y, z] or null,
45
+ "right_hand": [x, y, z] or null
46
+ }, ...
47
+ ],
48
+ "video_gcs_path": str # GCS path to rgb_video.mp4
49
+ }
50
+ """
51
+ try:
52
+ data = json.loads(episode_json)
53
+ except json.JSONDecodeError as e:
54
+ return json.dumps({"error": f"Invalid JSON: {e}"})
55
+
56
+ api = HfApi(token=HF_TOKEN)
57
+
58
+ # Determine episode index
59
+ episode_index = data.get("episode_index", -1)
60
+ if episode_index < 0:
61
+ # Auto-assign: read current info.json to get next index
62
+ try:
63
+ info_path = hf_hub_download(
64
+ repo_id=HF_DATASET_REPO, filename="meta/info.json",
65
+ repo_type="dataset", token=HF_TOKEN
66
+ )
67
+ with open(info_path) as f:
68
+ info = json.load(f)
69
+ episode_index = info.get("total_episodes", 0)
70
+ except Exception:
71
+ episode_index = 0
72
+
73
+ lang = data.get("language_instruction", "")
74
+ fps = data.get("fps", 30) or 30
75
+ frames = data.get("frames", [])
76
+ num_frames = len(frames)
77
+
78
+ if num_frames == 0:
79
+ return json.dumps({"error": "No frames in episode data"})
80
+
81
+ # Build parquet rows
82
+ rows = []
83
+ for i, frame in enumerate(frames):
84
+ pose = frame.get("pose", {})
85
+ cam_x = pose.get("x", 0)
86
+ cam_y = pose.get("y", 0)
87
+ cam_z = pose.get("z", 0)
88
+ cam_roll = pose.get("roll", 0)
89
+ cam_pitch = pose.get("pitch", 0)
90
+ cam_yaw = pose.get("yaw", 0)
91
+ camera_pose = [cam_x, cam_y, cam_z, cam_roll, cam_pitch, cam_yaw]
92
+
93
+ # Hand data: [x, y, z] from end_effector → pad to 9 values (3 joints × xyz)
94
+ lh = frame.get("left_hand") or [0, 0, 0]
95
+ rh = frame.get("right_hand") or [0, 0, 0]
96
+ # Pad single palm position to 3-joint format (wrist=palm, others=0)
97
+ left_hand = list(lh[:3]) + [0.0] * 6
98
+ right_hand = list(rh[:3]) + [0.0] * 6
99
+
100
+ # Action deltas
101
+ if i > 0:
102
+ prev = frames[i - 1]
103
+ pp = prev.get("pose", {})
104
+ prev_cam = [pp.get("x", 0), pp.get("y", 0), pp.get("z", 0),
105
+ pp.get("roll", 0), pp.get("pitch", 0), pp.get("yaw", 0)]
106
+ cam_delta = [camera_pose[j] - prev_cam[j] for j in range(6)]
107
+
108
+ plh = prev.get("left_hand") or [0, 0, 0]
109
+ prh = prev.get("right_hand") or [0, 0, 0]
110
+ lh_delta = [lh[j] - plh[j] if j < len(lh) and j < len(plh) else 0 for j in range(3)] + [0.0] * 6
111
+ rh_delta = [rh[j] - prh[j] if j < len(rh) and j < len(prh) else 0 for j in range(3)] + [0.0] * 6
112
+ else:
113
+ cam_delta = [0.0] * 6
114
+ lh_delta = [0.0] * 9
115
+ rh_delta = [0.0] * 9
116
+
117
+ rows.append({
118
+ "episode_index": episode_index,
119
+ "frame_index": i,
120
+ "timestamp": frame.get("timestamp", i / fps),
121
+ "observation.camera_pose": camera_pose,
122
+ "observation.left_hand": left_hand,
123
+ "observation.right_hand": right_hand,
124
+ "action.camera_delta": cam_delta,
125
+ "action.left_hand_delta": lh_delta,
126
+ "action.right_hand_delta": rh_delta,
127
+ "language_instruction": lang,
128
+ "next.done": i == num_frames - 1,
129
+ })
130
+
131
+ # Create parquet
132
+ tmp = Path(tempfile.mkdtemp())
133
+ try:
134
+ df = pd.DataFrame(rows)
135
+ chunk_idx = episode_index // CHUNKS_SIZE
136
+ parquet_path = tmp / f"episode_{episode_index:06d}.parquet"
137
+ df.to_parquet(parquet_path, index=False)
138
+
139
+ # Upload parquet
140
+ api.upload_file(
141
+ path_or_fileobj=str(parquet_path),
142
+ path_in_repo=f"data/chunk-{chunk_idx:03d}/episode_{episode_index:06d}.parquet",
143
+ repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN,
144
+ )
145
+
146
+ # Upload video from GCS if provided
147
+ video_gcs_path = data.get("video_gcs_path", "")
148
+ video_gcs_url = data.get("video_gcs_url", "")
149
+ video_uploaded = False
150
+
151
+ if video_gcs_url:
152
+ # Download from GCS public URL and re-upload to HF
153
+ import urllib.request
154
+ video_local = tmp / "rgb_video.mp4"
155
+ try:
156
+ urllib.request.urlretrieve(video_gcs_url, str(video_local))
157
+ api.upload_file(
158
+ path_or_fileobj=str(video_local),
159
+ path_in_repo=f"videos/chunk-{chunk_idx:03d}/rgb/episode_{episode_index:06d}.mp4",
160
+ repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN,
161
+ )
162
+ video_uploaded = True
163
+ except Exception as ve:
164
+ pass # Video upload is optional
165
+
166
+ # Update meta/info.json
167
+ try:
168
+ existing_info_path = hf_hub_download(
169
+ repo_id=HF_DATASET_REPO, filename="meta/info.json",
170
+ repo_type="dataset", token=HF_TOKEN
171
+ )
172
+ with open(existing_info_path) as f:
173
+ info = json.load(f)
174
+ info["total_episodes"] = max(info.get("total_episodes", 0), episode_index + 1)
175
+ info["total_frames"] = info.get("total_frames", 0) + num_frames
176
+ info["splits"] = {"train": f"0:{info['total_episodes']}"}
177
+ info["total_chunks"] = (info["total_episodes"] - 1) // CHUNKS_SIZE + 1
178
+ if video_uploaded:
179
+ info["total_videos"] = info.get("total_videos", 0) + 1
180
+ except Exception:
181
+ info = build_default_info(episode_index, num_frames)
182
+
183
+ meta_dir = tmp / "meta"
184
+ meta_dir.mkdir(exist_ok=True)
185
+ with open(meta_dir / "info.json", "w") as f:
186
+ json.dump(info, f, indent=2)
187
+
188
+ api.upload_folder(
189
+ folder_path=str(meta_dir), path_in_repo="meta",
190
+ repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN,
191
+ )
192
+
193
+ result = {
194
+ "success": True,
195
+ "episode_index": episode_index,
196
+ "num_frames": num_frames,
197
+ "parquet_uploaded": True,
198
+ "video_uploaded": video_uploaded,
199
+ "dataset_url": f"https://huggingface.co/datasets/{HF_DATASET_REPO}",
200
+ }
201
+ return json.dumps(result)
202
+
203
+ finally:
204
+ shutil.rmtree(tmp, ignore_errors=True)
205
+
206
+
207
def build_default_info(episode_index, num_frames, fps=30):
    """Build a fresh meta/info.json structure for a brand-new dataset.

    Used as a fallback when the repo has no readable meta/info.json yet.

    Args:
        episode_index: index of the episode being written; total_episodes
            becomes episode_index + 1.
        num_frames: frame count of that episode (seeds total_frames).
        fps: recording frame rate; defaults to 30 so existing two-argument
            callers keep their previous behavior.
    """
    # NOTE(review): left-hand joint names are wrist/thumb/index while the
    # right hand uses wrist/index/middle — looks inconsistent; confirm the
    # intended joint sets before relying on these names downstream.
    return {
        "codebase_version": "v2.0",
        "robot_type": "unknown",
        "total_episodes": episode_index + 1,
        "total_frames": num_frames,
        "total_tasks": 1,
        "total_videos": 1,
        "total_chunks": 1,
        "chunks_size": CHUNKS_SIZE,
        "fps": fps,
        "splits": {"train": f"0:{episode_index + 1}"},
        "data_path": "data/chunk-{episode_chunk:03d}/episode_{episode_index:06d}.parquet",
        "video_path": "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4",
        "features": {
            "observation.camera_pose": {"dtype": "float32", "shape": [6],
                "names": ["x", "y", "z", "roll", "pitch", "yaw"]},
            "observation.left_hand": {"dtype": "float32", "shape": [9],
                "names": ["wrist_x", "wrist_y", "wrist_z", "thumb_x", "thumb_y", "thumb_z",
                          "index_x", "index_y", "index_z"]},
            "observation.right_hand": {"dtype": "float32", "shape": [9],
                "names": ["wrist_x", "wrist_y", "wrist_z", "index_x", "index_y", "index_z",
                          "middle_x", "middle_y", "middle_z"]},
            "action.camera_delta": {"dtype": "float32", "shape": [6],
                "names": ["dx", "dy", "dz", "droll", "dpitch", "dyaw"]},
            "action.left_hand_delta": {"dtype": "float32", "shape": [9],
                "names": ["wrist_dx", "wrist_dy", "wrist_dz", "thumb_dx", "thumb_dy",
                          "thumb_dz", "index_dx", "index_dy", "index_dz"]},
            "action.right_hand_delta": {"dtype": "float32", "shape": [9],
                "names": ["wrist_dx", "wrist_dy", "wrist_dz", "index_dx", "index_dy",
                          "index_dz", "middle_dx", "middle_dy", "middle_dz"]},
            "language_instruction": {"dtype": "string", "shape": [1], "names": None},
            "timestamp": {"dtype": "float64", "shape": [1], "names": None},
            "frame_index": {"dtype": "int64", "shape": [1], "names": None},
            "episode_index": {"dtype": "int64", "shape": [1], "names": None},
            "next.done": {"dtype": "bool", "shape": [1], "names": None},
            "rgb": {"dtype": "video", "shape": [480, 640, 3],
                "names": ["height", "width", "channels"],
                "video_info": {"video.fps": fps, "video.codec": "h264",
                               "video.pix_fmt": "yuv420p", "video.is_depth_map": False,
                               "has_audio": False}},
        },
        "videos": {
            "rgb": {"video_info": {"video.fps": fps, "video.codec": "h264",
                                   "video.pix_fmt": "yuv420p", "video.is_depth_map": False,
                                   "has_audio": False}}
        },
    }
257
+ # Gradio UI (also exposes /api/convert endpoint automatically)
258
+ demo = gr.Interface(
259
+ fn=convert_episode,
260
+ inputs=gr.Textbox(label="Episode JSON", lines=10, placeholder="Paste episode JSON here..."),
261
+ outputs=gr.Textbox(label="Result"),
262
+ title="DI LeRobot Converter",
263
+ description="Converts episode data from DI iOS app to LeRobot v2.0 format and uploads to HuggingFace dataset repo.",
264
+ api_name="convert",
265
+ )
266
+
267
+ if __name__ == "__main__":
268
+ demo.launch(server_name="0.0.0.0", server_port=7860)