xinjjj committed on
Commit
023981c
·
1 Parent(s): cebb4ba
Files changed (47) hide show
  1. app.py +5 -11
  2. common.py +10 -58
  3. embodied_gen/scripts/eval_collision_success.py +813 -0
  4. embodied_gen/scripts/room_gen/export_scene.py +198 -30
  5. embodied_gen/scripts/room_gen/gen_room.py +2 -2
  6. embodied_gen/scripts/room_gen/render_birdseye.py +271 -0
  7. embodied_gen/scripts/room_gen/render_usd.py +1606 -0
  8. embodied_gen/skills/README.md +62 -0
  9. embodied_gen/skills/asset-converter/SKILL.md +88 -0
  10. embodied_gen/skills/asset-creator/SKILL.md +96 -0
  11. embodied_gen/skills/asset-retrieval/SKILL.md +87 -0
  12. embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py +329 -0
  13. embodied_gen/skills/asset-scale/SKILL.md +94 -0
  14. embodied_gen/skills/asset-scale/__init__.py +36 -0
  15. embodied_gen/skills/asset-scale/asset_scale.py +347 -0
  16. embodied_gen/skills/background-creator/SKILL.md +70 -0
  17. embodied_gen/skills/claude_adapter/.claude-plugin/marketplace.json +17 -0
  18. embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/.claude-plugin/plugin.json +8 -0
  19. embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/assets.md +32 -0
  20. embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/background.md +29 -0
  21. embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/convert.md +32 -0
  22. embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/layout.md +29 -0
  23. embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/process.md +29 -0
  24. embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/room.md +29 -0
  25. embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/sim.md +29 -0
  26. embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/spatial.md +29 -0
  27. embodied_gen/skills/layout-creator/SKILL.md +83 -0
  28. embodied_gen/skills/room-creator/SKILL.md +83 -0
  29. embodied_gen/skills/sim-runner/SKILL.md +72 -0
  30. embodied_gen/skills/spatial-computing/README.md +59 -0
  31. embodied_gen/skills/spatial-computing/REFERENCE.md +236 -0
  32. embodied_gen/skills/spatial-computing/SKILL.md +374 -0
  33. embodied_gen/skills/spatial-computing/__init__.py +31 -0
  34. embodied_gen/skills/spatial-computing/api/__init__.py +34 -0
  35. embodied_gen/skills/spatial-computing/api/floorplan_api.py +917 -0
  36. embodied_gen/skills/spatial-computing/cli/__init__.py +7 -0
  37. embodied_gen/skills/spatial-computing/cli/main.py +267 -0
  38. embodied_gen/skills/spatial-computing/core/__init__.py +23 -0
  39. embodied_gen/skills/spatial-computing/core/collector.py +1102 -0
  40. embodied_gen/skills/spatial-computing/core/geometry.py +231 -0
  41. embodied_gen/skills/spatial-computing/core/visualizer.py +231 -0
  42. embodied_gen/utils/gpt_clients.py +15 -4
  43. embodied_gen/utils/monkey_patch/gradio.py +5 -146
  44. embodied_gen/utils/monkey_patch/infinigen.py +4 -6
  45. embodied_gen/utils/simulation.py +192 -2
  46. embodied_gen/utils/trender.py +2 -20
  47. requirements.txt +1 -1
app.py CHANGED
@@ -351,19 +351,13 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
351
  outputs=[is_samimage, single_sam_image_example, single_image_example],
352
  )
353
 
354
- def _preprocess_image_dispatch(img, rmbg):
355
- import sys, os
356
- print(f"[ZGPU] upload handler ENTER pid={os.getpid()}", flush=True)
357
- sys.stdout.flush()
358
- out = preprocess_image_fn(img, rmbg, _enable_pre_resize_default)
359
- print("[ZGPU] upload handler RETURN", flush=True)
360
- sys.stdout.flush()
361
- return out
362
-
363
  image_prompt.upload(
364
- _preprocess_image_dispatch,
 
 
365
  inputs=[image_prompt, rmbg_tag],
366
  outputs=[image_prompt, raw_image_cache],
 
367
  ).success(
368
  active_btn_by_content,
369
  inputs=image_prompt,
@@ -542,4 +536,4 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
542
 
543
 
544
  if __name__ == "__main__":
545
- demo.launch(ssr_mode=False)
 
351
  outputs=[is_samimage, single_sam_image_example, single_image_example],
352
  )
353
 
 
 
 
 
 
 
 
 
 
354
  image_prompt.upload(
355
+ lambda img, rmbg: preprocess_image_fn(
356
+ img, rmbg, _enable_pre_resize_default
357
+ ),
358
  inputs=[image_prompt, rmbg_tag],
359
  outputs=[image_prompt, raw_image_cache],
360
+ queue=False,
361
  ).success(
362
  active_btn_by_content,
363
  inputs=image_prompt,
 
536
 
537
 
538
  if __name__ == "__main__":
539
+ demo.launch()
common.py CHANGED
@@ -14,26 +14,15 @@
14
  # implied. See the License for the specific language governing
15
  # permissions and limitations under the License.
16
 
17
- # spaces 0.50 raises if CUDA was initialized before `import spaces`,
18
- # so the zerogpu-log injection (which touches site-packages files only)
19
- # must run first, then `import spaces`, then everything else.
20
- from embodied_gen.utils.monkey_patch.gradio import _patch_spaces_zerogpu_logs
21
-
22
- _patch_spaces_zerogpu_logs()
23
-
24
- import spaces # noqa: E402
25
 
 
26
  from embodied_gen.utils.monkey_patch.gradio import (
27
- _disable_xformers_flash3,
28
- _neutralize_warp_in_parent,
29
  _patch_open3d_cuda_device_count_bug,
30
  )
31
- from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
32
 
33
- _neutralize_warp_in_parent()
34
  _patch_open3d_cuda_device_count_bug()
35
- _disable_xformers_flash3()
36
- monkey_path_trellis()
37
 
38
  import gc
39
  import logging
@@ -172,24 +161,13 @@ def end_session(req: gr.Request) -> None:
172
  shutil.rmtree(user_dir)
173
 
174
 
175
- @spaces.GPU(duration=30)
176
  def preprocess_image_fn(
177
  image: str | np.ndarray | Image.Image,
178
  rmbg_tag: str = "rembg",
179
  preprocess: bool = True,
180
  ) -> tuple[Image.Image, Image.Image]:
181
  """Preprocess image with lazy model initialization to avoid CUDA init at import time."""
182
- import sys, os as _os
183
- print(f"[ZGPU] preprocess_image_fn ENTER pid={_os.getpid()}", flush=True)
184
- sys.stdout.flush()
185
- try:
186
- import torch as _torch
187
- print(f"[ZGPU] torch.cuda.is_available={_torch.cuda.is_available()} device_count={_torch.cuda.device_count()}", flush=True)
188
- _torch.zeros(1).cuda()
189
- print("[ZGPU] torch.zeros(1).cuda() OK", flush=True)
190
- except Exception as _e:
191
- print(f"[ZGPU] cuda probe FAILED: {_e!r}", flush=True)
192
- sys.stdout.flush()
193
  global _RBG_REMOVER, _RBG14_REMOVER
194
 
195
  if isinstance(image, str):
@@ -302,7 +280,7 @@ def select_point(
302
  return (image, masks), seg_image
303
 
304
 
305
- @spaces.GPU(duration=180)
306
  def image_to_3d(
307
  image: Image.Image,
308
  seed: int,
@@ -324,38 +302,17 @@ def image_to_3d(
324
  if isinstance(seg_image, np.ndarray):
325
  seg_image = Image.fromarray(seg_image)
326
 
327
- import time as _time
328
- def _t(tag, t0):
329
- dt = _time.time() - t0
330
- logger.info(f"[STAGE] {tag} took {dt:.2f}s")
331
- return _time.time()
332
-
333
- # Probe what xformers attention op is being used
334
- try:
335
- from xformers.ops.fmha import flash3 as _f3, flash as _f, cutlass as _c
336
- logger.info(
337
- f"[ATTN] flash3.disabled={bool(_f3.FwOp.not_supported_reasons.__func__(_f3.FwOp, None))} "
338
- f"flash3 module loaded; flash & cutlass available too"
339
- )
340
- except Exception as _e:
341
- logger.info(f"[ATTN] probe failed: {_e}")
342
-
343
  logger.info("Start generating 3D representation from image...")
344
  if isinstance(PIPELINE, Sam3dInference):
345
- _t0 = _time.time()
346
  outputs = PIPELINE.run(
347
  seg_image,
348
  seed=seed,
349
  stage1_inference_steps=ss_sampling_steps,
350
  stage2_inference_steps=slat_sampling_steps,
351
  )
352
- _t("Sam3dInference.run", _t0)
353
  else:
354
- _t0 = _time.time()
355
  PIPELINE.cuda()
356
- _t0 = _t("PIPELINE.cuda()", _t0)
357
  seg_image = trellis_preprocess(seg_image)
358
- _t0 = _t("trellis_preprocess", _t0)
359
  outputs = PIPELINE.run(
360
  seg_image,
361
  seed=seed,
@@ -370,18 +327,13 @@ def image_to_3d(
370
  "cfg_strength": slat_guidance_strength,
371
  },
372
  )
373
- _t0 = _t("PIPELINE.run (TRELLIS inference)", _t0)
374
  # Set back to cpu for memory saving.
375
  PIPELINE.cpu()
376
- _t("PIPELINE.cpu()", _t0)
377
 
378
- _t0 = _time.time()
379
  gs_model = outputs["gaussian"][0]
380
  mesh_model = outputs["mesh"][0]
381
  color_images = render_video(gs_model, r=1.85)["color"]
382
- _t0 = _t("render_video color", _t0)
383
  normal_images = render_video(mesh_model, r=1.85)["normal"]
384
- _t("render_video normal", _t0)
385
 
386
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
387
  os.makedirs(output_root, exist_ok=True)
@@ -629,7 +581,7 @@ def extract_urdf(
629
  )
630
 
631
 
632
- @spaces.GPU(duration=180)
633
  def text2image_fn(
634
  prompt: str,
635
  guidance_scale: float,
@@ -685,7 +637,7 @@ def text2image_fn(
685
  return save_paths + save_paths
686
 
687
 
688
- @spaces.GPU(duration=180)
689
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
690
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
691
 
@@ -701,7 +653,7 @@ def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
701
  return None, None, None
702
 
703
 
704
- @spaces.GPU(duration=180)
705
  def generate_texture_mvimages(
706
  prompt: str,
707
  controlnet_cond_scale: float = 0.55,
@@ -788,7 +740,7 @@ def backproject_texture(
788
  return output_glb_mesh, output_obj_mesh, zip_file
789
 
790
 
791
- @spaces.GPU(duration=180)
792
  def backproject_texture_v2(
793
  mesh_path: str,
794
  input_image: str,
@@ -835,7 +787,7 @@ def backproject_texture_v2(
835
  return output_glb_mesh, output_obj_mesh, zip_file
836
 
837
 
838
- @spaces.GPU(duration=180)
839
  def render_result_video(
840
  mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
841
  ) -> str:
 
14
  # implied. See the License for the specific language governing
15
  # permissions and limitations under the License.
16
 
17
+ import spaces
18
+ from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
 
 
 
 
 
 
19
 
20
+ monkey_path_trellis()
21
  from embodied_gen.utils.monkey_patch.gradio import (
 
 
22
  _patch_open3d_cuda_device_count_bug,
23
  )
 
24
 
 
25
  _patch_open3d_cuda_device_count_bug()
 
 
26
 
27
  import gc
28
  import logging
 
161
  shutil.rmtree(user_dir)
162
 
163
 
164
+ @spaces.GPU
165
  def preprocess_image_fn(
166
  image: str | np.ndarray | Image.Image,
167
  rmbg_tag: str = "rembg",
168
  preprocess: bool = True,
169
  ) -> tuple[Image.Image, Image.Image]:
170
  """Preprocess image with lazy model initialization to avoid CUDA init at import time."""
 
 
 
 
 
 
 
 
 
 
 
171
  global _RBG_REMOVER, _RBG14_REMOVER
172
 
173
  if isinstance(image, str):
 
280
  return (image, masks), seg_image
281
 
282
 
283
+ @spaces.GPU
284
  def image_to_3d(
285
  image: Image.Image,
286
  seed: int,
 
302
  if isinstance(seg_image, np.ndarray):
303
  seg_image = Image.fromarray(seg_image)
304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  logger.info("Start generating 3D representation from image...")
306
  if isinstance(PIPELINE, Sam3dInference):
 
307
  outputs = PIPELINE.run(
308
  seg_image,
309
  seed=seed,
310
  stage1_inference_steps=ss_sampling_steps,
311
  stage2_inference_steps=slat_sampling_steps,
312
  )
 
313
  else:
 
314
  PIPELINE.cuda()
 
315
  seg_image = trellis_preprocess(seg_image)
 
316
  outputs = PIPELINE.run(
317
  seg_image,
318
  seed=seed,
 
327
  "cfg_strength": slat_guidance_strength,
328
  },
329
  )
 
330
  # Set back to cpu for memory saving.
331
  PIPELINE.cpu()
 
332
 
 
333
  gs_model = outputs["gaussian"][0]
334
  mesh_model = outputs["mesh"][0]
335
  color_images = render_video(gs_model, r=1.85)["color"]
 
336
  normal_images = render_video(mesh_model, r=1.85)["normal"]
 
337
 
338
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
339
  os.makedirs(output_root, exist_ok=True)
 
581
  )
582
 
583
 
584
+ @spaces.GPU
585
  def text2image_fn(
586
  prompt: str,
587
  guidance_scale: float,
 
637
  return save_paths + save_paths
638
 
639
 
640
+ @spaces.GPU
641
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
642
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
643
 
 
653
  return None, None, None
654
 
655
 
656
+ @spaces.GPU
657
  def generate_texture_mvimages(
658
  prompt: str,
659
  controlnet_cond_scale: float = 0.55,
 
740
  return output_glb_mesh, output_obj_mesh, zip_file
741
 
742
 
743
+ @spaces.GPU
744
  def backproject_texture_v2(
745
  mesh_path: str,
746
  input_image: str,
 
787
  return output_glb_mesh, output_obj_mesh, zip_file
788
 
789
 
790
+ @spaces.GPU
791
  def render_result_video(
792
  mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
793
  ) -> str:
embodied_gen/scripts/eval_collision_success.py ADDED
@@ -0,0 +1,813 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ import json
18
+ import os
19
+ from dataclasses import asdict, dataclass, field
20
+ from typing import Optional
21
+
22
+ import imageio
23
+ import numpy as np
24
+ import sapien.core as sapien
25
+ import torch
26
+ import trimesh
27
+ import tyro
28
+ from embodied_gen.utils.log import logger
29
+ from embodied_gen.utils.simulation import (
30
+ FrankaPandaGrasper,
31
+ SapienSceneManager,
32
+ capture_frame,
33
+ create_panda_agent,
34
+ create_recording_camera,
35
+ estimate_grasp_width,
36
+ get_actor_bottom_z,
37
+ get_actor_mesh,
38
+ load_actor_from_urdf,
39
+ load_collision_mesh_from_urdf,
40
+ quat_from_yaw,
41
+ set_ground_base_color,
42
+ )
43
+
44
# Four-component color handed to set_ground_base_color for the ground plane
# (presumably RGBA in [0, 1] -- confirm against the helper).
GROUND_BASE_COLOR = [0.78, 0.90, 0.72, 1.0]

# Defaults for _wait_until_actor_settled.
# Upper bound on the additional control steps spent waiting for the object.
MAX_EXTRA_SETTLE_STEPS = 120
# Number of control steps between two consecutive bottom-z measurements.
SETTLE_CHECK_INTERVAL = 10
# Consecutive stable measurements required before the object counts as settled.
SETTLE_STABLE_WINDOWS = 3
# Largest bottom-z change between two measurements still considered stable.
SETTLE_BOTTOM_Z_TOL = 5e-4
49
+
50
+
51
+ @dataclass
52
+ class TrialResult:
53
+ """Stores per-trial evaluation results."""
54
+
55
+ yaw_deg: float
56
+ success: bool
57
+ scale_factor: float
58
+ object_width_before_scale: float
59
+ object_width_after_scale: float
60
+ settled_bottom_z: float
61
+ final_bottom_z: float
62
+ lift_delta: float
63
+ peak_bottom_z: float | None = None
64
+ peak_lift_delta: float | None = None
65
+ lift_success_threshold: float | None = None
66
+ tcp_object_offset_range: float | None = None
67
+ sync_tol: float | None = None
68
+ final_lift_pass: bool | None = None
69
+ sync_pass: bool | None = None
70
+ video_path: str | None = None
71
+ note: str = ""
72
+
73
+
74
@dataclass
class EvalCollisionConfig:
    """Command-line configuration for the collision/grasp success evaluation.

    Only ``urdf_path`` is required. When ``output_path`` or ``video_path``
    is left as ``None``, ``__post_init__`` points it at a ``grasp_trial``
    directory located next to the URDF file.
    """

    # Asset under test and where the JSON report is written.
    urdf_path: str
    output_path: Optional[str] = None

    # Trial count and gripper sizing.
    num_trials: int = 4
    max_gripper_width: float = 0.075
    gripper_clearance_ratio: float = 0.9

    # Simulation / control stepping.
    sim_freq: int = 200
    control_freq: int = 20
    settle_steps: int = 240
    post_grasp_steps: int = 80

    # Success criteria.
    lift_success_height: Optional[float] = None
    lift_success_ratio: float = 0.01
    min_lift_success_height: float = 0.001
    max_lift_success_height: float = 0.003
    sync_tol: float = 0.02

    # Scripted grasp geometry and motion budgets.
    approach_offset: float = 0.06
    grasp_clearance: float = 0.004
    grasp_height_ratio: float = 0.45
    max_descent_from_top: float = 0.03
    hover_offset: float = 0.12
    hover_open_steps: int = 10
    descent_stage_count: int = 4
    descent_n_max_step: int = 25
    lift_distance: float = 0.10
    close_steps: int = 20

    # Object placement.
    object_x: float = 0.55
    object_y: float = 0.0
    z_offset: float = 0.005

    # Backends and rendering.
    sim_backend: str = "cpu"
    render_backend: str = "gpu"
    ray_tracing: bool = False

    # Video recording.
    save_video: bool = True
    video_path: Optional[str] = None
    video_fps: int = 20
    render_interval: int = 4
    image_hw: tuple[int, int] = (512, 512)

    def __post_init__(self) -> None:
        """Fill unset output locations with files under ``<urdf dir>/grasp_trial``."""
        default_dir = os.path.join(
            os.path.dirname(self.urdf_path), "grasp_trial"
        )
        default_files = {
            "output_path": "collision_success_eval.json",
            "video_path": "collision_success_eval.mp4",
        }
        for attr_name, file_name in default_files.items():
            # Explicitly provided paths are kept untouched.
            if getattr(self, attr_name) is None:
                setattr(
                    self, attr_name, os.path.join(default_dir, file_name)
                )
124
+
125
+
126
def _compute_scale_factor(
    urdf_path: str,
    max_gripper_width: float,
    clearance_ratio: float,
) -> tuple[float, float]:
    """Derive a shrink-only scale so the asset fits inside the gripper.

    Args:
        urdf_path: URDF whose collision mesh is measured.
        max_gripper_width: Maximum gripper opening.
        clearance_ratio: Fraction of ``max_gripper_width`` the object may use.

    Returns:
        ``(scale, grasp_width)`` where ``scale`` never exceeds 1.0 and
        ``grasp_width`` is the unscaled width estimated from the mesh.

    Raises:
        ValueError: If the estimated grasp width is not above ``1e-6``.
    """
    collision_mesh = load_collision_mesh_from_urdf(urdf_path)
    measured_width = estimate_grasp_width(collision_mesh)
    allowed_width = max_gripper_width * clearance_ratio
    if measured_width <= 1e-6:
        raise ValueError(f"Invalid grasp width estimated from {urdf_path}")

    # Objects that already fit are never enlarged.
    fit_ratio = allowed_width / measured_width
    factor = fit_ratio if fit_ratio < 1.0 else 1.0
    return float(factor), float(measured_width)
140
+
141
+
142
def _compute_spawn_center_z(
    mesh: trimesh.Trimesh,
    scale_factor: float,
    z_offset: float,
) -> float:
    """Return the actor z that leaves the scaled mesh bottom ``z_offset`` above z=0.

    The lowest z of ``mesh.bounds`` is scaled by ``scale_factor`` and
    subtracted from ``z_offset``.
    """
    lowest_local_z = mesh.bounds[0, 2]
    scaled_lowest_z = float(lowest_local_z * scale_factor)
    return z_offset - scaled_lowest_z
150
+
151
+
152
def _compute_adaptive_lift_threshold(
    actor: sapien.Entity,
    ratio: float,
    min_height: float,
    max_height: float,
    absolute_override: float | None = None,
) -> float:
    """Pick the lift height that counts as a successful lift.

    Args:
        actor: Object whose mesh height drives the adaptive threshold.
        ratio: Fraction of the object height used as the threshold.
        min_height: Lower clamp for the adaptive threshold.
        max_height: Upper clamp for the adaptive threshold.
        absolute_override: When given, returned as-is (as ``float``) and
            the mesh is not queried.

    Returns:
        The override, or ``object_height * ratio`` clipped to
        ``[min_height, max_height]``.
    """
    if absolute_override is None:
        bounds = get_actor_mesh(actor).bounds
        height = float(bounds[1, 2] - bounds[0, 2])
        return float(np.clip(height * ratio, min_height, max_height))

    return float(absolute_override)
167
+
168
+
169
+ def _build_trial_video_path(
170
+ video_path: str,
171
+ trial_idx: int,
172
+ yaw_deg: float,
173
+ ) -> str:
174
+ """Build a unique per-trial video path from the base output path."""
175
+ root, ext = os.path.splitext(video_path)
176
+ if not ext:
177
+ ext = ".mp4"
178
+ return f"{root}_trial{trial_idx:02d}_yaw{int(round(yaw_deg)):03d}{ext}"
179
+
180
+
181
@dataclass
class _GraspTracker:
    """Records how closely the object follows the gripper while being lifted.

    Looking only at how far the object rose is unreliable: an object that
    is knocked away can briefly jump up before dropping again. Each call to
    ``update`` therefore also stores the offset between the object bottom
    and the gripper TCP height, so callers can check that the object moved
    together with the gripper after closing.
    """

    actor: sapien.Entity
    grasper: FrankaPandaGrasper
    # Highest object bottom z observed so far; None until the first update.
    peak_bottom_z: float | None = None
    # One ``bottom_z - tcp_z`` sample per update call.
    tcp_object_offsets: list[float] = field(default_factory=list)

    def update(self) -> None:
        """Sample the object bottom and TCP heights and update the metrics."""
        object_bottom = get_actor_bottom_z(self.actor)
        gripper_z = float(self.grasper.agent.tcp.pose[0].sp.p[2])
        if self.peak_bottom_z is None:
            self.peak_bottom_z = object_bottom
        else:
            self.peak_bottom_z = max(self.peak_bottom_z, object_bottom)
        self.tcp_object_offsets.append(object_bottom - gripper_z)

    @property
    def offset_range(self) -> float:
        """Spread (max - min) of the recorded offsets; 0.0 when none exist."""
        samples = self.tcp_object_offsets
        if samples:
            return float(max(samples) - min(samples))
        return 0.0
213
+
214
+
215
def _execute_actions(
    scene_manager: SapienSceneManager,
    agent: object,
    actions: np.ndarray,
    control_freq: int,
    camera: sapien.render.RenderCameraComponent | None = None,
    render_interval: int = 1,
    video_frames: list[np.ndarray] | None = None,
    tracker: _GraspTracker | None = None,
) -> None:
    """Step the scene once per action, optionally recording and tracking.

    Args:
        scene_manager: Scene wrapper providing ``sim_freq`` and ``step_action``.
        agent: Robot agent the actions are applied to.
        actions: Sequence of actions; each one is sent as a float32 tensor
            with a leading axis of size one.
        control_freq: Control rate used to derive simulation sub-steps.
        camera: Camera to render from; nothing is rendered when ``None``.
        render_interval: Keep one frame every this many actions (values
            below 1 behave like 1).
        video_frames: List that receives the kept "Color" frames.
        tracker: Optional tracker updated after every action.
    """
    steps_per_control = max(1, scene_manager.sim_freq // control_freq)
    has_camera = camera is not None
    cameras = [camera] if has_camera else []
    render_keys = ["Color"] if has_camera else []

    for step_idx, action in enumerate(actions):
        batched_action = torch.tensor(action[None, ...], dtype=torch.float32)
        frames = scene_manager.step_action(
            agent,
            batched_action,
            cameras=cameras,
            render_keys=render_keys,
            sim_steps_per_control=steps_per_control,
        )
        keep_frame = (
            has_camera
            and video_frames is not None
            and step_idx % max(1, render_interval) == 0
        )
        if keep_frame:
            color_image = frames[camera.name][0]["Color"]
            video_frames.append(np.array(color_image))
        if tracker is not None:
            tracker.update()
245
+
246
+
247
def _hold_gripper_state(
    scene_manager: SapienSceneManager,
    grasper: FrankaPandaGrasper,
    gripper_state: int,
    control_freq: int,
    n_step: int,
    camera: sapien.render.RenderCameraComponent | None = None,
    render_interval: int = 1,
    video_frames: list[np.ndarray] | None = None,
    tracker: _GraspTracker | None = None,
) -> None:
    """Keep the gripper in ``gripper_state`` for ``n_step`` control steps.

    The actions come from ``grasper.control_gripper`` and are executed on
    ``grasper.agent`` through ``_execute_actions``; the recording and
    tracking arguments are forwarded unchanged.
    """
    gripper_actions = grasper.control_gripper(
        gripper_state=gripper_state,
        n_step=n_step,
    )
    forwarded = {
        "camera": camera,
        "render_interval": render_interval,
        "video_frames": video_frames,
        "tracker": tracker,
    }
    _execute_actions(
        scene_manager,
        grasper.agent,
        gripper_actions,
        control_freq,
        **forwarded,
    )
273
+
274
+
275
def _wait_until_actor_settled(
    scene_manager: SapienSceneManager,
    grasper: FrankaPandaGrasper,
    actor: sapien.Entity,
    control_freq: int,
    initial_bottom_z: float,
    max_extra_steps: int = MAX_EXTRA_SETTLE_STEPS,
    check_interval: int = SETTLE_CHECK_INTERVAL,
    stable_windows: int = SETTLE_STABLE_WINDOWS,
    bottom_z_tol: float = SETTLE_BOTTOM_Z_TOL,
    camera: sapien.render.RenderCameraComponent | None = None,
    render_interval: int = 1,
    video_frames: list[np.ndarray] | None = None,
) -> float:
    """Keep stepping the scene until the object's bottom height stops changing.

    The scene is advanced in chunks of at most ``check_interval`` control
    steps while holding gripper state 1. After each chunk the object bottom
    z is compared with the previous measurement; the wait ends once
    ``stable_windows`` consecutive comparisons differ by at most
    ``bottom_z_tol``, or once ``max_extra_steps`` steps have been spent.

    Returns:
        The most recent bottom z measurement (``initial_bottom_z`` when no
        step was executed).
    """
    step_budget = max(0, max_extra_steps)
    last_bottom_z = initial_bottom_z
    stable_streak = 0

    while step_budget > 0 and stable_streak < stable_windows:
        chunk = min(check_interval, step_budget)
        _hold_gripper_state(
            scene_manager,
            grasper,
            gripper_state=1,
            control_freq=control_freq,
            n_step=chunk,
            camera=camera,
            render_interval=render_interval,
            video_frames=video_frames,
        )
        measured_z = get_actor_bottom_z(actor)
        is_stable = abs(measured_z - last_bottom_z) <= bottom_z_tol
        # Any movement above the tolerance restarts the streak.
        stable_streak = stable_streak + 1 if is_stable else 0
        last_bottom_z = measured_z
        step_budget -= chunk

    return last_bottom_z
315
+
316
+
317
def _plan_scripted_grasp_stages(
    grasper: FrankaPandaGrasper,
    actor: sapien.Entity,
    grasp_height_ratio: float,
    grasp_clearance: float,
    approach_offset: float,
    lift_distance: float,
    max_descent_from_top: float | None = None,
) -> tuple[sapien.Pose, sapien.Pose, sapien.Pose]:
    """Compute pre-grasp, grasp and lift poses for a vertical top-down grasp.

    Args:
        grasper: Provides ``agent.build_grasp_pose``.
        actor: Object whose mesh bounds define the grasp.
        grasp_height_ratio: Grasp height as a fraction of the object height,
            measured from the bottom of the bounds.
        grasp_clearance: Extra z added on top of the clamped grasp height.
        approach_offset: z offset of the pre-grasp pose above the grasp pose.
        lift_distance: z offset of the lift pose above the grasp pose.
        max_descent_from_top: When set, the grasp height is kept no lower
            than this distance below the top of the bounds.

    Returns:
        ``(pre_grasp_pose, grasp_pose, lift_pose)``; all three share the
        grasp orientation.
    """
    bounds = get_actor_mesh(actor).bounds
    lower, upper = bounds[0], bounds[1]
    bottom_z, top_z = lower[2], upper[2]
    bbox_center = bounds.mean(axis=0)

    # Close the fingers along the narrower horizontal extent (x wins ties).
    size_x, size_y = upper[:2] - lower[:2]
    if size_x <= size_y:
        closing = np.array([1.0, 0.0, 0.0])
    else:
        closing = np.array([0.0, 1.0, 0.0])

    target_z = bottom_z + (top_z - bottom_z) * grasp_height_ratio
    if max_descent_from_top is not None:
        target_z = max(target_z, top_z - max_descent_from_top)
    # Keep the grasp 0.015 above the bottom and 0.005 below the top.
    # NOTE(review): for objects shorter than 0.02 the lower margin exceeds
    # the upper one -- confirm the resulting clamp is the intended height.
    target_z = float(np.clip(target_z, bottom_z + 0.015, top_z - 0.005))

    grasp_center = np.array(
        [bbox_center[0], bbox_center[1], target_z + grasp_clearance]
    )
    approaching = np.array([0.0, 0.0, -1.0])
    grasp_pose = grasper.agent.build_grasp_pose(
        approaching, closing, grasp_center
    )

    def _raised(dz: float) -> sapien.Pose:
        # Same orientation as the grasp pose, shifted along +z by dz.
        return sapien.Pose(
            p=grasp_pose.p + np.array([0.0, 0.0, dz]),
            q=grasp_pose.q,
        )

    pre_grasp_pose = _raised(approach_offset)
    lift_pose = _raised(lift_distance)
    return pre_grasp_pose, grasp_pose, lift_pose
360
+
361
+
362
def _build_grasp_stage_candidates(
    grasper: FrankaPandaGrasper,
    actor: sapien.Entity,
    grasp_height_ratio: float,
    grasp_clearance: float,
    approach_offset: float,
    lift_distance: float,
    max_descent_from_top: float | None = None,
) -> list[tuple[float, float, sapien.Pose, sapien.Pose, sapien.Pose]]:
    """List grasp plans to try in order, for tapered objects such as bottles.

    The first candidate uses the requested ratio and clearance. The two
    fallbacks raise the ratio by 0.08 and 0.16 (capped at 0.95) and the
    clearance by 0.004 and 0.008. Ratio and clearance are paired
    element-wise, not combined exhaustively.

    Returns:
        Tuples ``(ratio, clearance, pre_grasp_pose, grasp_pose, lift_pose)``,
        with pairs that repeat after rounding to four decimals skipped.
    """
    ratios = [grasp_height_ratio] + [
        min(0.95, grasp_height_ratio + bump) for bump in (0.08, 0.16)
    ]
    clearances = [grasp_clearance] + [
        grasp_clearance + extra for extra in (0.004, 0.008)
    ]

    planned = []
    visited = set()
    for ratio, clearance in zip(ratios, clearances):
        signature = (round(ratio, 4), round(clearance, 4))
        if signature not in visited:
            visited.add(signature)
            stage_poses = _plan_scripted_grasp_stages(
                grasper,
                actor,
                grasp_height_ratio=ratio,
                grasp_clearance=clearance,
                approach_offset=approach_offset,
                lift_distance=lift_distance,
                max_descent_from_top=max_descent_from_top,
            )
            planned.append((ratio, clearance, *stage_poses))

    return planned
403
+
404
+
405
def _build_hover_pose(
    actor: sapien.Entity,
    grasp_pose: sapien.Pose,
    hover_offset: float,
) -> sapien.Pose:
    """Return a pose ``hover_offset`` above the top of the object's bounds.

    The x/y position and the orientation are taken from ``grasp_pose``;
    only z is replaced.
    """
    hover_z = float(get_actor_mesh(actor).bounds[1, 2]) + hover_offset
    hover_x, hover_y = grasp_pose.p[0], grasp_pose.p[1]
    return sapien.Pose(
        p=np.array([hover_x, hover_y, hover_z]),
        q=grasp_pose.q,
    )
417
+
418
+
419
def _build_descent_stage_poses(
    grasp_pose: sapien.Pose,
    hover_offset: float,
    num_stages: int,
) -> list[sapien.Pose]:
    """Break the descent toward ``grasp_pose`` into evenly spaced waypoints.

    Offsets run linearly from ``hover_offset`` down to 0 above the grasp
    pose. The starting offset itself is dropped, so the list holds
    ``num_stages`` poses and the last one sits at the grasp position. A
    non-positive ``num_stages`` yields just ``[grasp_pose]``.
    """
    if num_stages <= 0:
        return [grasp_pose]

    # Drop the first sample: it is the starting height, not a waypoint.
    z_offsets = np.linspace(hover_offset, 0.0, num_stages + 1)[1:]
    waypoints = []
    for dz in z_offsets:
        shifted_position = grasp_pose.p + np.array([0.0, 0.0, float(dz)])
        waypoints.append(sapien.Pose(p=shifted_position, q=grasp_pose.q))
    return waypoints
436
+
437
+
438
+ def run_single_trial(
439
+ args: EvalCollisionConfig,
440
+ yaw_deg: float,
441
+ scale_factor: float,
442
+ grasp_width: float,
443
+ record_video: bool = False,
444
+ video_path: str | None = None,
445
+ ) -> TrialResult:
446
+ """Run one grasp trial with a fixed yaw."""
447
+ scene_manager = SapienSceneManager(
448
+ sim_freq=args.sim_freq,
449
+ ray_tracing=args.ray_tracing,
450
+ device=args.sim_backend,
451
+ )
452
+ scene = scene_manager.scene
453
+ set_ground_base_color(scene, GROUND_BASE_COLOR)
454
+ agent = create_panda_agent(
455
+ scene,
456
+ control_freq=args.control_freq,
457
+ sim_backend=args.sim_backend,
458
+ render_backend=args.render_backend,
459
+ )
460
+ video_frames: list[np.ndarray] | None = None
461
+ camera = None
462
+ if record_video:
463
+ video_frames = []
464
+ camera = create_recording_camera(
465
+ scene_manager,
466
+ eye_pos=[args.object_x - 0.32, args.object_y - 0.52, 0.56],
467
+ target_pt=[args.object_x - 0.01, args.object_y, 0.27],
468
+ image_hw=tuple(args.image_hw),
469
+ fovy_deg=60.0,
470
+ )
471
+
472
+ collision_mesh = load_collision_mesh_from_urdf(args.urdf_path)
473
+ spawn_center_z = _compute_spawn_center_z(
474
+ collision_mesh,
475
+ scale_factor=scale_factor,
476
+ z_offset=args.z_offset,
477
+ )
478
+ spawn_pose = sapien.Pose(
479
+ p=[
480
+ args.object_x,
481
+ args.object_y,
482
+ spawn_center_z,
483
+ ],
484
+ q=quat_from_yaw(yaw_deg),
485
+ )
486
+ actor = load_actor_from_urdf(
487
+ scene,
488
+ args.urdf_path,
489
+ pose=spawn_pose,
490
+ use_static=False,
491
+ update_mass=True,
492
+ scale=scale_factor,
493
+ )
494
+
495
+ if video_frames is not None and camera is not None:
496
+ video_frames.append(capture_frame(scene, camera))
497
+ grasper = FrankaPandaGrasper(agent, control_freq=args.control_freq)
498
+ _hold_gripper_state(
499
+ scene_manager,
500
+ grasper,
501
+ gripper_state=1,
502
+ control_freq=args.control_freq,
503
+ n_step=max(
504
+ 1,
505
+ args.settle_steps
506
+ // max(1, scene_manager.sim_freq // args.control_freq),
507
+ ),
508
+ camera=camera,
509
+ render_interval=args.render_interval,
510
+ video_frames=video_frames,
511
+ )
512
+ settled_bottom_z = get_actor_bottom_z(actor)
513
+ settled_bottom_z = _wait_until_actor_settled(
514
+ scene_manager,
515
+ grasper,
516
+ actor,
517
+ control_freq=args.control_freq,
518
+ initial_bottom_z=settled_bottom_z,
519
+ camera=camera,
520
+ render_interval=args.render_interval,
521
+ video_frames=video_frames,
522
+ )
523
+ grasp_candidates = _build_grasp_stage_candidates(
524
+ grasper,
525
+ actor,
526
+ grasp_height_ratio=args.grasp_height_ratio,
527
+ grasp_clearance=args.grasp_clearance,
528
+ approach_offset=args.approach_offset,
529
+ lift_distance=args.lift_distance,
530
+ max_descent_from_top=args.max_descent_from_top,
531
+ )
532
+ selected_lift_pose = None
533
+ selected_candidate_note = ""
534
+ grasp_stage_failure_note = "failed to reach pre-grasp pose"
535
+ for candidate_idx, candidate in enumerate(grasp_candidates):
536
+ (
537
+ candidate_ratio,
538
+ candidate_clearance,
539
+ _pre_grasp_pose,
540
+ grasp_pose,
541
+ lift_pose,
542
+ ) = candidate
543
+ hover_pose = _build_hover_pose(
544
+ actor, grasp_pose, hover_offset=args.hover_offset
545
+ )
546
+ hover_actions = grasper.move_to_pose(
547
+ hover_pose,
548
+ grasper.control_timestep,
549
+ gripper_state=1,
550
+ n_max_step=80,
551
+ )
552
+ if hover_actions is None:
553
+ grasp_stage_failure_note = "failed to reach hover pose"
554
+ continue
555
+ _execute_actions(
556
+ scene_manager,
557
+ agent,
558
+ hover_actions,
559
+ args.control_freq,
560
+ camera=camera,
561
+ render_interval=args.render_interval,
562
+ video_frames=video_frames,
563
+ )
564
+ _hold_gripper_state(
565
+ scene_manager,
566
+ grasper,
567
+ gripper_state=1,
568
+ control_freq=args.control_freq,
569
+ n_step=args.hover_open_steps,
570
+ camera=camera,
571
+ render_interval=args.render_interval,
572
+ video_frames=video_frames,
573
+ )
574
+
575
+ descent_failed = False
576
+ for descent_pose in _build_descent_stage_poses(
577
+ grasp_pose,
578
+ hover_offset=args.hover_offset,
579
+ num_stages=args.descent_stage_count,
580
+ ):
581
+ descent_actions = grasper.move_to_pose(
582
+ descent_pose,
583
+ grasper.control_timestep,
584
+ gripper_state=1,
585
+ n_max_step=args.descent_n_max_step,
586
+ )
587
+ if descent_actions is None:
588
+ descent_failed = True
589
+ grasp_stage_failure_note = (
590
+ "failed during slow descent to grasp pose"
591
+ )
592
+ break
593
+ _execute_actions(
594
+ scene_manager,
595
+ agent,
596
+ descent_actions,
597
+ args.control_freq,
598
+ camera=camera,
599
+ render_interval=args.render_interval,
600
+ video_frames=video_frames,
601
+ )
602
+ if descent_failed:
603
+ continue
604
+
605
+ _hold_gripper_state(
606
+ scene_manager,
607
+ grasper,
608
+ gripper_state=1,
609
+ control_freq=args.control_freq,
610
+ n_step=2,
611
+ camera=camera,
612
+ render_interval=args.render_interval,
613
+ video_frames=video_frames,
614
+ )
615
+ selected_lift_pose = lift_pose
616
+ selected_candidate_note = (
617
+ ""
618
+ if candidate_idx == 0
619
+ else (
620
+ f"fallback grasp candidate ratio={candidate_ratio:.2f}, "
621
+ f"clearance={candidate_clearance:.3f}"
622
+ )
623
+ )
624
+ break
625
+
626
+ if selected_lift_pose is None:
627
+ if video_frames is not None and video_path is not None:
628
+ os.makedirs(os.path.dirname(video_path), exist_ok=True)
629
+ imageio.mimsave(video_path, video_frames, fps=args.video_fps)
630
+ return TrialResult(
631
+ yaw_deg=yaw_deg,
632
+ success=False,
633
+ scale_factor=scale_factor,
634
+ object_width_before_scale=grasp_width,
635
+ object_width_after_scale=grasp_width * scale_factor,
636
+ settled_bottom_z=settled_bottom_z,
637
+ final_bottom_z=settled_bottom_z,
638
+ lift_delta=0.0,
639
+ video_path=video_path,
640
+ note=grasp_stage_failure_note,
641
+ )
642
+
643
+ lift_success_threshold = _compute_adaptive_lift_threshold(
644
+ actor,
645
+ ratio=args.lift_success_ratio,
646
+ min_height=args.min_lift_success_height,
647
+ max_height=args.max_lift_success_height,
648
+ absolute_override=args.lift_success_height,
649
+ )
650
+ tracker = _GraspTracker(actor=actor, grasper=grasper)
651
+ close_actions = grasper.control_gripper(
652
+ gripper_state=-1,
653
+ n_step=args.close_steps,
654
+ )
655
+ _execute_actions(
656
+ scene_manager,
657
+ agent,
658
+ close_actions,
659
+ args.control_freq,
660
+ camera=camera,
661
+ render_interval=args.render_interval,
662
+ video_frames=video_frames,
663
+ tracker=tracker,
664
+ )
665
+
666
+ stage_note = "ok"
667
+ lift_actions = grasper.move_to_pose(
668
+ selected_lift_pose,
669
+ grasper.control_timestep,
670
+ gripper_state=-1,
671
+ n_max_step=50,
672
+ )
673
+ if lift_actions is not None:
674
+ _execute_actions(
675
+ scene_manager,
676
+ agent,
677
+ lift_actions,
678
+ args.control_freq,
679
+ camera=camera,
680
+ render_interval=args.render_interval,
681
+ video_frames=video_frames,
682
+ tracker=tracker,
683
+ )
684
+ else:
685
+ stage_note = "failed to lift after closing"
686
+ _hold_gripper_state(
687
+ scene_manager,
688
+ grasper,
689
+ gripper_state=-1,
690
+ control_freq=args.control_freq,
691
+ n_step=args.post_grasp_steps,
692
+ camera=camera,
693
+ render_interval=args.render_interval,
694
+ video_frames=video_frames,
695
+ tracker=tracker,
696
+ )
697
+
698
+ final_bottom_z = get_actor_bottom_z(actor)
699
+ lift_delta = final_bottom_z - settled_bottom_z
700
+ peak_bottom_z = (
701
+ final_bottom_z
702
+ if tracker.peak_bottom_z is None
703
+ else tracker.peak_bottom_z
704
+ )
705
+ peak_lift_delta = peak_bottom_z - settled_bottom_z
706
+ offset_range = tracker.offset_range
707
+ final_lift_pass = bool(lift_delta >= lift_success_threshold)
708
+ sync_pass = bool(offset_range <= args.sync_tol)
709
+ success = bool(final_lift_pass and sync_pass)
710
+ if video_frames is not None and camera is not None:
711
+ video_frames.append(capture_frame(scene, camera))
712
+ if video_frames is not None and video_path is not None:
713
+ os.makedirs(os.path.dirname(video_path), exist_ok=True)
714
+ imageio.mimsave(video_path, video_frames, fps=args.video_fps)
715
+
716
+ if stage_note != "ok":
717
+ note = stage_note
718
+ elif success:
719
+ note = selected_candidate_note or "ok"
720
+ elif not final_lift_pass and not sync_pass:
721
+ note = "object dropped and decoupled from gripper"
722
+ elif not final_lift_pass:
723
+ note = "object did not stay lifted (likely bounced or dropped)"
724
+ else:
725
+ note = "object did not move synchronously with gripper"
726
+
727
+ return TrialResult(
728
+ yaw_deg=yaw_deg,
729
+ success=success,
730
+ scale_factor=scale_factor,
731
+ object_width_before_scale=grasp_width,
732
+ object_width_after_scale=grasp_width * scale_factor,
733
+ settled_bottom_z=settled_bottom_z,
734
+ final_bottom_z=final_bottom_z,
735
+ lift_delta=lift_delta,
736
+ peak_bottom_z=peak_bottom_z,
737
+ peak_lift_delta=peak_lift_delta,
738
+ lift_success_threshold=lift_success_threshold,
739
+ tcp_object_offset_range=offset_range,
740
+ sync_tol=args.sync_tol,
741
+ final_lift_pass=final_lift_pass,
742
+ sync_pass=sync_pass,
743
+ video_path=video_path,
744
+ note=note,
745
+ )
746
+
747
+
748
def entrypoint(**kwargs) -> dict:
    """Run collision-success evaluation for a URDF asset.

    The configuration is built from ``kwargs`` when any are given, otherwise
    it is parsed from the command line with ``tyro``. One grasp trial is run
    per yaw angle, with the angles evenly spaced over ``[0, 360)`` degrees,
    and the aggregated report is written as JSON to ``args.output_path``.

    Args:
        **kwargs: Field values for ``EvalCollisionConfig``. If ``urdf_path``
            is omitted a placeholder value is filled in, which normally
            makes the existence check below fail.

    Returns:
        The report dictionary that was written to disk. It holds the
        per-trial results under ``"trials"`` and the aggregate
        ``"collision_success_rate"``.

    Raises:
        FileNotFoundError: If ``args.urdf_path`` does not exist.
    """
    if kwargs:
        kwargs.setdefault("urdf_path", "__dummy__.urdf")
        args = EvalCollisionConfig(**kwargs)
    else:
        args = tyro.cli(EvalCollisionConfig)

    if not os.path.exists(args.urdf_path):
        raise FileNotFoundError(f"URDF file not found: {args.urdf_path}")

    logger.info(
        f"Start collision-success eval: urdf={args.urdf_path}, "
        f"num_trials={args.num_trials}, sync_tol={args.sync_tol}, "
        f"output={args.output_path}"
    )
    scale_factor, grasp_width = _compute_scale_factor(
        args.urdf_path,
        max_gripper_width=args.max_gripper_width,
        clearance_ratio=args.gripper_clearance_ratio,
    )
    yaw_values = np.linspace(0, 360, args.num_trials, endpoint=False)
    trials = [
        run_single_trial(
            args,
            float(yaw_deg),
            scale_factor,
            grasp_width,
            record_video=args.save_video,
            video_path=(
                _build_trial_video_path(args.video_path, idx, float(yaw_deg))
                if args.save_video
                else None
            ),
        )
        for idx, yaw_deg in enumerate(yaw_values)
    ]

    success_count = sum(int(trial.success) for trial in trials)
    result = {
        "urdf_path": args.urdf_path,
        "num_trials": args.num_trials,
        "num_success": success_count,
        # max(1, ...) keeps the rate defined when zero trials are requested.
        "collision_success_rate": success_count / max(1, args.num_trials),
        "scale_factor": scale_factor,
        "estimated_grasp_width_before_scale": grasp_width,
        "estimated_grasp_width_after_scale": grasp_width * scale_factor,
        "video_path": args.video_path if args.save_video else None,
        "trial_video_paths": [
            trial.video_path
            for trial in trials
            if trial.video_path is not None
        ],
        "trials": [asdict(trial) for trial in trials],
    }

    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when one is named.
    output_dir = os.path.dirname(args.output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(args.output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)
    logger.info(f"Collision success report saved to {args.output_path}")

    return result
810
+
811
+
812
+ if __name__ == "__main__":
813
+ entrypoint()
embodied_gen/scripts/room_gen/export_scene.py CHANGED
@@ -31,6 +31,7 @@ import gin
31
  import numpy as np
32
  import trimesh
33
  from infinigen.core.util import blender as butil
 
34
 
35
  logger = logging.getLogger(__name__)
36
 
@@ -213,15 +214,162 @@ def clean_names(obj=None):
213
 
214
  def remove_obj_parents(obj=None):
215
  if obj is not None:
216
- old_location = obj.matrix_world.to_translation()
217
  obj.parent = None
218
- obj.matrix_world.translation = old_location
219
  return
220
 
221
  for obj in bpy.data.objects:
222
- old_location = obj.matrix_world.to_translation()
223
  obj.parent = None
224
- obj.matrix_world.translation = old_location
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
 
227
  def delete_objects():
@@ -744,21 +892,12 @@ def triangulate_meshes():
744
 
745
 
746
  def adjust_wattages():
747
- logger.info("Adjusting light wattage")
748
  for obj in bpy.context.scene.objects:
749
  if obj.type == "LIGHT" and obj.data.type == "POINT":
750
  light = obj.data
751
- if hasattr(light, "energy") and hasattr(light, "shadow_soft_size"):
752
- X = light.energy
753
- r = light.shadow_soft_size
754
- # candelas * 1000 / (4 * math.pi * r**2). additionally units come out of blender at 1/100 scale
755
- new_wattage = (
756
- (X * 20 / (4 * math.pi))
757
- * 1000
758
- / (4 * math.pi * r**2)
759
- * 100
760
- )
761
- light.energy = new_wattage
762
 
763
 
764
  def set_center_of_mass():
@@ -862,10 +1001,15 @@ def bake_scene(folderPath: Path, image_res, vertex_colors, export_usd):
862
 
863
 
864
  def run_blender_export(
865
- exportPath: Path, format: str, vertex_colors: bool, individual_export: bool
 
 
 
 
866
  ):
867
  assert exportPath.parent.exists()
868
  exportPath = str(exportPath)
 
869
 
870
  if format == "obj":
871
  if vertex_colors:
@@ -913,14 +1057,18 @@ def run_blender_export(
913
  )
914
 
915
  if format in ["usda", "usdc"]:
916
- bpy.ops.wm.usd_export(
917
- filepath=exportPath,
918
- export_textures=True,
919
- # use_instancing=True,
920
- overwrite_textures=True,
921
- selected_objects_only=individual_export,
922
- root_prim_path="/World",
923
- )
 
 
 
 
924
 
925
 
926
  def export_scene(
@@ -1210,14 +1358,20 @@ def export_curr_scene(
1210
  task_uniqname=None,
1211
  deconvex=False,
1212
  center_scene=False,
 
1213
  align_quat=(0.7071, 0, 0, 0.7071), # xyzw
1214
  ) -> Path:
1215
  export_usd = format in ["usda", "usdc"]
1216
  export_folder = output_folder
1217
  export_folder.mkdir(exist_ok=True)
1218
  export_file = export_folder / output_folder.with_suffix(f".{format}").name
 
 
 
 
1219
  logger.info(f"Exporting to directory {export_folder=}")
1220
 
 
1221
  remove_obj_parents()
1222
  delete_objects()
1223
  triangulate_meshes()
@@ -1289,7 +1443,7 @@ def export_curr_scene(
1289
 
1290
  # iterate through all objects and bake them
1291
  bake_scene(
1292
- folderPath=export_folder / "textures",
1293
  image_res=image_res,
1294
  vertex_colors=vertex_colors,
1295
  export_usd=export_usd,
@@ -1377,7 +1531,11 @@ def export_curr_scene(
1377
  bpy.ops.object.modifier_apply(modifier=dec_mod.name)
1378
 
1379
  run_blender_export(
1380
- export_file, format, vertex_colors, individual_export
 
 
 
 
1381
  )
1382
  obj.select_set(False)
1383
 
@@ -1443,9 +1601,17 @@ def export_curr_scene(
1443
  return urdf_path
1444
  else:
1445
  logger.info(f"Exporting file to {export_file=}")
1446
- run_blender_export(
1447
- export_file, format, vertex_colors, individual_export
1448
- )
 
 
 
 
 
 
 
 
1449
 
1450
  return export_file
1451
 
@@ -1470,6 +1636,7 @@ def main(args):
1470
  omniverse_export=args.omniverse,
1471
  deconvex=args.deconvex,
1472
  center_scene=args.center_scene,
 
1473
  )
1474
 
1475
  bpy.ops.wm.quit_blender()
@@ -1489,6 +1656,7 @@ def make_args():
1489
  parser.add_argument("-o", "--omniverse", action="store_true")
1490
  parser.add_argument("--deconvex", action="store_true")
1491
  parser.add_argument("--center_scene", action="store_true")
 
1492
 
1493
  args = parser.parse_args()
1494
 
 
31
  import numpy as np
32
  import trimesh
33
  from infinigen.core.util import blender as butil
34
+ from mathutils import Vector
35
 
36
  logger = logging.getLogger(__name__)
37
 
 
214
 
215
def remove_obj_parents(obj=None):
    """Clear parent links while restoring each object's world matrix.

    Args:
        obj: Object to unparent. When ``None``, every object in
            ``bpy.data.objects`` is processed instead.
    """
    targets = (obj,) if obj is not None else bpy.data.objects
    for target in targets:
        # Snapshot the world matrix before the parent is cleared and write
        # it back afterwards.
        preserved = target.matrix_world.copy()
        target.parent = None
        target.matrix_world = preserved
226
+
227
+
228
def remove_placeholder_area_lights() -> int:
    """Delete area lights that match the placeholder window-light pattern.

    An object is removed only when all of the following hold: it is an
    ``AREA`` light, its parent's name contains ``"WindowFactory"``, its own
    name starts with ``"Area"``, its energy is 10.0 (within 1e-4), and its
    world translation is at the origin (within 1e-4).

    Returns:
        The number of objects removed.
    """

    def _is_placeholder(candidate) -> bool:
        if candidate.type != "LIGHT" or candidate.data.type != "AREA":
            return False
        owner = candidate.parent
        owner_name = "" if owner is None else owner.name
        if "WindowFactory" not in owner_name:
            return False
        if not candidate.name.startswith("Area"):
            return False
        energy = float(candidate.data.energy)
        if not math.isclose(energy, 10.0, abs_tol=1e-4):
            return False
        world_loc = np.array(candidate.matrix_world.translation)
        return bool(np.allclose(world_loc, 0.0, atol=1e-4))

    removed_count = 0
    # Iterate over a snapshot because objects are removed during the loop.
    for candidate in list(bpy.data.objects):
        if _is_placeholder(candidate):
            bpy.data.objects.remove(candidate, do_unlink=True)
            removed_count += 1

    if removed_count > 0:
        logger.info(
            "Removed placeholder window area lights before export: "
            f"{removed_count}"
        )
    return removed_count
255
+
256
+
257
def _get_export_scene_bounds() -> Optional[Tuple[np.ndarray, np.ndarray]]:
    """Return the world-space bounding corners of the exportable meshes.

    Only mesh objects that have vertices, are not hidden from rendering and
    belong to the current view layer contribute their bounding-box corners.

    Returns:
        ``(min_corner, max_corner)`` as arrays, or ``None`` when no object
        qualifies.
    """
    in_view_layer = set(bpy.context.view_layer.objects)

    def _contributes(candidate) -> bool:
        # The check order matters: ``data.vertices`` is only read once the
        # object is known to be a mesh with data.
        return not (
            candidate.type != "MESH"
            or candidate.data is None
            or not candidate.data.vertices
            or candidate.hide_render
            or candidate not in in_view_layer
        )

    corners = [
        np.array(candidate.matrix_world @ Vector(corner))
        for candidate in bpy.data.objects
        if _contributes(candidate)
        for corner in candidate.bound_box
    ]
    if not corners:
        return None

    stacked = np.stack(corners)
    return stacked.min(axis=0), stacked.max(axis=0)
278
+
279
+
280
def _get_world_background_strength() -> float:
    """Return the largest ``Strength`` among the world's background nodes.

    Returns:
        The maximum ``Strength`` default value over all ``BACKGROUND``
        nodes, or 0.25 when the scene has no world, the world does not use
        nodes, or no background node exists.
    """
    fallback = 0.25
    world = bpy.context.scene.world
    if world is None or not world.use_nodes:
        return fallback

    return max(
        (
            float(node.inputs["Strength"].default_value)
            for node in world.node_tree.nodes
            if node.type == "BACKGROUND"
        ),
        default=fallback,
    )
293
+
294
+
295
def _get_world_sky_rotation() -> tuple[float, float]:
    """Derive a ``(pitch, yaw)`` pair from the world's first sky texture.

    Returns:
        ``(pi / 2 - sun_elevation, sun_rotation)`` read from the first
        ``TEX_SKY`` node, or ``(radians(55), 0.0)`` when the scene has no
        world, the world does not use nodes, or no sky node exists.
    """
    default_rotation = (math.radians(55.0), 0.0)
    world = bpy.context.scene.world
    if world is None or not world.use_nodes:
        return default_rotation

    sky_node = next(
        (node for node in world.node_tree.nodes if node.type == "TEX_SKY"),
        None,
    )
    if sky_node is None:
        return default_rotation

    # A missing elevation attribute defaults to 35 degrees, which gives the
    # same 55 degree pitch as the fallback above.
    elevation = float(getattr(sky_node, "sun_elevation", math.radians(35.0)))
    rotation = float(getattr(sky_node, "sun_rotation", 0.0))
    return (math.pi * 0.5 - elevation, rotation)
308
+
309
+
310
def add_world_export_lights(
    world_strength: float = 8.0,
) -> list[bpy.types.Object]:
    """Add a sun and a disk area light above the scene bounding box.

    Both lights are centred over the scene in X/Y. The sun is placed one
    bounding-box diagonal above the top of the scene and the area light
    half a diagonal above it.

    Args:
        world_strength: Lower bound for the strength used to scale the
            light energies; the larger of this and the world background
            strength is used.

    Returns:
        The created light objects ``[sun, area]``, or an empty list when
        the scene has no world or no scene bounds are available.
    """
    if bpy.context.scene.world is None:
        return []

    bounds = _get_export_scene_bounds()
    if bounds is None:
        return []

    lower, upper = bounds
    mid = (lower + upper) * 0.5
    span = float(np.linalg.norm(upper - lower))
    strength = max(_get_world_background_strength(), world_strength)
    sun_pitch, sun_yaw = _get_world_sky_rotation()

    def _spawn(kind, height, rotation, name):
        # light_add leaves the new object as the context's active object.
        bpy.ops.object.light_add(
            type=kind,
            location=(float(mid[0]), float(mid[1]), float(height)),
            rotation=rotation,
        )
        light_obj = bpy.context.object
        light_obj.name = name
        return light_obj

    sun = _spawn(
        "SUN",
        upper[2] + span,
        (sun_pitch, 0.0, sun_yaw),
        "__EXPORT_WORLD_SUN__",
    )
    sun.data.energy = max(strength * 2.0, 0.5)

    area = _spawn(
        "AREA",
        upper[2] + 0.5 * span,
        (0.0, 0.0, 0.0),
        "__EXPORT_WORLD_AREA__",
    )
    area.data.shape = "DISK"
    area.data.size = max(span, 2.0)
    area.data.energy = max(strength * 2500.0, 500.0)

    created_lights = [sun, area]
    logger.info(
        "Added temporary world export lights: "
        f"{[obj.name for obj in created_lights]}"
    )
    return created_lights
364
+
365
+
366
def remove_temp_export_objects(objects: list[bpy.types.Object]) -> None:
    """Remove the given objects from ``bpy.data.objects``.

    Args:
        objects: Objects to delete. ``None`` entries, and objects whose name
            is no longer present in ``bpy.data.objects``, are skipped.
    """
    for candidate in objects:
        if candidate is not None and candidate.name in bpy.data.objects:
            bpy.data.objects.remove(candidate, do_unlink=True)
373
 
374
 
375
  def delete_objects():
 
892
 
893
 
894
def adjust_wattages():
    """Keep point-light energies unchanged, coercing each value to float.

    Logs that the original wattage is kept, then walks the point lights in
    the current scene and writes each ``energy`` back as a ``float``.
    """
    logger.info("Keeping original point light wattage for USD export")
    for obj in bpy.context.scene.objects:
        if obj.type != "LIGHT" or obj.data.type != "POINT":
            continue
        point_light = obj.data
        if not hasattr(point_light, "energy"):
            continue
        # No rescaling is applied; the existing value is assigned back.
        point_light.energy = float(point_light.energy)
 
 
 
 
 
 
 
 
 
901
 
902
 
903
  def set_center_of_mass():
 
1001
 
1002
 
1003
  def run_blender_export(
1004
+ exportPath: Path,
1005
+ format: str,
1006
+ vertex_colors: bool,
1007
+ individual_export: bool,
1008
+ world_strength: float = 8.0,
1009
  ):
1010
  assert exportPath.parent.exists()
1011
  exportPath = str(exportPath)
1012
+ temp_export_objects: list[bpy.types.Object] = []
1013
 
1014
  if format == "obj":
1015
  if vertex_colors:
 
1057
  )
1058
 
1059
  if format in ["usda", "usdc"]:
1060
+ temp_export_objects = add_world_export_lights(world_strength)
1061
+ try:
1062
+ bpy.ops.wm.usd_export(
1063
+ filepath=exportPath,
1064
+ export_textures=True,
1065
+ # use_instancing=True,
1066
+ overwrite_textures=True,
1067
+ selected_objects_only=individual_export,
1068
+ root_prim_path="/World",
1069
+ )
1070
+ finally:
1071
+ remove_temp_export_objects(temp_export_objects)
1072
 
1073
 
1074
  def export_scene(
 
1358
  task_uniqname=None,
1359
  deconvex=False,
1360
  center_scene=False,
1361
+ world_strength=8.0,
1362
  align_quat=(0.7071, 0, 0, 0.7071), # xyzw
1363
  ) -> Path:
1364
  export_usd = format in ["usda", "usdc"]
1365
  export_folder = output_folder
1366
  export_folder.mkdir(exist_ok=True)
1367
  export_file = export_folder / output_folder.with_suffix(f".{format}").name
1368
+ texture_export_folder = export_folder / "textures"
1369
+ bake_texture_folder = texture_export_folder
1370
+ if export_usd:
1371
+ bake_texture_folder = export_folder / "_usd_bake_textures"
1372
  logger.info(f"Exporting to directory {export_folder=}")
1373
 
1374
+ remove_placeholder_area_lights()
1375
  remove_obj_parents()
1376
  delete_objects()
1377
  triangulate_meshes()
 
1443
 
1444
  # iterate through all objects and bake them
1445
  bake_scene(
1446
+ folderPath=bake_texture_folder,
1447
  image_res=image_res,
1448
  vertex_colors=vertex_colors,
1449
  export_usd=export_usd,
 
1531
  bpy.ops.object.modifier_apply(modifier=dec_mod.name)
1532
 
1533
  run_blender_export(
1534
+ export_file,
1535
+ format,
1536
+ vertex_colors,
1537
+ individual_export,
1538
+ world_strength=world_strength,
1539
  )
1540
  obj.select_set(False)
1541
 
 
1601
  return urdf_path
1602
  else:
1603
  logger.info(f"Exporting file to {export_file=}")
1604
+ try:
1605
+ run_blender_export(
1606
+ export_file,
1607
+ format,
1608
+ vertex_colors,
1609
+ individual_export,
1610
+ world_strength=world_strength,
1611
+ )
1612
+ finally:
1613
+ if export_usd and bake_texture_folder.exists():
1614
+ shutil.rmtree(bake_texture_folder, ignore_errors=True)
1615
 
1616
  return export_file
1617
 
 
1636
  omniverse_export=args.omniverse,
1637
  deconvex=args.deconvex,
1638
  center_scene=args.center_scene,
1639
+ world_strength=args.world_strength,
1640
  )
1641
 
1642
  bpy.ops.wm.quit_blender()
 
1656
  parser.add_argument("-o", "--omniverse", action="store_true")
1657
  parser.add_argument("--deconvex", action="store_true")
1658
  parser.add_argument("--center_scene", action="store_true")
1659
+ parser.add_argument("--world_strength", default=8.0, type=float)
1660
 
1661
  args = parser.parse_args()
1662
 
embodied_gen/scripts/room_gen/gen_room.py CHANGED
@@ -214,7 +214,7 @@ def generate_room(cfg: GenRoomArgs):
214
  "-f",
215
  "obj",
216
  "-r",
217
- "512",
218
  "--individual",
219
  "--deconvex",
220
  "--center_scene",
@@ -235,7 +235,7 @@ def generate_room(cfg: GenRoomArgs):
235
  "-f",
236
  "usdc",
237
  "-r",
238
- "512",
239
  "--omniverse",
240
  "--center_scene",
241
  ]
 
214
  "-f",
215
  "obj",
216
  "-r",
217
+ "1024",
218
  "--individual",
219
  "--deconvex",
220
  "--center_scene",
 
235
  "-f",
236
  "usdc",
237
  "-r",
238
+ "1024",
239
  "--omniverse",
240
  "--center_scene",
241
  ]
embodied_gen/scripts/room_gen/render_birdseye.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ """Render a top-down bird's-eye view of a USD scene with ceilings hidden."""
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import logging
24
+ import tempfile
25
+ from pathlib import Path
26
+
27
+ import bpy
28
+ from mathutils import Vector
29
+ from embodied_gen.scripts.room_gen.render_usd import RenderUsd
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ CEILING_KEYWORDS = ("ceiling", "exterior")
35
+ DEFAULT_ROOM_USD_GLOB = "seed*/usd/export_scene/export_scene.usdc"
36
+
37
+
38
class BirdseyeRenderUsd(RenderUsd):
    """Top-down USD renderer with ceiling removal and orthographic camera."""

    def __init__(
        self,
        *,
        ortho_margin: float = 1.05,
        use_cpu: bool = False,
        **kwargs,
    ) -> None:
        """Initialise the renderer.

        Args:
            ortho_margin: Multiplier applied to the scene size when setting
                the orthographic camera scale.
            use_cpu: When true, Cycles is configured for the CPU device
                here instead of by the base class.
            **kwargs: Forwarded unchanged to ``RenderUsd.__init__``.
        """
        super().__init__(**kwargs)
        self.ortho_margin = ortho_margin
        self.use_cpu = use_cpu

    def configure_cycles(self) -> None:
        """Configure Cycles, then enable transparent RGBA output."""
        if self.use_cpu:
            self.scene.render.engine = "CYCLES"
            self.scene.cycles.device = "CPU"
            self.scene.cycles.samples = self.samples
            self.scene.render.resolution_x = self.resolution[0]
            self.scene.render.resolution_y = self.resolution[1]
            self.scene.render.image_settings.file_format = "PNG"
        else:
            super().configure_cycles()
        # Applied on both paths so the background renders transparent.
        self.scene.render.film_transparent = True
        self.scene.render.image_settings.color_mode = "RGBA"

    def remove_ceiling_objects(self) -> int:
        """Delete any object whose name contains a ceiling keyword.

        Returns:
            The number of objects removed.
        """
        removed = 0
        # Iterate over a snapshot because objects are removed in the loop.
        for obj in list(self.scene.objects):
            lower = obj.name.lower()
            if any(kw in lower for kw in CEILING_KEYWORDS):
                bpy.data.objects.remove(obj, do_unlink=True)
                removed += 1
        logger.info("Removed %d ceiling objects.", removed)
        return removed

    def create_orthographic_camera(
        self, center: Vector, top_z: float, scene_size: float
    ) -> bpy.types.Object:
        """Add an orthographic camera above ``center`` and make it active.

        Args:
            center: Scene centre; only its ``x`` and ``y`` are used.
            top_z: Height of the top of the scene.
            scene_size: Scene extent used for the orthographic scale and,
                clamped to at least 1.0, for the camera's height offset.

        Returns:
            The created camera object, also assigned to ``scene.camera``.
        """
        location = Vector((center.x, center.y, top_z + max(scene_size, 1.0)))
        bpy.ops.object.camera_add(location=location, rotation=(0.0, 0.0, 0.0))
        camera = bpy.context.object
        camera.rotation_mode = "XYZ"
        camera.data.type = "ORTHO"
        camera.data.ortho_scale = scene_size * self.ortho_margin
        camera.data.clip_start = 0.01
        camera.data.clip_end = 1000.0
        self.scene.camera = camera
        return camera

    def run(self) -> None:
        """Import the scene, set up camera and lights, and render it."""
        rgb_output_path = self.get_rgb_output_path()
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.clear_scene()
        self.import_usd()
        self.remove_ceiling_objects()
        self.validate_glb_args()
        imported_glb_objects = self.import_glb_asset()
        self.place_glb_asset(imported_glb_objects)

        min_corner, max_corner = self.get_scene_bbox()
        center = (min_corner + max_corner) * 0.5
        diagonal = (max_corner - min_corner).length
        # The camera is sized from the larger horizontal extent.
        scene_size = max(
            max_corner.x - min_corner.x, max_corner.y - min_corner.y
        )

        self.create_orthographic_camera(center, max_corner.z, scene_size)
        self.ensure_lighting(diagonal, center, max_corner.z)
        world_created = self.ensure_world()
        self.add_fill_light(
            diagonal,
            center,
            max_corner.z,
            energy=self.fill_light_energy,
        )
        if world_created:
            self.add_light_rig(
                diagonal,
                center,
                max_corner.z,
                area_energy=1500.0,
                sun_energy=0.35,
                prefix="Fill",
            )
        self.configure_color_management()
        self.configure_cycles()
        with tempfile.TemporaryDirectory(
            prefix="render_birdseye_", dir=None
        ) as temp_dir:
            self.temp_dir = Path(temp_dir)
            try:
                self.render(rgb_output_path)
            finally:
                # Reset even when rendering fails, so the attribute never
                # points at the directory deleted on exit from the ``with``.
                self.temp_dir = None

        logger.info("Rendered bird's-eye outputs to %s", self.output_dir)
135
+
136
+
137
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the bird's-eye renderer.

    Exactly one of ``--usd_path`` (single render) or ``--input_dir`` (batch
    render) must be supplied; the remaining options are optional.
    """
    cli = argparse.ArgumentParser(
        description="Render a top-down bird's-eye view of a USD scene."
    )

    # Input selection: a single USD file or a directory of seeds.
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument("--usd_path", type=Path)
    batch_help = (
        "Directory with seed*/usd/export_scene/export_scene.usdc files "
        "to render in batch."
    )
    source.add_argument("--input_dir", type=Path, help=batch_help)

    # Output locations.
    cli.add_argument(
        "--output_dir",
        type=Path,
        help="Output directory for a single --usd_path render.",
    )
    root_help = (
        "Batch output root. Defaults to <input_dir>/bev, with one "
        "subdirectory per seed."
    )
    cli.add_argument("--output_root", type=Path, help=root_help)
    cli.add_argument(
        "--skip_existing",
        action="store_true",
        help="Skip batch items that already have render_rgb.png.",
    )

    # Render settings.
    cli.add_argument(
        "--resolution",
        type=int,
        nargs=2,
        metavar=("WIDTH", "HEIGHT"),
        default=(1920, 1920),
    )
    cli.add_argument("--samples", type=int, default=512)
    float_options = (
        ("--exposure", -1.0),
        ("--world_strength", 1.0),
        ("--fill_light_energy", 1000.0),
        ("--ortho_margin", 1.05),
    )
    for flag, default_value in float_options:
        cli.add_argument(flag, type=float, default=default_value)
    cli.add_argument("--use_cpu", action="store_true")
    return cli
183
+
184
+
185
def find_room_usd_paths(input_dir: Path) -> list[Path]:
    """Return the sorted USD files under ``input_dir`` matching the glob."""
    matches = list(input_dir.glob(DEFAULT_ROOM_USD_GLOB))
    matches.sort()
    return matches
188
+
189
+
190
def get_batch_output_dir(
    usd_path: Path, input_dir: Path, output_root: Path
) -> Path:
    """Build the batch render output directory for a seed USD path.

    The seed directory name is the first path component of ``usd_path``
    relative to ``input_dir``. When ``usd_path`` is not under ``input_dir``
    the name of its third ancestor is used instead, which matches the
    ``<seed>/usd/export_scene/<file>`` layout.

    Args:
        usd_path: Path of the USD file being rendered.
        input_dir: Batch input directory.
        output_root: Root directory for batch outputs.

    Returns:
        ``output_root / <seed directory name>``. If neither rule yields a
        name (``usd_path`` equals ``input_dir``, or the path is too shallow
        to have a third ancestor), the file stem is used rather than
        raising ``IndexError``.
    """
    try:
        relative_parts = usd_path.relative_to(input_dir).parts
    except ValueError:
        relative_parts = ()

    if relative_parts:
        seed_dir = relative_parts[0]
    else:
        ancestors = usd_path.parents
        # parents[2] only exists for paths at least three levels deep.
        if len(ancestors) > 2:
            seed_dir = ancestors[2].name
        else:
            seed_dir = usd_path.stem
    return output_root / seed_dir
199
+
200
+
201
def build_renderer(
    args: argparse.Namespace, usd_path: Path, output_dir: Path
) -> BirdseyeRenderUsd:
    """Construct a bird's-eye renderer from the shared CLI options.

    Only the RGB pass is requested and no GLB asset is passed. The camera
    arguments given here are fixed placeholder values.
    """
    # Options taken from the parsed command line.
    cli_options = {
        "resolution": tuple(args.resolution),
        "samples": args.samples,
        "exposure": args.exposure,
        "world_strength": args.world_strength,
        "fill_light_energy": args.fill_light_energy,
        "ortho_margin": args.ortho_margin,
        "use_cpu": args.use_cpu,
    }
    # Options that are the same for every bird's-eye render.
    fixed_options = {
        "glb_path": None,
        "glb_xyz": None,
        "glb_rotation_deg": None,
        "render_passes": ("rgb",),
        "depth_mode": "normalized",
        "camera_xyz": (0.0, 0.0, 0.0),
        "camera_rotation_deg": (0.0, 0.0, 0.0),
        "flow_camera_xyz": None,
        "flow_camera_rotation_deg": None,
        "focal_length_mm": 20.0,
    }
    return BirdseyeRenderUsd(
        usd_path=usd_path,
        output_dir=output_dir,
        **fixed_options,
        **cli_options,
    )
226
+
227
+
228
def render_single(
    args: argparse.Namespace, usd_path: Path, output_dir: Path
) -> None:
    """Render one USD file into ``output_dir``."""
    renderer = build_renderer(args, usd_path, output_dir)
    renderer.run()
232
+
233
+
234
def render_batch(args: argparse.Namespace) -> None:
    """Render every seed USD file found under ``args.input_dir``.

    Outputs go to ``args.output_root`` when it is set, otherwise to
    ``<input_dir>/bev``, with one subdirectory per seed.

    Raises:
        FileNotFoundError: If no USD file matches the room glob.
    """
    source_root = args.input_dir
    if args.output_root:
        destination_root = args.output_root
    else:
        destination_root = source_root / "bev"

    scene_files = find_room_usd_paths(source_root)
    if not scene_files:
        raise FileNotFoundError(
            f"No USD files found under {source_root} matching "
            f"{DEFAULT_ROOM_USD_GLOB}."
        )

    logger.info(
        "Rendering %d bird's-eye views under %s.",
        len(scene_files),
        source_root,
    )
    for scene_file in scene_files:
        target_dir = get_batch_output_dir(
            scene_file, source_root, destination_root
        )
        existing_render = target_dir / "render_rgb.png"
        if args.skip_existing and existing_render.exists():
            logger.info("Skipping existing render %s", existing_render)
            continue

        logger.info("Rendering %s to %s", scene_file, target_dir)
        render_single(args, scene_file, target_dir)
256
+
257
+
258
def main() -> None:
    """Parse the command line and run a batch or single render.

    Raises:
        ValueError: If ``--usd_path`` is used without ``--output_dir``.
    """
    logging.basicConfig(level=logging.INFO)
    args = build_arg_parser().parse_args()

    batch_mode = args.input_dir is not None
    if batch_mode:
        render_batch(args)
    elif args.output_dir is None:
        raise ValueError("--output_dir is required when using --usd_path.")
    else:
        render_single(args, args.usd_path, args.output_dir)
268
+
269
+
270
+ if __name__ == "__main__":
271
+ main()
embodied_gen/scripts/room_gen/render_usd.py ADDED
@@ -0,0 +1,1606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import logging
22
+ import math
23
+ import shutil
24
+ import tempfile
25
+ from collections.abc import Callable
26
+ from pathlib import Path
27
+
28
+ import bpy
29
+ import cv2
30
+ import numpy as np
31
+ from mathutils import Euler, Matrix, Vector
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the USD rendering script.

    Required options are ``--usd_path``, ``--output_dir``, ``--camera_xyz``
    and ``--camera_rotation_deg``. The GLB and flow-camera options are
    optional and default to ``None`` (``--glb_path`` defaults to ``""``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    xyz_names = ("X", "Y", "Z")
    rotation_names = ("RX", "RY", "RZ")

    parser = argparse.ArgumentParser()

    def add_triplet(flag: str, names: tuple[str, str, str], **extra) -> None:
        """Register an option that takes exactly three floats."""
        parser.add_argument(
            flag, type=float, nargs=3, metavar=names, **extra
        )

    parser.add_argument("--usd_path", required=True, type=Path)
    parser.add_argument("--glb_path", type=str, default="")
    add_triplet("--glb_xyz", xyz_names)
    add_triplet("--glb_rotation_deg", rotation_names)
    parser.add_argument("--output_dir", required=True, type=Path)
    parser.add_argument(
        "--render_passes",
        nargs="+",
        choices=("rgb", "depth", "normal", "mesh", "instance_seg", "flow"),
        default=("rgb",),
    )
    parser.add_argument(
        "--depth_mode",
        choices=("normalized", "metric"),
        default="normalized",
    )
    parser.add_argument(
        "--resolution",
        type=int,
        nargs=2,
        metavar=("WIDTH", "HEIGHT"),
        default=(1920, 1080),
    )
    parser.add_argument("--samples", type=int, default=1024)
    add_triplet("--camera_xyz", xyz_names, required=True)
    add_triplet("--camera_rotation_deg", rotation_names, required=True)
    add_triplet("--flow_camera_xyz", xyz_names)
    add_triplet("--flow_camera_rotation_deg", rotation_names)

    # Scalar float options, registered in the same order as before so the
    # generated help text is unchanged.
    float_options = (
        ("--focal_length_mm", 20.0),
        ("--exposure", 2.2),
        ("--world_strength", 8.0),
        ("--fill_light_energy", 14000.0),
    )
    for flag, default_value in float_options:
        parser.add_argument(flag, type=float, default=default_value)
    return parser
104
+
105
+
106
def _parse_args() -> argparse.Namespace:
    """Parse the process command line with the parser from ``build_arg_parser``."""
    parser = build_arg_parser()
    return parser.parse_args()
108
+
109
+
110
+ class RenderUsd:
111
+ """USD renderer for RGB, depth, normal, mesh, segmentation, and flow."""
112
+
113
+ def __init__(
114
+ self,
115
+ *,
116
+ usd_path: Path,
117
+ glb_path: Path | str | None,
118
+ glb_xyz: tuple[float, float, float] | list[float] | None,
119
+ glb_rotation_deg: tuple[float, float, float] | list[float] | None,
120
+ output_dir: Path,
121
+ render_passes: tuple[str, ...] | list[str],
122
+ depth_mode: str,
123
+ resolution: tuple[int, int] | list[int],
124
+ samples: int,
125
+ camera_xyz: tuple[float, float, float] | list[float],
126
+ camera_rotation_deg: tuple[float, float, float] | list[float],
127
+ flow_camera_xyz: tuple[float, float, float] | list[float] | None,
128
+ flow_camera_rotation_deg: (
129
+ tuple[float, float, float] | list[float] | None
130
+ ),
131
+ focal_length_mm: float,
132
+ exposure: float,
133
+ world_strength: float,
134
+ fill_light_energy: float,
135
+ ) -> None:
136
+ """Initialize renderer configuration independent of CLI parsing."""
137
+ self.usd_path = usd_path
138
+ self.glb_path = self.normalize_optional_path(glb_path)
139
+ self.glb_xyz = tuple(glb_xyz) if glb_xyz is not None else None
140
+ self.glb_rotation_deg = (
141
+ tuple(glb_rotation_deg) if glb_rotation_deg is not None else None
142
+ )
143
+ self.output_dir = output_dir
144
+ self.render_passes = tuple(render_passes)
145
+ self.depth_mode = depth_mode
146
+ self.resolution = tuple(resolution)
147
+ self.samples = samples
148
+ self.camera_xyz = tuple(camera_xyz)
149
+ self.camera_rotation_deg = tuple(camera_rotation_deg)
150
+ self.flow_camera_xyz = (
151
+ tuple(flow_camera_xyz) if flow_camera_xyz is not None else None
152
+ )
153
+ self.flow_camera_rotation_deg = (
154
+ tuple(flow_camera_rotation_deg)
155
+ if flow_camera_rotation_deg is not None
156
+ else None
157
+ )
158
+ self.focal_length_mm = focal_length_mm
159
+ self.exposure = exposure
160
+ self.world_strength = world_strength
161
+ self.fill_light_energy = fill_light_energy
162
+ self.temp_dir: Path | None = None
163
+
164
+ @classmethod
165
+ def from_args(cls, args: argparse.Namespace) -> RenderUsd:
166
+ """Build a renderer from parsed CLI arguments."""
167
+ return cls(
168
+ usd_path=args.usd_path,
169
+ glb_path=args.glb_path,
170
+ glb_xyz=args.glb_xyz,
171
+ glb_rotation_deg=args.glb_rotation_deg,
172
+ output_dir=args.output_dir,
173
+ render_passes=args.render_passes,
174
+ depth_mode=args.depth_mode,
175
+ resolution=args.resolution,
176
+ samples=args.samples,
177
+ camera_xyz=args.camera_xyz,
178
+ camera_rotation_deg=args.camera_rotation_deg,
179
+ flow_camera_xyz=args.flow_camera_xyz,
180
+ flow_camera_rotation_deg=args.flow_camera_rotation_deg,
181
+ focal_length_mm=args.focal_length_mm,
182
+ exposure=args.exposure,
183
+ world_strength=args.world_strength,
184
+ fill_light_energy=args.fill_light_energy,
185
+ )
186
+
187
@property
def scene(self) -> bpy.types.Scene:
    """Return the scene of the current Blender context."""
    context = bpy.context
    return context.scene
190
+
191
def normalize_optional_path(
    self, path_value: Path | str | None
) -> Path | None:
    """Convert an optional path argument into ``Path`` or ``None``.

    ``Path`` instances are returned unchanged. Strings are stripped of
    surrounding whitespace; an empty result is treated as "not provided".
    """
    if isinstance(path_value, Path):
        return path_value
    if path_value is None:
        return None

    stripped = path_value.strip()
    return Path(stripped) if stripped else None
204
+
205
def build_output_path(self, filename: str) -> Path:
    """Return ``filename`` located inside ``self.output_dir``."""
    render_dir = self.output_dir
    return render_dir / filename
208
+
209
def build_temp_path(self, filename: str) -> Path:
    """Return ``filename`` located inside the temporary render directory.

    Raises:
        RuntimeError: If ``self.temp_dir`` has not been set yet.
    """
    temp_root = self.temp_dir
    if temp_root is not None:
        return temp_root / filename
    raise RuntimeError("Temporary render directory is not initialized.")
216
+
217
def get_rgb_output_path(self) -> Path:
    """Return the output path of the RGB render (``render_rgb.png``)."""
    filename = "render_rgb.png"
    return self.build_output_path(filename)
219
+
220
def get_depth_vis_output_path(self, output_path: Path) -> Path:
    """Return the output path of the depth preview (``render_depth.png``).

    ``output_path`` is accepted but ignored; the result always lives in the
    render output directory.
    """
    filename = "render_depth.png"
    return self.build_output_path(filename)
223
+
224
def get_normal_output_path(self, output_path: Path) -> Path:
    """Return the output path of the normal pass (``render_normal.png``).

    ``output_path`` is accepted but ignored.
    """
    filename = "render_normal.png"
    return self.build_output_path(filename)
227
+
228
def get_mesh_output_path(self, output_path: Path) -> Path:
    """Return the output path of the mesh pass (``render_mesh.png``).

    ``output_path`` is accepted but ignored.
    """
    filename = "render_mesh.png"
    return self.build_output_path(filename)
231
+
232
def get_instance_seg_vis_output_path(self, output_path: Path) -> Path:
    """Return the output path of the instance-segmentation preview.

    The file is named ``render_instance_seg_vis.png``; ``output_path`` is
    accepted but ignored.
    """
    filename = "render_instance_seg_vis.png"
    return self.build_output_path(filename)
235
+
236
def get_instance_seg_temp_path(self, output_path: Path) -> Path:
    """Return the temporary EXR path used for raw instance segmentation.

    The file is named ``render_instance_seg_raw_0001.exr`` inside the
    temporary directory; ``output_path`` is accepted but ignored.
    """
    filename = "render_instance_seg_raw_0001.exr"
    return self.build_temp_path(filename)
239
+
240
def get_flow_output_path(self, output_path: Path) -> Path:
    """Return the output path of the flow array (``render_flow.npy``).

    ``output_path`` is accepted but ignored.
    """
    filename = "render_flow.npy"
    return self.build_output_path(filename)
243
+
244
def get_flow_valid_output_path(self, output_path: Path) -> Path:
    """Return the output path of the flow validity array.

    The file is named ``render_flow_valid.npy``; ``output_path`` is accepted
    but ignored.
    """
    filename = "render_flow_valid.npy"
    return self.build_output_path(filename)
247
+
248
def get_flow_vis_output_path(self, output_path: Path) -> Path:
    """Return the output path of the flow preview (``render_flow_vis.png``).

    ``output_path`` is accepted but ignored.
    """
    filename = "render_flow_vis.png"
    return self.build_output_path(filename)
251
+
252
def get_flow_depth_temp_path(self, output_path: Path) -> Path:
    """Return the temporary EXR path used for the flow depth render.

    The file is named ``render_flow_depth_raw_0001.exr`` inside the
    temporary directory; ``output_path`` is accepted but ignored.
    """
    filename = "render_flow_depth_raw_0001.exr"
    return self.build_temp_path(filename)
255
+
256
def get_depth_gray_temp_path(self, output_path: Path) -> Path:
    """Return the temporary PNG path used for the grayscale depth image.

    The file is named ``render_depth_gray_0001.png`` inside the temporary
    directory; ``output_path`` is accepted but ignored.
    """
    filename = "render_depth_gray_0001.png"
    return self.build_temp_path(filename)
259
+
260
def get_composite_output_path(
    self, render_passes: list[str] | tuple[str, ...]
) -> Path:
    """Return the composite image path named after the given passes.

    Pass names are joined with underscores, e.g. ``("rgb", "depth")`` gives
    ``render_composite_rgb_depth.png`` in the output directory.
    """
    pass_names = "_".join(render_passes)
    filename = f"render_composite_{pass_names}.png"
    return self.build_output_path(filename)
265
+
266
def build_occurrence_output_path(
    self, output_path: Path, occurrence_index: int
) -> Path:
    """Derive the path for the N-th occurrence of a repeated output.

    The first occurrence keeps ``output_path`` unchanged; later ones get
    ``_<occurrence_index>`` appended to the file stem.

    Raises:
        ValueError: If ``occurrence_index`` is smaller than 1.
    """
    if occurrence_index < 1:
        raise ValueError("occurrence_index must be greater than 0.")
    if occurrence_index > 1:
        return output_path.with_name(
            f"{output_path.stem}_{occurrence_index}{output_path.suffix}"
        )
    return output_path
278
+
279
def iter_render_pass_occurrences(self) -> list[tuple[str, int]]:
    """Pair each requested render pass with its 1-based repeat count.

    The list follows the order of ``self.render_passes``; the N-th time a
    pass name appears it is paired with ``N``.
    """
    tally: dict[str, int] = {}
    numbered: list[tuple[str, int]] = []
    for pass_name in self.render_passes:
        tally[pass_name] = tally.get(pass_name, 0) + 1
        numbered.append((pass_name, tally[pass_name]))
    return numbered
290
+
291
def get_temp_output_slot_prefix(self, temp_output_path: Path) -> str:
    """Strip the trailing numeric frame suffix from a temp file stem.

    For ``render_depth_gray_0001.png`` this returns ``render_depth_gray_``.

    Raises:
        ValueError: If the stem does not end in ``_<digits>``.
    """
    prefix, separator, frame = temp_output_path.stem.rpartition("_")
    has_frame_suffix = bool(separator) and frame.isdigit()
    if not has_frame_suffix:
        raise ValueError(
            f"Unexpected temporary output filename: {temp_output_path.name}"
        )
    return f"{prefix}_"
299
+
300
def get_mesh_objects(self) -> list[bpy.types.Object]:
    """Return all scene objects whose ``type`` equals ``"MESH"``."""
    scene_objects = self.scene.objects
    return [
        candidate for candidate in scene_objects if candidate.type == "MESH"
    ]
302
+
303
def clear_scene(self) -> None:
    """Reset Blender to factory settings with an empty scene."""
    bpy.ops.wm.read_factory_settings(use_empty=True)
305
+
306
def import_usd(self) -> None:
    """Import the configured USD file into the current Blender scene.

    Raises:
        FileNotFoundError: If ``self.usd_path`` does not exist.
        RuntimeError: If the USD import operator does not report
            ``FINISHED``.
    """
    if not self.usd_path.exists():
        raise FileNotFoundError(f"USD file not found: {self.usd_path}")

    # Check the operator status the same way the GLB import does, so an
    # import that did not finish fails here instead of surfacing later as
    # an unexplained empty scene.
    result = bpy.ops.wm.usd_import(filepath=str(self.usd_path))
    if "FINISHED" not in result:
        raise RuntimeError(f"Failed to import USD file: {self.usd_path}")
310
+
311
def validate_glb_args(self) -> None:
    """Check that the optional GLB settings are given all together or not at all.

    When a GLB path is configured, it must exist and carry a ``.glb``
    suffix (case-insensitive).

    Raises:
        ValueError: If only some of the GLB settings are set, or the path
            does not end in ``.glb``.
        FileNotFoundError: If the GLB path does not exist.
    """
    provided = [
        value is not None
        for value in (self.glb_path, self.glb_xyz, self.glb_rotation_deg)
    ]
    if any(provided) and not all(provided):
        raise ValueError(
            "--glb_path, --glb_xyz, and --glb_rotation_deg must be "
            "provided together."
        )

    glb_file = self.glb_path
    if glb_file is None:
        return
    if not glb_file.exists():
        raise FileNotFoundError(f"GLB file not found: {self.glb_path}")
    if glb_file.suffix.lower() != ".glb":
        raise ValueError(
            f"Expected a .glb asset, but got: {self.glb_path}"
        )
330
+
331
def enable_gltf_importer(self) -> None:
    """Enable the ``io_scene_gltf2`` add-on unless it is already enabled.

    Raises:
        RuntimeError: If enabling the add-on raises any exception; the
            original exception is chained as the cause.
    """
    addon_name = "io_scene_gltf2"
    enabled_addons = bpy.context.preferences.addons
    if addon_name not in enabled_addons:
        try:
            bpy.ops.preferences.addon_enable(module=addon_name)
        except Exception as exc:
            raise RuntimeError(
                "Failed to enable Blender glTF importer add-on."
            ) from exc
343
+
344
def import_glb_asset(self) -> list[bpy.types.Object]:
    """Import the configured GLB file and return the objects it added.

    New objects are found by comparing object pointers in ``bpy.data``
    before and after the import. Returns an empty list when no GLB path is
    configured.

    Raises:
        RuntimeError: If the glTF import operator does not report
            ``FINISHED``.
        ValueError: If the import added no objects.
    """
    if self.glb_path is None:
        return []

    self.enable_gltf_importer()
    known_pointers = {obj.as_pointer() for obj in bpy.data.objects}
    status = bpy.ops.import_scene.gltf(filepath=str(self.glb_path))
    if "FINISHED" not in status:
        raise RuntimeError(f"Failed to import GLB asset: {self.glb_path}")

    new_objects = []
    for obj in bpy.data.objects:
        if obj.as_pointer() in known_pointers:
            continue
        new_objects.append(obj)

    if not new_objects:
        raise ValueError(
            f"No objects were imported from GLB asset: {self.glb_path}"
        )
    return new_objects
365
+
366
def get_imported_root_objects(
    self, imported_objects: list[bpy.types.Object]
) -> list[bpy.types.Object]:
    """Select the imported objects that have no imported parent.

    An object counts as a root when it has no parent, or when its parent is
    not among ``imported_objects``. If no root is found, the input list
    itself is returned.
    """
    imported_pointers = {obj.as_pointer() for obj in imported_objects}
    roots = []
    for obj in imported_objects:
        parent = obj.parent
        if parent is None or parent.as_pointer() not in imported_pointers:
            roots.append(obj)
    return roots if roots else imported_objects
378
+
379
def place_glb_asset(
    self, imported_objects: list[bpy.types.Object]
) -> None:
    """Apply the configured GLB position and rotation to the imported asset.

    The transform is built with ``build_camera_matrix_world`` and
    left-multiplied onto the world matrix of every root object returned by
    ``get_imported_root_objects``. Does nothing for an empty list.

    Raises:
        ValueError: If the GLB position or rotation is not set.
    """
    if not imported_objects:
        return

    transform_ready = (
        self.glb_xyz is not None and self.glb_rotation_deg is not None
    )
    if not transform_ready:
        raise ValueError("GLB transform arguments are not initialized.")

    placement = self.build_camera_matrix_world(
        self.glb_xyz,
        self.glb_rotation_deg,
    )
    root_objects = self.get_imported_root_objects(imported_objects)
    for root in root_objects:
        current_world = root.matrix_world.copy()
        root.matrix_world = placement @ current_world
    bpy.context.view_layer.update()
395
+
396
def get_scene_bbox(self) -> tuple[Vector, Vector]:
    """Return the min and max corners of the box enclosing all meshes.

    Each mesh object's ``bound_box`` corners are transformed by its
    ``matrix_world`` before the per-axis extrema are taken.

    Raises:
        ValueError: If the scene contains no mesh objects.
    """
    meshes = self.get_mesh_objects()
    if not meshes:
        raise ValueError("No mesh objects found after USD import.")

    corners: list[Vector] = [
        obj.matrix_world @ Vector(corner)
        for obj in meshes
        for corner in obj.bound_box
    ]
    xs = [corner.x for corner in corners]
    ys = [corner.y for corner in corners]
    zs = [corner.z for corner in corners]

    lower = Vector((min(xs), min(ys), min(zs)))
    upper = Vector((max(xs), max(ys), max(zs)))
    return lower, upper
423
+
424
def create_camera(self) -> bpy.types.Object:
    """Create the primary render camera and make it the scene camera.

    The camera is placed at ``self.camera_xyz`` with
    ``self.camera_rotation_deg`` converted to radians, uses
    ``self.focal_length_mm`` as lens value, and a clip range of 0.01 to
    1000.0.

    Returns:
        The newly created camera object.

    Raises:
        ValueError: If ``self.camera_xyz`` is ``None``.
    """
    if self.camera_xyz is None:
        raise ValueError("--camera_xyz is required.")

    location = Vector(tuple(self.camera_xyz))
    rotation_rad = self.get_rotation_radians(self.camera_rotation_deg)
    bpy.ops.object.camera_add(location=location, rotation=rotation_rad)
    # The object is read from the context right after the add operator.
    camera = bpy.context.object
    camera.rotation_mode = "XYZ"
    camera.data.lens = self.focal_length_mm
    camera.data.clip_start = 0.01
    camera.data.clip_end = 1000.0
    self.scene.camera = camera
    return camera
439
+
440
def add_light_rig(
    self,
    diagonal: float,
    center: Vector,
    top_z: float,
    *,
    area_energy: float,
    sun_energy: float,
    prefix: str,
) -> None:
    """Add a disk-shaped area light and a sun light positioned from the scene extents.

    Args:
        diagonal: Length used to scale light offsets and the area light
            size.
        center: Point whose x/y coordinates anchor both lights.
        top_z: Height from which the lights are offset upwards.
        area_energy: Energy assigned to the area light.
        sun_energy: Energy assigned to the sun light.
        prefix: Prepended to the light object names (``<prefix>Area`` and
            ``<prefix>Sun``).
    """
    # Area light centred in x/y, half a diagonal above ``top_z``.
    bpy.ops.object.light_add(
        type="AREA",
        location=(center.x, center.y, top_z + 0.5 * diagonal),
    )
    # The light is read from the context right after the add operator.
    area = bpy.context.object
    area.name = f"{prefix}Area"
    area.data.energy = area_energy
    area.data.shape = "DISK"
    # The size never drops below 2.0, even for small diagonals.
    area.data.size = max(diagonal, 2.0)

    # Sun light offset by one diagonal along +x, -y and +z.
    bpy.ops.object.light_add(
        type="SUN",
        location=(
            center.x + diagonal,
            center.y - diagonal,
            top_z + diagonal,
        ),
    )
    sun = bpy.context.object
    sun.name = f"{prefix}Sun"
    sun.data.energy = sun_energy
471
+
472
def add_fill_light(
    self,
    diagonal: float,
    center: Vector,
    top_z: float,
    energy: float,
) -> None:
    """Add a disk-shaped area light named ``GlobalFillArea``.

    Nothing is added when ``energy`` is zero or negative.

    Args:
        diagonal: Length used to scale the light offset and size.
        center: Point whose x/y coordinates anchor the light.
        top_z: Height from which the light is offset upwards.
        energy: Energy assigned to the light.
    """
    if energy <= 0.0:
        return

    # Placed 0.35 diagonals above ``top_z`` with zero rotation.
    bpy.ops.object.light_add(
        type="AREA",
        location=(center.x, center.y, top_z + 0.35 * diagonal),
        rotation=(0.0, 0.0, 0.0),
    )
    # The light is read from the context right after the add operator.
    area = bpy.context.object
    area.name = "GlobalFillArea"
    area.data.energy = energy
    area.data.shape = "DISK"
    # The size never drops below 3.0.
    area.data.size = max(diagonal * 0.9, 3.0)
492
+
493
def ensure_lighting(
    self, diagonal: float, center: Vector, top_z: float
) -> None:
    """Add a fallback light rig when the scene has no light object.

    If any scene object has type ``"LIGHT"`` nothing is added. Otherwise
    ``add_light_rig`` is called with area energy 5000.0, sun energy 1.5 and
    the name prefix ``"Fallback"``.
    """
    for scene_object in self.scene.objects:
        if scene_object.type == "LIGHT":
            return

    self.add_light_rig(
        diagonal,
        center,
        top_z,
        area_energy=5000.0,
        sun_energy=1.5,
        prefix="Fallback",
    )
507
+
508
def set_world_strength(self, strength: float) -> None:
    """Set the ``Strength`` input of every world background node.

    Does nothing when the scene has no world. Node usage is enabled on the
    world if needed, and a background node is created and linked to a world
    output node when none exists.

    Args:
        strength: Value written to each background node's ``Strength``
            input.
    """
    world = self.scene.world
    if world is None:
        return

    if not world.use_nodes:
        world.use_nodes = True

    tree = world.node_tree
    background_nodes = [
        node for node in tree.nodes if node.type == "BACKGROUND"
    ]
    if not background_nodes:
        # No background shader yet: create one and wire it to an output
        # node, reusing an existing world output when there is one.
        background = tree.nodes.new(type="ShaderNodeBackground")
        output = next(
            (node for node in tree.nodes if node.type == "OUTPUT_WORLD"),
            None,
        )
        if output is None:
            output = tree.nodes.new(type="ShaderNodeOutputWorld")
        tree.links.new(
            background.outputs["Background"], output.inputs["Surface"]
        )
        background_nodes = [background]

    for background in background_nodes:
        background.inputs["Strength"].default_value = strength
535
+
536
def ensure_world(self) -> bool:
    """Make sure the scene has a world shader.

    When a world already exists only its background strength is updated to
    ``self.world_strength``. Otherwise a new world named ``RenderWorld`` is
    built from a sky texture feeding a background node.

    Returns:
        ``True`` if a new world was created, ``False`` if an existing one
        was reused.
    """
    if self.scene.world is not None:
        self.set_world_strength(self.world_strength)
        return False

    world = bpy.data.worlds.new(name="RenderWorld")
    world.use_nodes = True
    tree = world.node_tree
    # Start from an empty node tree before adding the nodes below.
    tree.nodes.clear()

    output = tree.nodes.new(type="ShaderNodeOutputWorld")
    background = tree.nodes.new(type="ShaderNodeBackground")
    sky = tree.nodes.new(type="ShaderNodeTexSky")

    background.inputs["Strength"].default_value = self.world_strength

    # Sky colour -> background shader -> world surface output.
    tree.links.new(sky.outputs["Color"], background.inputs["Color"])
    tree.links.new(
        background.outputs["Background"], output.inputs["Surface"]
    )

    self.scene.world = world
    return True
560
+
561
def configure_cycles(self) -> None:
    """Configure the scene to render with Cycles on CUDA GPU devices.

    Sets the sample count and resolution from the renderer configuration,
    PNG output, and a non-transparent film. Only devices of type ``CUDA``
    are enabled.

    Raises:
        RuntimeError: If the Cycles preferences list no CUDA device.
    """
    self.scene.render.engine = "CYCLES"
    self.scene.cycles.device = "GPU"
    self.scene.cycles.samples = self.samples
    self.scene.render.resolution_x = self.resolution[0]
    self.scene.render.resolution_y = self.resolution[1]
    self.scene.render.image_settings.file_format = "PNG"
    self.scene.render.film_transparent = False

    prefs = bpy.context.preferences.addons["cycles"].preferences
    prefs.compute_device_type = "CUDA"
    # ``get_devices`` is called before ``prefs.devices`` is inspected.
    prefs.get_devices()
    cuda_devices = [
        device for device in prefs.devices if device.type == "CUDA"
    ]
    if not cuda_devices:
        raise RuntimeError("No CUDA device found in Blender Cycles.")
    # Enable CUDA devices and disable every other listed device.
    for device in prefs.devices:
        device.use = device.type == "CUDA"
580
+
581
def configure_color_management(self) -> None:
    """Apply ``self.exposure`` to the scene's view settings."""
    view_settings = self.scene.view_settings
    view_settings.exposure = self.exposure
583
+
584
def snapshot_render_state(
    self,
    view_layer: bpy.types.ViewLayer,
    *,
    include_filepath: bool = False,
    include_material_override: bool = False,
    include_use_pass_z: bool = False,
    include_use_pass_object_index: bool = False,
) -> dict[str, object]:
    """Record the render settings that temporary passes overwrite.

    The result always holds film transparency, the view/colour settings,
    the image format settings, ``use_nodes`` and the Cycles sample count.
    Each ``include_*`` flag adds one extra entry read from the render
    settings or from ``view_layer``.

    Returns:
        A dict suitable for ``restore_render_state``.
    """
    scene = self.scene
    render = scene.render
    view_settings = scene.view_settings
    image_settings = render.image_settings

    snapshot: dict[str, object] = dict(
        film_transparent=render.film_transparent,
        view_transform=view_settings.view_transform,
        look=view_settings.look,
        exposure=view_settings.exposure,
        gamma=view_settings.gamma,
        file_format=image_settings.file_format,
        color_mode=image_settings.color_mode,
        color_depth=image_settings.color_depth,
        use_nodes=scene.use_nodes,
        samples=scene.cycles.samples,
    )

    # Optional entries are only present when explicitly requested, so that
    # restoring leaves untouched settings alone.
    if include_filepath:
        snapshot["filepath"] = render.filepath
    if include_material_override:
        snapshot["material_override"] = view_layer.material_override
    if include_use_pass_z:
        snapshot["use_pass_z"] = view_layer.use_pass_z
    if include_use_pass_object_index:
        snapshot["use_pass_object_index"] = view_layer.use_pass_object_index
    return snapshot
615
+
616
def restore_render_state(
    self, state: dict[str, object], view_layer: bpy.types.ViewLayer
) -> None:
    """Write back settings previously captured by ``snapshot_render_state``.

    The always-present entries are restored first, in a fixed order;
    optional entries are restored only when they exist in ``state``.
    """
    scene = self.scene
    render = scene.render
    view_settings = scene.view_settings
    image_settings = render.image_settings

    render.film_transparent = state["film_transparent"]
    for key in ("view_transform", "look", "exposure", "gamma"):
        setattr(view_settings, key, state[key])
    for key in ("file_format", "color_mode", "color_depth"):
        setattr(image_settings, key, state[key])
    scene.use_nodes = state["use_nodes"]
    scene.cycles.samples = state["samples"]

    if "filepath" in state:
        render.filepath = state["filepath"]
    for key in ("material_override", "use_pass_z", "use_pass_object_index"):
        if key in state:
            setattr(view_layer, key, state[key])
638
+
639
def apply_raw_preview_settings(
    self,
    *,
    use_nodes: bool,
    samples: int,
    color_mode: str,
    color_depth: str,
) -> None:
    """Switch the scene to the settings shared by auxiliary preview passes.

    Enables a transparent film, the ``Raw`` view transform with look
    ``None``, exposure 0.0 and gamma 1.0, and PNG output with the given
    colour mode and depth. ``use_nodes`` and ``samples`` are applied as
    given.
    """
    scene = self.scene
    render = scene.render
    view_settings = scene.view_settings
    image_settings = render.image_settings

    render.film_transparent = True

    raw_view = (
        ("view_transform", "Raw"),
        ("look", "None"),
        ("exposure", 0.0),
        ("gamma", 1.0),
    )
    for attribute, value in raw_view:
        setattr(view_settings, attribute, value)

    scene.use_nodes = use_nodes
    scene.cycles.samples = samples

    # The file format is set before colour mode and depth.
    image_settings.file_format = "PNG"
    image_settings.color_mode = color_mode
    image_settings.color_depth = color_depth
658
+
659
def clear_compositor_tree(self) -> bpy.types.NodeTree:
    """Enable compositor nodes and empty the scene's node tree.

    Returns:
        The scene node tree after all of its nodes were removed.
    """
    scene = self.scene
    # Nodes are enabled before the tree is fetched.
    scene.use_nodes = True
    compositor = scene.node_tree
    compositor.nodes.clear()
    return compositor
665
+
666
def remove_render_nodes(self, created_nodes: list[bpy.types.Node]) -> None:
    """Delete the given compositor nodes from the scene node tree.

    Nodes whose name is no longer present in the tree are skipped. Nothing
    happens for an empty list or when the scene has no node tree.
    """
    if created_nodes:
        node_tree = self.scene.node_tree
        if node_tree is not None:
            tree_nodes = node_tree.nodes
            for node in created_nodes:
                if node.name in tree_nodes:
                    tree_nodes.remove(node)
678
+
679
def render_material_override_pass(
    self,
    preview_output_path: Path,
    material_factory: Callable[[], bpy.types.Material],
    *,
    color_mode: str,
) -> None:
    """Render a pass with a temporary material override.

    The current render state is captured first and restored afterwards,
    and the material produced by ``material_factory`` is removed again,
    even if rendering fails.

    Args:
        preview_output_path: File the still image is written to; its parent
            directory is created if missing.
        material_factory: Callable returning the override material.
        color_mode: Image colour mode used for the output.
    """
    preview_output_path.parent.mkdir(parents=True, exist_ok=True)
    view_layer = self.scene.view_layers["ViewLayer"]
    state = self.snapshot_render_state(
        view_layer,
        include_filepath=True,
        include_material_override=True,
    )

    material = material_factory()
    try:
        # Compositor nodes are off and the sample count is capped at 64.
        self.apply_raw_preview_settings(
            use_nodes=False,
            samples=min(int(state["samples"]), 64),
            color_mode=color_mode,
            color_depth="8",
        )
        self.scene.render.filepath = str(preview_output_path)
        view_layer.material_override = material
        bpy.ops.render.render(write_still=True)
    finally:
        # Restore settings before the temporary material is removed.
        self.restore_render_state(state, view_layer)
        bpy.data.materials.remove(material, do_unlink=True)
709
+
710
def render_temp_output_pass(
    self,
    output_path: Path,
    temp_output_path: Path,
    *,
    add_output_node: Callable[
        [Path], tuple[bpy.types.NodeTree, list[bpy.types.Node]]
    ],
    load_temp_output: Callable[[Path], np.ndarray],
    finalize_output: Callable[[np.ndarray], None],
    color_mode: str,
    color_depth: str,
    enable_depth_pass: bool = False,
    enable_object_index_pass: bool = False,
) -> None:
    """Render a temporary compositor output and finalize it.

    The render runs with a single sample and a freshly cleared compositor
    tree. The temporary file is deleted and the captured render state is
    restored afterwards, even on failure.

    Args:
        output_path: Final output location; its parent directory is created
            if missing and it is passed to ``add_output_node``.
        temp_output_path: File expected from the compositor; removed before
            and after the render.
        add_output_node: Builds the compositor output and returns the tree
            together with the nodes it created.
        load_temp_output: Reads ``temp_output_path`` into an array.
        finalize_output: Consumes the loaded array.
        color_mode: Image colour mode for the render settings.
        color_depth: Image colour depth for the render settings.
        enable_depth_pass: Turn on the view layer's Z pass for this render.
        enable_object_index_pass: Turn on the view layer's object-index
            pass for this render.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    view_layer = self.scene.view_layers["ViewLayer"]
    # A pass flag is only snapshotted (and later restored) when this call
    # is going to change it.
    state = self.snapshot_render_state(
        view_layer,
        include_use_pass_z=enable_depth_pass,
        include_use_pass_object_index=enable_object_index_pass,
    )
    created_nodes: list[bpy.types.Node] = []

    try:
        # Drop a stale file so a failed render cannot be mistaken for output.
        if temp_output_path.exists():
            temp_output_path.unlink()

        self.apply_raw_preview_settings(
            use_nodes=True,
            samples=1,
            color_mode=color_mode,
            color_depth=color_depth,
        )
        if enable_depth_pass:
            view_layer.use_pass_z = True
        if enable_object_index_pass:
            view_layer.use_pass_object_index = True

        self.clear_compositor_tree()
        _, created_nodes = add_output_node(output_path)
        bpy.ops.render.render(write_still=False)
        finalize_output(load_temp_output(temp_output_path))
    finally:
        self.remove_render_nodes(created_nodes)
        if temp_output_path.exists():
            temp_output_path.unlink()
        self.restore_render_state(state, view_layer)
759
+
760
def get_rotation_radians(
    self, rotation_deg: tuple[float, float, float] | list[float]
) -> tuple[float, float, float]:
    """Convert each angle in ``rotation_deg`` from degrees to radians."""
    return tuple(map(math.radians, rotation_deg))
764
+
765
def validate_flow_args(self) -> None:
    """Check the optional flow-camera pose and fill in a default if absent.

    When neither flow position nor flow rotation is set, the flow camera
    reuses the primary camera rotation and the primary position shifted by
    0.5 along the first axis.

    Raises:
        ValueError: If only one of flow position and flow rotation is set.
    """
    xyz_missing = self.flow_camera_xyz is None
    rotation_missing = self.flow_camera_rotation_deg is None
    if xyz_missing != rotation_missing:
        raise ValueError(
            "--flow_camera_xyz and --flow_camera_rotation_deg must be "
            "provided together."
        )
    if xyz_missing:
        shifted = list(self.camera_xyz)
        shifted[0] = shifted[0] + 0.5
        self.flow_camera_xyz = tuple(shifted)
        self.flow_camera_rotation_deg = tuple(self.camera_rotation_deg)
779
+
780
def build_depth_preview_node(
    self,
    tree: bpy.types.NodeTree,
    render_layers: bpy.types.CompositorNodeRLayers,
    camera: bpy.types.Camera,
    depth_mode: str,
) -> bpy.types.Node:
    """Create the compositor node that turns raw depth into a preview map.

    ``"normalized"`` uses a normalize node. ``"metric"`` uses a clamped
    map-range node from the camera clip range to 0.0-1.0. In both cases the
    render layer's ``Depth`` output is linked to the node's first input.

    Raises:
        ValueError: If ``depth_mode`` is neither ``"normalized"`` nor
            ``"metric"``.
    """
    if depth_mode not in ("normalized", "metric"):
        raise ValueError(f"Unsupported depth mode: {depth_mode}")

    if depth_mode == "normalized":
        preview_node = tree.nodes.new(type="CompositorNodeNormalize")
    else:
        preview_node = tree.nodes.new(type="CompositorNodeMapRange")
        range_inputs = (
            ("From Min", camera.clip_start),
            ("From Max", camera.clip_end),
            ("To Min", 0.0),
            ("To Max", 1.0),
        )
        for socket_name, value in range_inputs:
            preview_node.inputs[socket_name].default_value = value
        preview_node.use_clamp = True

    tree.links.new(render_layers.outputs["Depth"], preview_node.inputs[0])
    return preview_node
804
+
805
def build_depth_vis_output(
    self,
    tree: bpy.types.NodeTree,
    depth_preview_node: bpy.types.Node,
    output_path: Path,
) -> Path:
    """Add a file-output node that writes the depth preview as 8-bit BW PNG.

    The node writes into the directory of the temporary grayscale depth
    path, using that file's slot prefix, and is fed from the first output
    of ``depth_preview_node``.

    Returns:
        The temporary path the compositor is expected to write.
    """
    gray_temp_path = self.get_depth_gray_temp_path(output_path)
    slot_prefix = self.get_temp_output_slot_prefix(gray_temp_path)

    file_output = tree.nodes.new(type="CompositorNodeOutputFile")
    file_output.base_path = str(gray_temp_path.parent)
    file_output.file_slots[0].path = slot_prefix

    image_format = file_output.format
    image_format.file_format = "PNG"
    image_format.color_mode = "BW"
    image_format.color_depth = "8"

    tree.links.new(depth_preview_node.outputs[0], file_output.inputs[0])
    return gray_temp_path
823
+
824
def configure_auxiliary_outputs(
    self,
    output_path: Path,
    render_passes: tuple[str, ...] | list[str],
    depth_mode: str,
) -> list[tuple[Path, Path]]:
    """Set up the compositor outputs that must be written during the base render.

    Only the ``"depth"`` pass needs such an output. When it is requested the
    Z pass is enabled on the ``"ViewLayer"`` view layer, the compositor tree
    is rebuilt, and a grayscale depth file output is attached.

    Returns:
        ``(temporary_path, final_path)`` pairs to finalize after rendering;
        empty when ``"depth"`` is not in ``render_passes``.
    """
    view_layer = self.scene.view_layers["ViewLayer"]
    if "depth" not in render_passes:
        return []
    view_layer.use_pass_z = True

    tree = self.clear_compositor_tree()
    render_layers = tree.nodes.new(type="CompositorNodeRLayers")

    preview_node = self.build_depth_preview_node(
        tree,
        render_layers,
        self.scene.camera.data,
        depth_mode,
    )
    gray_temp_path = self.build_depth_vis_output(
        tree=tree,
        depth_preview_node=preview_node,
        output_path=output_path,
    )
    final_path = self.get_depth_vis_output_path(output_path)
    return [(gray_temp_path, final_path)]
859
+
860
def finalize_depth_output(self, temp_path: Path, output_path: Path) -> None:
    """Turn the grayscale depth temp image into the final JET-colored preview.

    Any existing ``output_path`` is removed first. The temporary image is
    always deleted once processing was attempted, whether or not it succeeded.

    Raises:
        FileNotFoundError: If the temp image is missing or cannot be read.
        RuntimeError: If the colored preview cannot be written.
    """
    # Remove a previous preview before anything else can fail.
    if output_path.exists():
        output_path.unlink()
    if not temp_path.exists():
        raise FileNotFoundError(f"Depth file not generated: {temp_path}")

    try:
        gray = cv2.imread(str(temp_path), cv2.IMREAD_GRAYSCALE)
        if gray is None:
            raise FileNotFoundError(f"Failed to read depth image: {temp_path}")

        colored = cv2.applyColorMap(
            np.ascontiguousarray(gray), cv2.COLORMAP_JET
        )
        written = cv2.imwrite(str(output_path), colored)
        if not written:
            raise RuntimeError(
                f"Failed to write depth visualization: {output_path}"
            )
    finally:
        if temp_path.exists():
            temp_path.unlink()
885
+
886
def create_clean_material(self, material_name: str) -> bpy.types.Material:
    """Return a fresh node-based material named ``material_name`` with no nodes.

    An existing material of the same name is removed (and unlinked) first.
    """
    stale = bpy.data.materials.get(material_name)
    if stale is not None:
        bpy.data.materials.remove(stale, do_unlink=True)

    fresh = bpy.data.materials.new(name=material_name)
    fresh.use_nodes = True
    fresh.shadow_method = "NONE"
    fresh.node_tree.nodes.clear()
    return fresh
898
+
899
def create_view_normal_material(self) -> bpy.types.Material:
    """Build the emission material used to render camera-space normals.

    Node chain: the true normal (negated on back faces) is transformed from
    world to camera space, multiplied by ``(-1, 1, -1)``, remapped with
    ``* 0.5 + 0.5``, and emitted as color.
    """
    material = self.create_clean_material("EmbodiedGenViewNormal")
    tree = material.node_tree
    add_node = tree.nodes.new
    connect = tree.links.new

    geometry = add_node(type="ShaderNodeNewGeometry")

    negate = add_node(type="ShaderNodeVectorMath")
    negate.operation = "MULTIPLY"
    negate.inputs[1].default_value = (-1.0, -1.0, -1.0)

    # Chooses between the normal (A) and its negation (B) via "Backfacing".
    face_mix = add_node(type="ShaderNodeMix")
    face_mix.data_type = "VECTOR"
    face_mix.clamp_factor = True
    face_mix.factor_mode = "UNIFORM"

    to_camera = add_node(type="ShaderNodeVectorTransform")
    to_camera.vector_type = "NORMAL"
    to_camera.convert_from = "WORLD"
    to_camera.convert_to = "CAMERA"

    # Flips the X and Z components of the camera-space normal.
    axis_flip = add_node(type="ShaderNodeVectorMath")
    axis_flip.operation = "MULTIPLY"
    axis_flip.inputs[1].default_value = (-1.0, 1.0, -1.0)

    # value * 0.5 + 0.5 per component.
    remap = add_node(type="ShaderNodeVectorMath")
    remap.operation = "MULTIPLY_ADD"
    remap.inputs[1].default_value = (0.5, 0.5, 0.5)
    remap.inputs[2].default_value = (0.5, 0.5, 0.5)

    emission = add_node(type="ShaderNodeEmission")
    surface_output = add_node(type="ShaderNodeOutputMaterial")

    true_normal = geometry.outputs["True Normal"]
    connect(true_normal, negate.inputs[0])
    connect(geometry.outputs["Backfacing"], face_mix.inputs["Factor"])
    connect(true_normal, face_mix.inputs["A"])
    connect(negate.outputs["Vector"], face_mix.inputs["B"])
    connect(face_mix.outputs["Result"], to_camera.inputs["Vector"])
    connect(to_camera.outputs["Vector"], axis_flip.inputs[0])
    connect(axis_flip.outputs["Vector"], remap.inputs[0])
    connect(remap.outputs["Vector"], emission.inputs["Color"])
    connect(emission.outputs["Emission"], surface_output.inputs["Surface"])

    return material
945
+
946
def create_mesh_preview_material(self) -> bpy.types.Material:
    """Build the emission material used for the untextured mesh preview.

    A Layer Weight "Facing" value drives a two-stop color ramp whose color
    feeds an emission shader of strength 0.82.
    """
    material = self.create_clean_material("EmbodiedGenMeshPreview")
    tree = material.node_tree

    facing = tree.nodes.new(type="ShaderNodeLayerWeight")
    facing.inputs["Blend"].default_value = 0.35

    ramp_node = tree.nodes.new(type="ShaderNodeValToRGB")
    ramp_stops = (
        (0.1, (0.78, 0.81, 0.87, 1.0)),
        (0.9, (0.42, 0.48, 0.58, 1.0)),
    )
    for stop_index, (position, color) in enumerate(ramp_stops):
        stop = ramp_node.color_ramp.elements[stop_index]
        stop.position = position
        stop.color = color

    emission = tree.nodes.new(type="ShaderNodeEmission")
    emission.inputs["Strength"].default_value = 0.82
    surface_output = tree.nodes.new(type="ShaderNodeOutputMaterial")

    connect = tree.links.new
    connect(facing.outputs["Facing"], ramp_node.inputs["Fac"])
    connect(ramp_node.outputs["Color"], emission.inputs["Color"])
    connect(emission.outputs["Emission"], surface_output.inputs["Surface"])

    return material
968
+
969
def assign_instance_ids(self) -> dict[str, int]:
    """Give every mesh object a 1-based pass index, ordered by object name.

    Returns:
        Mapping from object name to the pass index written to the object.

    Raises:
        ValueError: If ``get_mesh_objects`` yields no objects.
    """
    ordered_objects = sorted(
        self.get_mesh_objects(), key=lambda obj: obj.name
    )
    if len(ordered_objects) == 0:
        raise ValueError("No mesh objects found for instance segmentation.")

    name_to_id: dict[str, int] = {}
    next_id = 1
    for mesh_object in ordered_objects:
        mesh_object.pass_index = next_id
        name_to_id[mesh_object.name] = next_id
        next_id += 1
    return name_to_id
984
+
985
def snapshot_object_pass_indices(
    self,
) -> list[tuple[bpy.types.Object, int]]:
    """Record ``(object, pass_index)`` for every mesh object.

    The result can be handed to ``restore_object_pass_indices`` to undo a
    temporary pass-index override.
    """
    snapshot = []
    for mesh_object in self.get_mesh_objects():
        snapshot.append((mesh_object, mesh_object.pass_index))
    return snapshot
990
+
991
def restore_object_pass_indices(
    self, original_pass_indices: list[tuple[bpy.types.Object, int]]
) -> None:
    """Write each saved pass index back onto its object."""
    for entry in original_pass_indices:
        mesh_object, saved_index = entry
        mesh_object.pass_index = saved_index
997
+
998
def add_instance_seg_output_node(
    self,
    output_path: Path,
) -> tuple[bpy.types.NodeTree, list[bpy.types.Node]]:
    """Attach an EXR file output for the ``"IndexOB"`` render-layer socket."""
    seg_temp_path = self.get_instance_seg_temp_path(output_path)
    return self.add_exr_output_node(
        output_path=output_path,
        temp_output_path=seg_temp_path,
        render_output_name="IndexOB",
    )
1007
+
1008
def add_flow_depth_output_node(
    self,
    output_path: Path,
) -> tuple[bpy.types.NodeTree, list[bpy.types.Node]]:
    """Attach an EXR file output for the ``"Depth"`` render-layer socket."""
    depth_temp_path = self.get_flow_depth_temp_path(output_path)
    return self.add_exr_output_node(
        output_path=output_path,
        temp_output_path=depth_temp_path,
        render_output_name="Depth",
    )
1017
+
1018
def add_exr_output_node(
    self,
    output_path: Path,
    temp_output_path: Path,
    render_output_name: str,
) -> tuple[bpy.types.NodeTree, list[bpy.types.Node]]:
    """Add a render-layers node wired to an uncompressed 32-bit RGB EXR output.

    Args:
        output_path: Not used by this method; kept for a uniform signature.
        temp_output_path: Determines the node's base directory and slot prefix.
        render_output_name: Name of the render-layers output socket to save.

    Returns:
        The scene's compositor tree and the two nodes that were created.
    """
    tree = self.scene.node_tree
    render_layers = tree.nodes.new(type="CompositorNodeRLayers")

    exr_output = tree.nodes.new(type="CompositorNodeOutputFile")
    exr_output.base_path = str(temp_output_path.parent)
    slot_prefix = self.get_temp_output_slot_prefix(temp_output_path)
    exr_output.file_slots[0].path = slot_prefix

    exr_format = exr_output.format
    exr_format.file_format = "OPEN_EXR"
    exr_format.color_mode = "RGB"
    exr_format.color_depth = "32"
    exr_format.exr_codec = "NONE"

    source_socket = render_layers.outputs[render_output_name]
    tree.links.new(source_socket, exr_output.inputs[0])
    return tree, [render_layers, exr_output]
1041
+
1042
def load_temp_exr_first_channel(
    self,
    temp_path: Path,
    error_message: str,
) -> np.ndarray:
    """Load the first channel of a temporary EXR, flipped vertically.

    Args:
        temp_path: EXR file to read.
        error_message: Template formatted with ``path=temp_path`` when the
            file does not exist.

    Returns:
        A ``(height, width)`` float32 array with the row order reversed
        relative to Blender's pixel buffer.

    Raises:
        FileNotFoundError: If ``temp_path`` does not exist.
        RuntimeError: If the pixel count does not equal
            ``width * height * channels``.
    """
    if not temp_path.exists():
        raise FileNotFoundError(error_message.format(path=temp_path))

    temp_image = bpy.data.images.load(str(temp_path), check_existing=False)
    try:
        width, height = temp_image.size
        channels = temp_image.channels
        expected_count = width * height * channels
        if len(temp_image.pixels) != expected_count:
            raise RuntimeError(
                f"Unexpected EXR image layout for {temp_path}."
            )

        # foreach_get fills the buffer directly instead of first building a
        # Python tuple holding every pixel component.
        pixels = np.empty(expected_count, dtype=np.float32)
        temp_image.pixels.foreach_get(pixels)

        image = pixels.reshape(height, width, channels)[..., 0]
        return np.flipud(image)
    finally:
        # The image datablock is only needed while copying its pixels.
        bpy.data.images.remove(temp_image)
1065
+
1066
def load_instance_seg_temp_output(self, temp_path: Path) -> np.ndarray:
    """Load the instance-id EXR and round it to a contiguous uint16 array."""
    raw_ids = self.load_temp_exr_first_channel(
        temp_path,
        "Instance segmentation file not generated: {path}",
    )
    rounded_ids = np.rint(raw_ids)
    return np.ascontiguousarray(rounded_ids.astype(np.uint16))
1072
+
1073
def load_flow_depth_temp_output(self, temp_path: Path) -> np.ndarray:
    """Load the flow depth EXR as contiguous float32 with non-finite values zeroed."""
    raw_depth = self.load_temp_exr_first_channel(
        temp_path,
        "Flow depth file not generated: {path}",
    )
    depth = np.ascontiguousarray(raw_depth.astype(np.float32))
    non_finite = np.logical_not(np.isfinite(depth))
    depth[non_finite] = 0.0
    return depth
1081
+
1082
def build_instance_seg_visualization(
    self, instance_seg: np.ndarray, max_instance_id: int
) -> np.ndarray:
    """Color an instance-id image through a deterministic lookup table.

    Id ``i`` maps to ``((i * 37) % 256, (i * 67) % 256, (i * 97) % 256)``;
    id 0 therefore maps to black.

    Returns:
        A uint8 array shaped ``instance_seg.shape + (3,)``.
    """
    ids = np.arange(max_instance_id + 1, dtype=np.int64)
    channel_multipliers = (37, 67, 97)
    color_lut = np.stack(
        [(ids * multiplier) % 256 for multiplier in channel_multipliers],
        axis=1,
    ).astype(np.uint8)
    return color_lut[instance_seg]
1094
+
1095
def save_instance_seg_outputs(
    self,
    output_path: Path,
    instance_seg: np.ndarray,
) -> None:
    """Write the colorized instance-segmentation preview image.

    Raises:
        RuntimeError: If the preview image cannot be written.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    vis_output_path = self.get_instance_seg_vis_output_path(output_path)

    highest_id = int(instance_seg.max(initial=0))
    visualization = self.build_instance_seg_visualization(
        instance_seg=instance_seg,
        max_instance_id=highest_id,
    )
    written = cv2.imwrite(str(vis_output_path), visualization)
    if not written:
        raise RuntimeError(
            f"Failed to write instance segmentation preview: {vis_output_path}"
        )
1112
+
1113
def build_flow_visualization(self, flow: np.ndarray) -> np.ndarray:
    """Render a 2-channel flow field as an 8-bit BGR image.

    Hue encodes the flow direction in degrees, saturation encodes the
    magnitude normalized by its 99th percentile (clipped to 1), and value is
    fixed at 1.
    """
    flow_float = flow.astype(np.float32)
    magnitude, angle = cv2.cartToPolar(
        flow_float[..., 0],
        flow_float[..., 1],
        angleInDegrees=True,
    )

    # Fall back to 1.0 so a (near-)zero flow field does not divide by ~0.
    max_magnitude = float(np.percentile(magnitude, 99.0))
    max_magnitude = 1.0 if max_magnitude <= 1e-6 else max_magnitude

    height, width = flow.shape[:2]
    hsv = np.zeros((height, width, 3), dtype=np.float32)
    hsv[..., 0] = np.mod(angle, 360.0)
    hsv[..., 1] = np.clip(magnitude / max_magnitude, 0.0, 1.0)
    hsv[..., 2] = 1.0

    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    scaled = np.clip(bgr * 255.0, 0.0, 255.0)
    return scaled.astype(np.uint8)
1131
+
1132
def get_camera_intrinsics(
    self, camera: bpy.types.Object, width: int, height: int
) -> tuple[float, float, float, float]:
    """Return ``(fx, fy, cx, cy)`` in pixels for the given image size.

    Focal lengths come from the camera data's ``angle_x`` / ``angle_y``;
    the principal point is ``((width - 1) / 2, (height - 1) / 2)``.
    """
    lens = camera.data
    half_fov_x = lens.angle_x * 0.5
    half_fov_y = lens.angle_y * 0.5
    focal_x = width / (2.0 * math.tan(half_fov_x))
    focal_y = height / (2.0 * math.tan(half_fov_y))
    center_x = (width - 1.0) * 0.5
    center_y = (height - 1.0) * 0.5
    return focal_x, focal_y, center_x, center_y
1141
+
1142
def build_camera_matrix_world(
    self,
    xyz: tuple[float, float, float] | list[float],
    rotation_deg: tuple[float, float, float] | list[float],
) -> Matrix:
    """Compose a 4x4 world matrix from a position and XYZ Euler angles in degrees.

    The result is ``translation @ rotation``.
    """
    euler_xyz = Euler(self.get_rotation_radians(rotation_deg), "XYZ")
    rotation_4x4 = euler_xyz.to_matrix().to_4x4()
    position = Vector(tuple(xyz))
    return Matrix.Translation(position) @ rotation_4x4
1150
+
1151
def compute_flow_from_depth(
    self,
    depth: np.ndarray,
    camera: bpy.types.Object,
) -> tuple[np.ndarray, np.ndarray]:
    """Reproject source depth into the flow camera and derive dense 2D flow.

    Pixels with finite, positive depth are unprojected with the source
    intrinsics (camera looking down -Z, image rows growing downwards), moved
    to world space with ``camera.matrix_world``, and projected into the camera
    defined by ``self.flow_camera_xyz`` / ``self.flow_camera_rotation_deg``
    using the same intrinsics.

    Returns:
        ``(flow, valid_mask)``: ``flow`` is ``(H, W, 2)`` float32 pixel
        displacement (x, y); ``valid_mask`` is ``(H, W)`` bool and is True
        only where the point lies in front of the target camera and projects
        inside the image. Everything else stays zero / False.
    """
    height, width = depth.shape
    fx, fy, cx, cy = self.get_camera_intrinsics(camera, width, height)

    flow = np.zeros((height, width, 2), dtype=np.float32)
    valid_mask = np.zeros((height, width), dtype=bool)

    valid = np.isfinite(depth) & (depth > 0.0)
    if not valid.any():
        return flow, valid_mask

    # Row-major coordinates of the valid pixels, in the same order that
    # boolean indexing with ``valid`` produces.
    rows, cols = np.nonzero(valid)
    source_u = cols.astype(np.float32)
    source_v = rows.astype(np.float32)
    depth_valid = depth[valid]

    x_cam = ((source_u - cx) / fx) * depth_valid
    y_cam = (-(source_v - cy) / fy) * depth_valid
    z_cam = -depth_valid
    camera_points = np.stack(
        [x_cam, y_cam, z_cam, np.ones_like(z_cam)], axis=1
    )

    source_matrix_world = np.array(camera.matrix_world, dtype=np.float64)
    target_matrix_world = np.array(
        self.build_camera_matrix_world(
            self.flow_camera_xyz,
            self.flow_camera_rotation_deg,
        ),
        dtype=np.float64,
    )
    target_world_to_camera = np.linalg.inv(target_matrix_world)

    # Row-vector convention: points are multiplied by the transposed matrices.
    world_points = camera_points @ source_matrix_world.T
    target_points = world_points @ target_world_to_camera.T

    target_z = target_points[:, 2]
    in_front = target_z < -1e-6
    if not in_front.any():
        return flow, valid_mask

    projected_x = (
        fx * (target_points[in_front, 0] / -target_z[in_front]) + cx
    )
    projected_y = (
        -fy * (target_points[in_front, 1] / -target_z[in_front]) + cy
    )
    in_frame = (
        (projected_x >= 0.0)
        & (projected_x < width)
        & (projected_y >= 0.0)
        & (projected_y < height)
    )
    if not in_frame.any():
        return flow, valid_mask

    # Positions (within the valid-pixel list) that survive both filters.
    kept = np.flatnonzero(in_front)[in_frame]
    displacement = np.stack(
        [
            projected_x[in_frame] - source_u[kept],
            projected_y[in_frame] - source_v[kept],
        ],
        axis=1,
    ).astype(np.float32)

    flow[rows[kept], cols[kept]] = displacement
    valid_mask[rows[kept], cols[kept]] = True
    return flow, valid_mask
1243
+
1244
def save_numpy_array(self, output_path: Path, array: np.ndarray) -> None:
    """Save ``array`` to ``output_path`` via a temporary file and a rename.

    The data is first written to a sibling ``<stem>.tmp.npy`` file, which is
    then moved over ``output_path``, so ``output_path`` is only replaced once
    the array has been written in full. If saving or renaming fails, the
    temporary file is removed and the exception propagates.
    """
    temp_output_path = output_path.with_suffix(".tmp.npy")
    temp_output_path.unlink(missing_ok=True)
    try:
        np.save(temp_output_path, array)
        temp_output_path.replace(output_path)
    finally:
        # After a successful replace the temp name no longer exists; this
        # only removes a partial file left behind by a failure.
        temp_output_path.unlink(missing_ok=True)
1251
+
1252
def save_flow_outputs(
    self,
    output_path: Path,
    flow: np.ndarray,
    valid_mask: np.ndarray,
) -> None:
    """Persist the flow array, its validity mask, and a color preview image.

    Raises:
        RuntimeError: If the preview image cannot be written.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    flow_output_path = self.get_flow_output_path(output_path)
    flow_valid_output_path = self.get_flow_valid_output_path(output_path)
    flow_vis_output_path = self.get_flow_vis_output_path(output_path)

    array_outputs = (
        (flow_output_path, flow),
        (flow_valid_output_path, valid_mask),
    )
    for array_path, array in array_outputs:
        self.save_numpy_array(array_path, array)

    preview = self.build_flow_visualization(flow)
    written = cv2.imwrite(str(flow_vis_output_path), preview)
    if not written:
        raise RuntimeError(
            f"Failed to write flow preview: {flow_vis_output_path}"
        )
1270
+
1271
def get_preview_output_path(
    self,
    output_path: Path,
    render_pass_name: str,
    occurrence_index: int = 1,
) -> Path | None:
    """Resolve the preview image path for one occurrence of a render pass.

    Returns:
        The path produced by ``build_occurrence_output_path`` for the pass's
        preview file, or ``None`` when ``render_pass_name`` has no preview.
    """
    preview_paths_by_pass = dict(
        rgb=output_path,
        depth=self.get_depth_vis_output_path(output_path),
        normal=self.get_normal_output_path(output_path),
        mesh=self.get_mesh_output_path(output_path),
        instance_seg=self.get_instance_seg_vis_output_path(output_path),
        flow=self.get_flow_vis_output_path(output_path),
    )
    base_preview_path = preview_paths_by_pass.get(render_pass_name)
    if base_preview_path is not None:
        return self.build_occurrence_output_path(
            base_preview_path, occurrence_index
        )
    return None
1291
+
1292
def load_preview_image(self, image_path: Path) -> np.ndarray:
    """Read a preview image as a color array.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    loaded = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
    if loaded is not None:
        return loaded
    raise FileNotFoundError(f"Failed to read preview image: {image_path}")
1299
+
1300
def collect_composite_images(
    self, output_path: Path
) -> list[tuple[str, np.ndarray]]:
    """Load every existing preview image, in render-pass occurrence order.

    Occurrences without a preview path, or whose file does not exist, are
    skipped.

    Returns:
        ``(render_pass_name, image)`` pairs.
    """
    collected: list[tuple[str, np.ndarray]] = []
    for pass_name, occurrence in self.iter_render_pass_occurrences():
        preview_path = self.get_preview_output_path(
            output_path,
            pass_name,
            occurrence,
        )
        if preview_path is not None and preview_path.exists():
            collected.append(
                (pass_name, self.load_preview_image(preview_path))
            )
    return collected
1322
+
1323
def replicate_duplicate_preview_outputs(self, output_path: Path) -> None:
    """Copy first-occurrence previews to the paths of repeated passes.

    For every occurrence after the first, the first occurrence's preview file
    is copied to that occurrence's path, replacing any existing file.

    Raises:
        FileNotFoundError: If the first-occurrence preview does not exist.
    """
    for pass_name, occurrence in self.iter_render_pass_occurrences():
        if occurrence == 1:
            continue

        first_path = self.get_preview_output_path(output_path, pass_name)
        repeat_path = self.get_preview_output_path(
            output_path,
            pass_name,
            occurrence,
        )
        if first_path is None or repeat_path is None:
            continue
        if not first_path.exists():
            raise FileNotFoundError(
                f"Preview output not generated for repeated pass "
                f"{pass_name}: {first_path}"
            )

        if repeat_path.exists():
            repeat_path.unlink()
        shutil.copyfile(first_path, repeat_path)
1350
+
1351
def get_composite_separator_boundaries(
    self,
    render_pass_names: list[str] | tuple[str, ...],
    boundaries: np.ndarray,
) -> list[float]:
    """Pick the interior boundaries that separate two different passes.

    ``boundaries`` holds one more entry than ``render_pass_names``; interior
    boundary ``i`` lies between pass ``i - 1`` and pass ``i``. A boundary
    between two passes with the same name is left out.

    Raises:
        ValueError: If the lengths are inconsistent.
    """
    if len(boundaries) != len(render_pass_names) + 1:
        raise ValueError(
            "boundaries length must match the number of render passes + 1."
        )

    neighbours = zip(
        render_pass_names, render_pass_names[1:], boundaries[1:-1]
    )
    return [
        float(boundary)
        for left_name, right_name, boundary in neighbours
        if left_name != right_name
    ]
1368
+
1369
def build_composite_image(
    self,
    images: list[np.ndarray],
    render_pass_names: list[str] | tuple[str, ...],
    separator_width_px: int = 6,
) -> np.ndarray:
    """Merge preview images into one image split into slanted bands.

    All images are brought to the first image's size. The frame is divided
    into ``len(images)`` bands of equal width along the coordinate
    ``x + y * slope`` and band ``i`` shows image ``i``. Boundaries between
    differently named passes are painted with value 255.

    Raises:
        ValueError: If ``images`` is empty or its length differs from
            ``render_pass_names``.
    """
    if not images:
        raise ValueError("At least one image is required for composition.")
    if len(images) != len(render_pass_names):
        raise ValueError(
            "images and render_pass_names must have the same length."
        )

    base_height, base_width = images[0].shape[:2]
    frame_shape = (base_height, base_width)

    panels = []
    for image in images:
        if image.shape[:2] != frame_shape:
            image = cv2.resize(
                image,
                (base_width, base_height),
                interpolation=cv2.INTER_LINEAR,
            )
        panels.append(image)

    # Per-pixel coordinate along the slanted axis; broadcasts to (H, W).
    column_index = np.arange(base_width, dtype=np.float32)[None, :]
    row_index = np.arange(base_height, dtype=np.float32)[:, None]
    slash_slope = 0.28 * (base_width / base_height)
    diagonal_coord = column_index + row_index * slash_slope

    boundaries = np.linspace(
        float(diagonal_coord.min()),
        float(diagonal_coord.max()),
        len(panels) + 1,
    )
    region_indices = np.digitize(
        diagonal_coord, boundaries[1:-1], right=False
    )

    composite = np.zeros_like(panels[0])
    for panel_index, panel in enumerate(panels):
        region = region_indices == panel_index
        composite[region] = panel[region]

    slash_mask = np.zeros(frame_shape, dtype=bool)
    for boundary in self.get_composite_separator_boundaries(
        render_pass_names, boundaries
    ):
        slash_mask |= np.abs(diagonal_coord - boundary) <= separator_width_px
    composite[slash_mask] = 255
    return composite
1431
+
1432
def save_composite_preview(self, output_path: Path) -> None:
    """Write the composite preview when at least two previews are available.

    Raises:
        RuntimeError: If the composite image cannot be written.
    """
    named_images = self.collect_composite_images(output_path)
    if len(named_images) < 2:
        return

    pass_names = [pass_name for pass_name, _ in named_images]
    pass_images = [image for _, image in named_images]

    composite_output_path = self.get_composite_output_path(tuple(pass_names))
    composite_image = self.build_composite_image(pass_images, pass_names)

    written = cv2.imwrite(str(composite_output_path), composite_image)
    if not written:
        raise RuntimeError(
            f"Failed to write composite preview: {composite_output_path}"
        )
1448
+
1449
def render_flow_pass(self, output_path: Path) -> None:
    """Render depth to a temporary EXR and derive the flow outputs from it.

    Raises:
        ValueError: If the scene has no camera.
    """
    self.validate_flow_args()
    camera = self.scene.camera
    if camera is None:
        raise ValueError("Scene camera is required for flow rendering.")

    depth_temp_path = self.get_flow_depth_temp_path(output_path)

    def write_flow_from_depth(depth: np.ndarray) -> None:
        # Invoked by render_temp_output_pass with the loaded depth array.
        flow, valid_mask = self.compute_flow_from_depth(
            depth=depth, camera=camera
        )
        self.save_flow_outputs(
            output_path=output_path,
            flow=flow,
            valid_mask=valid_mask,
        )

    self.render_temp_output_pass(
        output_path=output_path,
        temp_output_path=depth_temp_path,
        add_output_node=self.add_flow_depth_output_node,
        load_temp_output=self.load_flow_depth_temp_output,
        finalize_output=write_flow_from_depth,
        color_mode="RGB",
        color_depth="8",
        enable_depth_pass=True,
    )
1477
+
1478
def render_normal_pass(self, output_path: Path) -> None:
    """Render the normal preview with the view-normal material override."""
    self.render_material_override_pass(
        preview_output_path=self.get_normal_output_path(output_path),
        material_factory=self.create_view_normal_material,
        color_mode="RGB",
    )
1485
+
1486
def render_mesh_pass(self, output_path: Path) -> None:
    """Render the mesh preview with the mesh-preview material override."""
    self.render_material_override_pass(
        preview_output_path=self.get_mesh_output_path(output_path),
        material_factory=self.create_mesh_preview_material,
        color_mode="RGBA",
    )
1493
+
1494
def render_instance_seg_pass(self, output_path: Path) -> None:
    """Render the object-index pass and save the instance-segmentation preview.

    Object pass indices are overridden for the render and restored afterwards,
    even when rendering fails.
    """
    saved_pass_indices = self.snapshot_object_pass_indices()
    self.assign_instance_ids()
    seg_temp_path = self.get_instance_seg_temp_path(output_path)

    def write_instance_seg(instance_seg: np.ndarray) -> None:
        # Invoked by render_temp_output_pass with the loaded id image.
        self.save_instance_seg_outputs(
            output_path=output_path,
            instance_seg=instance_seg,
        )

    try:
        self.render_temp_output_pass(
            output_path=output_path,
            temp_output_path=seg_temp_path,
            add_output_node=self.add_instance_seg_output_node,
            load_temp_output=self.load_instance_seg_temp_output,
            finalize_output=write_instance_seg,
            color_mode="BW",
            color_depth="16",
            enable_object_index_pass=True,
        )
    finally:
        self.restore_object_pass_indices(saved_pass_indices)
1518
+
1519
def render(self, output_path: Path) -> None:
    """Run every requested render pass and write the final outputs.

    The base render covers ``"rgb"`` and ``"depth"``; ``"normal"``,
    ``"mesh"``, ``"instance_seg"`` and ``"flow"`` each run as a separate pass
    afterwards. Finally, previews of repeated passes are duplicated and the
    composite preview is written.

    Raises:
        ValueError: If an auxiliary output targets an unsupported path.
    """
    self.scene.use_nodes = False
    wants_rgb = "rgb" in self.render_passes
    wants_depth = "depth" in self.render_passes

    auxiliary_outputs: list[tuple[Path, Path]] = []
    if wants_depth:
        auxiliary_outputs = self.configure_auxiliary_outputs(
            output_path, self.render_passes, self.depth_mode
        )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    if wants_rgb:
        self.scene.render.filepath = str(output_path)

    if wants_rgb or wants_depth:
        # The still image is only written when the RGB pass was requested.
        bpy.ops.render.render(write_still=wants_rgb)

    for temp_path, final_path in auxiliary_outputs:
        if final_path != self.get_depth_vis_output_path(output_path):
            raise ValueError(f"Unsupported render output target: {final_path}")
        self.finalize_depth_output(temp_path, final_path)

    if auxiliary_outputs:
        self.clear_compositor_tree()
        self.scene.use_nodes = False

    extra_passes = (
        ("normal", self.render_normal_pass),
        ("mesh", self.render_mesh_pass),
        ("instance_seg", self.render_instance_seg_pass),
        ("flow", self.render_flow_pass),
    )
    for pass_name, render_pass in extra_passes:
        if pass_name in self.render_passes:
            render_pass(output_path)

    self.replicate_duplicate_preview_outputs(output_path)
    self.save_composite_preview(output_path)
1556
+
1557
def run(self) -> None:
    """Prepare the scene, configure rendering, and execute all passes.

    Steps: clear the scene, import the USD and the optional GLB asset, create
    the camera, set up lighting and the world, configure color management and
    Cycles, then render inside a temporary working directory.
    """
    rgb_output_path = self.get_rgb_output_path()
    self.output_dir.mkdir(parents=True, exist_ok=True)
    self.clear_scene()
    self.import_usd()
    self.validate_glb_args()
    imported_glb_objects = self.import_glb_asset()
    self.place_glb_asset(imported_glb_objects)
    min_corner, max_corner = self.get_scene_bbox()
    center = (min_corner + max_corner) * 0.5
    diagonal = (max_corner - min_corner).length
    self.create_camera()
    self.ensure_lighting(diagonal, center, max_corner.z)
    world_created = self.ensure_world()
    self.add_fill_light(
        diagonal,
        center,
        max_corner.z,
        energy=self.fill_light_energy,
    )
    if world_created:
        self.add_light_rig(
            diagonal,
            center,
            max_corner.z,
            area_energy=1500.0,
            sun_energy=0.35,
            prefix="Fill",
        )
    self.configure_color_management()
    self.configure_cycles()
    with tempfile.TemporaryDirectory(prefix="render_usd_") as temp_dir:
        self.temp_dir = Path(temp_dir)
        try:
            self.render(rgb_output_path)
        finally:
            # Reset even when rendering fails, so the attribute never keeps
            # pointing at a directory that the context manager deletes.
            self.temp_dir = None

    logger.info("Rendered outputs to %s", self.output_dir)
1597
+
1598
+
1599
def main() -> None:
    """Configure INFO logging, parse the CLI arguments, and run the renderer."""
    logging.basicConfig(level=logging.INFO)
    renderer = RenderUsd.from_args(_parse_args())
    renderer.run()
1603
+
1604
+
1605
+ if __name__ == "__main__":
1606
+ main()
embodied_gen/skills/README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EmbodiedGen Skills
2
+
3
+ This directory is the canonical home for EmbodiedGen reusable skills.
4
+
5
+ The root of `embodied_gen/skills` contains only generic skill sources.
6
+ Runtime-specific packaging should live in adapter subdirectories such as
7
+ `embodied_gen/skills/claude_adapter/`.
8
+
9
+ ## Included generic skills
10
+
11
+ - `asset-creator`
12
+ - `asset-retrieval`
13
+ - `background-creator`
14
+ - `layout-creator`
15
+ - `sim-runner`
16
+ - `asset-converter`
17
+ - `asset-scale`
18
+ - `room-creator`
19
+ - `spatial-computing`
20
+
21
+ ## Claude plugin package
22
+
23
+ Claude-compatible slash commands and plugin manifest are under:
24
+
25
+ ```text
26
+ embodied_gen/skills/claude_adapter/
27
+ ```
28
+
29
+ The local marketplace manifest is:
30
+
31
+ ```text
32
+ embodied_gen/skills/claude_adapter/.claude-plugin/marketplace.json
33
+ ```
34
+
35
+ The actual Claude plugin package is:
36
+
37
+ ```text
38
+ embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/
39
+ ```
40
+
41
+ Current commands include:
42
+
43
+ - `/embodiedgen-skills:assets`
44
+ - `/embodiedgen-skills:background`
45
+ - `/embodiedgen-skills:layout`
46
+ - `/embodiedgen-skills:sim`
47
+ - `/embodiedgen-skills:convert`
48
+ - `/embodiedgen-skills:process`
49
+ - `/embodiedgen-skills:room`
50
+ - `/embodiedgen-skills:spatial`
51
+
52
+ ## Local install for Claude
53
+
54
+ ```bash
55
+ bash install/install_agent_plugin.sh
56
+ ```
57
+
58
+ ## Notes
59
+
60
+ - Generic skills stay in their original directories under `embodied_gen/skills/`.
61
+ - Claude-specific files live only under `embodied_gen/skills/claude_adapter/`.
62
+ - This keeps the skill source portable for Codex, Copilot, and other runtimes.
embodied_gen/skills/asset-converter/SKILL.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: asset-converter
3
+ description: Convert EmbodiedGen URDF assets to simulator-specific formats (USD/MJCF/URDF) using embodied_gen.data.asset_converter APIs. Use this skill whenever users ask to export assets for IsaacSim, MuJoCo, Genesis, IsaacGym, PyBullet, or SAPIEN, batch-convert URDF assets, or choose the correct converter/source_type per simulator.
4
+ ---
5
+
6
+ # Asset Converter
7
+
8
+ Unified entry for simulator-targeted asset conversion using `embodied_gen.data.asset_converter`.
9
+
10
+ ## When To Use
11
+
12
+ Use this skill when users want to:
13
+ - Convert EmbodiedGen assets for IsaacSim (`USD`) or MuJoCo/Genesis (`MJCF`).
14
+ - Batch-convert multiple URDF assets into simulator-ready outputs.
15
+ - Map simulator names to the correct target format and conversion strategy.
16
+ - Decide when conversion is unnecessary (URDF can be used directly).
17
+
18
+ ## Routing Rules (Core)
19
+
20
+ 1. **IsaacSim** -> convert to `USD`.
21
+ 2. **MuJoCo / Genesis** -> convert to `MJCF` (`.xml`).
22
+ 3. **SAPIEN / IsaacGym / PyBullet** -> use EmbodiedGen `.urdf` directly (no conversion required).
23
+
24
+ ## Pre-checks
25
+
26
+ 1. Run from repository root with `embodiedgen` environment active.
27
+ 2. Confirm input URDF path(s) exist.
28
+ 3. For USD conversion, ensure IsaacLab/IsaacSim conversion dependencies are available.
29
+ 4. Prefer list inputs for `urdf_files` and `target_dirs` (same length, aligned by index).
30
+
31
+ ## Standard Python API Template
32
+
33
+ ```python
34
+ from embodied_gen.data.asset_converter import cvt_embodiedgen_asset_to_anysim
35
+ from embodied_gen.utils.enum import AssetType, SimAssetMapper
36
+
37
+ simulator_name = "mujoco" # isaacsim / mujoco / genesis / sapien / isaacgym / pybullet
38
+
39
+ asset_paths = cvt_embodiedgen_asset_to_anysim(
40
+ urdf_files=[
41
+ "outputs/demo_assets/remote_control/result/remote_control.urdf",
42
+ ],
43
+ target_dirs=[
44
+ "outputs/demo_assets/remote_control/mjcf",
45
+ ],
46
+ target_type=SimAssetMapper[simulator_name],
47
+ source_type=AssetType.URDF,
48
+ overwrite=True,
49
+ )
50
+ print(asset_paths)
51
+ ```
52
+
53
+ ## Source Type Guidance
54
+
55
+ - For `MJCF` target: prefer `source_type=AssetType.URDF`.
56
+ - For `USD` target: use `source_type=AssetType.MESH` by default; `AssetType.URDF` path is also supported when needed.
57
+ - For direct-URDF simulators (`sapien`, `isaacgym`, `pybullet`): skip conversion.
58
+
59
+ ## Direct Converter Template (Advanced)
60
+
61
+ ```python
62
+ from embodied_gen.data.asset_converter import AssetConverterFactory
63
+ from embodied_gen.utils.enum import AssetType
64
+
65
+ converter = AssetConverterFactory.create(
66
+ target_type=AssetType.USD,
67
+ source_type=AssetType.MESH,
68
+ )
69
+
70
+ with converter:
71
+ converter.convert(
72
+ "outputs/demo_assets/remote_control/result/remote_control.urdf",
73
+ "outputs/demo_assets/remote_control/usd/remote_control.usd",
74
+ )
75
+ ```
76
+
77
+ ## Output Conventions
78
+
79
+ - `MJCF`: `<target_dir>/<asset_name>.xml`
80
+ - `USD`: `<target_dir>/<asset_name>.usd`
81
+ - API return: `{<input_urdf_path>: <converted_output_path>}`
82
+
83
+ ## Failure Handling and Retry
84
+
85
+ 1. Unsupported conversion pair: verify `target_type` + `source_type` mapping.
86
+ 2. Missing dependencies (USD path): install/activate IsaacLab + required USD stack.
87
+ 3. Missing output file: verify parent output directory permissions and path correctness.
88
+ 4. Batch mismatch: ensure `len(urdf_files) == len(target_dirs)`.
embodied_gen/skills/asset-creator/SKILL.md ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: asset-creator
3
+ description: Create 3D assets with EmbodiedGen using img3d-cli, text3d-cli, and texture-cli. Use this skill whenever users ask to generate assets from images/text, texture existing meshes, run retry/seed controlled generation, or choose the proper asset-generation CLI from mixed requirements.
4
+ ---
5
+
6
+ # Asset Creator
7
+
8
+ Unified entry for three EmbodiedGen asset-generation CLIs: `img3d-cli`, `text3d-cli`, and `texture-cli`.
9
+
10
+ ## When To Use
11
+
12
+ Use this skill when users want to:
13
+ - Generate a 3D asset from one or more input images.
14
+ - Generate 3D assets in batch from text prompts.
15
+ - Generate or edit textures for existing meshes.
16
+ - Get help choosing the correct CLI from mixed asset-generation requirements.
17
+
18
+ ## Routing Rules (Core)
19
+
20
+ 1. `img3d-cli`: input is image paths (`--image_path` or `--image_root`).
21
+ 2. `text3d-cli`: input is text prompts (`--prompts`) and target is direct asset output.
22
+ 3. `texture-cli`: input is existing mesh path(s) (`--mesh_path`) plus texture prompt(s) (`--prompt`).
23
+
24
+ ## Pre-checks
25
+
26
+ 1. Run commands from the repository root.
27
+ 2. Confirm the active environment is `embodiedgen`.
28
+ 3. If CLI commands are unavailable, run `pip install -e .` to register entrypoints.
29
+
30
+ ## Standard Command Templates
31
+
32
+ ### 1) Image to 3D: `img3d-cli`
33
+
34
+ ```bash
35
+ img3d-cli --image_path .../sample.jpg --n_retry 1 --output_root outputs/imageto3d
36
+ ```
37
+
38
+ Common parameters:
39
+ - `--image_path` / `--image_root`
40
+ - `--output_root`
41
+ - `--n_retry`
42
+ - `--seed`
43
+ - `--skip_exists`
44
+
45
+ ---
46
+
47
+ ### 2) Text to 3D: `text3d-cli`
48
+
49
+ ```bash
50
+ text3d-cli \
51
+ --prompts "small bronze figurine of a lion" "A globe with wooden base" \
52
+ --n_image_retry 1 --n_asset_retry 1 --n_pipe_retry 1 \
53
+ --seed_img 0 \
54
+ --output_root outputs/textto3d
55
+ ```
56
+
57
+ Common parameters:
58
+ - `--prompts`
59
+ - `--output_root`
60
+ - `--asset_names`
61
+ - `--n_image_retry --n_asset_retry --n_pipe_retry`
62
+ - `--seed_img --seed_3d`
63
+
64
+ ---
65
+
66
+ ### 3) Mesh Texture Generation: `texture-cli`
67
+
68
+ ```bash
69
+ texture-cli \
70
+ --mesh_path ".../horse.obj" \
71
+ --prompt "A gray horse head with flying mane and brown eyes" \
72
+ --output_root "outputs/texture_gen" \
73
+ --seed 0
74
+ ```
75
+
76
+ Common parameters:
77
+ - `--mesh_path` (supports multiple inputs)
78
+ - `--prompt` (must align 1:1 with mesh inputs)
79
+ - `--output_root`
80
+ - `--seed`
81
+ - `--texture_size`
82
+ - `--ip_adapt_scale --ip_img_path` (optional reference-image control)
83
+
84
+ ---
85
+
86
+ ## Output Conventions
87
+
88
+ - `img3d-cli`: each sample is typically under `<output_root>/<sample>/result/`.
89
+ - `text3d-cli`: `<output_root>/asset3d/<asset_name>/result/`.
90
+ - `texture-cli`: `<output_root>/<mesh_stem>/texture_mesh/`.
91
+
92
+ ## Failure Handling and Retry
93
+
94
+ 1. OOM or GPU pressure: reduce batch size and concurrency.
95
+ 2. Unstable quality: increase `--n_retry` or `--n_*_retry`.
96
+ 3. Missing outputs: verify output-root permissions and path spelling; prefer absolute paths.
embodied_gen/skills/asset-retrieval/SKILL.md ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: asset-retrieval
3
+ description: Retrieve existing EmbodiedGen assets from a configurable dataset index CSV by natural-language descriptions and return matching `.urdf` paths. Use when users describe an asset they want, ask to find one or several existing URDF assets, or need a fast lookup over the local asset index instead of scanning folders manually.
4
+ ---
5
+
6
+ # Asset Retrieval
7
+
8
+ Look up simulation-ready assets from `dataset_index.csv` and return `.urdf`
9
+ paths. The CSV index is the single source of truth.
10
+
11
+ ## Workflow
12
+
13
+ **Preferred — agent reads CSV directly:**
14
+
15
+ 1. Read `dataset_index.csv` into context.
16
+ 2. Semantically match the user's description (open-ended, fuzzy, or in any
17
+ language) against `category`, `secondary_category`, `primary_category`,
18
+ and `description` columns.
19
+ 3. Return the best-matching absolute `.urdf` path; return multiple candidates
20
+ when the request is broad or explicitly asks for several.
21
+ 4. Briefly explain why the returned asset matches.
22
+
23
+ This path handles open-ended queries like "a tall chair suitable for a
24
+ coffee shop" or "能放在客厅角落的落地灯" that pure keyword matching cannot
25
+ resolve.
26
+
27
+ **Fallback — CLI script (no network / no LLM):**
28
+
29
+ When the agent is unavailable, use the helper script which performs offline
30
+ keyword-based ranking:
31
+
32
+ ```bash
33
+ python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
34
+ "modern dining chair curved backrest"
35
+ ```
36
+
37
+ For the CLI path, rewrite open-ended or Chinese descriptions into compact
38
+ English keywords first (e.g. `能放在客厅角落的落地灯` → `floor lamp`).
39
+
40
+ ## Index Resolution
41
+
42
+ Checked in order — first match wins:
43
+
44
+ 1. `--index-file` CLI argument
45
+ 2. `$EMBODIEDGEN_DATASET_INDEX` environment variable
46
+ 3. `$EMBODIEDGEN_DATASET_ROOT/dataset_index.csv`
47
+ 4. `<repo-root>/outputs/EmbodiedGenData/dataset/dataset_index.csv`
48
+
49
+ Dataset root follows a parallel order (`--dataset-root` →
50
+ `$EMBODIEDGEN_DATASET_ROOT` → repo default).
51
+
52
+ ### Required CSV Columns
53
+
54
+ `uuid`, `primary_category`, `secondary_category`, `category`, `description`,
55
+ `generate_time`, `urdf_path`
56
+
57
+ ## Query Guidelines
58
+
59
+ - Use explicit object words: `chair`, `bar stool`, `remote control`.
60
+ - Keep discriminating modifiers: `wooden`, `orange`, `modern`, `round`.
61
+ - Open-ended or Chinese descriptions are fine for the agent path; rewrite
62
+ to English keywords only when using the CLI script.
63
+
64
+ ## Script Usage
65
+
66
+ ```bash
67
+ # Single best match (absolute path on stdout)
68
+ python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
69
+ "modern dining chair curved backrest"
70
+
71
+ # Multiple candidates with scores
72
+ python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
73
+ "orange cushioned bar stool" \
74
+ --top-k 5 --format json
75
+
76
+ # Custom dataset location
77
+ python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
78
+ "black remote control" \
79
+ --dataset-root /path/to/dataset \
80
+ --index-file /path/to/dataset/dataset_index.csv
81
+
82
+ # Relative paths instead of absolute
83
+ python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
84
+ "wooden bar stool" --relative-paths
85
+ ```
86
+
87
+ Exit code 1 with `"No matching assets found."` on stderr when nothing matches.
embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Retrieve EmbodiedGen asset URDF paths from a CSV index."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import argparse
7
+ import csv
8
+ import json
9
+ import os
10
+ import re
11
+ import unicodedata
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+
15
# Words too generic to help ranking; tokens equal to one of these are discarded.
STOP_WORDS = {
    "a", "an", "and", "asset",
    "for", "in", "of", "on", "or",
    "that", "the", "to", "with",
}
# A token is a maximal run of lowercase ASCII letters and digits.
TOKEN_PATTERN = re.compile(r"[a-z0-9]+")
31
+
32
+
33
@dataclass(frozen=True)
class AssetRecord:
    """Immutable row of the CSV index together with its search fields.

    The leading attributes carry the index columns; the trailing ones hold
    normalized text and per-column token sets used for keyword scoring.
    """

    # Descriptive columns of the index row.
    uuid: str
    primary_category: str
    secondary_category: str
    category: str
    description: str
    generate_time: str
    # URDF location: as written in the index, and as a resolved path string.
    relative_urdf_path: str
    absolute_urdf_path: str
    # Normalized category/description text, used for whole-phrase matching.
    search_text: str
    # Token sets, one per searchable column, used for per-token matching.
    primary_tokens: frozenset[str]
    secondary_tokens: frozenset[str]
    category_tokens: frozenset[str]
    description_tokens: frozenset[str]
50
+
51
+
52
@dataclass(frozen=True)
class SearchResult:
    """A scored index entry produced by the keyword ranker."""

    score: float
    coverage: float
    record: AssetRecord

    def to_dict(self, use_relative_paths: bool) -> dict[str, object]:
        """Serialize the hit as a plain dict suitable for ``json.dumps``.

        Args:
            use_relative_paths: Emit the record's relative URDF path when
                true, otherwise its absolute path.

        Returns:
            Mapping with the chosen ``urdf_path``, the score and coverage
            rounded to three decimals, and the record's descriptive columns.
        """
        asset = self.record
        if use_relative_paths:
            chosen_path = asset.relative_urdf_path
        else:
            chosen_path = asset.absolute_urdf_path

        payload: dict[str, object] = {
            "urdf_path": chosen_path,
            "score": round(self.score, 3),
            "coverage": round(self.coverage, 3),
        }
        # Descriptive columns are copied verbatim, in a fixed output order.
        for column in (
            "uuid",
            "primary_category",
            "secondary_category",
            "category",
            "description",
            "generate_time",
        ):
            payload[column] = getattr(asset, column)
        return payload
78
+
79
+
80
def _repo_root() -> Path:
    """Return the directory five levels above this file's resolved path."""
    script_path = Path(__file__).resolve()
    return script_path.parents[4]
82
+
83
+
84
def _default_dataset_root() -> Path:
    """Resolve the dataset root used as the ``--dataset-root`` default.

    A non-empty ``$EMBODIEDGEN_DATASET_ROOT`` wins (user-expanded and
    resolved); otherwise ``outputs/EmbodiedGenData/dataset`` under the
    repository root is returned.
    """
    env_root = os.environ.get("EMBODIEDGEN_DATASET_ROOT")
    if not env_root:
        return _repo_root() / "outputs" / "EmbodiedGenData" / "dataset"
    return Path(env_root).expanduser().resolve()
89
+
90
+
91
def _default_index_file(dataset_root: Path) -> Path:
    """Pick the index CSV to use when ``--index-file`` was not supplied.

    Args:
        dataset_root: Dataset directory used for the fallback location.

    Returns:
        The user-expanded, resolved ``$EMBODIEDGEN_DATASET_INDEX`` when that
        variable is non-empty, else ``<dataset_root>/dataset_index.csv``.
    """
    env_index = os.environ.get("EMBODIEDGEN_DATASET_INDEX")
    if not env_index:
        return dataset_root / "dataset_index.csv"
    return Path(env_index).expanduser().resolve()
96
+
97
+
98
def _normalize_text(text: str) -> str:
    """Fold free text into lowercase ASCII words separated by single spaces.

    The text is NFKC-normalized and lowercased; ``_`` and ``-`` become
    spaces and ``&`` becomes the word "and"; every remaining character that
    is not a digit, ``a``-``z`` or whitespace is blanked out. A falsy input
    (``None`` or ``""``) yields the empty string.
    """
    folded = unicodedata.normalize("NFKC", text or "").lower()
    for separator, replacement in (("_", " "), ("-", " "), ("&", " and ")):
        folded = folded.replace(separator, replacement)
    ascii_only = re.sub(r"[^0-9a-z\s]", " ", folded)
    collapsed = re.sub(r"\s+", " ", ascii_only)
    return collapsed.strip()
103
+
104
+
105
def _normalize_token(token: str) -> str:
    """Reduce a simple English plural to a singular-looking form.

    ``-ies`` becomes ``-y`` for tokens longer than four characters.
    Otherwise a trailing ``s`` is dropped from tokens longer than three
    characters unless they end in ``ss`` or ``us``. Any other token is
    returned unchanged.
    """
    size = len(token)
    if size > 4 and token.endswith("ies"):
        return f"{token[:-3]}y"
    strippable_s = token.endswith("s") and not token.endswith(("ss", "us"))
    if size > 3 and strippable_s:
        return token[:-1]
    return token
115
+
116
+
117
def _tokenize(text: str) -> list[str]:
    """Split text into normalized keyword tokens, in order of appearance.

    The text is normalized, cut into alphanumeric runs, and each run is
    singularized. Tokens shorter than two characters and stop words are
    dropped; duplicates are kept.
    """
    candidates = (
        _normalize_token(raw_token)
        for raw_token in TOKEN_PATTERN.findall(_normalize_text(text))
    )
    return [
        token
        for token in candidates
        if len(token) >= 2 and token not in STOP_WORDS
    ]
125
+
126
+
127
def _dedupe_tokens(tokens: list[str]) -> list[str]:
    """Return the tokens with repeats removed, keeping first occurrences."""
    # dict keys keep insertion order, giving an order-preserving de-duplication.
    return list(dict.fromkeys(tokens))
136
+
137
+
138
def load_records(index_file: Path, dataset_root: Path) -> list[AssetRecord]:
    """Load asset records from dataset_index.csv.

    Args:
        index_file: CSV index to read. A leading UTF-8 byte-order mark is
            tolerated.
        dataset_root: Directory that the index's ``urdf_path`` values are
            joined onto to build absolute paths.

    Returns:
        One ``AssetRecord`` per row that names a URDF file, in file order.
        Rows whose ``urdf_path`` cell is missing or blank are skipped.
    """

    def _cell(row: dict, column: str) -> str:
        # DictReader yields None for cells missing from a short row (and
        # .get() yields None for an absent column); normalize both to "".
        return row.get(column) or ""

    records: list[AssetRecord] = []
    # "utf-8-sig" strips a BOM if present so the first header name stays
    # intact; files without a BOM decode exactly as with "utf-8".
    with index_file.open(newline="", encoding="utf-8-sig") as csv_file:
        for row in csv.DictReader(csv_file):
            relative_urdf_path = _cell(row, "urdf_path").strip()
            if not relative_urdf_path:
                # Without a path the row would resolve to the dataset root
                # directory itself, which is never a valid URDF result.
                continue
            absolute_urdf_path = str(
                (dataset_root / relative_urdf_path).resolve()
            )

            primary_category = _cell(row, "primary_category")
            secondary_category = _cell(row, "secondary_category")
            category = _cell(row, "category")
            description = _cell(row, "description")

            normalized_parts = (
                _normalize_text(value)
                for value in (
                    primary_category,
                    secondary_category,
                    category,
                    description,
                )
            )

            records.append(
                AssetRecord(
                    uuid=_cell(row, "uuid"),
                    primary_category=primary_category,
                    secondary_category=secondary_category,
                    category=category,
                    description=description,
                    generate_time=_cell(row, "generate_time"),
                    relative_urdf_path=relative_urdf_path,
                    absolute_urdf_path=absolute_urdf_path,
                    search_text=" ".join(
                        part for part in normalized_parts if part
                    ),
                    primary_tokens=frozenset(_tokenize(primary_category)),
                    secondary_tokens=frozenset(_tokenize(secondary_category)),
                    category_tokens=frozenset(_tokenize(category)),
                    description_tokens=frozenset(_tokenize(description)),
                )
            )
    return records
181
+
182
+
183
def _score_record(
    record: AssetRecord,
    query_text: str,
    query_tokens: list[str],
) -> SearchResult | None:
    """Score one record against a query, or return ``None`` on no match.

    Each query token contributes the weight of the best column containing
    it: category 8, secondary category 5, primary category 3, description
    2. A bonus of 8 is added when the whole normalized query occurs in the
    record's search text, plus up to 4 in proportion to the share of query
    tokens that matched.

    Args:
        record: Index entry to evaluate.
        query_text: Normalized query string used for the phrase bonus.
        query_tokens: Query tokens to look up in the record's token sets.

    Returns:
        A ``SearchResult`` with the score and coverage, or ``None`` when no
        query token matched any column.
    """
    field_weights = (
        (record.category_tokens, 8.0),
        (record.secondary_tokens, 5.0),
        (record.primary_tokens, 3.0),
        (record.description_tokens, 2.0),
    )
    per_token = [
        max(
            (weight for tokens, weight in field_weights if token in tokens),
            default=0.0,
        )
        for token in query_tokens
    ]

    hit_count = sum(1 for value in per_token if value > 0)
    if hit_count == 0:
        return None

    total = 0.0
    for value in per_token:
        total += value
    if query_text and query_text in record.search_text:
        total += 8.0

    coverage = hit_count / len(query_tokens)
    total += 4.0 * coverage
    return SearchResult(score=total, coverage=coverage, record=record)
215
+
216
+
217
def _generate_time_rank(generate_time: str) -> int:
    """Parse ``generate_time`` as an integer sort key, 0 when unparsable."""
    try:
        return int(generate_time or 0)
    except (TypeError, ValueError):
        # One malformed cell must not abort ranking for the whole index.
        return 0


def search_assets(
    records: list[AssetRecord],
    query: str,
    top_k: int,
) -> list[SearchResult]:
    """Return top-k lexical matches for a query.

    Results are ordered by descending score, then descending coverage, then
    descending ``generate_time`` (read as an integer; blank or non-integer
    values count as 0), then ascending absolute URDF path.

    Args:
        records: Index entries to search.
        query: Free-text query; it is normalized and tokenized here.
        top_k: Maximum number of results to return.

    Returns:
        At most ``top_k`` results, best first. Records matching no query
        token are omitted.

    Raises:
        ValueError: If the query has no searchable keywords left after
            normalization.
    """
    query_text = _normalize_text(query)
    query_tokens = _dedupe_tokens(_tokenize(query))
    if not query_text or not query_tokens:
        raise ValueError(
            "Query must contain searchable keywords after normalization."
        )

    scored = (
        _score_record(record, query_text, query_tokens) for record in records
    )
    ranked = [result for result in scored if result is not None]

    ranked.sort(
        key=lambda result: (
            -result.score,
            -result.coverage,
            -_generate_time_rank(result.record.generate_time),
            result.record.absolute_urdf_path,
        ),
    )
    return ranked[:top_k]
245
+
246
+
247
def _parse_args() -> argparse.Namespace:
    """Build the CLI parser and parse the process arguments.

    Returns:
        Namespace with ``query``, ``dataset_root``, ``index_file``,
        ``top_k``, ``format`` and ``relative_paths``.
    """
    parser = argparse.ArgumentParser(
        description="Retrieve EmbodiedGen asset URDF paths from dataset_index.csv."
    )
    parser.add_argument("query", help="Natural-language asset query.")

    # Optional flags, registered in the order they appear in --help.
    option_specs = (
        (
            "--dataset-root",
            {
                "default": str(_default_dataset_root()),
                "help": "Dataset root. Default: $EMBODIEDGEN_DATASET_ROOT or repo-relative dataset path.",
            },
        ),
        (
            "--index-file",
            {
                "default": None,
                "help": "CSV index path. Default: $EMBODIEDGEN_DATASET_INDEX or <dataset-root>/dataset_index.csv.",
            },
        ),
        (
            "--top-k",
            {
                "type": int,
                "default": 1,
                "help": "Number of matches to return.",
            },
        ),
        (
            "--format",
            {
                "choices": ("paths", "json"),
                "default": "paths",
                "help": "Output format.",
            },
        ),
        (
            "--relative-paths",
            {
                "action": "store_true",
                "help": "Return dataset-relative URDF paths instead of absolute paths.",
            },
        ),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    return parser.parse_args()
286
+
287
+
288
def main() -> int:
    """Run the CLI: locate the index, search it, and print the matches.

    Output is either one URDF path per line or a JSON array, depending on
    ``--format``.

    Returns:
        0 after results have been printed.

    Raises:
        ValueError: If ``--top-k`` is below 1.
        FileNotFoundError: If the index file does not exist.
        SystemExit: With the message ``No matching assets found.`` when the
            search returns nothing.
    """
    args = _parse_args()
    if args.top_k < 1:
        raise ValueError("--top-k must be >= 1")

    dataset_root = Path(args.dataset_root).expanduser().resolve()
    if args.index_file:
        index_file = Path(args.index_file).expanduser().resolve()
    else:
        index_file = _default_index_file(dataset_root)
    if not index_file.exists():
        raise FileNotFoundError(f"Dataset index not found: {index_file}")

    hits = search_assets(
        records=load_records(index_file=index_file, dataset_root=dataset_root),
        query=args.query,
        top_k=args.top_k,
    )
    if not hits:
        raise SystemExit("No matching assets found.")

    if args.format == "json":
        payload = [
            hit.to_dict(use_relative_paths=args.relative_paths) for hit in hits
        ]
        print(json.dumps(payload, ensure_ascii=False, indent=2))
    else:
        path_attr = (
            "relative_urdf_path"
            if args.relative_paths
            else "absolute_urdf_path"
        )
        for hit in hits:
            print(getattr(hit.record, path_attr))

    return 0
326
+
327
+
328
+ if __name__ == "__main__":
329
+ raise SystemExit(main())
embodied_gen/skills/asset-scale/SKILL.md ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: asset-scale
3
+ description: Scale 3D assets including meshes (OBJ, GLB, collision), Gaussian splats, and URDF metadata. Maintains directory structure and processes all related files together in a single run. Use when resizing assets for simulation compatibility, real-world scale adjustment, or batch asset processing.
4
+ ---
5
+
6
+ # Asset Scaling
7
+
8
+ ## Overview
9
+
10
+ Scale 3D assets from URDF files, automatically processing all related mesh formats (OBJ, GLB, collision meshes) and Gaussian splatting models. The tool maintains the original directory structure and updates URDF metadata (min_height, max_height, real_height) consistently.
11
+
12
+ **When to use**: Use this skill when you need to resize 3D assets for different environments, adjust real-world scale, or prepare assets for simulation with specific size requirements.
13
+
14
+ ---
15
+
16
+ ## Core Convention: Input/Output Structure
17
+
18
+ The skill expects URDF files at `<asset_dir>/result/<asset_name>.urdf` and outputs to `<output_dir>/<asset_dir_name>/` maintaining the same structure.
19
+
20
+ | Mode | Input URDF Path | Output Structure |
21
+ |------|-----------------|------------------|
22
+ | **Normal** | `path/to/asset/result/asset.urdf` | `<output_dir>/asset/result/asset.urdf` |
23
+ | **Inplace** | `path/to/asset/result/asset.urdf` | `path/to/asset/result/asset.urdf` (modified) |
24
+
25
+ **Key behaviors**:
26
+ - **Normal mode**: Entire asset directory is copied to output location before scaling
27
+ - **Inplace mode**: Files are modified directly without copying
28
+ - All mesh files (OBJ, GLB, collision, Gaussian splat) are scaled consistently
29
+ - URDF metadata heights are scaled to match new dimensions
30
+
31
+ ---
32
+
33
+ ## CLI Examples
34
+
35
+ ### Example 1: Scale Asset to Output Directory
36
+
37
+ Scale a red box asset to 80% of its original size and save to a new location.
38
+
39
+ ```bash
40
+ python -m embodied_gen.skills.asset-scale.asset_scale \
41
+ --urdf-path outputs/assets/red_box/result/red_box.urdf \
42
+ --scale-factor 0.8 \
43
+ --output-dir outputs/scaled
44
+ ```
45
+
46
+ **Output**: `outputs/scaled/red_box/result/red_box.urdf`
47
+
48
+ ---
49
+
50
+ ### Example 2: Scale Asset Inplace
51
+
52
+ Modify the asset files directly without creating a copy (useful for batch processing).
53
+
54
+ ```bash
55
+ python -m embodied_gen.skills.asset-scale.asset_scale \
56
+ --urdf-path outputs/assets/red_box/result/red_box.urdf \
57
+ --scale-factor 0.8 \
58
+ --inplace
59
+ ```
60
+
61
+ **Output**: `outputs/assets/red_box/result/red_box.urdf` (modified inplace)
62
+
63
+ **Warning**: Inplace mode modifies the original files. Make sure to back up important assets before using this mode.
64
+
65
+ ---
66
+
67
+ ## What Gets Scaled
68
+
69
+ | File Type | Scaling Method | Location |
70
+ |-----------|---------------|----------|
71
+ | **OBJ Mesh** | Vertex coordinates multiplied by scale factor | `mesh/<name>.obj` |
72
+ | **GLB Mesh** | All geometry vertices scaled | `mesh/<name>.glb` |
73
+ | **Collision Mesh** | Multi-object OBJ parsed and scaled | `mesh/<name>_collision.obj` |
74
+ | **Gaussian Splat** | Position and scale parameters updated | `mesh/<name>_gs.ply` |
75
+ | **URDF Metadata** | `min_height`, `max_height`, `real_height` scaled | `<name>.urdf` |
76
+
77
+ **Note**: Material files (textures, MTL) are copied unchanged in normal mode.
78
+
79
+ ---
80
+
81
+ ## Common Parameters
82
+
83
+ | Parameter | Type | Default | Meaning |
84
+ |-----------|------|---------|---------|
85
+ | `urdf_path` | `str \| Path` | Required | Path to input URDF file |
86
+ | `scale_factor` | `float` | Required | Scaling multiplier (must be positive). E.g., 0.8 = 80%, 1.5 = 150% |
87
+ | `inplace` | `bool` | `False` | If True, modify files inplace. `output_dir` is not required |
88
+ | `output_dir` | `str \| Path \| None` | `None` | Root output directory. Required when `inplace=False`, ignored when `inplace=True` |
89
+
90
+ ---
91
+
92
+ ## Next Steps
93
+
94
+ - For complete API details and internal methods, see the source code at `embodied_gen/skills/asset-scale/asset_scale.py`
embodied_gen/skills/asset-scale/__init__.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ """Asset scaling skills for EmbodiedGen."""
18
+
19
+ from importlib import import_module
20
+
21
+ __all__ = [
22
+ "AssetScaler",
23
+ "AssetScaleConfig",
24
+ "scale_asset",
25
+ "entrypoint",
26
+ ]
27
+
28
+
29
def __getattr__(name: str) -> object:
    """Resolve a public name on first access by importing ``asset_scale``.

    Only names listed in ``__all__`` are forwarded; the submodule is
    imported when one of them is requested rather than at package import.

    Raises:
        AttributeError: If ``name`` is not listed in ``__all__``.
    """
    if name in __all__:
        return getattr(import_module(f"{__name__}.asset_scale"), name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
embodied_gen/skills/asset-scale/asset_scale.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ """Asset scaling utility for resizing 3D assets and URDF files."""
18
+
19
+ import shutil
20
+ import xml.etree.ElementTree as ET
21
+ from concurrent.futures import ThreadPoolExecutor
22
+ from dataclasses import dataclass
23
+ from pathlib import Path
24
+ from typing import Optional
25
+
26
+ import trimesh
27
+ import tyro
28
+ from embodied_gen.models.gs_model import GaussianOperator
29
+ from embodied_gen.utils.log import logger
30
+
31
+ __all__ = ["AssetScaler", "AssetScaleConfig", "scale_asset", "entrypoint"]
32
+
33
+ # URDF metadata height fields (shared with urdf_convertor.py)
34
+ URDF_HEIGHT_FIELDS = ("min_height", "max_height", "real_height")
35
+
36
+ # Asset directory structure conventions
37
+ URDF_RESULT_DIR = "result"
38
+ MESH_DIR = "mesh"
39
+
40
+
41
# NOTE(review): this dataclass is handed to tyro.cli() in entrypoint(); its
# docstring and field definitions presumably feed the generated CLI help, so
# confirm against tyro's behavior before rewording them.
@dataclass
class AssetScaleConfig:
    """Configuration for asset scaling.

    Args:
        urdf_path: Path to the URDF file to scale.
        scale_factor: Scaling factor (e.g., 0.8 for 80% size).
        inplace: If True, modify files in-place. output_dir is not required.
        output_dir: Root output directory for scaled assets (not needed if inplace=True).
    """

    urdf_path: str
    scale_factor: float
    inplace: bool = False
    output_dir: Optional[str] = None
56
+
57
+
58
class AssetScaler:
    """Scale 3D assets including meshes, Gaussian splats, and URDF metadata.

    The scaler expects the layout ``<asset_dir>/result/<name>.urdf`` with
    mesh files under ``<asset_dir>/result/mesh/``. Mesh files that are not
    present are skipped. In normal mode the asset directory is copied to
    ``<output_dir>/<asset_dir name>`` and the copy is scaled; in inplace
    mode the original files are rewritten.
    """

    def __init__(
        self,
        urdf_path: str | Path,
        scale_factor: float,
        output_dir: Optional[str | Path] = None,
        inplace: bool = False,
    ) -> None:
        """Initialize the asset scaler.

        Args:
            urdf_path: Path to the URDF file to scale.
            scale_factor: Scaling factor (e.g., 0.8 for 80% size).
            output_dir: Root output directory for scaled assets (not needed if inplace=True).
            inplace: If True, modify files in-place instead of copying to output_dir.

        Raises:
            FileNotFoundError: If URDF file does not exist.
            ValueError: If scale_factor is not a positive finite number, or
                if output_dir is missing while inplace is False.
        """
        self.urdf_path = Path(urdf_path)
        self.scale_factor = scale_factor
        self.inplace = inplace

        if not self.urdf_path.exists():
            raise FileNotFoundError(f"URDF file not found: {self.urdf_path}")
        if self.scale_factor <= 0:
            raise ValueError(
                f"Scale factor must be positive, got: {self.scale_factor}"
            )
        # NaN compares unequal to itself and slips through the "<= 0" check.
        if self.scale_factor != self.scale_factor or self.scale_factor == float(
            "inf"
        ):
            raise ValueError(
                f"Scale factor must be finite, got: {self.scale_factor}"
            )

        # Derive asset directory structure from URDF path
        # URDF is at: <asset_dir>/result/<node_name>.urdf
        self.asset_dir = self.urdf_path.parent.parent
        self.node_name = self.urdf_path.stem

        if self.inplace:
            self.output_dir = self.asset_dir.parent
            logger.info(
                f"Running in inplace mode, will modify {self.asset_dir} directly"
            )
        else:
            if output_dir is None:
                raise ValueError("output_dir is required when inplace=False")
            self.output_dir = Path(output_dir)

    def scale(self) -> Path:
        """Execute the complete scaling workflow.

        Returns:
            Path to the output URDF file.

        Raises:
            ValueError: In normal mode, if the output location is the source
                asset directory or one of its ancestors.
        """
        if self.inplace:
            # Inplace mode: scale directly in asset_dir
            output_urdf_path = self.urdf_path
            self._scale_mesh_files_parallel(self.asset_dir)
            self._scale_urdf_metadata(output_urdf_path)
            logger.info(
                f"Scaled {self.asset_dir} by x{self.scale_factor} (inplace)"
            )
            return output_urdf_path

        # Normal mode: copy to output_dir and scale the copy
        output_asset_dir = self.output_dir / self.asset_dir.name
        # Must run before anything is created or removed: the copy step
        # deletes output_asset_dir first.
        self._ensure_output_is_separate(output_asset_dir)
        output_asset_dir.mkdir(parents=True, exist_ok=True)

        output_urdf_path = self._copy_asset_structure(output_asset_dir)
        self._scale_mesh_files_parallel(output_asset_dir)
        self._scale_urdf_metadata(output_urdf_path)

        logger.info(
            f"Scaled {self.asset_dir} by x{self.scale_factor} -> {output_asset_dir}"
        )
        return output_urdf_path

    def _ensure_output_is_separate(self, output_asset_dir: Path) -> None:
        """Refuse output locations whose removal would delete the source.

        Args:
            output_asset_dir: Directory the asset is about to be copied to.

        Raises:
            ValueError: If it resolves to the source asset directory or to
                one of its ancestors.
        """
        source = self.asset_dir.resolve()
        target = output_asset_dir.resolve()
        if target == source or target in source.parents:
            raise ValueError(
                f"Output location {target} would overwrite the source asset "
                f"directory {source}; choose a different output_dir or use "
                "inplace=True"
            )

    def _copy_asset_structure(self, output_asset_dir: Path) -> Path:
        """Copy asset directory structure to output location.

        Any existing content at the target is removed first.

        Args:
            output_asset_dir: Target directory for copied assets.

        Returns:
            Path to the copied URDF file.
        """
        # ignore_errors=True makes a missing target a no-op, so no separate
        # exists() check (and its race window) is needed.
        shutil.rmtree(output_asset_dir, ignore_errors=True)
        shutil.copytree(self.asset_dir, output_asset_dir)

        return output_asset_dir / URDF_RESULT_DIR / f"{self.node_name}.urdf"

    def _scale_mesh_files_parallel(self, output_asset_dir: Path) -> None:
        """Scale all mesh files in parallel for efficiency.

        Args:
            output_asset_dir: Directory containing assets to scale.
        """
        mesh_dir = output_asset_dir / URDF_RESULT_DIR / MESH_DIR

        # Each entry pairs an expected file with the method that rescales it.
        tasks = [
            (mesh_dir / f"{self.node_name}.obj", self._scale_obj_mesh),
            (mesh_dir / f"{self.node_name}.glb", self._scale_glb_mesh),
            (
                mesh_dir / f"{self.node_name}_collision.obj",
                self._scale_collision_mesh,
            ),
            (
                mesh_dir / f"{self.node_name}_gs.ply",
                self._scale_gaussian_splat,
            ),
        ]

        if not any(path.exists() for path, _ in tasks):
            # Every task would return immediately; say so instead of
            # silently producing an asset whose geometry was not touched.
            logger.warning(
                f"No mesh files for '{self.node_name}' found under "
                f"{mesh_dir}; no geometry was scaled"
            )
            return

        with ThreadPoolExecutor(max_workers=4) as executor:
            futures = [executor.submit(task, path) for path, task in tasks]
            for future in futures:
                future.result()  # Propagate any exceptions

    def _scale_obj_mesh(self, mesh_path: Path) -> None:
        """Scale the OBJ mesh at ``mesh_path`` in place; no-op if missing."""
        if not mesh_path.exists():
            return

        mesh = trimesh.load(str(mesh_path))
        mesh.apply_scale(self.scale_factor)
        mesh.export(str(mesh_path))

    def _scale_glb_mesh(self, mesh_path: Path) -> None:
        """Scale every geometry of the GLB at ``mesh_path``; no-op if missing."""
        if not mesh_path.exists():
            return

        # NOTE(review): relies on trimesh.load returning an object with a
        # ``geometry`` mapping (a scene) for GLB input — confirm.
        mesh = trimesh.load(str(mesh_path))
        for mesh_part in mesh.geometry.values():
            mesh_part.apply_scale(self.scale_factor)
        mesh.export(str(mesh_path))

    def _scale_collision_mesh(self, mesh_path: Path) -> None:
        """Scale each part of the collision OBJ in place; no-op if missing."""
        if not mesh_path.exists():
            return

        scene = trimesh.Scene()
        for mesh_part in self._load_collision_obj(str(mesh_path)):
            mesh_part.apply_scale(self.scale_factor)
            scene.add_geometry(mesh_part)
        scene.export(str(mesh_path))

    def _scale_gaussian_splat(self, mesh_path: Path) -> None:
        """Rescale the Gaussian splat PLY in place; no-op if missing."""
        if not mesh_path.exists():
            return

        gs_model: GaussianOperator = GaussianOperator.load_from_ply(
            str(mesh_path)
        )
        gs_model.rescale(self.scale_factor)
        gs_model.save_to_ply(str(mesh_path))

    def _scale_urdf_metadata(self, urdf_path: Path) -> None:
        """Scale height metadata in URDF file.

        Multiplies each field listed in ``URDF_HEIGHT_FIELDS`` under
        ``link/extra_info`` by the scale factor, writes it back with three
        decimals, and rewrites the file. A URDF without ``extra_info`` is
        left untouched and a warning is logged.

        Args:
            urdf_path: Path to URDF file to modify.
        """
        tree = ET.parse(str(urdf_path))
        extra_info = tree.getroot().find("link/extra_info")
        if extra_info is None:
            logger.warning(f"No extra_info found in URDF: {urdf_path}")
            return

        for height_field in URDF_HEIGHT_FIELDS:
            element = extra_info.find(height_field)
            if element is not None and element.text:
                scaled_value = float(element.text) * self.scale_factor
                element.text = f"{scaled_value:.3f}"

        tree.write(str(urdf_path), encoding="utf-8", xml_declaration=True)

    @staticmethod
    def _build_submesh(
        vertices: list[list[float]], faces: list[list[int]]
    ) -> trimesh.Trimesh:
        """Build one mesh from the shared vertex list and one face group."""
        submesh = trimesh.Trimesh(
            vertices=vertices, faces=faces, process=False
        )
        # All groups index into the same vertex list; drop the vertices
        # this group does not use.
        submesh.remove_unreferenced_vertices()
        return submesh

    @staticmethod
    def _load_collision_obj(filepath: str) -> list[trimesh.Trimesh]:
        """Robustly load collision OBJ with multiple objects.

        Parses ``v``, ``f`` and ``o``/``g`` lines by hand. Each ``o`` or
        ``g`` line closes the face group collected so far. Only the vertex
        index of each face corner is used, read as a positive 1-based index.

        Args:
            filepath: Path to collision OBJ file.

        Returns:
            List of trimesh objects, one per non-empty face group.
        """
        vertices: list[list[float]] = []
        meshes: list[trimesh.Trimesh] = []
        current_faces: list[list[int]] = []

        # Iterate the file lazily instead of reading it all at once.
        with open(filepath, "r") as f:
            for line in f:
                if line.startswith("v "):
                    parts = line.split()
                    vertices.append(
                        [float(parts[1]), float(parts[2]), float(parts[3])]
                    )
                elif line.startswith("f "):
                    parts = line.split()
                    # "f v/vt/vn ..." -> keep the vertex index, 0-based.
                    current_faces.append(
                        [int(p.split("/")[0]) - 1 for p in parts[1:]]
                    )
                elif line.startswith(("o ", "g ")):
                    if current_faces and vertices:
                        meshes.append(
                            AssetScaler._build_submesh(vertices, current_faces)
                        )
                        current_faces = []

        # Flush the group still open at end of file.
        if current_faces and vertices:
            meshes.append(AssetScaler._build_submesh(vertices, current_faces))

        return meshes
309
+
310
+
311
def scale_asset(
    urdf_path: str | Path,
    scale_factor: float,
    output_dir: Optional[str | Path] = None,
    inplace: bool = False,
) -> Path:
    """Scale the asset described by a URDF file in one call.

    Convenience wrapper that builds an ``AssetScaler`` and runs it.

    Args:
        urdf_path: Path to the URDF file to scale.
        scale_factor: Scaling factor (e.g., 0.8 for 80% size).
        output_dir: Root output directory for scaled assets (not needed if inplace=True).
        inplace: If True, modify files in-place instead of copying to output_dir.

    Returns:
        Path to the output URDF file.
    """
    return AssetScaler(
        urdf_path=urdf_path,
        scale_factor=scale_factor,
        output_dir=output_dir,
        inplace=inplace,
    ).scale()
330
+
331
+
332
def entrypoint() -> None:
    """Parse an ``AssetScaleConfig`` from the command line and scale the asset."""
    cfg = tyro.cli(AssetScaleConfig)
    result_path = scale_asset(
        cfg.urdf_path,
        cfg.scale_factor,
        cfg.output_dir,
        cfg.inplace,
    )
    logger.info(f"Scaled asset successfully: {result_path}")
344
+
345
+
346
+ if __name__ == "__main__":
347
+ entrypoint()
embodied_gen/skills/background-creator/SKILL.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: background-creator
3
+ description: Generate background 3D scenes with EmbodiedGen using scene3d-cli. Use this skill whenever users ask to create room/indoor background scenes from text prompts, pre-generate backgrounds for layout-cli, or control scene3d generation quality/runtime with retry, seed, and gs3d settings.
4
+ ---
5
+
6
+ # Background Creator
7
+
8
+ Unified entry for EmbodiedGen background scene generation via `scene3d-cli`.
9
+
10
+ ## When To Use
11
+
12
+ Use this skill when users want to:
13
+ - Generate indoor/background 3D scenes from text prompts.
14
+ - Pre-generate scene assets for `layout-cli`.
15
+ - Control `scene3d-cli` runtime/quality via seed, retry, and `gs3d` settings.
16
+
17
+ ## Routing Rule (Core)
18
+
19
+ Use `scene3d-cli` when input is scene-level text prompts and output target is a background scene (mesh + 3DGS), not single foreground assets.
20
+
21
+ ## Pre-checks
22
+
23
+ 1. Run commands from the repository root.
24
+ 2. Confirm the active environment is `embodiedgen`.
25
+ 3. Install scene3d dependencies first if needed:
26
+ `bash install.sh scene3d`
27
+ 4. If CLI commands are unavailable, run `pip install -e .` to register entrypoints.
28
+
29
+ ## Standard Command Template
30
+
31
+ ```bash
32
+ scene3d-cli --prompts "Art studio with easel and canvas" \
33
+ --output_dir outputs/bg_scenes \
34
+ --seed 0 \
35
+ --gs3d.max_steps 4000 \
36
+ --disable_pano_check
37
+ ```
38
+
39
+ ## Common Parameters
40
+
41
+ - `--prompts`: one or more scene text prompts.
42
+ - `--output_dir`: output root directory for generated scenes.
43
+ - `--seed`: random seed for reproducibility.
44
+ - `--n_retry`: panorama generation retries.
45
+ - `--real_height`: force target real-world room height in meters.
46
+ - `--pano_image_only`: generate only panorama image (debug/fast validation).
47
+ - `--disable_pano_check`: skip panorama quality check.
48
+ - `--keep_middle_result`: keep intermediate training artifacts.
49
+ - `--gs3d.max_steps`: training steps for 3DGS optimization.
50
+
51
+ ## Output Conventions
52
+
53
+ Each prompt is saved under `<output_dir>/scene_xxxx/`, typically including:
54
+ - `gs_model.ply`
55
+ - `mesh_model.ply`
56
+ - `pano_image.png`
57
+ - `prompt.txt`
58
+ - `video.mp4`
59
+ - `gsplat_cfg.yml`
60
+
61
+ ## Runtime Expectations
62
+
63
+ - Typical full generation time is around 30 minutes per scene.
64
+ - Use `--pano_image_only` for quick prompt validation before full generation.
65
+
66
+ ## Failure Handling and Retry
67
+
68
+ 1. OOM or GPU pressure: reduce concurrency and lower `--gs3d.max_steps`.
69
+ 2. Unstable scene quality: increase `--n_retry` or adjust prompt specificity.
70
+ 3. Missing outputs: verify `--output_dir` permissions and use absolute paths.
embodied_gen/skills/claude_adapter/.claude-plugin/marketplace.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "embodiedgen-local",
3
+ "owner": {
4
+ "name": "EmbodiedGen"
5
+ },
6
+ "plugins": [
7
+ {
8
+ "name": "embodiedgen-skills",
9
+ "description": "EmbodiedGen workflow plugin bundle with slash commands and reusable skills",
10
+ "version": "0.1.0",
11
+ "author": {
12
+ "name": "EmbodiedGen"
13
+ },
14
+ "source": "./plugins/embodiedgen-skills"
15
+ }
16
+ ]
17
+ }
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/.claude-plugin/plugin.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "embodiedgen-skills",
3
+ "version": "0.1.0",
4
+ "description": "EmbodiedGen workflow plugin bundle with slash commands and reusable skills for asset generation, background generation, layout generation, simulation rendering, asset conversion, room creation, asset scaling, and spatial scene editing",
5
+ "author": {
6
+ "name": "EmbodiedGen"
7
+ }
8
+ }
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/assets.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ description: Run the EmbodiedGen asset generation workflow for image-to-3D, text-to-3D, or texture generation
3
+ argument-hint: "[request or command requirements]"
4
+ ---
5
+
6
+ # Assets Skill Command
7
+
8
+ Route the user's request to the EmbodiedGen asset generation workflow.
9
+
10
+ ## Workflow
11
+
12
+ ### Step 1: Interpret the request
13
+
14
+ Use `$ARGUMENTS` if provided. If it is empty, ask what the user wants to generate or texture.
15
+
16
+ ### Step 2: Load the skill
17
+
18
+ Use `skill: "asset-creator"`.
19
+
20
+ ### Step 3: Execute the correct route
21
+
22
+ Follow the skill to choose one of:
23
+ - `img3d-cli`
24
+ - `text3d-cli`
25
+ - `texture-cli`
26
+
27
+ ### Step 4: Deliver
28
+
29
+ Return:
30
+ 1. The exact command used
31
+ 2. The output directory
32
+ 3. Any important runtime notes or dependency issues
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/background.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ description: Run the EmbodiedGen background scene generation workflow with scene3d-cli
3
+ argument-hint: "[scene prompt or generation request]"
4
+ ---
5
+
6
+ # Background Skill Command
7
+
8
+ Route the user's request to the EmbodiedGen background generation workflow.
9
+
10
+ ## Workflow
11
+
12
+ ### Step 1: Interpret the request
13
+
14
+ Use `$ARGUMENTS` if provided. If it is empty, ask for the target room or background scene description.
15
+
16
+ ### Step 2: Load the skill
17
+
18
+ Use `skill: "background-creator"`.
19
+
20
+ ### Step 3: Execute the workflow
21
+
22
+ Follow the skill and build the correct `scene3d-cli` command.
23
+
24
+ ### Step 4: Deliver
25
+
26
+ Return:
27
+ 1. The exact command used
28
+ 2. The output directory
29
+ 3. Expected runtime and any caveats
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/convert.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ description: Run the EmbodiedGen simulator asset conversion workflow for USD, MJCF, or direct URDF usage
3
+ argument-hint: "[target simulator or conversion request]"
4
+ ---
5
+
6
+ # Convert Skill Command
7
+
8
+ Route the user's request to the EmbodiedGen asset conversion workflow.
9
+
10
+ ## Workflow
11
+
12
+ ### Step 1: Interpret the request
13
+
14
+ Use `$ARGUMENTS` if provided. If it is empty, ask for the target simulator and input URDF path.
15
+
16
+ ### Step 2: Load the skill
17
+
18
+ Use `skill: "asset-converter"`.
19
+
20
+ ### Step 3: Execute the workflow
21
+
22
+ Follow the skill and choose:
23
+ - `USD` for IsaacSim
24
+ - `MJCF` for MuJoCo or Genesis
25
+ - direct `URDF` for SAPIEN, IsaacGym, or PyBullet
26
+
27
+ ### Step 4: Deliver
28
+
29
+ Return:
30
+ 1. The exact Python API or command used
31
+ 2. The converted output path
32
+ 3. Any dependency or simulator-specific notes
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/layout.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ description: Run the EmbodiedGen layout generation workflow with layout-cli
3
+ argument-hint: "[task description or layout generation request]"
4
+ ---
5
+
6
+ # Layout Skill Command
7
+
8
+ Route the user's request to the EmbodiedGen interactive layout workflow.
9
+
10
+ ## Workflow
11
+
12
+ ### Step 1: Interpret the request
13
+
14
+ Use `$ARGUMENTS` if provided. If it is empty, ask for the task description or task file path.
15
+
16
+ ### Step 2: Load the skill
17
+
18
+ Use `skill: "layout-creator"`.
19
+
20
+ ### Step 3: Execute the workflow
21
+
22
+ Follow the skill and build the correct `layout-cli` command, including `--bg_list` and output settings.
23
+
24
+ ### Step 4: Deliver
25
+
26
+ Return:
27
+ 1. The exact command used
28
+ 2. The output root
29
+ 3. Expected runtime and any dependency warnings
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/process.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ description: Run the EmbodiedGen asset processing workflow for scaling URDF-based assets and related files
3
+ argument-hint: "[asset scaling request]"
4
+ ---
5
+
6
+ # Process Skill Command
7
+
8
+ Route the user's request to the EmbodiedGen asset processing workflow.
9
+
10
+ ## Workflow
11
+
12
+ ### Step 1: Interpret the request
13
+
14
+ Use `$ARGUMENTS` if provided. If it is empty, ask for the URDF path and desired scale factor.
15
+
16
+ ### Step 2: Load the skill
17
+
18
+ Use `skill: "asset-scale"`.
19
+
20
+ ### Step 3: Execute the workflow
21
+
22
+ Follow the skill and build the correct `python -m embodied_gen.skills.asset-scale.asset_scale` command.
23
+
24
+ ### Step 4: Deliver
25
+
26
+ Return:
27
+ 1. The exact command used
28
+ 2. The output path
29
+ 3. Whether the operation is normal mode or inplace mode
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/room.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ description: Run the EmbodiedGen room generation workflow with room-creator for room or house generation and export
3
+ argument-hint: "[room generation request]"
4
+ ---
5
+
6
+ # Room Skill Command
7
+
8
+ Route the user's request to the EmbodiedGen room creation workflow.
9
+
10
+ ## Workflow
11
+
12
+ ### Step 1: Interpret the request
13
+
14
+ Use `$ARGUMENTS` if provided. If it is empty, ask for room type, output root, and whether export is needed.
15
+
16
+ ### Step 2: Load the skill
17
+
18
+ Use `skill: "room-creator"`.
19
+
20
+ ### Step 3: Execute the workflow
21
+
22
+ Follow the skill and build the correct `python -m embodied_gen.scripts.room_gen.gen_room` or `room-cli` command.
23
+
24
+ ### Step 4: Deliver
25
+
26
+ Return:
27
+ 1. The exact command used
28
+ 2. The output directory
29
+ 3. Runtime and export-stage notes
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/sim.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ description: Run the EmbodiedGen simulation rendering workflow with sim-cli
3
+ argument-hint: "[layout path or simulation request]"
4
+ ---
5
+
6
+ # Sim Skill Command
7
+
8
+ Route the user's request to the EmbodiedGen simulation rendering workflow.
9
+
10
+ ## Workflow
11
+
12
+ ### Step 1: Interpret the request
13
+
14
+ Use `$ARGUMENTS` if provided. If it is empty, ask for the `layout.json` path or target simulation request.
15
+
16
+ ### Step 2: Load the skill
17
+
18
+ Use `skill: "sim-runner"`.
19
+
20
+ ### Step 3: Execute the workflow
21
+
22
+ Follow the skill and build the correct `sim-cli` command.
23
+
24
+ ### Step 4: Deliver
25
+
26
+ Return:
27
+ 1. The exact command used
28
+ 2. The output video path
29
+ 3. Any camera, performance, or rendering notes
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/spatial.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ description: Run the EmbodiedGen spatial computing workflow for floorplans and object placement or deletion in scenes
3
+ argument-hint: "[scene editing request]"
4
+ ---
5
+
6
+ # Spatial Skill Command
7
+
8
+ Route the user's request to the EmbodiedGen spatial computing workflow.
9
+
10
+ ## Workflow
11
+
12
+ ### Step 1: Interpret the request
13
+
14
+ Use `$ARGUMENTS` if provided. If it is empty, ask for the URDF path and target placement, deletion, or query request.
15
+
16
+ ### Step 2: Load the skill
17
+
18
+ Use `skill: "spatial-computing"`.
19
+
20
+ ### Step 3: Execute the workflow
21
+
22
+ Follow the skill and choose the correct `python -m embodied_gen.skills.spatial-computing.cli.main` or `room-cli -m ...` invocation.
23
+
24
+ ### Step 4: Deliver
25
+
26
+ Return:
27
+ 1. The exact command used
28
+ 2. The updated output file path
29
+ 3. Any constraints about USD updates or exact instance matching
embodied_gen/skills/layout-creator/SKILL.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: layout-creator
3
+ description: Generate interactive 3D layouts from task descriptions with EmbodiedGen using layout-cli. Use this skill whenever users ask to build task-driven 3D scenes, batch-generate layouts from task files, tune layout generation retries/seeds, or produce simulator-ready layout outputs from background scene lists.
4
+ ---
5
+
6
+ # Layout Creator
7
+
8
+ Unified entry for EmbodiedGen interactive layout generation via `layout-cli`.
9
+
10
+ ## When To Use
11
+
12
+ Use this skill when users want to:
13
+ - Generate interactive 3D scenes from task descriptions.
14
+ - Batch-generate layouts from a task list file.
15
+ - Build simulator-ready layout outputs (`layout.json`, renders) with optional robot insertion.
16
+ - Tune generation quality and stability via retry and seed settings.
17
+
18
+ ## Routing Rule (Core)
19
+
20
+ Use `layout-cli` when the user input is task-level natural language descriptions (e.g., "put the pen in the mug") and the target output is an interactive layout scene, not standalone assets or standalone background scenes.
21
+
22
+ ## Pre-checks
23
+
24
+ 1. Run commands from the repository root.
25
+ 2. Confirm the active environment is `embodiedgen`.
26
+ 3. Confirm background scene list file exists and is readable (via `--bg_list`).
27
+ 4. If CLI commands are unavailable, run `pip install -e .` to register entrypoints.
28
+
29
+ ## Standard Command Templates
30
+
31
+ ### 1) Generate layouts from inline task descriptions
32
+
33
+ ```bash
34
+ layout-cli \
35
+ --task_descs "Place the pen in the mug on the desk" "Put the fruit on the table on the plate" \
36
+ --bg_list "outputs/example_gen_scenes/scene_part_list.txt" \
37
+ --output_root "outputs/layouts_gen" \
38
+ --insert_robot
39
+ ```
40
+
41
+ ### 2) Batch generation from task list file (background run)
42
+
43
+ ```bash
44
+ layout-cli \
45
+ --task_descs "apps/assets/example_layout/task_list.txt" \
46
+ --bg_list "outputs/example_gen_scenes/scene_part_list.txt" \
47
+ --n_image_retry 4 --n_asset_retry 3 --n_pipe_retry 3 \
48
+ --output_root "outputs/layouts_gens" \
49
+ --insert_robot > layouts_gens.log 2>&1 &
50
+ ```
51
+
52
+ ## Common Parameters
53
+
54
+ - `--task_descs`: task descriptions or a task-list text file path.
55
+ - `--output_root`: root output directory.
56
+ - `--bg_list`: background scene list file (scene retrieval pool).
57
+ - `--insert_robot`: include robot pose in layout generation/simulation output.
58
+ - `--output_iscene`: export composed scene mesh (`Iscene.glb`).
59
+ - `--n_image_retry --n_asset_retry --n_pipe_retry`: retry controls for text-to-3D subpipeline.
60
+ - `--seed_img --seed_3d --seed_layout`: reproducibility controls.
61
+ - `--n_img_sample --text_guidance_scale --img_denoise_step`: text-to-image / asset-generation controls.
62
+ - `--keep_intermediate`: keep intermediate files from generation substeps.
63
+
64
+ ## Output Conventions
65
+
66
+ Outputs are organized by task index:
67
+ - `<output_root>/task_0000/layout.json`
68
+ - `<output_root>/task_0000/scene_tree.jpg`
69
+ - `<output_root>/task_0000/background/`
70
+ - `<output_root>/task_0000/asset3d/`
71
+ - Optional: `<output_root>/task_0000/Iscene.glb` (when `--output_iscene` is enabled)
72
+
73
+ ## Runtime Expectations
74
+
75
+ - Typical generation time is around 30 minutes per task (depends on retries/GPU/background matching).
76
+ - Batch jobs should run in the background (e.g., `nohup ... &`) with log redirection.
77
+
78
+ ## Failure Handling and Retry
79
+
80
+ 1. Missing background candidate: verify `--bg_list` path and referenced scene directories.
81
+ 2. OOM or GPU pressure: reduce concurrency and lower retry/sample settings.
82
+ 3. Poor asset/layout quality: increase retry counts or refine task text.
83
+ 4. Missing outputs: verify output permissions and use absolute paths.
embodied_gen/skills/room-creator/SKILL.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: room-creator
3
+ description: Generate indoor rooms (single room or house) and export URDF/USD by wrapping embodied_gen/scripts/room_gen/gen_room.py. Use when users ask to create rooms with seed control, choose room type and complexity, run generation/export stages, or run reproducible room generation jobs (batch runs can be done by wrapping this command in an outer loop/script).
4
+ ---
5
+
6
+ # Room Creator
7
+
8
+ Generate room scenes from Infinigen (https://github.com/princeton-vl/infinigen) with `python -m embodied_gen.scripts.room_gen.gen_room`, and optionally export URDF/USD.
9
+
10
+ ## Use This Workflow
11
+
12
+ 1. Confirm output root and target room profile.
13
+ 2. Choose generation scope:
14
+ - `--gen --urdf --usd` for full pipeline.
15
+ - `--gen --no-urdf --no-usd` for generation only.
16
+ - `--no-gen --urdf --usd` for export from existing blender output.
17
+ 3. Run the command from repository root.
18
+ 4. Verify output folder: `<output_root>/<RoomType>_seed<seed>/` (if `--seed` is omitted, check the generated seed from logs first).
19
+
20
+ ## Parameters
21
+
22
+ - `--output-root` (required): base output directory.
23
+ - `--room-type`: `Bedroom | LivingRoom | Kitchen | Bathroom | DiningRoom | Office | House`.
24
+ - `--seed`: random seed. For reproducible runs, pass this explicitly; if omitted, a random seed is generated.
25
+ - `--complexity`: `minimalist | simple | medium | detail`.
26
+ - `--custom-params`: gin file copied to Infinigen `custom_solve.gin`.
27
+ - `--large-scene`: only for `House`; enables more rooms.
28
+ - `--gen/--no-gen`, `--urdf/--no-urdf`, `--usd/--no-usd`: pipeline switches.
29
+
30
+ ## Complexity Guidance
31
+
32
+ - `minimalist`: fastest, sparse furniture.
33
+ - `simple`: default, balanced quality/time.
34
+ - `medium`: richer layout, slower.
35
+ - `detail`: highest detail, longest runtime.
36
+
37
+ ## Command Templates
38
+
39
+ ```bash
40
+ # Full pipeline for one kitchen
41
+ python -m embodied_gen.scripts.room_gen.gen_room \
42
+ --output-root outputs/rooms \
43
+ --room-type Kitchen \
44
+ --seed 42 \
45
+ --complexity simple
46
+ ```
47
+
48
+ ```bash
49
+ # Generation only (no export)
50
+ python -m embodied_gen.scripts.room_gen.gen_room \
51
+ --output-root outputs/rooms \
52
+ --room-type LivingRoom \
53
+ --seed 100 \
54
+ --complexity medium \
55
+ --no-urdf --no-usd
56
+ ```
57
+
58
+ ```bash
59
+ # Export only from existing blender result
60
+ python -m embodied_gen.scripts.room_gen.gen_room \
61
+ --output-root outputs/rooms \
62
+ --room-type Kitchen \
63
+ --seed 42 \
64
+ --no-gen --urdf --usd
65
+ ```
66
+
67
+ ```bash
68
+ # House generation (use --large-scene for more rooms)
69
+ python -m embodied_gen.scripts.room_gen.gen_room \
70
+ --output-root outputs/rooms \
71
+ --room-type House \
72
+ --seed 7 \
73
+ --complexity simple \
74
+ --large-scene
75
+ ```
76
+
77
+ ## Runtime Requirements
78
+
79
+ - Run from repo root so relative paths resolve.
80
+ - Ensure Blender Python exists at `$BLENDER_PYTHON`. If not set, the command will try:
81
+ `thirdparty/infinigen/blender/4.2/python/bin/python3.11` (must exist on disk).
82
+ - `--no-gen` requires existing blender output at:
83
+ `<output_root>/<RoomType>_seed<seed>/blender`.
embodied_gen/skills/sim-runner/SKILL.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: sim-runner
3
+ description: Run SAPIEN-based simulation rendering from EmbodiedGen layout outputs using sim-cli. Use this skill whenever users ask to load a generated layout.json into simulation, render interactive scene videos, control camera/render settings, or enable robot grasp trajectory rendering.
4
+ ---
5
+
6
+ # Sim Runner
7
+
8
+ Unified entry for EmbodiedGen simulation rendering via `sim-cli`.
9
+
10
+ ## When To Use
11
+
12
+ Use this skill when users want to:
13
+ - Load a generated `layout.json` into simulation.
14
+ - Render interactive scene videos (foreground + 3DGS background composition).
15
+ - Adjust camera, rendering, or simulation-step parameters.
16
+ - Include robot grasp trajectory rendering with `--insert_robot`.
17
+
18
+ ## Routing Rule (Core)
19
+
20
+ Use `sim-cli` when the input is an existing layout result (especially `layout.json`) and the target output is simulation visualization (e.g., `Iscene.mp4`), not generation of new assets/backgrounds/layouts.
21
+
22
+ ## Pre-checks
23
+
24
+ 1. Run commands from the repository root.
25
+ 2. Confirm the active environment is `embodiedgen`.
26
+ 3. Confirm input `--layout_path` exists and points to a valid layout output.
27
+ 4. Ensure referenced background and asset files in the layout directory are present.
28
+ 5. If CLI commands are unavailable, run `pip install -e .` to register entrypoints.
29
+
30
+ ## Standard Command Template
31
+
32
+ ```bash
33
+ sim-cli \
34
+ --layout_path "outputs/layouts_gen/task_0000/layout.json" \
35
+ --output_dir "outputs/layouts_gen/task_0000/sapien_render" \
36
+ --insert_robot
37
+ ```
38
+
39
+ ## Common Parameters
40
+
41
+ - `--layout_path`: input layout file path.
42
+ - `--output_dir`: output directory for rendered video.
43
+ - `--insert_robot`: render robot grasp actions for manipulated objects.
44
+ - `--sim_freq --control_freq --sim_step`: simulation/control timing settings.
45
+ - `--render_interval`: render every N simulation steps.
46
+ - `--num_cameras --camera_radius --camera_height --fovy_deg`: camera configuration.
47
+ - `--image_hw`: output frame size.
48
+ - `--render_keys`: render channels (requires `Foreground` for final compositing).
49
+ - `--ray_tracing`: enable/disable ray tracing backend.
50
+ - `--device`: rendering device (e.g., `cuda`).
51
+
52
+ ## Output Conventions
53
+
54
+ Primary output:
55
+ - `<output_dir>/Iscene.mp4`
56
+
57
+ Typical input dependencies resolved from layout directory:
58
+ - `layout.json`
59
+ - background `gs_model.ply`
60
+ - per-object assets referenced by layout
61
+
62
+ ## Runtime Expectations
63
+
64
+ - Runtime depends on `sim_step`, `render_interval`, camera count, and ray-tracing mode.
65
+ - Enabling `--insert_robot` increases render time due to grasp-action rollout.
66
+
67
+ ## Failure Handling and Retry
68
+
69
+ 1. Missing file errors: verify layout-relative asset/background paths exist.
70
+ 2. GPU memory pressure: reduce `--num_cameras`, `--image_hw`, or disable heavy settings.
71
+ 3. Empty/invalid video output: ensure `Foreground` is included in `--render_keys`.
72
+ 4. Slow runtime: reduce `--sim_step` or increase `--render_interval`.
embodied_gen/skills/spatial-computing/README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Using with IDE Agent via Natural Language
2
+
3
+ The Agent will automatically load this skill based on its **description** when you mention URDF, floorplan, indoor scene, object placement, etc. You only need to specify in natural language **what to do** and provide **key information like paths/room names**.
4
+
5
+ ### LLM Environment Configuration (When Using Semantic Matching)
6
+
7
+ If you want to use natural language descriptions (e.g., "put lamp on bookshelf") instead of exact instance/room names, you need to configure the LLM environment first:
8
+
9
+ ```bash
10
+ # If outputs/env.sh exists, source it first
11
+ source outputs/env.sh
12
+ ```
13
+
14
+ If access to the LLM interface is unavailable, please provide exact instance names (you can check them via `--list_instances`).
15
+
16
+ ### URDF Visualization Only (Generate Floorplan)
17
+
18
+ **You can say:**
19
+ - "Help me visualize `path_to/scene.urdf` or `path_to/folder_contain/scene.urdf`"
20
+
21
+ **Agent will:** Use `visualize_floorplan(urdf_path=..., output_path=...)` or the corresponding CLI to generate the floorplan only, without modifying URDF/USD.
22
+
23
+ ### Insert Object and Update Scene (URDF, or URDF+USD)
24
+
25
+ **You can say:**
26
+ - "Put `chair.obj` into scene.urdf's kitchen room"
27
+ - "Put `bottle.obj` into the URDF at `outputs/rooms/Kitchen_seed3773`, instance name bottle_1, update scene and generate floorplan"
28
+ - "Put a cup on the table in the living room" → Agent will use `on_instance="table"`, `place_strategy="top"`, etc.
29
+
30
+ **If you also want to update USD:**
31
+ - "Put a chair in the kitchen, update both URDF and USD, USD path is `xxx/usd/export_scene.usdc`"
32
+ - Note that you need to use **room-cli** to execute (this skill will prompt the Agent), because writing USD requires bpy.
33
+
34
+ **Agent will:** Use `FloorplanManager` + `insert_object` (or `insert_object_to_scene`), execute according to the paths and room names you provided; when USD is needed, use room-cli to run the CLI.
35
+
36
+ ### View Instances and Rooms in the Scene
37
+
38
+ Before placing objects, you can first view what instances and rooms are in the scene:
39
+
40
+ **You can say:**
41
+ - "Help me list all instances and room names in `.../scene.urdf`"
42
+
43
+ **Agent will:** Execute `--list_instances` to display the instance names and room names in the current scene.
44
+
45
+ ### URDF/USD Output Notes
46
+
47
+ - **URDF Output**: The updated URDF is written to `*_updated.urdf` by default (e.g., `scene.urdf` → `scene_updated.urdf`), and **will not overwrite** the original `scene.urdf`
48
+ - **USD Output**: If `usd_path` is specified, the USD file will be written to `*_updated.usdc` following the same rule
49
+ - **Only Update USD**: Requires using **room-cli** to execute, because writing USD needs Blender (bpy)
50
+
51
+ ### What Information to Provide
52
+
53
+ | Goal | Suggested Information to Provide in Conversation |
54
+ |------|-----------------------------------------------|
55
+ | Visualization only | URDF path, floorplan save path (optional, Agent can default to floorplan.png in same directory) |
56
+ | View instances/rooms | URDF path, let Agent list instance names and room names in current scene |
57
+ | Placement + update | URDF path, object mesh path (.obj), instance name (e.g., chair_1), room name (e.g., kitchen); if placing on table, say "place on table"; if updating USD, also provide USD path and use room-cli |
58
+
59
+ Example in one go: "Use spatial-computing skill, generate floorplan for `.../scene.urdf` and save to floorplan.png in same directory, then put `path/to/bottle.obj` into kitchen, instance name bottle_1, update URDF only."
embodied_gen/skills/spatial-computing/REFERENCE.md ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Floorplan Skill — API Reference
2
+
3
+ This document provides API details, configuration items, errors, and dependencies for reference beyond the usage instructions in [SKILL.md](SKILL.md).
4
+
5
+ ## Contents
6
+
7
+ - [Floorplan Skill — API Reference](#floorplan-skill--api-reference)
8
+ - [Contents](#contents)
9
+ - [LLM Environment Configuration](#llm-environment-configuration)
10
+ - [FloorplanManager](#floorplanmanager)
11
+ - [Constructor](#constructor)
12
+ - [Methods](#methods)
13
+ - [Convenience Functions](#convenience-functions)
14
+ - [CLI Features](#cli-features)
15
+ - [Command Line Parameters](#command-line-parameters)
16
+ - [Configuration and Ignore Items](#configuration-and-ignore-items)
17
+ - [Smart File Naming Strategy](#smart-file-naming-strategy)
18
+ - [USD and Blender](#usd-and-blender)
19
+ - [Errors and Return Values](#errors-and-return-values)
20
+ - [Dependencies](#dependencies)
21
+ - [Usage Recommendations](#usage-recommendations)
22
+
23
+ ---
24
+
25
+ ## LLM Environment Configuration
26
+
27
+ Before using semantic matching (`resolve_*` methods), configure the LLM API:
28
+
29
+ ```bash
30
+ # Use the project-provided env (Azure + proxy), if outputs/env.sh exists:
31
+ source outputs/env.sh
32
+ ```
33
+
34
+ If access to the LLM interface is unavailable, ask the user to provide exact instance and room names instead (they can be listed via `--list_instances`).
35
+
36
+ ---
37
+
38
+ ## FloorplanManager
39
+
40
+ ### Constructor
41
+
42
+ ```python
43
+ from importlib import import_module
44
+
45
+ FloorplanManager = import_module(
46
+ "embodied_gen.skills.spatial-computing.api"
47
+ ).FloorplanManager
48
+
49
+ manager = FloorplanManager(
50
+ urdf_path="scene.urdf", # Required
51
+ usd_path=None, # Optional; if provided, the USD is also written after insert/delete
52
+ mesh_sample_num=50000,
53
+ ignore_items=None, # Default ["ceiling", "light", "exterior"]
54
+ output_strategy="suffix", # "suffix" (default) / "timestamp" / "overwrite"
55
+ )
56
+ ```
57
+
58
+ ### Methods
59
+
60
+ | Method | Description |
61
+ |--------|-------------|
62
+ | `visualize(output_path)` | Generate floorplan and save as image |
63
+ | `insert_object(asset_path, instance_key, in_room=..., on_instance=..., beside_instance=..., place_strategy=..., n_max_attempt=2000, rotation_rpy=...)` | Place object, automatically write back to URDF/USD on success, return `[x,y,z]` or `None` |
64
+ | `delete_object(instance_key, in_room=..., urdf_output_path=..., usd_output_path=...)` | Delete instance from scene, return `True`/`False`. Supports room constraint via `in_room` |
65
+ | `query_instance_center(instance_key)` | Query instance center coordinates, return `[x,y,z]` or `None` |
66
+ | `update_scene(urdf_output_path=..., usd_output_path=...)` | Manually write back currently placed instances; generally not needed (called inside `insert_object`) |
67
+ | `get_room_names()` | List of room names |
68
+ | `get_instance_names()` | List of instance names (excluding walls/floor) |
69
+ | `get_instance_names_in_room(in_room)` | List of instance names within a specific room |
70
+ | `resolve_on_instance(on_instance, gpt_client=None)` | Resolve user description to exact instance name |
71
+ | `resolve_in_room(in_room, gpt_client=None)` | Resolve user description to exact room name |
72
+ | `resolve_beside_instance(beside_instance, gpt_client=None, in_room=None)` | Resolve user description to exact instance name for beside placement |
73
+ | `resolve_delete_instance(delete_instance, gpt_client=None, in_room=None)` | Resolve user description to exact instance name for deletion |
74
+ | `resolve_and_query_instance(query_instance, gpt_client=None)` | Resolve and query instance center in one call, return `(resolved_name, [x,y,z])` or `(None, None)` |
75
+ | `get_occupied_area()` | Occupied area Shapely geometry |
76
+ | `get_floor_union()` | Floor area union geometry |
77
+
78
+ **Key parameters**:
79
+ - `on_instance` / `beside_instance` / `delete_instance`: Exact instance name or semantic description (with `gpt_client`)
80
+ - `in_room`: Room constraint for placement/deletion/query
81
+ - `place_strategy`: `"random"` (default) or `"top"` (select highest surface)
82
+ - `beside_distance`: Max distance in meters for beside placement (default 0.5)
83
+
84
+ ---
85
+
86
+ ## Convenience Functions
87
+
88
+ | Function | Description |
89
+ |----------|-------------|
90
+ | `visualize_floorplan(urdf_path, output_path, ...)` | Generate floorplan only |
91
+ | `insert_object_to_scene(urdf_path, asset_path, instance_key, output_path, ...)` | Insert object and generate floorplan, return `[x,y,z]` or `None` |
92
+ | `delete_object_from_scene(urdf_path, instance_key, in_room=..., output_path=...)` | Delete instance and optionally generate floorplan, return `True`/`False` |
93
+ | `query_instance_position(urdf_path, instance_key)` | Quick query instance center coordinates, return `[x,y,z]` or `None` |
94
+ | `resolve_instance_with_llm(gpt_client, instance_names, user_spec, ...)` | Use LLM to match user description to exact instance name |
95
+
96
+ ---
97
+
98
+ ## CLI Features
99
+
100
+ ### Command Line Parameters
101
+
102
+ | Parameter | Description |
103
+ |-----------|-------------|
104
+ | `--urdf_path` | Input URDF scene file path (required) |
105
+ | `--usd_path` | Optional USD scene file path, update USD simultaneously if specified |
106
+ | `--asset_path` | Object mesh file path (.obj) for insertion |
107
+ | `--instance_key` | Unique identifier for the new instance, default `inserted_object` |
108
+ | `--in_room` | Limit placement to specified room, supports semantic description |
109
+ | `--on_instance` | Place on top of specified instance, supports semantic description |
110
+ | `--beside_instance` | Place beside specified instance on floor, supports semantic description |
111
+ | `--beside_distance` | Max distance (meters) from target instance, default 0.5 |
112
+ | `--place_strategy` | Placement strategy: `"random"` (default) or `"top"` |
113
+ | `--rotation_rpy` | Initial rotation angle (roll, pitch, yaw radians) |
114
+ | `--output_path` | Floorplan output path |
115
+ | `--output_strategy` | File naming strategy: `"suffix"` (default) / `"timestamp"` / `"overwrite"` |
116
+ | `--list_instances` | List instance names and room names, then exit |
117
+ | `--delete_instance` | Instance name to delete (supports semantic description) |
118
+ | `--delete_in_room` | Room constraint for deletion |
119
+ | `--query_instance` | Instance name to query position (supports semantic description) |
120
+ | `--max_placement_attempts` | Maximum placement attempts, default 2000 |
121
+
122
+ ### CLI Usage Examples
123
+
124
+ **View scene info**:
125
+ ```bash
126
+ python -m embodied_gen.skills.spatial-computing.cli.main \
127
+ --urdf_path .../scene.urdf --list_instances
128
+ ```
129
+
130
+ **Insert object with semantic matching**:
131
+ ```bash
132
+ source outputs/env.sh
133
+ python -m embodied_gen.skills.spatial-computing.cli.main \
134
+ --urdf_path .../scene.urdf --asset_path .../lamp.obj --instance_key lamp_1 \
135
+ --on_instance 书柜
136
+ ```
137
+
138
+ **Delete object with room constraint**:
139
+ ```bash
140
+ python -m embodied_gen.skills.spatial-computing.cli.main \
141
+ --urdf_path .../scene.urdf --delete_instance 沙发 --delete_in_room 客厅
142
+ ```
143
+
144
+ **Query instance position**:
145
+ ```bash
146
+ python -m embodied_gen.skills.spatial-computing.cli.main \
147
+ --urdf_path .../scene.urdf --query_instance 床
148
+ ```
149
+
150
+ **Update both URDF and USD (room-cli)**:
151
+ ```bash
152
+ room-cli -m embodied_gen.skills.spatial-computing.cli.main \
153
+ --urdf_path .../scene.urdf --usd_path .../scene.usdc \
154
+ --delete_instance 沙发
155
+ ```
156
+
157
+ ---
158
+
159
+ ## Configuration and Ignore Items
160
+
161
+ | Parameter | Default | Description |
162
+ |-----------|---------|-------------|
163
+ | `mesh_sample_num` | 50000 | Number of mesh sampling points |
164
+ | `ignore_items` | `["ceiling", "light", "exterior"]` | Link name patterns to skip during URDF parsing |
165
+ | `output_strategy` | `"suffix"` | File naming strategy for output files |
166
+
167
+ ---
168
+
169
+ ## Smart File Naming Strategy
170
+
171
+ Default `output_strategy="suffix"` provides intelligent continuous operation support:
172
+
173
+ | Operation | Input File | Output File | Behavior |
174
+ |-----------|-----------|-------------|----------|
175
+ | First insert | `scene.urdf` | `scene_updated.urdf` | Creates new file |
176
+ | Second insert | `scene_updated.urdf` | `scene_updated.urdf` | **Overwrites** (continuous) |
177
+ | Delete | `scene_updated.urdf` | `scene_updated.urdf` | **Overwrites** (continuous) |
178
+
179
+ **Key features**:
180
+ - ✅ No `*_updated_updated.urdf` accumulation
181
+ - ✅ Original `scene.urdf` never modified
182
+ - ✅ Seamless insert/delete workflow
183
+
184
+ **Alternative strategies**:
185
+ - `"timestamp"`: Unique versioning (`scene_20260311_180235.urdf`)
186
+ - `"overwrite"`: Direct overwrite (use with caution)
187
+
188
+ ---
189
+
190
+ ## USD and Blender
191
+
192
+ - Writing USD requires **Blender (bpy)**. Use **room-cli** environment for USD operations.
193
+ - Without `usd_path`, only URDF is updated (no bpy needed).
194
+ - Assets in `.usd`/`.usdc`/`.usda` format are directly referenced; only `.obj` files are converted via bpy.
195
+ - If `*_collision.obj` exists alongside visual mesh, it will be used for URDF collision.
196
+
197
+ ---
198
+
199
+ ## Errors and Return Values
200
+
201
+ **Exceptions**
202
+
203
+ - **ValueError**: Room/instance not found; `update_scene()` called before insertion; `instance_key` already exists; attempting to delete protected instances (`walls`, `*floor*`).
204
+
205
+ **Return Values**
206
+
207
+ - `insert_object` / `insert_object_to_scene`: `[x, y, z]` on success, `None` on failure.
208
+ - `delete_object` / `delete_object_from_scene`: `True` on success, `False` on failure.
209
+ - `query_instance_center` / `query_instance_position`: `[x, y, z]` or `None`.
210
+
211
+ **Exit Codes (CLI)**
212
+
213
+ - `0`: Success
214
+ - `1`: Instance/room not found, deletion failed, or placement failed
215
+
216
+ ---
217
+
218
+ ## Dependencies
219
+
220
+ | Type | Package | Description |
221
+ |------|---------|-------------|
222
+ | Core | trimesh, shapely, matplotlib, numpy | Parsing and visualization |
223
+ | USD Writing | pxr, bpy | Required only when using `usd_path`; bpy requires Blender |
224
+ | LLM Semantic Matching | openai, project gpt_config | `resolve_*` methods require `GPTclient` instance |
225
+ | CLI | tyro | Required only for CLI entry point |
226
+
227
+ ---
228
+
229
+ ## Usage Recommendations
230
+
231
+ - **Upright objects**: Default orientation applies; for special orientations, pass `(roll, pitch, yaw)` radians.
232
+ - **Placing on furniture**: Use `resolve_on_instance()` to get exact name, then `insert_object(..., on_instance=resolved, place_strategy="top")`.
233
+ - **Placing beside furniture**: Use `insert_object(..., beside_instance=resolved, beside_distance=0.5)` for floor placement near target.
234
+ - **Deleting objects**: Use `resolve_delete_instance()` for semantic matching, then `delete_object(..., in_room=room)` for room-specific deletion.
235
+ - **Protected instances**: Cannot delete `walls` or instances containing `floor` in their names.
236
+ - **Continuous editing**: Use `scene_updated.urdf` as input for subsequent operations to maintain changes.
embodied_gen/skills/spatial-computing/SKILL.md ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: spatial-computing
3
+ description: Visualizes floorplans from URDF scene files and inserts/removes 3D assets with collision-aware placement on surfaces. Supports semantic instance matching via LLM (e.g., "put lamp on bookshelf", "delete sofa in living room"). Use when working with URDF/USD indoor scenes, floorplan visualization, object placement/deletion, or room-level scene editing.
4
+ ---
5
+
6
+ # Floorplan & Object Placement/Deletion
7
+
8
+ ## Overview
9
+
10
+ Parse indoor scenes from URDF, generate 2D floorplans, or place/remove 3D objects in scenes and write back to URDF/USD. After successful insertion/deletion, the corresponding file is automatically updated based on whether `urdf_path`/`usd_path` is provided.
11
+
12
+ **When to use**: Use this skill when you need to generate floorplans from URDF, place objects in specified rooms or on furniture surfaces, delete objects from a scene, or batch update URDF/USD files.
13
+
14
+ > ⚠️ **USD updates require `room-cli`**: To update USD files, you **must** use `room-cli` instead of `python -m`, and specify the USD file via `--usd_path`. `room-cli` runs on Blender Python which includes the `bpy` module for OBJ→USD conversion; using `python -m` with `--usd_path` will fail with `ModuleNotFoundError: No module named 'bpy'`.
15
+ >
16
+ > ```bash
17
+ > # ✅ Correct: use room-cli to update both URDF and USD
18
+ > room-cli -m embodied_gen.skills.spatial-computing.cli.main \
19
+ > --urdf_path .../scene.urdf --usd_path .../scene.usdc ...
20
+ > ```
21
+
22
+ **Smart File Naming Strategy**:
23
+ - **Default behavior**: First operation creates `scene_updated.urdf`, subsequent operations automatically overwrite it
24
+ - **No file bloat**: Prevents `*_updated_updated.urdf` files from accumulating
25
+ - **Safe**: Original `scene.urdf` is never modified unless explicitly requested
26
+ - **Works for both insert and delete**: Seamless continuous scene editing
27
+
28
+ ---
29
+
30
+ ## Best Practices & Constraints
31
+
32
+ ### 1. Workflow for Continuous Scene Editing
33
+
34
+ **Recommended workflow** for multiple insert/delete operations:
35
+
36
+ ```bash
37
+ # Step 1: View current scene
38
+ python -m embodied_gen.skills.spatial-computing.cli.main \
39
+ --urdf_path .../scene.urdf --list_instances
40
+
41
+ # Step 2: First insert → creates scene_updated.urdf
42
+ python -m embodied_gen.skills.spatial-computing.cli.main \
43
+ --urdf_path .../scene.urdf \
44
+ --asset_path .../apple.obj --instance_key apple_1
45
+
46
+ # Step 3: Second insert → overwrites scene_updated.urdf
47
+ python -m embodied_gen.skills.spatial-computing.cli.main \
48
+ --urdf_path .../scene_updated.urdf \
49
+ --asset_path .../lamp.obj --instance_key lamp_1
50
+
51
+ # Step 4: Delete operation → overwrites scene_updated.urdf
52
+ python -m embodied_gen.skills.spatial-computing.cli.main \
53
+ --urdf_path .../scene_updated.urdf \
54
+ --delete_instance apple_1
55
+ ```
56
+
57
+ **Key benefits**:
58
+ - ✅ No multiple `*_updated_updated.urdf` files
59
+ - ✅ Original file `scene.urdf` always preserved
60
+ - ✅ Continuous insert/delete operations are seamless
61
+
62
+ **Result**: Clean workflow with only two files:
63
+ - `scene.urdf` (original, untouched)
64
+ - `scene_updated.urdf` (final state)
65
+
66
+ ### 2. When to Use Different Strategies
67
+
68
+ | Strategy | Use Case | Example |
69
+ |----------|----------|---------|
70
+ | **suffix** (default) | Standard workflow, continuous editing | Most scenarios |
71
+ | **timestamp** | Version tracking, backup before risky changes | `scene_20260311_180235.urdf` |
72
+ | **overwrite** | Confident single operation, no backup needed | Automated pipelines |
73
+
74
+ ### 3. Performance Optimization: Batch Insert
75
+
76
+ **Problem**: CLI commands re-parse URDF and process all meshes on every call, leading to slow performance when inserting multiple objects.
77
+
78
+ **Solution**: Use `--batch_insert_config` with JSON config for 3-4x speedup:
79
+
80
+ **Step 1**: Create JSON config file (`batch_chairs.json`):
81
+
82
+ ```json
83
+ [
84
+ {
85
+ "asset_path": "path/to/chair1.obj",
86
+ "instance_key": "chair_1",
87
+ "beside_instance": "table_dining_7178300",
88
+ "in_room": "dining_room_0_floor"
89
+ },
90
+ {
91
+ "asset_path": "path/to/chair2.obj",
92
+ "instance_key": "chair_2",
93
+ "beside_instance": "table_dining_7178300",
94
+ "in_room": "dining_room_0_floor"
95
+ },
96
+ {
97
+ "asset_path": "path/to/chair3.obj",
98
+ "instance_key": "chair_3",
99
+ "beside_instance": "table_dining_7178300",
100
+ "in_room": "dining_room_0_floor"
101
+ }
102
+ ]
103
+ ```
104
+
105
+ **Step 2**: Run batch insertion:
106
+
107
+ ```bash
108
+ # Update URDF only
109
+ room-cli -m embodied_gen.skills.spatial-computing.cli.main \
110
+ --urdf_path .../scene.urdf \
111
+ --batch_insert_config batch_chairs.json
112
+
113
+ # Update both URDF and USD
114
+ room-cli -m embodied_gen.skills.spatial-computing.cli.main \
115
+ --urdf_path .../scene.urdf \
116
+ --usd_path .../scene.usdc \
117
+ --batch_insert_config batch_chairs.json
118
+ ```
119
+
120
+ **JSON Config Fields**:
121
+ - `asset_path` (required): Path to asset mesh file (.obj)
122
+ - `instance_key` (required): Unique instance identifier
123
+ - `beside_instance`: Place beside target instance (on floor). **Must be exact name**.
124
+ - `on_instance`: Place on top of target instance. **Must be exact name**.
125
+ - `in_room`: Limit placement to specified room. **Must be exact name**.
126
+ - `beside_distance`: Max distance from target (default: 0.5m)
127
+ - `place_strategy`: "random" or "top" (default: "random")
128
+
129
+ > **⚠️ Batch insert does NOT support fuzzy/semantic matching.**
130
+ > `beside_instance`, `on_instance`, and `in_room` require exact names.
131
+ > Use `--list_instances` to get the exact instance / room names first:
132
+ > ```bash
133
+ > python -m embodied_gen.skills.spatial-computing.cli.main \
134
+ > --urdf_path .../scene.urdf --list_instances
135
+ > ```
136
+
137
+ **When to Use**:
138
+ - ✅ Inserting 2+ objects at once
139
+ - ✅ Performance-critical workflows
140
+ - ✅ Automated scene generation pipelines
141
+
142
+ ⚠️ **Batch config file cleanup**: The JSON config file for `--batch_insert_config` is a **temporary file** and **must not** be left in the project root directory. Always:
143
+ 1. Create the JSON config in the **same directory as the target scene** (e.g., `.../House_seed5/batch_fruits.json`).
144
+ 2. **Delete the JSON config file immediately after the batch command finishes**, regardless of success or failure.
145
+
146
+ ### 4. Important Constraints
147
+
148
+ ❌ **Wrong**: Using `scene.urdf` for all operations (ignores previous changes)
149
+ ```bash
150
+ # This will NOT see apple_1 from previous operation
151
+ python -m ... --urdf_path scene.urdf --asset_path lamp.obj
152
+ ```
153
+
154
+ ✅ **Right**: Chain operations using `scene_updated.urdf`
155
+ ```bash
156
+ # This WILL see apple_1 and add lamp_1
157
+ python -m ... --urdf_path scene_updated.urdf --asset_path lamp.obj
158
+ ```
159
+
160
+ ---
161
+
162
+ ## LLM Environment (Required for Semantic Matching)
163
+
164
+ Before using `resolve_instance_with_llm` for semantic matching in **Python**, configure the LLM API and ensure access to the interface. Prompt the user if access is unavailable.
165
+
166
+ ```bash
167
+ # Use the project-provided env (Azure + proxy, etc.), if outputs/env.sh exists:
168
+ source outputs/env.sh
169
+ ```
170
+
171
+ ---
172
+
173
+ ## Core Convention: Placement/Deletion/Query Requests Must Use This Skill's Interface
174
+
175
+ When users request "put A somewhere", "delete A", "find A", or "visualize urdf", you **must** implement it using this skill's interface:
176
+
177
+ | User Request Example | Corresponding Parameter & Usage |
178
+ |---------------------|---------------------------------|
179
+ | **Put A on B** (e.g., "put lamp on bookshelf") | `on_instance` (instance name, obtained from `--list_instances`) |
180
+ | **Put A beside B** (e.g., "put chair beside table") | `beside_instance` (instance name, obtained from `--list_instances`); placed on floor near target |
181
+ | **Put A in a room** (e.g., "put table in living room") | `in_room` (room name, obtained from `--list_instances`) |
182
+ | **Put A beside B in a room** (e.g., "put chair beside table in kitchen") | `beside_instance` + `in_room` |
183
+ | **Put A on B in a room** (e.g., "put apple on table in living room") | `in_room` + `on_instance`: decompose the request into the room ("living room" → `in_room`) and the supporting object ("table" → `on_instance`) |
184
+ | **Delete A** (e.g., "delete lamp") | `delete_instance` (instance name or semantic description, supports fuzzy matching with LLM) |
185
+ | **Delete A in a room** (e.g., "delete sofa in living room") | `delete_instance` + `delete_in_room` (only deletes if instance is in specified room) |
186
+ | **Find A** (e.g., "find lamp", "where is the bed") | `query_instance` (returns center coordinates [x, y, z], supports fuzzy matching with LLM) |
187
+ | **Choose output file naming** | `output_strategy`: `"suffix"` / `"timestamp"` / `"overwrite"`. File naming strategy for output files. Default is `"suffix"` (non-destructive). |
188
+ | **Visualize scene.urdf** | `cli.main --urdf_path .../scene.urdf --output_path .../floorplan.png`; output_path defaults to same directory as urdf |
189
+
190
+
191
+ - When no match is found, prompt "The object/room does not exist, please re-enter" and provide the current scene object or room list.
192
+ - Instance names should not use the `<link name="...">` from URDF. **Recommended**: Run `--list_instances` before placement/deletion/query to view current instance name list, and select the closest semantic match.
193
+
194
+ ---
195
+
196
+ ## CLI Examples
197
+
198
+ > **Tip**: The URDF file is typically located at `<room_folder>/urdf/export_scene/scene.urdf` (e.g., `outputs/rooms/Kitchen_seed0/urdf/export_scene/scene.urdf`).
199
+
200
+ ### Example 1: View Instance Names and Room Names in Current Scene
201
+
202
+ ```bash
203
+ # View instance names and room names in current scene (to fill in --on_instance / --in_room)
204
+ python -m embodied_gen.skills.spatial-computing.cli.main \
205
+ --urdf_path .../scene.urdf --list_instances
206
+ ```
207
+
208
+ ### Example 2: Visualize Floorplan Only
209
+
210
+ ```bash
211
+ python -m embodied_gen.skills.spatial-computing.cli.main \
212
+ --urdf_path .../scene.urdf --output_path .../floorplan.png
213
+ ```
214
+
215
+ ### Example 3: Put Lamp on Bookshelf (Place on an Object)
216
+
217
+ `--on_instance` can be filled with the instance name returned by `--list_instances` or a semantic description.
218
+
219
+ ```bash
220
+ python -m embodied_gen.skills.spatial-computing.cli.main \
221
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
222
+ --asset_path .../lamp.obj --instance_key lamp_on_bookcase --on_instance 书柜
223
+ ```
224
+
225
+ ---
226
+
227
+ ### Example 4: Put Table in Living Room (Place in a Room)
228
+
229
+ ```bash
230
+ python -m embodied_gen.skills.spatial-computing.cli.main \
231
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
232
+ --asset_path .../table.obj --instance_key table_1 \
233
+ --in_room living_room
234
+ ```
235
+
236
+ ---
237
+
238
+ ### Example 5: Put Apple on Table in Living Room (Room + on Object)
239
+
240
+ ```bash
241
+ python -m embodied_gen.skills.spatial-computing.cli.main \
242
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
243
+ --asset_path .../apple.obj --instance_key apple_1 \
244
+ --in_room living_room --on_instance table --place_strategy top
245
+ ```
246
+
247
+ ---
248
+
249
+ ### Example 7: Delete an Object (Exact Name)
250
+
251
+ ```bash
252
+ python -m embodied_gen.skills.spatial-computing.cli.main \
253
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
254
+ --delete_instance bed_192207
255
+ ```
256
+
257
+ ---
258
+
259
+ ### Example 8: Delete Object with Fuzzy Matching (Semantic Description)
260
+
261
+ Requires LLM environment (see "LLM Environment" section).
262
+
263
+ ```bash
264
+ python -m embodied_gen.skills.spatial-computing.cli.main \
265
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
266
+ --delete_instance "沙发"
267
+ ```
268
+
269
+ ---
270
+
271
+ ### Example 9: Delete Object in Specific Room
272
+
273
+ Only deletes the instance if it's located in the specified room.
274
+
275
+ ```bash
276
+ python -m embodied_gen.skills.spatial-computing.cli.main \
277
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
278
+ --delete_instance "沙发" --delete_in_room "客厅"
279
+ ```
280
+
281
+ **Update both URDF and USD using room-cli:**
282
+ ```bash
283
+ room-cli -m embodied_gen.skills.spatial-computing.cli.main \
284
+ --urdf_path .../scene.urdf --usd_path .../scene.usdc \
285
+ --output_path .../floorplan.png \
286
+ --delete_instance "沙发" --delete_in_room "客厅"
287
+ ```
288
+
289
+ ---
290
+
291
+ ### Example 10: Query Instance Position (Exact Name)
292
+
293
+ ```bash
294
+ python -m embodied_gen.skills.spatial-computing.cli.main \
295
+ --urdf_path .../scene.urdf \
296
+ --query_instance bed_192207
297
+ ```
298
+
299
+ **Expected output**:
300
+ ```
301
+ 📍 Instance 'bed_192207' center: (-0.9250, -6.5830, 0.5000)
302
+ ```
303
+
304
+ ---
305
+
306
+ ### Example 11: Query Instance Position with Fuzzy Matching
307
+
308
+ Requires LLM environment (see "LLM Environment" section).
309
+
310
+ ```bash
311
+ python -m embodied_gen.skills.spatial-computing.cli.main \
312
+ --urdf_path .../scene.urdf \
313
+ --query_instance "床"
314
+ ```
315
+
316
+ ---
317
+
318
+ #### **Alternative Strategies**
319
+
320
+ **Timestamp** - Unique versioning for each operation:
321
+ ```bash
322
+ # Output: scene_20260311_180235.urdf
323
+ python -m embodied_gen.skills.spatial-computing.cli.main \
324
+ --urdf_path .../scene.urdf \
325
+ --asset_path .../apple.obj --instance_key apple_1 \
326
+ --output_strategy timestamp
327
+ ```
328
+
329
+ **Overwrite** - Directly overwrite original (use with caution):
330
+ ```bash
331
+ # Overwrites: scene.urdf
332
+ python -m embodied_gen.skills.spatial-computing.cli.main \
333
+ --urdf_path .../scene.urdf \
334
+ --asset_path .../apple.obj --instance_key apple_1 \
335
+ --output_strategy overwrite
336
+ ```
337
+
338
+ ---
339
+
340
+ ### Query Instance Position
341
+
342
+ Query the center coordinates of an instance in the scene. Supports fuzzy matching with LLM.
343
+
344
+ **CLI Interface**:
345
+ ```bash
346
+ # Exact instance name
347
+ python -m embodied_gen.skills.spatial-computing.cli.main \
348
+ --urdf_path scene.urdf \
349
+ --query_instance bed_192207
350
+
351
+ # Fuzzy matching (requires GPT)
352
+ source outputs/env.sh
353
+ python -m embodied_gen.skills.spatial-computing.cli.main \
354
+ --urdf_path scene.urdf \
355
+ --query_instance "床"
356
+ ```
357
+
358
+ ### 6. Common Parameters
359
+
360
+ | Parameter | Meaning |
361
+ |-----------|---------|
362
+ | `in_room` | Limit placement to specified room |
363
+ | `on_instance` | Place on top of specified instance; must be **exact instance name** (obtained via `resolve_instance_with_llm`) |
364
+ | `beside_instance` | Place beside specified instance on the floor; must be **exact instance name** (obtained via `resolve_instance_with_llm`). Mutually exclusive with `on_instance` |
365
+ | `beside_distance` | Max distance (meters) from target instance for beside placement. Default `0.5`. Increase if placement fails |
366
+ | `place_strategy` | `"random"` (default): place on a randomly selected candidate surface (e.g., one of the three shelves of a bookshelf); `"top"`: place on the highest surface |
367
+ | `rotation_rpy` | Not required by default; pass (roll, pitch, yaw) radians for special orientations |
368
+ | `delete_instance` | Instance name or semantic description to delete (supports fuzzy matching with LLM). Cannot delete protected items (walls, floors) |
369
+ | `delete_in_room` | Optional room constraint for deletion - only delete if instance is in this room |
370
+ | `query_instance` | Instance name or semantic description to query center coordinates (supports fuzzy matching with LLM). Returns [x, y, z] position |
371
+
372
+ ## Next Steps
373
+
374
+ - For complete API, configuration, errors, and dependencies, see [REFERENCE.md](REFERENCE.md).
embodied_gen/skills/spatial-computing/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ from .core import (
18
+ UrdfSemanticInfoCollector,
19
+ get_actionable_surface,
20
+ points_to_polygon,
21
+ )
22
+ from .core.visualizer import (
23
+ FloorplanVisualizer,
24
+ )
25
+
26
+ __all__ = [
27
+ "FloorplanVisualizer",
28
+ "UrdfSemanticInfoCollector",
29
+ "points_to_polygon",
30
+ "get_actionable_surface",
31
+ ]
embodied_gen/skills/spatial-computing/api/__init__.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ from embodied_gen.utils.llm_resolve import resolve_instance_with_llm
18
+
19
+ from .floorplan_api import (
20
+ FloorplanManager,
21
+ delete_object_from_scene,
22
+ insert_object_to_scene,
23
+ query_instance_position,
24
+ visualize_floorplan,
25
+ )
26
+
27
+ __all__ = [
28
+ "FloorplanManager",
29
+ "visualize_floorplan",
30
+ "insert_object_to_scene",
31
+ "delete_object_from_scene",
32
+ "query_instance_position",
33
+ "resolve_instance_with_llm",
34
+ ]
embodied_gen/skills/spatial-computing/api/floorplan_api.py ADDED
@@ -0,0 +1,917 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import os
21
+ from dataclasses import dataclass, field
22
+ from datetime import datetime
23
+ from typing import Literal
24
+
25
+ from shapely.geometry import MultiPolygon, Polygon
26
+ from embodied_gen.utils.llm_resolve import resolve_instance_with_llm
27
+
28
+ from ..core import (
29
+ UrdfSemanticInfoCollector,
30
+ )
31
+ from ..core.collector import (
32
+ DEFAULT_BESIDE_DISTANCE,
33
+ DEFAULT_IGNORE_ITEMS,
34
+ DEFAULT_MESH_SAMPLE_NUM,
35
+ DEFAULT_ROTATION_RPY,
36
+ )
37
+ from ..core.visualizer import (
38
+ FloorplanVisualizer,
39
+ )
40
+
41
+ # Type aliases
42
+ Geometry = Polygon | MultiPolygon
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
+ @dataclass
47
+ class FloorplanConfig:
48
+ """Configuration for floorplan operations."""
49
+
50
+ urdf_path: str
51
+ """Path to the input URDF scene file."""
52
+
53
+ output_path: str | None = None
54
+ """Path to save the floorplan visualization image."""
55
+
56
+ usd_path: str | None = None
57
+ """Optional path to the USD scene file for USD export."""
58
+
59
+ asset_path: str | None = None
60
+ """Optional path to the asset mesh file (.obj)."""
61
+
62
+ instance_key: str = "inserted_object"
63
+ """Unique key for the added instance."""
64
+
65
+ in_room: str | None = None
66
+ """Optional room name to constrain asset placement."""
67
+
68
+ on_instance: str | None = None
69
+ """Optional instance name to place the asset on top of (exact key from get_instance_names())."""
70
+
71
+ beside_instance: str | None = None
72
+ """Optional instance name to place the asset beside (on floor, near the target)."""
73
+
74
+ beside_distance: float = DEFAULT_BESIDE_DISTANCE
75
+ """Max distance (meters) from the target instance for beside placement."""
76
+
77
+ place_strategy: Literal["top", "random"] = "random"
78
+ """Placement strategy for the asset."""
79
+
80
+ rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY
81
+ """Rotation in roll-pitch-yaw (radians)."""
82
+
83
+ ignore_items: list[str] = field(
84
+ default_factory=lambda: list(DEFAULT_IGNORE_ITEMS)
85
+ )
86
+ """List of item name patterns to ignore during parsing."""
87
+
88
+ mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM
89
+ """Number of points to sample from meshes."""
90
+
91
+ max_placement_attempts: int = 2000
92
+ """Maximum attempts for asset placement."""
93
+
94
+ update_urdf: bool = True
95
+ """Whether to update and save the URDF file."""
96
+
97
+ update_usd: bool = True
98
+ """Whether to update and save the USD file."""
99
+
100
+ list_instances: bool = False
101
+ """If True, print instance and room names then exit (no placement/visualization)."""
102
+
103
+ delete_instance: str | None = None
104
+ """Optional instance name to delete from the scene (supports fuzzy matching with LLM)."""
105
+
106
+ delete_in_room: str | None = None
107
+ """Optional room constraint for deletion - only delete if instance is in this room."""
108
+
109
+ query_instance: str | None = None
110
+ """Optional instance name to query and return its center coordinates (supports fuzzy matching with LLM)."""
111
+
112
+ output_strategy: Literal["suffix", "overwrite", "timestamp"] = "suffix"
113
+ """File naming strategy for output files.
114
+
115
+ - "suffix": Add '_updated' suffix (default, non-destructive)
116
+ - "overwrite": Overwrite original files (use with caution)
117
+ - "timestamp": Add timestamp suffix (e.g., '_20260311_171500')
118
+ """
119
+
120
+ batch_insert_config: str | None = None
121
+ """Path to JSON config file for batch insertion (3-4x faster than multiple CLI calls).
122
+
123
+ JSON format example:
124
+ [
125
+ {
126
+ "asset_path": "path/to/chair1.obj",
127
+ "instance_key": "chair_1",
128
+ "beside_instance": "table_dining_7178300",
129
+ "in_room": "dining_room_0_floor"
130
+ },
131
+ {
132
+ "asset_path": "path/to/chair2.obj",
133
+ "instance_key": "chair_2",
134
+ "beside_instance": "table_dining_7178300",
135
+ "in_room": "dining_room_0_floor"
136
+ }
137
+ ]
138
+ """
139
+
140
+
141
class FloorplanManager:
    """High-level API for floorplan operations.

    Wraps a ``UrdfSemanticInfoCollector`` built from a URDF scene and
    provides simplified methods for:
    - Loading and analyzing URDF scenes
    - Visualizing floorplans
    - Inserting and deleting objects
    - Updating URDF and USD files
    - Resolving free-form names to exact instance / room keys

    Example:
        >>> manager = FloorplanManager(urdf_path="scene.urdf", usd_path="scene.usdc")
        >>> manager.visualize(output_path="floorplan.png")
        >>> position = manager.insert_object(
        ...     asset_path="chair.obj",
        ...     instance_key="chair_1",
        ...     in_room="kitchen"
        ... )
        # URDF/USD are updated automatically after insert
    """

    def __init__(
        self,
        urdf_path: str,
        usd_path: str | None = None,
        mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
        ignore_items: list[str] | None = None,
        output_strategy: Literal[
            "suffix", "overwrite", "timestamp"
        ] = "suffix",
    ) -> None:
        """Initialize the floorplan manager.

        Builds the semantic collector and immediately parses `urdf_path`.

        Args:
            urdf_path: Path to the URDF file.
            usd_path: Optional path to the USD file for scene updates.
            mesh_sample_num: Number of points to sample from meshes.
            ignore_items: List of item name patterns to ignore.
            output_strategy: File naming strategy for output files.

        """
        self.urdf_path = urdf_path
        self.usd_path = usd_path
        self.output_strategy = output_strategy
        self.collector = UrdfSemanticInfoCollector(
            mesh_sample_num=mesh_sample_num,
            ignore_items=ignore_items,
        )
        self.collector.collect(urdf_path)
        self.pending_instance_data: dict | None = None
        # Most recent USD file written by this manager. Later USD edits read
        # from it so that consecutive operations accumulate instead of each
        # restarting from the original `usd_path`.
        self._usd_latest: str | None = None

    def _get_output_path(
        self,
        input_path: str,
        custom_output_path: str | None = None,
    ) -> str:
        """Generate output path based on the naming strategy.

        Smart file naming strategy:
        - "suffix" (default):
            * If input already ends with "_updated", overwrite it (continuous operations)
            * Otherwise, add "_updated" suffix (first operation)
        - "timestamp": Add timestamp suffix for unique versioning
        - "overwrite": Always overwrite the input file

        Args:
            input_path: Original input file path.
            custom_output_path: Optional custom output path (highest priority).

        Returns:
            Generated output path based on strategy.

        """
        if custom_output_path is not None:
            return custom_output_path

        if self.output_strategy == "overwrite":
            return input_path

        name, ext = os.path.splitext(input_path)
        if self.output_strategy == "timestamp":
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            return f"{name}_{timestamp}{ext}"

        # "suffix" (default): an input that already carries "_updated" is
        # rewritten in place so repeated operations keep a single output file.
        if name.endswith("_updated"):
            return input_path
        return f"{name}_updated{ext}"

    @staticmethod
    def _find_collision_mesh(asset_path: str) -> str | None:
        """Return the sibling ``<stem>_collision<ext>`` mesh, if it exists.

        Only the file stem is rewritten; a plain ``str.replace(".obj", ...)``
        would also alter directory names that happen to contain ".obj".

        Args:
            asset_path: Path to the visual mesh file.

        Returns:
            Path to the collision mesh, or None when no such file exists.

        """
        stem, ext = os.path.splitext(asset_path)
        candidate = f"{stem}_collision{ext}"
        return candidate if os.path.exists(candidate) else None

    def _write_instance(
        self,
        data: dict,
        urdf_output_path: str | None = None,
        usd_output_path: str | None = None,
    ) -> None:
        """Write one placed instance into the URDF and/or USD outputs.

        Args:
            data: Mapping with "asset_path", "instance_key", "center" and
                "rotation_rpy"; an optional "usd_source" pins the USD file
                the instance is applied on top of.
            urdf_output_path: Optional custom path for URDF output.
            usd_output_path: Optional custom path for USD output.

        """
        collision_path = self._find_collision_mesh(data["asset_path"])

        if self.urdf_path:
            urdf_out = self._get_output_path(self.urdf_path, urdf_output_path)
            self.collector.update_urdf_info(
                output_path=urdf_out,
                instance_key=data["instance_key"],
                visual_mesh_path=data["asset_path"],
                collision_mesh_path=collision_path,
                trans_xyz=tuple(data["center"]),
                rot_rpy=data["rotation_rpy"],
                joint_type="fixed",
            )

        if self.usd_path:
            usd_out = self._get_output_path(self.usd_path, usd_output_path)
            usd_source = (
                data.get("usd_source") or self._usd_latest or self.usd_path
            )
            self.collector.update_usd_info(
                usd_path=usd_source,
                output_path=usd_out,
                instance_key=data["instance_key"],
                visual_mesh_path=data["asset_path"],
                trans_xyz=data["center"],
                rot_rpy=data["rotation_rpy"],
            )
            self._usd_latest = usd_out

    def visualize(
        self,
        output_path: str,
    ) -> None:
        """Generate and save a floorplan visualization.

        Args:
            output_path: Path to save the output image.

        """
        FloorplanVisualizer.plot(
            self.collector.rooms,
            self.collector.footprints,
            self.collector.occ_area,
            output_path,
        )
        logger.info(f"✅ Floorplan visualization saved to {output_path}")

    def insert_object(
        self,
        asset_path: str,
        instance_key: str,
        in_room: str | None = None,
        on_instance: str | None = None,
        beside_instance: str | None = None,
        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
        rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
        n_max_attempt: int = 2000,
        place_strategy: Literal["top", "random"] = "random",
    ) -> list[float] | None:
        """Insert an object into the scene with automatic placement.

        On success the placement is recorded in `pending_instance_data` and
        `update_scene()` is called right away to persist it.

        Args:
            asset_path: Path to the asset mesh file (.obj).
            instance_key: Unique key for the new instance.
            in_room: Optional room name to constrain placement.
            on_instance: Optional instance name to place on top of.
            beside_instance: Optional instance name to place beside (on floor).
            beside_distance: Max distance from target for beside placement.
            rotation_rpy: Initial rotation in roll-pitch-yaw.
            n_max_attempt: Maximum placement attempts.
            place_strategy: Either "top" or "random".

        Returns:
            List [x, y, z] of the placed instance center, or None if failed.

        """
        center = self.collector.add_instance(
            asset_path=asset_path,
            instance_key=instance_key,
            in_room=in_room,
            on_instance=on_instance,
            beside_instance=beside_instance,
            beside_distance=beside_distance,
            rotation_rpy=rotation_rpy,
            n_max_attempt=n_max_attempt,
            place_strategy=place_strategy,
        )

        if center is not None:
            self.pending_instance_data = {
                "asset_path": asset_path,
                "instance_key": instance_key,
                "center": center,
                "rotation_rpy": rotation_rpy,
                # USD state this instance is added on top of. Re-running
                # update_scene() for the same instance starts from this
                # file again rather than from a file that already has it.
                "usd_source": self._usd_latest or self.usd_path,
            }
            self.update_scene()

        return center

    def batch_insert_objects(
        self,
        objects: list[dict],
        defer_update: bool = False,
    ) -> list[list[float] | None]:
        """Batch insert multiple objects into the scene efficiently.

        Args:
            objects: List of object configs, each containing:
                asset_path: Path to the asset mesh file (.obj).
                instance_key: Unique key for the new instance.
                in_room: Optional room name to constrain placement.
                on_instance: Optional instance name to place on top of.
                beside_instance: Optional instance name to place beside.
                beside_distance: Max distance from target (default: 0.5m).
                rotation_rpy: Initial rotation (default: (0, 0, 0)).
                place_strategy: Either "top" or "random" (default: "random").
            defer_update: If True, objects are only placed in the in-memory
                scene; this method then writes neither the URDF nor the USD
                file and does not queue the placements for a later
                `update_scene()` call.

        Returns:
            List of centers [x, y, z] for each inserted object,
            or None if failed.

        Example:
            >>> objects = [
            ...     {"asset_path": "chair1.obj",
            ...      "instance_key": "chair_1",
            ...      "beside_instance": "table"},
            ... ]
            >>> centers = manager.batch_insert_objects(objects)

        """
        centers: list[list[float] | None] = []

        for i, obj_config in enumerate(objects, 1):
            logger.info(
                f"[{i}/{len(objects)}] Inserting '{obj_config.get('instance_key', 'unknown')}'..."
            )
            rotation_rpy = obj_config.get("rotation_rpy", DEFAULT_ROTATION_RPY)

            center = self.collector.add_instance(
                asset_path=obj_config["asset_path"],
                instance_key=obj_config["instance_key"],
                in_room=obj_config.get("in_room"),
                on_instance=obj_config.get("on_instance"),
                beside_instance=obj_config.get("beside_instance"),
                beside_distance=obj_config.get(
                    "beside_distance", DEFAULT_BESIDE_DISTANCE
                ),
                rotation_rpy=rotation_rpy,
                n_max_attempt=obj_config.get("n_max_attempt", 2000),
                place_strategy=obj_config.get("place_strategy", "random"),
            )

            if center is not None:
                if not defer_update:
                    # Persist incrementally; each USD write builds on the
                    # previous one through `_usd_latest`.
                    self._write_instance(
                        {
                            "asset_path": obj_config["asset_path"],
                            "instance_key": obj_config["instance_key"],
                            "center": center,
                            "rotation_rpy": rotation_rpy,
                        }
                    )
                logger.info(f"✅ Placed at {center}")
            else:
                logger.warning("❌ Failed to place object")

            centers.append(center)

        return centers

    def update_scene(
        self,
        urdf_output_path: str | None = None,
        usd_output_path: str | None = None,
    ) -> None:
        """Update URDF and/or USD with the most recently inserted instance.

        Updates URDF if self.urdf_path is set, USD if self.usd_path is set.
        Both are updated when both paths are set.

        Note: USD updates require Blender (bpy) to convert .obj to .usdc format.

        Args:
            urdf_output_path: Optional custom path for URDF output.
            usd_output_path: Optional custom path for USD output.

        Raises:
            ValueError: If no instance has been inserted.

        """
        if self.pending_instance_data is None:
            raise ValueError(
                "No instance to update. Call insert_object() first."
            )

        self._write_instance(
            self.pending_instance_data,
            urdf_output_path=urdf_output_path,
            usd_output_path=usd_output_path,
        )

    def delete_object(
        self,
        instance_key: str,
        in_room: str | None = None,
        urdf_output_path: str | None = None,
        usd_output_path: str | None = None,
    ) -> bool:
        """Delete an object from the scene.

        Args:
            instance_key: Exact instance name to delete.
            in_room: Optional room constraint - only delete if instance is in this room.
            urdf_output_path: Optional custom path for URDF output.
            usd_output_path: Optional custom path for USD output.

        Returns:
            True if deletion succeeded, False otherwise.

        """
        success = self.collector.remove_instance(
            instance_key=instance_key,
            in_room=in_room,
        )
        if not success:
            return success

        # Update URDF
        if self.urdf_path:
            urdf_out = self._get_output_path(self.urdf_path, urdf_output_path)
            self.collector.save_urdf(urdf_out)

        # Update USD, starting from the latest file this manager wrote so
        # earlier edits are not discarded.
        if self.usd_path:
            usd_out = self._get_output_path(self.usd_path, usd_output_path)
            self.collector.remove_usd_instance(
                usd_path=self._usd_latest or self.usd_path,
                output_path=usd_out,
                instance_key=instance_key,
            )
            self._usd_latest = usd_out

        return success

    def get_instance_names(self) -> list[str]:
        """Get list of instance names in the scene.

        The "walls" entry and any key containing "floor" (case-insensitive)
        are excluded.

        Returns:
            List of instance names.

        """
        return [
            k
            for k in self.collector.instances
            if k != "walls" and "floor" not in k.lower()
        ]

    def get_room_names(self) -> list[str]:
        """Get list of room names in the scene.

        Returns:
            List of room names.

        """
        return list(self.collector.rooms.keys())

    def get_instance_names_in_room(self, in_room: str) -> list[str]:
        """Get instance names that are spatially inside a given room.

        Buffers the room polygon slightly to handle mesh-sampling precision.

        Args:
            in_room: Exact room key (must exist in get_room_names()).

        Returns:
            List of instance names within the room. If `in_room` is not a
            known room key, all instance names are returned unfiltered.

        """
        room_poly = self.collector.rooms.get(in_room)
        if room_poly is None:
            return self.get_instance_names()
        room_buffered = room_poly.buffer(0.1)
        return [
            k
            for k in self.get_instance_names()
            if room_buffered.contains(
                self.collector.instances[k].representative_point()
            )
        ]

    def _resolve_name(
        self,
        query: str,
        names: list[str],
        gpt_client: object | None,
        use_substring: bool,
    ) -> str | None:
        """Resolve `query` against `names`: exact, then substring, then LLM.

        Args:
            query: Exact key or free-form description.
            names: Candidate keys.
            gpt_client: Optional GPT client for semantic resolution.
            use_substring: Whether to try case-insensitive substring
                matching before falling back to the LLM.

        Returns:
            The matched key, or None if nothing matched.

        """
        if query in names:
            return query

        if use_substring:
            query_lower = query.lower()
            matches = [n for n in names if query_lower in n.lower()]
            if len(matches) == 1:
                logger.info(
                    "Substring match: '%s' -> '%s'", query, matches[0]
                )
                return matches[0]
            if matches:
                logger.warning(
                    "Multiple substring matches for '%s': %s. Using '%s'. "
                    "Use exact name or LLM for better matching.",
                    query,
                    matches,
                    matches[0],
                )
                return matches[0]

        if gpt_client is not None:
            return resolve_instance_with_llm(
                gpt_client, names, query  # type: ignore[arg-type]
            )
        return None

    def resolve_on_instance(
        self,
        on_instance: str,
        gpt_client: object | None = None,
    ) -> str | None:
        r"""Resolve on_instance to an exact key (for placement).

        If on_instance is already in get_instance_names(), return it.
        Otherwise if gpt_client is provided, use LLM to resolve user description
        (e.g. \"柜子\", \"书柜\") to one exact instance key.

        Args:
            on_instance: Exact instance key or semantic description.
            gpt_client: Optional GPT client for semantic resolve (e.g. GPT_CLIENT).

        Returns:
            Exact instance key, or None if not found / LLM returned NONE.
        """
        return self._resolve_name(
            on_instance,
            self.get_instance_names(),
            gpt_client,
            use_substring=False,
        )

    def resolve_in_room(
        self,
        in_room: str,
        gpt_client: object | None = None,
    ) -> str | None:
        r"""Resolve in_room to an exact room name (for placement).

        If in_room is already in get_room_names(), return it.
        Otherwise if gpt_client is provided, use LLM to resolve user description
        (e.g. \"kitchen\", \"the place for cooking\") to one exact room name.

        Args:
            in_room: Exact room name or semantic description.
            gpt_client: Optional GPT client for semantic resolve (e.g. GPT_CLIENT).

        Returns:
            Exact room name, or None if not found / LLM returned NONE.
        """
        return self._resolve_name(
            in_room,
            self.get_room_names(),
            gpt_client,
            use_substring=False,
        )

    def _candidate_instances(self, in_room: str | None) -> list[str]:
        """Return instance names, limited to `in_room` when it is given."""
        if in_room is not None:
            return self.get_instance_names_in_room(in_room)
        return self.get_instance_names()

    def resolve_beside_instance(
        self,
        beside_instance: str,
        gpt_client: object | None = None,
        in_room: str | None = None,
    ) -> str | None:
        r"""Resolve beside_instance to an exact key (for beside placement).

        If beside_instance is already in the candidate names, return it.
        Otherwise try a case-insensitive substring match, and finally, if
        gpt_client is provided, use LLM to resolve user description
        (e.g. "桌子", "沙发") to one exact instance key.

        When `in_room` is given, only instances spatially inside that room are
        considered as candidates.

        Args:
            beside_instance: Exact instance key or semantic description.
            gpt_client: Optional GPT client for semantic resolve.
            in_room: Optional resolved room key to restrict candidate scope.

        Returns:
            Exact instance key, or None if not found / LLM returned NONE.
        """
        return self._resolve_name(
            beside_instance,
            self._candidate_instances(in_room),
            gpt_client,
            use_substring=True,
        )

    def resolve_delete_instance(
        self,
        delete_instance: str,
        gpt_client: object | None = None,
        in_room: str | None = None,
    ) -> str | None:
        r"""Resolve delete_instance to an exact key (for deletion).

        Similar to resolve_beside_instance but specifically for deletion.
        If delete_instance is already in the candidate names, return it.
        Otherwise try a case-insensitive substring match, and finally, if
        gpt_client is provided, use LLM to resolve user description
        (e.g. "桌子", "沙发") to one exact instance key.

        When `in_room` is given, only instances spatially inside that room are
        considered as candidates.

        Args:
            delete_instance: Exact instance key or semantic description.
            gpt_client: Optional GPT client for semantic resolve.
            in_room: Optional resolved room key to restrict candidate scope.

        Returns:
            Exact instance key, or None if not found / LLM returned NONE.
        """
        return self._resolve_name(
            delete_instance,
            self._candidate_instances(in_room),
            gpt_client,
            use_substring=True,
        )

    def query_instance_center(
        self,
        instance_key: str,
    ) -> list[float] | None:
        """Query the center coordinates of an instance.

        Args:
            instance_key: Exact instance name to query.

        Returns:
            List [x, y, z] of the instance center, or None if not found.

        """
        return self.collector.get_instance_center(instance_key)

    def resolve_and_query_instance(
        self,
        query_instance: str,
        gpt_client: object | None = None,
    ) -> tuple[str | None, list[float] | None]:
        r"""Resolve instance name and return its center coordinates.

        Combines fuzzy matching with coordinate query.
        If query_instance is already in get_instance_names(), return its center.
        Otherwise try a case-insensitive substring match, and finally, if
        gpt_client is provided, use LLM to resolve user description
        (e.g. "床", "沙发") to one exact instance key.

        Args:
            query_instance: Exact instance key or semantic description.
            gpt_client: Optional GPT client for semantic resolve.

        Returns:
            Tuple of (resolved_instance_name, center_coordinates), or (None, None) if not found.

        """
        resolved = self._resolve_name(
            query_instance,
            self.get_instance_names(),
            gpt_client,
            use_substring=True,
        )
        if not resolved:
            return None, None
        return resolved, self.query_instance_center(resolved)

    def get_occupied_area(self) -> Geometry:
        """Get the union of all occupied areas.

        Returns:
            Shapely geometry representing occupied areas.

        """
        return self.collector.occ_area

    def get_floor_union(self) -> Geometry:
        """Get the union of all floor areas.

        Returns:
            Shapely geometry representing floor areas.

        """
        return self.collector.floor_union
799
+
800
+
801
def visualize_floorplan(
    urdf_path: str,
    output_path: str,
    mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
    ignore_items: list[str] | None = None,
) -> None:
    """Render the floorplan of a URDF scene to an image in one call.

    Builds a temporary `FloorplanManager` for `urdf_path` and asks it to
    save the visualization.

    Args:
        urdf_path: Path to the URDF file.
        output_path: Path to save the output image.
        mesh_sample_num: Number of points to sample from meshes.
        ignore_items: List of item name patterns to ignore.

    """
    FloorplanManager(
        urdf_path=urdf_path,
        mesh_sample_num=mesh_sample_num,
        ignore_items=ignore_items,
    ).visualize(output_path=output_path)
822
+
823
+
824
def insert_object_to_scene(
    urdf_path: str,
    asset_path: str,
    instance_key: str,
    output_path: str,
    usd_path: str | None = None,
    in_room: str | None = None,
    on_instance: str | None = None,
    beside_instance: str | None = None,
    beside_distance: float = DEFAULT_BESIDE_DISTANCE,
    place_strategy: Literal["top", "random"] = "random",
    rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
) -> list[float] | None:
    """Insert one object into a scene and, on success, draw the floorplan.

    Note: USD updates require Blender (bpy) to convert .obj to .usdc format.

    Args:
        urdf_path: Path to the URDF file.
        asset_path: Path to the asset mesh file (.obj).
        instance_key: Unique key for the new instance.
        output_path: Path to save the floorplan image.
        usd_path: Optional path to the USD file (requires Blender).
        in_room: Optional room name to constrain placement.
        on_instance: Optional instance name to place on top of.
        beside_instance: Optional instance name to place beside (on floor).
        beside_distance: Max distance for beside placement (meters).
        place_strategy: Either "top" or "random".
        rotation_rpy: Initial rotation in roll-pitch-yaw.

    Returns:
        List [x, y, z] of the placed instance center, or None if failed.

    """
    scene = FloorplanManager(urdf_path=urdf_path, usd_path=usd_path)
    placement = {
        "asset_path": asset_path,
        "instance_key": instance_key,
        "in_room": in_room,
        "on_instance": on_instance,
        "beside_instance": beside_instance,
        "beside_distance": beside_distance,
        "rotation_rpy": rotation_rpy,
        "place_strategy": place_strategy,
    }
    placed_center = scene.insert_object(**placement)

    # The floorplan image is only produced when the placement succeeded.
    if placed_center is None:
        return placed_center
    scene.visualize(output_path=output_path)
    return placed_center
872
+
873
+
874
def delete_object_from_scene(
    urdf_path: str,
    instance_key: str,
    in_room: str | None = None,
    output_path: str | None = None,
    usd_path: str | None = None,
) -> bool:
    """Quick function to delete an object from scene.

    Args:
        urdf_path: Path to the URDF file.
        instance_key: Exact instance name to delete.
        in_room: Optional room constraint - only delete if instance is in this room.
        output_path: Optional path to save the floorplan image after deletion.
        usd_path: Optional path to the USD file. When given, it is passed to
            the manager so the deletion is applied to the USD scene as well,
            matching `insert_object_to_scene`.

    Returns:
        True if deletion succeeded, False otherwise.

    """
    manager = FloorplanManager(urdf_path=urdf_path, usd_path=usd_path)
    success = manager.delete_object(
        instance_key=instance_key,
        in_room=in_room,
    )
    # Only redraw the floorplan when something was actually removed.
    if success and output_path is not None:
        manager.visualize(output_path=output_path)
    return success
900
+
901
+
902
def query_instance_position(
    urdf_path: str,
    instance_key: str,
) -> list[float] | None:
    """Look up the center of one instance in a URDF scene in one call.

    Args:
        urdf_path: Path to the URDF file.
        instance_key: Exact instance name to query.

    Returns:
        List [x, y, z] of the instance center, or None if not found.

    """
    scene = FloorplanManager(urdf_path=urdf_path)
    center = scene.query_instance_center(instance_key)
    return center
embodied_gen/skills/spatial-computing/cli/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """Command-line interface for floorplan operations.
2
+
3
+ This package provides CLI utilities for floorplan visualization and
4
+ scene manipulation.
5
+ """
6
+
7
+ __all__: list[str] = []
embodied_gen/skills/spatial-computing/cli/main.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import logging
21
+ import sys
22
+ import warnings
23
+
24
+ import tyro
25
+
26
+ from ..api.floorplan_api import (
27
+ FloorplanConfig,
28
+ FloorplanManager,
29
+ )
30
+
31
# Suppress every RuntimeWarning for the lifetime of the CLI process.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# force=True removes handlers already attached to the root logger, so this
# format and level apply even if an imported module configured logging first.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    force=True,
)
# Module-level logger used by the CLI entry point below.
logger = logging.getLogger(__name__)
39
+
40
+
41
+ def _get_gpt_client() -> object | None:
42
+ """Lazy-import GPT_CLIENT for semantic --on_instance resolution."""
43
+ try:
44
+ from embodied_gen.utils.gpt_clients import GPT_CLIENT
45
+
46
+ return GPT_CLIENT
47
+ except Exception:
48
+ return None
49
+
50
+
51
def entrypoint(cfg: FloorplanConfig) -> None:
    """Main entry point for floorplan visualization and scene manipulation.

    Runs, in order: optional listing of instances/rooms (then returns),
    optional batch insertion from a JSON file (then returns), resolution of
    the on/in-room/beside names, optional deletion, optional position query,
    optional single-object insertion and finally the optional floorplan
    visualization. Any unresolved name or failed operation logs an error and
    exits the process with status 1.

    Args:
        cfg: Configuration object with all parameters.

    """
    manager = FloorplanManager(
        urdf_path=cfg.urdf_path,
        usd_path=cfg.usd_path,
        mesh_sample_num=cfg.mesh_sample_num,
        ignore_items=cfg.ignore_items,
        output_strategy=cfg.output_strategy,
    )

    # List instances/rooms and exit if requested
    if cfg.list_instances:
        names = manager.get_instance_names()
        rooms = manager.get_room_names()
        logger.info(f"instance_names: {names}")
        logger.info(f"room_names: {rooms}")
        return

    # Batch insertion
    if cfg.batch_insert_config is not None:
        logger.info(
            f"Loading batch insert config from {cfg.batch_insert_config}"
        )
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(cfg.batch_insert_config, "r", encoding="utf-8") as f:
            objects = json.load(f)

        # Reject anything but a list up front instead of failing later with
        # an opaque error while iterating over it.
        if not isinstance(objects, list):
            logger.error(
                "Batch insert config must be a JSON list of objects, got %s.",
                type(objects).__name__,
            )
            sys.exit(1)

        logger.info(f"Batch inserting {len(objects)} objects...")
        centers = manager.batch_insert_objects(objects)

        success_count = sum(c is not None for c in centers)
        logger.info(
            f"✅ Successfully placed {success_count}/{len(objects)} objects"
        )

        if success_count < len(objects):
            logger.warning(
                f"⚠️ Failed to place {len(objects) - success_count} objects"
            )
            sys.exit(1)

        # Generate floorplan visualization if requested
        if cfg.output_path is not None:
            manager.visualize(output_path=cfg.output_path)

        return

    # Load the GPT client only when a name actually has to be resolved.
    needs_resolution = any(
        value is not None
        for value in (
            cfg.on_instance,
            cfg.in_room,
            cfg.beside_instance,
            cfg.delete_instance,
            cfg.query_instance,
        )
    )
    gpt_client = _get_gpt_client() if needs_resolution else None

    on_instance = cfg.on_instance
    if on_instance is not None:
        resolved = manager.resolve_on_instance(
            on_instance, gpt_client=gpt_client
        )
        if resolved is None:
            logger.error(
                "No object matched \"%s\"。Current scene instance name: %s。",
                on_instance,
                manager.get_instance_names(),
            )
            sys.exit(1)
        on_instance = resolved
        if resolved != cfg.on_instance:
            logger.info("\"%s\" -> \"%s\"", cfg.on_instance, resolved)

    in_room = cfg.in_room
    if in_room is not None:
        resolved = manager.resolve_in_room(in_room, gpt_client=gpt_client)
        if resolved is None:
            logger.error(
                "No room matched \"%s\"。Current scene room names: %s。",
                in_room,
                manager.get_room_names(),
            )
            sys.exit(1)
        in_room = resolved
        if resolved != cfg.in_room:
            logger.info("\"%s\" -> \"%s\"", cfg.in_room, resolved)

    beside_instance = cfg.beside_instance
    if beside_instance is not None:
        # The already-resolved room narrows the candidate instances.
        resolved = manager.resolve_beside_instance(
            beside_instance, gpt_client=gpt_client, in_room=in_room
        )
        if resolved is None:
            candidates = (
                manager.get_instance_names_in_room(in_room)
                if in_room
                else manager.get_instance_names()
            )
            logger.error(
                "No object matched \"%s\"。Current %sinstance name: %s。",
                beside_instance,
                f"room '{in_room}' " if in_room else "",
                candidates,
            )
            sys.exit(1)
        beside_instance = resolved
        if resolved != cfg.beside_instance:
            logger.info("\"%s\" -> \"%s\"", cfg.beside_instance, resolved)

    delete_instance = cfg.delete_instance
    delete_in_room = cfg.delete_in_room
    if delete_instance is not None:
        # Resolve room constraint if provided
        if delete_in_room is not None:
            resolved_room = manager.resolve_in_room(
                delete_in_room, gpt_client=gpt_client
            )
            if resolved_room is None:
                logger.error(
                    "No room matched \"%s\"。Current scene room names: %s。",
                    delete_in_room,
                    manager.get_room_names(),
                )
                sys.exit(1)
            delete_in_room = resolved_room
            if resolved_room != cfg.delete_in_room:
                logger.info(
                    "\"%s\" -> \"%s\"", cfg.delete_in_room, resolved_room
                )

        # Resolve delete_instance with fuzzy matching
        resolved = manager.resolve_delete_instance(
            delete_instance, gpt_client=gpt_client, in_room=delete_in_room
        )
        if resolved is None:
            candidates = (
                manager.get_instance_names_in_room(delete_in_room)
                if delete_in_room
                else manager.get_instance_names()
            )
            logger.error(
                "No object matched \"%s\"。Current %sinstance name: %s。",
                delete_instance,
                f"room '{delete_in_room}' " if delete_in_room else "",
                candidates,
            )
            sys.exit(1)
        delete_instance = resolved
        if resolved != cfg.delete_instance:
            logger.info("\"%s\" -> \"%s\"", cfg.delete_instance, resolved)

        # Perform deletion
        success = manager.delete_object(
            instance_key=delete_instance,
            in_room=delete_in_room,
        )
        if success:
            logger.info(
                f"✅ Successfully deleted '{delete_instance}' from scene."
            )
        else:
            logger.error(f"❌ Failed to delete '{delete_instance}'.")
            sys.exit(1)

    # Query instance position
    query_instance = cfg.query_instance
    if query_instance is not None:
        # Resolve instance with fuzzy matching
        resolved_name, center = manager.resolve_and_query_instance(
            query_instance, gpt_client=gpt_client
        )

        if resolved_name is None or center is None:
            logger.error(
                "No object matched \"%s\"。Current instance names: %s。",
                query_instance,
                manager.get_instance_names(),
            )
            sys.exit(1)

        if resolved_name != query_instance:
            logger.info("\"%s\" -> \"%s\"", query_instance, resolved_name)

        logger.info(
            f"📍 Instance '{resolved_name}' center: "
            f"({center[0]:.4f}, {center[1]:.4f}, {center[2]:.4f})"
        )

    # Add asset instance if specified
    center = None
    if cfg.asset_path is not None:
        center = manager.insert_object(
            asset_path=cfg.asset_path,
            instance_key=cfg.instance_key,
            in_room=in_room,
            on_instance=on_instance,
            beside_instance=beside_instance,
            beside_distance=cfg.beside_distance,
            rotation_rpy=cfg.rotation_rpy,
            n_max_attempt=cfg.max_placement_attempts,
            place_strategy=cfg.place_strategy,
        )

        if center is not None:
            logger.info(
                f"Successfully placed '{cfg.instance_key}' at "
                f"({center[0]:.3f}, {center[1]:.3f}, {center[2]:.3f})"
            )
        else:
            logger.error(
                f"❌ Failed to place '{cfg.instance_key}' in the scene."
            )
            sys.exit(1)

    # Generate floorplan visualization
    if cfg.output_path is not None:
        manager.visualize(output_path=cfg.output_path)
263
+
264
+
265
if __name__ == "__main__":
    # Parse command-line arguments into a FloorplanConfig, then run the CLI.
    config = tyro.cli(FloorplanConfig)
    entrypoint(cfg=config)
embodied_gen/skills/spatial-computing/core/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Floorplan skill core modules.
2
+
3
+ This package provides core functionality for floorplan visualization
4
+ and object placement in 3D indoor scenes.
5
+ """
6
+
7
+ from .collector import (
8
+ UrdfSemanticInfoCollector,
9
+ )
10
+ from .geometry import (
11
+ get_actionable_surface,
12
+ points_to_polygon,
13
+ )
14
+ from .visualizer import (
15
+ FloorplanVisualizer,
16
+ )
17
+
18
+ __all__ = [
19
+ "FloorplanVisualizer",
20
+ "UrdfSemanticInfoCollector",
21
+ "points_to_polygon",
22
+ "get_actionable_surface",
23
+ ]
embodied_gen/skills/spatial-computing/core/collector.py ADDED
@@ -0,0 +1,1102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import os
22
+ import random
23
+ import re
24
+ import xml.etree.ElementTree as ET
25
+ from concurrent.futures import ProcessPoolExecutor, as_completed
26
+ from shutil import copy2, copytree
27
+ from typing import Literal
28
+
29
+ import numpy as np
30
+ import trimesh
31
+ from scipy.spatial.transform import Rotation as R
32
+ from shapely.affinity import translate
33
+ from shapely.geometry import MultiPolygon, Polygon
34
+ from shapely.ops import unary_union
35
+
36
+ from .geometry import (
37
+ DEFAULT_MESH_SAMPLE_NUM,
38
+ get_actionable_surface,
39
+ points_to_polygon,
40
+ )
41
+
42
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type aliases
# A 2D region: either a single shapely polygon or a multi-part polygon.
Geometry = Polygon | MultiPolygon

# Constants
# Default (roll, pitch, yaw) in radians for inserted assets; 1.57 is roughly
# a quarter turn about X. NOTE(review): presumably converts Y-up assets to
# Z-up - confirm against the asset pipeline.
DEFAULT_ROTATION_RPY = (1.57, 0.0, 0.0)
# Default upper bound on random placement samples.
DEFAULT_MAX_PLACEMENT_ATTEMPTS = 3000
# Substrings matched against lower-cased link names; matching links are
# skipped while parsing the URDF.
DEFAULT_IGNORE_ITEMS = ("ceiling", "light", "exterior")
# Default initial buffer distance (meters) for "beside" placement.
DEFAULT_BESIDE_DISTANCE = 0.5
# Small lift added to the final Z of a placed object.
DEFAULT_Z_OFFSET = 0.001
53
+
54
+
55
def _load_mesh_to_poly(
    mesh_path: str,
    xyz: np.ndarray,
    rpy: np.ndarray,
    mesh_sample_num: int,
    use_exact_projection: bool = False,
) -> Polygon:
    """Load a mesh, pose it, and return its 2D (XY) footprint.

    Kept as a module-level function so it can be submitted to a
    ProcessPoolExecutor.

    Args:
        mesh_path: Path to the mesh file.
        xyz: Translation applied to the mesh.
        rpy: Euler angles applied to the mesh (scipy "xyz" convention).
        mesh_sample_num: Number of surface points sampled for the
            approximate footprint.
        use_exact_projection: If True, first try the union of all projected
            triangles; fall back to sampling when that yields nothing.

    Returns:
        The footprint geometry, or an empty Polygon if the file is missing.

    """
    if not os.path.exists(mesh_path):
        return Polygon()

    mesh = trimesh.load(mesh_path, force="mesh", skip_materials=True)

    # Homogeneous pose: rotation block plus translation column.
    pose = np.eye(4)
    pose[:3, :3] = R.from_euler("xyz", rpy).as_matrix()
    pose[:3, 3] = xyz
    mesh.apply_transform(pose)

    if use_exact_projection:
        projected = (Polygon(tri) for tri in mesh.triangles[:, :, :2])
        # Drop degenerate or invalid triangles before the union.
        usable = [p for p in projected if p.is_valid and p.area > 1e-8]
        if usable:
            footprint = unary_union(usable).buffer(0)
            if not footprint.is_empty:
                return footprint

    sampled_xy = np.asarray(mesh.sample(mesh_sample_num))[:, :2]
    return points_to_polygon(sampled_xy)
91
+
92
+
93
+ class UrdfSemanticInfoCollector:
94
+ """Collector for URDF semantic information.
95
+
96
+ Parses URDF files to extract room layouts, object footprints, and
97
+ provides methods for adding new instances and updating URDF/USD files.
98
+
99
+ Attributes:
100
+ mesh_sample_num: Number of points to sample from meshes.
101
+ ignore_items: List of item name patterns to ignore.
102
+ instances: Dictionary of instance name to footprint polygon.
103
+ instance_meta: Dictionary of instance metadata (mesh path, pose).
104
+ rooms: Dictionary of room polygons.
105
+ footprints: Dictionary of object footprints.
106
+ occ_area: Union of all occupied areas.
107
+ floor_union: Union of all floor polygons.
108
+
109
+ """
110
+
111
def __init__(
    self,
    mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
    ignore_items: list[str] | None = None,
) -> None:
    """Set up an empty collector.

    Args:
        mesh_sample_num: Number of points to sample from each mesh.
        ignore_items: Name patterns to skip while parsing. A falsy value
            (None or an empty list) selects DEFAULT_IGNORE_ITEMS.

    """
    # Parsing configuration.
    self.mesh_sample_num = mesh_sample_num
    if ignore_items:
        self.ignore_items = ignore_items
    else:
        self.ignore_items = list(DEFAULT_IGNORE_ITEMS)

    # Scene geometry, filled in by collect().
    self.instances: dict[str, Polygon] = {}
    self.instance_meta: dict[str, dict] = {}
    self.rooms: dict[str, Geometry] = {}
    self.footprints: dict[str, Geometry] = {}
    self.occ_area: Geometry = Polygon()
    self.floor_union: Geometry = Polygon()

    # Source URDF and its parsed XML tree.
    self.urdf_path: str = ""
    self._tree: ET.ElementTree | None = None
    self._root: ET.Element | None = None
136
+
137
+ def _get_transform(
138
+ self,
139
+ joint_elem: ET.Element,
140
+ ) -> tuple[np.ndarray, np.ndarray]:
141
+ """Extract transform (xyz, rpy) from a joint element.
142
+
143
+ Args:
144
+ joint_elem: XML Element representing a URDF joint.
145
+
146
+ Returns:
147
+ Tuple of (xyz, rpy) arrays.
148
+
149
+ """
150
+ origin = joint_elem.find("origin")
151
+ if origin is not None:
152
+ xyz = np.fromstring(origin.attrib.get("xyz", "0 0 0"), sep=" ")
153
+ rpy = np.fromstring(origin.attrib.get("rpy", "0 0 0"), sep=" ")
154
+ else:
155
+ xyz, rpy = np.zeros(3), np.zeros(3)
156
+ return xyz, rpy
157
+
158
def collect(self, urdf_path: str) -> None:
    """Parse a URDF file and rebuild all derived scene state.

    Args:
        urdf_path: Path to the URDF file.

    """
    logger.info(f"Collecting URDF semantic info from {urdf_path}")
    self.urdf_path = urdf_path

    tree = ET.parse(urdf_path)
    self._tree = tree
    self._root = tree.getroot()

    # Mesh paths in the URDF are resolved against the URDF's directory.
    self._process_links(
        os.path.dirname(urdf_path),
        self._build_link_transforms(),
    )
    self._update_internal_state()
175
+
176
+ def _build_link_transforms(
177
+ self,
178
+ ) -> dict[str, tuple[np.ndarray, np.ndarray]]:
179
+ """Build mapping from link names to their transforms.
180
+
181
+ Returns:
182
+ Dictionary mapping link names to (xyz, rpy) tuples.
183
+
184
+ """
185
+ link_transforms: dict[str, tuple[np.ndarray, np.ndarray]] = {}
186
+
187
+ for joint in self._tree.findall("joint"):
188
+ child = joint.find("child")
189
+ if child is not None:
190
+ link_name = child.attrib["link"]
191
+ link_transforms[link_name] = self._get_transform(joint)
192
+
193
+ return link_transforms
194
+
195
def _process_links(
    self,
    urdf_dir: str,
    link_transforms: dict[str, tuple[np.ndarray, np.ndarray]],
) -> None:
    """Extract a 2D footprint for every mesh link in the URDF (parallel).

    Resets ``self.instances`` and ``self.instance_meta``. Links whose
    lower-cased name contains an ignore pattern, or that have no
    ``visual/geometry/mesh`` node, are skipped. Wall footprints are merged
    into a single ``"walls"`` entry; every other link is stored under its
    normalized key.

    Uses ProcessPoolExecutor to bypass the GIL for CPU-bound mesh loading
    and sampling.

    Args:
        urdf_dir: Directory containing the URDF file.
        link_transforms: Dictionary of link transforms.

    """
    self.instances = {}
    self.instance_meta = {}
    wall_polys: list[Polygon] = []

    # Collect tasks for parallel processing
    tasks: list[dict] = []
    for link in self._tree.findall("link"):
        name = link.attrib.get("name", "").lower()
        if any(ign in name for ign in self.ignore_items):
            continue

        visual = link.find("visual")
        if visual is None:
            continue

        mesh_node = visual.find("geometry/mesh")
        if mesh_node is None:
            continue

        mesh_path = os.path.join(urdf_dir, mesh_node.attrib["filename"])
        default_transform = (np.zeros(3), np.zeros(3))
        xyz, rpy = link_transforms.get(
            link.attrib["name"], default_transform
        )
        tasks.append(
            {
                "link_name": link.attrib["name"],
                "link_name_lower": name,
                "mesh_path": mesh_path,
                "xyz": xyz,
                "rpy": rpy,
            }
        )

    logger.info(
        "Processing %d URDF links to extract geometry "
        "(parallel, sample_num=%d)...",
        len(tasks),
        self.mesh_sample_num,
    )

    if not tasks:
        # ProcessPoolExecutor rejects max_workers=0, so a URDF without any
        # usable mesh link must not reach the pool.
        self.instances["walls"] = unary_union(wall_polys)
        return

    # ProcessPoolExecutor bypasses GIL for CPU-bound trimesh ops.
    # Cap workers to balance parallelism vs memory overhead.
    n_workers = min(len(tasks), os.cpu_count() or 4, 8)
    futures_map: dict = {}
    with ProcessPoolExecutor(max_workers=n_workers) as executor:
        for task in tasks:
            future = executor.submit(
                _load_mesh_to_poly,
                task["mesh_path"],
                task["xyz"],
                task["rpy"],
                self.mesh_sample_num,
                # Floors use the exact triangle projection.
                "_floor" in task["link_name_lower"],
            )
            futures_map[future] = task

        for future in as_completed(futures_map):
            task = futures_map[future]
            try:
                poly = future.result()
            except Exception:
                # Best effort: one broken mesh must not abort the scene.
                logger.warning(
                    "Failed to process link '%s', skipping.",
                    task["link_name"],
                    exc_info=True,
                )
                continue

            if poly.is_empty:
                continue

            if "wall" in task["link_name_lower"]:
                wall_polys.append(poly)
            else:
                key = self._process_safe_key_robust(task["link_name"])
                self.instances[key] = poly
                self.instance_meta[key] = {
                    "mesh_path": task["mesh_path"],
                    "xyz": task["xyz"],
                    "rpy": task["rpy"],
                    "original_link_name": task["link_name"],
                }

    self.instances["walls"] = unary_union(wall_polys)
295
+
296
def _update_internal_state(self) -> None:
    """Recompute rooms, footprints, occupied area and floor union.

    Non-empty instances whose key contains "_floor" are rooms. Every other
    non-empty instance, except "walls" and keys containing "rug", is an
    obstacle footprint.
    """
    rooms = {}
    footprints = {}
    for key, geom in self.instances.items():
        if geom.is_empty:
            continue
        lowered = key.lower()
        if "_floor" in lowered:
            rooms[key] = geom
        elif key != "walls" and "rug" not in lowered:
            footprints[key] = geom

    self.rooms = rooms
    self.footprints = footprints
    self.occ_area = unary_union(list(footprints.values()))
    self.floor_union = unary_union(list(rooms.values()))
314
+
315
+ def _process_safe_key_robust(self, name: str) -> str:
316
+ """Convert a link name to a safe, normalized key.
317
+
318
+ Args:
319
+ name: Original link name.
320
+
321
+ Returns:
322
+ Normalized key string.
323
+
324
+ """
325
+ if name.endswith("_floor"):
326
+ parts = name.split("_")
327
+ return "_".join(parts[:-2] + ["floor"])
328
+
329
+ if "Factory" in name:
330
+ # Handle infinigen naming convention
331
+ prefix = name.split("Factory")[0]
332
+ suffix = f"_{name.split('_')[-1]}"
333
+ else:
334
+ prefix, suffix = name, ""
335
+
336
+ res = prefix.replace(" ", "_")
337
+ res = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", res)
338
+ res = res.lower()
339
+ res = re.sub(r"_+", "_", res).strip("_ ")
340
+
341
+ return f"{res}{suffix}"
342
+
343
def add_instance(
    self,
    asset_path: str,
    instance_key: str,
    in_room: str | None = None,
    on_instance: str | None = None,
    beside_instance: str | None = None,
    beside_distance: float = DEFAULT_BESIDE_DISTANCE,
    rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
    n_max_attempt: int = DEFAULT_MAX_PLACEMENT_ATTEMPTS,
    place_strategy: Literal["top", "random"] = "random",
) -> list[float] | None:
    """Add a new instance to the scene with automatic placement.

    On success the footprint is stored in ``self.instances`` and the pose
    in ``self.instance_meta``, so the new object can itself be the target
    of a later ``on_instance`` placement.

    Args:
        asset_path: Path to the asset mesh file.
        instance_key: Unique key for the new instance.
        in_room: Optional room name to constrain placement.
        on_instance: Optional instance name to place on top of.
        beside_instance: Optional instance name to place beside (on floor).
            Takes precedence over ``on_instance``.
        beside_distance: Initial buffer distance from the target instance
            for beside placement (meters). Will auto-expand if needed.
        rotation_rpy: Initial rotation in roll-pitch-yaw.
        n_max_attempt: Maximum placement attempts.
        place_strategy: Either "top" or "random".

    Returns:
        List [x, y, z] of the placed instance position (rounded to 4
        decimals), or None if placement failed.

    Raises:
        ValueError: If instance_key already exists or room/instance not
            found.

    """
    if instance_key in self.instances:
        raise ValueError(f"Instance key '{instance_key}' already exists.")

    room_poly = self._resolve_room_polygon(in_room)

    # Load mesh and compute base polygon (needed for all placement modes)
    mesh = trimesh.load(asset_path, force="mesh")
    mesh.apply_transform(
        trimesh.transformations.euler_matrix(*rotation_rpy, "sxyz")
    )

    verts = np.asarray(mesh.sample(self.mesh_sample_num))[:, :2]
    base_poly = points_to_polygon(verts)
    # Re-center the footprint so sampled offsets are its final position.
    centroid = base_poly.centroid
    base_poly = translate(base_poly, xoff=-centroid.x, yoff=-centroid.y)

    if beside_instance is not None:
        placement = self._try_place_beside(
            base_poly=base_poly,
            beside_instance=beside_instance,
            room_poly=room_poly,
            beside_distance=beside_distance,
            n_max_attempt=n_max_attempt,
            multi_match_strategy="first",  # Default strategy
        )
        # Beside placement always rests on the floor.
        base_z = 0.0
    else:
        target_area, obstacles, base_z = self._resolve_placement_target(
            on_instance, room_poly, place_strategy
        )

        if target_area.is_empty:
            logger.error("Target area for placement is empty.")
            return None

        placement = self._try_place_polygon(
            base_poly, target_area, obstacles, n_max_attempt
        )

    if placement is None:
        logger.error(
            f"Failed to place '{instance_key}' after all attempts."
        )
        return None

    x, y, candidate = placement
    # Lift the mesh so its lowest point sits just above the support.
    final_z = base_z - mesh.bounds[0][2] + DEFAULT_Z_OFFSET
    position = [round(v, 4) for v in (x, y, final_z)]

    self.instances[instance_key] = candidate
    # Record the pose alongside the footprint; without this entry a later
    # on_instance lookup of this object fails with KeyError.
    self.instance_meta[instance_key] = {
        "mesh_path": asset_path,
        "xyz": np.asarray(position, dtype=float),
        "rpy": np.asarray(rotation_rpy, dtype=float),
        "original_link_name": instance_key,
    }
    self._update_internal_state()

    return position
427
+
428
+ def _resolve_room_polygon(self, in_room: str | None) -> Geometry | None:
429
+ """Resolve room name to polygon.
430
+
431
+ Args:
432
+ in_room: Room name query string.
433
+
434
+ Returns:
435
+ Room polygon or None if not specified.
436
+
437
+ Raises:
438
+ ValueError: If room not found.
439
+
440
+ """
441
+ if in_room is None:
442
+ return None
443
+
444
+ query_room = in_room.lower()
445
+ room_matches = [
446
+ k for k in self.rooms.keys() if query_room in k.lower()
447
+ ]
448
+
449
+ if not room_matches:
450
+ raise ValueError(f"Room '{in_room}' not found.")
451
+
452
+ return unary_union([self.rooms[k] for k in room_matches])
453
+
454
def _try_place_beside(
    self,
    base_poly: Polygon,
    beside_instance: str,
    room_poly: Geometry | None,
    beside_distance: float = DEFAULT_BESIDE_DISTANCE,
    n_max_attempt: int = DEFAULT_MAX_PLACEMENT_ATTEMPTS,
    max_expand_steps: int = 5,
    expand_factor: float = 1.5,
    multi_match_strategy: Literal["first", "random", "largest"] = "first",
) -> tuple[float, float, Polygon] | None:
    """Place a footprint in a ring around a target, widening on failure.

    Each round builds a ring of the current width around the target
    footprint, clips it to the floor, removes occupied area, and samples
    positions inside what is left. Rounds with an empty ring, or with less
    free area than half the object's footprint, are skipped. The width is
    multiplied by ``expand_factor`` after every unsuccessful round.

    Args:
        base_poly: Object footprint polygon centered at origin.
        beside_instance: Target instance name (case-insensitive substring).
        room_poly: Optional room constraint polygon.
        beside_distance: Initial buffer distance (meters); raised to 1.5x
            the object's half diagonal when that is larger.
        n_max_attempt: Attempt budget split across rounds (each round gets
            at least 500 attempts).
        max_expand_steps: Number of expansion rounds after the first one.
        expand_factor: Distance multiplier per expansion round.
        multi_match_strategy: How to pick the target when several
            instances match: "first", "random", or "largest" (by area).

    Returns:
        Tuple (x, y, placed_polygon) on success, or None if all failed.

    Raises:
        ValueError: If beside_instance not found in scene.

    """
    # --- Resolve target instance ---
    query_obj = beside_instance.lower()
    name_matches = [
        key
        for key in self.instances
        if query_obj in key.lower() and key != "walls"
    ]

    possible_matches = name_matches
    if room_poly is not None:
        # Keep only objects whose representative point lies in the room
        # (buffered slightly for mesh-sampling tolerance).
        room_buffered = room_poly.buffer(0.1)
        possible_matches = [
            key
            for key in name_matches
            if room_buffered.contains(
                self.instances[key].representative_point()
            )
        ]

    if not possible_matches:
        location_msg = " in specified room" if room_poly else ""
        if name_matches and room_poly is not None:
            # Log candidate distances for easier debugging
            dists = {
                key: round(self.instances[key].distance(room_poly), 4)
                for key in name_matches
            }
            logger.error("Candidate distances to room polygon: %s", dists)
        raise ValueError(
            f"No instance matching '{beside_instance}' "
            f"found{location_msg}."
        )

    target_key = possible_matches[0]
    if len(possible_matches) > 1:
        # Only "random" and "largest" override the first match.
        if multi_match_strategy == "random":
            target_key = random.choice(possible_matches)
        elif multi_match_strategy == "largest":
            target_key = max(
                possible_matches, key=lambda k: self.instances[k].area
            )
        logger.warning(
            f"Multiple matches for '{beside_instance}': "
            f"{possible_matches}. Using '{target_key}' "
            f"(strategy: {multi_match_strategy})."
        )

    target_footprint = self.instances[target_key]
    floor = self.floor_union if room_poly is None else room_poly

    # --- Ensure initial distance accommodates the object's size ---
    min_x, min_y, max_x, max_y = base_poly.bounds
    obj_half_diag = np.hypot(max_x - min_x, max_y - min_y) / 2.0
    current_distance = max(beside_distance, obj_half_diag * 1.5)

    # Budget attempts across expansion steps
    attempts_per_step = max(n_max_attempt // (max_expand_steps + 1), 500)
    no_obstacles = Polygon()  # obstacles are removed from the ring up front

    for step in range(max_expand_steps + 1):
        # Build ring: buffer - footprint, intersected with floor
        ring_area = (
            target_footprint.buffer(current_distance)
            .difference(target_footprint)
            .intersection(floor)
        )

        if ring_area.is_empty:
            logger.info(
                f"[beside step {step}] Ring empty at "
                f"{current_distance:.2f}m, expanding..."
            )
        else:
            # Sample only from the part of the ring that is actually free.
            free_area = ring_area.difference(self.occ_area)

            if free_area.is_empty or free_area.area < base_poly.area * 0.5:
                logger.info(
                    f"[beside step {step}] Free area too small at "
                    f"{current_distance:.2f}m "
                    f"(free={free_area.area:.4f}, "
                    f"need≈{base_poly.area:.4f}), expanding..."
                )
            else:
                placement = self._try_place_polygon(
                    base_poly, free_area, no_obstacles, attempts_per_step
                )

                if placement is not None:
                    logger.info(
                        f"Placed beside '{target_key}' at distance "
                        f"{current_distance:.2f}m (step {step})"
                    )
                    return placement

                logger.info(
                    f"[beside step {step}] Failed at {current_distance:.2f}m "
                    f"after {attempts_per_step} attempts, expanding..."
                )

        current_distance *= expand_factor

    # current_distance was expanded once more after the last round.
    logger.error(
        f"Failed to place beside '{target_key}' after "
        f"{max_expand_steps + 1} expansion steps "
        f"(final distance: {current_distance / expand_factor:.2f}m)."
    )
    return None
615
+
616
+ def _resolve_placement_target(
617
+ self,
618
+ on_instance: str | None,
619
+ room_poly: Geometry | None,
620
+ place_strategy: Literal["top", "random"],
621
+ ) -> tuple[Geometry, Geometry, float]:
622
+ """Resolve the target placement area and obstacles.
623
+
624
+ Args:
625
+ on_instance: Instance name to place on.
626
+ room_poly: Room polygon constraint.
627
+ place_strategy: Placement strategy.
628
+
629
+ Returns:
630
+ Tuple of (target_area, obstacles, base_z_height).
631
+
632
+ Raises:
633
+ ValueError: If on_instance not found.
634
+
635
+ """
636
+ if on_instance is None:
637
+ if room_poly is not None:
638
+ return room_poly, self.occ_area, 0.0
639
+ return self.floor_union, self.occ_area, 0.0
640
+
641
+ query_obj = on_instance.lower()
642
+ possible_matches = [
643
+ k
644
+ for k in self.instances.keys()
645
+ if query_obj in k.lower() and k != "walls"
646
+ ]
647
+
648
+ if room_poly is not None:
649
+ room_buffered = room_poly.buffer(0.1)
650
+ possible_matches = [
651
+ k
652
+ for k in possible_matches
653
+ if room_buffered.contains(
654
+ self.instances[k].representative_point()
655
+ )
656
+ ]
657
+
658
+ if not possible_matches:
659
+ location_msg = f" in room '{on_instance}'" if room_poly else ""
660
+ raise ValueError(
661
+ f"No instance matching '{on_instance}' found{location_msg}."
662
+ )
663
+
664
+ if place_strategy == "random":
665
+ target_parent_key = random.choice(possible_matches)
666
+ else:
667
+ target_parent_key = possible_matches[0]
668
+
669
+ if len(possible_matches) > 1:
670
+ logger.warning(
671
+ f"Multiple matches for '{on_instance}': {possible_matches}. "
672
+ f"Using '{target_parent_key}'."
673
+ )
674
+
675
+ meta = self.instance_meta[target_parent_key]
676
+ parent_mesh = trimesh.load(meta["mesh_path"], force="mesh")
677
+ matrix = np.eye(4)
678
+ matrix[:3, :3] = R.from_euler("xyz", meta["rpy"]).as_matrix()
679
+ matrix[:3, 3] = meta["xyz"]
680
+ parent_mesh.apply_transform(matrix)
681
+
682
+ best_z, surface_poly = get_actionable_surface(
683
+ parent_mesh, place_strategy=place_strategy
684
+ )
685
+ obstacles = self.occ_area.difference(self.instances[target_parent_key])
686
+
687
+ # Re-add footprints of objects inside the parent polygon so they
688
+ # remain obstacles (difference above removes them).
689
+ parent_poly = self.instances[target_parent_key]
690
+ children_on_parent = [
691
+ poly
692
+ for key, poly in self.footprints.items()
693
+ if key != target_parent_key and parent_poly.contains(poly)
694
+ ]
695
+ if children_on_parent:
696
+ obstacles = unary_union([obstacles] + children_on_parent)
697
+
698
+ logger.info(f"Placing on '{target_parent_key}' (Z={best_z:.3f})")
699
+
700
+ return surface_poly, obstacles, best_z
701
+
702
+ def _try_place_polygon(
703
+ self,
704
+ base_poly: Polygon,
705
+ target_area: Geometry,
706
+ obstacles: Geometry,
707
+ n_max_attempt: int,
708
+ ) -> tuple[float, float, Polygon] | None:
709
+ """Try to place polygon in target area avoiding obstacles.
710
+
711
+ Pre-computes the free area (target minus obstacles) so that the
712
+ containment check alone is sufficient, avoiding redundant
713
+ intersection tests against obstacles on every iteration.
714
+
715
+ Args:
716
+ base_poly: Polygon to place (centered at origin).
717
+ target_area: Area where placement is allowed.
718
+ obstacles: Areas to avoid.
719
+ n_max_attempt: Maximum attempts.
720
+
721
+ Returns:
722
+ Tuple of (x, y, placed_polygon) or None if failed.
723
+
724
+ """
725
+ if not obstacles.is_empty:
726
+ free_area = target_area.difference(obstacles)
727
+ else:
728
+ free_area = target_area
729
+
730
+ if free_area.is_empty:
731
+ return None
732
+
733
+ minx, miny, maxx, maxy = free_area.bounds
734
+
735
+ for _ in range(n_max_attempt):
736
+ x = np.random.uniform(minx, maxx)
737
+ y = np.random.uniform(miny, maxy)
738
+ candidate = translate(base_poly, xoff=x, yoff=y)
739
+
740
+ if free_area.contains(candidate):
741
+ return x, y, candidate
742
+
743
+ return None
744
+
745
def update_urdf_info(
    self,
    output_path: str,
    instance_key: str,
    visual_mesh_path: str,
    collision_mesh_path: str | None = None,
    trans_xyz: tuple[float, float, float] = (0, 0, 0),
    rot_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
    joint_type: str = "fixed",
) -> None:
    """Add a new link to the URDF tree and save.

    The directory holding each mesh is copied into ``<urdf_dir>/<key>``
    next to the loaded URDF, and the new link refers to the copies by
    relative path. The link is attached to the link named "base". Does
    nothing (apart from a warning) if no URDF has been loaded.

    Args:
        output_path: Path to save the updated URDF.
        instance_key: Name for the new link.
        visual_mesh_path: Path to the visual mesh file.
        collision_mesh_path: Optional path to collision mesh.
        trans_xyz: Translation (x, y, z).
        rot_rpy: Rotation (roll, pitch, yaw).
        joint_type: Type of joint (e.g., "fixed").

    """
    if self._root is None:
        logger.warning(
            "No URDF loaded; skipping URDF update for '%s'.", instance_key
        )
        return

    logger.info(f"Updating URDF for instance '{instance_key}'.")
    urdf_dir = os.path.dirname(self.urdf_path)
    # os.path.join keeps the target relative when urdf_dir is empty; a
    # plain "/" concatenation would point at the filesystem root.
    target_dir = os.path.join(urdf_dir, instance_key)

    # Copy mesh files
    visual_dir = os.path.dirname(visual_mesh_path)
    copytree(visual_dir, target_dir, dirs_exist_ok=True)
    visual_rel_path = (
        f"{instance_key}/{os.path.basename(visual_mesh_path)}"
    )

    collision_rel_path = None
    if collision_mesh_path is not None:
        collision_dir = os.path.dirname(collision_mesh_path)
        # Skip the second copy when both meshes share one directory.
        if collision_dir != visual_dir:
            copytree(collision_dir, target_dir, dirs_exist_ok=True)
        collision_rel_path = (
            f"{instance_key}/{os.path.basename(collision_mesh_path)}"
        )

    # Create link element
    link = ET.SubElement(self._root, "link", attrib={"name": instance_key})

    visual = ET.SubElement(link, "visual")
    v_geo = ET.SubElement(visual, "geometry")
    ET.SubElement(v_geo, "mesh", attrib={"filename": visual_rel_path})

    if collision_rel_path is not None:
        collision = ET.SubElement(link, "collision")
        c_geo = ET.SubElement(collision, "geometry")
        ET.SubElement(
            c_geo, "mesh", attrib={"filename": collision_rel_path}
        )

    # Create joint element
    joint_name = f"joint_{instance_key}"
    joint = ET.SubElement(
        self._root,
        "joint",
        attrib={"name": joint_name, "type": joint_type},
    )

    ET.SubElement(joint, "parent", attrib={"link": "base"})
    ET.SubElement(joint, "child", attrib={"link": instance_key})

    xyz_str = f"{trans_xyz[0]:.4f} {trans_xyz[1]:.4f} {trans_xyz[2]:.4f}"
    rpy_str = f"{rot_rpy[0]:.4f} {rot_rpy[1]:.4f} {rot_rpy[2]:.4f}"
    ET.SubElement(joint, "origin", attrib={"xyz": xyz_str, "rpy": rpy_str})

    self.save_urdf(output_path)
824
+
825
def update_usd_info(
    self,
    usd_path: str,
    output_path: str,
    instance_key: str,
    visual_mesh_path: str,
    trans_xyz: list[float],
    rot_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
) -> None:
    """Add a mesh instance to an existing USD file.

    Uses Blender (bpy) to convert the OBJ to a ``.usdc`` file stored in
    ``<output dir>/<instance_key>/``, copies texture/material files next
    to it, then references it from a new Xform prim on the stage.

    Args:
        usd_path: Path to the source USD file.
        output_path: Path to save the modified USD.
        instance_key: Prim path name for the new instance.
        visual_mesh_path: Path to the visual mesh (OBJ format).
        trans_xyz: Translation [x, y, z].
        rot_rpy: Rotation (roll, pitch, yaw) in radians.

    Raises:
        ImportError: If pxr (USD) library or bpy is not available.

    """
    import bpy
    from pxr import Gf, Usd, UsdGeom

    prim_path = f"/{instance_key}"
    out_dir = os.path.dirname(output_path)
    target_dir = os.path.join(out_dir, instance_key)
    os.makedirs(target_dir, exist_ok=True)

    mesh_filename = os.path.basename(visual_mesh_path)
    usdc_filename = os.path.splitext(mesh_filename)[0] + ".usdc"
    target_usdc_path = os.path.join(target_dir, usdc_filename)

    logger.info(
        f"Converting with Blender (bpy): "
        f"{visual_mesh_path} -> {target_usdc_path}"
    )
    # Start from an empty Blender scene so only this mesh is exported.
    bpy.ops.wm.read_factory_settings(use_empty=True)
    bpy.ops.wm.obj_import(
        filepath=visual_mesh_path,
        forward_axis="Y",
        up_axis="Z",
    )
    bpy.ops.wm.usd_export(
        filepath=target_usdc_path,
        selected_objects_only=False,
    )

    # Copy texture files
    # A bare filename has an empty dirname, which os.listdir rejects;
    # fall back to the current directory.
    src_dir = os.path.dirname(visual_mesh_path) or "."
    for f in os.listdir(src_dir):
        if f.lower().endswith((".png", ".jpg", ".jpeg", ".mtl")):
            copy2(os.path.join(src_dir, f), target_dir)

    final_rel_path = f"./{instance_key}/{usdc_filename}"

    # Update USD stage
    stage = Usd.Stage.Open(usd_path)
    mesh_prim = UsdGeom.Xform.Define(stage, prim_path)

    ref_prim = UsdGeom.Mesh.Define(stage, f"{prim_path}/Mesh")
    ref_prim.GetPrim().GetReferences().AddReference(final_rel_path)

    # Build transform matrix
    translation_mat = Gf.Matrix4d().SetTranslate(
        Gf.Vec3d(trans_xyz[0], trans_xyz[1], trans_xyz[2])
    )
    # Gf.Rotation takes degrees; rot_rpy is converted from radians.
    rx = Gf.Matrix4d().SetRotate(
        Gf.Rotation(Gf.Vec3d(1, 0, 0), np.degrees(rot_rpy[0]))
    )
    ry = Gf.Matrix4d().SetRotate(
        Gf.Rotation(Gf.Vec3d(0, 1, 0), np.degrees(rot_rpy[1]))
    )
    rz = Gf.Matrix4d().SetRotate(
        Gf.Rotation(Gf.Vec3d(0, 0, 1), np.degrees(rot_rpy[2]))
    )
    rotation_mat = rx * ry * rz
    transform = rotation_mat * translation_mat
    mesh_prim.AddTransformOp().Set(transform)

    stage.GetRootLayer().Export(output_path)
    logger.info(f"✅ Saved updated USD to {output_path}")
911
+
912
def remove_usd_instance(
    self,
    usd_path: str,
    output_path: str,
    instance_key: str,
) -> None:
    """Delete one root-level prim from a USD stage and export the result.

    The stage at ``usd_path`` is opened and the prim at ``/<instance_key>``
    is removed when it is valid; otherwise a warning is logged. In both
    cases the root layer is exported to ``output_path``.

    Args:
        usd_path: Path to the source USD file.
        output_path: Path the modified root layer is exported to.
        instance_key: Name of the prim (without the leading ``/``).

    Raises:
        ImportError: If the pxr (USD) library is not available.

    """
    # Imported lazily so the module can be loaded without USD installed.
    from pxr import Usd

    prim_path = f"/{instance_key}"
    usd_stage = Usd.Stage.Open(usd_path)

    if not usd_stage.GetPrimAtPath(prim_path).IsValid():
        logger.warning(f"Prim '{prim_path}' not found in USD stage.")
    else:
        usd_stage.RemovePrim(prim_path)
        logger.info(f"Removed prim '{prim_path}' from USD.")

    # The export happens even when nothing was removed.
    usd_stage.GetRootLayer().Export(output_path)
    logger.info(f"✅ Saved updated USD to {output_path}")
947
+
948
def remove_instance(
    self,
    instance_key: str,
    in_room: str | None = None,
) -> bool:
    """Remove an instance from the in-memory scene.

    The instance is dropped from the URDF tree (when one is loaded), from
    ``self.instances`` and from ``self.instance_meta``, after which
    ``self._update_internal_state()`` is called.

    Args:
        instance_key: Exact key of the instance in ``self.instances``.
        in_room: Optional room constraint. When given and the room
            resolves to a polygon, the instance is only removed if its
            representative point lies inside that polygon buffered by
            0.1. If the room cannot be resolved the constraint is ignored.

    Returns:
        True if the instance was removed, False if it was not found or
        did not satisfy the room constraint.

    Raises:
        ValueError: If instance_key is "walls" or an existing instance
            key containing "floor".

    """
    # "walls" and every floor-like instance must never be deleted.
    floor_keys = [k for k in self.instances.keys() if "floor" in k.lower()]
    protected = ["walls"] + floor_keys
    if instance_key in protected:
        raise ValueError(
            f"Cannot remove protected instance '{instance_key}'. "
            f"Protected items: {protected}"
        )

    if instance_key not in self.instances:
        logger.warning(f"Instance '{instance_key}' not found in scene.")
        return False

    room_poly = (
        None if in_room is None else self._resolve_room_polygon(in_room)
    )
    if room_poly is not None:
        room_buffered = room_poly.buffer(0.1)
        instance_point = self.instances[instance_key].representative_point()
        if not room_buffered.contains(instance_point):
            logger.warning(
                f"Instance '{instance_key}' is not in room '{in_room}'."
            )
            return False

    # The XML removal reads instance_meta, so it must run before the
    # metadata entry is dropped below.
    if self._root is not None:
        self._remove_link_and_joint(instance_key)

    del self.instances[instance_key]
    self.instance_meta.pop(instance_key, None)

    self._update_internal_state()

    logger.info(f"✅ Removed instance '{instance_key}' from scene.")
    return True
1011
+
1012
def _remove_link_and_joint(self, instance_key: str) -> None:
    """Drop the link of an instance, and the joint attached to it, from the URDF.

    The link name is taken from the instance metadata entry
    ``original_link_name`` and falls back to ``instance_key``. Only the
    first matching ``<link>`` and the first ``<joint>`` whose ``<child>``
    references that link are removed; a warning is logged for whichever
    of the two is missing.

    Args:
        instance_key: Key of the instance to remove (simplified key).

    """
    root = self._root
    if root is None:
        return

    original_link_name = self.instance_meta.get(instance_key, {}).get(
        "original_link_name", instance_key
    )

    def _is_target_joint(joint) -> bool:
        # A joint belongs to the instance when its <child> names the link.
        child = joint.find("child")
        return (
            child is not None
            and child.attrib.get("link") == original_link_name
        )

    link = next(
        (
            candidate
            for candidate in root.findall("link")
            if candidate.attrib.get("name") == original_link_name
        ),
        None,
    )
    if link is None:
        logger.warning(
            f"Link '{original_link_name}' not found in URDF tree."
        )
    else:
        root.remove(link)
        logger.info(f"Removed link '{original_link_name}' from URDF.")

    joint = next(
        (
            candidate
            for candidate in root.findall("joint")
            if _is_target_joint(candidate)
        ),
        None,
    )
    if joint is None:
        logger.warning(
            f"Joint for '{original_link_name}' not found in URDF tree."
        )
    else:
        root.remove(joint)
        logger.info(
            f"Removed joint for '{original_link_name}' from URDF."
        )
1059
+
1060
def get_instance_center(self, instance_key: str) -> list[float] | None:
    """Return the rounded center of an instance.

    X and Y come from the centroid of the instance's footprint polygon;
    Z is the third component of the ``xyz`` metadata entry (zero when the
    entry is missing). All three values are rounded to 4 decimals.

    Args:
        instance_key: Name of the instance to query.

    Returns:
        List [x, y, z] of the instance center, or None if the instance is
        not part of the scene.

    """
    if instance_key not in self.instances:
        logger.warning(f"Instance '{instance_key}' not found in scene.")
        return None

    xyz = self.instance_meta.get(instance_key, {}).get("xyz", np.zeros(3))
    centroid = self.instances[instance_key].centroid

    center = [
        round(value, 4) for value in (centroid.x, centroid.y, xyz[2])
    ]

    logger.info(f"Instance '{instance_key}' center: {center}")
    return center
1087
+
1088
def save_urdf(self, output_path: str) -> None:
    """Write the current URDF tree to a file.

    When no tree is loaded nothing is written and a warning is logged so
    the caller can tell that ``output_path`` was not produced.

    Args:
        output_path: Path to save the URDF file.

    """
    if self._tree is None:
        logger.warning(
            f"No URDF tree loaded; nothing written to {output_path}."
        )
        return

    # ET.indent only exists on Python 3.9+; older runtimes skip it.
    if hasattr(ET, "indent"):
        ET.indent(self._tree, space=" ", level=0)

    self._tree.write(output_path, encoding="utf-8", xml_declaration=True)
    logger.info(f"✅ Saved updated URDF to {output_path}")
embodied_gen/skills/spatial-computing/core/geometry.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import random
22
+ from typing import Literal
23
+
24
+ import numpy as np
25
+ import trimesh
26
+ from shapely.geometry import MultiPoint, MultiPolygon, Polygon
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Type aliases
31
+ Geometry = Polygon | MultiPolygon
32
+
33
+ # Constants
34
+ DEFAULT_MESH_SAMPLE_NUM = 10000
35
+ DEFAULT_MAX_PLACEMENT_ATTEMPTS = 2000
36
+
37
+
38
def points_to_polygon(
    points: np.ndarray,
    smooth_thresh: float = 0.2,
    scanline_step: float = 0.01,
) -> Polygon:
    """Trace the outline of a 2D point cloud with a horizontal scanline.

    The Y range is cut into strips of height ``scanline_step``. For each
    non-empty strip the largest and smallest X are recorded, giving one
    boundary that is walked upwards and another that is walked back
    downwards to close the ring. The ring is then smoothed by buffering
    outwards and back inwards by ``smooth_thresh``.

    Args:
        points: Array of 2D points with shape (N, 2).
        smooth_thresh: Buffer distance used for the smoothing pass.
        scanline_step: Height of each scanline strip.

    Returns:
        A Shapely Polygon for the contour, or an empty Polygon when there
        are no points or fewer than three contour vertices.

    """
    if len(points) == 0:
        return Polygon()

    y_coords = points[:, 1]
    strip_starts = np.arange(
        y_coords.min(), y_coords.max() + scanline_step, scanline_step
    )

    max_x_side: list[list[float]] = []
    min_x_side: list[list[float]] = []
    for y in strip_starts:
        in_strip = (y_coords >= y) & (y_coords < y + scanline_step)
        if not in_strip.any():
            continue
        strip_x = points[in_strip, 0]
        max_x_side.append([strip_x.max(), y])
        min_x_side.append([strip_x.min(), y])

    # Up along the max-X side, then back down along the min-X side.
    ring = [*max_x_side, *reversed(min_x_side)]
    if len(ring) < 3:
        return Polygon()

    return Polygon(ring).buffer(smooth_thresh).buffer(-smooth_thresh)
79
+
80
+
81
def get_actionable_surface(
    mesh: trimesh.Trimesh,
    tol_angle: int = 10,
    tol_z: float = 0.02,
    area_tolerance: float = 0.15,
    place_strategy: Literal["top", "random"] = "random",
) -> tuple[float, Geometry]:
    """Extract the actionable (placeable) surface from a mesh.

    Faces whose normal is within ``tol_angle`` degrees of +Z are grouped
    into height layers, one layer is chosen according to
    ``place_strategy``, and the XY convex hull of points sampled on that
    layer is returned as the placement region.

    Args:
        mesh: The input trimesh object.
        tol_angle: Angle tolerance in degrees for detecting up-facing normals.
        tol_z: Z-coordinate tolerance for clustering faces.
        area_tolerance: Tolerance for selecting candidate surfaces by area.
        place_strategy: Either "top" (highest surface) or "random".

    Returns:
        A tuple of (z_height, surface_polygon) representing the selected
        actionable surface. When no up-facing face exists, the top of the
        bounding box and the convex hull of all vertices are returned.

    """
    up_vec = np.array([0, 0, 1])
    dots = np.dot(mesh.face_normals, up_vec)
    valid_mask = dots > np.cos(np.deg2rad(tol_angle))

    if not np.any(valid_mask):
        logger.warning(
            "No up-facing surfaces found. Falling back to bounding box top."
        )
        verts = mesh.vertices[:, :2]
        return mesh.bounds[1][2], MultiPoint(verts).convex_hull

    valid_faces_indices = np.where(valid_mask)[0]
    face_z = mesh.triangles_center[valid_mask][:, 2]
    face_areas = mesh.area_faces[valid_mask]

    z_clusters = _cluster_faces_by_z(
        face_z, face_areas, valid_faces_indices, tol_z
    )

    if not z_clusters:
        return mesh.bounds[1][2], MultiPoint(mesh.vertices[:, :2]).convex_hull

    selected_z, selected_data = _select_surface_cluster(
        z_clusters, area_tolerance, place_strategy
    )

    # For "top" strategy, use the highest z among all clusters for
    # base height, while keeping the largest-area polygon for XY placement.
    if place_strategy == "top":
        highest_z = max(z_clusters.keys())
        if highest_z > selected_z:
            logger.info(
                f"Overriding base Z from {selected_z:.3f} to "
                f"highest surface {highest_z:.3f}"
            )
            selected_z = highest_z

    cluster_faces = mesh.faces[selected_data["indices"]]
    temp_mesh = trimesh.Trimesh(vertices=mesh.vertices, faces=cluster_faces)
    # Use the module-level sample budget instead of a duplicated literal.
    samples, _ = trimesh.sample.sample_surface(
        temp_mesh, DEFAULT_MESH_SAMPLE_NUM
    )

    if len(samples) < 3:
        logger.warning(
            f"Failed to sample enough points on layer Z={selected_z}. "
            "Returning empty polygon."
        )
        return selected_z, Polygon()

    surface_poly = MultiPoint(samples[:, :2]).convex_hull
    return selected_z, surface_poly
155
+
156
+
157
+ def _cluster_faces_by_z(
158
+ face_z: np.ndarray,
159
+ face_areas: np.ndarray,
160
+ face_indices: np.ndarray,
161
+ tol_z: float,
162
+ ) -> dict[float, dict]:
163
+ """Cluster mesh faces by their Z coordinate.
164
+
165
+ Args:
166
+ face_z: Z coordinates of face centers.
167
+ face_areas: Areas of each face.
168
+ face_indices: Original indices of the faces.
169
+ tol_z: Tolerance for Z clustering.
170
+
171
+ Returns:
172
+ Dictionary mapping Z values to cluster data (area and indices).
173
+
174
+ """
175
+ z_clusters: dict[float, dict] = {}
176
+
177
+ for i, z in enumerate(face_z):
178
+ key = round(z / tol_z) * tol_z
179
+
180
+ if key not in z_clusters:
181
+ z_clusters[key] = {"area": 0.0, "indices": []}
182
+
183
+ z_clusters[key]["area"] += face_areas[i]
184
+ z_clusters[key]["indices"].append(face_indices[i])
185
+
186
+ return z_clusters
187
+
188
+
189
def _select_surface_cluster(
    z_clusters: dict[float, dict],
    area_tolerance: float,
    place_strategy: Literal["top", "random"],
) -> tuple[float, dict]:
    """Pick one height cluster according to the placement strategy.

    Clusters whose area is at least ``(1 - area_tolerance)`` times the
    largest cluster area are candidates. If that filter leaves nothing,
    the single largest cluster is used. "random" draws one candidate with
    ``random.choice``; any other value takes the candidate with the
    greatest Z.

    Args:
        z_clusters: Dictionary of Z clusters with area and indices.
        area_tolerance: Tolerance for candidate selection by area.
        place_strategy: Either "top" or "random".

    Returns:
        Tuple of (selected_z, cluster_data).

    """
    largest_area = max(cluster["area"] for cluster in z_clusters.values())
    area_floor = largest_area * (1.0 - area_tolerance)
    candidates = [
        item for item in z_clusters.items() if item[1]["area"] >= area_floor
    ]

    if not candidates:
        candidates = [
            max(z_clusters.items(), key=lambda item: item[1]["area"])
        ]

    if place_strategy != "random":
        ranked = sorted(candidates, key=lambda item: item[0], reverse=True)
        selected_z, selected_data = ranked[0]
        logger.info(
            f"Strategy 'top': Selected highest Z={selected_z:.3f} "
            f"(Area={selected_data['area']:.3f})"
        )
        return selected_z, selected_data

    selected_z, selected_data = random.choice(candidates)
    logger.info(
        f"Strategy 'random': Selected Z={selected_z:.3f} "
        f"(Area={selected_data['area']:.3f}) "
        f"from {len(candidates)} candidates."
    )
    return selected_z, selected_data
embodied_gen/skills/spatial-computing/core/visualizer.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+
22
+ import matplotlib.pyplot as plt
23
+ from matplotlib.axes import Axes
24
+ from shapely.geometry import MultiPolygon, Polygon
25
+ from shapely.ops import unary_union
26
+
27
+ # Type aliases
28
+ Geometry = Polygon | MultiPolygon
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
class FloorplanVisualizer:
    """Static utility class for visualizing floorplans."""

    @staticmethod
    def draw_poly(ax: Axes, poly: Geometry, **kwargs) -> None:
        """Draw a polygon or multi-polygon on matplotlib axes.

        Only the exterior ring of each part is filled. When no ``color``
        keyword is given, each part is colored from the "tab10" colormap
        by its position.

        Args:
            ax: Matplotlib axes object.
            poly: Shapely Polygon or MultiPolygon to draw.
            **kwargs: Additional arguments passed to ax.fill(). A
                ``color`` entry is consumed here and applied as the face
                color of every part.

        """
        if poly.is_empty:
            return

        geoms = poly.geoms if hasattr(poly, "geoms") else [poly]

        color = kwargs.pop("color", None)
        if color is None:
            cmap = plt.get_cmap("tab10")
            colors = [cmap(i) for i in range(len(geoms))]
        else:
            colors = [color] * len(geoms)

        for part, facecolor in zip(geoms, colors):
            if part.is_empty:
                continue
            x, y = part.exterior.xy
            ax.fill(x, y, facecolor=facecolor, **kwargs)

    @classmethod
    def plot(
        cls,
        rooms: dict[str, Geometry],
        footprints: dict[str, Geometry],
        occ_area: Geometry,
        save_path: str,
    ) -> None:
        """Generate and save a floorplan visualization.

        The image is written with a transparent background and without
        title, axis labels, ticks or spines.

        Args:
            rooms: Dictionary mapping room names to floor polygons.
            footprints: Dictionary mapping object names to footprint polygons.
            occ_area: Union of all occupied areas.
            save_path: Path to save the output image.

        """
        fig, ax = plt.subplots(figsize=(10, 10))
        # Close the figure even if drawing or saving fails, so repeated
        # calls do not accumulate open pyplot figures.
        try:
            ax.set_aspect("equal")
            cmap_rooms = plt.get_cmap("Pastel1")

            cls._draw_room_floors(ax, rooms, cmap_rooms)
            cls._draw_occupied_area(ax, occ_area)
            cls._draw_footprint_outlines(ax, footprints)
            cls._draw_footprint_labels(ax, footprints)
            cls._draw_room_labels(ax, rooms)
            cls._configure_axes(ax, rooms, occ_area)

            # _configure_axes sets a title and axis labels; clear them
            # again so the exported image contains only the drawing.
            ax.set_title("")
            ax.set_xlabel("")
            ax.set_ylabel("")
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)
            fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
            fig.patch.set_alpha(0)
            ax.patch.set_alpha(0)
            # Save through the figure object rather than pyplot's
            # "current figure" so the right figure is always written.
            fig.savefig(
                save_path,
                dpi=300,
                bbox_inches="tight",
                pad_inches=0,
                transparent=True,
            )
        finally:
            plt.close(fig)

    @classmethod
    def _draw_room_floors(
        cls,
        ax: Axes,
        rooms: dict[str, Geometry],
        cmap: plt.cm.ScalarMappable,
    ) -> None:
        """Draw colored room floor polygons (Layer 1).

        ``cmap`` is called with an integer index and must expose ``N``;
        colors wrap around once there are more rooms than ``cmap.N``.
        """
        for i, poly in enumerate(rooms.values()):
            cls.draw_poly(
                ax,
                poly,
                color=cmap(i % cmap.N),
                alpha=1.0,
                edgecolor="black",
                linestyle="--",
                zorder=1,
            )

    @classmethod
    def _draw_occupied_area(cls, ax: Axes, occ_area: Geometry) -> None:
        """Draw the occupied area overlay (Layer 2)."""
        cls.draw_poly(
            ax,
            occ_area,
            color="tab:blue",
            alpha=0.5,
            lw=0,
            zorder=2,
        )

    @staticmethod
    def _draw_footprint_outlines(
        ax: Axes,
        footprints: dict[str, Geometry],
    ) -> None:
        """Draw dashed gray footprint outlines (Layer 3)."""
        for poly in footprints.values():
            if poly.is_empty:
                continue
            geoms = poly.geoms if hasattr(poly, "geoms") else [poly]
            for p in geoms:
                ax.plot(*p.exterior.xy, "--", lw=0.8, color="gray", zorder=3)

    @staticmethod
    def _draw_footprint_labels(
        ax: Axes,
        footprints: dict[str, Geometry],
    ) -> None:
        """Draw footprint text labels at each centroid (Layer 4).

        A trailing ``_<digits>`` suffix is stripped from the name before
        it is used as the label.
        """
        import re

        for name, poly in footprints.items():
            if poly.is_empty:
                continue
            label = re.sub(r"_\d+$", "", name)
            ax.text(
                poly.centroid.x,
                poly.centroid.y,
                label,
                fontsize=8,
                ha="center",
                va="center",
                bbox={
                    "facecolor": "white",
                    "alpha": 0.5,
                    "edgecolor": "none",
                    "pad": 0.1,
                },
                zorder=4,
            )

    @staticmethod
    def _draw_room_labels(ax: Axes, rooms: dict[str, Geometry]) -> None:
        """Draw room text labels at each centroid (Layer 5).

        The substring ``_floor`` is removed from the room name to form
        the label.
        """
        for name, poly in rooms.items():
            if poly.is_empty:
                continue
            label = name.replace("_floor", "")
            ax.text(
                poly.centroid.x,
                poly.centroid.y,
                label,
                fontsize=9,
                color="black",
                weight="bold",
                ha="center",
                va="center",
                bbox={
                    "facecolor": "lightgray",
                    "alpha": 0.7,
                    "edgecolor": "black",
                    "boxstyle": "round,pad=0.3",
                },
                zorder=5,
            )

    @staticmethod
    def _configure_axes(
        ax: Axes,
        rooms: dict[str, Geometry],
        occ_area: Geometry,
    ) -> None:
        """Configure axes limits and labels.

        The view is a square centered on the bounds of all rooms plus the
        occupied area, with a 5% margin on the longer side. An empty
        scene falls back to the bounds (-1, -1, 1, 1).
        """
        total_geom = unary_union(list(rooms.values()) + [occ_area])

        if total_geom.is_empty:
            minx, miny, maxx, maxy = -1, -1, 1, 1
        else:
            minx, miny, maxx, maxy = total_geom.bounds

        cx = (minx + maxx) * 0.5
        cy = (miny + maxy) * 0.5
        half = max(maxx - minx, maxy - miny) * 0.5 * 1.05

        ax.set_xlim(cx - half, cx + half)
        ax.set_ylim(cy - half, cy + half)
        ax.set_title("Floorplan Analysis", fontsize=14)
        ax.set_xlabel("X (m)")
        ax.set_ylabel("Y (m)")
embodied_gen/utils/gpt_clients.py CHANGED
@@ -30,6 +30,7 @@ from tenacity import (
30
  retry,
31
  retry_if_not_exception_type,
32
  stop_after_attempt,
 
33
  wait_random_exponential,
34
  )
35
 
@@ -44,6 +45,7 @@ __all__ = [
44
 
45
  _CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
46
  CONFIG_FILE = os.path.join(_CURRENT_DIR, "gpt_config.yaml")
 
47
 
48
 
49
  def combine_images_to_grid(
@@ -90,6 +92,7 @@ class GPTclient:
90
  api_version (str, optional): API version (for Azure).
91
  check_connection (bool, optional): Whether to check API connection.
92
  verbose (bool, optional): Enable verbose logging.
 
93
 
94
  Example:
95
  ```sh
@@ -117,21 +120,27 @@ class GPTclient:
117
  api_version: str = None,
118
  check_connection: bool = True,
119
  verbose: bool = False,
 
120
  ):
121
  if api_version is not None:
122
  self.client = AzureOpenAI(
123
  azure_endpoint=endpoint,
124
  api_key=api_key,
125
  api_version=api_version,
 
 
126
  )
127
  else:
128
  self.client = OpenAI(
129
  base_url=endpoint,
130
  api_key=api_key,
 
 
131
  )
132
 
133
  self.endpoint = endpoint
134
  self.model_name = model_name
 
135
  self.image_formats = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
136
  self.verbose = verbose
137
  if check_connection:
@@ -142,7 +151,7 @@ class GPTclient:
142
  @retry(
143
  retry=retry_if_not_exception_type(openai.BadRequestError),
144
  wait=wait_random_exponential(min=1, max=10),
145
- stop=stop_after_attempt(5),
146
  )
147
  def completion_with_backoff(self, **kwargs):
148
  """Performs a chat completion request with retry/backoff."""
@@ -253,9 +262,9 @@ class GPTclient:
253
  temperature=0,
254
  max_tokens=100,
255
  )
256
- content = response.choices[0].message.content
257
- logger.info(f"Connection check success.")
258
- except Exception as e:
259
  raise ConnectionError(
260
  f"Failed to connect to GPT API at {self.endpoint}, "
261
  f"please check setting in `{CONFIG_FILE}` and `README`."
@@ -273,6 +282,7 @@ endpoint = os.environ.get("ENDPOINT", agent_config.get("endpoint"))
273
  api_key = os.environ.get("API_KEY", agent_config.get("api_key"))
274
  api_version = os.environ.get("API_VERSION", agent_config.get("api_version"))
275
  model_name = os.environ.get("MODEL_NAME", agent_config.get("model_name"))
 
276
 
277
  GPT_CLIENT = GPTclient(
278
  endpoint=endpoint,
@@ -280,6 +290,7 @@ GPT_CLIENT = GPTclient(
280
  api_version=api_version,
281
  model_name=model_name,
282
  check_connection=False,
 
283
  )
284
 
285
 
 
30
  retry,
31
  retry_if_not_exception_type,
32
  stop_after_attempt,
33
+ stop_after_delay,
34
  wait_random_exponential,
35
  )
36
 
 
45
 
46
  _CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
47
  CONFIG_FILE = os.path.join(_CURRENT_DIR, "gpt_config.yaml")
48
+ DEFAULT_GPT_TIMEOUT = float(os.environ.get("GPT_TIMEOUT", 120))
49
 
50
 
51
  def combine_images_to_grid(
 
92
  api_version (str, optional): API version (for Azure).
93
  check_connection (bool, optional): Whether to check API connection.
94
  verbose (bool, optional): Enable verbose logging.
95
+ timeout (float, optional): Max seconds for a single GPT request.
96
 
97
  Example:
98
  ```sh
 
120
  api_version: str = None,
121
  check_connection: bool = True,
122
  verbose: bool = False,
123
+ timeout: float = DEFAULT_GPT_TIMEOUT,
124
  ):
125
  if api_version is not None:
126
  self.client = AzureOpenAI(
127
  azure_endpoint=endpoint,
128
  api_key=api_key,
129
  api_version=api_version,
130
+ timeout=timeout,
131
+ max_retries=0,
132
  )
133
  else:
134
  self.client = OpenAI(
135
  base_url=endpoint,
136
  api_key=api_key,
137
+ timeout=timeout,
138
+ max_retries=0,
139
  )
140
 
141
  self.endpoint = endpoint
142
  self.model_name = model_name
143
+ self.timeout = timeout
144
  self.image_formats = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
145
  self.verbose = verbose
146
  if check_connection:
 
151
  @retry(
152
  retry=retry_if_not_exception_type(openai.BadRequestError),
153
  wait=wait_random_exponential(min=1, max=10),
154
+ stop=stop_after_attempt(5) | stop_after_delay(DEFAULT_GPT_TIMEOUT),
155
  )
156
  def completion_with_backoff(self, **kwargs):
157
  """Performs a chat completion request with retry/backoff."""
 
262
  temperature=0,
263
  max_tokens=100,
264
  )
265
+ response.choices[0].message.content
266
+ logger.info("Connection check success.")
267
+ except Exception:
268
  raise ConnectionError(
269
  f"Failed to connect to GPT API at {self.endpoint}, "
270
  f"please check setting in `{CONFIG_FILE}` and `README`."
 
282
  api_key = os.environ.get("API_KEY", agent_config.get("api_key"))
283
  api_version = os.environ.get("API_VERSION", agent_config.get("api_version"))
284
  model_name = os.environ.get("MODEL_NAME", agent_config.get("model_name"))
285
+ timeout = DEFAULT_GPT_TIMEOUT
286
 
287
  GPT_CLIENT = GPTclient(
288
  endpoint=endpoint,
 
290
  api_version=api_version,
291
  model_name=model_name,
292
  check_connection=False,
293
+ timeout=timeout,
294
  )
295
 
296
 
embodied_gen/utils/monkey_patch/gradio.py CHANGED
@@ -45,155 +45,14 @@ def _patch_gradio_schema_bool_bug() -> None:
45
 
46
 
47
  def _patch_open3d_cuda_device_count_bug() -> None:
48
- """Force open3d to use CPU pybind only and skip ALL CUDA dlopen.
49
-
50
- Even an empty CDLL(open3d/cuda/pybind*.so) loads libcudart into the
51
- parent process, which corrupts the forked ZeroGPU worker's CUDA
52
- context and silently hangs every @spaces.GPU call in spaces>=0.50.
53
- Bypass the entire CUDA branch so __DEVICE_API__ stays "cpu".
54
- """
55
- init_path = f'{site.getsitepackages()[0]}/open3d/__init__.py'
56
- with fileinput.FileInput(init_path, inplace=True) as file:
57
  for line in file:
58
  print(
59
  line.replace(
60
- 'if _build_config["BUILD_CUDA_MODULE"]:',
61
- 'if False: # patched by EmbodiedGen: force CPU pybind only',
62
  ),
63
  end='',
64
  )
65
-
66
-
67
- def _patch_spaces_zerogpu_logs() -> None:
68
- """Inject stderr prints into spaces.zero internals to locate hangs.
69
-
70
- Hooks (visible in HF Space logs):
71
- - client.schedule ENTER (parent, before HTTP)
72
- - gradio_handler ENTER (parent, decorator entry)
73
- - worker_init ENTER (forked worker, before unpatch)
74
- - worker_init BEFORE torch.init (forked worker)
75
- Must be called BEFORE `import spaces`.
76
- """
77
- import site
78
-
79
- sp = site.getsitepackages()[0]
80
- client_py = f"{sp}/spaces/zero/client.py"
81
- wrappers_py = f"{sp}/spaces/zero/wrappers.py"
82
-
83
- def line(indent: int, tag: str) -> str:
84
- pad = " " * indent
85
- return (
86
- f"{pad}import os as _o, sys as _s, time as _t; "
87
- f"_s.stderr.write(f'[ZGPU-UP ' + _t.strftime('%H:%M:%S') + "
88
- f"f' pid={{_o.getpid()}}] {tag}\\n'); _s.stderr.flush()\n"
89
- )
90
-
91
- def inject(path: str, marker: str, snippet: str) -> None:
92
- text = open(path).read()
93
- if snippet.strip() in text:
94
- return
95
- if marker not in text:
96
- print(f"[zerogpu_logs] WARN marker not found in {path}: {marker[:60]!r}")
97
- return
98
- open(path, "w").write(text.replace(marker, snippet + marker, 1))
99
-
100
- inject(
101
- client_py,
102
- " if not (gradio_version := version.parse",
103
- line(4, "client.schedule ENTER"),
104
- )
105
- inject(
106
- wrappers_py,
107
- " # Immediately close file descriptors",
108
- line(4, "worker_init ENTER"),
109
- )
110
- inject(
111
- wrappers_py,
112
- " torch.init(nvidia_uuid)",
113
- line(12, "worker_init BEFORE torch.init"),
114
- )
115
- inject(
116
- wrappers_py,
117
- " if forked:\n",
118
- line(8, "gradio_handler ENTER"),
119
- )
120
-
121
-
122
- def _neutralize_warp_in_parent() -> None:
123
- """Prevent NVIDIA Warp from calling cuInit() in the ZeroGPU parent.
124
-
125
- Root cause of @spaces.GPU silent hangs (spaces>=0.50): kaolin imports
126
- warp at module top-level. When any kaolin module triggers warp.init(),
127
- Warp's `init_cuda_driver` dlopens libcuda.so + calls cuInit() in the
128
- parent process. After spaces forks the worker, torch.init(nvidia_uuid)
129
- in the worker hangs forever because the inherited CUDA driver state is
130
- poisoned (parent never had a real GPU; ZeroGPU exposes one only post-fork).
131
-
132
- Fix: stub warp.init / warp.context.runtime_init with a pid-aware no-op.
133
- The parent-resident pid skips init; the forked worker (different pid)
134
- runs the real init so warp keeps working inside @spaces.GPU code paths.
135
-
136
- Must be called BEFORE any import that pulls kaolin (e.g. embodied_gen.data,
137
- thirdparty.TRELLIS).
138
- """
139
- import os
140
- import sys
141
-
142
- try:
143
- import warp # noqa: F401 -- pure python import, no cuInit
144
- except ImportError:
145
- return
146
-
147
- parent_pid = os.getpid()
148
-
149
- def _make_pid_safe(orig):
150
- def _wrapped(*args, **kwargs):
151
- if os.getpid() == parent_pid:
152
- sys.stderr.write(
153
- f"[warp-neutralize] skip {orig.__name__} in parent pid={parent_pid}\n"
154
- )
155
- sys.stderr.flush()
156
- return None
157
- return orig(*args, **kwargs)
158
- _wrapped.__wrapped__ = orig
159
- _wrapped.__name__ = getattr(orig, "__name__", "wrapped")
160
- return _wrapped
161
-
162
- if hasattr(warp, "init") and not hasattr(warp.init, "__wrapped__"):
163
- warp.init = _make_pid_safe(warp.init)
164
-
165
- try:
166
- from warp import context as _wctx
167
- if hasattr(_wctx, "runtime_init") and not hasattr(
168
- _wctx.runtime_init, "__wrapped__"
169
- ):
170
- _wctx.runtime_init = _make_pid_safe(_wctx.runtime_init)
171
- except Exception:
172
- pass
173
-
174
-
175
- def _disable_xformers_flash3() -> None:
176
- """Force xformers dispatcher to skip Flash-Attention v3 (Hopper-only).
177
-
178
- sm_120 (Blackwell) has no FA3 kernel binary; the dispatcher still picks
179
- flash3 and the launch aborts with:
180
- `CUDA error ... hopper/flash_fwd_launch_template.h:188: invalid argument`
181
- Env vars `XFORMERS_FLASH3_ATTENTION_DISABLED=1` are silently ignored in
182
- xformers 0.0.32.post2, so we patch `not_supported_reasons` directly.
183
- Cutlass and FA2 both work on sm_120, so removing flash3 from candidates
184
- is enough.
185
- """
186
- try:
187
- from xformers.ops.fmha import flash3 as _f3
188
- except Exception:
189
- return
190
-
191
- _disabled = ["disabled by EmbodiedGen: no FA3 kernel for sm_120"]
192
-
193
- def _ns(cls, d): # noqa: ARG001
194
- return list(_disabled)
195
-
196
- if hasattr(_f3, "FwOp"):
197
- _f3.FwOp.not_supported_reasons = classmethod(_ns)
198
- if hasattr(_f3, "BwOp"):
199
- _f3.BwOp.not_supported_reasons = classmethod(_ns)
 
45
 
46
 
47
def _patch_open3d_cuda_device_count_bug() -> None:
    """Patch open3d to avoid cuda device count bug.

    Rewrites ``open3d/__init__.py`` under the first site-packages
    directory in place, replacing every occurrence of
    ``_pybind_cuda.open3d_core_cuda_device_count()`` with the literal
    ``1``.
    """
    init_path = f'{site.getsitepackages()[0]}/open3d/__init__.py'
    with fileinput.FileInput(init_path, inplace=True) as file:
        for line in file:
            # With inplace=True, printed output is written to the file.
            patched = line.replace(
                '_pybind_cuda.open3d_core_cuda_device_count()', '1'
            )
            print(patched, end='')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
embodied_gen/utils/monkey_patch/infinigen.py CHANGED
@@ -119,13 +119,11 @@ def patch_doors_base_simple():
119
  if constants is None:
120
  constants = RoomConstants()
121
  self.width = constants.door_width - 0.02
122
- self.door_frame_style = np.random.choice(
123
- ["single_column", "full_frame_square", "full_frame_dome"]
124
- )
125
  self.door_frame_width = 0.02
126
- handle_types = ["knob", "lever", "pull", "none"]
127
- if self.door_frame_style != "full_frame_dome":
128
- handle_types.append("bar")
129
  if self.door_frame_style != "single_column":
130
  self.width += -0.02
131
  self.height += -0.04
 
119
  if constants is None:
120
  constants = RoomConstants()
121
  self.width = constants.door_width - 0.02
122
+ # Force a rectangular full frame so generated doors can close
123
+ # cleanly against the wall opening.
124
+ self.door_frame_style = "full_frame_square"
125
  self.door_frame_width = 0.02
126
+ handle_types = ["knob", "lever", "pull", "none", "bar"]
 
 
127
  if self.door_frame_style != "single_column":
128
  self.width += -0.02
129
  self.height += -0.04
embodied_gen/utils/simulation.py CHANGED
@@ -55,10 +55,19 @@ SIM_COORD_ALIGN = np.array(
55
  __all__ = [
56
  "SIM_COORD_ALIGN",
57
  "FrankaPandaGrasper",
 
 
 
 
 
 
 
58
  "load_assets_from_layout_file",
 
59
  "load_mani_skill_robot",
 
60
  "render_images",
61
- "is_urdf_articulated",
62
  ]
63
 
64
 
@@ -722,6 +731,8 @@ class FrankaPandaGrasper(object):
722
  result[action_key] = result[action_key][::sample_ratio]
723
 
724
  n_step = len(result[action_key])
 
 
725
  actions = []
726
  for i in range(n_step):
727
  qpos = result[action_key][i]
@@ -805,10 +816,14 @@ class FrankaPandaGrasper(object):
805
  gripper_state=1,
806
  env_idx=env_idx,
807
  )
 
 
 
 
 
808
  actions.append(grasp_actions)
809
  close_actions = self.control_gripper(
810
  gripper_state=-1,
811
- env_idx=env_idx,
812
  )
813
  actions.append(close_actions)
814
  back_actions = self.move_to_pose(
@@ -817,6 +832,181 @@ class FrankaPandaGrasper(object):
817
  gripper_state=-1,
818
  env_idx=env_idx,
819
  )
 
 
 
 
 
820
  actions.append(back_actions)
821
 
822
  return np.concatenate(actions, axis=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  __all__ = [
56
  "SIM_COORD_ALIGN",
57
  "FrankaPandaGrasper",
58
+ "capture_frame",
59
+ "create_panda_agent",
60
+ "create_recording_camera",
61
+ "estimate_grasp_width",
62
+ "get_actor_bottom_z",
63
+ "get_actor_mesh",
64
+ "is_urdf_articulated",
65
  "load_assets_from_layout_file",
66
+ "load_collision_mesh_from_urdf",
67
  "load_mani_skill_robot",
68
+ "quat_from_yaw",
69
  "render_images",
70
+ "set_ground_base_color",
71
  ]
72
 
73
 
 
731
  result[action_key] = result[action_key][::sample_ratio]
732
 
733
  n_step = len(result[action_key])
734
+ if n_step == 0:
735
+ return None
736
  actions = []
737
  for i in range(n_step):
738
  qpos = result[action_key][i]
 
816
  gripper_state=1,
817
  env_idx=env_idx,
818
  )
819
+ if grasp_actions is None:
820
+ logger.warning(
821
+ f"Failed to move from reach pose to grasp pose for `{actor.name}`."
822
+ )
823
+ return None
824
  actions.append(grasp_actions)
825
  close_actions = self.control_gripper(
826
  gripper_state=-1,
 
827
  )
828
  actions.append(close_actions)
829
  back_actions = self.move_to_pose(
 
832
  gripper_state=-1,
833
  env_idx=env_idx,
834
  )
835
+ if back_actions is None:
836
+ logger.warning(
837
+ f"Failed to retreat after grasping `{actor.name}`."
838
+ )
839
+ return None
840
  actions.append(back_actions)
841
 
842
  return np.concatenate(actions, axis=0)
843
+
844
+
845
def load_collision_mesh_from_urdf(urdf_path: str) -> trimesh.Trimesh:
    """Load the first collision mesh referenced by a URDF in its link frame.

    The mesh ``scale`` attribute and the ``<origin>`` of the *same*
    ``<collision>`` element are applied so the returned mesh sits in the same
    frame the simulator will use; required for correct spawn-z estimation
    downstream.

    Args:
        urdf_path: Path to the URDF file. The mesh filename is resolved
            relative to the directory that contains this file.

    Returns:
        The collision mesh after scaling and the optional origin transform.

    Raises:
        ValueError: If no ``<collision>`` element holds a mesh geometry, or
            the mesh element has no ``filename`` attribute.
    """
    root = ET.parse(urdf_path).getroot()

    # Keep the owning <collision> element together with its mesh so that the
    # origin looked up below belongs to this mesh and not to another
    # collision element (e.g. a primitive shape declared earlier).
    collision_elem = None
    collision_mesh = None
    for candidate in root.iter("collision"):
        collision_mesh = candidate.find("geometry/mesh")
        if collision_mesh is not None:
            collision_elem = candidate
            break
    if collision_mesh is None:
        raise ValueError(f"Collision mesh not found in URDF: {urdf_path}")

    collision_file = collision_mesh.get("filename")
    if not collision_file:
        raise ValueError(f"Collision mesh filename missing in {urdf_path}")

    scale_attr = collision_mesh.get("scale", "1.0 1.0 1.0")
    mesh_scale = np.array([float(x) for x in scale_attr.split()])
    mesh_path = os.path.join(os.path.dirname(urdf_path), collision_file)
    mesh = trimesh.load(mesh_path)
    if isinstance(mesh, trimesh.Scene):
        # Multi-geometry files load as a Scene; merge them into one mesh.
        mesh = mesh.dump(concatenate=True)
    mesh.apply_scale(mesh_scale)

    collision_origin = collision_elem.find("origin")
    if collision_origin is not None:
        xyz = [float(v) for v in collision_origin.get("xyz", "0 0 0").split()]
        rpy = [float(v) for v in collision_origin.get("rpy", "0 0 0").split()]
        transform = np.eye(4, dtype=np.float64)
        transform[:3, :3] = R.from_euler("xyz", rpy, degrees=False).as_matrix()
        transform[:3, 3] = np.array(xyz, dtype=np.float64)
        mesh.apply_transform(transform)

    return mesh
879
+
880
+
881
def estimate_grasp_width(mesh: trimesh.Trimesh) -> float:
    """Estimate a conservative top-down grasp width from OBB extents.

    Args:
        mesh: Mesh whose oriented bounding box is measured.

    Returns:
        The middle value of the three oriented-bounding-box extents.
    """
    obb_extents = mesh.bounding_box_oriented.extents
    ascending = np.sort(obb_extents)
    # Index 1 of the sorted extents is the median side length.
    median_extent = ascending[1]
    return float(median_extent)
885
+
886
+
887
def get_actor_mesh(actor: sapien.Entity) -> trimesh.Trimesh:
    """Get the actor collision mesh in world coordinates.

    Args:
        actor: Entity whose second component is converted to a mesh.

    Returns:
        The mesh produced by ``get_component_mesh`` in the world frame.

    Raises:
        ValueError: If no mesh is returned or the mesh is empty.
    """
    # NOTE(review): index 1 is presumably the PhysX rigid-body component;
    # confirm the component order against how actors are built.
    rigid_component = actor.components[1]
    world_mesh = get_component_mesh(rigid_component, to_world_frame=True)
    if world_mesh is not None and not world_mesh.is_empty:
        return world_mesh

    raise ValueError(f"Actor `{actor.name}` has no valid collision mesh.")
895
+
896
+
897
def get_actor_bottom_z(actor: sapien.Entity) -> float:
    """Get the actor world-space bottom z from its collision mesh.

    Args:
        actor: Entity passed through to ``get_actor_mesh``.

    Returns:
        The z component of the lower corner of the mesh bounds.
    """
    world_mesh = get_actor_mesh(actor)
    # bounds[0] is the minimum corner; its last entry is the lowest z.
    lower_corner = world_mesh.bounds[0]
    return float(lower_corner[2])
900
+
901
+
902
def quat_from_yaw(yaw_deg: float) -> list[float]:
    """Convert z-axis yaw angle (degrees) to a SAPIEN quaternion (w,x,y,z).

    Args:
        yaw_deg: Rotation about the z axis, in degrees.

    Returns:
        ``[w, x, y, z]`` with ``x`` and ``y`` fixed at zero.
    """
    half_angle = np.deg2rad(yaw_deg) / 2
    w = float(np.cos(half_angle))
    z = float(np.sin(half_angle))
    return [w, 0.0, 0.0, z]
906
+
907
+
908
def set_ground_base_color(scene: sapien.Scene, rgba: list[float]) -> None:
    """Update the default ground plane material color for this scene.

    Only the first actor named ``"ground"`` is touched; components without a
    ``render_shapes`` attribute are ignored.

    Args:
        scene: Scene whose actors are searched.
        rgba: Color forwarded to ``set_base_color`` of every render shape.

    Raises:
        ValueError: If the scene has no actor named ``"ground"``.
    """
    ground_actor = next(
        (entity for entity in scene.get_all_actors() if entity.name == "ground"),
        None,
    )
    if ground_actor is None:
        raise ValueError("Ground actor not found in the scene.")

    for part in ground_actor.components:
        shapes = getattr(part, "render_shapes", None)
        if shapes is None:
            continue
        for shape in shapes:
            shape.material.set_base_color(rgba)
922
+
923
+
924
def capture_frame(
    scene: sapien.Scene,
    camera: sapien.render.RenderCameraComponent,
) -> np.ndarray:
    """Capture a single RGB frame from the camera (updates render first).

    Args:
        scene: Scene whose render state is refreshed before the capture.
        camera: Camera that takes the picture.

    Returns:
        The ``"Color"`` output of ``render_images`` as a NumPy array.
    """
    scene.update_render()
    camera.take_picture()
    rendered = render_images(camera, ["Color"])
    return np.array(rendered["Color"])
932
+
933
+
934
def create_recording_camera(
    scene_manager: "SapienSceneManager",
    eye_pos: list[float],
    target_pt: list[float],
    image_hw: tuple[int, int],
    fovy_deg: float = 45.0,
    cam_name: str = "recording_camera",
) -> sapien.render.RenderCameraComponent:
    """Create a camera looking from eye_pos at target_pt for video capture.

    The camera orientation is built with local x pointing along the viewing
    direction, local y pointing left and local z pointing up, using world
    +z as the up reference.

    Args:
        scene_manager: Object whose ``create_camera`` method builds the camera.
        eye_pos: Camera position.
        target_pt: Point the camera looks at; must differ from ``eye_pos``.
        image_hw: Image size forwarded to ``create_camera``.
        fovy_deg: Vertical field of view in degrees.
        cam_name: Name given to the created camera.

    Returns:
        The camera returned by ``scene_manager.create_camera``.

    Raises:
        ValueError: If ``eye_pos`` and ``target_pt`` coincide, because no
            viewing direction can be derived from them.
    """
    eye_pos = np.array(eye_pos, dtype=np.float32)
    target_pt = np.array(target_pt, dtype=np.float32)
    world_up_vec = np.array([0.0, 0.0, 1.0], dtype=np.float32)

    forward_vec = target_pt - eye_pos
    forward_norm = np.linalg.norm(forward_vec)
    if forward_norm < 1e-6:
        # Normalising a zero vector would silently yield a NaN quaternion.
        raise ValueError(
            "eye_pos and target_pt must be distinct points to define a "
            "viewing direction."
        )
    forward_vec = forward_vec / forward_norm

    temp_right_vec = np.cross(forward_vec, world_up_vec)
    if np.linalg.norm(temp_right_vec) < 1e-6:
        # Looking straight up or down: the cross product vanishes, so fall
        # back to a fixed right axis.
        temp_right_vec = np.array([1.0, 0.0, 0.0], dtype=np.float32)
    right_vec = temp_right_vec / np.linalg.norm(temp_right_vec)
    up_vec = np.cross(right_vec, forward_vec)

    # Columns of the rotation matrix are the camera axes in world space.
    rotation_matrix = np.array([forward_vec, -right_vec, up_vec]).T
    scipy_quat = R.from_matrix(rotation_matrix).as_quat()
    # SciPy returns (x, y, z, w); reorder to (w, x, y, z).
    quat = [
        float(scipy_quat[3]),
        float(scipy_quat[0]),
        float(scipy_quat[1]),
        float(scipy_quat[2]),
    ]

    return scene_manager.create_camera(
        cam_name,
        pose=sapien.Pose(p=eye_pos.tolist(), q=quat),
        image_hw=image_hw,
        fovy_deg=fovy_deg,
    )
968
+
969
+
970
def create_panda_agent(
    scene: sapien.Scene,
    control_freq: int,
    sim_backend: str,
    render_backend: str,
    initial_qpos: np.ndarray | None = None,
    control_mode: str = "pd_joint_pos",
) -> BaseAgent:
    """Create a ManiSkill Panda agent attached to a SAPIEN scene.

    Args:
        scene: Scene wrapped in a ``ManiSkillScene`` for the agent.
        control_freq: Control frequency passed to the agent class.
        sim_backend: Simulation backend name; also used as the scene device.
        render_backend: Render backend name.
        initial_qpos: Joint positions to reset to. When ``None`` a built-in
            nine-value configuration is used.
        control_mode: Control mode passed to the agent class.

    Returns:
        The agent after reset, with ``init_qpos`` set to the robot's current
        qpos and the ``"gripper"`` controller reset.
    """
    from mani_skill.agents import REGISTERED_AGENTS
    from mani_skill.envs.utils.system.backend import (
        parse_sim_and_render_backend,
    )

    if initial_qpos is None:
        default_qpos = [
            0.0,
            np.pi / 8,
            0.0,
            -np.pi * 3 / 8,
            0.0,
            np.pi * 3 / 4,
            np.pi / 4,
            0.04,
            0.04,
        ]
        initial_qpos = np.array(default_qpos, dtype=np.float32)

    backend = parse_sim_and_render_backend(sim_backend, render_backend)
    ms_scene = ManiSkillScene([scene], device=sim_backend, backend=backend)
    panda_cls = REGISTERED_AGENTS["panda"].agent_cls
    origin_pose = sapien.Pose([0, 0, 0], [1, 0, 0, 0])
    agent = panda_cls(
        scene=ms_scene,
        control_freq=control_freq,
        control_mode=control_mode,
        initial_pose=origin_pose,
    )

    # Add a leading axis before resetting; copy so the caller's array is
    # not shared with the agent.
    batched_qpos = initial_qpos[None, ...].copy()
    agent.reset(batched_qpos)
    agent.init_qpos = agent.robot.qpos
    agent.controller.controllers["gripper"].reset()

    return agent
embodied_gen/utils/trender.py CHANGED
@@ -49,16 +49,8 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
49
  renderer.rendering_options.far = options.get("far", 100)
50
  renderer.rendering_options.ssaa = options.get("ssaa", 4)
51
  rets = {}
52
- import time as _time, sys as _sys
53
- _renderer_t0 = _time.time()
54
- _i = -1
55
- for _i, (extr, intr) in enumerate(tqdm(zip(extrinsics, intrinsics), desc="Rendering")):
56
- _t0 = _time.time()
57
  res = renderer.render(sample, extr, intr)
58
- if torch.cuda.is_available():
59
- torch.cuda.synchronize()
60
- if _i < 3:
61
- _sys.stderr.write(f"[STAGE] render_mesh frame {_i} took {_time.time()-_t0:.2f}s\n"); _sys.stderr.flush()
62
  if "normal" not in rets:
63
  rets["normal"] = []
64
  normal = torch.lerp(
@@ -68,7 +60,6 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
68
  normal.detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255
69
  ).astype(np.uint8)
70
  rets["normal"].append(normal)
71
- _sys.stderr.write(f"[STAGE] render_mesh total {_i+1} frames took {_time.time()-_renderer_t0:.2f}s\n"); _sys.stderr.flush()
72
 
73
  return rets
74
 
@@ -112,22 +103,13 @@ def render_gs_frames(
112
  if verbose:
113
  iterator = tqdm(iterator, total=len(extrinsics), desc="Rendering")
114
 
115
- import time as _time, sys as _sys
116
- _renderer_t0 = _time.time()
117
- _i = -1
118
- for _i, (extr, intr) in enumerate(iterator):
119
- _t0 = _time.time()
120
  res = renderer.render(
121
  sample, extr, intr, colors_overwrite=colors_overwrite
122
  )
123
- if torch.cuda.is_available():
124
- torch.cuda.synchronize()
125
- if _i < 3:
126
- _sys.stderr.write(f"[STAGE] render_gs frame {_i} took {_time.time()-_t0:.2f}s\n"); _sys.stderr.flush()
127
  outputs["color"].append(to_img(res["color"]))
128
  depth = res.get("percent_depth") or res.get("depth")
129
  outputs["depth"].append(to_numpy(depth) if depth is not None else None)
130
- _sys.stderr.write(f"[STAGE] render_gs total {_i+1} frames took {_time.time()-_renderer_t0:.2f}s\n"); _sys.stderr.flush()
131
 
132
  return dict(outputs)
133
 
 
49
  renderer.rendering_options.far = options.get("far", 100)
50
  renderer.rendering_options.ssaa = options.get("ssaa", 4)
51
  rets = {}
52
+ for extr, intr in tqdm(zip(extrinsics, intrinsics), desc="Rendering"):
 
 
 
 
53
  res = renderer.render(sample, extr, intr)
 
 
 
 
54
  if "normal" not in rets:
55
  rets["normal"] = []
56
  normal = torch.lerp(
 
60
  normal.detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255
61
  ).astype(np.uint8)
62
  rets["normal"].append(normal)
 
63
 
64
  return rets
65
 
 
103
  if verbose:
104
  iterator = tqdm(iterator, total=len(extrinsics), desc="Rendering")
105
 
106
+ for extr, intr in iterator:
 
 
 
 
107
  res = renderer.render(
108
  sample, extr, intr, colors_overwrite=colors_overwrite
109
  )
 
 
 
 
110
  outputs["color"].append(to_img(res["color"]))
111
  depth = res.get("percent_depth") or res.get("depth")
112
  outputs["depth"].append(to_numpy(depth) if depth is not None else None)
 
113
 
114
  return dict(outputs)
115
 
requirements.txt CHANGED
@@ -61,7 +61,7 @@ seaborn
61
  hydra-core
62
  modelscope
63
  timm
64
- open3d-cpu
65
  MoGe@git+https://github.com/microsoft/MoGe.git@a8c3734
66
 
67
  https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu128/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl
 
61
  hydra-core
62
  modelscope
63
  timm
64
+ open3d
65
  MoGe@git+https://github.com/microsoft/MoGe.git@a8c3734
66
 
67
  https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu128/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl