Spaces:

HorizonRobotics
/

EmbodiedGen-Image-to-3D

Running on Zero

App Files Files Community

xinjjj committed 10 days ago

Commit

023981c

1 Parent(s): cebb4ba

update

Browse files

Files changed (47) hide show

app.py +5 -11
common.py +10 -58
embodied_gen/scripts/eval_collision_success.py +813 -0
embodied_gen/scripts/room_gen/export_scene.py +198 -30
embodied_gen/scripts/room_gen/gen_room.py +2 -2
embodied_gen/scripts/room_gen/render_birdseye.py +271 -0
embodied_gen/scripts/room_gen/render_usd.py +1606 -0
embodied_gen/skills/README.md +62 -0
embodied_gen/skills/asset-converter/SKILL.md +88 -0
embodied_gen/skills/asset-creator/SKILL.md +96 -0
embodied_gen/skills/asset-retrieval/SKILL.md +87 -0
embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py +329 -0
embodied_gen/skills/asset-scale/SKILL.md +94 -0
embodied_gen/skills/asset-scale/__init__.py +36 -0
embodied_gen/skills/asset-scale/asset_scale.py +347 -0
embodied_gen/skills/background-creator/SKILL.md +70 -0
embodied_gen/skills/claude_adapter/.claude-plugin/marketplace.json +17 -0
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/.claude-plugin/plugin.json +8 -0
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/assets.md +32 -0
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/background.md +29 -0
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/convert.md +32 -0
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/layout.md +29 -0
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/process.md +29 -0
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/room.md +29 -0
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/sim.md +29 -0
embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/spatial.md +29 -0
embodied_gen/skills/layout-creator/SKILL.md +83 -0
embodied_gen/skills/room-creator/SKILL.md +83 -0
embodied_gen/skills/sim-runner/SKILL.md +72 -0
embodied_gen/skills/spatial-computing/README.md +59 -0
embodied_gen/skills/spatial-computing/REFERENCE.md +236 -0
embodied_gen/skills/spatial-computing/SKILL.md +374 -0
embodied_gen/skills/spatial-computing/__init__.py +31 -0
embodied_gen/skills/spatial-computing/api/__init__.py +34 -0
embodied_gen/skills/spatial-computing/api/floorplan_api.py +917 -0
embodied_gen/skills/spatial-computing/cli/__init__.py +7 -0
embodied_gen/skills/spatial-computing/cli/main.py +267 -0
embodied_gen/skills/spatial-computing/core/__init__.py +23 -0
embodied_gen/skills/spatial-computing/core/collector.py +1102 -0
embodied_gen/skills/spatial-computing/core/geometry.py +231 -0
embodied_gen/skills/spatial-computing/core/visualizer.py +231 -0
embodied_gen/utils/gpt_clients.py +15 -4
embodied_gen/utils/monkey_patch/gradio.py +5 -146
embodied_gen/utils/monkey_patch/infinigen.py +4 -6
embodied_gen/utils/simulation.py +192 -2
embodied_gen/utils/trender.py +2 -20
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -351,19 +351,13 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
         outputs=[is_samimage, single_sam_image_example, single_image_example],
     )
-    def _preprocess_image_dispatch(img, rmbg):
-        import sys, os
-        print(f"[ZGPU] upload handler ENTER pid={os.getpid()}", flush=True)
-        sys.stdout.flush()
-        out = preprocess_image_fn(img, rmbg, _enable_pre_resize_default)
-        print("[ZGPU] upload handler RETURN", flush=True)
-        sys.stdout.flush()
-        return out
     image_prompt.upload(
-        _preprocess_image_dispatch,
         inputs=[image_prompt, rmbg_tag],
         outputs=[image_prompt, raw_image_cache],
     ).success(
         active_btn_by_content,
         inputs=image_prompt,
@@ -542,4 +536,4 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
 if __name__ == "__main__":
-    demo.launch(ssr_mode=False)

         outputs=[is_samimage, single_sam_image_example, single_image_example],
     )
     image_prompt.upload(
+        lambda img, rmbg: preprocess_image_fn(
+            img, rmbg, _enable_pre_resize_default
+        ),
         inputs=[image_prompt, rmbg_tag],
         outputs=[image_prompt, raw_image_cache],
+        queue=False,
     ).success(
         active_btn_by_content,
         inputs=image_prompt,
 if __name__ == "__main__":
+    demo.launch()

common.py CHANGED Viewed

@@ -14,26 +14,15 @@
 # implied. See the License for the specific language governing
 # permissions and limitations under the License.
-# spaces 0.50 raises if CUDA was initialized before `import spaces`,
-# so the zerogpu-log injection (which touches site-packages files only)
-# must run first, then `import spaces`, then everything else.
-from embodied_gen.utils.monkey_patch.gradio import _patch_spaces_zerogpu_logs
-_patch_spaces_zerogpu_logs()
-import spaces  # noqa: E402
 from embodied_gen.utils.monkey_patch.gradio import (
-    _disable_xformers_flash3,
-    _neutralize_warp_in_parent,
     _patch_open3d_cuda_device_count_bug,
 )
-from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
-_neutralize_warp_in_parent()
 _patch_open3d_cuda_device_count_bug()
-_disable_xformers_flash3()
-monkey_path_trellis()
 import gc
 import logging
@@ -172,24 +161,13 @@ def end_session(req: gr.Request) -> None:
         shutil.rmtree(user_dir)
-@spaces.GPU(duration=30)
 def preprocess_image_fn(
     image: str | np.ndarray | Image.Image,
     rmbg_tag: str = "rembg",
     preprocess: bool = True,
 ) -> tuple[Image.Image, Image.Image]:
     """Preprocess image with lazy model initialization to avoid CUDA init at import time."""
-    import sys, os as _os
-    print(f"[ZGPU] preprocess_image_fn ENTER pid={_os.getpid()}", flush=True)
-    sys.stdout.flush()
-    try:
-        import torch as _torch
-        print(f"[ZGPU] torch.cuda.is_available={_torch.cuda.is_available()} device_count={_torch.cuda.device_count()}", flush=True)
-        _torch.zeros(1).cuda()
-        print("[ZGPU] torch.zeros(1).cuda() OK", flush=True)
-    except Exception as _e:
-        print(f"[ZGPU] cuda probe FAILED: {_e!r}", flush=True)
-    sys.stdout.flush()
     global _RBG_REMOVER, _RBG14_REMOVER
     if isinstance(image, str):
@@ -302,7 +280,7 @@ def select_point(
     return (image, masks), seg_image
-@spaces.GPU(duration=180)
 def image_to_3d(
     image: Image.Image,
     seed: int,
@@ -324,38 +302,17 @@ def image_to_3d(
     if isinstance(seg_image, np.ndarray):
         seg_image = Image.fromarray(seg_image)
-    import time as _time
-    def _t(tag, t0):
-        dt = _time.time() - t0
-        logger.info(f"[STAGE] {tag} took {dt:.2f}s")
-        return _time.time()
-    # Probe what xformers attention op is being used
-    try:
-        from xformers.ops.fmha import flash3 as _f3, flash as _f, cutlass as _c
-        logger.info(
-            f"[ATTN] flash3.disabled={bool(_f3.FwOp.not_supported_reasons.__func__(_f3.FwOp, None))} "
-            f"flash3 module loaded; flash & cutlass available too"
-        )
-    except Exception as _e:
-        logger.info(f"[ATTN] probe failed: {_e}")
     logger.info("Start generating 3D representation from image...")
     if isinstance(PIPELINE, Sam3dInference):
-        _t0 = _time.time()
         outputs = PIPELINE.run(
             seg_image,
             seed=seed,
             stage1_inference_steps=ss_sampling_steps,
             stage2_inference_steps=slat_sampling_steps,
         )
-        _t("Sam3dInference.run", _t0)
     else:
-        _t0 = _time.time()
         PIPELINE.cuda()
-        _t0 = _t("PIPELINE.cuda()", _t0)
         seg_image = trellis_preprocess(seg_image)
-        _t0 = _t("trellis_preprocess", _t0)
         outputs = PIPELINE.run(
             seg_image,
             seed=seed,
@@ -370,18 +327,13 @@ def image_to_3d(
                 "cfg_strength": slat_guidance_strength,
             },
         )
-        _t0 = _t("PIPELINE.run (TRELLIS inference)", _t0)
         # Set back to cpu for memory saving.
         PIPELINE.cpu()
-        _t("PIPELINE.cpu()", _t0)
-    _t0 = _time.time()
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]
     color_images = render_video(gs_model, r=1.85)["color"]
-    _t0 = _t("render_video color", _t0)
     normal_images = render_video(mesh_model, r=1.85)["normal"]
-    _t("render_video normal", _t0)
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(output_root, exist_ok=True)
@@ -629,7 +581,7 @@ def extract_urdf(
     )
-@spaces.GPU(duration=180)
 def text2image_fn(
     prompt: str,
     guidance_scale: float,
@@ -685,7 +637,7 @@ def text2image_fn(
     return save_paths + save_paths
-@spaces.GPU(duration=180)
 def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
@@ -701,7 +653,7 @@ def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
     return None, None, None
-@spaces.GPU(duration=180)
 def generate_texture_mvimages(
     prompt: str,
     controlnet_cond_scale: float = 0.55,
@@ -788,7 +740,7 @@ def backproject_texture(
     return output_glb_mesh, output_obj_mesh, zip_file
-@spaces.GPU(duration=180)
 def backproject_texture_v2(
     mesh_path: str,
     input_image: str,
@@ -835,7 +787,7 @@ def backproject_texture_v2(
     return output_glb_mesh, output_obj_mesh, zip_file
-@spaces.GPU(duration=180)
 def render_result_video(
     mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
 ) -> str:

 # implied. See the License for the specific language governing
 # permissions and limitations under the License.
+import spaces
+from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
+monkey_path_trellis()
 from embodied_gen.utils.monkey_patch.gradio import (
     _patch_open3d_cuda_device_count_bug,
 )
 _patch_open3d_cuda_device_count_bug()
 import gc
 import logging
         shutil.rmtree(user_dir)
+@spaces.GPU
 def preprocess_image_fn(
     image: str | np.ndarray | Image.Image,
     rmbg_tag: str = "rembg",
     preprocess: bool = True,
 ) -> tuple[Image.Image, Image.Image]:
     """Preprocess image with lazy model initialization to avoid CUDA init at import time."""
     global _RBG_REMOVER, _RBG14_REMOVER
     if isinstance(image, str):
     return (image, masks), seg_image
+@spaces.GPU
 def image_to_3d(
     image: Image.Image,
     seed: int,
     if isinstance(seg_image, np.ndarray):
         seg_image = Image.fromarray(seg_image)
     logger.info("Start generating 3D representation from image...")
     if isinstance(PIPELINE, Sam3dInference):
         outputs = PIPELINE.run(
             seg_image,
             seed=seed,
             stage1_inference_steps=ss_sampling_steps,
             stage2_inference_steps=slat_sampling_steps,
         )
     else:
         PIPELINE.cuda()
         seg_image = trellis_preprocess(seg_image)
         outputs = PIPELINE.run(
             seg_image,
             seed=seed,
                 "cfg_strength": slat_guidance_strength,
             },
         )
         # Set back to cpu for memory saving.
         PIPELINE.cpu()
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]
     color_images = render_video(gs_model, r=1.85)["color"]
     normal_images = render_video(mesh_model, r=1.85)["normal"]
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(output_root, exist_ok=True)
     )
+@spaces.GPU
 def text2image_fn(
     prompt: str,
     guidance_scale: float,
     return save_paths + save_paths
+@spaces.GPU
 def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
     return None, None, None
+@spaces.GPU
 def generate_texture_mvimages(
     prompt: str,
     controlnet_cond_scale: float = 0.55,
     return output_glb_mesh, output_obj_mesh, zip_file
+@spaces.GPU
 def backproject_texture_v2(
     mesh_path: str,
     input_image: str,
     return output_glb_mesh, output_obj_mesh, zip_file
+@spaces.GPU
 def render_result_video(
     mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
 ) -> str:

embodied_gen/scripts/eval_collision_success.py ADDED Viewed

	@@ -0,0 +1,813 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+import json
+import os
+from dataclasses import asdict, dataclass, field
+from typing import Optional
+import imageio
+import numpy as np
+import sapien.core as sapien
+import torch
+import trimesh
+import tyro
+from embodied_gen.utils.log import logger
+from embodied_gen.utils.simulation import (
+    FrankaPandaGrasper,
+    SapienSceneManager,
+    capture_frame,
+    create_panda_agent,
+    create_recording_camera,
+    estimate_grasp_width,
+    get_actor_bottom_z,
+    get_actor_mesh,
+    load_actor_from_urdf,
+    load_collision_mesh_from_urdf,
+    quat_from_yaw,
+    set_ground_base_color,
+)
+# Base color handed to set_ground_base_color for the ground plane
+# (four components; presumably RGBA in [0, 1] — TODO confirm).
+GROUND_BASE_COLOR = [0.78, 0.90, 0.72, 1.0]
+# The constants below are the defaults of _wait_until_actor_settled.
+# Number of gripper-hold actions executed between two settle checks.
+SETTLE_CHECK_INTERVAL = 10
+# Consecutive stable checks required before the wait loop stops early.
+SETTLE_STABLE_WINDOWS = 3
+# Largest bottom-z change between two checks that still counts as stable.
+SETTLE_BOTTOM_Z_TOL = 5e-4
+# Upper bound on the extra hold actions spent waiting for the object to settle.
+MAX_EXTRA_SETTLE_STEPS = 120
+@dataclass
+class TrialResult:
+    """Stores per-trial evaluation results.
+    The optional fields default to None and stay unset when a trial is
+    reported before the grasp phase (e.g. no reachable hover/descent pose).
+    """
+    # Yaw of the spawned object for this trial, in degrees.
+    yaw_deg: float
+    # Overall verdict of the trial.
+    success: bool
+    # Scale passed to load_actor_from_urdf when spawning the asset.
+    scale_factor: float
+    # Estimated grasp width of the asset, and that width times scale_factor.
+    object_width_before_scale: float
+    object_width_after_scale: float
+    # Bottom z of the object after the settle phase, before any grasp attempt.
+    settled_bottom_z: float
+    # Bottom z reported at the end of the trial; equals settled_bottom_z (with
+    # lift_delta 0.0) when the trial aborts before grasping.
+    final_bottom_z: float
+    # NOTE(review): presumably final_bottom_z - settled_bottom_z — confirm
+    # against the code that fills in successful trials.
+    lift_delta: float
+    # NOTE(review): the fields below look like they mirror _GraspTracker
+    # metrics and the pass/fail criteria derived from them — confirm.
+    peak_bottom_z: float | None = None
+    peak_lift_delta: float | None = None
+    lift_success_threshold: float | None = None
+    tcp_object_offset_range: float | None = None
+    sync_tol: float | None = None
+    final_lift_pass: bool | None = None
+    sync_pass: bool | None = None
+    # Path of the per-trial video, if one was requested.
+    video_path: str | None = None
+    # Free-form remark, e.g. which stage failed or which fallback was used.
+    note: str = ""
+@dataclass
+class EvalCollisionConfig:
+    """Configuration for the collision-mesh grasp evaluation.
+    ``output_path`` and ``video_path`` default to files inside a
+    ``grasp_trial`` directory next to ``urdf_path`` (see ``__post_init__``).
+    """
+    # URDF of the asset under evaluation.
+    urdf_path: str
+    # Result file; defaults to <urdf dir>/grasp_trial/collision_success_eval.json.
+    output_path: Optional[str] = None
+    # NOTE(review): presumably the number of yaw-varied trials — confirm in main.
+    num_trials: int = 4
+    # NOTE(review): presumably the inputs of _compute_scale_factor — confirm.
+    max_gripper_width: float = 0.075
+    gripper_clearance_ratio: float = 0.9
+    # Simulation and control frequencies given to the scene manager and agent.
+    sim_freq: int = 200
+    control_freq: int = 20
+    # Initial settle budget; divided by (sim_freq // control_freq) to obtain
+    # the number of gripper-hold actions.
+    settle_steps: int = 240
+    post_grasp_steps: int = 80
+    # Lift threshold: an explicit lift_success_height wins; otherwise the
+    # object height times lift_success_ratio, clipped to [min, max].
+    lift_success_height: Optional[float] = None
+    lift_success_ratio: float = 0.01
+    min_lift_success_height: float = 0.001
+    max_lift_success_height: float = 0.003
+    sync_tol: float = 0.02
+    # Grasp planning parameters forwarded to _build_grasp_stage_candidates.
+    approach_offset: float = 0.06
+    grasp_clearance: float = 0.004
+    grasp_height_ratio: float = 0.45
+    max_descent_from_top: float = 0.03
+    # Hover height above the object top, and hold actions spent there.
+    hover_offset: float = 0.12
+    hover_open_steps: int = 10
+    # Number of descent stages and the step cap given to each stage's motion.
+    descent_stage_count: int = 4
+    descent_n_max_step: int = 25
+    lift_distance: float = 0.10
+    # Number of actions used to close the gripper.
+    close_steps: int = 20
+    # Spawn position in x/y, and gap between the mesh bottom and z=0 at spawn.
+    object_x: float = 0.55
+    object_y: float = 0.0
+    z_offset: float = 0.005
+    sim_backend: str = "cpu"
+    render_backend: str = "gpu"
+    ray_tracing: bool = False
+    # Video recording options.
+    save_video: bool = True
+    # Base video file; defaults to <urdf dir>/grasp_trial/collision_success_eval.mp4.
+    video_path: Optional[str] = None
+    video_fps: int = 20
+    # Keep one frame every render_interval executed actions.
+    render_interval: int = 4
+    image_hw: tuple[int, int] = (512, 512)
+    def __post_init__(self) -> None:
+        """Fill in the default result and video paths when none were given."""
+        # Both defaults live in a "grasp_trial" folder beside the URDF file.
+        output_dir = os.path.join(
+            os.path.dirname(self.urdf_path), "grasp_trial"
+        )
+        if self.output_path is None:
+            self.output_path = os.path.join(
+                output_dir, "collision_success_eval.json"
+            )
+        if self.video_path is None:
+            self.video_path = os.path.join(
+                output_dir, "collision_success_eval.mp4"
+            )
+def _compute_scale_factor(
+    urdf_path: str,
+    max_gripper_width: float,
+    clearance_ratio: float,
+) -> tuple[float, float]:
+    """Compute a scale that fits the asset within the gripper width."""
+    mesh = load_collision_mesh_from_urdf(urdf_path)
+    grasp_width = estimate_grasp_width(mesh)
+    target_width = max_gripper_width * clearance_ratio
+    if grasp_width <= 1e-6:
+        raise ValueError(f"Invalid grasp width estimated from {urdf_path}")
+    scale = min(1.0, target_width / grasp_width)
+    return float(scale), float(grasp_width)
+def _compute_spawn_center_z(
+    mesh: trimesh.Trimesh,
+    scale_factor: float,
+    z_offset: float,
+) -> float:
+    """Compute actor-center z so the scaled mesh bottom is z_offset above z=0."""
+    local_bottom_z = float(mesh.bounds[0, 2] * scale_factor)
+    return z_offset - local_bottom_z
+def _compute_adaptive_lift_threshold(
+    actor: sapien.Entity,
+    ratio: float,
+    min_height: float,
+    max_height: float,
+    absolute_override: float | None = None,
+) -> float:
+    """Compute a robust lift threshold from the settled object height."""
+    if absolute_override is not None:
+        return float(absolute_override)
+    mesh = get_actor_mesh(actor)
+    object_height = float(mesh.bounds[1, 2] - mesh.bounds[0, 2])
+    adaptive_height = object_height * ratio
+    return float(np.clip(adaptive_height, min_height, max_height))
+def _build_trial_video_path(
+    video_path: str,
+    trial_idx: int,
+    yaw_deg: float,
+) -> str:
+    """Build a unique per-trial video path from the base output path."""
+    root, ext = os.path.splitext(video_path)
+    if not ext:
+        ext = ".mp4"
+    return f"{root}_trial{trial_idx:02d}_yaw{int(round(yaw_deg)):03d}{ext}"
+@dataclass
+class _GraspTracker:
+    """Tracks gripper-object sync metrics during the grasp/lift phase.
+    The lift_delta of the object alone is fragile: a bounced-away object can
+    momentarily rise high before falling back. By logging the per-step offset
+    between the object bottom and the gripper TCP, we can also verify that the
+    object actually moves together with the gripper after closing.
+    """
+    actor: sapien.Entity
+    grasper: FrankaPandaGrasper
+    peak_bottom_z: float | None = None
+    tcp_object_offsets: list[float] = field(default_factory=list)
+    def update(self) -> None:
+        bottom_z = get_actor_bottom_z(self.actor)
+        tcp_z = float(self.grasper.agent.tcp.pose[0].sp.p[2])
+        self.peak_bottom_z = (
+            bottom_z
+            if self.peak_bottom_z is None
+            else max(self.peak_bottom_z, bottom_z)
+        )
+        self.tcp_object_offsets.append(bottom_z - tcp_z)
+    @property
+    def offset_range(self) -> float:
+        if not self.tcp_object_offsets:
+            return 0.0
+        return float(
+            max(self.tcp_object_offsets) - min(self.tcp_object_offsets)
+        )
+def _execute_actions(
+    scene_manager: SapienSceneManager,
+    agent: object,
+    actions: np.ndarray,
+    control_freq: int,
+    camera: sapien.render.RenderCameraComponent | None = None,
+    render_interval: int = 1,
+    video_frames: list[np.ndarray] | None = None,
+    tracker: _GraspTracker | None = None,
+) -> None:
+    """Run a sequence of robot actions."""
+    sim_steps = max(1, scene_manager.sim_freq // control_freq)
+    cameras = [] if camera is None else [camera]
+    render_keys = [] if camera is None else ["Color"]
+    for idx, action in enumerate(actions):
+        frames = scene_manager.step_action(
+            agent,
+            torch.tensor(action[None, ...], dtype=torch.float32),
+            cameras=cameras,
+            render_keys=render_keys,
+            sim_steps_per_control=sim_steps,
+        )
+        if (
+            camera is not None
+            and video_frames is not None
+            and idx % max(1, render_interval) == 0
+        ):
+            video_frames.append(np.array(frames[camera.name][0]["Color"]))
+        if tracker is not None:
+            tracker.update()
+def _hold_gripper_state(
+    scene_manager: SapienSceneManager,
+    grasper: FrankaPandaGrasper,
+    gripper_state: int,
+    control_freq: int,
+    n_step: int,
+    camera: sapien.render.RenderCameraComponent | None = None,
+    render_interval: int = 1,
+    video_frames: list[np.ndarray] | None = None,
+    tracker: _GraspTracker | None = None,
+) -> None:
+    """Hold gripper open/close while stepping the scene.
+    Requests ``n_step`` actions from ``grasper.control_gripper`` and runs them
+    through ``_execute_actions`` on the grasper's agent.
+    Args:
+        scene_manager: Scene manager that steps the simulation.
+        grasper: Grasper that builds the actions and owns the agent.
+        gripper_state: Forwarded to ``control_gripper``; callers in this file
+            pass 1 while the gripper stays open and -1 to close it.
+        control_freq: Control frequency used to derive sim steps per action.
+        n_step: Number of hold actions to execute.
+        camera: Optional camera to render while stepping.
+        render_interval: Keep one frame every this many actions.
+        video_frames: Optional list that receives the rendered frames.
+        tracker: Optional tracker updated after every action.
+    """
+    hold_actions = grasper.control_gripper(
+        gripper_state=gripper_state,
+        n_step=n_step,
+    )
+    _execute_actions(
+        scene_manager,
+        grasper.agent,
+        hold_actions,
+        control_freq,
+        camera=camera,
+        render_interval=render_interval,
+        video_frames=video_frames,
+        tracker=tracker,
+    )
+def _wait_until_actor_settled(
+    scene_manager: SapienSceneManager,
+    grasper: FrankaPandaGrasper,
+    actor: sapien.Entity,
+    control_freq: int,
+    initial_bottom_z: float,
+    max_extra_steps: int = MAX_EXTRA_SETTLE_STEPS,
+    check_interval: int = SETTLE_CHECK_INTERVAL,
+    stable_windows: int = SETTLE_STABLE_WINDOWS,
+    bottom_z_tol: float = SETTLE_BOTTOM_Z_TOL,
+    camera: sapien.render.RenderCameraComponent | None = None,
+    render_interval: int = 1,
+    video_frames: list[np.ndarray] | None = None,
+) -> float:
+    """Wait until the dropped object is visually settled on the ground."""
+    remaining_steps = max(0, max_extra_steps)
+    previous_bottom_z = initial_bottom_z
+    stable_count = 0
+    while remaining_steps > 0 and stable_count < stable_windows:
+        n_step = min(check_interval, remaining_steps)
+        _hold_gripper_state(
+            scene_manager,
+            grasper,
+            gripper_state=1,
+            control_freq=control_freq,
+            n_step=n_step,
+            camera=camera,
+            render_interval=render_interval,
+            video_frames=video_frames,
+        )
+        current_bottom_z = get_actor_bottom_z(actor)
+        if abs(current_bottom_z - previous_bottom_z) <= bottom_z_tol:
+            stable_count += 1
+        else:
+            stable_count = 0
+        previous_bottom_z = current_bottom_z
+        remaining_steps -= n_step
+    return previous_bottom_z
+def _plan_scripted_grasp_stages(
+    grasper: FrankaPandaGrasper,
+    actor: sapien.Entity,
+    grasp_height_ratio: float,
+    grasp_clearance: float,
+    approach_offset: float,
+    lift_distance: float,
+    max_descent_from_top: float | None = None,
+) -> tuple[sapien.Pose, sapien.Pose, sapien.Pose]:
+    """Plan a simple top-down scripted grasp."""
+    mesh = get_actor_mesh(actor)
+    bounds = mesh.bounds
+    approaching = np.array([0.0, 0.0, -1.0])
+    center = bounds.mean(axis=0)
+    extents_xy = bounds[1, :2] - bounds[0, :2]
+    closing = (
+        np.array([1.0, 0.0, 0.0])
+        if extents_xy[0] <= extents_xy[1]
+        else np.array([0.0, 1.0, 0.0])
+    )
+    object_height = bounds[1, 2] - bounds[0, 2]
+    grasp_z = bounds[0, 2] + object_height * grasp_height_ratio
+    if max_descent_from_top is not None:
+        grasp_z = max(grasp_z, bounds[1, 2] - max_descent_from_top)
+    grasp_z = float(
+        np.clip(
+            grasp_z,
+            bounds[0, 2] + 0.015,
+            bounds[1, 2] - 0.005,
+        )
+    )
+    center = np.array([center[0], center[1], grasp_z + grasp_clearance])
+    grasp_pose = grasper.agent.build_grasp_pose(approaching, closing, center)
+    pre_grasp_pose = sapien.Pose(
+        p=grasp_pose.p + np.array([0.0, 0.0, approach_offset]),
+        q=grasp_pose.q,
+    )
+    lift_pose = sapien.Pose(
+        p=grasp_pose.p + np.array([0.0, 0.0, lift_distance]),
+        q=grasp_pose.q,
+    )
+    return pre_grasp_pose, grasp_pose, lift_pose
+def _build_grasp_stage_candidates(
+    grasper: FrankaPandaGrasper,
+    actor: sapien.Entity,
+    grasp_height_ratio: float,
+    grasp_clearance: float,
+    approach_offset: float,
+    lift_distance: float,
+    max_descent_from_top: float | None = None,
+) -> list[tuple[float, float, sapien.Pose, sapien.Pose, sapien.Pose]]:
+    """Build fallback grasp-stage candidates for tapered objects like bottles."""
+    ratio_candidates = [
+        grasp_height_ratio,
+        min(0.95, grasp_height_ratio + 0.08),
+        min(0.95, grasp_height_ratio + 0.16),
+    ]
+    clearance_candidates = [
+        grasp_clearance,
+        grasp_clearance + 0.004,
+        grasp_clearance + 0.008,
+    ]
+    candidates = []
+    seen_keys = set()
+    for ratio, clearance in zip(ratio_candidates, clearance_candidates):
+        key = (round(ratio, 4), round(clearance, 4))
+        if key in seen_keys:
+            continue
+        seen_keys.add(key)
+        pre_grasp_pose, grasp_pose, lift_pose = _plan_scripted_grasp_stages(
+            grasper,
+            actor,
+            grasp_height_ratio=ratio,
+            grasp_clearance=clearance,
+            approach_offset=approach_offset,
+            lift_distance=lift_distance,
+            max_descent_from_top=max_descent_from_top,
+        )
+        candidates.append(
+            (ratio, clearance, pre_grasp_pose, grasp_pose, lift_pose)
+        )
+    return candidates
+def _build_hover_pose(
+    actor: sapien.Entity,
+    grasp_pose: sapien.Pose,
+    hover_offset: float,
+) -> sapien.Pose:
+    """Build a hover pose at a fixed offset above the object top surface."""
+    mesh = get_actor_mesh(actor)
+    top_z = float(mesh.bounds[1, 2])
+    return sapien.Pose(
+        p=np.array([grasp_pose.p[0], grasp_pose.p[1], top_z + hover_offset]),
+        q=grasp_pose.q,
+    )
+def _build_descent_stage_poses(
+    grasp_pose: sapien.Pose,
+    hover_offset: float,
+    num_stages: int,
+) -> list[sapien.Pose]:
+    """Split the downward approach into multiple slow open-gripper stages."""
+    if num_stages <= 0:
+        return [grasp_pose]
+    stage_offsets = np.linspace(hover_offset, 0.0, num_stages + 1)[1:]
+    return [
+        sapien.Pose(
+            p=grasp_pose.p + np.array([0.0, 0.0, float(offset)]),
+            q=grasp_pose.q,
+        )
+        for offset in stage_offsets
+    ]
+def run_single_trial(
+    args: EvalCollisionConfig,
+    yaw_deg: float,
+    scale_factor: float,
+    grasp_width: float,
+    record_video: bool = False,
+    video_path: str | None = None,
+) -> TrialResult:
+    """Run one grasp trial with a fixed yaw."""
+    scene_manager = SapienSceneManager(
+        sim_freq=args.sim_freq,
+        ray_tracing=args.ray_tracing,
+        device=args.sim_backend,
+    )
+    scene = scene_manager.scene
+    set_ground_base_color(scene, GROUND_BASE_COLOR)
+    agent = create_panda_agent(
+        scene,
+        control_freq=args.control_freq,
+        sim_backend=args.sim_backend,
+        render_backend=args.render_backend,
+    )
+    video_frames: list[np.ndarray] | None = None
+    camera = None
+    if record_video:
+        video_frames = []
+        camera = create_recording_camera(
+            scene_manager,
+            eye_pos=[args.object_x - 0.32, args.object_y - 0.52, 0.56],
+            target_pt=[args.object_x - 0.01, args.object_y, 0.27],
+            image_hw=tuple(args.image_hw),
+            fovy_deg=60.0,
+        )
+    collision_mesh = load_collision_mesh_from_urdf(args.urdf_path)
+    spawn_center_z = _compute_spawn_center_z(
+        collision_mesh,
+        scale_factor=scale_factor,
+        z_offset=args.z_offset,
+    )
+    spawn_pose = sapien.Pose(
+        p=[
+            args.object_x,
+            args.object_y,
+            spawn_center_z,
+        ],
+        q=quat_from_yaw(yaw_deg),
+    )
+    actor = load_actor_from_urdf(
+        scene,
+        args.urdf_path,
+        pose=spawn_pose,
+        use_static=False,
+        update_mass=True,
+        scale=scale_factor,
+    )
+    if video_frames is not None and camera is not None:
+        video_frames.append(capture_frame(scene, camera))
+    grasper = FrankaPandaGrasper(agent, control_freq=args.control_freq)
+    _hold_gripper_state(
+        scene_manager,
+        grasper,
+        gripper_state=1,
+        control_freq=args.control_freq,
+        n_step=max(
+            1,
+            args.settle_steps
+            // max(1, scene_manager.sim_freq // args.control_freq),
+        ),
+        camera=camera,
+        render_interval=args.render_interval,
+        video_frames=video_frames,
+    )
+    settled_bottom_z = get_actor_bottom_z(actor)
+    settled_bottom_z = _wait_until_actor_settled(
+        scene_manager,
+        grasper,
+        actor,
+        control_freq=args.control_freq,
+        initial_bottom_z=settled_bottom_z,
+        camera=camera,
+        render_interval=args.render_interval,
+        video_frames=video_frames,
+    )
+    grasp_candidates = _build_grasp_stage_candidates(
+        grasper,
+        actor,
+        grasp_height_ratio=args.grasp_height_ratio,
+        grasp_clearance=args.grasp_clearance,
+        approach_offset=args.approach_offset,
+        lift_distance=args.lift_distance,
+        max_descent_from_top=args.max_descent_from_top,
+    )
+    selected_lift_pose = None
+    selected_candidate_note = ""
+    grasp_stage_failure_note = "failed to reach pre-grasp pose"
+    for candidate_idx, candidate in enumerate(grasp_candidates):
+        (
+            candidate_ratio,
+            candidate_clearance,
+            _pre_grasp_pose,
+            grasp_pose,
+            lift_pose,
+        ) = candidate
+        hover_pose = _build_hover_pose(
+            actor, grasp_pose, hover_offset=args.hover_offset
+        )
+        hover_actions = grasper.move_to_pose(
+            hover_pose,
+            grasper.control_timestep,
+            gripper_state=1,
+            n_max_step=80,
+        )
+        if hover_actions is None:
+            grasp_stage_failure_note = "failed to reach hover pose"
+            continue
+        _execute_actions(
+            scene_manager,
+            agent,
+            hover_actions,
+            args.control_freq,
+            camera=camera,
+            render_interval=args.render_interval,
+            video_frames=video_frames,
+        )
+        _hold_gripper_state(
+            scene_manager,
+            grasper,
+            gripper_state=1,
+            control_freq=args.control_freq,
+            n_step=args.hover_open_steps,
+            camera=camera,
+            render_interval=args.render_interval,
+            video_frames=video_frames,
+        )
+        descent_failed = False
+        for descent_pose in _build_descent_stage_poses(
+            grasp_pose,
+            hover_offset=args.hover_offset,
+            num_stages=args.descent_stage_count,
+        ):
+            descent_actions = grasper.move_to_pose(
+                descent_pose,
+                grasper.control_timestep,
+                gripper_state=1,
+                n_max_step=args.descent_n_max_step,
+            )
+            if descent_actions is None:
+                descent_failed = True
+                grasp_stage_failure_note = (
+                    "failed during slow descent to grasp pose"
+                )
+                break
+            _execute_actions(
+                scene_manager,
+                agent,
+                descent_actions,
+                args.control_freq,
+                camera=camera,
+                render_interval=args.render_interval,
+                video_frames=video_frames,
+            )
+        if descent_failed:
+            continue
+        _hold_gripper_state(
+            scene_manager,
+            grasper,
+            gripper_state=1,
+            control_freq=args.control_freq,
+            n_step=2,
+            camera=camera,
+            render_interval=args.render_interval,
+            video_frames=video_frames,
+        )
+        selected_lift_pose = lift_pose
+        selected_candidate_note = (
+            ""
+            if candidate_idx == 0
+            else (
+                f"fallback grasp candidate ratio={candidate_ratio:.2f}, "
+                f"clearance={candidate_clearance:.3f}"
+            )
+        )
+        break
+    if selected_lift_pose is None:
+        if video_frames is not None and video_path is not None:
+            os.makedirs(os.path.dirname(video_path), exist_ok=True)
+            imageio.mimsave(video_path, video_frames, fps=args.video_fps)
+        return TrialResult(
+            yaw_deg=yaw_deg,
+            success=False,
+            scale_factor=scale_factor,
+            object_width_before_scale=grasp_width,
+            object_width_after_scale=grasp_width * scale_factor,
+            settled_bottom_z=settled_bottom_z,
+            final_bottom_z=settled_bottom_z,
+            lift_delta=0.0,
+            video_path=video_path,
+            note=grasp_stage_failure_note,
+        )
+    lift_success_threshold = _compute_adaptive_lift_threshold(
+        actor,
+        ratio=args.lift_success_ratio,
+        min_height=args.min_lift_success_height,
+        max_height=args.max_lift_success_height,
+        absolute_override=args.lift_success_height,
+    )
+    tracker = _GraspTracker(actor=actor, grasper=grasper)
+    close_actions = grasper.control_gripper(
+        gripper_state=-1,
+        n_step=args.close_steps,
+    )
+    _execute_actions(
+        scene_manager,
+        agent,
+        close_actions,
+        args.control_freq,
+        camera=camera,
+        render_interval=args.render_interval,
+        video_frames=video_frames,
+        tracker=tracker,
+    )
+    stage_note = "ok"
+    lift_actions = grasper.move_to_pose(
+        selected_lift_pose,
+        grasper.control_timestep,
+        gripper_state=-1,
+        n_max_step=50,
+    )
+    if lift_actions is not None:
+        _execute_actions(
+            scene_manager,
+            agent,
+            lift_actions,
+            args.control_freq,
+            camera=camera,
+            render_interval=args.render_interval,
+            video_frames=video_frames,
+            tracker=tracker,
+        )
+    else:
+        stage_note = "failed to lift after closing"
+    _hold_gripper_state(
+        scene_manager,
+        grasper,
+        gripper_state=-1,
+        control_freq=args.control_freq,
+        n_step=args.post_grasp_steps,
+        camera=camera,
+        render_interval=args.render_interval,
+        video_frames=video_frames,
+        tracker=tracker,
+    )
+    final_bottom_z = get_actor_bottom_z(actor)
+    lift_delta = final_bottom_z - settled_bottom_z
+    peak_bottom_z = (
+        final_bottom_z
+        if tracker.peak_bottom_z is None
+        else tracker.peak_bottom_z
+    )
+    peak_lift_delta = peak_bottom_z - settled_bottom_z
+    offset_range = tracker.offset_range
+    final_lift_pass = bool(lift_delta >= lift_success_threshold)
+    sync_pass = bool(offset_range <= args.sync_tol)
+    success = bool(final_lift_pass and sync_pass)
+    if video_frames is not None and camera is not None:
+        video_frames.append(capture_frame(scene, camera))
+    if video_frames is not None and video_path is not None:
+        os.makedirs(os.path.dirname(video_path), exist_ok=True)
+        imageio.mimsave(video_path, video_frames, fps=args.video_fps)
+    if stage_note != "ok":
+        note = stage_note
+    elif success:
+        note = selected_candidate_note or "ok"
+    elif not final_lift_pass and not sync_pass:
+        note = "object dropped and decoupled from gripper"
+    elif not final_lift_pass:
+        note = "object did not stay lifted (likely bounced or dropped)"
+    else:
+        note = "object did not move synchronously with gripper"
+    return TrialResult(
+        yaw_deg=yaw_deg,
+        success=success,
+        scale_factor=scale_factor,
+        object_width_before_scale=grasp_width,
+        object_width_after_scale=grasp_width * scale_factor,
+        settled_bottom_z=settled_bottom_z,
+        final_bottom_z=final_bottom_z,
+        lift_delta=lift_delta,
+        peak_bottom_z=peak_bottom_z,
+        peak_lift_delta=peak_lift_delta,
+        lift_success_threshold=lift_success_threshold,
+        tcp_object_offset_range=offset_range,
+        sync_tol=args.sync_tol,
+        final_lift_pass=final_lift_pass,
+        sync_pass=sync_pass,
+        video_path=video_path,
+        note=note,
+    )
+def entrypoint(**kwargs) -> dict:
+    """Run collision-success evaluation for a URDF asset.
+
+    One grasp trial is run per yaw angle, with ``num_trials`` angles evenly
+    spaced over [0, 360) degrees. The aggregated report is written as JSON
+    to ``args.output_path`` and also returned.
+
+    Args:
+        **kwargs: Field values for ``EvalCollisionConfig``. When empty, the
+            configuration is parsed from the command line with ``tyro``.
+            When given without ``urdf_path``, the placeholder
+            ``"__dummy__.urdf"`` is filled in before the config is built.
+
+    Returns:
+        The report dictionary that was serialized to ``args.output_path``.
+
+    Raises:
+        FileNotFoundError: If ``args.urdf_path`` does not exist.
+    """
+    if kwargs:
+        kwargs.setdefault("urdf_path", "__dummy__.urdf")
+        args = EvalCollisionConfig(**kwargs)
+    else:
+        args = tyro.cli(EvalCollisionConfig)
+    if not os.path.exists(args.urdf_path):
+        raise FileNotFoundError(f"URDF file not found: {args.urdf_path}")
+    logger.info(
+        f"Start collision-success eval: urdf={args.urdf_path}, "
+        f"num_trials={args.num_trials}, sync_tol={args.sync_tol}, "
+        f"output={args.output_path}"
+    )
+    scale_factor, grasp_width = _compute_scale_factor(
+        args.urdf_path,
+        max_gripper_width=args.max_gripper_width,
+        clearance_ratio=args.gripper_clearance_ratio,
+    )
+    # endpoint=False keeps 360 degrees out, so 0 is not evaluated twice.
+    yaw_values = np.linspace(0, 360, args.num_trials, endpoint=False)
+    trials = [
+        run_single_trial(
+            args,
+            float(yaw_deg),
+            scale_factor,
+            grasp_width,
+            record_video=args.save_video,
+            video_path=(
+                _build_trial_video_path(args.video_path, idx, float(yaw_deg))
+                if args.save_video
+                else None
+            ),
+        )
+        for idx, yaw_deg in enumerate(yaw_values)
+    ]
+    success_count = sum(int(trial.success) for trial in trials)
+    result = {
+        "urdf_path": args.urdf_path,
+        "num_trials": args.num_trials,
+        "num_success": success_count,
+        # max(1, ...) avoids a division by zero when no trials were requested.
+        "collision_success_rate": success_count / max(1, args.num_trials),
+        "scale_factor": scale_factor,
+        "estimated_grasp_width_before_scale": grasp_width,
+        "estimated_grasp_width_after_scale": grasp_width * scale_factor,
+        "video_path": args.video_path if args.save_video else None,
+        "trial_video_paths": [
+            trial.video_path
+            for trial in trials
+            if trial.video_path is not None
+        ],
+        "trials": [asdict(trial) for trial in trials],
+    }
+    output_dir = os.path.dirname(args.output_path)
+    # A bare filename has an empty dirname, and os.makedirs("") raises
+    # FileNotFoundError; the current directory needs no creation anyway.
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+    with open(args.output_path, "w", encoding="utf-8") as f:
+        json.dump(result, f, indent=2)
+    logger.info(f"Collision success report saved to {args.output_path}")
+    return result
+# Script entry point: called without keyword arguments, entrypoint() parses
+# its configuration from the command line.
+if __name__ == "__main__":
+    entrypoint()

embodied_gen/scripts/room_gen/export_scene.py CHANGED Viewed

@@ -31,6 +31,7 @@ import gin
 import numpy as np
 import trimesh
 from infinigen.core.util import blender as butil
 logger = logging.getLogger(__name__)
@@ -213,15 +214,162 @@ def clean_names(obj=None):
 def remove_obj_parents(obj=None):
     if obj is not None:
-        old_location = obj.matrix_world.to_translation()
         obj.parent = None
-        obj.matrix_world.translation = old_location
         return
     for obj in bpy.data.objects:
-        old_location = obj.matrix_world.to_translation()
         obj.parent = None
-        obj.matrix_world.translation = old_location
 def delete_objects():
@@ -744,21 +892,12 @@ def triangulate_meshes():
 def adjust_wattages():
-    logger.info("Adjusting light wattage")
     for obj in bpy.context.scene.objects:
         if obj.type == "LIGHT" and obj.data.type == "POINT":
             light = obj.data
-            if hasattr(light, "energy") and hasattr(light, "shadow_soft_size"):
-                X = light.energy
-                r = light.shadow_soft_size
-                # candelas * 1000 / (4 * math.pi * r**2). additionally units come out of blender at 1/100 scale
-                new_wattage = (
-                    (X * 20 / (4 * math.pi))
-                    * 1000
-                    / (4 * math.pi * r**2)
-                    * 100
-                )
-                light.energy = new_wattage
 def set_center_of_mass():
@@ -862,10 +1001,15 @@ def bake_scene(folderPath: Path, image_res, vertex_colors, export_usd):
 def run_blender_export(
-    exportPath: Path, format: str, vertex_colors: bool, individual_export: bool
 ):
     assert exportPath.parent.exists()
     exportPath = str(exportPath)
     if format == "obj":
         if vertex_colors:
@@ -913,14 +1057,18 @@ def run_blender_export(
         )
     if format in ["usda", "usdc"]:
-        bpy.ops.wm.usd_export(
-            filepath=exportPath,
-            export_textures=True,
-            # use_instancing=True,
-            overwrite_textures=True,
-            selected_objects_only=individual_export,
-            root_prim_path="/World",
-        )
 def export_scene(
@@ -1210,14 +1358,20 @@ def export_curr_scene(
     task_uniqname=None,
     deconvex=False,
     center_scene=False,
     align_quat=(0.7071, 0, 0, 0.7071),  # xyzw
 ) -> Path:
     export_usd = format in ["usda", "usdc"]
     export_folder = output_folder
     export_folder.mkdir(exist_ok=True)
     export_file = export_folder / output_folder.with_suffix(f".{format}").name
     logger.info(f"Exporting to directory {export_folder=}")
     remove_obj_parents()
     delete_objects()
     triangulate_meshes()
@@ -1289,7 +1443,7 @@ def export_curr_scene(
     # iterate through all objects and bake them
     bake_scene(
-        folderPath=export_folder / "textures",
         image_res=image_res,
         vertex_colors=vertex_colors,
         export_usd=export_usd,
@@ -1377,7 +1531,11 @@ def export_curr_scene(
                 bpy.ops.object.modifier_apply(modifier=dec_mod.name)
             run_blender_export(
-                export_file, format, vertex_colors, individual_export
             )
             obj.select_set(False)
@@ -1443,9 +1601,17 @@ def export_curr_scene(
         return urdf_path
     else:
         logger.info(f"Exporting file to {export_file=}")
-        run_blender_export(
-            export_file, format, vertex_colors, individual_export
-        )
         return export_file
@@ -1470,6 +1636,7 @@ def main(args):
             omniverse_export=args.omniverse,
             deconvex=args.deconvex,
             center_scene=args.center_scene,
         )
     bpy.ops.wm.quit_blender()
@@ -1489,6 +1656,7 @@ def make_args():
     parser.add_argument("-o", "--omniverse", action="store_true")
     parser.add_argument("--deconvex", action="store_true")
     parser.add_argument("--center_scene", action="store_true")
     args = parser.parse_args()

 import numpy as np
 import trimesh
 from infinigen.core.util import blender as butil
+from mathutils import Vector
 logger = logging.getLogger(__name__)
 def remove_obj_parents(obj=None):
+    """Clear the parent of ``obj``, or of every object when ``obj`` is None.
+
+    The world matrix is copied before unparenting and written back
+    afterwards, so each object keeps the world matrix it had while parented.
+    """
     if obj is not None:
+        world_matrix = obj.matrix_world.copy()
         obj.parent = None
+        obj.matrix_world = world_matrix
         return
+    # No object given: apply the same unparenting to all of bpy.data.objects.
     for obj in bpy.data.objects:
+        world_matrix = obj.matrix_world.copy()
         obj.parent = None
+        obj.matrix_world = world_matrix
+def remove_placeholder_area_lights() -> int:
+    """Delete placeholder window area lights and return how many were removed.
+
+    An object is removed only when all of the following hold: it is an AREA
+    light, its parent's name contains "WindowFactory", its own name starts
+    with "Area", its energy is 10.0 (within 1e-4), and its world-space
+    translation is the origin (within 1e-4).
+    """
+    def _is_placeholder(candidate) -> bool:
+        # The type test comes first so light data is only read on lights.
+        if candidate.type != "LIGHT" or candidate.data.type != "AREA":
+            return False
+        parent = candidate.parent
+        if parent is None or "WindowFactory" not in parent.name:
+            return False
+        if not candidate.name.startswith("Area"):
+            return False
+        energy = float(candidate.data.energy)
+        if not math.isclose(energy, 10.0, abs_tol=1e-4):
+            return False
+        location = np.array(candidate.matrix_world.translation)
+        return bool(np.allclose(location, 0.0, atol=1e-4))
+    removed_count = 0
+    # Iterate over a snapshot because objects are removed during the loop.
+    for candidate in list(bpy.data.objects):
+        if _is_placeholder(candidate):
+            bpy.data.objects.remove(candidate, do_unlink=True)
+            removed_count += 1
+    if removed_count:
+        logger.info(
+            "Removed placeholder window area lights before export: "
+            f"{removed_count}"
+        )
+    return removed_count
+def _get_export_scene_bounds() -> Optional[Tuple[np.ndarray, np.ndarray]]:
+    """Return the world-space (min, max) corners over the exported meshes.
+
+    Only MESH objects that have vertices, are not hidden from render, and are
+    part of the current context's view layer contribute their bounding-box
+    corners. Returns None when no object qualifies.
+    """
+    in_view_layer = set(bpy.context.view_layer.objects)
+    def _contributes(obj) -> bool:
+        return (
+            obj.type == "MESH"
+            and obj.data is not None
+            and bool(obj.data.vertices)
+            and not obj.hide_render
+            and obj in in_view_layer
+        )
+    corners = []
+    for obj in bpy.data.objects:
+        if _contributes(obj):
+            # bound_box is in object space; move each corner to world space.
+            corners.extend(
+                np.array(obj.matrix_world @ Vector(corner))
+                for corner in obj.bound_box
+            )
+    if not corners:
+        return None
+    stacked = np.stack(corners)
+    return stacked.min(axis=0), stacked.max(axis=0)
+def _get_world_background_strength() -> float:
+    """Return the largest BACKGROUND-node strength of the scene world.
+
+    Falls back to 0.25 when the scene has no world, the world does not use
+    nodes, or its node tree contains no BACKGROUND node.
+    """
+    fallback = 0.25
+    world = bpy.context.scene.world
+    if world is None or not world.use_nodes:
+        return fallback
+    return max(
+        (
+            float(node.inputs["Strength"].default_value)
+            for node in world.node_tree.nodes
+            if node.type == "BACKGROUND"
+        ),
+        default=fallback,
+    )
+def _get_world_sky_rotation() -> tuple[float, float]:
+    """Return a pair of sun angles in radians taken from the world sky node.
+
+    The first TEX_SKY node found supplies the values: the first angle is
+    pi/2 minus the node's ``sun_elevation`` (35 degrees if the attribute is
+    missing) and the second is its ``sun_rotation`` (0 if missing). Without a
+    node-based world or a TEX_SKY node the result is (55 degrees, 0.0).
+    """
+    fallback = (math.radians(55.0), 0.0)
+    world = bpy.context.scene.world
+    if world is None or not world.use_nodes:
+        return fallback
+    sky_node = next(
+        (node for node in world.node_tree.nodes if node.type == "TEX_SKY"),
+        None,
+    )
+    if sky_node is None:
+        return fallback
+    elevation = float(
+        getattr(sky_node, "sun_elevation", math.radians(35.0))
+    )
+    rotation = float(getattr(sky_node, "sun_rotation", 0.0))
+    return (math.pi * 0.5 - elevation, rotation)
+def add_world_export_lights(
+    world_strength: float = 8.0,
+) -> list[bpy.types.Object]:
+    """Add a temporary SUN and AREA light above the scene and return them.
+
+    Both lights sit over the centre of the export bounding box: the sun one
+    bounding-box diagonal above its top, the disk-shaped area light half a
+    diagonal above it. Their energies scale with the larger of the world
+    background strength and ``world_strength``.
+
+    Args:
+        world_strength: Lower bound for the strength used to size the lights.
+
+    Returns:
+        The created light objects, or an empty list when the scene has no
+        world or no export bounds could be computed.
+    """
+    if bpy.context.scene.world is None:
+        return []
+    bounds = _get_export_scene_bounds()
+    if bounds is None:
+        return []
+    lower, upper = bounds
+    center = (lower + upper) * 0.5
+    diagonal = float(np.linalg.norm(upper - lower))
+    strength = max(_get_world_background_strength(), world_strength)
+    sun_pitch, sun_yaw = _get_world_sky_rotation()
+    def _spawn(light_type, height, rotation, name):
+        # light_add makes the new light the context object.
+        bpy.ops.object.light_add(
+            type=light_type,
+            location=(float(center[0]), float(center[1]), float(height)),
+            rotation=rotation,
+        )
+        light_obj = bpy.context.object
+        light_obj.name = name
+        return light_obj
+    sun = _spawn(
+        "SUN",
+        upper[2] + diagonal,
+        (sun_pitch, 0.0, sun_yaw),
+        "__EXPORT_WORLD_SUN__",
+    )
+    sun.data.energy = max(strength * 2.0, 0.5)
+    area = _spawn(
+        "AREA",
+        upper[2] + 0.5 * diagonal,
+        (0.0, 0.0, 0.0),
+        "__EXPORT_WORLD_AREA__",
+    )
+    area.data.shape = "DISK"
+    area.data.size = max(diagonal, 2.0)
+    area.data.energy = max(strength * 2500.0, 500.0)
+    created_lights = [sun, area]
+    logger.info(
+        "Added temporary world export lights: "
+        f"{[light.name for light in created_lights]}"
+    )
+    return created_lights
+def remove_temp_export_objects(objects: list[bpy.types.Object]) -> None:
+    """Remove the given temporary objects from ``bpy.data.objects``.
+
+    ``None`` entries and objects whose name is no longer registered in
+    ``bpy.data.objects`` are skipped.
+    """
+    for temp_obj in objects:
+        if temp_obj is not None and temp_obj.name in bpy.data.objects:
+            bpy.data.objects.remove(temp_obj, do_unlink=True)
 def delete_objects():
 def adjust_wattages():
+    """Visit every POINT light in the scene without rescaling its energy.
+
+    Each light's ``energy`` is written back as ``float(energy)``, so the
+    stored value is left as it was.
+    """
+    logger.info("Keeping original point light wattage for USD export")
     for obj in bpy.context.scene.objects:
         if obj.type == "LIGHT" and obj.data.type == "POINT":
             light = obj.data
+            if hasattr(light, "energy"):
+                # Re-assigns the current value; no conversion factor applied.
+                light.energy = float(light.energy)
 def set_center_of_mass():
 def run_blender_export(
+    exportPath: Path,
+    format: str,
+    vertex_colors: bool,
+    individual_export: bool,
+    world_strength: float = 8.0,
 ):
     assert exportPath.parent.exists()
     exportPath = str(exportPath)
+    temp_export_objects: list[bpy.types.Object] = []
     if format == "obj":
         if vertex_colors:
         )
     if format in ["usda", "usdc"]:
+        temp_export_objects = add_world_export_lights(world_strength)
+        try:
+            bpy.ops.wm.usd_export(
+                filepath=exportPath,
+                export_textures=True,
+                # use_instancing=True,
+                overwrite_textures=True,
+                selected_objects_only=individual_export,
+                root_prim_path="/World",
+            )
+        finally:
+            remove_temp_export_objects(temp_export_objects)
 def export_scene(
     task_uniqname=None,
     deconvex=False,
     center_scene=False,
+    world_strength=8.0,
     align_quat=(0.7071, 0, 0, 0.7071),  # xyzw
 ) -> Path:
     export_usd = format in ["usda", "usdc"]
     export_folder = output_folder
     export_folder.mkdir(exist_ok=True)
     export_file = export_folder / output_folder.with_suffix(f".{format}").name
+    texture_export_folder = export_folder / "textures"
+    bake_texture_folder = texture_export_folder
+    if export_usd:
+        bake_texture_folder = export_folder / "_usd_bake_textures"
     logger.info(f"Exporting to directory {export_folder=}")
+    remove_placeholder_area_lights()
     remove_obj_parents()
     delete_objects()
     triangulate_meshes()
     # iterate through all objects and bake them
     bake_scene(
+        folderPath=bake_texture_folder,
         image_res=image_res,
         vertex_colors=vertex_colors,
         export_usd=export_usd,
                 bpy.ops.object.modifier_apply(modifier=dec_mod.name)
             run_blender_export(
+                export_file,
+                format,
+                vertex_colors,
+                individual_export,
+                world_strength=world_strength,
             )
             obj.select_set(False)
         return urdf_path
     else:
         logger.info(f"Exporting file to {export_file=}")
+        try:
+            run_blender_export(
+                export_file,
+                format,
+                vertex_colors,
+                individual_export,
+                world_strength=world_strength,
+            )
+        finally:
+            if export_usd and bake_texture_folder.exists():
+                shutil.rmtree(bake_texture_folder, ignore_errors=True)
         return export_file
             omniverse_export=args.omniverse,
             deconvex=args.deconvex,
             center_scene=args.center_scene,
+            world_strength=args.world_strength,
         )
     bpy.ops.wm.quit_blender()
     parser.add_argument("-o", "--omniverse", action="store_true")
     parser.add_argument("--deconvex", action="store_true")
     parser.add_argument("--center_scene", action="store_true")
+    parser.add_argument("--world_strength", default=8.0, type=float)
     args = parser.parse_args()

embodied_gen/scripts/room_gen/gen_room.py CHANGED Viewed

@@ -214,7 +214,7 @@ def generate_room(cfg: GenRoomArgs):
                 "-f",
                 "obj",
                 "-r",
-                "512",
                 "--individual",
                 "--deconvex",
                 "--center_scene",
@@ -235,7 +235,7 @@ def generate_room(cfg: GenRoomArgs):
                 "-f",
                 "usdc",
                 "-r",
-                "512",
                 "--omniverse",
                 "--center_scene",
             ]

                 "-f",
                 "obj",
                 "-r",
+                "1024",
                 "--individual",
                 "--deconvex",
                 "--center_scene",
                 "-f",
                 "usdc",
                 "-r",
+                "1024",
                 "--omniverse",
                 "--center_scene",
             ]

embodied_gen/scripts/room_gen/render_birdseye.py ADDED Viewed

	@@ -0,0 +1,271 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Render a top-down bird's-eye view of a USD scene with ceilings hidden."""
+from __future__ import annotations
+import argparse
+import logging
+import tempfile
+from pathlib import Path
+import bpy
+from mathutils import Vector
+from embodied_gen.scripts.room_gen.render_usd import RenderUsd
+logger = logging.getLogger(__name__)
+# Lower-case substrings: an object whose lower-cased name contains any of
+# them is deleted as a ceiling before rendering.
+CEILING_KEYWORDS = ("ceiling", "exterior")
+# Glob, relative to the batch input directory, that locates each seed's USD.
+DEFAULT_ROOM_USD_GLOB = "seed*/usd/export_scene/export_scene.usdc"
+class BirdseyeRenderUsd(RenderUsd):
+    """Top-down USD renderer with ceiling removal and orthographic camera.
+
+    Args:
+        ortho_margin: Multiplier applied to the framed scene extent when
+            setting the camera's orthographic scale.
+        use_cpu: Configure Cycles for the CPU here instead of delegating to
+            the base class's Cycles configuration.
+        **kwargs: Forwarded unchanged to ``RenderUsd.__init__``.
+    """
+    def __init__(
+        self,
+        *,
+        ortho_margin: float = 1.05,
+        use_cpu: bool = False,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.ortho_margin = ortho_margin
+        self.use_cpu = use_cpu
+    def configure_cycles(self) -> None:
+        """Configure Cycles, then force a transparent RGBA film."""
+        if self.use_cpu:
+            self.scene.render.engine = "CYCLES"
+            self.scene.cycles.device = "CPU"
+            self.scene.cycles.samples = self.samples
+            self.scene.render.resolution_x = self.resolution[0]
+            self.scene.render.resolution_y = self.resolution[1]
+            self.scene.render.image_settings.file_format = "PNG"
+        else:
+            super().configure_cycles()
+        self.scene.render.film_transparent = True
+        self.scene.render.image_settings.color_mode = "RGBA"
+    def remove_ceiling_objects(self) -> int:
+        """Delete any object whose name contains a ceiling keyword."""
+        removed = 0
+        # Iterate over a snapshot because objects are removed in the loop.
+        for obj in list(self.scene.objects):
+            lower = obj.name.lower()
+            if any(kw in lower for kw in CEILING_KEYWORDS):
+                bpy.data.objects.remove(obj, do_unlink=True)
+                removed += 1
+        logger.info("Removed %d ceiling objects.", removed)
+        return removed
+    def _fit_ortho_extent(self, extent_x: float, extent_y: float) -> float:
+        """Return the ortho extent that frames both X and Y scene extents.
+
+        With Blender's default automatic sensor fit, ``ortho_scale`` spans
+        the longer image side, so the shorter side covers only
+        ``ortho_scale * short / long`` scene units. The extent along the
+        shorter side is therefore scaled up by the aspect ratio. For a
+        square image this equals ``max(extent_x, extent_y)``.
+        """
+        width, height = self.resolution[0], self.resolution[1]
+        if width <= 0 or height <= 0:
+            return max(extent_x, extent_y)
+        if width >= height:
+            return max(extent_x, extent_y * (width / height))
+        return max(extent_x * (height / width), extent_y)
+    def create_orthographic_camera(
+        self, center: Vector, top_z: float, scene_size: float
+    ) -> bpy.types.Object:
+        """Create a downward-looking orthographic camera and make it active.
+
+        Args:
+            center: Point whose X and Y the camera is centred over.
+            top_z: Height of the top of the scene.
+            scene_size: Scene extent to frame; also used, with a floor of
+                1.0, as the camera's height above ``top_z``.
+
+        Returns:
+            The new camera object, also assigned to ``self.scene.camera``.
+        """
+        location = Vector((center.x, center.y, top_z + max(scene_size, 1.0)))
+        bpy.ops.object.camera_add(location=location, rotation=(0.0, 0.0, 0.0))
+        camera = bpy.context.object
+        camera.rotation_mode = "XYZ"
+        camera.data.type = "ORTHO"
+        camera.data.ortho_scale = scene_size * self.ortho_margin
+        camera.data.clip_start = 0.01
+        camera.data.clip_end = 1000.0
+        self.scene.camera = camera
+        return camera
+    def run(self) -> None:
+        """Import the USD scene, strip ceilings, and render the top view."""
+        rgb_output_path = self.get_rgb_output_path()
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.clear_scene()
+        self.import_usd()
+        # Ceilings are removed before the bounding box is measured.
+        self.remove_ceiling_objects()
+        self.validate_glb_args()
+        imported_glb_objects = self.import_glb_asset()
+        self.place_glb_asset(imported_glb_objects)
+        min_corner, max_corner = self.get_scene_bbox()
+        center = (min_corner + max_corner) * 0.5
+        diagonal = (max_corner - min_corner).length
+        # Account for the image aspect ratio so a non-square render does not
+        # crop the scene along its shorter image side.
+        scene_size = self._fit_ortho_extent(
+            max_corner.x - min_corner.x, max_corner.y - min_corner.y
+        )
+        self.create_orthographic_camera(center, max_corner.z, scene_size)
+        self.ensure_lighting(diagonal, center, max_corner.z)
+        world_created = self.ensure_world()
+        self.add_fill_light(
+            diagonal,
+            center,
+            max_corner.z,
+            energy=self.fill_light_energy,
+        )
+        if world_created:
+            self.add_light_rig(
+                diagonal,
+                center,
+                max_corner.z,
+                area_energy=1500.0,
+                sun_energy=0.35,
+                prefix="Fill",
+            )
+        self.configure_color_management()
+        self.configure_cycles()
+        with tempfile.TemporaryDirectory(
+            prefix="render_birdseye_"
+        ) as temp_dir:
+            self.temp_dir = Path(temp_dir)
+            try:
+                self.render(rgb_output_path)
+            finally:
+                # The directory is deleted on exit, even when render raises;
+                # never leave temp_dir pointing at it.
+                self.temp_dir = None
+        logger.info("Rendered bird's-eye outputs to %s", self.output_dir)
+def build_arg_parser() -> argparse.ArgumentParser:
+    """Create the command-line parser for bird's-eye rendering.
+
+    Exactly one of ``--usd_path`` (single render) or ``--input_dir`` (batch
+    render) must be supplied.
+    """
+    cli = argparse.ArgumentParser(
+        description="Render a top-down bird's-eye view of a USD scene."
+    )
+    source = cli.add_mutually_exclusive_group(required=True)
+    source.add_argument("--usd_path", type=Path)
+    source.add_argument(
+        "--input_dir",
+        type=Path,
+        help=(
+            "Directory with seed*/usd/export_scene/export_scene.usdc files "
+            "to render in batch."
+        ),
+    )
+    cli.add_argument(
+        "--output_dir",
+        type=Path,
+        help="Output directory for a single --usd_path render.",
+    )
+    cli.add_argument(
+        "--output_root",
+        type=Path,
+        help=(
+            "Batch output root. Defaults to <input_dir>/bev, with one "
+            "subdirectory per seed."
+        ),
+    )
+    cli.add_argument(
+        "--skip_existing",
+        action="store_true",
+        help="Skip batch items that already have render_rgb.png.",
+    )
+    cli.add_argument(
+        "--resolution",
+        type=int,
+        nargs=2,
+        metavar=("WIDTH", "HEIGHT"),
+        default=(1920, 1920),
+    )
+    cli.add_argument("--samples", type=int, default=512)
+    # Plain float options, registered in the order they appear in --help.
+    float_options = (
+        ("--exposure", -1.0),
+        ("--world_strength", 1.0),
+        ("--fill_light_energy", 1000.0),
+        ("--ortho_margin", 1.05),
+    )
+    for flag, default in float_options:
+        cli.add_argument(flag, type=float, default=default)
+    cli.add_argument("--use_cpu", action="store_true")
+    return cli
+def find_room_usd_paths(input_dir: Path) -> list[Path]:
+    """Return the sorted seed room USD files found under ``input_dir``."""
+    matches = input_dir.glob(DEFAULT_ROOM_USD_GLOB)
+    return sorted(matches)
+def get_batch_output_dir(
+    usd_path: Path, input_dir: Path, output_root: Path
+) -> Path:
+    """Return ``output_root/<seed>`` for one batch item.
+
+    ``<seed>`` is the first component of ``usd_path`` relative to
+    ``input_dir``. When ``usd_path`` is not located under ``input_dir``, the
+    name of its third ancestor directory is used instead.
+    """
+    try:
+        relative = usd_path.relative_to(input_dir)
+    except ValueError:
+        # relative_to raises ValueError for paths outside input_dir.
+        seed_name = usd_path.parents[2].name
+    else:
+        seed_name = relative.parts[0]
+    return output_root / seed_name
+def build_renderer(
+    args: argparse.Namespace, usd_path: Path, output_dir: Path
+) -> BirdseyeRenderUsd:
+    """Create a bird's-eye renderer for one USD file from the CLI options.
+
+    No GLB asset or flow camera is configured and only the "rgb" pass is
+    requested; resolution, samples, exposure, world strength, fill-light
+    energy, ortho margin, and CPU choice come from ``args``.
+    """
+    fixed_options = {
+        "glb_path": None,
+        "glb_xyz": None,
+        "glb_rotation_deg": None,
+        "render_passes": ("rgb",),
+        "depth_mode": "normalized",
+        "camera_xyz": (0.0, 0.0, 0.0),
+        "camera_rotation_deg": (0.0, 0.0, 0.0),
+        "flow_camera_xyz": None,
+        "flow_camera_rotation_deg": None,
+        "focal_length_mm": 20.0,
+    }
+    cli_options = {
+        "resolution": tuple(args.resolution),
+        "samples": args.samples,
+        "exposure": args.exposure,
+        "world_strength": args.world_strength,
+        "fill_light_energy": args.fill_light_energy,
+        "ortho_margin": args.ortho_margin,
+        "use_cpu": args.use_cpu,
+    }
+    return BirdseyeRenderUsd(
+        usd_path=usd_path,
+        output_dir=output_dir,
+        **fixed_options,
+        **cli_options,
+    )
+def render_single(
+    args: argparse.Namespace, usd_path: Path, output_dir: Path
+) -> None:
+    """Build a renderer for ``usd_path`` and run it into ``output_dir``."""
+    renderer = build_renderer(args, usd_path, output_dir)
+    renderer.run()
+def render_batch(args: argparse.Namespace) -> None:
+    """Render every seed room USD found under ``args.input_dir``.
+
+    Outputs go to ``args.output_root`` (default ``<input_dir>/bev``), one
+    subdirectory per seed. With ``args.skip_existing``, items that already
+    have ``render_rgb.png`` are skipped.
+
+    Raises:
+        FileNotFoundError: If no USD file matches under ``args.input_dir``.
+    """
+    source_dir = args.input_dir
+    bev_root = args.output_root or source_dir / "bev"
+    room_paths = find_room_usd_paths(source_dir)
+    if not room_paths:
+        raise FileNotFoundError(
+            f"No USD files found under {source_dir} matching "
+            f"{DEFAULT_ROOM_USD_GLOB}."
+        )
+    logger.info(
+        "Rendering %d bird's-eye views under %s.", len(room_paths), source_dir
+    )
+    for room_path in room_paths:
+        target_dir = get_batch_output_dir(room_path, source_dir, bev_root)
+        existing_render = target_dir / "render_rgb.png"
+        if args.skip_existing and existing_render.exists():
+            logger.info("Skipping existing render %s", existing_render)
+        else:
+            logger.info("Rendering %s to %s", room_path, target_dir)
+            render_single(args, room_path, target_dir)
+def main() -> None:
+    """Parse CLI arguments and dispatch to batch or single rendering.
+
+    Raises:
+        ValueError: If ``--usd_path`` is used without ``--output_dir``.
+    """
+    logging.basicConfig(level=logging.INFO)
+    args = build_arg_parser().parse_args()
+    if args.input_dir is None:
+        # Single-file mode needs an explicit destination.
+        if args.output_dir is None:
+            raise ValueError("--output_dir is required when using --usd_path.")
+        render_single(args, args.usd_path, args.output_dir)
+    else:
+        render_batch(args)
+# Run the command-line interface when this module is executed as a script.
+if __name__ == "__main__":
+    main()

embodied_gen/scripts/room_gen/render_usd.py ADDED Viewed

	@@ -0,0 +1,1606 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+from __future__ import annotations
+import argparse
+import logging
+import math
+import shutil
+import tempfile
+from collections.abc import Callable
+from pathlib import Path
+import bpy
+import cv2
+import numpy as np
+from mathutils import Euler, Matrix, Vector
+logger = logging.getLogger(__name__)
+def build_arg_parser() -> argparse.ArgumentParser:
+    """Build the CLI parser for USD rendering.
+
+    ``--usd_path``, ``--output_dir``, ``--camera_xyz`` and
+    ``--camera_rotation_deg`` are required; every other option is optional.
+    """
+    parser = argparse.ArgumentParser()
+    xyz_names = ("X", "Y", "Z")
+    rotation_names = ("RX", "RY", "RZ")
+    def _add_vec3(flag, metavar, **extra):
+        # Every position/rotation option takes exactly three floats.
+        parser.add_argument(
+            flag, type=float, nargs=3, metavar=metavar, **extra
+        )
+    parser.add_argument("--usd_path", required=True, type=Path)
+    parser.add_argument("--glb_path", type=str, default="")
+    _add_vec3("--glb_xyz", xyz_names)
+    _add_vec3("--glb_rotation_deg", rotation_names)
+    parser.add_argument("--output_dir", required=True, type=Path)
+    parser.add_argument(
+        "--render_passes",
+        nargs="+",
+        choices=("rgb", "depth", "normal", "mesh", "instance_seg", "flow"),
+        default=("rgb",),
+    )
+    parser.add_argument(
+        "--depth_mode",
+        choices=("normalized", "metric"),
+        default="normalized",
+    )
+    parser.add_argument(
+        "--resolution",
+        type=int,
+        nargs=2,
+        metavar=("WIDTH", "HEIGHT"),
+        default=(1920, 1080),
+    )
+    parser.add_argument("--samples", type=int, default=1024)
+    _add_vec3("--camera_xyz", xyz_names, required=True)
+    _add_vec3("--camera_rotation_deg", rotation_names, required=True)
+    _add_vec3("--flow_camera_xyz", xyz_names)
+    _add_vec3("--flow_camera_rotation_deg", rotation_names)
+    float_options = (
+        ("--focal_length_mm", 20.0),
+        ("--exposure", 2.2),
+        ("--world_strength", 8.0),
+        ("--fill_light_energy", 14000.0),
+    )
+    for flag, default in float_options:
+        parser.add_argument(flag, type=float, default=default)
+    return parser
+def _parse_args() -> argparse.Namespace:
+    return build_arg_parser().parse_args()
+class RenderUsd:
+    """USD renderer for RGB, depth, normal, mesh, segmentation, and flow."""
+    def __init__(
+        self,
+        *,
+        usd_path: Path,
+        glb_path: Path | str | None,
+        glb_xyz: tuple[float, float, float] | list[float] | None,
+        glb_rotation_deg: tuple[float, float, float] | list[float] | None,
+        output_dir: Path,
+        render_passes: tuple[str, ...] | list[str],
+        depth_mode: str,
+        resolution: tuple[int, int] | list[int],
+        samples: int,
+        camera_xyz: tuple[float, float, float] | list[float],
+        camera_rotation_deg: tuple[float, float, float] | list[float],
+        flow_camera_xyz: tuple[float, float, float] | list[float] | None,
+        flow_camera_rotation_deg: (
+            tuple[float, float, float] | list[float] | None
+        ),
+        focal_length_mm: float,
+        exposure: float,
+        world_strength: float,
+        fill_light_energy: float,
+    ) -> None:
+        """Initialize renderer configuration independent of CLI parsing."""
+        self.usd_path = usd_path
+        self.glb_path = self.normalize_optional_path(glb_path)
+        self.glb_xyz = tuple(glb_xyz) if glb_xyz is not None else None
+        self.glb_rotation_deg = (
+            tuple(glb_rotation_deg) if glb_rotation_deg is not None else None
+        )
+        self.output_dir = output_dir
+        self.render_passes = tuple(render_passes)
+        self.depth_mode = depth_mode
+        self.resolution = tuple(resolution)
+        self.samples = samples
+        self.camera_xyz = tuple(camera_xyz)
+        self.camera_rotation_deg = tuple(camera_rotation_deg)
+        self.flow_camera_xyz = (
+            tuple(flow_camera_xyz) if flow_camera_xyz is not None else None
+        )
+        self.flow_camera_rotation_deg = (
+            tuple(flow_camera_rotation_deg)
+            if flow_camera_rotation_deg is not None
+            else None
+        )
+        self.focal_length_mm = focal_length_mm
+        self.exposure = exposure
+        self.world_strength = world_strength
+        self.fill_light_energy = fill_light_energy
+        self.temp_dir: Path | None = None
+    @classmethod
+    def from_args(cls, args: argparse.Namespace) -> RenderUsd:
+        """Build a renderer from parsed CLI arguments."""
+        return cls(
+            usd_path=args.usd_path,
+            glb_path=args.glb_path,
+            glb_xyz=args.glb_xyz,
+            glb_rotation_deg=args.glb_rotation_deg,
+            output_dir=args.output_dir,
+            render_passes=args.render_passes,
+            depth_mode=args.depth_mode,
+            resolution=args.resolution,
+            samples=args.samples,
+            camera_xyz=args.camera_xyz,
+            camera_rotation_deg=args.camera_rotation_deg,
+            flow_camera_xyz=args.flow_camera_xyz,
+            flow_camera_rotation_deg=args.flow_camera_rotation_deg,
+            focal_length_mm=args.focal_length_mm,
+            exposure=args.exposure,
+            world_strength=args.world_strength,
+            fill_light_energy=args.fill_light_energy,
+        )
+    @property
+    def scene(self) -> bpy.types.Scene:
+        return bpy.context.scene
+    def normalize_optional_path(
+        self, path_value: Path | str | None
+    ) -> Path | None:
+        """Normalize an optional CLI path, treating empty strings as missing."""
+        if path_value is None:
+            return None
+        if isinstance(path_value, Path):
+            return path_value
+        normalized = path_value.strip()
+        if not normalized:
+            return None
+        return Path(normalized)
+    def build_output_path(self, filename: str) -> Path:
+        """Build a normalized output path under the render directory."""
+        return self.output_dir / filename
+    def build_temp_path(self, filename: str) -> Path:
+        """Build a temporary path outside the final output directory."""
+        if self.temp_dir is None:
+            raise RuntimeError(
+                "Temporary render directory is not initialized."
+            )
+        return self.temp_dir / filename
+    def get_rgb_output_path(self) -> Path:
+        return self.build_output_path("render_rgb.png")
+    def get_depth_vis_output_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_output_path("render_depth.png")
+    def get_normal_output_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_output_path("render_normal.png")
+    def get_mesh_output_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_output_path("render_mesh.png")
+    def get_instance_seg_vis_output_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_output_path("render_instance_seg_vis.png")
+    def get_instance_seg_temp_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_temp_path("render_instance_seg_raw_0001.exr")
+    def get_flow_output_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_output_path("render_flow.npy")
+    def get_flow_valid_output_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_output_path("render_flow_valid.npy")
+    def get_flow_vis_output_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_output_path("render_flow_vis.png")
+    def get_flow_depth_temp_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_temp_path("render_flow_depth_raw_0001.exr")
+    def get_depth_gray_temp_path(self, output_path: Path) -> Path:
+        del output_path
+        return self.build_temp_path("render_depth_gray_0001.png")
+    def get_composite_output_path(
+        self, render_passes: list[str] | tuple[str, ...]
+    ) -> Path:
+        pass_names = "_".join(render_passes)
+        return self.build_output_path(f"render_composite_{pass_names}.png")
+    def build_occurrence_output_path(
+        self, output_path: Path, occurrence_index: int
+    ) -> Path:
+        """Build an occurrence-specific path for repeated preview outputs."""
+        if occurrence_index < 1:
+            raise ValueError("occurrence_index must be greater than 0.")
+        if occurrence_index == 1:
+            return output_path
+        return output_path.with_name(
+            f"{output_path.stem}_{occurrence_index}{output_path.suffix}"
+        )
+    def iter_render_pass_occurrences(self) -> list[tuple[str, int]]:
+        """Return requested render passes with 1-based occurrence indices."""
+        occurrence_counts: dict[str, int] = {}
+        render_pass_occurrences: list[tuple[str, int]] = []
+        for render_pass_name in self.render_passes:
+            occurrence_index = occurrence_counts.get(render_pass_name, 0) + 1
+            occurrence_counts[render_pass_name] = occurrence_index
+            render_pass_occurrences.append(
+                (render_pass_name, occurrence_index)
+            )
+        return render_pass_occurrences
+    def get_temp_output_slot_prefix(self, temp_output_path: Path) -> str:
+        """Return the compositor slot prefix without the frame suffix."""
+        stem_parts = temp_output_path.stem.rsplit("_", maxsplit=1)
+        if len(stem_parts) != 2 or not stem_parts[1].isdigit():
+            raise ValueError(
+                f"Unexpected temporary output filename: {temp_output_path.name}"
+            )
+        return f"{stem_parts[0]}_"
+    def get_mesh_objects(self) -> list[bpy.types.Object]:
+        return [obj for obj in self.scene.objects if obj.type == "MESH"]
+    def clear_scene(self) -> None:
+        bpy.ops.wm.read_factory_settings(use_empty=True)
+    def import_usd(self) -> None:
+        if not self.usd_path.exists():
+            raise FileNotFoundError(f"USD file not found: {self.usd_path}")
+        bpy.ops.wm.usd_import(filepath=str(self.usd_path))
+    def validate_glb_args(self) -> None:
+        """Normalize optional GLB arguments and ensure all-or-none usage."""
+        has_glb_path = self.glb_path is not None
+        has_glb_xyz = self.glb_xyz is not None
+        has_glb_rotation = self.glb_rotation_deg is not None
+        if len({has_glb_path, has_glb_xyz, has_glb_rotation}) != 1:
+            raise ValueError(
+                "--glb_path, --glb_xyz, and --glb_rotation_deg must be "
+                "provided together."
+            )
+        if not has_glb_path:
+            return
+        if not self.glb_path.exists():
+            raise FileNotFoundError(f"GLB file not found: {self.glb_path}")
+        if self.glb_path.suffix.lower() != ".glb":
+            raise ValueError(
+                f"Expected a .glb asset, but got: {self.glb_path}"
+            )
+    def enable_gltf_importer(self) -> None:
+        """Ensure Blender's glTF importer add-on is available."""
+        addon_name = "io_scene_gltf2"
+        if addon_name in bpy.context.preferences.addons:
+            return
+        try:
+            bpy.ops.preferences.addon_enable(module=addon_name)
+        except Exception as exc:
+            raise RuntimeError(
+                "Failed to enable Blender glTF importer add-on."
+            ) from exc
+    def import_glb_asset(self) -> list[bpy.types.Object]:
+        """Import the optional GLB asset and return created objects."""
+        if self.glb_path is None:
+            return []
+        self.enable_gltf_importer()
+        existing_object_ids = {obj.as_pointer() for obj in bpy.data.objects}
+        result = bpy.ops.import_scene.gltf(filepath=str(self.glb_path))
+        if "FINISHED" not in result:
+            raise RuntimeError(f"Failed to import GLB asset: {self.glb_path}")
+        imported_objects = [
+            obj
+            for obj in bpy.data.objects
+            if obj.as_pointer() not in existing_object_ids
+        ]
+        if not imported_objects:
+            raise ValueError(
+                f"No objects were imported from GLB asset: {self.glb_path}"
+            )
+        return imported_objects
+    def get_imported_root_objects(
+        self, imported_objects: list[bpy.types.Object]
+    ) -> list[bpy.types.Object]:
+        """Return top-level imported objects so transforms apply as one asset."""
+        imported_ids = {obj.as_pointer() for obj in imported_objects}
+        root_objects = [
+            obj
+            for obj in imported_objects
+            if obj.parent is None
+            or obj.parent.as_pointer() not in imported_ids
+        ]
+        return root_objects or imported_objects
+    def place_glb_asset(
+        self, imported_objects: list[bpy.types.Object]
+    ) -> None:
+        """Place the imported GLB asset using the requested world transform."""
+        if not imported_objects:
+            return
+        if self.glb_xyz is None or self.glb_rotation_deg is None:
+            raise ValueError("GLB transform arguments are not initialized.")
+        asset_transform = self.build_camera_matrix_world(
+            self.glb_xyz,
+            self.glb_rotation_deg,
+        )
+        for obj in self.get_imported_root_objects(imported_objects):
+            obj.matrix_world = asset_transform @ obj.matrix_world.copy()
+        bpy.context.view_layer.update()
+    def get_scene_bbox(self) -> tuple[Vector, Vector]:
+        """Compute the world-space bounding box across all mesh objects."""
+        mesh_objects = self.get_mesh_objects()
+        if not mesh_objects:
+            raise ValueError("No mesh objects found after USD import.")
+        points: list[Vector] = []
+        for obj in mesh_objects:
+            points.extend(
+                obj.matrix_world @ Vector(corner) for corner in obj.bound_box
+            )
+        min_corner = Vector(
+            (
+                min(p.x for p in points),
+                min(p.y for p in points),
+                min(p.z for p in points),
+            )
+        )
+        max_corner = Vector(
+            (
+                max(p.x for p in points),
+                max(p.y for p in points),
+                max(p.z for p in points),
+            )
+        )
+        return min_corner, max_corner
+    def create_camera(self) -> bpy.types.Object:
+        """Create and configure the primary render camera."""
+        if self.camera_xyz is None:
+            raise ValueError("--camera_xyz is required.")
+        location = Vector(tuple(self.camera_xyz))
+        rotation_rad = self.get_rotation_radians(self.camera_rotation_deg)
+        bpy.ops.object.camera_add(location=location, rotation=rotation_rad)
+        camera = bpy.context.object
+        camera.rotation_mode = "XYZ"
+        camera.data.lens = self.focal_length_mm
+        camera.data.clip_start = 0.01
+        camera.data.clip_end = 1000.0
+        self.scene.camera = camera
+        return camera
+    def add_light_rig(
+        self,
+        diagonal: float,
+        center: Vector,
+        top_z: float,
+        *,
+        area_energy: float,
+        sun_energy: float,
+        prefix: str,
+    ) -> None:
+        bpy.ops.object.light_add(
+            type="AREA",
+            location=(center.x, center.y, top_z + 0.5 * diagonal),
+        )
+        area = bpy.context.object
+        area.name = f"{prefix}Area"
+        area.data.energy = area_energy
+        area.data.shape = "DISK"
+        area.data.size = max(diagonal, 2.0)
+        bpy.ops.object.light_add(
+            type="SUN",
+            location=(
+                center.x + diagonal,
+                center.y - diagonal,
+                top_z + diagonal,
+            ),
+        )
+        sun = bpy.context.object
+        sun.name = f"{prefix}Sun"
+        sun.data.energy = sun_energy
+    def add_fill_light(
+        self,
+        diagonal: float,
+        center: Vector,
+        top_z: float,
+        energy: float,
+    ) -> None:
+        if energy <= 0.0:
+            return
+        bpy.ops.object.light_add(
+            type="AREA",
+            location=(center.x, center.y, top_z + 0.35 * diagonal),
+            rotation=(0.0, 0.0, 0.0),
+        )
+        area = bpy.context.object
+        area.name = "GlobalFillArea"
+        area.data.energy = energy
+        area.data.shape = "DISK"
+        area.data.size = max(diagonal * 0.9, 3.0)
+    def ensure_lighting(
+        self, diagonal: float, center: Vector, top_z: float
+    ) -> None:
+        if any(obj.type == "LIGHT" for obj in self.scene.objects):
+            return
+        self.add_light_rig(
+            diagonal,
+            center,
+            top_z,
+            area_energy=5000.0,
+            sun_energy=1.5,
+            prefix="Fallback",
+        )
+    def set_world_strength(self, strength: float) -> None:
+        world = self.scene.world
+        if world is None:
+            return
+        if not world.use_nodes:
+            world.use_nodes = True
+        tree = world.node_tree
+        background_nodes = [
+            node for node in tree.nodes if node.type == "BACKGROUND"
+        ]
+        if not background_nodes:
+            background = tree.nodes.new(type="ShaderNodeBackground")
+            output = next(
+                (node for node in tree.nodes if node.type == "OUTPUT_WORLD"),
+                None,
+            )
+            if output is None:
+                output = tree.nodes.new(type="ShaderNodeOutputWorld")
+            tree.links.new(
+                background.outputs["Background"], output.inputs["Surface"]
+            )
+            background_nodes = [background]
+        for background in background_nodes:
+            background.inputs["Strength"].default_value = strength
+    def ensure_world(self) -> bool:
+        """Ensure the scene has a world shader and return whether it was created."""
+        if self.scene.world is not None:
+            self.set_world_strength(self.world_strength)
+            return False
+        world = bpy.data.worlds.new(name="RenderWorld")
+        world.use_nodes = True
+        tree = world.node_tree
+        tree.nodes.clear()
+        output = tree.nodes.new(type="ShaderNodeOutputWorld")
+        background = tree.nodes.new(type="ShaderNodeBackground")
+        sky = tree.nodes.new(type="ShaderNodeTexSky")
+        background.inputs["Strength"].default_value = self.world_strength
+        tree.links.new(sky.outputs["Color"], background.inputs["Color"])
+        tree.links.new(
+            background.outputs["Background"], output.inputs["Surface"]
+        )
+        self.scene.world = world
+        return True
+    def configure_cycles(self) -> None:
+        self.scene.render.engine = "CYCLES"
+        self.scene.cycles.device = "GPU"
+        self.scene.cycles.samples = self.samples
+        self.scene.render.resolution_x = self.resolution[0]
+        self.scene.render.resolution_y = self.resolution[1]
+        self.scene.render.image_settings.file_format = "PNG"
+        self.scene.render.film_transparent = False
+        prefs = bpy.context.preferences.addons["cycles"].preferences
+        prefs.compute_device_type = "CUDA"
+        prefs.get_devices()
+        cuda_devices = [
+            device for device in prefs.devices if device.type == "CUDA"
+        ]
+        if not cuda_devices:
+            raise RuntimeError("No CUDA device found in Blender Cycles.")
+        for device in prefs.devices:
+            device.use = device.type == "CUDA"
+    def configure_color_management(self) -> None:
+        self.scene.view_settings.exposure = self.exposure
+    def snapshot_render_state(
+        self,
+        view_layer: bpy.types.ViewLayer,
+        *,
+        include_filepath: bool = False,
+        include_material_override: bool = False,
+        include_use_pass_z: bool = False,
+        include_use_pass_object_index: bool = False,
+    ) -> dict[str, object]:
+        """Capture the render state that temporary passes need to restore."""
+        state: dict[str, object] = {
+            "film_transparent": self.scene.render.film_transparent,
+            "view_transform": self.scene.view_settings.view_transform,
+            "look": self.scene.view_settings.look,
+            "exposure": self.scene.view_settings.exposure,
+            "gamma": self.scene.view_settings.gamma,
+            "file_format": self.scene.render.image_settings.file_format,
+            "color_mode": self.scene.render.image_settings.color_mode,
+            "color_depth": self.scene.render.image_settings.color_depth,
+            "use_nodes": self.scene.use_nodes,
+            "samples": self.scene.cycles.samples,
+        }
+        if include_filepath:
+            state["filepath"] = self.scene.render.filepath
+        if include_material_override:
+            state["material_override"] = view_layer.material_override
+        if include_use_pass_z:
+            state["use_pass_z"] = view_layer.use_pass_z
+        if include_use_pass_object_index:
+            state["use_pass_object_index"] = view_layer.use_pass_object_index
+        return state
+    def restore_render_state(
+        self, state: dict[str, object], view_layer: bpy.types.ViewLayer
+    ) -> None:
+        """Restore a render state captured by ``snapshot_render_state``."""
+        self.scene.render.film_transparent = state["film_transparent"]
+        self.scene.view_settings.view_transform = state["view_transform"]
+        self.scene.view_settings.look = state["look"]
+        self.scene.view_settings.exposure = state["exposure"]
+        self.scene.view_settings.gamma = state["gamma"]
+        self.scene.render.image_settings.file_format = state["file_format"]
+        self.scene.render.image_settings.color_mode = state["color_mode"]
+        self.scene.render.image_settings.color_depth = state["color_depth"]
+        self.scene.use_nodes = state["use_nodes"]
+        self.scene.cycles.samples = state["samples"]
+        if "filepath" in state:
+            self.scene.render.filepath = state["filepath"]
+        if "material_override" in state:
+            view_layer.material_override = state["material_override"]
+        if "use_pass_z" in state:
+            view_layer.use_pass_z = state["use_pass_z"]
+        if "use_pass_object_index" in state:
+            view_layer.use_pass_object_index = state["use_pass_object_index"]
+    def apply_raw_preview_settings(
+        self,
+        *,
+        use_nodes: bool,
+        samples: int,
+        color_mode: str,
+        color_depth: str,
+    ) -> None:
+        """Apply the shared render settings for auxiliary preview passes."""
+        self.scene.render.film_transparent = True
+        self.scene.view_settings.view_transform = "Raw"
+        self.scene.view_settings.look = "None"
+        self.scene.view_settings.exposure = 0.0
+        self.scene.view_settings.gamma = 1.0
+        self.scene.use_nodes = use_nodes
+        self.scene.cycles.samples = samples
+        self.scene.render.image_settings.file_format = "PNG"
+        self.scene.render.image_settings.color_mode = color_mode
+        self.scene.render.image_settings.color_depth = color_depth
+    def clear_compositor_tree(self) -> bpy.types.NodeTree:
+        """Reset the compositor tree so each pass starts from a clean slate."""
+        self.scene.use_nodes = True
+        tree = self.scene.node_tree
+        tree.nodes.clear()
+        return tree
+    def remove_render_nodes(self, created_nodes: list[bpy.types.Node]) -> None:
+        """Remove compositor nodes created for a temporary render pass."""
+        if not created_nodes:
+            return
+        node_tree = self.scene.node_tree
+        if node_tree is None:
+            return
+        for node in created_nodes:
+            if node.name in node_tree.nodes:
+                node_tree.nodes.remove(node)
+    def render_material_override_pass(
+        self,
+        preview_output_path: Path,
+        material_factory: Callable[[], bpy.types.Material],
+        *,
+        color_mode: str,
+    ) -> None:
+        """Render a still image with one material overriding the view layer.
+
+        The current render state (including the render filepath and the
+        view layer's material override) is captured first and restored
+        afterwards, and the temporary material is removed, whether or not
+        the render succeeds.
+
+        Args:
+            preview_output_path: Destination file; its parent directory is
+                created if missing.
+            material_factory: Zero-argument callable that builds the
+                override material.
+            color_mode: Image colour mode used for this pass.
+        """
+        preview_output_path.parent.mkdir(parents=True, exist_ok=True)
+        view_layer = self.scene.view_layers["ViewLayer"]
+        # Snapshot before anything is modified so the finally block can
+        # undo every change made below.
+        state = self.snapshot_render_state(
+            view_layer,
+            include_filepath=True,
+            include_material_override=True,
+        )
+        material = material_factory()
+        try:
+            # Compositor nodes are disabled and the sample count is capped
+            # at 64 (or the current value when that is lower).
+            self.apply_raw_preview_settings(
+                use_nodes=False,
+                samples=min(int(state["samples"]), 64),
+                color_mode=color_mode,
+                color_depth="8",
+            )
+            self.scene.render.filepath = str(preview_output_path)
+            view_layer.material_override = material
+            bpy.ops.render.render(write_still=True)
+        finally:
+            # Restore first so the override no longer points at the
+            # material when it is removed.
+            self.restore_render_state(state, view_layer)
+            bpy.data.materials.remove(material, do_unlink=True)
+    def render_temp_output_pass(
+        self,
+        output_path: Path,
+        temp_output_path: Path,
+        *,
+        add_output_node: Callable[
+            [Path], tuple[bpy.types.NodeTree, list[bpy.types.Node]]
+        ],
+        load_temp_output: Callable[[Path], np.ndarray],
+        finalize_output: Callable[[np.ndarray], None],
+        color_mode: str,
+        color_depth: str,
+        enable_depth_pass: bool = False,
+        enable_object_index_pass: bool = False,
+    ) -> None:
+        """Render through a temporary compositor output and post-process it.
+
+        The compositor tree is cleared, ``add_output_node`` builds the nodes
+        for this pass, a one-sample render is run without writing a still,
+        and the temporary file is loaded and handed to ``finalize_output``.
+        The created nodes, the temporary file and the previous render state
+        are cleaned up or restored even when a step fails.
+
+        Args:
+            output_path: Final output location; its parent directory is
+                created and the path is passed to ``add_output_node``.
+            temp_output_path: File that ``load_temp_output`` reads after
+                the render.
+            add_output_node: Builds the compositor nodes; returns the tree
+                and the list of nodes it created.
+            load_temp_output: Reads the temporary file into an array.
+            finalize_output: Consumes the loaded array.
+            color_mode: Image colour mode used for this pass.
+            color_depth: Image colour depth used for this pass.
+            enable_depth_pass: Turn on the view layer's Z pass.
+            enable_object_index_pass: Turn on the object-index pass.
+        """
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        view_layer = self.scene.view_layers["ViewLayer"]
+        # Pass flags are captured only when this call is going to change
+        # them, so the restore step leaves the others alone.
+        state = self.snapshot_render_state(
+            view_layer,
+            include_use_pass_z=enable_depth_pass,
+            include_use_pass_object_index=enable_object_index_pass,
+        )
+        created_nodes: list[bpy.types.Node] = []
+        try:
+            # Remove any leftover so a stale file cannot be mistaken for
+            # this render's output.
+            if temp_output_path.exists():
+                temp_output_path.unlink()
+            self.apply_raw_preview_settings(
+                use_nodes=True,
+                samples=1,
+                color_mode=color_mode,
+                color_depth=color_depth,
+            )
+            if enable_depth_pass:
+                view_layer.use_pass_z = True
+            if enable_object_index_pass:
+                view_layer.use_pass_object_index = True
+            self.clear_compositor_tree()
+            # The returned tree is not needed here; only the node list is
+            # kept for cleanup.
+            _, created_nodes = add_output_node(output_path)
+            bpy.ops.render.render(write_still=False)
+            finalize_output(load_temp_output(temp_output_path))
+        finally:
+            self.remove_render_nodes(created_nodes)
+            if temp_output_path.exists():
+                temp_output_path.unlink()
+            self.restore_render_state(state, view_layer)
+    def get_rotation_radians(
+        self, rotation_deg: tuple[float, float, float] | list[float]
+    ) -> tuple[float, float, float]:
+        return tuple(math.radians(angle_deg) for angle_deg in rotation_deg)
+    def validate_flow_args(self) -> None:
+        """Normalize optional flow-camera arguments and fill defaults."""
+        has_flow_xyz = self.flow_camera_xyz is not None
+        has_flow_rotation = self.flow_camera_rotation_deg is not None
+        if has_flow_xyz != has_flow_rotation:
+            raise ValueError(
+                "--flow_camera_xyz and --flow_camera_rotation_deg must be "
+                "provided together."
+            )
+        if not has_flow_xyz:
+            xyz = list(self.camera_xyz)
+            xyz[0] += 0.5
+            self.flow_camera_xyz = tuple(xyz)
+            self.flow_camera_rotation_deg = tuple(self.camera_rotation_deg)
+    def build_depth_preview_node(
+        self,
+        tree: bpy.types.NodeTree,
+        render_layers: bpy.types.CompositorNodeRLayers,
+        camera: bpy.types.Camera,
+        depth_mode: str,
+    ) -> bpy.types.Node:
+        """Build the compositor node that converts raw depth to a previewable map."""
+        if depth_mode == "normalized":
+            normalize = tree.nodes.new(type="CompositorNodeNormalize")
+            tree.links.new(render_layers.outputs["Depth"], normalize.inputs[0])
+            return normalize
+        if depth_mode != "metric":
+            raise ValueError(f"Unsupported depth mode: {depth_mode}")
+        depth_map = tree.nodes.new(type="CompositorNodeMapRange")
+        depth_map.inputs["From Min"].default_value = camera.clip_start
+        depth_map.inputs["From Max"].default_value = camera.clip_end
+        depth_map.inputs["To Min"].default_value = 0.0
+        depth_map.inputs["To Max"].default_value = 1.0
+        depth_map.use_clamp = True
+        tree.links.new(render_layers.outputs["Depth"], depth_map.inputs[0])
+        return depth_map
+    def build_depth_vis_output(
+        self,
+        tree: bpy.types.NodeTree,
+        depth_preview_node: bpy.types.Node,
+        output_path: Path,
+    ) -> Path:
+        temp_output_path = self.get_depth_gray_temp_path(output_path)
+        output_node = tree.nodes.new(type="CompositorNodeOutputFile")
+        output_node.base_path = str(temp_output_path.parent)
+        output_node.file_slots[0].path = self.get_temp_output_slot_prefix(
+            temp_output_path
+        )
+        output_node.format.file_format = "PNG"
+        output_node.format.color_mode = "BW"
+        output_node.format.color_depth = "8"
+        tree.links.new(depth_preview_node.outputs[0], output_node.inputs[0])
+        return temp_output_path
+    def configure_auxiliary_outputs(
+        self,
+        output_path: Path,
+        render_passes: tuple[str, ...] | list[str],
+        depth_mode: str,
+    ) -> list[tuple[Path, Path]]:
+        """Configure compositor outputs needed during the base render."""
+        view_layer = self.scene.view_layers["ViewLayer"]
+        if "depth" in render_passes:
+            view_layer.use_pass_z = True
+        if "depth" not in render_passes:
+            return []
+        tree = self.clear_compositor_tree()
+        render_layers = tree.nodes.new(type="CompositorNodeRLayers")
+        temp_outputs: list[tuple[Path, Path]] = []
+        depth_preview_node = self.build_depth_preview_node(
+            tree,
+            render_layers,
+            self.scene.camera.data,
+            depth_mode,
+        )
+        temp_path = self.build_depth_vis_output(
+            tree=tree,
+            depth_preview_node=depth_preview_node,
+            output_path=output_path,
+        )
+        temp_outputs.append(
+            (temp_path, self.get_depth_vis_output_path(output_path))
+        )
+        return temp_outputs
+    def finalize_depth_output(
+        self, temp_path: Path, output_path: Path
+    ) -> None:
+        """Convert the grayscale depth temp image into the final colored preview."""
+        if output_path.exists():
+            output_path.unlink()
+        if not temp_path.exists():
+            raise FileNotFoundError(f"Depth file not generated: {temp_path}")
+        try:
+            depth = cv2.imread(str(temp_path), cv2.IMREAD_GRAYSCALE)
+            if depth is None:
+                raise FileNotFoundError(
+                    f"Failed to read depth image: {temp_path}"
+                )
+            depth_uint8 = np.ascontiguousarray(depth)
+            depth_colormap = cv2.applyColorMap(depth_uint8, cv2.COLORMAP_JET)
+            if not cv2.imwrite(str(output_path), depth_colormap):
+                raise RuntimeError(
+                    f"Failed to write depth visualization: {output_path}"
+                )
+        finally:
+            if temp_path.exists():
+                temp_path.unlink()
+    def create_clean_material(self, material_name: str) -> bpy.types.Material:
+        """Create a material with a cleared node tree."""
+        existing = bpy.data.materials.get(material_name)
+        if existing is not None:
+            bpy.data.materials.remove(existing, do_unlink=True)
+        material = bpy.data.materials.new(name=material_name)
+        material.use_nodes = True
+        material.shadow_method = "NONE"
+        tree = material.node_tree
+        tree.nodes.clear()
+        return material
+    def create_view_normal_material(self) -> bpy.types.Material:
+        """Build an emission material that shows camera-space normals.
+
+        Node chain: the geometry's true normal is mixed with its negation
+        using the ``Backfacing`` output as factor, transformed from world
+        to camera space as a normal, multiplied component-wise by
+        ``(-1, 1, -1)``, remapped with ``x * 0.5 + 0.5``, and fed into an
+        emission shader connected to the material output.
+        """
+        material = self.create_clean_material("EmbodiedGenViewNormal")
+        tree = material.node_tree
+        geometry = tree.nodes.new(type="ShaderNodeNewGeometry")
+        # Negated copy of the normal, selected for back-facing surfaces.
+        invert = tree.nodes.new(type="ShaderNodeVectorMath")
+        invert.operation = "MULTIPLY"
+        invert.inputs[1].default_value = (-1.0, -1.0, -1.0)
+        # NOTE(review): a Mix node has one "A"/"B"/"Result" socket per data
+        # type; confirm that the name lookups below resolve to the vector
+        # sockets on the Blender version this script targets.
+        face_mix = tree.nodes.new(type="ShaderNodeMix")
+        face_mix.data_type = "VECTOR"
+        face_mix.clamp_factor = True
+        face_mix.factor_mode = "UNIFORM"
+        view_transform = tree.nodes.new(type="ShaderNodeVectorTransform")
+        view_transform.vector_type = "NORMAL"
+        view_transform.convert_from = "WORLD"
+        view_transform.convert_to = "CAMERA"
+        # Despite the name, this negates both the X and Z components.
+        flip_x = tree.nodes.new(type="ShaderNodeVectorMath")
+        flip_x.operation = "MULTIPLY"
+        flip_x.inputs[1].default_value = (-1.0, 1.0, -1.0)
+        # Remap each component with x * 0.5 + 0.5 before using it as colour.
+        scale_bias = tree.nodes.new(type="ShaderNodeVectorMath")
+        scale_bias.operation = "MULTIPLY_ADD"
+        scale_bias.inputs[1].default_value = (0.5, 0.5, 0.5)
+        scale_bias.inputs[2].default_value = (0.5, 0.5, 0.5)
+        emission = tree.nodes.new(type="ShaderNodeEmission")
+        output = tree.nodes.new(type="ShaderNodeOutputMaterial")
+        tree.links.new(geometry.outputs["True Normal"], invert.inputs[0])
+        tree.links.new(
+            geometry.outputs["Backfacing"], face_mix.inputs["Factor"]
+        )
+        tree.links.new(geometry.outputs["True Normal"], face_mix.inputs["A"])
+        tree.links.new(invert.outputs["Vector"], face_mix.inputs["B"])
+        tree.links.new(
+            face_mix.outputs["Result"], view_transform.inputs["Vector"]
+        )
+        tree.links.new(view_transform.outputs["Vector"], flip_x.inputs[0])
+        tree.links.new(flip_x.outputs["Vector"], scale_bias.inputs[0])
+        tree.links.new(scale_bias.outputs["Vector"], emission.inputs["Color"])
+        tree.links.new(emission.outputs["Emission"], output.inputs["Surface"])
+        return material
+    def create_mesh_preview_material(self) -> bpy.types.Material:
+        material = self.create_clean_material("EmbodiedGenMeshPreview")
+        tree = material.node_tree
+        layer_weight = tree.nodes.new(type="ShaderNodeLayerWeight")
+        layer_weight.inputs["Blend"].default_value = 0.35
+        base_ramp = tree.nodes.new(type="ShaderNodeValToRGB")
+        base_ramp.color_ramp.elements[0].position = 0.1
+        base_ramp.color_ramp.elements[0].color = (0.78, 0.81, 0.87, 1.0)
+        base_ramp.color_ramp.elements[1].position = 0.9
+        base_ramp.color_ramp.elements[1].color = (0.42, 0.48, 0.58, 1.0)
+        emission = tree.nodes.new(type="ShaderNodeEmission")
+        emission.inputs["Strength"].default_value = 0.82
+        output = tree.nodes.new(type="ShaderNodeOutputMaterial")
+        tree.links.new(layer_weight.outputs["Facing"], base_ramp.inputs["Fac"])
+        tree.links.new(base_ramp.outputs["Color"], emission.inputs["Color"])
+        tree.links.new(emission.outputs["Emission"], output.inputs["Surface"])
+        return material
+    def assign_instance_ids(self) -> dict[str, int]:
+        """Assign stable per-object pass indices for instance segmentation."""
+        mesh_objects = sorted(
+            self.get_mesh_objects(), key=lambda obj: obj.name
+        )
+        if not mesh_objects:
+            raise ValueError(
+                "No mesh objects found for instance segmentation."
+            )
+        instance_id_map: dict[str, int] = {}
+        for instance_id, obj in enumerate(mesh_objects, start=1):
+            obj.pass_index = instance_id
+            instance_id_map[obj.name] = instance_id
+        return instance_id_map
+    def snapshot_object_pass_indices(
+        self,
+    ) -> list[tuple[bpy.types.Object, int]]:
+        """Capture original object pass indices before a temporary override."""
+        return [(obj, obj.pass_index) for obj in self.get_mesh_objects()]
+    def restore_object_pass_indices(
+        self, original_pass_indices: list[tuple[bpy.types.Object, int]]
+    ) -> None:
+        """Restore object pass indices captured earlier."""
+        for obj, pass_index in original_pass_indices:
+            obj.pass_index = pass_index
+    def add_instance_seg_output_node(
+        self,
+        output_path: Path,
+    ) -> tuple[bpy.types.NodeTree, list[bpy.types.Node]]:
+        return self.add_exr_output_node(
+            output_path=output_path,
+            temp_output_path=self.get_instance_seg_temp_path(output_path),
+            render_output_name="IndexOB",
+        )
+    def add_flow_depth_output_node(
+        self,
+        output_path: Path,
+    ) -> tuple[bpy.types.NodeTree, list[bpy.types.Node]]:
+        return self.add_exr_output_node(
+            output_path=output_path,
+            temp_output_path=self.get_flow_depth_temp_path(output_path),
+            render_output_name="Depth",
+        )
+    def add_exr_output_node(
+        self,
+        output_path: Path,
+        temp_output_path: Path,
+        render_output_name: str,
+    ) -> tuple[bpy.types.NodeTree, list[bpy.types.Node]]:
+        """Attach a file-output EXR node for a specific render-layer socket."""
+        tree = self.scene.node_tree
+        render_layers = tree.nodes.new(type="CompositorNodeRLayers")
+        output_node = tree.nodes.new(type="CompositorNodeOutputFile")
+        output_node.base_path = str(temp_output_path.parent)
+        output_node.file_slots[0].path = self.get_temp_output_slot_prefix(
+            temp_output_path
+        )
+        output_node.format.file_format = "OPEN_EXR"
+        output_node.format.color_mode = "RGB"
+        output_node.format.color_depth = "32"
+        output_node.format.exr_codec = "NONE"
+        tree.links.new(
+            render_layers.outputs[render_output_name], output_node.inputs[0]
+        )
+        return tree, [render_layers, output_node]
+    def load_temp_exr_first_channel(
+        self,
+        temp_path: Path,
+        error_message: str,
+    ) -> np.ndarray:
+        """Load the first channel from a temporary EXR and flip to image space."""
+        if not temp_path.exists():
+            raise FileNotFoundError(error_message.format(path=temp_path))
+        temp_image = bpy.data.images.load(str(temp_path), check_existing=False)
+        try:
+            width, height = temp_image.size
+            channels = temp_image.channels
+            pixels = np.array(temp_image.pixels[:], dtype=np.float32)
+            if pixels.size != width * height * channels:
+                raise RuntimeError(
+                    f"Unexpected EXR image layout for {temp_path}."
+                )
+            image = pixels.reshape(height, width, channels)[..., 0]
+            return np.flipud(image)
+        finally:
+            bpy.data.images.remove(temp_image)
+    def load_instance_seg_temp_output(self, temp_path: Path) -> np.ndarray:
+        instance_seg = self.load_temp_exr_first_channel(
+            temp_path,
+            "Instance segmentation file not generated: {path}",
+        )
+        return np.ascontiguousarray(np.rint(instance_seg).astype(np.uint16))
+    def load_flow_depth_temp_output(self, temp_path: Path) -> np.ndarray:
+        depth = self.load_temp_exr_first_channel(
+            temp_path,
+            "Flow depth file not generated: {path}",
+        )
+        depth = np.ascontiguousarray(depth.astype(np.float32))
+        depth[~np.isfinite(depth)] = 0.0
+        return depth
+    def build_instance_seg_visualization(
+        self, instance_seg: np.ndarray, max_instance_id: int
+    ) -> np.ndarray:
+        """Map instance ids to deterministic RGB colors for visualization."""
+        color_lut = np.zeros((max_instance_id + 1, 3), dtype=np.uint8)
+        for instance_id in range(1, max_instance_id + 1):
+            color_lut[instance_id] = (
+                (instance_id * 37) % 256,
+                (instance_id * 67) % 256,
+                (instance_id * 97) % 256,
+            )
+        return color_lut[instance_seg]
+    def save_instance_seg_outputs(
+        self,
+        output_path: Path,
+        instance_seg: np.ndarray,
+    ) -> None:
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        vis_output_path = self.get_instance_seg_vis_output_path(output_path)
+        visualization = self.build_instance_seg_visualization(
+            instance_seg=instance_seg,
+            max_instance_id=int(instance_seg.max(initial=0)),
+        )
+        if not cv2.imwrite(str(vis_output_path), visualization):
+            raise RuntimeError(
+                f"Failed to write instance segmentation preview: "
+                f"{vis_output_path}"
+            )
+    def build_flow_visualization(self, flow: np.ndarray) -> np.ndarray:
+        flow_float = flow.astype(np.float32)
+        magnitude, angle = cv2.cartToPolar(
+            flow_float[..., 0],
+            flow_float[..., 1],
+            angleInDegrees=True,
+        )
+        max_magnitude = float(np.percentile(magnitude, 99.0))
+        if max_magnitude <= 1e-6:
+            max_magnitude = 1.0
+        magnitude_norm = np.clip(magnitude / max_magnitude, 0.0, 1.0)
+        hsv = np.zeros((*flow.shape[:2], 3), dtype=np.float32)
+        hsv[..., 0] = np.mod(angle, 360.0)
+        hsv[..., 1] = magnitude_norm
+        hsv[..., 2] = 1.0
+        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
+        return np.clip(bgr * 255.0, 0.0, 255.0).astype(np.uint8)
+    def get_camera_intrinsics(
+        self, camera: bpy.types.Object, width: int, height: int
+    ) -> tuple[float, float, float, float]:
+        camera_data = camera.data
+        fx = width / (2.0 * math.tan(camera_data.angle_x * 0.5))
+        fy = height / (2.0 * math.tan(camera_data.angle_y * 0.5))
+        cx = (width - 1.0) * 0.5
+        cy = (height - 1.0) * 0.5
+        return fx, fy, cx, cy
+    def build_camera_matrix_world(
+        self,
+        xyz: tuple[float, float, float] | list[float],
+        rotation_deg: tuple[float, float, float] | list[float],
+    ) -> Matrix:
+        rotation = Euler(self.get_rotation_radians(rotation_deg), "XYZ")
+        translation = Matrix.Translation(Vector(tuple(xyz)))
+        return translation @ rotation.to_matrix().to_4x4()
+    def compute_flow_from_depth(
+        self,
+        depth: np.ndarray,
+        camera: bpy.types.Object,
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Project depth into a target camera and derive dense 2D flow.
+        Args:
+            depth: 2D ``(height, width)`` depth map rendered from ``camera``.
+                NOTE(review): the back-projection treats each value as the
+                distance along the camera's viewing axis - confirm against
+                the render pass.
+            camera: Source camera; supplies intrinsics and ``matrix_world``.
+        Returns:
+            ``(flow, valid_mask)``. ``flow`` is ``(height, width, 2)`` float32
+            holding target-minus-source pixel coordinates (x, y). ``valid_mask``
+            is a boolean map that is True only where the source depth is
+            finite and positive, the point has target-camera z below -1e-6, and
+            the projection lands inside the frame. Elsewhere flow is 0.
+        """
+        height, width = depth.shape
+        fx, fy, cx, cy = self.get_camera_intrinsics(camera, width, height)
+        valid = np.isfinite(depth) & (depth > 0.0)
+        valid_mask = np.zeros((height, width), dtype=bool)
+        if not np.any(valid):
+            return np.zeros((height, width, 2), dtype=np.float32), valid_mask
+        # Pixel grid: u is the column index, v is the row index.
+        u_coords, v_coords = np.meshgrid(
+            np.arange(width, dtype=np.float32),
+            np.arange(height, dtype=np.float32),
+        )
+        # Back-project valid pixels into the source camera frame. Points get
+        # negative z, and y decreases as the row index grows.
+        depth_valid = depth[valid]
+        x_cam = ((u_coords[valid] - cx) / fx) * depth_valid
+        y_cam = (-(v_coords[valid] - cy) / fy) * depth_valid
+        z_cam = -depth_valid
+        # Homogeneous points stored as rows, hence the transposed matrices.
+        camera_points = np.stack(
+            [x_cam, y_cam, z_cam, np.ones_like(z_cam)], axis=1
+        )
+        source_matrix_world = np.array(camera.matrix_world, dtype=np.float64)
+        # Target pose comes from the configured flow camera position/rotation.
+        target_matrix_world = np.array(
+            self.build_camera_matrix_world(
+                self.flow_camera_xyz,
+                self.flow_camera_rotation_deg,
+            ),
+            dtype=np.float64,
+        )
+        target_world_to_camera = np.linalg.inv(target_matrix_world)
+        world_points = camera_points @ source_matrix_world.T
+        target_camera_points = world_points @ target_world_to_camera.T
+        target_z = target_camera_points[:, 2]
+        # Keep points with z < -1e-6 in the target frame; this also avoids
+        # dividing by (near) zero in the projection below.
+        positive_depth = target_z < -1e-6
+        flow = np.zeros((height, width, 2), dtype=np.float32)
+        if not np.any(positive_depth):
+            return flow, valid_mask
+        # Project with the same intrinsics as the source camera.
+        projected_x = (
+            fx
+            * (
+                target_camera_points[positive_depth, 0]
+                / -target_z[positive_depth]
+            )
+            + cx
+        )
+        projected_y = (
+            -fy
+            * (
+                target_camera_points[positive_depth, 1]
+                / -target_z[positive_depth]
+            )
+            + cy
+        )
+        in_frame = (
+            (projected_x >= 0.0)
+            & (projected_x < width)
+            & (projected_y >= 0.0)
+            & (projected_y < height)
+        )
+        if not np.any(in_frame):
+            return flow, valid_mask
+        source_x = u_coords[valid][positive_depth]
+        source_y = v_coords[valid][positive_depth]
+        flow_valid = np.stack(
+            [
+                projected_x[in_frame] - source_x[in_frame],
+                projected_y[in_frame] - source_y[in_frame],
+            ],
+            axis=1,
+        ).astype(np.float32)
+        # Boolean-mask indexing returns copies, so the results are scattered
+        # back level by level: in_frame -> positive_depth -> valid -> image.
+        flow_buffer = flow[valid]
+        positive_depth_buffer = flow_buffer[positive_depth]
+        positive_depth_buffer[in_frame] = flow_valid
+        flow_buffer[positive_depth] = positive_depth_buffer
+        flow[valid] = flow_buffer
+        # Same nested scatter for the validity mask.
+        valid_mask_buffer = valid_mask[valid]
+        positive_depth_mask = valid_mask_buffer[positive_depth]
+        positive_depth_mask[in_frame] = True
+        valid_mask_buffer[positive_depth] = positive_depth_mask
+        valid_mask[valid] = valid_mask_buffer
+        return flow, valid_mask
+    def save_numpy_array(self, output_path: Path, array: np.ndarray) -> None:
+        """Persist a NumPy array atomically to avoid partial writes."""
+        temp_output_path = output_path.with_suffix(".tmp.npy")
+        if temp_output_path.exists():
+            temp_output_path.unlink()
+        np.save(temp_output_path, array)
+        temp_output_path.replace(output_path)
+    def save_flow_outputs(
+        self,
+        output_path: Path,
+        flow: np.ndarray,
+        valid_mask: np.ndarray,
+    ) -> None:
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        flow_output_path = self.get_flow_output_path(output_path)
+        flow_valid_output_path = self.get_flow_valid_output_path(output_path)
+        flow_vis_output_path = self.get_flow_vis_output_path(output_path)
+        self.save_numpy_array(flow_output_path, flow)
+        self.save_numpy_array(flow_valid_output_path, valid_mask)
+        flow_vis = self.build_flow_visualization(flow)
+        if not cv2.imwrite(str(flow_vis_output_path), flow_vis):
+            raise RuntimeError(
+                f"Failed to write flow preview: {flow_vis_output_path}"
+            )
+    def get_preview_output_path(
+        self,
+        output_path: Path,
+        render_pass_name: str,
+        occurrence_index: int = 1,
+    ) -> Path | None:
+        preview_output_paths = {
+            "rgb": output_path,
+            "depth": self.get_depth_vis_output_path(output_path),
+            "normal": self.get_normal_output_path(output_path),
+            "mesh": self.get_mesh_output_path(output_path),
+            "instance_seg": self.get_instance_seg_vis_output_path(output_path),
+            "flow": self.get_flow_vis_output_path(output_path),
+        }
+        preview_output_path = preview_output_paths.get(render_pass_name)
+        if preview_output_path is None:
+            return None
+        return self.build_occurrence_output_path(
+            preview_output_path, occurrence_index
+        )
+    def load_preview_image(self, image_path: Path) -> np.ndarray:
+        image = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
+        if image is None:
+            raise FileNotFoundError(
+                f"Failed to read preview image: {image_path}"
+            )
+        return image
+    def collect_composite_images(
+        self, output_path: Path
+    ) -> list[tuple[str, np.ndarray]]:
+        composite_images: list[tuple[str, np.ndarray]] = []
+        for (
+            render_pass_name,
+            occurrence_index,
+        ) in self.iter_render_pass_occurrences():
+            preview_output_path = self.get_preview_output_path(
+                output_path,
+                render_pass_name,
+                occurrence_index,
+            )
+            if preview_output_path is None or not preview_output_path.exists():
+                continue
+            composite_images.append(
+                (
+                    render_pass_name,
+                    self.load_preview_image(preview_output_path),
+                )
+            )
+        return composite_images
+    def replicate_duplicate_preview_outputs(self, output_path: Path) -> None:
+        """Materialize repeated preview outputs without re-rendering."""
+        for (
+            render_pass_name,
+            occurrence_index,
+        ) in self.iter_render_pass_occurrences():
+            if occurrence_index == 1:
+                continue
+            source_output_path = self.get_preview_output_path(
+                output_path, render_pass_name
+            )
+            duplicate_output_path = self.get_preview_output_path(
+                output_path,
+                render_pass_name,
+                occurrence_index,
+            )
+            if source_output_path is None or duplicate_output_path is None:
+                continue
+            if not source_output_path.exists():
+                raise FileNotFoundError(
+                    f"Preview output not generated for repeated pass "
+                    f"{render_pass_name}: {source_output_path}"
+                )
+            if duplicate_output_path.exists():
+                duplicate_output_path.unlink()
+            shutil.copyfile(source_output_path, duplicate_output_path)
+    def get_composite_separator_boundaries(
+        self,
+        render_pass_names: list[str] | tuple[str, ...],
+        boundaries: np.ndarray,
+    ) -> list[float]:
+        """Return separator boundaries for adjacent passes that differ."""
+        if len(boundaries) != len(render_pass_names) + 1:
+            raise ValueError(
+                "boundaries length must match the number of render passes + 1."
+            )
+        separator_boundaries: list[float] = []
+        for index, boundary in enumerate(boundaries[1:-1], start=1):
+            if render_pass_names[index - 1] == render_pass_names[index]:
+                continue
+            separator_boundaries.append(float(boundary))
+        return separator_boundaries
+    def build_composite_image(
+        self,
+        images: list[np.ndarray],
+        render_pass_names: list[str] | tuple[str, ...],
+        separator_width_px: int = 6,
+    ) -> np.ndarray:
+        if not images:
+            raise ValueError("At least one image is required for composition.")
+        if len(images) != len(render_pass_names):
+            raise ValueError(
+                "images and render_pass_names must have the same length."
+            )
+        base_height, base_width = images[0].shape[:2]
+        resized_images = [
+            (
+                image
+                if image.shape[:2] == (base_height, base_width)
+                else cv2.resize(
+                    image,
+                    (base_width, base_height),
+                    interpolation=cv2.INTER_LINEAR,
+                )
+            )
+            for image in images
+        ]
+        x_coords = np.broadcast_to(
+            np.arange(base_width, dtype=np.float32),
+            (base_height, base_width),
+        )
+        y_coords = np.broadcast_to(
+            np.arange(base_height, dtype=np.float32)[:, None],
+            (base_height, base_width),
+        )
+        slash_slope = 0.28 * (base_width / base_height)
+        diagonal_coord = x_coords + y_coords * slash_slope
+        diagonal_min = float(diagonal_coord.min())
+        diagonal_max = float(diagonal_coord.max())
+        boundaries = np.linspace(
+            diagonal_min, diagonal_max, len(resized_images) + 1
+        )
+        composite = np.zeros_like(resized_images[0])
+        region_indices = np.digitize(
+            diagonal_coord, boundaries[1:-1], right=False
+        )
+        for image_index, image in enumerate(resized_images):
+            composite[region_indices == image_index] = image[
+                region_indices == image_index
+            ]
+        slash_mask = np.zeros((base_height, base_width), dtype=bool)
+        separator_boundaries = self.get_composite_separator_boundaries(
+            render_pass_names, boundaries
+        )
+        for boundary in separator_boundaries:
+            slash_mask |= (
+                np.abs(diagonal_coord - boundary) <= separator_width_px
+            )
+        composite[slash_mask] = 255
+        return composite
+    def save_composite_preview(self, output_path: Path) -> None:
+        composite_images = self.collect_composite_images(output_path)
+        if len(composite_images) < 2:
+            return
+        composite_output_path = self.get_composite_output_path(
+            tuple(render_pass_name for render_pass_name, _ in composite_images)
+        )
+        composite_image = self.build_composite_image(
+            [image for _, image in composite_images],
+            [render_pass_name for render_pass_name, _ in composite_images],
+        )
+        if not cv2.imwrite(str(composite_output_path), composite_image):
+            raise RuntimeError(
+                f"Failed to write composite preview: {composite_output_path}"
+            )
+    def render_flow_pass(self, output_path: Path) -> None:
+        self.validate_flow_args()
+        camera = self.scene.camera
+        if camera is None:
+            raise ValueError("Scene camera is required for flow rendering.")
+        temp_output_path = self.get_flow_depth_temp_path(output_path)
+        def finalize_flow_output(depth: np.ndarray) -> None:
+            flow, valid_mask = self.compute_flow_from_depth(
+                depth=depth, camera=camera
+            )
+            self.save_flow_outputs(
+                output_path=output_path,
+                flow=flow,
+                valid_mask=valid_mask,
+            )
+        self.render_temp_output_pass(
+            output_path=output_path,
+            temp_output_path=temp_output_path,
+            add_output_node=self.add_flow_depth_output_node,
+            load_temp_output=self.load_flow_depth_temp_output,
+            finalize_output=finalize_flow_output,
+            color_mode="RGB",
+            color_depth="8",
+            enable_depth_pass=True,
+        )
+    def render_normal_pass(self, output_path: Path) -> None:
+        normal_output_path = self.get_normal_output_path(output_path)
+        self.render_material_override_pass(
+            preview_output_path=normal_output_path,
+            material_factory=self.create_view_normal_material,
+            color_mode="RGB",
+        )
+    def render_mesh_pass(self, output_path: Path) -> None:
+        mesh_output_path = self.get_mesh_output_path(output_path)
+        self.render_material_override_pass(
+            preview_output_path=mesh_output_path,
+            material_factory=self.create_mesh_preview_material,
+            color_mode="RGBA",
+        )
+    def render_instance_seg_pass(self, output_path: Path) -> None:
+        original_pass_indices = self.snapshot_object_pass_indices()
+        self.assign_instance_ids()
+        temp_output_path = self.get_instance_seg_temp_path(output_path)
+        def finalize_instance_seg_output(instance_seg: np.ndarray) -> None:
+            self.save_instance_seg_outputs(
+                output_path=output_path,
+                instance_seg=instance_seg,
+            )
+        try:
+            self.render_temp_output_pass(
+                output_path=output_path,
+                temp_output_path=temp_output_path,
+                add_output_node=self.add_instance_seg_output_node,
+                load_temp_output=self.load_instance_seg_temp_output,
+                finalize_output=finalize_instance_seg_output,
+                color_mode="BW",
+                color_depth="16",
+                enable_object_index_pass=True,
+            )
+        finally:
+            self.restore_object_pass_indices(original_pass_indices)
+    def render(self, output_path: Path) -> None:
+        """Run the requested render passes and write final outputs.
+        Args:
+            output_path: RGB output path; the other passes derive their
+                output paths from it.
+        Raises:
+            ValueError: If an auxiliary output target is not the depth
+                visualization path.
+        """
+        self.scene.use_nodes = False
+        auxiliary_outputs: list[tuple[Path, Path]] = []
+        # "rgb" and "depth" share one base render; the other passes below
+        # run their own render methods.
+        needs_base_render = bool({"rgb", "depth"} & set(self.render_passes))
+        if "depth" in self.render_passes:
+            auxiliary_outputs = self.configure_auxiliary_outputs(
+                output_path, self.render_passes, self.depth_mode
+            )
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        if "rgb" in self.render_passes:
+            self.scene.render.filepath = str(output_path)
+        if needs_base_render:
+            # The still image is only written when "rgb" was requested.
+            bpy.ops.render.render(write_still="rgb" in self.render_passes)
+        # Convert temporary auxiliary outputs into their final files.
+        for temp_path, final_path in auxiliary_outputs:
+            if final_path == self.get_depth_vis_output_path(output_path):
+                self.finalize_depth_output(temp_path, final_path)
+                continue
+            raise ValueError(f"Unsupported render output target: {final_path}")
+        if auxiliary_outputs:
+            # Reset the compositor before the remaining passes run.
+            self.clear_compositor_tree()
+            self.scene.use_nodes = False
+        if "normal" in self.render_passes:
+            self.render_normal_pass(output_path)
+        if "mesh" in self.render_passes:
+            self.render_mesh_pass(output_path)
+        if "instance_seg" in self.render_passes:
+            self.render_instance_seg_pass(output_path)
+        if "flow" in self.render_passes:
+            self.render_flow_pass(output_path)
+        # Repeated passes reuse the first occurrence's preview; the composite
+        # is assembled last, once every preview file exists.
+        self.replicate_duplicate_preview_outputs(output_path)
+        self.save_composite_preview(output_path)
+    def run(self) -> None:
+        """Prepare the scene, configure rendering, and execute all passes."""
+        rgb_output_path = self.get_rgb_output_path()
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.clear_scene()
+        self.import_usd()
+        self.validate_glb_args()
+        imported_glb_objects = self.import_glb_asset()
+        self.place_glb_asset(imported_glb_objects)
+        min_corner, max_corner = self.get_scene_bbox()
+        center = (min_corner + max_corner) * 0.5
+        diagonal = (max_corner - min_corner).length
+        self.create_camera()
+        self.ensure_lighting(diagonal, center, max_corner.z)
+        world_created = self.ensure_world()
+        self.add_fill_light(
+            diagonal,
+            center,
+            max_corner.z,
+            energy=self.fill_light_energy,
+        )
+        if world_created:
+            self.add_light_rig(
+                diagonal,
+                center,
+                max_corner.z,
+                area_energy=1500.0,
+                sun_energy=0.35,
+                prefix="Fill",
+            )
+        self.configure_color_management()
+        self.configure_cycles()
+        with tempfile.TemporaryDirectory(
+            prefix="render_usd_", dir=None
+        ) as temp_dir:
+            self.temp_dir = Path(temp_dir)
+            self.render(rgb_output_path)
+            self.temp_dir = None
+        logger.info("Rendered outputs to %s", self.output_dir)
+def main() -> None:
+    logging.basicConfig(level=logging.INFO)
+    args = _parse_args()
+    RenderUsd.from_args(args).run()
+# Run the renderer only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

embodied_gen/skills/README.md ADDED Viewed

	@@ -0,0 +1,62 @@

+# EmbodiedGen Skills
+This directory is the canonical home for EmbodiedGen reusable skills.
+The root of `embodied_gen/skills` contains only generic skill sources.
+Runtime-specific packaging should live in adapter subdirectories such as
+`embodied_gen/skills/claude_adapter/`.
+## Included generic skills
+- `asset-creator`
+- `asset-retrieval`
+- `background-creator`
+- `layout-creator`
+- `sim-runner`
+- `asset-converter`
+- `asset-scale`
+- `room-creator`
+- `spatial-computing`
+## Claude plugin package
+Claude-compatible slash commands and plugin manifest are under:
+```text
+embodied_gen/skills/claude_adapter/
+```
+The local marketplace manifest is:
+```text
+embodied_gen/skills/claude_adapter/.claude-plugin/marketplace.json
+```
+The actual Claude plugin package is:
+```text
+embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/
+```
+Current commands include:
+- `/embodiedgen-skills:assets`
+- `/embodiedgen-skills:background`
+- `/embodiedgen-skills:layout`
+- `/embodiedgen-skills:sim`
+- `/embodiedgen-skills:convert`
+- `/embodiedgen-skills:process`
+- `/embodiedgen-skills:room`
+- `/embodiedgen-skills:spatial`
+## Local install for Claude
+```bash
+bash install/install_agent_plugin.sh
+```
+## Notes
+- Generic skills stay in their original directories under `embodied_gen/skills/`.
+- Claude-specific files live only under `embodied_gen/skills/claude_adapter/`.
+- This keeps the skill source portable for Codex, Copilot, and other runtimes.

embodied_gen/skills/asset-converter/SKILL.md ADDED Viewed

	@@ -0,0 +1,88 @@

+---
+name: asset-converter
+description: Convert EmbodiedGen URDF assets to simulator-specific formats (USD/MJCF/URDF) using embodied_gen.data.asset_converter APIs. Use this skill whenever users ask to export assets for IsaacSim, MuJoCo, Genesis, IsaacGym, PyBullet, or SAPIEN, batch-convert URDF assets, or choose the correct converter/source_type per simulator.
+---
+# Asset Converter
+Unified entry for simulator-targeted asset conversion using `embodied_gen.data.asset_converter`.
+## When To Use
+Use this skill when users want to:
+- Convert EmbodiedGen assets for IsaacSim (`USD`) or MuJoCo/Genesis (`MJCF`).
+- Batch-convert multiple URDF assets into simulator-ready outputs.
+- Map simulator names to the correct target format and conversion strategy.
+- Decide when conversion is unnecessary (URDF can be used directly).
+## Routing Rules (Core)
+1. **IsaacSim** -> convert to `USD`.
+2. **MuJoCo / Genesis** -> convert to `MJCF` (`.xml`).
+3. **SAPIEN / IsaacGym / PyBullet** -> use EmbodiedGen `.urdf` directly (no conversion required).
+## Pre-checks
+1. Run from repository root with `embodiedgen` environment active.
+2. Confirm input URDF path(s) exist.
+3. For USD conversion, ensure IsaacLab/IsaacSim conversion dependencies are available.
+4. Prefer list inputs for `urdf_files` and `target_dirs` (same length, aligned by index).
+## Standard Python API Template
+```python
+from embodied_gen.data.asset_converter import cvt_embodiedgen_asset_to_anysim
+from embodied_gen.utils.enum import AssetType, SimAssetMapper
+simulator_name = "mujoco"  # isaacsim / mujoco / genesis / sapien / isaacgym / pybullet
+asset_paths = cvt_embodiedgen_asset_to_anysim(
+    urdf_files=[
+        "outputs/demo_assets/remote_control/result/remote_control.urdf",
+    ],
+    target_dirs=[
+        "outputs/demo_assets/remote_control/mjcf",
+    ],
+    target_type=SimAssetMapper[simulator_name],
+    source_type=AssetType.URDF,
+    overwrite=True,
+)
+print(asset_paths)
+```
+## Source Type Guidance
+- For `MJCF` target: prefer `source_type=AssetType.URDF`.
+- For `USD` target: use `source_type=AssetType.MESH` by default; `AssetType.URDF` path is also supported when needed.
+- For direct-URDF simulators (`sapien`, `isaacgym`, `pybullet`): skip conversion.
+## Direct Converter Template (Advanced)
+```python
+from embodied_gen.data.asset_converter import AssetConverterFactory
+from embodied_gen.utils.enum import AssetType
+converter = AssetConverterFactory.create(
+    target_type=AssetType.USD,
+    source_type=AssetType.MESH,
+)
+with converter:
+    converter.convert(
+        "outputs/demo_assets/remote_control/result/remote_control.urdf",
+        "outputs/demo_assets/remote_control/usd/remote_control.usd",
+    )
+```
+## Output Conventions
+- `MJCF`: `<target_dir>/<asset_name>.xml`
+- `USD`: `<target_dir>/<asset_name>.usd`
+- API return: `{<input_urdf_path>: <converted_output_path>}`
+## Failure Handling and Retry
+1. Unsupported conversion pair: verify `target_type` + `source_type` mapping.
+2. Missing dependencies (USD path): install/activate IsaacLab + required USD stack.
+3. Missing output file: verify parent output directory permissions and path correctness.
+4. Batch mismatch: ensure `len(urdf_files) == len(target_dirs)`.

embodied_gen/skills/asset-creator/SKILL.md ADDED Viewed

	@@ -0,0 +1,96 @@

+---
+name: asset-creator
+description: Create 3D assets with EmbodiedGen using img3d-cli, text3d-cli, and texture-cli. Use this skill whenever users ask to generate assets from images/text, texture existing meshes, run retry/seed controlled generation, or choose the proper asset-generation CLI from mixed requirements.
+---
+# Asset Creator
+Unified entry for three EmbodiedGen asset-generation CLIs: `img3d-cli`, `text3d-cli`, and `texture-cli`.
+## When To Use
+Use this skill when users want to:
+- Generate a 3D asset from one or more input images.
+- Generate 3D assets in batch from text prompts.
+- Generate or edit textures for existing meshes.
+- Get help choosing the correct CLI from mixed asset-generation requirements.
+## Routing Rules (Core)
+1. `img3d-cli`: input is image paths (`--image_path` or `--image_root`).
+2. `text3d-cli`: input is text prompts (`--prompts`) and target is direct asset output.
+3. `texture-cli`: input is existing mesh path(s) (`--mesh_path`) plus texture prompt(s) (`--prompt`).
+## Pre-checks
+1. Run commands from the repository root.
+2. Confirm the active environment is `embodiedgen`.
+3. If CLI commands are unavailable, run `pip install -e .` to register entrypoints.
+## Standard Command Templates
+### 1) Image to 3D: `img3d-cli`
+```bash
+img3d-cli --image_path .../sample.jpg --n_retry 1 --output_root outputs/imageto3d
+```
+Common parameters:
+- `--image_path` / `--image_root`
+- `--output_root`
+- `--n_retry`
+- `--seed`
+- `--skip_exists`
+---
+### 2) Text to 3D: `text3d-cli`
+```bash
+text3d-cli \
+  --prompts "small bronze figurine of a lion" "A globe with wooden base" \
+  --n_image_retry 1 --n_asset_retry 1 --n_pipe_retry 1 \
+  --seed_img 0 \
+  --output_root outputs/textto3d
+```
+Common parameters:
+- `--prompts`
+- `--output_root`
+- `--asset_names`
+- `--n_image_retry --n_asset_retry --n_pipe_retry`
+- `--seed_img --seed_3d`
+---
+### 3) Mesh Texture Generation: `texture-cli`
+```bash
+texture-cli \
+  --mesh_path ".../horse.obj" \
+  --prompt "A gray horse head with flying mane and brown eyes" \
+  --output_root "outputs/texture_gen" \
+  --seed 0
+```
+Common parameters:
+- `--mesh_path` (supports multiple inputs)
+- `--prompt` (must align 1:1 with mesh inputs)
+- `--output_root`
+- `--seed`
+- `--texture_size`
+- `--ip_adapt_scale --ip_img_path` (optional reference-image control)
+---
+## Output Conventions
+- `img3d-cli`: each sample is typically under `<output_root>/<sample>/result/`.
+- `text3d-cli`: `<output_root>/asset3d/<asset_name>/result/`.
+- `texture-cli`: `<output_root>/<mesh_stem>/texture_mesh/`.
+## Failure Handling and Retry
+1. OOM or GPU pressure: reduce batch size and concurrency.
+2. Unstable quality: increase `--n_retry` or `--n_*_retry`.
+3. Missing outputs: verify output-root permissions and path spelling; prefer absolute paths.

embodied_gen/skills/asset-retrieval/SKILL.md ADDED Viewed

	@@ -0,0 +1,87 @@

+---
+name: asset-retrieval
+description: Retrieve existing EmbodiedGen assets from a configurable dataset index CSV by natural-language descriptions and return matching `.urdf` paths. Use when users describe an asset they want, ask to find one or several existing URDF assets, or need a fast lookup over the local asset index instead of scanning folders manually.
+---
+# Asset Retrieval
+Look up simulation-ready assets from `dataset_index.csv` and return `.urdf`
+paths. The CSV index is the single source of truth.
+## Workflow
+**Preferred — agent reads CSV directly:**
+1. Read `dataset_index.csv` into context.
+2. Semantically match the user's description (open-ended, fuzzy, or in any
+   language) against `category`, `secondary_category`, `primary_category`,
+   and `description` columns.
+3. Return the best-matching absolute `.urdf` path; return multiple candidates
+   when the request is broad or explicitly asks for several.
+4. Briefly explain why the returned asset matches.
+This path handles open-ended queries like "a tall chair suitable for a
+coffee shop" or "能放在客厅角落的落地灯" that pure keyword matching cannot
+resolve.
+**Fallback — CLI script (no network / no LLM):**
+When the agent is unavailable, use the helper script which performs offline
+keyword-based ranking:
+```bash
+python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
+  "modern dining chair curved backrest"
+```
+For the CLI path, rewrite open-ended or Chinese descriptions into compact
+English keywords first (e.g. `能放在客厅角落的落地灯` → `floor lamp`).
+## Index Resolution
+Checked in order — first match wins:
+1. `--index-file` CLI argument
+2. `$EMBODIEDGEN_DATASET_INDEX` environment variable
+3. `$EMBODIEDGEN_DATASET_ROOT/dataset_index.csv`
+4. `<repo-root>/outputs/EmbodiedGenData/dataset/dataset_index.csv`
+Dataset root follows a parallel order (`--dataset-root` →
+`$EMBODIEDGEN_DATASET_ROOT` → repo default).
+### Required CSV Columns
+`uuid`, `primary_category`, `secondary_category`, `category`, `description`,
+`generate_time`, `urdf_path`
+## Query Guidelines
+- Use explicit object words: `chair`, `bar stool`, `remote control`.
+- Keep discriminating modifiers: `wooden`, `orange`, `modern`, `round`.
+- Open-ended or Chinese descriptions are fine for the agent path; rewrite
+  to English keywords only when using the CLI script.
+## Script Usage
+```bash
+# Single best match (absolute path on stdout)
+python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
+  "modern dining chair curved backrest"
+# Multiple candidates with scores
+python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
+  "orange cushioned bar stool" \
+  --top-k 5 --format json
+# Custom dataset location
+python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
+  "black remote control" \
+  --dataset-root /path/to/dataset \
+  --index-file /path/to/dataset/dataset_index.csv
+# Relative paths instead of absolute
+python embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py \
+  "wooden bar stool" --relative-paths
+```
+Exit code 1 with `"No matching assets found."` on stderr when nothing matches.

embodied_gen/skills/asset-retrieval/scripts/retrieve_asset.py ADDED Viewed

	@@ -0,0 +1,329 @@

+#!/usr/bin/env python3
+"""Retrieve EmbodiedGen asset URDF paths from a CSV index."""
+from __future__ import annotations
+import argparse
+import csv
+import json
+import os
+import re
+import unicodedata
+from dataclasses import dataclass
+from pathlib import Path
# Low-information words that are dropped from tokenized text before matching.
STOP_WORDS = set(
    "a an and asset for in of on or that the to with".split()
)
# A raw token is a maximal run of lowercase ASCII letters and digits.
TOKEN_PATTERN = re.compile(r"[a-z0-9]+")
@dataclass(frozen=True)
class AssetRecord:
    """Immutable view of one index row plus its precomputed search data.

    Instances are built by ``load_records``; the token sets and
    ``search_text`` are derived once at load time so that scoring a query
    does not re-normalize the row.
    """

    # Values taken from the CSV columns of the same name.
    uuid: str
    primary_category: str
    secondary_category: str
    category: str
    description: str
    generate_time: str
    # ``urdf_path`` column as written in the index (whitespace-stripped).
    relative_urdf_path: str
    # ``relative_urdf_path`` resolved against the dataset root.
    absolute_urdf_path: str
    # Normalized category and description text joined with single spaces.
    search_text: str
    # Per-field token sets used for weighted keyword matching.
    primary_tokens: frozenset[str]
    secondary_tokens: frozenset[str]
    category_tokens: frozenset[str]
    description_tokens: frozenset[str]
@dataclass(frozen=True)
class SearchResult:
    """A scored match between a query and one asset record."""

    # Total relevance score; higher ranks first.
    score: float
    # Fraction of the query tokens that matched this record.
    coverage: float
    # The matched index entry.
    record: AssetRecord

    def to_dict(self, use_relative_paths: bool) -> dict[str, object]:
        """Return a JSON-serializable mapping describing this match.

        Args:
            use_relative_paths: Report the dataset-relative URDF path
                instead of the absolute one.

        Returns:
            Mapping with the URDF path, the score and coverage rounded to
            three decimals, and the record's descriptive fields.
        """
        record = self.record
        if use_relative_paths:
            urdf_path = record.relative_urdf_path
        else:
            urdf_path = record.absolute_urdf_path
        payload: dict[str, object] = {
            "urdf_path": urdf_path,
            "score": round(self.score, 3),
            "coverage": round(self.coverage, 3),
        }
        # Descriptive columns are copied through unchanged, in this order.
        for field_name in (
            "uuid",
            "primary_category",
            "secondary_category",
            "category",
            "description",
            "generate_time",
        ):
            payload[field_name] = getattr(record, field_name)
        return payload
def _repo_root() -> Path:
    """Return the directory taken to be the repository root.

    That is the directory four levels above the one containing this script.
    """
    script_path = Path(__file__).resolve()
    return script_path.parents[4]
def _default_dataset_root() -> Path:
    """Return the dataset root used when ``--dataset-root`` is not given.

    A non-empty ``$EMBODIEDGEN_DATASET_ROOT`` wins; otherwise the
    ``outputs/EmbodiedGenData/dataset`` directory under the repository root
    is used.
    """
    env_root = os.getenv("EMBODIEDGEN_DATASET_ROOT")
    if not env_root:
        return _repo_root() / "outputs" / "EmbodiedGenData" / "dataset"
    return Path(env_root).expanduser().resolve()
def _default_index_file(dataset_root: Path) -> Path:
    """Return the index CSV used when ``--index-file`` is not given.

    A non-empty ``$EMBODIEDGEN_DATASET_INDEX`` wins; otherwise the index is
    ``dataset_index.csv`` directly inside ``dataset_root``.
    """
    env_index = os.getenv("EMBODIEDGEN_DATASET_INDEX")
    if not env_index:
        return dataset_root / "dataset_index.csv"
    return Path(env_index).expanduser().resolve()
def _normalize_text(text: str) -> str:
    """Reduce free text to lowercase ASCII words separated by single spaces.

    The text is NFKC-normalized and lowercased, ``_`` and ``-`` become
    spaces, ``&`` becomes the word "and", every remaining character that is
    not a digit, an ASCII letter, or whitespace becomes a space, and runs of
    whitespace are collapsed. A falsy input yields an empty string.
    """
    separators = {ord("_"): " ", ord("-"): " ", ord("&"): " and "}
    lowered = unicodedata.normalize("NFKC", text or "").lower()
    spaced = lowered.translate(separators)
    ascii_only = re.sub(r"[^0-9a-z\s]", " ", spaced)
    collapsed = re.sub(r"\s+", " ", ascii_only)
    return collapsed.strip()
def _normalize_token(token: str) -> str:
    """Apply a light plural-stripping heuristic to a single token.

    Tokens longer than four characters ending in "ies" get that suffix
    replaced by "y". Otherwise, tokens longer than three characters lose a
    trailing "s" unless they end in "ss" or "us". Anything else is returned
    unchanged.
    """
    size = len(token)
    if size > 4 and token.endswith("ies"):
        return f"{token[:-3]}y"
    is_plain_plural = (
        size > 3
        and token.endswith("s")
        and not token.endswith(("ss", "us"))
    )
    return token[:-1] if is_plain_plural else token
def _tokenize(text: str) -> list[str]:
    """Split text into normalized search tokens, in order of appearance.

    Each raw token found in the normalized text is passed through
    ``_normalize_token``; results shorter than two characters or listed in
    ``STOP_WORDS`` are dropped. Duplicates are kept.
    """
    stems = (
        _normalize_token(raw)
        for raw in TOKEN_PATTERN.findall(_normalize_text(text))
    )
    return [
        stem for stem in stems if len(stem) >= 2 and stem not in STOP_WORDS
    ]
def _dedupe_tokens(tokens: list[str]) -> list[str]:
    """Return ``tokens`` without duplicates, keeping first-seen order."""
    # dict keys are unique and keep insertion order, which is exactly the
    # contract needed here.
    return list(dict.fromkeys(tokens))
def load_records(index_file: Path, dataset_root: Path) -> list[AssetRecord]:
    """Load asset records from dataset_index.csv.

    Args:
        index_file: CSV index to read. It must have a header row.
        dataset_root: Directory against which each row's ``urdf_path`` is
            resolved to an absolute path.

    Returns:
        One record per row that names a URDF path, in file order. Rows with
        an empty or missing ``urdf_path`` are skipped, because they would
        otherwise resolve to ``dataset_root`` itself and could be returned
        as a match.
    """
    records: list[AssetRecord] = []
    # "utf-8-sig" reads plain UTF-8 too, but also strips a leading BOM that
    # would otherwise become part of the first column name.
    with index_file.open(newline="", encoding="utf-8-sig") as csv_file:
        for row in csv.DictReader(csv_file):
            relative_urdf_path = (row.get("urdf_path") or "").strip()
            if not relative_urdf_path:
                continue
            absolute_urdf_path = str(
                (dataset_root / relative_urdf_path).resolve()
            )
            # DictReader fills the missing trailing fields of a short row
            # with None, so coerce every text field to a string.
            primary_category = row.get("primary_category") or ""
            secondary_category = row.get("secondary_category") or ""
            category = row.get("category") or ""
            description = row.get("description") or ""
            normalized_parts = (
                _normalize_text(primary_category),
                _normalize_text(secondary_category),
                _normalize_text(category),
                _normalize_text(description),
            )
            records.append(
                AssetRecord(
                    uuid=row.get("uuid") or "",
                    primary_category=primary_category,
                    secondary_category=secondary_category,
                    category=category,
                    description=description,
                    generate_time=row.get("generate_time") or "",
                    relative_urdf_path=relative_urdf_path,
                    absolute_urdf_path=absolute_urdf_path,
                    search_text=" ".join(
                        part for part in normalized_parts if part
                    ),
                    primary_tokens=frozenset(_tokenize(primary_category)),
                    secondary_tokens=frozenset(_tokenize(secondary_category)),
                    category_tokens=frozenset(_tokenize(category)),
                    description_tokens=frozenset(_tokenize(description)),
                )
            )
    return records
def _score_record(
    record: AssetRecord,
    query_text: str,
    query_tokens: list[str],
) -> SearchResult | None:
    """Score one record against a query, or return None when nothing matches.

    Each query token earns the weight of the highest-weighted field that
    contains it: category 8, secondary category 5, primary category 3,
    description 2. A record whose search text contains the whole normalized
    query gets a further 8 points, and 4 points times the fraction of
    matched tokens is added last. A record with no matching token is
    rejected even if the whole-query substring check succeeds.
    """
    weighted_fields = (
        (record.category_tokens, 8.0),
        (record.secondary_tokens, 5.0),
        (record.primary_tokens, 3.0),
        (record.description_tokens, 2.0),
    )
    per_token_scores = [
        max(
            (weight for tokens, weight in weighted_fields if token in tokens),
            default=0.0,
        )
        for token in query_tokens
    ]
    hits = [value for value in per_token_scores if value > 0]
    if not hits:
        return None
    score = 0.0
    for value in hits:
        score += value
    if query_text and query_text in record.search_text:
        score += 8.0
    coverage = len(hits) / len(query_tokens)
    score += 4.0 * coverage
    return SearchResult(score=score, coverage=coverage, record=record)
def _recency_key(generate_time: str | None) -> int:
    """Return ``generate_time`` as an integer for sorting, or 0 if it is not one."""
    try:
        return int(generate_time or 0)
    except (TypeError, ValueError):
        # A malformed timestamp must not abort the whole search; such
        # records simply rank as the oldest among otherwise equal matches.
        return 0


def search_assets(
    records: list[AssetRecord],
    query: str,
    top_k: int,
) -> list[SearchResult]:
    """Return top-k lexical matches for a query.

    Args:
        records: Index entries to search.
        query: Free-text description of the wanted asset.
        top_k: Maximum number of results to return.

    Returns:
        Matching results, best first. Ties on score are broken by higher
        coverage, then by larger ``generate_time``, then by absolute URDF
        path in ascending order.

    Raises:
        ValueError: If the query contains no searchable keywords after
            normalization.
    """
    query_text = _normalize_text(query)
    query_tokens = _dedupe_tokens(_tokenize(query))
    if not query_text or not query_tokens:
        raise ValueError(
            "Query must contain searchable keywords after normalization."
        )
    ranked: list[SearchResult] = []
    for record in records:
        result = _score_record(record, query_text, query_tokens)
        if result is not None:
            ranked.append(result)
    ranked.sort(
        key=lambda result: (
            -result.score,
            -result.coverage,
            -_recency_key(result.record.generate_time),
            result.record.absolute_urdf_path,
        ),
    )
    return ranked[:top_k]
def _parse_args() -> argparse.Namespace:
    """Define the command-line interface and parse the process arguments."""
    parser = argparse.ArgumentParser(
        description="Retrieve EmbodiedGen asset URDF paths from dataset_index.csv."
    )
    add = parser.add_argument
    add("query", help="Natural-language asset query.")
    add(
        "--dataset-root",
        default=str(_default_dataset_root()),
        help=(
            "Dataset root. Default: $EMBODIEDGEN_DATASET_ROOT "
            "or repo-relative dataset path."
        ),
    )
    # The index default is left as None so that main() can derive it from
    # the dataset root that was actually selected.
    add(
        "--index-file",
        default=None,
        help=(
            "CSV index path. Default: $EMBODIEDGEN_DATASET_INDEX "
            "or <dataset-root>/dataset_index.csv."
        ),
    )
    add("--top-k", type=int, default=1, help="Number of matches to return.")
    add(
        "--format",
        choices=("paths", "json"),
        default="paths",
        help="Output format.",
    )
    add(
        "--relative-paths",
        action="store_true",
        help="Return dataset-relative URDF paths instead of absolute paths.",
    )
    return parser.parse_args()
def main() -> int:
    """Run the retrieval CLI and print the matching URDF paths.

    Returns:
        0 when at least one match was printed.

    Raises:
        ValueError: If ``--top-k`` is below 1.
        FileNotFoundError: If the index CSV does not exist.
        SystemExit: With a message when no asset matches the query.
    """
    args = _parse_args()
    if args.top_k < 1:
        raise ValueError("--top-k must be >= 1")

    dataset_root = Path(args.dataset_root).expanduser().resolve()
    if args.index_file:
        index_file = Path(args.index_file).expanduser().resolve()
    else:
        index_file = _default_index_file(dataset_root)
    if not index_file.exists():
        raise FileNotFoundError(f"Dataset index not found: {index_file}")

    records = load_records(index_file=index_file, dataset_root=dataset_root)
    results = search_assets(
        records=records, query=args.query, top_k=args.top_k
    )
    if not results:
        raise SystemExit("No matching assets found.")

    if args.format == "json":
        payload = [
            result.to_dict(use_relative_paths=args.relative_paths)
            for result in results
        ]
        print(json.dumps(payload, ensure_ascii=False, indent=2))
        return 0

    # Plain output: one URDF path per line, best match first.
    path_attr = (
        "relative_urdf_path" if args.relative_paths else "absolute_urdf_path"
    )
    for result in results:
        print(getattr(result.record, path_attr))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

embodied_gen/skills/asset-scale/SKILL.md ADDED Viewed

	@@ -0,0 +1,94 @@

+---
+name: asset-scale
+description: Scale 3D assets including meshes (OBJ, GLB, collision), Gaussian splats, and URDF metadata. Maintains directory structure and processes all related files atomically. Use when resizing assets for simulation compatibility, real-world scale adjustment, or batch asset processing.
+---
+# Asset Scaling
+## Overview
+Scale 3D assets from URDF files, automatically processing all related mesh formats (OBJ, GLB, collision meshes) and Gaussian splatting models. The tool maintains the original directory structure and updates URDF metadata (min_height, max_height, real_height) consistently.
+**When to use**: Use this skill when you need to resize 3D assets for different environments, adjust real-world scale, or prepare assets for simulation with specific size requirements.
+---
+## Core Convention: Input/Output Structure
+The skill expects URDF files at `<asset_dir>/result/<asset_name>.urdf` and outputs to `<output_dir>/<asset_dir_name>/` maintaining the same structure.
+| Mode | Input URDF Path | Output Structure |
+|------|-----------------|------------------|
+| **Normal** | `path/to/asset/result/asset.urdf` | `<output_dir>/asset/result/asset.urdf` |
+| **Inplace** | `path/to/asset/result/asset.urdf` | `path/to/asset/result/asset.urdf` (modified) |
+**Key behaviors**:
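+- **Normal mode**: The entire asset directory is copied to `<output_dir>/<asset_dir_name>/` before scaling. Any existing directory at that location is deleted first, so do not use the asset's own parent directory as `--output-dir`.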
+- **Inplace mode**: Files are modified directly without copying
+- All mesh files (OBJ, GLB, collision, Gaussian splat) are scaled consistently
+- URDF metadata heights are scaled to match new dimensions
+---
+## CLI Examples
+### Example 1: Scale Asset to Output Directory
+Scale a red box asset to 80% of its original size and save to a new location.
+```bash
+python -m embodied_gen.skills.asset-scale.asset_scale \
+  --urdf-path outputs/assets/red_box/result/red_box.urdf \
+  --scale-factor 0.8 \
+  --output-dir outputs/scaled
+```
+**Output**: `outputs/scaled/red_box/result/red_box.urdf`
+---
+### Example 2: Scale Asset Inplace
+Modify the asset files directly without creating a copy (useful for batch processing).
+```bash
+python -m embodied_gen.skills.asset-scale.asset_scale \
+  --urdf-path outputs/assets/red_box/result/red_box.urdf \
+  --scale-factor 0.8 \
+  --inplace
+```
+**Output**: `outputs/assets/red_box/result/red_box.urdf` (modified inplace)
+**Warning**: Inplace mode modifies the original files. Make sure to back up important assets before using this mode.
+---
+## What Gets Scaled
+| File Type | Scaling Method | Location |
+|-----------|---------------|----------|
+| **OBJ Mesh** | Vertex coordinates multiplied by scale factor | `mesh/<name>.obj` |
+| **GLB Mesh** | All geometry vertices scaled | `mesh/<name>.glb` |
+| **Collision Mesh** | Multi-object OBJ parsed and scaled | `mesh/<name>_collision.obj` |
+| **Gaussian Splat** | Position and scale parameters updated | `mesh/<name>_gs.ply` |
+| **URDF Metadata** | `min_height`, `max_height`, `real_height` scaled | `<name>.urdf` |
+**Note**: Material files (textures, MTL) are copied unchanged in normal mode.
+---
+## Common Parameters
+| Parameter | Type | Default | Meaning |
+|-----------|------|---------|---------|
+| `urdf_path` | `str \| Path` | Required | Path to input URDF file |
+| `scale_factor` | `float` | Required | Scaling multiplier (must be positive). E.g., 0.8 = 80%, 1.5 = 150% |
+| `inplace` | `bool` | `False` | If True, modify files inplace. `output_dir` is not required |
+| `output_dir` | `str \| Path \| None` | `None` | Root output directory. Required when `inplace=False`, ignored when `inplace=True` |
+---
+## Next Steps
+- For complete API details and internal methods, see the source code at `embodied_gen/skills/asset-scale/asset_scale.py`

embodied_gen/skills/asset-scale/__init__.py ADDED Viewed

	@@ -0,0 +1,36 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Asset scaling skills for EmbodiedGen."""
+from importlib import import_module
__all__ = ["AssetScaler", "AssetScaleConfig", "scale_asset", "entrypoint"]


def __getattr__(name: str) -> object:
    """Resolve a public name from the ``asset_scale`` submodule on demand.

    The submodule is imported only when one of the names in ``__all__`` is
    first requested, so importing this package alone does not import it.

    Raises:
        AttributeError: If ``name`` is not listed in ``__all__``.
    """
    if name in __all__:
        return getattr(import_module(f"{__name__}.asset_scale"), name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

embodied_gen/skills/asset-scale/asset_scale.py ADDED Viewed

	@@ -0,0 +1,347 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Asset scaling utility for resizing 3D assets and URDF files."""
+import shutil
+import xml.etree.ElementTree as ET
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+import trimesh
+import tyro
+from embodied_gen.models.gs_model import GaussianOperator
+from embodied_gen.utils.log import logger
+__all__ = ["AssetScaler", "AssetScaleConfig", "scale_asset", "entrypoint"]
# Height entries of the URDF metadata that are multiplied by the scale
# factor (shared with urdf_convertor.py).
URDF_HEIGHT_FIELDS = (
    "min_height",
    "max_height",
    "real_height",
)
# Layout convention: <asset_dir>/result/<name>.urdf, meshes in result/mesh/.
URDF_RESULT_DIR = "result"
MESH_DIR = "mesh"
@dataclass
class AssetScaleConfig:
    """Command-line options for scaling one asset.

    Args:
        urdf_path: URDF file of the asset to scale.
        scale_factor: Multiplier applied to the asset (e.g., 0.8 for 80% size).
        inplace: Modify the asset's files where they are; ``output_dir`` is
            then not needed.
        output_dir: Root directory that receives the scaled copy when
            ``inplace`` is False.
    """

    urdf_path: str
    scale_factor: float
    inplace: bool = False
    output_dir: Optional[str] = None
class AssetScaler:
    """Scale 3D assets including meshes, Gaussian splats, and URDF metadata.

    The asset is expected at ``<asset_dir>/result/<name>.urdf`` with its mesh
    files under ``<asset_dir>/result/mesh/``. Scaling rewrites the OBJ, GLB,
    collision OBJ and Gaussian-splat PLY files named after the URDF, and
    multiplies the height metadata stored in the URDF. Mesh files that do
    not exist are skipped silently.
    """

    def __init__(
        self,
        urdf_path: str | Path,
        scale_factor: float,
        output_dir: Optional[str | Path] = None,
        inplace: bool = False,
    ) -> None:
        """Initialize the asset scaler.

        Args:
            urdf_path: Path to the URDF file to scale.
            scale_factor: Scaling factor (e.g., 0.8 for 80% size).
            output_dir: Root output directory for scaled assets (not needed
                if inplace=True).
            inplace: If True, modify files in-place instead of copying to
                output_dir.

        Raises:
            FileNotFoundError: If URDF file does not exist.
            ValueError: If scale_factor is not positive, or if neither
                output_dir nor inplace is specified.
        """
        self.urdf_path = Path(urdf_path)
        self.scale_factor = scale_factor
        self.inplace = inplace

        if not self.urdf_path.exists():
            raise FileNotFoundError(f"URDF file not found: {self.urdf_path}")
        if self.scale_factor <= 0:
            raise ValueError(
                f"Scale factor must be positive, got: {self.scale_factor}"
            )

        # The URDF lives at <asset_dir>/result/<node_name>.urdf, so the asset
        # directory is two levels up and the file stem names the mesh files.
        self.asset_dir = self.urdf_path.parent.parent
        self.node_name = self.urdf_path.stem

        if self.inplace:
            self.output_dir = self.asset_dir.parent
            logger.info(
                f"Running in inplace mode, will modify {self.asset_dir} directly"
            )
        else:
            if output_dir is None:
                raise ValueError("output_dir is required when inplace=False")
            self.output_dir = Path(output_dir)

    def scale(self) -> Path:
        """Execute the complete scaling workflow.

        In inplace mode the asset's own files are rewritten. Otherwise the
        asset directory is first copied to ``<output_dir>/<asset_dir_name>``
        (replacing anything already there) and the copy is rewritten.

        Returns:
            Path to the output URDF file.

        Raises:
            ValueError: In normal mode, if the output location overlaps the
                source asset directory.
        """
        if self.inplace:
            output_urdf_path = self.urdf_path
            self._scale_mesh_files_parallel(self.asset_dir)
            self._scale_urdf_metadata(output_urdf_path)
            logger.info(
                f"Scaled {self.asset_dir} by x{self.scale_factor} (inplace)"
            )
            return output_urdf_path

        output_asset_dir = self.output_dir / self.asset_dir.name
        # Copy first so that the source asset is never modified.
        output_urdf_path = self._copy_asset_structure(output_asset_dir)
        self._scale_mesh_files_parallel(output_asset_dir)
        self._scale_urdf_metadata(output_urdf_path)
        logger.info(
            f"Scaled {self.asset_dir} by x{self.scale_factor} -> {output_asset_dir}"
        )
        return output_urdf_path

    def _copy_asset_structure(self, output_asset_dir: Path) -> Path:
        """Copy asset directory structure to output location.

        Any existing directory at ``output_asset_dir`` is removed first.

        Args:
            output_asset_dir: Target directory for copied assets.

        Returns:
            Path to the copied URDF file.

        Raises:
            ValueError: If ``output_asset_dir`` is the source asset
                directory, or one of the two contains the other.
        """
        source_dir = self.asset_dir.resolve()
        target_dir = output_asset_dir.resolve()
        # The target is wiped before copying. If it were the source itself
        # (e.g. output_dir is the asset's parent directory) or overlapped
        # it, that would destroy the asset or copy it into itself.
        if (
            source_dir == target_dir
            or source_dir in target_dir.parents
            or target_dir in source_dir.parents
        ):
            raise ValueError(
                "Output asset directory overlaps the source asset directory "
                f"({target_dir} vs {source_dir}); choose a different "
                "output_dir or use inplace=True."
            )
        output_asset_dir.parent.mkdir(parents=True, exist_ok=True)
        # ignore_errors=True also covers "target does not exist" without a
        # separate existence check.
        shutil.rmtree(output_asset_dir, ignore_errors=True)
        shutil.copytree(self.asset_dir, output_asset_dir)
        return output_asset_dir / URDF_RESULT_DIR / f"{self.node_name}.urdf"

    def _scale_mesh_files_parallel(self, output_asset_dir: Path) -> None:
        """Scale all mesh files in parallel for efficiency.

        Args:
            output_asset_dir: Directory containing assets to scale.
        """
        mesh_dir = output_asset_dir / URDF_RESULT_DIR / MESH_DIR
        # Each entry pairs a file with the method that rewrites it; the
        # files are distinct, so the tasks do not touch each other's data.
        tasks = [
            (mesh_dir / f"{self.node_name}.obj", self._scale_obj_mesh),
            (mesh_dir / f"{self.node_name}.glb", self._scale_glb_mesh),
            (
                mesh_dir / f"{self.node_name}_collision.obj",
                self._scale_collision_mesh,
            ),
            (
                mesh_dir / f"{self.node_name}_gs.ply",
                self._scale_gaussian_splat,
            ),
        ]
        with ThreadPoolExecutor(max_workers=4) as executor:
            futures = [executor.submit(task, path) for path, task in tasks]
            for future in futures:
                future.result()  # Re-raise any exception from the worker.

    def _scale_obj_mesh(self, mesh_path: Path) -> None:
        """Scale the OBJ mesh at ``mesh_path`` in place; no-op if absent."""
        if not mesh_path.exists():
            return
        mesh = trimesh.load(str(mesh_path))
        mesh.apply_scale(self.scale_factor)
        mesh.export(str(mesh_path))

    def _scale_glb_mesh(self, mesh_path: Path) -> None:
        """Scale the GLB mesh at ``mesh_path`` in place; no-op if absent."""
        if not mesh_path.exists():
            return
        mesh = trimesh.load(str(mesh_path))
        # NOTE(review): only the geometry vertices are scaled here; any node
        # translations in the scene graph are left as loaded. Confirm the
        # assets carry identity transforms.
        for mesh_part in mesh.geometry.values():
            mesh_part.apply_scale(self.scale_factor)
        mesh.export(str(mesh_path))

    def _scale_collision_mesh(self, mesh_path: Path) -> None:
        """Scale the multi-object collision OBJ in place; no-op if absent."""
        if not mesh_path.exists():
            return
        scene = trimesh.Scene()
        for mesh_part in self._load_collision_obj(str(mesh_path)):
            mesh_part.apply_scale(self.scale_factor)
            scene.add_geometry(mesh_part)
        scene.export(str(mesh_path))

    def _scale_gaussian_splat(self, mesh_path: Path) -> None:
        """Rescale the Gaussian splat PLY in place; no-op if absent."""
        if not mesh_path.exists():
            return
        gs_model: GaussianOperator = GaussianOperator.load_from_ply(
            str(mesh_path)
        )
        gs_model.rescale(self.scale_factor)
        gs_model.save_to_ply(str(mesh_path))

    def _scale_urdf_metadata(self, urdf_path: Path) -> None:
        """Scale height metadata in URDF file.

        The fields listed in ``URDF_HEIGHT_FIELDS`` under ``link/extra_info``
        are multiplied by the scale factor and written back with three
        decimals. A URDF without ``extra_info`` is left untouched.

        Args:
            urdf_path: Path to URDF file to modify.
        """
        tree = ET.parse(str(urdf_path))
        extra_info = tree.getroot().find("link/extra_info")
        if extra_info is None:
            logger.warning(f"No extra_info found in URDF: {urdf_path}")
            return
        for height_field in URDF_HEIGHT_FIELDS:
            element = extra_info.find(height_field)
            if element is not None and element.text:
                scaled_value = float(element.text) * self.scale_factor
                element.text = f"{scaled_value:.3f}"
        tree.write(str(urdf_path), encoding="utf-8", xml_declaration=True)

    @staticmethod
    def _load_collision_obj(filepath: str) -> list[trimesh.Trimesh]:
        """Robustly load collision OBJ with multiple objects.

        The file is parsed by hand: vertices accumulate across the whole
        file, faces accumulate per ``o``/``g`` group, and each group becomes
        its own mesh with the vertices it does not reference removed.

        Args:
            filepath: Path to collision OBJ file.

        Returns:
            List of trimesh objects, one per object group in the file that
            has at least one face.
        """
        vertices = []
        meshes = []
        current_faces = []

        def flush_group() -> None:
            # Close the current group, if it has any faces, as its own mesh.
            if current_faces and vertices:
                mesh = trimesh.Trimesh(
                    vertices=vertices,
                    faces=current_faces,
                    process=False,
                )
                mesh.remove_unreferenced_vertices()
                meshes.append(mesh)

        # Iterate the file lazily instead of reading it all into memory.
        with open(filepath, "r") as f:
            for line in f:
                if line.startswith("v "):
                    parts = line.split()
                    vertices.append(
                        [float(parts[1]), float(parts[2]), float(parts[3])]
                    )
                elif line.startswith("f "):
                    parts = line.split()
                    # Keep only the vertex index of each "v/vt/vn" triple
                    # and convert it from 1-based to 0-based.
                    face = [int(p.split("/")[0]) - 1 for p in parts[1:]]
                    current_faces.append(face)
                elif line.startswith("o ") or line.startswith("g "):
                    flush_group()
                    current_faces = []
        # The last group has no following "o"/"g" line to close it.
        flush_group()
        return meshes
def scale_asset(
    urdf_path: str | Path,
    scale_factor: float,
    output_dir: Optional[str | Path] = None,
    inplace: bool = False,
) -> Path:
    """Scale the asset described by a URDF file in a single call.

    Convenience wrapper that builds an ``AssetScaler`` and runs it.

    Args:
        urdf_path: Path to the URDF file to scale.
        scale_factor: Scaling factor (e.g., 0.8 for 80% size).
        output_dir: Root output directory for scaled assets (not needed if
            inplace=True).
        inplace: If True, modify files in-place instead of copying to
            output_dir.

    Returns:
        Path to the output URDF file.
    """
    return AssetScaler(
        urdf_path=urdf_path,
        scale_factor=scale_factor,
        output_dir=output_dir,
        inplace=inplace,
    ).scale()
+def entrypoint() -> None:
+    """CLI entrypoint for asset scaling."""
+    config = tyro.cli(AssetScaleConfig)
+    output_urdf = scale_asset(
+        urdf_path=config.urdf_path,
+        scale_factor=config.scale_factor,
+        output_dir=config.output_dir,
+        inplace=config.inplace,
+    )
+    logger.info(f"Scaled asset successfully: {output_urdf}")
+# Run the CLI only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    entrypoint()

embodied_gen/skills/background-creator/SKILL.md ADDED Viewed

	@@ -0,0 +1,70 @@

+---
+name: background-creator
+description: Generate background 3D scenes with EmbodiedGen using scene3d-cli. Use this skill whenever users ask to create room/indoor background scenes from text prompts, pre-generate backgrounds for layout-cli, or control scene3d generation quality/runtime with retry, seed, and gs3d settings.
+---
+# Background Creator
+Unified entry for EmbodiedGen background scene generation via `scene3d-cli`.
+## When To Use
+Use this skill when users want to:
+- Generate indoor/background 3D scenes from text prompts.
+- Pre-generate scene assets for `layout-cli`.
+- Control `scene3d-cli` runtime/quality via seed, retry, and `gs3d` settings.
+## Routing Rule (Core)
+Use `scene3d-cli` when input is scene-level text prompts and output target is a background scene (mesh + 3DGS), not single foreground assets.
+## Pre-checks
+1. Run commands from the repository root.
+2. Confirm the active environment is `embodiedgen`.
+3. Install scene3d dependencies first if needed:
+   `bash install.sh scene3d`
+4. If CLI commands are unavailable, run `pip install -e .` to register entrypoints.
+## Standard Command Template
+```bash
+scene3d-cli --prompts "Art studio with easel and canvas" \
+  --output_dir outputs/bg_scenes \
+  --seed 0 \
+  --gs3d.max_steps 4000 \
+  --disable_pano_check
+```
+## Common Parameters
+- `--prompts`: one or more scene text prompts.
+- `--output_dir`: output root directory for generated scenes.
+- `--seed`: random seed for reproducibility.
+- `--n_retry`: panorama generation retries.
+- `--real_height`: force target real-world room height in meters.
+- `--pano_image_only`: generate only panorama image (debug/fast validation).
+- `--disable_pano_check`: skip panorama quality check.
+- `--keep_middle_result`: keep intermediate training artifacts.
+- `--gs3d.max_steps`: training steps for 3DGS optimization.
+## Output Conventions
+Each prompt is saved under `<output_dir>/scene_xxxx/`, typically including:
+- `gs_model.ply`
+- `mesh_model.ply`
+- `pano_image.png`
+- `prompt.txt`
+- `video.mp4`
+- `gsplat_cfg.yml`
+## Runtime Expectations
+- Typical full generation time is around 30 minutes per scene.
+- Use `--pano_image_only` for quick prompt validation before full generation.
+## Failure Handling and Retry
+1. OOM or GPU pressure: reduce concurrency and lower `--gs3d.max_steps`.
+2. Unstable scene quality: increase `--n_retry` or adjust prompt specificity.
+3. Missing outputs: verify `--output_dir` permissions and use absolute paths.

embodied_gen/skills/claude_adapter/.claude-plugin/marketplace.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "name": "embodiedgen-local",
+  "owner": {
+    "name": "EmbodiedGen"
+  },
+  "plugins": [
+    {
+      "name": "embodiedgen-skills",
+      "description": "EmbodiedGen workflow plugin bundle with slash commands and reusable skills",
+      "version": "0.1.0",
+      "author": {
+        "name": "EmbodiedGen"
+      },
+      "source": "./plugins/embodiedgen-skills"
+    }
+  ]
+}

embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/.claude-plugin/plugin.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "name": "embodiedgen-skills",
+  "version": "0.1.0",
+  "description": "EmbodiedGen workflow plugin bundle with slash commands and reusable skills for asset generation, background generation, layout generation, simulation rendering, asset conversion, room creation, asset scaling, and spatial scene editing",
+  "author": {
+    "name": "EmbodiedGen"
+  }
+}

embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/assets.md ADDED Viewed

	@@ -0,0 +1,32 @@

+---
+description: Run the EmbodiedGen asset generation workflow for image-to-3D, text-to-3D, or texture generation
+argument-hint: "[request or command requirements]"
+---
+# Assets Skill Command
+Route the user's request to the EmbodiedGen asset generation workflow.
+## Workflow
+### Step 1: Interpret the request
+Use `$ARGUMENTS` if provided. If it is empty, ask what the user wants to generate or texture.
+### Step 2: Load the skill
+Use `skill: "asset-creator"`.
+### Step 3: Execute the correct route
+Follow the skill to choose one of:
+- `img3d-cli`
+- `text3d-cli`
+- `texture-cli`
+### Step 4: Deliver
+Return:
+1. The exact command used
+2. The output directory
+3. Any important runtime notes or dependency issues

embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/background.md ADDED Viewed

	@@ -0,0 +1,29 @@

+---
+description: Run the EmbodiedGen background scene generation workflow with scene3d-cli
+argument-hint: "[scene prompt or generation request]"
+---
+# Background Skill Command
+Route the user's request to the EmbodiedGen background generation workflow.
+## Workflow
+### Step 1: Interpret the request
+Use `$ARGUMENTS` if provided. If it is empty, ask for the target room or background scene description.
+### Step 2: Load the skill
+Use `skill: "background-creator"`.
+### Step 3: Execute the workflow
+Follow the skill and build the correct `scene3d-cli` command.
+### Step 4: Deliver
+Return:
+1. The exact command used
+2. The output directory
+3. Expected runtime and any caveats

embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/convert.md ADDED Viewed

	@@ -0,0 +1,32 @@

+---
+description: Run the EmbodiedGen simulator asset conversion workflow for USD, MJCF, or direct URDF usage
+argument-hint: "[target simulator or conversion request]"
+---
+# Convert Skill Command
+Route the user's request to the EmbodiedGen asset conversion workflow.
+## Workflow
+### Step 1: Interpret the request
+Use `$ARGUMENTS` if provided. If it is empty, ask for the target simulator and input URDF path.
+### Step 2: Load the skill
+Use `skill: "asset-converter"`.
+### Step 3: Execute the workflow
+Follow the skill and choose:
+- `USD` for IsaacSim
+- `MJCF` for MuJoCo or Genesis
+- direct `URDF` for SAPIEN, IsaacGym, or PyBullet
+### Step 4: Deliver
+Return:
+1. The exact Python API or command used
+2. The converted output path
+3. Any dependency or simulator-specific notes

embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/layout.md ADDED Viewed

	@@ -0,0 +1,29 @@

+---
+description: Run the EmbodiedGen layout generation workflow with layout-cli
+argument-hint: "[task description or layout generation request]"
+---
+# Layout Skill Command
+Route the user's request to the EmbodiedGen interactive layout workflow.
+## Workflow
+### Step 1: Interpret the request
+Use `$ARGUMENTS` if provided. If it is empty, ask for the task description or task file path.
+### Step 2: Load the skill
+Use `skill: "layout-creator"`.
+### Step 3: Execute the workflow
+Follow the skill and build the correct `layout-cli` command, including `--bg_list` and output settings.
+### Step 4: Deliver
+Return:
+1. The exact command used
+2. The output root
+3. Expected runtime and any dependency warnings

embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/process.md ADDED Viewed

	@@ -0,0 +1,29 @@

+---
+description: Run the EmbodiedGen asset processing workflow for scaling URDF-based assets and related files
+argument-hint: "[asset scaling request]"
+---
+# Process Skill Command
+Route the user's request to the EmbodiedGen asset processing workflow.
+## Workflow
+### Step 1: Interpret the request
+Use `$ARGUMENTS` if provided. If it is empty, ask for the URDF path and desired scale factor.
+### Step 2: Load the skill
+Use `skill: "asset-scale"`.
+### Step 3: Execute the workflow
+Follow the skill and build the correct `python -m embodied_gen.skills.asset-scale.asset_scale` command.
+### Step 4: Deliver
+Return:
+1. The exact command used
+2. The output path
+3. Whether the operation is normal mode or inplace mode

embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/room.md ADDED Viewed

	@@ -0,0 +1,29 @@

+---
+description: Run the EmbodiedGen room generation workflow with room-creator for room or house generation and export
+argument-hint: "[room generation request]"
+---
+# Room Skill Command
+Route the user's request to the EmbodiedGen room creation workflow.
+## Workflow
+### Step 1: Interpret the request
+Use `$ARGUMENTS` if provided. If it is empty, ask for room type, output root, and whether export is needed.
+### Step 2: Load the skill
+Use `skill: "room-creator"`.
+### Step 3: Execute the workflow
+Follow the skill and build the correct `python -m embodied_gen.scripts.room_gen.gen_room` or `room-cli` command.
+### Step 4: Deliver
+Return:
+1. The exact command used
+2. The output directory
+3. Runtime and export-stage notes

embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/sim.md ADDED Viewed

	@@ -0,0 +1,29 @@

+---
+description: Run the EmbodiedGen simulation rendering workflow with sim-cli
+argument-hint: "[layout path or simulation request]"
+---
+# Sim Skill Command
+Route the user's request to the EmbodiedGen simulation rendering workflow.
+## Workflow
+### Step 1: Interpret the request
+Use `$ARGUMENTS` if provided. If it is empty, ask for the `layout.json` path or target simulation request.
+### Step 2: Load the skill
+Use `skill: "sim-runner"`.
+### Step 3: Execute the workflow
+Follow the skill and build the correct `sim-cli` command.
+### Step 4: Deliver
+Return:
+1. The exact command used
+2. The output video path
+3. Any camera, performance, or rendering notes

embodied_gen/skills/claude_adapter/plugins/embodiedgen-skills/commands/spatial.md ADDED Viewed

	@@ -0,0 +1,29 @@

+---
+description: Run the EmbodiedGen spatial computing workflow for floorplans and object placement or deletion in scenes
+argument-hint: "[scene editing request]"
+---
+# Spatial Skill Command
+Route the user's request to the EmbodiedGen spatial computing workflow.
+## Workflow
+### Step 1: Interpret the request
+Use `$ARGUMENTS` if provided. If it is empty, ask for the URDF path and target placement, deletion, or query request.
+### Step 2: Load the skill
+Use `skill: "spatial-computing"`.
+### Step 3: Execute the workflow
+Follow the skill and choose the correct `python -m embodied_gen.skills.spatial-computing.cli.main` or `room-cli -m ...` invocation.
+### Step 4: Deliver
+Return:
+1. The exact command used
+2. The updated output file path
+3. Any constraints about USD updates or exact instance matching

embodied_gen/skills/layout-creator/SKILL.md ADDED Viewed

	@@ -0,0 +1,83 @@

+---
+name: layout-creator
+description: Generate interactive 3D layouts from task descriptions with EmbodiedGen using layout-cli. Use this skill whenever users ask to build task-driven 3D scenes, batch-generate layouts from task files, tune layout generation retries/seeds, or produce simulator-ready layout outputs from background scene lists.
+---
+# Layout Creator
+Unified entry for EmbodiedGen interactive layout generation via `layout-cli`.
+## When To Use
+Use this skill when users want to:
+- Generate interactive 3D scenes from task descriptions.
+- Batch-generate layouts from a task list file.
+- Build simulator-ready layout outputs (`layout.json`, renders) with optional robot insertion.
+- Tune generation quality and stability via retry and seed settings.
+## Routing Rule (Core)
+Use `layout-cli` when the user input is task-level natural language descriptions (e.g., "put the pen in the mug") and the target output is an interactive layout scene, not standalone assets or standalone background scenes.
+## Pre-checks
+1. Run commands from the repository root.
+2. Confirm the active environment is `embodiedgen`.
+3. Confirm background scene list file exists and is readable (via `--bg_list`).
+4. If CLI commands are unavailable, run `pip install -e .` to register entrypoints.
+## Standard Command Templates
+### 1) Generate layouts from inline task descriptions
+```bash
+layout-cli \
+  --task_descs "Place the pen in the mug on the desk" "Put the fruit on the table on the plate" \
+  --bg_list "outputs/example_gen_scenes/scene_part_list.txt" \
+  --output_root "outputs/layouts_gen" \
+  --insert_robot
+```
+### 2) Batch generation from task list file (background run)
+```bash
+layout-cli \
+  --task_descs "apps/assets/example_layout/task_list.txt" \
+  --bg_list "outputs/example_gen_scenes/scene_part_list.txt" \
+  --n_image_retry 4 --n_asset_retry 3 --n_pipe_retry 3 \
+  --output_root "outputs/layouts_gens" \
+  --insert_robot > layouts_gens.log 2>&1 &
+```
+## Common Parameters
+- `--task_descs`: task descriptions or a task-list text file path.
+- `--output_root`: root output directory.
+- `--bg_list`: background scene list file (scene retrieval pool).
+- `--insert_robot`: include robot pose in layout generation/simulation output.
+- `--output_iscene`: export composed scene mesh (`Iscene.glb`).
+- `--n_image_retry --n_asset_retry --n_pipe_retry`: retry controls for text-to-3D subpipeline.
+- `--seed_img --seed_3d --seed_layout`: reproducibility controls.
+- `--n_img_sample --text_guidance_scale --img_denoise_step`: text-to-image / asset-generation controls.
+- `--keep_intermediate`: keep intermediate files from generation substeps.
+## Output Conventions
+Outputs are organized by task index:
+- `<output_root>/task_0000/layout.json`
+- `<output_root>/task_0000/scene_tree.jpg`
+- `<output_root>/task_0000/background/`
+- `<output_root>/task_0000/asset3d/`
+- Optional: `<output_root>/task_0000/Iscene.glb` (when `--output_iscene` is enabled)
+## Runtime Expectations
+- Typical generation time is around 30 minutes per task (depends on retries/GPU/background matching).
+- Batch jobs should run in the background with log redirection (e.g., `nohup layout-cli ... > layouts_gens.log 2>&1 &`) so they keep running after the terminal closes.
+## Failure Handling and Retry
+1. Missing background candidate: verify `--bg_list` path and referenced scene directories.
+2. OOM or GPU pressure: reduce concurrency and lower retry/sample settings.
+3. Poor asset/layout quality: increase retry counts or refine task text.
+4. Missing outputs: verify output permissions and use absolute paths.

embodied_gen/skills/room-creator/SKILL.md ADDED Viewed

	@@ -0,0 +1,83 @@

+---
+name: room-creator
+description: Generate indoor rooms (single room or house) and export URDF/USD by wrapping embodied_gen/scripts/room_gen/gen_room.py. Use when users ask to create rooms with seed control, choose room type and complexity, run generation/export stages, or run reproducible room generation jobs (batch runs can be done by wrapping this command in an outer loop/script).
+---
+# Room Creator
+Generate room scenes with `python -m embodied_gen.scripts.room_gen.gen_room`, which is built on [Infinigen](https://github.com/princeton-vl/infinigen), and optionally export URDF/USD.
+## Use This Workflow
+1. Confirm output root and target room profile.
+2. Choose generation scope:
+   - `--gen --urdf --usd` for full pipeline.
+   - `--gen --no-urdf --no-usd` for generation only.
+   - `--no-gen --urdf --usd` for export from existing blender output.
+3. Run the command from repository root.
+4. Verify output folder: `<output_root>/<RoomType>_seed<seed>/` (if `--seed` is omitted, check the generated seed from logs first).
+## Parameters
+- `--output-root` (required): base output directory.
+- `--room-type`: `Bedroom | LivingRoom | Kitchen | Bathroom | DiningRoom | Office | House`.
+- `--seed`: random seed. For reproducible runs, pass this explicitly; if omitted, a random seed is generated.
+- `--complexity`: `minimalist | simple | medium | detail`.
+- `--custom-params`: gin file copied to Infinigen `custom_solve.gin`.
+- `--large-scene`: only for `House`; enables more rooms.
+- `--gen/--no-gen`, `--urdf/--no-urdf`, `--usd/--no-usd`: pipeline switches.
+## Complexity Guidance
+- `minimalist`: fastest, sparse furniture.
+- `simple`: default, balanced quality/time.
+- `medium`: richer layout, slower.
+- `detail`: highest detail, longest runtime.
+## Command Templates
+```bash
+# Full pipeline for one kitchen
+python -m embodied_gen.scripts.room_gen.gen_room \
+  --output-root outputs/rooms \
+  --room-type Kitchen \
+  --seed 42 \
+  --complexity simple
+```
+```bash
+# Generation only (no export)
+python -m embodied_gen.scripts.room_gen.gen_room \
+  --output-root outputs/rooms \
+  --room-type LivingRoom \
+  --seed 100 \
+  --complexity medium \
+  --no-urdf --no-usd
+```
+```bash
+# Export only from existing blender result
+python -m embodied_gen.scripts.room_gen.gen_room \
+  --output-root outputs/rooms \
+  --room-type Kitchen \
+  --seed 42 \
+  --no-gen --urdf --usd
+```
+```bash
+# House generation (use --large-scene for more rooms)
+python -m embodied_gen.scripts.room_gen.gen_room \
+  --output-root outputs/rooms \
+  --room-type House \
+  --seed 7 \
+  --complexity simple \
+  --large-scene
+```
+## Runtime Requirements
+- Run from repo root so relative paths resolve.
+- Ensure Blender Python exists at `$BLENDER_PYTHON`. If not set, the command will try:
+  `thirdparty/infinigen/blender/4.2/python/bin/python3.11` (must exist on disk).
+- `--no-gen` requires existing blender output at:
+  `<output_root>/<RoomType>_seed<seed>/blender`.

embodied_gen/skills/sim-runner/SKILL.md ADDED Viewed

	@@ -0,0 +1,72 @@

+---
+name: sim-runner
+description: Run SAPIEN-based simulation rendering from EmbodiedGen layout outputs using sim-cli. Use this skill whenever users ask to load a generated layout.json into simulation, render interactive scene videos, control camera/render settings, or enable robot grasp trajectory rendering.
+---
+# Sim Runner
+Unified entry for EmbodiedGen simulation rendering via `sim-cli`.
+## When To Use
+Use this skill when users want to:
+- Load a generated `layout.json` into simulation.
+- Render interactive scene videos (foreground + 3DGS background composition).
+- Adjust camera, rendering, or simulation-step parameters.
+- Include robot grasp trajectory rendering with `--insert_robot`.
+## Routing Rule (Core)
+Use `sim-cli` when the input is an existing layout result (especially `layout.json`) and the target output is simulation visualization (e.g., `Iscene.mp4`), not generation of new assets/backgrounds/layouts.
+## Pre-checks
+1. Run commands from the repository root.
+2. Confirm the active environment is `embodiedgen`.
+3. Confirm input `--layout_path` exists and points to a valid layout output.
+4. Ensure referenced background and asset files in the layout directory are present.
+5. If CLI commands are unavailable, run `pip install -e .` to register entrypoints.
+## Standard Command Template
+```bash
+sim-cli \
+  --layout_path "outputs/layouts_gen/task_0000/layout.json" \
+  --output_dir "outputs/layouts_gen/task_0000/sapien_render" \
+  --insert_robot
+```
+## Common Parameters
+- `--layout_path`: input layout file path.
+- `--output_dir`: output directory for rendered video.
+- `--insert_robot`: render robot grasp actions for manipulated objects.
+- `--sim_freq --control_freq --sim_step`: simulation/control timing settings.
+- `--render_interval`: render every N simulation steps.
+- `--num_cameras --camera_radius --camera_height --fovy_deg`: camera configuration.
+- `--image_hw`: output frame size.
+- `--render_keys`: render channels (requires `Foreground` for final compositing).
+- `--ray_tracing`: enable/disable ray tracing backend.
+- `--device`: rendering device (e.g., `cuda`).
+## Output Conventions
+Primary output:
+- `<output_dir>/Iscene.mp4`
+Typical input dependencies resolved from layout directory:
+- `layout.json`
+- background `gs_model.ply`
+- per-object assets referenced by layout
+## Runtime Expectations
+- Runtime depends on `sim_step`, `render_interval`, camera count, and ray-tracing mode.
+- Enabling `--insert_robot` increases render time due to grasp-action rollout.
+## Failure Handling and Retry
+1. Missing file errors: verify layout-relative asset/background paths exist.
+2. GPU memory pressure: reduce `--num_cameras`, `--image_hw`, or disable heavy settings.
+3. Empty/invalid video output: ensure `Foreground` is included in `--render_keys`.
+4. Slow runtime: reduce `--sim_step` or increase `--render_interval`.

embodied_gen/skills/spatial-computing/README.md ADDED Viewed

	@@ -0,0 +1,59 @@

+## Using with IDE Agent via Natural Language
+The Agent will automatically load this skill based on its **description** when you mention URDF, floorplan, indoor scene, object placement, etc. You only need to specify in natural language **what to do** and provide **key information like paths/room names**.
+### LLM Environment Configuration (When Using Semantic Matching)
+If you want to use natural language descriptions (e.g., "put lamp on bookshelf") instead of exact instance/room names, you need to configure the LLM environment first:
+```bash
+# If outputs/env.sh exists, source it first
+source outputs/env.sh
+```
+If access to the LLM interface is unavailable, please provide exact instance names (you can check them via `--list_instances`).
+### URDF Visualization Only (Generate Floorplan)
+**You can say:**
+- "Help me visualize `path_to/scene.urdf` or `path_to/folder_contain/scene.urdf`"
+**Agent will:** Use `visualize_floorplan(urdf_path=..., output_path=...)` or the corresponding CLI to generate the floorplan only, without modifying URDF/USD.
+### Insert Object and Update Scene (URDF, or URDF+USD)
+**You can say:**
+- "Put `chair.obj` into scene.urdf's kitchen room"
+- "Put `bottle.obj` into the URDF at `outputs/rooms/Kitchen_seed3773`, instance name bottle_1, update scene and generate floorplan"
+- "Put a cup on the table in the living room" → Agent will use `on_instance="table"`, `place_strategy="top"`, etc.
+**If you also want to update USD:**
+- "Put a chair in the kitchen, update both URDF and USD, USD path is `xxx/usd/export_scene.usdc`"
+- Note that you need to use **room-cli** to execute (this skill will prompt the Agent), because writing USD requires bpy.
+**Agent will:** Use `FloorplanManager` + `insert_object` (or `insert_object_to_scene`), execute according to the paths and room names you provided; when USD is needed, use room-cli to run the CLI.
+### View Instances and Rooms in the Scene
+Before placing objects, you can first view what instances and rooms are in the scene:
+**You can say:**
+- "Help me list all instances and room names in `.../scene.urdf`"
+**Agent will:** Execute `--list_instances` to display the instance names and room names in the current scene.
+### URDF/USD Output Notes
+- **URDF Output**: The updated URDF is written to `*_updated.urdf` by default (e.g., `scene.urdf` → `scene_updated.urdf`), and **will not overwrite** the original `scene.urdf`
+- **USD Output**: If `usd_path` is specified, the USD file will be written to `*_updated.usdc` following the same rule
+- **Only Update USD**: Requires using **room-cli** to execute, because writing USD needs Blender (bpy)
+### What Information to Provide
+| Goal | Suggested Information to Provide in Conversation |
+|------|-----------------------------------------------|
+| Visualization only | URDF path, floorplan save path (optional, Agent can default to floorplan.png in same directory) |
+| View instances/rooms | URDF path, let Agent list instance names and room names in current scene |
+| Placement + update | URDF path, object mesh path (.obj), instance name (e.g., chair_1), room name (e.g., kitchen); if placing on table, say "place on table"; if updating USD, also provide USD path and use room-cli |
+Example in one go: "Use spatial-computing skill, generate floorplan for `.../scene.urdf` and save to floorplan.png in same directory, then put `path/to/bottle.obj` into kitchen, instance name bottle_1, update URDF only."

embodied_gen/skills/spatial-computing/REFERENCE.md ADDED Viewed

	@@ -0,0 +1,236 @@

+# Floorplan Skill — API Reference
+This document provides API details, configuration items, errors, and dependencies for reference beyond the usage instructions in [SKILL.md](SKILL.md).
+## Contents
+- [Floorplan Skill — API Reference](#floorplan-skill--api-reference)
+  - [Contents](#contents)
+  - [LLM Environment Configuration](#llm-environment-configuration)
+  - [FloorplanManager](#floorplanmanager)
+    - [Constructor](#constructor)
+    - [Methods](#methods)
+  - [Convenience Functions](#convenience-functions)
+  - [CLI Features](#cli-features)
+    - [Command Line Parameters](#command-line-parameters)
+    - [CLI Usage Examples](#cli-usage-examples)
+  - [Configuration and Ignore Items](#configuration-and-ignore-items)
+  - [Smart File Naming Strategy](#smart-file-naming-strategy)
+  - [USD and Blender](#usd-and-blender)
+  - [Errors and Return Values](#errors-and-return-values)
+  - [Dependencies](#dependencies)
+  - [Usage Recommendations](#usage-recommendations)
+---
+## LLM Environment Configuration
+Before using semantic matching (`resolve_*` methods), configure the LLM API:
+```bash
+# Use the project-provided env (Azure + proxy), if outputs/env.sh exists:
+source outputs/env.sh
+```
+If access to the LLM interface is unavailable, prompt the user to provide exact instance and room names instead (they can be listed with `--list_instances`).
+---
+## FloorplanManager
+### Constructor
+```python
+from importlib import import_module
+FloorplanManager = import_module(
+    "embodied_gen.skills.spatial-computing.api"
+).FloorplanManager
+manager = FloorplanManager(
+    urdf_path="scene.urdf",      # Required
+    usd_path=None,               # Optional; USD write after insert/delete if provided
+    mesh_sample_num=50000,
+    ignore_items=None,           # Default ["ceiling", "light", "exterior"]
+    output_strategy="suffix",    # "suffix" (default) / "timestamp" / "overwrite"
+)
+```
+### Methods
+| Method | Description |
+|--------|-------------|
+| `visualize(output_path)` | Generate floorplan and save as image |
+| `insert_object(asset_path, instance_key, in_room=..., on_instance=..., beside_instance=..., place_strategy=..., n_max_attempt=2000, rotation_rpy=...)` | Place object, automatically write back to URDF/USD on success, return `[x,y,z]` or `None` |
+| `delete_object(instance_key, in_room=..., urdf_output_path=..., usd_output_path=...)` | Delete instance from scene, return `True`/`False`. Supports room constraint via `in_room` |
+| `query_instance_center(instance_key)` | Query instance center coordinates, return `[x,y,z]` or `None` |
+| `update_scene(urdf_output_path=..., usd_output_path=...)` | Manually write back currently placed instances; generally not needed (called inside `insert_object`) |
+| `get_room_names()` | List of room names |
+| `get_instance_names()` | List of instance names (excluding walls/floor) |
+| `get_instance_names_in_room(in_room)` | List of instance names within a specific room |
+| `resolve_on_instance(on_instance, gpt_client=None)` | Resolve user description to exact instance name |
+| `resolve_in_room(in_room, gpt_client=None)` | Resolve user description to exact room name |
+| `resolve_beside_instance(beside_instance, gpt_client=None, in_room=None)` | Resolve user description to exact instance name for beside placement |
+| `resolve_delete_instance(delete_instance, gpt_client=None, in_room=None)` | Resolve user description to exact instance name for deletion |
+| `resolve_and_query_instance(query_instance, gpt_client=None)` | Resolve and query instance center in one call, return `(resolved_name, [x,y,z])` or `(None, None)` |
+| `get_occupied_area()` | Occupied area Shapely geometry |
+| `get_floor_union()` | Floor area union geometry |
+**Key parameters**:
+- `on_instance` / `beside_instance` / `delete_instance`: Exact instance name or semantic description (with `gpt_client`)
+- `in_room`: Room constraint for placement/deletion/query
+- `place_strategy`: `"random"` (default) or `"top"` (select highest surface)
+- `beside_distance`: Max distance in meters for beside placement (default 0.5)
+---
+## Convenience Functions
+| Function | Description |
+|----------|-------------|
+| `visualize_floorplan(urdf_path, output_path, ...)` | Generate floorplan only |
+| `insert_object_to_scene(urdf_path, asset_path, instance_key, output_path, ...)` | Insert object and generate floorplan, return `[x,y,z]` or `None` |
+| `delete_object_from_scene(urdf_path, instance_key, in_room=..., output_path=...)` | Delete instance and optionally generate floorplan, return `True`/`False` |
+| `query_instance_position(urdf_path, instance_key)` | Quick query instance center coordinates, return `[x,y,z]` or `None` |
+| `resolve_instance_with_llm(gpt_client, instance_names, user_spec, ...)` | Use LLM to match user description to exact instance name |
+---
+## CLI Features
+### Command Line Parameters
+| Parameter | Description |
+|-----------|-------------|
+| `--urdf_path` | Input URDF scene file path (required) |
+| `--usd_path` | Optional USD scene file path, update USD simultaneously if specified |
+| `--asset_path` | Object mesh file path (.obj) for insertion |
+| `--instance_key` | Unique identifier for the new instance, default `inserted_object` |
+| `--in_room` | Limit placement to specified room, supports semantic description |
+| `--on_instance` | Place on top of specified instance, supports semantic description |
+| `--beside_instance` | Place beside specified instance on floor, supports semantic description |
+| `--beside_distance` | Max distance (meters) from target instance, default 0.5 |
+| `--place_strategy` | Placement strategy: `"random"` (default) or `"top"` |
+| `--rotation_rpy` | Initial rotation angle (roll, pitch, yaw radians) |
+| `--output_path` | Floorplan output path |
+| `--output_strategy` | File naming strategy: `"suffix"` (default) / `"timestamp"` / `"overwrite"` |
+| `--list_instances` | List instance names and room names, then exit |
+| `--delete_instance` | Instance name to delete (supports semantic description) |
+| `--delete_in_room` | Room constraint for deletion |
+| `--query_instance` | Instance name to query position (supports semantic description) |
+| `--max_placement_attempts` | Maximum placement attempts, default 2000 |
+### CLI Usage Examples
+**View scene info**:
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --list_instances
+```
+**Insert object with semantic matching**:
+```bash
+source outputs/env.sh
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --asset_path .../lamp.obj --instance_key lamp_1 \
+  --on_instance 书柜
+```
+**Delete object with room constraint**:
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --delete_instance 沙发 --delete_in_room 客厅
+```
+**Query instance position**:
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --query_instance 床
+```
+**Update both URDF and USD (room-cli)**:
+```bash
+room-cli -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --usd_path .../scene.usdc \
+  --delete_instance 沙发
+```
+---
+## Configuration and Ignore Items
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `mesh_sample_num` | 50000 | Number of mesh sampling points |
+| `ignore_items` | `["ceiling", "light", "exterior"]` | Link name patterns to skip during URDF parsing |
+| `output_strategy` | `"suffix"` | File naming strategy for output files |
+---
+## Smart File Naming Strategy
+Default `output_strategy="suffix"` provides intelligent continuous operation support:
+| Operation | Input File | Output File | Behavior |
+|-----------|-----------|-------------|----------|
+| First insert | `scene.urdf` | `scene_updated.urdf` | Creates new file |
+| Second insert | `scene_updated.urdf` | `scene_updated.urdf` | **Overwrites** (continuous) |
+| Delete | `scene_updated.urdf` | `scene_updated.urdf` | **Overwrites** (continuous) |
+**Key features**:
+- ✅ No `*_updated_updated.urdf` accumulation
+- ✅ Original `scene.urdf` never modified
+- ✅ Seamless insert/delete workflow
+**Alternative strategies**:
+- `"timestamp"`: Unique versioning (`scene_20260311_180235.urdf`)
+- `"overwrite"`: Direct overwrite (use with caution)
+---
+## USD and Blender
+- Writing USD requires **Blender (bpy)**. Use **room-cli** environment for USD operations.
+- Without `usd_path`, only URDF is updated (no bpy needed).
+- Assets in `.usd`/`.usdc`/`.usda` format are directly referenced; only `.obj` files are converted via bpy.
+- If `*_collision.obj` exists alongside visual mesh, it will be used for URDF collision.
+---
+## Errors and Return Values
+**Exceptions**
+- **ValueError**: Room/instance not found; `update_scene()` called before insertion; `instance_key` already exists; attempting to delete protected instances (`walls`, `*floor*`).
+**Return Values**
+- `insert_object` / `insert_object_to_scene`: `[x, y, z]` on success, `None` on failure.
+- `delete_object` / `delete_object_from_scene`: `True` on success, `False` on failure.
+- `query_instance_center` / `query_instance_position`: `[x, y, z]` or `None`.
+**Exit Codes (CLI)**
+- `0`: Success
+- `1`: Instance/room not found, deletion failed, or placement failed
+---
+## Dependencies
+| Type | Package | Description |
+|------|---------|-------------|
+| Core | trimesh, shapely, matplotlib, numpy | Parsing and visualization |
+| USD Writing | pxr, bpy | Required only when using `usd_path`; bpy requires Blender |
+| LLM Semantic Matching | openai, project gpt_config | `resolve_*` methods require `GPTclient` instance |
+| CLI | tyro | Required only for CLI entry point |
+---
+## Usage Recommendations
+- **Upright objects**: Default orientation applies; for special orientations, pass `(roll, pitch, yaw)` radians.
+- **Placing on furniture**: Use `resolve_on_instance()` to get exact name, then `insert_object(..., on_instance=resolved, place_strategy="top")`.
+- **Placing beside furniture**: Use `insert_object(..., beside_instance=resolved, beside_distance=0.5)` for floor placement near target.
+- **Deleting objects**: Use `resolve_delete_instance()` for semantic matching, then `delete_object(..., in_room=room)` for room-specific deletion.
+- **Protected instances**: Cannot delete `walls` or instances containing `floor` in their names.
+- **Continuous editing**: Use `scene_updated.urdf` as input for subsequent operations to maintain changes.

embodied_gen/skills/spatial-computing/SKILL.md ADDED Viewed

	@@ -0,0 +1,374 @@

+---
+name: spatial-computing
+description: Visualizes floorplans from URDF scene files and inserts/removes 3D assets with collision-aware placement on surfaces. Supports semantic instance matching via LLM (e.g., "put lamp on bookshelf", "delete sofa in living room"). Use when working with URDF/USD indoor scenes, floorplan visualization, object placement/deletion, or room-level scene editing.
+---
+# Floorplan & Object Placement/Deletion
+## Overview
+Parse indoor scenes from URDF, generate 2D floorplans, or place/remove 3D objects in scenes and write back to URDF/USD. After successful insertion/deletion, the corresponding file is automatically updated based on whether `urdf_path`/`usd_path` is provided.
+**When to use**: Use this skill when you need to generate floorplans from URDF, place/delete objects on specified rooms/furniture surfaces, or batch update URDF/USD files.
+> ⚠️ **USD updates require `room-cli`**: To update USD files, you **must** use `room-cli` instead of `python -m`, and specify the USD file via `--usd_path`. `room-cli` runs on Blender Python which includes the `bpy` module for OBJ→USD conversion; using `python -m` with `--usd_path` will fail with `ModuleNotFoundError: No module named 'bpy'`.
+>
+> ```bash
+> # ✅ Correct: use room-cli to update both URDF and USD
+> room-cli -m embodied_gen.skills.spatial-computing.cli.main \
+>   --urdf_path .../scene.urdf --usd_path .../scene.usdc ...
+> ```
+**Smart File Naming Strategy**:
+- **Default behavior**: First operation creates `scene_updated.urdf`, subsequent operations automatically overwrite it
+- **No file bloat**: Prevents `*_updated_updated.urdf` files from accumulating
+- **Safe**: Original `scene.urdf` is never modified unless explicitly requested
+- **Works for both insert and delete**: Seamless continuous scene editing
+---
+## Best Practices & Constraints
+### 1. Workflow for Continuous Scene Editing
+**Recommended workflow** for multiple insert/delete operations:
+```bash
+# Step 1: View current scene
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --list_instances
+# Step 2: First insert → creates scene_updated.urdf
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf \
+  --asset_path .../apple.obj --instance_key apple_1
+# Step 3: Second insert → overwrites scene_updated.urdf
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene_updated.urdf \
+  --asset_path .../lamp.obj --instance_key lamp_1
+# Step 4: Delete operation → overwrites scene_updated.urdf
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene_updated.urdf \
+  --delete_instance apple_1
+```
+**Key benefits**:
+- ✅ No multiple `*_updated_updated.urdf` files
+- ✅ Original file `scene.urdf` always preserved
+- ✅ Continuous insert/delete operations are seamless
+**Result**: Clean workflow with only two files:
+- `scene.urdf` (original, untouched)
+- `scene_updated.urdf` (final state)
+### 2. When to Use Different Strategies
+| Strategy | Use Case | Example |
+|----------|----------|---------|
+| **suffix** (default) | Standard workflow, continuous editing | Most scenarios |
+| **timestamp** | Version tracking, backup before risky changes | `scene_20260311_180235.urdf` |
+| **overwrite** | Confident single operation, no backup needed | Automated pipelines |
+### 3. Performance Optimization: Batch Insert
+**Problem**: CLI commands re-parse URDF and process all meshes on every call, leading to slow performance when inserting multiple objects.
+**Solution**: Use `--batch_insert_config` with JSON config for 3-4x speedup:
+**Step 1**: Create JSON config file (`batch_chairs.json`):
+```json
+[
+    {
+        "asset_path": "path/to/chair1.obj",
+        "instance_key": "chair_1",
+        "beside_instance": "table_dining_7178300",
+        "in_room": "dining_room_0_floor"
+    },
+    {
+        "asset_path": "path/to/chair2.obj",
+        "instance_key": "chair_2",
+        "beside_instance": "table_dining_7178300",
+        "in_room": "dining_room_0_floor"
+    },
+    {
+        "asset_path": "path/to/chair3.obj",
+        "instance_key": "chair_3",
+        "beside_instance": "table_dining_7178300",
+        "in_room": "dining_room_0_floor"
+    }
+]
+```
+**Step 2**: Run batch insertion:
+```bash
+# Update URDF only
+room-cli -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf \
+  --batch_insert_config batch_chairs.json
+# Update both URDF and USD
+room-cli -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf \
+  --usd_path .../scene.usdc \
+  --batch_insert_config batch_chairs.json
+```
+**JSON Config Fields**:
+- `asset_path` (required): Path to asset mesh file (.obj)
+- `instance_key` (required): Unique instance identifier
+- `beside_instance`: Place beside target instance (on floor). **Must be exact name**.
+- `on_instance`: Place on top of target instance. **Must be exact name**.
+- `in_room`: Limit placement to specified room. **Must be exact name**.
+- `beside_distance`: Max distance from target (default: 0.5m)
+- `place_strategy`: "random" or "top" (default: "random")
+> **⚠️ Batch insert does NOT support fuzzy/semantic matching.**
+> `beside_instance`, `on_instance`, and `in_room` require exact names.
+> Use `--list_instances` to get the exact instance / room names first:
+> ```bash
+> python -m embodied_gen.skills.spatial-computing.cli.main \
+>   --urdf_path .../scene.urdf --list_instances
+> ```
+**When to Use**:
+- ✅ Inserting 2+ objects at once
+- ✅ Performance-critical workflows
+- ✅ Automated scene generation pipelines
+⚠️ **Batch config file cleanup**: The JSON config file for `--batch_insert_config` is a **temporary file** and **must not** be left in the project root directory. Always:
+1. Create the JSON config in the **same directory as the target scene** (e.g., `.../House_seed5/batch_fruits.json`).
+2. **Delete the JSON config file immediately after the batch command finishes**, regardless of success or failure.
+### 4. Important Constraints
+❌ **Wrong**: Using `scene.urdf` for all operations (ignores previous changes)
+```bash
+# This will NOT see apple_1 from previous operation
+python -m ... --urdf_path scene.urdf --asset_path lamp.obj
+```
+✅ **Right**: Chain operations using `scene_updated.urdf`
+```bash
+# This WILL see apple_1 and add lamp_1
+python -m ... --urdf_path scene_updated.urdf --asset_path lamp.obj
+```
+---
+## LLM Environment (Required for Semantic Matching)
+Before using `resolve_instance_with_llm` for semantic matching in **Python**, configure the LLM API and ensure access to the interface. Prompt the user if access is unavailable.
+```bash
+# Use the project-provided env (Azure + proxy, etc.), if outputs/env.sh exists:
+source outputs/env.sh
+```
+---
+## Core Convention: Placement/Deletion/Query Requests Must Use This Skill's Interface
+When users request "put A somewhere", "delete A", "find A", or "visualize urdf", you **must** implement it using this skill's interface:
+| User Request Example | Corresponding Parameter & Usage |
+|---------------------|---------------------------------|
+| **Put A on B** (e.g., "put lamp on bookshelf") | `on_instance` (instance name, obtained from `--list_instances`) |
+| **Put A beside B** (e.g., "put chair beside table") | `beside_instance` (instance name, obtained from `--list_instances`); placed on floor near target |
+| **Put A in a room** (e.g., "put table in living room") | `in_room` (room name, obtained from `--list_instances`) |
+| **Put A beside B in a room** (e.g., "put chair beside table in kitchen") | `beside_instance` + `in_room` |
+| **Put A on B in a room** (e.g., "put apple on table in living room") | Decomposed into "living room" as `in_room` and "table" as `on_instance` |
+| **Delete A** (e.g., "delete lamp") | `delete_instance` (instance name or semantic description, supports fuzzy matching with LLM) |
+| **Delete A in a room** (e.g., "delete sofa in living room") | `delete_instance` + `delete_in_room` (only deletes if instance is in specified room) |
+| **Find A** (e.g., "find lamp", "where is the bed") | `query_instance` (returns center coordinates [x, y, z], supports fuzzy matching with LLM) |
+| **Choose output file naming** | `output_strategy`: `"suffix"` / `"timestamp"` / `"overwrite"`. File naming strategy for output files. Default is "suffix" (non-destructive). |
+| **Visualize scene.urdf** | `cli.main --urdf_path .../scene.urdf --output_path .../floorplan.png`; output_path defaults to same directory as urdf |
+- When no match is found, prompt "The object/room does not exist, please re-enter" and provide the current scene object or room list.
+- Instance names should not use the `<link name="...">` from URDF. **Recommended**: Run `--list_instances` before placement/deletion/query to view current instance name list, and select the closest semantic match.
+---
+## CLI Examples
+> **Tip**: The URDF file is typically located at `<room_folder>/urdf/export_scene/scene.urdf` (e.g., `outputs/rooms/Kitchen_seed0/urdf/export_scene/scene.urdf`).
+### Example 1: View Instance Names and Room Names in Current Scene
+```bash
+# View instance names and room names in current scene (to fill in --on_instance / --in_room)
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --list_instances
+```
+### Example 2: Visualize Floorplan Only
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --output_path .../floorplan.png
+```
+### Example 3: Put Lamp on Bookshelf (Place on an Object)
+`--on_instance` can be filled with the instance name returned by `--list_instances` or a semantic description.
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --output_path .../floorplan.png \
+  --asset_path .../lamp.obj --instance_key lamp_on_bookcase --on_instance 书柜
+```
+---
+### Example 4: Put Table in Living Room (Place in a Room)
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --output_path .../floorplan.png \
+  --asset_path .../table.obj --instance_key table_1 \
+  --in_room living_room
+```
+---
+### Example 5: Put Apple on Table in Living Room (Room + on Object)
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --output_path .../floorplan.png \
+  --asset_path .../apple.obj --instance_key apple_1 \
+  --in_room living_room --on_instance table --place_strategy top
+```
+---
+### Example 7: Delete an Object (Exact Name)
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --output_path .../floorplan.png \
+  --delete_instance bed_192207
+```
+---
+### Example 8: Delete Object with Fuzzy Matching (Semantic Description)
+Requires LLM environment (see "LLM Environment" section).
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --output_path .../floorplan.png \
+  --delete_instance "沙发"
+```
+---
+### Example 9: Delete Object in Specific Room
+Only deletes the instance if it's located in the specified room.
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --output_path .../floorplan.png \
+  --delete_instance "沙发" --delete_in_room "客厅"
+```
+**Update both URDF and USD using room-cli:**
+```bash
+room-cli -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf --usd_path .../scene.usdc \
+  --output_path .../floorplan.png \
+  --delete_instance "沙发" --delete_in_room "客厅"
+```
+---
+### Example 10: Query Instance Position (Exact Name)
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf \
+  --query_instance bed_192207
+```
+**Expected output**:
+```
+📍 Instance 'bed_192207' center: (-0.9250, -6.5830, 0.5000)
+```
+---
+### Example 11: Query Instance Position with Fuzzy Matching
+Requires LLM environment (see "LLM Environment" section).
+```bash
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf \
+  --query_instance "床"
+```
+---
+#### **Alternative Strategies**
+**Timestamp** - Unique versioning for each operation:
+```bash
+# Output: scene_20260311_180235.urdf
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf \
+  --asset_path .../apple.obj --instance_key apple_1 \
+  --output_strategy timestamp
+```
+**Overwrite** - Directly overwrite original (use with caution):
+```bash
+# Overwrites: scene.urdf
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path .../scene.urdf \
+  --asset_path .../apple.obj --instance_key apple_1 \
+  --output_strategy overwrite
+```
+---
+### Query Instance Position
+Query the center coordinates of an instance in the scene. Supports fuzzy matching with LLM.
+**CLI Interface**:
+```bash
+# Exact instance name
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path scene.urdf \
+  --query_instance bed_192207
+# Fuzzy matching (requires GPT)
+source outputs/env.sh
+python -m embodied_gen.skills.spatial-computing.cli.main \
+  --urdf_path scene.urdf \
+  --query_instance "床"
+```
+### 6. Common Parameters
+| Parameter | Meaning |
+|-----------|---------|
+| `in_room` | Limit placement to specified room |
+| `on_instance` | Place on top of specified instance; must be **exact instance name** (obtained via `resolve_instance_with_llm`) |
+| `beside_instance` | Place beside specified instance on the floor; must be **exact instance name** (obtained via `resolve_instance_with_llm`). Mutually exclusive with `on_instance` |
+| `beside_distance` | Max distance (meters) from target instance for beside placement. Default `0.5`. Increase if placement fails |
+| `place_strategy` | `"random"` random placement (default, e.g., bookshelf with 3 layers will randomly select one), `"top"` select highest surface |
+| `rotation_rpy` | Not required by default; pass (roll, pitch, yaw) radians for special orientations |
+| `delete_instance` | Instance name or semantic description to delete (supports fuzzy matching with LLM). Cannot delete protected items (walls, floors) |
+| `delete_in_room` | Optional room constraint for deletion - only delete if instance is in this room |
+| `query_instance` | Instance name or semantic description to query center coordinates (supports fuzzy matching with LLM). Returns [x, y, z] position |
+## Next Steps
+- For complete API, configuration, errors, and dependencies, see [REFERENCE.md](REFERENCE.md).

embodied_gen/skills/spatial-computing/__init__.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+from .core import (
+    UrdfSemanticInfoCollector,
+    get_actionable_surface,
+    points_to_polygon,
+)
+from .core.visualizer import (
+    FloorplanVisualizer,
+)
+__all__ = [
+    "FloorplanVisualizer",
+    "UrdfSemanticInfoCollector",
+    "points_to_polygon",
+    "get_actionable_surface",
+]

embodied_gen/skills/spatial-computing/api/__init__.py ADDED Viewed

	@@ -0,0 +1,34 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+from embodied_gen.utils.llm_resolve import resolve_instance_with_llm
+from .floorplan_api import (
+    FloorplanManager,
+    delete_object_from_scene,
+    insert_object_to_scene,
+    query_instance_position,
+    visualize_floorplan,
+)
+__all__ = [
+    "FloorplanManager",
+    "visualize_floorplan",
+    "insert_object_to_scene",
+    "delete_object_from_scene",
+    "query_instance_position",
+    "resolve_instance_with_llm",
+]

embodied_gen/skills/spatial-computing/api/floorplan_api.py ADDED Viewed

	@@ -0,0 +1,917 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+from __future__ import annotations
+import logging
+import os
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Literal
+from shapely.geometry import MultiPolygon, Polygon
+from embodied_gen.utils.llm_resolve import resolve_instance_with_llm
+from ..core import (
+    UrdfSemanticInfoCollector,
+)
+from ..core.collector import (
+    DEFAULT_BESIDE_DISTANCE,
+    DEFAULT_IGNORE_ITEMS,
+    DEFAULT_MESH_SAMPLE_NUM,
+    DEFAULT_ROTATION_RPY,
+)
+from ..core.visualizer import (
+    FloorplanVisualizer,
+)
+# Type aliases
+Geometry = Polygon | MultiPolygon
+logger = logging.getLogger(__name__)
+@dataclass
+class FloorplanConfig:
+    """Configuration for floorplan operations.
+    Bundles every option of a floorplan run in one dataclass: the input
+    URDF/USD paths, asset placement parameters, deletion and query targets,
+    batch-insert configuration and the output file naming strategy.
+    Each field is described by the string literal that directly follows it.
+    """
+    urdf_path: str
+    """Path to the input URDF scene file."""
+    output_path: str | None = None
+    """Path to save the floorplan visualization image."""
+    usd_path: str | None = None
+    """Optional path to the USD scene file for USD export."""
+    asset_path: str | None = None
+    """Optional path to the asset mesh file (.obj)."""
+    instance_key: str = "inserted_object"
+    """Unique key for the added instance."""
+    in_room: str | None = None
+    """Optional room name to constrain asset placement."""
+    on_instance: str | None = None
+    """Optional instance name to place the asset on top of (exact key from get_instance_names())."""
+    beside_instance: str | None = None
+    """Optional instance name to place the asset beside (on floor, near the target)."""
+    beside_distance: float = DEFAULT_BESIDE_DISTANCE
+    """Max distance (meters) from the target instance for beside placement."""
+    place_strategy: Literal["top", "random"] = "random"
+    """Placement strategy for the asset."""
+    rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY
+    """Rotation in roll-pitch-yaw (radians)."""
+    ignore_items: list[str] = field(
+        default_factory=lambda: list(DEFAULT_IGNORE_ITEMS)
+    )
+    """List of item name patterns to ignore during parsing."""
+    mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM
+    """Number of points to sample from meshes."""
+    max_placement_attempts: int = 2000
+    """Maximum attempts for asset placement."""
+    update_urdf: bool = True
+    """Whether to update and save the URDF file."""
+    update_usd: bool = True
+    """Whether to update and save the USD file."""
+    list_instances: bool = False
+    """If True, print instance and room names then exit (no placement/visualization)."""
+    delete_instance: str | None = None
+    """Optional instance name to delete from the scene (supports fuzzy matching with LLM)."""
+    delete_in_room: str | None = None
+    """Optional room constraint for deletion - only delete if instance is in this room."""
+    query_instance: str | None = None
+    """Optional instance name to query and return its center coordinates (supports fuzzy matching with LLM)."""
+    output_strategy: Literal["suffix", "overwrite", "timestamp"] = "suffix"
+    """File naming strategy for output files.
+    - "suffix": Add '_updated' suffix (default, non-destructive)
+    - "overwrite": Overwrite original files (use with caution)
+    - "timestamp": Add timestamp suffix (e.g., '_20260311_171500')
+    """
+    batch_insert_config: str | None = None
+    """Path to JSON config file for batch insertion (3-4x faster than multiple CLI calls).
+    JSON format example:
+    [
+        {
+            "asset_path": "path/to/chair1.obj",
+            "instance_key": "chair_1",
+            "beside_instance": "table_dining_7178300",
+            "in_room": "dining_room_0_floor"
+        },
+        {
+            "asset_path": "path/to/chair2.obj",
+            "instance_key": "chair_2",
+            "beside_instance": "table_dining_7178300",
+            "in_room": "dining_room_0_floor"
+        }
+    ]
+    """
+class FloorplanManager:
+    """High-level API for floorplan operations.
+    This class provides simplified methods for:
+    - Loading and analyzing URDF scenes
+    - Visualizing floorplans
+    - Inserting objects into scenes
+    - Updating URDF and USD files
+    Example:
+        >>> manager = FloorplanManager(urdf_path="scene.urdf", usd_path="scene.usdc")
+        >>> manager.visualize(output_path="floorplan.png")
+        >>> position = manager.insert_object(
+        ...     asset_path="chair.obj",
+        ...     instance_key="chair_1",
+        ...     in_room="kitchen"
+        ... )
+        # URDF/USD are updated automatically after insert
+    """
+    def __init__(
+        self,
+        urdf_path: str,
+        usd_path: str | None = None,
+        mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
+        ignore_items: list[str] | None = None,
+        output_strategy: Literal[
+            "suffix", "overwrite", "timestamp"
+        ] = "suffix",
+    ) -> None:
+        """Initialize the floorplan manager.
+        Args:
+            urdf_path: Path to the URDF file.
+            usd_path: Optional path to the USD file for scene updates.
+            mesh_sample_num: Number of points to sample from meshes.
+            ignore_items: List of item name patterns to ignore.
+            output_strategy: File naming strategy for output files.
+        """
+        self.urdf_path = urdf_path
+        self.usd_path = usd_path
+        self.output_strategy = output_strategy
+        self.collector = UrdfSemanticInfoCollector(
+            mesh_sample_num=mesh_sample_num,
+            ignore_items=ignore_items,
+        )
+        self.collector.collect(urdf_path)
+        self.pending_instance_data: dict | None = None
+    def _get_output_path(
+        self,
+        input_path: str,
+        custom_output_path: str | None = None,
+    ) -> str:
+        """Generate output path based on the naming strategy.
+        Smart file naming strategy:
+        - "suffix" (default):
+          * If input already ends with "_updated", overwrite it (continuous operations)
+          * Otherwise, add "_updated" suffix (first operation)
+        - "timestamp": Add timestamp suffix for unique versioning
+        - "overwrite": Always overwrite the input file
+        Args:
+            input_path: Original input file path.
+            custom_output_path: Optional custom output path (highest priority).
+        Returns:
+            Generated output path based on strategy.
+        """
+        if custom_output_path is not None:
+            return custom_output_path
+        name, ext = os.path.splitext(input_path)
+        if self.output_strategy == "overwrite":
+            return input_path
+        elif self.output_strategy == "timestamp":
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            return f"{name}_{timestamp}{ext}"
+        else:  # "suffix" (default) - smart continuous operation support
+            # If input already has "_updated" suffix, overwrite it (continuous operation)
+            if name.endswith("_updated"):
+                return input_path
+            # Otherwise, add "_updated" suffix (first operation)
+            else:
+                return f"{name}_updated{ext}"
+    def visualize(
+        self,
+        output_path: str,
+    ) -> None:
+        """Generate and save a floorplan visualization.
+        Args:
+            output_path: Path to save the output image.
+        """
+        FloorplanVisualizer.plot(
+            self.collector.rooms,
+            self.collector.footprints,
+            self.collector.occ_area,
+            output_path,
+        )
+        logger.info(f"✅ Floorplan visualization saved to {output_path}")
+    def insert_object(
+        self,
+        asset_path: str,
+        instance_key: str,
+        in_room: str | None = None,
+        on_instance: str | None = None,
+        beside_instance: str | None = None,
+        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
+        rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
+        n_max_attempt: int = 2000,
+        place_strategy: Literal["top", "random"] = "random",
+    ) -> list[float] | None:
+        """Insert an object into the scene with automatic placement.
+        Args:
+            asset_path: Path to the asset mesh file (.obj).
+            instance_key: Unique key for the new instance.
+            in_room: Optional room name to constrain placement.
+            on_instance: Optional instance name to place on top of.
+            beside_instance: Optional instance name to place beside (on floor).
+            beside_distance: Max distance from target for beside placement.
+            rotation_rpy: Initial rotation in roll-pitch-yaw.
+            n_max_attempt: Maximum placement attempts.
+            place_strategy: Either "top" or "random".
+        Returns:
+            List [x, y, z] of the placed instance center, or None if failed.
+        """
+        center = self.collector.add_instance(
+            asset_path=asset_path,
+            instance_key=instance_key,
+            in_room=in_room,
+            on_instance=on_instance,
+            beside_instance=beside_instance,
+            beside_distance=beside_distance,
+            rotation_rpy=rotation_rpy,
+            n_max_attempt=n_max_attempt,
+            place_strategy=place_strategy,
+        )
+        if center is not None:
+            self.pending_instance_data = {
+                "asset_path": asset_path,
+                "instance_key": instance_key,
+                "center": center,
+                "rotation_rpy": rotation_rpy,
+            }
+            self.update_scene()
+        return center
+    def batch_insert_objects(
+        self,
+        objects: list[dict],
+        defer_update: bool = False,
+    ) -> list[list[float] | None]:
+        """Batch insert multiple objects into the scene efficiently.
+        Args:
+            objects: List of object configs, each containing:
+                asset_path: Path to the asset mesh file (.obj).
+                instance_key: Unique key for the new instance.
+                in_room: Optional room name to constrain placement.
+                on_instance: Optional instance name to place on top of.
+                beside_instance: Optional instance name to place beside.
+                beside_distance: Max distance from target (default: 0.5m).
+                rotation_rpy: Initial rotation (default: (0, 0, 0)).
+                place_strategy: Either "top" or "random" (default: "random").
+            defer_update: If True, don't update URDF/USD after each
+                insertion. Useful when inserting many objects at once.
+        Returns:
+            List of centers [x, y, z] for each inserted object,
+            or None if failed.
+        Example:
+            >>> objects = [
+            ...     {"asset_path": "chair1.obj",
+            ...      "instance_key": "chair_1",
+            ...      "beside_instance": "table"},
+            ... ]
+            >>> centers = manager.batch_insert_objects(objects)
+        """
+        centers = []
+        usd_source = self.usd_path
+        for i, obj_config in enumerate(objects, 1):
+            logger.info(
+                f"[{i}/{len(objects)}] Inserting '{obj_config.get('instance_key', 'unknown')}'..."
+            )
+            center = self.collector.add_instance(
+                asset_path=obj_config["asset_path"],
+                instance_key=obj_config["instance_key"],
+                in_room=obj_config.get("in_room"),
+                on_instance=obj_config.get("on_instance"),
+                beside_instance=obj_config.get("beside_instance"),
+                beside_distance=obj_config.get(
+                    "beside_distance", DEFAULT_BESIDE_DISTANCE
+                ),
+                rotation_rpy=obj_config.get(
+                    "rotation_rpy", DEFAULT_ROTATION_RPY
+                ),
+                n_max_attempt=obj_config.get("n_max_attempt", 2000),
+                place_strategy=obj_config.get("place_strategy", "random"),
+            )
+            if center is not None:
+                # Store instance data for later update
+                collision_path = obj_config["asset_path"].replace(
+                    ".obj", "_collision.obj"
+                )
+                if not os.path.exists(collision_path):
+                    collision_path = None
+                # Update URDF incrementally
+                if self.urdf_path and not defer_update:
+                    urdf_out = self._get_output_path(self.urdf_path)
+                    self.collector.update_urdf_info(
+                        output_path=urdf_out,
+                        instance_key=obj_config["instance_key"],
+                        visual_mesh_path=obj_config["asset_path"],
+                        collision_mesh_path=collision_path,
+                        trans_xyz=tuple(center),
+                        rot_rpy=obj_config.get(
+                            "rotation_rpy", DEFAULT_ROTATION_RPY
+                        ),
+                        joint_type="fixed",
+                    )
+                # Update USD incrementally
+                if self.usd_path and not defer_update:
+                    usd_out = self._get_output_path(self.usd_path)
+                    self.collector.update_usd_info(
+                        usd_path=usd_source,
+                        output_path=usd_out,
+                        instance_key=obj_config["instance_key"],
+                        visual_mesh_path=obj_config["asset_path"],
+                        trans_xyz=center,
+                        rot_rpy=obj_config.get(
+                            "rotation_rpy", DEFAULT_ROTATION_RPY
+                        ),
+                    )
+                    usd_source = usd_out
+                logger.info(f"✅ Placed at {center}")
+            else:
+                logger.warning(f"❌ Failed to place object")
+            centers.append(center)
+        return centers
+    def update_scene(
+        self,
+        urdf_output_path: str | None = None,
+        usd_output_path: str | None = None,
+    ) -> None:
+        """Update URDF and/or USD with inserted instances.
+        Updates URDF if self.urdf_path is set, USD if self.usd_path is set.
+        Both are updated when both paths are set. No-op when no instance was inserted.
+        Note: USD updates require Blender (bpy) to convert .obj to .usdc format.
+        Args:
+            urdf_output_path: Optional custom path for URDF output.
+            usd_output_path: Optional custom path for USD output.
+        Raises:
+            ValueError: If no instance has been inserted.
+        """
+        if self.pending_instance_data is None:
+            raise ValueError(
+                "No instance to update. Call insert_object() first."
+            )
+        data = self.pending_instance_data
+        collision_path = data["asset_path"].replace(".obj", "_collision.obj")
+        if not os.path.exists(collision_path):
+            collision_path = None
+        if self.urdf_path:
+            urdf_out = self._get_output_path(self.urdf_path, urdf_output_path)
+            self.collector.update_urdf_info(
+                output_path=urdf_out,
+                instance_key=data["instance_key"],
+                visual_mesh_path=data["asset_path"],
+                collision_mesh_path=collision_path,
+                trans_xyz=tuple(data["center"]),
+                rot_rpy=data["rotation_rpy"],
+                joint_type="fixed",
+            )
+        if self.usd_path:
+            usd_out = self._get_output_path(self.usd_path, usd_output_path)
+            self.collector.update_usd_info(
+                usd_path=self.usd_path,
+                output_path=usd_out,
+                instance_key=data["instance_key"],
+                visual_mesh_path=data["asset_path"],
+                trans_xyz=data["center"],
+                rot_rpy=data["rotation_rpy"],
+            )
+    def delete_object(
+        self,
+        instance_key: str,
+        in_room: str | None = None,
+        urdf_output_path: str | None = None,
+        usd_output_path: str | None = None,
+    ) -> bool:
+        """Delete an object from the scene.
+        Args:
+            instance_key: Exact instance name to delete.
+            in_room: Optional room constraint - only delete if instance is in this room.
+            urdf_output_path: Optional custom path for URDF output.
+            usd_output_path: Optional custom path for USD output.
+        Returns:
+            True if deletion succeeded, False otherwise.
+        """
+        success = self.collector.remove_instance(
+            instance_key=instance_key,
+            in_room=in_room,
+        )
+        if success:
+            # Update URDF
+            if self.urdf_path:
+                urdf_out = self._get_output_path(
+                    self.urdf_path, urdf_output_path
+                )
+                self.collector.save_urdf(urdf_out)
+            # Update USD
+            if self.usd_path:
+                usd_out = self._get_output_path(self.usd_path, usd_output_path)
+                self.collector.remove_usd_instance(
+                    usd_path=self.usd_path,
+                    output_path=usd_out,
+                    instance_key=instance_key,
+                )
+        return success
+    def get_instance_names(self) -> list[str]:
+        """Get list of instance names in the scene.
+        Returns:
+            List of instance names.
+        """
+        return [
+            k
+            for k in self.collector.instances.keys()
+            if k != "walls" and "floor" not in k.lower()
+        ]
+    def get_room_names(self) -> list[str]:
+        """Get list of room names in the scene.
+        Returns:
+            List of room names.
+        """
+        return list(self.collector.rooms.keys())
+    def get_instance_names_in_room(self, in_room: str) -> list[str]:
+        """Get instance names that are spatially inside a given room.
+        Buffers the room polygon slightly to handle mesh-sampling precision.
+        Args:
+            in_room: Exact room key (must exist in get_room_names()).
+        Returns:
+            List of instance names within the room.
+        """
+        room_poly = self.collector.rooms.get(in_room)
+        if room_poly is None:
+            return self.get_instance_names()
+        room_buffered = room_poly.buffer(0.1)
+        all_names = self.get_instance_names()
+        return [
+            k
+            for k in all_names
+            if room_buffered.contains(
+                self.collector.instances[k].representative_point()
+            )
+        ]
+    def resolve_on_instance(
+        self,
+        on_instance: str,
+        gpt_client: object | None = None,
+    ) -> str | None:
+        r"""Resolve on_instance to an exact key (for placement).
+        If on_instance is already in get_instance_names(), return it.
+        Otherwise if gpt_client is provided, use LLM to resolve user description
+        (e.g. \"柜子\", \"书柜\") to one exact instance key.
+        Args:
+            on_instance: Exact instance key or semantic description.
+            gpt_client: Optional GPT client for semantic resolve (e.g. GPT_CLIENT).
+        Returns:
+            Exact instance key, or None if not found / LLM returned NONE.
+        """
+        names = self.get_instance_names()
+        if on_instance in names:
+            return on_instance
+        if gpt_client is not None:
+            return resolve_instance_with_llm(
+                gpt_client, names, on_instance  # type: ignore[arg-type]
+            )
+        return None
+    def resolve_in_room(
+        self,
+        in_room: str,
+        gpt_client: object | None = None,
+    ) -> str | None:
+        r"""Resolve in_room to an exact room name (for placement).
+        If in_room is already in get_room_names(), return it.
+        Otherwise if gpt_client is provided, use LLM to resolve user description
+        (e.g. \"kitchen\", \"the place for cooking\") to one exact room name.
+        Args:
+            in_room: Exact room name or semantic description.
+            gpt_client: Optional GPT client for semantic resolve (e.g. GPT_CLIENT).
+        Returns:
+            Exact room name, or None if not found / LLM returned NONE.
+        """
+        names = self.get_room_names()
+        if in_room in names:
+            return in_room
+        if gpt_client is not None:
+            return resolve_instance_with_llm(
+                gpt_client, names, in_room  # type: ignore[arg-type]
+            )
+        return None
+    def resolve_beside_instance(
+        self,
+        beside_instance: str,
+        gpt_client: object | None = None,
+        in_room: str | None = None,
+    ) -> str | None:
+        r"""Resolve beside_instance to an exact key (for beside placement).
+        If beside_instance is already in get_instance_names(), return it.
+        Otherwise if gpt_client is provided, use LLM to resolve user description
+        (e.g. "桌子", "沙发") to one exact instance key.
+        When `in_room` is given, only instances spatially inside that room are
+        considered as candidates.
+        Args:
+            beside_instance: Exact instance key or semantic description.
+            gpt_client: Optional GPT client for semantic resolve.
+            in_room: Optional resolved room key to restrict candidate scope.
+        Returns:
+            Exact instance key, or None if not found / LLM returned NONE.
+        """
+        if in_room is not None:
+            names = self.get_instance_names_in_room(in_room)
+        else:
+            names = self.get_instance_names()
+        if beside_instance in names:
+            return beside_instance
+        # Substring matching as fallback
+        query_lower = beside_instance.lower()
+        matches = [n for n in names if query_lower in n.lower()]
+        if len(matches) == 1:
+            logger.info(
+                "Substring match: '%s' -> '%s'", beside_instance, matches[0]
+            )
+            return matches[0]
+        elif len(matches) > 1:
+            logger.warning(
+                "Multiple substring matches for '%s': %s. Using '%s'. "
+                "Use exact name or LLM for better matching.",
+                beside_instance,
+                matches,
+                matches[0],
+            )
+            return matches[0]
+        if gpt_client is not None:
+            return resolve_instance_with_llm(
+                gpt_client, names, beside_instance  # type: ignore[arg-type]
+            )
+        return None
+    def resolve_delete_instance(
+        self,
+        delete_instance: str,
+        gpt_client: object | None = None,
+        in_room: str | None = None,
+    ) -> str | None:
+        r"""Resolve delete_instance to an exact key (for deletion).
+        Similar to resolve_beside_instance but specifically for deletion.
+        If delete_instance is already in get_instance_names(), return it.
+        Otherwise if gpt_client is provided, use LLM to resolve user description
+        (e.g. "桌子", "沙发") to one exact instance key.
+        When `in_room` is given, only instances spatially inside that room are
+        considered as candidates.
+        Args:
+            delete_instance: Exact instance key or semantic description.
+            gpt_client: Optional GPT client for semantic resolve.
+            in_room: Optional resolved room key to restrict candidate scope.
+        Returns:
+            Exact instance key, or None if not found / LLM returned NONE.
+        """
+        if in_room is not None:
+            names = self.get_instance_names_in_room(in_room)
+        else:
+            names = self.get_instance_names()
+        if delete_instance in names:
+            return delete_instance
+        # Substring matching as fallback
+        query_lower = delete_instance.lower()
+        matches = [n for n in names if query_lower in n.lower()]
+        if len(matches) == 1:
+            logger.info(
+                "Substring match: '%s' -> '%s'", delete_instance, matches[0]
+            )
+            return matches[0]
+        elif len(matches) > 1:
+            logger.warning(
+                "Multiple substring matches for '%s': %s. Using '%s'. "
+                "Use exact name or LLM for better matching.",
+                delete_instance,
+                matches,
+                matches[0],
+            )
+            return matches[0]
+        if gpt_client is not None:
+            return resolve_instance_with_llm(
+                gpt_client, names, delete_instance  # type: ignore[arg-type]
+            )
+        return None
+    def query_instance_center(
+        self,
+        instance_key: str,
+    ) -> list[float] | None:
+        """Query the center coordinates of an instance.
+        Args:
+            instance_key: Exact instance name to query.
+        Returns:
+            List [x, y, z] of the instance center, or None if not found.
+        """
+        return self.collector.get_instance_center(instance_key)
+    def resolve_and_query_instance(
+        self,
+        query_instance: str,
+        gpt_client: object | None = None,
+    ) -> tuple[str | None, list[float] | None]:
+        r"""Resolve instance name and return its center coordinates.
+        Combines fuzzy matching with coordinate query.
+        If query_instance is already in get_instance_names(), return its center.
+        Otherwise if gpt_client is provided, use LLM to resolve user description
+        (e.g. "床", "沙发") to one exact instance key.
+        Args:
+            query_instance: Exact instance key or semantic description.
+            gpt_client: Optional GPT client for semantic resolve.
+        Returns:
+            Tuple of (resolved_instance_name, center_coordinates), or (None, None) if not found.
+        """
+        names = self.get_instance_names()
+        if query_instance in names:
+            center = self.query_instance_center(query_instance)
+            return query_instance, center
+        # Substring matching as fallback
+        query_lower = query_instance.lower()
+        matches = [n for n in names if query_lower in n.lower()]
+        if len(matches) == 1:
+            logger.info(
+                "Substring match: '%s' -> '%s'", query_instance, matches[0]
+            )
+            center = self.query_instance_center(matches[0])
+            return matches[0], center
+        elif len(matches) > 1:
+            logger.warning(
+                "Multiple substring matches for '%s': %s. Using '%s'. "
+                "Use exact name or LLM for better matching.",
+                query_instance,
+                matches,
+                matches[0],
+            )
+            center = self.query_instance_center(matches[0])
+            return matches[0], center
+        if gpt_client is not None:
+            resolved = resolve_instance_with_llm(
+                gpt_client, names, query_instance  # type: ignore[arg-type]
+            )
+            if resolved:
+                center = self.query_instance_center(resolved)
+                return resolved, center
+        return None, None
+    def get_occupied_area(self) -> Geometry:
+        """Get the union of all occupied areas.
+        Returns:
+            Shapely geometry representing occupied areas.
+        """
+        return self.collector.occ_area
+    def get_floor_union(self) -> Geometry:
+        """Get the union of all floor areas.
+        Returns:
+            Shapely geometry representing floor areas.
+        """
+        return self.collector.floor_union
+def visualize_floorplan(
+    urdf_path: str,
+    output_path: str,
+    mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
+    ignore_items: list[str] | None = None,
+) -> None:
+    """Quick function to visualize a floorplan.
+    Args:
+        urdf_path: Path to the URDF file.
+        output_path: Path to save the output image.
+        mesh_sample_num: Number of points to sample from meshes.
+        ignore_items: List of item name patterns to ignore.
+    """
+    manager = FloorplanManager(
+        urdf_path=urdf_path,
+        mesh_sample_num=mesh_sample_num,
+        ignore_items=ignore_items,
+    )
+    manager.visualize(output_path=output_path)
+def insert_object_to_scene(
+    urdf_path: str,
+    asset_path: str,
+    instance_key: str,
+    output_path: str,
+    usd_path: str | None = None,
+    in_room: str | None = None,
+    on_instance: str | None = None,
+    beside_instance: str | None = None,
+    beside_distance: float = DEFAULT_BESIDE_DISTANCE,
+    place_strategy: Literal["top", "random"] = "random",
+    rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
+) -> list[float] | None:
+    """Quick function to insert an object and generate floorplan.
+    Note: USD updates require Blender (bpy) to convert .obj to .usdc format.
+    Args:
+        urdf_path: Path to the URDF file.
+        asset_path: Path to the asset mesh file (.obj).
+        instance_key: Unique key for the new instance.
+        output_path: Path to save the floorplan image.
+        usd_path: Optional path to the USD file (requires Blender).
+        in_room: Optional room name to constrain placement.
+        on_instance: Optional instance name to place on top of.
+        beside_instance: Optional instance name to place beside (on floor).
+        beside_distance: Max distance for beside placement (meters).
+        place_strategy: Either "top" or "random".
+        rotation_rpy: Initial rotation in roll-pitch-yaw.
+    Returns:
+        List [x, y, z] of the placed instance center, or None if failed.
+    """
+    manager = FloorplanManager(urdf_path=urdf_path, usd_path=usd_path)
+    center = manager.insert_object(
+        asset_path=asset_path,
+        instance_key=instance_key,
+        in_room=in_room,
+        on_instance=on_instance,
+        beside_instance=beside_instance,
+        beside_distance=beside_distance,
+        rotation_rpy=rotation_rpy,
+        place_strategy=place_strategy,
+    )
+    if center is not None:
+        manager.visualize(output_path=output_path)
+    return center
+def delete_object_from_scene(
+    urdf_path: str,
+    instance_key: str,
+    in_room: str | None = None,
+    output_path: str | None = None,
+) -> bool:
+    """Quick function to delete an object from scene.
+    Args:
+        urdf_path: Path to the URDF file.
+        instance_key: Exact instance name to delete.
+        in_room: Optional room constraint - only delete if instance is in this room.
+        output_path: Optional path to save the floorplan image after deletion.
+    Returns:
+        True if deletion succeeded, False otherwise.
+    """
+    manager = FloorplanManager(urdf_path=urdf_path)
+    success = manager.delete_object(
+        instance_key=instance_key,
+        in_room=in_room,
+    )
+    if success and output_path is not None:
+        manager.visualize(output_path=output_path)
+    return success
+def query_instance_position(
+    urdf_path: str,
+    instance_key: str,
+) -> list[float] | None:
+    """Quick function to query instance center coordinates.
+    Args:
+        urdf_path: Path to the URDF file.
+        instance_key: Exact instance name to query.
+    Returns:
+        List [x, y, z] of the instance center, or None if not found.
+    """
+    manager = FloorplanManager(urdf_path=urdf_path)
+    return manager.query_instance_center(instance_key)

embodied_gen/skills/spatial-computing/cli/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+"""Command-line interface for floorplan operations.
+This package provides CLI utilities for floorplan visualization and
+scene manipulation.
+"""
+# Nothing is re-exported from this package: `from ... import *` yields no names.
+__all__: list[str] = []

embodied_gen/skills/spatial-computing/cli/main.py ADDED Viewed

	@@ -0,0 +1,267 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+from __future__ import annotations
+import json
+import logging
+import sys
+import warnings
+import tyro
+from ..api.floorplan_api import (
+    FloorplanConfig,
+    FloorplanManager,
+)
+# Module-level side effects: these run as soon as this module is imported.
+# Suppress every RuntimeWarning for the whole process.
+warnings.filterwarnings("ignore", category=RuntimeWarning)
+# force=True removes any handlers already attached to the root logger, so
+# this format and level replace whatever was configured before.
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    level=logging.INFO,
+    force=True,
+)
+logger = logging.getLogger(__name__)
+def _get_gpt_client() -> object | None:
+    """Lazy-import GPT_CLIENT for semantic --on_instance resolution."""
+    try:
+        from embodied_gen.utils.gpt_clients import GPT_CLIENT
+        return GPT_CLIENT
+    except Exception:
+        return None
+def entrypoint(cfg: FloorplanConfig) -> None:
+    """Main entry point for floorplan visualization and scene manipulation.
+    Args:
+        cfg: Configuration object with all parameters.
+    """
+    manager = FloorplanManager(
+        urdf_path=cfg.urdf_path,
+        usd_path=cfg.usd_path,
+        mesh_sample_num=cfg.mesh_sample_num,
+        ignore_items=cfg.ignore_items,
+        output_strategy=cfg.output_strategy,
+    )
+    # List instances/rooms and exit if requested
+    if cfg.list_instances:
+        names = manager.get_instance_names()
+        rooms = manager.get_room_names()
+        logger.info(f"instance_names: {names}")
+        logger.info(f"room_names: {rooms}")
+        return
+    # Batch insertion
+    if cfg.batch_insert_config is not None:
+        logger.info(
+            f"Loading batch insert config from {cfg.batch_insert_config}"
+        )
+        with open(cfg.batch_insert_config, 'r') as f:
+            objects = json.load(f)
+        logger.info(f"Batch inserting {len(objects)} objects...")
+        centers = manager.batch_insert_objects(objects)
+        success_count = len([c for c in centers if c is not None])
+        logger.info(
+            f"✅ Successfully placed {success_count}/{len(objects)} objects"
+        )
+        if success_count < len(objects):
+            logger.warning(
+                f"⚠️  Failed to place {len(objects) - success_count} objects"
+            )
+            sys.exit(1)
+        # Generate floorplan visualization if requested
+        if cfg.output_path is not None:
+            manager.visualize(output_path=cfg.output_path)
+        return
+    gpt_client = _get_gpt_client()
+    on_instance = cfg.on_instance
+    if on_instance is not None:
+        resolved = manager.resolve_on_instance(
+            on_instance, gpt_client=gpt_client
+        )
+        if resolved is None:
+            logger.error(
+                "No object matched \"%s\"。Current scene instance name: %s。",
+                on_instance,
+                manager.get_instance_names(),
+            )
+            sys.exit(1)
+        on_instance = resolved
+        if resolved != cfg.on_instance:
+            logger.info("\"%s\" -> \"%s\"", cfg.on_instance, resolved)
+    in_room = cfg.in_room
+    if in_room is not None:
+        resolved = manager.resolve_in_room(in_room, gpt_client=gpt_client)
+        if resolved is None:
+            logger.error(
+                "No room matched \"%s\"。Current scene room names: %s。",
+                in_room,
+                manager.get_room_names(),
+            )
+            sys.exit(1)
+        in_room = resolved
+        if resolved != cfg.in_room:
+            logger.info("\"%s\" -> \"%s\"", cfg.in_room, resolved)
+    beside_instance = cfg.beside_instance
+    if beside_instance is not None:
+        resolved = manager.resolve_beside_instance(
+            beside_instance, gpt_client=gpt_client, in_room=in_room
+        )
+        if resolved is None:
+            candidates = (
+                manager.get_instance_names_in_room(in_room)
+                if in_room
+                else manager.get_instance_names()
+            )
+            logger.error(
+                "No object matched \"%s\"。Current %sinstance name: %s。",
+                beside_instance,
+                f"room '{in_room}' " if in_room else "",
+                candidates,
+            )
+            sys.exit(1)
+        beside_instance = resolved
+        if resolved != cfg.beside_instance:
+            logger.info("\"%s\" -> \"%s\"", cfg.beside_instance, resolved)
+    delete_instance = cfg.delete_instance
+    delete_in_room = cfg.delete_in_room
+    if delete_instance is not None:
+        # Resolve room constraint if provided
+        if delete_in_room is not None:
+            resolved_room = manager.resolve_in_room(
+                delete_in_room, gpt_client=gpt_client
+            )
+            if resolved_room is None:
+                logger.error(
+                    "No room matched \"%s\"。Current scene room names: %s。",
+                    delete_in_room,
+                    manager.get_room_names(),
+                )
+                sys.exit(1)
+            delete_in_room = resolved_room
+            if resolved_room != cfg.delete_in_room:
+                logger.info(
+                    "\"%s\" -> \"%s\"", cfg.delete_in_room, resolved_room
+                )
+        # Resolve delete_instance with fuzzy matching
+        resolved = manager.resolve_delete_instance(
+            delete_instance, gpt_client=gpt_client, in_room=delete_in_room
+        )
+        if resolved is None:
+            candidates = (
+                manager.get_instance_names_in_room(delete_in_room)
+                if delete_in_room
+                else manager.get_instance_names()
+            )
+            logger.error(
+                "No object matched \"%s\"。Current %sinstance name: %s。",
+                delete_instance,
+                f"room '{delete_in_room}' " if delete_in_room else "",
+                candidates,
+            )
+            sys.exit(1)
+        delete_instance = resolved
+        if resolved != cfg.delete_instance:
+            logger.info("\"%s\" -> \"%s\"", cfg.delete_instance, resolved)
+        # Perform deletion
+        success = manager.delete_object(
+            instance_key=delete_instance,
+            in_room=delete_in_room,
+        )
+        if success:
+            logger.info(
+                f"✅ Successfully deleted '{delete_instance}' from scene."
+            )
+        else:
+            logger.error(f"❌ Failed to delete '{delete_instance}'.")
+            sys.exit(1)
+    # Query instance position
+    query_instance = cfg.query_instance
+    if query_instance is not None:
+        # Resolve instance with fuzzy matching
+        resolved_name, center = manager.resolve_and_query_instance(
+            query_instance, gpt_client=gpt_client
+        )
+        if resolved_name is None or center is None:
+            logger.error(
+                "No object matched \"%s\"。Current instance names: %s。",
+                query_instance,
+                manager.get_instance_names(),
+            )
+            sys.exit(1)
+        if resolved_name != query_instance:
+            logger.info("\"%s\" -> \"%s\"", query_instance, resolved_name)
+        logger.info(
+            f"📍 Instance '{resolved_name}' center: "
+            f"({center[0]:.4f}, {center[1]:.4f}, {center[2]:.4f})"
+        )
+    # Add asset instance if specified
+    center = None
+    if cfg.asset_path is not None:
+        center = manager.insert_object(
+            asset_path=cfg.asset_path,
+            instance_key=cfg.instance_key,
+            in_room=in_room,
+            on_instance=on_instance,
+            beside_instance=beside_instance,
+            beside_distance=cfg.beside_distance,
+            rotation_rpy=cfg.rotation_rpy,
+            n_max_attempt=cfg.max_placement_attempts,
+            place_strategy=cfg.place_strategy,
+        )
+        if center is not None:
+            logger.info(
+                f"Successfully placed '{cfg.instance_key}' at "
+                f"({center[0]:.3f}, {center[1]:.3f}, {center[2]:.3f})"
+            )
+        else:
+            logger.error(
+                f"❌ Failed to place '{cfg.instance_key}' in the scene."
+            )
+            sys.exit(1)
+    # Generate floorplan visualization
+    if cfg.output_path is not None:
+        manager.visualize(output_path=cfg.output_path)
+# Script entry: build a FloorplanConfig from the command line with tyro,
+# then run the CLI.
+if __name__ == "__main__":
+    config = tyro.cli(FloorplanConfig)
+    entrypoint(config)

embodied_gen/skills/spatial-computing/core/__init__.py ADDED Viewed

	@@ -0,0 +1,23 @@

+"""Floorplan skill core modules.
+This package provides core functionality for floorplan visualization
+and object placement in 3D indoor scenes.
+"""
+from .collector import (
+    UrdfSemanticInfoCollector,
+)
+from .geometry import (
+    get_actionable_surface,
+    points_to_polygon,
+)
+from .visualizer import (
+    FloorplanVisualizer,
+)
+# Public API of the core package: exactly the names imported above.
+__all__ = [
+    "FloorplanVisualizer",
+    "UrdfSemanticInfoCollector",
+    "points_to_polygon",
+    "get_actionable_surface",
+]

embodied_gen/skills/spatial-computing/core/collector.py ADDED Viewed

	@@ -0,0 +1,1102 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+from __future__ import annotations
+import logging
+import os
+import random
+import re
+import xml.etree.ElementTree as ET
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from shutil import copy2, copytree
+from typing import Literal
+import numpy as np
+import trimesh
+from scipy.spatial.transform import Rotation as R
+from shapely.affinity import translate
+from shapely.geometry import MultiPolygon, Polygon
+from shapely.ops import unary_union
+from .geometry import (
+    DEFAULT_MESH_SAMPLE_NUM,
+    get_actionable_surface,
+    points_to_polygon,
+)
+logger = logging.getLogger(__name__)
+# Type aliases
+# Evaluated at import time (it is an assignment, not an annotation), so the
+# `X | Y` union between classes needs Python 3.10+.
+Geometry = Polygon | MultiPolygon
+# Constants
+# NOTE(review): 1.57 looks like pi/2 in radians applied to roll - confirm.
+DEFAULT_ROTATION_RPY = (1.57, 0.0, 0.0)
+DEFAULT_MAX_PLACEMENT_ATTEMPTS = 3000
+# Name patterns of items to ignore by default.
+DEFAULT_IGNORE_ITEMS = ("ceiling", "light", "exterior")
+# Presumably meters, matching the "0.5m" default documented for beside
+# placement - confirm.
+DEFAULT_BESIDE_DISTANCE = 0.5
+# NOTE(review): presumably a small vertical lift applied to placed objects;
+# its use is not visible here - confirm.
+DEFAULT_Z_OFFSET = 0.001
+def _load_mesh_to_poly(
+    mesh_path: str,
+    xyz: np.ndarray,
+    rpy: np.ndarray,
+    mesh_sample_num: int,
+    use_exact_projection: bool = False,
+) -> Polygon:
+    """Load mesh and convert to 2D footprint polygon (process-safe).
+    Standalone function for use with ProcessPoolExecutor.
+    """
+    if not os.path.exists(mesh_path):
+        return Polygon()
+    mesh = trimesh.load(mesh_path, force="mesh", skip_materials=True)
+    matrix = np.eye(4)
+    matrix[:3, :3] = R.from_euler("xyz", rpy).as_matrix()
+    matrix[:3, 3] = xyz
+    mesh.apply_transform(matrix)
+    if use_exact_projection:
+        triangle_polys = []
+        for triangle in mesh.triangles[:, :, :2]:
+            poly = Polygon(triangle)
+            if poly.is_valid and poly.area > 1e-8:
+                triangle_polys.append(poly)
+        if triangle_polys:
+            projected_poly = unary_union(triangle_polys).buffer(0)
+            if not projected_poly.is_empty:
+                return projected_poly
+    verts = np.asarray(mesh.sample(mesh_sample_num))[:, :2]
+    return points_to_polygon(verts)
+class UrdfSemanticInfoCollector:
+    """Collector for URDF semantic information.
+    Parses URDF files to extract room layouts, object footprints, and
+    provides methods for adding new instances and updating URDF/USD files.
+    Attributes:
+        mesh_sample_num: Number of points to sample from meshes.
+        ignore_items: List of item name patterns to ignore.
+        instances: Dictionary of instance name to footprint polygon.
+        instance_meta: Dictionary of instance metadata (mesh path, pose).
+        rooms: Dictionary of room polygons.
+        footprints: Dictionary of object footprints.
+        occ_area: Union of all occupied areas.
+        floor_union: Union of all floor polygons.
+    """
+    def __init__(
+        self,
+        mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
+        ignore_items: list[str] | None = None,
+    ) -> None:
+        """Initialize the collector.
+        Args:
+            mesh_sample_num: Number of points to sample from meshes.
+            ignore_items: List of item name patterns to ignore during parsing.
+        """
+        self.mesh_sample_num = mesh_sample_num
+        self.ignore_items = ignore_items or list(DEFAULT_IGNORE_ITEMS)
+        self.instances: dict[str, Polygon] = {}
+        self.instance_meta: dict[str, dict] = {}
+        self.rooms: dict[str, Geometry] = {}
+        self.footprints: dict[str, Geometry] = {}
+        self.occ_area: Geometry = Polygon()
+        self.floor_union: Geometry = Polygon()
+        self.urdf_path: str = ""
+        self._tree: ET.ElementTree | None = None
+        self._root: ET.Element | None = None
+    def _get_transform(
+        self,
+        joint_elem: ET.Element,
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Extract transform (xyz, rpy) from a joint element.
+        Args:
+            joint_elem: XML Element representing a URDF joint.
+        Returns:
+            Tuple of (xyz, rpy) arrays.
+        """
+        origin = joint_elem.find("origin")
+        if origin is not None:
+            xyz = np.fromstring(origin.attrib.get("xyz", "0 0 0"), sep=" ")
+            rpy = np.fromstring(origin.attrib.get("rpy", "0 0 0"), sep=" ")
+        else:
+            xyz, rpy = np.zeros(3), np.zeros(3)
+        return xyz, rpy
+    def collect(self, urdf_path: str) -> None:
+        """Parse URDF file and collect semantic information.
+        Args:
+            urdf_path: Path to the URDF file.
+        """
+        logger.info(f"Collecting URDF semantic info from {urdf_path}")
+        self.urdf_path = urdf_path
+        urdf_dir = os.path.dirname(urdf_path)
+        self._tree = ET.parse(urdf_path)
+        self._root = self._tree.getroot()
+        link_transforms = self._build_link_transforms()
+        self._process_links(urdf_dir, link_transforms)
+        self._update_internal_state()
+    def _build_link_transforms(
+        self,
+    ) -> dict[str, tuple[np.ndarray, np.ndarray]]:
+        """Build mapping from link names to their transforms.
+        Returns:
+            Dictionary mapping link names to (xyz, rpy) tuples.
+        """
+        link_transforms: dict[str, tuple[np.ndarray, np.ndarray]] = {}
+        for joint in self._tree.findall("joint"):
+            child = joint.find("child")
+            if child is not None:
+                link_name = child.attrib["link"]
+                link_transforms[link_name] = self._get_transform(joint)
+        return link_transforms
+    def _process_links(
+        self,
+        urdf_dir: str,
+        link_transforms: dict[str, tuple[np.ndarray, np.ndarray]],
+    ) -> None:
+        """Process all links in the URDF tree (parallel).
+        Uses ProcessPoolExecutor to bypass GIL for CPU-bound mesh
+        loading and sampling.
+        Args:
+            urdf_dir: Directory containing the URDF file.
+            link_transforms: Dictionary of link transforms.
+        """
+        self.instances = {}
+        self.instance_meta = {}
+        wall_polys: list[Polygon] = []
+        # Collect tasks for parallel processing
+        tasks: list[dict] = []
+        for link in self._tree.findall("link"):
+            name = link.attrib.get("name", "").lower()
+            if any(ign in name for ign in self.ignore_items):
+                continue
+            visual = link.find("visual")
+            if visual is None:
+                continue
+            mesh_node = visual.find("geometry/mesh")
+            if mesh_node is None:
+                continue
+            mesh_path = os.path.join(urdf_dir, mesh_node.attrib["filename"])
+            default_transform = (np.zeros(3), np.zeros(3))
+            xyz, rpy = link_transforms.get(
+                link.attrib["name"], default_transform
+            )
+            tasks.append(
+                {
+                    "link_name": link.attrib["name"],
+                    "link_name_lower": name,
+                    "mesh_path": mesh_path,
+                    "xyz": xyz,
+                    "rpy": rpy,
+                }
+            )
+        logger.info(
+            "Processing %d URDF links to extract geometry "
+            "(parallel, sample_num=%d)...",
+            len(tasks),
+            self.mesh_sample_num,
+        )
+        # ProcessPoolExecutor bypasses GIL for CPU-bound trimesh ops.
+        # Cap workers to balance parallelism vs memory overhead.
+        n_workers = min(len(tasks), os.cpu_count() or 4, 8)
+        futures_map: dict = {}
+        with ProcessPoolExecutor(max_workers=n_workers) as executor:
+            for task in tasks:
+                future = executor.submit(
+                    _load_mesh_to_poly,
+                    task["mesh_path"],
+                    task["xyz"],
+                    task["rpy"],
+                    self.mesh_sample_num,
+                    "_floor" in task["link_name_lower"],
+                )
+                futures_map[future] = task
+            for future in as_completed(futures_map):
+                task = futures_map[future]
+                try:
+                    poly = future.result()
+                except Exception:
+                    logger.warning(
+                        "Failed to process link '%s', skipping.",
+                        task["link_name"],
+                        exc_info=True,
+                    )
+                    continue
+                if poly.is_empty:
+                    continue
+                if "wall" in task["link_name_lower"]:
+                    wall_polys.append(poly)
+                else:
+                    key = self._process_safe_key_robust(task["link_name"])
+                    self.instances[key] = poly
+                    self.instance_meta[key] = {
+                        "mesh_path": task["mesh_path"],
+                        "xyz": task["xyz"],
+                        "rpy": task["rpy"],
+                        "original_link_name": task["link_name"],
+                    }
+        self.instances["walls"] = unary_union(wall_polys)
+    def _update_internal_state(self) -> None:
+        """Update derived state (rooms, footprints, occupied area)."""
+        self.rooms = {
+            k: v
+            for k, v in self.instances.items()
+            if "_floor" in k.lower() and not v.is_empty
+        }
+        self.footprints = {
+            k: v
+            for k, v in self.instances.items()
+            if k != "walls"
+            and "_floor" not in k.lower()
+            and "rug" not in k.lower()
+            and not v.is_empty
+        }
+        self.occ_area = unary_union(list(self.footprints.values()))
+        self.floor_union = unary_union(list(self.rooms.values()))
+    def _process_safe_key_robust(self, name: str) -> str:
+        """Convert a link name to a safe, normalized key.
+        Args:
+            name: Original link name.
+        Returns:
+            Normalized key string.
+        """
+        if name.endswith("_floor"):
+            parts = name.split("_")
+            return "_".join(parts[:-2] + ["floor"])
+        if "Factory" in name:
+            # Handle infinigen naming convention
+            prefix = name.split("Factory")[0]
+            suffix = f"_{name.split('_')[-1]}"
+        else:
+            prefix, suffix = name, ""
+        res = prefix.replace(" ", "_")
+        res = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", res)
+        res = res.lower()
+        res = re.sub(r"_+", "_", res).strip("_ ")
+        return f"{res}{suffix}"
+    def add_instance(
+        self,
+        asset_path: str,
+        instance_key: str,
+        in_room: str | None = None,
+        on_instance: str | None = None,
+        beside_instance: str | None = None,
+        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
+        rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
+        n_max_attempt: int = DEFAULT_MAX_PLACEMENT_ATTEMPTS,
+        place_strategy: Literal["top", "random"] = "random",
+    ) -> list[float] | None:
+        """Add a new instance to the scene with automatic placement.
+        Args:
+            asset_path: Path to the asset mesh file.
+            instance_key: Unique key for the new instance.
+            in_room: Optional room name to constrain placement.
+            on_instance: Optional instance name to place on top of.
+            beside_instance: Optional instance name to place beside (on floor).
+            beside_distance: Initial buffer distance from the target instance
+                for beside placement (meters). Will auto-expand if needed.
+            rotation_rpy: Initial rotation in roll-pitch-yaw.
+            n_max_attempt: Maximum placement attempts.
+            place_strategy: Either "top" or "random".
+        Returns:
+            List [x, y, z] of the placed instance center, or None if failed.
+        Raises:
+            ValueError: If instance_key already exists or room/instance not found.
+        """
+        if instance_key in self.instances:
+            raise ValueError(f"Instance key '{instance_key}' already exists.")
+        room_poly = self._resolve_room_polygon(in_room)
+        # Load mesh and compute base polygon (needed for all placement modes)
+        mesh = trimesh.load(asset_path, force="mesh")
+        mesh.apply_transform(
+            trimesh.transformations.euler_matrix(*rotation_rpy, "sxyz")
+        )
+        verts = np.asarray(mesh.sample(self.mesh_sample_num))[:, :2]
+        base_poly = points_to_polygon(verts)
+        centroid = base_poly.centroid
+        base_poly = translate(base_poly, xoff=-centroid.x, yoff=-centroid.y)
+        if beside_instance is not None:
+            placement = self._try_place_beside(
+                base_poly=base_poly,
+                beside_instance=beside_instance,
+                room_poly=room_poly,
+                beside_distance=beside_distance,
+                n_max_attempt=n_max_attempt,
+                multi_match_strategy="first",  # Default strategy
+            )
+            base_z = 0.0
+        else:
+            target_area, obstacles, base_z = self._resolve_placement_target(
+                on_instance, room_poly, place_strategy
+            )
+            if target_area.is_empty:
+                logger.error("Target area for placement is empty.")
+                return None
+            placement = self._try_place_polygon(
+                base_poly, target_area, obstacles, n_max_attempt
+            )
+        if placement is None:
+            logger.error(
+                f"Failed to place '{instance_key}' after all attempts."
+            )
+            return None
+        x, y, candidate = placement
+        self.instances[instance_key] = candidate
+        final_z = base_z - mesh.bounds[0][2] + DEFAULT_Z_OFFSET
+        self._update_internal_state()
+        return [round(v, 4) for v in (x, y, final_z)]
+    def _resolve_room_polygon(self, in_room: str | None) -> Geometry | None:
+        """Resolve room name to polygon.
+        Args:
+            in_room: Room name query string.
+        Returns:
+            Room polygon or None if not specified.
+        Raises:
+            ValueError: If room not found.
+        """
+        if in_room is None:
+            return None
+        query_room = in_room.lower()
+        room_matches = [
+            k for k in self.rooms.keys() if query_room in k.lower()
+        ]
+        if not room_matches:
+            raise ValueError(f"Room '{in_room}' not found.")
+        return unary_union([self.rooms[k] for k in room_matches])
+    def _try_place_beside(
+        self,
+        base_poly: Polygon,
+        beside_instance: str,
+        room_poly: Geometry | None,
+        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
+        n_max_attempt: int = DEFAULT_MAX_PLACEMENT_ATTEMPTS,
+        max_expand_steps: int = 5,
+        expand_factor: float = 1.5,
+        multi_match_strategy: Literal["first", "random", "largest"] = "first",
+    ) -> tuple[float, float, Polygon] | None:
+        """Place object beside target with progressive distance expansion.
+        More robust than fixed-distance placement:
+        1. Ensures minimum distance accommodates the new object's size.
+        2. Pre-subtracts obstacles from the ring → sampling only in free area.
+        3. Progressively expands distance on failure (up to max_expand_steps).
+        4. Skips steps where the free area is too small for the object.
+        Args:
+            base_poly: Object footprint polygon centered at origin.
+            beside_instance: Target instance name to place beside.
+            room_poly: Optional room constraint polygon.
+            beside_distance: Initial buffer distance (meters).
+            n_max_attempt: Total max placement attempts across all steps.
+            max_expand_steps: Max number of distance expansion rounds.
+            expand_factor: Distance multiplier per expansion round.
+        Returns:
+            Tuple (x, y, placed_polygon) on success, or None if all failed.
+        Raises:
+            ValueError: If beside_instance not found in scene.
+        """
+        # --- Resolve target instance ---
+        query_obj = beside_instance.lower()
+        possible_matches = [
+            k
+            for k in self.instances.keys()
+            if query_obj in k.lower() and k != "walls"
+        ]
+        if room_poly is not None:
+            # Check that the object's representative point falls inside
+            # the room (buffered slightly for mesh-sampling tolerance).
+            room_buffered = room_poly.buffer(0.1)
+            possible_matches = [
+                k
+                for k in possible_matches
+                if room_buffered.contains(
+                    self.instances[k].representative_point()
+                )
+            ]
+        if not possible_matches:
+            location_msg = " in specified room" if room_poly else ""
+            # Log candidate distances for easier debugging
+            all_matches = [
+                k
+                for k in self.instances.keys()
+                if query_obj in k.lower() and k != "walls"
+            ]
+            if all_matches and room_poly is not None:
+                dists = {
+                    k: round(self.instances[k].distance(room_poly), 4)
+                    for k in all_matches
+                }
+                logger.error("Candidate distances to room polygon: %s", dists)
+            raise ValueError(
+                f"No instance matching '{beside_instance}' "
+                f"found{location_msg}."
+            )
+        if len(possible_matches) > 1:
+            # Apply multi-match strategy
+            if multi_match_strategy == "random":
+                target_key = random.choice(possible_matches)
+            elif multi_match_strategy == "largest":
+                target_key = max(
+                    possible_matches, key=lambda k: self.instances[k].area
+                )
+            else:  # "first"
+                target_key = possible_matches[0]
+            logger.warning(
+                f"Multiple matches for '{beside_instance}': "
+                f"{possible_matches}. Using '{target_key}' "
+                f"(strategy: {multi_match_strategy})."
+            )
+        else:
+            target_key = possible_matches[0]
+        target_footprint = self.instances[target_key]
+        floor = room_poly if room_poly is not None else self.floor_union
+        # --- Ensure initial distance accommodates the object's size ---
+        obj_bounds = base_poly.bounds  # (minx, miny, maxx, maxy)
+        obj_half_diag = (
+            np.hypot(
+                obj_bounds[2] - obj_bounds[0],
+                obj_bounds[3] - obj_bounds[1],
+            )
+            / 2.0
+        )
+        current_distance = max(beside_distance, obj_half_diag * 1.5)
+        # Budget attempts across expansion steps
+        attempts_per_step = max(n_max_attempt // (max_expand_steps + 1), 500)
+        empty_obstacle = Polygon()  # pre-created; obstacles are pre-subtracted
+        for step in range(max_expand_steps + 1):
+            # Build ring: buffer - footprint, intersected with floor
+            buffered = target_footprint.buffer(current_distance)
+            ring_area = buffered.difference(target_footprint)
+            ring_area = ring_area.intersection(floor)
+            if ring_area.is_empty:
+                logger.info(
+                    f"[beside step {step}] Ring empty at "
+                    f"{current_distance:.2f}m, expanding..."
+                )
+                current_distance *= expand_factor
+                continue
+            # Pre-subtract all obstacles → sample only from actual free area
+            free_area = ring_area.difference(self.occ_area)
+            if free_area.is_empty or free_area.area < base_poly.area * 0.5:
+                logger.info(
+                    f"[beside step {step}] Free area too small at "
+                    f"{current_distance:.2f}m "
+                    f"(free={free_area.area:.4f}, "
+                    f"need≈{base_poly.area:.4f}), expanding..."
+                )
+                current_distance *= expand_factor
+                continue
+            # Attempt placement in the free area (obstacles already removed)
+            placement = self._try_place_polygon(
+                base_poly, free_area, empty_obstacle, attempts_per_step
+            )
+            if placement is not None:
+                logger.info(
+                    f"Placed beside '{target_key}' at distance "
+                    f"{current_distance:.2f}m (step {step})"
+                )
+                return placement
+            logger.info(
+                f"[beside step {step}] Failed at {current_distance:.2f}m "
+                f"after {attempts_per_step} attempts, expanding..."
+            )
+            current_distance *= expand_factor
+        logger.error(
+            f"Failed to place beside '{target_key}' after "
+            f"{max_expand_steps + 1} expansion steps "
+            f"(final distance: {current_distance / expand_factor:.2f}m)."
+        )
+        return None
+    def _resolve_placement_target(
+        self,
+        on_instance: str | None,
+        room_poly: Geometry | None,
+        place_strategy: Literal["top", "random"],
+    ) -> tuple[Geometry, Geometry, float]:
+        """Resolve the target placement area and obstacles.
+        Args:
+            on_instance: Instance name to place on.
+            room_poly: Room polygon constraint.
+            place_strategy: Placement strategy.
+        Returns:
+            Tuple of (target_area, obstacles, base_z_height).
+        Raises:
+            ValueError: If on_instance not found.
+        """
+        if on_instance is None:
+            if room_poly is not None:
+                return room_poly, self.occ_area, 0.0
+            return self.floor_union, self.occ_area, 0.0
+        query_obj = on_instance.lower()
+        possible_matches = [
+            k
+            for k in self.instances.keys()
+            if query_obj in k.lower() and k != "walls"
+        ]
+        if room_poly is not None:
+            room_buffered = room_poly.buffer(0.1)
+            possible_matches = [
+                k
+                for k in possible_matches
+                if room_buffered.contains(
+                    self.instances[k].representative_point()
+                )
+            ]
+        if not possible_matches:
+            location_msg = f" in room '{on_instance}'" if room_poly else ""
+            raise ValueError(
+                f"No instance matching '{on_instance}' found{location_msg}."
+            )
+        if place_strategy == "random":
+            target_parent_key = random.choice(possible_matches)
+        else:
+            target_parent_key = possible_matches[0]
+        if len(possible_matches) > 1:
+            logger.warning(
+                f"Multiple matches for '{on_instance}': {possible_matches}. "
+                f"Using '{target_parent_key}'."
+            )
+        meta = self.instance_meta[target_parent_key]
+        parent_mesh = trimesh.load(meta["mesh_path"], force="mesh")
+        matrix = np.eye(4)
+        matrix[:3, :3] = R.from_euler("xyz", meta["rpy"]).as_matrix()
+        matrix[:3, 3] = meta["xyz"]
+        parent_mesh.apply_transform(matrix)
+        best_z, surface_poly = get_actionable_surface(
+            parent_mesh, place_strategy=place_strategy
+        )
+        obstacles = self.occ_area.difference(self.instances[target_parent_key])
+        # Re-add footprints of objects inside the parent polygon so they
+        # remain obstacles (difference above removes them).
+        parent_poly = self.instances[target_parent_key]
+        children_on_parent = [
+            poly
+            for key, poly in self.footprints.items()
+            if key != target_parent_key and parent_poly.contains(poly)
+        ]
+        if children_on_parent:
+            obstacles = unary_union([obstacles] + children_on_parent)
+        logger.info(f"Placing on '{target_parent_key}' (Z={best_z:.3f})")
+        return surface_poly, obstacles, best_z
+    def _try_place_polygon(
+        self,
+        base_poly: Polygon,
+        target_area: Geometry,
+        obstacles: Geometry,
+        n_max_attempt: int,
+    ) -> tuple[float, float, Polygon] | None:
+        """Try to place polygon in target area avoiding obstacles.
+        Pre-computes the free area (target minus obstacles) so that the
+        containment check alone is sufficient, avoiding redundant
+        intersection tests against obstacles on every iteration.
+        Args:
+            base_poly: Polygon to place (centered at origin).
+            target_area: Area where placement is allowed.
+            obstacles: Areas to avoid.
+            n_max_attempt: Maximum attempts.
+        Returns:
+            Tuple of (x, y, placed_polygon) or None if failed.
+        """
+        if not obstacles.is_empty:
+            free_area = target_area.difference(obstacles)
+        else:
+            free_area = target_area
+        if free_area.is_empty:
+            return None
+        minx, miny, maxx, maxy = free_area.bounds
+        for _ in range(n_max_attempt):
+            x = np.random.uniform(minx, maxx)
+            y = np.random.uniform(miny, maxy)
+            candidate = translate(base_poly, xoff=x, yoff=y)
+            if free_area.contains(candidate):
+                return x, y, candidate
+        return None
+    def update_urdf_info(
+        self,
+        output_path: str,
+        instance_key: str,
+        visual_mesh_path: str,
+        collision_mesh_path: str | None = None,
+        trans_xyz: tuple[float, float, float] = (0, 0, 0),
+        rot_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
+        joint_type: str = "fixed",
+    ) -> None:
+        """Add a new link to the URDF tree and save.
+        Args:
+            output_path: Path to save the updated URDF.
+            instance_key: Name for the new link.
+            visual_mesh_path: Path to the visual mesh file.
+            collision_mesh_path: Optional path to collision mesh.
+            trans_xyz: Translation (x, y, z).
+            rot_rpy: Rotation (roll, pitch, yaw).
+            joint_type: Type of joint (e.g., "fixed").
+        """
+        if self._root is None:
+            return
+        logger.info(f"Updating URDF for instance '{instance_key}'.")
+        urdf_dir = os.path.dirname(self.urdf_path)
+        # Copy mesh files
+        copytree(
+            os.path.dirname(visual_mesh_path),
+            f"{urdf_dir}/{instance_key}",
+            dirs_exist_ok=True,
+        )
+        visual_rel_path = (
+            f"{instance_key}/{os.path.basename(visual_mesh_path)}"
+        )
+        collision_rel_path = None
+        if collision_mesh_path is not None:
+            copytree(
+                os.path.dirname(collision_mesh_path),
+                f"{urdf_dir}/{instance_key}",
+                dirs_exist_ok=True,
+            )
+            collision_rel_path = (
+                f"{instance_key}/{os.path.basename(collision_mesh_path)}"
+            )
+        # Create link element
+        link = ET.SubElement(self._root, "link", attrib={"name": instance_key})
+        visual = ET.SubElement(link, "visual")
+        v_geo = ET.SubElement(visual, "geometry")
+        ET.SubElement(v_geo, "mesh", attrib={"filename": visual_rel_path})
+        if collision_rel_path is not None:
+            collision = ET.SubElement(link, "collision")
+            c_geo = ET.SubElement(collision, "geometry")
+            ET.SubElement(
+                c_geo, "mesh", attrib={"filename": collision_rel_path}
+            )
+        # Create joint element
+        joint_name = f"joint_{instance_key}"
+        joint = ET.SubElement(
+            self._root,
+            "joint",
+            attrib={"name": joint_name, "type": joint_type},
+        )
+        ET.SubElement(joint, "parent", attrib={"link": "base"})
+        ET.SubElement(joint, "child", attrib={"link": instance_key})
+        xyz_str = f"{trans_xyz[0]:.4f} {trans_xyz[1]:.4f} {trans_xyz[2]:.4f}"
+        rpy_str = f"{rot_rpy[0]:.4f} {rot_rpy[1]:.4f} {rot_rpy[2]:.4f}"
+        ET.SubElement(joint, "origin", attrib={"xyz": xyz_str, "rpy": rpy_str})
+        self.save_urdf(output_path)
+    def update_usd_info(
+        self,
+        usd_path: str,
+        output_path: str,
+        instance_key: str,
+        visual_mesh_path: str,
+        trans_xyz: list[float],
+        rot_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
+    ) -> None:
+        """Add a mesh instance to an existing USD file.
+        Uses Blender (bpy) to convert OBJ to USD format. The converted
+        ``.usdc`` file and the textures/materials found next to the OBJ
+        are written to ``<output dir>/<instance_key>/``; the stage gets an
+        Xform prim ``/<instance_key>`` with a child ``Mesh`` prim that
+        references the converted file.
+        Args:
+            usd_path: Path to the source USD file.
+            output_path: Path to save the modified USD.
+            instance_key: Prim path name for the new instance.
+            visual_mesh_path: Path to the visual mesh (OBJ format).
+            trans_xyz: Translation [x, y, z].
+            rot_rpy: Rotation (roll, pitch, yaw), converted with
+                ``np.degrees`` before being passed to ``Gf.Rotation``.
+        Raises:
+            ImportError: If pxr (USD) library or bpy is not available.
+        """
+        # Imported lazily so the module loads without Blender/USD installed.
+        import bpy
+        from pxr import Gf, Usd, UsdGeom
+        prim_path = f"/{instance_key}"
+        out_dir = os.path.dirname(output_path)
+        target_dir = os.path.join(out_dir, instance_key)
+        os.makedirs(target_dir, exist_ok=True)
+        mesh_filename = os.path.basename(visual_mesh_path)
+        usdc_filename = os.path.splitext(mesh_filename)[0] + ".usdc"
+        target_usdc_path = os.path.join(target_dir, usdc_filename)
+        logger.info(
+            f"Converting with Blender (bpy): "
+            f"{visual_mesh_path} -> {target_usdc_path}"
+        )
+        # Start from an empty Blender scene (presumably so that only the
+        # imported OBJ ends up in the export; TODO confirm).
+        bpy.ops.wm.read_factory_settings(use_empty=True)
+        bpy.ops.wm.obj_import(
+            filepath=visual_mesh_path,
+            forward_axis="Y",
+            up_axis="Z",
+        )
+        bpy.ops.wm.usd_export(
+            filepath=target_usdc_path,
+            selected_objects_only=False,
+        )
+        # Copy texture files
+        src_dir = os.path.dirname(visual_mesh_path)
+        for f in os.listdir(src_dir):
+            if f.lower().endswith((".png", ".jpg", ".jpeg", ".mtl")):
+                copy2(os.path.join(src_dir, f), target_dir)
+        # Reference path stored in the stage, relative to the output file.
+        final_rel_path = f"./{instance_key}/{usdc_filename}"
+        # Update USD stage
+        stage = Usd.Stage.Open(usd_path)
+        mesh_prim = UsdGeom.Xform.Define(stage, prim_path)
+        ref_prim = UsdGeom.Mesh.Define(stage, f"{prim_path}/Mesh")
+        ref_prim.GetPrim().GetReferences().AddReference(final_rel_path)
+        # Build transform matrix
+        translation_mat = Gf.Matrix4d().SetTranslate(
+            Gf.Vec3d(trans_xyz[0], trans_xyz[1], trans_xyz[2])
+        )
+        # One rotation matrix per axis, angles converted from rot_rpy.
+        rx = Gf.Matrix4d().SetRotate(
+            Gf.Rotation(Gf.Vec3d(1, 0, 0), np.degrees(rot_rpy[0]))
+        )
+        ry = Gf.Matrix4d().SetRotate(
+            Gf.Rotation(Gf.Vec3d(0, 1, 0), np.degrees(rot_rpy[1]))
+        )
+        rz = Gf.Matrix4d().SetRotate(
+            Gf.Rotation(Gf.Vec3d(0, 0, 1), np.degrees(rot_rpy[2]))
+        )
+        # NOTE(review): the product order presumably relies on USD's
+        # row-vector convention (X, then Y, then Z, then translation);
+        # confirm against the URDF rpy convention.
+        rotation_mat = rx * ry * rz
+        transform = rotation_mat * translation_mat
+        mesh_prim.AddTransformOp().Set(transform)
+        # The source stage is not modified on disk; the edited root layer
+        # is exported to output_path.
+        stage.GetRootLayer().Export(output_path)
+        logger.info(f"✅ Saved updated USD to {output_path}")
+    def remove_usd_instance(
+        self,
+        usd_path: str,
+        output_path: str,
+        instance_key: str,
+    ) -> None:
+        """Remove an instance from a USD file.
+        Args:
+            usd_path: Path to the source USD file.
+            output_path: Path to save the modified USD.
+            instance_key: Prim path name of the instance to remove.
+        Raises:
+            ImportError: If pxr (USD) library is not available.
+        """
+        from pxr import Usd
+        prim_path = f"/{instance_key}"
+        # Open USD stage
+        stage = Usd.Stage.Open(usd_path)
+        # Find and remove the prim
+        prim = stage.GetPrimAtPath(prim_path)
+        if prim.IsValid():
+            stage.RemovePrim(prim_path)
+            logger.info(f"Removed prim '{prim_path}' from USD.")
+        else:
+            logger.warning(f"Prim '{prim_path}' not found in USD stage.")
+        # Export modified stage
+        stage.GetRootLayer().Export(output_path)
+        logger.info(f"✅ Saved updated USD to {output_path}")
+    def remove_instance(
+        self,
+        instance_key: str,
+        in_room: str | None = None,
+    ) -> bool:
+        """Remove an instance from the scene by its exact key.
+        The instance is dropped from the URDF tree (when one is loaded), from
+        ``self.instances`` and from ``self.instance_meta``; the internal
+        state is then refreshed.
+        Args:
+            instance_key: Exact key of the instance in ``self.instances``.
+            in_room: Optional room constraint. When given and
+                ``_resolve_room_polygon`` returns a polygon, the instance is
+                removed only if its representative point lies inside that
+                polygon buffered by 0.1. When the resolver returns None the
+                check is skipped.
+        Returns:
+            True if the instance was removed; False if it does not exist or
+            fails the room constraint.
+        Raises:
+            ValueError: If instance_key is "walls" or an existing instance
+                whose key contains "floor" (case-insensitive).
+        """
+        # Walls and every floor instance must never be deleted.
+        protected = ["walls"]
+        protected.extend(k for k in self.instances if "floor" in k.lower())
+        if instance_key in protected:
+            raise ValueError(
+                f"Cannot remove protected instance '{instance_key}'. "
+                f"Protected items: {protected}"
+            )
+        if instance_key not in self.instances:
+            logger.warning(f"Instance '{instance_key}' not found in scene.")
+            return False
+        room_poly = None
+        if in_room is not None:
+            room_poly = self._resolve_room_polygon(in_room)
+        if room_poly is not None:
+            anchor = self.instances[instance_key].representative_point()
+            if not room_poly.buffer(0.1).contains(anchor):
+                logger.warning(
+                    f"Instance '{instance_key}' is not in room '{in_room}'."
+                )
+                return False
+        # The XML removal reads instance_meta to find the original link name,
+        # so it has to run before the metadata entry is dropped below.
+        if self._root is not None:
+            self._remove_link_and_joint(instance_key)
+        del self.instances[instance_key]
+        self.instance_meta.pop(instance_key, None)
+        self._update_internal_state()
+        logger.info(f"✅ Removed instance '{instance_key}' from scene.")
+        return True
+    def _remove_link_and_joint(self, instance_key: str) -> None:
+        """Drop an instance's ``<link>`` and its parent ``<joint>`` from the URDF.
+        The link is matched by the ``original_link_name`` stored in the
+        instance metadata, falling back to ``instance_key`` itself. The joint
+        is the first one whose ``<child link=...>`` names that link. Only
+        direct children of the root element are searched, and at most one
+        link and one joint are removed. Missing elements are logged as
+        warnings; nothing is raised.
+        Args:
+            instance_key: Key of the instance to remove (simplified key).
+        """
+        root = self._root
+        if root is None:
+            return
+        original_link_name = self.instance_meta.get(instance_key, {}).get(
+            "original_link_name", instance_key
+        )
+        # Elements are compared with `is None`, never by truthiness, because
+        # an ElementTree element without children is falsy.
+        link = next(
+            (
+                el
+                for el in root.findall("link")
+                if el.attrib.get("name") == original_link_name
+            ),
+            None,
+        )
+        if link is None:
+            logger.warning(
+                f"Link '{original_link_name}' not found in URDF tree."
+            )
+        else:
+            root.remove(link)
+            logger.info(f"Removed link '{original_link_name}' from URDF.")
+        def _attaches_link(joint) -> bool:
+            child = joint.find("child")
+            return (
+                child is not None
+                and child.attrib.get("link") == original_link_name
+            )
+        joint = next(filter(_attaches_link, root.findall("joint")), None)
+        if joint is None:
+            logger.warning(
+                f"Joint for '{original_link_name}' not found in URDF tree."
+            )
+        else:
+            root.remove(joint)
+            logger.info(
+                f"Removed joint for '{original_link_name}' from URDF."
+            )
+    def get_instance_center(self, instance_key: str) -> list[float] | None:
+        """Return the ``[x, y, z]`` center of a scene instance.
+        ``x`` and ``y`` come from the centroid of the instance polygon; ``z``
+        is the third component of the ``"xyz"`` metadata entry (zero when the
+        entry is absent). All values are rounded to 4 decimal places.
+        Args:
+            instance_key: Name of the instance to query.
+        Returns:
+            List [x, y, z] of the instance center, or None if not found.
+        """
+        if instance_key not in self.instances:
+            logger.warning(f"Instance '{instance_key}' not found in scene.")
+            return None
+        footprint_centroid = self.instances[instance_key].centroid
+        xyz = self.instance_meta.get(instance_key, {}).get("xyz", np.zeros(3))
+        center = [
+            round(value, 4)
+            for value in (footprint_centroid.x, footprint_centroid.y, xyz[2])
+        ]
+        logger.info(f"Instance '{instance_key}' center: {center}")
+        return center
+    def save_urdf(self, output_path: str) -> None:
+        """Write the current URDF tree to ``output_path`` as UTF-8 XML.
+        When no tree is loaded nothing is written; a warning is logged so the
+        caller can tell that ``output_path`` was not produced.
+        Args:
+            output_path: Path to save the URDF file.
+        """
+        if self._tree is None:
+            logger.warning(
+                f"No URDF tree loaded; nothing written to {output_path}."
+            )
+            return
+        # Pretty-print only when the ET module provides indent().
+        if hasattr(ET, "indent"):
+            ET.indent(self._tree, space="  ", level=0)
+        self._tree.write(output_path, encoding="utf-8", xml_declaration=True)
+        logger.info(f"✅ Saved updated URDF to {output_path}")

embodied_gen/skills/spatial-computing/core/geometry.py ADDED Viewed

	@@ -0,0 +1,231 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+from __future__ import annotations
+import logging
+import random
+from typing import Literal
+import numpy as np
+import trimesh
+from shapely.geometry import MultiPoint, MultiPolygon, Polygon
+logger = logging.getLogger(__name__)
+# Type aliases
+Geometry = Polygon | MultiPolygon
+# Constants
+DEFAULT_MESH_SAMPLE_NUM = 10000
+DEFAULT_MAX_PLACEMENT_ATTEMPTS = 2000
+def points_to_polygon(
+    points: np.ndarray,
+    smooth_thresh: float = 0.2,
+    scanline_step: float = 0.01,
+) -> Polygon:
+    """Trace the outline of a 2D point cloud with a horizontal scanline sweep.
+    The Y range is cut into strips of height ``scanline_step``. Each
+    non-empty strip contributes its max-X and min-X point; the max-X points
+    (ascending Y) followed by the min-X points (descending Y) form a closed
+    contour. The contour is then smoothed by buffering outwards and back
+    inwards by ``smooth_thresh``.
+    Args:
+        points: Array of 2D points with shape (N, 2).
+        smooth_thresh: Buffer threshold for smoothing the polygon.
+        scanline_step: Step size for the scanline sweep.
+    Returns:
+        A Shapely Polygon representing the contour of the point cloud, or an
+        empty Polygon when there are no points or fewer than three contour
+        vertices.
+    """
+    if len(points) == 0:
+        return Polygon()
+    all_x, ys = points[:, 0], points[:, 1]
+    right_edge: list[list[float]] = []
+    left_edge: list[list[float]] = []
+    for y in np.arange(ys.min(), ys.max() + scanline_step, scanline_step):
+        in_strip = (ys >= y) & (ys < y + scanline_step)
+        if not in_strip.any():
+            continue
+        strip_x = all_x[in_strip]
+        right_edge.append([strip_x.max(), y])
+        left_edge.append([strip_x.min(), y])
+    # Walk up the right side, then back down the left side.
+    contour = right_edge + left_edge[::-1]
+    if len(contour) < 3:
+        return Polygon()
+    return Polygon(contour).buffer(smooth_thresh).buffer(-smooth_thresh)
+def get_actionable_surface(
+    mesh: trimesh.Trimesh,
+    tol_angle: int = 10,
+    tol_z: float = 0.02,
+    area_tolerance: float = 0.15,
+    place_strategy: Literal["top", "random"] = "random",
+) -> tuple[float, Geometry]:
+    """Extract the actionable (placeable) surface from a mesh.
+    Faces whose normal is within ``tol_angle`` degrees of +Z are grouped into
+    height layers; one layer is chosen by ``_select_surface_cluster`` and the
+    convex hull of points sampled on it is returned as the surface footprint.
+    Args:
+        mesh: The input trimesh object.
+        tol_angle: Angle tolerance in degrees for detecting up-facing normals.
+        tol_z: Z-coordinate tolerance for clustering faces.
+        area_tolerance: Tolerance for selecting candidate surfaces by area.
+        place_strategy: Either "top" (highest surface) or "random".
+    Returns:
+        A tuple of (z_height, surface_polygon) representing the selected
+        actionable surface. When no up-facing faces exist, the top of the
+        bounding box and the convex hull of all vertices (XY) are returned.
+        The hull comes from ``MultiPoint.convex_hull`` and can therefore be a
+        degenerate geometry for collinear or coincident points.
+    """
+    up_vec = np.array([0, 0, 1])
+    dots = np.dot(mesh.face_normals, up_vec)
+    valid_mask = dots > np.cos(np.deg2rad(tol_angle))
+    if not np.any(valid_mask):
+        logger.warning(
+            "No up-facing surfaces found. Falling back to bounding box top."
+        )
+        verts = mesh.vertices[:, :2]
+        return mesh.bounds[1][2], MultiPoint(verts).convex_hull
+    valid_faces_indices = np.where(valid_mask)[0]
+    face_z = mesh.triangles_center[valid_mask][:, 2]
+    face_areas = mesh.area_faces[valid_mask]
+    z_clusters = _cluster_faces_by_z(
+        face_z, face_areas, valid_faces_indices, tol_z
+    )
+    if not z_clusters:
+        return mesh.bounds[1][2], MultiPoint(mesh.vertices[:, :2]).convex_hull
+    selected_z, selected_data = _select_surface_cluster(
+        z_clusters, area_tolerance, place_strategy
+    )
+    # For "top" strategy, use the highest z among all clusters for
+    # base height, while keeping the largest-area polygon for XY placement.
+    if place_strategy == "top":
+        highest_z = max(z_clusters.keys())
+        if highest_z > selected_z:
+            logger.info(
+                f"Overriding base Z from {selected_z:.3f} to "
+                f"highest surface {highest_z:.3f}"
+            )
+            selected_z = highest_z
+    cluster_faces = mesh.faces[selected_data["indices"]]
+    temp_mesh = trimesh.Trimesh(vertices=mesh.vertices, faces=cluster_faces)
+    # Sample count comes from the module constant instead of a magic number.
+    samples, _ = trimesh.sample.sample_surface(
+        temp_mesh, DEFAULT_MESH_SAMPLE_NUM
+    )
+    if len(samples) < 3:
+        logger.warning(
+            f"Failed to sample enough points on layer Z={selected_z}. "
+            "Returning empty polygon."
+        )
+        return selected_z, Polygon()
+    surface_poly = MultiPoint(samples[:, :2]).convex_hull
+    return selected_z, surface_poly
+def _cluster_faces_by_z(
+    face_z: np.ndarray,
+    face_areas: np.ndarray,
+    face_indices: np.ndarray,
+    tol_z: float,
+) -> dict[float, dict]:
+    """Group faces into height bins of width ``tol_z``.
+    Each face goes into the bin keyed by its Z rounded to the nearest
+    multiple of ``tol_z``. Every bin accumulates the summed face area and the
+    list of original face indices, in input order.
+    Args:
+        face_z: Z coordinates of face centers.
+        face_areas: Areas of each face.
+        face_indices: Original indices of the faces.
+        tol_z: Tolerance for Z clustering.
+    Returns:
+        Dictionary mapping Z values to cluster data (area and indices).
+    """
+    clusters: dict[float, dict] = {}
+    for pos, height in enumerate(face_z):
+        bin_z = round(height / tol_z) * tol_z
+        bucket = clusters.setdefault(bin_z, {"area": 0.0, "indices": []})
+        bucket["area"] += face_areas[pos]
+        bucket["indices"].append(face_indices[pos])
+    return clusters
+def _select_surface_cluster(
+    z_clusters: dict[float, dict],
+    area_tolerance: float,
+    place_strategy: Literal["top", "random"],
+) -> tuple[float, dict]:
+    """Pick one height cluster among those with near-maximal area.
+    Candidates are the clusters whose area is at least
+    ``max_area * (1 - area_tolerance)``. "random" draws one candidate with
+    ``random.choice``; any other strategy takes the candidate with the
+    highest Z.
+    Args:
+        z_clusters: Dictionary of Z clusters with area and indices.
+        area_tolerance: Tolerance for candidate selection by area.
+        place_strategy: Either "top" or "random".
+    Returns:
+        Tuple of (selected_z, cluster_data).
+    """
+    max_area = max(c["area"] for c in z_clusters.values())
+    area_floor = max_area * (1.0 - area_tolerance)
+    candidates = [
+        item for item in z_clusters.items() if item[1]["area"] >= area_floor
+    ]
+    if not candidates:
+        # Fall back to the single largest cluster if nothing passes the filter.
+        candidates = [max(z_clusters.items(), key=lambda x: x[1]["area"])]
+    if place_strategy != "random":
+        selected_z, selected_data = max(candidates, key=lambda x: x[0])
+        logger.info(
+            f"Strategy 'top': Selected highest Z={selected_z:.3f} "
+            f"(Area={selected_data['area']:.3f})"
+        )
+        return selected_z, selected_data
+    selected_z, selected_data = random.choice(candidates)
+    logger.info(
+        f"Strategy 'random': Selected Z={selected_z:.3f} "
+        f"(Area={selected_data['area']:.3f}) "
+        f"from {len(candidates)} candidates."
+    )
+    return selected_z, selected_data

embodied_gen/skills/spatial-computing/core/visualizer.py ADDED Viewed

	@@ -0,0 +1,231 @@

+# Project EmbodiedGen
+#
+# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+from __future__ import annotations
+import logging
+import matplotlib.pyplot as plt
+from matplotlib.axes import Axes
+from matplotlib.colors import Colormap
+from shapely.geometry import MultiPolygon, Polygon
+from shapely.ops import unary_union
+# Type aliases
+Geometry = Polygon | MultiPolygon
+logger = logging.getLogger(__name__)
+class FloorplanVisualizer:
+    """Static utility class for visualizing floorplans.
+    ``plot`` is the entry point. It stacks five layers by z-order (room
+    floors, occupied-area overlay, footprint outlines, footprint labels and
+    room labels) and saves the figure to an image file.
+    """
+    @staticmethod
+    def draw_poly(ax: Axes, poly: Geometry, **kwargs) -> None:
+        """Fill a polygon or multi-polygon on matplotlib axes.
+        Only exterior rings are filled; interior holes are not cut out.
+        Args:
+            ax: Matplotlib axes object.
+            poly: Shapely Polygon or MultiPolygon to draw.
+            **kwargs: Additional arguments passed to ax.fill(). A ``color``
+                entry is used as the face color of every part; without it,
+                parts are colored from the "tab10" colormap by index.
+        """
+        if poly.is_empty:
+            return
+        geoms = poly.geoms if hasattr(poly, "geoms") else [poly]
+        color = kwargs.pop("color", None)
+        if color is None:
+            cmap = plt.get_cmap("tab10")
+            colors = [cmap(i) for i in range(len(geoms))]
+        else:
+            colors = [color] * len(geoms)
+        for part, facecolor in zip(geoms, colors):
+            if part.is_empty:
+                continue
+            x, y = part.exterior.xy
+            ax.fill(x, y, facecolor=facecolor, **kwargs)
+    @classmethod
+    def plot(
+        cls,
+        rooms: dict[str, Geometry],
+        footprints: dict[str, Geometry],
+        occ_area: Geometry,
+        save_path: str,
+    ) -> None:
+        """Generate and save a floorplan visualization.
+        The image is saved at 300 dpi with a transparent background and
+        without title, axis labels, ticks or spines.
+        Args:
+            rooms: Dictionary mapping room names to floor polygons.
+            footprints: Dictionary mapping object names to footprint polygons.
+            occ_area: Union of all occupied areas.
+            save_path: Path to save the output image.
+        """
+        fig, ax = plt.subplots(figsize=(10, 10))
+        # try/finally so the figure is released even if drawing or saving
+        # raises; otherwise pyplot keeps it alive.
+        try:
+            ax.set_aspect("equal")
+            cmap_rooms = plt.get_cmap("Pastel1")
+            cls._draw_room_floors(ax, rooms, cmap_rooms)
+            cls._draw_occupied_area(ax, occ_area)
+            cls._draw_footprint_outlines(ax, footprints)
+            cls._draw_footprint_labels(ax, footprints)
+            cls._draw_room_labels(ax, rooms)
+            cls._configure_axes(ax, rooms, occ_area)
+            # _configure_axes sets a title and axis labels; they are cleared
+            # here so the saved image contains only the drawing.
+            ax.set_title("")
+            ax.set_xlabel("")
+            ax.set_ylabel("")
+            ax.set_xticks([])
+            ax.set_yticks([])
+            for spine in ax.spines.values():
+                spine.set_visible(False)
+            fig.subplots_adjust(left=0, right=1, top=1, bottom=0)
+            fig.patch.set_alpha(0)
+            ax.patch.set_alpha(0)
+            # Save this figure explicitly instead of pyplot's current figure.
+            fig.savefig(
+                save_path,
+                dpi=300,
+                bbox_inches="tight",
+                pad_inches=0,
+                transparent=True,
+            )
+        finally:
+            plt.close(fig)
+    @classmethod
+    def _draw_room_floors(
+        cls,
+        ax: Axes,
+        rooms: dict[str, Geometry],
+        cmap: Colormap,
+    ) -> None:
+        """Draw colored room floor polygons (Layer 1).
+        Colors cycle through the ``cmap.N`` entries of the colormap in room
+        iteration order.
+        """
+        for i, poly in enumerate(rooms.values()):
+            cls.draw_poly(
+                ax,
+                poly,
+                color=cmap(i % cmap.N),
+                alpha=1.0,
+                edgecolor="black",
+                linestyle="--",
+                zorder=1,
+            )
+    @classmethod
+    def _draw_occupied_area(cls, ax: Axes, occ_area: Geometry) -> None:
+        """Draw the occupied area overlay (Layer 2)."""
+        cls.draw_poly(
+            ax,
+            occ_area,
+            color="tab:blue",
+            alpha=0.5,
+            lw=0,
+            zorder=2,
+        )
+    @staticmethod
+    def _draw_footprint_outlines(
+        ax: Axes,
+        footprints: dict[str, Geometry],
+    ) -> None:
+        """Draw dashed gray footprint outlines (Layer 3)."""
+        for poly in footprints.values():
+            if poly.is_empty:
+                continue
+            geoms = poly.geoms if hasattr(poly, "geoms") else [poly]
+            for p in geoms:
+                # Skip empty parts, consistent with draw_poly.
+                if p.is_empty:
+                    continue
+                ax.plot(*p.exterior.xy, "--", lw=0.8, color="gray", zorder=3)
+    @staticmethod
+    def _draw_footprint_labels(
+        ax: Axes,
+        footprints: dict[str, Geometry],
+    ) -> None:
+        """Draw footprint text labels at each centroid (Layer 4).
+        A trailing ``_<digits>`` suffix is stripped from the name to form
+        the label.
+        """
+        import re
+        for name, poly in footprints.items():
+            if poly.is_empty:
+                continue
+            label = re.sub(r"_\d+$", "", name)
+            center = poly.centroid
+            ax.text(
+                center.x,
+                center.y,
+                label,
+                fontsize=8,
+                ha="center",
+                va="center",
+                bbox={
+                    "facecolor": "white",
+                    "alpha": 0.5,
+                    "edgecolor": "none",
+                    "pad": 0.1,
+                },
+                zorder=4,
+            )
+    @staticmethod
+    def _draw_room_labels(ax: Axes, rooms: dict[str, Geometry]) -> None:
+        """Draw room text labels at each centroid (Layer 5).
+        Every occurrence of ``_floor`` is removed from the room name to form
+        the label.
+        """
+        for name, poly in rooms.items():
+            if poly.is_empty:
+                continue
+            label = name.replace("_floor", "")
+            center = poly.centroid
+            ax.text(
+                center.x,
+                center.y,
+                label,
+                fontsize=9,
+                color="black",
+                weight="bold",
+                ha="center",
+                va="center",
+                bbox={
+                    "facecolor": "lightgray",
+                    "alpha": 0.7,
+                    "edgecolor": "black",
+                    "boxstyle": "round,pad=0.3",
+                },
+                zorder=5,
+            )
+    @staticmethod
+    def _configure_axes(
+        ax: Axes,
+        rooms: dict[str, Geometry],
+        occ_area: Geometry,
+    ) -> None:
+        """Configure axes limits and labels.
+        The view is a square centered on the bounds of all rooms plus the
+        occupied area, sized to the larger extent with a 5% margin. With no
+        geometry the bounds default to [-1, 1] on both axes.
+        """
+        total_geom = unary_union(list(rooms.values()) + [occ_area])
+        if total_geom.is_empty:
+            minx, miny, maxx, maxy = -1, -1, 1, 1
+        else:
+            minx, miny, maxx, maxy = total_geom.bounds
+        cx = (minx + maxx) * 0.5
+        cy = (miny + maxy) * 0.5
+        half = max(maxx - minx, maxy - miny) * 0.5 * 1.05
+        ax.set_xlim(cx - half, cx + half)
+        ax.set_ylim(cy - half, cy + half)
+        ax.set_title("Floorplan Analysis", fontsize=14)
+        ax.set_xlabel("X (m)")
+        ax.set_ylabel("Y (m)")

embodied_gen/utils/gpt_clients.py CHANGED Viewed

@@ -30,6 +30,7 @@ from tenacity import (
     retry,
     retry_if_not_exception_type,
     stop_after_attempt,
     wait_random_exponential,
 )
@@ -44,6 +45,7 @@ __all__ = [
 _CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
 CONFIG_FILE = os.path.join(_CURRENT_DIR, "gpt_config.yaml")
 def combine_images_to_grid(
@@ -90,6 +92,7 @@ class GPTclient:
         api_version (str, optional): API version (for Azure).
         check_connection (bool, optional): Whether to check API connection.
         verbose (bool, optional): Enable verbose logging.
     Example:
         ```sh
@@ -117,21 +120,27 @@ class GPTclient:
         api_version: str = None,
         check_connection: bool = True,
         verbose: bool = False,
     ):
         if api_version is not None:
             self.client = AzureOpenAI(
                 azure_endpoint=endpoint,
                 api_key=api_key,
                 api_version=api_version,
             )
         else:
             self.client = OpenAI(
                 base_url=endpoint,
                 api_key=api_key,
             )
         self.endpoint = endpoint
         self.model_name = model_name
         self.image_formats = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
         self.verbose = verbose
         if check_connection:
@@ -142,7 +151,7 @@ class GPTclient:
     @retry(
         retry=retry_if_not_exception_type(openai.BadRequestError),
         wait=wait_random_exponential(min=1, max=10),
-        stop=stop_after_attempt(5),
     )
     def completion_with_backoff(self, **kwargs):
         """Performs a chat completion request with retry/backoff."""
@@ -253,9 +262,9 @@ class GPTclient:
                 temperature=0,
                 max_tokens=100,
             )
-            content = response.choices[0].message.content
-            logger.info(f"Connection check success.")
-        except Exception as e:
             raise ConnectionError(
                 f"Failed to connect to GPT API at {self.endpoint}, "
                 f"please check setting in `{CONFIG_FILE}` and `README`."
@@ -273,6 +282,7 @@ endpoint = os.environ.get("ENDPOINT", agent_config.get("endpoint"))
 api_key = os.environ.get("API_KEY", agent_config.get("api_key"))
 api_version = os.environ.get("API_VERSION", agent_config.get("api_version"))
 model_name = os.environ.get("MODEL_NAME", agent_config.get("model_name"))
 GPT_CLIENT = GPTclient(
     endpoint=endpoint,
@@ -280,6 +290,7 @@ GPT_CLIENT = GPTclient(
     api_version=api_version,
     model_name=model_name,
     check_connection=False,
 )

     retry,
     retry_if_not_exception_type,
     stop_after_attempt,
+    stop_after_delay,
     wait_random_exponential,
 )
 _CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
 CONFIG_FILE = os.path.join(_CURRENT_DIR, "gpt_config.yaml")
+DEFAULT_GPT_TIMEOUT = float(os.environ.get("GPT_TIMEOUT", 120))
 def combine_images_to_grid(
         api_version (str, optional): API version (for Azure).
         check_connection (bool, optional): Whether to check API connection.
         verbose (bool, optional): Enable verbose logging.
+        timeout (float, optional): Max seconds for a single GPT request.
     Example:
         ```sh
         api_version: str = None,
         check_connection: bool = True,
         verbose: bool = False,
+        timeout: float = DEFAULT_GPT_TIMEOUT,
     ):
         if api_version is not None:
             self.client = AzureOpenAI(
                 azure_endpoint=endpoint,
                 api_key=api_key,
                 api_version=api_version,
+                timeout=timeout,
+                max_retries=0,
             )
         else:
             self.client = OpenAI(
                 base_url=endpoint,
                 api_key=api_key,
+                timeout=timeout,
+                max_retries=0,
             )
         self.endpoint = endpoint
         self.model_name = model_name
+        self.timeout = timeout
         self.image_formats = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"}
         self.verbose = verbose
         if check_connection:
     @retry(
         retry=retry_if_not_exception_type(openai.BadRequestError),
         wait=wait_random_exponential(min=1, max=10),
+        stop=stop_after_attempt(5) | stop_after_delay(DEFAULT_GPT_TIMEOUT),
     )
     def completion_with_backoff(self, **kwargs):
         """Performs a chat completion request with retry/backoff."""
                 temperature=0,
                 max_tokens=100,
             )
+            response.choices[0].message.content
+            logger.info("Connection check success.")
+        except Exception:
             raise ConnectionError(
                 f"Failed to connect to GPT API at {self.endpoint}, "
                 f"please check setting in `{CONFIG_FILE}` and `README`."
 api_key = os.environ.get("API_KEY", agent_config.get("api_key"))
 api_version = os.environ.get("API_VERSION", agent_config.get("api_version"))
 model_name = os.environ.get("MODEL_NAME", agent_config.get("model_name"))
+timeout = DEFAULT_GPT_TIMEOUT
 GPT_CLIENT = GPTclient(
     endpoint=endpoint,
     api_version=api_version,
     model_name=model_name,
     check_connection=False,
+    timeout=timeout,
 )

embodied_gen/utils/monkey_patch/gradio.py CHANGED Viewed

@@ -45,155 +45,14 @@ def _patch_gradio_schema_bool_bug() -> None:
 def _patch_open3d_cuda_device_count_bug() -> None:
-    """Force open3d to use CPU pybind only and skip ALL CUDA dlopen.
-    Even an empty CDLL(open3d/cuda/pybind*.so) loads libcudart into the
-    parent process, which corrupts the forked ZeroGPU worker's CUDA
-    context and silently hangs every @spaces.GPU call in spaces>=0.50.
-    Bypass the entire CUDA branch so __DEVICE_API__ stays "cpu".
-    """
-    init_path = f'{site.getsitepackages()[0]}/open3d/__init__.py'
-    with fileinput.FileInput(init_path, inplace=True) as file:
         for line in file:
             print(
                 line.replace(
-                    'if _build_config["BUILD_CUDA_MODULE"]:',
-                    'if False:  # patched by EmbodiedGen: force CPU pybind only',
                 ),
                 end='',
             )
-def _patch_spaces_zerogpu_logs() -> None:
-    """Inject stderr prints into spaces.zero internals to locate hangs.
-    Hooks (visible in HF Space logs):
-      - client.schedule ENTER       (parent, before HTTP)
-      - gradio_handler ENTER        (parent, decorator entry)
-      - worker_init ENTER           (forked worker, before unpatch)
-      - worker_init BEFORE torch.init  (forked worker)
-    Must be called BEFORE `import spaces`.
-    """
-    import site
-    sp = site.getsitepackages()[0]
-    client_py = f"{sp}/spaces/zero/client.py"
-    wrappers_py = f"{sp}/spaces/zero/wrappers.py"
-    def line(indent: int, tag: str) -> str:
-        pad = " " * indent
-        return (
-            f"{pad}import os as _o, sys as _s, time as _t; "
-            f"_s.stderr.write(f'[ZGPU-UP ' + _t.strftime('%H:%M:%S') + "
-            f"f' pid={{_o.getpid()}}] {tag}\\n'); _s.stderr.flush()\n"
-        )
-    def inject(path: str, marker: str, snippet: str) -> None:
-        text = open(path).read()
-        if snippet.strip() in text:
-            return
-        if marker not in text:
-            print(f"[zerogpu_logs] WARN marker not found in {path}: {marker[:60]!r}")
-            return
-        open(path, "w").write(text.replace(marker, snippet + marker, 1))
-    inject(
-        client_py,
-        "    if not (gradio_version := version.parse",
-        line(4, "client.schedule ENTER"),
-    )
-    inject(
-        wrappers_py,
-        "    # Immediately close file descriptors",
-        line(4, "worker_init ENTER"),
-    )
-    inject(
-        wrappers_py,
-        "            torch.init(nvidia_uuid)",
-        line(12, "worker_init BEFORE torch.init"),
-    )
-    inject(
-        wrappers_py,
-        "        if forked:\n",
-        line(8, "gradio_handler ENTER"),
-    )
-def _neutralize_warp_in_parent() -> None:
-    """Prevent NVIDIA Warp from calling cuInit() in the ZeroGPU parent.
-    Root cause of @spaces.GPU silent hangs (spaces>=0.50): kaolin imports
-    warp at module top-level. When any kaolin module triggers warp.init(),
-    Warp's `init_cuda_driver` dlopens libcuda.so + calls cuInit() in the
-    parent process. After spaces forks the worker, torch.init(nvidia_uuid)
-    in the worker hangs forever because the inherited CUDA driver state is
-    poisoned (parent never had a real GPU; ZeroGPU exposes one only post-fork).
-    Fix: stub warp.init / warp.context.runtime_init with a pid-aware no-op.
-    The parent-resident pid skips init; the forked worker (different pid)
-    runs the real init so warp keeps working inside @spaces.GPU code paths.
-    Must be called BEFORE any import that pulls kaolin (e.g. embodied_gen.data,
-    thirdparty.TRELLIS).
-    """
-    import os
-    import sys
-    try:
-        import warp  # noqa: F401  -- pure python import, no cuInit
-    except ImportError:
-        return
-    parent_pid = os.getpid()
-    def _make_pid_safe(orig):
-        def _wrapped(*args, **kwargs):
-            if os.getpid() == parent_pid:
-                sys.stderr.write(
-                    f"[warp-neutralize] skip {orig.__name__} in parent pid={parent_pid}\n"
-                )
-                sys.stderr.flush()
-                return None
-            return orig(*args, **kwargs)
-        _wrapped.__wrapped__ = orig
-        _wrapped.__name__ = getattr(orig, "__name__", "wrapped")
-        return _wrapped
-    if hasattr(warp, "init") and not hasattr(warp.init, "__wrapped__"):
-        warp.init = _make_pid_safe(warp.init)
-    try:
-        from warp import context as _wctx
-        if hasattr(_wctx, "runtime_init") and not hasattr(
-            _wctx.runtime_init, "__wrapped__"
-        ):
-            _wctx.runtime_init = _make_pid_safe(_wctx.runtime_init)
-    except Exception:
-        pass
-def _disable_xformers_flash3() -> None:
-    """Force xformers dispatcher to skip Flash-Attention v3 (Hopper-only).
-    sm_120 (Blackwell) has no FA3 kernel binary; the dispatcher still picks
-    flash3 and the launch aborts with:
-      `CUDA error ... hopper/flash_fwd_launch_template.h:188: invalid argument`
-    Env vars `XFORMERS_FLASH3_ATTENTION_DISABLED=1` are silently ignored in
-    xformers 0.0.32.post2, so we patch `not_supported_reasons` directly.
-    Cutlass and FA2 both work on sm_120, so removing flash3 from candidates
-    is enough.
-    """
-    try:
-        from xformers.ops.fmha import flash3 as _f3
-    except Exception:
-        return
-    _disabled = ["disabled by EmbodiedGen: no FA3 kernel for sm_120"]
-    def _ns(cls, d):  # noqa: ARG001
-        return list(_disabled)
-    if hasattr(_f3, "FwOp"):
-        _f3.FwOp.not_supported_reasons = classmethod(_ns)
-    if hasattr(_f3, "BwOp"):
-        _f3.BwOp.not_supported_reasons = classmethod(_ns)

 def _patch_open3d_cuda_device_count_bug() -> None:
+    """Patch open3d to avoid cuda device count bug.
+    Rewrites ``open3d/__init__.py`` under the first ``site.getsitepackages()``
+    entry in place, replacing every occurrence of
+    ``_pybind_cuda.open3d_core_cuda_device_count()`` with the literal ``1``.
+    """
+    with fileinput.FileInput(
+        f'{site.getsitepackages()[0]}/open3d/__init__.py', inplace=True
+    ) as file:
+        # With inplace=True, print() output is written back into the file.
         for line in file:
             print(
                 line.replace(
+                    '_pybind_cuda.open3d_core_cuda_device_count()', '1'
                 ),
                 end='',
             )

embodied_gen/utils/monkey_patch/infinigen.py CHANGED Viewed

@@ -119,13 +119,11 @@ def patch_doors_base_simple():
             if constants is None:
                 constants = RoomConstants()
             self.width = constants.door_width - 0.02
-            self.door_frame_style = np.random.choice(
-                ["single_column", "full_frame_square", "full_frame_dome"]
-            )
             self.door_frame_width = 0.02
-            handle_types = ["knob", "lever", "pull", "none"]
-            if self.door_frame_style != "full_frame_dome":
-                handle_types.append("bar")
             if self.door_frame_style != "single_column":
                 self.width += -0.02
                 self.height += -0.04

             if constants is None:
                 constants = RoomConstants()
             self.width = constants.door_width - 0.02
+            # Force a rectangular full frame so generated doors can close
+            # cleanly against the wall opening.
+            self.door_frame_style = "full_frame_square"
             self.door_frame_width = 0.02
+            handle_types = ["knob", "lever", "pull", "none", "bar"]
             if self.door_frame_style != "single_column":
                 self.width += -0.02
                 self.height += -0.04

embodied_gen/utils/simulation.py CHANGED Viewed

@@ -55,10 +55,19 @@ SIM_COORD_ALIGN = np.array(
 __all__ = [
     "SIM_COORD_ALIGN",
     "FrankaPandaGrasper",
     "load_assets_from_layout_file",
     "load_mani_skill_robot",
     "render_images",
-    "is_urdf_articulated",
 ]
@@ -722,6 +731,8 @@ class FrankaPandaGrasper(object):
         result[action_key] = result[action_key][::sample_ratio]
         n_step = len(result[action_key])
         actions = []
         for i in range(n_step):
             qpos = result[action_key][i]
@@ -805,10 +816,14 @@ class FrankaPandaGrasper(object):
                 gripper_state=1,
                 env_idx=env_idx,
             )
             actions.append(grasp_actions)
             close_actions = self.control_gripper(
                 gripper_state=-1,
-                env_idx=env_idx,
             )
             actions.append(close_actions)
             back_actions = self.move_to_pose(
@@ -817,6 +832,181 @@ class FrankaPandaGrasper(object):
                 gripper_state=-1,
                 env_idx=env_idx,
             )
             actions.append(back_actions)
         return np.concatenate(actions, axis=0)

 __all__ = [
     "SIM_COORD_ALIGN",
     "FrankaPandaGrasper",
+    "capture_frame",
+    "create_panda_agent",
+    "create_recording_camera",
+    "estimate_grasp_width",
+    "get_actor_bottom_z",
+    "get_actor_mesh",
+    "is_urdf_articulated",
     "load_assets_from_layout_file",
+    "load_collision_mesh_from_urdf",
     "load_mani_skill_robot",
+    "quat_from_yaw",
     "render_images",
+    "set_ground_base_color",
 ]
         result[action_key] = result[action_key][::sample_ratio]
         n_step = len(result[action_key])
+        if n_step == 0:
+            return None
         actions = []
         for i in range(n_step):
             qpos = result[action_key][i]
                 gripper_state=1,
                 env_idx=env_idx,
             )
+            if grasp_actions is None:
+                logger.warning(
+                    f"Failed to move from reach pose to grasp pose for `{actor.name}`."
+                )
+                return None
             actions.append(grasp_actions)
             close_actions = self.control_gripper(
                 gripper_state=-1,
             )
             actions.append(close_actions)
             back_actions = self.move_to_pose(
                 gripper_state=-1,
                 env_idx=env_idx,
             )
+            if back_actions is None:
+                logger.warning(
+                    f"Failed to retreat after grasping `{actor.name}`."
+                )
+                return None
             actions.append(back_actions)
         return np.concatenate(actions, axis=0)
+def load_collision_mesh_from_urdf(urdf_path: str) -> trimesh.Trimesh:
+    """Load the collision mesh referenced by a URDF in its link frame.
+    Applies the optional collision/origin transform so the returned mesh sits
+    in the same frame the simulator will use; required for correct spawn-z
+    estimation downstream.
+    """
+    root = ET.parse(urdf_path).getroot()
+    collision_mesh = root.find(".//collision/geometry/mesh")
+    if collision_mesh is None:
+        raise ValueError(f"Collision mesh not found in URDF: {urdf_path}")
+    collision_file = collision_mesh.get("filename")
+    if not collision_file:
+        raise ValueError(f"Collision mesh filename missing in {urdf_path}")
+    scale_attr = collision_mesh.get("scale", "1.0 1.0 1.0")
+    mesh_scale = np.array([float(x) for x in scale_attr.split()])
+    mesh_path = os.path.join(os.path.dirname(urdf_path), collision_file)
+    mesh = trimesh.load(mesh_path)
+    if isinstance(mesh, trimesh.Scene):
+        mesh = mesh.dump(concatenate=True)
+    mesh.apply_scale(mesh_scale)
+    collision_origin = root.find(".//collision/origin")
+    if collision_origin is not None:
+        xyz = [float(v) for v in collision_origin.get("xyz", "0 0 0").split()]
+        rpy = [float(v) for v in collision_origin.get("rpy", "0 0 0").split()]
+        transform = np.eye(4, dtype=np.float64)
+        transform[:3, :3] = R.from_euler("xyz", rpy, degrees=False).as_matrix()
+        transform[:3, 3] = np.array(xyz, dtype=np.float64)
+        mesh.apply_transform(transform)
+    return mesh
+def estimate_grasp_width(mesh: trimesh.Trimesh) -> float:
+    """Estimate a conservative top-down grasp width from OBB extents.
+
+    Args:
+        mesh: Mesh whose oriented bounding box is measured.
+
+    Returns:
+        The second-smallest of the oriented-bounding-box extents.
+    """
+    extents = np.sort(mesh.bounding_box_oriented.extents)
+    # Extents are sorted ascending, so index 1 is the second-smallest side.
+    return float(extents[1])
+def get_actor_mesh(actor: sapien.Entity) -> trimesh.Trimesh:
+    """Get the actor collision mesh in world coordinates.
+
+    Args:
+        actor: Entity whose component mesh is extracted.
+
+    Returns:
+        The mesh produced by ``get_component_mesh`` with
+        ``to_world_frame=True``.
+
+    Raises:
+        ValueError: If no mesh is returned or the returned mesh is empty.
+    """
+    # NOTE(review): assumes the physics rigid-body component is always at
+    # index 1 of ``actor.components`` -- confirm against how actors are built.
+    physx_rigid = actor.components[1]
+    mesh = get_component_mesh(physx_rigid, to_world_frame=True)
+    if mesh is None or mesh.is_empty:
+        raise ValueError(f"Actor `{actor.name}` has no valid collision mesh.")
+    return mesh
+def get_actor_bottom_z(actor: sapien.Entity) -> float:
+    """Get the actor world-space bottom z from its collision mesh.
+
+    Args:
+        actor: Entity whose collision mesh is measured via
+            ``get_actor_mesh``.
+
+    Returns:
+        The z value of the first row of the mesh ``bounds``.
+
+    Raises:
+        ValueError: Propagated from ``get_actor_mesh`` when the actor has no
+            valid collision mesh.
+    """
+    # NOTE(review): ``bounds[0]`` is presumably the minimum corner of the
+    # axis-aligned bounds (trimesh convention) -- confirm.
+    return float(get_actor_mesh(actor).bounds[0, 2])
+def quat_from_yaw(yaw_deg: float) -> list[float]:
+    """Convert z-axis yaw angle (degrees) to a SAPIEN quaternion (w,x,y,z).
+
+    Args:
+        yaw_deg: Rotation about the z axis, in degrees.
+
+    Returns:
+        A four-element list ``[w, x, y, z]`` with ``x`` and ``y`` equal to 0.
+    """
+    yaw = np.deg2rad(yaw_deg)
+    # Half-angle form of a rotation about z: w = cos(yaw/2), z = sin(yaw/2).
+    return [float(np.cos(yaw / 2)), 0.0, 0.0, float(np.sin(yaw / 2))]
+def set_ground_base_color(scene: sapien.Scene, rgba: list[float]) -> None:
+    """Update the default ground plane material color for this scene.
+
+    Only the first actor named ``"ground"`` is modified.
+
+    Args:
+        scene: Scene whose actors are searched.
+        rgba: Color passed unchanged to ``material.set_base_color``.
+
+    Raises:
+        ValueError: If the scene has no actor named ``"ground"``.
+    """
+    for actor in scene.get_all_actors():
+        if actor.name != "ground":
+            continue
+        for component in actor.components:
+            # Skip components that do not expose ``render_shapes``.
+            render_shapes = getattr(component, "render_shapes", None)
+            if render_shapes is None:
+                continue
+            for render_shape in render_shapes:
+                render_shape.material.set_base_color(rgba)
+        # Stop after the first matching actor has been processed.
+        return
+    raise ValueError("Ground actor not found in the scene.")
+def capture_frame(
+    scene: sapien.Scene,
+    camera: sapien.render.RenderCameraComponent,
+) -> np.ndarray:
+    """Capture a single RGB frame from the camera (updates render first).
+
+    Args:
+        scene: Scene whose render state is refreshed before the capture.
+        camera: Camera used to take the picture.
+
+    Returns:
+        The ``"Color"`` output of ``render_images`` converted to an array.
+    """
+    scene.update_render()
+    camera.take_picture()
+    return np.array(render_images(camera, ["Color"])["Color"])
+def create_recording_camera(
+    scene_manager: "SapienSceneManager",
+    eye_pos: list[float],
+    target_pt: list[float],
+    image_hw: tuple[int, int],
+    fovy_deg: float = 45.0,
+    cam_name: str = "recording_camera",
+) -> sapien.render.RenderCameraComponent:
+    """Create a camera looking from eye_pos at target_pt for video capture."""
+    eye_pos = np.array(eye_pos, dtype=np.float32)
+    target_pt = np.array(target_pt, dtype=np.float32)
+    world_up_vec = np.array([0.0, 0.0, 1.0], dtype=np.float32)
+    forward_vec = target_pt - eye_pos
+    forward_vec = forward_vec / np.linalg.norm(forward_vec)
+    temp_right_vec = np.cross(forward_vec, world_up_vec)
+    if np.linalg.norm(temp_right_vec) < 1e-6:
+        temp_right_vec = np.array([1.0, 0.0, 0.0], dtype=np.float32)
+    right_vec = temp_right_vec / np.linalg.norm(temp_right_vec)
+    up_vec = np.cross(right_vec, forward_vec)
+    rotation_matrix = np.array([forward_vec, -right_vec, up_vec]).T
+    scipy_quat = R.from_matrix(rotation_matrix).as_quat()
+    quat = [
+        float(scipy_quat[3]),
+        float(scipy_quat[0]),
+        float(scipy_quat[1]),
+        float(scipy_quat[2]),
+    ]
+    return scene_manager.create_camera(
+        cam_name,
+        pose=sapien.Pose(p=eye_pos.tolist(), q=quat),
+        image_hw=image_hw,
+        fovy_deg=fovy_deg,
+    )
+def create_panda_agent(
+    scene: sapien.Scene,
+    control_freq: int,
+    sim_backend: str,
+    render_backend: str,
+    initial_qpos: np.ndarray | None = None,
+    control_mode: str = "pd_joint_pos",
+) -> BaseAgent:
+    """Create a ManiSkill Panda agent attached to a SAPIEN scene.
+
+    Args:
+        scene: Scene wrapped into a single-scene ``ManiSkillScene``.
+        control_freq: Control frequency passed to the agent constructor.
+        sim_backend: Simulation backend name; also used as the
+            ``ManiSkillScene`` device.
+        render_backend: Render backend name.
+        initial_qpos: Joint positions to reset the agent to. When ``None``,
+            a built-in 9-element default is used.
+        control_mode: Control mode passed to the agent constructor.
+
+    Returns:
+        The agent after it has been reset to ``initial_qpos``.
+    """
+    # ManiSkill helpers are imported inside the function rather than at
+    # module import time.
+    from mani_skill.agents import REGISTERED_AGENTS
+    from mani_skill.envs.utils.system.backend import (
+        parse_sim_and_render_backend,
+    )
+    backend = parse_sim_and_render_backend(sim_backend, render_backend)
+    ms_scene = ManiSkillScene([scene], device=sim_backend, backend=backend)
+    robot_cls = REGISTERED_AGENTS["panda"].agent_cls
+    agent = robot_cls(
+        scene=ms_scene,
+        control_freq=control_freq,
+        control_mode=control_mode,
+        initial_pose=sapien.Pose([0, 0, 0], [1, 0, 0, 0]),
+    )
+    if initial_qpos is None:
+        # NOTE(review): presumably seven arm joint angles followed by two
+        # gripper finger positions (0.04 each) -- confirm against the robot.
+        initial_qpos = np.array(
+            [
+                0.0,
+                np.pi / 8,
+                0.0,
+                -np.pi * 3 / 8,
+                0.0,
+                np.pi * 3 / 4,
+                np.pi / 4,
+                0.04,
+                0.04,
+            ],
+            dtype=np.float32,
+        )
+    # ``[None, ...]`` adds a leading axis before the copy is passed to reset.
+    agent.reset(initial_qpos[None, ...].copy())
+    agent.init_qpos = agent.robot.qpos
+    agent.controller.controllers["gripper"].reset()
+    return agent

embodied_gen/utils/trender.py CHANGED Viewed

@@ -49,16 +49,8 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
     renderer.rendering_options.far = options.get("far", 100)
     renderer.rendering_options.ssaa = options.get("ssaa", 4)
     rets = {}
-    import time as _time, sys as _sys
-    _renderer_t0 = _time.time()
-    _i = -1
-    for _i, (extr, intr) in enumerate(tqdm(zip(extrinsics, intrinsics), desc="Rendering")):
-        _t0 = _time.time()
         res = renderer.render(sample, extr, intr)
-        if torch.cuda.is_available():
-            torch.cuda.synchronize()
-        if _i < 3:
-            _sys.stderr.write(f"[STAGE] render_mesh frame {_i} took {_time.time()-_t0:.2f}s\n"); _sys.stderr.flush()
         if "normal" not in rets:
             rets["normal"] = []
         normal = torch.lerp(
@@ -68,7 +60,6 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
             normal.detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255
         ).astype(np.uint8)
         rets["normal"].append(normal)
-    _sys.stderr.write(f"[STAGE] render_mesh total {_i+1} frames took {_time.time()-_renderer_t0:.2f}s\n"); _sys.stderr.flush()
     return rets
@@ -112,22 +103,13 @@ def render_gs_frames(
     if verbose:
         iterator = tqdm(iterator, total=len(extrinsics), desc="Rendering")
-    import time as _time, sys as _sys
-    _renderer_t0 = _time.time()
-    _i = -1
-    for _i, (extr, intr) in enumerate(iterator):
-        _t0 = _time.time()
         res = renderer.render(
             sample, extr, intr, colors_overwrite=colors_overwrite
         )
-        if torch.cuda.is_available():
-            torch.cuda.synchronize()
-        if _i < 3:
-            _sys.stderr.write(f"[STAGE] render_gs frame {_i} took {_time.time()-_t0:.2f}s\n"); _sys.stderr.flush()
         outputs["color"].append(to_img(res["color"]))
         depth = res.get("percent_depth") or res.get("depth")
         outputs["depth"].append(to_numpy(depth) if depth is not None else None)
-    _sys.stderr.write(f"[STAGE] render_gs total {_i+1} frames took {_time.time()-_renderer_t0:.2f}s\n"); _sys.stderr.flush()
     return dict(outputs)

     renderer.rendering_options.far = options.get("far", 100)
     renderer.rendering_options.ssaa = options.get("ssaa", 4)
     rets = {}
+    for extr, intr in tqdm(zip(extrinsics, intrinsics), desc="Rendering"):
         res = renderer.render(sample, extr, intr)
         if "normal" not in rets:
             rets["normal"] = []
         normal = torch.lerp(
             normal.detach().cpu().numpy().transpose(1, 2, 0) * 255, 0, 255
         ).astype(np.uint8)
         rets["normal"].append(normal)
     return rets
     if verbose:
         iterator = tqdm(iterator, total=len(extrinsics), desc="Rendering")
+    for extr, intr in iterator:
         res = renderer.render(
             sample, extr, intr, colors_overwrite=colors_overwrite
         )
         outputs["color"].append(to_img(res["color"]))
         depth = res.get("percent_depth") or res.get("depth")
         outputs["depth"].append(to_numpy(depth) if depth is not None else None)
     return dict(outputs)

requirements.txt CHANGED Viewed

@@ -61,7 +61,7 @@ seaborn
 hydra-core
 modelscope
 timm
-open3d-cpu
 MoGe@git+https://github.com/microsoft/MoGe.git@a8c3734
 https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu128/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl

 hydra-core
 modelscope
 timm
+open3d
 MoGe@git+https://github.com/microsoft/MoGe.git@a8c3734
 https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu128/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl