xinjie.wang commited on
Commit
1d3e2bd
·
1 Parent(s): 9b8a93e
app_style.py CHANGED
@@ -20,7 +20,7 @@ from gradio.themes.utils.colors import gray, neutral, slate, stone, teal, zinc
20
  lighting_css = """
21
  <style>
22
  #lighter_mesh canvas {
23
- filter: brightness(2.3) !important;
24
  }
25
  </style>
26
  """
 
20
  lighting_css = """
21
  <style>
22
  #lighter_mesh canvas {
23
+ filter: brightness(1) !important;
24
  }
25
  </style>
26
  """
common.py CHANGED
@@ -157,7 +157,7 @@ def end_session(req: gr.Request) -> None:
157
  shutil.rmtree(user_dir)
158
 
159
 
160
- @spaces.GPU
161
  def preprocess_image_fn(
162
  image: str | np.ndarray | Image.Image,
163
  rmbg_tag: str = "rembg",
@@ -264,7 +264,7 @@ def select_point(
264
  return (image, masks), seg_image
265
 
266
 
267
- @spaces.GPU
268
  def image_to_3d(
269
  image: Image.Image,
270
  seed: int,
@@ -564,7 +564,7 @@ def extract_urdf(
564
  )
565
 
566
 
567
- @spaces.GPU
568
  def text2image_fn(
569
  prompt: str,
570
  guidance_scale: float,
@@ -620,7 +620,7 @@ def text2image_fn(
620
  return save_paths + save_paths
621
 
622
 
623
- @spaces.GPU
624
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
625
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
626
 
@@ -636,7 +636,7 @@ def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
636
  return None, None, None
637
 
638
 
639
- @spaces.GPU
640
  def generate_texture_mvimages(
641
  prompt: str,
642
  controlnet_cond_scale: float = 0.55,
@@ -723,7 +723,7 @@ def backproject_texture(
723
  return output_glb_mesh, output_obj_mesh, zip_file
724
 
725
 
726
- @spaces.GPU
727
  def backproject_texture_v2(
728
  mesh_path: str,
729
  input_image: str,
@@ -770,7 +770,7 @@ def backproject_texture_v2(
770
  return output_glb_mesh, output_obj_mesh, zip_file
771
 
772
 
773
- @spaces.GPU
774
  def render_result_video(
775
  mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
776
  ) -> str:
 
157
  shutil.rmtree(user_dir)
158
 
159
 
160
+ @spaces.GPU(duration=120)
161
  def preprocess_image_fn(
162
  image: str | np.ndarray | Image.Image,
163
  rmbg_tag: str = "rembg",
 
264
  return (image, masks), seg_image
265
 
266
 
267
+ @spaces.GPU(duration=300)
268
  def image_to_3d(
269
  image: Image.Image,
270
  seed: int,
 
564
  )
565
 
566
 
567
+ @spaces.GPU(duration=300)
568
  def text2image_fn(
569
  prompt: str,
570
  guidance_scale: float,
 
620
  return save_paths + save_paths
621
 
622
 
623
+ @spaces.GPU(duration=120)
624
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
625
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
626
 
 
636
  return None, None, None
637
 
638
 
639
+ @spaces.GPU(duration=300)
640
  def generate_texture_mvimages(
641
  prompt: str,
642
  controlnet_cond_scale: float = 0.55,
 
723
  return output_glb_mesh, output_obj_mesh, zip_file
724
 
725
 
726
+ @spaces.GPU(duration=300)
727
  def backproject_texture_v2(
728
  mesh_path: str,
729
  input_image: str,
 
770
  return output_glb_mesh, output_obj_mesh, zip_file
771
 
772
 
773
+ @spaces.GPU(duration=120)
774
  def render_result_video(
775
  mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
776
  ) -> str:
embodied_gen/data/backproject_v2.py CHANGED
@@ -596,7 +596,7 @@ class TextureBacker:
596
 
597
  return texture
598
 
599
- @spaces.GPU
600
  def compute_texture(
601
  self,
602
  colors: list[Image.Image],
 
596
 
597
  return texture
598
 
599
+ @spaces.GPU()
600
  def compute_texture(
601
  self,
602
  colors: list[Image.Image],
embodied_gen/data/backproject_v3.py CHANGED
@@ -425,7 +425,7 @@ def parse_args():
425
  return args
426
 
427
 
428
- @spaces.GPU
429
  def entrypoint(
430
  delight_model: DelightingModel = None,
431
  imagesr_model: ImageRealESRGAN = None,
 
425
  return args
426
 
427
 
428
+ @spaces.GPU()
429
  def entrypoint(
430
  delight_model: DelightingModel = None,
431
  imagesr_model: ImageRealESRGAN = None,
embodied_gen/data/mesh_operator.py CHANGED
@@ -412,7 +412,7 @@ class MeshFixer(object):
412
  dtype=torch.int32,
413
  )
414
 
415
- @spaces.GPU
416
  def __call__(
417
  self,
418
  filter_ratio: float,
 
412
  dtype=torch.int32,
413
  )
414
 
415
+ @spaces.GPU(duration=300)
416
  def __call__(
417
  self,
418
  filter_ratio: float,
embodied_gen/models/delight_model.py CHANGED
@@ -140,7 +140,7 @@ class DelightingModel(object):
140
 
141
  return new_image
142
 
143
- @spaces.GPU
144
  @torch.no_grad()
145
  def __call__(
146
  self,
 
140
 
141
  return new_image
142
 
143
+ @spaces.GPU(duration=120)
144
  @torch.no_grad()
145
  def __call__(
146
  self,
embodied_gen/models/sr_model.py CHANGED
@@ -80,7 +80,7 @@ class ImageStableSR:
80
  self.up_pipeline_x4.set_progress_bar_config(disable=True)
81
  # self.up_pipeline_x4.enable_model_cpu_offload()
82
 
83
- @spaces.GPU
84
  def __call__(
85
  self,
86
  image: Union[Image.Image, np.ndarray],
@@ -196,7 +196,7 @@ class ImageRealESRGAN:
196
  half=True,
197
  )
198
 
199
- @spaces.GPU
200
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
201
  """Performs super-resolution on the input image.
202
 
 
80
  self.up_pipeline_x4.set_progress_bar_config(disable=True)
81
  # self.up_pipeline_x4.enable_model_cpu_offload()
82
 
83
+ @spaces.GPU(duration=120)
84
  def __call__(
85
  self,
86
  image: Union[Image.Image, np.ndarray],
 
196
  half=True,
197
  )
198
 
199
+ @spaces.GPU(duration=120)
200
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
201
  """Performs super-resolution on the input image.
202
 
embodied_gen/scripts/render_gs.py CHANGED
@@ -96,7 +96,7 @@ def parse_args():
96
  return args
97
 
98
 
99
- @spaces.GPU
100
  def entrypoint(**kwargs) -> None:
101
  args = parse_args()
102
  for k, v in kwargs.items():
 
96
  return args
97
 
98
 
99
+ @spaces.GPU(duration=120)
100
  def entrypoint(**kwargs) -> None:
101
  args = parse_args()
102
  for k, v in kwargs.items():
embodied_gen/scripts/room_gen/export_scene.py CHANGED
@@ -428,7 +428,7 @@ def create_glass_shader(node_tree, export_usd):
428
 
429
  principled_bsdf_node.inputs["Transmission Weight"].default_value = 1
430
  if export_usd:
431
- principled_bsdf_node.inputs["Alpha"].default_value = 0
432
  node_tree.links.new(
433
  principled_bsdf_node.outputs[0], nodes["Material Output"].inputs[0]
434
  )
@@ -1221,7 +1221,7 @@ def export_curr_scene(
1221
  remove_obj_parents()
1222
  delete_objects()
1223
  triangulate_meshes()
1224
- if omniverse_export:
1225
  split_glass_mats()
1226
  rename_all_meshes()
1227
 
@@ -1454,9 +1454,6 @@ def main(args):
1454
  args.output_folder.mkdir(exist_ok=True)
1455
  targets = sorted(list(args.input_folder.iterdir()))
1456
  for blendfile in targets:
1457
- if blendfile.stem == "solve_state":
1458
- shutil.copy(blendfile, args.output_folder / "solve_state.json")
1459
-
1460
  if not blendfile.suffix == ".blend":
1461
  print(f"Skipping non-blend file {blendfile}")
1462
  continue
@@ -1474,10 +1471,6 @@ def main(args):
1474
  deconvex=args.deconvex,
1475
  center_scene=args.center_scene,
1476
  )
1477
- # wanted to use shutil here but kept making corrupted files
1478
- subprocess.call(
1479
- ["zip", "-r", str(folder.with_suffix(".zip")), str(folder)]
1480
- )
1481
 
1482
  bpy.ops.wm.quit_blender()
1483
 
 
428
 
429
  principled_bsdf_node.inputs["Transmission Weight"].default_value = 1
430
  if export_usd:
431
+ principled_bsdf_node.inputs["Alpha"].default_value = 0.6
432
  node_tree.links.new(
433
  principled_bsdf_node.outputs[0], nodes["Material Output"].inputs[0]
434
  )
 
1221
  remove_obj_parents()
1222
  delete_objects()
1223
  triangulate_meshes()
1224
+ if omniverse_export and format not in ["usda", "usdc"]:
1225
  split_glass_mats()
1226
  rename_all_meshes()
1227
 
 
1454
  args.output_folder.mkdir(exist_ok=True)
1455
  targets = sorted(list(args.input_folder.iterdir()))
1456
  for blendfile in targets:
 
 
 
1457
  if not blendfile.suffix == ".blend":
1458
  print(f"Skipping non-blend file {blendfile}")
1459
  continue
 
1471
  deconvex=args.deconvex,
1472
  center_scene=args.center_scene,
1473
  )
 
 
 
 
1474
 
1475
  bpy.ops.wm.quit_blender()
1476
 
embodied_gen/skills/spatial_computing/README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Using with IDE Agent via Natural Language
2
+
3
+ The Agent will automatically load this skill based on its **description** when you mention URDF, floorplan, indoor scene, object placement, etc. You only need to specify in natural language **what to do** and provide **key information like paths/room names**.
4
+
5
+ ### LLM Environment Configuration (When Using Semantic Matching)
6
+
7
+ If you want to use natural language descriptions (e.g., "put lamp on bookshelf") instead of exact instance/room names, you need to configure the LLM environment first:
8
+
9
+ ```bash
10
+ # If outputs/env.sh exists, source it first
11
+ source outputs/env.sh
12
+ ```
13
+
14
+ If access to the LLM interface is unavailable, please provide exact instance names (you can check them via `--list_instances`).
15
+
16
+ ### URDF Visualization Only (Generate Floorplan)
17
+
18
+ **You can say:**
19
+ - "Help me visualize `path_to/scene.urdf` or `path_to/folder_contain/scene.urdf`"
20
+
21
+ **Agent will:** Use `visualize_floorplan(urdf_path=..., output_path=...)` or the corresponding CLI to generate the floorplan only, without modifying URDF/USD.
22
+
23
+ ### Insert Object and Update Scene (URDF, or URDF+USD)
24
+
25
+ **You can say:**
26
+ - "Put `chair.obj` into scene.urdf's kitchen room"
27
+ - "Put `bottle.obj` into the URDF at `outputs/rooms/Kitchen_seed3773`, instance name bottle_1, update scene and generate floorplan"
28
+ - "Put a cup on the table in the living room" → Agent will use `on_instance="table"`, `place_strategy="top"`, etc.
29
+
30
+ **If you also want to update USD:**
31
+ - "Put a chair in the kitchen, update both URDF and USD, USD path is `xxx/usd/export_scene.usdc`"
32
+ - Note that you need to use **room-cli** to execute (this skill will prompt the Agent), because writing USD requires bpy.
33
+
34
+ **Agent will:** Use `FloorplanManager` + `insert_object` (or `insert_object_to_scene`), execute according to the paths and room names you provided; when USD is needed, use room-cli to run the CLI.
35
+
36
+ ### View Instances and Rooms in the Scene
37
+
38
+ Before placing objects, you can first view what instances and rooms are in the scene:
39
+
40
+ **You can say:**
41
+ - "Help me list all instances and room names in `.../scene.urdf`"
42
+
43
+ **Agent will:** Execute `--list_instances` to display the instance names and room names in the current scene.
44
+
45
+ ### URDF/USD Output Notes
46
+
47
+ - **URDF Output**: The updated URDF is written to `*_updated.urdf` by default (e.g., `scene.urdf` → `scene_updated.urdf`), and **will not overwrite** the original `scene.urdf`
48
+ - **USD Output**: If `usd_path` is specified, the USD file will be written to `*_updated.usdc` following the same rule
49
+ - **Only Update USD**: Requires using **room-cli** to execute, because writing USD needs Blender (bpy)
50
+
51
+ ### What Information to Provide
52
+
53
+ | Goal | Suggested Information to Provide in Conversation |
54
+ |------|-----------------------------------------------|
55
+ | Visualization only | URDF path, floorplan save path (optional, Agent can default to floorplan.png in same directory) |
56
+ | View instances/rooms | URDF path, let Agent list instance names and room names in current scene |
57
+ | Placement + update | URDF path, object mesh path (.obj), instance name (e.g., chair_1), room name (e.g., kitchen); if placing on table, say "place on table"; if updating USD, also provide USD path and use room-cli |
58
+
59
+ Example in one go: "Use spatial_computing skill, generate floorplan for `.../scene.urdf` and save to floorplan.png in same directory, then put `path/to/bottle.obj` into kitchen, instance name bottle_1, update URDF only."
embodied_gen/skills/spatial_computing/REFERENCE.md ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Floorplan Skill — API Reference
2
+
3
+ This document provides API details, configuration items, errors, and dependencies for reference beyond the usage instructions in [SKILL.md](SKILL.md).
4
+
5
+ ## Contents
6
+
7
+ - [Floorplan Skill — API Reference](#floorplan-skill--api-reference)
8
+ - [Contents](#contents)
9
+ - [LLM Environment Configuration](#llm-environment-configuration)
10
+ - [FloorplanManager](#floorplanmanager)
11
+ - [Constructor](#constructor)
12
+ - [Methods](#methods)
13
+ - [Convenience Functions](#convenience-functions)
14
+ - [CLI Features](#cli-features)
15
+ - [Command Line Parameters](#command-line-parameters)
16
+ - [Configuration and Ignore Items](#configuration-and-ignore-items)
17
+ - [USD and Blender](#usd-and-blender)
18
+ - [Errors and Return Values](#errors-and-return-values)
19
+ - [Dependencies](#dependencies)
20
+ - [Usage Recommendations](#usage-recommendations)
21
+
22
+ ---
23
+
24
+ ## LLM Environment Configuration
25
+
26
+ Before using `resolve_instance_with_llm` or `FloorplanManager.resolve_on_instance`/`resolve_in_room` for semantic matching, configure the LLM API and ensure access to the interface.
27
+
28
+ ```bash
29
+ # Use the project-provided env (Azure + proxy, etc.), if outputs/env.sh exists:
30
+ source outputs/env.sh
31
+ ```
32
+
33
+ If access to the LLM interface is unavailable, prompt the user.
34
+
35
+ ---
36
+
37
+ ## FloorplanManager
38
+
39
+ ### Constructor
40
+
41
+ ```python
42
+ from embodied_gen.skills.spatial_computing.api import FloorplanManager
43
+
44
+ manager = FloorplanManager(
45
+ urdf_path="scene.urdf", # Required
46
+ usd_path=None, # Optional; USD write after insert if provided
47
+ mesh_sample_num=50000,
48
+ ignore_items=None, # Default ["ceiling", "light", "exterior"]
49
+ )
50
+ ```
51
+
52
+ ### Methods
53
+
54
+ | Method | Description |
55
+ |--------|-------------|
56
+ | `visualize(output_path)` | Generate floorplan and save as image |
57
+ | `insert_object(asset_path, instance_key, in_room=..., on_instance=..., place_strategy=..., n_max_attempt=2000, rotation_rpy=...)` | Place object, automatically write back to URDF/USD on success, return `[x,y,z]` or `None`. `on_instance` must be an exact instance name |
58
+ | `update_scene(urdf_output_path=..., usd_output_path=...)` | Manually write back currently placed instances; generally not needed (called inside `insert_object`). Use for custom output paths |
59
+ | `get_room_names()` | List of room names |
60
+ | `get_instance_names()` | List of instance names (excluding walls/floor) |
61
+ | `resolve_on_instance(on_instance, gpt_client=None)` | Resolve user description (e.g., "柜子", "书柜") to exact instance name; if already exact, return directly. With gpt_client, use LLM semantic matching, return `None` if no match |
62
+ | `resolve_in_room(in_room, gpt_client=None)` | Resolve user description to exact room name; if already exact, return directly. With gpt_client, use LLM semantic matching, return `None` if no match |
63
+ | `resolve_beside_instance(beside_instance, gpt_client=None)` | Resolve user description to exact instance name for beside placement; if already exact, return directly. With gpt_client, use LLM semantic matching, return `None` if no match |
64
+ | `get_occupied_area()` | Occupied area Shapely geometry |
65
+ | `get_floor_union()` | Floor area union geometry |
66
+
67
+ **Common `insert_object` parameters**: `in_room` to limit room; `on_instance` to place on top of an instance (exact instance name, can be resolved via `resolve_on_instance`); `beside_instance` to place beside an instance on the floor (exact instance name, can be resolved via `resolve_beside_instance`); `beside_distance` max distance in meters for beside placement (default 0.5); `place_strategy` is `"random"` (default) or `"top"`; `rotation_rpy` not required by default; `n_max_attempt` maximum placement attempts before failure. Note: `on_instance` and `beside_instance` are mutually exclusive.
68
+
69
+ ---
70
+
71
+ ## Convenience Functions
72
+
73
+ | Function | Description |
74
+ |----------|-------------|
75
+ | `visualize_floorplan(urdf_path, output_path, mesh_sample_num=50000, ignore_items=None)` | Generate floorplan only, do not write back to scene |
76
+ | `insert_object_to_scene(urdf_path, asset_path, instance_key, output_path, usd_path=None, in_room=None, on_instance=None, beside_instance=None, beside_distance=0.5, place_strategy="random", rotation_rpy=...)` | Create manager, place, automatically write back, generate floorplan; `on_instance` must be exact instance name; `beside_instance` places beside target on floor; returns placement center `[x,y,z]` or `None`. URDF output does not overwrite original file by default |
77
+ | `resolve_instance_with_llm(gpt_client, instance_names, user_spec, prompt_template=None)` | Use LLM to semantically match user description to one exact instance name in the scene; return `None` if no match, caller should prompt "does not exist, please re-enter". Depends on `embodied_gen.utils.gpt_clients.GPTclient` |
78
+
79
+ ## CLI Features
80
+
81
+ ### Command Line Parameters
82
+
83
+ | Parameter | Description |
84
+ |-----------|-------------|
85
+ | `--urdf_path` | Input URDF scene file path (required) |
86
+ | `--usd_path` | Optional USD scene file path, update USD simultaneously if specified |
87
+ | `--asset_path` | Placeholder object mesh file path (.obj) |
88
+ | `--instance_key` | Unique identifier for the new instance, default `inserted_object` |
89
+ | `--in_room` | Limit placement to specified room, supports semantic description (requires LLM environment) |
90
+ | `--on_instance` | Place on top of specified instance, supports semantic description (requires LLM environment) |
91
+ | `--beside_instance` | Place beside specified instance on the floor, supports semantic description (requires LLM environment) |
92
+ | `--beside_distance` | Max distance (meters) from target instance for beside placement, default 0.5 |
93
+ | `--place_strategy` | Placement strategy: `"random"` (default) or `"top"` (select highest surface) |
94
+ | `--rotation_rpy` | Initial rotation angle (roll, pitch, yaw radians) |
95
+ | `--output_path` | Floorplan output path |
96
+ | `--list_instances` | List instance names and room names in current scene, print and exit |
97
+ | `--max_placement_attempts` | Maximum placement attempts before failure, default 2000 |
98
+
99
+ ### CLI Usage Examples
100
+
101
+ View scene instance names and room names:
102
+ ```bash
103
+ python -m embodied_gen.skills.spatial_computing.cli.main \
104
+ --urdf_path .../scene.urdf --list_instances
105
+ ```
106
+
107
+ Visualize floorplan only:
108
+ ```bash
109
+ python -m embodied_gen.skills.spatial_computing.cli.main \
110
+ --urdf_path .../scene.urdf --output_path .../floorplan.png
111
+ ```
112
+
113
+ Put lamp on bookshelf (supports semantic description):
114
+ ```bash
115
+ source outputs/env.sh
116
+ python -m embodied_gen.skills.spatial_computing.cli.main \
117
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
118
+ --asset_path .../lamp.obj --instance_key lamp_on_bookcase \
119
+ --on_instance 书柜
120
+ ```
121
+
122
+ Put table in a room:
123
+ ```bash
124
+ python -m embodied_gen.skills.spatial_computing.cli.main \
125
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
126
+ --asset_path .../table.obj --instance_key table_1 \
127
+ --in_room living_room
128
+ ```
129
+
130
+ Place object on table in living room (room + on object):
131
+ ```bash
132
+ python -m embodied_gen.skills.spatial_computing.cli.main \
133
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
134
+ --asset_path .../apple.obj --instance_key apple_1 \
135
+ --in_room living_room --on_instance table --place_strategy top
136
+ ```
137
+
138
+ Place chair beside table (on floor, collision-free):
139
+ ```bash
140
+ source outputs/env.sh
141
+ python -m embodied_gen.skills.spatial_computing.cli.main \
142
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
143
+ --asset_path .../chair.obj --instance_key chair_beside_table \
144
+ --beside_instance 桌子
145
+ ```
146
+
147
+ Place beside with room constraint and custom distance:
148
+ ```bash
149
+ python -m embodied_gen.skills.spatial_computing.cli.main \
150
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
151
+ --asset_path .../chair.obj --instance_key chair_beside_table \
152
+ --in_room kitchen --beside_instance table --beside_distance 0.8
153
+ ```
154
+
155
+ **URDF Output Note**: The updated URDF is written to `*_updated.urdf` by default (e.g., `scene.urdf` → `scene_updated.urdf`), and **will not overwrite** the original `scene.urdf` unless the user specifies a custom output path.
156
+
157
+ ---
158
+
159
+ ## Configuration and Ignore Items
160
+
161
+ | Parameter | Default | Description |
162
+ |-----------|---------|-------------|
163
+ | `mesh_sample_num` | 50000 | Number of mesh sampling points, larger values yield more precise floor plan polygons |
164
+ | `ignore_items` | `["ceiling", "light", "exterior"]` | Link name patterns to skip during URDF parsing |
165
+
166
+ ---
167
+
168
+ ## USD and Blender
169
+
170
+ - Writing USD converts `.obj` to `.usdc`, requiring **Blender (bpy)**. For USD writing in this project, use the **room-cli** environment (bpy installed).
171
+ - Without `usd_path`, only URDF is updated, no bpy needed.
172
+ - Assets in `.usd`/`.usdc`/`.usda` format are directly referenced; only `.obj` files are converted via bpy. If `*_collision.obj` exists in the same directory as the visual mesh, it will be written to URDF for collision.
173
+
174
+ ---
175
+
176
+ ## Errors and Return Values
177
+
178
+ **Exceptions**
179
+
180
+ - **ValueError**: Room or instance name does not exist; `update_scene()` called before `insert_object()` or after failed insertion; `instance_key` already exists.
181
+
182
+ **Return Values**
183
+
184
+ - `insert_object` / `insert_object_to_scene`: Returns `[x, y, z]` on success, `None` on failure (e.g., no valid placement after `n_max_attempt` attempts).
185
+
186
+ ---
187
+
188
+ ## Dependencies
189
+
190
+ | Type | Package | Description |
191
+ |------|---------|-------------|
192
+ | Core | trimesh, shapely, matplotlib, numpy | Parsing and visualization |
193
+ | USD Writing | pxr (e.g., `pip install usd-core`), bpy | Required only when using `usd_path`; bpy requires Blender |
194
+ | LLM Semantic Matching | openai, project gpt_config | `resolve_instance_with_llm` requires `GPTclient` instance (see `embodied_gen.utils.gpt_clients`) and corresponding API configuration |
195
+ | CLI | tyro | Required only for CLI entry point |
196
+
197
+ ---
198
+
199
+ ## Usage Recommendations
200
+
201
+ - **Upright objects**: Default orientation applies, no need to set `rotation_rpy`; for special orientations, pass `(roll, pitch, yaw)` radians, e.g., upright `(1.57, 0, 0)`.
202
+ - **Placing on furniture**: First use `resolve_instance_with_llm(gpt_client, get_instance_names(), user_input)` to get the exact instance name, then `insert_object(..., on_instance=resolved, place_strategy="top")`; if matching fails, prompt user to re-enter. For random ground placement, use `place_strategy="random"` (default).
203
+ - **Placing beside furniture**: Use `insert_object(..., beside_instance=resolved, beside_distance=0.5)` to place the new object on the floor beside the target instance, collision-free. Increase `beside_distance` if placement fails (e.g., when the area around the target is crowded).
204
+ - **Collision meshes**: If `*_collision.obj` exists in the same directory as the visual mesh, it will automatically be used for the collision node in URDF.
embodied_gen/skills/spatial_computing/SKILL.md ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: spatial-computing-floorplan
3
+ description: Visualizes floorplans from URDF scene files and inserts 3D assets with collision-aware placement on surfaces. Supports semantic instance matching via LLM (e.g., "put lamp on bookshelf"). Use when working with URDF/USD indoor scenes, floorplan visualization, object placement, or room-level scene editing.
4
+ ---
5
+
6
+ # Floorplan & Object Placement
7
+
8
+ ## Overview
9
+
10
+ Parse indoor scenes from URDF, generate 2D floorplans, or place 3D objects in scenes and write back to URDF/USD. After successful insertion, the corresponding file is automatically updated based on whether `urdf_path`/`usd_path` is provided.
11
+
12
+ **When to use**: Use this skill when you need to generate floorplans from URDF, place objects on specified rooms/furniture surfaces, or batch update URDF/USD files.
13
+
14
+ ---
15
+
16
+ ## LLM Environment (Required for Semantic Matching)
17
+
18
+ Before using `resolve_instance_with_llm` for semantic matching in **Python**, configure the LLM API and ensure access to the interface. Prompt the user if access is unavailable.
19
+
20
+ ```bash
21
+ # Use the project-provided env (Azure + proxy, etc.), if outputs/env.sh exists:
22
+ source outputs/env.sh
23
+ ```
24
+
25
+ ---
26
+
27
+ ## Core Convention: Placement Requests Must Use This Skill's Interface
28
+
29
+ When users request "put A somewhere" or "visualize urdf", you **must** implement it using this skill's interface:
30
+
31
+ | User Request Example | Corresponding Parameter & Usage |
32
+ |---------------------|---------------------------------|
33
+ | **Put A on B** (e.g., "put lamp on bookshelf") | `on_instance` (instance name, obtained from `--list_instances`) |
34
+ | **Put A beside B** (e.g., "put chair beside table") | `beside_instance` (instance name, obtained from `--list_instances`); placed on floor near target |
35
+ | **Put A in a room** (e.g., "put table in living room") | `in_room` (room name, obtained from `--list_instances`) |
36
+ | **Put A beside B in a room** (e.g., "put chair beside table in kitchen") | `beside_instance` + `in_room` |
37
+ | **Put A on B in a room** (e.g., "put apple on table in living room") | Decomposed into "apple" and "living room" as `in_room` and `on_instance` |
38
+ | **Visualize scene.urdf** | `cli.main --urdf_path .../scene.urdf --output_path .../floorplan.png`; output_path defaults to same directory as urdf |
39
+
40
+ - When no match is found, prompt "The object/room does not exist, please re-enter" and provide the current scene object or room list.
41
+ - Instance names should not use the `<link name="...">` from URDF. **Recommended**: Run `--list_instances` before placement to view current instance name list, and select the closest semantic match as `--on_instance`.
42
+
43
+ ---
44
+
45
+ ## CLI Examples
46
+
47
+ > **Tip**: The URDF file is typically located at `<room_folder>/urdf/export_scene/scene.urdf` (e.g., `outputs/rooms/Kitchen_seed0/urdf/export_scene/scene.urdf`).
48
+
49
+ ### Example 1: View Instance Names and Room Names in Current Scene
50
+
51
+ ```bash
52
+ # View instance names and room names in current scene (to fill in --on_instance / --in_room)
53
+ python -m embodied_gen.skills.spatial_computing.cli.main \
54
+ --urdf_path .../scene.urdf --list_instances
55
+ ```
56
+
57
+ ### Example 2: Visualize Floorplan Only
58
+
59
+ ```bash
60
+ python -m embodied_gen.skills.spatial_computing.cli.main \
61
+ --urdf_path .../scene.urdf --output_path .../floorplan.png
62
+ ```
63
+
64
+ ### Example 3: Put Lamp on Bookshelf (Place on an Object)
65
+
66
+ `--on_instance` can be filled with the instance name returned by `--list_instances` or a semantic description.
67
+
68
+ ```bash
69
+ source outputs/env.sh
70
+ python -m embodied_gen.skills.spatial_computing.cli.main \
71
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
72
+ --asset_path .../lamp.obj --instance_key lamp_on_bookcase --on_instance 书柜
73
+ ```
74
+
75
+ ---
76
+
77
+ ### Example 4: Put Table in Living Room (Place in a Room)
78
+
79
+ ```bash
80
+ python -m embodied_gen.skills.spatial_computing.cli.main \
81
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
82
+ --asset_path .../table.obj --instance_key table_1 \
83
+ --in_room living_room
84
+ ```
85
+
86
+ ---
87
+
88
+ ### Example 5: Put Apple on Table in Living Room (Room + on Object)
89
+
90
+ ```bash
91
+ python -m embodied_gen.skills.spatial_computing.cli.main \
92
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
93
+ --asset_path .../apple.obj --instance_key apple_1 \
94
+ --in_room living_room --on_instance table --place_strategy top
95
+ ```
96
+
97
+ ---
98
+
99
+ ### Example 6: Put Chair Beside Table (Place Beside an Object)
100
+
101
+ `--beside_instance` places the new object on the floor near the specified instance, avoiding collision.
102
+
103
+ ```bash
104
+ python -m embodied_gen.skills.spatial_computing.cli.main \
105
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
106
+ --asset_path .../chair.obj --instance_key chair_beside_table \
107
+ --beside_instance table
108
+ ```
109
+
110
+ With room constraint and update usd:
111
+ ```bash
112
+ room-cli -m embodied_gen.skills.spatial_computing.cli.main \
113
+ --urdf_path .../scene.urdf --usd_path .../scene.usdc \
114
+ --output_path .../floorplan.png \
115
+ --asset_path .../chair.obj --instance_key chair_beside_table \
116
+ --in_room kitchen --beside_instance table
117
+ ```
118
+
119
+ ---
120
+
121
+ ## Python Examples
122
+
123
+ ### Generate Floorplan Only
124
+
125
+ Parse scene from URDF and export 2D floorplan without modifying scene files.
126
+
127
+ ```python
128
+ from embodied_gen.skills.spatial_computing.api import visualize_floorplan
129
+
130
+ visualize_floorplan(urdf_path="scene.urdf", output_path="floorplan.png")
131
+ ```
132
+
133
+ ### Place Object and Write Back to Scene
134
+
135
+ Complete in one call: create manager, place, write back URDF/USD, generate floorplan. Returns placement position `[x, y, z]` or `None`. `on_instance` must be an exact instance name (can be obtained via `resolve_instance_with_llm`).
136
+
137
+ **URDF output does not overwrite original file by default**: The updated URDF is written to `*_updated.urdf` by default (e.g., `scene.urdf` → `scene_updated.urdf`), and **will not overwrite** the original `scene.urdf` unless the user specifies overwrite.
138
+
139
+ ```python
140
+ from embodied_gen.skills.spatial_computing.api import insert_object_to_scene, resolve_instance_with_llm
141
+
142
+ # First get exact instance name via LLM semantic matching
143
+ resolved_instance = resolve_instance_with_llm(gpt_client, scene_instances, "table")
144
+
145
+ position = insert_object_to_scene(
146
+ urdf_path="scene.urdf",
147
+ asset_path="chair.obj",
148
+ instance_key="chair_1",
149
+ output_path="floorplan.png",
150
+ in_room="living_room",
151
+ on_instance=resolved_instance,
152
+ usd_path="scene.usdc", # Optional; requires room-cli if specified
153
+ )
154
+
155
+ # Place beside an existing instance (on the floor, collision-free)
156
+ resolved_beside = resolve_instance_with_llm(gpt_client, scene_instances, "table")
157
+
158
+ position = insert_object_to_scene(
159
+ urdf_path="scene.urdf",
160
+ asset_path="chair.obj",
161
+ instance_key="chair_beside_table",
162
+ output_path="floorplan.png",
163
+ beside_instance=resolved_beside,
164
+ beside_distance=0.5, # meters from target instance
165
+ )
166
+ ```
167
+
168
+ By default, `usd_path` is not specified, only URDF is updated. If `usd_path` is specified, you must use **room-cli** to execute (USD requires bpy environment) to update USD simultaneously.
169
+ - Write only URDF: `python -m embodied_gen.skills.spatial_computing.cli.main ...`
170
+ - Write USD: `room-cli -m embodied_gen.skills.spatial_computing.cli.main ... --usd_path <path>`
171
+
172
+ ### Common Parameters
173
+
174
+ | Parameter | Meaning |
175
+ |-----------|---------|
176
+ | `in_room` | Limit placement to specified room |
177
+ | `on_instance` | Place on top of specified instance; must be **exact instance name** (obtained via `resolve_instance_with_llm`) |
178
+ | `beside_instance` | Place beside specified instance on the floor; must be **exact instance name** (obtained via `resolve_instance_with_llm`). Mutually exclusive with `on_instance` |
179
+ | `beside_distance` | Max distance (meters) from target instance for beside placement. Default `0.5`. Increase if placement fails |
180
+ | `place_strategy` | `"random"` random placement (default, e.g., bookshelf with 3 layers will randomly select one), `"top"` select highest surface |
181
+ | `rotation_rpy` | Not required by default; pass (roll, pitch, yaw) radians for special orientations |
182
+
183
+ ## Next Steps
184
+
185
+ - For complete API, configuration, errors, and dependencies, see [REFERENCE.md](REFERENCE.md).
embodied_gen/skills/spatial_computing/__init__.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from embodied_gen.skills.spatial_computing.core import (
19
+ UrdfSemanticInfoCollector,
20
+ get_actionable_surface,
21
+ points_to_polygon,
22
+ )
23
+ from embodied_gen.skills.spatial_computing.core.visualizer import (
24
+ FloorplanVisualizer,
25
+ )
26
+
27
+ __all__ = [
28
+ "FloorplanVisualizer",
29
+ "UrdfSemanticInfoCollector",
30
+ "points_to_polygon",
31
+ "get_actionable_surface",
32
+ ]
embodied_gen/skills/spatial_computing/api/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """High-level API for floorplan operations.
2
+
3
+ This module provides simplified interfaces for visualizing floorplans
4
+ and inserting objects into 3D indoor scenes.
5
+ """
6
+
7
+ from embodied_gen.skills.spatial_computing.api.floorplan_api import (
8
+ FloorplanManager,
9
+ insert_object_to_scene,
10
+ visualize_floorplan,
11
+ )
12
+ from embodied_gen.utils.llm_resolve import resolve_instance_with_llm
13
+
14
+ __all__ = [
15
+ "FloorplanManager",
16
+ "visualize_floorplan",
17
+ "insert_object_to_scene",
18
+ "resolve_instance_with_llm",
19
+ ]
embodied_gen/skills/spatial_computing/api/floorplan_api.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import os
21
+ from dataclasses import dataclass, field
22
+ from typing import Literal
23
+
24
+ from shapely.geometry import MultiPolygon, Polygon
25
+ from embodied_gen.skills.spatial_computing.core import (
26
+ UrdfSemanticInfoCollector,
27
+ )
28
+ from embodied_gen.skills.spatial_computing.core.collector import (
29
+ DEFAULT_BESIDE_DISTANCE,
30
+ DEFAULT_IGNORE_ITEMS,
31
+ DEFAULT_MESH_SAMPLE_NUM,
32
+ DEFAULT_ROTATION_RPY,
33
+ )
34
+ from embodied_gen.skills.spatial_computing.core.visualizer import (
35
+ FloorplanVisualizer,
36
+ )
37
+ from embodied_gen.utils.llm_resolve import resolve_instance_with_llm
38
+
39
+ # Type aliases
40
+ Geometry = Polygon | MultiPolygon
41
+ logger = logging.getLogger(__name__)
42
+
43
+
44
@dataclass
class FloorplanConfig:
    """Configuration for floorplan operations."""

    # NOTE: the bare-string "attribute docstrings" below are intentional —
    # this dataclass is parsed by tyro (see cli/main.py), which surfaces
    # them as per-flag CLI help text. Do not convert them to comments.

    urdf_path: str
    """Path to the input URDF scene file."""

    output_path: str | None = None
    """Path to save the floorplan visualization image."""

    usd_path: str | None = None
    """Optional path to the USD scene file for USD export."""

    asset_path: str | None = None
    """Optional path to the asset mesh file (.obj)."""

    instance_key: str = "inserted_object"
    """Unique key for the added instance."""

    in_room: str | None = None
    """Optional room name to constrain asset placement."""

    on_instance: str | None = None
    """Optional instance name to place the asset on top of (exact key from get_instance_names())."""

    beside_instance: str | None = None
    """Optional instance name to place the asset beside (on floor, near the target)."""

    beside_distance: float = DEFAULT_BESIDE_DISTANCE
    """Max distance (meters) from the target instance for beside placement."""

    place_strategy: Literal["top", "random"] = "random"
    """Placement strategy for the asset."""

    rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY
    """Rotation in roll-pitch-yaw (radians)."""

    ignore_items: list[str] = field(
        default_factory=lambda: list(DEFAULT_IGNORE_ITEMS)
    )
    """List of item name patterns to ignore during parsing."""

    mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM
    """Number of points to sample from meshes."""

    max_placement_attempts: int = 2000
    """Maximum attempts for asset placement."""

    # NOTE(review): update_urdf / update_usd are not read by the CLI
    # entrypoint visible in this change set — confirm they are consumed
    # elsewhere or wire them into entrypoint()/update_scene().
    update_urdf: bool = True
    """Whether to update and save the URDF file."""

    update_usd: bool = True
    """Whether to update and save the USD file."""

    list_instances: bool = False
    """If True, print instance and room names then exit (no placement/visualization)."""
100
+
101
+
102
class FloorplanManager:
    """High-level API for floorplan operations.

    This class provides simplified methods for:
    - Loading and analyzing URDF scenes
    - Visualizing floorplans
    - Inserting objects into scenes
    - Updating URDF and USD files

    Example:
        >>> manager = FloorplanManager(urdf_path="scene.urdf", usd_path="scene.usdc")
        >>> manager.visualize(output_path="floorplan.png")
        >>> position = manager.insert_object(
        ...     asset_path="chair.obj",
        ...     instance_key="chair_1",
        ...     in_room="kitchen"
        ... )
        # URDF/USD are updated automatically after insert
    """

    def __init__(
        self,
        urdf_path: str,
        usd_path: str | None = None,
        mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
        ignore_items: list[str] | None = None,
    ) -> None:
        """Initialize the floorplan manager.

        Args:
            urdf_path: Path to the URDF file.
            usd_path: Optional path to the USD file for scene updates.
            mesh_sample_num: Number of points to sample from meshes.
            ignore_items: List of item name patterns to ignore.
        """
        self.urdf_path = urdf_path
        self.usd_path = usd_path
        self.collector = UrdfSemanticInfoCollector(
            mesh_sample_num=mesh_sample_num,
            ignore_items=ignore_items,
        )
        self.collector.collect(urdf_path)
        # Metadata of the most recently inserted instance; consumed by
        # update_scene() to write the URDF/USD outputs.
        self.pending_instance_data: dict | None = None

    def visualize(
        self,
        output_path: str,
    ) -> None:
        """Generate and save a floorplan visualization.

        Args:
            output_path: Path to save the output image.
        """
        FloorplanVisualizer.plot(
            self.collector.rooms,
            self.collector.footprints,
            self.collector.occ_area,
            output_path,
        )
        logger.info(f"✅ Floorplan visualization saved to {output_path}")

    def insert_object(
        self,
        asset_path: str,
        instance_key: str,
        in_room: str | None = None,
        on_instance: str | None = None,
        beside_instance: str | None = None,
        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
        rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
        n_max_attempt: int = 2000,
        place_strategy: Literal["top", "random"] = "random",
    ) -> list[float] | None:
        """Insert an object into the scene with automatic placement.

        On success the URDF/USD files are updated immediately via
        update_scene().

        Args:
            asset_path: Path to the asset mesh file (.obj).
            instance_key: Unique key for the new instance.
            in_room: Optional room name to constrain placement.
            on_instance: Optional instance name to place on top of.
            beside_instance: Optional instance name to place beside (on floor).
            beside_distance: Max distance from target for beside placement.
            rotation_rpy: Initial rotation in roll-pitch-yaw.
            n_max_attempt: Maximum placement attempts.
            place_strategy: Either "top" or "random".

        Returns:
            List [x, y, z] of the placed instance center, or None if failed.
        """
        center = self.collector.add_instance(
            asset_path=asset_path,
            instance_key=instance_key,
            in_room=in_room,
            on_instance=on_instance,
            beside_instance=beside_instance,
            beside_distance=beside_distance,
            rotation_rpy=rotation_rpy,
            n_max_attempt=n_max_attempt,
            place_strategy=place_strategy,
        )

        if center is not None:
            self.pending_instance_data = {
                "asset_path": asset_path,
                "instance_key": instance_key,
                "center": center,
                "rotation_rpy": rotation_rpy,
            }
            self.update_scene()

        return center

    def update_scene(
        self,
        urdf_output_path: str | None = None,
        usd_output_path: str | None = None,
    ) -> None:
        """Update URDF and/or USD with inserted instances.

        Updates URDF if self.urdf_path is set, USD if self.usd_path is set.
        Both are updated when both paths are set. No-op when no instance was
        inserted.

        Note: USD updates require Blender (bpy) to convert .obj to .usdc format.

        Args:
            urdf_output_path: Optional custom path for URDF output.
            usd_output_path: Optional custom path for USD output.

        Raises:
            ValueError: If no instance has been inserted.
        """
        if self.pending_instance_data is None:
            raise ValueError(
                "No instance to update. Call insert_object() first."
            )

        data = self.pending_instance_data
        # A sibling "<name>_collision.obj" is used as the collision mesh
        # when it exists next to the visual mesh.
        collision_path = data["asset_path"].replace(".obj", "_collision.obj")
        if not os.path.exists(collision_path):
            collision_path = None
        # FIX: pass the same tuple form to both writers (previously the USD
        # branch received the raw center while URDF received a tuple).
        trans_xyz = tuple(data["center"])

        if self.urdf_path:
            urdf_out = urdf_output_path or self.urdf_path.replace(
                ".urdf", "_updated.urdf"
            )
            self.collector.update_urdf_info(
                output_path=urdf_out,
                instance_key=data["instance_key"],
                visual_mesh_path=data["asset_path"],
                collision_mesh_path=collision_path,
                trans_xyz=trans_xyz,
                rot_rpy=data["rotation_rpy"],
                joint_type="fixed",
            )

        if self.usd_path:
            usd_out = usd_output_path or self.usd_path.replace(
                ".usdc", "_updated.usdc"
            )
            self.collector.update_usd_info(
                usd_path=self.usd_path,
                output_path=usd_out,
                instance_key=data["instance_key"],
                visual_mesh_path=data["asset_path"],
                trans_xyz=trans_xyz,
                rot_rpy=data["rotation_rpy"],
            )

    def get_room_names(self) -> list[str]:
        """Get list of room names in the scene.

        Returns:
            List of room names.
        """
        return list(self.collector.rooms.keys())

    def get_instance_names(self) -> list[str]:
        """Get list of instance names in the scene.

        Walls and floors are excluded — they are structural, not placeable
        targets.

        Returns:
            List of instance names.
        """
        return [
            k
            for k in self.collector.instances.keys()
            if k != "walls" and "floor" not in k.lower()
        ]

    def get_instance_names_in_room(self, in_room: str) -> list[str]:
        """Get instance names that are spatially inside a given room.

        Buffers the room polygon slightly to handle mesh-sampling precision.

        Args:
            in_room: Exact room key (must exist in get_room_names()).

        Returns:
            List of instance names within the room.
        """
        room_poly = self.collector.rooms.get(in_room)
        if room_poly is None:
            return self.get_instance_names()
        room_buffered = room_poly.buffer(0.1)
        all_names = self.get_instance_names()
        return [
            k
            for k in all_names
            if room_buffered.contains(
                self.collector.instances[k].representative_point()
            )
        ]

    def _resolve_name(
        self,
        query: str,
        candidates: list[str],
        gpt_client: object | None,
    ) -> str | None:
        """Resolve a user-supplied name against a candidate list.

        Exact matches win; otherwise the optional LLM client resolves the
        semantic description to one candidate. Shared by all resolve_*
        methods so their behavior stays consistent.

        Args:
            query: Exact key or free-form description.
            candidates: Valid keys to match against.
            gpt_client: Optional GPT client for semantic resolution.

        Returns:
            Exact key, or None if not found / LLM returned NONE.
        """
        if query in candidates:
            return query
        if gpt_client is not None:
            return resolve_instance_with_llm(
                gpt_client, candidates, query  # type: ignore[arg-type]
            )
        return None

    def resolve_on_instance(
        self,
        on_instance: str,
        gpt_client: object | None = None,
    ) -> str | None:
        """Resolve on_instance to an exact key (for placement).

        If on_instance is already in get_instance_names(), return it.
        Otherwise if gpt_client is provided, use LLM to resolve user
        description (e.g. "柜子", "书柜") to one exact instance key.

        Args:
            on_instance: Exact instance key or semantic description.
            gpt_client: Optional GPT client for semantic resolve (e.g. GPT_CLIENT).

        Returns:
            Exact instance key, or None if not found / LLM returned NONE.
        """
        return self._resolve_name(
            on_instance, self.get_instance_names(), gpt_client
        )

    def resolve_in_room(
        self,
        in_room: str,
        gpt_client: object | None = None,
    ) -> str | None:
        """Resolve in_room to an exact room name (for placement).

        If in_room is already in get_room_names(), return it.
        Otherwise if gpt_client is provided, use LLM to resolve user
        description (e.g. "kitchen", "the place for cooking") to one exact
        room name.

        Args:
            in_room: Exact room name or semantic description.
            gpt_client: Optional GPT client for semantic resolve (e.g. GPT_CLIENT).

        Returns:
            Exact room name, or None if not found / LLM returned NONE.
        """
        return self._resolve_name(in_room, self.get_room_names(), gpt_client)

    def resolve_beside_instance(
        self,
        beside_instance: str,
        gpt_client: object | None = None,
        in_room: str | None = None,
    ) -> str | None:
        """Resolve beside_instance to an exact key (for beside placement).

        If beside_instance is already in get_instance_names(), return it.
        Otherwise if gpt_client is provided, use LLM to resolve user
        description (e.g. "桌子", "沙发") to one exact instance key.

        When `in_room` is given, only instances spatially inside that room
        are considered as candidates.

        Args:
            beside_instance: Exact instance key or semantic description.
            gpt_client: Optional GPT client for semantic resolve.
            in_room: Optional resolved room key to restrict candidate scope.

        Returns:
            Exact instance key, or None if not found / LLM returned NONE.
        """
        if in_room is not None:
            names = self.get_instance_names_in_room(in_room)
        else:
            names = self.get_instance_names()
        return self._resolve_name(beside_instance, names, gpt_client)

    def get_occupied_area(self) -> Geometry:
        """Get the union of all occupied areas.

        Returns:
            Shapely geometry representing occupied areas.
        """
        return self.collector.occ_area

    def get_floor_union(self) -> Geometry:
        """Get the union of all floor areas.

        Returns:
            Shapely geometry representing floor areas.
        """
        return self.collector.floor_union
427
+
428
+
429
def visualize_floorplan(
    urdf_path: str,
    output_path: str,
    mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
    ignore_items: list[str] | None = None,
) -> None:
    """Quick function to visualize a floorplan.

    Builds a throwaway FloorplanManager and renders its floorplan image.

    Args:
        urdf_path: Path to the URDF file.
        output_path: Path to save the output image.
        mesh_sample_num: Number of points to sample from meshes.
        ignore_items: List of item name patterns to ignore.
    """
    FloorplanManager(
        urdf_path=urdf_path,
        mesh_sample_num=mesh_sample_num,
        ignore_items=ignore_items,
    ).visualize(output_path=output_path)
450
+
451
+
452
def insert_object_to_scene(
    urdf_path: str,
    asset_path: str,
    instance_key: str,
    output_path: str,
    usd_path: str | None = None,
    in_room: str | None = None,
    on_instance: str | None = None,
    beside_instance: str | None = None,
    beside_distance: float = DEFAULT_BESIDE_DISTANCE,
    place_strategy: Literal["top", "random"] = "random",
    rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
) -> list[float] | None:
    """Quick function to insert an object and generate floorplan.

    Note: USD updates require Blender (bpy) to convert .obj to .usdc format.

    Args:
        urdf_path: Path to the URDF file.
        asset_path: Path to the asset mesh file (.obj).
        instance_key: Unique key for the new instance.
        output_path: Path to save the floorplan image.
        usd_path: Optional path to the USD file (requires Blender).
        in_room: Optional room name to constrain placement.
        on_instance: Optional instance name to place on top of.
        beside_instance: Optional instance name to place beside (on floor).
        beside_distance: Max distance for beside placement (meters).
        place_strategy: Either "top" or "random".
        rotation_rpy: Initial rotation in roll-pitch-yaw.

    Returns:
        List [x, y, z] of the placed instance center, or None if failed.
    """
    manager = FloorplanManager(urdf_path=urdf_path, usd_path=usd_path)
    placement_kwargs = dict(
        asset_path=asset_path,
        instance_key=instance_key,
        in_room=in_room,
        on_instance=on_instance,
        beside_instance=beside_instance,
        beside_distance=beside_distance,
        rotation_rpy=rotation_rpy,
        place_strategy=place_strategy,
    )
    center = manager.insert_object(**placement_kwargs)
    # Only render the floorplan when placement actually succeeded.
    if center is not None:
        manager.visualize(output_path=output_path)
    return center
embodied_gen/skills/spatial_computing/cli/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """Command-line interface for floorplan operations.
2
+
3
+ This package provides CLI utilities for floorplan visualization and
4
+ scene manipulation.
5
+ """
6
+
7
+ __all__: list[str] = []
embodied_gen/skills/spatial_computing/cli/main.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import sys
21
+ import warnings
22
+
23
+ import tyro
24
+ from embodied_gen.skills.spatial_computing.api.floorplan_api import (
25
+ FloorplanConfig,
26
+ FloorplanManager,
27
+ )
28
+
29
+ warnings.filterwarnings("ignore", category=RuntimeWarning)
30
+
31
+ logging.basicConfig(
32
+ format="%(asctime)s - %(levelname)s - %(message)s",
33
+ level=logging.INFO,
34
+ force=True,
35
+ )
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ def _get_gpt_client() -> object | None:
40
+ """Lazy-import GPT_CLIENT for semantic --on_instance resolution."""
41
+ try:
42
+ from embodied_gen.utils.gpt_clients import GPT_CLIENT
43
+
44
+ return GPT_CLIENT
45
+ except Exception:
46
+ return None
47
+
48
+
49
def entrypoint(cfg: FloorplanConfig) -> None:
    """Main entry point for floorplan visualization and scene manipulation.

    Args:
        cfg: Configuration object with all parameters.
    """
    manager = FloorplanManager(
        urdf_path=cfg.urdf_path,
        usd_path=cfg.usd_path,
        mesh_sample_num=cfg.mesh_sample_num,
        ignore_items=cfg.ignore_items,
    )

    # List instances/rooms and exit if requested
    if cfg.list_instances:
        names = manager.get_instance_names()
        rooms = manager.get_room_names()
        # BUG FIX: logger.info was called print-style with a trailing
        # positional arg but no %s placeholder, which raises a logging
        # formatting error and drops the data.
        logger.info("instance_names: %s", names)
        logger.info("room_names: %s", rooms)
        return

    gpt_client = _get_gpt_client()
    on_instance = cfg.on_instance
    if on_instance is not None:
        resolved = manager.resolve_on_instance(
            on_instance, gpt_client=gpt_client
        )
        if resolved is None:
            logger.error(
                "No object matched \"%s\"。Current scene instance name: %s。",
                on_instance,
                manager.get_instance_names(),
            )
            sys.exit(1)
        on_instance = resolved
        if resolved != cfg.on_instance:
            logger.info("\"%s\" -> \"%s\"", cfg.on_instance, resolved)

    in_room = cfg.in_room
    if in_room is not None:
        resolved = manager.resolve_in_room(in_room, gpt_client=gpt_client)
        if resolved is None:
            logger.error(
                "No room matched \"%s\"。Current scene room names: %s。",
                in_room,
                manager.get_room_names(),
            )
            sys.exit(1)
        in_room = resolved
        if resolved != cfg.in_room:
            logger.info("\"%s\" -> \"%s\"", cfg.in_room, resolved)

    beside_instance = cfg.beside_instance
    if beside_instance is not None:
        resolved = manager.resolve_beside_instance(
            beside_instance, gpt_client=gpt_client, in_room=in_room
        )
        if resolved is None:
            logger.error(
                "No object matched \"%s\"。Current scene instance name: %s。",
                beside_instance,
                manager.get_instance_names(),
            )
            sys.exit(1)
        beside_instance = resolved
        if resolved != cfg.beside_instance:
            logger.info("\"%s\" -> \"%s\"", cfg.beside_instance, resolved)

    # Add asset instance if specified
    center = None
    if cfg.asset_path is not None:
        center = manager.insert_object(
            asset_path=cfg.asset_path,
            instance_key=cfg.instance_key,
            in_room=in_room,
            on_instance=on_instance,
            beside_instance=beside_instance,
            beside_distance=cfg.beside_distance,
            rotation_rpy=cfg.rotation_rpy,
            n_max_attempt=cfg.max_placement_attempts,
            place_strategy=cfg.place_strategy,
        )

        if center is not None:
            logger.info(
                f"Successfully placed '{cfg.instance_key}' at "
                f"({center[0]:.3f}, {center[1]:.3f}, {center[2]:.3f})"
            )
        else:
            logger.error(
                f"❌ Failed to place '{cfg.instance_key}' in the scene."
            )
            sys.exit(1)

    # Generate floorplan visualization
    if cfg.output_path is not None:
        manager.visualize(output_path=cfg.output_path)
147
+
148
+
149
if __name__ == "__main__":
    # Parse the CLI args into a FloorplanConfig and run the pipeline.
    entrypoint(tyro.cli(FloorplanConfig))
embodied_gen/skills/spatial_computing/core/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Floorplan skill core modules.
2
+
3
+ This package provides core functionality for floorplan visualization
4
+ and object placement in 3D indoor scenes.
5
+ """
6
+
7
+ from embodied_gen.skills.spatial_computing.core.collector import (
8
+ UrdfSemanticInfoCollector,
9
+ )
10
+ from embodied_gen.skills.spatial_computing.core.geometry import (
11
+ get_actionable_surface,
12
+ points_to_polygon,
13
+ )
14
+ from embodied_gen.skills.spatial_computing.core.visualizer import (
15
+ FloorplanVisualizer,
16
+ )
17
+
18
+ __all__ = [
19
+ "FloorplanVisualizer",
20
+ "UrdfSemanticInfoCollector",
21
+ "points_to_polygon",
22
+ "get_actionable_surface",
23
+ ]
embodied_gen/skills/spatial_computing/core/collector.py ADDED
@@ -0,0 +1,833 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import os
22
+ import random
23
+ import re
24
+ import xml.etree.ElementTree as ET
25
+ from shutil import copy2, copytree
26
+ from typing import Literal
27
+
28
+ import numpy as np
29
+ import trimesh
30
+ from scipy.spatial.transform import Rotation as R
31
+ from shapely.affinity import translate
32
+ from shapely.geometry import MultiPolygon, Polygon
33
+ from shapely.ops import unary_union
34
+ from embodied_gen.skills.spatial_computing.core.geometry import (
35
+ DEFAULT_MESH_SAMPLE_NUM,
36
+ get_actionable_surface,
37
+ points_to_polygon,
38
+ )
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ # Type aliases
43
+ Geometry = Polygon | MultiPolygon
44
+
45
+ # Constants
46
+ DEFAULT_ROTATION_RPY = (1.57, 0.0, 0.0)
47
+ DEFAULT_MAX_PLACEMENT_ATTEMPTS = 3000
48
+ DEFAULT_IGNORE_ITEMS = ("ceiling", "light", "exterior")
49
+ DEFAULT_BESIDE_DISTANCE = 0.5
50
+
51
+
52
+ class UrdfSemanticInfoCollector:
53
+ """Collector for URDF semantic information.
54
+
55
+ Parses URDF files to extract room layouts, object footprints, and
56
+ provides methods for adding new instances and updating URDF/USD files.
57
+
58
+ Attributes:
59
+ mesh_sample_num: Number of points to sample from meshes.
60
+ ignore_items: List of item name patterns to ignore.
61
+ instances: Dictionary of instance name to footprint polygon.
62
+ instance_meta: Dictionary of instance metadata (mesh path, pose).
63
+ rooms: Dictionary of room polygons.
64
+ footprints: Dictionary of object footprints.
65
+ occ_area: Union of all occupied areas.
66
+ floor_union: Union of all floor polygons.
67
+
68
+ """
69
+
70
+ def __init__(
71
+ self,
72
+ mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
73
+ ignore_items: list[str] | None = None,
74
+ ) -> None:
75
+ """Initialize the collector.
76
+
77
+ Args:
78
+ mesh_sample_num: Number of points to sample from meshes.
79
+ ignore_items: List of item name patterns to ignore during parsing.
80
+
81
+ """
82
+ self.mesh_sample_num = mesh_sample_num
83
+ self.ignore_items = ignore_items or list(DEFAULT_IGNORE_ITEMS)
84
+
85
+ self.instances: dict[str, Polygon] = {}
86
+ self.instance_meta: dict[str, dict] = {}
87
+ self.rooms: dict[str, Geometry] = {}
88
+ self.footprints: dict[str, Geometry] = {}
89
+ self.occ_area: Geometry = Polygon()
90
+ self.floor_union: Geometry = Polygon()
91
+
92
+ self.urdf_path: str = ""
93
+ self._tree: ET.ElementTree | None = None
94
+ self._root: ET.Element | None = None
95
+
96
+ def _get_transform(
97
+ self,
98
+ joint_elem: ET.Element,
99
+ ) -> tuple[np.ndarray, np.ndarray]:
100
+ """Extract transform (xyz, rpy) from a joint element.
101
+
102
+ Args:
103
+ joint_elem: XML Element representing a URDF joint.
104
+
105
+ Returns:
106
+ Tuple of (xyz, rpy) arrays.
107
+
108
+ """
109
+ origin = joint_elem.find("origin")
110
+ if origin is not None:
111
+ xyz = np.fromstring(origin.attrib.get("xyz", "0 0 0"), sep=" ")
112
+ rpy = np.fromstring(origin.attrib.get("rpy", "0 0 0"), sep=" ")
113
+ else:
114
+ xyz, rpy = np.zeros(3), np.zeros(3)
115
+ return xyz, rpy
116
+
117
+ def _process_mesh_to_poly(
118
+ self,
119
+ mesh_path: str,
120
+ xyz: np.ndarray,
121
+ rpy: np.ndarray,
122
+ ) -> Polygon:
123
+ """Load mesh file and convert to 2D footprint polygon.
124
+
125
+ Args:
126
+ mesh_path: Path to the mesh file.
127
+ xyz: Translation vector.
128
+ rpy: Rotation in roll-pitch-yaw.
129
+
130
+ Returns:
131
+ Footprint polygon of the mesh.
132
+
133
+ """
134
+ if not os.path.exists(mesh_path):
135
+ return Polygon()
136
+
137
+ mesh = trimesh.load(mesh_path, force="mesh", skip_materials=True)
138
+
139
+ matrix = np.eye(4)
140
+ matrix[:3, :3] = R.from_euler("xyz", rpy).as_matrix()
141
+ matrix[:3, 3] = xyz
142
+ mesh.apply_transform(matrix)
143
+
144
+ verts = np.asarray(mesh.sample(self.mesh_sample_num))[:, :2]
145
+ return points_to_polygon(verts)
146
+
147
+ def collect(self, urdf_path: str) -> None:
148
+ """Parse URDF file and collect semantic information.
149
+
150
+ Args:
151
+ urdf_path: Path to the URDF file.
152
+
153
+ """
154
+ logger.info(f"Collecting URDF semantic info from {urdf_path}")
155
+ self.urdf_path = urdf_path
156
+ urdf_dir = os.path.dirname(urdf_path)
157
+
158
+ self._tree = ET.parse(urdf_path)
159
+ self._root = self._tree.getroot()
160
+
161
+ link_transforms = self._build_link_transforms()
162
+ self._process_links(urdf_dir, link_transforms)
163
+ self._update_internal_state()
164
+
165
+ def _build_link_transforms(
166
+ self,
167
+ ) -> dict[str, tuple[np.ndarray, np.ndarray]]:
168
+ """Build mapping from link names to their transforms.
169
+
170
+ Returns:
171
+ Dictionary mapping link names to (xyz, rpy) tuples.
172
+
173
+ """
174
+ link_transforms: dict[str, tuple[np.ndarray, np.ndarray]] = {}
175
+
176
+ for joint in self._tree.findall("joint"):
177
+ child = joint.find("child")
178
+ if child is not None:
179
+ link_name = child.attrib["link"]
180
+ link_transforms[link_name] = self._get_transform(joint)
181
+
182
+ return link_transforms
183
+
184
    def _process_links(
        self,
        urdf_dir: str,
        link_transforms: dict[str, tuple[np.ndarray, np.ndarray]],
    ) -> None:
        """Process all links in the URDF tree.

        Populates ``self.instances`` (2D footprints keyed by normalized
        link name, plus a merged "walls" entry) and ``self.instance_meta``
        (mesh path and pose for each non-wall instance).

        Args:
            urdf_dir: Directory containing the URDF file.
            link_transforms: Dictionary of link transforms.

        """
        self.instances = {}
        self.instance_meta = {}
        wall_polys: list[Polygon] = []

        logger.info("Processing URDF links to extract geometry...")
        for link in self._tree.findall("link"):
            name = link.attrib.get("name", "").lower()
            if any(ign in name for ign in self.ignore_items):
                continue

            # Skip links without a visual mesh — nothing to project.
            visual = link.find("visual")
            if visual is None:
                continue

            mesh_node = visual.find("geometry/mesh")
            if mesh_node is None:
                continue

            mesh_path = os.path.join(urdf_dir, mesh_node.attrib["filename"])
            # Links without a joint fall back to an identity transform.
            default_transform = (np.zeros(3), np.zeros(3))
            xyz, rpy = link_transforms.get(
                link.attrib["name"], default_transform
            )

            poly = self._process_mesh_to_poly(mesh_path, xyz, rpy)
            if poly.is_empty:
                continue

            # Wall links are merged into a single union; everything else is
            # stored individually under a normalized key.
            if "wall" in name:
                wall_polys.append(poly)
            else:
                key = self._process_safe_key_robust(link.attrib["name"])
                self.instances[key] = poly
                self.instance_meta[key] = {
                    "mesh_path": mesh_path,
                    "xyz": xyz,
                    "rpy": rpy,
                }

        self.instances["walls"] = unary_union(wall_polys)
236
+
237
+ def _update_internal_state(self) -> None:
238
+ """Update derived state (rooms, footprints, occupied area)."""
239
+ self.rooms = {
240
+ k: v
241
+ for k, v in self.instances.items()
242
+ if "_floor" in k.lower() and not v.is_empty
243
+ }
244
+
245
+ self.footprints = {
246
+ k: v
247
+ for k, v in self.instances.items()
248
+ if k != "walls"
249
+ and "_floor" not in k.lower()
250
+ and "rug" not in k.lower()
251
+ and not v.is_empty
252
+ }
253
+ self.occ_area = unary_union(list(self.footprints.values()))
254
+ self.floor_union = unary_union(list(self.rooms.values()))
255
+
256
+ def _process_safe_key_robust(self, name: str) -> str:
257
+ """Convert a link name to a safe, normalized key.
258
+
259
+ Args:
260
+ name: Original link name.
261
+
262
+ Returns:
263
+ Normalized key string.
264
+
265
+ """
266
+ if name.endswith("_floor"):
267
+ parts = name.split("_")
268
+ return "_".join(parts[:-2] + ["floor"])
269
+
270
+ if "Factory" in name:
271
+ # Handle infinigen naming convention
272
+ prefix = name.split("Factory")[0]
273
+ suffix = f"_{name.split('_')[-1]}"
274
+ else:
275
+ prefix, suffix = name, ""
276
+
277
+ res = prefix.replace(" ", "_")
278
+ res = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", res)
279
+ res = res.lower()
280
+ res = re.sub(r"_+", "_", res).strip("_ ")
281
+
282
+ return f"{res}{suffix}"
283
+
284
    def add_instance(
        self,
        asset_path: str,
        instance_key: str,
        in_room: str | None = None,
        on_instance: str | None = None,
        beside_instance: str | None = None,
        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
        rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
        n_max_attempt: int = DEFAULT_MAX_PLACEMENT_ATTEMPTS,
        place_strategy: Literal["top", "random"] = "random",
    ) -> list[float] | None:
        """Add a new instance to the scene with automatic placement.

        Args:
            asset_path: Path to the asset mesh file.
            instance_key: Unique key for the new instance.
            in_room: Optional room name to constrain placement.
            on_instance: Optional instance name to place on top of.
            beside_instance: Optional instance name to place beside (on floor).
            beside_distance: Initial buffer distance from the target instance
                for beside placement (meters). Will auto-expand if needed.
            rotation_rpy: Initial rotation in roll-pitch-yaw.
            n_max_attempt: Maximum placement attempts.
            place_strategy: Either "top" or "random".

        Returns:
            List [x, y, z] of the placed instance center, or None if failed.

        Raises:
            ValueError: If instance_key already exists or room/instance not found.

        """
        if instance_key in self.instances:
            raise ValueError(f"Instance key '{instance_key}' already exists.")

        room_poly = self._resolve_room_polygon(in_room)

        # Load mesh and compute base polygon (needed for all placement modes)
        mesh = trimesh.load(asset_path, force="mesh")
        mesh.apply_transform(
            trimesh.transformations.euler_matrix(*rotation_rpy, "sxyz")
        )

        # Footprint is re-centered at the origin so placement offsets are
        # pure translations.
        verts = np.asarray(mesh.sample(self.mesh_sample_num))[:, :2]
        base_poly = points_to_polygon(verts)
        centroid = base_poly.centroid
        base_poly = translate(base_poly, xoff=-centroid.x, yoff=-centroid.y)

        if beside_instance is not None:
            placement = self._try_place_beside(
                base_poly=base_poly,
                beside_instance=beside_instance,
                room_poly=room_poly,
                beside_distance=beside_distance,
                n_max_attempt=n_max_attempt,
            )
            # Beside-placement is always on the floor.
            base_z = 0.0
        else:
            target_area, obstacles, base_z = self._resolve_placement_target(
                on_instance, room_poly, place_strategy
            )

            if target_area.is_empty:
                logger.error("Target area for placement is empty.")
                return None

            placement = self._try_place_polygon(
                base_poly, target_area, obstacles, n_max_attempt
            )

        if placement is None:
            logger.error(
                f"Failed to place '{instance_key}' after all attempts."
            )
            return None

        x, y, candidate = placement
        # NOTE(review): instance_meta is not populated for the new instance,
        # so later placing something *on* it would KeyError in
        # _resolve_placement_target — confirm this is intended.
        self.instances[instance_key] = candidate
        # Returned Z is the object's vertical center: support height plus
        # half the mesh extent.
        final_z = base_z + mesh.extents[2] / 2
        self._update_internal_state()

        return [round(v, 4) for v in (x, y, final_z)]
367
+
368
+ def _resolve_room_polygon(self, in_room: str | None) -> Geometry | None:
369
+ """Resolve room name to polygon.
370
+
371
+ Args:
372
+ in_room: Room name query string.
373
+
374
+ Returns:
375
+ Room polygon or None if not specified.
376
+
377
+ Raises:
378
+ ValueError: If room not found.
379
+
380
+ """
381
+ if in_room is None:
382
+ return None
383
+
384
+ query_room = in_room.lower()
385
+ room_matches = [
386
+ k for k in self.rooms.keys() if query_room in k.lower()
387
+ ]
388
+
389
+ if not room_matches:
390
+ raise ValueError(f"Room '{in_room}' not found.")
391
+
392
+ return unary_union([self.rooms[k] for k in room_matches])
393
+
394
    def _try_place_beside(
        self,
        base_poly: Polygon,
        beside_instance: str,
        room_poly: Geometry | None,
        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
        n_max_attempt: int = DEFAULT_MAX_PLACEMENT_ATTEMPTS,
        max_expand_steps: int = 5,
        expand_factor: float = 1.5,
    ) -> tuple[float, float, Polygon] | None:
        """Place object beside target with progressive distance expansion.

        More robust than fixed-distance placement:
        1. Ensures minimum distance accommodates the new object's size.
        2. Pre-subtracts obstacles from the ring → sampling only in free area.
        3. Progressively expands distance on failure (up to max_expand_steps).
        4. Skips steps where the free area is too small for the object.

        Args:
            base_poly: Object footprint polygon centered at origin.
            beside_instance: Target instance name to place beside.
            room_poly: Optional room constraint polygon.
            beside_distance: Initial buffer distance (meters).
            n_max_attempt: Total max placement attempts across all steps.
            max_expand_steps: Max number of distance expansion rounds.
            expand_factor: Distance multiplier per expansion round.

        Returns:
            Tuple (x, y, placed_polygon) on success, or None if all failed.

        Raises:
            ValueError: If beside_instance not found in scene.

        """
        # --- Resolve target instance ---
        query_obj = beside_instance.lower()
        possible_matches = [
            k
            for k in self.instances.keys()
            if query_obj in k.lower() and k != "walls"
        ]

        if room_poly is not None:
            # Check that the object's representative point falls inside
            # the room (buffered slightly for mesh-sampling tolerance).
            room_buffered = room_poly.buffer(0.1)
            possible_matches = [
                k
                for k in possible_matches
                if room_buffered.contains(
                    self.instances[k].representative_point()
                )
            ]

        if not possible_matches:
            location_msg = " in specified room" if room_poly else ""
            # Log candidate distances for easier debugging
            all_matches = [
                k
                for k in self.instances.keys()
                if query_obj in k.lower() and k != "walls"
            ]
            if all_matches and room_poly is not None:
                dists = {
                    k: round(self.instances[k].distance(room_poly), 4)
                    for k in all_matches
                }
                logger.error("Candidate distances to room polygon: %s", dists)
            raise ValueError(
                f"No instance matching '{beside_instance}' "
                f"found{location_msg}."
            )

        # First match wins; ties are only reported, not disambiguated.
        target_key = possible_matches[0]
        if len(possible_matches) > 1:
            logger.warning(
                f"Multiple matches for '{beside_instance}': "
                f"{possible_matches}. Using '{target_key}'."
            )

        target_footprint = self.instances[target_key]
        floor = room_poly if room_poly is not None else self.floor_union

        # --- Ensure initial distance accommodates the object's size ---
        obj_bounds = base_poly.bounds  # (minx, miny, maxx, maxy)
        obj_half_diag = (
            np.hypot(
                obj_bounds[2] - obj_bounds[0],
                obj_bounds[3] - obj_bounds[1],
            )
            / 2.0
        )
        current_distance = max(beside_distance, obj_half_diag * 1.5)

        # Budget attempts across expansion steps
        attempts_per_step = max(n_max_attempt // (max_expand_steps + 1), 500)
        empty_obstacle = Polygon()  # pre-created; obstacles are pre-subtracted

        for step in range(max_expand_steps + 1):
            # Build ring: buffer - footprint, intersected with floor
            buffered = target_footprint.buffer(current_distance)
            ring_area = buffered.difference(target_footprint)
            ring_area = ring_area.intersection(floor)

            if ring_area.is_empty:
                logger.info(
                    f"[beside step {step}] Ring empty at "
                    f"{current_distance:.2f}m, expanding..."
                )
                current_distance *= expand_factor
                continue

            # Pre-subtract all obstacles → sample only from actual free area
            free_area = ring_area.difference(self.occ_area)

            if free_area.is_empty or free_area.area < base_poly.area * 0.5:
                logger.info(
                    f"[beside step {step}] Free area too small at "
                    f"{current_distance:.2f}m "
                    f"(free={free_area.area:.4f}, "
                    f"need≈{base_poly.area:.4f}), expanding..."
                )
                current_distance *= expand_factor
                continue

            # Attempt placement in the free area (obstacles already removed)
            placement = self._try_place_polygon(
                base_poly, free_area, empty_obstacle, attempts_per_step
            )

            if placement is not None:
                logger.info(
                    f"Placed beside '{target_key}' at distance "
                    f"{current_distance:.2f}m (step {step})"
                )
                return placement

            logger.info(
                f"[beside step {step}] Failed at {current_distance:.2f}m "
                f"after {attempts_per_step} attempts, expanding..."
            )
            current_distance *= expand_factor

        logger.error(
            f"Failed to place beside '{target_key}' after "
            f"{max_expand_steps + 1} expansion steps "
            f"(final distance: {current_distance / expand_factor:.2f}m)."
        )
        return None
543
+
544
+ def _resolve_placement_target(
545
+ self,
546
+ on_instance: str | None,
547
+ room_poly: Geometry | None,
548
+ place_strategy: Literal["top", "random"],
549
+ ) -> tuple[Geometry, Geometry, float]:
550
+ """Resolve the target placement area and obstacles.
551
+
552
+ Args:
553
+ on_instance: Instance name to place on.
554
+ room_poly: Room polygon constraint.
555
+ place_strategy: Placement strategy.
556
+
557
+ Returns:
558
+ Tuple of (target_area, obstacles, base_z_height).
559
+
560
+ Raises:
561
+ ValueError: If on_instance not found.
562
+
563
+ """
564
+ if on_instance is None:
565
+ if room_poly is not None:
566
+ return room_poly, self.occ_area, 0.0
567
+ return self.floor_union, self.occ_area, 0.0
568
+
569
+ query_obj = on_instance.lower()
570
+ possible_matches = [
571
+ k
572
+ for k in self.instances.keys()
573
+ if query_obj in k.lower() and k != "walls"
574
+ ]
575
+
576
+ if room_poly is not None:
577
+ room_buffered = room_poly.buffer(0.1)
578
+ possible_matches = [
579
+ k
580
+ for k in possible_matches
581
+ if room_buffered.contains(
582
+ self.instances[k].representative_point()
583
+ )
584
+ ]
585
+
586
+ if not possible_matches:
587
+ location_msg = f" in room '{on_instance}'" if room_poly else ""
588
+ raise ValueError(
589
+ f"No instance matching '{on_instance}' found{location_msg}."
590
+ )
591
+
592
+ if place_strategy == "random":
593
+ target_parent_key = random.choice(possible_matches)
594
+ else:
595
+ target_parent_key = possible_matches[0]
596
+
597
+ if len(possible_matches) > 1:
598
+ logger.warning(
599
+ f"Multiple matches for '{on_instance}': {possible_matches}. "
600
+ f"Using '{target_parent_key}'."
601
+ )
602
+
603
+ meta = self.instance_meta[target_parent_key]
604
+ parent_mesh = trimesh.load(meta["mesh_path"], force="mesh")
605
+ matrix = np.eye(4)
606
+ matrix[:3, :3] = R.from_euler("xyz", meta["rpy"]).as_matrix()
607
+ matrix[:3, 3] = meta["xyz"]
608
+ parent_mesh.apply_transform(matrix)
609
+
610
+ best_z, surface_poly = get_actionable_surface(
611
+ parent_mesh, place_strategy=place_strategy
612
+ )
613
+ obstacles = self.occ_area.difference(self.instances[target_parent_key])
614
+
615
+ logger.info(f"Placing on '{target_parent_key}' (Z={best_z:.3f})")
616
+
617
+ return surface_poly, obstacles, best_z
618
+
619
+ def _try_place_polygon(
620
+ self,
621
+ base_poly: Polygon,
622
+ target_area: Geometry,
623
+ obstacles: Geometry,
624
+ n_max_attempt: int,
625
+ ) -> tuple[float, float, Polygon] | None:
626
+ """Try to place polygon in target area avoiding obstacles.
627
+
628
+ Args:
629
+ base_poly: Polygon to place (centered at origin).
630
+ target_area: Area where placement is allowed.
631
+ obstacles: Areas to avoid.
632
+ n_max_attempt: Maximum attempts.
633
+
634
+ Returns:
635
+ Tuple of (x, y, placed_polygon) or None if failed.
636
+
637
+ """
638
+ minx, miny, maxx, maxy = target_area.bounds
639
+
640
+ for _ in range(n_max_attempt):
641
+ x = np.random.uniform(minx, maxx)
642
+ y = np.random.uniform(miny, maxy)
643
+ candidate = translate(base_poly, xoff=x, yoff=y)
644
+
645
+ if target_area.contains(candidate) and not candidate.intersects(
646
+ obstacles
647
+ ):
648
+ return x, y, candidate
649
+
650
+ return None
651
+
652
    def update_urdf_info(
        self,
        output_path: str,
        instance_key: str,
        visual_mesh_path: str,
        collision_mesh_path: str | None = None,
        trans_xyz: tuple[float, float, float] = (0, 0, 0),
        rot_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
        joint_type: str = "fixed",
    ) -> None:
        """Add a new link to the URDF tree and save.

        Copies the mesh directories next to the URDF, then appends a
        <link> plus a joint attaching it to the "base" link.

        Args:
            output_path: Path to save the updated URDF.
            instance_key: Name for the new link.
            visual_mesh_path: Path to the visual mesh file.
            collision_mesh_path: Optional path to collision mesh.
            trans_xyz: Translation (x, y, z).
            rot_rpy: Rotation (roll, pitch, yaw).
            joint_type: Type of joint (e.g., "fixed").

        """
        # No tree loaded (collect() not called) — nothing to update.
        if self._root is None:
            return

        logger.info(f"Updating URDF for instance '{instance_key}'.")
        urdf_dir = os.path.dirname(self.urdf_path)

        # Copy mesh files
        copytree(
            os.path.dirname(visual_mesh_path),
            f"{urdf_dir}/{instance_key}",
            dirs_exist_ok=True,
        )
        visual_rel_path = (
            f"{instance_key}/{os.path.basename(visual_mesh_path)}"
        )

        collision_rel_path = None
        if collision_mesh_path is not None:
            copytree(
                os.path.dirname(collision_mesh_path),
                f"{urdf_dir}/{instance_key}",
                dirs_exist_ok=True,
            )
            collision_rel_path = (
                f"{instance_key}/{os.path.basename(collision_mesh_path)}"
            )

        # Create link element
        link = ET.SubElement(self._root, "link", attrib={"name": instance_key})

        visual = ET.SubElement(link, "visual")
        v_geo = ET.SubElement(visual, "geometry")
        ET.SubElement(v_geo, "mesh", attrib={"filename": visual_rel_path})

        if collision_rel_path is not None:
            collision = ET.SubElement(link, "collision")
            c_geo = ET.SubElement(collision, "geometry")
            ET.SubElement(
                c_geo, "mesh", attrib={"filename": collision_rel_path}
            )

        # Create joint element
        joint_name = f"joint_{instance_key}"
        joint = ET.SubElement(
            self._root,
            "joint",
            attrib={"name": joint_name, "type": joint_type},
        )

        # NOTE(review): assumes the URDF root link is named "base" — confirm
        # this holds for all scenes this class processes.
        ET.SubElement(joint, "parent", attrib={"link": "base"})
        ET.SubElement(joint, "child", attrib={"link": instance_key})

        xyz_str = f"{trans_xyz[0]:.4f} {trans_xyz[1]:.4f} {trans_xyz[2]:.4f}"
        rpy_str = f"{rot_rpy[0]:.4f} {rot_rpy[1]:.4f} {rot_rpy[2]:.4f}"
        ET.SubElement(joint, "origin", attrib={"xyz": xyz_str, "rpy": rpy_str})

        self.save_urdf(output_path)
731
+
732
    def update_usd_info(
        self,
        usd_path: str,
        output_path: str,
        instance_key: str,
        visual_mesh_path: str,
        trans_xyz: list[float],
        rot_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
    ) -> None:
        """Add a mesh instance to an existing USD file.

        Uses Blender (bpy) to convert OBJ to USD format.

        Args:
            usd_path: Path to the source USD file.
            output_path: Path to save the modified USD.
            instance_key: Prim path name for the new instance.
            visual_mesh_path: Path to the visual mesh (OBJ format).
            trans_xyz: Translation [x, y, z].
            rot_rpy: Rotation (roll, pitch, yaw).

        Raises:
            ImportError: If pxr (USD) library or bpy is not available.

        """
        # Heavy optional deps imported lazily so the class works without them.
        import bpy
        from pxr import Gf, Usd, UsdGeom

        prim_path = f"/{instance_key}"
        out_dir = os.path.dirname(output_path)
        target_dir = os.path.join(out_dir, instance_key)
        os.makedirs(target_dir, exist_ok=True)

        mesh_filename = os.path.basename(visual_mesh_path)
        usdc_filename = os.path.splitext(mesh_filename)[0] + ".usdc"
        target_usdc_path = os.path.join(target_dir, usdc_filename)

        logger.info(
            f"Converting with Blender (bpy): "
            f"{visual_mesh_path} -> {target_usdc_path}"
        )
        # Reset Blender to an empty scene so only the imported OBJ exports.
        bpy.ops.wm.read_factory_settings(use_empty=True)
        bpy.ops.wm.obj_import(
            filepath=visual_mesh_path,
            forward_axis="Y",
            up_axis="Z",
        )
        bpy.ops.wm.usd_export(
            filepath=target_usdc_path,
            selected_objects_only=False,
        )

        # Copy texture files
        src_dir = os.path.dirname(visual_mesh_path)
        for f in os.listdir(src_dir):
            if f.lower().endswith((".png", ".jpg", ".jpeg", ".mtl")):
                copy2(os.path.join(src_dir, f), target_dir)

        final_rel_path = f"./{instance_key}/{usdc_filename}"

        # Update USD stage
        stage = Usd.Stage.Open(usd_path)
        mesh_prim = UsdGeom.Xform.Define(stage, prim_path)

        ref_prim = UsdGeom.Mesh.Define(stage, f"{prim_path}/Mesh")
        ref_prim.GetPrim().GetReferences().AddReference(final_rel_path)

        # Build transform matrix: rotate X, then Y, then Z, then translate.
        translation_mat = Gf.Matrix4d().SetTranslate(
            Gf.Vec3d(trans_xyz[0], trans_xyz[1], trans_xyz[2])
        )
        rx = Gf.Matrix4d().SetRotate(
            Gf.Rotation(Gf.Vec3d(1, 0, 0), np.degrees(rot_rpy[0]))
        )
        ry = Gf.Matrix4d().SetRotate(
            Gf.Rotation(Gf.Vec3d(0, 1, 0), np.degrees(rot_rpy[1]))
        )
        rz = Gf.Matrix4d().SetRotate(
            Gf.Rotation(Gf.Vec3d(0, 0, 1), np.degrees(rot_rpy[2]))
        )
        rotation_mat = rx * ry * rz
        transform = rotation_mat * translation_mat
        mesh_prim.AddTransformOp().Set(transform)

        stage.GetRootLayer().Export(output_path)
        logger.info(f"✅ Saved updated USD to {output_path}")
818
+
819
+ def save_urdf(self, output_path: str) -> None:
820
+ """Save the current URDF tree to file.
821
+
822
+ Args:
823
+ output_path: Path to save the URDF file.
824
+
825
+ """
826
+ if self._tree is None:
827
+ return
828
+
829
+ if hasattr(ET, "indent"):
830
+ ET.indent(self._tree, space=" ", level=0)
831
+
832
+ self._tree.write(output_path, encoding="utf-8", xml_declaration=True)
833
+ logger.info(f"✅ Saved updated URDF to {output_path}")
embodied_gen/skills/spatial_computing/core/geometry.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import random
22
+ from typing import Literal
23
+
24
+ import numpy as np
25
+ import trimesh
26
+ from shapely.geometry import MultiPoint, MultiPolygon, Polygon
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Type aliases
31
+ Geometry = Polygon | MultiPolygon
32
+
33
+ # Constants
34
+ DEFAULT_MESH_SAMPLE_NUM = 50000
35
+ DEFAULT_MAX_PLACEMENT_ATTEMPTS = 2000
36
+
37
+
38
def points_to_polygon(
    points: np.ndarray,
    smooth_thresh: float = 0.2,
    scanline_step: float = 0.01,
) -> Polygon:
    """Build a contour polygon from a 2D point cloud via a Y-axis sweep.

    For each horizontal strip of height ``scanline_step`` the extreme
    left/right X values are collected; the chain is closed into a polygon
    and smoothed with a dilate/erode buffer pass.

    Args:
        points: Array of 2D points with shape (N, 2).
        smooth_thresh: Buffer threshold for smoothing the polygon.
        scanline_step: Step size for the scanline sweep.

    Returns:
        A Shapely Polygon representing the contour of the point cloud.

    """
    if len(points) == 0:
        return Polygon()

    y_coords = points[:, 1]
    scan_ys = np.arange(
        y_coords.min(), y_coords.max() + scanline_step, scanline_step
    )

    right_edge: list[list[float]] = []
    left_edge: list[list[float]] = []

    for y in scan_ys:
        strip = points[(y_coords >= y) & (y_coords < y + scanline_step)]
        if len(strip) == 0:
            continue

        x_vals = strip[:, 0]
        right_edge.append([x_vals.max(), y])
        left_edge.append([x_vals.min(), y])

    ring = right_edge + left_edge[::-1]
    if len(ring) < 3:
        return Polygon()

    # Dilate then erode to smooth jagged scanline artifacts.
    return Polygon(ring).buffer(smooth_thresh).buffer(-smooth_thresh)
79
+
80
+
81
def get_actionable_surface(
    mesh: trimesh.Trimesh,
    tol_angle: int = 10,
    tol_z: float = 0.02,
    area_tolerance: float = 0.15,
    place_strategy: Literal["top", "random"] = "random",
) -> tuple[float, Geometry]:
    """Extract the actionable (placeable) surface from a mesh.

    Finds upward-facing surfaces and returns the best one based on the
    placement strategy.

    Args:
        mesh: The input trimesh object.
        tol_angle: Angle tolerance in degrees for detecting up-facing normals.
        tol_z: Z-coordinate tolerance for clustering faces.
        area_tolerance: Tolerance for selecting candidate surfaces by area.
        place_strategy: Either "top" (highest surface) or "random".

    Returns:
        A tuple of (z_height, surface_polygon) representing the selected
        actionable surface.

    """
    # A face is "up-facing" if its normal is within tol_angle of +Z.
    up_vec = np.array([0, 0, 1])
    dots = np.dot(mesh.face_normals, up_vec)
    valid_mask = dots > np.cos(np.deg2rad(tol_angle))

    if not np.any(valid_mask):
        logger.warning(
            "No up-facing surfaces found. Falling back to bounding box top."
        )
        verts = mesh.vertices[:, :2]
        return mesh.bounds[1][2], MultiPoint(verts).convex_hull

    valid_faces_indices = np.where(valid_mask)[0]
    face_z = mesh.triangles_center[valid_mask][:, 2]
    face_areas = mesh.area_faces[valid_mask]

    # Group up-facing faces into horizontal layers by snapped Z.
    z_clusters = _cluster_faces_by_z(
        face_z, face_areas, valid_faces_indices, tol_z
    )

    if not z_clusters:
        return mesh.bounds[1][2], MultiPoint(mesh.vertices[:, :2]).convex_hull

    selected_z, selected_data = _select_surface_cluster(
        z_clusters, area_tolerance, place_strategy
    )

    # Sample the selected layer's faces and take their 2D convex hull.
    cluster_faces = mesh.faces[selected_data["indices"]]
    temp_mesh = trimesh.Trimesh(vertices=mesh.vertices, faces=cluster_faces)
    samples, _ = trimesh.sample.sample_surface(temp_mesh, 10000)

    if len(samples) < 3:
        logger.warning(
            f"Failed to sample enough points on layer Z={selected_z}. "
            "Returning empty polygon."
        )
        return selected_z, Polygon()

    surface_poly = MultiPoint(samples[:, :2]).convex_hull
    return selected_z, surface_poly
144
+
145
+
146
+ def _cluster_faces_by_z(
147
+ face_z: np.ndarray,
148
+ face_areas: np.ndarray,
149
+ face_indices: np.ndarray,
150
+ tol_z: float,
151
+ ) -> dict[float, dict]:
152
+ """Cluster mesh faces by their Z coordinate.
153
+
154
+ Args:
155
+ face_z: Z coordinates of face centers.
156
+ face_areas: Areas of each face.
157
+ face_indices: Original indices of the faces.
158
+ tol_z: Tolerance for Z clustering.
159
+
160
+ Returns:
161
+ Dictionary mapping Z values to cluster data (area and indices).
162
+
163
+ """
164
+ z_clusters: dict[float, dict] = {}
165
+
166
+ for i, z in enumerate(face_z):
167
+ key = round(z / tol_z) * tol_z
168
+
169
+ if key not in z_clusters:
170
+ z_clusters[key] = {"area": 0.0, "indices": []}
171
+
172
+ z_clusters[key]["area"] += face_areas[i]
173
+ z_clusters[key]["indices"].append(face_indices[i])
174
+
175
+ return z_clusters
176
+
177
+
178
def _select_surface_cluster(
    z_clusters: dict[float, dict],
    area_tolerance: float,
    place_strategy: Literal["top", "random"],
) -> tuple[float, dict]:
    """Pick one surface cluster according to the placement strategy.

    Args:
        z_clusters: Dictionary of Z clusters with area and indices.
        area_tolerance: Tolerance for candidate selection by area.
        place_strategy: Either "top" or "random".

    Returns:
        Tuple of (selected_z, cluster_data).

    """
    max_area = max(c["area"] for c in z_clusters.values())
    area_floor = max_area * (1.0 - area_tolerance)
    candidates = [
        (z, data)
        for z, data in z_clusters.items()
        if data["area"] >= area_floor
    ]

    if not candidates:
        # Defensive fallback: take the single largest cluster.
        candidates = [max(z_clusters.items(), key=lambda x: x[1]["area"])]

    if place_strategy == "random":
        selected_z, selected_data = random.choice(candidates)
        logger.info(
            f"Strategy 'random': Selected Z={selected_z:.3f} "
            f"(Area={selected_data['area']:.3f}) "
            f"from {len(candidates)} candidates."
        )
    else:
        candidates.sort(key=lambda x: x[0], reverse=True)
        selected_z, selected_data = candidates[0]
        logger.info(
            f"Strategy 'top': Selected highest Z={selected_z:.3f} "
            f"(Area={selected_data['area']:.3f})"
        )

    return selected_z, selected_data
embodied_gen/skills/spatial_computing/core/visualizer.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+
22
+ import matplotlib.pyplot as plt
23
+ from matplotlib.axes import Axes
24
+ from shapely.geometry import MultiPolygon, Polygon
25
+ from shapely.ops import unary_union
26
+
27
+ # Type aliases
28
+ Geometry = Polygon | MultiPolygon
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
class FloorplanVisualizer:
    """Static utility class for visualizing floorplans.

    Renders rooms, object footprints, the occupied-area union, and text
    labels as stacked matplotlib layers (zorder 1-5), then saves a PNG.
    """

    @staticmethod
    def draw_poly(ax: Axes, poly: Geometry, **kwargs) -> None:
        """Draw a polygon or multi-polygon on matplotlib axes.

        Args:
            ax: Matplotlib axes object.
            poly: Shapely Polygon or MultiPolygon to draw.
            **kwargs: Additional arguments passed to ax.fill().

        """
        if poly.is_empty:
            return

        # MultiPolygon exposes .geoms; wrap a bare Polygon for uniformity.
        geoms = poly.geoms if hasattr(poly, "geoms") else [poly]

        color = kwargs.pop("color", None)
        if color is None:
            cmap = plt.get_cmap("tab10")
            colors = [cmap(i) for i in range(len(geoms))]
        else:
            colors = [color] * len(geoms)

        for i, p in enumerate(geoms):
            if p.is_empty:
                continue
            x, y = p.exterior.xy
            ax.fill(x, y, facecolor=colors[i], **kwargs)

    @classmethod
    def plot(
        cls,
        rooms: dict[str, Geometry],
        footprints: dict[str, Geometry],
        occ_area: Geometry,
        save_path: str,
    ) -> None:
        """Generate and save a floorplan visualization.

        Args:
            rooms: Dictionary mapping room names to floor polygons.
            footprints: Dictionary mapping object names to footprint polygons.
            occ_area: Union of all occupied areas.
            save_path: Path to save the output image.

        """
        fig, ax = plt.subplots(figsize=(10, 10))
        ax.set_aspect("equal")
        cmap_rooms = plt.get_cmap("Pastel1")

        # Layers are drawn back-to-front; zorder inside each helper matches.
        cls._draw_room_floors(ax, rooms, cmap_rooms)
        cls._draw_occupied_area(ax, occ_area)
        cls._draw_footprint_outlines(ax, footprints)
        cls._draw_footprint_labels(ax, footprints)
        cls._draw_room_labels(ax, rooms)
        cls._configure_axes(ax, rooms, occ_area)

        plt.tight_layout()
        plt.savefig(save_path, dpi=300)
        plt.close(fig)

    @classmethod
    def _draw_room_floors(
        cls,
        ax: Axes,
        rooms: dict[str, Geometry],
        cmap: plt.cm.ScalarMappable,
    ) -> None:
        """Draw colored room floor polygons (Layer 1)."""
        for i, (name, poly) in enumerate(rooms.items()):
            # Cycle through the colormap when there are more rooms than colors.
            color = cmap(i % cmap.N)
            cls.draw_poly(
                ax,
                poly,
                color=color,
                alpha=0.6,
                edgecolor="black",
                linestyle="--",
                zorder=1,
            )

    @classmethod
    def _draw_occupied_area(cls, ax: Axes, occ_area: Geometry) -> None:
        """Draw the occupied area overlay (Layer 2)."""
        cls.draw_poly(
            ax,
            occ_area,
            color="tab:blue",
            alpha=0.3,
            lw=0,
            zorder=2,
        )

    @staticmethod
    def _draw_footprint_outlines(
        ax: Axes,
        footprints: dict[str, Geometry],
    ) -> None:
        """Draw footprint outlines (Layer 3)."""
        for poly in footprints.values():
            if poly.is_empty:
                continue
            geoms = poly.geoms if hasattr(poly, "geoms") else [poly]
            for p in geoms:
                ax.plot(*p.exterior.xy, "--", lw=0.8, color="gray", zorder=3)

    @staticmethod
    def _draw_footprint_labels(
        ax: Axes,
        footprints: dict[str, Geometry],
    ) -> None:
        """Draw footprint text labels at polygon centroids (Layer 4)."""
        for name, poly in footprints.items():
            if poly.is_empty:
                continue
            ax.text(
                poly.centroid.x,
                poly.centroid.y,
                name,
                fontsize=5,
                ha="center",
                va="center",
                bbox={
                    "facecolor": "white",
                    "alpha": 0.5,
                    "edgecolor": "none",
                    "pad": 0.1,
                },
                zorder=4,
            )

    @staticmethod
    def _draw_room_labels(ax: Axes, rooms: dict[str, Geometry]) -> None:
        """Draw room text labels (Layer 5)."""
        for name, poly in rooms.items():
            if poly.is_empty:
                continue
            # Room keys carry a "_floor" suffix; strip it for display.
            label = name.replace("_floor", "")
            ax.text(
                poly.centroid.x,
                poly.centroid.y,
                label,
                fontsize=9,
                color="black",
                weight="bold",
                ha="center",
                va="center",
                bbox={
                    "facecolor": "lightgray",
                    "alpha": 0.7,
                    "edgecolor": "black",
                    "boxstyle": "round,pad=0.3",
                },
                zorder=5,
            )

    @staticmethod
    def _configure_axes(
        ax: Axes,
        rooms: dict[str, Geometry],
        occ_area: Geometry,
    ) -> None:
        """Configure axes limits (with margin) and labels."""
        total_geom = unary_union(list(rooms.values()) + [occ_area])

        if total_geom.is_empty:
            minx, miny, maxx, maxy = -1, -1, 1, 1
        else:
            minx, miny, maxx, maxy = total_geom.bounds

        # Keep at least 0.5 m of margin so labels near edges stay visible.
        margin_x = max((maxx - minx) * 0.05, 0.5)
        margin_y = max((maxy - miny) * 0.05, 0.5)

        ax.set_xlim(minx - margin_x, maxx + margin_x)
        ax.set_ylim(miny - margin_y, maxy + margin_y)
        ax.set_title("Floorplan Analysis", fontsize=14)
        ax.set_xlabel("X (m)")
        ax.set_ylabel("Y (m)")
embodied_gen/utils/gpt_clients.py CHANGED
@@ -17,6 +17,7 @@
17
 
18
  import base64
19
  import logging
 
20
  import os
21
  from io import BytesIO
22
  from typing import Optional
@@ -31,7 +32,6 @@ from tenacity import (
31
  stop_after_attempt,
32
  wait_random_exponential,
33
  )
34
- from embodied_gen.utils.process_media import combine_images_to_grid
35
 
36
  logging.getLogger("httpx").setLevel(logging.WARNING)
37
  logging.basicConfig(level=logging.WARNING)
@@ -46,6 +46,38 @@ _CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
46
  CONFIG_FILE = os.path.join(_CURRENT_DIR, "gpt_config.yaml")
47
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  class GPTclient:
50
  """A client to interact with GPT models via OpenAI or Azure API.
51
 
 
17
 
18
  import base64
19
  import logging
20
+ import math
21
  import os
22
  from io import BytesIO
23
  from typing import Optional
 
32
  stop_after_attempt,
33
  wait_random_exponential,
34
  )
 
35
 
36
  logging.getLogger("httpx").setLevel(logging.WARNING)
37
  logging.basicConfig(level=logging.WARNING)
 
46
  CONFIG_FILE = os.path.join(_CURRENT_DIR, "gpt_config.yaml")
47
 
48
 
49
def combine_images_to_grid(
    images: list[str | Image.Image],
    cat_row_col: tuple[int, int] | None = None,
    target_wh: tuple[int, int] = (512, 512),
    image_mode: str = "RGB",
) -> list[Image.Image]:
    """Tile one or more images into a single grid image.

    Args:
        images: Image file paths and/or PIL images to combine.
        cat_row_col: Optional (rows, cols) grid layout. When None, a
            near-square layout is derived from the number of images.
        target_wh: (width, height) each tile is resized to.
        image_mode: PIL mode for loaded images and the output canvas.

    Returns:
        A single-element list holding the combined grid image.
    """
    # Normalize first so a single *path* input is also returned as a PIL
    # image (previously the raw path string leaked through for n == 1).
    pil_images = [
        Image.open(p).convert(image_mode) if isinstance(p, str) else p
        for p in images
    ]
    if len(pil_images) == 1:
        return pil_images

    if cat_row_col is None:
        # Near-square layout: columns = ceil(sqrt(n)), rows fill the rest.
        n_col = math.ceil(math.sqrt(len(pil_images)))
        n_row = math.ceil(len(pil_images) / n_col)
    else:
        n_row, n_col = cat_row_col

    tile_w, tile_h = target_wh
    pil_images = [img.resize(target_wh) for img in pil_images]

    grid = Image.new(image_mode, (n_col * tile_w, n_row * tile_h), (0, 0, 0))
    for idx, img in enumerate(pil_images):
        row, col = divmod(idx, n_col)
        grid.paste(img, (col * tile_w, row * tile_h))

    return [grid]
79
+
80
+
81
  class GPTclient:
82
  """A client to interact with GPT models via OpenAI or Azure API.
83
 
embodied_gen/utils/llm_resolve.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+
22
+ from embodied_gen.utils.gpt_clients import GPTclient
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
# Prompt used by `resolve_instance_with_llm` to map a free-form user
# description to exactly one scene-instance identifier. Placeholders
# `{instance_list}` and `{user_spec}` are filled in via str.format().
DEFAULT_RESOLVE_PROMPT = """You are matching a user's description to exactly one object in a 3D scene.

Scene instance list (each is an identifier, may contain IDs like "kitchen_cabinet_9197760", "banana_001"):
{instance_list}

User description (what they want to refer to, e.g. "黄色水果", "the yellow fruit", "柜子", "oven"):
"{user_spec}"

Rules:
1. Pick the ONE instance from the list that best matches the user's description (semantic match: e.g. "黄色水果" -> banana, "柜子" -> cabinet).
2. If no instance matches, reply with exactly: NONE
3. Otherwise reply with the EXACT instance name from the list, nothing else (no quotes, no explanation).

Your reply (one line, exact instance name or NONE):"""
40
+
41
+
42
def resolve_instance_with_llm(
    gpt_client: GPTclient,
    instance_names: list[str],
    user_spec: str,
    prompt_template: str | None = None,
) -> str | None:
    """Map a user description to a single scene instance name via LLM semantic matching.

    E.g. user says "yellow fruit" and the scene has "banana_001" -> returns
    "banana_001". Returns None when there is no match or the LLM replies NONE;
    the caller should prompt the user that the object does not exist and ask
    for re-entry.

    Args:
        gpt_client: GPT client instance, e.g.
            embodied_gen.utils.gpt_clients.GPT_CLIENT.
        instance_names: Scene instance names, e.g. from
            FloorplanManager.get_instance_names().
        user_spec: User input, e.g. "yellow fruit", "柜子", "the table".
        prompt_template: Optional custom prompt with placeholders
            {instance_list} and {user_spec}.

    Returns:
        The matched instance name (exactly one of instance_names), or None.
    """
    if not user_spec or not instance_names:
        return None

    template = prompt_template or DEFAULT_RESOLVE_PROMPT
    prompt = template.format(
        instance_list="\n".join(f"- {n}" for n in instance_names),
        user_spec=user_spec.strip(),
    )

    try:
        response = gpt_client.query(text_prompt=prompt)
    except Exception as e:
        logger.warning("LLM `resolve_instance_with_llm` query failed: %s", e)
        return None

    if not response:
        return None

    # Use only the first line (splitlines also handles \r\n); models
    # sometimes append explanations despite the prompt.
    first_line = response.strip().splitlines()[0].strip()
    if first_line.upper() == "NONE":
        return None

    # Strip quotes, backticks, and list-bullet decorations the model may add
    # (e.g. `- banana_001`, `"banana_001"`).
    candidate = first_line.strip("\"'` ").lstrip("-* ").strip()
    if not candidate:
        return None

    # Match priority: exact -> case-insensitive -> unique substring.
    if candidate in instance_names:
        return candidate

    names_lower = {n.lower(): n for n in instance_names}
    candidate_lower = candidate.lower()
    if candidate_lower in names_lower:
        return names_lower[candidate_lower]

    matches = [n for n in instance_names if candidate_lower in n.lower()]
    if len(matches) == 1:
        return matches[0]

    logger.debug(
        "resolve_instance_with_llm: LLM reply %r did not match any of %s",
        first_line,
        instance_names[:5],
    )
    return None
embodied_gen/utils/process_media.py CHANGED
@@ -53,7 +53,7 @@ __all__ = [
53
  ]
54
 
55
 
56
- @spaces.GPU
57
  def render_asset3d(
58
  mesh_path: str,
59
  output_root: str,
 
53
  ]
54
 
55
 
56
+ @spaces.GPU(duration=120)
57
  def render_asset3d(
58
  mesh_path: str,
59
  output_root: str,
embodied_gen/utils/trender.py CHANGED
@@ -43,7 +43,7 @@ __all__ = [
43
  ]
44
 
45
 
46
- @spaces.GPU
47
  def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
48
  renderer = MeshRenderer()
49
  renderer.rendering_options.resolution = options.get("resolution", 512)
@@ -66,7 +66,7 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
66
  return rets
67
 
68
 
69
- @spaces.GPU
70
  def render_gs_frames(
71
  sample,
72
  extrinsics,
@@ -117,7 +117,7 @@ def render_gs_frames(
117
  return dict(outputs)
118
 
119
 
120
- @spaces.GPU
121
  def render_video(
122
  sample,
123
  resolution=512,
@@ -149,7 +149,7 @@ def render_video(
149
  return result
150
 
151
 
152
- @spaces.GPU
153
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
154
  return {
155
  "gaussian": {
 
43
  ]
44
 
45
 
46
+ @spaces.GPU(duration=120)
47
  def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
48
  renderer = MeshRenderer()
49
  renderer.rendering_options.resolution = options.get("resolution", 512)
 
66
  return rets
67
 
68
 
69
+ @spaces.GPU(duration=120)
70
  def render_gs_frames(
71
  sample,
72
  extrinsics,
 
117
  return dict(outputs)
118
 
119
 
120
+ @spaces.GPU(duration=120)
121
  def render_video(
122
  sample,
123
  resolution=512,
 
149
  return result
150
 
151
 
152
+ @spaces.GPU(duration=120)
153
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
154
  return {
155
  "gaussian": {
requirements.txt CHANGED
@@ -20,7 +20,7 @@ igraph==0.11.8
20
  pyvista==0.36.1
21
  openai==1.58.1
22
  transformers==4.42.4
23
- gradio
24
  sentencepiece==0.2.0
25
  diffusers==0.31.0
26
  xatlas==0.0.9
 
20
  pyvista==0.36.1
21
  openai==1.58.1
22
  transformers==4.42.4
23
+ gradio[oauth,mcp]==5.33.1
24
  sentencepiece==0.2.0
25
  diffusers==0.31.0
26
  xatlas==0.0.9