xinjie.wang commited on
Commit
1d3e2bd
·
1 Parent(s): 9b8a93e
app_style.py CHANGED
@@ -20,7 +20,7 @@ from gradio.themes.utils.colors import gray, neutral, slate, stone, teal, zinc
20
  lighting_css = """
21
  <style>
22
  #lighter_mesh canvas {
23
- filter: brightness(2.3) !important;
24
  }
25
  </style>
26
  """
 
20
  lighting_css = """
21
  <style>
22
  #lighter_mesh canvas {
23
+ filter: brightness(1) !important;
24
  }
25
  </style>
26
  """
common.py CHANGED
@@ -157,7 +157,7 @@ def end_session(req: gr.Request) -> None:
157
  shutil.rmtree(user_dir)
158
 
159
 
160
- @spaces.GPU
161
  def preprocess_image_fn(
162
  image: str | np.ndarray | Image.Image,
163
  rmbg_tag: str = "rembg",
@@ -264,7 +264,7 @@ def select_point(
264
  return (image, masks), seg_image
265
 
266
 
267
- @spaces.GPU
268
  def image_to_3d(
269
  image: Image.Image,
270
  seed: int,
@@ -564,7 +564,7 @@ def extract_urdf(
564
  )
565
 
566
 
567
- @spaces.GPU
568
  def text2image_fn(
569
  prompt: str,
570
  guidance_scale: float,
@@ -620,7 +620,7 @@ def text2image_fn(
620
  return save_paths + save_paths
621
 
622
 
623
- @spaces.GPU
624
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
625
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
626
 
@@ -636,7 +636,7 @@ def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
636
  return None, None, None
637
 
638
 
639
- @spaces.GPU
640
  def generate_texture_mvimages(
641
  prompt: str,
642
  controlnet_cond_scale: float = 0.55,
@@ -723,7 +723,7 @@ def backproject_texture(
723
  return output_glb_mesh, output_obj_mesh, zip_file
724
 
725
 
726
- @spaces.GPU
727
  def backproject_texture_v2(
728
  mesh_path: str,
729
  input_image: str,
@@ -770,7 +770,7 @@ def backproject_texture_v2(
770
  return output_glb_mesh, output_obj_mesh, zip_file
771
 
772
 
773
- @spaces.GPU
774
  def render_result_video(
775
  mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
776
  ) -> str:
 
157
  shutil.rmtree(user_dir)
158
 
159
 
160
+ @spaces.GPU(duration=120)
161
  def preprocess_image_fn(
162
  image: str | np.ndarray | Image.Image,
163
  rmbg_tag: str = "rembg",
 
264
  return (image, masks), seg_image
265
 
266
 
267
+ @spaces.GPU(duration=300)
268
  def image_to_3d(
269
  image: Image.Image,
270
  seed: int,
 
564
  )
565
 
566
 
567
+ @spaces.GPU(duration=300)
568
  def text2image_fn(
569
  prompt: str,
570
  guidance_scale: float,
 
620
  return save_paths + save_paths
621
 
622
 
623
+ @spaces.GPU(duration=120)
624
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
625
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
626
 
 
636
  return None, None, None
637
 
638
 
639
+ @spaces.GPU(duration=300)
640
  def generate_texture_mvimages(
641
  prompt: str,
642
  controlnet_cond_scale: float = 0.55,
 
723
  return output_glb_mesh, output_obj_mesh, zip_file
724
 
725
 
726
+ @spaces.GPU(duration=300)
727
  def backproject_texture_v2(
728
  mesh_path: str,
729
  input_image: str,
 
770
  return output_glb_mesh, output_obj_mesh, zip_file
771
 
772
 
773
+ @spaces.GPU(duration=120)
774
  def render_result_video(
775
  mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
776
  ) -> str:
embodied_gen/data/backproject_v2.py CHANGED
@@ -596,7 +596,7 @@ class TextureBacker:
596
 
597
  return texture
598
 
599
- @spaces.GPU
600
  def compute_texture(
601
  self,
602
  colors: list[Image.Image],
 
596
 
597
  return texture
598
 
599
+ @spaces.GPU()
600
  def compute_texture(
601
  self,
602
  colors: list[Image.Image],
embodied_gen/data/backproject_v3.py CHANGED
@@ -425,7 +425,7 @@ def parse_args():
425
  return args
426
 
427
 
428
- @spaces.GPU
429
  def entrypoint(
430
  delight_model: DelightingModel = None,
431
  imagesr_model: ImageRealESRGAN = None,
 
425
  return args
426
 
427
 
428
+ @spaces.GPU()
429
  def entrypoint(
430
  delight_model: DelightingModel = None,
431
  imagesr_model: ImageRealESRGAN = None,
embodied_gen/data/mesh_operator.py CHANGED
@@ -412,7 +412,7 @@ class MeshFixer(object):
412
  dtype=torch.int32,
413
  )
414
 
415
- @spaces.GPU
416
  def __call__(
417
  self,
418
  filter_ratio: float,
 
412
  dtype=torch.int32,
413
  )
414
 
415
+ @spaces.GPU(duration=300)
416
  def __call__(
417
  self,
418
  filter_ratio: float,
embodied_gen/models/delight_model.py CHANGED
@@ -140,7 +140,7 @@ class DelightingModel(object):
140
 
141
  return new_image
142
 
143
- @spaces.GPU
144
  @torch.no_grad()
145
  def __call__(
146
  self,
 
140
 
141
  return new_image
142
 
143
+ @spaces.GPU(duration=120)
144
  @torch.no_grad()
145
  def __call__(
146
  self,
embodied_gen/models/sr_model.py CHANGED
@@ -80,7 +80,7 @@ class ImageStableSR:
80
  self.up_pipeline_x4.set_progress_bar_config(disable=True)
81
  # self.up_pipeline_x4.enable_model_cpu_offload()
82
 
83
- @spaces.GPU
84
  def __call__(
85
  self,
86
  image: Union[Image.Image, np.ndarray],
@@ -196,7 +196,7 @@ class ImageRealESRGAN:
196
  half=True,
197
  )
198
 
199
- @spaces.GPU
200
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
201
  """Performs super-resolution on the input image.
202
 
 
80
  self.up_pipeline_x4.set_progress_bar_config(disable=True)
81
  # self.up_pipeline_x4.enable_model_cpu_offload()
82
 
83
+ @spaces.GPU(duration=120)
84
  def __call__(
85
  self,
86
  image: Union[Image.Image, np.ndarray],
 
196
  half=True,
197
  )
198
 
199
+ @spaces.GPU(duration=120)
200
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
201
  """Performs super-resolution on the input image.
202
 
embodied_gen/scripts/render_gs.py CHANGED
@@ -96,7 +96,7 @@ def parse_args():
96
  return args
97
 
98
 
99
- @spaces.GPU
100
  def entrypoint(**kwargs) -> None:
101
  args = parse_args()
102
  for k, v in kwargs.items():
 
96
  return args
97
 
98
 
99
+ @spaces.GPU(duration=120)
100
  def entrypoint(**kwargs) -> None:
101
  args = parse_args()
102
  for k, v in kwargs.items():
embodied_gen/scripts/room_gen/export_scene.py CHANGED
@@ -428,7 +428,7 @@ def create_glass_shader(node_tree, export_usd):
428
 
429
  principled_bsdf_node.inputs["Transmission Weight"].default_value = 1
430
  if export_usd:
431
- principled_bsdf_node.inputs["Alpha"].default_value = 0
432
  node_tree.links.new(
433
  principled_bsdf_node.outputs[0], nodes["Material Output"].inputs[0]
434
  )
@@ -1221,7 +1221,7 @@ def export_curr_scene(
1221
  remove_obj_parents()
1222
  delete_objects()
1223
  triangulate_meshes()
1224
- if omniverse_export:
1225
  split_glass_mats()
1226
  rename_all_meshes()
1227
 
@@ -1454,9 +1454,6 @@ def main(args):
1454
  args.output_folder.mkdir(exist_ok=True)
1455
  targets = sorted(list(args.input_folder.iterdir()))
1456
  for blendfile in targets:
1457
- if blendfile.stem == "solve_state":
1458
- shutil.copy(blendfile, args.output_folder / "solve_state.json")
1459
-
1460
  if not blendfile.suffix == ".blend":
1461
  print(f"Skipping non-blend file {blendfile}")
1462
  continue
@@ -1474,10 +1471,6 @@ def main(args):
1474
  deconvex=args.deconvex,
1475
  center_scene=args.center_scene,
1476
  )
1477
- # wanted to use shutil here but kept making corrupted files
1478
- subprocess.call(
1479
- ["zip", "-r", str(folder.with_suffix(".zip")), str(folder)]
1480
- )
1481
 
1482
  bpy.ops.wm.quit_blender()
1483
 
 
428
 
429
  principled_bsdf_node.inputs["Transmission Weight"].default_value = 1
430
  if export_usd:
431
+ principled_bsdf_node.inputs["Alpha"].default_value = 0.6
432
  node_tree.links.new(
433
  principled_bsdf_node.outputs[0], nodes["Material Output"].inputs[0]
434
  )
 
1221
  remove_obj_parents()
1222
  delete_objects()
1223
  triangulate_meshes()
1224
+ if omniverse_export and format not in ["usda", "usdc"]:
1225
  split_glass_mats()
1226
  rename_all_meshes()
1227
 
 
1454
  args.output_folder.mkdir(exist_ok=True)
1455
  targets = sorted(list(args.input_folder.iterdir()))
1456
  for blendfile in targets:
 
 
 
1457
  if not blendfile.suffix == ".blend":
1458
  print(f"Skipping non-blend file {blendfile}")
1459
  continue
 
1471
  deconvex=args.deconvex,
1472
  center_scene=args.center_scene,
1473
  )
 
 
 
 
1474
 
1475
  bpy.ops.wm.quit_blender()
1476
 
embodied_gen/skills/spatial_computing/README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Using with IDE Agent via Natural Language
2
+
3
+ The Agent will automatically load this skill based on its **description** when you mention URDF, floorplan, indoor scene, object placement, etc. You only need to specify in natural language **what to do** and provide **key information like paths/room names**.
4
+
5
+ ### LLM Environment Configuration (When Using Semantic Matching)
6
+
7
+ If you want to use natural language descriptions (e.g., "put lamp on bookshelf") instead of exact instance/room names, you need to configure the LLM environment first:
8
+
9
+ ```bash
10
+ # If outputs/env.sh exists, source it first
11
+ source outputs/env.sh
12
+ ```
13
+
14
+ If access to the LLM interface is unavailable, please provide exact instance names (you can check them via `--list_instances`).
15
+
16
+ ### URDF Visualization Only (Generate Floorplan)
17
+
18
+ **You can say:**
19
+ - "Help me visualize `path_to/scene.urdf` or `path_to/folder_contain/scene.urdf`"
20
+
21
+ **Agent will:** Use `visualize_floorplan(urdf_path=..., output_path=...)` or the corresponding CLI to generate the floorplan only, without modifying URDF/USD.
22
+
23
+ ### Insert Object and Update Scene (URDF, or URDF+USD)
24
+
25
+ **You can say:**
26
+ - "Put `chair.obj` into scene.urdf's kitchen room"
27
+ - "Put `bottle.obj` into the URDF at `outputs/rooms/Kitchen_seed3773`, instance name bottle_1, update scene and generate floorplan"
28
+ - "Put a cup on the table in the living room" → Agent will use `on_instance="table"`, `place_strategy="top"`, etc.
29
+
30
+ **If you also want to update USD:**
31
+ - "Put a chair in the kitchen, update both URDF and USD, USD path is `xxx/usd/export_scene.usdc`"
32
+ - Note that you need to use **room-cli** to execute (this skill will prompt the Agent), because writing USD requires bpy.
33
+
34
+ **Agent will:** Use `FloorplanManager` + `insert_object` (or `insert_object_to_scene`), execute according to the paths and room names you provided; when USD is needed, use room-cli to run the CLI.
35
+
36
+ ### View Instances and Rooms in the Scene
37
+
38
+ Before placing objects, you can first view what instances and rooms are in the scene:
39
+
40
+ **You can say:**
41
+ - "Help me list all instances and room names in `.../scene.urdf`"
42
+
43
+ **Agent will:** Execute `--list_instances` to display the instance names and room names in the current scene.
44
+
45
+ ### URDF/USD Output Notes
46
+
47
+ - **URDF Output**: The updated URDF is written to `*_updated.urdf` by default (e.g., `scene.urdf` → `scene_updated.urdf`), and **will not overwrite** the original `scene.urdf`
48
+ - **USD Output**: If `usd_path` is specified, the USD file will be written to `*_updated.usdc` following the same rule
49
+ - **Only Update USD**: Requires using **room-cli** to execute, because writing USD needs Blender (bpy)
50
+
51
+ ### What Information to Provide
52
+
53
+ | Goal | Suggested Information to Provide in Conversation |
54
+ |------|-----------------------------------------------|
55
+ | Visualization only | URDF path, floorplan save path (optional, Agent can default to floorplan.png in same directory) |
56
+ | View instances/rooms | URDF path, let Agent list instance names and room names in current scene |
57
+ | Placement + update | URDF path, object mesh path (.obj), instance name (e.g., chair_1), room name (e.g., kitchen); if placing on table, say "place on table"; if updating USD, also provide USD path and use room-cli |
58
+
59
+ Example in one go: "Use spatial_computing skill, generate floorplan for `.../scene.urdf` and save to floorplan.png in same directory, then put `path/to/bottle.obj` into kitchen, instance name bottle_1, update URDF only."
embodied_gen/skills/spatial_computing/REFERENCE.md ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Floorplan Skill — API Reference
2
+
3
+ This document provides API details, configuration items, errors, and dependencies for reference beyond the usage instructions in [SKILL.md](SKILL.md).
4
+
5
+ ## Contents
6
+
7
+ - [Floorplan Skill — API Reference](#floorplan-skill--api-reference)
8
+ - [Contents](#contents)
9
+ - [LLM Environment Configuration](#llm-environment-configuration)
10
+ - [FloorplanManager](#floorplanmanager)
11
+ - [Constructor](#constructor)
12
+ - [Methods](#methods)
13
+ - [Convenience Functions](#convenience-functions)
14
+ - [CLI Features](#cli-features)
15
+ - [Command Line Parameters](#command-line-parameters)
16
+ - [Configuration and Ignore Items](#configuration-and-ignore-items)
17
+ - [USD and Blender](#usd-and-blender)
18
+ - [Errors and Return Values](#errors-and-return-values)
19
+ - [Dependencies](#dependencies)
20
+ - [Usage Recommendations](#usage-recommendations)
21
+
22
+ ---
23
+
24
+ ## LLM Environment Configuration
25
+
26
+ Before using `resolve_instance_with_llm` or `FloorplanManager.resolve_on_instance`/`resolve_in_room` for semantic matching, configure the LLM API and ensure access to the interface.
27
+
28
+ ```bash
29
+ # Use the project-provided env (Azure + proxy, etc.), if outputs/env.sh exists:
30
+ source outputs/env.sh
31
+ ```
32
+
33
+ If access to the LLM interface is unavailable, prompt the user.
34
+
35
+ ---
36
+
37
+ ## FloorplanManager
38
+
39
+ ### Constructor
40
+
41
+ ```python
42
+ from embodied_gen.skills.spatial_computing.api import FloorplanManager
43
+
44
+ manager = FloorplanManager(
45
+ urdf_path="scene.urdf", # Required
46
+ usd_path=None, # Optional; USD write after insert if provided
47
+ mesh_sample_num=50000,
48
+ ignore_items=None, # Default ["ceiling", "light", "exterior"]
49
+ )
50
+ ```
51
+
52
+ ### Methods
53
+
54
+ | Method | Description |
55
+ |--------|-------------|
56
+ | `visualize(output_path)` | Generate floorplan and save as image |
57
+ | `insert_object(asset_path, instance_key, in_room=..., on_instance=..., place_strategy=..., n_max_attempt=2000, rotation_rpy=...)` | Place object, automatically write back to URDF/USD on success, return `[x,y,z]` or `None`. `on_instance` must be an exact instance name |
58
+ | `update_scene(urdf_output_path=..., usd_output_path=...)` | Manually write back currently placed instances; generally not needed (called inside `insert_object`). Use for custom output paths |
59
+ | `get_room_names()` | List of room names |
60
+ | `get_instance_names()` | List of instance names (excluding walls/floor) |
61
+ | `resolve_on_instance(on_instance, gpt_client=None)` | Resolve user description (e.g., "柜子", "书柜") to exact instance name; if already exact, return directly. With gpt_client, use LLM semantic matching, return `None` if no match |
62
+ | `resolve_in_room(in_room, gpt_client=None)` | Resolve user description to exact room name; if already exact, return directly. With gpt_client, use LLM semantic matching, return `None` if no match |
63
+ | `resolve_beside_instance(beside_instance, gpt_client=None)` | Resolve user description to exact instance name for beside placement; if already exact, return directly. With gpt_client, use LLM semantic matching, return `None` if no match |
64
+ | `get_occupied_area()` | Occupied area Shapely geometry |
65
+ | `get_floor_union()` | Floor area union geometry |
66
+
67
+ **Common `insert_object` parameters**: `in_room` to limit room; `on_instance` to place on top of an instance (exact instance name, can be resolved via `resolve_on_instance`); `beside_instance` to place beside an instance on the floor (exact instance name, can be resolved via `resolve_beside_instance`); `beside_distance` max distance in meters for beside placement (default 0.5); `place_strategy` is `"random"` (default) or `"top"`; `rotation_rpy` not required by default; `n_max_attempt` maximum placement attempts before failure. Note: `on_instance` and `beside_instance` are mutually exclusive.
68
+
69
+ ---
70
+
71
+ ## Convenience Functions
72
+
73
+ | Function | Description |
74
+ |----------|-------------|
75
+ | `visualize_floorplan(urdf_path, output_path, mesh_sample_num=50000, ignore_items=None)` | Generate floorplan only, do not write back to scene |
76
+ | `insert_object_to_scene(urdf_path, asset_path, instance_key, output_path, usd_path=None, in_room=None, on_instance=None, beside_instance=None, beside_distance=0.5, place_strategy="random", rotation_rpy=...)` | Create manager, place, automatically write back, generate floorplan; `on_instance` must be exact instance name; `beside_instance` places beside target on floor; returns placement center `[x,y,z]` or `None`. URDF output does not overwrite original file by default |
77
+ | `resolve_instance_with_llm(gpt_client, instance_names, user_spec, prompt_template=None)` | Use LLM to semantically match user description to one exact instance name in the scene; return `None` if no match, caller should prompt "does not exist, please re-enter". Depends on `embodied_gen.utils.gpt_clients.GPTclient` |
78
+
79
+ ## CLI Features
80
+
81
+ ### Command Line Parameters
82
+
83
+ | Parameter | Description |
84
+ |-----------|-------------|
85
+ | `--urdf_path` | Input URDF scene file path (required) |
86
+ | `--usd_path` | Optional USD scene file path, update USD simultaneously if specified |
87
+ | `--asset_path` | Placeholder object mesh file path (.obj) |
88
+ | `--instance_key` | Unique identifier for the new instance, default `inserted_object` |
89
+ | `--in_room` | Limit placement to specified room, supports semantic description (requires LLM environment) |
90
+ | `--on_instance` | Place on top of specified instance, supports semantic description (requires LLM environment) |
91
+ | `--beside_instance` | Place beside specified instance on the floor, supports semantic description (requires LLM environment) |
92
+ | `--beside_distance` | Max distance (meters) from target instance for beside placement, default 0.5 |
93
+ | `--place_strategy` | Placement strategy: `"random"` (default) or `"top"` (select highest surface) |
94
+ | `--rotation_rpy` | Initial rotation angle (roll, pitch, yaw radians) |
95
+ | `--output_path` | Floorplan output path |
96
+ | `--list_instances` | List instance names and room names in current scene, print and exit |
97
+ | `--max_placement_attempts` | Maximum placement attempts before failure, default 2000 |
98
+
99
+ ### CLI Usage Examples
100
+
101
+ View scene instance names and room names:
102
+ ```bash
103
+ python -m embodied_gen.skills.spatial_computing.cli.main \
104
+ --urdf_path .../scene.urdf --list_instances
105
+ ```
106
+
107
+ Visualize floorplan only:
108
+ ```bash
109
+ python -m embodied_gen.skills.spatial_computing.cli.main \
110
+ --urdf_path .../scene.urdf --output_path .../floorplan.png
111
+ ```
112
+
113
+ Put lamp on bookshelf (supports semantic description):
114
+ ```bash
115
+ source outputs/env.sh
116
+ python -m embodied_gen.skills.spatial_computing.cli.main \
117
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
118
+ --asset_path .../lamp.obj --instance_key lamp_on_bookcase \
119
+ --on_instance 书柜
120
+ ```
121
+
122
+ Put table in a room:
123
+ ```bash
124
+ python -m embodied_gen.skills.spatial_computing.cli.main \
125
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
126
+ --asset_path .../table.obj --instance_key table_1 \
127
+ --in_room living_room
128
+ ```
129
+
130
+ Place object on table in living room (room + on object):
131
+ ```bash
132
+ python -m embodied_gen.skills.spatial_computing.cli.main \
133
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
134
+ --asset_path .../apple.obj --instance_key apple_1 \
135
+ --in_room living_room --on_instance table --place_strategy top
136
+ ```
137
+
138
+ Place chair beside table (on floor, collision-free):
139
+ ```bash
140
+ source outputs/env.sh
141
+ python -m embodied_gen.skills.spatial_computing.cli.main \
142
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
143
+ --asset_path .../chair.obj --instance_key chair_beside_table \
144
+ --beside_instance 桌子
145
+ ```
146
+
147
+ Place beside with room constraint and custom distance:
148
+ ```bash
149
+ python -m embodied_gen.skills.spatial_computing.cli.main \
150
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
151
+ --asset_path .../chair.obj --instance_key chair_beside_table \
152
+ --in_room kitchen --beside_instance table --beside_distance 0.8
153
+ ```
154
+
155
+ **URDF Output Note**: The updated URDF is written to `*_updated.urdf` by default (e.g., `scene.urdf` → `scene_updated.urdf`), and **will not overwrite** the original `scene.urdf` unless the user specifies a custom output path.
156
+
157
+ ---
158
+
159
+ ## Configuration and Ignore Items
160
+
161
+ | Parameter | Default | Description |
162
+ |-----------|---------|-------------|
163
+ | `mesh_sample_num` | 50000 | Number of mesh sampling points, larger values yield more precise floor plan polygons |
164
+ | `ignore_items` | `["ceiling", "light", "exterior"]` | Link name patterns to skip during URDF parsing |
165
+
166
+ ---
167
+
168
+ ## USD and Blender
169
+
170
+ - Writing USD converts `.obj` to `.usdc`, requiring **Blender (bpy)**. For USD writing in this project, use the **room-cli** environment (bpy installed).
171
+ - Without `usd_path`, only URDF is updated, no bpy needed.
172
+ - Assets in `.usd`/`.usdc`/`.usda` format are directly referenced; only `.obj` files are converted via bpy. If `*_collision.obj` exists in the same directory as the visual mesh, it will be written to URDF for collision.
173
+
174
+ ---
175
+
176
+ ## Errors and Return Values
177
+
178
+ **Exceptions**
179
+
180
+ - **ValueError**: Room or instance name does not exist; `update_scene()` called before `insert_object()` or after failed insertion; `instance_key` already exists.
181
+
182
+ **Return Values**
183
+
184
+ - `insert_object` / `insert_object_to_scene`: Returns `[x, y, z]` on success, `None` on failure (e.g., no valid placement after `n_max_attempt` attempts).
185
+
186
+ ---
187
+
188
+ ## Dependencies
189
+
190
+ | Type | Package | Description |
191
+ |------|---------|-------------|
192
+ | Core | trimesh, shapely, matplotlib, numpy | Parsing and visualization |
193
+ | USD Writing | pxr (e.g., `pip install usd-core`), bpy | Required only when using `usd_path`; bpy requires Blender |
194
+ | LLM Semantic Matching | openai, project gpt_config | `resolve_instance_with_llm` requires `GPTclient` instance (see `embodied_gen.utils.gpt_clients`) and corresponding API configuration |
195
+ | CLI | tyro | Required only for CLI entry point |
196
+
197
+ ---
198
+
199
+ ## Usage Recommendations
200
+
201
+ - **Upright objects**: Default orientation applies, no need to set `rotation_rpy`; for special orientations, pass `(roll, pitch, yaw)` radians, e.g., upright `(1.57, 0, 0)`.
202
+ - **Placing on furniture**: First use `resolve_instance_with_llm(gpt_client, get_instance_names(), user_input)` to get the exact instance name, then `insert_object(..., on_instance=resolved, place_strategy="top")`; if matching fails, prompt user to re-enter. For random ground placement, use `place_strategy="random"` (default).
203
+ - **Placing beside furniture**: Use `insert_object(..., beside_instance=resolved, beside_distance=0.5)` to place the new object on the floor beside the target instance, collision-free. Increase `beside_distance` if placement fails (e.g., when the area around the target is crowded).
204
+ - **Collision meshes**: If `*_collision.obj` exists in the same directory as the visual mesh, it will automatically be used for the collision node in URDF.
embodied_gen/skills/spatial_computing/SKILL.md ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: spatial-computing-floorplan
3
+ description: Visualizes floorplans from URDF scene files and inserts 3D assets with collision-aware placement on surfaces. Supports semantic instance matching via LLM (e.g., "put lamp on bookshelf"). Use when working with URDF/USD indoor scenes, floorplan visualization, object placement, or room-level scene editing.
4
+ ---
5
+
6
+ # Floorplan & Object Placement
7
+
8
+ ## Overview
9
+
10
+ Parse indoor scenes from URDF, generate 2D floorplans, or place 3D objects in scenes and write back to URDF/USD. After successful insertion, the corresponding file is automatically updated based on whether `urdf_path`/`usd_path` is provided.
11
+
12
+ **When to use**: Use this skill when you need to generate floorplans from URDF, place objects on specified rooms/furniture surfaces, or batch update URDF/USD files.
13
+
14
+ ---
15
+
16
+ ## LLM Environment (Required for Semantic Matching)
17
+
18
+ Before using `resolve_instance_with_llm` for semantic matching in **Python**, configure the LLM API and ensure access to the interface. Prompt the user if access is unavailable.
19
+
20
+ ```bash
21
+ # Use the project-provided env (Azure + proxy, etc.), if outputs/env.sh exists:
22
+ source outputs/env.sh
23
+ ```
24
+
25
+ ---
26
+
27
+ ## Core Convention: Placement Requests Must Use This Skill's Interface
28
+
29
+ When users request "put A somewhere" or "visualize urdf", you **must** implement it using this skill's interface:
30
+
31
+ | User Request Example | Corresponding Parameter & Usage |
32
+ |---------------------|---------------------------------|
33
+ | **Put A on B** (e.g., "put lamp on bookshelf") | `on_instance` (instance name, obtained from `--list_instances`) |
34
+ | **Put A beside B** (e.g., "put chair beside table") | `beside_instance` (instance name, obtained from `--list_instances`); placed on floor near target |
35
+ | **Put A in a room** (e.g., "put table in living room") | `in_room` (room name, obtained from `--list_instances`) |
36
+ | **Put A beside B in a room** (e.g., "put chair beside table in kitchen") | `beside_instance` + `in_room` |
37
+ | **Put A on B in a room** (e.g., "put apple on table in living room") | Decomposed into "apple" and "living room" as `in_room` and `on_instance` |
38
+ | **Visualize scene.urdf** | `cli.main --urdf_path .../scene.urdf --output_path .../floorplan.png`; output_path defaults to same directory as urdf |
39
+
40
+ - When no match is found, prompt "The object/room does not exist, please re-enter" and provide the current scene object or room list.
41
+ - Instance names should not use the `<link name="...">` from URDF. **Recommended**: Run `--list_instances` before placement to view current instance name list, and select the closest semantic match as `--on_instance`.
42
+
43
+ ---
44
+
45
+ ## CLI Examples
46
+
47
+ > **Tip**: The URDF file is typically located at `<room_folder>/urdf/export_scene/scene.urdf` (e.g., `outputs/rooms/Kitchen_seed0/urdf/export_scene/scene.urdf`).
48
+
49
+ ### Example 1: View Instance Names and Room Names in Current Scene
50
+
51
+ ```bash
52
+ # View instance names and room names in current scene (to fill in --on_instance / --in_room)
53
+ python -m embodied_gen.skills.spatial_computing.cli.main \
54
+ --urdf_path .../scene.urdf --list_instances
55
+ ```
56
+
57
+ ### Example 2: Visualize Floorplan Only
58
+
59
+ ```bash
60
+ python -m embodied_gen.skills.spatial_computing.cli.main \
61
+ --urdf_path .../scene.urdf --output_path .../floorplan.png
62
+ ```
63
+
64
+ ### Example 3: Put Lamp on Bookshelf (Place on an Object)
65
+
66
+ `--on_instance` can be filled with the instance name returned by `--list_instances` or a semantic description.
67
+
68
+ ```bash
69
+ source outputs/env.sh
70
+ python -m embodied_gen.skills.spatial_computing.cli.main \
71
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
72
+ --asset_path .../lamp.obj --instance_key lamp_on_bookcase --on_instance 书柜
73
+ ```
74
+
75
+ ---
76
+
77
+ ### Example 4: Put Table in Living Room (Place in a Room)
78
+
79
+ ```bash
80
+ python -m embodied_gen.skills.spatial_computing.cli.main \
81
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
82
+ --asset_path .../table.obj --instance_key table_1 \
83
+ --in_room living_room
84
+ ```
85
+
86
+ ---
87
+
88
+ ### Example 5: Put Apple on Table in Living Room (Room + on Object)
89
+
90
+ ```bash
91
+ python -m embodied_gen.skills.spatial_computing.cli.main \
92
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
93
+ --asset_path .../apple.obj --instance_key apple_1 \
94
+ --in_room living_room --on_instance table --place_strategy top
95
+ ```
96
+
97
+ ---
98
+
99
+ ### Example 6: Put Chair Beside Table (Place Beside an Object)
100
+
101
+ `--beside_instance` places the new object on the floor near the specified instance, avoiding collision.
102
+
103
+ ```bash
104
+ python -m embodied_gen.skills.spatial_computing.cli.main \
105
+ --urdf_path .../scene.urdf --output_path .../floorplan.png \
106
+ --asset_path .../chair.obj --instance_key chair_beside_table \
107
+ --beside_instance table
108
+ ```
109
+
110
+ With room constraint and update usd:
111
+ ```bash
112
+ room-cli -m embodied_gen.skills.spatial_computing.cli.main \
113
+ --urdf_path .../scene.urdf --usd_path .../scene.usdc \
114
+ --output_path .../floorplan.png \
115
+ --asset_path .../chair.obj --instance_key chair_beside_table \
116
+ --in_room kitchen --beside_instance table
117
+ ```
118
+
119
+ ---
120
+
121
+ ## Python Examples
122
+
123
+ ### Generate Floorplan Only
124
+
125
+ Parse scene from URDF and export 2D floorplan without modifying scene files.
126
+
127
+ ```python
128
+ from embodied_gen.skills.spatial_computing.api import visualize_floorplan
129
+
130
+ visualize_floorplan(urdf_path="scene.urdf", output_path="floorplan.png")
131
+ ```
132
+
133
+ ### Place Object and Write Back to Scene
134
+
135
+ Complete in one call: create manager, place, write back URDF/USD, generate floorplan. Returns placement position `[x, y, z]` or `None`. `on_instance` must be an exact instance name (can be obtained via `resolve_instance_with_llm`).
136
+
137
+ **URDF output does not overwrite original file by default**: The updated URDF is written to `*_updated.urdf` by default (e.g., `scene.urdf` → `scene_updated.urdf`), and **will not overwrite** the original `scene.urdf` unless the user specifies overwrite.
138
+
139
+ ```python
140
+ from embodied_gen.skills.spatial_computing.api import insert_object_to_scene, resolve_instance_with_llm
141
+
142
+ # First get exact instance name via LLM semantic matching
143
+ resolved_instance = resolve_instance_with_llm(gpt_client, scene_instances, "table")
144
+
145
+ position = insert_object_to_scene(
146
+ urdf_path="scene.urdf",
147
+ asset_path="chair.obj",
148
+ instance_key="chair_1",
149
+ output_path="floorplan.png",
150
+ in_room="living_room",
151
+ on_instance=resolved_instance,
152
+ usd_path="scene.usdc", # Optional; requires room-cli if specified
153
+ )
154
+
155
+ # Place beside an existing instance (on the floor, collision-free)
156
+ resolved_beside = resolve_instance_with_llm(gpt_client, scene_instances, "table")
157
+
158
+ position = insert_object_to_scene(
159
+ urdf_path="scene.urdf",
160
+ asset_path="chair.obj",
161
+ instance_key="chair_beside_table",
162
+ output_path="floorplan.png",
163
+ beside_instance=resolved_beside,
164
+ beside_distance=0.5, # meters from target instance
165
+ )
166
+ ```
167
+
168
+ By default, `usd_path` is not specified, only URDF is updated. If `usd_path` is specified, you must use **room-cli** to execute (USD requires bpy environment) to update USD simultaneously.
169
+ - Write only URDF: `python -m embodied_gen.skills.spatial_computing.cli.main ...`
170
+ - Write USD: `room-cli -m embodied_gen.skills.spatial_computing.cli.main ... --usd_path <path>`
171
+
172
+ ### Common Parameters
173
+
174
+ | Parameter | Meaning |
175
+ |-----------|---------|
176
+ | `in_room` | Limit placement to specified room |
177
+ | `on_instance` | Place on top of specified instance; must be **exact instance name** (obtained via `resolve_instance_with_llm`) |
178
+ | `beside_instance` | Place beside specified instance on the floor; must be **exact instance name** (obtained via `resolve_instance_with_llm`). Mutually exclusive with `on_instance` |
179
+ | `beside_distance` | Max distance (meters) from target instance for beside placement. Default `0.5`. Increase if placement fails |
180
+ | `place_strategy` | `"random"` random placement (default, e.g., bookshelf with 3 layers will randomly select one), `"top"` select highest surface |
181
+ | `rotation_rpy` | Not required by default; pass (roll, pitch, yaw) radians for special orientations |
182
+
183
+ ## Next Steps
184
+
185
+ - For complete API, configuration, errors, and dependencies, see [REFERENCE.md](REFERENCE.md).
embodied_gen/skills/spatial_computing/__init__.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from embodied_gen.skills.spatial_computing.core import (
19
+ UrdfSemanticInfoCollector,
20
+ get_actionable_surface,
21
+ points_to_polygon,
22
+ )
23
+ from embodied_gen.skills.spatial_computing.core.visualizer import (
24
+ FloorplanVisualizer,
25
+ )
26
+
27
+ __all__ = [
28
+ "FloorplanVisualizer",
29
+ "UrdfSemanticInfoCollector",
30
+ "points_to_polygon",
31
+ "get_actionable_surface",
32
+ ]
embodied_gen/skills/spatial_computing/api/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """High-level API for floorplan operations.
2
+
3
+ This module provides simplified interfaces for visualizing floorplans
4
+ and inserting objects into 3D indoor scenes.
5
+ """
6
+
7
+ from embodied_gen.skills.spatial_computing.api.floorplan_api import (
8
+ FloorplanManager,
9
+ insert_object_to_scene,
10
+ visualize_floorplan,
11
+ )
12
+ from embodied_gen.utils.llm_resolve import resolve_instance_with_llm
13
+
14
+ __all__ = [
15
+ "FloorplanManager",
16
+ "visualize_floorplan",
17
+ "insert_object_to_scene",
18
+ "resolve_instance_with_llm",
19
+ ]
embodied_gen/skills/spatial_computing/api/floorplan_api.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import os
21
+ from dataclasses import dataclass, field
22
+ from typing import Literal
23
+
24
+ from shapely.geometry import MultiPolygon, Polygon
25
+ from embodied_gen.skills.spatial_computing.core import (
26
+ UrdfSemanticInfoCollector,
27
+ )
28
+ from embodied_gen.skills.spatial_computing.core.collector import (
29
+ DEFAULT_BESIDE_DISTANCE,
30
+ DEFAULT_IGNORE_ITEMS,
31
+ DEFAULT_MESH_SAMPLE_NUM,
32
+ DEFAULT_ROTATION_RPY,
33
+ )
34
+ from embodied_gen.skills.spatial_computing.core.visualizer import (
35
+ FloorplanVisualizer,
36
+ )
37
+ from embodied_gen.utils.llm_resolve import resolve_instance_with_llm
38
+
39
+ # Type aliases
40
+ Geometry = Polygon | MultiPolygon
41
+ logger = logging.getLogger(__name__)
42
+
43
+
44
@dataclass
class FloorplanConfig:
    """Configuration for floorplan operations."""

    # NOTE: the bare-string "attribute docstrings" below are intentional —
    # this dataclass is parsed by tyro (see cli/main.py), which surfaces
    # them as per-flag CLI help text. Do not convert them to comments.

    urdf_path: str
    """Path to the input URDF scene file."""

    output_path: str | None = None
    """Path to save the floorplan visualization image."""

    usd_path: str | None = None
    """Optional path to the USD scene file for USD export."""

    asset_path: str | None = None
    """Optional path to the asset mesh file (.obj)."""

    instance_key: str = "inserted_object"
    """Unique key for the added instance."""

    in_room: str | None = None
    """Optional room name to constrain asset placement."""

    on_instance: str | None = None
    """Optional instance name to place the asset on top of (exact key from get_instance_names())."""

    beside_instance: str | None = None
    """Optional instance name to place the asset beside (on floor, near the target)."""

    beside_distance: float = DEFAULT_BESIDE_DISTANCE
    """Max distance (meters) from the target instance for beside placement."""

    place_strategy: Literal["top", "random"] = "random"
    """Placement strategy for the asset."""

    rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY
    """Rotation in roll-pitch-yaw (radians)."""

    ignore_items: list[str] = field(
        default_factory=lambda: list(DEFAULT_IGNORE_ITEMS)
    )
    """List of item name patterns to ignore during parsing."""

    mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM
    """Number of points to sample from meshes."""

    max_placement_attempts: int = 2000
    """Maximum attempts for asset placement."""

    # NOTE(review): update_urdf / update_usd are not read by the CLI
    # entrypoint visible in this change set — confirm they are consumed
    # elsewhere or wire them into entrypoint()/update_scene().
    update_urdf: bool = True
    """Whether to update and save the URDF file."""

    update_usd: bool = True
    """Whether to update and save the USD file."""

    list_instances: bool = False
    """If True, print instance and room names then exit (no placement/visualization)."""
100
+
101
+
102
class FloorplanManager:
    """High-level API for floorplan operations.

    This class provides simplified methods for:
    - Loading and analyzing URDF scenes
    - Visualizing floorplans
    - Inserting objects into scenes
    - Updating URDF and USD files

    Example:
        >>> manager = FloorplanManager(urdf_path="scene.urdf", usd_path="scene.usdc")
        >>> manager.visualize(output_path="floorplan.png")
        >>> position = manager.insert_object(
        ...     asset_path="chair.obj",
        ...     instance_key="chair_1",
        ...     in_room="kitchen"
        ... )
        # URDF/USD are updated automatically after insert
    """

    def __init__(
        self,
        urdf_path: str,
        usd_path: str | None = None,
        mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
        ignore_items: list[str] | None = None,
    ) -> None:
        """Initialize the floorplan manager.

        Args:
            urdf_path: Path to the URDF file.
            usd_path: Optional path to the USD file for scene updates.
            mesh_sample_num: Number of points to sample from meshes.
            ignore_items: List of item name patterns to ignore.
        """
        self.urdf_path = urdf_path
        self.usd_path = usd_path
        self.collector = UrdfSemanticInfoCollector(
            mesh_sample_num=mesh_sample_num,
            ignore_items=ignore_items,
        )
        self.collector.collect(urdf_path)
        # Metadata of the most recently inserted instance; consumed by
        # update_scene() to write the URDF/USD outputs.
        self.pending_instance_data: dict | None = None

    def visualize(
        self,
        output_path: str,
    ) -> None:
        """Generate and save a floorplan visualization.

        Args:
            output_path: Path to save the output image.
        """
        FloorplanVisualizer.plot(
            self.collector.rooms,
            self.collector.footprints,
            self.collector.occ_area,
            output_path,
        )
        logger.info(f"✅ Floorplan visualization saved to {output_path}")

    def insert_object(
        self,
        asset_path: str,
        instance_key: str,
        in_room: str | None = None,
        on_instance: str | None = None,
        beside_instance: str | None = None,
        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
        rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
        n_max_attempt: int = 2000,
        place_strategy: Literal["top", "random"] = "random",
    ) -> list[float] | None:
        """Insert an object into the scene with automatic placement.

        On success the URDF/USD files are updated immediately via
        update_scene().

        Args:
            asset_path: Path to the asset mesh file (.obj).
            instance_key: Unique key for the new instance.
            in_room: Optional room name to constrain placement.
            on_instance: Optional instance name to place on top of.
            beside_instance: Optional instance name to place beside (on floor).
            beside_distance: Max distance from target for beside placement.
            rotation_rpy: Initial rotation in roll-pitch-yaw.
            n_max_attempt: Maximum placement attempts.
            place_strategy: Either "top" or "random".

        Returns:
            List [x, y, z] of the placed instance center, or None if failed.
        """
        center = self.collector.add_instance(
            asset_path=asset_path,
            instance_key=instance_key,
            in_room=in_room,
            on_instance=on_instance,
            beside_instance=beside_instance,
            beside_distance=beside_distance,
            rotation_rpy=rotation_rpy,
            n_max_attempt=n_max_attempt,
            place_strategy=place_strategy,
        )

        if center is not None:
            self.pending_instance_data = {
                "asset_path": asset_path,
                "instance_key": instance_key,
                "center": center,
                "rotation_rpy": rotation_rpy,
            }
            self.update_scene()

        return center

    def update_scene(
        self,
        urdf_output_path: str | None = None,
        usd_output_path: str | None = None,
    ) -> None:
        """Update URDF and/or USD with inserted instances.

        Updates URDF if self.urdf_path is set, USD if self.usd_path is set.
        Both are updated when both paths are set. No-op when no instance was
        inserted.

        Note: USD updates require Blender (bpy) to convert .obj to .usdc format.

        Args:
            urdf_output_path: Optional custom path for URDF output.
            usd_output_path: Optional custom path for USD output.

        Raises:
            ValueError: If no instance has been inserted.
        """
        if self.pending_instance_data is None:
            raise ValueError(
                "No instance to update. Call insert_object() first."
            )

        data = self.pending_instance_data
        # A sibling "<name>_collision.obj" is used as the collision mesh
        # when it exists next to the visual mesh.
        collision_path = data["asset_path"].replace(".obj", "_collision.obj")
        if not os.path.exists(collision_path):
            collision_path = None
        # FIX: pass the same tuple form to both writers (previously the USD
        # branch received the raw center while URDF received a tuple).
        trans_xyz = tuple(data["center"])

        if self.urdf_path:
            urdf_out = urdf_output_path or self.urdf_path.replace(
                ".urdf", "_updated.urdf"
            )
            self.collector.update_urdf_info(
                output_path=urdf_out,
                instance_key=data["instance_key"],
                visual_mesh_path=data["asset_path"],
                collision_mesh_path=collision_path,
                trans_xyz=trans_xyz,
                rot_rpy=data["rotation_rpy"],
                joint_type="fixed",
            )

        if self.usd_path:
            usd_out = usd_output_path or self.usd_path.replace(
                ".usdc", "_updated.usdc"
            )
            self.collector.update_usd_info(
                usd_path=self.usd_path,
                output_path=usd_out,
                instance_key=data["instance_key"],
                visual_mesh_path=data["asset_path"],
                trans_xyz=trans_xyz,
                rot_rpy=data["rotation_rpy"],
            )

    def get_room_names(self) -> list[str]:
        """Get list of room names in the scene.

        Returns:
            List of room names.
        """
        return list(self.collector.rooms.keys())

    def get_instance_names(self) -> list[str]:
        """Get list of instance names in the scene.

        Walls and floors are excluded — they are structural, not placeable
        targets.

        Returns:
            List of instance names.
        """
        return [
            k
            for k in self.collector.instances.keys()
            if k != "walls" and "floor" not in k.lower()
        ]

    def get_instance_names_in_room(self, in_room: str) -> list[str]:
        """Get instance names that are spatially inside a given room.

        Buffers the room polygon slightly to handle mesh-sampling precision.

        Args:
            in_room: Exact room key (must exist in get_room_names()).

        Returns:
            List of instance names within the room.
        """
        room_poly = self.collector.rooms.get(in_room)
        if room_poly is None:
            return self.get_instance_names()
        room_buffered = room_poly.buffer(0.1)
        all_names = self.get_instance_names()
        return [
            k
            for k in all_names
            if room_buffered.contains(
                self.collector.instances[k].representative_point()
            )
        ]

    def _resolve_name(
        self,
        query: str,
        candidates: list[str],
        gpt_client: object | None,
    ) -> str | None:
        """Resolve a user-supplied name against a candidate list.

        Exact matches win; otherwise the optional LLM client resolves the
        semantic description to one candidate. Shared by all resolve_*
        methods so their behavior stays consistent.

        Args:
            query: Exact key or free-form description.
            candidates: Valid keys to match against.
            gpt_client: Optional GPT client for semantic resolution.

        Returns:
            Exact key, or None if not found / LLM returned NONE.
        """
        if query in candidates:
            return query
        if gpt_client is not None:
            return resolve_instance_with_llm(
                gpt_client, candidates, query  # type: ignore[arg-type]
            )
        return None

    def resolve_on_instance(
        self,
        on_instance: str,
        gpt_client: object | None = None,
    ) -> str | None:
        """Resolve on_instance to an exact key (for placement).

        If on_instance is already in get_instance_names(), return it.
        Otherwise if gpt_client is provided, use LLM to resolve user
        description (e.g. "柜子", "书柜") to one exact instance key.

        Args:
            on_instance: Exact instance key or semantic description.
            gpt_client: Optional GPT client for semantic resolve (e.g. GPT_CLIENT).

        Returns:
            Exact instance key, or None if not found / LLM returned NONE.
        """
        return self._resolve_name(
            on_instance, self.get_instance_names(), gpt_client
        )

    def resolve_in_room(
        self,
        in_room: str,
        gpt_client: object | None = None,
    ) -> str | None:
        """Resolve in_room to an exact room name (for placement).

        If in_room is already in get_room_names(), return it.
        Otherwise if gpt_client is provided, use LLM to resolve user
        description (e.g. "kitchen", "the place for cooking") to one exact
        room name.

        Args:
            in_room: Exact room name or semantic description.
            gpt_client: Optional GPT client for semantic resolve (e.g. GPT_CLIENT).

        Returns:
            Exact room name, or None if not found / LLM returned NONE.
        """
        return self._resolve_name(in_room, self.get_room_names(), gpt_client)

    def resolve_beside_instance(
        self,
        beside_instance: str,
        gpt_client: object | None = None,
        in_room: str | None = None,
    ) -> str | None:
        """Resolve beside_instance to an exact key (for beside placement).

        If beside_instance is already in get_instance_names(), return it.
        Otherwise if gpt_client is provided, use LLM to resolve user
        description (e.g. "桌子", "沙发") to one exact instance key.

        When `in_room` is given, only instances spatially inside that room
        are considered as candidates.

        Args:
            beside_instance: Exact instance key or semantic description.
            gpt_client: Optional GPT client for semantic resolve.
            in_room: Optional resolved room key to restrict candidate scope.

        Returns:
            Exact instance key, or None if not found / LLM returned NONE.
        """
        if in_room is not None:
            names = self.get_instance_names_in_room(in_room)
        else:
            names = self.get_instance_names()
        return self._resolve_name(beside_instance, names, gpt_client)

    def get_occupied_area(self) -> Geometry:
        """Get the union of all occupied areas.

        Returns:
            Shapely geometry representing occupied areas.
        """
        return self.collector.occ_area

    def get_floor_union(self) -> Geometry:
        """Get the union of all floor areas.

        Returns:
            Shapely geometry representing floor areas.
        """
        return self.collector.floor_union
427
+
428
+
429
def visualize_floorplan(
    urdf_path: str,
    output_path: str,
    mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
    ignore_items: list[str] | None = None,
) -> None:
    """Quick function to visualize a floorplan.

    Builds a throwaway FloorplanManager and renders its floorplan image.

    Args:
        urdf_path: Path to the URDF file.
        output_path: Path to save the output image.
        mesh_sample_num: Number of points to sample from meshes.
        ignore_items: List of item name patterns to ignore.
    """
    FloorplanManager(
        urdf_path=urdf_path,
        mesh_sample_num=mesh_sample_num,
        ignore_items=ignore_items,
    ).visualize(output_path=output_path)
450
+
451
+
452
def insert_object_to_scene(
    urdf_path: str,
    asset_path: str,
    instance_key: str,
    output_path: str,
    usd_path: str | None = None,
    in_room: str | None = None,
    on_instance: str | None = None,
    beside_instance: str | None = None,
    beside_distance: float = DEFAULT_BESIDE_DISTANCE,
    place_strategy: Literal["top", "random"] = "random",
    rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
) -> list[float] | None:
    """Quick function to insert an object and generate floorplan.

    Note: USD updates require Blender (bpy) to convert .obj to .usdc format.

    Args:
        urdf_path: Path to the URDF file.
        asset_path: Path to the asset mesh file (.obj).
        instance_key: Unique key for the new instance.
        output_path: Path to save the floorplan image.
        usd_path: Optional path to the USD file (requires Blender).
        in_room: Optional room name to constrain placement.
        on_instance: Optional instance name to place on top of.
        beside_instance: Optional instance name to place beside (on floor).
        beside_distance: Max distance for beside placement (meters).
        place_strategy: Either "top" or "random".
        rotation_rpy: Initial rotation in roll-pitch-yaw.

    Returns:
        List [x, y, z] of the placed instance center, or None if failed.
    """
    manager = FloorplanManager(urdf_path=urdf_path, usd_path=usd_path)
    placement_kwargs = dict(
        asset_path=asset_path,
        instance_key=instance_key,
        in_room=in_room,
        on_instance=on_instance,
        beside_instance=beside_instance,
        beside_distance=beside_distance,
        rotation_rpy=rotation_rpy,
        place_strategy=place_strategy,
    )
    center = manager.insert_object(**placement_kwargs)
    # Only render the floorplan when placement actually succeeded.
    if center is not None:
        manager.visualize(output_path=output_path)
    return center
embodied_gen/skills/spatial_computing/cli/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """Command-line interface for floorplan operations.
2
+
3
+ This package provides CLI utilities for floorplan visualization and
4
+ scene manipulation.
5
+ """
6
+
7
+ __all__: list[str] = []
embodied_gen/skills/spatial_computing/cli/main.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import sys
21
+ import warnings
22
+
23
+ import tyro
24
+ from embodied_gen.skills.spatial_computing.api.floorplan_api import (
25
+ FloorplanConfig,
26
+ FloorplanManager,
27
+ )
28
+
29
+ warnings.filterwarnings("ignore", category=RuntimeWarning)
30
+
31
+ logging.basicConfig(
32
+ format="%(asctime)s - %(levelname)s - %(message)s",
33
+ level=logging.INFO,
34
+ force=True,
35
+ )
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ def _get_gpt_client() -> object | None:
40
+ """Lazy-import GPT_CLIENT for semantic --on_instance resolution."""
41
+ try:
42
+ from embodied_gen.utils.gpt_clients import GPT_CLIENT
43
+
44
+ return GPT_CLIENT
45
+ except Exception:
46
+ return None
47
+
48
+
49
def entrypoint(cfg: FloorplanConfig) -> None:
    """Main entry point for floorplan visualization and scene manipulation.

    Args:
        cfg: Configuration object with all parameters.
    """
    manager = FloorplanManager(
        urdf_path=cfg.urdf_path,
        usd_path=cfg.usd_path,
        mesh_sample_num=cfg.mesh_sample_num,
        ignore_items=cfg.ignore_items,
    )

    # List instances/rooms and exit if requested
    if cfg.list_instances:
        names = manager.get_instance_names()
        rooms = manager.get_room_names()
        # BUG FIX: logger.info was called print-style with a trailing
        # positional arg but no %s placeholder, which raises a logging
        # formatting error and drops the data.
        logger.info("instance_names: %s", names)
        logger.info("room_names: %s", rooms)
        return

    gpt_client = _get_gpt_client()
    on_instance = cfg.on_instance
    if on_instance is not None:
        resolved = manager.resolve_on_instance(
            on_instance, gpt_client=gpt_client
        )
        if resolved is None:
            logger.error(
                "No object matched \"%s\"。Current scene instance name: %s。",
                on_instance,
                manager.get_instance_names(),
            )
            sys.exit(1)
        on_instance = resolved
        if resolved != cfg.on_instance:
            logger.info("\"%s\" -> \"%s\"", cfg.on_instance, resolved)

    in_room = cfg.in_room
    if in_room is not None:
        resolved = manager.resolve_in_room(in_room, gpt_client=gpt_client)
        if resolved is None:
            logger.error(
                "No room matched \"%s\"。Current scene room names: %s。",
                in_room,
                manager.get_room_names(),
            )
            sys.exit(1)
        in_room = resolved
        if resolved != cfg.in_room:
            logger.info("\"%s\" -> \"%s\"", cfg.in_room, resolved)

    beside_instance = cfg.beside_instance
    if beside_instance is not None:
        resolved = manager.resolve_beside_instance(
            beside_instance, gpt_client=gpt_client, in_room=in_room
        )
        if resolved is None:
            logger.error(
                "No object matched \"%s\"。Current scene instance name: %s。",
                beside_instance,
                manager.get_instance_names(),
            )
            sys.exit(1)
        beside_instance = resolved
        if resolved != cfg.beside_instance:
            logger.info("\"%s\" -> \"%s\"", cfg.beside_instance, resolved)

    # Add asset instance if specified
    center = None
    if cfg.asset_path is not None:
        center = manager.insert_object(
            asset_path=cfg.asset_path,
            instance_key=cfg.instance_key,
            in_room=in_room,
            on_instance=on_instance,
            beside_instance=beside_instance,
            beside_distance=cfg.beside_distance,
            rotation_rpy=cfg.rotation_rpy,
            n_max_attempt=cfg.max_placement_attempts,
            place_strategy=cfg.place_strategy,
        )

        if center is not None:
            logger.info(
                f"Successfully placed '{cfg.instance_key}' at "
                f"({center[0]:.3f}, {center[1]:.3f}, {center[2]:.3f})"
            )
        else:
            logger.error(
                f"❌ Failed to place '{cfg.instance_key}' in the scene."
            )
            sys.exit(1)

    # Generate floorplan visualization
    if cfg.output_path is not None:
        manager.visualize(output_path=cfg.output_path)
147
+
148
+
149
if __name__ == "__main__":
    # Parse the CLI args into a FloorplanConfig and run the pipeline.
    entrypoint(tyro.cli(FloorplanConfig))
embodied_gen/skills/spatial_computing/core/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Floorplan skill core modules.
2
+
3
+ This package provides core functionality for floorplan visualization
4
+ and object placement in 3D indoor scenes.
5
+ """
6
+
7
+ from embodied_gen.skills.spatial_computing.core.collector import (
8
+ UrdfSemanticInfoCollector,
9
+ )
10
+ from embodied_gen.skills.spatial_computing.core.geometry import (
11
+ get_actionable_surface,
12
+ points_to_polygon,
13
+ )
14
+ from embodied_gen.skills.spatial_computing.core.visualizer import (
15
+ FloorplanVisualizer,
16
+ )
17
+
18
+ __all__ = [
19
+ "FloorplanVisualizer",
20
+ "UrdfSemanticInfoCollector",
21
+ "points_to_polygon",
22
+ "get_actionable_surface",
23
+ ]
embodied_gen/skills/spatial_computing/core/collector.py ADDED
@@ -0,0 +1,833 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import os
22
+ import random
23
+ import re
24
+ import xml.etree.ElementTree as ET
25
+ from shutil import copy2, copytree
26
+ from typing import Literal
27
+
28
+ import numpy as np
29
+ import trimesh
30
+ from scipy.spatial.transform import Rotation as R
31
+ from shapely.affinity import translate
32
+ from shapely.geometry import MultiPolygon, Polygon
33
+ from shapely.ops import unary_union
34
+ from embodied_gen.skills.spatial_computing.core.geometry import (
35
+ DEFAULT_MESH_SAMPLE_NUM,
36
+ get_actionable_surface,
37
+ points_to_polygon,
38
+ )
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ # Type aliases
43
+ Geometry = Polygon | MultiPolygon
44
+
45
+ # Constants
46
+ DEFAULT_ROTATION_RPY = (1.57, 0.0, 0.0)
47
+ DEFAULT_MAX_PLACEMENT_ATTEMPTS = 3000
48
+ DEFAULT_IGNORE_ITEMS = ("ceiling", "light", "exterior")
49
+ DEFAULT_BESIDE_DISTANCE = 0.5
50
+
51
+
52
+ class UrdfSemanticInfoCollector:
53
+ """Collector for URDF semantic information.
54
+
55
+ Parses URDF files to extract room layouts, object footprints, and
56
+ provides methods for adding new instances and updating URDF/USD files.
57
+
58
+ Attributes:
59
+ mesh_sample_num: Number of points to sample from meshes.
60
+ ignore_items: List of item name patterns to ignore.
61
+ instances: Dictionary of instance name to footprint polygon.
62
+ instance_meta: Dictionary of instance metadata (mesh path, pose).
63
+ rooms: Dictionary of room polygons.
64
+ footprints: Dictionary of object footprints.
65
+ occ_area: Union of all occupied areas.
66
+ floor_union: Union of all floor polygons.
67
+
68
+ """
69
+
70
+ def __init__(
71
+ self,
72
+ mesh_sample_num: int = DEFAULT_MESH_SAMPLE_NUM,
73
+ ignore_items: list[str] | None = None,
74
+ ) -> None:
75
+ """Initialize the collector.
76
+
77
+ Args:
78
+ mesh_sample_num: Number of points to sample from meshes.
79
+ ignore_items: List of item name patterns to ignore during parsing.
80
+
81
+ """
82
+ self.mesh_sample_num = mesh_sample_num
83
+ self.ignore_items = ignore_items or list(DEFAULT_IGNORE_ITEMS)
84
+
85
+ self.instances: dict[str, Polygon] = {}
86
+ self.instance_meta: dict[str, dict] = {}
87
+ self.rooms: dict[str, Geometry] = {}
88
+ self.footprints: dict[str, Geometry] = {}
89
+ self.occ_area: Geometry = Polygon()
90
+ self.floor_union: Geometry = Polygon()
91
+
92
+ self.urdf_path: str = ""
93
+ self._tree: ET.ElementTree | None = None
94
+ self._root: ET.Element | None = None
95
+
96
+ def _get_transform(
97
+ self,
98
+ joint_elem: ET.Element,
99
+ ) -> tuple[np.ndarray, np.ndarray]:
100
+ """Extract transform (xyz, rpy) from a joint element.
101
+
102
+ Args:
103
+ joint_elem: XML Element representing a URDF joint.
104
+
105
+ Returns:
106
+ Tuple of (xyz, rpy) arrays.
107
+
108
+ """
109
+ origin = joint_elem.find("origin")
110
+ if origin is not None:
111
+ xyz = np.fromstring(origin.attrib.get("xyz", "0 0 0"), sep=" ")
112
+ rpy = np.fromstring(origin.attrib.get("rpy", "0 0 0"), sep=" ")
113
+ else:
114
+ xyz, rpy = np.zeros(3), np.zeros(3)
115
+ return xyz, rpy
116
+
117
+ def _process_mesh_to_poly(
118
+ self,
119
+ mesh_path: str,
120
+ xyz: np.ndarray,
121
+ rpy: np.ndarray,
122
+ ) -> Polygon:
123
+ """Load mesh file and convert to 2D footprint polygon.
124
+
125
+ Args:
126
+ mesh_path: Path to the mesh file.
127
+ xyz: Translation vector.
128
+ rpy: Rotation in roll-pitch-yaw.
129
+
130
+ Returns:
131
+ Footprint polygon of the mesh.
132
+
133
+ """
134
+ if not os.path.exists(mesh_path):
135
+ return Polygon()
136
+
137
+ mesh = trimesh.load(mesh_path, force="mesh", skip_materials=True)
138
+
139
+ matrix = np.eye(4)
140
+ matrix[:3, :3] = R.from_euler("xyz", rpy).as_matrix()
141
+ matrix[:3, 3] = xyz
142
+ mesh.apply_transform(matrix)
143
+
144
+ verts = np.asarray(mesh.sample(self.mesh_sample_num))[:, :2]
145
+ return points_to_polygon(verts)
146
+
147
+ def collect(self, urdf_path: str) -> None:
148
+ """Parse URDF file and collect semantic information.
149
+
150
+ Args:
151
+ urdf_path: Path to the URDF file.
152
+
153
+ """
154
+ logger.info(f"Collecting URDF semantic info from {urdf_path}")
155
+ self.urdf_path = urdf_path
156
+ urdf_dir = os.path.dirname(urdf_path)
157
+
158
+ self._tree = ET.parse(urdf_path)
159
+ self._root = self._tree.getroot()
160
+
161
+ link_transforms = self._build_link_transforms()
162
+ self._process_links(urdf_dir, link_transforms)
163
+ self._update_internal_state()
164
+
165
+ def _build_link_transforms(
166
+ self,
167
+ ) -> dict[str, tuple[np.ndarray, np.ndarray]]:
168
+ """Build mapping from link names to their transforms.
169
+
170
+ Returns:
171
+ Dictionary mapping link names to (xyz, rpy) tuples.
172
+
173
+ """
174
+ link_transforms: dict[str, tuple[np.ndarray, np.ndarray]] = {}
175
+
176
+ for joint in self._tree.findall("joint"):
177
+ child = joint.find("child")
178
+ if child is not None:
179
+ link_name = child.attrib["link"]
180
+ link_transforms[link_name] = self._get_transform(joint)
181
+
182
+ return link_transforms
183
+
184
    def _process_links(
        self,
        urdf_dir: str,
        link_transforms: dict[str, tuple[np.ndarray, np.ndarray]],
    ) -> None:
        """Process all links in the URDF tree.

        Populates ``self.instances`` (2D footprints keyed by normalized
        link name, plus a merged "walls" entry) and ``self.instance_meta``
        (mesh path and pose for each non-wall instance).

        Args:
            urdf_dir: Directory containing the URDF file.
            link_transforms: Dictionary of link transforms.

        """
        self.instances = {}
        self.instance_meta = {}
        wall_polys: list[Polygon] = []

        logger.info("Processing URDF links to extract geometry...")
        for link in self._tree.findall("link"):
            name = link.attrib.get("name", "").lower()
            if any(ign in name for ign in self.ignore_items):
                continue

            # Skip links without a visual mesh — nothing to project.
            visual = link.find("visual")
            if visual is None:
                continue

            mesh_node = visual.find("geometry/mesh")
            if mesh_node is None:
                continue

            mesh_path = os.path.join(urdf_dir, mesh_node.attrib["filename"])
            # Links without a joint fall back to an identity transform.
            default_transform = (np.zeros(3), np.zeros(3))
            xyz, rpy = link_transforms.get(
                link.attrib["name"], default_transform
            )

            poly = self._process_mesh_to_poly(mesh_path, xyz, rpy)
            if poly.is_empty:
                continue

            # Wall links are merged into a single union; everything else is
            # stored individually under a normalized key.
            if "wall" in name:
                wall_polys.append(poly)
            else:
                key = self._process_safe_key_robust(link.attrib["name"])
                self.instances[key] = poly
                self.instance_meta[key] = {
                    "mesh_path": mesh_path,
                    "xyz": xyz,
                    "rpy": rpy,
                }

        self.instances["walls"] = unary_union(wall_polys)
236
+
237
+ def _update_internal_state(self) -> None:
238
+ """Update derived state (rooms, footprints, occupied area)."""
239
+ self.rooms = {
240
+ k: v
241
+ for k, v in self.instances.items()
242
+ if "_floor" in k.lower() and not v.is_empty
243
+ }
244
+
245
+ self.footprints = {
246
+ k: v
247
+ for k, v in self.instances.items()
248
+ if k != "walls"
249
+ and "_floor" not in k.lower()
250
+ and "rug" not in k.lower()
251
+ and not v.is_empty
252
+ }
253
+ self.occ_area = unary_union(list(self.footprints.values()))
254
+ self.floor_union = unary_union(list(self.rooms.values()))
255
+
256
+ def _process_safe_key_robust(self, name: str) -> str:
257
+ """Convert a link name to a safe, normalized key.
258
+
259
+ Args:
260
+ name: Original link name.
261
+
262
+ Returns:
263
+ Normalized key string.
264
+
265
+ """
266
+ if name.endswith("_floor"):
267
+ parts = name.split("_")
268
+ return "_".join(parts[:-2] + ["floor"])
269
+
270
+ if "Factory" in name:
271
+ # Handle infinigen naming convention
272
+ prefix = name.split("Factory")[0]
273
+ suffix = f"_{name.split('_')[-1]}"
274
+ else:
275
+ prefix, suffix = name, ""
276
+
277
+ res = prefix.replace(" ", "_")
278
+ res = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", res)
279
+ res = res.lower()
280
+ res = re.sub(r"_+", "_", res).strip("_ ")
281
+
282
+ return f"{res}{suffix}"
283
+
284
    def add_instance(
        self,
        asset_path: str,
        instance_key: str,
        in_room: str | None = None,
        on_instance: str | None = None,
        beside_instance: str | None = None,
        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
        rotation_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
        n_max_attempt: int = DEFAULT_MAX_PLACEMENT_ATTEMPTS,
        place_strategy: Literal["top", "random"] = "random",
    ) -> list[float] | None:
        """Add a new instance to the scene with automatic placement.

        Args:
            asset_path: Path to the asset mesh file.
            instance_key: Unique key for the new instance.
            in_room: Optional room name to constrain placement.
            on_instance: Optional instance name to place on top of.
            beside_instance: Optional instance name to place beside (on floor).
            beside_distance: Initial buffer distance from the target instance
                for beside placement (meters). Will auto-expand if needed.
            rotation_rpy: Initial rotation in roll-pitch-yaw.
            n_max_attempt: Maximum placement attempts.
            place_strategy: Either "top" or "random".

        Returns:
            List [x, y, z] of the placed instance center, or None if failed.

        Raises:
            ValueError: If instance_key already exists or room/instance not found.

        """
        if instance_key in self.instances:
            raise ValueError(f"Instance key '{instance_key}' already exists.")

        room_poly = self._resolve_room_polygon(in_room)

        # Load mesh and compute base polygon (needed for all placement modes)
        mesh = trimesh.load(asset_path, force="mesh")
        mesh.apply_transform(
            trimesh.transformations.euler_matrix(*rotation_rpy, "sxyz")
        )

        # Footprint is re-centered at the origin so placement offsets are
        # pure translations.
        verts = np.asarray(mesh.sample(self.mesh_sample_num))[:, :2]
        base_poly = points_to_polygon(verts)
        centroid = base_poly.centroid
        base_poly = translate(base_poly, xoff=-centroid.x, yoff=-centroid.y)

        if beside_instance is not None:
            placement = self._try_place_beside(
                base_poly=base_poly,
                beside_instance=beside_instance,
                room_poly=room_poly,
                beside_distance=beside_distance,
                n_max_attempt=n_max_attempt,
            )
            # Beside-placement is always on the floor.
            base_z = 0.0
        else:
            target_area, obstacles, base_z = self._resolve_placement_target(
                on_instance, room_poly, place_strategy
            )

            if target_area.is_empty:
                logger.error("Target area for placement is empty.")
                return None

            placement = self._try_place_polygon(
                base_poly, target_area, obstacles, n_max_attempt
            )

        if placement is None:
            logger.error(
                f"Failed to place '{instance_key}' after all attempts."
            )
            return None

        x, y, candidate = placement
        # NOTE(review): instance_meta is not populated for the new instance,
        # so later placing something *on* it would KeyError in
        # _resolve_placement_target — confirm this is intended.
        self.instances[instance_key] = candidate
        # Returned Z is the object's vertical center: support height plus
        # half the mesh extent.
        final_z = base_z + mesh.extents[2] / 2
        self._update_internal_state()

        return [round(v, 4) for v in (x, y, final_z)]
367
+
368
+ def _resolve_room_polygon(self, in_room: str | None) -> Geometry | None:
369
+ """Resolve room name to polygon.
370
+
371
+ Args:
372
+ in_room: Room name query string.
373
+
374
+ Returns:
375
+ Room polygon or None if not specified.
376
+
377
+ Raises:
378
+ ValueError: If room not found.
379
+
380
+ """
381
+ if in_room is None:
382
+ return None
383
+
384
+ query_room = in_room.lower()
385
+ room_matches = [
386
+ k for k in self.rooms.keys() if query_room in k.lower()
387
+ ]
388
+
389
+ if not room_matches:
390
+ raise ValueError(f"Room '{in_room}' not found.")
391
+
392
+ return unary_union([self.rooms[k] for k in room_matches])
393
+
394
    def _try_place_beside(
        self,
        base_poly: Polygon,
        beside_instance: str,
        room_poly: Geometry | None,
        beside_distance: float = DEFAULT_BESIDE_DISTANCE,
        n_max_attempt: int = DEFAULT_MAX_PLACEMENT_ATTEMPTS,
        max_expand_steps: int = 5,
        expand_factor: float = 1.5,
    ) -> tuple[float, float, Polygon] | None:
        """Place object beside target with progressive distance expansion.

        More robust than fixed-distance placement:
        1. Ensures minimum distance accommodates the new object's size.
        2. Pre-subtracts obstacles from the ring → sampling only in free area.
        3. Progressively expands distance on failure (up to max_expand_steps).
        4. Skips steps where the free area is too small for the object.

        Args:
            base_poly: Object footprint polygon centered at origin.
            beside_instance: Target instance name to place beside.
            room_poly: Optional room constraint polygon.
            beside_distance: Initial buffer distance (meters).
            n_max_attempt: Total max placement attempts across all steps.
            max_expand_steps: Max number of distance expansion rounds.
            expand_factor: Distance multiplier per expansion round.

        Returns:
            Tuple (x, y, placed_polygon) on success, or None if all failed.

        Raises:
            ValueError: If beside_instance not found in scene.

        """
        # --- Resolve target instance ---
        query_obj = beside_instance.lower()
        possible_matches = [
            k
            for k in self.instances.keys()
            if query_obj in k.lower() and k != "walls"
        ]

        if room_poly is not None:
            # Check that the object's representative point falls inside
            # the room (buffered slightly for mesh-sampling tolerance).
            room_buffered = room_poly.buffer(0.1)
            possible_matches = [
                k
                for k in possible_matches
                if room_buffered.contains(
                    self.instances[k].representative_point()
                )
            ]

        if not possible_matches:
            location_msg = " in specified room" if room_poly else ""
            # Log candidate distances for easier debugging
            all_matches = [
                k
                for k in self.instances.keys()
                if query_obj in k.lower() and k != "walls"
            ]
            if all_matches and room_poly is not None:
                dists = {
                    k: round(self.instances[k].distance(room_poly), 4)
                    for k in all_matches
                }
                logger.error("Candidate distances to room polygon: %s", dists)
            raise ValueError(
                f"No instance matching '{beside_instance}' "
                f"found{location_msg}."
            )

        # First match wins; ties are only reported, not disambiguated.
        target_key = possible_matches[0]
        if len(possible_matches) > 1:
            logger.warning(
                f"Multiple matches for '{beside_instance}': "
                f"{possible_matches}. Using '{target_key}'."
            )

        target_footprint = self.instances[target_key]
        floor = room_poly if room_poly is not None else self.floor_union

        # --- Ensure initial distance accommodates the object's size ---
        obj_bounds = base_poly.bounds  # (minx, miny, maxx, maxy)
        obj_half_diag = (
            np.hypot(
                obj_bounds[2] - obj_bounds[0],
                obj_bounds[3] - obj_bounds[1],
            )
            / 2.0
        )
        current_distance = max(beside_distance, obj_half_diag * 1.5)

        # Budget attempts across expansion steps
        attempts_per_step = max(n_max_attempt // (max_expand_steps + 1), 500)
        empty_obstacle = Polygon()  # pre-created; obstacles are pre-subtracted

        for step in range(max_expand_steps + 1):
            # Build ring: buffer - footprint, intersected with floor
            buffered = target_footprint.buffer(current_distance)
            ring_area = buffered.difference(target_footprint)
            ring_area = ring_area.intersection(floor)

            if ring_area.is_empty:
                logger.info(
                    f"[beside step {step}] Ring empty at "
                    f"{current_distance:.2f}m, expanding..."
                )
                current_distance *= expand_factor
                continue

            # Pre-subtract all obstacles → sample only from actual free area
            free_area = ring_area.difference(self.occ_area)

            if free_area.is_empty or free_area.area < base_poly.area * 0.5:
                logger.info(
                    f"[beside step {step}] Free area too small at "
                    f"{current_distance:.2f}m "
                    f"(free={free_area.area:.4f}, "
                    f"need≈{base_poly.area:.4f}), expanding..."
                )
                current_distance *= expand_factor
                continue

            # Attempt placement in the free area (obstacles already removed)
            placement = self._try_place_polygon(
                base_poly, free_area, empty_obstacle, attempts_per_step
            )

            if placement is not None:
                logger.info(
                    f"Placed beside '{target_key}' at distance "
                    f"{current_distance:.2f}m (step {step})"
                )
                return placement

            logger.info(
                f"[beside step {step}] Failed at {current_distance:.2f}m "
                f"after {attempts_per_step} attempts, expanding..."
            )
            current_distance *= expand_factor

        logger.error(
            f"Failed to place beside '{target_key}' after "
            f"{max_expand_steps + 1} expansion steps "
            f"(final distance: {current_distance / expand_factor:.2f}m)."
        )
        return None
543
+
544
+ def _resolve_placement_target(
545
+ self,
546
+ on_instance: str | None,
547
+ room_poly: Geometry | None,
548
+ place_strategy: Literal["top", "random"],
549
+ ) -> tuple[Geometry, Geometry, float]:
550
+ """Resolve the target placement area and obstacles.
551
+
552
+ Args:
553
+ on_instance: Instance name to place on.
554
+ room_poly: Room polygon constraint.
555
+ place_strategy: Placement strategy.
556
+
557
+ Returns:
558
+ Tuple of (target_area, obstacles, base_z_height).
559
+
560
+ Raises:
561
+ ValueError: If on_instance not found.
562
+
563
+ """
564
+ if on_instance is None:
565
+ if room_poly is not None:
566
+ return room_poly, self.occ_area, 0.0
567
+ return self.floor_union, self.occ_area, 0.0
568
+
569
+ query_obj = on_instance.lower()
570
+ possible_matches = [
571
+ k
572
+ for k in self.instances.keys()
573
+ if query_obj in k.lower() and k != "walls"
574
+ ]
575
+
576
+ if room_poly is not None:
577
+ room_buffered = room_poly.buffer(0.1)
578
+ possible_matches = [
579
+ k
580
+ for k in possible_matches
581
+ if room_buffered.contains(
582
+ self.instances[k].representative_point()
583
+ )
584
+ ]
585
+
586
+ if not possible_matches:
587
+ location_msg = f" in room '{on_instance}'" if room_poly else ""
588
+ raise ValueError(
589
+ f"No instance matching '{on_instance}' found{location_msg}."
590
+ )
591
+
592
+ if place_strategy == "random":
593
+ target_parent_key = random.choice(possible_matches)
594
+ else:
595
+ target_parent_key = possible_matches[0]
596
+
597
+ if len(possible_matches) > 1:
598
+ logger.warning(
599
+ f"Multiple matches for '{on_instance}': {possible_matches}. "
600
+ f"Using '{target_parent_key}'."
601
+ )
602
+
603
+ meta = self.instance_meta[target_parent_key]
604
+ parent_mesh = trimesh.load(meta["mesh_path"], force="mesh")
605
+ matrix = np.eye(4)
606
+ matrix[:3, :3] = R.from_euler("xyz", meta["rpy"]).as_matrix()
607
+ matrix[:3, 3] = meta["xyz"]
608
+ parent_mesh.apply_transform(matrix)
609
+
610
+ best_z, surface_poly = get_actionable_surface(
611
+ parent_mesh, place_strategy=place_strategy
612
+ )
613
+ obstacles = self.occ_area.difference(self.instances[target_parent_key])
614
+
615
+ logger.info(f"Placing on '{target_parent_key}' (Z={best_z:.3f})")
616
+
617
+ return surface_poly, obstacles, best_z
618
+
619
+ def _try_place_polygon(
620
+ self,
621
+ base_poly: Polygon,
622
+ target_area: Geometry,
623
+ obstacles: Geometry,
624
+ n_max_attempt: int,
625
+ ) -> tuple[float, float, Polygon] | None:
626
+ """Try to place polygon in target area avoiding obstacles.
627
+
628
+ Args:
629
+ base_poly: Polygon to place (centered at origin).
630
+ target_area: Area where placement is allowed.
631
+ obstacles: Areas to avoid.
632
+ n_max_attempt: Maximum attempts.
633
+
634
+ Returns:
635
+ Tuple of (x, y, placed_polygon) or None if failed.
636
+
637
+ """
638
+ minx, miny, maxx, maxy = target_area.bounds
639
+
640
+ for _ in range(n_max_attempt):
641
+ x = np.random.uniform(minx, maxx)
642
+ y = np.random.uniform(miny, maxy)
643
+ candidate = translate(base_poly, xoff=x, yoff=y)
644
+
645
+ if target_area.contains(candidate) and not candidate.intersects(
646
+ obstacles
647
+ ):
648
+ return x, y, candidate
649
+
650
+ return None
651
+
652
    def update_urdf_info(
        self,
        output_path: str,
        instance_key: str,
        visual_mesh_path: str,
        collision_mesh_path: str | None = None,
        trans_xyz: tuple[float, float, float] = (0, 0, 0),
        rot_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
        joint_type: str = "fixed",
    ) -> None:
        """Add a new link to the URDF tree and save.

        Copies the mesh directories next to the URDF, then appends a
        <link> plus a joint attaching it to the "base" link.

        Args:
            output_path: Path to save the updated URDF.
            instance_key: Name for the new link.
            visual_mesh_path: Path to the visual mesh file.
            collision_mesh_path: Optional path to collision mesh.
            trans_xyz: Translation (x, y, z).
            rot_rpy: Rotation (roll, pitch, yaw).
            joint_type: Type of joint (e.g., "fixed").

        """
        # No tree loaded (collect() not called) — nothing to update.
        if self._root is None:
            return

        logger.info(f"Updating URDF for instance '{instance_key}'.")
        urdf_dir = os.path.dirname(self.urdf_path)

        # Copy mesh files
        copytree(
            os.path.dirname(visual_mesh_path),
            f"{urdf_dir}/{instance_key}",
            dirs_exist_ok=True,
        )
        visual_rel_path = (
            f"{instance_key}/{os.path.basename(visual_mesh_path)}"
        )

        collision_rel_path = None
        if collision_mesh_path is not None:
            copytree(
                os.path.dirname(collision_mesh_path),
                f"{urdf_dir}/{instance_key}",
                dirs_exist_ok=True,
            )
            collision_rel_path = (
                f"{instance_key}/{os.path.basename(collision_mesh_path)}"
            )

        # Create link element
        link = ET.SubElement(self._root, "link", attrib={"name": instance_key})

        visual = ET.SubElement(link, "visual")
        v_geo = ET.SubElement(visual, "geometry")
        ET.SubElement(v_geo, "mesh", attrib={"filename": visual_rel_path})

        if collision_rel_path is not None:
            collision = ET.SubElement(link, "collision")
            c_geo = ET.SubElement(collision, "geometry")
            ET.SubElement(
                c_geo, "mesh", attrib={"filename": collision_rel_path}
            )

        # Create joint element
        joint_name = f"joint_{instance_key}"
        joint = ET.SubElement(
            self._root,
            "joint",
            attrib={"name": joint_name, "type": joint_type},
        )

        # NOTE(review): assumes the URDF root link is named "base" — confirm
        # this holds for all scenes this class processes.
        ET.SubElement(joint, "parent", attrib={"link": "base"})
        ET.SubElement(joint, "child", attrib={"link": instance_key})

        xyz_str = f"{trans_xyz[0]:.4f} {trans_xyz[1]:.4f} {trans_xyz[2]:.4f}"
        rpy_str = f"{rot_rpy[0]:.4f} {rot_rpy[1]:.4f} {rot_rpy[2]:.4f}"
        ET.SubElement(joint, "origin", attrib={"xyz": xyz_str, "rpy": rpy_str})

        self.save_urdf(output_path)
731
+
732
    def update_usd_info(
        self,
        usd_path: str,
        output_path: str,
        instance_key: str,
        visual_mesh_path: str,
        trans_xyz: list[float],
        rot_rpy: tuple[float, float, float] = DEFAULT_ROTATION_RPY,
    ) -> None:
        """Add a mesh instance to an existing USD file.

        Uses Blender (bpy) to convert OBJ to USD format.

        Args:
            usd_path: Path to the source USD file.
            output_path: Path to save the modified USD.
            instance_key: Prim path name for the new instance.
            visual_mesh_path: Path to the visual mesh (OBJ format).
            trans_xyz: Translation [x, y, z].
            rot_rpy: Rotation (roll, pitch, yaw).

        Raises:
            ImportError: If pxr (USD) library or bpy is not available.

        """
        # Heavy optional deps imported lazily so the class works without them.
        import bpy
        from pxr import Gf, Usd, UsdGeom

        prim_path = f"/{instance_key}"
        out_dir = os.path.dirname(output_path)
        target_dir = os.path.join(out_dir, instance_key)
        os.makedirs(target_dir, exist_ok=True)

        mesh_filename = os.path.basename(visual_mesh_path)
        usdc_filename = os.path.splitext(mesh_filename)[0] + ".usdc"
        target_usdc_path = os.path.join(target_dir, usdc_filename)

        logger.info(
            f"Converting with Blender (bpy): "
            f"{visual_mesh_path} -> {target_usdc_path}"
        )
        # Reset Blender to an empty scene so only the imported OBJ exports.
        bpy.ops.wm.read_factory_settings(use_empty=True)
        bpy.ops.wm.obj_import(
            filepath=visual_mesh_path,
            forward_axis="Y",
            up_axis="Z",
        )
        bpy.ops.wm.usd_export(
            filepath=target_usdc_path,
            selected_objects_only=False,
        )

        # Copy texture files
        src_dir = os.path.dirname(visual_mesh_path)
        for f in os.listdir(src_dir):
            if f.lower().endswith((".png", ".jpg", ".jpeg", ".mtl")):
                copy2(os.path.join(src_dir, f), target_dir)

        final_rel_path = f"./{instance_key}/{usdc_filename}"

        # Update USD stage
        stage = Usd.Stage.Open(usd_path)
        mesh_prim = UsdGeom.Xform.Define(stage, prim_path)

        ref_prim = UsdGeom.Mesh.Define(stage, f"{prim_path}/Mesh")
        ref_prim.GetPrim().GetReferences().AddReference(final_rel_path)

        # Build transform matrix: rotate X, then Y, then Z, then translate.
        translation_mat = Gf.Matrix4d().SetTranslate(
            Gf.Vec3d(trans_xyz[0], trans_xyz[1], trans_xyz[2])
        )
        rx = Gf.Matrix4d().SetRotate(
            Gf.Rotation(Gf.Vec3d(1, 0, 0), np.degrees(rot_rpy[0]))
        )
        ry = Gf.Matrix4d().SetRotate(
            Gf.Rotation(Gf.Vec3d(0, 1, 0), np.degrees(rot_rpy[1]))
        )
        rz = Gf.Matrix4d().SetRotate(
            Gf.Rotation(Gf.Vec3d(0, 0, 1), np.degrees(rot_rpy[2]))
        )
        rotation_mat = rx * ry * rz
        transform = rotation_mat * translation_mat
        mesh_prim.AddTransformOp().Set(transform)

        stage.GetRootLayer().Export(output_path)
        logger.info(f"✅ Saved updated USD to {output_path}")
818
+
819
+ def save_urdf(self, output_path: str) -> None:
820
+ """Save the current URDF tree to file.
821
+
822
+ Args:
823
+ output_path: Path to save the URDF file.
824
+
825
+ """
826
+ if self._tree is None:
827
+ return
828
+
829
+ if hasattr(ET, "indent"):
830
+ ET.indent(self._tree, space=" ", level=0)
831
+
832
+ self._tree.write(output_path, encoding="utf-8", xml_declaration=True)
833
+ logger.info(f"✅ Saved updated URDF to {output_path}")
embodied_gen/skills/spatial_computing/core/geometry.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import random
22
+ from typing import Literal
23
+
24
+ import numpy as np
25
+ import trimesh
26
+ from shapely.geometry import MultiPoint, MultiPolygon, Polygon
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Type aliases
31
+ Geometry = Polygon | MultiPolygon
32
+
33
+ # Constants
34
+ DEFAULT_MESH_SAMPLE_NUM = 50000
35
+ DEFAULT_MAX_PLACEMENT_ATTEMPTS = 2000
36
+
37
+
38
def points_to_polygon(
    points: np.ndarray,
    smooth_thresh: float = 0.2,
    scanline_step: float = 0.01,
) -> Polygon:
    """Build a contour polygon from a 2D point cloud via a Y-axis sweep.

    For each horizontal strip of height ``scanline_step`` the extreme
    left/right X values are collected; the chain is closed into a polygon
    and smoothed with a dilate/erode buffer pass.

    Args:
        points: Array of 2D points with shape (N, 2).
        smooth_thresh: Buffer threshold for smoothing the polygon.
        scanline_step: Step size for the scanline sweep.

    Returns:
        A Shapely Polygon representing the contour of the point cloud.

    """
    if len(points) == 0:
        return Polygon()

    y_coords = points[:, 1]
    scan_ys = np.arange(
        y_coords.min(), y_coords.max() + scanline_step, scanline_step
    )

    right_edge: list[list[float]] = []
    left_edge: list[list[float]] = []

    for y in scan_ys:
        strip = points[(y_coords >= y) & (y_coords < y + scanline_step)]
        if len(strip) == 0:
            continue

        x_vals = strip[:, 0]
        right_edge.append([x_vals.max(), y])
        left_edge.append([x_vals.min(), y])

    ring = right_edge + left_edge[::-1]
    if len(ring) < 3:
        return Polygon()

    # Dilate then erode to smooth jagged scanline artifacts.
    return Polygon(ring).buffer(smooth_thresh).buffer(-smooth_thresh)
79
+
80
+
81
def get_actionable_surface(
    mesh: trimesh.Trimesh,
    tol_angle: int = 10,
    tol_z: float = 0.02,
    area_tolerance: float = 0.15,
    place_strategy: Literal["top", "random"] = "random",
) -> tuple[float, Geometry]:
    """Extract the actionable (placeable) surface from a mesh.

    Finds upward-facing surfaces and returns the best one based on the
    placement strategy.

    Args:
        mesh: The input trimesh object.
        tol_angle: Angle tolerance in degrees for detecting up-facing normals.
        tol_z: Z-coordinate tolerance for clustering faces.
        area_tolerance: Tolerance for selecting candidate surfaces by area.
        place_strategy: Either "top" (highest surface) or "random".

    Returns:
        A tuple of (z_height, surface_polygon) representing the selected
        actionable surface.

    """
    # A face is "up-facing" if its normal is within tol_angle of +Z.
    up_vec = np.array([0, 0, 1])
    dots = np.dot(mesh.face_normals, up_vec)
    valid_mask = dots > np.cos(np.deg2rad(tol_angle))

    if not np.any(valid_mask):
        logger.warning(
            "No up-facing surfaces found. Falling back to bounding box top."
        )
        verts = mesh.vertices[:, :2]
        return mesh.bounds[1][2], MultiPoint(verts).convex_hull

    valid_faces_indices = np.where(valid_mask)[0]
    face_z = mesh.triangles_center[valid_mask][:, 2]
    face_areas = mesh.area_faces[valid_mask]

    # Group up-facing faces into horizontal layers by snapped Z.
    z_clusters = _cluster_faces_by_z(
        face_z, face_areas, valid_faces_indices, tol_z
    )

    if not z_clusters:
        return mesh.bounds[1][2], MultiPoint(mesh.vertices[:, :2]).convex_hull

    selected_z, selected_data = _select_surface_cluster(
        z_clusters, area_tolerance, place_strategy
    )

    # Sample the selected layer's faces and take their 2D convex hull.
    cluster_faces = mesh.faces[selected_data["indices"]]
    temp_mesh = trimesh.Trimesh(vertices=mesh.vertices, faces=cluster_faces)
    samples, _ = trimesh.sample.sample_surface(temp_mesh, 10000)

    if len(samples) < 3:
        logger.warning(
            f"Failed to sample enough points on layer Z={selected_z}. "
            "Returning empty polygon."
        )
        return selected_z, Polygon()

    surface_poly = MultiPoint(samples[:, :2]).convex_hull
    return selected_z, surface_poly
144
+
145
+
146
+ def _cluster_faces_by_z(
147
+ face_z: np.ndarray,
148
+ face_areas: np.ndarray,
149
+ face_indices: np.ndarray,
150
+ tol_z: float,
151
+ ) -> dict[float, dict]:
152
+ """Cluster mesh faces by their Z coordinate.
153
+
154
+ Args:
155
+ face_z: Z coordinates of face centers.
156
+ face_areas: Areas of each face.
157
+ face_indices: Original indices of the faces.
158
+ tol_z: Tolerance for Z clustering.
159
+
160
+ Returns:
161
+ Dictionary mapping Z values to cluster data (area and indices).
162
+
163
+ """
164
+ z_clusters: dict[float, dict] = {}
165
+
166
+ for i, z in enumerate(face_z):
167
+ key = round(z / tol_z) * tol_z
168
+
169
+ if key not in z_clusters:
170
+ z_clusters[key] = {"area": 0.0, "indices": []}
171
+
172
+ z_clusters[key]["area"] += face_areas[i]
173
+ z_clusters[key]["indices"].append(face_indices[i])
174
+
175
+ return z_clusters
176
+
177
+
178
def _select_surface_cluster(
    z_clusters: dict[float, dict],
    area_tolerance: float,
    place_strategy: Literal["top", "random"],
) -> tuple[float, dict]:
    """Pick one surface cluster according to the placement strategy.

    Args:
        z_clusters: Dictionary of Z clusters with area and indices.
        area_tolerance: Tolerance for candidate selection by area.
        place_strategy: Either "top" or "random".

    Returns:
        Tuple of (selected_z, cluster_data).

    """
    max_area = max(c["area"] for c in z_clusters.values())
    area_floor = max_area * (1.0 - area_tolerance)
    candidates = [
        (z, data)
        for z, data in z_clusters.items()
        if data["area"] >= area_floor
    ]

    if not candidates:
        # Defensive fallback: take the single largest cluster.
        candidates = [max(z_clusters.items(), key=lambda x: x[1]["area"])]

    if place_strategy == "random":
        selected_z, selected_data = random.choice(candidates)
        logger.info(
            f"Strategy 'random': Selected Z={selected_z:.3f} "
            f"(Area={selected_data['area']:.3f}) "
            f"from {len(candidates)} candidates."
        )
    else:
        candidates.sort(key=lambda x: x[0], reverse=True)
        selected_z, selected_data = candidates[0]
        logger.info(
            f"Strategy 'top': Selected highest Z={selected_z:.3f} "
            f"(Area={selected_data['area']:.3f})"
        )

    return selected_z, selected_data
embodied_gen/skills/spatial_computing/core/visualizer.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+
22
+ import matplotlib.pyplot as plt
23
+ from matplotlib.axes import Axes
24
+ from shapely.geometry import MultiPolygon, Polygon
25
+ from shapely.ops import unary_union
26
+
27
+ # Type aliases
28
+ Geometry = Polygon | MultiPolygon
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
class FloorplanVisualizer:
    """Static utility class for visualizing floorplans.

    Renders rooms, object footprints, the occupied-area union, and text
    labels as stacked matplotlib layers (zorder 1-5), then saves a PNG.
    """

    @staticmethod
    def draw_poly(ax: Axes, poly: Geometry, **kwargs) -> None:
        """Draw a polygon or multi-polygon on matplotlib axes.

        Args:
            ax: Matplotlib axes object.
            poly: Shapely Polygon or MultiPolygon to draw.
            **kwargs: Additional arguments passed to ax.fill().

        """
        if poly.is_empty:
            return

        # MultiPolygon exposes .geoms; wrap a bare Polygon for uniformity.
        geoms = poly.geoms if hasattr(poly, "geoms") else [poly]

        color = kwargs.pop("color", None)
        if color is None:
            cmap = plt.get_cmap("tab10")
            colors = [cmap(i) for i in range(len(geoms))]
        else:
            colors = [color] * len(geoms)

        for i, p in enumerate(geoms):
            if p.is_empty:
                continue
            x, y = p.exterior.xy
            ax.fill(x, y, facecolor=colors[i], **kwargs)

    @classmethod
    def plot(
        cls,
        rooms: dict[str, Geometry],
        footprints: dict[str, Geometry],
        occ_area: Geometry,
        save_path: str,
    ) -> None:
        """Generate and save a floorplan visualization.

        Args:
            rooms: Dictionary mapping room names to floor polygons.
            footprints: Dictionary mapping object names to footprint polygons.
            occ_area: Union of all occupied areas.
            save_path: Path to save the output image.

        """
        fig, ax = plt.subplots(figsize=(10, 10))
        ax.set_aspect("equal")
        cmap_rooms = plt.get_cmap("Pastel1")

        # Layers are drawn back-to-front; zorder inside each helper matches.
        cls._draw_room_floors(ax, rooms, cmap_rooms)
        cls._draw_occupied_area(ax, occ_area)
        cls._draw_footprint_outlines(ax, footprints)
        cls._draw_footprint_labels(ax, footprints)
        cls._draw_room_labels(ax, rooms)
        cls._configure_axes(ax, rooms, occ_area)

        plt.tight_layout()
        plt.savefig(save_path, dpi=300)
        plt.close(fig)

    @classmethod
    def _draw_room_floors(
        cls,
        ax: Axes,
        rooms: dict[str, Geometry],
        cmap: plt.cm.ScalarMappable,
    ) -> None:
        """Draw colored room floor polygons (Layer 1)."""
        for i, (name, poly) in enumerate(rooms.items()):
            # Cycle through the colormap when there are more rooms than colors.
            color = cmap(i % cmap.N)
            cls.draw_poly(
                ax,
                poly,
                color=color,
                alpha=0.6,
                edgecolor="black",
                linestyle="--",
                zorder=1,
            )

    @classmethod
    def _draw_occupied_area(cls, ax: Axes, occ_area: Geometry) -> None:
        """Draw the occupied area overlay (Layer 2)."""
        cls.draw_poly(
            ax,
            occ_area,
            color="tab:blue",
            alpha=0.3,
            lw=0,
            zorder=2,
        )

    @staticmethod
    def _draw_footprint_outlines(
        ax: Axes,
        footprints: dict[str, Geometry],
    ) -> None:
        """Draw footprint outlines (Layer 3)."""
        for poly in footprints.values():
            if poly.is_empty:
                continue
            geoms = poly.geoms if hasattr(poly, "geoms") else [poly]
            for p in geoms:
                ax.plot(*p.exterior.xy, "--", lw=0.8, color="gray", zorder=3)

    @staticmethod
    def _draw_footprint_labels(
        ax: Axes,
        footprints: dict[str, Geometry],
    ) -> None:
        """Draw footprint text labels at polygon centroids (Layer 4)."""
        for name, poly in footprints.items():
            if poly.is_empty:
                continue
            ax.text(
                poly.centroid.x,
                poly.centroid.y,
                name,
                fontsize=5,
                ha="center",
                va="center",
                bbox={
                    "facecolor": "white",
                    "alpha": 0.5,
                    "edgecolor": "none",
                    "pad": 0.1,
                },
                zorder=4,
            )

    @staticmethod
    def _draw_room_labels(ax: Axes, rooms: dict[str, Geometry]) -> None:
        """Draw room text labels (Layer 5)."""
        for name, poly in rooms.items():
            if poly.is_empty:
                continue
            # Room keys carry a "_floor" suffix; strip it for display.
            label = name.replace("_floor", "")
            ax.text(
                poly.centroid.x,
                poly.centroid.y,
                label,
                fontsize=9,
                color="black",
                weight="bold",
                ha="center",
                va="center",
                bbox={
                    "facecolor": "lightgray",
                    "alpha": 0.7,
                    "edgecolor": "black",
                    "boxstyle": "round,pad=0.3",
                },
                zorder=5,
            )

    @staticmethod
    def _configure_axes(
        ax: Axes,
        rooms: dict[str, Geometry],
        occ_area: Geometry,
    ) -> None:
        """Configure axes limits (with margin) and labels."""
        total_geom = unary_union(list(rooms.values()) + [occ_area])

        if total_geom.is_empty:
            minx, miny, maxx, maxy = -1, -1, 1, 1
        else:
            minx, miny, maxx, maxy = total_geom.bounds

        # Keep at least 0.5 m of margin so labels near edges stay visible.
        margin_x = max((maxx - minx) * 0.05, 0.5)
        margin_y = max((maxy - miny) * 0.05, 0.5)

        ax.set_xlim(minx - margin_x, maxx + margin_x)
        ax.set_ylim(miny - margin_y, maxy + margin_y)
        ax.set_title("Floorplan Analysis", fontsize=14)
        ax.set_xlabel("X (m)")
        ax.set_ylabel("Y (m)")
embodied_gen/utils/gpt_clients.py CHANGED
@@ -17,6 +17,7 @@
17
 
18
  import base64
19
  import logging
 
20
  import os
21
  from io import BytesIO
22
  from typing import Optional
@@ -31,7 +32,6 @@ from tenacity import (
31
  stop_after_attempt,
32
  wait_random_exponential,
33
  )
34
- from embodied_gen.utils.process_media import combine_images_to_grid
35
 
36
  logging.getLogger("httpx").setLevel(logging.WARNING)
37
  logging.basicConfig(level=logging.WARNING)
@@ -46,6 +46,38 @@ _CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
46
  CONFIG_FILE = os.path.join(_CURRENT_DIR, "gpt_config.yaml")
47
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  class GPTclient:
50
  """A client to interact with GPT models via OpenAI or Azure API.
51
 
 
17
 
18
  import base64
19
  import logging
20
+ import math
21
  import os
22
  from io import BytesIO
23
  from typing import Optional
 
32
  stop_after_attempt,
33
  wait_random_exponential,
34
  )
 
35
 
36
  logging.getLogger("httpx").setLevel(logging.WARNING)
37
  logging.basicConfig(level=logging.WARNING)
 
46
  CONFIG_FILE = os.path.join(_CURRENT_DIR, "gpt_config.yaml")
47
 
48
 
49
def combine_images_to_grid(
    images: list[str | Image.Image],
    cat_row_col: tuple[int, int] | None = None,
    target_wh: tuple[int, int] = (512, 512),
    image_mode: str = "RGB",
) -> list[Image.Image]:
    """Tile one or more images into a single grid image.

    Args:
        images: Image file paths and/or PIL images to combine.
        cat_row_col: Optional (rows, cols) grid layout. When None, a
            near-square layout is derived from the number of images.
        target_wh: (width, height) each tile is resized to.
        image_mode: PIL mode for loaded images and the output canvas.

    Returns:
        A single-element list holding the combined grid image.
    """
    # Normalize first so a single *path* input is also returned as a PIL
    # image (previously the raw path string leaked through for n == 1).
    pil_images = [
        Image.open(p).convert(image_mode) if isinstance(p, str) else p
        for p in images
    ]
    if len(pil_images) == 1:
        return pil_images

    if cat_row_col is None:
        # Near-square layout: columns = ceil(sqrt(n)), rows fill the rest.
        n_col = math.ceil(math.sqrt(len(pil_images)))
        n_row = math.ceil(len(pil_images) / n_col)
    else:
        n_row, n_col = cat_row_col

    tile_w, tile_h = target_wh
    pil_images = [img.resize(target_wh) for img in pil_images]

    grid = Image.new(image_mode, (n_col * tile_w, n_row * tile_h), (0, 0, 0))
    for idx, img in enumerate(pil_images):
        row, col = divmod(idx, n_col)
        grid.paste(img, (col * tile_w, row * tile_h))

    return [grid]
79
+
80
+
81
  class GPTclient:
82
  """A client to interact with GPT models via OpenAI or Azure API.
83
 
embodied_gen/utils/llm_resolve.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project EmbodiedGen
2
+ #
3
+ # Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14
+ # implied. See the License for the specific language governing
15
+ # permissions and limitations under the License.
16
+
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+
22
+ from embodied_gen.utils.gpt_clients import GPTclient
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
# Prompt used by `resolve_instance_with_llm` to map a free-form user
# description to exactly one scene-instance identifier. Placeholders
# `{instance_list}` and `{user_spec}` are filled in via str.format().
DEFAULT_RESOLVE_PROMPT = """You are matching a user's description to exactly one object in a 3D scene.

Scene instance list (each is an identifier, may contain IDs like "kitchen_cabinet_9197760", "banana_001"):
{instance_list}

User description (what they want to refer to, e.g. "黄色水果", "the yellow fruit", "柜子", "oven"):
"{user_spec}"

Rules:
1. Pick the ONE instance from the list that best matches the user's description (semantic match: e.g. "黄色水果" -> banana, "柜子" -> cabinet).
2. If no instance matches, reply with exactly: NONE
3. Otherwise reply with the EXACT instance name from the list, nothing else (no quotes, no explanation).

Your reply (one line, exact instance name or NONE):"""
40
+
41
+
42
def resolve_instance_with_llm(
    gpt_client: GPTclient,
    instance_names: list[str],
    user_spec: str,
    prompt_template: str | None = None,
) -> str | None:
    """Map a user description to a single scene instance name via LLM semantic matching.

    E.g. user says "yellow fruit" and the scene has "banana_001" -> returns
    "banana_001". Returns None when there is no match or the LLM replies NONE;
    the caller should prompt the user that the object does not exist and ask
    for re-entry.

    Args:
        gpt_client: GPT client instance, e.g.
            embodied_gen.utils.gpt_clients.GPT_CLIENT.
        instance_names: Scene instance names, e.g. from
            FloorplanManager.get_instance_names().
        user_spec: User input, e.g. "yellow fruit", "柜子", "the table".
        prompt_template: Optional custom prompt with placeholders
            {instance_list} and {user_spec}.

    Returns:
        The matched instance name (exactly one of instance_names), or None.
    """
    if not user_spec or not instance_names:
        return None

    template = prompt_template or DEFAULT_RESOLVE_PROMPT
    prompt = template.format(
        instance_list="\n".join(f"- {n}" for n in instance_names),
        user_spec=user_spec.strip(),
    )

    try:
        response = gpt_client.query(text_prompt=prompt)
    except Exception as e:
        logger.warning("LLM `resolve_instance_with_llm` query failed: %s", e)
        return None

    if not response:
        return None

    # Use only the first line (splitlines also handles \r\n); models
    # sometimes append explanations despite the prompt.
    first_line = response.strip().splitlines()[0].strip()
    if first_line.upper() == "NONE":
        return None

    # Strip quotes, backticks, and list-bullet decorations the model may add
    # (e.g. `- banana_001`, `"banana_001"`).
    candidate = first_line.strip("\"'` ").lstrip("-* ").strip()
    if not candidate:
        return None

    # Match priority: exact -> case-insensitive -> unique substring.
    if candidate in instance_names:
        return candidate

    names_lower = {n.lower(): n for n in instance_names}
    candidate_lower = candidate.lower()
    if candidate_lower in names_lower:
        return names_lower[candidate_lower]

    matches = [n for n in instance_names if candidate_lower in n.lower()]
    if len(matches) == 1:
        return matches[0]

    logger.debug(
        "resolve_instance_with_llm: LLM reply %r did not match any of %s",
        first_line,
        instance_names[:5],
    )
    return None
embodied_gen/utils/process_media.py CHANGED
@@ -53,7 +53,7 @@ __all__ = [
53
  ]
54
 
55
 
56
- @spaces.GPU
57
  def render_asset3d(
58
  mesh_path: str,
59
  output_root: str,
 
53
  ]
54
 
55
 
56
+ @spaces.GPU(duration=120)
57
  def render_asset3d(
58
  mesh_path: str,
59
  output_root: str,
embodied_gen/utils/trender.py CHANGED
@@ -43,7 +43,7 @@ __all__ = [
43
  ]
44
 
45
 
46
- @spaces.GPU
47
  def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
48
  renderer = MeshRenderer()
49
  renderer.rendering_options.resolution = options.get("resolution", 512)
@@ -66,7 +66,7 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
66
  return rets
67
 
68
 
69
- @spaces.GPU
70
  def render_gs_frames(
71
  sample,
72
  extrinsics,
@@ -117,7 +117,7 @@ def render_gs_frames(
117
  return dict(outputs)
118
 
119
 
120
- @spaces.GPU
121
  def render_video(
122
  sample,
123
  resolution=512,
@@ -149,7 +149,7 @@ def render_video(
149
  return result
150
 
151
 
152
- @spaces.GPU
153
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
154
  return {
155
  "gaussian": {
 
43
  ]
44
 
45
 
46
+ @spaces.GPU(duration=120)
47
  def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
48
  renderer = MeshRenderer()
49
  renderer.rendering_options.resolution = options.get("resolution", 512)
 
66
  return rets
67
 
68
 
69
+ @spaces.GPU(duration=120)
70
  def render_gs_frames(
71
  sample,
72
  extrinsics,
 
117
  return dict(outputs)
118
 
119
 
120
+ @spaces.GPU(duration=120)
121
  def render_video(
122
  sample,
123
  resolution=512,
 
149
  return result
150
 
151
 
152
+ @spaces.GPU(duration=120)
153
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
154
  return {
155
  "gaussian": {
requirements.txt CHANGED
@@ -20,7 +20,7 @@ igraph==0.11.8
20
  pyvista==0.36.1
21
  openai==1.58.1
22
  transformers==4.42.4
23
- gradio
24
  sentencepiece==0.2.0
25
  diffusers==0.31.0
26
  xatlas==0.0.9
 
20
  pyvista==0.36.1
21
  openai==1.58.1
22
  transformers==4.42.4
23
+ gradio[oauth,mcp]==5.33.1
24
  sentencepiece==0.2.0
25
  diffusers==0.31.0
26
  xatlas==0.0.9