stevee00 committed on
Commit
dcb20f6
·
verified ·
1 Parent(s): 2033370

Upload src/interiorfusion/models/scene_assembly.py

Browse files
src/interiorfusion/models/scene_assembly.py ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Phase 4: Scene Assembly Module.
2
+
3
+ Optimizes room layout, resolves collisions, normalizes scale,
4
+ and builds the editable scene graph representation.
5
+ """
6
+
7
+ from typing import Dict, List, Optional, Tuple
8
+
9
+ import numpy as np
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.nn.functional as F
13
+
14
+
15
class SceneAssemblyModule(nn.Module):
    """Assemble individual objects into a coherent room scene.

    The pipeline places each object mesh at a position derived from its 2D
    detection, rescales it towards a per-class size prior, snaps it to the
    floor, separates overlapping objects, and finally produces both a scene
    graph (plain dicts) and a merged mesh.

    Coordinate convention used throughout this class: index 1 (y) is the
    vertical axis; indices 0 (x) and 2 (z) span the floor plane.

    Mesh arguments are expected to behave like ``trimesh.Trimesh``: the code
    relies on ``copy()``, ``bounds``, ``vertices``, ``apply_translation`` and
    ``apply_scale``.
    """

    def __init__(
        self,
        device: str = "cuda",
        dtype: torch.dtype = torch.float16,
    ):
        """Store configuration and the furniture size priors.

        Args:
            device: Device identifier; stored on the instance only.
            dtype: Torch dtype; stored on the instance only.
        """
        super().__init__()
        self.device = device
        self.dtype = dtype

        # Furniture dimension priors (meters) for scale normalization
        self.furniture_priors = {
            "sofa": {"width": 2.0, "depth": 0.9, "height": 0.8},
            "chair": {"width": 0.5, "depth": 0.5, "height": 0.9},
            "table": {"width": 1.2, "depth": 0.8, "height": 0.75},
            "coffee_table": {"width": 1.0, "depth": 0.6, "height": 0.45},
            "bed": {"width": 2.0, "depth": 1.5, "height": 0.5},
            "desk": {"width": 1.4, "depth": 0.7, "height": 0.75},
            "bookshelf": {"width": 1.0, "depth": 0.3, "height": 2.0},
            "lamp": {"width": 0.3, "depth": 0.3, "height": 1.5},
            "wardrobe": {"width": 1.5, "depth": 0.6, "height": 2.1},
            "tv_stand": {"width": 1.2, "depth": 0.4, "height": 0.5},
            "rug": {"width": 2.0, "depth": 1.5, "height": 0.02},
            "plant": {"width": 0.3, "depth": 0.3, "height": 1.0},
            "furniture": {"width": 0.8, "depth": 0.8, "height": 0.8},  # default
        }

    @staticmethod
    def _mesh_bounds(mesh) -> Optional[np.ndarray]:
        """Return ``mesh.bounds`` as a (2, 3) float array, or None if the mesh has none."""
        bounds = getattr(mesh, "bounds", None)
        if bounds is None:
            return None
        return np.asarray(bounds, dtype=float)

    def assemble(
        self,
        room_shell_mesh: "trimesh.Trimesh",  # type: ignore
        object_meshes: List["trimesh.Trimesh"],  # type: ignore
        room_layout: Dict,
        detected_objects: Dict,
        depth_map: np.ndarray,
    ) -> Dict:
        """Assemble a room scene from individual components.

        Steps:
            1. Place objects at detected positions
            2. Normalize scales using furniture priors
            3. Ensure objects rest on floor
            4. Resolve collisions
            5. Build scene graph
            6. Merge into unified mesh

        Args:
            room_shell_mesh: Mesh of the empty room.
            object_meshes: One mesh per object; list index ``i`` is looked up
                as key ``i`` in ``detected_objects``.
            room_layout: Layout dict; only ``room_layout["floor"]["height"]``
                is read (defaults to 0.0).
            detected_objects: Mapping from object index to a detection dict
                with optional ``"bbox"``, ``"depth_range"`` and
                ``"class_name"`` entries.
            depth_map: Depth image; only its shape is used, to recover the
                image size for the pinhole back-projection.

        Returns:
            Dict with keys ``"scene_mesh"``, ``"object_meshes"`` (the final,
            transformed copies) and ``"scene_graph"``.
        """
        # Step 1: Initial placement from detected positions
        placed_objects = self._place_objects(
            object_meshes, detected_objects, room_layout, depth_map=depth_map
        )

        # Step 2: Scale normalization
        normalized_objects = self._normalize_scales(
            placed_objects, detected_objects, depth_map
        )

        # Step 3: Gravity constraint (objects on floor)
        grounded_objects = self._apply_gravity(normalized_objects, room_layout)

        # Step 4: Collision detection and resolution
        resolved_objects = self._resolve_collisions(grounded_objects, room_layout)

        # Step 5: Build scene graph
        scene_graph = self._build_scene_graph(
            resolved_objects, room_layout, detected_objects
        )

        # Step 6: Merge into unified mesh
        scene_mesh = self._merge_scene(room_shell_mesh, resolved_objects)

        return {
            "scene_mesh": scene_mesh,
            "object_meshes": resolved_objects,
            "scene_graph": scene_graph,
        }

    def _place_objects(
        self,
        object_meshes: List["trimesh.Trimesh"],  # type: ignore
        detected_objects: Dict,
        room_layout: Dict,
        depth_map: Optional[np.ndarray] = None,
    ) -> List["trimesh.Trimesh"]:  # type: ignore
        """Place copies of the objects at their detected positions in 3D space.

        The bbox centre is back-projected with a simple pinhole model whose
        focal length is ``max(image_width, image_height)`` and whose principal
        point is the image centre; the mean of ``depth_range`` is used as z.

        Args:
            object_meshes: Meshes to place; they are copied, never mutated.
            detected_objects: Mapping from object index to detection dict.
            room_layout: Layout dict providing the floor height.
            depth_map: Optional depth image used only for its shape. Assumed
                to be laid out as (H, W) or (H, W, C) — TODO confirm with the
                caller. When omitted, a 512x512 image is assumed.

        Returns:
            New list of mesh copies; objects without a detection entry (or
            without geometry) are returned as untouched copies.
        """
        floor_height = room_layout.get("floor", {}).get("height", 0.0)

        if depth_map is not None and np.ndim(depth_map) >= 2:
            img_h, img_w = np.shape(depth_map)[:2]
        else:
            img_h, img_w = 512, 512
        focal = max(img_w, img_h)
        principal_x = img_w / 2

        placed = []
        for i, mesh in enumerate(object_meshes):
            mesh_copy = mesh.copy()

            if i not in detected_objects or self._mesh_bounds(mesh_copy) is None:
                placed.append(mesh_copy)
                continue

            obj_info = detected_objects[i]
            x1, _y1, x2, _y2 = obj_info.get("bbox", [0, 0, 100, 100])
            depth_range = obj_info.get("depth_range", [1.0, 3.0])

            # Simple approximation: centre of the bbox at the mean depth.
            # Depth is used directly as the +z coordinate.
            mean_depth = float(np.mean(depth_range))
            x_3d = ((x1 + x2) / 2 - principal_x) * mean_depth / focal
            z_3d = mean_depth

            # Centre the mesh horizontally on the origin; the vertical
            # position is fixed later by the gravity step.
            if hasattr(mesh_copy, "centroid"):
                centroid = mesh_copy.centroid
            else:
                centroid = self._mesh_bounds(mesh_copy).mean(axis=0)
            mesh_copy.apply_translation([-centroid[0], 0, -centroid[2]])

            # Move to detected position
            mesh_copy.apply_translation([x_3d, floor_height, z_3d])
            placed.append(mesh_copy)

        return placed

    def _normalize_scales(
        self,
        object_meshes: List["trimesh.Trimesh"],  # type: ignore
        detected_objects: Dict,
        depth_map: np.ndarray,
    ) -> List["trimesh.Trimesh"]:  # type: ignore
        """Uniformly rescale each object towards its furniture size prior.

        The largest extent of the mesh is matched to the largest dimension of
        the class prior, with the scale factor clamped to ``[0.1, 3.0]``. The
        bounding-box centre is restored afterwards so that rescaling does not
        move an object away from where it was placed.

        Args:
            object_meshes: Meshes to rescale; they are copied, never mutated.
            detected_objects: Mapping from object index to detection dict;
                ``"class_name"`` selects the prior (default ``"furniture"``).
            depth_map: Currently unused; kept for interface compatibility.

        Returns:
            New list of rescaled mesh copies.
        """
        normalized = []

        for i, mesh in enumerate(object_meshes):
            mesh_copy = mesh.copy()

            class_name = "furniture"
            if i in detected_objects:
                class_name = detected_objects[i].get("class_name", "furniture")
            prior = self.furniture_priors.get(
                class_name, self.furniture_priors["furniture"]
            )

            bounds = self._mesh_bounds(mesh_copy)
            if bounds is not None:
                max_current = float((bounds[1] - bounds[0]).max())

                if max_current > 0.001:  # Avoid division by zero
                    max_prior = max(prior["width"], prior["depth"], prior["height"])
                    # Clamp scale to a reasonable range
                    scale_factor = float(np.clip(max_prior / max_current, 0.1, 3.0))

                    # Re-centre after scaling: a scale that is not about the
                    # mesh centre would otherwise shift the placed position.
                    center_before = bounds.mean(axis=0)
                    mesh_copy.apply_scale(scale_factor)
                    center_after = self._mesh_bounds(mesh_copy).mean(axis=0)
                    mesh_copy.apply_translation(center_before - center_after)

            normalized.append(mesh_copy)

        return normalized

    def _apply_gravity(
        self,
        object_meshes: List["trimesh.Trimesh"],  # type: ignore
        room_layout: Dict,
    ) -> List["trimesh.Trimesh"]:  # type: ignore
        """Translate each object vertically so its lowest vertex sits on the floor.

        Args:
            object_meshes: Meshes to ground; they are copied, never mutated.
            room_layout: Layout dict providing ``["floor"]["height"]``.

        Returns:
            New list of grounded mesh copies; meshes without vertices are
            returned as untouched copies.
        """
        floor_height = room_layout.get("floor", {}).get("height", 0.0)

        grounded = []
        for mesh in object_meshes:
            mesh_copy = mesh.copy()

            if len(mesh_copy.vertices) > 0:
                lowest_y = mesh_copy.vertices[:, 1].min()
                mesh_copy.apply_translation([0, floor_height - lowest_y, 0])

            grounded.append(mesh_copy)

        return grounded

    def _resolve_collisions(
        self,
        object_meshes: List["trimesh.Trimesh"],  # type: ignore
        room_layout: Dict,
    ) -> List["trimesh.Trimesh"]:  # type: ignore
        """Separate objects whose axis-aligned bounding boxes overlap.

        Overlapping pairs are pushed apart in opposite directions along the
        horizontal axis (x or z) with the smaller penetration, by half the
        penetration plus a 0.05 margin each. Only horizontal pushes are used
        so that the floor contact established by the gravity step is kept.
        The sweep is repeated until no overlap remains or 50 iterations have
        run, so a crowded scene may still contain overlaps on return.

        Args:
            object_meshes: Meshes to separate; they are copied, never mutated.
            room_layout: Currently unused; kept for interface compatibility.

        Returns:
            New list of mesh copies after separation.
        """
        resolved = [mesh.copy() for mesh in object_meshes]
        max_iterations = 50
        margin = 0.05

        for _ in range(max_iterations):
            collisions_found = False

            for i in range(len(resolved)):
                for j in range(i + 1, len(resolved)):
                    # Bounds are re-read every time because earlier pushes in
                    # this sweep may already have moved either mesh.
                    b1 = self._mesh_bounds(resolved[i])
                    b2 = self._mesh_bounds(resolved[j])
                    if b1 is None or b2 is None:
                        continue

                    # Simple bounding box collision test (strict overlap on
                    # all three axes).
                    if not (np.all(b1[0] < b2[1]) and np.all(b1[1] > b2[0])):
                        continue

                    collisions_found = True
                    penetration = np.minimum(b1[1] - b2[0], b2[1] - b1[0])
                    axis = 0 if penetration[0] <= penetration[2] else 2

                    push = np.zeros(3)
                    push[axis] = penetration[axis] * 0.5 + margin

                    # Push in opposite directions, away from each other
                    if b1.mean(axis=0)[axis] < b2.mean(axis=0)[axis]:
                        resolved[i].apply_translation(-push)
                        resolved[j].apply_translation(push)
                    else:
                        resolved[i].apply_translation(push)
                        resolved[j].apply_translation(-push)

            if not collisions_found:
                break

        return resolved

    def _build_scene_graph(
        self,
        object_meshes: List["trimesh.Trimesh"],  # type: ignore
        room_layout: Dict,
        detected_objects: Dict,
    ) -> Dict:
        """Build an editable scene graph from the assembled objects.

        Nodes: one ``"room_shell"`` node plus one node per object carrying its
        label, bounding-box centre (``"position"``) and extents
        (``"dimensions"``); both are ``None`` for a mesh without geometry.

        Edges: every object is ``"in"`` the room. Object pairs whose centres
        are closer than 2.0 additionally get one edge: ``"next_to"`` when
        their centre heights differ by less than 0.1, ``"on"`` (pointing from
        the higher object to the lower one) when they differ by more than
        0.2, and ``"near"`` otherwise.

        Args:
            object_meshes: Final object meshes; list index is the node id.
            room_layout: Currently unused; kept for interface compatibility.
            detected_objects: Mapping from object index to detection dict;
                ``"class_name"`` becomes the node label.

        Returns:
            Dict with ``"nodes"`` and ``"edges"`` lists of plain dicts.
        """
        nodes = [
            {
                "id": "room_shell",
                "type": "room",
                "label": "room",
                "bbox": None,
            }
        ]
        edges = []
        centers = []

        # Object nodes
        for i, mesh in enumerate(object_meshes):
            class_name = "furniture"
            if i in detected_objects:
                class_name = detected_objects[i].get("class_name", "furniture")

            bounds = self._mesh_bounds(mesh)
            if bounds is None:
                center = None
                position = None
                dimensions = None
            else:
                center = bounds.mean(axis=0)
                position = center.tolist()
                dimensions = (bounds[1] - bounds[0]).tolist()
            centers.append(center)

            nodes.append({
                "id": i,
                "type": "object",
                "label": class_name,
                "position": position,
                "dimensions": dimensions,
                "mesh_index": i,
            })

            # Edge: object is IN room
            edges.append({
                "from": i,
                "to": "room_shell",
                "relation": "in",
            })

        # Infer spatial relationships between objects
        for i in range(len(centers)):
            if centers[i] is None:
                continue
            for j in range(i + 1, len(centers)):
                if centers[j] is None:
                    continue

                dist = float(np.linalg.norm(centers[i] - centers[j]))
                if dist >= 2.0:  # Proximity threshold
                    continue

                height_gap = centers[i][1] - centers[j][1]
                source, target = i, j
                if abs(height_gap) < 0.1:
                    relation = "next_to"
                elif height_gap > 0.2:
                    relation = "on"
                elif height_gap < -0.2:
                    # j is the higher object, so the edge points from j to i.
                    relation = "on"
                    source, target = j, i
                else:
                    relation = "near"

                edges.append({
                    "from": source,
                    "to": target,
                    "relation": relation,
                    "distance": dist,
                })

        return {
            "nodes": nodes,
            "edges": edges,
        }

    def _merge_scene(
        self,
        room_shell_mesh: "trimesh.Trimesh",  # type: ignore
        object_meshes: List["trimesh.Trimesh"],  # type: ignore
    ) -> "trimesh.Trimesh":  # type: ignore
        """Merge room shell and objects into one unified scene mesh.

        Meshes without vertices are ignored. If bulk concatenation fails, the
        meshes are added one at a time and any mesh that cannot be added is
        skipped with a logged warning (best effort).

        Args:
            room_shell_mesh: Mesh of the empty room.
            object_meshes: Object meshes to merge into the room.

        Returns:
            The merged mesh, or an empty ``trimesh.Trimesh`` when no input
            mesh has vertices.
        """
        import logging

        import trimesh

        logger = logging.getLogger(__name__)

        candidates = [room_shell_mesh] + list(object_meshes)

        # Filter out empty meshes
        valid_meshes = [
            m for m in candidates
            if hasattr(m, "vertices") and len(m.vertices) > 0
        ]

        if not valid_meshes:
            return trimesh.Trimesh()

        try:
            return trimesh.util.concatenate(valid_meshes)
        except Exception:
            logger.warning(
                "Bulk mesh concatenation failed; merging meshes one by one",
                exc_info=True,
            )

        # Fallback: add meshes one by one
        scene_mesh = valid_meshes[0]
        for index, mesh in enumerate(valid_meshes[1:], start=1):
            try:
                scene_mesh = scene_mesh + mesh
            except Exception:
                logger.warning(
                    "Skipping mesh %d that could not be merged", index,
                    exc_info=True,
                )

        return scene_mesh

    def reassemble_with_textures(
        self,
        room_shell_mesh: "trimesh.Trimesh",  # type: ignore
        textured_objects: List["trimesh.Trimesh"],  # type: ignore
        scene_graph: Dict,
    ) -> "trimesh.Trimesh":  # type: ignore
        """Re-merge the scene using textured object meshes.

        Args:
            room_shell_mesh: Mesh of the empty room.
            textured_objects: Textured object meshes, merged as given.
            scene_graph: Currently unused; kept for interface compatibility.

        Returns:
            The merged scene mesh.
        """
        return self._merge_scene(room_shell_mesh, textured_objects)