"""Phase 3: 3D Reconstruction Module.

Reconstructs:
- Room shell (walls, floor, ceiling) as planar meshes
- Per-object 3D meshes using TRELLIS.2 or native InteriorFusion-L
- Scene-level Gaussian Splatting representation
"""

import os
from typing import Dict, List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

try:
    from PIL import Image
except ImportError:
    # Pillow is only referenced in type annotations in this module, so it is
    # treated as optional in the same way trimesh is (imported lazily below).
    Image = None


class Reconstruction3DModule(nn.Module):
    """Reconstruct 3D geometry from multi-view images.

    Every Gaussian tensor produced by this class has 14 columns:
    ``[x, y, z, scale_x, scale_y, scale_z, rot_qx, rot_qy, rot_qz, rot_qw,
    r, g, b, opacity]``.
    """

    def __init__(
        self,
        model_size: str = "L",
        device: str = "cuda",
        dtype: torch.dtype = torch.float16,
        cache_dir: Optional[str] = None,
    ):
        """Store configuration; no reconstruction model is loaded here.

        Args:
            model_size: Model size tag, stored as-is.
            device: Device on which Gaussian tensors are created.
            dtype: Stored as-is; not used by the methods in this class.
            cache_dir: Stored as-is; not used by the methods in this class.
        """
        super().__init__()
        self.model_size = model_size
        self.device = device
        self.dtype = dtype
        self.cache_dir = cache_dir

        # Lazy load reconstruction models
        self._trellis_model = None
        self._native_model = None

    def reconstruct_room_shell(
        self,
        room_shell_views: Dict[str, "Image.Image"],
        room_layout: Dict,
        depth_map: np.ndarray,
    ) -> Optional["trimesh.Trimesh"]:  # type: ignore
        """Reconstruct room shell (walls, floor, ceiling) as planar meshes.

        Args:
            room_shell_views: Not used by the current implementation.
            room_layout: Layout dict; the keys read are ``"floor"``,
                ``"ceiling"``, ``"walls"`` and ``"dimensions"``.
            depth_map: Not used by the current implementation.

        Returns:
            The concatenation of all planes that could be built, a simple box
            when no plane could be built, or ``None`` when trimesh is not
            installed.
        """
        try:
            import trimesh
        except ImportError:
            print("Warning: trimesh not available, using numpy fallback")
            return None

        meshes = []

        # Floor mesh
        floor = room_layout.get("floor", {})
        if floor:
            floor_mesh = self._create_floor_mesh(floor, room_layout)
            if floor_mesh is not None:
                meshes.append(floor_mesh)

        # Ceiling mesh
        ceiling = room_layout.get("ceiling", {})
        if ceiling:
            ceiling_mesh = self._create_ceiling_mesh(ceiling, room_layout)
            if ceiling_mesh is not None:
                meshes.append(ceiling_mesh)

        # Wall meshes
        for wall in room_layout.get("walls", []):
            wall_mesh = self._create_wall_mesh(wall, room_layout)
            if wall_mesh is not None:
                meshes.append(wall_mesh)

        # Fallback: create simple box room
        if not meshes:
            return self._create_fallback_room(room_layout)

        # Combine all meshes
        try:
            room_shell = trimesh.util.concatenate(meshes)
        except Exception:
            # Best effort: fall back to pairwise addition of the meshes.
            room_shell = meshes[0]
            for m in meshes[1:]:
                room_shell += m
        return room_shell

    def _create_floor_mesh(
        self, floor: Dict, room_layout: Dict
    ) -> Optional["trimesh.Trimesh"]:  # type: ignore
        """Create a rectangular floor plane centred on the origin.

        The plane lies at ``y = floor["height"]`` (default 0.0) and spans the
        layout's ``width`` x ``depth`` (default 5.0 each). Returns ``None``
        when trimesh is not installed.
        """
        try:
            import trimesh
        except ImportError:
            return None

        dims = room_layout.get("dimensions", {})
        width = dims.get("width", 5.0)
        depth = dims.get("depth", 5.0)
        height = floor.get("height", 0.0)

        # Create rectangular floor
        vertices = np.array([
            [-width / 2, height, -depth / 2],
            [width / 2, height, -depth / 2],
            [width / 2, height, depth / 2],
            [-width / 2, height, depth / 2],
        ])
        # Winding chosen so cross(v1 - v0, v2 - v0) points to +Y, i.e. the
        # floor faces up into the room (the walls extend from y=0 to y=height).
        faces = np.array([
            [0, 2, 1],
            [0, 3, 2],
        ])

        mesh = trimesh.Trimesh(vertices=vertices, faces=faces)

        # Add UV coordinates for texture mapping
        uvs = np.array([
            [0, 0],
            [1, 0],
            [1, 1],
            [0, 1],
        ])
        mesh.visual = trimesh.visual.TextureVisuals(uv=uvs)

        return mesh

    def _create_ceiling_mesh(
        self, ceiling: Dict, room_layout: Dict
    ) -> Optional["trimesh.Trimesh"]:  # type: ignore
        """Create a rectangular ceiling plane centred on the origin.

        The plane lies at ``y = ceiling["height"]`` (default 2.7) and spans
        the layout's ``width`` x ``depth`` (default 5.0 each). Returns
        ``None`` when trimesh is not installed.
        """
        try:
            import trimesh
        except ImportError:
            return None

        dims = room_layout.get("dimensions", {})
        width = dims.get("width", 5.0)
        depth = dims.get("depth", 5.0)
        height = ceiling.get("height", 2.7)

        vertices = np.array([
            [-width / 2, height, -depth / 2],
            [width / 2, height, -depth / 2],
            [width / 2, height, depth / 2],
            [-width / 2, height, depth / 2],
        ])
        # Ceiling faces point downward: with this winding
        # cross(v1 - v0, v2 - v0) points to -Y.
        faces = np.array([
            [0, 1, 2],
            [0, 2, 3],
        ])

        return trimesh.Trimesh(vertices=vertices, faces=faces)

    def _create_wall_mesh(
        self, wall: Dict, room_layout: Dict
    ) -> Optional["trimesh.Trimesh"]:  # type: ignore
        """Create one rectangular wall plane from y=0 up to the room height.

        Args:
            wall: Wall dict; reads ``"normal"`` (default ``[0, 0, 1]``),
                ``"position"`` (default 0.0) and ``"direction"`` (default
                ``"back"``). ``"front"``/``"back"`` walls are perpendicular
                to the z-axis; any other direction is treated as a side wall
                perpendicular to the x-axis.
            room_layout: Layout dict providing ``"dimensions"``.

        Returns:
            The wall mesh, wound so that its face normal has the same sign as
            the given normal along the wall's own axis, or ``None`` when
            trimesh is not installed.
        """
        try:
            import trimesh
        except ImportError:
            return None

        dims = room_layout.get("dimensions", {})
        width = dims.get("width", 5.0)
        depth = dims.get("depth", 5.0)
        height = dims.get("height", 2.7)

        normal = np.asarray(wall.get("normal", [0, 0, 1]), dtype=float)
        position = wall.get("position", 0.0)
        direction = wall.get("direction", "back")

        # In both branches the vertex order is chosen so that the winding
        # [0, 1, 2] yields a normal along the POSITIVE wall axis.
        if direction in ("back", "front"):
            # Wall perpendicular to z-axis
            z = position if direction == "front" else -position
            vertices = np.array([
                [-width / 2, 0, z],
                [width / 2, 0, z],
                [width / 2, height, z],
                [-width / 2, height, z],
            ])
            faces_positive_axis = normal[2] > 0.5
        else:  # left or right
            # Wall perpendicular to x-axis
            x = position if direction == "right" else -position
            vertices = np.array([
                [x, 0, depth / 2],
                [x, 0, -depth / 2],
                [x, height, -depth / 2],
                [x, height, depth / 2],
            ])
            faces_positive_axis = normal[0] > 0.5

        # Determine face orientation based on normal
        if faces_positive_axis:
            faces = np.array([[0, 1, 2], [0, 2, 3]])
        else:
            faces = np.array([[0, 2, 1], [0, 3, 2]])

        return trimesh.Trimesh(vertices=vertices, faces=faces)

    def _create_fallback_room(self, room_layout: Dict) -> "trimesh.Trimesh":  # type: ignore
        """Create a simple box room as fallback.

        The box uses the layout's ``width``/``height``/``depth`` (defaults
        5.0 / 2.7 / 5.0) and is translated so that its base sits at y=0.
        """
        import trimesh

        dims = room_layout.get("dimensions", {})
        width = dims.get("width", 5.0)
        depth = dims.get("depth", 5.0)
        height = dims.get("height", 2.7)

        box = trimesh.creation.box(extents=[width, height, depth])
        box.apply_translation([0, height / 2, 0])
        return box

    def reconstruct_object(
        self,
        multiviews: List["Image.Image"],
        room_layout: Optional[Dict] = None,
        depth_map: Optional[np.ndarray] = None,
        object_info: Optional[Dict] = None,
    ) -> Tuple["trimesh.Trimesh", Optional[torch.Tensor]]:  # type: ignore
        """Reconstruct a single furniture object from multi-view images.

        Tries TRELLIS.2 first and falls back to a depth-based reconstruction.

        Args:
            multiviews: Multi-view images of the object.
            room_layout: Not used by the current implementation.
            depth_map: Depth image used by the fallback path.
            object_info: Object dict; the fallback path reads ``"bbox"``.

        Returns:
            ``(mesh, gaussian_cloud)``; ``gaussian_cloud`` is currently
            always ``None``.
        """
        # Try TRELLIS.2 if available
        mesh = self._try_trellis_reconstruction(multiviews)
        if mesh is not None:
            return mesh, None

        # Fallback: simple reconstruction from depth
        return self._fallback_object_reconstruction(multiviews, depth_map, object_info)

    def _try_trellis_reconstruction(
        self,
        multiviews: List["Image.Image"],
    ) -> Optional["trimesh.Trimesh"]:  # type: ignore
        """Try to use TRELLIS.2 for object reconstruction.

        Placeholder: always returns ``None`` so that callers take the
        fallback path. In production this is where
        ``from trellis import TRELLISPipeline`` would be attempted.
        """
        return None

    @staticmethod
    def _depth_region_to_points(
        depth_map: Optional[np.ndarray],
        object_info: Optional[Dict],
    ) -> np.ndarray:
        """Back-project the object's bbox region of the depth map to points.

        Returns an ``(N, 3)`` array (possibly empty) holding only finite
        points whose depth exceeds 0.1.
        """
        empty = np.zeros((0, 3), dtype=np.float64)
        if depth_map is None or object_info is None:
            return empty

        depth = np.asarray(depth_map)
        # Accept (H, W, 1) or (1, H, W) by dropping the singleton axis.
        if depth.ndim == 3 and depth.shape[-1] == 1:
            depth = depth[..., 0]
        elif depth.ndim == 3 and depth.shape[0] == 1:
            depth = depth[0]
        if depth.ndim != 2:
            return empty

        img_h, img_w = depth.shape
        bbox = object_info.get("bbox", [0, 0, 100, 100])
        # Slice indices must be ints, and negative values would wrap around,
        # so round and clamp the box to the image bounds.
        x1, y1, x2, y2 = (int(round(float(c))) for c in bbox)
        x1, x2 = min(max(x1, 0), img_w), min(max(x2, 0), img_w)
        y1, y2 = min(max(y1, 0), img_h), min(max(y2, 0), img_h)

        # Extract depth region for this object
        obj_depth = depth[y1:y2, x1:x2]
        H, W = obj_depth.shape
        if H == 0 or W == 0:
            return empty

        # Pinhole back-projection with a focal length equal to the larger
        # crop side and the principal point at the crop centre.
        fx = fy = max(W, H)
        cx, cy = W / 2, H / 2

        u, v = np.meshgrid(np.arange(W), np.arange(H))
        z = obj_depth
        x = (u - cx) * z / fx
        y = (v - cy) * z / fy

        points = np.stack([x, y, z], axis=-1).reshape(-1, 3)

        # Remove invalid points (too close, NaN or infinite)
        valid = np.isfinite(points).all(axis=1) & (points[:, 2] > 0.1)
        return points[valid]

    def _fallback_object_reconstruction(
        self,
        multiviews: List["Image.Image"],
        depth_map: Optional[np.ndarray] = None,
        object_info: Optional[Dict] = None,
    ) -> Tuple["trimesh.Trimesh", Optional[torch.Tensor]]:  # type: ignore
        """Simple reconstruction from the object's depth region.

        Args:
            multiviews: Not used by the current implementation.
            depth_map: Depth image; when ``None`` the cube fallback is used.
            object_info: Object dict providing ``"bbox"`` as
                ``[x1, y1, x2, y2]``; when ``None`` the cube fallback is used.

        Returns:
            ``(geometry, None)`` where geometry is, in order of preference:
            the convex hull of the back-projected points (needs more than 100
            valid points), a ``trimesh.PointCloud`` of those points, or a
            0.5-unit cube.
        """
        import trimesh

        points = self._depth_region_to_points(depth_map, object_info)

        if len(points) > 100:
            # Create convex hull as simple mesh
            try:
                return trimesh.convex.convex_hull(points), None
            except Exception as e:
                print(f"Warning: convex hull failed, using point cloud: {e}")

        # If hull fails, return point cloud as mesh
        if len(points) > 0:
            return trimesh.PointCloud(points), None

        # Ultimate fallback: small cube
        return trimesh.creation.box(extents=[0.5, 0.5, 0.5]), None

    def build_scene_gaussians(
        self,
        room_shell_mesh: "trimesh.Trimesh",  # type: ignore
        object_gaussians: List[Optional[torch.Tensor]],
        object_meshes: List["trimesh.Trimesh"],  # type: ignore
    ) -> torch.Tensor:
        """Build a unified Gaussian Splatting representation for the scene.

        Args:
            room_shell_mesh: Room mesh; converted to Gaussians when it has
                vertices.
            object_gaussians: Per-object Gaussian tensors (or ``None``).
            object_meshes: Per-object meshes, index-aligned with
                ``object_gaussians``. A mesh is converted to Gaussians only
                when the object has no Gaussian tensor of its own.

        Returns:
            All Gaussians concatenated along dim 0, or an empty ``(0, 14)``
            tensor when nothing could be produced.
        """
        gaussians = []

        # Convert room shell mesh to Gaussians
        try:
            if hasattr(room_shell_mesh, "vertices") and len(room_shell_mesh.vertices) > 0:
                gaussians.append(self._mesh_to_gaussians(room_shell_mesh))
        except Exception as e:
            print(f"Warning: could not convert room shell to Gaussians: {e}")

        # Add per-object Gaussians, falling back to the object's mesh
        object_gaussians = object_gaussians or []
        object_meshes = object_meshes or []
        for idx in range(max(len(object_gaussians), len(object_meshes))):
            obj_gauss = object_gaussians[idx] if idx < len(object_gaussians) else None
            if obj_gauss is not None:
                gaussians.append(obj_gauss)
                continue

            obj_mesh = object_meshes[idx] if idx < len(object_meshes) else None
            # Geometry without faces (e.g. a point cloud) cannot be converted.
            if obj_mesh is None or getattr(obj_mesh, "faces", None) is None:
                continue
            try:
                gaussians.append(self._mesh_to_gaussians(obj_mesh))
            except Exception as e:
                print(f"Warning: could not convert object {idx} to Gaussians: {e}")

        if gaussians:
            return torch.cat(gaussians, dim=0)

        # Fallback: return empty tensor
        return torch.zeros(0, 14, device=self.device)

    @staticmethod
    def _quaternions_from_normals(normals: torch.Tensor) -> torch.Tensor:
        """Return ``[qx, qy, qz, qw]`` rotating local +Z onto each normal.

        Uses the shortest-arc construction ``q = (z x n, 1 + z . n)``. A zero
        normal yields the identity; a normal of exactly -Z (where the
        construction degenerates) yields a 180 degree turn about X.
        """
        z_axis = normals.new_tensor([0.0, 0.0, 1.0]).expand_as(normals)
        axis = torch.cross(z_axis, normals, dim=-1)
        w = 1.0 + (z_axis * normals).sum(dim=-1, keepdim=True)
        quats = torch.cat([axis, w], dim=-1)

        antiparallel = quats.norm(dim=-1, keepdim=True) < 1e-6
        half_turn = quats.new_tensor([1.0, 0.0, 0.0, 0.0]).expand_as(quats)
        quats = torch.where(antiparallel, half_turn, quats)
        return F.normalize(quats, dim=-1)

    def _mesh_to_gaussians(
        self,
        mesh: "trimesh.Trimesh",  # type: ignore
        num_gaussians_per_face: int = 4,
    ) -> torch.Tensor:
        """Convert a mesh to 3D Gaussian primitives.

        Each face spawns ``num_gaussians_per_face`` Gaussians with:
        - Position: face centroid plus Gaussian noise (std 0.01)
        - Scale: based on face area, thinnest along local Z
        - Rotation: local Z aligned with the face normal
        - Opacity: 0.9
        - Color: constant 0.8 grey

        Args:
            mesh: Any object exposing ``vertices`` (V, 3) and ``faces``
                (F, 3).
            num_gaussians_per_face: Number of Gaussians emitted per face.

        Returns:
            Tensor of shape ``(F * num_gaussians_per_face, 14)`` on
            ``self.device``; rows are grouped per sample, i.e. the first F
            rows are the first sample of every face.
        """
        if len(mesh.faces) == 0:
            return torch.zeros(0, 14, device=self.device)

        vertices = torch.tensor(
            np.asarray(mesh.vertices), dtype=torch.float32, device=self.device
        )
        faces = torch.tensor(
            np.asarray(mesh.faces), dtype=torch.long, device=self.device
        )
        num_faces = len(faces)

        # Get face data
        v0 = vertices[faces[:, 0]]
        v1 = vertices[faces[:, 1]]
        v2 = vertices[faces[:, 2]]

        # Face centroids
        centroids = (v0 + v1 + v2) / 3.0

        # The cross product's length is twice the triangle area, so the area
        # must be taken BEFORE the vector is normalized into a unit normal.
        cross = torch.cross(v1 - v0, v2 - v0, dim=-1)
        areas = 0.5 * torch.norm(cross, dim=-1)
        normals = F.normalize(cross, dim=-1)

        # Per-face attributes are identical for every sample of a face, so
        # they are built once outside the sampling loop.
        side = torch.sqrt(areas)
        scales = torch.stack([
            side * 0.1 + 0.001,
            side * 0.1 + 0.001,
            side * 0.05 + 0.001,
        ], dim=-1)
        rotations = self._quaternions_from_normals(normals)
        colors = torch.ones(num_faces, 3, device=self.device) * 0.8
        opacity = torch.ones(num_faces, 1, device=self.device) * 0.9

        # Gaussian parameters: [x, y, z, scale_x, scale_y, scale_z,
        #                       rot_qx, rot_qy, rot_qz, rot_qw, r, g, b, opacity]
        gaussians = []
        for _ in range(num_gaussians_per_face):
            # Offset from centroid
            positions = centroids + torch.randn_like(centroids) * 0.01
            gaussians.append(
                torch.cat([positions, scales, rotations, colors, opacity], dim=-1)
            )

        if not gaussians:
            return torch.zeros(0, 14, device=self.device)
        return torch.cat(gaussians, dim=0)