| """InteriorFusion main inference pipeline.""" |
|
|
| import os |
| import tempfile |
| from dataclasses import dataclass, field |
| from pathlib import Path |
| from typing import List, Optional, Tuple, Union |
|
|
| import numpy as np |
| import torch |
| import torch.nn.functional as F |
| from PIL import Image |
|
|
| from .models.scene_understanding import SceneUnderstandingModule |
| from .models.multiview_generation import MultiViewGenerationModule |
| from .models.reconstruction_3d import Reconstruction3DModule |
| from .models.scene_assembly import SceneAssemblyModule |
| from .models.material_texture import MaterialTextureModule |
| from .utils.mesh_utils import export_mesh |
| from .utils.gaussian_utils import export_gaussian_splatting |
|
|
|
|
@dataclass
class InteriorFusionOutput:
    """Result bundle produced by the InteriorFusion pipeline.

    Every field has a default, so an empty instance can be created and filled
    in incrementally. The ``*_path`` fields start as ``None`` and are only
    assigned by :meth:`export_all`.
    """

    # Geometry: the assembled scene, the room shell, and one mesh per object.
    scene_mesh: Optional["trimesh.Trimesh"] = None
    room_shell_mesh: Optional["trimesh.Trimesh"] = None
    object_meshes: List["trimesh.Trimesh"] = field(default_factory=list)
    # Gaussian-splatting data; layout of the tensor is not defined in this file.
    gaussian_cloud: Optional[torch.Tensor] = None

    # Material descriptions (one dict per material; schema defined elsewhere).
    pbr_materials: List[dict] = field(default_factory=list)

    # Structural descriptions of the scene.
    scene_graph: Optional[dict] = None
    room_layout: Optional[dict] = None

    # Metadata about the run.
    room_type: str = "unknown"
    style: str = "modern"
    processing_time: float = 0.0

    # File locations written by export_all(); None until an export happens.
    glb_path: Optional[str] = None
    fbx_path: Optional[str] = None
    obj_path: Optional[str] = None
    usdz_path: Optional[str] = None
    ply_path: Optional[str] = None

    def export_all(self, output_dir: Union[str, Path]) -> "InteriorFusionOutput":
        """Write every available representation into ``output_dir``.

        The directory (including parents) is created when missing. If
        ``scene_mesh`` is set it is exported as ``scene.glb``, ``scene.fbx``,
        ``scene.obj`` and ``scene.usdz``; if ``gaussian_cloud`` is set it is
        exported as ``scene.ply``. The matching ``*_path`` attribute is
        assigned just before each export call.

        Args:
            output_dir: Destination directory.

        Returns:
            This same instance, so calls can be chained.
        """
        target = Path(output_dir)
        target.mkdir(parents=True, exist_ok=True)

        if self.scene_mesh is not None:
            # Same order as the attribute declarations: glb, fbx, obj, usdz.
            for fmt in ("glb", "fbx", "obj", "usdz"):
                destination = str(target / f"scene.{fmt}")
                setattr(self, f"{fmt}_path", destination)
                export_mesh(self.scene_mesh, destination, format=fmt)

        if self.gaussian_cloud is not None:
            splat_file = str(target / "scene.ply")
            self.ply_path = splat_file
            export_gaussian_splatting(self.gaussian_cloud, splat_file)

        return self
|
|
|
|
class InteriorFusionPipeline:
    """
    Main inference pipeline for InteriorFusion.

    Orchestrates 5 phases:
    1. Scene Understanding (depth, layout, segmentation)
    2. Multi-View Generation (per-object + room shell)
    3. 3D Reconstruction (room shell + per-object)
    4. Scene Assembly (layout optimization, scale normalization)
    5. Material & Texture (PBR generation, texture baking)

    The module behind each phase is constructed on first access through the
    corresponding property, so creating a pipeline instance does not build any
    of them.
    """

    def __init__(
        self,
        model_size: str = "L",
        device: str = "cuda",
        dtype: torch.dtype = torch.float16,
        use_scene_graph: bool = True,
        use_pbr: bool = True,
        use_gaussian_splatting: bool = True,
        cache_dir: Optional[str] = None,
    ):
        """Store the configuration and make sure the cache directory exists.

        Args:
            model_size: Forwarded to the phase modules that accept it.
            device: Forwarded to every phase module.
            dtype: Forwarded to every phase module.
            use_scene_graph: Stored on the instance; not read anywhere else in
                this class.
            use_pbr: Enables phase 5 and is forwarded to the material module.
            use_gaussian_splatting: Enables building the scene gaussian cloud.
            cache_dir: Cache directory; defaults to ``~/.cache/interiorfusion``.
                It is created if it does not exist.
        """
        self.model_size = model_size
        self.device = device
        self.dtype = dtype
        self.use_scene_graph = use_scene_graph
        self.use_pbr = use_pbr
        self.use_gaussian_splatting = use_gaussian_splatting
        self.cache_dir = cache_dir or os.path.expanduser("~/.cache/interiorfusion")

        os.makedirs(self.cache_dir, exist_ok=True)

        # Phase modules are created lazily by the properties below.
        self._scene_understanding = None
        self._multiview_gen = None
        self._reconstruction = None
        self._scene_assembly = None
        self._material_texture = None

    @property
    def scene_understanding(self):
        """Phase 1 module, created on first access."""
        if self._scene_understanding is None:
            self._scene_understanding = SceneUnderstandingModule(
                model_size=self.model_size,
                device=self.device,
                dtype=self.dtype,
                cache_dir=self.cache_dir,
            )
        return self._scene_understanding

    @property
    def multiview_gen(self):
        """Phase 2 module, created on first access."""
        if self._multiview_gen is None:
            self._multiview_gen = MultiViewGenerationModule(
                model_size=self.model_size,
                device=self.device,
                dtype=self.dtype,
                cache_dir=self.cache_dir,
            )
        return self._multiview_gen

    @property
    def reconstruction(self):
        """Phase 3 module, created on first access."""
        if self._reconstruction is None:
            self._reconstruction = Reconstruction3DModule(
                model_size=self.model_size,
                device=self.device,
                dtype=self.dtype,
                cache_dir=self.cache_dir,
            )
        return self._reconstruction

    @property
    def scene_assembly(self):
        """Phase 4 module, created on first access."""
        if self._scene_assembly is None:
            self._scene_assembly = SceneAssemblyModule(
                device=self.device,
                dtype=self.dtype,
            )
        return self._scene_assembly

    @property
    def material_texture(self):
        """Phase 5 module, created on first access."""
        if self._material_texture is None:
            self._material_texture = MaterialTextureModule(
                model_size=self.model_size,
                device=self.device,
                dtype=self.dtype,
                use_pbr=self.use_pbr,
                cache_dir=self.cache_dir,
            )
        return self._material_texture

    @staticmethod
    def _load_image(image):
        """Normalize a path, numpy array or PIL image to an RGB PIL image.

        Any other input type is returned unchanged.
        """
        if isinstance(image, (str, Path)):
            # convert() returns an independent image, so the file handle can be
            # released as soon as the conversion is done.
            with Image.open(image) as opened:
                return opened.convert("RGB")
        if isinstance(image, np.ndarray):
            return Image.fromarray(image).convert("RGB")
        if isinstance(image, Image.Image):
            # Keep PIL inputs consistent with the other branches (always RGB).
            return image.convert("RGB")
        return image

    def _reconstruct_from_image(self, new_image, room_layout):
        """Generate 6 views of ``new_image`` and reconstruct a mesh from them.

        No mask or depth map is supplied to the view generator, and the second
        value returned by the reconstruction is discarded.
        """
        views = self.multiview_gen.generate_object_views(
            new_image, None, None, num_views=6
        )
        mesh, _ = self.reconstruction.reconstruct_object(
            views, room_layout=room_layout
        )
        return mesh

    @torch.no_grad()
    def __call__(
        self,
        image: Union[str, Path, Image.Image, np.ndarray],
        room_type_hint: Optional[str] = None,
        style_hint: Optional[str] = None,
        output_formats: Optional[List[str]] = None,
        return_intermediates: bool = False,
    ) -> InteriorFusionOutput:
        """
        Run full InteriorFusion pipeline on a single interior image.

        Args:
            image: Input interior photograph (file path, PIL image or array).
            room_type_hint: Optional room type ("living_room", "bedroom", etc.).
                Used only when scene understanding reports no "room_type".
            style_hint: Optional style ("modern", "scandinavian", etc.).
                Used only when scene understanding reports no "style".
            output_formats: Accepted for API compatibility; this method does
                not export anything. Call ``InteriorFusionOutput.export_all``
                on the result to write files.
            return_intermediates: Accepted for API compatibility; currently
                has no effect.

        Returns:
            InteriorFusionOutput with all generated 3D content
        """
        import time

        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by wall-clock adjustments.
        start_time = time.perf_counter()

        image = self._load_image(image)

        # ------------------------------------------------------------------
        # Phase 1: scene understanding
        # ------------------------------------------------------------------
        print("[Phase 1/5] Scene Understanding...")
        scene_info = self.scene_understanding(image)

        depth_map = scene_info["depth"]
        room_layout = scene_info["room_layout"]
        semantic_seg = scene_info["semantic_segmentation"]
        detected_objects = scene_info["detected_objects"]
        room_type = scene_info.get("room_type", room_type_hint or "living_room")
        style = scene_info.get("style", style_hint or "modern")

        # ------------------------------------------------------------------
        # Phase 2: multi-view generation
        # ------------------------------------------------------------------
        print("[Phase 2/5] Multi-View Generation...")

        object_multiviews = {}
        for obj_id, obj_info in detected_objects.items():
            object_multiviews[obj_id] = self.multiview_gen.generate_object_views(
                obj_info["crop"], obj_info["mask"], depth_map, num_views=6
            )

        room_shell_views = self.multiview_gen.generate_room_shell_views(
            image, depth_map, room_layout
        )

        # ------------------------------------------------------------------
        # Phase 3: 3D reconstruction
        # ------------------------------------------------------------------
        print("[Phase 3/5] 3D Reconstruction...")

        room_shell_mesh = self.reconstruction.reconstruct_room_shell(
            room_shell_views, room_layout, depth_map
        )

        # object_meshes[i] belongs to the i-th key of object_multiviews.
        object_meshes = []
        object_gaussians = []
        for obj_id, multiviews in object_multiviews.items():
            obj_mesh, obj_gaussians = self.reconstruction.reconstruct_object(
                multiviews,
                room_layout=room_layout,
                depth_map=depth_map,
                object_info=detected_objects[obj_id],
            )
            object_meshes.append(obj_mesh)
            object_gaussians.append(obj_gaussians)

        gaussian_cloud = None
        if self.use_gaussian_splatting:
            gaussian_cloud = self.reconstruction.build_scene_gaussians(
                room_shell_mesh, object_gaussians, object_meshes
            )

        # ------------------------------------------------------------------
        # Phase 4: scene assembly
        # ------------------------------------------------------------------
        print("[Phase 4/5] Scene Assembly...")

        assembled_scene = self.scene_assembly.assemble(
            room_shell_mesh=room_shell_mesh,
            object_meshes=object_meshes,
            room_layout=room_layout,
            detected_objects=detected_objects,
            depth_map=depth_map,
        )

        scene_mesh = assembled_scene["scene_mesh"]
        scene_graph = assembled_scene.get("scene_graph")

        # ------------------------------------------------------------------
        # Phase 5: materials and textures
        # ------------------------------------------------------------------
        print("[Phase 5/5] Material & Texture...")

        pbr_materials = []
        final_objects = object_meshes
        if self.use_pbr:
            room_shell_mesh = self.material_texture.generate_room_materials(
                room_shell_mesh, image, semantic_seg
            )

            # Pair every mesh with the id it was reconstructed from instead of
            # rebuilding a key list and indexing it on each iteration.
            textured_objects = []
            for obj_id, obj_mesh in zip(object_multiviews, object_meshes):
                textured_obj, materials = self.material_texture.generate_object_materials(
                    obj_mesh,
                    object_multiviews[obj_id],
                    detected_objects[obj_id],
                )
                textured_objects.append(textured_obj)
                pbr_materials.extend(materials)

            scene_mesh = self.scene_assembly.reassemble_with_textures(
                room_shell_mesh, textured_objects, scene_graph
            )
            final_objects = textured_objects

        processing_time = time.perf_counter() - start_time

        output = InteriorFusionOutput(
            scene_mesh=scene_mesh,
            room_shell_mesh=room_shell_mesh,
            object_meshes=final_objects,
            gaussian_cloud=gaussian_cloud,
            pbr_materials=pbr_materials,
            scene_graph=scene_graph,
            room_layout=room_layout,
            room_type=room_type,
            style=style,
            processing_time=processing_time,
        )

        print(f"\n✅ Generation complete in {processing_time:.1f}s")
        print(f" Room type: {room_type}")
        print(f" Style: {style}")
        print(f" Objects detected: {len(detected_objects)}")
        print(f" PBR materials: {len(pbr_materials)}")

        return output

    @torch.no_grad()
    def edit_scene(
        self,
        scene_output: InteriorFusionOutput,
        edits: List[dict],
    ) -> InteriorFusionOutput:
        """
        Apply edits to a generated scene.

        Edits format:
        [
        {"action": "move", "object_id": 0, "position": [x, y, z]},
        {"action": "replace", "object_id": 1, "new_image": Image},
        {"action": "remove", "object_id": 2},
        {"action": "add", "new_image": Image, "position": [x, y, z]},
        ]

        Edits are applied in order. ``scene_output`` is left untouched: the
        scene graph is deep-copied and a mesh is copied before it is moved.

        Notes:
            * "move" and "add" translate the mesh vertices by ``position``
              (it is applied as an offset), while "move" stores ``position``
              as-is in the scene-graph node.
              NOTE(review): confirm whether an absolute placement was intended.
            * "remove" pops the mesh from the list, so the ids of all later
              objects shift down by one for subsequent edits; scene-graph
              nodes are not removed.
            * "move"/"remove" with an ``object_id`` beyond the list are
              skipped; "replace" raises ``IndexError`` in that case.
            * Unknown actions are reported with a printed warning and skipped.
            * ``gaussian_cloud`` and ``pbr_materials`` are carried over
              unchanged and ``processing_time`` keeps its default.

        Args:
            scene_output: Result of a previous pipeline run.
            edits: Edit dictionaries as described above.

        Returns:
            A new InteriorFusionOutput reflecting the edits.

        Raises:
            KeyError: If an edit lacks a key required by its action.
            IndexError: If a "replace" edit targets a non-existent object.
        """
        import copy

        print(f"Applying {len(edits)} edits...")

        # Work on private copies so the caller's scene is never modified.
        if scene_output.scene_graph:
            scene_graph = copy.deepcopy(scene_output.scene_graph)
        else:
            scene_graph = {}
        object_meshes = list(scene_output.object_meshes)

        for edit in edits:
            action = edit["action"]

            if action == "move":
                obj_id = edit["object_id"]
                new_pos = edit["position"]

                if "nodes" in scene_graph and obj_id < len(scene_graph["nodes"]):
                    scene_graph["nodes"][obj_id]["position"] = new_pos

                if obj_id < len(object_meshes):
                    # The list is only a shallow copy: translate a copy of the
                    # mesh so the one held by scene_output keeps its vertices.
                    moved = copy.deepcopy(object_meshes[obj_id])
                    moved.vertices += np.array(new_pos)
                    object_meshes[obj_id] = moved

            elif action == "replace":
                obj_id = edit["object_id"]
                new_image = edit["new_image"]

                # Fail before the expensive generation/reconstruction rather
                # than after it; accepts the same indices list assignment does.
                count = len(object_meshes)
                if not -count <= obj_id < count:
                    raise IndexError(
                        f"replace: object_id {obj_id} out of range for {count} objects"
                    )

                object_meshes[obj_id] = self._reconstruct_from_image(
                    new_image, scene_output.room_layout
                )

            elif action == "remove":
                obj_id = edit["object_id"]
                if obj_id < len(object_meshes):
                    object_meshes.pop(obj_id)

            elif action == "add":
                new_image = edit["new_image"]
                position = edit["position"]
                new_mesh = self._reconstruct_from_image(
                    new_image, scene_output.room_layout
                )
                new_mesh.vertices += np.array(position)
                object_meshes.append(new_mesh)

            else:
                # Previously a typo in "action" was dropped without any trace.
                print(f"Warning: ignoring unknown edit action {action!r}")

        assembled = self.scene_assembly.reassemble_with_textures(
            scene_output.room_shell_mesh,
            object_meshes,
            scene_graph,
        )

        return InteriorFusionOutput(
            scene_mesh=assembled,
            room_shell_mesh=scene_output.room_shell_mesh,
            object_meshes=object_meshes,
            gaussian_cloud=scene_output.gaussian_cloud,
            pbr_materials=scene_output.pbr_materials,
            scene_graph=scene_graph,
            room_layout=scene_output.room_layout,
            room_type=scene_output.room_type,
            style=scene_output.style,
        )
|
|