# Source: src/interiorfusion/pipelines.py (upload revision 6738da3)
"""InteriorFusion main inference pipeline."""
import os
import tempfile
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional, Tuple, Union
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from .models.scene_understanding import SceneUnderstandingModule
from .models.multiview_generation import MultiViewGenerationModule
from .models.reconstruction_3d import Reconstruction3DModule
from .models.scene_assembly import SceneAssemblyModule
from .models.material_texture import MaterialTextureModule
from .utils.mesh_utils import export_mesh
from .utils.gaussian_utils import export_gaussian_splatting
@dataclass
class InteriorFusionOutput:
    """Container for everything produced by the InteriorFusion pipeline.

    Bundles the reconstructed 3D geometry, optional Gaussian splatting
    cloud, PBR materials, scene-graph metadata, and — once
    :meth:`export_all` has run — the on-disk paths of the exported files.
    """

    # --- 3D representations ---
    scene_mesh: Optional["trimesh.Trimesh"] = None  # type: ignore
    room_shell_mesh: Optional["trimesh.Trimesh"] = None  # type: ignore
    object_meshes: List["trimesh.Trimesh"] = field(default_factory=list)  # type: ignore
    gaussian_cloud: Optional[torch.Tensor] = None  # Scene Gaussians
    # --- Materials ---
    pbr_materials: List[dict] = field(default_factory=list)
    # --- Scene graph ---
    scene_graph: Optional[dict] = None
    room_layout: Optional[dict] = None
    # --- Metadata ---
    room_type: str = "unknown"
    style: str = "modern"
    processing_time: float = 0.0
    # --- Export paths (populated by export_all) ---
    glb_path: Optional[str] = None
    fbx_path: Optional[str] = None
    obj_path: Optional[str] = None
    usdz_path: Optional[str] = None
    ply_path: Optional[str] = None  # Gaussian splatting

    def export_all(self, output_dir: Union[str, Path]) -> "InteriorFusionOutput":
        """Export every supported format into *output_dir* and return self.

        Mesh formats (glb, fbx, obj, usdz) are written only when a scene
        mesh exists; the Gaussian .ply only when a Gaussian cloud exists.
        The corresponding ``*_path`` attributes are filled in as a side
        effect.
        """
        target = Path(output_dir)
        target.mkdir(parents=True, exist_ok=True)
        if self.scene_mesh is not None:
            # One pass over the mesh formats; each export records its path
            # on the matching attribute (glb_path, fbx_path, ...).
            for fmt in ("glb", "fbx", "obj", "usdz"):
                path = str(target / f"scene.{fmt}")
                export_mesh(self.scene_mesh, path, format=fmt)
                setattr(self, f"{fmt}_path", path)
        if self.gaussian_cloud is not None:
            self.ply_path = str(target / "scene.ply")
            export_gaussian_splatting(self.gaussian_cloud, self.ply_path)
        return self
class InteriorFusionPipeline:
    """
    Main inference pipeline for InteriorFusion.

    Orchestrates 5 phases:
    1. Scene Understanding (depth, layout, segmentation)
    2. Multi-View Generation (per-object + room shell)
    3. 3D Reconstruction (room shell + per-object)
    4. Scene Assembly (layout optimization, scale normalization)
    5. Material & Texture (PBR generation, texture baking)

    Heavy sub-modules are created lazily (on first property access), so
    constructing the pipeline object itself is cheap.
    """

    def __init__(
        self,
        model_size: str = "L",
        device: str = "cuda",
        dtype: torch.dtype = torch.float16,
        use_scene_graph: bool = True,
        use_pbr: bool = True,
        use_gaussian_splatting: bool = True,
        cache_dir: Optional[str] = None,
    ):
        """
        Args:
            model_size: Checkpoint size variant forwarded to each sub-module.
            device: Torch device string for inference (e.g. "cuda", "cpu").
            dtype: Torch dtype used by the sub-modules.
            use_scene_graph: Whether a scene graph is requested from assembly.
            use_pbr: Whether Phase 5 generates PBR materials and textures.
            use_gaussian_splatting: Whether to build a scene Gaussian cloud.
            cache_dir: Model cache directory; defaults to
                ``~/.cache/interiorfusion`` and is created if missing.
        """
        self.model_size = model_size
        self.device = device
        self.dtype = dtype
        self.use_scene_graph = use_scene_graph
        self.use_pbr = use_pbr
        self.use_gaussian_splatting = use_gaussian_splatting
        self.cache_dir = cache_dir or os.path.expanduser("~/.cache/interiorfusion")
        os.makedirs(self.cache_dir, exist_ok=True)
        # Sub-modules are lazily instantiated by the properties below.
        self._scene_understanding = None
        self._multiview_gen = None
        self._reconstruction = None
        self._scene_assembly = None
        self._material_texture = None

    @property
    def scene_understanding(self):
        """Lazily constructed SceneUnderstandingModule (Phase 1)."""
        if self._scene_understanding is None:
            self._scene_understanding = SceneUnderstandingModule(
                model_size=self.model_size,
                device=self.device,
                dtype=self.dtype,
                cache_dir=self.cache_dir,
            )
        return self._scene_understanding

    @property
    def multiview_gen(self):
        """Lazily constructed MultiViewGenerationModule (Phase 2)."""
        if self._multiview_gen is None:
            self._multiview_gen = MultiViewGenerationModule(
                model_size=self.model_size,
                device=self.device,
                dtype=self.dtype,
                cache_dir=self.cache_dir,
            )
        return self._multiview_gen

    @property
    def reconstruction(self):
        """Lazily constructed Reconstruction3DModule (Phase 3)."""
        if self._reconstruction is None:
            self._reconstruction = Reconstruction3DModule(
                model_size=self.model_size,
                device=self.device,
                dtype=self.dtype,
                cache_dir=self.cache_dir,
            )
        return self._reconstruction

    @property
    def scene_assembly(self):
        """Lazily constructed SceneAssemblyModule (Phase 4)."""
        if self._scene_assembly is None:
            self._scene_assembly = SceneAssemblyModule(
                device=self.device,
                dtype=self.dtype,
            )
        return self._scene_assembly

    @property
    def material_texture(self):
        """Lazily constructed MaterialTextureModule (Phase 5)."""
        if self._material_texture is None:
            self._material_texture = MaterialTextureModule(
                model_size=self.model_size,
                device=self.device,
                dtype=self.dtype,
                use_pbr=self.use_pbr,
                cache_dir=self.cache_dir,
            )
        return self._material_texture

    @torch.no_grad()
    def __call__(
        self,
        image: Union[str, Path, Image.Image, np.ndarray],
        room_type_hint: Optional[str] = None,
        style_hint: Optional[str] = None,
        output_formats: Optional[List[str]] = None,
        return_intermediates: bool = False,
    ) -> InteriorFusionOutput:
        """
        Run full InteriorFusion pipeline on a single interior image.

        Args:
            image: Input interior photograph (path, PIL image, or array).
            room_type_hint: Optional room type ("living_room", "bedroom", etc.)
            style_hint: Optional style ("modern", "scandinavian", etc.)
            output_formats: List of formats to export ["glb", "fbx", "obj", "usdz", "ply"]
            return_intermediates: Whether to return intermediate stage outputs

        Returns:
            InteriorFusionOutput with all generated 3D content

        Note:
            ``output_formats`` and ``return_intermediates`` are currently
            accepted but not acted on — exporting is done explicitly via
            :meth:`InteriorFusionOutput.export_all`, and no intermediates
            are attached to the output. TODO: implement or deprecate.
        """
        import time

        start_time = time.time()
        # Normalize the input to a PIL RGB image.
        if isinstance(image, (str, Path)):
            image = Image.open(image).convert("RGB")
        elif isinstance(image, np.ndarray):
            image = Image.fromarray(image).convert("RGB")
        # ============================
        # Phase 1: Scene Understanding
        # ============================
        print("[Phase 1/5] Scene Understanding...")
        scene_info = self.scene_understanding(image)
        depth_map = scene_info["depth"]
        room_layout = scene_info["room_layout"]
        semantic_seg = scene_info["semantic_segmentation"]
        detected_objects = scene_info["detected_objects"]
        # NOTE(review): the model's predicted room_type/style take priority
        # over the caller's hints — hints only fill in missing predictions.
        # Confirm this precedence is intended.
        room_type = scene_info.get("room_type", room_type_hint or "living_room")
        style = scene_info.get("style", style_hint or "modern")
        # ============================
        # Phase 2: Multi-View Generation
        # ============================
        print("[Phase 2/5] Multi-View Generation...")
        # Per-object multi-view generation (6 views per detected object).
        object_multiviews = {}
        for obj_id, obj_info in detected_objects.items():
            crop = obj_info["crop"]
            mask = obj_info["mask"]
            multiviews = self.multiview_gen.generate_object_views(
                crop, mask, depth_map, num_views=6
            )
            object_multiviews[obj_id] = multiviews
        # Room shell multi-view
        room_shell_views = self.multiview_gen.generate_room_shell_views(
            image, depth_map, room_layout
        )
        # ============================
        # Phase 3: 3D Reconstruction
        # ============================
        print("[Phase 3/5] 3D Reconstruction...")
        # Room shell reconstruction
        room_shell_mesh = self.reconstruction.reconstruct_room_shell(
            room_shell_views, room_layout, depth_map
        )
        # Per-object reconstruction. object_meshes/object_gaussians are
        # index-aligned with detected_objects insertion order.
        object_meshes = []
        object_gaussians = []
        for obj_id, multiviews in object_multiviews.items():
            obj_mesh, obj_gauss = self.reconstruction.reconstruct_object(
                multiviews,
                room_layout=room_layout,
                depth_map=depth_map,
                object_info=detected_objects[obj_id],
            )
            object_meshes.append(obj_mesh)
            object_gaussians.append(obj_gauss)
        # Scene Gaussian splatting (optional)
        gaussian_cloud = None
        if self.use_gaussian_splatting:
            gaussian_cloud = self.reconstruction.build_scene_gaussians(
                room_shell_mesh, object_gaussians, object_meshes
            )
        # ============================
        # Phase 4: Scene Assembly
        # ============================
        print("[Phase 4/5] Scene Assembly...")
        assembled_scene = self.scene_assembly.assemble(
            room_shell_mesh=room_shell_mesh,
            object_meshes=object_meshes,
            room_layout=room_layout,
            detected_objects=detected_objects,
            depth_map=depth_map,
        )
        scene_mesh = assembled_scene["scene_mesh"]
        scene_graph = assembled_scene.get("scene_graph")
        # ============================
        # Phase 5: Material & Texture
        # ============================
        print("[Phase 5/5] Material & Texture...")
        pbr_materials = []
        if self.use_pbr:
            # Room shell materials
            room_shell_mesh = self.material_texture.generate_room_materials(
                room_shell_mesh, image, semantic_seg
            )
            # Per-object materials. object_meshes was built in
            # detected_objects iteration order, so zip pairs each mesh with
            # its object id directly (the previous list(keys())[i] lookup
            # rebuilt the key list on every iteration).
            textured_objects = []
            for obj_id, obj_mesh in zip(detected_objects, object_meshes):
                textured_obj, materials = self.material_texture.generate_object_materials(
                    obj_mesh,
                    object_multiviews[obj_id],
                    detected_objects[obj_id],
                )
                textured_objects.append(textured_obj)
                pbr_materials.extend(materials)
            # Re-assemble with textured objects
            scene_mesh = self.scene_assembly.reassemble_with_textures(
                room_shell_mesh, textured_objects, scene_graph
            )
        processing_time = time.time() - start_time
        output = InteriorFusionOutput(
            scene_mesh=scene_mesh,
            room_shell_mesh=room_shell_mesh,
            object_meshes=object_meshes if not self.use_pbr else textured_objects,
            gaussian_cloud=gaussian_cloud,
            pbr_materials=pbr_materials,
            scene_graph=scene_graph,
            room_layout=room_layout,
            room_type=room_type,
            style=style,
            processing_time=processing_time,
        )
        print(f"\n✅ Generation complete in {processing_time:.1f}s")
        print(f"   Room type: {room_type}")
        print(f"   Style: {style}")
        print(f"   Objects detected: {len(detected_objects)}")
        print(f"   PBR materials: {len(pbr_materials)}")
        return output

    def edit_scene(
        self,
        scene_output: InteriorFusionOutput,
        edits: List[dict],
    ) -> InteriorFusionOutput:
        """
        Apply edits to a generated scene and return a new output.

        Edits format:
            [
                {"action": "move", "object_id": 0, "position": [x, y, z]},
                {"action": "replace", "object_id": 1, "new_image": Image},
                {"action": "remove", "object_id": 2},
                {"action": "add", "new_image": Image, "position": [x, y, z]},
            ]

        Edits are applied sequentially; a "remove" shifts the indices of
        later objects, so subsequent ``object_id`` values refer to the list
        *after* earlier edits have been applied.

        Raises:
            ValueError: If an edit contains an unrecognized ``action``
                (previously such edits were silently ignored).
        """
        print(f"Applying {len(edits)} edits...")
        scene_graph = scene_output.scene_graph or {}
        object_meshes = list(scene_output.object_meshes)
        for edit in edits:
            action = edit["action"]
            if action == "move":
                obj_id = edit["object_id"]
                new_pos = edit["position"]
                # Update scene graph
                if "nodes" in scene_graph and obj_id < len(scene_graph["nodes"]):
                    scene_graph["nodes"][obj_id]["position"] = new_pos
                # Update mesh transform.
                # NOTE(review): this *translates* the mesh by `position`
                # rather than setting an absolute position — confirm which
                # semantics the edit format intends.
                if obj_id < len(object_meshes):
                    mesh = object_meshes[obj_id]
                    mesh.vertices += np.array(new_pos)
            elif action == "replace":
                obj_id = edit["object_id"]
                new_image = edit["new_image"]
                # Generate a replacement object from the new image.
                new_multiviews = self.multiview_gen.generate_object_views(
                    new_image, None, None, num_views=6
                )
                new_mesh, _ = self.reconstruction.reconstruct_object(
                    new_multiviews, room_layout=scene_output.room_layout
                )
                object_meshes[obj_id] = new_mesh
            elif action == "remove":
                obj_id = edit["object_id"]
                if obj_id < len(object_meshes):
                    object_meshes.pop(obj_id)
                    # Keep the scene graph aligned with the mesh list: the
                    # "move" branch updates it, so removal must drop the
                    # corresponding node too (previously it went stale).
                    nodes = scene_graph.get("nodes")
                    if nodes and obj_id < len(nodes):
                        nodes.pop(obj_id)
            elif action == "add":
                new_image = edit["new_image"]
                position = edit["position"]
                new_multiviews = self.multiview_gen.generate_object_views(
                    new_image, None, None, num_views=6
                )
                new_mesh, _ = self.reconstruction.reconstruct_object(
                    new_multiviews, room_layout=scene_output.room_layout
                )
                new_mesh.vertices += np.array(position)
                object_meshes.append(new_mesh)
            else:
                # Fail loudly instead of silently skipping typos.
                raise ValueError(f"Unknown edit action: {action!r}")
        # Re-assemble the scene with the edited object set.
        assembled = self.scene_assembly.reassemble_with_textures(
            scene_output.room_shell_mesh,
            object_meshes,
            scene_graph,
        )
        return InteriorFusionOutput(
            scene_mesh=assembled,
            room_shell_mesh=scene_output.room_shell_mesh,
            object_meshes=object_meshes,
            gaussian_cloud=scene_output.gaussian_cloud,
            pbr_materials=scene_output.pbr_materials,
            scene_graph=scene_graph,
            room_layout=scene_output.room_layout,
            room_type=scene_output.room_type,
            style=scene_output.style,
        )