LejobuildYT committed on
Commit
c4eb67e
·
verified ·
1 Parent(s): 63b49f9

Update hy3dgen/texgen/pipelines.py

Browse files
Files changed (1) hide show
  1. hy3dgen/texgen/pipelines.py +119 -89
hy3dgen/texgen/pipelines.py CHANGED
@@ -24,16 +24,29 @@ from pathlib import Path
24
  from .differentiable_renderer.mesh_render import MeshRender
25
  from .utils.dehighlight_utils import Light_Shadow_Remover
26
  from .utils.multiview_utils import Multiview_Diffusion_Net
27
- from .utils.imagesuper_utils import Image_Super_Net
28
  from .utils.uv_warp_utils import mesh_uv_wrap
29
 
30
  logger = logging.getLogger(__name__)
31
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- class Hunyuan3DTexGenConfig:
34
 
 
35
  def __init__(self, light_remover_ckpt_path, multiview_ckpt_path):
36
- self.device = 'cuda'
 
 
 
37
  self.light_remover_ckpt_path = light_remover_ckpt_path
38
  self.multiview_ckpt_path = multiview_ckpt_path
39
 
@@ -52,84 +65,94 @@ class Hunyuan3DPaintPipeline:
52
  def from_pretrained(cls, model_path):
53
  original_model_path = model_path
54
  print(f"原始路径 original_model_path: {model_path}")
 
55
  if not os.path.exists(model_path):
56
- print(f"存在原始路径: {model_path}")
57
- # try local path
58
  base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen')
59
  model_path = os.path.expanduser(os.path.join(base_dir, model_path))
60
 
61
- print(f"基础路径 base_dir: {base_dir}")
62
- print(f"模型路径 model_path: {model_path}")
63
-
64
  delight_model_path = os.path.join(model_path, 'hunyuan3d-delight-v2-0')
65
  multiview_model_path = os.path.join(model_path, 'hunyuan3d-paint-v2-0')
66
 
67
- print(f"路径 delight_model_path: {delight_model_path}")
68
- print(f"路径 multiview_model_path: {multiview_model_path}")
69
-
70
  if not os.path.exists(delight_model_path) or not os.path.exists(multiview_model_path):
71
  try:
72
  import huggingface_hub
73
- # download from huggingface
74
- model_path = huggingface_hub.snapshot_download(repo_id=original_model_path,
75
- allow_patterns=["hunyuan3d-delight-v2-0/*"])
76
- print(f"下载的 model_path 1 : {model_path}")
77
- snapshot_path = Path(model_path)
78
- for path in snapshot_path.rglob("*"):
79
- print(path.relative_to(snapshot_path))
80
- model_path = huggingface_hub.snapshot_download(repo_id=original_model_path,
81
- allow_patterns=["hunyuan3d-paint-v2-0/*"])
82
- print(f"下载的 model_path 2 : {model_path}")
83
- snapshot_path = Path(model_path)
84
- for path in snapshot_path.rglob("*"):
85
- print(path.relative_to(snapshot_path))
86
-
87
-
88
  delight_model_path = os.path.join(model_path, 'hunyuan3d-delight-v2-0')
89
  multiview_model_path = os.path.join(model_path, 'hunyuan3d-paint-v2-0')
90
-
91
- print(f"路径 delight_model_path : {delight_model_path}")
92
- print(f"路径 multiview_model_path : {multiview_model_path}")
93
- print(f"路径 delight_model_path 是否存在: {os.path.exists(delight_model_path)}")
94
- print(f"路径 multiview_model_path 是否存在: {os.path.exists(multiview_model_path)}")
95
 
96
  return cls(Hunyuan3DTexGenConfig(delight_model_path, multiview_model_path))
 
97
  except Exception as e:
98
  print("构造 Hunyuan3DPaintPipeline 实例时出错:", e)
99
- import traceback
100
- traceback.print_exc()
101
  raise
102
- # except ImportError:
103
- # logger.warning(
104
- # "You need to install HuggingFace Hub to load models from the hub."
105
- # )
106
- # raise RuntimeError(f"Model path {model_path} not found")
107
  else:
108
  return cls(Hunyuan3DTexGenConfig(delight_model_path, multiview_model_path))
109
 
110
- raise FileNotFoundError(f"Model path {original_model_path} not found and we could not find it at huggingface")
 
111
 
112
  def __init__(self, config):
113
  self.config = config
114
  self.models = {}
 
115
  self.render = MeshRender(
116
  default_resolution=self.config.render_size,
117
- texture_size=self.config.texture_size)
 
118
 
119
  self.load_models()
120
 
 
 
 
 
121
  def load_models(self):
122
- # empty cude cache
123
- torch.cuda.empty_cache()
124
- # Load model
 
 
 
 
125
  self.models['delight_model'] = Light_Shadow_Remover(self.config)
126
  self.models['multiview_model'] = Multiview_Diffusion_Net(self.config)
127
  # self.models['super_model'] = Image_Super_Net(self.config)
128
 
129
- def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
130
- self.models['delight_model'].pipeline.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
131
- self.models['multiview_model'].pipeline.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True):
134
  normal_maps = []
135
  for elev, azim in zip(camera_elevs, camera_azims):
@@ -139,23 +162,28 @@ class Hunyuan3DPaintPipeline:
139
 
140
  return normal_maps
141
 
 
142
  def render_position_multiview(self, camera_elevs, camera_azims):
143
  position_maps = []
144
  for elev, azim in zip(camera_elevs, camera_azims):
145
  position_map = self.render.render_position(
146
  elev, azim, return_type='pl')
147
  position_maps.append(position_map)
148
-
149
  return position_maps
150
 
 
151
  def bake_from_multiview(self, views, camera_elevs,
152
  camera_azims, view_weights, method='graphcut'):
153
  project_textures, project_weighted_cos_maps = [], []
154
  project_boundary_maps = []
 
155
  for view, camera_elev, camera_azim, weight in zip(
156
- views, camera_elevs, camera_azims, view_weights):
 
157
  project_texture, project_cos_map, project_boundary_map = self.render.back_project(
158
- view, camera_elev, camera_azim)
 
 
159
  project_cos_map = weight * (project_cos_map ** self.config.bake_exp)
160
  project_textures.append(project_texture)
161
  project_weighted_cos_maps.append(project_cos_map)
@@ -166,8 +194,10 @@ class Hunyuan3DPaintPipeline:
166
  project_textures, project_weighted_cos_maps)
167
  else:
168
  raise f'no method {method}'
 
169
  return texture, ori_trust_map > 1E-8
170
 
 
171
  def texture_inpaint(self, texture, mask):
172
 
173
  texture_np = self.render.uv_inpaint(texture, mask)
@@ -175,39 +205,36 @@ class Hunyuan3DPaintPipeline:
175
 
176
  return texture
177
 
 
178
  def recenter_image(self, image, border_ratio=0.2):
179
  if image.mode == 'RGB':
180
  return image
181
  elif image.mode == 'L':
182
- image = image.convert('RGB')
183
- return image
184
 
185
- alpha_channel = np.array(image)[:, :, 3]
186
- non_zero_indices = np.argwhere(alpha_channel > 0)
187
- if non_zero_indices.size == 0:
188
- raise ValueError("Image is fully transparent")
189
 
190
- min_row, min_col = non_zero_indices.min(axis=0)
191
- max_row, max_col = non_zero_indices.max(axis=0)
192
 
193
- cropped_image = image.crop((min_col, min_row, max_col + 1, max_row + 1))
194
 
195
- width, height = cropped_image.size
196
- border_width = int(width * border_ratio)
197
- border_height = int(height * border_ratio)
198
 
199
- new_width = width + 2 * border_width
200
- new_height = height + 2 * border_height
 
201
 
202
- square_size = max(new_width, new_height)
 
203
 
204
- new_image = Image.new('RGBA', (square_size, square_size), (255, 255, 255, 0))
205
 
206
- paste_x = (square_size - new_width) // 2 + border_width
207
- paste_y = (square_size - new_height) // 2 + border_height
208
-
209
- new_image.paste(cropped_image, (paste_x, paste_y))
210
- return new_image
211
 
212
  @torch.no_grad()
213
  def __call__(self, mesh, image):
@@ -219,39 +246,42 @@ class Hunyuan3DPaintPipeline:
219
 
220
  image_prompt = self.recenter_image(image_prompt)
221
 
 
222
  image_prompt = self.models['delight_model'](image_prompt)
223
 
224
  mesh = mesh_uv_wrap(mesh)
225
-
226
  self.render.load_mesh(mesh)
227
 
228
- selected_camera_elevs, selected_camera_azims, selected_view_weights = \
229
- self.config.candidate_camera_elevs, self.config.candidate_camera_azims, self.config.candidate_view_weights
 
230
 
231
- normal_maps = self.render_normal_multiview(
232
- selected_camera_elevs, selected_camera_azims, use_abs_coor=True)
233
- position_maps = self.render_position_multiview(
234
- selected_camera_elevs, selected_camera_azims)
235
 
236
- camera_info = [(((azim // 30) + 9) % 12) // {-20: 1, 0: 1, 20: 1, -90: 3, 90: 3}[
237
- elev] + {-20: 0, 0: 12, 20: 24, -90: 36, 90: 40}[elev] for azim, elev in
238
- zip(selected_camera_azims, selected_camera_elevs)]
239
- multiviews = self.models['multiview_model'](image_prompt, normal_maps + position_maps, camera_info)
 
 
 
 
 
 
240
 
241
  for i in range(len(multiviews)):
242
- # multiviews[i] = self.models['super_model'](multiviews[i])
243
  multiviews[i] = multiviews[i].resize(
244
- (self.config.render_size, self.config.render_size))
 
245
 
246
- texture, mask = self.bake_from_multiview(multiviews,
247
- selected_camera_elevs, selected_camera_azims, selected_view_weights,
248
- method=self.config.merge_method)
249
 
250
  mask_np = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8)
251
 
252
  texture = self.texture_inpaint(texture, mask_np)
253
-
254
  self.render.set_texture(texture)
255
- textured_mesh = self.render.save_mesh()
256
 
257
- return textured_mesh
 
24
  from .differentiable_renderer.mesh_render import MeshRender
25
  from .utils.dehighlight_utils import Light_Shadow_Remover
26
  from .utils.multiview_utils import Multiview_Diffusion_Net
27
+ # from .utils.imagesuper_utils import Image_Super_Net
28
  from .utils.uv_warp_utils import mesh_uv_wrap
29
 
30
  logger = logging.getLogger(__name__)
31
 
32
# -------------------------------------------
# Device Selection (Global clean handling)
# -------------------------------------------

def get_best_device():
    """Pick the best available torch device string.

    Preference order: CUDA GPU, then Apple-Silicon MPS, then CPU.

    Returns:
        str: one of ``"cuda"``, ``"mps"`` or ``"cpu"``.
    """
    if torch.cuda.is_available():
        return "cuda"
    # Guard the MPS probe: torch.backends.mps does not exist on older
    # torch builds, where bare attribute access would raise AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"
42
 
 
43
 
44
class Hunyuan3DTexGenConfig:
    """Checkpoint-path and device configuration for the texture pipeline.

    NOTE(review): only __init__ is visible in this hunk; attributes read
    elsewhere (render_size, texture_size, bake_exp, candidate_camera_*,
    merge_method) are presumably defined on this class too — confirm
    against the full file.
    """

    def __init__(self, light_remover_ckpt_path, multiview_ckpt_path):

        # Old: self.device = 'cuda'
        # Device is now chosen dynamically (cuda -> mps -> cpu).
        self.device = get_best_device()

        # Checkpoint directory for the delight (light/shadow removal) model.
        self.light_remover_ckpt_path = light_remover_ckpt_path
        # Checkpoint directory for the multiview diffusion model.
        self.multiview_ckpt_path = multiview_ckpt_path
 
 
65
  def from_pretrained(cls, model_path):
66
  original_model_path = model_path
67
  print(f"原始路径 original_model_path: {model_path}")
68
+
69
  if not os.path.exists(model_path):
70
+
71
+ print(f"不存在原始路径: {model_path}")
72
  base_dir = os.environ.get('HY3DGEN_MODELS', '~/.cache/hy3dgen')
73
  model_path = os.path.expanduser(os.path.join(base_dir, model_path))
74
 
 
 
 
75
  delight_model_path = os.path.join(model_path, 'hunyuan3d-delight-v2-0')
76
  multiview_model_path = os.path.join(model_path, 'hunyuan3d-paint-v2-0')
77
 
 
 
 
78
  if not os.path.exists(delight_model_path) or not os.path.exists(multiview_model_path):
79
  try:
80
  import huggingface_hub
81
+
82
+ model_path = huggingface_hub.snapshot_download(
83
+ repo_id=original_model_path,
84
+ allow_patterns=["hunyuan3d-delight-v2-0/*"]
85
+ )
86
+ model_path = huggingface_hub.snapshot_download(
87
+ repo_id=original_model_path,
88
+ allow_patterns=["hunyuan3d-paint-v2-0/*"]
89
+ )
90
+
 
 
 
 
 
91
  delight_model_path = os.path.join(model_path, 'hunyuan3d-delight-v2-0')
92
  multiview_model_path = os.path.join(model_path, 'hunyuan3d-paint-v2-0')
 
 
 
 
 
93
 
94
  return cls(Hunyuan3DTexGenConfig(delight_model_path, multiview_model_path))
95
+
96
  except Exception as e:
97
  print("构造 Hunyuan3DPaintPipeline 实例时出错:", e)
 
 
98
  raise
99
+
 
 
 
 
100
  else:
101
  return cls(Hunyuan3DTexGenConfig(delight_model_path, multiview_model_path))
102
 
103
+ raise FileNotFoundError(f"Model path {original_model_path} not found and Hub download failed.")
104
+
105
 
106
  def __init__(self, config):
107
  self.config = config
108
  self.models = {}
109
+
110
  self.render = MeshRender(
111
  default_resolution=self.config.render_size,
112
+ texture_size=self.config.texture_size
113
+ )
114
 
115
  self.load_models()
116
 
117
+
118
+ # -------------------------------------------
119
+ # Load Models — Dynamic CUDA handling
120
+ # -------------------------------------------
121
  def load_models(self):
122
+
123
+ # Originally forced CUDA:
124
+ # torch.cuda.empty_cache()
125
+
126
+ if torch.cuda.is_available():
127
+ torch.cuda.empty_cache()
128
+
129
  self.models['delight_model'] = Light_Shadow_Remover(self.config)
130
  self.models['multiview_model'] = Multiview_Diffusion_Net(self.config)
131
  # self.models['super_model'] = Image_Super_Net(self.config)
132
 
 
 
 
133
 
134
+ def enable_model_cpu_offload(
135
+ self,
136
+ gpu_id: Optional[int] = None,
137
+ device: Union[torch.device, str] = None
138
+ ):
139
+ if device is None:
140
+ device = self.config.device
141
+
142
+ if hasattr(self.models['delight_model'], "pipeline"):
143
+ self.models['delight_model'].pipeline.enable_model_cpu_offload(
144
+ gpu_id=gpu_id, device=device
145
+ )
146
+
147
+ if hasattr(self.models['multiview_model'], "pipeline"):
148
+ self.models['multiview_model'].pipeline.enable_model_cpu_offload(
149
+ gpu_id=gpu_id, device=device
150
+ )
151
+
152
+
153
+ # -------------------------------------------
154
+ # Rendering functions unchanged
155
+ # -------------------------------------------
156
  def render_normal_multiview(self, camera_elevs, camera_azims, use_abs_coor=True):
157
  normal_maps = []
158
  for elev, azim in zip(camera_elevs, camera_azims):
 
162
 
163
  return normal_maps
164
 
165
+
166
  def render_position_multiview(self, camera_elevs, camera_azims):
167
  position_maps = []
168
  for elev, azim in zip(camera_elevs, camera_azims):
169
  position_map = self.render.render_position(
170
  elev, azim, return_type='pl')
171
  position_maps.append(position_map)
 
172
  return position_maps
173
 
174
+
175
  def bake_from_multiview(self, views, camera_elevs,
176
  camera_azims, view_weights, method='graphcut'):
177
  project_textures, project_weighted_cos_maps = [], []
178
  project_boundary_maps = []
179
+
180
  for view, camera_elev, camera_azim, weight in zip(
181
+ views, camera_elevs, camera_azims, view_weights
182
+ ):
183
  project_texture, project_cos_map, project_boundary_map = self.render.back_project(
184
+ view, camera_elev, camera_azim
185
+ )
186
+
187
  project_cos_map = weight * (project_cos_map ** self.config.bake_exp)
188
  project_textures.append(project_texture)
189
  project_weighted_cos_maps.append(project_cos_map)
 
194
  project_textures, project_weighted_cos_maps)
195
  else:
196
  raise f'no method {method}'
197
+
198
  return texture, ori_trust_map > 1E-8
199
 
200
+
201
  def texture_inpaint(self, texture, mask):
202
 
203
  texture_np = self.render.uv_inpaint(texture, mask)
 
205
 
206
  return texture
207
 
208
+
209
  def recenter_image(self, image, border_ratio=0.2):
210
  if image.mode == 'RGB':
211
  return image
212
  elif image.mode == 'L':
213
+ return image.convert('RGB')
 
214
 
215
+ alpha = np.array(image)[:, :, 3]
216
+ non_zero = np.argwhere(alpha > 0)
217
+ if non_zero.size == 0:
218
+ raise ValueError("Image fully transparent")
219
 
220
+ min_row, min_col = non_zero.min(axis=0)
221
+ max_row, max_col = non_zero.max(axis=0)
222
 
223
+ cropped = image.crop((min_col, min_row, max_col + 1, max_row + 1))
224
 
225
+ w, h = cropped.size
226
+ bw = int(w * border_ratio)
227
+ bh = int(h * border_ratio)
228
 
229
+ new_w = w + 2 * bw
230
+ new_h = h + 2 * bh
231
+ sq = max(new_w, new_h)
232
 
233
+ new_img = Image.new('RGBA', (sq, sq), (255, 255, 255, 0))
234
+ new_img.paste(cropped, ((sq - new_w) // 2 + bw, (sq - new_h) // 2 + bh))
235
 
236
+ return new_img
237
 
 
 
 
 
 
238
 
239
  @torch.no_grad()
240
  def __call__(self, mesh, image):
 
246
 
247
  image_prompt = self.recenter_image(image_prompt)
248
 
249
+ # delight
250
  image_prompt = self.models['delight_model'](image_prompt)
251
 
252
  mesh = mesh_uv_wrap(mesh)
 
253
  self.render.load_mesh(mesh)
254
 
255
+ elevs = self.config.candidate_camera_elevs
256
+ azims = self.config.candidate_camera_azims
257
+ weights = self.config.candidate_view_weights
258
 
259
+ normal_maps = self.render_normal_multiview(elevs, azims)
260
+ position_maps = self.render_position_multiview(elevs, azims)
 
 
261
 
262
+ camera_info = [
263
+ (((azim // 30) + 9) % 12) //
264
+ {-20: 1, 0: 1, 20: 1, -90: 3, 90: 3}[elev] +
265
+ {-20: 0, 0: 12, 20: 24, -90: 36, 90: 40}[elev]
266
+ for azim, elev in zip(azims, elevs)
267
+ ]
268
+
269
+ multiviews = self.models['multiview_model'](
270
+ image_prompt, normal_maps + position_maps, camera_info
271
+ )
272
 
273
  for i in range(len(multiviews)):
 
274
  multiviews[i] = multiviews[i].resize(
275
+ (self.config.render_size, self.config.render_size)
276
+ )
277
 
278
+ texture, mask = self.bake_from_multiview(
279
+ multiviews, elevs, azims, weights, method=self.config.merge_method
280
+ )
281
 
282
  mask_np = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8)
283
 
284
  texture = self.texture_inpaint(texture, mask_np)
 
285
  self.render.set_texture(texture)
 
286
 
287
+ return self.render.save_mesh()